Diffstat (limited to 'compiler/optimizing')
197 files changed, 5145 insertions, 2437 deletions
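For context, the recurring change throughout this diff is rewriting `namespace art {` as `namespace art HIDDEN {`, which relies on the symbol-visibility macros pulled in via `base/macros.h`. The sketch below illustrates the idea; the exact macro expansions and the `PublicEntryPoint` example are assumptions for illustration, not copied from the patch.

```cpp
// Minimal sketch of the visibility macros this diff depends on
// (see art/libartbase/base/macros.h; definitions assumed here).
#define HIDDEN __attribute__((visibility("hidden")))
#define EXPORT __attribute__((visibility("default")))

// C++11 allows an attribute on a namespace definition, so every symbol
// declared inside defaults to hidden visibility in the built library
// unless it is explicitly re-exported.
namespace art HIDDEN {

class HBasicBlockBuilder {  // hidden: not part of the library's exported API
 public:
  void Build();
};

EXPORT void PublicEntryPoint();  // hypothetical selectively exported symbol

}  // namespace art
```

With the namespace annotated this way, individual headers only need to include `base/macros.h` (which is why many hunks in this diff add that include) rather than annotating each class or function separately.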
diff --git a/compiler/optimizing/block_builder.cc b/compiler/optimizing/block_builder.cc index e1f061ae70..703584c537 100644 --- a/compiler/optimizing/block_builder.cc +++ b/compiler/optimizing/block_builder.cc @@ -22,7 +22,7 @@ #include "dex/dex_file_exception_helpers.h" #include "quicken_info.h" -namespace art { +namespace art HIDDEN { HBasicBlockBuilder::HBasicBlockBuilder(HGraph* graph, const DexFile* const dex_file, diff --git a/compiler/optimizing/block_builder.h b/compiler/optimizing/block_builder.h index 42a3f327e7..8668ef8221 100644 --- a/compiler/optimizing/block_builder.h +++ b/compiler/optimizing/block_builder.h @@ -17,13 +17,14 @@ #ifndef ART_COMPILER_OPTIMIZING_BLOCK_BUILDER_H_ #define ART_COMPILER_OPTIMIZING_BLOCK_BUILDER_H_ +#include "base/macros.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" #include "dex/code_item_accessors.h" #include "dex/dex_file.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { class HBasicBlockBuilder : public ValueObject { public: diff --git a/compiler/optimizing/block_namer.cc b/compiler/optimizing/block_namer.cc index d30448cd23..029e26b2be 100644 --- a/compiler/optimizing/block_namer.cc +++ b/compiler/optimizing/block_namer.cc @@ -18,7 +18,7 @@ #include "nodes.h" -namespace art { +namespace art HIDDEN { std::ostream& BlockNamer::PrintName(std::ostream& os, HBasicBlock* blk) const { os << "B"; diff --git a/compiler/optimizing/block_namer.h b/compiler/optimizing/block_namer.h index ed396b9bf8..39c5973297 100644 --- a/compiler/optimizing/block_namer.h +++ b/compiler/optimizing/block_namer.h @@ -19,7 +19,9 @@ #include <ostream> -namespace art { +#include "base/macros.h" + +namespace art HIDDEN { class HBasicBlock; struct BlockNamer { diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index dad3c818fa..919abfdc49 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -24,7 +24,7 @@ #include "nodes.h" #include "side_effects_analysis.h" -namespace art { +namespace art HIDDEN { class MonotonicValueRange; @@ -490,7 +490,7 @@ class MonotonicValueRange : public ValueRange { DISALLOW_COPY_AND_ASSIGN(MonotonicValueRange); }; -class BCEVisitor : public HGraphVisitor { +class BCEVisitor final : public HGraphVisitor { public: // The least number of bounds checks that should be eliminated by triggering // the deoptimization technique. @@ -564,6 +564,19 @@ class BCEVisitor : public HGraphVisitor { early_exit_loop_.clear(); taken_test_loop_.clear(); finite_loop_.clear(); + + // We may have eliminated all bounds checks so we should update the flag. + // TODO(solanes): Do this without a linear pass of the graph? + GetGraph()->SetHasBoundsChecks(false); + for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) { + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instruction = it.Current(); + if (instruction->IsBoundsCheck()) { + GetGraph()->SetHasBoundsChecks(true); + return; + } + } + } } private: @@ -1818,6 +1831,7 @@ class BCEVisitor : public HGraphVisitor { HInstruction* condition, bool is_null_check = false) { HInstruction* suspend = loop->GetSuspendCheck(); + DCHECK(suspend != nullptr); block->InsertInstructionBefore(condition, block->GetLastInstruction()); DeoptimizationKind kind = is_null_check ? 
DeoptimizationKind::kLoopNullBCE : DeoptimizationKind::kLoopBoundsBCE; @@ -1997,7 +2011,7 @@ class BCEVisitor : public HGraphVisitor { phi->SetRawInputAt(0, instruction); phi->SetRawInputAt(1, zero); if (type == DataType::Type::kReference) { - phi->SetReferenceTypeInfo(instruction->GetReferenceTypeInfo()); + phi->SetReferenceTypeInfoIfValid(instruction->GetReferenceTypeInfo()); } new_preheader->AddPhi(phi); return phi; diff --git a/compiler/optimizing/bounds_check_elimination.h b/compiler/optimizing/bounds_check_elimination.h index ef08877daa..f210fa9127 100644 --- a/compiler/optimizing/bounds_check_elimination.h +++ b/compiler/optimizing/bounds_check_elimination.h @@ -17,9 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_BOUNDS_CHECK_ELIMINATION_H_ #define ART_COMPILER_OPTIMIZING_BOUNDS_CHECK_ELIMINATION_H_ +#include "base/macros.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { class SideEffectsAnalysis; class HInductionVarAnalysis; diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index 5927d681b2..929a9e7fe7 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -17,6 +17,7 @@ #include "bounds_check_elimination.h" #include "base/arena_allocator.h" +#include "base/macros.h" #include "builder.h" #include "gvn.h" #include "induction_var_analysis.h" @@ -27,7 +28,7 @@ #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { /** * Fixture class for the BoundsCheckElimination tests. diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index e7826bbba3..48d1a9da2f 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -33,7 +33,7 @@ #include "ssa_builder.h" #include "thread.h" -namespace art { +namespace art HIDDEN { HGraphBuilder::HGraphBuilder(HGraph* graph, const CodeItemDebugInfoAccessor& accessor, @@ -103,7 +103,6 @@ GraphAnalysisResult HGraphBuilder::BuildGraph() { graph_->SetNumberOfVRegs(code_item_accessor_.RegistersSize()); graph_->SetNumberOfInVRegs(code_item_accessor_.InsSize()); graph_->SetMaximumNumberOfOutVRegs(code_item_accessor_.OutsSize()); - graph_->SetHasTryCatch(code_item_accessor_.TriesSize() != 0); // Use ScopedArenaAllocator for all local allocations. ScopedArenaAllocator local_allocator(graph_->GetArenaStack()); @@ -168,7 +167,6 @@ void HGraphBuilder::BuildIntrinsicGraph(ArtMethod* method) { graph_->SetNumberOfVRegs(return_vregs + num_arg_vregs); graph_->SetNumberOfInVRegs(num_arg_vregs); graph_->SetMaximumNumberOfOutVRegs(num_arg_vregs); - graph_->SetHasTryCatch(false); // Use ScopedArenaAllocator for all local allocations. 
ScopedArenaAllocator local_allocator(graph_->GetArenaStack()); diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index 580769e0f9..ef225d9a6a 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -19,12 +19,13 @@ #include "base/arena_object.h" #include "base/array_ref.h" +#include "base/macros.h" #include "dex/code_item_accessors.h" #include "dex/dex_file-inl.h" #include "dex/dex_file.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { class ArtMethod; class CodeGenerator; diff --git a/compiler/optimizing/cha_guard_optimization.cc b/compiler/optimizing/cha_guard_optimization.cc index c6232ef661..20a763cf6d 100644 --- a/compiler/optimizing/cha_guard_optimization.cc +++ b/compiler/optimizing/cha_guard_optimization.cc @@ -16,7 +16,7 @@ #include "cha_guard_optimization.h" -namespace art { +namespace art HIDDEN { // Note we can only do CHA guard elimination/motion in a single pass, since // if a guard is not removed, another guard might be removed due to @@ -200,6 +200,7 @@ bool CHAGuardVisitor::HoistGuard(HShouldDeoptimizeFlag* flag, block->RemoveInstruction(deopt); HInstruction* suspend = loop_info->GetSuspendCheck(); + DCHECK(suspend != nullptr); // Need a new deoptimize instruction that copies the environment // of the suspend instruction for the loop. HDeoptimize* deoptimize = new (GetGraph()->GetAllocator()) HDeoptimize( diff --git a/compiler/optimizing/cha_guard_optimization.h b/compiler/optimizing/cha_guard_optimization.h index 440d51a969..5c1fdd90de 100644 --- a/compiler/optimizing/cha_guard_optimization.h +++ b/compiler/optimizing/cha_guard_optimization.h @@ -17,9 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_CHA_GUARD_OPTIMIZATION_H_ #define ART_COMPILER_OPTIMIZING_CHA_GUARD_OPTIMIZATION_H_ +#include "base/macros.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { /** * Optimize CHA guards by removing/moving them. diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 27eabafb8f..c9f42b52f5 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -15,6 +15,7 @@ */ #include "code_generator.h" +#include "base/globals.h" #ifdef ART_ENABLE_CODEGEN_arm #include "code_generator_arm_vixl.h" @@ -24,6 +25,10 @@ #include "code_generator_arm64.h" #endif +#ifdef ART_ENABLE_CODEGEN_riscv64 +#include "code_generator_riscv64.h" +#endif + #ifdef ART_ENABLE_CODEGEN_x86 #include "code_generator_x86.h" #endif @@ -39,7 +44,6 @@ #include "base/leb128.h" #include "class_linker.h" #include "class_root-inl.h" -#include "compiled_method.h" #include "dex/bytecode_utils.h" #include "dex/code_item_accessors-inl.h" #include "graph_visualizer.h" @@ -61,7 +65,7 @@ #include "thread-current-inl.h" #include "utils/assembler.h" -namespace art { +namespace art HIDDEN { // Return whether a location is consistent with a type. 
static bool CheckType(DataType::Type type, Location location) { @@ -389,7 +393,8 @@ void CodeGenerator::Compile(CodeAllocator* allocator) { core_spill_mask_, fpu_spill_mask_, GetGraph()->GetNumberOfVRegs(), - GetGraph()->IsCompilingBaseline()); + GetGraph()->IsCompilingBaseline(), + GetGraph()->IsDebuggable()); size_t frame_start = GetAssembler()->CodeSize(); GenerateFrameEntry(); @@ -412,7 +417,13 @@ void CodeGenerator::Compile(CodeAllocator* allocator) { for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* current = it.Current(); if (current->HasEnvironment()) { - // Create stackmap for HNativeDebugInfo or any instruction which calls native code. + // Catch StackMaps are dealt with later on in `RecordCatchBlockInfo`. + if (block->IsCatchBlock() && block->GetFirstInstruction() == current) { + DCHECK(current->IsNop()); + continue; + } + + // Create stackmap for HNop or any instruction which calls native code. // Note that we need correct mapping for the native PC of the call instruction, // so the runtime's stackmap is not sufficient since it is at PC after the call. MaybeRecordNativeDebugInfo(current, block->GetDexPc()); @@ -1030,6 +1041,9 @@ std::unique_ptr<CodeGenerator> CodeGenerator::Create(HGraph* graph, } #endif default: + UNUSED(allocator); + UNUSED(graph); + UNUSED(stats); return nullptr; } } @@ -1041,7 +1055,8 @@ CodeGenerator::CodeGenerator(HGraph* graph, uint32_t core_callee_save_mask, uint32_t fpu_callee_save_mask, const CompilerOptions& compiler_options, - OptimizingCompilerStats* stats) + OptimizingCompilerStats* stats, + const art::ArrayRef<const bool>& unimplemented_intrinsics) : frame_size_(0), core_spill_mask_(0), fpu_spill_mask_(0), @@ -1066,7 +1081,8 @@ CodeGenerator::CodeGenerator(HGraph* graph, is_leaf_(true), needs_suspend_check_entry_(false), requires_current_method_(false), - code_generation_data_() { + code_generation_data_(), + unimplemented_intrinsics_(unimplemented_intrinsics) { if (GetGraph()->IsCompilingOsr()) { // Make OSR methods have all registers spilled, this simplifies the logic of // jumping to the compiled code directly. @@ -1123,7 +1139,7 @@ static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph, for (HBasicBlock* block : graph.GetReversePostOrder()) { if (block->IsLoopHeader()) { HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck(); - if (!suspend_check->GetEnvironment()->IsFromInlinedInvoke()) { + if (suspend_check != nullptr && !suspend_check->GetEnvironment()->IsFromInlinedInvoke()) { loop_headers.push_back(suspend_check); } } @@ -1333,53 +1349,43 @@ void CodeGenerator::RecordCatchBlockInfo() { continue; } - uint32_t dex_pc = block->GetDexPc(); - uint32_t num_vregs = graph_->GetNumberOfVRegs(); - uint32_t native_pc = GetAddressOf(block); + // Get the outer dex_pc. We save the full environment list for DCHECK purposes in kIsDebugBuild. 
+ std::vector<uint32_t> dex_pc_list_for_verification; + if (kIsDebugBuild) { + dex_pc_list_for_verification.push_back(block->GetDexPc()); + } + DCHECK(block->GetFirstInstruction()->IsNop()); + DCHECK(block->GetFirstInstruction()->AsNop()->NeedsEnvironment()); + HEnvironment* const environment = block->GetFirstInstruction()->GetEnvironment(); + DCHECK(environment != nullptr); + HEnvironment* outer_environment = environment; + while (outer_environment->GetParent() != nullptr) { + outer_environment = outer_environment->GetParent(); + if (kIsDebugBuild) { + dex_pc_list_for_verification.push_back(outer_environment->GetDexPc()); + } + } + + if (kIsDebugBuild) { + // dex_pc_list_for_verification is set from innnermost to outermost. Let's reverse it + // since we are expected to pass from outermost to innermost. + std::reverse(dex_pc_list_for_verification.begin(), dex_pc_list_for_verification.end()); + DCHECK_EQ(dex_pc_list_for_verification.front(), outer_environment->GetDexPc()); + } - stack_map_stream->BeginStackMapEntry(dex_pc, + uint32_t native_pc = GetAddressOf(block); + stack_map_stream->BeginStackMapEntry(outer_environment->GetDexPc(), native_pc, /* register_mask= */ 0, /* sp_mask= */ nullptr, - StackMap::Kind::Catch); - - HInstruction* current_phi = block->GetFirstPhi(); - for (size_t vreg = 0; vreg < num_vregs; ++vreg) { - while (current_phi != nullptr && current_phi->AsPhi()->GetRegNumber() < vreg) { - HInstruction* next_phi = current_phi->GetNext(); - DCHECK(next_phi == nullptr || - current_phi->AsPhi()->GetRegNumber() <= next_phi->AsPhi()->GetRegNumber()) - << "Phis need to be sorted by vreg number to keep this a linear-time loop."; - current_phi = next_phi; - } + StackMap::Kind::Catch, + /* needs_vreg_info= */ true, + dex_pc_list_for_verification); - if (current_phi == nullptr || current_phi->AsPhi()->GetRegNumber() != vreg) { - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kNone, 0); - } else { - Location location = current_phi->GetLocations()->Out(); - switch (location.GetKind()) { - case Location::kStackSlot: { - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kInStack, location.GetStackIndex()); - break; - } - case Location::kDoubleStackSlot: { - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kInStack, location.GetStackIndex()); - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kInStack, location.GetHighStackIndex(kVRegSize)); - ++vreg; - DCHECK_LT(vreg, num_vregs); - break; - } - default: { - // All catch phis must be allocated to a stack slot. - LOG(FATAL) << "Unexpected kind " << location.GetKind(); - UNREACHABLE(); - } - } - } - } + EmitEnvironment(environment, + /* slow_path= */ nullptr, + /* needs_vreg_info= */ true, + /* is_for_catch_handler= */ true); stack_map_stream->EndStackMapEntry(); } @@ -1390,7 +1396,9 @@ void CodeGenerator::AddSlowPath(SlowPathCode* slow_path) { code_generation_data_->AddSlowPath(slow_path); } -void CodeGenerator::EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_path) { +void CodeGenerator::EmitVRegInfo(HEnvironment* environment, + SlowPathCode* slow_path, + bool is_for_catch_handler) { StackMapStream* stack_map_stream = GetStackMapStream(); // Walk over the environment, and record the location of dex registers. 
for (size_t i = 0, environment_size = environment->Size(); i < environment_size; ++i) { @@ -1445,6 +1453,7 @@ void CodeGenerator::EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_p } case Location::kRegister : { + DCHECK(!is_for_catch_handler); int id = location.reg(); if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(id)) { uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(id); @@ -1466,6 +1475,7 @@ void CodeGenerator::EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_p } case Location::kFpuRegister : { + DCHECK(!is_for_catch_handler); int id = location.reg(); if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(id)) { uint32_t offset = slow_path->GetStackOffsetOfFpuRegister(id); @@ -1487,6 +1497,7 @@ void CodeGenerator::EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_p } case Location::kFpuRegisterPair : { + DCHECK(!is_for_catch_handler); int low = location.low(); int high = location.high(); if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(low)) { @@ -1508,6 +1519,7 @@ void CodeGenerator::EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_p } case Location::kRegisterPair : { + DCHECK(!is_for_catch_handler); int low = location.low(); int high = location.high(); if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(low)) { @@ -1538,9 +1550,54 @@ void CodeGenerator::EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_p } } +void CodeGenerator::EmitVRegInfoOnlyCatchPhis(HEnvironment* environment) { + StackMapStream* stack_map_stream = GetStackMapStream(); + DCHECK(environment->GetHolder()->GetBlock()->IsCatchBlock()); + DCHECK_EQ(environment->GetHolder()->GetBlock()->GetFirstInstruction(), environment->GetHolder()); + HInstruction* current_phi = environment->GetHolder()->GetBlock()->GetFirstPhi(); + for (size_t vreg = 0; vreg < environment->Size(); ++vreg) { + while (current_phi != nullptr && current_phi->AsPhi()->GetRegNumber() < vreg) { + HInstruction* next_phi = current_phi->GetNext(); + DCHECK(next_phi == nullptr || + current_phi->AsPhi()->GetRegNumber() <= next_phi->AsPhi()->GetRegNumber()) + << "Phis need to be sorted by vreg number to keep this a linear-time loop."; + current_phi = next_phi; + } + + if (current_phi == nullptr || current_phi->AsPhi()->GetRegNumber() != vreg) { + stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kNone, 0); + } else { + Location location = current_phi->GetLocations()->Out(); + switch (location.GetKind()) { + case Location::kStackSlot: { + stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, + location.GetStackIndex()); + break; + } + case Location::kDoubleStackSlot: { + stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, + location.GetStackIndex()); + stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, + location.GetHighStackIndex(kVRegSize)); + ++vreg; + DCHECK_LT(vreg, environment->Size()); + break; + } + default: { + LOG(FATAL) << "All catch phis must be allocated to a stack slot. 
Unexpected kind " + << location.GetKind(); + UNREACHABLE(); + } + } + } + } +} + void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path, - bool needs_vreg_info) { + bool needs_vreg_info, + bool is_for_catch_handler, + bool innermost_environment) { if (environment == nullptr) return; StackMapStream* stack_map_stream = GetStackMapStream(); @@ -1548,7 +1605,11 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, if (emit_inline_info) { // We emit the parent environment first. - EmitEnvironment(environment->GetParent(), slow_path, needs_vreg_info); + EmitEnvironment(environment->GetParent(), + slow_path, + needs_vreg_info, + is_for_catch_handler, + /* innermost_environment= */ false); stack_map_stream->BeginInlineInfoEntry(environment->GetMethod(), environment->GetDexPc(), needs_vreg_info ? environment->Size() : 0, @@ -1556,9 +1617,13 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, this); } + // If a dex register map is not required we just won't emit it. if (needs_vreg_info) { - // If a dex register map is not required we just won't emit it. - EmitVRegInfo(environment, slow_path); + if (innermost_environment && is_for_catch_handler) { + EmitVRegInfoOnlyCatchPhis(environment); + } else { + EmitVRegInfo(environment, slow_path, is_for_catch_handler); + } } if (emit_inline_info) { @@ -1671,7 +1736,7 @@ void CodeGenerator::ValidateInvokeRuntime(QuickEntrypointEnum entrypoint, // When (non-Baker) read barriers are enabled, some instructions // use a slow path to emit a read barrier, which does not trigger // GC. - (kEmitCompilerReadBarrier && + (gUseReadBarrier && !kUseBakerReadBarrier && (instruction->IsInstanceFieldGet() || instruction->IsPredicatedInstanceFieldGet() || diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index d81a7b5382..9872efaa4a 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -26,6 +26,7 @@ #include "base/bit_utils.h" #include "base/enums.h" #include "base/globals.h" +#include "base/macros.h" #include "base/memory_region.h" #include "class_root.h" #include "dex/string_reference.h" @@ -33,13 +34,15 @@ #include "graph_visualizer.h" #include "locations.h" #include "nodes.h" +#include "oat_quick_method_header.h" #include "optimizing_compiler_stats.h" #include "read_barrier_option.h" #include "stack.h" +#include "subtype_check.h" #include "utils/assembler.h" #include "utils/label.h" -namespace art { +namespace art HIDDEN { // Binary encoding of 2^32 for type double. static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000); @@ -56,8 +59,18 @@ static int32_t constexpr kPrimIntMax = 0x7fffffff; // Maximum value for a primitive long. static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff); -static constexpr ReadBarrierOption kCompilerReadBarrierOption = - kEmitCompilerReadBarrier ? kWithReadBarrier : kWithoutReadBarrier; +static const ReadBarrierOption gCompilerReadBarrierOption = + gUseReadBarrier ? 
kWithReadBarrier : kWithoutReadBarrier; + +constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); +constexpr size_t status_byte_offset = + mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); +constexpr uint32_t shifted_visibly_initialized_value = + enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte); +constexpr uint32_t shifted_initializing_value = + enum_cast<uint32_t>(ClassStatus::kInitializing) << (status_lsb_position % kBitsPerByte); +constexpr uint32_t shifted_initialized_value = + enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); class Assembler; class CodeGenerator; @@ -291,6 +304,12 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { // Returns whether we should split long moves in parallel moves. virtual bool ShouldSplitLongMoves() const { return false; } + // Returns true if `invoke` is an implemented intrinsic in this codegen's arch. + bool IsImplementedIntrinsic(HInvoke* invoke) const { + return invoke->IsIntrinsic() && + !unimplemented_intrinsics_[static_cast<size_t>(invoke->GetIntrinsic())]; + } + size_t GetNumberOfCoreCalleeSaveRegisters() const { return POPCOUNT(core_callee_save_mask_); } @@ -460,7 +479,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { // If the target class is in the boot image, it's non-moveable and it doesn't matter // if we compare it with a from-space or to-space reference, the result is the same. // It's OK to traverse a class hierarchy jumping between from-space and to-space. - return kEmitCompilerReadBarrier && !instance_of->GetTargetClass()->IsInBootImage(); + return gUseReadBarrier && !instance_of->GetTargetClass()->IsInBootImage(); } static ReadBarrierOption ReadBarrierOptionForInstanceOf(HInstanceOf* instance_of) { @@ -475,7 +494,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { case TypeCheckKind::kArrayObjectCheck: case TypeCheckKind::kInterfaceCheck: { bool needs_read_barrier = - kEmitCompilerReadBarrier && !check_cast->GetTargetClass()->IsInBootImage(); + gUseReadBarrier && !check_cast->GetTargetClass()->IsInBootImage(); // We do not emit read barriers for HCheckCast, so we can get false negatives // and the slow path shall re-check and simply return if the cast is actually OK. return !needs_read_barrier; @@ -678,7 +697,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { return LocationSummary::kCallOnMainOnly; case HLoadString::LoadKind::kJitTableAddress: DCHECK(!load->NeedsEnvironment()); - return kEmitCompilerReadBarrier + return gUseReadBarrier ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; break; @@ -736,7 +755,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { uint32_t core_callee_save_mask, uint32_t fpu_callee_save_mask, const CompilerOptions& compiler_options, - OptimizingCompilerStats* stats); + OptimizingCompilerStats* stats, + const art::ArrayRef<const bool>& unimplemented_intrinsics); virtual HGraphVisitor* GetLocationBuilder() = 0; virtual HGraphVisitor* GetInstructionVisitor() = 0; @@ -836,8 +856,11 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { void BlockIfInRegister(Location location, bool is_out = false) const; void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path, - bool needs_vreg_info = true); - void EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_path); + bool needs_vreg_info = true, + bool is_for_catch_handler = false, + bool innermost_environment = true); + void EmitVRegInfo(HEnvironment* environment, SlowPathCode* slow_path, bool is_for_catch_handler); + void EmitVRegInfoOnlyCatchPhis(HEnvironment* environment); static void PrepareCriticalNativeArgumentMoves( HInvokeStaticOrDirect* invoke, @@ -877,6 +900,9 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { // CodeGenerator::Compile() and remains alive until the CodeGenerator is destroyed. std::unique_ptr<CodeGenerationData> code_generation_data_; + // Which intrinsics we don't have handcrafted code for. + art::ArrayRef<const bool> unimplemented_intrinsics_; + friend class OptimizingCFITest; ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeSIMD); ART_FRIEND_TEST(CodegenTest, ARM64FrameSizeNoSIMD); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 2a0b481b2d..41db9a2542 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -27,7 +27,6 @@ #include "class_root-inl.h" #include "class_table.h" #include "code_generator_utils.h" -#include "compiled_method.h" #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" @@ -44,6 +43,7 @@ #include "mirror/var_handle.h" #include "offsets.h" #include "optimizing/common_arm64.h" +#include "optimizing/nodes.h" #include "thread.h" #include "utils/arm64/assembler_arm64.h" #include "utils/assembler.h" @@ -58,7 +58,7 @@ using vixl::EmissionCheckScope; #error "ARM64 Codegen VIXL macro-assembler macro already defined." 
#endif -namespace art { +namespace art HIDDEN { template<class MirrorType> class GcRoot; @@ -77,7 +77,6 @@ using helpers::InputFPRegisterAt; using helpers::InputOperandAt; using helpers::InputRegisterAt; using helpers::Int64FromLocation; -using helpers::IsConstantZeroBitPattern; using helpers::LocationFrom; using helpers::OperandFromMemOperand; using helpers::OutputCPURegister; @@ -583,7 +582,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { obj_(obj), offset_(offset), index_(index) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); // If `obj` is equal to `out` or `ref`, it means the initial object // has been overwritten by (or after) the heap object reference load // to be instrumented, e.g.: @@ -762,7 +761,7 @@ class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 { public: ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root) : SlowPathCodeARM64(instruction), out_(out), root_(root) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); } void EmitNativeCode(CodeGenerator* codegen) override { @@ -825,6 +824,9 @@ class MethodEntryExitHooksSlowPathARM64 : public SlowPathCodeARM64 { CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); + if (instruction_->IsMethodExitHook()) { + __ Mov(vixl::aarch64::x4, arm64_codegen->GetFrameSize()); + } arm64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this); RestoreLiveRegisters(codegen, locations); __ B(GetExitLabel()); @@ -933,6 +935,33 @@ Location CriticalNativeCallingConventionVisitorARM64::GetMethodLocation() const return Location::RegisterLocation(x15.GetCode()); } +namespace detail { +// Mark which intrinsics we don't have handcrafted code for. +template <Intrinsics T> +struct IsUnimplemented { + bool is_unimplemented = false; +}; + +#define TRUE_OVERRIDE(Name) \ + template <> \ + struct IsUnimplemented<Intrinsics::k##Name> { \ + bool is_unimplemented = true; \ + }; +UNIMPLEMENTED_INTRINSIC_LIST_ARM64(TRUE_OVERRIDE) +#undef TRUE_OVERRIDE + +#include "intrinsics_list.h" +static constexpr bool kIsIntrinsicUnimplemented[] = { + false, // kNone +#define IS_UNIMPLEMENTED(Intrinsic, ...) \ + IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, + INTRINSICS_LIST(IS_UNIMPLEMENTED) +#undef IS_UNIMPLEMENTED +}; +#undef INTRINSICS_LIST + +} // namespace detail + CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats) @@ -943,7 +972,8 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, callee_saved_core_registers.GetList(), callee_saved_fp_registers.GetList(), compiler_options, - stats), + stats, + ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)), block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), location_builder_neon_(graph, this), @@ -1169,9 +1199,21 @@ void InstructionCodeGeneratorARM64::GenerateMethodEntryExitHook(HInstruction* in new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARM64(instruction); codegen_->AddSlowPath(slow_path); + if (instruction->IsMethodExitHook()) { + // Check if we are required to check if the caller needs a deoptimization. Strictly speaking it + // would be sufficient to check if CheckCallerForDeopt bit is set. Though it is faster to check + // if it is just non-zero. 
kCHA bit isn't used in debuggable runtimes as cha optimization is + // disabled in debuggable runtime. The other bit is used when this method itself requires a + // deoptimization due to redefinition. So it is safe to just check for non-zero value here. + __ Ldr(value, MemOperand(sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag())); + __ Cbnz(value, slow_path->GetEntryLabel()); + } + uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation()); - int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value(); - __ Mov(temp, address + offset); + MemberOffset offset = instruction->IsMethodExitHook() ? + instrumentation::Instrumentation::HaveMethodExitListenersOffset() : + instrumentation::Instrumentation::HaveMethodEntryListenersOffset(); + __ Mov(temp, address + offset.Int32Value()); __ Ldrb(value, MemOperand(temp, 0)); __ Cbnz(value, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); @@ -1233,6 +1275,54 @@ void CodeGeneratorARM64::MaybeIncrementHotness(bool is_frame_entry) { void CodeGeneratorARM64::GenerateFrameEntry() { MacroAssembler* masm = GetVIXLAssembler(); + + // Check if we need to generate the clinit check. We will jump to the + // resolution stub if the class is not initialized and the executing thread is + // not the thread initializing it. + // We do this before constructing the frame to get the correct stack trace if + // an exception is thrown. + if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) { + UseScratchRegisterScope temps(masm); + vixl::aarch64::Label resolution; + vixl::aarch64::Label memory_barrier; + + Register temp1 = temps.AcquireW(); + Register temp2 = temps.AcquireW(); + + // Check if we're visibly initialized. + + // We don't emit a read barrier here to save on code size. We rely on the + // resolution trampoline to do a suspend check before re-entering this code. + __ Ldr(temp1, MemOperand(kArtMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value())); + __ Ldrb(temp2, HeapOperand(temp1, status_byte_offset)); + __ Cmp(temp2, shifted_visibly_initialized_value); + __ B(hs, &frame_entry_label_); + + // Check if we're initialized and jump to code that does a memory barrier if + // so. + __ Cmp(temp2, shifted_initialized_value); + __ B(hs, &memory_barrier); + + // Check if we're initializing and the thread initializing is the one + // executing the code. + __ Cmp(temp2, shifted_initializing_value); + __ B(lo, &resolution); + + __ Ldr(temp1, HeapOperand(temp1, mirror::Class::ClinitThreadIdOffset().Int32Value())); + __ Ldr(temp2, MemOperand(tr, Thread::TidOffset<kArm64PointerSize>().Int32Value())); + __ Cmp(temp1, temp2); + __ B(eq, &frame_entry_label_); + __ Bind(&resolution); + + // Jump to the resolution stub. 
+ ThreadOffset64 entrypoint_offset = + GetThreadOffset<kArm64PointerSize>(kQuickQuickResolutionTrampoline); + __ Ldr(temp1.X(), MemOperand(tr, entrypoint_offset.Int32Value())); + __ Br(temp1.X()); + + __ Bind(&memory_barrier); + GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } __ Bind(&frame_entry_label_); bool do_overflow_check = @@ -1364,12 +1454,12 @@ void CodeGeneratorARM64::AddLocationAsTemp(Location location, LocationSummary* l } } -void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_can_be_null) { +void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool emit_null_check) { UseScratchRegisterScope temps(GetVIXLAssembler()); Register card = temps.AcquireX(); Register temp = temps.AcquireW(); // Index within the CardTable - 32bit. vixl::aarch64::Label done; - if (value_can_be_null) { + if (emit_null_check) { __ Cbz(value, &done); } // Load the address of the card table into `card`. @@ -1391,7 +1481,7 @@ void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_ // of the card to mark; and 2. to load the `kCardDirty` value) saves a load // (no need to explicitly load `kCardDirty` as an immediate value). __ Strb(card, MemOperand(card, temp.X())); - if (value_can_be_null) { + if (emit_null_check) { __ Bind(&done); } } @@ -1904,11 +1994,6 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod Register class_reg) { UseScratchRegisterScope temps(GetVIXLAssembler()); Register temp = temps.AcquireW(); - constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); - const size_t status_byte_offset = - mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); - constexpr uint32_t shifted_visibly_initialized_value = - enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte); // CMP (immediate) is limited to imm12 or imm12<<12, so we would need to materialize // the constant 0xf0000000 for comparison with the full 32-bit field. To reduce the code @@ -1974,6 +2059,13 @@ bool CodeGeneratorARM64::CanUseImplicitSuspendCheck() const { void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor) { + if (instruction->IsNoOp()) { + if (successor != nullptr) { + __ B(codegen_->GetLabelOf(successor)); + } + return; + } + if (codegen_->CanUseImplicitSuspendCheck()) { __ Ldr(kImplicitSuspendCheckRegister, MemOperand(kImplicitSuspendCheckRegister)); codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); @@ -2051,7 +2143,7 @@ void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction, bool is_predicated = instruction->IsPredicatedInstanceFieldGet(); bool object_field_get_with_read_barrier = - kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); + gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, object_field_get_with_read_barrier @@ -2107,7 +2199,7 @@ void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction, MemOperand field = HeapOperand(InputRegisterAt(instruction, receiver_input), field_info.GetFieldOffset()); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && + if (gUseReadBarrier && kUseBakerReadBarrier && load_type == DataType::Type::kReference) { // Object FieldGet with Baker's read barrier case. 
// /* HeapReference<Object> */ out = *(base + offset) @@ -2154,9 +2246,10 @@ void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); - if (IsConstantZeroBitPattern(instruction->InputAt(1))) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - } else if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) { + HInstruction* value = instruction->InputAt(1); + if (IsZeroBitPattern(value)) { + locations->SetInAt(1, Location::ConstantLocation(value)); + } else if (DataType::IsFloatingPointType(value->GetType())) { locations->SetInAt(1, Location::RequiresFpuRegister()); } else { locations->SetInAt(1, Location::RequiresRegister()); @@ -2165,7 +2258,8 @@ void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) { void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, - bool value_can_be_null) { + bool value_can_be_null, + WriteBarrierKind write_barrier_kind) { DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); bool is_predicated = instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet(); @@ -2205,8 +2299,12 @@ void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction, } } - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { - codegen_->MarkGCCard(obj, Register(value), value_can_be_null); + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)) && + write_barrier_kind != WriteBarrierKind::kDontEmit) { + codegen_->MarkGCCard( + obj, + Register(value), + value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck); } if (is_predicated) { @@ -2382,7 +2480,7 @@ void LocationsBuilderARM64::VisitDataProcWithShifterOp( LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); if (instruction->GetInstrKind() == HInstruction::kNeg) { - locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)->AsConstant())); + locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0))); } else { locations->SetInAt(0, Location::RequiresRegister()); } @@ -2475,7 +2573,7 @@ void LocationsBuilderARM64::VisitIntermediateAddressIndex(HIntermediateAddressIn // data offset constant generation out of the loop and reduce the critical path length in the // loop. locations->SetInAt(1, shift->GetValue() == 0 - ? Location::ConstantLocation(instruction->GetOffset()->AsIntConstant()) + ? 
Location::ConstantLocation(instruction->GetOffset()) : Location::RequiresRegister()); locations->SetInAt(2, Location::ConstantLocation(shift)); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); @@ -2549,7 +2647,7 @@ void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) { bool object_array_get_with_read_barrier = - kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); + gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, object_array_get_with_read_barrier @@ -2605,10 +2703,10 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { // does not support the HIntermediateAddress instruction. DCHECK(!((type == DataType::Type::kReference) && instruction->GetArray()->IsIntermediateAddress() && - kEmitCompilerReadBarrier && + gUseReadBarrier && !kUseBakerReadBarrier)); - if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (type == DataType::Type::kReference && gUseReadBarrier && kUseBakerReadBarrier) { // Object ArrayGet with Baker's read barrier case. // Note that a potential implicit null check is handled in the // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call. @@ -2750,9 +2848,10 @@ void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { instruction, needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (IsConstantZeroBitPattern(instruction->InputAt(2))) { - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->GetIndex())); + HInstruction* value = instruction->GetValue(); + if (IsZeroBitPattern(value)) { + locations->SetInAt(2, Location::ConstantLocation(value)); } else if (DataType::IsFloatingPointType(value_type)) { locations->SetInAt(2, Location::RequiresFpuRegister()); } else { @@ -2871,7 +2970,11 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { } } - codegen_->MarkGCCard(array, value.W(), /* value_can_be_null= */ false); + if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) { + DCHECK_EQ(instruction->GetWriteBarrierKind(), WriteBarrierKind::kEmitNoNullCheck) + << " Already null checked so we shouldn't do it again."; + codegen_->MarkGCCard(array, value.W(), /* emit_null_check= */ false); + } if (can_value_be_null) { DCHECK(do_store.IsLinked()); @@ -2929,10 +3032,10 @@ void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) { HInstruction* length = instruction->InputAt(1); bool both_const = index->IsConstant() && length->IsConstant(); locations->SetInAt(0, both_const - ? Location::ConstantLocation(index->AsConstant()) + ? Location::ConstantLocation(index) : ARM64EncodableConstantOrRegister(index, instruction)); locations->SetInAt(1, both_const - ? Location::ConstantLocation(length->AsConstant()) + ? 
Location::ConstantLocation(length) : ARM64EncodableConstantOrRegister(length, instruction)); } @@ -3030,6 +3133,7 @@ void LocationsBuilderARM64::VisitCompare(HCompare* compare) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall); DataType::Type in_type = compare->InputAt(0)->GetType(); + HInstruction* rhs = compare->InputAt(1); switch (in_type) { case DataType::Type::kBool: case DataType::Type::kUint8: @@ -3039,7 +3143,7 @@ void LocationsBuilderARM64::VisitCompare(HCompare* compare) { case DataType::Type::kInt32: case DataType::Type::kInt64: { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, ARM64EncodableConstantOrRegister(compare->InputAt(1), compare)); + locations->SetInAt(1, ARM64EncodableConstantOrRegister(rhs, compare)); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } @@ -3047,8 +3151,8 @@ void LocationsBuilderARM64::VisitCompare(HCompare* compare) { case DataType::Type::kFloat64: { locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, - IsFloatingPointZeroConstant(compare->InputAt(1)) - ? Location::ConstantLocation(compare->InputAt(1)->AsConstant()) + IsFloatingPointZeroConstant(rhs) + ? Location::ConstantLocation(rhs) : Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresRegister()); break; @@ -3096,16 +3200,17 @@ void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) { void LocationsBuilderARM64::HandleCondition(HCondition* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + HInstruction* rhs = instruction->InputAt(1); if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) { locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, - IsFloatingPointZeroConstant(instruction->InputAt(1)) - ? Location::ConstantLocation(instruction->InputAt(1)->AsConstant()) + IsFloatingPointZeroConstant(rhs) + ? Location::ConstantLocation(rhs) : Location::RequiresFpuRegister()); } else { // Integer cases. locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction)); + locations->SetInAt(1, ARM64EncodableConstantOrRegister(rhs, instruction)); } if (!instruction->IsEmittedAtUseSite()) { @@ -3845,12 +3950,12 @@ void InstructionCodeGeneratorARM64::VisitSelect(HSelect* select) { } } -void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) { - new (GetGraph()->GetAllocator()) LocationSummary(info); +void LocationsBuilderARM64::VisitNop(HNop* nop) { + new (GetGraph()->GetAllocator()) LocationSummary(nop); } -void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo*) { - // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile. +void InstructionCodeGeneratorARM64::VisitNop(HNop*) { + // The environment recording already happened in CodeGenerator::Compile. } void CodeGeneratorARM64::IncreaseFrame(size_t adjustment) { @@ -3893,12 +3998,15 @@ void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction } void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); + HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetValueCanBeNull(), + instruction->GetWriteBarrierKind()); } // Temp is used for read barrier. 
static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { - if (kEmitCompilerReadBarrier && + if (gUseReadBarrier && (kUseBakerReadBarrier || type_check_kind == TypeCheckKind::kAbstractClassCheck || type_check_kind == TypeCheckKind::kClassHierarchyCheck || @@ -3948,9 +4056,9 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { } locations->SetInAt(0, Location::RequiresRegister()); if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2))); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3))); } else { locations->SetInAt(1, Location::RequiresRegister()); } @@ -4194,9 +4302,9 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2))); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3))); } else { locations->SetInAt(1, Location::RequiresRegister()); } @@ -5313,7 +5421,7 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { load_kind == HLoadClass::LoadKind::kBssEntryPublic || load_kind == HLoadClass::LoadKind::kBssEntryPackage); - const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); + const bool requires_read_barrier = gUseReadBarrier && !cls->IsInBootImage(); LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; @@ -5327,7 +5435,7 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { } locations->SetOut(Location::RequiresRegister()); if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) { - if (!kUseReadBarrier || kUseBakerReadBarrier) { + if (!gUseReadBarrier || kUseBakerReadBarrier) { // Rely on the type resolution or initialization and marking to save everything we need. locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { @@ -5354,7 +5462,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA const ReadBarrierOption read_barrier_option = cls->IsInBootImage() ? 
kWithoutReadBarrier - : kCompilerReadBarrierOption; + : gCompilerReadBarrierOption; bool generate_null_check = false; switch (load_kind) { case HLoadClass::LoadKind::kReferrersClass: { @@ -5523,7 +5631,7 @@ void LocationsBuilderARM64::VisitLoadString(HLoadString* load) { } else { locations->SetOut(Location::RequiresRegister()); if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) { - if (!kUseReadBarrier || kUseBakerReadBarrier) { + if (!gUseReadBarrier || kUseBakerReadBarrier) { // Rely on the pResolveString and marking to save everything we need. locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { @@ -5577,7 +5685,7 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD temp, /* offset placeholder */ 0u, ldr_label, - kCompilerReadBarrierOption); + gCompilerReadBarrierOption); SlowPathCodeARM64* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathARM64(load); codegen_->AddSlowPath(slow_path); @@ -5601,7 +5709,7 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD out.X(), /* offset= */ 0, /* fixup_label= */ nullptr, - kCompilerReadBarrierOption); + gCompilerReadBarrierOption); return; } default: @@ -6156,7 +6264,10 @@ void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) { } void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); + HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetValueCanBeNull(), + instruction->GetWriteBarrierKind()); } void LocationsBuilderARM64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) { @@ -6462,7 +6573,7 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister( DataType::Type type = DataType::Type::kReference; Register out_reg = RegisterFrom(out, type); if (read_barrier_option == kWithReadBarrier) { - CHECK(kEmitCompilerReadBarrier); + CHECK(gUseReadBarrier); if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(out + offset) @@ -6503,7 +6614,7 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters( Register out_reg = RegisterFrom(out, type); Register obj_reg = RegisterFrom(obj, type); if (read_barrier_option == kWithReadBarrier) { - CHECK(kEmitCompilerReadBarrier); + CHECK(gUseReadBarrier); if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(obj + offset) @@ -6538,7 +6649,7 @@ void CodeGeneratorARM64::GenerateGcRootFieldLoad( DCHECK(fixup_label == nullptr || offset == 0u); Register root_reg = RegisterFrom(root, DataType::Type::kReference); if (read_barrier_option == kWithReadBarrier) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used. @@ -6604,7 +6715,7 @@ void CodeGeneratorARM64::GenerateGcRootFieldLoad( void CodeGeneratorARM64::GenerateIntrinsicCasMoveWithBakerReadBarrier( vixl::aarch64::Register marked_old_value, vixl::aarch64::Register old_value) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR. 
@@ -6626,7 +6737,7 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins const vixl::aarch64::MemOperand& src, bool needs_null_check, bool use_load_acquire) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the @@ -6722,7 +6833,7 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HArrayGet* instru uint32_t data_offset, Location index, bool needs_null_check) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); static_assert( @@ -6800,7 +6911,7 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HArrayGet* instru void CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) { // The following condition is a compile-time one, so it does not have a run-time cost. - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) { + if (kIsDebugBuild && gUseReadBarrier && kUseBakerReadBarrier) { // The following condition is a run-time one; it is executed after the // previous compile-time test, to avoid penalizing non-debug builds. if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) { @@ -6829,7 +6940,7 @@ void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction, Location obj, uint32_t offset, Location index) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); // Insert a slow path based read barrier *after* the reference load. // @@ -6854,7 +6965,7 @@ void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction, Location obj, uint32_t offset, Location index) { - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { // Baker's read barriers shall be handled by the fast path // (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier). DCHECK(!kUseBakerReadBarrier); @@ -6869,7 +6980,7 @@ void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction, void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); // Insert a slow path based read barrier *after* the GC root load. // @@ -7003,6 +7114,7 @@ static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler, vixl::aarch64::MemOperand& lock_word, vixl::aarch64::Label* slow_path, vixl::aarch64::Label* throw_npe = nullptr) { + vixl::aarch64::Label throw_npe_cont; // Load the lock word containing the rb_state. __ Ldr(ip0.W(), lock_word); // Given the numeric representation, it's enough to check the low bit of the rb_state. @@ -7014,7 +7126,7 @@ static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler, "Field and array LDR offsets must be the same to reuse the same code."); // To throw NPE, we return to the fast path; the artificial dependence below does not matter. if (throw_npe != nullptr) { - __ Bind(throw_npe); + __ Bind(&throw_npe_cont); } // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning). static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), @@ -7026,6 +7138,12 @@ static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler, // a memory barrier (which would be more expensive). __ Add(base_reg, base_reg, Operand(ip0, LSR, 32)); __ Br(lr); // And return back to the function. + if (throw_npe != nullptr) { + // Clear IP0 before returning to the fast path. 
+ __ Bind(throw_npe); + __ Mov(ip0.X(), xzr); + __ B(&throw_npe_cont); + } // Note: The fake dependency is unnecessary for the slow path. } diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index f4d652c29c..6190364d1d 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_ #include "base/bit_field.h" +#include "base/macros.h" #include "class_root.h" #include "code_generator.h" #include "common_arm64.h" @@ -36,7 +37,7 @@ #include "aarch64/macro-assembler-aarch64.h" #pragma GCC diagnostic pop -namespace art { +namespace art HIDDEN { namespace linker { class Arm64RelativePatcherTest; @@ -92,7 +93,10 @@ const vixl::aarch64::CPURegList runtime_reserved_core_registers = vixl::aarch64::CPURegList( tr, // Reserve X20 as Marking Register when emitting Baker read barriers. - ((kEmitCompilerReadBarrier && kUseBakerReadBarrier) ? mr : vixl::aarch64::NoCPUReg), + // TODO: We don't need to reserve marking-register for userfaultfd GC. But + // that would require some work in the assembler code as the right GC is + // chosen at load-time and not compile time. + (kReserveMarkingRegister ? mr : vixl::aarch64::NoCPUReg), kImplicitSuspendCheckRegister, vixl::aarch64::lr); @@ -111,9 +115,7 @@ inline Location FixedTempLocation() { const vixl::aarch64::CPURegList callee_saved_core_registers( vixl::aarch64::CPURegister::kRegister, vixl::aarch64::kXRegSize, - ((kEmitCompilerReadBarrier && kUseBakerReadBarrier) - ? vixl::aarch64::x21.GetCode() - : vixl::aarch64::x20.GetCode()), + (kReserveMarkingRegister ? vixl::aarch64::x21.GetCode() : vixl::aarch64::x20.GetCode()), vixl::aarch64::x30.GetCode()); const vixl::aarch64::CPURegList callee_saved_fp_registers(vixl::aarch64::CPURegister::kVRegister, vixl::aarch64::kDRegSize, @@ -121,6 +123,41 @@ const vixl::aarch64::CPURegList callee_saved_fp_registers(vixl::aarch64::CPURegi vixl::aarch64::d15.GetCode()); Location ARM64ReturnLocation(DataType::Type return_type); +#define UNIMPLEMENTED_INTRINSIC_LIST_ARM64(V) \ + V(StringStringIndexOf) \ + V(StringStringIndexOfAfter) \ + V(StringBufferAppend) \ + V(StringBufferLength) \ + V(StringBufferToString) \ + V(StringBuilderAppendObject) \ + V(StringBuilderAppendString) \ + V(StringBuilderAppendCharSequence) \ + V(StringBuilderAppendCharArray) \ + V(StringBuilderAppendBoolean) \ + V(StringBuilderAppendChar) \ + V(StringBuilderAppendInt) \ + V(StringBuilderAppendLong) \ + V(StringBuilderAppendFloat) \ + V(StringBuilderAppendDouble) \ + V(StringBuilderLength) \ + V(StringBuilderToString) \ + V(SystemArrayCopyByte) \ + V(SystemArrayCopyInt) \ + /* 1.8 */ \ + V(UnsafeGetAndAddInt) \ + V(UnsafeGetAndAddLong) \ + V(UnsafeGetAndSetInt) \ + V(UnsafeGetAndSetLong) \ + V(UnsafeGetAndSetObject) \ + V(MethodHandleInvokeExact) \ + V(MethodHandleInvoke) \ + /* OpenJDK 11 */ \ + V(JdkUnsafeGetAndAddInt) \ + V(JdkUnsafeGetAndAddLong) \ + V(JdkUnsafeGetAndSetInt) \ + V(JdkUnsafeGetAndSetLong) \ + V(JdkUnsafeGetAndSetObject) + class SlowPathCodeARM64 : public SlowPathCode { public: explicit SlowPathCodeARM64(HInstruction* instruction) @@ -327,7 +364,8 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, - bool value_can_be_null); + bool value_can_be_null, + WriteBarrierKind write_barrier_kind); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); void 
HandleCondition(HCondition* instruction); @@ -615,7 +653,7 @@ class CodeGeneratorARM64 : public CodeGenerator { // Emit a write barrier. void MarkGCCard(vixl::aarch64::Register object, vixl::aarch64::Register value, - bool value_can_be_null); + bool emit_null_check); void GenerateMemoryBarrier(MemBarrierKind kind); diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 09fa598203..d69e77045b 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -26,7 +26,6 @@ #include "class_table.h" #include "code_generator_utils.h" #include "common_arm.h" -#include "compiled_method.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" #include "gc/space/image_space.h" @@ -46,7 +45,7 @@ #include "utils/assembler.h" #include "utils/stack_checks.h" -namespace art { +namespace art HIDDEN { namespace arm { namespace vixl32 = vixl::aarch32; @@ -744,7 +743,7 @@ class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL { obj_(obj), offset_(offset), index_(index) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); // If `obj` is equal to `out` or `ref`, it means the initial object // has been overwritten by (or after) the heap object reference load // to be instrumented, e.g.: @@ -922,7 +921,7 @@ class ReadBarrierForRootSlowPathARMVIXL : public SlowPathCodeARMVIXL { public: ReadBarrierForRootSlowPathARMVIXL(HInstruction* instruction, Location out, Location root) : SlowPathCodeARMVIXL(instruction), out_(out), root_(root) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); } void EmitNativeCode(CodeGenerator* codegen) override { @@ -974,6 +973,10 @@ class MethodEntryExitHooksSlowPathARMVIXL : public SlowPathCodeARMVIXL { (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook; __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); + if (instruction_->IsMethodExitHook()) { + // Load frame size to pass to the exit hooks + __ Mov(vixl::aarch32::Register(R2), arm_codegen->GetFrameSize()); + } arm_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this); RestoreLiveRegisters(codegen, locations); __ B(GetExitLabel()); @@ -1845,7 +1848,7 @@ static Location Arm8BitEncodableConstantOrRegister(HInstruction* constant) { DCHECK(!DataType::IsFloatingPointType(constant->GetType())); if (constant->IsConstant() && CanEncodeConstantAs8BitImmediate(constant->AsConstant())) { - return Location::ConstantLocation(constant->AsConstant()); + return Location::ConstantLocation(constant); } return Location::RequiresRegister(); @@ -1904,6 +1907,33 @@ vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction, return final_label; } +namespace detail { +// Mark which intrinsics we don't have handcrafted code for. +template <Intrinsics T> +struct IsUnimplemented { + bool is_unimplemented = false; +}; + +#define TRUE_OVERRIDE(Name) \ + template <> \ + struct IsUnimplemented<Intrinsics::k##Name> { \ + bool is_unimplemented = true; \ + }; +UNIMPLEMENTED_INTRINSIC_LIST_ARM(TRUE_OVERRIDE) +#undef TRUE_OVERRIDE + +#include "intrinsics_list.h" +static constexpr bool kIsIntrinsicUnimplemented[] = { + false, // kNone +#define IS_UNIMPLEMENTED(Intrinsic, ...) 
\ + IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, + INTRINSICS_LIST(IS_UNIMPLEMENTED) +#undef IS_UNIMPLEMENTED +}; +#undef INTRINSICS_LIST + +} // namespace detail + CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats) @@ -1914,7 +1944,8 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, kCoreCalleeSaves.GetList(), ComputeSRegisterListMask(kFpuCalleeSaves), compiler_options, - stats), + stats, + ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)), block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), location_builder_(graph, this), @@ -2101,7 +2132,10 @@ void CodeGeneratorARMVIXL::SetupBlockedRegisters() const { blocked_core_registers_[LR] = true; blocked_core_registers_[PC] = true; - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // TODO: We don't need to reserve marking-register for userfaultfd GC. But + // that would require some work in the assembler code as the right GC is + // chosen at load-time and not compile time. + if (kReserveMarkingRegister) { // Reserve marking register. blocked_core_registers_[MR] = true; } @@ -2164,9 +2198,24 @@ void InstructionCodeGeneratorARMVIXL::GenerateMethodEntryExitHook(HInstruction* new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARMVIXL(instruction); codegen_->AddSlowPath(slow_path); - int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value(); + if (instruction->IsMethodExitHook()) { + // Check if we are required to check if the caller needs a deoptimization. Strictly speaking it + // would be sufficient to check if CheckCallerForDeopt bit is set. Though it is faster to check + // if it is just non-zero. kCHA bit isn't used in debuggable runtimes as cha optimization is + // disabled in debuggable runtime. The other bit is used when this method itself requires a + // deoptimization due to redefinition. So it is safe to just check for non-zero value here. + GetAssembler()->LoadFromOffset(kLoadWord, + temp, + sp, + codegen_->GetStackOffsetOfShouldDeoptimizeFlag()); + __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel()); + } + + MemberOffset offset = instruction->IsMethodExitHook() ? + instrumentation::Instrumentation::HaveMethodExitListenersOffset() : + instrumentation::Instrumentation::HaveMethodEntryListenersOffset(); uint32_t address = reinterpret_cast32<uint32_t>(Runtime::Current()->GetInstrumentation()); - __ Mov(temp, address + offset); + __ Mov(temp, address + offset.Int32Value()); __ Ldrb(temp, MemOperand(temp, 0)); __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); @@ -2234,6 +2283,61 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() { bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm); DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); + + // Check if we need to generate the clinit check. We will jump to the + // resolution stub if the class is not initialized and the executing thread is + // not the thread initializing it. + // We do this before constructing the frame to get the correct stack trace if + // an exception is thrown. 
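The comment above describes the check compiled into the method's frame entry. Below is a minimal standalone sketch of that decision only; the ClassStatus values, the thread-id plumbing and the helper names are simplified stand-ins, not the ART definitions (the real code compares a shifted status byte loaded from the declaring class).

// Sketch of the frame-entry class-initialization decision described above.
// All types and values here are illustrative stand-ins.
#include <cstdint>

enum class ClassStatus : uint8_t { kInitializing = 1, kInitialized = 2, kVisiblyInitialized = 3 };

enum class EntryAction { kRunMethod, kRunAfterMemoryBarrier, kJumpToResolutionStub };

EntryAction DecideAtFrameEntry(ClassStatus status,
                               uint32_t clinit_thread_id,
                               uint32_t self_thread_id) {
  if (status >= ClassStatus::kVisiblyInitialized) {
    return EntryAction::kRunMethod;              // Fast path: nothing to do.
  }
  if (status >= ClassStatus::kInitialized) {
    return EntryAction::kRunAfterMemoryBarrier;  // Initialized, but publication not yet visible.
  }
  if (status >= ClassStatus::kInitializing && clinit_thread_id == self_thread_id) {
    return EntryAction::kRunMethod;              // Re-entrant call from the initializing thread.
  }
  return EntryAction::kJumpToResolutionStub;     // Let the runtime resolve/initialize the class.
}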
+ if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Label resolution; + vixl32::Label memory_barrier; + + // Check if we're visibly initialized. + + vixl32::Register temp1 = temps.Acquire(); + // Use r4 as other temporary register. + DCHECK(!blocked_core_registers_[R4]); + DCHECK(!kCoreCalleeSaves.Includes(r4)); + vixl32::Register temp2 = r4; + for (vixl32::Register reg : kParameterCoreRegistersVIXL) { + DCHECK(!reg.Is(r4)); + } + + // We don't emit a read barrier here to save on code size. We rely on the + // resolution trampoline to do a suspend check before re-entering this code. + __ Ldr(temp1, MemOperand(kMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value())); + __ Ldrb(temp2, MemOperand(temp1, status_byte_offset)); + __ Cmp(temp2, shifted_visibly_initialized_value); + __ B(cs, &frame_entry_label_); + + // Check if we're initialized and jump to code that does a memory barrier if + // so. + __ Cmp(temp2, shifted_initialized_value); + __ B(cs, &memory_barrier); + + // Check if we're initializing and the thread initializing is the one + // executing the code. + __ Cmp(temp2, shifted_initializing_value); + __ B(lo, &resolution); + + __ Ldr(temp1, MemOperand(temp1, mirror::Class::ClinitThreadIdOffset().Int32Value())); + __ Ldr(temp2, MemOperand(tr, Thread::TidOffset<kArmPointerSize>().Int32Value())); + __ Cmp(temp1, temp2); + __ B(eq, &frame_entry_label_); + __ Bind(&resolution); + + // Jump to the resolution stub. + ThreadOffset32 entrypoint_offset = + GetThreadOffset<kArmPointerSize>(kQuickQuickResolutionTrampoline); + __ Ldr(temp1, MemOperand(tr, entrypoint_offset.Int32Value())); + __ Bx(temp1); + + __ Bind(&memory_barrier); + GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } + __ Bind(&frame_entry_label_); if (HasEmptyFrame()) { @@ -3069,12 +3173,12 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) { } } -void LocationsBuilderARMVIXL::VisitNativeDebugInfo(HNativeDebugInfo* info) { - new (GetGraph()->GetAllocator()) LocationSummary(info); +void LocationsBuilderARMVIXL::VisitNop(HNop* nop) { + new (GetGraph()->GetAllocator()) LocationSummary(nop); } -void InstructionCodeGeneratorARMVIXL::VisitNativeDebugInfo(HNativeDebugInfo*) { - // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile. +void InstructionCodeGeneratorARMVIXL::VisitNop(HNop*) { + // The environment recording already happened in CodeGenerator::Compile. } void CodeGeneratorARMVIXL::IncreaseFrame(size_t adjustment) { @@ -4514,10 +4618,11 @@ void LocationsBuilderARMVIXL::VisitDiv(HDiv* div) { switch (div->GetResultType()) { case DataType::Type::kInt32: { - if (div->InputAt(1)->IsConstant()) { + HInstruction* divisor = div->InputAt(1); + if (divisor->IsConstant()) { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::ConstantLocation(div->InputAt(1)->AsConstant())); - int32_t value = Int32ConstantFrom(div->InputAt(1)); + locations->SetInAt(1, Location::ConstantLocation(divisor)); + int32_t value = Int32ConstantFrom(divisor); Location::OutputOverlap out_overlaps = Location::kNoOutputOverlap; if (value == 1 || value == 0 || value == -1) { // No temp register required. 
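In the Div hunk above, a constant divisor of 0, 1 or -1 needs no temporaries, while other constants get extra registers because the division is strength-reduced to a multiply-high plus shift (the job of CalculateMagicAndShiftForDivRem in code_generator_utils further down). A self-contained example of that trick for one fixed divisor; the constants below are the textbook pair for unsigned division by 3, not values taken from the ART helper:

// Division by a constant as multiply-high + shift (unsigned /3 case).
#include <cassert>
#include <cstdint>

uint32_t DivBy3(uint32_t n) {
  const uint64_t kMagic = 0xAAAAAAABu;                       // ceil(2^33 / 3)
  uint32_t hi = static_cast<uint32_t>((n * kMagic) >> 32);   // multiply-high
  return hi >> 1;                                            // total division by 2^34
}

int main() {
  for (uint32_t n : {0u, 1u, 2u, 3u, 100u, 0xFFFFFFFFu}) {
    assert(DivBy3(n) == n / 3);  // Holds for every 32-bit unsigned n.
  }
  return 0;
}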
@@ -4631,10 +4736,11 @@ void LocationsBuilderARMVIXL::VisitRem(HRem* rem) { switch (type) { case DataType::Type::kInt32: { - if (rem->InputAt(1)->IsConstant()) { + HInstruction* divisor = rem->InputAt(1); + if (divisor->IsConstant()) { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::ConstantLocation(rem->InputAt(1)->AsConstant())); - int32_t value = Int32ConstantFrom(rem->InputAt(1)); + locations->SetInAt(1, Location::ConstantLocation(divisor)); + int32_t value = Int32ConstantFrom(divisor); Location::OutputOverlap out_overlaps = Location::kNoOutputOverlap; if (value == 1 || value == 0 || value == -1) { // No temp register required. @@ -5187,17 +5293,18 @@ void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) { void LocationsBuilderARMVIXL::VisitRor(HRor* ror) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall); + HInstruction* shift = ror->InputAt(1); switch (ror->GetResultType()) { case DataType::Type::kInt32: { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(ror->InputAt(1))); + locations->SetInAt(1, Location::RegisterOrConstant(shift)); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } case DataType::Type::kInt64: { locations->SetInAt(0, Location::RequiresRegister()); - if (ror->InputAt(1)->IsConstant()) { - locations->SetInAt(1, Location::ConstantLocation(ror->InputAt(1)->AsConstant())); + if (shift->IsConstant()) { + locations->SetInAt(1, Location::ConstantLocation(shift)); } else { locations->SetInAt(1, Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); @@ -5234,11 +5341,12 @@ void LocationsBuilderARMVIXL::HandleShift(HBinaryOperation* op) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall); + HInstruction* shift = op->InputAt(1); switch (op->GetResultType()) { case DataType::Type::kInt32: { locations->SetInAt(0, Location::RequiresRegister()); - if (op->InputAt(1)->IsConstant()) { - locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant())); + if (shift->IsConstant()) { + locations->SetInAt(1, Location::ConstantLocation(shift)); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } else { locations->SetInAt(1, Location::RequiresRegister()); @@ -5250,8 +5358,8 @@ void LocationsBuilderARMVIXL::HandleShift(HBinaryOperation* op) { } case DataType::Type::kInt64: { locations->SetInAt(0, Location::RequiresRegister()); - if (op->InputAt(1)->IsConstant()) { - locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant())); + if (shift->IsConstant()) { + locations->SetInAt(1, Location::ConstantLocation(shift)); // For simplicity, use kOutputOverlap even though we only require that low registers // don't clash with high registers which the register allocator currently guarantees. 
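The Ror locations above pass the rotate amount either as a constant or in a register (the 64-bit case then needs a temporary). For reference, a minimal C++ version of the 64-bit rotate-right being generated, with the undefined shift-by-64 case handled explicitly; this is a sketch, not the ART helper:

// Reference 64-bit rotate-right, safe for amount % 64 == 0.
#include <cstdint>

uint64_t RotateRight64(uint64_t value, uint32_t amount) {
  amount &= 63u;
  if (amount == 0u) {
    return value;
  }
  return (value >> amount) | (value << (64u - amount));
}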
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -5727,8 +5835,9 @@ void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicStore(vixl32::Register a __ CompareAndBranchIfNonZero(temp1, &fail); } -void LocationsBuilderARMVIXL::HandleFieldSet( - HInstruction* instruction, const FieldInfo& field_info) { +void LocationsBuilderARMVIXL::HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info, + WriteBarrierKind write_barrier_kind) { DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); LocationSummary* locations = @@ -5751,8 +5860,12 @@ void LocationsBuilderARMVIXL::HandleFieldSet( // Temporary registers for the write barrier. // TODO: consider renaming StoreNeedsWriteBarrier to StoreNeedsGCMark. if (needs_write_barrier) { - locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. - locations->AddTemp(Location::RequiresRegister()); + if (write_barrier_kind != WriteBarrierKind::kDontEmit) { + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } else if (kPoisonHeapReferences) { + locations->AddTemp(Location::RequiresRegister()); + } } else if (generate_volatile) { // ARM encoding have some additional constraints for ldrexd/strexd: // - registers need to be consecutive @@ -5773,7 +5886,8 @@ void LocationsBuilderARMVIXL::HandleFieldSet( void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, - bool value_can_be_null) { + bool value_can_be_null, + WriteBarrierKind write_barrier_kind) { DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); LocationSummary* locations = instruction->GetLocations(); @@ -5889,10 +6003,16 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction, UNREACHABLE(); } - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)) && + write_barrier_kind != WriteBarrierKind::kDontEmit) { vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); vixl32::Register card = RegisterFrom(locations->GetTemp(1)); - codegen_->MarkGCCard(temp, card, base, RegisterFrom(value), value_can_be_null); + codegen_->MarkGCCard( + temp, + card, + base, + RegisterFrom(value), + value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck); } if (is_volatile) { @@ -5911,7 +6031,7 @@ void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction, instruction->IsPredicatedInstanceFieldGet()); bool object_field_get_with_read_barrier = - kEmitCompilerReadBarrier && (field_info.GetFieldType() == DataType::Type::kReference); + gUseReadBarrier && (field_info.GetFieldType() == DataType::Type::kReference); bool is_predicated = instruction->IsPredicatedInstanceFieldGet(); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, @@ -5975,7 +6095,7 @@ Location LocationsBuilderARMVIXL::ArithmeticZeroOrFpuRegister(HInstruction* inpu DCHECK(DataType::IsFloatingPointType(input->GetType())) << input->GetType(); if ((input->IsFloatConstant() && (input->AsFloatConstant()->IsArithmeticZero())) || (input->IsDoubleConstant() && (input->AsDoubleConstant()->IsArithmeticZero()))) { - return Location::ConstantLocation(input->AsConstant()); + return Location::ConstantLocation(input); } else { return Location::RequiresFpuRegister(); } @@ -5986,7 +6106,7 @@ Location 
LocationsBuilderARMVIXL::ArmEncodableConstantOrRegister(HInstruction* c DCHECK(!DataType::IsFloatingPointType(constant->GetType())); if (constant->IsConstant() && CanEncodeConstantAsImmediate(constant->AsConstant(), opcode)) { - return Location::ConstantLocation(constant->AsConstant()); + return Location::ConstantLocation(constant); } return Location::RequiresRegister(); } @@ -6082,7 +6202,7 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction, case DataType::Type::kReference: { // /* HeapReference<Object> */ out = *(base + offset) - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { Location maybe_temp = (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location(); // Note that a potential implicit null check is handled in this // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier call. @@ -6165,11 +6285,14 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction, } void LocationsBuilderARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo()); + HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind()); } void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); + HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetValueCanBeNull(), + instruction->GetWriteBarrierKind()); } void LocationsBuilderARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { @@ -6202,11 +6325,14 @@ void InstructionCodeGeneratorARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instr } void LocationsBuilderARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo()); + HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind()); } void InstructionCodeGeneratorARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); + HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetValueCanBeNull(), + instruction->GetWriteBarrierKind()); } void LocationsBuilderARMVIXL::VisitStringBuilderAppend(HStringBuilderAppend* instruction) { @@ -6386,7 +6512,7 @@ void CodeGeneratorARMVIXL::StoreToShiftedRegOffset(DataType::Type type, void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) { bool object_array_get_with_read_barrier = - kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); + gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, object_array_get_with_read_barrier @@ -6534,14 +6660,14 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { // The read barrier instrumentation of object ArrayGet // instructions does not support the HIntermediateAddress // instruction. 
- DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier)); + DCHECK(!(has_intermediate_address && gUseReadBarrier)); static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); // /* HeapReference<Object> */ out = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // Note that a potential implicit null check is handled in this // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier call. DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); @@ -6688,8 +6814,10 @@ void LocationsBuilderARMVIXL::VisitArraySet(HArraySet* instruction) { locations->SetInAt(2, Location::RequiresRegister()); } if (needs_write_barrier) { - // Temporary registers for the write barrier. - locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. + // Temporary registers for the write barrier or register poisoning. + // TODO(solanes): We could reduce the temp usage but it requires some non-trivial refactoring of + // InstructionCodeGeneratorARMVIXL::VisitArraySet. + locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); } } @@ -6841,7 +6969,11 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { } } - codegen_->MarkGCCard(temp1, temp2, array, value, /* value_can_be_null= */ false); + if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) { + DCHECK_EQ(instruction->GetWriteBarrierKind(), WriteBarrierKind::kEmitNoNullCheck) + << " Already null checked so we shouldn't do it again."; + codegen_->MarkGCCard(temp1, temp2, array, value, /* emit_null_check= */ false); + } if (can_value_be_null) { DCHECK(do_store.IsReferenced()); @@ -7025,10 +7157,10 @@ void LocationsBuilderARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) { // locations. bool both_const = index->IsConstant() && length->IsConstant(); locations->SetInAt(0, both_const - ? Location::ConstantLocation(index->AsConstant()) + ? Location::ConstantLocation(index) : ArmEncodableConstantOrRegister(index, CMP)); locations->SetInAt(1, both_const - ? Location::ConstantLocation(length->AsConstant()) + ? Location::ConstantLocation(length) : ArmEncodableConstantOrRegister(length, CMP)); } @@ -7072,9 +7204,9 @@ void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp, vixl32::Register card, vixl32::Register object, vixl32::Register value, - bool value_can_be_null) { + bool emit_null_check) { vixl32::Label is_null; - if (value_can_be_null) { + if (emit_null_check) { __ CompareAndBranchIfZero(value, &is_null, /* is_far_target=*/ false); } // Load the address of the card table into `card`. @@ -7097,7 +7229,7 @@ void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp, // of the card to mark; and 2. to load the `kCardDirty` value) saves a load // (no need to explicitly load `kCardDirty` as an immediate value). 
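MarkGCCard, whose callers in these hunks now pass an explicit emit_null_check, boils down to dirtying the card that covers the written-to object. A plain C++ sketch of that structure; the card shift and dirty value are illustrative placeholders, not the ART constants:

// Dirty the card covering `object_address` after a reference store, optionally
// skipping the work when the stored value is null.
#include <cstddef>
#include <cstdint>

constexpr size_t kCardShift = 10;     // Illustrative: one card per 1 KiB of heap.
constexpr uint8_t kCardDirty = 0x70;  // Illustrative dirty marker.

void MarkCard(uint8_t* biased_card_table,
              uintptr_t object_address,
              uintptr_t stored_value,
              bool emit_null_check) {
  if (emit_null_check && stored_value == 0) {
    return;  // Nothing stored that the GC needs to trace.
  }
  // card = base + (object >> shift); the generated code then stores the card
  // address register itself as the dirty byte to avoid loading an immediate.
  biased_card_table[object_address >> kCardShift] = kCardDirty;
}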
__ Strb(card, MemOperand(card, temp)); - if (value_can_be_null) { + if (emit_null_check) { __ Bind(&is_null); } } @@ -7459,7 +7591,7 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) { load_kind == HLoadClass::LoadKind::kBssEntryPublic || load_kind == HLoadClass::LoadKind::kBssEntryPackage); - const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); + const bool requires_read_barrier = gUseReadBarrier && !cls->IsInBootImage(); LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; @@ -7473,7 +7605,7 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) { } locations->SetOut(Location::RequiresRegister()); if (load_kind == HLoadClass::LoadKind::kBssEntry) { - if (!kUseReadBarrier || kUseBakerReadBarrier) { + if (!gUseReadBarrier || kUseBakerReadBarrier) { // Rely on the type resolution or initialization and marking to save everything we need. locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { @@ -7501,7 +7633,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ const ReadBarrierOption read_barrier_option = cls->IsInBootImage() ? kWithoutReadBarrier - : kCompilerReadBarrierOption; + : gCompilerReadBarrierOption; bool generate_null_check = false; switch (load_kind) { case HLoadClass::LoadKind::kReferrersClass: { @@ -7622,12 +7754,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck( LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg) { UseScratchRegisterScope temps(GetVIXLAssembler()); vixl32::Register temp = temps.Acquire(); - constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); - constexpr uint32_t shifted_visibly_initialized_value = - enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << status_lsb_position; - - const size_t status_offset = mirror::Class::StatusOffset().SizeValue(); - GetAssembler()->LoadFromOffset(kLoadWord, temp, class_reg, status_offset); + __ Ldrb(temp, MemOperand(class_reg, status_byte_offset)); __ Cmp(temp, shifted_visibly_initialized_value); __ B(lo, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); @@ -7721,7 +7848,7 @@ void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) { } else { locations->SetOut(Location::RequiresRegister()); if (load_kind == HLoadString::LoadKind::kBssEntry) { - if (!kUseReadBarrier || kUseBakerReadBarrier) { + if (!gUseReadBarrier || kUseBakerReadBarrier) { // Rely on the pResolveString and marking to save everything we need, including temps. locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { @@ -7760,7 +7887,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE codegen_->EmitMovwMovtPlaceholder(labels, out); // All aligned loads are implicitly atomic consume operations on ARM. 
codegen_->GenerateGcRootFieldLoad( - load, out_loc, out, /*offset=*/ 0, kCompilerReadBarrierOption); + load, out_loc, out, /*offset=*/ 0, gCompilerReadBarrierOption); LoadStringSlowPathARMVIXL* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathARMVIXL(load); codegen_->AddSlowPath(slow_path); @@ -7781,7 +7908,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE load->GetString())); // /* GcRoot<mirror::String> */ out = *out codegen_->GenerateGcRootFieldLoad( - load, out_loc, out, /*offset=*/ 0, kCompilerReadBarrierOption); + load, out_loc, out, /*offset=*/ 0, gCompilerReadBarrierOption); return; } default: @@ -7838,7 +7965,7 @@ void InstructionCodeGeneratorARMVIXL::VisitThrow(HThrow* instruction) { // Temp is used for read barrier. static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { - if (kEmitCompilerReadBarrier && + if (gUseReadBarrier && (kUseBakerReadBarrier || type_check_kind == TypeCheckKind::kAbstractClassCheck || type_check_kind == TypeCheckKind::kClassHierarchyCheck || @@ -7888,9 +8015,9 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) { } locations->SetInAt(0, Location::RequiresRegister()); if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2))); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3))); } else { locations->SetInAt(1, Location::RequiresRegister()); } @@ -8185,9 +8312,9 @@ void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) { new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2))); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3))); } else { locations->SetInAt(1, Location::RequiresRegister()); } @@ -8773,7 +8900,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadOneRegister( ReadBarrierOption read_barrier_option) { vixl32::Register out_reg = RegisterFrom(out); if (read_barrier_option == kWithReadBarrier) { - CHECK(kEmitCompilerReadBarrier); + CHECK(gUseReadBarrier); DCHECK(maybe_temp.IsRegister()) << maybe_temp; if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. @@ -8808,7 +8935,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadTwoRegisters( vixl32::Register out_reg = RegisterFrom(out); vixl32::Register obj_reg = RegisterFrom(obj); if (read_barrier_option == kWithReadBarrier) { - CHECK(kEmitCompilerReadBarrier); + CHECK(gUseReadBarrier); if (kUseBakerReadBarrier) { DCHECK(maybe_temp.IsRegister()) << maybe_temp; // Load with fast path based Baker's read barrier. 
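The reference-load helpers in these hunks emit the Baker read barrier fast path: load the reference as usual and only involve the runtime while the GC is concurrently marking. A conceptual C++ sketch of that shape, using stand-ins for the marking flag and the mark entrypoint (the generated code keeps the flag in the reserved marking register, so the common case is a single test):

// Conceptual Baker read barrier fast path; types and globals are stand-ins.
#include <atomic>

struct ObjectRef {};                                     // Stand-in for a heap reference.

std::atomic<bool> gc_is_marking{false};                  // Stand-in for Thread::GetIsGcMarking().
ObjectRef* SlowPathMark(ObjectRef* ref) { return ref; }  // Placeholder; the real path calls the runtime.

ObjectRef* LoadReferenceWithReadBarrier(ObjectRef** field) {
  ObjectRef* ref = *field;                               // Fast path: an ordinary load.
  if (gc_is_marking.load(std::memory_order_relaxed)) {   // Only while concurrent marking runs...
    ref = SlowPathMark(ref);                             // ...mark/forward the loaded reference.
  }
  return ref;
}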
@@ -8837,7 +8964,7 @@ void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad( ReadBarrierOption read_barrier_option) { vixl32::Register root_reg = RegisterFrom(root); if (read_barrier_option == kWithReadBarrier) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used. @@ -8901,7 +9028,7 @@ void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad( void CodeGeneratorARMVIXL::GenerateIntrinsicCasMoveWithBakerReadBarrier( vixl::aarch32::Register marked_old_value, vixl::aarch32::Register old_value) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR. @@ -8935,7 +9062,7 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i vixl32::Register obj, const vixl32::MemOperand& src, bool needs_null_check) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the @@ -9028,7 +9155,7 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(Location ref, Location index, Location temp, bool needs_null_check) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); static_assert( @@ -9094,7 +9221,7 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(Location ref, void CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) { // The following condition is a compile-time one, so it does not have a run-time cost. - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) { + if (kIsDebugBuild && gUseReadBarrier && kUseBakerReadBarrier) { // The following condition is a run-time one; it is executed after the // previous compile-time test, to avoid penalizing non-debug builds. if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) { @@ -9124,7 +9251,7 @@ void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction, Location obj, uint32_t offset, Location index) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); // Insert a slow path based read barrier *after* the reference load. // @@ -9150,7 +9277,7 @@ void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instructio Location obj, uint32_t offset, Location index) { - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { // Baker's read barriers shall be handled by the fast path // (CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier). DCHECK(!kUseBakerReadBarrier); @@ -9165,7 +9292,7 @@ void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instructio void CodeGeneratorARMVIXL::GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); // Insert a slow path based read barrier *after* the GC root load. 
// diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index 790ad0f8f7..f5abe6951a 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_VIXL_H_ #include "base/enums.h" +#include "base/macros.h" #include "class_root.h" #include "code_generator.h" #include "common_arm.h" @@ -36,7 +37,7 @@ #include "aarch32/macro-assembler-aarch32.h" #pragma GCC diagnostic pop -namespace art { +namespace art HIDDEN { namespace linker { class Thumb2RelativePatcherTest; @@ -84,7 +85,7 @@ static const vixl::aarch32::RegisterList kCoreCalleeSaves = vixl::aarch32::Regis vixl::aarch32::r6, vixl::aarch32::r7), // Do not consider r8 as a callee-save register with Baker read barriers. - ((kEmitCompilerReadBarrier && kUseBakerReadBarrier) + (kReserveMarkingRegister ? vixl::aarch32::RegisterList() : vixl::aarch32::RegisterList(vixl::aarch32::r8)), vixl::aarch32::RegisterList(vixl::aarch32::r10, @@ -118,6 +119,65 @@ class CodeGeneratorARMVIXL; using VIXLInt32Literal = vixl::aarch32::Literal<int32_t>; using VIXLUInt32Literal = vixl::aarch32::Literal<uint32_t>; +#define UNIMPLEMENTED_INTRINSIC_LIST_ARM(V) \ + V(MathRoundDouble) /* Could be done by changing rounding mode, maybe? */ \ + V(UnsafeCASLong) /* High register pressure */ \ + V(SystemArrayCopyChar) \ + V(LongDivideUnsigned) \ + V(CRC32Update) \ + V(CRC32UpdateBytes) \ + V(CRC32UpdateByteBuffer) \ + V(FP16ToFloat) \ + V(FP16ToHalf) \ + V(FP16Floor) \ + V(FP16Ceil) \ + V(FP16Rint) \ + V(FP16Greater) \ + V(FP16GreaterEquals) \ + V(FP16Less) \ + V(FP16LessEquals) \ + V(FP16Compare) \ + V(FP16Min) \ + V(FP16Max) \ + V(MathMultiplyHigh) \ + V(StringStringIndexOf) \ + V(StringStringIndexOfAfter) \ + V(StringBufferAppend) \ + V(StringBufferLength) \ + V(StringBufferToString) \ + V(StringBuilderAppendObject) \ + V(StringBuilderAppendString) \ + V(StringBuilderAppendCharSequence) \ + V(StringBuilderAppendCharArray) \ + V(StringBuilderAppendBoolean) \ + V(StringBuilderAppendChar) \ + V(StringBuilderAppendInt) \ + V(StringBuilderAppendLong) \ + V(StringBuilderAppendFloat) \ + V(StringBuilderAppendDouble) \ + V(StringBuilderLength) \ + V(StringBuilderToString) \ + V(SystemArrayCopyByte) \ + V(SystemArrayCopyInt) \ + /* 1.8 */ \ + V(MathFmaDouble) \ + V(MathFmaFloat) \ + V(UnsafeGetAndAddInt) \ + V(UnsafeGetAndAddLong) \ + V(UnsafeGetAndSetInt) \ + V(UnsafeGetAndSetLong) \ + V(UnsafeGetAndSetObject) \ + V(MethodHandleInvokeExact) \ + V(MethodHandleInvoke) \ + /* OpenJDK 11 */ \ + V(JdkUnsafeCASLong) /* High register pressure */ \ + V(JdkUnsafeGetAndAddInt) \ + V(JdkUnsafeGetAndAddLong) \ + V(JdkUnsafeGetAndSetInt) \ + V(JdkUnsafeGetAndSetLong) \ + V(JdkUnsafeGetAndSetObject) \ + V(JdkUnsafeCompareAndSetLong) + class JumpTableARMVIXL : public DeletableArenaObject<kArenaAllocSwitchTable> { public: explicit JumpTableARMVIXL(HPackedSwitch* switch_instr) @@ -309,7 +369,9 @@ class LocationsBuilderARMVIXL : public HGraphVisitor { void HandleIntegerRotate(LocationSummary* locations); void HandleLongRotate(LocationSummary* locations); void HandleShift(HBinaryOperation* operation); - void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info, + WriteBarrierKind write_barrier_kind); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); Location ArithmeticZeroOrFpuRegister(HInstruction* 
input); @@ -378,7 +440,8 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, - bool value_can_be_null); + bool value_can_be_null, + WriteBarrierKind write_barrier_kind); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); void GenerateMinMaxInt(LocationSummary* locations, bool is_min); @@ -542,7 +605,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator { vixl::aarch32::Register card, vixl::aarch32::Register object, vixl::aarch32::Register value, - bool value_can_be_null); + bool emit_null_check); void GenerateMemoryBarrier(MemBarrierKind kind); @@ -602,7 +665,6 @@ class CodeGeneratorARMVIXL : public CodeGenerator { struct PcRelativePatchInfo { PcRelativePatchInfo(const DexFile* dex_file, uint32_t off_or_idx) : target_dex_file(dex_file), offset_or_index(off_or_idx) { } - PcRelativePatchInfo(PcRelativePatchInfo&& other) = default; // Target dex file or null for .data.bmig.rel.ro patches. const DexFile* target_dex_file; diff --git a/compiler/optimizing/code_generator_riscv64.h b/compiler/optimizing/code_generator_riscv64.h new file mode 100644 index 0000000000..405b39aa0a --- /dev/null +++ b/compiler/optimizing/code_generator_riscv64.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_RISCV64_H_ +#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_RISCV64_H_ + +#include "code_generator.h" +#include "driver/compiler_options.h" + +#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_RISCV64_H_ diff --git a/compiler/optimizing/code_generator_utils.cc b/compiler/optimizing/code_generator_utils.cc index abec26464a..99805928e4 100644 --- a/compiler/optimizing/code_generator_utils.cc +++ b/compiler/optimizing/code_generator_utils.cc @@ -20,7 +20,7 @@ #include "nodes.h" -namespace art { +namespace art HIDDEN { void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long, int64_t* magic, int* shift) { diff --git a/compiler/optimizing/code_generator_utils.h b/compiler/optimizing/code_generator_utils.h index 64665adc15..9d9ab2b118 100644 --- a/compiler/optimizing/code_generator_utils.h +++ b/compiler/optimizing/code_generator_utils.h @@ -21,7 +21,9 @@ #include <cstdlib> #include <limits> -namespace art { +#include "base/macros.h" + +namespace art HIDDEN { class HInstruction; diff --git a/compiler/optimizing/code_generator_vector_arm64_neon.cc b/compiler/optimizing/code_generator_vector_arm64_neon.cc index 0fe9898635..6b6e25cf0c 100644 --- a/compiler/optimizing/code_generator_vector_arm64_neon.cc +++ b/compiler/optimizing/code_generator_vector_arm64_neon.cc @@ -23,7 +23,7 @@ using namespace vixl::aarch64; // NOLINT(build/namespaces) -namespace art { +namespace art HIDDEN { namespace arm64 { using helpers::DRegisterFrom; @@ -65,7 +65,7 @@ inline Location NEONEncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) { if (constant->IsConstant() && NEONCanEncodeConstantAsImmediate(constant->AsConstant(), instr)) { - return Location::ConstantLocation(constant->AsConstant()); + return Location::ConstantLocation(constant); } return Location::RequiresRegister(); @@ -94,7 +94,7 @@ void LocationsBuilderARM64Neon::VisitVecReplicateScalar(HVecReplicateScalar* ins case DataType::Type::kFloat64: if (input->IsConstant() && NEONCanEncodeConstantAsImmediate(input->AsConstant(), instruction)) { - locations->SetInAt(0, Location::ConstantLocation(input->AsConstant())); + locations->SetInAt(0, Location::ConstantLocation(input)); locations->SetOut(Location::RequiresFpuRegister()); } else { locations->SetInAt(0, Location::RequiresFpuRegister()); @@ -881,7 +881,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati case DataType::Type::kInt32: case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: @@ -1008,13 +1008,13 @@ void LocationsBuilderARM64Neon::VisitVecSetScalars(HVecSetScalars* instruction) case DataType::Type::kInt16: case DataType::Type::kInt32: case DataType::Type::kInt64: - locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input) : Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; case DataType::Type::kFloat32: case DataType::Type::kFloat64: - locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + locations->SetInAt(0, is_zero ? 
Location::ConstantLocation(input) : Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; diff --git a/compiler/optimizing/code_generator_vector_arm64_sve.cc b/compiler/optimizing/code_generator_vector_arm64_sve.cc index 824b6c9476..fe15791d3f 100644 --- a/compiler/optimizing/code_generator_vector_arm64_sve.cc +++ b/compiler/optimizing/code_generator_vector_arm64_sve.cc @@ -23,17 +23,14 @@ using namespace vixl::aarch64; // NOLINT(build/namespaces) -namespace art { +namespace art HIDDEN { namespace arm64 { using helpers::DRegisterFrom; -using helpers::HeapOperand; using helpers::InputRegisterAt; using helpers::Int64FromLocation; using helpers::LocationFrom; using helpers::OutputRegister; -using helpers::QRegisterFrom; -using helpers::StackOperandFrom; using helpers::SveStackOperandFrom; using helpers::VRegisterFrom; using helpers::ZRegisterFrom; @@ -67,7 +64,7 @@ static bool SVECanEncodeConstantAsImmediate(HConstant* constant, HInstruction* i inline Location SVEEncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) { if (constant->IsConstant() && SVECanEncodeConstantAsImmediate(constant->AsConstant(), instr)) { - return Location::ConstantLocation(constant->AsConstant()); + return Location::ConstantLocation(constant); } return Location::RequiresRegister(); @@ -96,7 +93,7 @@ void LocationsBuilderARM64Sve::VisitVecReplicateScalar(HVecReplicateScalar* inst case DataType::Type::kFloat64: if (input->IsConstant() && SVECanEncodeConstantAsImmediate(input->AsConstant(), instruction)) { - locations->SetInAt(0, Location::ConstantLocation(input->AsConstant())); + locations->SetInAt(0, Location::ConstantLocation(input)); locations->SetOut(Location::RequiresFpuRegister()); } else { locations->SetInAt(0, Location::RequiresFpuRegister()); @@ -754,7 +751,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati case DataType::Type::kInt32: case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: @@ -878,13 +875,13 @@ void LocationsBuilderARM64Sve::VisitVecSetScalars(HVecSetScalars* instruction) { case DataType::Type::kInt16: case DataType::Type::kInt32: case DataType::Type::kInt64: - locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input) : Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; case DataType::Type::kFloat32: case DataType::Type::kFloat64: - locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + locations->SetInAt(0, is_zero ? 
Location::ConstantLocation(input) : Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc index c46f9b7986..e8ecf28386 100644 --- a/compiler/optimizing/code_generator_vector_arm_vixl.cc +++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc @@ -20,7 +20,7 @@ namespace vixl32 = vixl::aarch32; using namespace vixl32; // NOLINT(build/namespaces) -namespace art { +namespace art HIDDEN { namespace arm { using helpers::DRegisterFrom; @@ -640,7 +640,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati case DataType::Type::kInt16: case DataType::Type::kInt32: locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: @@ -749,7 +749,7 @@ void LocationsBuilderARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) { switch (instruction->GetPackedType()) { case DataType::Type::kInt32: - locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input) : Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc index 9c837dd986..343a6e1af4 100644 --- a/compiler/optimizing/code_generator_vector_x86.cc +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -19,7 +19,7 @@ #include "mirror/array-inl.h" #include "mirror/string.h" -namespace art { +namespace art HIDDEN { namespace x86 { // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. @@ -42,13 +42,13 @@ void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instructi case DataType::Type::kUint16: case DataType::Type::kInt16: case DataType::Type::kInt32: - locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input) : Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; case DataType::Type::kFloat32: case DataType::Type::kFloat64: - locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input) : Location::RequiresFpuRegister()); locations->SetOut(is_zero ? Location::RequiresFpuRegister() : Location::SameAsFirstInput()); @@ -981,7 +981,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati case DataType::Type::kInt32: case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); break; default: @@ -1094,13 +1094,13 @@ void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) { case DataType::Type::kUint16: case DataType::Type::kInt16: case DataType::Type::kInt32: - locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + locations->SetInAt(0, is_zero ? 
Location::ConstantLocation(input) : Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; case DataType::Type::kFloat32: case DataType::Type::kFloat64: - locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input) : Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc index 330bf76a4a..fb6e4e753f 100644 --- a/compiler/optimizing/code_generator_vector_x86_64.cc +++ b/compiler/optimizing/code_generator_vector_x86_64.cc @@ -19,7 +19,7 @@ #include "mirror/array-inl.h" #include "mirror/string.h" -namespace art { +namespace art HIDDEN { namespace x86_64 { // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. @@ -37,13 +37,13 @@ void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instru case DataType::Type::kInt16: case DataType::Type::kInt32: case DataType::Type::kInt64: - locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input) : Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; case DataType::Type::kFloat32: case DataType::Type::kFloat64: - locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input) : Location::RequiresFpuRegister()); locations->SetOut(is_zero ? Location::RequiresFpuRegister() : Location::SameAsFirstInput()); @@ -964,7 +964,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati case DataType::Type::kInt32: case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); break; default: @@ -1072,13 +1072,13 @@ void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) { case DataType::Type::kInt16: case DataType::Type::kInt32: case DataType::Type::kInt64: - locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input) : Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; case DataType::Type::kFloat32: case DataType::Type::kFloat64: - locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + locations->SetInAt(0, is_zero ? 
Location::ConstantLocation(input) : Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 8c6b8027cd..cb1cecc45a 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -20,7 +20,6 @@ #include "art_method-inl.h" #include "class_table.h" #include "code_generator_utils.h" -#include "compiled_method.h" #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" @@ -36,6 +35,7 @@ #include "mirror/array-inl.h" #include "mirror/class-inl.h" #include "mirror/var_handle.h" +#include "optimizing/nodes.h" #include "scoped_thread_state_change-inl.h" #include "thread.h" #include "utils/assembler.h" @@ -43,7 +43,7 @@ #include "utils/x86/assembler_x86.h" #include "utils/x86/managed_register_x86.h" -namespace art { +namespace art HIDDEN { template<class MirrorType> class GcRoot; @@ -503,7 +503,7 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode { : SlowPathCode(instruction), ref_(ref), unpoison_ref_before_marking_(unpoison_ref_before_marking) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); } const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86"; } @@ -590,7 +590,7 @@ class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode { field_addr_(field_addr), unpoison_ref_before_marking_(unpoison_ref_before_marking), temp_(temp) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); } const char* GetDescription() const override { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; } @@ -744,7 +744,7 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { obj_(obj), offset_(offset), index_(index) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); // If `obj` is equal to `out` or `ref`, it means the initial object // has been overwritten by (or after) the heap object reference load // to be instrumented, e.g.: @@ -918,7 +918,7 @@ class ReadBarrierForRootSlowPathX86 : public SlowPathCode { public: ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root) : SlowPathCode(instruction), out_(out), root_(root) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); } void EmitNativeCode(CodeGenerator* codegen) override { @@ -967,6 +967,9 @@ class MethodEntryExitHooksSlowPathX86 : public SlowPathCode { (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook; __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); + if (instruction_->IsMethodExitHook()) { + __ movl(EBX, Immediate(codegen->GetFrameSize())); + } x86_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this); RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); @@ -1103,6 +1106,33 @@ void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) { __ fs()->call(Address::Absolute(entry_point_offset)); } +namespace detail { +// Mark which intrinsics we don't have handcrafted code for. 
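Right after this comment (and earlier for ARM), the code builds a constexpr table of unimplemented intrinsics by combining an X-macro list with template specializations. A toy, self-contained rendition of the pattern; the enum and both lists here are invented stand-ins, not the ART intrinsic lists:

// One list flips a per-enumerator specialization; a second pass over the full
// list collects the results into a constexpr table indexed by the enum.
enum class Intrinsics { kNone, kMathAbs, kStringLength, kMethodHandleInvoke };

#define FULL_LIST(V) V(MathAbs) V(StringLength) V(MethodHandleInvoke)
#define UNIMPLEMENTED_LIST(V) V(MethodHandleInvoke)

template <Intrinsics T>
struct IsUnimplemented { static constexpr bool value = false; };

#define TRUE_OVERRIDE(Name) \
  template <> struct IsUnimplemented<Intrinsics::k##Name> { static constexpr bool value = true; };
UNIMPLEMENTED_LIST(TRUE_OVERRIDE)
#undef TRUE_OVERRIDE

#define IS_UNIMPLEMENTED(Name) IsUnimplemented<Intrinsics::k##Name>::value,
static constexpr bool kIsIntrinsicUnimplemented[] = {
    false,  // kNone
    FULL_LIST(IS_UNIMPLEMENTED)
};
#undef IS_UNIMPLEMENTED

static_assert(!kIsIntrinsicUnimplemented[1] && kIsIntrinsicUnimplemented[3], "toy sanity check");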
+template <Intrinsics T> +struct IsUnimplemented { + bool is_unimplemented = false; +}; + +#define TRUE_OVERRIDE(Name) \ + template <> \ + struct IsUnimplemented<Intrinsics::k##Name> { \ + bool is_unimplemented = true; \ + }; +UNIMPLEMENTED_INTRINSIC_LIST_X86(TRUE_OVERRIDE) +#undef TRUE_OVERRIDE + +#include "intrinsics_list.h" +static constexpr bool kIsIntrinsicUnimplemented[] = { + false, // kNone +#define IS_UNIMPLEMENTED(Intrinsic, ...) \ + IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, + INTRINSICS_LIST(IS_UNIMPLEMENTED) +#undef IS_UNIMPLEMENTED +}; +#undef INTRINSICS_LIST + +} // namespace detail + CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats) @@ -1115,7 +1145,8 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, | (1 << kFakeReturnRegister), 0, compiler_options, - stats), + stats, + ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)), block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), @@ -1197,9 +1228,21 @@ void InstructionCodeGeneratorX86::GenerateMethodEntryExitHook(HInstruction* inst new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86(instruction); codegen_->AddSlowPath(slow_path); + if (instruction->IsMethodExitHook()) { + // Check if we are required to check if the caller needs a deoptimization. Strictly speaking it + // would be sufficient to check if CheckCallerForDeopt bit is set. Though it is faster to check + // if it is just non-zero. kCHA bit isn't used in debuggable runtimes as cha optimization is + // disabled in debuggable runtime. The other bit is used when this method itself requires a + // deoptimization due to redefinition. So it is safe to just check for non-zero value here. + __ cmpl(Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0)); + __ j(kNotEqual, slow_path->GetEntryLabel()); + } + uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation()); - int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value(); - __ cmpb(Address::Absolute(address + offset), Immediate(0)); + MemberOffset offset = instruction->IsMethodExitHook() ? + instrumentation::Instrumentation::HaveMethodExitListenersOffset() : + instrumentation::Instrumentation::HaveMethodEntryListenersOffset(); + __ cmpb(Address::Absolute(address + offset.Int32Value()), Immediate(0)); __ j(kNotEqual, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } @@ -1261,6 +1304,44 @@ void CodeGeneratorX86::MaybeIncrementHotness(bool is_frame_entry) { void CodeGeneratorX86::GenerateFrameEntry() { __ cfi().SetCurrentCFAOffset(kX86WordSize); // return address + + // Check if we need to generate the clinit check. We will jump to the + // resolution stub if the class is not initialized and the executing thread is + // not the thread initializing it. + // We do this before constructing the frame to get the correct stack trace if + // an exception is thrown. + if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) { + NearLabel continue_execution, resolution; + // We'll use EBP as temporary. + __ pushl(EBP); + // Check if we're visibly initialized. + + // We don't emit a read barrier here to save on code size. We rely on the + // resolution trampoline to do a suspend check before re-entering this code. 
+ __ movl(EBP, Address(kMethodRegisterArgument, ArtMethod::DeclaringClassOffset().Int32Value())); + __ cmpb(Address(EBP, status_byte_offset), Immediate(shifted_visibly_initialized_value)); + __ j(kAboveEqual, &continue_execution); + + // Check if we're initializing and the thread initializing is the one + // executing the code. + __ cmpb(Address(EBP, status_byte_offset), Immediate(shifted_initializing_value)); + __ j(kBelow, &resolution); + + __ movl(EBP, Address(EBP, mirror::Class::ClinitThreadIdOffset().Int32Value())); + __ fs()->cmpl(EBP, Address::Absolute(Thread::TidOffset<kX86PointerSize>().Int32Value())); + __ j(kEqual, &continue_execution); + __ Bind(&resolution); + + __ popl(EBP); + // Jump to the resolution stub. + ThreadOffset32 entrypoint_offset = + GetThreadOffset<kX86PointerSize>(kQuickQuickResolutionTrampoline); + __ fs()->jmp(Address::Absolute(entrypoint_offset)); + + __ Bind(&continue_execution); + __ popl(EBP); + } + __ Bind(&frame_entry_label_); bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86); @@ -1619,7 +1700,7 @@ void CodeGeneratorX86::LoadFromMemoryNoBarrier(DataType::Type dst_type, __ movsd(dst.AsFpuRegister<XmmRegister>(), src); break; case DataType::Type::kReference: - DCHECK(!kEmitCompilerReadBarrier); + DCHECK(!gUseReadBarrier); __ movl(dst.AsRegister<Register>(), src); __ MaybeUnpoisonHeapReference(dst.AsRegister<Register>()); break; @@ -2230,12 +2311,12 @@ void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) { } } -void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) { - new (GetGraph()->GetAllocator()) LocationSummary(info); +void LocationsBuilderX86::VisitNop(HNop* nop) { + new (GetGraph()->GetAllocator()) LocationSummary(nop); } -void InstructionCodeGeneratorX86::VisitNativeDebugInfo(HNativeDebugInfo*) { - // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile. +void InstructionCodeGeneratorX86::VisitNop(HNop*) { + // The environment recording already happened in CodeGenerator::Compile. } void CodeGeneratorX86::IncreaseFrame(size_t adjustment) { @@ -2913,7 +2994,7 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) { case DataType::Type::kInt64: { HInstruction* input = conversion->InputAt(0); Location input_location = input->IsConstant() - ? Location::ConstantLocation(input->AsConstant()) + ? Location::ConstantLocation(input) : Location::RegisterPairLocation(EAX, EDX); locations->SetInAt(0, input_location); // Make the output overlap to please the register allocator. This greatly simplifies @@ -5689,13 +5770,10 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linke DCHECK_EQ(size, linker_patches->size()); } -void CodeGeneratorX86::MarkGCCard(Register temp, - Register card, - Register object, - Register value, - bool value_can_be_null) { +void CodeGeneratorX86::MarkGCCard( + Register temp, Register card, Register object, Register value, bool emit_null_check) { NearLabel is_null; - if (value_can_be_null) { + if (emit_null_check) { __ testl(value, value); __ j(kEqual, &is_null); } @@ -5720,7 +5798,7 @@ void CodeGeneratorX86::MarkGCCard(Register temp, // (no need to explicitly load `kCardDirty` as an immediate value). 
__ movb(Address(temp, card, TIMES_1, 0), X86ManagedRegister::FromCpuRegister(card).AsByteRegister()); - if (value_can_be_null) { + if (emit_null_check) { __ Bind(&is_null); } } @@ -5731,11 +5809,11 @@ void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldI instruction->IsPredicatedInstanceFieldGet()); bool object_field_get_with_read_barrier = - kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); + gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference); bool is_predicated = instruction->IsPredicatedInstanceFieldGet(); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, - kEmitCompilerReadBarrier + gUseReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { @@ -5793,7 +5871,7 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, if (load_type == DataType::Type::kReference) { // /* HeapReference<Object> */ out = *(base + offset) - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // Note that a potential implicit null check is handled in this // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call. codegen_->GenerateFieldLoadWithBakerReadBarrier( @@ -5824,7 +5902,9 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, } } -void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info) { +void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info, + WriteBarrierKind write_barrier_kind) { DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); LocationSummary* locations = @@ -5861,10 +5941,13 @@ void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldI locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { - // Temporary registers for the write barrier. - locations->AddTemp(Location::RequiresRegister()); // May be used for reference poisoning too. - // Ensure the card is in a byte register. - locations->AddTemp(Location::RegisterLocation(ECX)); + if (write_barrier_kind != WriteBarrierKind::kDontEmit) { + locations->AddTemp(Location::RequiresRegister()); + // Ensure the card is in a byte register. 
+ locations->AddTemp(Location::RegisterLocation(ECX)); + } else if (kPoisonHeapReferences) { + locations->AddTemp(Location::RequiresRegister()); + } } } } @@ -5875,7 +5958,8 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, Address field_addr, Register base, bool is_volatile, - bool value_can_be_null) { + bool value_can_be_null, + WriteBarrierKind write_barrier_kind) { LocationSummary* locations = instruction->GetLocations(); Location value = locations->InAt(value_index); bool needs_write_barrier = @@ -5988,10 +6072,15 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, codegen_->MaybeRecordImplicitNullCheck(instruction); } - if (needs_write_barrier) { + if (needs_write_barrier && write_barrier_kind != WriteBarrierKind::kDontEmit) { Register temp = locations->GetTemp(0).AsRegister<Register>(); Register card = locations->GetTemp(1).AsRegister<Register>(); - codegen_->MarkGCCard(temp, card, base, value.AsRegister<Register>(), value_can_be_null); + codegen_->MarkGCCard( + temp, + card, + base, + value.AsRegister<Register>(), + value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck); } if (is_volatile) { @@ -6001,7 +6090,8 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, - bool value_can_be_null) { + bool value_can_be_null, + WriteBarrierKind write_barrier_kind) { DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); LocationSummary* locations = instruction->GetLocations(); @@ -6026,7 +6116,8 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, field_addr, base, is_volatile, - value_can_be_null); + value_can_be_null, + write_barrier_kind); if (is_predicated) { __ Bind(&pred_is_null); @@ -6042,19 +6133,25 @@ void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instructi } void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo()); + HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind()); } void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); + HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetValueCanBeNull(), + instruction->GetWriteBarrierKind()); } void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo()); + HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind()); } void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); + HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetValueCanBeNull(), + instruction->GetWriteBarrierKind()); } void LocationsBuilderX86::VisitPredicatedInstanceFieldGet( @@ -6202,7 +6299,7 @@ void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) { void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) { bool object_array_get_with_read_barrier = - kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); + gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference); LocationSummary* locations = 
new (GetGraph()->GetAllocator()) LocationSummary(instruction, object_array_get_with_read_barrier @@ -6244,7 +6341,7 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); // /* HeapReference<Object> */ out = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // Note that a potential implicit null check is handled in this // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call. codegen_->GenerateArrayLoadWithBakerReadBarrier( @@ -6315,10 +6412,12 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2))); } if (needs_write_barrier) { - // Temporary registers for the write barrier. - locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. - // Ensure the card is in a byte register. - locations->AddTemp(Location::RegisterLocation(ECX)); + // Used by reference poisoning or emitting write barrier. + locations->AddTemp(Location::RequiresRegister()); + if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) { + // Only used when emitting a write barrier. Ensure the card is in a byte register. + locations->AddTemp(Location::RegisterLocation(ECX)); + } } } @@ -6435,9 +6534,16 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { } } - Register card = locations->GetTemp(1).AsRegister<Register>(); - codegen_->MarkGCCard( - temp, card, array, value.AsRegister<Register>(), /* value_can_be_null= */ false); + if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) { + DCHECK_EQ(instruction->GetWriteBarrierKind(), WriteBarrierKind::kEmitNoNullCheck) + << " Already null checked so we shouldn't do it again."; + Register card = locations->GetTemp(1).AsRegister<Register>(); + codegen_->MarkGCCard(temp, + card, + array, + value.AsRegister<Register>(), + /* emit_null_check= */ false); + } if (can_value_be_null) { DCHECK(do_store.IsLinked()); @@ -7057,7 +7163,7 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) { load_kind == HLoadClass::LoadKind::kBssEntryPublic || load_kind == HLoadClass::LoadKind::kBssEntryPackage); - const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); + const bool requires_read_barrier = gUseReadBarrier && !cls->IsInBootImage(); LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; @@ -7071,7 +7177,7 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) { } locations->SetOut(Location::RequiresRegister()); if (call_kind == LocationSummary::kCallOnSlowPath && cls->HasPcRelativeLoadKind()) { - if (!kUseReadBarrier || kUseBakerReadBarrier) { + if (!gUseReadBarrier || kUseBakerReadBarrier) { // Rely on the type resolution and/or initialization to save everything. locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { @@ -7109,7 +7215,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE bool generate_null_check = false; const ReadBarrierOption read_barrier_option = cls->IsInBootImage() ? 
kWithoutReadBarrier - : kCompilerReadBarrierOption; + : gCompilerReadBarrierOption; switch (load_kind) { case HLoadClass::LoadKind::kReferrersClass: { DCHECK(!cls->CanCallRuntime()); @@ -7233,12 +7339,6 @@ void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) { void InstructionCodeGeneratorX86::GenerateClassInitializationCheck( SlowPathCode* slow_path, Register class_reg) { - constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); - const size_t status_byte_offset = - mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); - constexpr uint32_t shifted_visibly_initialized_value = - enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte); - __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_visibly_initialized_value)); __ j(kBelow, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); @@ -7296,7 +7396,7 @@ void LocationsBuilderX86::VisitLoadString(HLoadString* load) { } else { locations->SetOut(Location::RequiresRegister()); if (load_kind == HLoadString::LoadKind::kBssEntry) { - if (!kUseReadBarrier || kUseBakerReadBarrier) { + if (!gUseReadBarrier || kUseBakerReadBarrier) { // Rely on the pResolveString to save everything. locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { @@ -7345,7 +7445,7 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S Address address = Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset); Label* fixup_label = codegen_->NewStringBssEntryPatch(load); // /* GcRoot<mirror::String> */ out = *address /* PC-relative */ - GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption); + GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption); // No need for memory fence, thanks to the x86 memory model. SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86(load); codegen_->AddSlowPath(slow_path); @@ -7365,7 +7465,7 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S Label* fixup_label = codegen_->NewJitRootStringPatch( load->GetDexFile(), load->GetStringIndex(), load->GetString()); // /* GcRoot<mirror::String> */ out = *address - GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption); + GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption); return; } default: @@ -7416,7 +7516,7 @@ void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) { // Temp is used for read barrier. 
static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { - if (kEmitCompilerReadBarrier && + if (gUseReadBarrier && !kUseBakerReadBarrier && (type_check_kind == TypeCheckKind::kAbstractClassCheck || type_check_kind == TypeCheckKind::kClassHierarchyCheck || @@ -7466,9 +7566,9 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { } locations->SetInAt(0, Location::RequiresRegister()); if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2))); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3))); } else { locations->SetInAt(1, Location::Any()); } @@ -7734,9 +7834,9 @@ void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) { // a memory address. locations->SetInAt(1, Location::RequiresRegister()); } else if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2))); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3))); } else { locations->SetInAt(1, Location::Any()); } @@ -8188,7 +8288,7 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister( ReadBarrierOption read_barrier_option) { Register out_reg = out.AsRegister<Register>(); if (read_barrier_option == kWithReadBarrier) { - CHECK(kEmitCompilerReadBarrier); + CHECK(gUseReadBarrier); if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(out + offset) @@ -8222,7 +8322,7 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters( Register out_reg = out.AsRegister<Register>(); Register obj_reg = obj.AsRegister<Register>(); if (read_barrier_option == kWithReadBarrier) { - CHECK(kEmitCompilerReadBarrier); + CHECK(gUseReadBarrier); if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. 
// /* HeapReference<Object> */ out = *(obj + offset) @@ -8250,7 +8350,7 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad( ReadBarrierOption read_barrier_option) { Register root_reg = root.AsRegister<Register>(); if (read_barrier_option == kWithReadBarrier) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used: @@ -8314,7 +8414,7 @@ void CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr Register obj, uint32_t offset, bool needs_null_check) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); // /* HeapReference<Object> */ ref = *(obj + offset) @@ -8328,7 +8428,7 @@ void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr uint32_t data_offset, Location index, bool needs_null_check) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); static_assert( @@ -8347,7 +8447,7 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i bool needs_null_check, bool always_update_field, Register* temp) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); // In slow path based read barriers, the read barrier call is @@ -8428,7 +8528,7 @@ void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction, Location obj, uint32_t offset, Location index) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); // Insert a slow path based read barrier *after* the reference load. // @@ -8455,7 +8555,7 @@ void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction, Location obj, uint32_t offset, Location index) { - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { // Baker's read barriers shall be handled by the fast path // (CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier). DCHECK(!kUseBakerReadBarrier); @@ -8470,7 +8570,7 @@ void CodeGeneratorX86::MaybeGenerateReadBarrierSlow(HInstruction* instruction, void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); // Insert a slow path based read barrier *after* the GC root load. // diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 75c5cebb5e..d27155f31d 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -19,6 +19,7 @@ #include "arch/x86/instruction_set_features_x86.h" #include "base/enums.h" +#include "base/macros.h" #include "code_generator.h" #include "dex/dex_file_types.h" #include "driver/compiler_options.h" @@ -26,7 +27,7 @@ #include "parallel_move_resolver.h" #include "utils/x86/assembler_x86.h" -namespace art { +namespace art HIDDEN { namespace x86 { // Use a local definition to prevent copying mistakes. 
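Editor's note: the UNIMPLEMENTED_INTRINSIC_LIST_X86 macro added to this header (next hunk) feeds the detail::IsUnimplemented template table built at the top of code_generator_x86.cc earlier in the diff. The following self-contained sketch shows the same pattern on a made-up operation list: a primary template defaults to "implemented", an X-macro generates true specializations for the unimplemented subset, and a constexpr array is then produced by expanding the full list. All names here (Op, OP_LIST, and so on) are illustrative.

#include <cstdio>

#define OP_LIST(V) V(Add) V(Sub) V(Mul) V(Div)   // full list of operations
#define UNIMPLEMENTED_OP_LIST(V) V(Div)          // subset lacking hand-written code

enum class Op {
#define DECLARE(Name) k##Name,
  OP_LIST(DECLARE)
#undef DECLARE
};

// Primary template: by default an operation counts as implemented.
template <Op>
struct IsUnimplementedOp { static constexpr bool value = false; };

// Generate a "true" specialization for every name in the unimplemented list.
#define TRUE_OVERRIDE(Name) \
  template <> struct IsUnimplementedOp<Op::k##Name> { static constexpr bool value = true; };
UNIMPLEMENTED_OP_LIST(TRUE_OVERRIDE)
#undef TRUE_OVERRIDE

// One flag per enumerator, in declaration order, computed entirely at compile time.
static constexpr bool kIsOpUnimplemented[] = {
#define IS_UNIMPLEMENTED(Name) IsUnimplementedOp<Op::k##Name>::value,
  OP_LIST(IS_UNIMPLEMENTED)
#undef IS_UNIMPLEMENTED
};

int main() {
  for (bool b : kIsOpUnimplemented) std::printf("%d ", b ? 1 : 0);  // prints: 0 0 0 1
  std::printf("\n");
}

In the real code the table is indexed by the Intrinsics enum (with a leading false entry for kNone) and handed to the CodeGenerator base class as an ArrayRef<const bool>, as seen in the constructor changes above.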
@@ -47,6 +48,61 @@ static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1, XMM static constexpr size_t kRuntimeParameterFpuRegistersLength = arraysize(kRuntimeParameterFpuRegisters); +#define UNIMPLEMENTED_INTRINSIC_LIST_X86(V) \ + V(MathRoundDouble) \ + V(FloatIsInfinite) \ + V(DoubleIsInfinite) \ + V(IntegerHighestOneBit) \ + V(LongHighestOneBit) \ + V(LongDivideUnsigned) \ + V(CRC32Update) \ + V(CRC32UpdateBytes) \ + V(CRC32UpdateByteBuffer) \ + V(FP16ToFloat) \ + V(FP16ToHalf) \ + V(FP16Floor) \ + V(FP16Ceil) \ + V(FP16Rint) \ + V(FP16Greater) \ + V(FP16GreaterEquals) \ + V(FP16Less) \ + V(FP16LessEquals) \ + V(FP16Compare) \ + V(FP16Min) \ + V(FP16Max) \ + V(MathMultiplyHigh) \ + V(StringStringIndexOf) \ + V(StringStringIndexOfAfter) \ + V(StringBufferAppend) \ + V(StringBufferLength) \ + V(StringBufferToString) \ + V(StringBuilderAppendObject) \ + V(StringBuilderAppendString) \ + V(StringBuilderAppendCharSequence) \ + V(StringBuilderAppendCharArray) \ + V(StringBuilderAppendBoolean) \ + V(StringBuilderAppendChar) \ + V(StringBuilderAppendInt) \ + V(StringBuilderAppendLong) \ + V(StringBuilderAppendFloat) \ + V(StringBuilderAppendDouble) \ + V(StringBuilderLength) \ + V(StringBuilderToString) \ + /* 1.8 */ \ + V(UnsafeGetAndAddInt) \ + V(UnsafeGetAndAddLong) \ + V(UnsafeGetAndSetInt) \ + V(UnsafeGetAndSetLong) \ + V(UnsafeGetAndSetObject) \ + V(MethodHandleInvokeExact) \ + V(MethodHandleInvoke) \ + /* OpenJDK 11 */ \ + V(JdkUnsafeGetAndAddInt) \ + V(JdkUnsafeGetAndAddLong) \ + V(JdkUnsafeGetAndSetInt) \ + V(JdkUnsafeGetAndSetLong) \ + V(JdkUnsafeGetAndSetObject) + class InvokeRuntimeCallingConvention : public CallingConvention<Register, XmmRegister> { public: InvokeRuntimeCallingConvention() @@ -196,7 +252,9 @@ class LocationsBuilderX86 : public HGraphVisitor { void HandleInvoke(HInvoke* invoke); void HandleCondition(HCondition* condition); void HandleShift(HBinaryOperation* instruction); - void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info, + WriteBarrierKind write_barrier_kind); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); bool CpuHasAvxFeatureFlag(); bool CpuHasAvx2FeatureFlag(); @@ -249,7 +307,8 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { Address field_addr, Register base, bool is_volatile, - bool value_can_be_null); + bool value_can_be_null, + WriteBarrierKind write_barrier_kind); private: // Generate code for the given suspend check. If not null, `successor` @@ -279,7 +338,8 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, - bool value_can_be_null); + bool value_can_be_null, + WriteBarrierKind write_barrier_kind); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); // Generate a heap reference load using one register `out`: @@ -519,11 +579,8 @@ class CodeGeneratorX86 : public CodeGenerator { void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override; // Emit a write barrier. 
- void MarkGCCard(Register temp, - Register card, - Register object, - Register value, - bool value_can_be_null); + void MarkGCCard( + Register temp, Register card, Register object, Register value, bool emit_null_check); void GenerateMemoryBarrier(MemBarrierKind kind); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 511917a735..eea6b204fa 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -21,7 +21,6 @@ #include "class_root-inl.h" #include "class_table.h" #include "code_generator_utils.h" -#include "compiled_method.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" #include "gc/space/image_space.h" @@ -37,6 +36,7 @@ #include "mirror/class-inl.h" #include "mirror/object_reference.h" #include "mirror/var_handle.h" +#include "optimizing/nodes.h" #include "scoped_thread_state_change-inl.h" #include "thread.h" #include "utils/assembler.h" @@ -45,7 +45,7 @@ #include "utils/x86_64/constants_x86_64.h" #include "utils/x86_64/managed_register_x86_64.h" -namespace art { +namespace art HIDDEN { template<class MirrorType> class GcRoot; @@ -510,7 +510,7 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { : SlowPathCode(instruction), ref_(ref), unpoison_ref_before_marking_(unpoison_ref_before_marking) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); } const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86_64"; } @@ -601,7 +601,7 @@ class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode { unpoison_ref_before_marking_(unpoison_ref_before_marking), temp1_(temp1), temp2_(temp2) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); } const char* GetDescription() const override { @@ -761,7 +761,7 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { obj_(obj), offset_(offset), index_(index) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); // If `obj` is equal to `out` or `ref`, it means the initial // object has been overwritten by (or after) the heap object // reference load to be instrumented, e.g.: @@ -937,7 +937,7 @@ class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode { public: ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root) : SlowPathCode(instruction), out_(out), root_(root) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); } void EmitNativeCode(CodeGenerator* codegen) override { @@ -986,6 +986,10 @@ class MethodEntryExitHooksSlowPathX86_64 : public SlowPathCode { (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook; __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); + if (instruction_->IsMethodExitHook()) { + // Load FrameSize to pass to the exit hook. + __ movq(CpuRegister(R8), Immediate(codegen->GetFrameSize())); + } x86_64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this); RestoreLiveRegisters(codegen, locations); __ jmp(GetExitLabel()); @@ -1490,6 +1494,33 @@ void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) { __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip= */ true)); } +namespace detail { +// Mark which intrinsics we don't have handcrafted code for. 
+template <Intrinsics T> +struct IsUnimplemented { + bool is_unimplemented = false; +}; + +#define TRUE_OVERRIDE(Name) \ + template <> \ + struct IsUnimplemented<Intrinsics::k##Name> { \ + bool is_unimplemented = true; \ + }; +UNIMPLEMENTED_INTRINSIC_LIST_X86_64(TRUE_OVERRIDE) +#undef TRUE_OVERRIDE + +#include "intrinsics_list.h" +static constexpr bool kIsIntrinsicUnimplemented[] = { + false, // kNone +#define IS_UNIMPLEMENTED(Intrinsic, ...) \ + IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, + INTRINSICS_LIST(IS_UNIMPLEMENTED) +#undef IS_UNIMPLEMENTED +}; +#undef INTRINSICS_LIST + +} // namespace detail + static constexpr int kNumberOfCpuRegisterPairs = 0; // Use a fake return address register to mimic Quick. static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1); @@ -1506,7 +1537,8 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves), arraysize(kFpuCalleeSaves)), compiler_options, - stats), + stats, + ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)), block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), @@ -1561,9 +1593,22 @@ void InstructionCodeGeneratorX86_64::GenerateMethodEntryExitHook(HInstruction* i new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86_64(instruction); codegen_->AddSlowPath(slow_path); + if (instruction->IsMethodExitHook()) { + // Check if we are required to check if the caller needs a deoptimization. Strictly speaking it + // would be sufficient to check if CheckCallerForDeopt bit is set. Though it is faster to check + // if it is just non-zero. kCHA bit isn't used in debuggable runtimes as cha optimization is + // disabled in debuggable runtime. The other bit is used when this method itself requires a + // deoptimization due to redefinition. So it is safe to just check for non-zero value here. + __ cmpl(Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()), + Immediate(0)); + __ j(kNotEqual, slow_path->GetEntryLabel()); + } + uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation()); - int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value(); - __ movq(CpuRegister(TMP), Immediate(address + offset)); + MemberOffset offset = instruction->IsMethodExitHook() ? + instrumentation::Instrumentation::HaveMethodExitListenersOffset() + : instrumentation::Instrumentation::HaveMethodEntryListenersOffset(); + __ movq(CpuRegister(TMP), Immediate(address + offset.Int32Value())); __ cmpb(Address(CpuRegister(TMP), 0), Immediate(0)); __ j(kNotEqual, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); @@ -1653,6 +1698,44 @@ void CodeGeneratorX86_64::MaybeIncrementHotness(bool is_frame_entry) { void CodeGeneratorX86_64::GenerateFrameEntry() { __ cfi().SetCurrentCFAOffset(kX86_64WordSize); // return address + + // Check if we need to generate the clinit check. We will jump to the + // resolution stub if the class is not initialized and the executing thread is + // not the thread initializing it. + // We do this before constructing the frame to get the correct stack trace if + // an exception is thrown. + if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) { + NearLabel resolution; + // Check if we're visibly initialized. + + // We don't emit a read barrier here to save on code size. We rely on the + // resolution trampoline to do a suspend check before re-entering this code. 
+ __ movl(CpuRegister(TMP), + Address(CpuRegister(kMethodRegisterArgument), + ArtMethod::DeclaringClassOffset().Int32Value())); + __ cmpb(Address(CpuRegister(TMP), status_byte_offset), + Immediate(shifted_visibly_initialized_value)); + __ j(kAboveEqual, &frame_entry_label_); + + // Check if we're initializing and the thread initializing is the one + // executing the code. + __ cmpb(Address(CpuRegister(TMP), status_byte_offset), Immediate(shifted_initializing_value)); + __ j(kBelow, &resolution); + + __ movl(CpuRegister(TMP), + Address(CpuRegister(TMP), mirror::Class::ClinitThreadIdOffset().Int32Value())); + __ gs()->cmpl( + CpuRegister(TMP), + Address::Absolute(Thread::TidOffset<kX86_64PointerSize>().Int32Value(), /*no_rip=*/ true)); + __ j(kEqual, &frame_entry_label_); + __ Bind(&resolution); + + // Jump to the resolution stub. + ThreadOffset64 entrypoint_offset = + GetThreadOffset<kX86_64PointerSize>(kQuickQuickResolutionTrampoline); + __ gs()->jmp(Address::Absolute(entrypoint_offset, /*no_rip=*/ true)); + } + __ Bind(&frame_entry_label_); bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64); @@ -2274,12 +2357,12 @@ void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) { } } -void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) { - new (GetGraph()->GetAllocator()) LocationSummary(info); +void LocationsBuilderX86_64::VisitNop(HNop* nop) { + new (GetGraph()->GetAllocator()) LocationSummary(nop); } -void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) { - // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile. +void InstructionCodeGeneratorX86_64::VisitNop(HNop*) { + // The environment recording already happened in CodeGenerator::Compile. } void CodeGeneratorX86_64::IncreaseFrame(size_t adjustment) { @@ -5013,7 +5096,7 @@ void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) { instruction->IsPredicatedInstanceFieldGet()); bool object_field_get_with_read_barrier = - kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); + gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference); bool is_predicated = instruction->IsPredicatedInstanceFieldGet(); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, @@ -5064,7 +5147,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, if (load_type == DataType::Type::kReference) { // /* HeapReference<Object> */ out = *(base + offset) - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // Note that a potential implicit null check is handled in this // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call. codegen_->GenerateFieldLoadWithBakerReadBarrier( @@ -5119,6 +5202,9 @@ void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction, locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); } } + + // TODO(solanes): We could reduce the temp usage but it requires some non-trivial refactoring of + // InstructionCodeGeneratorX86_64::HandleFieldSet. if (needs_write_barrier) { // Temporary registers for the write barrier. 
locations->AddTemp(Location::RequiresRegister()); @@ -5180,7 +5266,8 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, bool is_volatile, bool is_atomic, bool value_can_be_null, - bool byte_swap) { + bool byte_swap, + WriteBarrierKind write_barrier_kind) { LocationSummary* locations = instruction->GetLocations(); Location value = locations->InAt(value_index); @@ -5298,10 +5385,16 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, codegen_->MaybeRecordImplicitNullCheck(instruction); } - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(value_index))) { + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(value_index)) && + write_barrier_kind != WriteBarrierKind::kDontEmit) { CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); CpuRegister card = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>(); - codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null); + codegen_->MarkGCCard( + temp, + card, + base, + value.AsRegister<CpuRegister>(), + value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck); } if (is_volatile) { @@ -5311,7 +5404,8 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, - bool value_can_be_null) { + bool value_can_be_null, + WriteBarrierKind write_barrier_kind) { DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); LocationSummary* locations = instruction->GetLocations(); @@ -5336,7 +5430,9 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, base, is_volatile, /*is_atomic=*/ false, - value_can_be_null); + value_can_be_null, + /*byte_swap=*/ false, + write_barrier_kind); if (is_predicated) { __ Bind(&pred_is_null); @@ -5348,7 +5444,10 @@ void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instructio } void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); + HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetValueCanBeNull(), + instruction->GetWriteBarrierKind()); } void LocationsBuilderX86_64::VisitPredicatedInstanceFieldGet( @@ -5388,7 +5487,10 @@ void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) { } void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); + HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetValueCanBeNull(), + instruction->GetWriteBarrierKind()); } void LocationsBuilderX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) { @@ -5513,7 +5615,7 @@ void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) { void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) { bool object_array_get_with_read_barrier = - kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); + gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, object_array_get_with_read_barrier @@ -5551,7 +5653,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { 
"art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); // /* HeapReference<Object> */ out = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // Note that a potential implicit null check is handled in this // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call. codegen_->GenerateArrayLoadWithBakerReadBarrier( @@ -5619,9 +5721,12 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { } if (needs_write_barrier) { - // Temporary registers for the write barrier. - locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. + // Used by reference poisoning or emitting write barrier. locations->AddTemp(Location::RequiresRegister()); + if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) { + // Only used when emitting a write barrier. + locations->AddTemp(Location::RequiresRegister()); + } } } @@ -5739,9 +5844,16 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { } } - CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); - codegen_->MarkGCCard( - temp, card, array, value.AsRegister<CpuRegister>(), /* value_can_be_null= */ false); + if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) { + DCHECK_EQ(instruction->GetWriteBarrierKind(), WriteBarrierKind::kEmitNoNullCheck) + << " Already null checked so we shouldn't do it again."; + CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); + codegen_->MarkGCCard(temp, + card, + array, + value.AsRegister<CpuRegister>(), + /* emit_null_check= */ false); + } if (can_value_be_null) { DCHECK(do_store.IsLinked()); @@ -5940,9 +6052,9 @@ void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp, CpuRegister card, CpuRegister object, CpuRegister value, - bool value_can_be_null) { + bool emit_null_check) { NearLabel is_null; - if (value_can_be_null) { + if (emit_null_check) { __ testl(value, value); __ j(kEqual, &is_null); } @@ -5967,7 +6079,7 @@ void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp, // of the card to mark; and 2. to load the `kCardDirty` value) saves a load // (no need to explicitly load `kCardDirty` as an immediate value). 
__ movb(Address(temp, card, TIMES_1, 0), card); - if (value_can_be_null) { + if (emit_null_check) { __ Bind(&is_null); } } @@ -6282,12 +6394,6 @@ void ParallelMoveResolverX86_64::RestoreScratch(int reg) { void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck( SlowPathCode* slow_path, CpuRegister class_reg) { - constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); - const size_t status_byte_offset = - mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); - constexpr uint32_t shifted_visibly_initialized_value = - enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte); - __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_visibly_initialized_value)); __ j(kBelow, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); @@ -6352,7 +6458,7 @@ void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) { load_kind == HLoadClass::LoadKind::kBssEntryPublic || load_kind == HLoadClass::LoadKind::kBssEntryPackage); - const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); + const bool requires_read_barrier = gUseReadBarrier && !cls->IsInBootImage(); LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; @@ -6366,7 +6472,7 @@ void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) { } locations->SetOut(Location::RequiresRegister()); if (load_kind == HLoadClass::LoadKind::kBssEntry) { - if (!kUseReadBarrier || kUseBakerReadBarrier) { + if (!gUseReadBarrier || kUseBakerReadBarrier) { // Rely on the type resolution and/or initialization to save everything. locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { @@ -6403,7 +6509,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S const ReadBarrierOption read_barrier_option = cls->IsInBootImage() ? kWithoutReadBarrier - : kCompilerReadBarrierOption; + : gCompilerReadBarrierOption; bool generate_null_check = false; switch (load_kind) { case HLoadClass::LoadKind::kReferrersClass: { @@ -6550,7 +6656,7 @@ void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) { } else { locations->SetOut(Location::RequiresRegister()); if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) { - if (!kUseReadBarrier || kUseBakerReadBarrier) { + if (!gUseReadBarrier || kUseBakerReadBarrier) { // Rely on the pResolveString to save everything. locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { @@ -6598,7 +6704,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA /* no_rip= */ false); Label* fixup_label = codegen_->NewStringBssEntryPatch(load); // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */ - GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption); + GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption); // No need for memory fence, thanks to the x86-64 memory model. 
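Editor's note: several hunks above thread a WriteBarrierKind through HandleFieldSet/VisitArraySet and rename MarkGCCard's value_can_be_null parameter to emit_null_check. The sketch below models that gating in plain C++: kDontEmit skips the card mark entirely, kEmitWithNullCheck skips it for null stores, and kEmitNoNullCheck marks unconditionally. The toy card-table layout (kCardShift, kCardDirty, offsets instead of real addresses) is illustrative; the generated x86 code instead stores the low byte of the card-table base register, which doubles as the dirty value, as the MarkGCCard comment notes.

#include <cstdint>
#include <cstdio>
#include <vector>

enum class WriteBarrierKind { kEmitWithNullCheck, kEmitNoNullCheck, kDontEmit };

constexpr unsigned kCardShift = 10;   // illustrative: one card byte per 2^10 heap bytes
constexpr uint8_t kCardDirty = 0x70;  // illustrative dirty marker

struct ToyHeap {
  std::vector<uint8_t> cards;
  explicit ToyHeap(size_t heap_bytes) : cards((heap_bytes >> kCardShift) + 1, 0) {}
  void MarkCard(size_t holder_offset) { cards[holder_offset >> kCardShift] = kCardDirty; }
};

// holder_offset: where the object containing the field lives; value_is_null: the
// reference being stored. The WriteBarrierKind decides whether a card mark is emitted.
void StoreReference(ToyHeap& heap, size_t holder_offset, bool value_is_null,
                    WriteBarrierKind kind) {
  // ... the actual reference store happens here ...
  if (kind == WriteBarrierKind::kDontEmit) return;  // barrier elided by the compiler
  if (kind == WriteBarrierKind::kEmitWithNullCheck && value_is_null) return;
  heap.MarkCard(holder_offset);  // kEmitNoNullCheck marks without testing the value
}

int main() {
  ToyHeap heap(1 << 20);
  StoreReference(heap, /*holder_offset=*/4096, /*value_is_null=*/false,
                 WriteBarrierKind::kEmitNoNullCheck);
  std::printf("card[4]=%d\n", heap.cards[4096 >> kCardShift]);  // prints 112 (0x70)
}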
SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load); codegen_->AddSlowPath(slow_path); @@ -6619,7 +6725,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA Label* fixup_label = codegen_->NewJitRootStringPatch( load->GetDexFile(), load->GetStringIndex(), load->GetString()); // /* GcRoot<mirror::String> */ out = *address - GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption); + GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption); return; } default: @@ -6672,7 +6778,7 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) { // Temp is used for read barrier. static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { - if (kEmitCompilerReadBarrier && + if (gUseReadBarrier && !kUseBakerReadBarrier && (type_check_kind == TypeCheckKind::kAbstractClassCheck || type_check_kind == TypeCheckKind::kClassHierarchyCheck || @@ -6722,9 +6828,9 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { } locations->SetInAt(0, Location::RequiresRegister()); if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2))); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3))); } else { locations->SetInAt(1, Location::Any()); } @@ -7000,9 +7106,9 @@ void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { // a memory address. locations->SetInAt(1, Location::RequiresRegister()); } else if (type_check_kind == TypeCheckKind::kBitstringCheck) { - locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2))); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3))); } else { locations->SetInAt(1, Location::Any()); } @@ -7426,7 +7532,7 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister( ReadBarrierOption read_barrier_option) { CpuRegister out_reg = out.AsRegister<CpuRegister>(); if (read_barrier_option == kWithReadBarrier) { - CHECK(kEmitCompilerReadBarrier); + CHECK(gUseReadBarrier); if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(out + offset) @@ -7460,7 +7566,7 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters( CpuRegister out_reg = out.AsRegister<CpuRegister>(); CpuRegister obj_reg = obj.AsRegister<CpuRegister>(); if (read_barrier_option == kWithReadBarrier) { - CHECK(kEmitCompilerReadBarrier); + CHECK(gUseReadBarrier); if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. 
// /* HeapReference<Object> */ out = *(obj + offset) @@ -7488,7 +7594,7 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad( ReadBarrierOption read_barrier_option) { CpuRegister root_reg = root.AsRegister<CpuRegister>(); if (read_barrier_option == kWithReadBarrier) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used: @@ -7552,7 +7658,7 @@ void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* in CpuRegister obj, uint32_t offset, bool needs_null_check) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); // /* HeapReference<Object> */ ref = *(obj + offset) @@ -7566,7 +7672,7 @@ void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in uint32_t data_offset, Location index, bool needs_null_check) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); static_assert( @@ -7586,7 +7692,7 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction bool always_update_field, CpuRegister* temp1, CpuRegister* temp2) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); // In slow path based read barriers, the read barrier call is @@ -7668,7 +7774,7 @@ void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction, Location obj, uint32_t offset, Location index) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); // Insert a slow path based read barrier *after* the reference load. // @@ -7695,7 +7801,7 @@ void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction Location obj, uint32_t offset, Location index) { - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { // Baker's read barriers shall be handled by the fast path // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier). DCHECK(!kUseBakerReadBarrier); @@ -7710,7 +7816,7 @@ void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); // Insert a slow path based read barrier *after* the GC root load. // diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 39a72d8211..dff2e799e0 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -18,13 +18,14 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_ #include "arch/x86_64/instruction_set_features_x86_64.h" +#include "base/macros.h" #include "code_generator.h" #include "driver/compiler_options.h" #include "nodes.h" #include "parallel_move_resolver.h" #include "utils/x86_64/assembler_x86_64.h" -namespace art { +namespace art HIDDEN { namespace x86_64 { // Use a local definition to prevent copying mistakes. @@ -52,6 +53,53 @@ static constexpr size_t kRuntimeParameterFpuRegistersLength = // these are not clobbered by any direct call to native code (such as math intrinsics). 
static constexpr FloatRegister non_volatile_xmm_regs[] = { XMM12, XMM13, XMM14, XMM15 }; +#define UNIMPLEMENTED_INTRINSIC_LIST_X86_64(V) \ + V(CRC32Update) \ + V(CRC32UpdateBytes) \ + V(CRC32UpdateByteBuffer) \ + V(FP16ToFloat) \ + V(FP16ToHalf) \ + V(FP16Floor) \ + V(FP16Ceil) \ + V(FP16Rint) \ + V(FP16Greater) \ + V(FP16GreaterEquals) \ + V(FP16Less) \ + V(FP16LessEquals) \ + V(FP16Compare) \ + V(FP16Min) \ + V(FP16Max) \ + V(StringStringIndexOf) \ + V(StringStringIndexOfAfter) \ + V(StringBufferAppend) \ + V(StringBufferLength) \ + V(StringBufferToString) \ + V(StringBuilderAppendObject) \ + V(StringBuilderAppendString) \ + V(StringBuilderAppendCharSequence) \ + V(StringBuilderAppendCharArray) \ + V(StringBuilderAppendBoolean) \ + V(StringBuilderAppendChar) \ + V(StringBuilderAppendInt) \ + V(StringBuilderAppendLong) \ + V(StringBuilderAppendFloat) \ + V(StringBuilderAppendDouble) \ + V(StringBuilderLength) \ + V(StringBuilderToString) \ + /* 1.8 */ \ + V(UnsafeGetAndAddInt) \ + V(UnsafeGetAndAddLong) \ + V(UnsafeGetAndSetInt) \ + V(UnsafeGetAndSetLong) \ + V(UnsafeGetAndSetObject) \ + V(MethodHandleInvokeExact) \ + V(MethodHandleInvoke) \ + /* OpenJDK 11 */ \ + V(JdkUnsafeGetAndAddInt) \ + V(JdkUnsafeGetAndAddLong) \ + V(JdkUnsafeGetAndSetInt) \ + V(JdkUnsafeGetAndSetLong) \ + V(JdkUnsafeGetAndSetObject) class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> { public: @@ -250,7 +298,8 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { bool is_volatile, bool is_atomic, bool value_can_be_null, - bool byte_swap = false); + bool byte_swap, + WriteBarrierKind write_barrier_kind); void Bswap(Location value, DataType::Type type, CpuRegister* temp = nullptr); @@ -273,7 +322,8 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, - bool value_can_be_null); + bool value_can_be_null, + WriteBarrierKind write_barrier_kind); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type); @@ -435,7 +485,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { CpuRegister card, CpuRegister object, CpuRegister value, - bool value_can_be_null); + bool emit_null_check); void GenerateMemoryBarrier(MemBarrierKind kind); diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc index 766bb01978..d759a16f48 100644 --- a/compiler/optimizing/code_sinking.cc +++ b/compiler/optimizing/code_sinking.cc @@ -19,30 +19,55 @@ #include "base/arena_bit_vector.h" #include "base/array_ref.h" #include "base/bit_vector-inl.h" +#include "base/globals.h" #include "base/logging.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" #include "common_dominator.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { bool CodeSinking::Run() { - HBasicBlock* exit = graph_->GetExitBlock(); - if (exit == nullptr) { + if (graph_->GetExitBlock() == nullptr) { // Infinite loop, just bail. return false; } + + UncommonBranchSinking(); + ReturnSinking(); + return true; +} + +void CodeSinking::UncommonBranchSinking() { + HBasicBlock* exit = graph_->GetExitBlock(); + DCHECK(exit != nullptr); // TODO(ngeoffray): we do not profile branches yet, so use throw instructions // as an indicator of an uncommon branch. 
for (HBasicBlock* exit_predecessor : exit->GetPredecessors()) { HInstruction* last = exit_predecessor->GetLastInstruction(); + + // TryBoundary instructions are sometimes inserted between the last instruction (e.g. Throw, + // Return) and Exit. We don't want to use that instruction for our "uncommon branch" heuristic + // because they are not as good an indicator as throwing branches, so we skip them and fetch the + // actual last instruction. + if (last->IsTryBoundary()) { + // We have an exit try boundary. Fetch the previous instruction. + DCHECK(!last->AsTryBoundary()->IsEntry()); + if (last->GetPrevious() == nullptr) { + DCHECK(exit_predecessor->IsSingleTryBoundary()); + exit_predecessor = exit_predecessor->GetSinglePredecessor(); + last = exit_predecessor->GetLastInstruction(); + } else { + last = last->GetPrevious(); + } + } + // Any predecessor of the exit that does not return, throws an exception. if (!last->IsReturn() && !last->IsReturnVoid()) { SinkCodeToUncommonBranch(exit_predecessor); } } - return true; } static bool IsInterestingInstruction(HInstruction* instruction) { @@ -88,7 +113,7 @@ static bool IsInterestingInstruction(HInstruction* instruction) { // We can only store on local allocations. Other heap references can // be escaping. Note that allocations can escape too, but we only move - // allocations if their users can move to, or are in the list of + // allocations if their users can move too, or are in the list of // post dominated blocks. if (instruction->IsInstanceFieldSet()) { if (!instruction->InputAt(0)->IsNewInstance()) { @@ -102,7 +127,7 @@ static bool IsInterestingInstruction(HInstruction* instruction) { } } - // Heap accesses cannot go pass instructions that have memory side effects, which + // Heap accesses cannot go past instructions that have memory side effects, which // we are not tracking here. Note that the load/store elimination optimization // runs before this optimization, and should have removed interesting ones. // In theory, we could handle loads of local allocations, but this is currently @@ -171,7 +196,6 @@ static bool ShouldFilterUse(HInstruction* instruction, return false; } - // Find the ideal position for moving `instruction`. If `filter` is true, // we filter out store instructions to that instruction, which are processed // first in the step (3) of the sinking algorithm. @@ -210,56 +234,52 @@ static HInstruction* FindIdealPosition(HInstruction* instruction, return nullptr; } - // Move to the first dominator not in a loop, if we can. - while (target_block->IsInLoop()) { + // Move to the first dominator not in a loop, if we can. We only do this if we are trying to hoist + // `instruction` out of a loop it wasn't a part of. + const HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation(); + while (target_block->IsInLoop() && target_block->GetLoopInformation() != loop_info) { if (!post_dominated.IsBitSet(target_block->GetDominator()->GetBlockId())) { break; } target_block = target_block->GetDominator(); DCHECK(target_block != nullptr); } - const bool was_in_loop = target_block->IsInLoop(); - - // For throwing instructions we can move them into: - // * Blocks that are not part of a try - // * Catch blocks are suitable as well, as long as they are not part of an outer try. - // * Blocks that are part of the same try that the instrucion was already in. 
- // - // We cannot move an instruction that can throw into a try that said instruction is not a part of - // already, as that would mean it will throw into a different catch block. If we detect that - // `target_block` is not a valid block to move `instruction` to, we traverse up the dominator tree - // to find if we have a suitable block. - while (instruction->CanThrow() && target_block->GetTryCatchInformation() != nullptr) { - if (target_block->IsCatchBlock()) { - // If the catch block has an xhandler, it means it is inside of an outer try. - const bool inside_of_another_try_catch = target_block->GetSuccessors().size() != 1; - if (!inside_of_another_try_catch) { - // If we have a catch block, it's okay to sink as long as that catch is not inside of - // another try catch. - break; + + if (instruction->CanThrow()) { + // Consistency check: We shouldn't land in a loop if we weren't in one before traversing up the + // dominator tree regarding try catches. + const bool was_in_loop = target_block->IsInLoop(); + + // We cannot move an instruction that can throw into a try that said instruction is not a part + // of already, as that would mean it will throw into a different catch block. In short, for + // throwing instructions: + // * If the throwing instruction is part of a try, they should only be sunk into that same try. + // * If the throwing instruction is not part of any try, they shouldn't be sunk to any try. + if (instruction->GetBlock()->IsTryBlock()) { + const HTryBoundary& try_entry = + instruction->GetBlock()->GetTryCatchInformation()->GetTryEntry(); + while (!(target_block->IsTryBlock() && + try_entry.HasSameExceptionHandlersAs( + target_block->GetTryCatchInformation()->GetTryEntry()))) { + target_block = target_block->GetDominator(); + if (!post_dominated.IsBitSet(target_block->GetBlockId())) { + // We couldn't find a suitable block. + return nullptr; + } } } else { - DCHECK(target_block->IsTryBlock()); - if (instruction->GetBlock()->IsTryBlock() && - instruction->GetBlock()->GetTryCatchInformation()->GetTryEntry().GetId() == - target_block->GetTryCatchInformation()->GetTryEntry().GetId()) { - // Sink within the same try block is allowed. - break; + // Search for the first block also not in a try block + while (target_block->IsTryBlock()) { + target_block = target_block->GetDominator(); + if (!post_dominated.IsBitSet(target_block->GetBlockId())) { + // We couldn't find a suitable block. + return nullptr; + } } } - // We are now in the case where we would be moving to a different try. Since we don't want - // that, traverse up the dominator tree to find a suitable block. - if (!post_dominated.IsBitSet(target_block->GetDominator()->GetBlockId())) { - // We couldn't find a suitable block. - return nullptr; - } - target_block = target_block->GetDominator(); - DCHECK(target_block != nullptr); - } - // We shouldn't land in a loop if we weren't in one before traversing up the dominator tree - // regarding try catches. - DCHECK_IMPLIES(target_block->IsInLoop(), was_in_loop); + DCHECK_IMPLIES(target_block->IsInLoop(), was_in_loop); + } // Find insertion position. No need to filter anymore, as we have found a // target block. 
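Editor's note: the rewritten FindIdealPosition logic above walks up the dominator tree so that a throwing instruction is only sunk into a block with the same try context it started in (or into a non-try block when it was not in any try), giving up once the walk leaves the post-dominated set. A toy version of that walk, with a single try_id standing in for the real HasSameExceptionHandlersAs comparison and with illustrative types, might look like this:

#include <cstdio>
#include <unordered_set>

struct ToyBlock {
  int id;
  ToyBlock* dominator;  // immediate dominator; nullptr for the entry block
  int try_id;           // try region this block belongs to; -1 if not in a try
};

// Walks up the dominator tree from `candidate` until its try context matches the
// context of the block the throwing instruction comes from, bailing out (nullptr)
// as soon as the walk leaves the post-dominated set.
ToyBlock* AdjustTargetForTryContext(ToyBlock* candidate,
                                    int source_try_id,
                                    const std::unordered_set<int>& post_dominated) {
  while (candidate->try_id != source_try_id) {
    candidate = candidate->dominator;
    if (candidate == nullptr || post_dominated.count(candidate->id) == 0) {
      return nullptr;  // no block with a compatible try context: do not sink
    }
  }
  return candidate;
}

int main() {
  ToyBlock entry{0, nullptr, -1};
  ToyBlock try_block{1, &entry, /*try_id=*/7};
  std::unordered_set<int> post_dominated{0, 1};
  // A throwing instruction not in any try (-1) must not land inside try 7, so the
  // walk retreats to the entry block.
  ToyBlock* target = AdjustTargetForTryContext(&try_block, /*source_try_id=*/-1, post_dominated);
  std::printf("%d\n", target == nullptr ? -1 : target->id);  // prints 0
}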
@@ -271,10 +291,21 @@ static HInstruction* FindIdealPosition(HInstruction* instruction, } } for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) { - HInstruction* user = use.GetUser()->GetHolder(); + HEnvironment* env = use.GetUser(); + HInstruction* user = env->GetHolder(); if (user->GetBlock() == target_block && (insert_pos == nullptr || user->StrictlyDominates(insert_pos))) { - insert_pos = user; + if (target_block->IsCatchBlock() && target_block->GetFirstInstruction() == user) { + // We can sink the instructions past the environment setting Nop. If we do that, we have to + // remove said instruction from the environment. Since we know that we will be sinking the + // instruction to this block and there are no more instructions to consider, we can safely + // remove it from the environment now. + DCHECK(target_block->GetFirstInstruction()->IsNop()); + env->RemoveAsUserOfInput(use.GetIndex()); + env->SetRawEnvAt(use.GetIndex(), /*instruction=*/ nullptr); + } else { + insert_pos = user; + } } } if (insert_pos == nullptr) { @@ -310,8 +341,8 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) { ScopedArenaVector<HInstruction*> move_in_order(allocator.Adapter(kArenaAllocMisc)); // Step (1): Visit post order to get a subset of blocks post dominated by `end_block`. - // TODO(ngeoffray): Getting the full set of post-dominated shoud be done by - // computint the post dominator tree, but that could be too time consuming. Also, + // TODO(ngeoffray): Getting the full set of post-dominated should be done by + // computing the post dominator tree, but that could be too time consuming. Also, // we should start the analysis from blocks dominated by an uncommon branch, but we // don't profile branches yet. bool found_block = false; @@ -321,45 +352,43 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) { post_dominated.SetBit(block->GetBlockId()); } else if (found_block) { bool is_post_dominated = true; - if (block->GetSuccessors().empty()) { - // We currently bail for loops. - is_post_dominated = false; - } else { - // BasicBlock that are try entries look like this: - // BasicBlock i: - // instr 1 - // ... - // instr N - // TryBoundary kind:entry ---Try begins here--- - // - // Due to how our BasicBlocks are structured, BasicBlock i will have an xhandler successor - // since we are starting a try. If we use `GetSuccessors` for this case, we will check if - // the catch block is post_dominated. - // - // However, this catch block doesn't matter: when we sink the instruction into that - // BasicBlock i, we do it before the TryBoundary (i.e. outside of the try and outside the - // catch's domain). We can ignore catch blocks using `GetNormalSuccessors` to sink code - // right before the start of a try block. - // - // On the other side of the coin, BasicBlock that are try exits look like this: - // BasicBlock j: - // instr 1 - // ... - // instr N - // TryBoundary kind:exit ---Try ends here--- - // - // If we sink to these basic blocks we would be sinking inside of the try so we would like - // to check the catch block for post dominance. - const bool ends_with_try_boundary_entry = - block->EndsWithTryBoundary() && block->GetLastInstruction()->AsTryBoundary()->IsEntry(); - ArrayRef<HBasicBlock* const> successors = - ends_with_try_boundary_entry ? 
block->GetNormalSuccessors() : - ArrayRef<HBasicBlock* const>(block->GetSuccessors()); - for (HBasicBlock* successor : successors) { - if (!post_dominated.IsBitSet(successor->GetBlockId())) { - is_post_dominated = false; - break; - } + DCHECK_NE(block, graph_->GetExitBlock()) + << "We shouldn't encounter the exit block after `end_block`."; + + // BasicBlock that are try entries look like this: + // BasicBlock i: + // instr 1 + // ... + // instr N + // TryBoundary kind:entry ---Try begins here--- + // + // Due to how our BasicBlocks are structured, BasicBlock i will have an xhandler successor + // since we are starting a try. If we use `GetSuccessors` for this case, we will check if + // the catch block is post_dominated. + // + // However, this catch block doesn't matter: when we sink the instruction into that + // BasicBlock i, we do it before the TryBoundary (i.e. outside of the try and outside the + // catch's domain). We can ignore catch blocks using `GetNormalSuccessors` to sink code + // right before the start of a try block. + // + // On the other side of the coin, BasicBlock that are try exits look like this: + // BasicBlock j: + // instr 1 + // ... + // instr N + // TryBoundary kind:exit ---Try ends here--- + // + // If we sink to these basic blocks we would be sinking inside of the try so we would like + // to check the catch block for post dominance. + const bool ends_with_try_boundary_entry = + block->EndsWithTryBoundary() && block->GetLastInstruction()->AsTryBoundary()->IsEntry(); + ArrayRef<HBasicBlock* const> successors = + ends_with_try_boundary_entry ? block->GetNormalSuccessors() : + ArrayRef<HBasicBlock* const>(block->GetSuccessors()); + for (HBasicBlock* successor : successors) { + if (!post_dominated.IsBitSet(successor->GetBlockId())) { + is_post_dominated = false; + break; } } if (is_post_dominated) { @@ -509,4 +538,79 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) { } } +void CodeSinking::ReturnSinking() { + HBasicBlock* exit = graph_->GetExitBlock(); + DCHECK(exit != nullptr); + + int number_of_returns = 0; + bool saw_return = false; + for (HBasicBlock* pred : exit->GetPredecessors()) { + // TODO(solanes): We might have Return/ReturnVoid->TryBoundary->Exit. We can theoretically + // handle them and move them out of the TryBoundary. However, it is a border case and it adds + // codebase complexity. + if (pred->GetLastInstruction()->IsReturn() || pred->GetLastInstruction()->IsReturnVoid()) { + saw_return |= pred->GetLastInstruction()->IsReturn(); + ++number_of_returns; + } + } + + if (number_of_returns < 2) { + // Nothing to do. + return; + } + + // `new_block` will coalesce the Return instructions into Phi+Return, or the ReturnVoid + // instructions into a ReturnVoid. + HBasicBlock* new_block = new (graph_->GetAllocator()) HBasicBlock(graph_, exit->GetDexPc()); + if (saw_return) { + HPhi* new_phi = nullptr; + for (size_t i = 0; i < exit->GetPredecessors().size(); /*++i in loop*/) { + HBasicBlock* pred = exit->GetPredecessors()[i]; + if (!pred->GetLastInstruction()->IsReturn()) { + ++i; + continue; + } + + HReturn* ret = pred->GetLastInstruction()->AsReturn(); + if (new_phi == nullptr) { + // Create the new_phi, if we haven't done so yet. We do it here since we need to know the + // type to assign to it. 
+ new_phi = new (graph_->GetAllocator()) HPhi(graph_->GetAllocator(), + kNoRegNumber, + /*number_of_inputs=*/0, + ret->InputAt(0)->GetType()); + new_block->AddPhi(new_phi); + } + new_phi->AddInput(ret->InputAt(0)); + pred->ReplaceAndRemoveInstructionWith(ret, + new (graph_->GetAllocator()) HGoto(ret->GetDexPc())); + pred->ReplaceSuccessor(exit, new_block); + // Since we are removing a predecessor, there's no need to increment `i`. + } + new_block->AddInstruction(new (graph_->GetAllocator()) HReturn(new_phi, exit->GetDexPc())); + } else { + for (size_t i = 0; i < exit->GetPredecessors().size(); /*++i in loop*/) { + HBasicBlock* pred = exit->GetPredecessors()[i]; + if (!pred->GetLastInstruction()->IsReturnVoid()) { + ++i; + continue; + } + + HReturnVoid* ret = pred->GetLastInstruction()->AsReturnVoid(); + pred->ReplaceAndRemoveInstructionWith(ret, + new (graph_->GetAllocator()) HGoto(ret->GetDexPc())); + pred->ReplaceSuccessor(exit, new_block); + // Since we are removing a predecessor, there's no need to increment `i`. + } + new_block->AddInstruction(new (graph_->GetAllocator()) HReturnVoid(exit->GetDexPc())); + } + + new_block->AddSuccessor(exit); + graph_->AddBlock(new_block); + + // Recompute dominance since we added a new block. + graph_->ClearDominanceInformation(); + graph_->ComputeDominanceInformation(); +} + } // namespace art diff --git a/compiler/optimizing/code_sinking.h b/compiler/optimizing/code_sinking.h index 8eb3a520c3..c743db40d9 100644 --- a/compiler/optimizing/code_sinking.h +++ b/compiler/optimizing/code_sinking.h @@ -17,10 +17,11 @@ #ifndef ART_COMPILER_OPTIMIZING_CODE_SINKING_H_ #define ART_COMPILER_OPTIMIZING_CODE_SINKING_H_ +#include "base/macros.h" #include "nodes.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { /** * Optimization pass to move instructions into uncommon branches, @@ -38,10 +39,16 @@ class CodeSinking : public HOptimization { static constexpr const char* kCodeSinkingPassName = "code_sinking"; private: - // Try to move code only used by `end_block` and all its post-dominated / dominated + // Tries to sink code to uncommon branches. + void UncommonBranchSinking(); + // Tries to move code only used by `end_block` and all its post-dominated / dominated // blocks, to these blocks. void SinkCodeToUncommonBranch(HBasicBlock* end_block); + // Coalesces the Return/ReturnVoid instructions into one, if we have two or more. We do this to + // avoid generating the exit frame code several times. + void ReturnSinking(); + DISALLOW_COPY_AND_ASSIGN(CodeSinking); }; diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index c0441b07ed..2d9acc49b3 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -33,7 +33,7 @@ #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { // Return all combinations of ISA and code generator that are executable on // hardware, or on simulator, and that we'd like to test. 
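ReturnSinking above merges the Return/ReturnVoid predecessors of the exit block into a single return fed by a Phi, so the exit-frame code is generated only once. At the source level the transformation corresponds to roughly the following sketch (illustrative only, not generated code):

#include <iostream>

// Before: two returns, so the exit-frame code would be emitted twice.
int BeforeSinking(bool flag) {
  if (flag) {
    return 1;
  }
  return 2;
}

// After: both predecessors reach a single new block whose Phi is modelled here
// by the local `result`; only one return (and one epilogue) remains.
int AfterSinking(bool flag) {
  int result;  // plays the role of the Phi in the new block before Exit
  if (flag) {
    result = 1;
  } else {
    result = 2;
  }
  return result;
}

int main() {
  std::cout << BeforeSinking(true) << " " << AfterSinking(true) << "\n";   // 1 1
  std::cout << BeforeSinking(false) << " " << AfterSinking(false) << "\n"; // 2 2
  return 0;
}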
@@ -64,7 +64,7 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() { return v; } -class CodegenTest : public OptimizingUnitTest { +class CodegenTest : public CommonCompilerTest, public OptimizingUnitTestHelper { protected: void TestCode(const std::vector<uint16_t>& data, bool has_result = false, int32_t expected = 0); void TestCodeLong(const std::vector<uint16_t>& data, bool has_result, int64_t expected); diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h index 397e601cee..7af9d0f44c 100644 --- a/compiler/optimizing/codegen_test_utils.h +++ b/compiler/optimizing/codegen_test_utils.h @@ -20,6 +20,7 @@ #include "arch/arm/registers_arm.h" #include "arch/instruction_set.h" #include "arch/x86/registers_x86.h" +#include "base/macros.h" #include "code_simulator.h" #include "code_simulator_container.h" #include "common_compiler_test.h" @@ -35,6 +36,10 @@ #include "code_generator_arm64.h" #endif +#ifdef ART_ENABLE_CODEGEN_riscv64 +#include "code_generator_riscv64.h" +#endif + #ifdef ART_ENABLE_CODEGEN_x86 #include "code_generator_x86.h" #endif @@ -43,9 +48,9 @@ #include "code_generator_x86_64.h" #endif -namespace art { +namespace art HIDDEN { -typedef CodeGenerator* (*CreateCodegenFn)(HGraph*, const CompilerOptions&); +using CreateCodegenFn = CodeGenerator* (*)(HGraph*, const CompilerOptions&); class CodegenTargetConfig { public: @@ -254,15 +259,11 @@ static void Run(const InternalCodeAllocator& allocator, Runtime* GetRuntime() override { return nullptr; } }; CodeHolder code_holder; - const void* code_ptr = + const void* method_code = code_holder.MakeExecutable(allocator.GetMemory(), ArrayRef<const uint8_t>(), target_isa); - typedef Expected (*fptr)(); - fptr f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(code_ptr)); - if (target_isa == InstructionSet::kThumb2) { - // For thumb we need the bottom bit set. 
- f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(f) + 1); - } + using fptr = Expected (*)(); + fptr f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(method_code)); VerifyGeneratedCode(target_isa, f, has_result, expected); } @@ -332,6 +333,10 @@ inline CodeGenerator* create_codegen_arm64(HGraph* graph, const CompilerOptions& } #endif +#ifdef ART_ENABLE_CODEGEN_riscv64 +inline CodeGenerator* create_codegen_riscv64(HGraph*, const CompilerOptions&) { return nullptr; } +#endif + #ifdef ART_ENABLE_CODEGEN_x86 inline CodeGenerator* create_codegen_x86(HGraph* graph, const CompilerOptions& compiler_options) { return new (graph->GetAllocator()) TestCodeGeneratorX86(graph, compiler_options); diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h index 320915ee57..5f71cb906c 100644 --- a/compiler/optimizing/common_arm.h +++ b/compiler/optimizing/common_arm.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_COMMON_ARM_H_ #define ART_COMPILER_OPTIMIZING_COMMON_ARM_H_ +#include "base/macros.h" #include "instruction_simplifier_shared.h" #include "locations.h" #include "nodes.h" @@ -28,7 +29,7 @@ #include "aarch32/macro-assembler-aarch32.h" #pragma GCC diagnostic pop -namespace art { +namespace art HIDDEN { using helpers::HasShifterOperand; diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h index 81c6561318..20b0e38af5 100644 --- a/compiler/optimizing/common_arm64.h +++ b/compiler/optimizing/common_arm64.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_COMMON_ARM64_H_ #define ART_COMPILER_OPTIMIZING_COMMON_ARM64_H_ +#include "base/macros.h" #include "code_generator.h" #include "instruction_simplifier_shared.h" #include "locations.h" @@ -31,7 +32,7 @@ #include "aarch64/simulator-aarch64.h" #pragma GCC diagnostic pop -namespace art { +namespace art HIDDEN { using helpers::CanFitInShifterOperand; using helpers::HasShifterOperand; @@ -153,7 +154,7 @@ inline vixl::aarch64::CPURegister InputCPURegisterOrZeroRegAt(HInstruction* inst int index) { HInstruction* input = instr->InputAt(index); DataType::Type input_type = input->GetType(); - if (input->IsConstant() && input->AsConstant()->IsZeroBitPattern()) { + if (IsZeroBitPattern(input)) { return (DataType::Size(input_type) >= vixl::aarch64::kXRegSizeInBytes) ? 
vixl::aarch64::Register(vixl::aarch64::xzr) : vixl::aarch64::Register(vixl::aarch64::wzr); @@ -314,7 +315,7 @@ inline Location ARM64EncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) { if (constant->IsConstant() && Arm64CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) { - return Location::ConstantLocation(constant->AsConstant()); + return Location::ConstantLocation(constant); } return Location::RequiresRegister(); @@ -380,10 +381,6 @@ inline bool ShifterOperandSupportsExtension(HInstruction* instruction) { return instruction->IsAdd() || instruction->IsSub(); } -inline bool IsConstantZeroBitPattern(const HInstruction* instruction) { - return instruction->IsConstant() && instruction->AsConstant()->IsZeroBitPattern(); -} - } // namespace helpers } // namespace arm64 } // namespace art diff --git a/compiler/optimizing/common_dominator.h b/compiler/optimizing/common_dominator.h index 9f012cfbb2..f01270ee4a 100644 --- a/compiler/optimizing/common_dominator.h +++ b/compiler/optimizing/common_dominator.h @@ -17,9 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_COMMON_DOMINATOR_H_ #define ART_COMPILER_OPTIMIZING_COMMON_DOMINATOR_H_ +#include "base/macros.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { // Helper class for finding common dominators of two or more blocks in a graph. // The domination information of a graph must not be modified while there is diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc index 2031707759..06d19e3f29 100644 --- a/compiler/optimizing/constant_folding.cc +++ b/compiler/optimizing/constant_folding.cc @@ -16,14 +16,20 @@ #include "constant_folding.h" -namespace art { +#include <algorithm> + +#include "dex/dex_file-inl.h" +#include "optimizing/data_type.h" +#include "optimizing/nodes.h" + +namespace art HIDDEN { // This visitor tries to simplify instructions that can be evaluated // as constants. -class HConstantFoldingVisitor : public HGraphDelegateVisitor { +class HConstantFoldingVisitor final : public HGraphDelegateVisitor { public: - explicit HConstantFoldingVisitor(HGraph* graph) - : HGraphDelegateVisitor(graph) {} + HConstantFoldingVisitor(HGraph* graph, OptimizingCompilerStats* stats, bool use_all_optimizations) + : HGraphDelegateVisitor(graph, stats), use_all_optimizations_(use_all_optimizations) {} private: void VisitBasicBlock(HBasicBlock* block) override; @@ -31,8 +37,15 @@ class HConstantFoldingVisitor : public HGraphDelegateVisitor { void VisitUnaryOperation(HUnaryOperation* inst) override; void VisitBinaryOperation(HBinaryOperation* inst) override; - void VisitTypeConversion(HTypeConversion* inst) override; + void VisitArrayLength(HArrayLength* inst) override; void VisitDivZeroCheck(HDivZeroCheck* inst) override; + void VisitIf(HIf* inst) override; + void VisitTypeConversion(HTypeConversion* inst) override; + + void PropagateValue(HBasicBlock* starting_block, HInstruction* variable, HConstant* constant); + + // Use all optimizations without restrictions. 
+ bool use_all_optimizations_; DISALLOW_COPY_AND_ASSIGN(HConstantFoldingVisitor); }; @@ -55,6 +68,11 @@ class InstructionWithAbsorbingInputSimplifier : public HGraphVisitor { void VisitBelow(HBelow* instruction) override; void VisitBelowOrEqual(HBelowOrEqual* instruction) override; + void VisitGreaterThan(HGreaterThan* instruction) override; + void VisitGreaterThanOrEqual(HGreaterThanOrEqual* instruction) override; + void VisitLessThan(HLessThan* instruction) override; + void VisitLessThanOrEqual(HLessThanOrEqual* instruction) override; + void VisitAnd(HAnd* instruction) override; void VisitCompare(HCompare* instruction) override; void VisitMul(HMul* instruction) override; @@ -69,7 +87,7 @@ class InstructionWithAbsorbingInputSimplifier : public HGraphVisitor { bool HConstantFolding::Run() { - HConstantFoldingVisitor visitor(graph_); + HConstantFoldingVisitor visitor(graph_, stats_, use_all_optimizations_); // Process basic blocks in reverse post-order in the dominator tree, // so that an instruction turned into a constant, used as input of // another instruction, may possibly be used to turn that second @@ -111,16 +129,6 @@ void HConstantFoldingVisitor::VisitBinaryOperation(HBinaryOperation* inst) { } } -void HConstantFoldingVisitor::VisitTypeConversion(HTypeConversion* inst) { - // Constant folding: replace `TypeConversion(a)' with a constant at - // compile time if `a' is a constant. - HConstant* constant = inst->TryStaticEvaluation(); - if (constant != nullptr) { - inst->ReplaceWith(constant); - inst->GetBlock()->RemoveInstruction(inst); - } -} - void HConstantFoldingVisitor::VisitDivZeroCheck(HDivZeroCheck* inst) { // We can safely remove the check if the input is a non-null constant. HInstruction* check_input = inst->InputAt(0); @@ -130,6 +138,169 @@ void HConstantFoldingVisitor::VisitDivZeroCheck(HDivZeroCheck* inst) { } } +void HConstantFoldingVisitor::PropagateValue(HBasicBlock* starting_block, + HInstruction* variable, + HConstant* constant) { + const bool recording_stats = stats_ != nullptr; + size_t uses_before = 0; + size_t uses_after = 0; + if (recording_stats) { + uses_before = variable->GetUses().SizeSlow(); + } + + if (variable->GetUses().HasExactlyOneElement()) { + // Nothing to do, since we only have the `if (variable)` use or the `condition` use. + return; + } + + variable->ReplaceUsesDominatedBy( + starting_block->GetFirstInstruction(), constant, /* strictly_dominated= */ false); + + if (recording_stats) { + uses_after = variable->GetUses().SizeSlow(); + DCHECK_GE(uses_after, 1u) << "we must at least have the use in the if clause."; + DCHECK_GE(uses_before, uses_after); + MaybeRecordStat(stats_, MethodCompilationStat::kPropagatedIfValue, uses_before - uses_after); + } +} + +void HConstantFoldingVisitor::VisitIf(HIf* inst) { + // This optimization can take a lot of compile time since we have a lot of If instructions in + // graphs. + if (!use_all_optimizations_) { + return; + } + + // Consistency check: the true and false successors do not dominate each other. + DCHECK(!inst->IfTrueSuccessor()->Dominates(inst->IfFalseSuccessor()) && + !inst->IfFalseSuccessor()->Dominates(inst->IfTrueSuccessor())); + + HInstruction* if_input = inst->InputAt(0); + + // Already a constant. 
+ if (if_input->IsConstant()) { + return; + } + + // if (variable) { + // SSA `variable` guaranteed to be true + // } else { + // and here false + // } + PropagateValue(inst->IfTrueSuccessor(), if_input, GetGraph()->GetIntConstant(1)); + PropagateValue(inst->IfFalseSuccessor(), if_input, GetGraph()->GetIntConstant(0)); + + // If the input is a condition, we can propagate the information of the condition itself. + if (!if_input->IsCondition()) { + return; + } + HCondition* condition = if_input->AsCondition(); + + // We want either `==` or `!=`, since we cannot make assumptions for other conditions e.g. `>`. + if (!condition->IsEqual() && !condition->IsNotEqual()) { + return; + } + + HInstruction* left = condition->GetLeft(); + HInstruction* right = condition->GetRight(); + + // We want one of them to be a constant and not the other. + if (left->IsConstant() == right->IsConstant()) { + return; + } + + // At this point we have something like: + // if (variable == constant) { + // SSA `variable` guaranteed to be equal to constant here + // } else { + // No guarantees can be made here (usually, see boolean case below). + // } + // Similarly with variable != constant, except that we can make guarantees in the else case. + + HConstant* constant = left->IsConstant() ? left->AsConstant() : right->AsConstant(); + HInstruction* variable = left->IsConstant() ? right : left; + + // Don't deal with floats/doubles since they bring a lot of edge cases e.g. + // if (f == 0.0f) { + // // f is not really guaranteed to be 0.0f. It could be -0.0f, for example + // } + if (DataType::IsFloatingPointType(variable->GetType())) { + return; + } + DCHECK(!DataType::IsFloatingPointType(constant->GetType())); + + // Sometimes we have an HCompare flowing into an Equals/NonEquals, which can act as a proxy. For + // example: `Equals(Compare(var, constant), 0)`. This is common for long, float, and double. + if (variable->IsCompare()) { + // We only care about equality comparisons so we skip if it is a less or greater comparison. + if (!constant->IsArithmeticZero()) { + return; + } + + // Update left and right to be the ones from the HCompare. + left = variable->AsCompare()->GetLeft(); + right = variable->AsCompare()->GetRight(); + + // Re-check that one of them is a constant and not the other. + if (left->IsConstant() == right->IsConstant()) { + return; + } + + constant = left->IsConstant() ? left->AsConstant() : right->AsConstant(); + variable = left->IsConstant() ? right : left; + + // Re-check floating point values. + if (DataType::IsFloatingPointType(variable->GetType())) { + return; + } + DCHECK(!DataType::IsFloatingPointType(constant->GetType())); + } + + // From this block forward we want to replace the SSA value. We use `starting_block` and not the + // `if` block as we want to update one of the branches but not the other. + HBasicBlock* starting_block = + condition->IsEqual() ? inst->IfTrueSuccessor() : inst->IfFalseSuccessor(); + + PropagateValue(starting_block, variable, constant); + + // Special case for booleans since they have only two values so we know what to propagate in the + // other branch. However, sometimes our boolean values are not compared to 0 or 1. In those cases + // we cannot make an assumption for the `else` branch. + if (variable->GetType() == DataType::Type::kBool && + constant->IsIntConstant() && + (constant->AsIntConstant()->IsTrue() || constant->AsIntConstant()->IsFalse())) { + HBasicBlock* other_starting_block = + condition->IsEqual() ? 
inst->IfFalseSuccessor() : inst->IfTrueSuccessor(); + DCHECK_NE(other_starting_block, starting_block); + + HConstant* other_constant = constant->AsIntConstant()->IsTrue() ? + GetGraph()->GetIntConstant(0) : + GetGraph()->GetIntConstant(1); + DCHECK_NE(other_constant, constant); + PropagateValue(other_starting_block, variable, other_constant); + } +} + +void HConstantFoldingVisitor::VisitArrayLength(HArrayLength* inst) { + HInstruction* input = inst->InputAt(0); + if (input->IsLoadString()) { + DCHECK(inst->IsStringLength()); + HLoadString* load_string = input->AsLoadString(); + const DexFile& dex_file = load_string->GetDexFile(); + const dex::StringId& string_id = dex_file.GetStringId(load_string->GetStringIndex()); + inst->ReplaceWith(GetGraph()->GetIntConstant(dex_file.GetStringLength(string_id))); + } +} + +void HConstantFoldingVisitor::VisitTypeConversion(HTypeConversion* inst) { + // Constant folding: replace `TypeConversion(a)' with a constant at + // compile time if `a' is a constant. + HConstant* constant = inst->TryStaticEvaluation(); + if (constant != nullptr) { + inst->ReplaceWith(constant); + inst->GetBlock()->RemoveInstruction(inst); + } +} void InstructionWithAbsorbingInputSimplifier::VisitShift(HBinaryOperation* instruction) { DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()); @@ -145,8 +316,17 @@ void InstructionWithAbsorbingInputSimplifier::VisitShift(HBinaryOperation* instr } void InstructionWithAbsorbingInputSimplifier::VisitEqual(HEqual* instruction) { - if ((instruction->GetLeft()->IsNullConstant() && !instruction->GetRight()->CanBeNull()) || - (instruction->GetRight()->IsNullConstant() && !instruction->GetLeft()->CanBeNull())) { + if (instruction->GetLeft() == instruction->GetRight() && + !DataType::IsFloatingPointType(instruction->GetLeft()->GetType())) { + // Replace code looking like + // EQUAL lhs, lhs + // CONSTANT true + // We don't perform this optimizations for FP types since Double.NaN != Double.NaN, which is the + // opposite value. + instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 1)); + instruction->GetBlock()->RemoveInstruction(instruction); + } else if ((instruction->GetLeft()->IsNullConstant() && !instruction->GetRight()->CanBeNull()) || + (instruction->GetRight()->IsNullConstant() && !instruction->GetLeft()->CanBeNull())) { // Replace code looking like // EQUAL lhs, null // where lhs cannot be null with @@ -157,8 +337,17 @@ void InstructionWithAbsorbingInputSimplifier::VisitEqual(HEqual* instruction) { } void InstructionWithAbsorbingInputSimplifier::VisitNotEqual(HNotEqual* instruction) { - if ((instruction->GetLeft()->IsNullConstant() && !instruction->GetRight()->CanBeNull()) || - (instruction->GetRight()->IsNullConstant() && !instruction->GetLeft()->CanBeNull())) { + if (instruction->GetLeft() == instruction->GetRight() && + !DataType::IsFloatingPointType(instruction->GetLeft()->GetType())) { + // Replace code looking like + // NOT_EQUAL lhs, lhs + // CONSTANT false + // We don't perform this optimizations for FP types since Double.NaN != Double.NaN, which is the + // opposite value. 
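The floating-point guard in the self-comparison folds above exists because `x == x` is not a tautology once NaN is involved. A quick standalone check of the behaviour being guarded against:

#include <cmath>
#include <iostream>

int main() {
  double nan = std::nan("");
  std::cout << std::boolalpha;
  std::cout << (nan == nan) << "\n";  // false: EQUAL lhs, lhs must not fold to true for FP
  std::cout << (nan != nan) << "\n";  // true: NOT_EQUAL lhs, lhs must not fold to false for FP
  int i = 42;
  std::cout << (i == i) << "\n";      // true: the fold is safe for integral types
  return 0;
}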
+ instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 0)); + instruction->GetBlock()->RemoveInstruction(instruction); + } else if ((instruction->GetLeft()->IsNullConstant() && !instruction->GetRight()->CanBeNull()) || + (instruction->GetRight()->IsNullConstant() && !instruction->GetLeft()->CanBeNull())) { // Replace code looking like // NOT_EQUAL lhs, null // where lhs cannot be null with @@ -169,8 +358,14 @@ void InstructionWithAbsorbingInputSimplifier::VisitNotEqual(HNotEqual* instructi } void InstructionWithAbsorbingInputSimplifier::VisitAbove(HAbove* instruction) { - if (instruction->GetLeft()->IsConstant() && - instruction->GetLeft()->AsConstant()->IsArithmeticZero()) { + if (instruction->GetLeft() == instruction->GetRight()) { + // Replace code looking like + // ABOVE lhs, lhs + // CONSTANT false + instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 0)); + instruction->GetBlock()->RemoveInstruction(instruction); + } else if (instruction->GetLeft()->IsConstant() && + instruction->GetLeft()->AsConstant()->IsArithmeticZero()) { // Replace code looking like // ABOVE dst, 0, src // unsigned 0 > src is always false // with @@ -181,8 +376,14 @@ void InstructionWithAbsorbingInputSimplifier::VisitAbove(HAbove* instruction) { } void InstructionWithAbsorbingInputSimplifier::VisitAboveOrEqual(HAboveOrEqual* instruction) { - if (instruction->GetRight()->IsConstant() && - instruction->GetRight()->AsConstant()->IsArithmeticZero()) { + if (instruction->GetLeft() == instruction->GetRight()) { + // Replace code looking like + // ABOVE_OR_EQUAL lhs, lhs + // CONSTANT true + instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 1)); + instruction->GetBlock()->RemoveInstruction(instruction); + } else if (instruction->GetRight()->IsConstant() && + instruction->GetRight()->AsConstant()->IsArithmeticZero()) { // Replace code looking like // ABOVE_OR_EQUAL dst, src, 0 // unsigned src >= 0 is always true // with @@ -193,8 +394,14 @@ void InstructionWithAbsorbingInputSimplifier::VisitAboveOrEqual(HAboveOrEqual* i } void InstructionWithAbsorbingInputSimplifier::VisitBelow(HBelow* instruction) { - if (instruction->GetRight()->IsConstant() && - instruction->GetRight()->AsConstant()->IsArithmeticZero()) { + if (instruction->GetLeft() == instruction->GetRight()) { + // Replace code looking like + // BELOW lhs, lhs + // CONSTANT false + instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 0)); + instruction->GetBlock()->RemoveInstruction(instruction); + } else if (instruction->GetRight()->IsConstant() && + instruction->GetRight()->AsConstant()->IsArithmeticZero()) { // Replace code looking like // BELOW dst, src, 0 // unsigned src < 0 is always false // with @@ -205,8 +412,14 @@ void InstructionWithAbsorbingInputSimplifier::VisitBelow(HBelow* instruction) { } void InstructionWithAbsorbingInputSimplifier::VisitBelowOrEqual(HBelowOrEqual* instruction) { - if (instruction->GetLeft()->IsConstant() && - instruction->GetLeft()->AsConstant()->IsArithmeticZero()) { + if (instruction->GetLeft() == instruction->GetRight()) { + // Replace code looking like + // BELOW_OR_EQUAL lhs, lhs + // CONSTANT true + instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 1)); + instruction->GetBlock()->RemoveInstruction(instruction); + } else if (instruction->GetLeft()->IsConstant() && + instruction->GetLeft()->AsConstant()->IsArithmeticZero()) { // Replace code looking like // BELOW_OR_EQUAL dst, 0, src // unsigned 0 <= src is always true // 
with @@ -216,6 +429,55 @@ void InstructionWithAbsorbingInputSimplifier::VisitBelowOrEqual(HBelowOrEqual* i } } +void InstructionWithAbsorbingInputSimplifier::VisitGreaterThan(HGreaterThan* instruction) { + if (instruction->GetLeft() == instruction->GetRight() && + (!DataType::IsFloatingPointType(instruction->GetLeft()->GetType()) || + instruction->IsLtBias())) { + // Replace code looking like + // GREATER_THAN lhs, lhs + // CONSTANT false + instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 0)); + instruction->GetBlock()->RemoveInstruction(instruction); + } +} + +void InstructionWithAbsorbingInputSimplifier::VisitGreaterThanOrEqual( + HGreaterThanOrEqual* instruction) { + if (instruction->GetLeft() == instruction->GetRight() && + (!DataType::IsFloatingPointType(instruction->GetLeft()->GetType()) || + instruction->IsGtBias())) { + // Replace code looking like + // GREATER_THAN_OR_EQUAL lhs, lhs + // CONSTANT true + instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 1)); + instruction->GetBlock()->RemoveInstruction(instruction); + } +} + +void InstructionWithAbsorbingInputSimplifier::VisitLessThan(HLessThan* instruction) { + if (instruction->GetLeft() == instruction->GetRight() && + (!DataType::IsFloatingPointType(instruction->GetLeft()->GetType()) || + instruction->IsGtBias())) { + // Replace code looking like + // LESS_THAN lhs, lhs + // CONSTANT false + instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 0)); + instruction->GetBlock()->RemoveInstruction(instruction); + } +} + +void InstructionWithAbsorbingInputSimplifier::VisitLessThanOrEqual(HLessThanOrEqual* instruction) { + if (instruction->GetLeft() == instruction->GetRight() && + (!DataType::IsFloatingPointType(instruction->GetLeft()->GetType()) || + instruction->IsLtBias())) { + // Replace code looking like + // LESS_THAN_OR_EQUAL lhs, lhs + // CONSTANT true + instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 1)); + instruction->GetBlock()->RemoveInstruction(instruction); + } +} + void InstructionWithAbsorbingInputSimplifier::VisitAnd(HAnd* instruction) { DataType::Type type = instruction->GetType(); HConstant* input_cst = instruction->GetConstantRight(); diff --git a/compiler/optimizing/constant_folding.h b/compiler/optimizing/constant_folding.h index 72bd95b3cb..29648e907c 100644 --- a/compiler/optimizing/constant_folding.h +++ b/compiler/optimizing/constant_folding.h @@ -17,10 +17,12 @@ #ifndef ART_COMPILER_OPTIMIZING_CONSTANT_FOLDING_H_ #define ART_COMPILER_OPTIMIZING_CONSTANT_FOLDING_H_ +#include "base/macros.h" #include "nodes.h" #include "optimization.h" +#include "optimizing/optimizing_compiler_stats.h" -namespace art { +namespace art HIDDEN { /** * Optimization pass performing a simple constant-expression @@ -39,13 +41,20 @@ namespace art { */ class HConstantFolding : public HOptimization { public: - HConstantFolding(HGraph* graph, const char* name) : HOptimization(graph, name) {} + HConstantFolding(HGraph* graph, + OptimizingCompilerStats* stats = nullptr, + const char* name = kConstantFoldingPassName, + bool use_all_optimizations = false) + : HOptimization(graph, name, stats), use_all_optimizations_(use_all_optimizations) {} bool Run() override; static constexpr const char* kConstantFoldingPassName = "constant_folding"; private: + // Use all optimizations without restrictions. 
+ bool use_all_optimizations_; + DISALLOW_COPY_AND_ASSIGN(HConstantFolding); }; diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc index 74d9d3a993..741fd3f822 100644 --- a/compiler/optimizing/constant_folding_test.cc +++ b/compiler/optimizing/constant_folding_test.cc @@ -17,6 +17,8 @@ #include <functional> #include "constant_folding.h" + +#include "base/macros.h" #include "dead_code_elimination.h" #include "driver/compiler_options.h" #include "graph_checker.h" @@ -25,12 +27,12 @@ #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { /** * Fixture class for the constant folding and dce tests. */ -class ConstantFoldingTest : public OptimizingUnitTest { +class ConstantFoldingTest : public CommonCompilerTest, public OptimizingUnitTestHelper { public: ConstantFoldingTest() : graph_(nullptr) { } @@ -58,7 +60,9 @@ class ConstantFoldingTest : public OptimizingUnitTest { std::string actual_before = printer_before.str(); EXPECT_EQ(expected_before, actual_before); - HConstantFolding(graph_, "constant_folding").Run(); + HConstantFolding constant_folding( + graph_, /* stats= */ nullptr, "constant_folding", /* use_all_optimizations= */ true); + constant_folding.Run(); GraphChecker graph_checker_cf(graph_); graph_checker_cf.Run(); ASSERT_TRUE(graph_checker_cf.IsValid()); diff --git a/compiler/optimizing/constructor_fence_redundancy_elimination.cc b/compiler/optimizing/constructor_fence_redundancy_elimination.cc index 3a1a9e023d..d9b7652f32 100644 --- a/compiler/optimizing/constructor_fence_redundancy_elimination.cc +++ b/compiler/optimizing/constructor_fence_redundancy_elimination.cc @@ -20,12 +20,12 @@ #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" -namespace art { +namespace art HIDDEN { static constexpr bool kCfreLogFenceInputCount = false; // TODO: refactor this code by reusing escape analysis. -class CFREVisitor : public HGraphVisitor { +class CFREVisitor final : public HGraphVisitor { public: CFREVisitor(HGraph* graph, OptimizingCompilerStats* stats) : HGraphVisitor(graph), @@ -147,16 +147,6 @@ class CFREVisitor : public HGraphVisitor { void VisitAlias(HInstruction* aliasing_inst) { // An object is considered "published" if it becomes aliased by other instructions. if (HasInterestingPublishTargetAsInput(aliasing_inst)) { - // Note that constructing a "NullCheck" for new-instance, new-array, - // or a 'this' (receiver) reference is impossible. - // - // If by some reason we actually encounter such a NullCheck(FenceTarget), - // we LOG(WARNING). - if (UNLIKELY(aliasing_inst->IsNullCheck())) { - LOG(kIsDebugBuild ? FATAL : WARNING) - << "Unexpected instruction: NullCheck; should not be legal in graph"; - // We then do a best-effort to handle this case. - } MergeCandidateFences(); } } diff --git a/compiler/optimizing/constructor_fence_redundancy_elimination.h b/compiler/optimizing/constructor_fence_redundancy_elimination.h index 014b342258..e04b986171 100644 --- a/compiler/optimizing/constructor_fence_redundancy_elimination.h +++ b/compiler/optimizing/constructor_fence_redundancy_elimination.h @@ -17,9 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_CONSTRUCTOR_FENCE_REDUNDANCY_ELIMINATION_H_ #define ART_COMPILER_OPTIMIZING_CONSTRUCTOR_FENCE_REDUNDANCY_ELIMINATION_H_ +#include "base/macros.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { /* * Constructor Fence Redundancy Elimination (CFRE). 
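The VisitIf/PropagateValue path added to constant folding above rewrites uses that are dominated by a branch on `variable == constant` (or on a boolean `variable` itself). In source terms it enables simplifications like the following sketch (illustrative only, not ART code):

#include <iostream>

// Before: inside the guarded branch, `x` can only be 42, but it is still read.
int Before(int x) {
  if (x == 42) {
    return x + 1;    // `x` is known to be 42 here
  }
  return 0;
}

// After propagation: the dominated use is replaced by the constant, which in
// turn lets later folding turn `42 + 1` into `43`.
int After(int x) {
  if (x == 42) {
    return 42 + 1;   // becomes the constant 43
  }
  return 0;
}

int main() {
  std::cout << Before(42) << " " << After(42) << "\n";  // 43 43
  std::cout << Before(7) << " " << After(7) << "\n";    // 0 0
  return 0;
}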
diff --git a/compiler/optimizing/critical_native_abi_fixup_arm.cc b/compiler/optimizing/critical_native_abi_fixup_arm.cc index 3c4db4bca7..77e156608b 100644 --- a/compiler/optimizing/critical_native_abi_fixup_arm.cc +++ b/compiler/optimizing/critical_native_abi_fixup_arm.cc @@ -23,7 +23,7 @@ #include "scoped_thread_state_change-inl.h" #include "well_known_classes.h" -namespace art { +namespace art HIDDEN { namespace arm { // Fix up FP arguments passed in core registers for call to @CriticalNative by inserting fake calls @@ -45,9 +45,9 @@ static void FixUpArguments(HInvokeStaticOrDirect* invoke) { if (DataType::IsFloatingPointType(input_type)) { bool is_double = (input_type == DataType::Type::kFloat64); DataType::Type converted_type = is_double ? DataType::Type::kInt64 : DataType::Type::kInt32; - jmethodID known_method = is_double ? WellKnownClasses::java_lang_Double_doubleToRawLongBits - : WellKnownClasses::java_lang_Float_floatToRawIntBits; - ArtMethod* resolved_method = jni::DecodeArtMethod(known_method); + ArtMethod* resolved_method = is_double + ? WellKnownClasses::java_lang_Double_doubleToRawLongBits + : WellKnownClasses::java_lang_Float_floatToRawIntBits; DCHECK(resolved_method != nullptr); DCHECK(resolved_method->IsIntrinsic()); MethodReference target_method(nullptr, 0); @@ -74,7 +74,8 @@ static void FixUpArguments(HInvokeStaticOrDirect* invoke) { dispatch_info, kStatic, target_method, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, + !block->GetGraph()->IsDebuggable()); // The intrinsic has no side effects and does not need environment or dex cache on ARM. new_input->SetSideEffects(SideEffects::None()); IntrinsicOptimizations opt(new_input); diff --git a/compiler/optimizing/critical_native_abi_fixup_arm.h b/compiler/optimizing/critical_native_abi_fixup_arm.h index faa3c7a5fe..c2068f5e2d 100644 --- a/compiler/optimizing/critical_native_abi_fixup_arm.h +++ b/compiler/optimizing/critical_native_abi_fixup_arm.h @@ -17,10 +17,11 @@ #ifndef ART_COMPILER_OPTIMIZING_CRITICAL_NATIVE_ABI_FIXUP_ARM_H_ #define ART_COMPILER_OPTIMIZING_CRITICAL_NATIVE_ABI_FIXUP_ARM_H_ +#include "base/macros.h" #include "nodes.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { namespace arm { class CriticalNativeAbiFixupArm : public HOptimization { diff --git a/compiler/optimizing/data_type-inl.h b/compiler/optimizing/data_type-inl.h index 1b33b775da..bbfe90451b 100644 --- a/compiler/optimizing/data_type-inl.h +++ b/compiler/optimizing/data_type-inl.h @@ -20,7 +20,7 @@ #include "data_type.h" #include "dex/primitive.h" -namespace art { +namespace art HIDDEN { // Note: Not declared in data_type.h to avoid pulling in "primitive.h". 
constexpr DataType::Type DataTypeFromPrimitive(Primitive::Type type) { diff --git a/compiler/optimizing/data_type.cc b/compiler/optimizing/data_type.cc index cb354f46cc..183cf2c622 100644 --- a/compiler/optimizing/data_type.cc +++ b/compiler/optimizing/data_type.cc @@ -16,7 +16,7 @@ #include "data_type.h" -namespace art { +namespace art HIDDEN { static const char* kTypeNames[] = { "Reference", diff --git a/compiler/optimizing/data_type.h b/compiler/optimizing/data_type.h index ec6ca7accb..b6d9519150 100644 --- a/compiler/optimizing/data_type.h +++ b/compiler/optimizing/data_type.h @@ -22,8 +22,9 @@ #include <android-base/logging.h> #include "base/bit_utils.h" +#include "base/macros.h" -namespace art { +namespace art HIDDEN { class DataType { public: diff --git a/compiler/optimizing/data_type_test.cc b/compiler/optimizing/data_type_test.cc index 8fea22bce8..f6f614d8c4 100644 --- a/compiler/optimizing/data_type_test.cc +++ b/compiler/optimizing/data_type_test.cc @@ -22,7 +22,7 @@ #include "base/macros.h" #include "dex/primitive.h" -namespace art { +namespace art HIDDEN { template <DataType::Type data_type, Primitive::Type primitive_type> static void CheckConversion() { diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index d808f2ca3a..cf49e39849 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -16,14 +16,17 @@ #include "dead_code_elimination.h" +#include "android-base/logging.h" #include "base/array_ref.h" #include "base/bit_vector-inl.h" +#include "base/logging.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" #include "base/stl_util.h" +#include "optimizing/nodes.h" #include "ssa_phi_elimination.h" -namespace art { +namespace art HIDDEN { static void MarkReachableBlocks(HGraph* graph, ArenaBitVector* visited) { // Use local allocator for allocating memory. @@ -178,6 +181,13 @@ static bool RemoveNonNullControlDependences(HBasicBlock* block, HBasicBlock* thr } else if (!cond->InputAt(0)->IsNullConstant()) { return false; } + + // We can't create a BoundType for an object with an invalid RTI. + const ReferenceTypeInfo ti = obj->GetReferenceTypeInfo(); + if (!ti.IsValid()) { + return false; + } + // Scan all uses of obj and find null check under control dependence. HBoundType* bound = nullptr; const HUseList<HInstruction*>& uses = obj->GetUses(); @@ -190,7 +200,6 @@ static bool RemoveNonNullControlDependences(HBasicBlock* block, HBasicBlock* thr user_block != throws && block->Dominates(user_block)) { if (bound == nullptr) { - ReferenceTypeInfo ti = obj->GetReferenceTypeInfo(); bound = new (obj->GetBlock()->GetGraph()->GetAllocator()) HBoundType(obj); bound->SetUpperBound(ti, /*can_be_null*/ false); bound->SetReferenceTypeInfo(ti); @@ -213,6 +222,9 @@ static bool RemoveNonNullControlDependences(HBasicBlock* block, HBasicBlock* thr // | ... // | instr_n // | foo() // always throws +// | instr_n+2 +// | ... +// | instr_n+m // \ goto B2 // \ / // B2 @@ -230,11 +242,14 @@ static bool RemoveNonNullControlDependences(HBasicBlock* block, HBasicBlock* thr // B2 Exit // // Rationale: -// Removal of the never taken edge to B2 may expose -// other optimization opportunities, such as code sinking. +// Removal of the never taken edge to B2 may expose other optimization opportunities, such as code +// sinking. +// +// Note: The example above is a simple one that uses a `goto` but we could end the block with an If, +// for example. 
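The pattern SimplifyAlwaysThrows targets is easy to reproduce at the source level: once a call that always throws has executed, everything after it in the block is unreachable, so the block can branch straight to the exit. A hedged sketch follows; the `[[noreturn]]` helper is hypothetical, not an ART runtime entry point.

#include <iostream>
#include <stdexcept>

// Stand-in for an invoke that is known to always throw.
[[noreturn]] void AlwaysThrows() {
  throw std::runtime_error("always");
}

int Caller(bool uncommon) {
  int value = 1;
  if (uncommon) {
    AlwaysThrows();
    value = 2;     // dead: control never reaches past the call, so these
    value += 40;   // instructions and the edge to the merge point can go away.
  }
  return value;
}

int main() {
  std::cout << Caller(false) << "\n";  // 1
  try {
    Caller(true);
  } catch (const std::runtime_error& e) {
    std::cout << "threw: " << e.what() << "\n";
  }
  return 0;
}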
bool HDeadCodeElimination::SimplifyAlwaysThrows() { HBasicBlock* exit = graph_->GetExitBlock(); - if (exit == nullptr) { + if (!graph_->HasAlwaysThrowingInvokes() || exit == nullptr) { return false; } @@ -242,54 +257,55 @@ bool HDeadCodeElimination::SimplifyAlwaysThrows() { // Order does not matter, just pick one. for (HBasicBlock* block : graph_->GetReversePostOrder()) { - if (block->GetTryCatchInformation() != nullptr) { + if (block->IsTryBlock()) { // We don't want to perform the simplify always throws optimizations for throws inside of - // tries since those throws might not go to the exit block. We do that by checking the - // TryCatchInformation of the blocks. - // - // As a special case the `catch_block` is the first block of the catch and it has - // TryCatchInformation. Other blocks in the catch don't have try catch information (as long as - // they are not part of an outer try). Knowing if a `catch_block` is part of an outer try is - // possible by checking its successors, but other restrictions of the simplify always throws - // optimization will block `catch_block` nevertheless (e.g. only one predecessor) so it is not - // worth the effort. - - // TODO(solanes): Maybe we can do a `goto catch` if inside of a try catch instead of going to - // the exit. If we do so, we have to take into account that we should go to the nearest valid - // catch i.e. one that would accept our exception type. + // tries since those throws might not go to the exit block. continue; } - HInstruction* last = block->GetLastInstruction(); - HInstruction* prev = last->GetPrevious(); - if (prev == nullptr) { - DCHECK_EQ(block->GetFirstInstruction(), block->GetLastInstruction()); + // We iterate to find the first instruction that always throws. If two instructions always + // throw, the first one will throw and the second one will never be reached. + HInstruction* throwing_invoke = nullptr; + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + if (it.Current()->IsInvoke() && it.Current()->AsInvoke()->AlwaysThrows()) { + throwing_invoke = it.Current(); + break; + } + } + + if (throwing_invoke == nullptr) { + // No always-throwing instruction found. Continue with the rest of the blocks. continue; } - if (prev->AlwaysThrows() && - last->IsGoto() && - block->GetPhis().IsEmpty() && - block->GetPredecessors().size() == 1u) { - HBasicBlock* pred = block->GetSinglePredecessor(); - HBasicBlock* succ = block->GetSingleSuccessor(); - // Ensure no computations are merged through throwing block. - // This does not prevent the optimization per se, but would - // require an elaborate clean up of the SSA graph. - if (succ != exit && - !block->Dominates(pred) && - pred->Dominates(succ) && - succ->GetPredecessors().size() > 1u && - succ->GetPhis().IsEmpty()) { - block->ReplaceSuccessor(succ, exit); - rerun_dominance_and_loop_analysis = true; - MaybeRecordStat(stats_, MethodCompilationStat::kSimplifyThrowingInvoke); - // Perform a quick follow up optimization on object != null control dependences - // that is much cheaper to perform now than in a later phase. - if (RemoveNonNullControlDependences(pred, block)) { - MaybeRecordStat(stats_, MethodCompilationStat::kRemovedNullCheck); - } - } + // If we are already pointing at the exit block we could still remove the instructions + // between the always throwing instruction, and the exit block. If we have no other + // instructions, just continue since there's nothing to do. 
+ if (block->GetSuccessors().size() == 1 && + block->GetSingleSuccessor() == exit && + block->GetLastInstruction()->GetPrevious() == throwing_invoke) { + continue; + } + + // We split the block at the throwing instruction, and the instructions after the throwing + // instructions will be disconnected from the graph after `block` points to the exit. + // `RemoveDeadBlocks` will take care of removing this new block and its instructions. + // Even though `SplitBefore` doesn't guarantee the graph to remain in SSA form, it is fine + // since we do not break it. + HBasicBlock* new_block = block->SplitBefore(throwing_invoke->GetNext(), + /* require_graph_not_in_ssa_form= */ false); + DCHECK_EQ(block->GetSingleSuccessor(), new_block); + block->ReplaceSuccessor(new_block, exit); + + rerun_dominance_and_loop_analysis = true; + MaybeRecordStat(stats_, MethodCompilationStat::kSimplifyThrowingInvoke); + // Perform a quick follow up optimization on object != null control dependences + // that is much cheaper to perform now than in a later phase. + // If there are multiple predecessors, none may end with a HIf as required in + // RemoveNonNullControlDependences because we split critical edges. + if (block->GetPredecessors().size() == 1u && + RemoveNonNullControlDependences(block->GetSinglePredecessor(), block)) { + MaybeRecordStat(stats_, MethodCompilationStat::kRemovedNullCheck); } } @@ -303,54 +319,45 @@ bool HDeadCodeElimination::SimplifyAlwaysThrows() { return false; } -// Simplify the pattern: -// -// B1 B2 ... -// goto goto goto -// \ | / -// \ | / -// B3 -// i1 = phi(input, input) -// (i2 = condition on i1) -// if i1 (or i2) -// / \ -// / \ -// B4 B5 -// -// Into: -// -// B1 B2 ... -// | | | -// B4 B5 B? -// -// Note that individual edges can be redirected (for example B2->B3 -// can be redirected as B2->B5) without applying this optimization -// to other incoming edges. -// -// This simplification cannot be applied to catch blocks, because -// exception handler edges do not represent normal control flow. -// Though in theory this could still apply to normal control flow -// going directly to a catch block, we cannot support it at the -// moment because the catch Phi's inputs do not correspond to the -// catch block's predecessors, so we cannot identify which -// predecessor corresponds to a given statically evaluated input. -// -// We do not apply this optimization to loop headers as this could -// create irreducible loops. We rely on the suspend check in the -// loop header to prevent the pattern match. -// -// Note that we rely on the dead code elimination to get rid of B3. bool HDeadCodeElimination::SimplifyIfs() { bool simplified_one_or_more_ifs = false; bool rerun_dominance_and_loop_analysis = false; - for (HBasicBlock* block : graph_->GetReversePostOrder()) { + // Iterating in PostOrder it's better for MaybeAddPhi as it can add a Phi for multiple If + // instructions in a chain without updating the dominator chain. The branch redirection itself can + // work in PostOrder or ReversePostOrder without issues. + for (HBasicBlock* block : graph_->GetPostOrder()) { + if (block->IsCatchBlock()) { + // This simplification cannot be applied to catch blocks, because exception handler edges do + // not represent normal control flow. 
Though in theory this could still apply to normal + // control flow going directly to a catch block, we cannot support it at the moment because + // the catch Phi's inputs do not correspond to the catch block's predecessors, so we cannot + // identify which predecessor corresponds to a given statically evaluated input. + continue; + } + HInstruction* last = block->GetLastInstruction(); - HInstruction* first = block->GetFirstInstruction(); - if (!block->IsCatchBlock() && - last->IsIf() && - block->HasSinglePhi() && + if (!last->IsIf()) { + continue; + } + + if (block->IsLoopHeader()) { + // We do not apply this optimization to loop headers as this could create irreducible loops. + continue; + } + + // We will add a Phi which allows the simplification to take place in cases where it wouldn't. + MaybeAddPhi(block); + + // TODO(solanes): Investigate support for multiple phis in `block`. We can potentially "push + // downwards" existing Phis into the true/false branches. For example, let's say we have another + // Phi: Phi(x1,x2,x3,x4,x5,x6). This could turn into Phi(x1,x2) in the true branch, Phi(x3,x4) + // in the false branch, and remain as Phi(x5,x6) in `block` (for edges that we couldn't + // redirect). We might even be able to remove some phis altogether as they will have only one + // value. + if (block->HasSinglePhi() && block->GetFirstPhi()->HasOnlyOneNonEnvironmentUse()) { + HInstruction* first = block->GetFirstInstruction(); bool has_only_phi_and_if = (last == first) && (last->InputAt(0) == block->GetFirstPhi()); bool has_only_phi_condition_and_if = !has_only_phi_and_if && @@ -361,7 +368,6 @@ bool HDeadCodeElimination::SimplifyIfs() { first->HasOnlyOneNonEnvironmentUse(); if (has_only_phi_and_if || has_only_phi_condition_and_if) { - DCHECK(!block->IsLoopHeader()); HPhi* phi = block->GetFirstPhi()->AsPhi(); bool phi_input_is_left = (first->InputAt(0) == phi); @@ -446,6 +452,125 @@ bool HDeadCodeElimination::SimplifyIfs() { return simplified_one_or_more_ifs; } +void HDeadCodeElimination::MaybeAddPhi(HBasicBlock* block) { + DCHECK(block->GetLastInstruction()->IsIf()); + HIf* if_instruction = block->GetLastInstruction()->AsIf(); + if (if_instruction->InputAt(0)->IsConstant()) { + // Constant values are handled in RemoveDeadBlocks. + return; + } + + if (block->GetNumberOfPredecessors() < 2u) { + // Nothing to redirect. + return; + } + + if (!block->GetPhis().IsEmpty()) { + // SimplifyIf doesn't currently work with multiple phis. Adding a phi here won't help that + // optimization. + return; + } + + HBasicBlock* dominator = block->GetDominator(); + if (!dominator->EndsWithIf()) { + return; + } + + HInstruction* input = if_instruction->InputAt(0); + HInstruction* dominator_input = dominator->GetLastInstruction()->AsIf()->InputAt(0); + const bool same_input = dominator_input == input; + if (!same_input) { + // Try to see if the dominator has the opposite input (e.g. if(cond) and if(!cond)). If that's + // the case, we can perform the optimization with the false and true branches reversed. + if (!dominator_input->IsCondition() || !input->IsCondition()) { + return; + } + + HCondition* block_cond = input->AsCondition(); + HCondition* dominator_cond = dominator_input->AsCondition(); + + if (block_cond->GetLeft() != dominator_cond->GetLeft() || + block_cond->GetRight() != dominator_cond->GetRight() || + block_cond->GetOppositeCondition() != dominator_cond->GetCondition()) { + return; + } + } + + if (kIsDebugBuild) { + // `block`'s successors should have only one predecessor. 
Otherwise, we have a critical edge in + // the graph. + for (HBasicBlock* succ : block->GetSuccessors()) { + DCHECK_EQ(succ->GetNumberOfPredecessors(), 1u); + } + } + + const size_t pred_size = block->GetNumberOfPredecessors(); + HPhi* new_phi = new (graph_->GetAllocator()) + HPhi(graph_->GetAllocator(), kNoRegNumber, pred_size, DataType::Type::kInt32); + + for (size_t index = 0; index < pred_size; index++) { + HBasicBlock* pred = block->GetPredecessors()[index]; + const bool dominated_by_true = + dominator->GetLastInstruction()->AsIf()->IfTrueSuccessor()->Dominates(pred); + const bool dominated_by_false = + dominator->GetLastInstruction()->AsIf()->IfFalseSuccessor()->Dominates(pred); + if (dominated_by_true == dominated_by_false) { + // In this case, we can't know if we are coming from the true branch, or the false branch. It + // happens in cases like: + // 1 (outer if) + // / \ + // 2 3 (inner if) + // | / \ + // | 4 5 + // \/ | + // 6 | + // \ | + // 7 (has the same if(cond) as 1) + // | + // 8 + // `7` (which would be `block` in this example), and `6` will come from both the true path and + // the false path of `1`. We bumped into something similar in SelectGenerator. See + // HSelectGenerator::TryFixupDoubleDiamondPattern. + // TODO(solanes): Figure out if we can fix up the graph into a double diamond in a generic way + // so that DeadCodeElimination and SelectGenerator can take advantage of it. + + if (!same_input) { + // `1` and `7` having the opposite condition is a case we are missing. We could potentially + // add a BooleanNot instruction to be able to add the Phi, but it seems like overkill since + // this case is not that common. + return; + } + + // The Phi will have `0`, `1`, and `cond` as inputs. If SimplifyIf redirects 0s and 1s, we + // will end up with Phi(cond,...,cond) which will be replaced by `cond`. Effectively, we will + // redirect edges that we are able to redirect and the rest will remain as before (i.e. we + // won't have an extra Phi). + new_phi->SetRawInputAt(index, input); + } else { + // Redirect to either the true branch (1), or the false branch (0). + // Given that `dominated_by_true` is the exact opposite of `dominated_by_false`, + // `(same_input && dominated_by_true) || (!same_input && dominated_by_false)` is equivalent to + // `same_input == dominated_by_true`. + new_phi->SetRawInputAt( + index, + same_input == dominated_by_true ? graph_->GetIntConstant(1) : graph_->GetIntConstant(0)); + } + } + + block->AddPhi(new_phi); + if_instruction->ReplaceInput(new_phi, 0); + + // Remove the old input now, if possible. This allows the branch redirection in SimplifyIf to + // work without waiting for another pass of DCE. + if (input->IsDeadAndRemovable()) { + DCHECK(!same_input) + << " if both blocks have the same condition, it shouldn't be dead and removable since the " + << "dominator block's If instruction would be using that condition."; + input->GetBlock()->RemoveInstruction(input); + } + MaybeRecordStat(stats_, MethodCompilationStat::kSimplifyIfAddedPhi); +} + void HDeadCodeElimination::ConnectSuccessiveBlocks() { // Order does not matter. Skip the entry block by starting at index 1 in reverse post order. 
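SimplifyIfs, helped by the MaybeAddPhi step above, redirects predecessors of a block that re-tests a value each predecessor has already determined. The effect of the edge redirection, expressed at the source level, is roughly the following sketch (illustrative only):

#include <iostream>

// Before: the merge block holds only the phi and a second test of `flag`.
int Before(bool flag) {
  int x = flag ? 1 : 2;   // phi(1, 2) at the merge point
  if (flag) {             // re-test of the same condition
    return x + 100;
  }
  return x - 100;
}

// After redirection: each predecessor branches directly to the successor that
// matches its phi input, so the merge block and the second test disappear.
int After(bool flag) {
  if (flag) {
    return 1 + 100;       // 101
  }
  return 2 - 100;         // -98
}

int main() {
  std::cout << Before(true) << " " << After(true) << "\n";    // 101 101
  std::cout << Before(false) << " " << After(false) << "\n";  // -98 -98
  return 0;
}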
for (size_t i = 1u, size = graph_->GetReversePostOrder().size(); i != size; ++i) { @@ -466,7 +591,192 @@ void HDeadCodeElimination::ConnectSuccessiveBlocks() { } } -bool HDeadCodeElimination::RemoveDeadBlocks() { +struct HDeadCodeElimination::TryBelongingInformation { + explicit TryBelongingInformation(ScopedArenaAllocator* allocator) + : blocks_in_try(allocator->Adapter(kArenaAllocDCE)), + coalesced_try_entries(allocator->Adapter(kArenaAllocDCE)) {} + + // Which blocks belong in the try. + ScopedArenaSet<HBasicBlock*> blocks_in_try; + // Which other try entries are referencing this same try. + ScopedArenaSet<HBasicBlock*> coalesced_try_entries; +}; + +bool HDeadCodeElimination::CanPerformTryRemoval(const TryBelongingInformation& try_belonging_info) { + for (HBasicBlock* block : try_belonging_info.blocks_in_try) { + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + if (it.Current()->CanThrow()) { + return false; + } + } + } + return true; +} + +void HDeadCodeElimination::DisconnectHandlersAndUpdateTryBoundary( + HBasicBlock* block, + /* out */ bool* any_block_in_loop) { + if (block->IsInLoop()) { + *any_block_in_loop = true; + } + + // Disconnect the handlers. + while (block->GetSuccessors().size() > 1) { + HBasicBlock* handler = block->GetSuccessors()[1]; + DCHECK(handler->IsCatchBlock()); + block->RemoveSuccessor(handler); + handler->RemovePredecessor(block); + if (handler->IsInLoop()) { + *any_block_in_loop = true; + } + } + + // Change TryBoundary to Goto. + DCHECK(block->EndsWithTryBoundary()); + HInstruction* last = block->GetLastInstruction(); + block->RemoveInstruction(last); + block->AddInstruction(new (graph_->GetAllocator()) HGoto(last->GetDexPc())); + DCHECK_EQ(block->GetSuccessors().size(), 1u); +} + +void HDeadCodeElimination::RemoveTry(HBasicBlock* try_entry, + const TryBelongingInformation& try_belonging_info, + /* out */ bool* any_block_in_loop) { + // Update all try entries. + DCHECK(try_entry->EndsWithTryBoundary()); + DCHECK(try_entry->GetLastInstruction()->AsTryBoundary()->IsEntry()); + DisconnectHandlersAndUpdateTryBoundary(try_entry, any_block_in_loop); + + for (HBasicBlock* other_try_entry : try_belonging_info.coalesced_try_entries) { + DCHECK(other_try_entry->EndsWithTryBoundary()); + DCHECK(other_try_entry->GetLastInstruction()->AsTryBoundary()->IsEntry()); + DisconnectHandlersAndUpdateTryBoundary(other_try_entry, any_block_in_loop); + } + + // Update the blocks in the try. + for (HBasicBlock* block : try_belonging_info.blocks_in_try) { + // Update the try catch information since now the try doesn't exist. + block->SetTryCatchInformation(nullptr); + if (block->IsInLoop()) { + *any_block_in_loop = true; + } + + if (block->EndsWithTryBoundary()) { + // Try exits. + DCHECK(!block->GetLastInstruction()->AsTryBoundary()->IsEntry()); + DisconnectHandlersAndUpdateTryBoundary(block, any_block_in_loop); + + if (block->GetSingleSuccessor()->IsExitBlock()) { + // `block` used to be a single exit TryBoundary that got turned into a Goto. It + // is now pointing to the exit which we don't allow. To fix it, we disconnect + // `block` from its predecessor and RemoveDeadBlocks will remove it from the + // graph. + DCHECK(block->IsSingleGoto()); + HBasicBlock* predecessor = block->GetSinglePredecessor(); + predecessor->ReplaceSuccessor(block, graph_->GetExitBlock()); + + if (!block->GetDominatedBlocks().empty()) { + // Update domination tree if `block` dominates a block to keep the graph consistent. 
+ DCHECK_EQ(block->GetDominatedBlocks().size(), 1u); + DCHECK_EQ(graph_->GetExitBlock()->GetDominator(), block); + predecessor->AddDominatedBlock(graph_->GetExitBlock()); + graph_->GetExitBlock()->SetDominator(predecessor); + block->RemoveDominatedBlock(graph_->GetExitBlock()); + } + } + } + } +} + +bool HDeadCodeElimination::RemoveUnneededTries() { + if (!graph_->HasTryCatch()) { + return false; + } + + // Use local allocator for allocating memory. + ScopedArenaAllocator allocator(graph_->GetArenaStack()); + + // Collect which blocks are part of which try. + std::unordered_map<HBasicBlock*, TryBelongingInformation> tries; + for (HBasicBlock* block : graph_->GetReversePostOrderSkipEntryBlock()) { + if (block->IsTryBlock()) { + HBasicBlock* key = block->GetTryCatchInformation()->GetTryEntry().GetBlock(); + auto it = tries.find(key); + if (it == tries.end()) { + it = tries.insert({key, TryBelongingInformation(&allocator)}).first; + } + it->second.blocks_in_try.insert(block); + } + } + + // Deduplicate the tries which have different try entries but they are really the same try. + for (auto it = tries.begin(); it != tries.end(); it++) { + DCHECK(it->first->EndsWithTryBoundary()); + HTryBoundary* try_boundary = it->first->GetLastInstruction()->AsTryBoundary(); + for (auto other_it = next(it); other_it != tries.end(); /*other_it++ in the loop*/) { + DCHECK(other_it->first->EndsWithTryBoundary()); + HTryBoundary* other_try_boundary = other_it->first->GetLastInstruction()->AsTryBoundary(); + if (try_boundary->HasSameExceptionHandlersAs(*other_try_boundary)) { + // Merge the entries as they are really the same one. + // Block merging. + it->second.blocks_in_try.insert(other_it->second.blocks_in_try.begin(), + other_it->second.blocks_in_try.end()); + + // Add the coalesced try entry to update it too. + it->second.coalesced_try_entries.insert(other_it->first); + + // Erase the other entry. + other_it = tries.erase(other_it); + } else { + other_it++; + } + } + } + + size_t removed_tries = 0; + bool any_block_in_loop = false; + + // Check which tries contain throwing instructions. + for (const auto& entry : tries) { + if (CanPerformTryRemoval(entry.second)) { + ++removed_tries; + RemoveTry(entry.first, entry.second, &any_block_in_loop); + } + } + + if (removed_tries != 0) { + // We want to: + // 1) Update the dominance information + // 2) Remove catch block subtrees, if they are now unreachable. + // If we run the dominance recomputation without removing the code, those catch blocks will + // not be part of the post order and won't be removed. If we don't run the dominance + // recomputation, we risk RemoveDeadBlocks not running it and leaving the graph in an + // inconsistent state. So, what we can do is run RemoveDeadBlocks and force a recomputation. + // Note that we are not guaranteed to remove a catch block if we have nested try blocks: + // + // try { + // ... nothing can throw. TryBoundary A ... + // try { + // ... can throw. TryBoundary B... + // } catch (Error e) {} + // } catch (Exception e) {} + // + // In the example above, we can remove the TryBoundary A but the Exception catch cannot be + // removed as the TryBoundary B might still throw into that catch. TryBoundary A and B don't get + // coalesced since they have different catch handlers. 
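As a rough standalone illustration of the two rules described in the comment above (entries with identical handler sets are coalesced, and a try is removable only if none of its blocks can throw), here is a minimal sketch using hypothetical stand-in types (`ToyTry`, `SameTry`, `CanRemoveTry`) rather than the real HGraph/HTryBoundary API:

```cpp
#include <cstdio>
#include <set>
#include <string>
#include <vector>

// Hypothetical, simplified model of a try region: the catch handlers it
// targets and, per covered block, whether that block can throw.
struct ToyTry {
  std::set<std::string> handlers;     // names of catch handlers
  std::vector<bool> block_can_throw;  // one entry per block in the try
};

// Rule 1: two try entries really describe the same try iff they share the
// exact same set of exception handlers (cf. HasSameExceptionHandlersAs).
static bool SameTry(const ToyTry& a, const ToyTry& b) {
  return a.handlers == b.handlers;
}

// Rule 2: a try can be removed (TryBoundary turned into a Goto) only when
// no covered block contains a throwing instruction.
static bool CanRemoveTry(const ToyTry& t) {
  for (bool can_throw : t.block_can_throw) {
    if (can_throw) return false;
  }
  return true;
}

int main() {
  ToyTry outer{{"catch Exception"}, {false, false}};  // nothing can throw
  ToyTry inner{{"catch Error"}, {true}};              // contains a throwing call

  // Different handler sets, so the entries are not coalesced, mirroring the
  // "TryBoundary A and B" example in the comment above.
  std::printf("coalesce? %d\n", SameTry(outer, inner));    // 0
  std::printf("remove outer? %d\n", CanRemoveTry(outer));  // 1
  std::printf("remove inner? %d\n", CanRemoveTry(inner));  // 0
  return 0;
}
```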
+ + RemoveDeadBlocks(/* force_recomputation= */ true, any_block_in_loop); + MaybeRecordStat(stats_, MethodCompilationStat::kRemovedTry, removed_tries); + return true; + } else { + return false; + } +} + +bool HDeadCodeElimination::RemoveDeadBlocks(bool force_recomputation, + bool force_loop_recomputation) { + DCHECK_IMPLIES(force_loop_recomputation, force_recomputation); + // Use local allocator for allocating memory. ScopedArenaAllocator allocator(graph_->GetArenaStack()); @@ -495,8 +805,8 @@ bool HDeadCodeElimination::RemoveDeadBlocks() { // If we removed at least one block, we need to recompute the full // dominator tree and try block membership. - if (removed_one_or_more_blocks) { - if (rerun_dominance_and_loop_analysis) { + if (removed_one_or_more_blocks || force_recomputation) { + if (rerun_dominance_and_loop_analysis || force_loop_recomputation) { graph_->ClearLoopInformation(); graph_->ClearDominanceInformation(); graph_->BuildDominatorTree(); @@ -530,6 +840,33 @@ void HDeadCodeElimination::RemoveDeadInstructions() { } } +void HDeadCodeElimination::UpdateGraphFlags() { + bool has_monitor_operations = false; + bool has_simd = false; + bool has_bounds_checks = false; + bool has_always_throwing_invokes = false; + + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instruction = it.Current(); + if (instruction->IsMonitorOperation()) { + has_monitor_operations = true; + } else if (instruction->IsVecOperation()) { + has_simd = true; + } else if (instruction->IsBoundsCheck()) { + has_bounds_checks = true; + } else if (instruction->IsInvoke() && instruction->AsInvoke()->AlwaysThrows()) { + has_always_throwing_invokes = true; + } + } + } + + graph_->SetHasMonitorOperations(has_monitor_operations); + graph_->SetHasSIMD(has_simd); + graph_->SetHasBoundsChecks(has_bounds_checks); + graph_->SetHasAlwaysThrowingInvokes(has_always_throwing_invokes); +} + bool HDeadCodeElimination::Run() { // Do not eliminate dead blocks if the graph has irreducible loops. We could // support it, but that would require changes in our loop representation to handle @@ -541,6 +878,11 @@ bool HDeadCodeElimination::Run() { did_any_simplification |= SimplifyAlwaysThrows(); did_any_simplification |= SimplifyIfs(); did_any_simplification |= RemoveDeadBlocks(); + // We call RemoveDeadBlocks before RemoveUnneededTries to remove the dead blocks from the + // previous optimizations. Otherwise, we might detect that a try has throwing instructions but + // they are actually dead code. RemoveUnneededTryBoundary will call RemoveDeadBlocks again if + // needed. + did_any_simplification |= RemoveUnneededTries(); if (did_any_simplification) { // Connect successive blocks created by dead branches. 
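The `UpdateGraphFlags` pass above recomputes several per-graph booleans in a single walk once instructions may have been deleted. A minimal sketch of the same accumulate-then-publish pattern, with hypothetical `Kind`/`ToyGraph` types standing in for HInstruction and HGraph:

```cpp
#include <cstdio>
#include <vector>

// Hypothetical instruction kinds; stand-ins for the HInstruction::Is*() checks.
enum class Kind { kAdd, kMonitorOp, kVecOp, kBoundsCheck, kAlwaysThrowingInvoke };

struct ToyGraph {
  std::vector<std::vector<Kind>> blocks;  // instructions per block
  bool has_monitor_ops = false;
  bool has_simd = false;
  bool has_bounds_checks = false;
  bool has_always_throwing_invokes = false;
};

// Recompute every flag from scratch so that flags set by earlier passes do
// not stay stale after the corresponding instructions were removed.
void UpdateFlags(ToyGraph& graph) {
  bool monitor = false, simd = false, bounds = false, throwing = false;
  for (const auto& block : graph.blocks) {
    for (Kind k : block) {
      monitor |= (k == Kind::kMonitorOp);
      simd |= (k == Kind::kVecOp);
      bounds |= (k == Kind::kBoundsCheck);
      throwing |= (k == Kind::kAlwaysThrowingInvoke);
    }
  }
  graph.has_monitor_ops = monitor;
  graph.has_simd = simd;
  graph.has_bounds_checks = bounds;
  graph.has_always_throwing_invokes = throwing;
}

int main() {
  ToyGraph g;
  g.blocks = {{Kind::kAdd}, {Kind::kBoundsCheck}};
  UpdateFlags(g);
  std::printf("bounds=%d simd=%d\n", g.has_bounds_checks, g.has_simd);
  return 0;
}
```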
ConnectSuccessiveBlocks(); @@ -548,6 +890,7 @@ bool HDeadCodeElimination::Run() { } SsaRedundantPhiElimination(graph_).Run(); RemoveDeadInstructions(); + UpdateGraphFlags(); return true; } diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h index 799721acf2..ddd01f7103 100644 --- a/compiler/optimizing/dead_code_elimination.h +++ b/compiler/optimizing/dead_code_elimination.h @@ -17,11 +17,12 @@ #ifndef ART_COMPILER_OPTIMIZING_DEAD_CODE_ELIMINATION_H_ #define ART_COMPILER_OPTIMIZING_DEAD_CODE_ELIMINATION_H_ +#include "base/macros.h" #include "nodes.h" #include "optimization.h" #include "optimizing_compiler_stats.h" -namespace art { +namespace art HIDDEN { /** * Optimization pass performing dead code elimination (removal of @@ -39,11 +40,87 @@ class HDeadCodeElimination : public HOptimization { private: void MaybeRecordDeadBlock(HBasicBlock* block); void MaybeRecordSimplifyIf(); - bool RemoveDeadBlocks(); + // If `force_recomputation` is true, we will recompute the dominance information even when we + // didn't delete any blocks. `force_loop_recomputation` is similar but it also forces the loop + // information recomputation. + bool RemoveDeadBlocks(bool force_recomputation = false, bool force_loop_recomputation = false); void RemoveDeadInstructions(); bool SimplifyAlwaysThrows(); + // Simplify the pattern: + // + // B1 B2 ... + // goto goto goto + // \ | / + // \ | / + // B3 + // i1 = phi(input, input) + // (i2 = condition on i1) + // if i1 (or i2) + // / \ + // / \ + // B4 B5 + // + // Into: + // + // B1 B2 ... + // | | | + // B4 B5 B? + // + // Note that individual edges can be redirected (for example B2->B3 + // can be redirected as B2->B5) without applying this optimization + // to other incoming edges. + // + // Note that we rely on the dead code elimination to get rid of B3. bool SimplifyIfs(); void ConnectSuccessiveBlocks(); + // Updates the graph flags related to instructions (e.g. HasSIMD()) since we may have eliminated + // the relevant instructions. There's no need to update `SetHasTryCatch` since we do that in + // `ComputeTryBlockInformation`. Similarly with `HasLoops` and `HasIrreducibleLoops`: They are + // cleared in `ClearLoopInformation` and then set as true as part of `HLoopInformation::Populate`, + // if needed. + void UpdateGraphFlags(); + + // Helper struct to eliminate tries. + struct TryBelongingInformation; + // Disconnects `block`'s handlers and update its `TryBoundary` instruction to a `Goto`. + // Sets `any_block_in_loop` to true if any block is currently a loop to later update the loop + // information if needed. + void DisconnectHandlersAndUpdateTryBoundary(HBasicBlock* block, + /* out */ bool* any_block_in_loop); + // Returns true iff the try doesn't contain throwing instructions. + bool CanPerformTryRemoval(const TryBelongingInformation& try_belonging_info); + // Removes the try by disconnecting all try entries and exits from their handlers. Also updates + // the graph in the case that a `TryBoundary` instruction of kind `exit` has the Exit block as + // its successor. + void RemoveTry(HBasicBlock* try_entry, + const TryBelongingInformation& try_belonging_info, + bool* any_block_in_loop); + // Checks which tries (if any) are currently in the graph, coalesces the different try entries + // that are referencing the same try, and removes the tries which don't contain any throwing + // instructions. 
+ bool RemoveUnneededTries(); + + // Adds a phi in `block`, if `block` and its dominator have the same (or opposite) condition. + // For example it turns: + // if(cond) + // / \ + // B1 B2 + // \ / + // if(cond) + // / \ + // B3 B4 + // + // into: + // if(cond) + // / \ + // B1 B2 + // \ / + // if(Phi(1, 0)) + // / \ + // B3 B4 + // + // Following this, SimplifyIfs is able to connect B1->B3 and B2->B4 effectively skipping an if. + void MaybeAddPhi(HBasicBlock* block); DISALLOW_COPY_AND_ASSIGN(HDeadCodeElimination); }; diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc index f5cd4dc27a..b789434add 100644 --- a/compiler/optimizing/dead_code_elimination_test.cc +++ b/compiler/optimizing/dead_code_elimination_test.cc @@ -16,6 +16,7 @@ #include "dead_code_elimination.h" +#include "base/macros.h" #include "driver/compiler_options.h" #include "graph_checker.h" #include "optimizing_unit_test.h" @@ -23,9 +24,9 @@ #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { -class DeadCodeEliminationTest : public OptimizingUnitTest { +class DeadCodeEliminationTest : public CommonCompilerTest, public OptimizingUnitTestHelper { protected: void TestCode(const std::vector<uint16_t>& data, const std::string& expected_before, diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc index 1d72ba116e..5f366ebcd9 100644 --- a/compiler/optimizing/dominator_test.cc +++ b/compiler/optimizing/dominator_test.cc @@ -15,6 +15,7 @@ */ #include "base/arena_allocator.h" +#include "base/macros.h" #include "builder.h" #include "dex/dex_instruction.h" #include "nodes.h" @@ -22,9 +23,9 @@ #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { -class OptimizerTest : public OptimizingUnitTest { +class OptimizerTest : public CommonCompilerTest, public OptimizingUnitTestHelper { protected: void TestCode(const std::vector<uint16_t>& data, const uint32_t* blocks, size_t blocks_length); }; diff --git a/compiler/optimizing/escape.cc b/compiler/optimizing/escape.cc index 617833c697..cebe94fd0d 100644 --- a/compiler/optimizing/escape.cc +++ b/compiler/optimizing/escape.cc @@ -18,7 +18,7 @@ #include "nodes.h" -namespace art { +namespace art HIDDEN { void VisitEscapes(HInstruction* reference, EscapeVisitor& escape_visitor) { // References not allocated in the method are intrinsically escaped. 
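To make the `MaybeAddPhi` documentation above more concrete, the sketch below picks the phi input for each predecessor edge: constant 1 when the edge is reached only through the dominating if's true branch, constant 0 for the false branch, and the original condition when neither side dominates (the double-diamond case; the bail-out for the opposite-condition case is omitted). The types and the `dominated_by_*` inputs are hypothetical simplifications of the real HGraph/HPhi machinery:

```cpp
#include <cstdio>
#include <optional>
#include <vector>

// For each predecessor edge we only need to know whether it is reachable
// exclusively through the true successor or the false successor of the
// dominating if. Hypothetical stand-in for the Dominates() queries.
struct PredInfo {
  bool dominated_by_true;
  bool dominated_by_false;
};

// Returns the constant (1 or 0) to feed into the new phi for this edge, or
// std::nullopt when we cannot tell which branch we came from and must reuse
// the original condition as the phi input instead.
std::optional<int> PhiInputFor(const PredInfo& pred, bool same_condition_as_dominator) {
  if (pred.dominated_by_true == pred.dominated_by_false) {
    // Double-diamond-like shape: the edge merges both paths.
    return std::nullopt;
  }
  // With the same condition, the true branch contributes 1; with the
  // opposite condition the roles are swapped.
  return (same_condition_as_dominator == pred.dominated_by_true) ? 1 : 0;
}

int main() {
  std::vector<PredInfo> preds = {{true, false}, {false, true}, {false, false}};
  for (const PredInfo& p : preds) {
    std::optional<int> input = PhiInputFor(p, /*same_condition_as_dominator=*/true);
    std::printf("phi input: %s\n", input ? (*input ? "1" : "0") : "cond");
  }
  return 0;
}
```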
diff --git a/compiler/optimizing/escape.h b/compiler/optimizing/escape.h index 5402cb1763..3b284fbf43 100644 --- a/compiler/optimizing/escape.h +++ b/compiler/optimizing/escape.h @@ -17,7 +17,9 @@ #ifndef ART_COMPILER_OPTIMIZING_ESCAPE_H_ #define ART_COMPILER_OPTIMIZING_ESCAPE_H_ -namespace art { +#include "base/macros.h" + +namespace art HIDDEN { class HInstruction; diff --git a/compiler/optimizing/execution_subgraph.cc b/compiler/optimizing/execution_subgraph.cc index 66fdfcda5b..06aabbe040 100644 --- a/compiler/optimizing/execution_subgraph.cc +++ b/compiler/optimizing/execution_subgraph.cc @@ -26,7 +26,7 @@ #include "base/scoped_arena_allocator.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { ExecutionSubgraph::ExecutionSubgraph(HGraph* graph, ScopedArenaAllocator* allocator) : graph_(graph), diff --git a/compiler/optimizing/execution_subgraph.h b/compiler/optimizing/execution_subgraph.h index 7d2a66077d..5ddf17de60 100644 --- a/compiler/optimizing/execution_subgraph.h +++ b/compiler/optimizing/execution_subgraph.h @@ -27,6 +27,7 @@ #include "base/bit_vector-inl.h" #include "base/globals.h" #include "base/iteration_range.h" +#include "base/macros.h" #include "base/mutex.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" @@ -34,7 +35,7 @@ #include "base/transform_iterator.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { // Helper for transforming blocks to block_ids. class BlockToBlockIdTransformer { diff --git a/compiler/optimizing/execution_subgraph_test.cc b/compiler/optimizing/execution_subgraph_test.cc index 74c243b5b4..921ef056ba 100644 --- a/compiler/optimizing/execution_subgraph_test.cc +++ b/compiler/optimizing/execution_subgraph_test.cc @@ -37,7 +37,7 @@ #include "optimizing_unit_test.h" #include "scoped_thread_state_change.h" -namespace art { +namespace art HIDDEN { using BlockSet = std::unordered_set<const HBasicBlock*>; diff --git a/compiler/optimizing/execution_subgraph_test.h b/compiler/optimizing/execution_subgraph_test.h index 13cb2bc7c5..cee105a045 100644 --- a/compiler/optimizing/execution_subgraph_test.h +++ b/compiler/optimizing/execution_subgraph_test.h @@ -19,7 +19,9 @@ #include "android-base/macros.h" -namespace art { +#include "base/macros.h" + +namespace art HIDDEN { class HGraph; class ExecutionSubgraph; diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc index 75b8e9609e..8857b2a775 100644 --- a/compiler/optimizing/find_loops_test.cc +++ b/compiler/optimizing/find_loops_test.cc @@ -15,6 +15,7 @@ */ #include "base/arena_allocator.h" +#include "base/macros.h" #include "builder.h" #include "dex/dex_file.h" #include "dex/dex_instruction.h" @@ -25,9 +26,9 @@ #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { -class FindLoopsTest : public OptimizingUnitTest {}; +class FindLoopsTest : public CommonCompilerTest, public OptimizingUnitTestHelper {}; TEST_F(FindLoopsTest, CFG1) { // Constant is not used. diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index d1769cea0d..190b362145 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -32,7 +32,7 @@ #include "scoped_thread_state_change-inl.h" #include "subtype_check.h" -namespace art { +namespace art HIDDEN { using android::base::StringPrintf; @@ -80,9 +80,91 @@ size_t GraphChecker::Run(bool pass_change, size_t last_size) { // as the latter might visit dead blocks removed by the dominator // computation. 
VisitReversePostOrder(); + CheckGraphFlags(); return current_size; } +void GraphChecker::VisitReversePostOrder() { + for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) { + if (block->IsInLoop()) { + flag_info_.seen_loop = true; + if (block->GetLoopInformation()->IsIrreducible()) { + flag_info_.seen_irreducible_loop = true; + } + } + + VisitBasicBlock(block); + } +} + +static const char* StrBool(bool val) { + return val ? "true" : "false"; +} + +void GraphChecker::CheckGraphFlags() { + if (GetGraph()->HasMonitorOperations() != flag_info_.seen_monitor_operation) { + AddError( + StringPrintf("Flag mismatch: HasMonitorOperations() (%s) should be equal to " + "flag_info_.seen_monitor_operation (%s)", + StrBool(GetGraph()->HasMonitorOperations()), + StrBool(flag_info_.seen_monitor_operation))); + } + + if (GetGraph()->HasTryCatch() != flag_info_.seen_try_boundary) { + AddError( + StringPrintf("Flag mismatch: HasTryCatch() (%s) should be equal to " + "flag_info_.seen_try_boundary (%s)", + StrBool(GetGraph()->HasTryCatch()), + StrBool(flag_info_.seen_try_boundary))); + } + + if (GetGraph()->HasLoops() != flag_info_.seen_loop) { + AddError( + StringPrintf("Flag mismatch: HasLoops() (%s) should be equal to " + "flag_info_.seen_loop (%s)", + StrBool(GetGraph()->HasLoops()), + StrBool(flag_info_.seen_loop))); + } + + if (GetGraph()->HasIrreducibleLoops() && !GetGraph()->HasLoops()) { + AddError(StringPrintf("Flag mismatch: HasIrreducibleLoops() (%s) implies HasLoops() (%s)", + StrBool(GetGraph()->HasIrreducibleLoops()), + StrBool(GetGraph()->HasLoops()))); + } + + if (GetGraph()->HasIrreducibleLoops() != flag_info_.seen_irreducible_loop) { + AddError( + StringPrintf("Flag mismatch: HasIrreducibleLoops() (%s) should be equal to " + "flag_info_.seen_irreducible_loop (%s)", + StrBool(GetGraph()->HasIrreducibleLoops()), + StrBool(flag_info_.seen_irreducible_loop))); + } + + if (GetGraph()->HasSIMD() != flag_info_.seen_SIMD) { + AddError( + StringPrintf("Flag mismatch: HasSIMD() (%s) should be equal to " + "flag_info_.seen_SIMD (%s)", + StrBool(GetGraph()->HasSIMD()), + StrBool(flag_info_.seen_SIMD))); + } + + if (GetGraph()->HasBoundsChecks() != flag_info_.seen_bounds_checks) { + AddError( + StringPrintf("Flag mismatch: HasBoundsChecks() (%s) should be equal to " + "flag_info_.seen_bounds_checks (%s)", + StrBool(GetGraph()->HasBoundsChecks()), + StrBool(flag_info_.seen_bounds_checks))); + } + + if (GetGraph()->HasAlwaysThrowingInvokes() != flag_info_.seen_always_throwing_invokes) { + AddError( + StringPrintf("Flag mismatch: HasAlwaysThrowingInvokes() (%s) should be equal to " + "flag_info_.seen_always_throwing_invokes (%s)", + StrBool(GetGraph()->HasAlwaysThrowingInvokes()), + StrBool(flag_info_.seen_always_throwing_invokes))); + } +} + void GraphChecker::VisitBasicBlock(HBasicBlock* block) { current_block_ = block; @@ -159,6 +241,24 @@ void GraphChecker::VisitBasicBlock(HBasicBlock* block) { } } + // Make sure the first instruction of a catch block is always a Nop that emits an environment. 
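`CheckGraphFlags` above cross-checks each cached graph flag against what the checker actually observed while walking the blocks. A small sketch of that "declared vs. observed" pattern with hypothetical names; the real checker reports through AddError/StringPrintf rather than returning strings:

```cpp
#include <cstdio>
#include <string>
#include <vector>

// Hypothetical pair of declared flag (stored on the graph) and observed flag
// (accumulated while visiting instructions).
struct FlagCheck {
  const char* name;
  bool declared;
  bool observed;
};

// Collects one error per mismatching flag, mirroring the
// "Flag mismatch: X should be equal to Y" messages above.
std::vector<std::string> CheckFlags(const std::vector<FlagCheck>& checks) {
  std::vector<std::string> errors;
  for (const FlagCheck& c : checks) {
    if (c.declared != c.observed) {
      errors.push_back(std::string("Flag mismatch: ") + c.name +
                       " declared=" + (c.declared ? "true" : "false") +
                       " observed=" + (c.observed ? "true" : "false"));
    }
  }
  return errors;
}

int main() {
  std::vector<FlagCheck> checks = {
      {"HasBoundsChecks", /*declared=*/true, /*observed=*/false},
      {"HasTryCatch", /*declared=*/false, /*observed=*/false},
  };
  for (const std::string& e : CheckFlags(checks)) {
    std::printf("%s\n", e.c_str());
  }
  return 0;
}
```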
+ if (block->IsCatchBlock()) { + if (!block->GetFirstInstruction()->IsNop()) { + AddError(StringPrintf("Block %d doesn't have a Nop as its first instruction.", + current_block_->GetBlockId())); + } else { + HNop* nop = block->GetFirstInstruction()->AsNop(); + if (!nop->NeedsEnvironment()) { + AddError( + StringPrintf("%s:%d is a Nop and the first instruction of block %d, but it doesn't " + "need an environment.", + nop->DebugName(), + nop->GetId(), + current_block_->GetBlockId())); + } + } + } + // Visit this block's list of phis. for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { HInstruction* current = it.Current(); @@ -219,6 +319,12 @@ void GraphChecker::VisitBasicBlock(HBasicBlock* block) { } } + // Ensure all blocks have at least one successor, except the Exit block. + if (block->GetSuccessors().empty() && !block->IsExitBlock()) { + AddError(StringPrintf("Block %d has no successor and it is not the Exit block.", + block->GetBlockId())); + } + // Ensure there is no critical edge (i.e., an edge connecting a // block with multiple successors to a block with multiple // predecessors). Exceptional edges are synthesized and hence @@ -291,27 +397,30 @@ void GraphChecker::VisitBasicBlock(HBasicBlock* block) { } void GraphChecker::VisitBoundsCheck(HBoundsCheck* check) { + VisitInstruction(check); + if (!GetGraph()->HasBoundsChecks()) { - AddError(StringPrintf("Instruction %s:%d is a HBoundsCheck, " - "but HasBoundsChecks() returns false", - check->DebugName(), - check->GetId())); + AddError( + StringPrintf("The graph doesn't have the HasBoundsChecks flag set but we saw " + "%s:%d in block %d.", + check->DebugName(), + check->GetId(), + check->GetBlock()->GetBlockId())); } - // Perform the instruction base checks too. - VisitInstruction(check); + flag_info_.seen_bounds_checks = true; } void GraphChecker::VisitDeoptimize(HDeoptimize* deopt) { + VisitInstruction(deopt); if (GetGraph()->IsCompilingOsr()) { AddError(StringPrintf("A graph compiled OSR cannot have a HDeoptimize instruction")); } - - // Perform the instruction base checks too. - VisitInstruction(deopt); } void GraphChecker::VisitTryBoundary(HTryBoundary* try_boundary) { + VisitInstruction(try_boundary); + ArrayRef<HBasicBlock* const> handlers = try_boundary->GetExceptionHandlers(); // Ensure that all exception handlers are catch blocks. @@ -338,24 +447,65 @@ void GraphChecker::VisitTryBoundary(HTryBoundary* try_boundary) { } } - VisitInstruction(try_boundary); + if (!GetGraph()->HasTryCatch()) { + AddError( + StringPrintf("The graph doesn't have the HasTryCatch flag set but we saw " + "%s:%d in block %d.", + try_boundary->DebugName(), + try_boundary->GetId(), + try_boundary->GetBlock()->GetBlockId())); + } + + flag_info_.seen_try_boundary = true; +} + +void GraphChecker::VisitLoadClass(HLoadClass* load) { + VisitInstruction(load); + + if (load->GetLoadedClassRTI().IsValid() && !load->GetLoadedClassRTI().IsExact()) { + std::stringstream ssRTI; + ssRTI << load->GetLoadedClassRTI(); + AddError(StringPrintf("%s:%d in block %d with RTI %s has valid but inexact RTI.", + load->DebugName(), + load->GetId(), + load->GetBlock()->GetBlockId(), + ssRTI.str().c_str())); + } } void GraphChecker::VisitLoadException(HLoadException* load) { - // Ensure that LoadException is the first instruction in a catch block. + VisitInstruction(load); + + // Ensure that LoadException is the second instruction in a catch block. The first one should be a + // Nop (checked separately). 
if (!load->GetBlock()->IsCatchBlock()) { AddError(StringPrintf("%s:%d is in a non-catch block %d.", load->DebugName(), load->GetId(), load->GetBlock()->GetBlockId())); - } else if (load->GetBlock()->GetFirstInstruction() != load) { - AddError(StringPrintf("%s:%d is not the first instruction in catch block %d.", + } else if (load->GetBlock()->GetFirstInstruction()->GetNext() != load) { + AddError(StringPrintf("%s:%d is not the second instruction in catch block %d.", load->DebugName(), load->GetId(), load->GetBlock()->GetBlockId())); } } +void GraphChecker::VisitMonitorOperation(HMonitorOperation* monitor_op) { + VisitInstruction(monitor_op); + + if (!GetGraph()->HasMonitorOperations()) { + AddError( + StringPrintf("The graph doesn't have the HasMonitorOperations flag set but we saw " + "%s:%d in block %d.", + monitor_op->DebugName(), + monitor_op->GetId(), + monitor_op->GetBlock()->GetBlockId())); + } + + flag_info_.seen_monitor_operation = true; +} + void GraphChecker::VisitInstruction(HInstruction* instruction) { if (seen_ids_.IsBitSet(instruction->GetId())) { AddError(StringPrintf("Instruction id %d is duplicate in graph.", @@ -497,33 +647,16 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { } } - // Ensure that reference type instructions have reference type info. - if (check_reference_type_info_ && instruction->GetType() == DataType::Type::kReference) { - if (!instruction->GetReferenceTypeInfo().IsValid()) { - AddError(StringPrintf("Reference type instruction %s:%d does not have " - "valid reference type information.", - instruction->DebugName(), - instruction->GetId())); - } - } - if (instruction->CanThrow() && !instruction->HasEnvironment()) { AddError(StringPrintf("Throwing instruction %s:%d in block %d does not have an environment.", instruction->DebugName(), instruction->GetId(), current_block_->GetBlockId())); } else if (instruction->CanThrowIntoCatchBlock()) { - // Find the top-level environment. This corresponds to the environment of - // the catch block since we do not inline methods with try/catch. - HEnvironment* environment = instruction->GetEnvironment(); - while (environment->GetParent() != nullptr) { - environment = environment->GetParent(); - } - - // Find all catch blocks and test that `instruction` has an environment - // value for each one. + // Find all catch blocks and test that `instruction` has an environment value for each one. 
const HTryBoundary& entry = instruction->GetBlock()->GetTryCatchInformation()->GetTryEntry(); for (HBasicBlock* catch_block : entry.GetExceptionHandlers()) { + const HEnvironment* environment = catch_block->GetFirstInstruction()->GetEnvironment(); for (HInstructionIterator phi_it(catch_block->GetPhis()); !phi_it.Done(); phi_it.Advance()) { HPhi* catch_phi = phi_it.Current()->AsPhi(); if (environment->GetInstructionAt(catch_phi->GetRegNumber()) == nullptr) { @@ -541,9 +674,26 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { } } -void GraphChecker::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { +void GraphChecker::VisitInvoke(HInvoke* invoke) { VisitInstruction(invoke); + if (invoke->AlwaysThrows()) { + if (!GetGraph()->HasAlwaysThrowingInvokes()) { + AddError( + StringPrintf("The graph doesn't have the HasAlwaysThrowingInvokes flag set but we saw " + "%s:%d in block %d and it always throws.", + invoke->DebugName(), + invoke->GetId(), + invoke->GetBlock()->GetBlockId())); + } + flag_info_.seen_always_throwing_invokes = true; + } +} + +void GraphChecker::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + // We call VisitInvoke and not VisitInstruction to de-duplicate the always throwing code check. + VisitInvoke(invoke); + if (invoke->IsStaticWithExplicitClinitCheck()) { const HInstruction* last_input = invoke->GetInputs().back(); if (last_input == nullptr) { @@ -612,6 +762,17 @@ void GraphChecker::CheckTypeCheckBitstringInput(HTypeCheckInstruction* check, void GraphChecker::HandleTypeCheckInstruction(HTypeCheckInstruction* check) { VisitInstruction(check); + + if (check->GetTargetClassRTI().IsValid() && !check->GetTargetClassRTI().IsExact()) { + std::stringstream ssRTI; + ssRTI << check->GetTargetClassRTI(); + AddError(StringPrintf("%s:%d in block %d with RTI %s has valid but inexact RTI.", + check->DebugName(), + check->GetId(), + check->GetBlock()->GetBlockId(), + ssRTI.str().c_str())); + } + HInstruction* input = check->InputAt(1); if (check->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) { if (!input->IsNullConstant()) { @@ -674,13 +835,14 @@ void GraphChecker::HandleLoop(HBasicBlock* loop_header) { loop_information->GetPreHeader()->GetSuccessors().size())); } - if (loop_information->GetSuspendCheck() == nullptr) { - AddError(StringPrintf( - "Loop with header %d does not have a suspend check.", - loop_header->GetBlockId())); + if (!GetGraph()->SuspendChecksAreAllowedToNoOp() && + loop_information->GetSuspendCheck() == nullptr) { + AddError(StringPrintf("Loop with header %d does not have a suspend check.", + loop_header->GetBlockId())); } - if (loop_information->GetSuspendCheck() != loop_header->GetFirstInstructionDisregardMoves()) { + if (!GetGraph()->SuspendChecksAreAllowedToNoOp() && + loop_information->GetSuspendCheck() != loop_header->GetFirstInstructionDisregardMoves()) { AddError(StringPrintf( "Loop header %d does not have the loop suspend check as the first instruction.", loop_header->GetBlockId())); @@ -1051,6 +1213,21 @@ void GraphChecker::VisitNeg(HNeg* instruction) { } } +void GraphChecker::VisitArraySet(HArraySet* instruction) { + VisitInstruction(instruction); + + if (instruction->NeedsTypeCheck() != + instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC())) { + AddError( + StringPrintf("%s %d has a flag mismatch. An ArraySet instruction can trigger a GC iff it " + "needs a type check. 
Needs type check: %s, Can trigger GC: %s", + instruction->DebugName(), + instruction->GetId(), + StrBool(instruction->NeedsTypeCheck()), + StrBool(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC())))); + } +} + void GraphChecker::VisitBinaryOperation(HBinaryOperation* op) { VisitInstruction(op); DataType::Type lhs_type = op->InputAt(0)->GetType(); @@ -1111,6 +1288,8 @@ void GraphChecker::VisitBinaryOperation(HBinaryOperation* op) { } void GraphChecker::VisitConstant(HConstant* instruction) { + VisitInstruction(instruction); + HBasicBlock* block = instruction->GetBlock(); if (!block->IsEntryBlock()) { AddError(StringPrintf( @@ -1149,6 +1328,18 @@ void GraphChecker::VisitTypeConversion(HTypeConversion* instruction) { void GraphChecker::VisitVecOperation(HVecOperation* instruction) { VisitInstruction(instruction); + + if (!GetGraph()->HasSIMD()) { + AddError( + StringPrintf("The graph doesn't have the HasSIMD flag set but we saw " + "%s:%d in block %d.", + instruction->DebugName(), + instruction->GetId(), + instruction->GetBlock()->GetBlockId())); + } + + flag_info_.seen_SIMD = true; + if (codegen_ == nullptr) { return; } diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h index 04c8d2103c..d6644f3b50 100644 --- a/compiler/optimizing/graph_checker.h +++ b/compiler/optimizing/graph_checker.h @@ -21,10 +21,11 @@ #include "base/arena_bit_vector.h" #include "base/bit_vector-inl.h" +#include "base/macros.h" #include "base/scoped_arena_allocator.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { class CodeGenerator; @@ -54,6 +55,7 @@ class GraphChecker : public HGraphDelegateVisitor { void VisitInstruction(HInstruction* instruction) override; void VisitPhi(HPhi* phi) override; + void VisitArraySet(HArraySet* instruction) override; void VisitBinaryOperation(HBinaryOperation* op) override; void VisitBooleanNot(HBooleanNot* instruction) override; void VisitBoundType(HBoundType* instruction) override; @@ -64,8 +66,11 @@ class GraphChecker : public HGraphDelegateVisitor { void VisitDeoptimize(HDeoptimize* instruction) override; void VisitIf(HIf* instruction) override; void VisitInstanceOf(HInstanceOf* check) override; + void VisitInvoke(HInvoke* invoke) override; void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) override; + void VisitLoadClass(HLoadClass* load) override; void VisitLoadException(HLoadException* load) override; + void VisitMonitorOperation(HMonitorOperation* monitor_operation) override; void VisitNeg(HNeg* instruction) override; void VisitPackedSwitch(HPackedSwitch* instruction) override; void VisitReturn(HReturn* ret) override; @@ -102,15 +107,6 @@ class GraphChecker : public HGraphDelegateVisitor { } } - // Enable/Disable the reference type info check. - // - // Return: the previous status of the check. - bool SetRefTypeInfoCheckEnabled(bool value = true) { - bool old_value = check_reference_type_info_; - check_reference_type_info_ = value; - return old_value; - } - protected: // Report a new error. void AddError(const std::string& error) { @@ -123,18 +119,30 @@ class GraphChecker : public HGraphDelegateVisitor { ArenaVector<std::string> errors_; private: + void VisitReversePostOrder(); + + // Checks that the graph's flags are set correctly. + void CheckGraphFlags(); + // String displayed before dumped errors. 
const char* const dump_prefix_; ScopedArenaAllocator allocator_; ArenaBitVector seen_ids_; - // Whether to perform the reference type info check for instructions which use or produce - // object references, e.g. HNewInstance, HLoadClass. - // The default value is true. - bool check_reference_type_info_ = true; // Used to access target information. CodeGenerator* codegen_; + struct FlagInfo { + bool seen_try_boundary = false; + bool seen_monitor_operation = false; + bool seen_loop = false; + bool seen_irreducible_loop = false; + bool seen_SIMD = false; + bool seen_bounds_checks = false; + bool seen_always_throwing_invokes = false; + }; + FlagInfo flag_info_; + DISALLOW_COPY_AND_ASSIGN(GraphChecker); }; diff --git a/compiler/optimizing/graph_checker_test.cc b/compiler/optimizing/graph_checker_test.cc index 08bfa5d80f..b256fbb46d 100644 --- a/compiler/optimizing/graph_checker_test.cc +++ b/compiler/optimizing/graph_checker_test.cc @@ -14,12 +14,13 @@ * limitations under the License. */ +#include "base/macros.h" #include "graph_checker.h" #include "optimizing_unit_test.h" -namespace art { +namespace art HIDDEN { -class GraphCheckerTest : public OptimizingUnitTest { +class GraphCheckerTest : public CommonCompilerTest, public OptimizingUnitTestHelper { protected: HGraph* CreateSimpleCFG(); void TestCode(const std::vector<uint16_t>& data); diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc index 29af808731..b5d712736f 100644 --- a/compiler/optimizing/graph_test.cc +++ b/compiler/optimizing/graph_test.cc @@ -15,6 +15,7 @@ */ #include "base/arena_allocator.h" +#include "base/macros.h" #include "builder.h" #include "nodes.h" #include "optimizing_unit_test.h" @@ -22,7 +23,7 @@ #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { class GraphTest : public OptimizingUnitTest { protected: diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 4a6ee13005..73bdd1e223 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -43,7 +43,7 @@ #include "ssa_liveness_analysis.h" #include "utils/assembler.h" -namespace art { +namespace art HIDDEN { // Unique pass-name to identify that the dump is for printing to log. 
constexpr const char* kDebugDumpName = "debug"; @@ -480,12 +480,20 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { << array_set->GetValueCanBeNull() << std::noboolalpha; StartAttributeStream("needs_type_check") << std::boolalpha << array_set->NeedsTypeCheck() << std::noboolalpha; + StartAttributeStream("can_trigger_gc") + << std::boolalpha << array_set->GetSideEffects().Includes(SideEffects::CanTriggerGC()) + << std::noboolalpha; + StartAttributeStream("write_barrier_kind") << array_set->GetWriteBarrierKind(); } void VisitCompare(HCompare* compare) override { StartAttributeStream("bias") << compare->GetBias(); } + void VisitCondition(HCondition* condition) override { + StartAttributeStream("bias") << condition->GetBias(); + } + void VisitInvoke(HInvoke* invoke) override { StartAttributeStream("dex_file_index") << invoke->GetMethodReference().index; ArtMethod* method = invoke->GetResolvedMethod(); @@ -549,7 +557,9 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { iset->GetFieldInfo().GetDexFile().PrettyField(iset->GetFieldInfo().GetFieldIndex(), /* with type */ false); StartAttributeStream("field_type") << iset->GetFieldType(); - StartAttributeStream("predicated") << std::boolalpha << iset->GetIsPredicatedSet(); + StartAttributeStream("predicated") + << std::boolalpha << iset->GetIsPredicatedSet() << std::noboolalpha; + StartAttributeStream("write_barrier_kind") << iset->GetWriteBarrierKind(); } void VisitStaticFieldGet(HStaticFieldGet* sget) override { @@ -564,6 +574,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { sset->GetFieldInfo().GetDexFile().PrettyField(sset->GetFieldInfo().GetFieldIndex(), /* with type */ false); StartAttributeStream("field_type") << sset->GetFieldType(); + StartAttributeStream("write_barrier_kind") << sset->GetWriteBarrierKind(); } void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* field_access) override { @@ -757,15 +768,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { instruction->IsCheckCast()) { StartAttributeStream("klass") << "unresolved"; } else { - // The NullConstant may be added to the graph during other passes that happen between - // ReferenceTypePropagation and Inliner (e.g. InstructionSimplifier). If the inliner - // doesn't run or doesn't inline anything, the NullConstant remains untyped. - // So we should check NullConstants for validity only after reference type propagation. - DCHECK(graph_in_bad_state_ || - IsDebugDump() || - (!is_after_pass_ && IsPass(HGraphBuilder::kBuilderPassName))) - << instruction->DebugName() << instruction->GetId() << " has invalid rti " - << (is_after_pass_ ? 
"after" : "before") << " pass " << pass_name_; + StartAttributeStream("klass") << "invalid"; } } if (disasm_info_ != nullptr) { @@ -904,6 +907,11 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { if (block->IsCatchBlock()) { PrintProperty("flags", "catch_block"); + } else if (block->IsTryBlock()) { + std::stringstream flags_properties; + flags_properties << "try_start " + << namer_.GetName(block->GetTryCatchInformation()->GetTryEntry().GetBlock()); + PrintProperty("flags", flags_properties.str().c_str()); } else if (!IsDebugDump()) { // Don't print useless information to logcat PrintEmptyProperty("flags"); diff --git a/compiler/optimizing/graph_visualizer.h b/compiler/optimizing/graph_visualizer.h index 3429c11cbd..9878917739 100644 --- a/compiler/optimizing/graph_visualizer.h +++ b/compiler/optimizing/graph_visualizer.h @@ -22,10 +22,11 @@ #include "arch/instruction_set.h" #include "base/arena_containers.h" +#include "base/macros.h" #include "base/value_object.h" #include "block_namer.h" -namespace art { +namespace art HIDDEN { class CodeGenerator; class DexCompilationUnit; diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index c7cd223b51..a6ca057cfc 100644 --- a/compiler/optimizing/gvn.cc +++ b/compiler/optimizing/gvn.cc @@ -23,7 +23,7 @@ #include "base/utils.h" #include "side_effects_analysis.h" -namespace art { +namespace art HIDDEN { /** * A ValueSet holds instructions that can replace other instructions. It is updated diff --git a/compiler/optimizing/gvn.h b/compiler/optimizing/gvn.h index bbf2265e98..df4e3a8dbf 100644 --- a/compiler/optimizing/gvn.h +++ b/compiler/optimizing/gvn.h @@ -17,10 +17,11 @@ #ifndef ART_COMPILER_OPTIMIZING_GVN_H_ #define ART_COMPILER_OPTIMIZING_GVN_H_ +#include "base/macros.h" #include "nodes.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { class SideEffectsAnalysis; diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc index 3bf4cc35ba..1eb6307cb1 100644 --- a/compiler/optimizing/gvn_test.cc +++ b/compiler/optimizing/gvn_test.cc @@ -17,12 +17,13 @@ #include "gvn.h" #include "base/arena_allocator.h" +#include "base/macros.h" #include "builder.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "side_effects_analysis.h" -namespace art { +namespace art HIDDEN { class GVNTest : public OptimizingUnitTest {}; diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc index 3b5a2f1f9d..be6c268f5d 100644 --- a/compiler/optimizing/induction_var_analysis.cc +++ b/compiler/optimizing/induction_var_analysis.cc @@ -16,9 +16,10 @@ #include "induction_var_analysis.h" +#include "base/scoped_arena_containers.h" #include "induction_var_range.h" -namespace art { +namespace art HIDDEN { /** * Returns true if the from/to types denote a narrowing, integral conversion (precision loss). 
@@ -214,18 +215,25 @@ struct HInductionVarAnalysis::StackEntry { size_t low_depth; }; -HInductionVarAnalysis::HInductionVarAnalysis(HGraph* graph, const char* name) - : HOptimization(graph, name), +HInductionVarAnalysis::HInductionVarAnalysis(HGraph* graph, + OptimizingCompilerStats* stats, + const char* name) + : HOptimization(graph, name, stats), induction_(std::less<const HLoopInformation*>(), graph->GetAllocator()->Adapter(kArenaAllocInductionVarAnalysis)), - cycles_(std::less<HPhi*>(), - graph->GetAllocator()->Adapter(kArenaAllocInductionVarAnalysis)) { + cycles_(std::less<HPhi*>(), graph->GetAllocator()->Adapter(kArenaAllocInductionVarAnalysis)) { } bool HInductionVarAnalysis::Run() { // Detects sequence variables (generalized induction variables) during an outer to inner // traversal of all loops using Gerlek's algorithm. The order is important to enable // range analysis on outer loop while visiting inner loops. + + if (IsPathologicalCase()) { + MaybeRecordStat(stats_, MethodCompilationStat::kNotVarAnalyzedPathological); + return false; + } + for (HBasicBlock* graph_block : graph_->GetReversePostOrder()) { // Don't analyze irreducible loops. if (graph_block->IsLoopHeader() && !graph_block->GetLoopInformation()->IsIrreducible()) { @@ -1576,4 +1584,84 @@ std::string HInductionVarAnalysis::InductionToString(InductionInfo* info) { return ""; } +void HInductionVarAnalysis::CalculateLoopHeaderPhisInARow( + HPhi* initial_phi, + ScopedArenaSafeMap<HPhi*, int>& cached_values, + ScopedArenaAllocator& allocator) { + DCHECK(initial_phi->IsLoopHeaderPhi()); + ScopedArenaQueue<HPhi*> worklist(allocator.Adapter(kArenaAllocInductionVarAnalysis)); + worklist.push(initial_phi); + // Used to check which phis are in the current chain we are checking. + ScopedArenaSet<HPhi*> phis_in_chain(allocator.Adapter(kArenaAllocInductionVarAnalysis)); + while (!worklist.empty()) { + HPhi* current_phi = worklist.front(); + DCHECK(current_phi->IsLoopHeaderPhi()); + if (cached_values.find(current_phi) != cached_values.end()) { + // Already processed. + worklist.pop(); + continue; + } + + phis_in_chain.insert(current_phi); + int max_value = 0; + bool pushed_other_phis = false; + for (size_t index = 0; index < current_phi->InputCount(); index++) { + // If the input is not a loop header phi, we only have 1 (current_phi). + int current_value = 1; + if (current_phi->InputAt(index)->IsLoopHeaderPhi()) { + HPhi* loop_header_phi = current_phi->InputAt(index)->AsPhi(); + auto it = cached_values.find(loop_header_phi); + if (it != cached_values.end()) { + current_value += it->second; + } else if (phis_in_chain.find(current_phi) == phis_in_chain.end()) { + // Push phis which aren't in the chain already to be processed. + pushed_other_phis = true; + worklist.push(loop_header_phi); + } + // Phis in the chain will get processed later. We keep `current_value` as 1 to avoid + // double counting `loop_header_phi`. + } + max_value = std::max(max_value, current_value); + } + + if (!pushed_other_phis) { + // Only finish processing after all inputs were processed. 
+ worklist.pop(); + phis_in_chain.erase(current_phi); + cached_values.FindOrAdd(current_phi, max_value); + } + } +} + +bool HInductionVarAnalysis::IsPathologicalCase() { + ScopedArenaAllocator local_allocator(graph_->GetArenaStack()); + ScopedArenaSafeMap<HPhi*, int> cached_values( + std::less<HPhi*>(), local_allocator.Adapter(kArenaAllocInductionVarAnalysis)); + + // Due to how our induction passes work, we will take a lot of time compiling if we have several + // loop header phis in a row. If we have more than 15 different loop header phis in a row, we + // don't perform the analysis. + constexpr int kMaximumLoopHeaderPhisInARow = 15; + + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + if (!block->IsLoopHeader()) { + continue; + } + + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + DCHECK(it.Current()->IsLoopHeaderPhi()); + HPhi* phi = it.Current()->AsPhi(); + CalculateLoopHeaderPhisInARow(phi, cached_values, local_allocator); + DCHECK(cached_values.find(phi) != cached_values.end()) + << " we should have a value for Phi " << phi->GetId() + << " in block " << phi->GetBlock()->GetBlockId(); + if (cached_values.find(phi)->second > kMaximumLoopHeaderPhisInARow) { + return true; + } + } + } + + return false; +} + } // namespace art diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h index 09417722da..050950089a 100644 --- a/compiler/optimizing/induction_var_analysis.h +++ b/compiler/optimizing/induction_var_analysis.h @@ -21,11 +21,12 @@ #include "base/arena_containers.h" #include "base/array_ref.h" +#include "base/macros.h" #include "base/scoped_arena_containers.h" #include "nodes.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { /** * Induction variable analysis. This class does not have a direct public API. @@ -38,7 +39,9 @@ namespace art { */ class HInductionVarAnalysis : public HOptimization { public: - explicit HInductionVarAnalysis(HGraph* graph, const char* name = kInductionPassName); + explicit HInductionVarAnalysis(HGraph* graph, + OptimizingCompilerStats* stats = nullptr, + const char* name = kInductionPassName); bool Run() override; @@ -307,6 +310,15 @@ class HInductionVarAnalysis : public HOptimization { static std::string FetchToString(HInstruction* fetch); static std::string InductionToString(InductionInfo* info); + // Returns true if we have a pathological case we don't want to analyze. + bool IsPathologicalCase(); + // Starting with initial_phi, it calculates how many loop header phis in a row we have. To do + // this, we count the loop header phi which are used as an input of other loop header phis. It + // uses `cached_values` to avoid recomputing results. + void CalculateLoopHeaderPhisInARow(HPhi* initial_phi, + ScopedArenaSafeMap<HPhi*, int>& cached_values, + ScopedArenaAllocator& allocator); + /** * Maintains the results of the analysis as a mapping from loops to a mapping from instructions * to the induction information for that instruction in that loop. 
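`IsPathologicalCase` above bounds the length of chains of loop-header phis before running the potentially expensive induction analysis. The sketch below computes the longest chain over a toy phi dependency graph with memoization; it assumes an acyclic toy graph and hypothetical types, whereas the real implementation uses a worklist plus a `phis_in_chain` set to cope with cycles between loop-header phis:

```cpp
#include <algorithm>
#include <cstdio>
#include <vector>

// Hypothetical model: phi_inputs[i] lists the other loop-header phis that
// phi i uses as inputs. Assumed acyclic to keep the sketch short.
using PhiGraph = std::vector<std::vector<int>>;

// Longest chain of loop-header phis starting at `phi`, memoized in `cache`
// (0 means "not computed yet").
int ChainLength(const PhiGraph& g, int phi, std::vector<int>& cache) {
  if (cache[phi] != 0) return cache[phi];
  int best = 1;  // the phi itself
  for (int input : g[phi]) {
    best = std::max(best, 1 + ChainLength(g, input, cache));
  }
  return cache[phi] = best;
}

int main() {
  // phi0 <- phi1 <- phi2: a chain of three loop-header phis.
  PhiGraph g = {{1}, {2}, {}};
  std::vector<int> cache(g.size(), 0);
  constexpr int kMaxPhisInARow = 15;  // same spirit as the threshold above
  int longest = 0;
  for (int i = 0; i < static_cast<int>(g.size()); ++i) {
    longest = std::max(longest, ChainLength(g, i, cache));
  }
  std::printf("longest chain = %d, pathological = %d\n",
              longest, longest > kMaxPhisInARow);
  return 0;
}
```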
diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc index 4c11ad4643..80c15371dc 100644 --- a/compiler/optimizing/induction_var_analysis_test.cc +++ b/compiler/optimizing/induction_var_analysis_test.cc @@ -17,12 +17,13 @@ #include <regex> #include "base/arena_allocator.h" +#include "base/macros.h" #include "builder.h" #include "induction_var_analysis.h" #include "nodes.h" #include "optimizing_unit_test.h" -namespace art { +namespace art HIDDEN { /** * Fixture class for the InductionVarAnalysis tests. diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc index ad3d1a9321..9b78699ead 100644 --- a/compiler/optimizing/induction_var_range.cc +++ b/compiler/optimizing/induction_var_range.cc @@ -17,8 +17,9 @@ #include "induction_var_range.h" #include <limits> +#include "optimizing/nodes.h" -namespace art { +namespace art HIDDEN { /** Returns true if 64-bit constant fits in 32-bit constant. */ static bool CanLongValueFitIntoInt(int64_t c) { @@ -1064,10 +1065,13 @@ bool InductionVarRange::GenerateRangeOrLastValue(const HBasicBlock* context, case HInductionVarAnalysis::kLinear: if (*stride_value > 0) { lower = nullptr; + return GenerateLastValueLinear( + context, loop, info, trip, graph, block, /*is_min=*/false, upper); } else { upper = nullptr; + return GenerateLastValueLinear( + context, loop, info, trip, graph, block, /*is_min=*/true, lower); } - break; case HInductionVarAnalysis::kPolynomial: return GenerateLastValuePolynomial(context, loop, info, trip, graph, block, lower); case HInductionVarAnalysis::kGeometric: @@ -1113,6 +1117,54 @@ bool InductionVarRange::GenerateRangeOrLastValue(const HBasicBlock* context, GenerateCode(context, loop, info, trip, graph, block, /*is_min=*/ false, upper); } +bool InductionVarRange::GenerateLastValueLinear(const HBasicBlock* context, + const HLoopInformation* loop, + HInductionVarAnalysis::InductionInfo* info, + HInductionVarAnalysis::InductionInfo* trip, + HGraph* graph, + HBasicBlock* block, + bool is_min, + /*out*/ HInstruction** result) const { + DataType::Type type = info->type; + // Avoid any narrowing linear induction or any type mismatch between the linear induction and the + // trip count expression. + if (HInductionVarAnalysis::IsNarrowingLinear(info) || trip->type != type) { + return false; + } + + // Stride value must be a known constant that fits into int32. + int64_t stride_value = 0; + if (!IsConstant(context, loop, info->op_a, kExact, &stride_value) || + !CanLongValueFitIntoInt(stride_value)) { + return false; + } + + // We require `a` to be a constant value that didn't overflow. + const bool is_min_a = stride_value >= 0 ? 
is_min : !is_min; + Value val_a = GetVal(context, loop, trip, trip, is_min_a); + HInstruction* opb; + if (!IsConstantValue(val_a) || + !GenerateCode(context, loop, info->op_b, trip, graph, block, is_min, &opb)) { + return false; + } + + if (graph != nullptr) { + ArenaAllocator* allocator = graph->GetAllocator(); + HInstruction* oper; + HInstruction* opa = graph->GetConstant(type, val_a.b_constant); + if (stride_value == 1) { + oper = new (allocator) HAdd(type, opa, opb); + } else if (stride_value == -1) { + oper = new (graph->GetAllocator()) HSub(type, opb, opa); + } else { + HInstruction* mul = new (allocator) HMul(type, graph->GetConstant(type, stride_value), opa); + oper = new (allocator) HAdd(type, Insert(block, mul), opb); + } + *result = Insert(block, oper); + } + return true; +} + bool InductionVarRange::GenerateLastValuePolynomial(const HBasicBlock* context, const HLoopInformation* loop, HInductionVarAnalysis::InductionInfo* info, diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h index 552837c044..3e1212bec8 100644 --- a/compiler/optimizing/induction_var_range.h +++ b/compiler/optimizing/induction_var_range.h @@ -17,9 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_INDUCTION_VAR_RANGE_H_ #define ART_COMPILER_OPTIMIZING_INDUCTION_VAR_RANGE_H_ +#include "base/macros.h" #include "induction_var_analysis.h" -namespace art { +namespace art HIDDEN { /** * This class implements range analysis on expressions within loops. It takes the results @@ -317,6 +318,15 @@ class InductionVarRange { /*out*/ bool* needs_finite_test, /*out*/ bool* needs_taken_test) const; + bool GenerateLastValueLinear(const HBasicBlock* context, + const HLoopInformation* loop, + HInductionVarAnalysis::InductionInfo* info, + HInductionVarAnalysis::InductionInfo* trip, + HGraph* graph, + HBasicBlock* block, + bool is_min, + /*out*/ HInstruction** result) const; + bool GenerateLastValuePolynomial(const HBasicBlock* context, const HLoopInformation* loop, HInductionVarAnalysis::InductionInfo* info, diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc index 962123d948..d879897959 100644 --- a/compiler/optimizing/induction_var_range_test.cc +++ b/compiler/optimizing/induction_var_range_test.cc @@ -17,12 +17,13 @@ #include "induction_var_range.h" #include "base/arena_allocator.h" +#include "base/macros.h" #include "builder.h" #include "induction_var_analysis.h" #include "nodes.h" #include "optimizing_unit_test.h" -namespace art { +namespace art HIDDEN { using Value = InductionVarRange::Value; @@ -1064,10 +1065,6 @@ TEST_F(InductionVarRangeTest, ConstantTripCountDown) { HInstruction* last = range_.GenerateLastValue(phi, graph_, loop_preheader_); ASSERT_TRUE(last->IsSub()); ExpectInt(1000, last->InputAt(0)); - ASSERT_TRUE(last->InputAt(1)->IsNeg()); - last = last->InputAt(1)->InputAt(0); - ASSERT_TRUE(last->IsSub()); - ExpectInt(0, last->InputAt(0)); ExpectInt(1000, last->InputAt(1)); // Loop logic. diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index f73c0d38e4..5a4478dc14 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -46,7 +46,7 @@ #include "thread.h" #include "verifier/verifier_compiler_binding.h" -namespace art { +namespace art HIDDEN { // Instruction limit to control memory. 
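`GenerateLastValueLinear` above materializes the closed form of a linear induction after the loop's trip count, emitting Add/Sub directly for stride ±1 and Mul+Add otherwise. A small arithmetic sketch of that selection, with hypothetical names and plain integers standing in for HInstruction nodes:

```cpp
#include <cstdint>
#include <cstdio>

// Closed-form last value of the linear induction i_n = stride * n + init,
// evaluated at n = trip. Mirrors the three code shapes chosen above:
//   stride == 1   -> trip + init          (HAdd)
//   stride == -1  -> init - trip          (HSub)
//   otherwise     -> stride * trip + init (HMul then HAdd)
int64_t LastValueLinear(int64_t stride, int64_t init, int64_t trip) {
  if (stride == 1) return trip + init;
  if (stride == -1) return init - trip;
  return stride * trip + init;
}

int main() {
  // E.g. for (int i = 5; i < 45; i += 4) runs 10 trips and exits with i == 45.
  std::printf("%lld\n", static_cast<long long>(LastValueLinear(4, 5, 10)));    // 45
  std::printf("%lld\n", static_cast<long long>(LastValueLinear(-1, 100, 7)));  // 93
  return 0;
}
```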
static constexpr size_t kMaximumNumberOfTotalInstructions = 1024; @@ -72,6 +72,9 @@ static constexpr size_t kMaximumNumberOfPolymorphicRecursiveCalls = 0; // Controls the use of inline caches in AOT mode. static constexpr bool kUseAOTInlineCaches = true; +// Controls the use of inlining try catches. +static constexpr bool kInlineTryCatches = true; + // We check for line numbers to make sure the DepthString implementation // aligns the output nicely. #define LOG_INTERNAL(msg) \ @@ -141,7 +144,11 @@ bool HInliner::Run() { } bool did_inline = false; - bool did_set_always_throws = false; + // The inliner is the only phase that sets invokes as `always throwing`, and since we only run the + // inliner once per graph this value should always be false at the beginning of the inlining + // phase. This is important since we use `HasAlwaysThrowingInvokes` to know whether the inliner + // phase performed a relevant change in the graph. + DCHECK(!graph_->HasAlwaysThrowingInvokes()); // Initialize the number of instructions for the method being compiled. Recursive calls // to HInliner::Run have already updated the instruction count. @@ -175,14 +182,14 @@ bool HInliner::Run() { HInstruction* next = instruction->GetNext(); HInvoke* call = instruction->AsInvoke(); // As long as the call is not intrinsified, it is worth trying to inline. - if (call != nullptr && call->GetIntrinsic() == Intrinsics::kNone) { + if (call != nullptr && !codegen_->IsImplementedIntrinsic(call)) { if (honor_noinline_directives) { // Debugging case: directives in method names control or assert on inlining. std::string callee_name = call->GetMethodReference().PrettyMethod(/* with_signature= */ false); // Tests prevent inlining by having $noinline$ in their method names. if (callee_name.find("$noinline$") == std::string::npos) { - if (TryInline(call, &did_set_always_throws)) { + if (TryInline(call)) { did_inline = true; } else if (honor_inline_directives) { bool should_have_inlined = (callee_name.find("$inline$") != std::string::npos); @@ -192,7 +199,7 @@ bool HInliner::Run() { } else { DCHECK(!honor_inline_directives); // Normal case: try to inline. - if (TryInline(call, &did_set_always_throws)) { + if (TryInline(call)) { did_inline = true; } } @@ -201,7 +208,9 @@ bool HInliner::Run() { } } - return did_inline || did_set_always_throws; + // We return true if we either inlined at least one method, or we marked one of our methods as + // always throwing. 
+ return did_inline || graph_->HasAlwaysThrowingInvokes(); } static bool IsMethodOrDeclaringClassFinal(ArtMethod* method) @@ -436,7 +445,7 @@ static bool AlwaysThrows(ArtMethod* method) return throw_seen; } -bool HInliner::TryInline(HInvoke* invoke_instruction, /*inout*/ bool* did_set_always_throws) { +bool HInliner::TryInline(HInvoke* invoke_instruction) { MaybeRecordStat(stats_, MethodCompilationStat::kTryInline); // Don't bother to move further if we know the method is unresolved or the invocation is @@ -472,7 +481,8 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, /*inout*/ bool* did_set_al bool result = TryInlineAndReplace(invoke_instruction, actual_method, ReferenceTypeInfo::CreateInvalid(), - /* do_rtp= */ true); + /* do_rtp= */ true, + /* is_speculative= */ false); if (result) { MaybeRecordStat(stats_, MethodCompilationStat::kInlinedInvokeVirtualOrInterface); if (outermost_graph_ == graph_) { @@ -487,11 +497,10 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, /*inout*/ bool* did_set_al } else { invoke_to_analyze = invoke_instruction; } - // Set always throws property for non-inlined method call with single - // target. - if (AlwaysThrows(actual_method)) { - invoke_to_analyze->SetAlwaysThrows(true); - *did_set_always_throws = true; + // Set always throws property for non-inlined method call with single target. + if (invoke_instruction->AlwaysThrows() || AlwaysThrows(actual_method)) { + invoke_to_analyze->SetAlwaysThrows(/* always_throws= */ true); + graph_->SetHasAlwaysThrowingInvokes(/* value= */ true); } } return result; @@ -499,10 +508,27 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, /*inout*/ bool* did_set_al DCHECK(!invoke_instruction->IsInvokeStaticOrDirect()); + // No try catch inlining allowed here, or recursively. For try catch inlining we are banking on + // the fact that we have a unique dex pc list. We cannot guarantee that for some TryInline methods + // e.g. `TryInlinePolymorphicCall`. + // TODO(solanes): Setting `try_catch_inlining_allowed_` to false here covers all cases from + // `TryInlineFromCHA` and from `TryInlineFromInlineCache` as well (e.g. + // `TryInlinePolymorphicCall`). Reassess to see if we can inline inline catch blocks in + // `TryInlineFromCHA`, `TryInlineMonomorphicCall` and `TryInlinePolymorphicCallToSameTarget`. + + // We store the value to restore it since we will use the same HInliner instance for other inlinee + // candidates. 
+ const bool previous_value = try_catch_inlining_allowed_; + try_catch_inlining_allowed_ = false; + if (TryInlineFromCHA(invoke_instruction)) { + try_catch_inlining_allowed_ = previous_value; return true; } - return TryInlineFromInlineCache(invoke_instruction); + + const bool result = TryInlineFromInlineCache(invoke_instruction); + try_catch_inlining_allowed_ = previous_value; + return result; } bool HInliner::TryInlineFromCHA(HInvoke* invoke_instruction) { @@ -518,7 +544,8 @@ bool HInliner::TryInlineFromCHA(HInvoke* invoke_instruction) { if (!TryInlineAndReplace(invoke_instruction, method, ReferenceTypeInfo::CreateInvalid(), - /* do_rtp= */ true)) { + /* do_rtp= */ true, + /* is_speculative= */ true)) { return false; } AddCHAGuard(invoke_instruction, dex_pc, cursor, bb_cursor); @@ -786,7 +813,8 @@ bool HInliner::TryInlineMonomorphicCall( if (!TryInlineAndReplace(invoke_instruction, resolved_method, ReferenceTypeInfo::Create(monomorphic_type, /* is_exact= */ true), - /* do_rtp= */ false)) { + /* do_rtp= */ false, + /* is_speculative= */ true)) { return false; } @@ -802,7 +830,6 @@ bool HInliner::TryInlineMonomorphicCall( // Run type propagation to get the guard typed, and eventually propagate the // type of the receiver. ReferenceTypePropagation rtp_fixup(graph_, - outer_compilation_unit_.GetClassLoader(), outer_compilation_unit_.GetDexCache(), /* is_first_run= */ false); rtp_fixup.Run(); @@ -982,7 +1009,8 @@ bool HInliner::TryInlinePolymorphicCall( !TryBuildAndInline(invoke_instruction, method, ReferenceTypeInfo::Create(handle, /* is_exact= */ true), - &return_replacement)) { + &return_replacement, + /* is_speculative= */ true)) { all_targets_inlined = false; } else { one_target_inlined = true; @@ -1024,7 +1052,6 @@ bool HInliner::TryInlinePolymorphicCall( // Run type propagation to get the guards typed. ReferenceTypePropagation rtp_fixup(graph_, - outer_compilation_unit_.GetClassLoader(), outer_compilation_unit_.GetDexCache(), /* is_first_run= */ false); rtp_fixup.Run(); @@ -1160,7 +1187,8 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget( if (!TryBuildAndInline(invoke_instruction, actual_method, ReferenceTypeInfo::CreateInvalid(), - &return_replacement)) { + &return_replacement, + /* is_speculative= */ true)) { return false; } @@ -1215,7 +1243,6 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget( // Run type propagation to get the guard typed. ReferenceTypePropagation rtp_fixup(graph_, - outer_compilation_unit_.GetClassLoader(), outer_compilation_unit_.GetDexCache(), /* is_first_run= */ false); rtp_fixup.Run(); @@ -1232,7 +1259,6 @@ void HInliner::MaybeRunReferenceTypePropagation(HInstruction* replacement, // Actual return value has a more specific type than the method's declared // return type. Run RTP again on the outer graph to propagate it. ReferenceTypePropagation(graph_, - outer_compilation_unit_.GetClassLoader(), outer_compilation_unit_.GetDexCache(), /* is_first_run= */ false).Run(); } @@ -1246,6 +1272,13 @@ bool HInliner::TryDevirtualize(HInvoke* invoke_instruction, return false; } + // Don't try to devirtualize intrinsics as it breaks pattern matching from later phases. + // TODO(solanes): This `if` could be removed if we update optimizations like + // TryReplaceStringBuilderAppend. + if (invoke_instruction->IsIntrinsic()) { + return false; + } + // Don't bother trying to call directly a default conflict method. It // doesn't have a proper MethodReference, but also `GetCanonicalMethod` // will return an actual default implementation. 
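The inliner changes above gate try-catch inlining on two conditions: the call site must not itself sit in a try block, and an enclosing speculative path (CHA guards, inline caches) must not have cleared `try_catch_inlining_allowed_`. A minimal sketch of that gate plus the save/restore of the flag around speculative inlining, using hypothetical names rather than the real HInliner API:

```cpp
#include <cstdio>

// Hypothetical, simplified call-site description.
struct CallSite {
  bool callee_has_try_catch;
  bool call_block_is_try_block;
};

struct ToyInliner {
  bool try_catch_inlining_allowed = true;

  // Mirrors the IsInliningSupported logic quoted above: a callee with a try
  // catch is only inlined when the call site is not in a try block and no
  // enclosing speculative inlining path has disallowed it.
  bool SupportsTryCatchInlining(const CallSite& site) const {
    if (!site.callee_has_try_catch) return true;
    return !site.call_block_is_try_block && try_catch_inlining_allowed;
  }

  // Mirrors the save/restore around the CHA / inline-cache paths: the flag is
  // cleared for the speculative sub-inlining and then restored.
  bool TrySpeculativeInline(const CallSite& site) {
    const bool previous_value = try_catch_inlining_allowed;
    try_catch_inlining_allowed = false;
    const bool result = SupportsTryCatchInlining(site);  // stands in for the recursive attempt
    try_catch_inlining_allowed = previous_value;
    return result;
  }
};

int main() {
  ToyInliner inliner;
  CallSite plain{/*callee_has_try_catch=*/true, /*call_block_is_try_block=*/false};
  CallSite in_try{/*callee_has_try_catch=*/true, /*call_block_is_try_block=*/true};
  std::printf("direct, plain block: %d\n", inliner.SupportsTryCatchInlining(plain));   // 1
  std::printf("direct, inside try:  %d\n", inliner.SupportsTryCatchInlining(in_try));  // 0
  std::printf("speculative path:    %d\n", inliner.TrySpeculativeInline(plain));       // 0
  return 0;
}
```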
@@ -1288,7 +1321,8 @@ bool HInliner::TryDevirtualize(HInvoke* invoke_instruction, dispatch_info, kDirect, MethodReference(method->GetDexFile(), method->GetDexMethodIndex()), - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, + !graph_->IsDebuggable()); HInputsRef inputs = invoke_instruction->GetInputs(); DCHECK_EQ(inputs.size(), invoke_instruction->GetNumberOfArguments()); for (size_t index = 0; index != inputs.size(); ++index) { @@ -1301,7 +1335,7 @@ bool HInliner::TryDevirtualize(HInvoke* invoke_instruction, invoke_instruction->GetBlock()->InsertInstructionBefore(new_invoke, invoke_instruction); new_invoke->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); if (invoke_instruction->GetType() == DataType::Type::kReference) { - new_invoke->SetReferenceTypeInfo(invoke_instruction->GetReferenceTypeInfo()); + new_invoke->SetReferenceTypeInfoIfValid(invoke_instruction->GetReferenceTypeInfo()); } *replacement = new_invoke; @@ -1316,11 +1350,13 @@ bool HInliner::TryDevirtualize(HInvoke* invoke_instruction, bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* method, ReferenceTypeInfo receiver_type, - bool do_rtp) { - DCHECK(!invoke_instruction->IsIntrinsic()); + bool do_rtp, + bool is_speculative) { + DCHECK(!codegen_->IsImplementedIntrinsic(invoke_instruction)); HInstruction* return_replacement = nullptr; - if (!TryBuildAndInline(invoke_instruction, method, receiver_type, &return_replacement)) { + if (!TryBuildAndInline( + invoke_instruction, method, receiver_type, &return_replacement, is_speculative)) { return false; } @@ -1378,6 +1414,15 @@ bool HInliner::IsInliningAllowed(ArtMethod* method, const CodeItemDataAccessor& return false; } + if (annotations::MethodIsNeverInline(*method->GetDexFile(), + method->GetClassDef(), + method->GetDexMethodIndex())) { + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedNeverInlineAnnotation) + << "Method " << method->PrettyMethod() + << " has the @NeverInline annotation so it won't be inlined"; + return false; + } + return true; } @@ -1397,9 +1442,25 @@ bool HInliner::IsInliningSupported(const HInvoke* invoke_instruction, } if (accessor.TriesSize() != 0) { - LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedTryCatchCallee) - << "Method " << method->PrettyMethod() << " is not inlined because of try block"; - return false; + if (!kInlineTryCatches) { + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedTryCatchDisabled) + << "Method " << method->PrettyMethod() + << " is not inlined because inlining try catches is disabled globally"; + return false; + } + const bool disallowed_try_catch_inlining = + // Direct parent is a try block. + invoke_instruction->GetBlock()->IsTryBlock() || + // Indirect parent disallows try catch inlining. + !try_catch_inlining_allowed_; + if (disallowed_try_catch_inlining) { + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedTryCatchCallee) + << "Method " << method->PrettyMethod() + << " is not inlined because it has a try catch and we are not supporting it for this" + << " particular call. This could be because e.g. it would be inlined inside another" + << " try block, or we arrived here from TryInlinePolymorphicCall, etc."; + return false; + } } if (invoke_instruction->IsInvokeStaticOrDirect() && @@ -1416,9 +1477,9 @@ bool HInliner::IsInliningSupported(const HInvoke* invoke_instruction, return true; } -// Returns whether our resource limits allow inlining this method.
-bool HInliner::IsInliningBudgetAvailable(ArtMethod* method, - const CodeItemDataAccessor& accessor) const { +bool HInliner::IsInliningEncouraged(const HInvoke* invoke_instruction, + ArtMethod* method, + const CodeItemDataAccessor& accessor) const { if (CountRecursiveCallsOf(method) > kMaximumNumberOfRecursiveCalls) { LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedRecursiveBudget) << "Method " @@ -1438,13 +1499,21 @@ bool HInliner::IsInliningBudgetAvailable(ArtMethod* method, return false; } + if (invoke_instruction->GetBlock()->GetLastInstruction()->IsThrow()) { + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedEndsWithThrow) + << "Method " << method->PrettyMethod() + << " is not inlined because its block ends with a throw"; + return false; + } + return true; } bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, ArtMethod* method, ReferenceTypeInfo receiver_type, - HInstruction** return_replacement) { + HInstruction** return_replacement, + bool is_speculative) { // If invoke_instruction is devirtualized to a different method, give intrinsics // another chance before we try to inline it. if (invoke_instruction->GetResolvedMethod() != method && method->IsIntrinsic()) { @@ -1459,7 +1528,8 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, invoke_instruction->GetMethodReference(), // Use existing invoke's method's reference. method, MethodReference(method->GetDexFile(), method->GetDexMethodIndex()), - method->GetMethodIndex()); + method->GetMethodIndex(), + !graph_->IsDebuggable()); DCHECK_NE(new_invoke->GetIntrinsic(), Intrinsics::kNone); HInputsRef inputs = invoke_instruction->GetInputs(); for (size_t index = 0; index != inputs.size(); ++index) { @@ -1468,7 +1538,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, invoke_instruction->GetBlock()->InsertInstructionBefore(new_invoke, invoke_instruction); new_invoke->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); if (invoke_instruction->GetType() == DataType::Type::kReference) { - new_invoke->SetReferenceTypeInfo(invoke_instruction->GetReferenceTypeInfo()); + new_invoke->SetReferenceTypeInfoIfValid(invoke_instruction->GetReferenceTypeInfo()); } *return_replacement = new_invoke; return true; @@ -1503,12 +1573,12 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, return false; } - if (!IsInliningBudgetAvailable(method, accessor)) { + if (!IsInliningEncouraged(invoke_instruction, method, accessor)) { return false; } if (!TryBuildAndInlineHelper( - invoke_instruction, method, receiver_type, return_replacement)) { + invoke_instruction, method, receiver_type, return_replacement, is_speculative)) { return false; } @@ -1627,7 +1697,7 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction, bool needs_constructor_barrier = false; for (size_t i = 0; i != number_of_iputs; ++i) { HInstruction* value = GetInvokeInputForArgVRegIndex(invoke_instruction, iput_args[i]); - if (!value->IsConstant() || !value->AsConstant()->IsZeroBitPattern()) { + if (!IsZeroBitPattern(value)) { uint16_t field_index = iput_field_indexes[i]; bool is_final; HInstanceFieldSet* iput = @@ -1684,7 +1754,6 @@ HInstanceFieldGet* HInliner::CreateInstanceFieldGet(uint32_t field_index, Handle<mirror::DexCache> dex_cache = graph_->GetHandleCache()->NewHandle(referrer->GetDexCache()); ReferenceTypePropagation rtp(graph_, - outer_compilation_unit_.GetClassLoader(), dex_cache, /* is_first_run= */ false); rtp.Visit(iget); @@ -1795,7 +1864,7 @@ void HInliner::SubstituteArguments(HGraph* callee_graph, run_rtp = 
true; current->SetReferenceTypeInfo(receiver_type); } else { - current->SetReferenceTypeInfo(argument->GetReferenceTypeInfo()); + current->SetReferenceTypeInfoIfValid(argument->GetReferenceTypeInfo()); } current->AsParameterValue()->SetCanBeNull(argument->CanBeNull()); } @@ -1807,7 +1876,6 @@ void HInliner::SubstituteArguments(HGraph* callee_graph, // are more specific than the declared ones, run RTP again on the inner graph. if (run_rtp || ArgumentTypesMoreSpecific(invoke_instruction, resolved_method)) { ReferenceTypePropagation(callee_graph, - outer_compilation_unit_.GetClassLoader(), dex_compilation_unit.GetDexCache(), /* is_first_run= */ false).Run(); } @@ -1821,8 +1889,9 @@ void HInliner::SubstituteArguments(HGraph* callee_graph, // If this function returns true, it will also set out_number_of_instructions to // the number of instructions in the inlined body. bool HInliner::CanInlineBody(const HGraph* callee_graph, - const HBasicBlock* target_block, - size_t* out_number_of_instructions) const { + HInvoke* invoke, + size_t* out_number_of_instructions, + bool is_speculative) const { ArtMethod* const resolved_method = callee_graph->GetArtMethod(); HBasicBlock* exit_block = callee_graph->GetExitBlock(); @@ -1835,15 +1904,30 @@ bool HInliner::CanInlineBody(const HGraph* callee_graph, bool has_one_return = false; for (HBasicBlock* predecessor : exit_block->GetPredecessors()) { - if (predecessor->GetLastInstruction()->IsThrow()) { - if (target_block->IsTryBlock()) { - // TODO(ngeoffray): Support adding HTryBoundary in Hgraph::InlineInto. - LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedTryCatchCaller) + const HInstruction* last_instruction = predecessor->GetLastInstruction(); + // On inlinees, we can have Return/ReturnVoid/Throw -> TryBoundary -> Exit. To check for the + // actual last instruction, we have to skip the TryBoundary instruction. + if (last_instruction->IsTryBoundary()) { + predecessor = predecessor->GetSinglePredecessor(); + last_instruction = predecessor->GetLastInstruction(); + + // If the last instruction chain is Return/ReturnVoid -> TryBoundary -> Exit we will have to + // split a critical edge in InlineInto and might recompute loop information, which is + // unsupported for irreducible loops. + if (!last_instruction->IsThrow() && graph_->HasIrreducibleLoops()) { + DCHECK(last_instruction->IsReturn() || last_instruction->IsReturnVoid()); + // TODO(ngeoffray): Support re-computing loop information to graphs with + // irreducible loops? + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedIrreducibleLoopCaller) << "Method " << resolved_method->PrettyMethod() - << " could not be inlined because one branch always throws and" - << " caller is in a try/catch block"; + << " could not be inlined because we will have to recompute the loop information and" + << " the caller has irreducible loops"; return false; - } else if (graph_->GetExitBlock() == nullptr) { + } + } + + if (last_instruction->IsThrow()) { + if (graph_->GetExitBlock() == nullptr) { // TODO(ngeoffray): Support adding HExit in the caller graph. LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedInfiniteLoop) << "Method " << resolved_method->PrettyMethod() @@ -1853,9 +1937,10 @@ bool HInliner::CanInlineBody(const HGraph* callee_graph, } else if (graph_->HasIrreducibleLoops()) { // TODO(ngeoffray): Support re-computing loop information to graphs with // irreducible loops? 
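// A small sketch of the "skip the TryBoundary" step described above: when an inlinee block
// reaches Exit through a TryBoundary, the instruction that actually matters (Return,
// ReturnVoid or Throw) lives in that block's single predecessor. The struct below is a
// simplified assumption, not the real HIR block class.
struct HirBlockSketch {
  HirBlockSketch* single_predecessor = nullptr;
  bool ends_with_try_boundary = false;
};

// Returns the block whose terminator should be classified.
inline HirBlockSketch* SkipTryBoundary(HirBlockSketch* exit_predecessor) {
  return exit_predecessor->ends_with_try_boundary ? exit_predecessor->single_predecessor
                                                  : exit_predecessor;
}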
- VLOG(compiler) << "Method " << resolved_method->PrettyMethod() - << " could not be inlined because one branch always throws and" - << " caller has irreducible loops"; + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedIrreducibleLoopCaller) + << "Method " << resolved_method->PrettyMethod() + << " could not be inlined because one branch always throws and" + << " the caller has irreducible loops"; return false; } } else { @@ -1864,6 +1949,15 @@ bool HInliner::CanInlineBody(const HGraph* callee_graph, } if (!has_one_return) { + if (!is_speculative) { + // If we know that the method always throws with the particular parameters, set it as such. + // This is better than using the dex instructions as we have more information about this + // particular call. We don't mark speculative inlines (e.g. the ones from the inline cache) as + // always throwing since they might not throw when executed. + invoke->SetAlwaysThrows(/* always_throws= */ true); + graph_->SetHasAlwaysThrowingInvokes(/* value= */ true); + } + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedAlwaysThrows) << "Method " << resolved_method->PrettyMethod() << " could not be inlined because it always throws"; @@ -1882,7 +1976,7 @@ bool HInliner::CanInlineBody(const HGraph* callee_graph, if (block->GetLoopInformation()->IsIrreducible()) { // Don't inline methods with irreducible loops, they could prevent some // optimizations to run. - LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedIrreducibleLoop) + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedIrreducibleLoopCallee) << "Method " << resolved_method->PrettyMethod() << " could not be inlined because it contains an irreducible loop"; return false; @@ -1930,8 +2024,10 @@ bool HInliner::CanInlineBody(const HGraph* callee_graph, if (current->IsUnresolvedStaticFieldGet() || current->IsUnresolvedInstanceFieldGet() || current->IsUnresolvedStaticFieldSet() || - current->IsUnresolvedInstanceFieldSet()) { - // Entrypoint for unresolved fields does not handle inlined frames. + current->IsUnresolvedInstanceFieldSet() || + current->IsInvokeUnresolved()) { + // Unresolved invokes / field accesses are expensive at runtime when decoding inlining info, + // so don't inline methods that have them. LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedUnresolvedEntrypoint) << "Method " << resolved_method->PrettyMethod() << " could not be inlined because it is using an unresolved" @@ -1964,7 +2060,8 @@ bool HInliner::CanInlineBody(const HGraph* callee_graph, bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, ArtMethod* resolved_method, ReferenceTypeInfo receiver_type, - HInstruction** return_replacement) { + HInstruction** return_replacement, + bool is_speculative) { DCHECK(!(resolved_method->IsStatic() && receiver_type.IsValid())); const dex::CodeItem* code_item = resolved_method->GetCodeItem(); const DexFile& callee_dex_file = *resolved_method->GetDexFile(); @@ -2057,10 +2154,18 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, SubstituteArguments(callee_graph, invoke_instruction, receiver_type, dex_compilation_unit); - RunOptimizations(callee_graph, code_item, dex_compilation_unit); + const bool try_catch_inlining_allowed_for_recursive_inline = + // It was allowed previously. + try_catch_inlining_allowed_ && + // The current invoke is not a try block. 
+ !invoke_instruction->GetBlock()->IsTryBlock(); + RunOptimizations(callee_graph, + code_item, + dex_compilation_unit, + try_catch_inlining_allowed_for_recursive_inline); size_t number_of_instructions = 0; - if (!CanInlineBody(callee_graph, invoke_instruction->GetBlock(), &number_of_instructions)) { + if (!CanInlineBody(callee_graph, invoke_instruction, &number_of_instructions, is_speculative)) { return false; } @@ -2095,16 +2200,17 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, void HInliner::RunOptimizations(HGraph* callee_graph, const dex::CodeItem* code_item, - const DexCompilationUnit& dex_compilation_unit) { + const DexCompilationUnit& dex_compilation_unit, + bool try_catch_inlining_allowed_for_recursive_inline) { // Note: if the outermost_graph_ is being compiled OSR, we should not run any // optimization that could lead to a HDeoptimize. The following optimizations do not. HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner"); - HConstantFolding fold(callee_graph, "constant_folding$inliner"); + HConstantFolding fold(callee_graph, inline_stats_, "constant_folding$inliner"); InstructionSimplifier simplify(callee_graph, codegen_, inline_stats_); HOptimization* optimizations[] = { - &simplify, &fold, + &simplify, &dce, }; @@ -2141,7 +2247,8 @@ void HInliner::RunOptimizations(HGraph* callee_graph, total_number_of_dex_registers_ + accessor.RegistersSize(), total_number_of_instructions_ + number_of_instructions, this, - depth_ + 1); + depth_ + 1, + try_catch_inlining_allowed_for_recursive_inline); inliner.Run(); } @@ -2155,6 +2262,10 @@ static bool IsReferenceTypeRefinement(ObjPtr<mirror::Class> declared_class, } ReferenceTypeInfo actual_rti = actual_obj->GetReferenceTypeInfo(); + if (!actual_rti.IsValid()) { + return false; + } + ObjPtr<mirror::Class> actual_class = actual_rti.GetTypeHandle().Get(); return (actual_rti.IsExact() && !declared_is_exact) || (declared_class != actual_class && declared_class->IsAssignableFrom(actual_class)); diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index a2c2085e00..af067dae73 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -17,13 +17,14 @@ #ifndef ART_COMPILER_OPTIMIZING_INLINER_H_ #define ART_COMPILER_OPTIMIZING_INLINER_H_ +#include "base/macros.h" #include "dex/dex_file_types.h" #include "dex/invoke_type.h" #include "jit/profiling_info.h" #include "optimization.h" #include "profile/profile_compilation_info.h" -namespace art { +namespace art HIDDEN { class CodeGenerator; class DexCompilationUnit; @@ -42,7 +43,8 @@ class HInliner : public HOptimization { size_t total_number_of_dex_registers, size_t total_number_of_instructions, HInliner* parent, - size_t depth = 0, + size_t depth, + bool try_catch_inlining_allowed, const char* name = kInlinerPassName) : HOptimization(outer_graph, name, stats), outermost_graph_(outermost_graph), @@ -54,6 +56,7 @@ class HInliner : public HOptimization { parent_(parent), depth_(depth), inlining_budget_(0), + try_catch_inlining_allowed_(try_catch_inlining_allowed), inline_stats_(nullptr) {} bool Run() override; @@ -70,9 +73,7 @@ class HInliner : public HOptimization { kInlineCacheMissingTypes = 5 }; - // We set `did_set_always_throws` as true if we analyzed `invoke_instruction` and it always - // throws. - bool TryInline(HInvoke* invoke_instruction, /*inout*/ bool* did_set_always_throws); + bool TryInline(HInvoke* invoke_instruction); // Try to inline `resolved_method` in place of `invoke_instruction`. 
`do_rtp` is whether // reference type propagation can run after the inlining. If the inlining is successful, this @@ -80,19 +81,22 @@ class HInliner : public HOptimization { bool TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* resolved_method, ReferenceTypeInfo receiver_type, - bool do_rtp) + bool do_rtp, + bool is_speculative) REQUIRES_SHARED(Locks::mutator_lock_); bool TryBuildAndInline(HInvoke* invoke_instruction, ArtMethod* resolved_method, ReferenceTypeInfo receiver_type, - HInstruction** return_replacement) + HInstruction** return_replacement, + bool is_speculative) REQUIRES_SHARED(Locks::mutator_lock_); bool TryBuildAndInlineHelper(HInvoke* invoke_instruction, ArtMethod* resolved_method, ReferenceTypeInfo receiver_type, - HInstruction** return_replacement) + HInstruction** return_replacement, + bool is_speculative) REQUIRES_SHARED(Locks::mutator_lock_); // Substitutes parameters in the callee graph with their values from the caller. @@ -105,8 +109,9 @@ class HInliner : public HOptimization { // Run simple optimizations on `callee_graph`. void RunOptimizations(HGraph* callee_graph, const dex::CodeItem* code_item, - const DexCompilationUnit& dex_compilation_unit) - REQUIRES_SHARED(Locks::mutator_lock_); + const DexCompilationUnit& dex_compilation_unit, + bool try_catch_inlining_allowed_for_recursive_inline) + REQUIRES_SHARED(Locks::mutator_lock_); // Try to recognize known simple patterns and replace invoke call with appropriate instructions. bool TryPatternSubstitution(HInvoke* invoke_instruction, @@ -129,12 +134,14 @@ class HInliner : public HOptimization { const CodeItemDataAccessor& accessor) const REQUIRES_SHARED(Locks::mutator_lock_); - // Returns whether the inlining budget allows inlining method. + // Returns whether inlining is encouraged. // // For example, this checks whether the function has grown too large and // inlining should be prevented. - bool IsInliningBudgetAvailable(art::ArtMethod* method, const CodeItemDataAccessor& accessor) const - REQUIRES_SHARED(Locks::mutator_lock_); + bool IsInliningEncouraged(const HInvoke* invoke_instruction, + art::ArtMethod* method, + const CodeItemDataAccessor& accessor) const + REQUIRES_SHARED(Locks::mutator_lock_); // Inspects the body of a method (callee_graph) and returns whether it can be // inlined. @@ -142,8 +149,9 @@ class HInliner : public HOptimization { // This checks for instructions and constructs that we do not support // inlining, such as inlining a throw instruction into a try block. bool CanInlineBody(const HGraph* callee_graph, - const HBasicBlock* target_block, - size_t* out_number_of_instructions) const + HInvoke* invoke, + size_t* out_number_of_instructions, + bool is_speculative) const REQUIRES_SHARED(Locks::mutator_lock_); // Create a new HInstanceFieldGet. @@ -320,6 +328,9 @@ class HInliner : public HOptimization { // The budget left for inlining, in number of instructions. size_t inlining_budget_; + // States if we are allowing try catch inlining to occur at this particular instance of inlining. + bool try_catch_inlining_allowed_; + // Used to record stats about optimizations on the inlined graph. // If the inlining is successful, these stats are merged to the caller graph's stats. 
OptimizingCompilerStats* inline_stats_; diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index e0bdd0963c..fee9091145 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -42,7 +42,7 @@ #include "ssa_builder.h" #include "well_known_classes.h" -namespace art { +namespace art HIDDEN { namespace { @@ -343,6 +343,10 @@ static bool IsBlockPopulated(HBasicBlock* block) { // Suspend checks were inserted into loop headers during building of dominator tree. DCHECK(block->GetFirstInstruction()->IsSuspendCheck()); return block->GetFirstInstruction() != block->GetLastInstruction(); + } else if (block->IsCatchBlock()) { + // Nops were inserted into the beginning of catch blocks. + DCHECK(block->GetFirstInstruction()->IsNop()); + return block->GetFirstInstruction() != block->GetLastInstruction(); } else { return !block->GetInstructions().IsEmpty(); } @@ -387,6 +391,11 @@ bool HInstructionBuilder::Build() { // This is slightly odd because the loop header might not be empty (TryBoundary). // But we're still creating the environment with locals from the top of the block. InsertInstructionAtTop(suspend_check); + } else if (current_block_->IsCatchBlock()) { + // We add an environment emitting instruction at the beginning of each catch block, in order + // to support try catch inlining. + // This is slightly odd because the catch block might not be empty (TryBoundary). + InsertInstructionAtTop(new (allocator_) HNop(block_dex_pc, /* needs_environment= */ true)); } if (block_dex_pc == kNoDexPc || current_block_ != block_builder_->GetBlockAt(block_dex_pc)) { @@ -414,7 +423,7 @@ bool HInstructionBuilder::Build() { } if (native_debuggable && native_debug_info_locations->IsBitSet(dex_pc)) { - AppendInstruction(new (allocator_) HNativeDebugInfo(dex_pc)); + AppendInstruction(new (allocator_) HNop(dex_pc, /* needs_environment= */ true)); } // Note: There may be no Thread for gtests. @@ -460,6 +469,9 @@ void HInstructionBuilder::BuildIntrinsic(ArtMethod* method) { current_block_ = graph_->GetEntryBlock(); InitializeBlockLocals(); InitializeParameters(); + if (graph_->IsDebuggable() && code_generator_->GetCompilerOptions().IsJitCompiler()) { + AppendInstruction(new (allocator_) HMethodEntryHook(0u)); + } AppendInstruction(new (allocator_) HGoto(0u)); // Fill the body. @@ -495,14 +507,21 @@ void HInstructionBuilder::BuildIntrinsic(ArtMethod* method) { dispatch_info, invoke_type, target_method, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, + !graph_->IsDebuggable()); HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ false); } // Add the return instruction. if (return_type_ == DataType::Type::kVoid) { + if (graph_->IsDebuggable() && code_generator_->GetCompilerOptions().IsJitCompiler()) { + AppendInstruction(new (allocator_) HMethodExitHook(graph_->GetNullConstant(), kNoDexPc)); + } AppendInstruction(new (allocator_) HReturnVoid()); } else { + if (graph_->IsDebuggable() && code_generator_->GetCompilerOptions().IsJitCompiler()) { + AppendInstruction(new (allocator_) HMethodExitHook(latest_result_, kNoDexPc)); + } AppendInstruction(new (allocator_) HReturn(latest_result_)); } @@ -972,11 +991,11 @@ static ArtMethod* ResolveMethod(uint16_t method_idx, *imt_or_vtable_index = resolved_method->GetVtableIndex(); } else if (*invoke_type == kInterface) { // For HInvokeInterface we need the IMT index. 
- *imt_or_vtable_index = ImTable::GetImtIndex(resolved_method); + *imt_or_vtable_index = resolved_method->GetImtIndex(); + DCHECK_EQ(*imt_or_vtable_index, ImTable::GetImtIndex(resolved_method)); } - *is_string_constructor = - resolved_method->IsConstructor() && resolved_method->GetDeclaringClass()->IsStringClass(); + *is_string_constructor = resolved_method->IsStringConstructor(); return resolved_method; } @@ -1041,7 +1060,8 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, dispatch_info, invoke_type, resolved_method_reference, - HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit); + HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit, + !graph_->IsDebuggable()); return HandleStringInit(invoke, operands, shorty); } @@ -1054,7 +1074,7 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, } // Try to build an HIR replacement for the intrinsic. - if (UNLIKELY(resolved_method->IsIntrinsic())) { + if (UNLIKELY(resolved_method->IsIntrinsic()) && !graph_->IsDebuggable()) { // All intrinsics are in the primary boot image, so their class can always be referenced // and we do not need to rely on the implicit class initialization check. The class should // be initialized but we do not require that here. @@ -1105,7 +1125,8 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, dispatch_info, invoke_type, resolved_method_reference, - clinit_check_requirement); + clinit_check_requirement, + !graph_->IsDebuggable()); if (clinit_check != nullptr) { // Add the class initialization check as last input of `invoke`. DCHECK_EQ(clinit_check_requirement, HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit); @@ -1121,7 +1142,8 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, method_reference, resolved_method, resolved_method_reference, - /*vtable_index=*/ imt_or_vtable_index); + /*vtable_index=*/ imt_or_vtable_index, + !graph_->IsDebuggable()); } else { DCHECK_EQ(invoke_type, kInterface); if (kIsDebugBuild) { @@ -1142,7 +1164,8 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, resolved_method, resolved_method_reference, /*imt_index=*/ imt_or_vtable_index, - load_kind); + load_kind, + !graph_->IsDebuggable()); } return HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ false); } @@ -1341,12 +1364,14 @@ bool HInstructionBuilder::BuildInvokePolymorphic(uint32_t dex_pc, method_reference, resolved_method, resolved_method_reference, - proto_idx); + proto_idx, + !graph_->IsDebuggable()); if (!HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ false)) { return false; } - if (invoke->GetIntrinsic() != Intrinsics::kMethodHandleInvoke && + if (invoke->GetIntrinsic() != Intrinsics::kNone && + invoke->GetIntrinsic() != Intrinsics::kMethodHandleInvoke && invoke->GetIntrinsic() != Intrinsics::kMethodHandleInvokeExact && VarHandleAccessorNeedsReturnTypeCheck(invoke, return_type)) { // Type check is needed because VarHandle intrinsics do not type check the retrieved reference. 
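// A condensed sketch of the pattern repeated in the BuildInvoke hunks above: the instruction
// builder threads `!graph_->IsDebuggable()` into every invoke it creates and only attempts an
// HIR intrinsic replacement when the graph is not debuggable. The types and the parameter
// name `enable_intrinsic_opt` below are assumptions for illustration.
struct GraphSketch {
  bool debuggable = false;
  bool IsDebuggable() const { return debuggable; }
};

struct InvokeSketch {
  explicit InvokeSketch(bool enable_intrinsic_opt) : enable_intrinsic_opt_(enable_intrinsic_opt) {}
  bool enable_intrinsic_opt_;
};

inline InvokeSketch BuildInvokeSketch(const GraphSketch& graph) {
  // Intrinsic optimizations are gated on the graph not being debuggable.
  return InvokeSketch(/* enable_intrinsic_opt= */ !graph.IsDebuggable());
}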
@@ -1379,7 +1404,8 @@ bool HInstructionBuilder::BuildInvokeCustom(uint32_t dex_pc, call_site_idx, return_type, dex_pc, - method_reference); + method_reference, + !graph_->IsDebuggable()); return HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ false); } diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h index 817fbaa9e8..3d65d8fb54 100644 --- a/compiler/optimizing/instruction_builder.h +++ b/compiler/optimizing/instruction_builder.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_INSTRUCTION_BUILDER_H_ #include "base/array_ref.h" +#include "base/macros.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" #include "data_type.h" @@ -27,7 +28,7 @@ #include "handle.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { class ArenaBitVector; class ArtField; diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 789f07786c..0c2fd5de56 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -31,13 +31,13 @@ #include "sharpening.h" #include "string_builder_append.h" -namespace art { +namespace art HIDDEN { // Whether to run an exhaustive test of individual HInstructions cloning when each instruction // is replaced with its copy if it is clonable. static constexpr bool kTestInstructionClonerExhaustively = false; -class InstructionSimplifierVisitor : public HGraphDelegateVisitor { +class InstructionSimplifierVisitor final : public HGraphDelegateVisitor { public: InstructionSimplifierVisitor(HGraph* graph, CodeGenerator* codegen, @@ -970,7 +970,7 @@ void InstructionSimplifierVisitor::VisitPredicatedInstanceFieldGet( pred_get->GetFieldInfo().GetDexFile(), pred_get->GetDexPc()); if (pred_get->GetType() == DataType::Type::kReference) { - replace_with->SetReferenceTypeInfo(pred_get->GetReferenceTypeInfo()); + replace_with->SetReferenceTypeInfoIfValid(pred_get->GetReferenceTypeInfo()); } pred_get->GetBlock()->InsertInstructionBefore(replace_with, pred_get); pred_get->ReplaceWith(replace_with); @@ -1117,6 +1117,10 @@ void InstructionSimplifierVisitor::VisitIf(HIf* instruction) { } } +// TODO(solanes): This optimization should be in ConstantFolding since we are folding to a constant. +// However, we get code size regressions when we do that since we sometimes have a NullCheck between +// HArrayLength and IsNewArray, and said NullCheck is eliminated in InstructionSimplifier. If we run +// ConstantFolding and InstructionSimplifier in lockstep this wouldn't be an issue. void InstructionSimplifierVisitor::VisitArrayLength(HArrayLength* instruction) { HInstruction* input = instruction->InputAt(0); // If the array is a NewArray with constant size, replace the array length @@ -1142,13 +1146,13 @@ void InstructionSimplifierVisitor::VisitArraySet(HArraySet* instruction) { if (value->IsArrayGet()) { if (value->AsArrayGet()->GetArray() == instruction->GetArray()) { // If the code is just swapping elements in the array, no need for a type check. 
- instruction->ClearNeedsTypeCheck(); + instruction->ClearTypeCheck(); return; } } if (value->IsNullConstant()) { - instruction->ClearNeedsTypeCheck(); + instruction->ClearTypeCheck(); return; } @@ -1160,13 +1164,13 @@ void InstructionSimplifierVisitor::VisitArraySet(HArraySet* instruction) { } if (value_rti.IsValid() && array_rti.CanArrayHold(value_rti)) { - instruction->ClearNeedsTypeCheck(); + instruction->ClearTypeCheck(); return; } if (array_rti.IsObjectArray()) { if (array_rti.IsExact()) { - instruction->ClearNeedsTypeCheck(); + instruction->ClearTypeCheck(); return; } instruction->SetStaticTypeOfArrayIsObjectArray(); @@ -1860,13 +1864,16 @@ void InstructionSimplifierVisitor::VisitDiv(HDiv* instruction) { // Search HDiv having the specified dividend and divisor which is in the specified basic block. // Return nullptr if nothing has been found. -static HInstruction* FindDivWithInputsInBasicBlock(HInstruction* dividend, - HInstruction* divisor, - HBasicBlock* basic_block) { +static HDiv* FindDivWithInputsInBasicBlock(HInstruction* dividend, + HInstruction* divisor, + HBasicBlock* basic_block) { for (const HUseListNode<HInstruction*>& use : dividend->GetUses()) { HInstruction* user = use.GetUser(); - if (user->GetBlock() == basic_block && user->IsDiv() && user->InputAt(1) == divisor) { - return user; + if (user->GetBlock() == basic_block && + user->IsDiv() && + user->InputAt(0) == dividend && + user->InputAt(1) == divisor) { + return user->AsDiv(); } } return nullptr; @@ -1900,7 +1907,7 @@ void InstructionSimplifierVisitor::TryToReuseDiv(HRem* rem) { } } - HInstruction* quotient = FindDivWithInputsInBasicBlock(dividend, divisor, basic_block); + HDiv* quotient = FindDivWithInputsInBasicBlock(dividend, divisor, basic_block); if (quotient == nullptr) { return; } @@ -2458,7 +2465,7 @@ void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction) DCHECK(method != nullptr); DCHECK(method->IsStatic()); DCHECK(method->GetDeclaringClass() == system); - invoke->SetResolvedMethod(method); + invoke->SetResolvedMethod(method, !codegen_->GetGraph()->IsDebuggable()); // Sharpen the new invoke. Note that we do not update the dex method index of // the invoke, as we would need to look it up in the current dex file, and it // is unlikely that it exists. The most usual situation for such typed @@ -2647,15 +2654,13 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) { // Collect args and check for unexpected uses. // We expect one call to a constructor with no arguments, one constructor fence (unless // eliminated), some number of append calls and one call to StringBuilder.toString(). - bool constructor_inlined = false; bool seen_constructor = false; bool seen_constructor_fence = false; bool seen_to_string = false; uint32_t format = 0u; uint32_t num_args = 0u; + bool has_fp_args = false; HInstruction* args[StringBuilderAppend::kMaxArgs]; // Added in reverse order. - // When inlining, `maybe_new_array` tracks an environment use that we want to allow. - HInstruction* maybe_new_array = nullptr; for (HBackwardInstructionIterator iter(block->GetInstructions()); !iter.Done(); iter.Advance()) { HInstruction* user = iter.Current(); // Instructions of interest apply to `sb`, skip those that do not involve `sb`. 
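// A rough sketch of the argument-kind packing used by TryReplaceStringBuilderAppend in the
// next hunk: walking the block backwards, each recognized append argument is shifted into a
// single `format` word, so the earliest append ends up in the least-significant bits. The
// constant and enum values below are illustrative assumptions, not the real
// StringBuilderAppend encoding.
#include <cstdint>

namespace sbappend_sketch {

constexpr uint32_t kBitsPerArg = 4;  // Assumed field width.

enum class Argument : uint32_t {
  kEnd = 0u,
  kString = 1u,
  kInt = 2u,
  kLong = 3u,
  kFloat = 4u,
  kDouble = 5u,
};

// Mirrors `format = (format << StringBuilderAppend::kBitsPerArg) | static_cast<uint32_t>(arg);`.
inline uint32_t PackArgument(uint32_t format, Argument arg) {
  return (format << kBitsPerArg) | static_cast<uint32_t>(arg);
}

// Example: sb.append("a"); sb.append(1); visited backwards gives
// PackArgument(PackArgument(0u, Argument::kInt), Argument::kString) == 0x21u,
// i.e. kString (the first append) in the low bits and kInt above it.

}  // namespace sbappend_sketch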
@@ -2700,6 +2705,14 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) { case Intrinsics::kStringBuilderAppendLong: arg = StringBuilderAppend::Argument::kLong; break; + case Intrinsics::kStringBuilderAppendFloat: + arg = StringBuilderAppend::Argument::kFloat; + has_fp_args = true; + break; + case Intrinsics::kStringBuilderAppendDouble: + arg = StringBuilderAppend::Argument::kDouble; + has_fp_args = true; + break; case Intrinsics::kStringBuilderAppendCharSequence: { ReferenceTypeInfo rti = user->AsInvokeVirtual()->InputAt(1)->GetReferenceTypeInfo(); if (!rti.IsValid()) { @@ -2719,10 +2732,6 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) { } break; } - case Intrinsics::kStringBuilderAppendFloat: - case Intrinsics::kStringBuilderAppendDouble: - // TODO: Unimplemented, needs to call FloatingDecimal.getBinaryToASCIIConverter(). - return false; default: { return false; } @@ -2736,25 +2745,13 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) { format = (format << StringBuilderAppend::kBitsPerArg) | static_cast<uint32_t>(arg); args[num_args] = as_invoke_virtual->InputAt(1u); ++num_args; - } else if (!seen_constructor) { - // At this point, we should see the constructor. However, we might have inlined it so we have - // to take care of both cases. We accept only the constructor with no extra arguments. This - // means that if we inline it, we have to check it is setting its field to a new array. - if (user->IsInvokeStaticOrDirect() && - user->AsInvokeStaticOrDirect()->GetResolvedMethod() != nullptr && - user->AsInvokeStaticOrDirect()->GetResolvedMethod()->IsConstructor() && - user->AsInvokeStaticOrDirect()->GetNumberOfArguments() == 1u) { - constructor_inlined = false; - } else if (user->IsInstanceFieldSet() && - user->AsInstanceFieldSet()->GetFieldType() == DataType::Type::kReference && - user->AsInstanceFieldSet()->InputAt(0) == sb && - user->AsInstanceFieldSet()->GetValue()->IsNewArray()) { - maybe_new_array = user->AsInstanceFieldSet()->GetValue(); - constructor_inlined = true; - } else { - // We were expecting a constructor but we haven't seen it. Abort optimization. - return false; - } + } else if (user->IsInvokeStaticOrDirect() && + user->AsInvokeStaticOrDirect()->GetResolvedMethod() != nullptr && + user->AsInvokeStaticOrDirect()->GetResolvedMethod()->IsConstructor() && + user->AsInvokeStaticOrDirect()->GetNumberOfArguments() == 1u) { + // After arguments, we should see the constructor. + // We accept only the constructor with no extra arguments. + DCHECK(!seen_constructor); DCHECK(!seen_constructor_fence); seen_constructor = true; } else if (user->IsConstructorFence()) { @@ -2780,17 +2777,6 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) { // Accept only calls on the StringBuilder (which shall all be removed). // TODO: Carve-out for const-string? Or rely on environment pruning (to be implemented)? if (holder->InputCount() == 0 || holder->InputAt(0) != sb) { - // When inlining the constructor, we have a NewArray and may have a LoadClass as an - // environment use. - if (constructor_inlined) { - if (holder == maybe_new_array) { - continue; - } - if (holder == maybe_new_array->InputAt(0)) { - DCHECK(holder->IsLoadClass()); - continue; - } - } return false; } } @@ -2798,9 +2784,9 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) { // Create replacement instruction. 
HIntConstant* fmt = block->GetGraph()->GetIntConstant(static_cast<int32_t>(format)); ArenaAllocator* allocator = block->GetGraph()->GetAllocator(); - HStringBuilderAppend* append = - new (allocator) HStringBuilderAppend(fmt, num_args, allocator, invoke->GetDexPc()); - append->SetReferenceTypeInfo(invoke->GetReferenceTypeInfo()); + HStringBuilderAppend* append = new (allocator) HStringBuilderAppend( + fmt, num_args, has_fp_args, allocator, invoke->GetDexPc()); + append->SetReferenceTypeInfoIfValid(invoke->GetReferenceTypeInfo()); for (size_t i = 0; i != num_args; ++i) { append->SetArgumentAt(i, args[num_args - 1u - i]); } @@ -2824,33 +2810,6 @@ static bool TryReplaceStringBuilderAppend(HInvoke* invoke) { while (sb->HasNonEnvironmentUses()) { block->RemoveInstruction(sb->GetUses().front().GetUser()); } - if (constructor_inlined) { - // We need to remove the inlined constructor instructions, - // and all remaining environment uses (if any). - DCHECK(sb->HasEnvironmentUses()); - DCHECK(maybe_new_array != nullptr); - DCHECK(maybe_new_array->IsNewArray()); - DCHECK(maybe_new_array->HasNonEnvironmentUses()); - HInstruction* fence = maybe_new_array->GetUses().front().GetUser(); - DCHECK(fence->IsConstructorFence()); - block->RemoveInstruction(fence); - block->RemoveInstruction(maybe_new_array); - if (sb->HasEnvironmentUses()) { - // We know the only remaining uses are from the LoadClass. - HInstruction* load_class = maybe_new_array->InputAt(0); - DCHECK(load_class->IsLoadClass()); - for (HEnvironment* env = load_class->GetEnvironment(); - env != nullptr; - env = env->GetParent()) { - for (size_t i = 0, size = env->Size(); i != size; ++i) { - if (env->GetInstructionAt(i) == sb) { - env->RemoveAsUserOfInput(i); - env->SetRawEnvAt(i, /*instruction=*/ nullptr); - } - } - } - } - } DCHECK(!sb->HasEnvironmentUses()); block->RemoveInstruction(sb); return true; diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h index feea771096..98ebaafebc 100644 --- a/compiler/optimizing/instruction_simplifier.h +++ b/compiler/optimizing/instruction_simplifier.h @@ -17,11 +17,12 @@ #ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_H_ #define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_H_ +#include "base/macros.h" #include "nodes.h" #include "optimization.h" #include "optimizing_compiler_stats.h" -namespace art { +namespace art HIDDEN { class CodeGenerator; diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc index 1371ea7781..05a518d544 100644 --- a/compiler/optimizing/instruction_simplifier_arm.cc +++ b/compiler/optimizing/instruction_simplifier_arm.cc @@ -23,7 +23,7 @@ #include "mirror/string.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { using helpers::CanFitInShifterOperand; using helpers::HasShifterOperand; @@ -31,7 +31,7 @@ using helpers::IsSubRightSubLeftShl; namespace arm { -class InstructionSimplifierArmVisitor : public HGraphVisitor { +class InstructionSimplifierArmVisitor final : public HGraphVisitor { public: InstructionSimplifierArmVisitor(HGraph* graph, OptimizingCompilerStats* stats) : HGraphVisitor(graph), stats_(stats) {} diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h index fca9341d59..0517e4f49e 100644 --- a/compiler/optimizing/instruction_simplifier_arm.h +++ b/compiler/optimizing/instruction_simplifier_arm.h @@ -17,10 +17,11 @@ #ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_ #define 
ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM_H_ +#include "base/macros.h" #include "nodes.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { namespace arm { class InstructionSimplifierArm : public HOptimization { diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index a6ec02012c..671900bd9d 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -21,7 +21,7 @@ #include "mirror/array-inl.h" #include "mirror/string.h" -namespace art { +namespace art HIDDEN { using helpers::CanFitInShifterOperand; using helpers::HasShifterOperand; @@ -31,7 +31,7 @@ namespace arm64 { using helpers::ShifterOperandSupportsExtension; -class InstructionSimplifierArm64Visitor : public HGraphVisitor { +class InstructionSimplifierArm64Visitor final : public HGraphVisitor { public: InstructionSimplifierArm64Visitor(HGraph* graph, OptimizingCompilerStats* stats) : HGraphVisitor(graph), stats_(stats) {} diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h index 8d93c01ebf..374638ab9e 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.h +++ b/compiler/optimizing/instruction_simplifier_arm64.h @@ -17,10 +17,11 @@ #ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM64_H_ #define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_ARM64_H_ +#include "base/macros.h" #include "nodes.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { namespace arm64 { class InstructionSimplifierArm64 : public HOptimization { diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc index dc60ba62bb..34daae21ee 100644 --- a/compiler/optimizing/instruction_simplifier_shared.cc +++ b/compiler/optimizing/instruction_simplifier_shared.cc @@ -18,7 +18,7 @@ #include "mirror/array-inl.h" -namespace art { +namespace art HIDDEN { namespace { @@ -244,7 +244,7 @@ bool TryExtractArrayAccessAddress(HInstruction* access, // The access may require a runtime call or the original array pointer. 
return false; } - if (kEmitCompilerReadBarrier && + if (gUseReadBarrier && !kUseBakerReadBarrier && access->IsArrayGet() && access->GetType() == DataType::Type::kReference) { diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h index 876ed21a22..ddc3a867b8 100644 --- a/compiler/optimizing/instruction_simplifier_shared.h +++ b/compiler/optimizing/instruction_simplifier_shared.h @@ -17,9 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_ #define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_ +#include "base/macros.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { namespace helpers { diff --git a/compiler/optimizing/instruction_simplifier_test.cc b/compiler/optimizing/instruction_simplifier_test.cc index c7c5b12e25..966f5b91cf 100644 --- a/compiler/optimizing/instruction_simplifier_test.cc +++ b/compiler/optimizing/instruction_simplifier_test.cc @@ -26,13 +26,15 @@ #include "optimizing/data_type.h" #include "optimizing_unit_test.h" -namespace art { +namespace art HIDDEN { namespace mirror { class ClassExt; class Throwable; } // namespace mirror +static constexpr bool kDebugSimplifierTests = false; + template<typename SuperClass> class InstructionSimplifierTestBase : public SuperClass, public OptimizingUnitTestHelper { public: @@ -49,6 +51,19 @@ class InstructionSimplifierTestBase : public SuperClass, public OptimizingUnitTe SuperClass::TearDown(); gLogVerbosity.compiler = false; } + + void PerformSimplification(const AdjacencyListGraph& blks) { + if (kDebugSimplifierTests) { + LOG(INFO) << "Pre simplification " << blks; + } + graph_->ClearDominanceInformation(); + graph_->BuildDominatorTree(); + InstructionSimplifier simp(graph_, /*codegen=*/nullptr); + simp.Run(); + if (kDebugSimplifierTests) { + LOG(INFO) << "Post simplify " << blks; + } + } }; class InstructionSimplifierTest : public InstructionSimplifierTestBase<CommonCompilerTest> {}; @@ -197,13 +212,7 @@ TEST_F(InstructionSimplifierTest, SimplifyPredicatedFieldGetNoMerge) { SetupExit(exit); - LOG(INFO) << "Pre simplification " << blks; - graph_->ClearDominanceInformation(); - graph_->BuildDominatorTree(); - InstructionSimplifier simp(graph_, /*codegen=*/nullptr); - simp.Run(); - - LOG(INFO) << "Post simplify " << blks; + PerformSimplification(blks); EXPECT_INS_RETAINED(read_end); @@ -289,13 +298,7 @@ TEST_F(InstructionSimplifierTest, SimplifyPredicatedFieldGetMerge) { SetupExit(exit); - LOG(INFO) << "Pre simplification " << blks; - graph_->ClearDominanceInformation(); - graph_->BuildDominatorTree(); - InstructionSimplifier simp(graph_, /*codegen=*/nullptr); - simp.Run(); - - LOG(INFO) << "Post simplify " << blks; + PerformSimplification(blks); EXPECT_FALSE(obj3->CanBeNull()); EXPECT_INS_RETAINED(read_end); @@ -373,13 +376,7 @@ TEST_F(InstructionSimplifierTest, SimplifyPredicatedFieldGetNoNull) { SetupExit(exit); - LOG(INFO) << "Pre simplification " << blks; - graph_->ClearDominanceInformation(); - graph_->BuildDominatorTree(); - InstructionSimplifier simp(graph_, /*codegen=*/nullptr); - simp.Run(); - - LOG(INFO) << "Post simplify " << blks; + PerformSimplification(blks); EXPECT_FALSE(obj1->CanBeNull()); EXPECT_FALSE(obj2->CanBeNull()); @@ -464,16 +461,7 @@ TEST_P(InstanceOfInstructionSimplifierTestGroup, ExactClassInstanceOfOther) { SetupExit(exit); - // PerformLSE expects this to be empty. 
- graph_->ClearDominanceInformation(); - - LOG(INFO) << "Pre simplification " << blks; - graph_->ClearDominanceInformation(); - graph_->BuildDominatorTree(); - InstructionSimplifier simp(graph_, /*codegen=*/nullptr); - simp.Run(); - - LOG(INFO) << "Post simplify " << blks; + PerformSimplification(blks); if (!GetConstantResult() || GetParam() == InstanceOfKind::kSelf) { EXPECT_INS_RETAINED(target_klass); @@ -532,16 +520,7 @@ TEST_P(InstanceOfInstructionSimplifierTestGroup, ExactClassCheckCastOther) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - - LOG(INFO) << "Pre simplification " << blks; - graph_->ClearDominanceInformation(); - graph_->BuildDominatorTree(); - InstructionSimplifier simp(graph_, /*codegen=*/nullptr); - simp.Run(); - - LOG(INFO) << "Post simplify " << blks; + PerformSimplification(blks); if (!GetConstantResult() || GetParam() == InstanceOfKind::kSelf) { EXPECT_INS_RETAINED(target_klass); diff --git a/compiler/optimizing/instruction_simplifier_x86.cc b/compiler/optimizing/instruction_simplifier_x86.cc index 2d8f94a85b..5a4345d589 100644 --- a/compiler/optimizing/instruction_simplifier_x86.cc +++ b/compiler/optimizing/instruction_simplifier_x86.cc @@ -17,11 +17,11 @@ #include "instruction_simplifier_x86_shared.h" #include "code_generator_x86.h" -namespace art { +namespace art HIDDEN { namespace x86 { -class InstructionSimplifierX86Visitor : public HGraphVisitor { +class InstructionSimplifierX86Visitor final : public HGraphVisitor { public: InstructionSimplifierX86Visitor(HGraph* graph, CodeGenerator* codegen, diff --git a/compiler/optimizing/instruction_simplifier_x86.h b/compiler/optimizing/instruction_simplifier_x86.h index 6f10006db2..25ebe203b8 100644 --- a/compiler/optimizing/instruction_simplifier_x86.h +++ b/compiler/optimizing/instruction_simplifier_x86.h @@ -16,10 +16,11 @@ #ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_ #define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_ +#include "base/macros.h" #include "nodes.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { class CodeGenerator; namespace x86 { diff --git a/compiler/optimizing/instruction_simplifier_x86_64.cc b/compiler/optimizing/instruction_simplifier_x86_64.cc index 56c6b414d7..9ba1a8a960 100644 --- a/compiler/optimizing/instruction_simplifier_x86_64.cc +++ b/compiler/optimizing/instruction_simplifier_x86_64.cc @@ -17,11 +17,11 @@ #include "instruction_simplifier_x86_shared.h" #include "code_generator_x86_64.h" -namespace art { +namespace art HIDDEN { namespace x86_64 { -class InstructionSimplifierX86_64Visitor : public HGraphVisitor { +class InstructionSimplifierX86_64Visitor final : public HGraphVisitor { public: InstructionSimplifierX86_64Visitor(HGraph* graph, CodeGenerator* codegen, diff --git a/compiler/optimizing/instruction_simplifier_x86_64.h b/compiler/optimizing/instruction_simplifier_x86_64.h index 6cae24d11a..1654dc4774 100644 --- a/compiler/optimizing/instruction_simplifier_x86_64.h +++ b/compiler/optimizing/instruction_simplifier_x86_64.h @@ -16,10 +16,11 @@ #ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_64_H_ #define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_64_H_ +#include "base/macros.h" #include "nodes.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { class CodeGenerator; diff --git a/compiler/optimizing/instruction_simplifier_x86_shared.cc b/compiler/optimizing/instruction_simplifier_x86_shared.cc index 2805abb2bb..74c5ca2466 100644 --- 
a/compiler/optimizing/instruction_simplifier_x86_shared.cc +++ b/compiler/optimizing/instruction_simplifier_x86_shared.cc @@ -14,9 +14,10 @@ */ #include "instruction_simplifier_x86_shared.h" + #include "nodes_x86.h" -namespace art { +namespace art HIDDEN { bool TryCombineAndNot(HAnd* instruction) { DataType::Type type = instruction->GetType(); diff --git a/compiler/optimizing/instruction_simplifier_x86_shared.h b/compiler/optimizing/instruction_simplifier_x86_shared.h index 7f94d7ea4c..1a44d0fdb5 100644 --- a/compiler/optimizing/instruction_simplifier_x86_shared.h +++ b/compiler/optimizing/instruction_simplifier_x86_shared.h @@ -16,13 +16,16 @@ #ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_SHARED_H_ #define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_SHARED_H_ +#include "base/macros.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { + bool TryCombineAndNot(HAnd* instruction); bool TryGenerateResetLeastSetBit(HAnd* instruction); bool TryGenerateMaskUptoLeastSetBit(HXor* instruction); bool AreLeastSetBitInputs(HInstruction* to_test, HInstruction* other); + } // namespace art #endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_SHARED_H_ diff --git a/compiler/optimizing/intrinsic_objects.cc b/compiler/optimizing/intrinsic_objects.cc index 5f6f562161..7e542117a9 100644 --- a/compiler/optimizing/intrinsic_objects.cc +++ b/compiler/optimizing/intrinsic_objects.cc @@ -22,7 +22,7 @@ #include "image.h" #include "obj_ptr-inl.h" -namespace art { +namespace art HIDDEN { static constexpr size_t kIntrinsicObjectsOffset = enum_cast<size_t>(ImageHeader::kIntrinsicObjectsStart); diff --git a/compiler/optimizing/intrinsic_objects.h b/compiler/optimizing/intrinsic_objects.h index ed764bd4b2..d750f2934b 100644 --- a/compiler/optimizing/intrinsic_objects.h +++ b/compiler/optimizing/intrinsic_objects.h @@ -19,9 +19,10 @@ #include "base/bit_field.h" #include "base/bit_utils.h" +#include "base/macros.h" #include "base/mutex.h" -namespace art { +namespace art HIDDEN { class ClassLinker; template <class MirrorType> class ObjPtr; @@ -56,15 +57,15 @@ class IntrinsicObjects { } // Functions for retrieving data for Integer.valueOf(). 
- static ObjPtr<mirror::ObjectArray<mirror::Object>> LookupIntegerCache( + EXPORT static ObjPtr<mirror::ObjectArray<mirror::Object>> LookupIntegerCache( Thread* self, ClassLinker* class_linker) REQUIRES_SHARED(Locks::mutator_lock_); - static ObjPtr<mirror::ObjectArray<mirror::Object>> GetIntegerValueOfCache( + EXPORT static ObjPtr<mirror::ObjectArray<mirror::Object>> GetIntegerValueOfCache( ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects) REQUIRES_SHARED(Locks::mutator_lock_); - static ObjPtr<mirror::Object> GetIntegerValueOfObject( + EXPORT static ObjPtr<mirror::Object> GetIntegerValueOfObject( ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects, uint32_t index) REQUIRES_SHARED(Locks::mutator_lock_); - static MemberOffset GetIntegerValueOfArrayDataOffset( + EXPORT static MemberOffset GetIntegerValueOfArrayDataOffset( ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects) REQUIRES_SHARED(Locks::mutator_lock_); diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index f2d2b45da9..774deec438 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -32,7 +32,7 @@ #include "scoped_thread_state_change-inl.h" #include "thread-current-inl.h" -namespace art { +namespace art HIDDEN { std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic) { switch (intrinsic) { @@ -171,6 +171,7 @@ void IntrinsicVisitor::ComputeIntegerValueOfLocations(HInvoke* invoke, if (!CanReferenceBootImageObjects(invoke, compiler_options)) { return; } + HInstruction* const input = invoke->InputAt(0); if (compiler_options.IsBootImage()) { if (!compiler_options.IsImageClass(kIntegerCacheDescriptor) || !compiler_options.IsImageClass(kIntegerDescriptor)) { @@ -207,8 +208,8 @@ void IntrinsicVisitor::ComputeIntegerValueOfLocations(HInvoke* invoke, CHECK_EQ(value_field->GetInt(current_object), low + i); } } - if (invoke->InputAt(0)->IsIntConstant()) { - int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); + if (input->IsIntConstant()) { + int32_t value = input->AsIntConstant()->GetValue(); if (static_cast<uint32_t>(value) - static_cast<uint32_t>(low) < static_cast<uint32_t>(high - low + 1)) { // No call, we shall use direct pointer to the Integer object. @@ -232,8 +233,8 @@ void IntrinsicVisitor::ComputeIntegerValueOfLocations(HInvoke* invoke, } else { DCHECK(compiler_options.IsAotCompiler()); DCHECK(CheckIntegerCache(self, runtime->GetClassLinker(), boot_image_live_objects, cache)); - if (invoke->InputAt(0)->IsIntConstant()) { - int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); + if (input->IsIntConstant()) { + int32_t value = input->AsIntConstant()->GetValue(); // Retrieve the `value` from the lowest cached Integer. 
ObjPtr<mirror::Object> low_integer = IntrinsicObjects::GetIntegerValueOfObject(boot_image_live_objects, 0u); @@ -255,11 +256,11 @@ void IntrinsicVisitor::ComputeIntegerValueOfLocations(HInvoke* invoke, ArenaAllocator* allocator = codegen->GetGraph()->GetAllocator(); LocationSummary* locations = new (allocator) LocationSummary(invoke, call_kind, kIntrinsified); if (call_kind == LocationSummary::kCallOnMainOnly) { - locations->SetInAt(0, Location::RegisterOrConstant(invoke->InputAt(0))); + locations->SetInAt(0, Location::RegisterOrConstant(input)); locations->AddTemp(first_argument_location); locations->SetOut(return_location); } else { - locations->SetInAt(0, Location::ConstantLocation(invoke->InputAt(0)->AsConstant())); + locations->SetInAt(0, Location::ConstantLocation(input)); locations->SetOut(Location::RequiresRegister()); } } @@ -392,7 +393,7 @@ void IntrinsicVisitor::CreateReferenceGetReferentLocations(HInvoke* invoke, } void IntrinsicVisitor::CreateReferenceRefersToLocations(HInvoke* invoke) { - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { // Unimplemented for non-Baker read barrier. return; } diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index 5109882295..893cd04411 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -17,12 +17,13 @@ #ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_H_ #define ART_COMPILER_OPTIMIZING_INTRINSICS_H_ +#include "base/macros.h" #include "code_generator.h" #include "nodes.h" #include "optimization.h" #include "parallel_move_resolver.h" -namespace art { +namespace art HIDDEN { class DexFile; diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 646f4f2ea7..d2dbaa32e3 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -46,7 +46,7 @@ using namespace vixl::aarch64; // NOLINT(build/namespaces) #include "aarch64/macro-assembler-aarch64.h" #pragma GCC diagnostic pop -namespace art { +namespace art HIDDEN { namespace arm64 { @@ -55,7 +55,6 @@ using helpers::DRegisterFrom; using helpers::HeapOperand; using helpers::LocationFrom; using helpers::InputCPURegisterOrZeroRegAt; -using helpers::IsConstantZeroBitPattern; using helpers::OperandFrom; using helpers::RegisterFrom; using helpers::SRegisterFrom; @@ -92,7 +91,7 @@ class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 { public: ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp) : SlowPathCodeARM64(instruction), tmp_(tmp) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); } @@ -711,7 +710,7 @@ static void GenUnsafeGet(HInvoke* invoke, Location trg_loc = locations->Out(); Register trg = RegisterFrom(trg_loc, type); - if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (type == DataType::Type::kReference && gUseReadBarrier && kUseBakerReadBarrier) { // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case. 
Register temp = WRegisterFrom(locations->GetTemp(0)); MacroAssembler* masm = codegen->GetVIXLAssembler(); @@ -754,7 +753,7 @@ static bool UnsafeGetIntrinsicOnCallList(Intrinsics intrinsic) { } static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - bool can_call = kEmitCompilerReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic()); + bool can_call = gUseReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic()); LocationSummary* locations = new (allocator) LocationSummary(invoke, can_call @@ -1096,7 +1095,7 @@ void IntrinsicCodeGeneratorARM64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) } static void CreateUnsafeCASLocations(ArenaAllocator* allocator, HInvoke* invoke) { - const bool can_call = kEmitCompilerReadBarrier && IsUnsafeCASObject(invoke); + const bool can_call = gUseReadBarrier && IsUnsafeCASObject(invoke); LocationSummary* locations = new (allocator) LocationSummary(invoke, can_call @@ -1448,7 +1447,7 @@ static void GenUnsafeCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARM6 vixl::aarch64::Label* exit_loop = &exit_loop_label; vixl::aarch64::Label* cmp_failure = &exit_loop_label; - if (kEmitCompilerReadBarrier && type == DataType::Type::kReference) { + if (gUseReadBarrier && type == DataType::Type::kReference) { // We need to store the `old_value` in a non-scratch register to make sure // the read barrier in the slow path does not clobber it. old_value = WRegisterFrom(locations->GetTemp(0)); // The old value from main path. @@ -1523,12 +1522,12 @@ void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCompareAndSetLong(HInvoke* in } void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) { // The only supported read barrier implementation is the Baker-style read barriers. - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { return; } CreateUnsafeCASLocations(allocator_, invoke); - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { // We need two non-scratch temporary registers for read barrier. LocationSummary* locations = invoke->GetLocations(); if (kUseBakerReadBarrier) { @@ -1578,7 +1577,7 @@ void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invok } void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) { // The only supported read barrier implementation is the Baker-style read barriers. - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); GenUnsafeCas(invoke, DataType::Type::kReference, codegen_); } @@ -2576,9 +2575,9 @@ void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ Bind(&done); } -// Mirrors ARRAYCOPY_SHORT_CHAR_ARRAY_THRESHOLD in libcore, so we can choose to use the native -// implementation there for longer copy lengths. -static constexpr int32_t kSystemArrayCopyCharThreshold = 32; +// This value is greater than ARRAYCOPY_SHORT_CHAR_ARRAY_THRESHOLD in libcore, +// so if we choose to jump to the slow path we will end up in the native implementation. 
+static constexpr int32_t kSystemArrayCopyCharThreshold = 192; static void SetSystemArrayCopyLocationRequires(LocationSummary* locations, uint32_t at, @@ -2710,11 +2709,13 @@ static void GenSystemArrayCopyAddresses(MacroAssembler* masm, __ Add(dst_base, dst_base, Operand(XRegisterFrom(dst_pos), LSL, element_size_shift)); } - if (copy_length.IsConstant()) { - int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue(); - __ Add(src_end, src_base, element_size * constant); - } else { - __ Add(src_end, src_base, Operand(XRegisterFrom(copy_length), LSL, element_size_shift)); + if (src_end.IsValid()) { + if (copy_length.IsConstant()) { + int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue(); + __ Add(src_end, src_base, element_size * constant); + } else { + __ Add(src_end, src_base, Operand(XRegisterFrom(copy_length), LSL, element_size_shift)); + } } } @@ -2745,13 +2746,14 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) { if (!length.IsConstant()) { // Merge the following two comparisons into one: // If the length is negative, bail out (delegate to libcore's native implementation). - // If the length > 32 then (currently) prefer libcore's native implementation. + // If the length > kSystemArrayCopyCharThreshold then (currently) prefer libcore's + // native implementation. __ Cmp(WRegisterFrom(length), kSystemArrayCopyCharThreshold); __ B(slow_path->GetEntryLabel(), hi); } else { // We have already checked in the LocationsBuilder for the constant case. DCHECK_GE(length.GetConstant()->AsIntConstant()->GetValue(), 0); - DCHECK_LE(length.GetConstant()->AsIntConstant()->GetValue(), 32); + DCHECK_LE(length.GetConstant()->AsIntConstant()->GetValue(), kSystemArrayCopyCharThreshold); } Register src_curr_addr = WRegisterFrom(locations->GetTemp(0)); @@ -2787,21 +2789,102 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) { length, src_curr_addr, dst_curr_addr, - src_stop_addr); + Register()); // Iterate over the arrays and do a raw copy of the chars. const int32_t char_size = DataType::Size(DataType::Type::kUint16); UseScratchRegisterScope temps(masm); - Register tmp = temps.AcquireW(); - vixl::aarch64::Label loop, done; - __ Bind(&loop); - __ Cmp(src_curr_addr, src_stop_addr); - __ B(&done, eq); - __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, PostIndex)); - __ Strh(tmp, MemOperand(dst_curr_addr, char_size, PostIndex)); - __ B(&loop); - __ Bind(&done); + // We split processing of the array in two parts: head and tail. + // A first loop handles the head by copying a block of characters per + // iteration (see: chars_per_block). + // A second loop handles the tail by copying the remaining characters. + // If the copy length is not constant, we copy them one-by-one. + // If the copy length is constant, we optimize by always unrolling the tail + // loop, and also unrolling the head loop when the copy length is small (see: + // unroll_threshold). + // + // Both loops are inverted for better performance, meaning they are + // implemented as conditional do-while loops. + // Here, the loop condition is first checked to determine if there are + // sufficient chars to run an iteration, then we enter the do-while: an + // iteration is performed followed by a conditional branch only if another + // iteration is necessary. As opposed to a standard while-loop, this inversion + // can save some branching (e.g. 
we don't branch back to the initial condition + // at the end of every iteration only to potentially immediately branch + // again). + // + // A full block of chars is subtracted and added before and after the head + // loop, respectively. This ensures that any remaining length after each + // head loop iteration means there is a full block remaining, reducing the + // number of conditional checks required on every iteration. + constexpr int32_t chars_per_block = 4; + constexpr int32_t unroll_threshold = 2 * chars_per_block; + vixl::aarch64::Label loop1, loop2, pre_loop2, done; + + Register length_tmp = src_stop_addr.W(); + Register tmp = temps.AcquireRegisterOfSize(char_size * chars_per_block * kBitsPerByte); + + auto emitHeadLoop = [&]() { + __ Bind(&loop1); + __ Ldr(tmp, MemOperand(src_curr_addr, char_size * chars_per_block, PostIndex)); + __ Subs(length_tmp, length_tmp, chars_per_block); + __ Str(tmp, MemOperand(dst_curr_addr, char_size * chars_per_block, PostIndex)); + __ B(&loop1, ge); + }; + + auto emitTailLoop = [&]() { + __ Bind(&loop2); + __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, PostIndex)); + __ Subs(length_tmp, length_tmp, 1); + __ Strh(tmp, MemOperand(dst_curr_addr, char_size, PostIndex)); + __ B(&loop2, gt); + }; + + auto emitUnrolledTailLoop = [&](const int32_t tail_length) { + DCHECK_LT(tail_length, 4); + + // Don't use post-index addressing, and instead add a constant offset later. + if ((tail_length & 2) != 0) { + __ Ldr(tmp.W(), MemOperand(src_curr_addr)); + __ Str(tmp.W(), MemOperand(dst_curr_addr)); + } + if ((tail_length & 1) != 0) { + const int32_t offset = (tail_length & ~1) * char_size; + __ Ldrh(tmp, MemOperand(src_curr_addr, offset)); + __ Strh(tmp, MemOperand(dst_curr_addr, offset)); + } + }; + + if (length.IsConstant()) { + const int32_t constant_length = length.GetConstant()->AsIntConstant()->GetValue(); + if (constant_length >= unroll_threshold) { + __ Mov(length_tmp, constant_length - chars_per_block); + emitHeadLoop(); + } else { + static_assert(unroll_threshold == 8, "The unroll_threshold must be 8."); + // Fully unroll both the head and tail loops. + if ((constant_length & 4) != 0) { + __ Ldr(tmp, MemOperand(src_curr_addr, 4 * char_size, PostIndex)); + __ Str(tmp, MemOperand(dst_curr_addr, 4 * char_size, PostIndex)); + } + } + emitUnrolledTailLoop(constant_length % chars_per_block); + } else { + Register length_reg = WRegisterFrom(length); + __ Subs(length_tmp, length_reg, chars_per_block); + __ B(&pre_loop2, lt); + + emitHeadLoop(); + + __ Bind(&pre_loop2); + __ Adds(length_tmp, length_tmp, chars_per_block); + __ B(&done, eq); + + emitTailLoop(); + } + + __ Bind(&done); __ Bind(slow_path->GetExitLabel()); } @@ -2814,7 +2897,7 @@ static constexpr int32_t kSystemArrayCopyThreshold = 128; void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) { // The only read barrier implementation supporting the // SystemArrayCopy intrinsic is the Baker-style read barriers. 
- if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { return; } @@ -2866,7 +2949,7 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) { locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // Temporary register IP0, obtained from the VIXL scratch register // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64 // (because that register is clobbered by ReadBarrierMarkRegX @@ -2884,7 +2967,7 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) { void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { // The only read barrier implementation supporting the // SystemArrayCopy intrinsic is the Baker-style read barriers. - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); MacroAssembler* masm = GetVIXLAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -2991,7 +3074,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { UseScratchRegisterScope temps(masm); Location temp3_loc; // Used only for Baker read barrier. Register temp3; - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { temp3_loc = locations->GetTemp(2); temp3 = WRegisterFrom(temp3_loc); } else { @@ -3004,7 +3087,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { // or the destination is Object[]. If none of these checks succeed, we go to the // slow path. - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { if (!optimizations.GetSourceIsNonPrimitiveArray()) { // /* HeapReference<Class> */ temp1 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, @@ -3165,7 +3248,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); // Bail out if the source is not a non primitive array. - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // /* HeapReference<Class> */ temp1 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, temp1_loc, @@ -3215,7 +3298,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { __ Cbz(WRegisterFrom(length), &done); } - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // TODO: Also convert this intrinsic to the IsGcMarking strategy? // SystemArrayCopy implementation for Baker read barriers (see @@ -3335,7 +3418,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { } // We only need one card marking on the destination array. 
- codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null= */ false); + codegen_->MarkGCCard(dest.W(), Register(), /* emit_null_check= */ false); __ Bind(intrinsic_slow_path->GetExitLabel()); } @@ -3451,7 +3534,7 @@ void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) { void IntrinsicLocationsBuilderARM64::VisitReferenceGetReferent(HInvoke* invoke) { IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && invoke->GetLocations() != nullptr) { + if (gUseReadBarrier && kUseBakerReadBarrier && invoke->GetLocations() != nullptr) { invoke->GetLocations()->AddTemp(Location::RequiresRegister()); } } @@ -3466,7 +3549,7 @@ void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) { SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke); codegen_->AddSlowPath(slow_path); - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { // Check self->GetWeakRefAccessEnabled(). UseScratchRegisterScope temps(masm); Register temp = temps.AcquireW(); @@ -3493,7 +3576,7 @@ void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) { // Load the value from the field. uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value(); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, out, WRegisterFrom(obj), @@ -3533,7 +3616,7 @@ void IntrinsicCodeGeneratorARM64::VisitReferenceRefersTo(HInvoke* invoke) { __ Cmp(tmp, other); - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { DCHECK(kUseBakerReadBarrier); vixl::aarch64::Label calculate_result; @@ -4629,7 +4712,7 @@ static void GenerateVarHandleTarget(HInvoke* invoke, method.X(), ArtField::DeclaringClassOffset().Int32Value(), /*fixup_label=*/ nullptr, - kCompilerReadBarrierOption); + gCompilerReadBarrierOption); } } } else { @@ -4673,8 +4756,8 @@ static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) { uint32_t number_of_arguments = invoke->GetNumberOfArguments(); for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) { HInstruction* arg = invoke->InputAt(arg_index); - if (IsConstantZeroBitPattern(arg)) { - locations->SetInAt(arg_index, Location::ConstantLocation(arg->AsConstant())); + if (IsZeroBitPattern(arg)) { + locations->SetInAt(arg_index, Location::ConstantLocation(arg)); } else if (DataType::IsFloatingPointType(arg->GetType())) { locations->SetInAt(arg_index, Location::RequiresFpuRegister()); } else { @@ -4683,7 +4766,7 @@ static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) { } // Add a temporary for offset. - if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) && + if ((gUseReadBarrier && !kUseBakerReadBarrier) && GetExpectedVarHandleCoordinatesCount(invoke) == 0u) { // For static fields. // To preserve the offset value across the non-Baker read barrier slow path // for loading the declaring class, use a fixed callee-save register. 
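For orientation, here is a minimal C++-level sketch of the head/tail copy strategy that the new VisitSystemArrayCopyChar comment above describes. It assumes plain uint16_t buffers and a non-constant length; the function name CopyChars and the use of std::memcpy for the 4-char block are illustrative only, since the real change emits arm64 loads and stores through VIXL rather than C++.

#include <cstdint>
#include <cstring>

// Illustrative sketch: copy `length` chars with a 4-char block loop (head)
// followed by a one-by-one loop (tail). Both loops are inverted (do-while),
// so the back-edge branch also serves as the loop condition, as in the
// generated code.
void CopyChars(const uint16_t* src, uint16_t* dst, int32_t length) {
  constexpr int32_t kCharsPerBlock = 4;
  // Subtract a full block up front: a non-negative remainder means at least
  // one whole block is still left to copy.
  int32_t remaining = length - kCharsPerBlock;
  if (remaining >= 0) {
    do {
      // The emitted code does this with a single 64-bit load/store pair.
      std::memcpy(dst, src, kCharsPerBlock * sizeof(uint16_t));
      src += kCharsPerBlock;
      dst += kCharsPerBlock;
      remaining -= kCharsPerBlock;
    } while (remaining >= 0);
  }
  // Add the block back; what is left (0..3 chars) is handled by the tail.
  remaining += kCharsPerBlock;
  if (remaining != 0) {
    do {
      *dst++ = *src++;
      --remaining;
    } while (remaining > 0);
  }
}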
@@ -4706,7 +4789,7 @@ static void CreateVarHandleGetLocations(HInvoke* invoke) { return; } - if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) && + if ((gUseReadBarrier && !kUseBakerReadBarrier) && invoke->GetType() == DataType::Type::kReference && invoke->GetIntrinsic() != Intrinsics::kVarHandleGet && invoke->GetIntrinsic() != Intrinsics::kVarHandleGetOpaque) { @@ -4746,7 +4829,7 @@ static void GenerateVarHandleGet(HInvoke* invoke, DCHECK(use_load_acquire || order == std::memory_order_relaxed); // Load the value from the target location. - if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (type == DataType::Type::kReference && gUseReadBarrier && kUseBakerReadBarrier) { // Piggy-back on the field load path using introspection for the Baker read barrier. // The `target.offset` is a temporary, use it for field address. Register tmp_ptr = target.offset.X(); @@ -4898,7 +4981,7 @@ static void GenerateVarHandleSet(HInvoke* invoke, } if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(value_index))) { - codegen->MarkGCCard(target.object, Register(value), /*value_can_be_null=*/ true); + codegen->MarkGCCard(target.object, Register(value), /* emit_null_check= */ true); } if (slow_path != nullptr) { @@ -4947,7 +5030,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo uint32_t number_of_arguments = invoke->GetNumberOfArguments(); DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u); - if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) && + if ((gUseReadBarrier && !kUseBakerReadBarrier) && value_type == DataType::Type::kReference) { // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores // the passed reference and reloads it from the field. This breaks the read barriers @@ -4961,7 +5044,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo LocationSummary* locations = CreateVarHandleCommonLocations(invoke); - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { // We need callee-save registers for both the class object and offset instead of // the temporaries reserved in CreateVarHandleCommonLocations(). static_assert(POPCOUNT(kArm64CalleeSaveRefSpills) >= 2u); @@ -4985,16 +5068,16 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo // Add a temporary for old value and exclusive store result if floating point // `expected` and/or `new_value` take scratch registers. size_t available_scratch_registers = - (IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 1u)) ? 1u : 0u) + - (IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 2u)) ? 1u : 0u); + (IsZeroBitPattern(invoke->InputAt(number_of_arguments - 1u)) ? 1u : 0u) + + (IsZeroBitPattern(invoke->InputAt(number_of_arguments - 2u)) ? 1u : 0u); size_t temps_needed = /* pointer, old value, store result */ 3u - available_scratch_registers; // We can reuse the declaring class (if present) and offset temporary. 
if (temps_needed > old_temp_count) { locations->AddRegisterTemps(temps_needed - old_temp_count); } } else if ((value_type != DataType::Type::kReference && DataType::Size(value_type) != 1u) && - !IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 2u)) && - !IsConstantZeroBitPattern(invoke->InputAt(number_of_arguments - 1u)) && + !IsZeroBitPattern(invoke->InputAt(number_of_arguments - 2u)) && + !IsZeroBitPattern(invoke->InputAt(number_of_arguments - 1u)) && GetExpectedVarHandleCoordinatesCount(invoke) == 2u) { // Allocate a normal temporary for store result in the non-native byte order path // because scratch registers are used by the byte-swapped `expected` and `new_value`. @@ -5002,7 +5085,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo locations->AddTemp(Location::RequiresRegister()); } } - if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) { + if (gUseReadBarrier && value_type == DataType::Type::kReference) { // Add a temporary for the `old_value_temp` in slow path. locations->AddTemp(Location::RequiresRegister()); } @@ -5068,7 +5151,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, // except for references that need the offset for the read barrier. UseScratchRegisterScope temps(masm); Register tmp_ptr = target.offset.X(); - if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) { + if (gUseReadBarrier && value_type == DataType::Type::kReference) { tmp_ptr = temps.AcquireX(); } __ Add(tmp_ptr, target.object.X(), target.offset.X()); @@ -5151,7 +5234,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, vixl::aarch64::Label* exit_loop = &exit_loop_label; vixl::aarch64::Label* cmp_failure = &exit_loop_label; - if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) { + if (gUseReadBarrier && value_type == DataType::Type::kReference) { // The `old_value_temp` is used first for the marked `old_value` and then for the unmarked // reloaded old value for subsequent CAS in the slow path. It cannot be a scratch register. size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke); @@ -5296,7 +5379,7 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke, return; } - if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) && + if ((gUseReadBarrier && !kUseBakerReadBarrier) && invoke->GetType() == DataType::Type::kReference) { // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores // the passed reference and reloads it from the field, thus seeing the new value @@ -5316,7 +5399,7 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke, DCHECK(get_and_update_op == GetAndUpdateOp::kSet); // We can reuse the declaring class temporary if present. if (old_temp_count == 1u && - !IsConstantZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) { + !IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) { // Add a temporary for `old_value` if floating point `new_value` takes a scratch register. 
locations->AddTemp(Location::RequiresRegister()); } @@ -5327,7 +5410,7 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke, if (old_temp_count == 1u && (get_and_update_op != GetAndUpdateOp::kSet && get_and_update_op != GetAndUpdateOp::kAdd) && GetExpectedVarHandleCoordinatesCount(invoke) == 2u && - !IsConstantZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) { + !IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) { DataType::Type value_type = GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u); if (value_type != DataType::Type::kReference && DataType::Size(value_type) != 1u) { @@ -5372,7 +5455,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, // except for references that need the offset for the non-Baker read barrier. UseScratchRegisterScope temps(masm); Register tmp_ptr = target.offset.X(); - if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) && + if ((gUseReadBarrier && !kUseBakerReadBarrier) && value_type == DataType::Type::kReference) { tmp_ptr = temps.AcquireX(); } @@ -5402,7 +5485,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, // the new value unless it is zero bit pattern (+0.0f or +0.0) and need another one // in GenerateGetAndUpdate(). We have allocated a normal temporary to handle that. old_value = CPURegisterFrom(locations->GetTemp(1u), load_store_type); - } else if ((kEmitCompilerReadBarrier && kUseBakerReadBarrier) && + } else if ((gUseReadBarrier && kUseBakerReadBarrier) && value_type == DataType::Type::kReference) { // Load the old value initially to a scratch register. // We shall move it to `out` later with a read barrier. @@ -5450,7 +5533,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, __ Sxtb(out.W(), old_value.W()); } else if (value_type == DataType::Type::kInt16) { __ Sxth(out.W(), old_value.W()); - } else if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) { + } else if (gUseReadBarrier && value_type == DataType::Type::kReference) { if (kUseBakerReadBarrier) { codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(out.W(), old_value.W()); } else { @@ -5647,7 +5730,7 @@ void VarHandleSlowPathARM64::EmitByteArrayViewCode(CodeGenerator* codegen_in) { // Byte order check. For native byte order return to the main path. if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet && - IsConstantZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) { + IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) { // There is no reason to differentiate between native byte order and byte-swap // for setting a zero bit pattern. Just return to the main path. 
__ B(GetNativeByteOrderLabel()); @@ -5677,42 +5760,9 @@ void VarHandleSlowPathARM64::EmitByteArrayViewCode(CodeGenerator* codegen_in) { __ B(GetExitLabel()); } -UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf); -UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOfAfter); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferAppend); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferLength); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferToString); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendObject); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendString); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendCharSequence); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendCharArray); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendBoolean); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendChar); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendInt); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendLong); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendFloat); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendDouble); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderLength); -UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderToString); -UNIMPLEMENTED_INTRINSIC(ARM64, SystemArrayCopyByte); -UNIMPLEMENTED_INTRINSIC(ARM64, SystemArrayCopyInt); - -// 1.8. -UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddInt) -UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddLong) -UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetInt) -UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetLong) -UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetObject) - -UNIMPLEMENTED_INTRINSIC(ARM64, MethodHandleInvokeExact) -UNIMPLEMENTED_INTRINSIC(ARM64, MethodHandleInvoke) - -// OpenJDK 11 -UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndAddInt) -UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndAddLong) -UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndSetInt) -UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndSetLong) -UNIMPLEMENTED_INTRINSIC(ARM64, JdkUnsafeGetAndSetObject) +#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(ARM64, Name) +UNIMPLEMENTED_INTRINSIC_LIST_ARM64(MARK_UNIMPLEMENTED); +#undef MARK_UNIMPLEMENTED UNREACHABLE_INTRINSICS(ARM64) diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h index 9c46efddec..a0ccf87f7b 100644 --- a/compiler/optimizing/intrinsics_arm64.h +++ b/compiler/optimizing/intrinsics_arm64.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_ARM64_H_ #define ART_COMPILER_OPTIMIZING_INTRINSICS_ARM64_H_ +#include "base/macros.h" #include "intrinsics.h" namespace vixl { @@ -27,7 +28,7 @@ class MacroAssembler; } // namespace aarch64 } // namespace vixl -namespace art { +namespace art HIDDEN { class ArenaAllocator; class HInvokeStaticOrDirect; diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index d850cadc2b..266b5bc799 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -34,7 +34,7 @@ #include "aarch32/constants-aarch32.h" -namespace art { +namespace art HIDDEN { namespace arm { #define __ assembler->GetVIXLAssembler()-> @@ -120,7 +120,7 @@ class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL { public: explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction) : SlowPathCodeARMVIXL(instruction) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); } @@ -1242,7 +1242,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invo void 
IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { // The only read barrier implementation supporting the // SystemArrayCopy intrinsic is the Baker-style read barriers. - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { return; } @@ -1265,7 +1265,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) { locations->SetInAt(4, Location::RequiresRegister()); } - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // Temporary register IP cannot be used in // ReadBarrierSystemArrayCopySlowPathARM (because that register // is clobbered by ReadBarrierMarkRegX entry points). Get an extra @@ -1339,7 +1339,7 @@ static void CheckPosition(ArmVIXLAssembler* assembler, void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { // The only read barrier implementation supporting the // SystemArrayCopy intrinsic is the Baker-style read barriers. - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); ArmVIXLAssembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -1453,7 +1453,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { // or the destination is Object[]. If none of these checks succeed, we go to the // slow path. - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { if (!optimizations.GetSourceIsNonPrimitiveArray()) { // /* HeapReference<Class> */ temp1 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( @@ -1584,7 +1584,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); // Bail out if the source is not a non primitive array. - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // /* HeapReference<Class> */ temp1 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check= */ false); @@ -1621,7 +1621,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { __ CompareAndBranchIfZero(RegisterFrom(length), &done, /* is_far_target= */ false); } - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // TODO: Also convert this intrinsic to the IsGcMarking strategy? // SystemArrayCopy implementation for Baker read barriers (see @@ -1723,7 +1723,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { } // We only need one card marking on the destination array. - codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* value_can_be_null= */ false); + codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* emit_null_check= */ false); __ Bind(intrinsic_slow_path->GetExitLabel()); } @@ -2511,7 +2511,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) { SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke); codegen_->AddSlowPath(slow_path); - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { // Check self->GetWeakRefAccessEnabled(). 
UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); vixl32::Register temp = temps.Acquire(); @@ -2539,7 +2539,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) { // Load the value from the field. uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value(); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, out, RegisterFrom(obj), @@ -2587,7 +2587,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitReferenceRefersTo(HInvoke* invoke) { assembler->MaybeUnpoisonHeapReference(tmp); codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile. - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { DCHECK(kUseBakerReadBarrier); vixl32::Label calculate_result; @@ -2613,7 +2613,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitReferenceRefersTo(HInvoke* invoke) { __ Bind(&calculate_result); } else { - DCHECK(!kEmitCompilerReadBarrier); + DCHECK(!gUseReadBarrier); __ Sub(out, tmp, other); } @@ -2732,7 +2732,7 @@ static void GenerateIntrinsicGet(HInvoke* invoke, } break; case DataType::Type::kReference: - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // Piggy-back on the field load path using introspection for the Baker read barrier. vixl32::Register temp = RegisterFrom(maybe_temp); __ Add(temp, base, offset); @@ -2777,7 +2777,7 @@ static void GenerateIntrinsicGet(HInvoke* invoke, codegen->GenerateMemoryBarrier( seq_cst_barrier ? MemBarrierKind::kAnyAny : MemBarrierKind::kLoadAny); } - if (type == DataType::Type::kReference && !(kEmitCompilerReadBarrier && kUseBakerReadBarrier)) { + if (type == DataType::Type::kReference && !(gUseReadBarrier && kUseBakerReadBarrier)) { Location base_loc = LocationFrom(base); Location index_loc = LocationFrom(offset); codegen->MaybeGenerateReadBarrierSlow(invoke, out, out, base_loc, /* offset=*/ 0u, index_loc); @@ -2802,7 +2802,7 @@ static void CreateUnsafeGetLocations(HInvoke* invoke, CodeGeneratorARMVIXL* codegen, DataType::Type type, bool atomic) { - bool can_call = kEmitCompilerReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic()); + bool can_call = gUseReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic()); ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator(); LocationSummary* locations = new (allocator) LocationSummary(invoke, @@ -2818,7 +2818,7 @@ static void CreateUnsafeGetLocations(HInvoke* invoke, locations->SetInAt(2, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap)); - if ((kEmitCompilerReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) || + if ((gUseReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) || (type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen))) { // We need a temporary register for the read barrier marking slow // path in CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier, @@ -2837,7 +2837,7 @@ static void GenUnsafeGet(HInvoke* invoke, vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Long offset, lo part only. 
Location out = locations->Out(); Location maybe_temp = Location::NoLocation(); - if ((kEmitCompilerReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) || + if ((gUseReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) || (type == DataType::Type::kInt64 && Use64BitExclusiveLoadStore(atomic, codegen))) { maybe_temp = locations->GetTemp(0); } @@ -3470,7 +3470,7 @@ static void GenerateCompareAndSet(CodeGeneratorARMVIXL* codegen, // branch goes to the read barrier slow path that clobbers `success` anyway. bool init_failure_for_cmp = success.IsValid() && - !(kEmitCompilerReadBarrier && type == DataType::Type::kReference && expected.IsRegister()); + !(gUseReadBarrier && type == DataType::Type::kReference && expected.IsRegister()); // Instruction scheduling: Loading a constant between LDREX* and using the loaded value // is essentially free, so prepare the failure value here if we can. bool init_failure_for_cmp_early = @@ -3655,7 +3655,7 @@ class ReadBarrierCasSlowPathARMVIXL : public SlowPathCodeARMVIXL { }; static void CreateUnsafeCASLocations(ArenaAllocator* allocator, HInvoke* invoke) { - const bool can_call = kEmitCompilerReadBarrier && IsUnsafeCASObject(invoke); + const bool can_call = gUseReadBarrier && IsUnsafeCASObject(invoke); LocationSummary* locations = new (allocator) LocationSummary(invoke, can_call @@ -3706,7 +3706,7 @@ static void GenUnsafeCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARMV vixl32::Label* exit_loop = &exit_loop_label; vixl32::Label* cmp_failure = &exit_loop_label; - if (kEmitCompilerReadBarrier && type == DataType::Type::kReference) { + if (gUseReadBarrier && type == DataType::Type::kReference) { // If marking, check if the stored reference is a from-space reference to the same // object as the to-space reference `expected`. If so, perform a custom CAS loop. ReadBarrierCasSlowPathARMVIXL* slow_path = @@ -3770,7 +3770,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCompareAndSetInt(HInvoke* i } void IntrinsicLocationsBuilderARMVIXL::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) { // The only supported read barrier implementation is the Baker-style read barriers (b/173104084). - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { return; } @@ -3798,7 +3798,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCompareAndSetInt(HInvoke* invo } void IntrinsicCodeGeneratorARMVIXL::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) { // The only supported read barrier implementation is the Baker-style read barriers (b/173104084). - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); GenUnsafeCas(invoke, DataType::Type::kReference, codegen_); } @@ -4351,7 +4351,7 @@ static void GenerateVarHandleTarget(HInvoke* invoke, LocationFrom(target.object), method, ArtField::DeclaringClassOffset().Int32Value(), - kCompilerReadBarrierOption); + gCompilerReadBarrierOption); } } } else { @@ -4403,7 +4403,7 @@ static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) { } // Add a temporary for offset. - if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) && + if ((gUseReadBarrier && !kUseBakerReadBarrier) && GetExpectedVarHandleCoordinatesCount(invoke) == 0u) { // For static fields. // To preserve the offset value across the non-Baker read barrier slow path // for loading the declaring class, use a fixed callee-save register. 
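Stepping back from the individual hunks: the repeated kEmitCompilerReadBarrier → gUseReadBarrier and kCompilerReadBarrierOption → gCompilerReadBarrierOption substitutions all follow one shape, replacing a build-time constant with a global that is presumably initialized when the runtime starts (the declarations themselves are outside this diff). A self-contained sketch of that shape, with illustrative names:

// Before: a constexpr flag, so the compiler statically discards the unused path.
//   static constexpr bool kEmitCompilerReadBarrier = ...;
//
// After (the pattern used throughout these hunks): an ordinary global, assumed
// to be set during runtime initialization, so both paths stay compiled in and
// the choice is made per-runtime rather than per-build.
bool gUseReadBarrier = false;                // illustrative stand-in for the runtime flag
constexpr bool kUseBakerReadBarrier = true;  // Baker vs. non-Baker stays a build-time choice

const char* SelectReferenceLoadPath() {
  if (gUseReadBarrier && kUseBakerReadBarrier) {
    return "Baker read barrier field load";
  }
  return gUseReadBarrier ? "slow-path read barrier" : "plain load";
}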
@@ -4428,7 +4428,7 @@ static void CreateVarHandleGetLocations(HInvoke* invoke, return; } - if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) && + if ((gUseReadBarrier && !kUseBakerReadBarrier) && invoke->GetType() == DataType::Type::kReference && invoke->GetIntrinsic() != Intrinsics::kVarHandleGet && invoke->GetIntrinsic() != Intrinsics::kVarHandleGetOpaque) { @@ -4476,7 +4476,7 @@ static void GenerateVarHandleGet(HInvoke* invoke, Location maybe_temp = Location::NoLocation(); Location maybe_temp2 = Location::NoLocation(); Location maybe_temp3 = Location::NoLocation(); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) { + if (gUseReadBarrier && kUseBakerReadBarrier && type == DataType::Type::kReference) { // Reuse the offset temporary. maybe_temp = LocationFrom(target.offset); } else if (DataType::Is64BitType(type) && Use64BitExclusiveLoadStore(atomic, codegen)) { @@ -4590,7 +4590,7 @@ static void CreateVarHandleSetLocations(HInvoke* invoke, HInstruction* arg = invoke->InputAt(number_of_arguments - 1u); bool has_reverse_bytes_slow_path = (expected_coordinates_count == 2u) && - !(arg->IsConstant() && arg->AsConstant()->IsZeroBitPattern()); + !IsZeroBitPattern(arg); if (Use64BitExclusiveLoadStore(atomic, codegen)) { // We need 4 temporaries in the byte array view slow path. Otherwise, we need // 2 or 3 temporaries for GenerateIntrinsicSet() depending on the value type. @@ -4699,7 +4699,7 @@ static void GenerateVarHandleSet(HInvoke* invoke, vixl32::Register temp = target.offset; vixl32::Register card = temps.Acquire(); vixl32::Register value_reg = RegisterFrom(value); - codegen->MarkGCCard(temp, card, target.object, value_reg, /*value_can_be_null=*/ true); + codegen->MarkGCCard(temp, card, target.object, value_reg, /* emit_null_check= */ true); } if (slow_path != nullptr) { @@ -4749,7 +4749,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo uint32_t number_of_arguments = invoke->GetNumberOfArguments(); DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1u); - if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) && + if ((gUseReadBarrier && !kUseBakerReadBarrier) && value_type == DataType::Type::kReference) { // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores // the passed reference and reloads it from the field. This breaks the read barriers @@ -4763,7 +4763,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo LocationSummary* locations = CreateVarHandleCommonLocations(invoke); - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { // We need callee-save registers for both the class object and offset instead of // the temporaries reserved in CreateVarHandleCommonLocations(). static_assert(POPCOUNT(kArmCalleeSaveRefSpills) >= 2u); @@ -4799,7 +4799,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, boo locations->AddRegisterTemps(2u); } } - if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) { + if (gUseReadBarrier && value_type == DataType::Type::kReference) { // Add a temporary for store result, also used for the `old_value_temp` in slow path. 
locations->AddTemp(Location::RequiresRegister()); } @@ -4930,7 +4930,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, vixl32::Label* exit_loop = &exit_loop_label; vixl32::Label* cmp_failure = &exit_loop_label; - if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) { + if (gUseReadBarrier && value_type == DataType::Type::kReference) { // The `old_value_temp` is used first for the marked `old_value` and then for the unmarked // reloaded old value for subsequent CAS in the slow path. This must not clobber `old_value`. vixl32::Register old_value_temp = return_success ? RegisterFrom(out) : store_result; @@ -5086,7 +5086,7 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke, return; } - if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) && + if ((gUseReadBarrier && !kUseBakerReadBarrier) && invoke->GetType() == DataType::Type::kReference) { // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores // the passed reference and reloads it from the field, thus seeing the new value @@ -5107,7 +5107,7 @@ static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke, // Add temps needed to do the GenerateGetAndUpdate() with core registers. size_t temps_needed = (value_type == DataType::Type::kFloat64) ? 5u : 3u; locations->AddRegisterTemps(temps_needed - locations->GetTempCount()); - } else if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) && + } else if ((gUseReadBarrier && !kUseBakerReadBarrier) && value_type == DataType::Type::kReference) { // We need to preserve the declaring class (if present) and offset for read barrier // slow paths, so we must use a separate temporary for the exclusive store result. @@ -5213,7 +5213,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, if (byte_swap) { GenerateReverseBytes(assembler, DataType::Type::kInt32, arg, arg); } - } else if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) { + } else if (gUseReadBarrier && value_type == DataType::Type::kReference) { if (kUseBakerReadBarrier) { // Load the old value initially to a temporary register. // We shall move it to `out` later with a read barrier. @@ -5296,7 +5296,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, } else { __ Vmov(SRegisterFrom(out), RegisterFrom(old_value)); } - } else if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) { + } else if (gUseReadBarrier && value_type == DataType::Type::kReference) { if (kUseBakerReadBarrier) { codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(RegisterFrom(out), RegisterFrom(old_value)); @@ -5517,7 +5517,7 @@ void VarHandleSlowPathARMVIXL::EmitByteArrayViewCode(CodeGenerator* codegen_in) // Byte order check. For native byte order return to the main path. if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet) { HInstruction* arg = invoke->InputAt(invoke->GetNumberOfArguments() - 1u); - if (arg->IsConstant() && arg->AsConstant()->IsZeroBitPattern()) { + if (IsZeroBitPattern(arg)) { // There is no reason to differentiate between native byte order and byte-swap // for setting a zero bit pattern. Just return to the main path. __ B(GetNativeByteOrderLabel()); @@ -5549,69 +5549,9 @@ void VarHandleSlowPathARMVIXL::EmitByteArrayViewCode(CodeGenerator* codegen_in) __ B(GetExitLabel()); } -UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe? -UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure. 
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongDivideUnsigned) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32Update) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateBytes) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateByteBuffer) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16ToFloat) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16ToHalf) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Floor) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Ceil) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Rint) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Greater) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16GreaterEquals) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Less) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16LessEquals) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Compare) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Min) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, FP16Max) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMultiplyHigh) - -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferAppend); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferLength); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferToString); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendObject); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendString); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendCharSequence); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendCharArray); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendBoolean); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendChar); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendInt); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendLong); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendFloat); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppendDouble); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderLength); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderToString); - -UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyByte); -UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyInt); - -// 1.8. -UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathFmaDouble) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathFmaFloat) - -UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddLong) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetInt) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetLong) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetObject) - -UNIMPLEMENTED_INTRINSIC(ARMVIXL, MethodHandleInvokeExact) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, MethodHandleInvoke) - -// OpenJDK 11 -UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeCASLong) // High register pressure. 
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeGetAndAddInt) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeGetAndAddLong) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeGetAndSetInt) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeGetAndSetLong) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeGetAndSetObject) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, JdkUnsafeCompareAndSetLong) +#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(ARMVIXL, Name) +UNIMPLEMENTED_INTRINSIC_LIST_ARM(MARK_UNIMPLEMENTED); +#undef MARK_UNIMPLEMENTED UNREACHABLE_INTRINSICS(ARMVIXL) diff --git a/compiler/optimizing/intrinsics_arm_vixl.h b/compiler/optimizing/intrinsics_arm_vixl.h index 3103cec8f0..54475bcc7e 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.h +++ b/compiler/optimizing/intrinsics_arm_vixl.h @@ -17,10 +17,11 @@ #ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_VIXL_H_ #define ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_VIXL_H_ +#include "base/macros.h" #include "intrinsics.h" #include "utils/arm/assembler_arm_vixl.h" -namespace art { +namespace art HIDDEN { namespace arm { diff --git a/compiler/optimizing/intrinsics_utils.h b/compiler/optimizing/intrinsics_utils.h index 19f5e332a8..13cabdafed 100644 --- a/compiler/optimizing/intrinsics_utils.h +++ b/compiler/optimizing/intrinsics_utils.h @@ -29,7 +29,7 @@ #include "utils/assembler.h" #include "utils/label.h" -namespace art { +namespace art HIDDEN { // Default slow-path for fallback (calling the managed code to handle the intrinsic) in an // intrinsified call. This will copy the arguments into the positions for a regular call. diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 7d90aae984..d2072201f8 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -38,7 +38,7 @@ #include "utils/x86/assembler_x86.h" #include "utils/x86/constants_x86.h" -namespace art { +namespace art HIDDEN { namespace x86 { @@ -75,7 +75,7 @@ class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode { public: explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); } @@ -1699,7 +1699,7 @@ static void GenUnsafeGet(HInvoke* invoke, case DataType::Type::kReference: { Register output = output_loc.AsRegister<Register>(); - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { if (kUseBakerReadBarrier) { Address src(base, offset, ScaleFactor::TIMES_1, 0); codegen->GenerateReferenceLoadWithBakerReadBarrier( @@ -1757,7 +1757,7 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke, DataType::Type type, bool is_volatile) { - bool can_call = kEmitCompilerReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic()); + bool can_call = gUseReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic()); LocationSummary* locations = new (allocator) LocationSummary(invoke, can_call @@ -2103,7 +2103,7 @@ void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLongRelease(HInvoke* invoke) { static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator, DataType::Type type, HInvoke* invoke) { - const bool can_call = kEmitCompilerReadBarrier && + const bool can_call = gUseReadBarrier && kUseBakerReadBarrier && IsUnsafeCASObject(invoke); LocationSummary* locations = @@ -2175,7 +2175,7 @@ void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetLong(HInvoke* invo void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) { 
// The only supported read barrier implementation is the Baker-style read barriers. - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { return; } @@ -2304,7 +2304,7 @@ static void GenReferenceCAS(HInvoke* invoke, DCHECK_EQ(expected, EAX); DCHECK_NE(temp, temp2); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // Need to make sure the reference stored in the field is a to-space // one before attempting the CAS or the CAS could fail incorrectly. codegen->GenerateReferenceLoadWithBakerReadBarrier( @@ -2391,7 +2391,7 @@ static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86* codeg if (type == DataType::Type::kReference) { // The only read barrier implementation supporting the // UnsafeCASObject intrinsic is the Baker-style read barriers. - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); Register temp = locations->GetTemp(0).AsRegister<Register>(); Register temp2 = locations->GetTemp(1).AsRegister<Register>(); @@ -2413,7 +2413,7 @@ void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) { void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) { // The only read barrier implementation supporting the // UnsafeCASObject intrinsic is the Baker-style read barriers. - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); GenCAS(DataType::Type::kReference, invoke, codegen_); } @@ -2443,7 +2443,7 @@ void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) { // The only supported read barrier implementation is the Baker-style read barriers. - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); GenCAS(DataType::Type::kReference, invoke, codegen_); } @@ -2843,7 +2843,7 @@ static void GenSystemArrayCopyEndAddress(X86Assembler* assembler, void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) { // The only read barrier implementation supporting the // SystemArrayCopy intrinsic is the Baker-style read barriers. - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { return; } @@ -2875,7 +2875,7 @@ void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) { void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { // The only read barrier implementation supporting the // SystemArrayCopy intrinsic is the Baker-style read barriers. - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); X86Assembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -2995,7 +2995,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { // slow path. 
if (!optimizations.GetSourceIsNonPrimitiveArray()) { - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // /* HeapReference<Class> */ temp1 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false); @@ -3022,7 +3022,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); } - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { if (length.Equals(Location::RegisterLocation(temp3))) { // When Baker read barriers are enabled, register `temp3`, // which in the present case contains the `length` parameter, @@ -3120,7 +3120,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); // Bail out if the source is not a non primitive array. - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // /* HeapReference<Class> */ temp1 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false); @@ -3151,7 +3151,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { // Compute the base source address in `temp1`. GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // If it is needed (in the case of the fast-path loop), the base // destination address is computed later, as `temp2` is used for // intermediate computations. @@ -3259,7 +3259,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { } // We only need one card marking on the destination array. - codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null= */ false); + codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* emit_null_check= */ false); __ Bind(intrinsic_slow_path->GetExitLabel()); } @@ -3377,7 +3377,7 @@ void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) { SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); codegen_->AddSlowPath(slow_path); - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { // Check self->GetWeakRefAccessEnabled(). ThreadOffset32 offset = Thread::WeakRefAccessEnabledOffset<kX86PointerSize>(); __ fs()->cmpl(Address::Absolute(offset), @@ -3400,7 +3400,7 @@ void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) { // Load the value from the field. 
uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value(); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, out, obj.AsRegister<Register>(), @@ -3442,7 +3442,7 @@ void IntrinsicCodeGeneratorX86::VisitReferenceRefersTo(HInvoke* invoke) { NearLabel end, return_true, return_false; __ cmpl(out, other); - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { DCHECK(kUseBakerReadBarrier); __ j(kEqual, &return_true); @@ -3781,7 +3781,7 @@ static Register GenerateVarHandleFieldReference(HInvoke* invoke, Location::RegisterLocation(temp), Address(temp, declaring_class_offset), /* fixup_label= */ nullptr, - kCompilerReadBarrierOption); + gCompilerReadBarrierOption); return temp; } @@ -3794,7 +3794,7 @@ static Register GenerateVarHandleFieldReference(HInvoke* invoke, static void CreateVarHandleGetLocations(HInvoke* invoke) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { return; } @@ -3836,7 +3836,7 @@ static void CreateVarHandleGetLocations(HInvoke* invoke) { static void GenerateVarHandleGet(HInvoke* invoke, CodeGeneratorX86* codegen) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); X86Assembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -3860,7 +3860,7 @@ static void GenerateVarHandleGet(HInvoke* invoke, CodeGeneratorX86* codegen) { Address field_addr(ref, offset, TIMES_1, 0); // Load the value from the field - if (type == DataType::Type::kReference && kCompilerReadBarrierOption == kWithReadBarrier) { + if (type == DataType::Type::kReference && gCompilerReadBarrierOption == kWithReadBarrier) { codegen->GenerateReferenceLoadWithBakerReadBarrier( invoke, out, ref, field_addr, /* needs_null_check= */ false); } else if (type == DataType::Type::kInt64 && @@ -3917,7 +3917,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetOpaque(HInvoke* invoke) { static void CreateVarHandleSetLocations(HInvoke* invoke) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { return; } @@ -3963,7 +3963,7 @@ static void CreateVarHandleSetLocations(HInvoke* invoke) { case DataType::Type::kInt64: // We only handle constant non-atomic int64 values. DCHECK(value->IsConstant()); - locations->SetInAt(value_index, Location::ConstantLocation(value->AsConstant())); + locations->SetInAt(value_index, Location::ConstantLocation(value)); break; case DataType::Type::kReference: locations->SetInAt(value_index, Location::RequiresRegister()); @@ -3990,7 +3990,7 @@ static void CreateVarHandleSetLocations(HInvoke* invoke) { static void GenerateVarHandleSet(HInvoke* invoke, CodeGeneratorX86* codegen) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. 
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); X86Assembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -4041,13 +4041,16 @@ static void GenerateVarHandleSet(HInvoke* invoke, CodeGeneratorX86* codegen) { InstructionCodeGeneratorX86* instr_codegen = down_cast<InstructionCodeGeneratorX86*>(codegen->GetInstructionVisitor()); // Store the value to the field - instr_codegen->HandleFieldSet(invoke, - value_index, - value_type, - Address(reference, offset, TIMES_1, 0), - reference, - is_volatile, - /* value_can_be_null */ true); + instr_codegen->HandleFieldSet( + invoke, + value_index, + value_type, + Address(reference, offset, TIMES_1, 0), + reference, + is_volatile, + /* value_can_be_null */ true, + // Value can be null, and this write barrier is not being relied on for other sets. + WriteBarrierKind::kEmitWithNullCheck); __ Bind(slow_path->GetExitLabel()); } @@ -4087,7 +4090,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleSetOpaque(HInvoke* invoke) { static void CreateVarHandleGetAndSetLocations(HInvoke* invoke) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { return; } @@ -4135,7 +4138,7 @@ static void CreateVarHandleGetAndSetLocations(HInvoke* invoke) { static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codegen) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); X86Assembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -4194,7 +4197,7 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codege __ movd(locations->Out().AsFpuRegister<XmmRegister>(), EAX); break; case DataType::Type::kReference: { - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // Need to make sure the reference stored in the field is a to-space // one before attempting the CAS or the CAS could fail incorrectly. codegen->GenerateReferenceLoadWithBakerReadBarrier( @@ -4208,7 +4211,7 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codege &temp2); } codegen->MarkGCCard( - temp, temp2, reference, value.AsRegister<Register>(), /* value_can_be_null= */ false); + temp, temp2, reference, value.AsRegister<Register>(), /* emit_null_check= */ false); if (kPoisonHeapReferences) { __ movl(temp, value.AsRegister<Register>()); __ PoisonHeapReference(temp); @@ -4258,7 +4261,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSetRelease(HInvoke* invoke) static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { return; } @@ -4322,7 +4325,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke) { static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, CodeGeneratorX86* codegen) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. 
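The `GenerateVarHandleSet` hunk above threads a new `WriteBarrierKind::kEmitWithNullCheck` argument into `HandleFieldSet`. A hedged sketch of what such a hint expresses for card marking; only the `kEmitWithNullCheck` enumerator is taken from the diff, and the other enumerators and the helper below are illustrative assumptions, not ART's generated barrier:

```c++
#include <cstdint>

enum class WriteBarrierKind {
  kEmitWithNullCheck,  // stored value may be null, so guard the card mark
  kEmitNoNullCheck,    // caller proved the value is non-null (assumed variant)
  kDontEmit,           // another store keeps the card dirty (assumed variant)
};

// Illustrative card mark: dirty the card covering `object_addr` unless the
// hint says the barrier can be skipped for this store.
void MarkCardForStore(uint8_t* card_table, uintptr_t object_addr,
                      const void* stored_value, WriteBarrierKind kind) {
  if (kind == WriteBarrierKind::kDontEmit) {
    return;
  }
  if (kind == WriteBarrierKind::kEmitWithNullCheck && stored_value == nullptr) {
    return;  // storing null cannot create a reference the GC must trace
  }
  card_table[object_addr >> 10] = 0x70;  // 1 KiB cards and 0x70 are placeholders
}
```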
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); X86Assembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -4441,7 +4444,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchangeRelease(HInvoke* static void CreateVarHandleGetAndAddLocations(HInvoke* invoke) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { return; } @@ -4490,7 +4493,7 @@ static void CreateVarHandleGetAndAddLocations(HInvoke* invoke) { static void GenerateVarHandleGetAndAdd(HInvoke* invoke, CodeGeneratorX86* codegen) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); X86Assembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -4591,7 +4594,7 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAddRelease(HInvoke* invoke) static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { return; } @@ -4659,7 +4662,7 @@ static void GenerateBitwiseOp(HInvoke* invoke, static void GenerateVarHandleGetAndBitwiseOp(HInvoke* invoke, CodeGeneratorX86* codegen) { // The only read barrier implementation supporting the // VarHandleGet intrinsic is the Baker-style read barriers. 
- DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); X86Assembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -4829,64 +4832,9 @@ void IntrinsicLocationsBuilderX86::VisitMathFmaFloat(HInvoke* invoke) { } } -UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble) -UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite) -UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite) -UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit) -UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit) -UNIMPLEMENTED_INTRINSIC(X86, LongDivideUnsigned) -UNIMPLEMENTED_INTRINSIC(X86, CRC32Update) -UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateBytes) -UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateByteBuffer) -UNIMPLEMENTED_INTRINSIC(X86, FP16ToFloat) -UNIMPLEMENTED_INTRINSIC(X86, FP16ToHalf) -UNIMPLEMENTED_INTRINSIC(X86, FP16Floor) -UNIMPLEMENTED_INTRINSIC(X86, FP16Ceil) -UNIMPLEMENTED_INTRINSIC(X86, FP16Rint) -UNIMPLEMENTED_INTRINSIC(X86, FP16Greater) -UNIMPLEMENTED_INTRINSIC(X86, FP16GreaterEquals) -UNIMPLEMENTED_INTRINSIC(X86, FP16Less) -UNIMPLEMENTED_INTRINSIC(X86, FP16LessEquals) -UNIMPLEMENTED_INTRINSIC(X86, FP16Compare) -UNIMPLEMENTED_INTRINSIC(X86, FP16Min) -UNIMPLEMENTED_INTRINSIC(X86, FP16Max) -UNIMPLEMENTED_INTRINSIC(X86, MathMultiplyHigh) - -UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf); -UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter); -UNIMPLEMENTED_INTRINSIC(X86, StringBufferAppend); -UNIMPLEMENTED_INTRINSIC(X86, StringBufferLength); -UNIMPLEMENTED_INTRINSIC(X86, StringBufferToString); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendObject); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendString); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendCharSequence); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendCharArray); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendBoolean); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendChar); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendInt); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendLong); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendFloat); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppendDouble); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderLength); -UNIMPLEMENTED_INTRINSIC(X86, StringBuilderToString); - -// 1.8. 
- -UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddInt) -UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddLong) -UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetInt) -UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetLong) -UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetObject) - -UNIMPLEMENTED_INTRINSIC(X86, MethodHandleInvokeExact) -UNIMPLEMENTED_INTRINSIC(X86, MethodHandleInvoke) - -// OpenJDK 11 -UNIMPLEMENTED_INTRINSIC(X86, JdkUnsafeGetAndAddInt) -UNIMPLEMENTED_INTRINSIC(X86, JdkUnsafeGetAndAddLong) -UNIMPLEMENTED_INTRINSIC(X86, JdkUnsafeGetAndSetInt) -UNIMPLEMENTED_INTRINSIC(X86, JdkUnsafeGetAndSetLong) -UNIMPLEMENTED_INTRINSIC(X86, JdkUnsafeGetAndSetObject) +#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(X86, Name) +UNIMPLEMENTED_INTRINSIC_LIST_X86(MARK_UNIMPLEMENTED); +#undef MARK_UNIMPLEMENTED UNREACHABLE_INTRINSICS(X86) diff --git a/compiler/optimizing/intrinsics_x86.h b/compiler/optimizing/intrinsics_x86.h index ae150dad43..77c236d244 100644 --- a/compiler/optimizing/intrinsics_x86.h +++ b/compiler/optimizing/intrinsics_x86.h @@ -17,9 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_X86_H_ #define ART_COMPILER_OPTIMIZING_INTRINSICS_X86_H_ +#include "base/macros.h" #include "intrinsics.h" -namespace art { +namespace art HIDDEN { class ArenaAllocator; class HInvokeStaticOrDirect; diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 3c31374f67..9d0d5f155e 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -36,7 +36,7 @@ #include "utils/x86_64/assembler_x86_64.h" #include "utils/x86_64/constants_x86_64.h" -namespace art { +namespace art HIDDEN { namespace x86_64 { @@ -71,7 +71,7 @@ class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode { public: explicit ReadBarrierSystemArrayCopySlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) { - DCHECK(kEmitCompilerReadBarrier); + DCHECK(gUseReadBarrier); DCHECK(kUseBakerReadBarrier); } @@ -836,7 +836,7 @@ void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyInt(HInvoke* invoke) { void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) { // The only read barrier implementation supporting the // SystemArrayCopy intrinsic is the Baker-style read barriers. - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { return; } @@ -887,7 +887,7 @@ static void GenSystemArrayCopyAddresses(X86_64Assembler* assembler, void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { // The only read barrier implementation supporting the // SystemArrayCopy intrinsic is the Baker-style read barriers. - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); X86_64Assembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -1002,7 +1002,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { // slow path. bool did_unpoison = false; - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // /* HeapReference<Class> */ temp1 = dest->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false); @@ -1034,7 +1034,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { if (!optimizations.GetDestinationIsNonPrimitiveArray()) { // Bail out if the destination is not a non primitive array. 
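The x86 intrinsics file above (and its x86-64 counterpart later in the diff) replaces the long run of `UNIMPLEMENTED_INTRINSIC(...)` lines with a single X-macro expansion over `UNIMPLEMENTED_INTRINSIC_LIST_X86`. A self-contained sketch of that pattern; the list and the marker below are reduced stand-ins, not the real ART macros:

```c++
#include <iostream>

// Central list of names; each entry is passed to the macro argument V.
#define UNIMPLEMENTED_INTRINSIC_LIST_DEMO(V) \
  V(MathRoundDouble)                         \
  V(CRC32Update)                             \
  V(StringBufferAppend)

// Per-use expansion; ART's version would declare the unimplemented visitors.
#define MARK_UNIMPLEMENTED(Name) \
  std::cout << "unimplemented intrinsic: " #Name "\n";

int main() {
  UNIMPLEMENTED_INTRINSIC_LIST_DEMO(MARK_UNIMPLEMENTED)
  return 0;
}
```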
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // /* HeapReference<Class> */ TMP = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( invoke, TMP_loc, temp1, component_offset, /* needs_null_check= */ false); @@ -1055,7 +1055,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { if (!optimizations.GetSourceIsNonPrimitiveArray()) { // Bail out if the source is not a non primitive array. - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // For the same reason given earlier, `temp1` is not trashed by the // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below. // /* HeapReference<Class> */ TMP = temp2->component_type_ @@ -1081,7 +1081,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { if (optimizations.GetDestinationIsTypedObjectArray()) { NearLabel do_copy; __ j(kEqual, &do_copy); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // /* HeapReference<Class> */ temp1 = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false); @@ -1109,7 +1109,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); // Bail out if the source is not a non primitive array. - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // /* HeapReference<Class> */ temp1 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false); @@ -1141,7 +1141,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { GenSystemArrayCopyAddresses( GetAssembler(), type, src, src_pos, dest, dest_pos, length, temp1, temp2, temp3); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // SystemArrayCopy implementation for Baker read barriers (see // also CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier): // @@ -1224,7 +1224,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { } // We only need one card marking on the destination array. 
- codegen_->MarkGCCard(temp1, temp2, dest, CpuRegister(kNoRegister), /* value_can_be_null= */ false); + codegen_->MarkGCCard(temp1, temp2, dest, CpuRegister(kNoRegister), /* emit_null_check= */ false); __ Bind(intrinsic_slow_path->GetExitLabel()); } @@ -1888,7 +1888,7 @@ static void GenUnsafeGet(HInvoke* invoke, break; case DataType::Type::kReference: { - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { if (kUseBakerReadBarrier) { Address src(base, offset, ScaleFactor::TIMES_1, 0); codegen->GenerateReferenceLoadWithBakerReadBarrier( @@ -1930,7 +1930,7 @@ static bool UnsafeGetIntrinsicOnCallList(Intrinsics intrinsic) { } static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - bool can_call = kEmitCompilerReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic()); + bool can_call = gUseReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic()); LocationSummary* locations = new (allocator) LocationSummary(invoke, can_call @@ -2230,7 +2230,7 @@ void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) static void CreateUnsafeCASLocations(ArenaAllocator* allocator, DataType::Type type, HInvoke* invoke) { - const bool can_call = kEmitCompilerReadBarrier && + const bool can_call = gUseReadBarrier && kUseBakerReadBarrier && IsUnsafeCASObject(invoke); LocationSummary* locations = @@ -2253,7 +2253,7 @@ static void CreateUnsafeCASLocations(ArenaAllocator* allocator, // Need two temporaries for MarkGCCard. locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. locations->AddTemp(Location::RequiresRegister()); - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { // Need three temporaries for GenerateReferenceLoadWithBakerReadBarrier. DCHECK(kUseBakerReadBarrier); locations->AddTemp(Location::RequiresRegister()); @@ -2298,7 +2298,7 @@ void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetLong(HInvoke* i void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) { // The only supported read barrier implementation is the Baker-style read barriers. - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { return; } @@ -2438,7 +2438,7 @@ static void GenCompareAndSetOrExchangeRef(CodeGeneratorX86_64* codegen, CpuRegister temp3, bool is_cmpxchg) { // The only supported read barrier implementation is the Baker-style read barriers. - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler()); @@ -2447,7 +2447,7 @@ static void GenCompareAndSetOrExchangeRef(CodeGeneratorX86_64* codegen, codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null); Address field_addr(base, offset, TIMES_1, 0); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { // Need to make sure the reference stored in the field is a to-space // one before attempting the CAS or the CAS could fail incorrectly. 
codegen->GenerateReferenceLoadWithBakerReadBarrier( @@ -2556,7 +2556,7 @@ static void GenCompareAndSetOrExchange(CodeGeneratorX86_64* codegen, CpuRegister new_value_reg = new_value.AsRegister<CpuRegister>(); CpuRegister temp1 = locations->GetTemp(temp1_index).AsRegister<CpuRegister>(); CpuRegister temp2 = locations->GetTemp(temp2_index).AsRegister<CpuRegister>(); - CpuRegister temp3 = kEmitCompilerReadBarrier + CpuRegister temp3 = gUseReadBarrier ? locations->GetTemp(temp3_index).AsRegister<CpuRegister>() : CpuRegister(kNoRegister); DCHECK(RegsAreAllDifferent({base, offset, temp1, temp2, temp3})); @@ -2624,7 +2624,7 @@ void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invo void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) { // The only supported read barrier implementation is the Baker-style read barriers. - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); GenCAS(DataType::Type::kReference, invoke, codegen_); } @@ -3128,7 +3128,7 @@ void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) { SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke); codegen_->AddSlowPath(slow_path); - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { // Check self->GetWeakRefAccessEnabled(). ThreadOffset64 offset = Thread::WeakRefAccessEnabledOffset<kX86_64PointerSize>(); __ gs()->cmpl(Address::Absolute(offset, /* no_rip= */ true), @@ -3150,7 +3150,7 @@ void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) { // Load the value from the field. uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value(); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, out, obj.AsRegister<CpuRegister>(), @@ -3191,7 +3191,7 @@ void IntrinsicCodeGeneratorX86_64::VisitReferenceRefersTo(HInvoke* invoke) { __ cmpl(out, other); - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { DCHECK(kUseBakerReadBarrier); NearLabel calculate_result; @@ -3771,7 +3771,7 @@ static void GenerateVarHandleTarget(HInvoke* invoke, Location::RegisterLocation(target.object), Address(method, ArtField::DeclaringClassOffset()), /*fixup_label=*/ nullptr, - kCompilerReadBarrierOption); + gCompilerReadBarrierOption); } } } else { @@ -3790,7 +3790,7 @@ static void GenerateVarHandleTarget(HInvoke* invoke, static bool HasVarHandleIntrinsicImplementation(HInvoke* invoke) { // The only supported read barrier implementation is the Baker-style read barriers. - if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + if (gUseReadBarrier && !kUseBakerReadBarrier) { return false; } @@ -3876,7 +3876,7 @@ static void GenerateVarHandleGet(HInvoke* invoke, Location out = locations->Out(); if (type == DataType::Type::kReference) { - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { DCHECK(kUseBakerReadBarrier); codegen->GenerateReferenceLoadWithBakerReadBarrier( invoke, out, CpuRegister(target.object), src, /* needs_null_check= */ false); @@ -3985,16 +3985,19 @@ static void GenerateVarHandleSet(HInvoke* invoke, Address dst(CpuRegister(target.object), CpuRegister(target.offset), TIMES_1, 0); // Store the value to the field. 
- codegen->GetInstructionCodegen()->HandleFieldSet(invoke, - value_index, - last_temp_index, - value_type, - dst, - CpuRegister(target.object), - is_volatile, - is_atomic, - /*value_can_be_null=*/ true, - byte_swap); + codegen->GetInstructionCodegen()->HandleFieldSet( + invoke, + value_index, + last_temp_index, + value_type, + dst, + CpuRegister(target.object), + is_volatile, + is_atomic, + /*value_can_be_null=*/true, + byte_swap, + // Value can be null, and this write barrier is not being relied on for other sets. + WriteBarrierKind::kEmitWithNullCheck); // setVolatile needs kAnyAny barrier, but HandleFieldSet takes care of that. @@ -4070,7 +4073,7 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke) { // Need two temporaries for MarkGCCard. locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { // Need three temporaries for GenerateReferenceLoadWithBakerReadBarrier. DCHECK(kUseBakerReadBarrier); locations->AddTemp(Location::RequiresRegister()); @@ -4085,7 +4088,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, CodeGeneratorX86_64* codegen, bool is_cmpxchg, bool byte_swap = false) { - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); X86_64Assembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -4218,7 +4221,7 @@ static void CreateVarHandleGetAndSetLocations(HInvoke* invoke) { // Need two temporaries for MarkGCCard. locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); - if (kEmitCompilerReadBarrier) { + if (gUseReadBarrier) { // Need a third temporary for GenerateReferenceLoadWithBakerReadBarrier. 
DCHECK(kUseBakerReadBarrier); locations->AddTemp(Location::RequiresRegister()); @@ -4267,7 +4270,7 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke, CpuRegister temp2 = locations->GetTemp(temp_count - 2).AsRegister<CpuRegister>(); CpuRegister valreg = value.AsRegister<CpuRegister>(); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { codegen->GenerateReferenceLoadWithBakerReadBarrier( invoke, locations->GetTemp(temp_count - 3), @@ -4278,7 +4281,7 @@ static void GenerateVarHandleGetAndSet(HInvoke* invoke, &temp1, &temp2); } - codegen->MarkGCCard(temp1, temp2, ref, valreg, /*value_can_be_null=*/ false); + codegen->MarkGCCard(temp1, temp2, ref, valreg, /* emit_null_check= */ false); DCHECK_EQ(valreg, out.AsRegister<CpuRegister>()); if (kPoisonHeapReferences) { @@ -4647,7 +4650,7 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke, bool need_any_store_barrier, bool need_any_any_barrier, bool byte_swap = false) { - DCHECK_IMPLIES(kEmitCompilerReadBarrier, kUseBakerReadBarrier); + DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier); X86_64Assembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -4987,57 +4990,9 @@ void VarHandleSlowPathX86_64::EmitByteArrayViewCode(CodeGeneratorX86_64* codegen __ jmp(GetExitLabel()); } -UNIMPLEMENTED_INTRINSIC(X86_64, CRC32Update) -UNIMPLEMENTED_INTRINSIC(X86_64, CRC32UpdateBytes) -UNIMPLEMENTED_INTRINSIC(X86_64, CRC32UpdateByteBuffer) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16ToFloat) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16ToHalf) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16Floor) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16Ceil) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16Rint) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16Greater) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16GreaterEquals) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16Less) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16LessEquals) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16Compare) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16Min) -UNIMPLEMENTED_INTRINSIC(X86_64, FP16Max) - -UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOf); -UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOfAfter); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferAppend); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferLength); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBufferToString); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendObject); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendString); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendCharSequence); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendCharArray); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendBoolean); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendChar); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendInt); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendLong); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendFloat); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderAppendDouble); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderLength); -UNIMPLEMENTED_INTRINSIC(X86_64, StringBuilderToString); - -// 1.8. 
- -UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddInt) -UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddLong) -UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetInt) -UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetLong) -UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndSetObject) - -UNIMPLEMENTED_INTRINSIC(X86_64, MethodHandleInvokeExact) -UNIMPLEMENTED_INTRINSIC(X86_64, MethodHandleInvoke) - -// OpenJDK 11 -UNIMPLEMENTED_INTRINSIC(X86_64, JdkUnsafeGetAndAddInt) -UNIMPLEMENTED_INTRINSIC(X86_64, JdkUnsafeGetAndAddLong) -UNIMPLEMENTED_INTRINSIC(X86_64, JdkUnsafeGetAndSetInt) -UNIMPLEMENTED_INTRINSIC(X86_64, JdkUnsafeGetAndSetLong) -UNIMPLEMENTED_INTRINSIC(X86_64, JdkUnsafeGetAndSetObject) +#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(X86_64, Name) +UNIMPLEMENTED_INTRINSIC_LIST_X86_64(MARK_UNIMPLEMENTED); +#undef MARK_UNIMPLEMENTED UNREACHABLE_INTRINSICS(X86_64) diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h index 199cfede1a..59fe815a94 100644 --- a/compiler/optimizing/intrinsics_x86_64.h +++ b/compiler/optimizing/intrinsics_x86_64.h @@ -17,9 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_X86_64_H_ #define ART_COMPILER_OPTIMIZING_INTRINSICS_X86_64_H_ +#include "base/macros.h" #include "intrinsics.h" -namespace art { +namespace art HIDDEN { class ArenaAllocator; class HInvokeStaticOrDirect; diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc index 0edb23b857..0c791b640d 100644 --- a/compiler/optimizing/licm.cc +++ b/compiler/optimizing/licm.cc @@ -18,7 +18,7 @@ #include "side_effects_analysis.h" -namespace art { +namespace art HIDDEN { static bool IsPhiOf(HInstruction* instruction, HBasicBlock* block) { return instruction->IsPhi() && instruction->GetBlock() == block; diff --git a/compiler/optimizing/licm.h b/compiler/optimizing/licm.h index 9cafddb05a..1a86b6eb9f 100644 --- a/compiler/optimizing/licm.h +++ b/compiler/optimizing/licm.h @@ -17,10 +17,11 @@ #ifndef ART_COMPILER_OPTIMIZING_LICM_H_ #define ART_COMPILER_OPTIMIZING_LICM_H_ +#include "base/macros.h" #include "nodes.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { class SideEffectsAnalysis; diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc index adc3cabe87..f8481099f4 100644 --- a/compiler/optimizing/licm_test.cc +++ b/compiler/optimizing/licm_test.cc @@ -17,12 +17,13 @@ #include "licm.h" #include "base/arena_allocator.h" +#include "base/macros.h" #include "builder.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "side_effects_analysis.h" -namespace art { +namespace art HIDDEN { /** * Fixture class for the LICM tests. 
diff --git a/compiler/optimizing/linear_order.cc b/compiler/optimizing/linear_order.cc index 58e00a810d..25ca866b2c 100644 --- a/compiler/optimizing/linear_order.cc +++ b/compiler/optimizing/linear_order.cc @@ -19,7 +19,7 @@ #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" -namespace art { +namespace art HIDDEN { static bool InSameLoop(HLoopInformation* first_loop, HLoopInformation* second_loop) { return first_loop == second_loop; diff --git a/compiler/optimizing/linear_order.h b/compiler/optimizing/linear_order.h index 151db001e1..75e75048a3 100644 --- a/compiler/optimizing/linear_order.h +++ b/compiler/optimizing/linear_order.h @@ -19,9 +19,10 @@ #include <type_traits> +#include "base/macros.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { void LinearizeGraphInternal(const HGraph* graph, ArrayRef<HBasicBlock*> linear_order); diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index d56ae11ca9..01daa23511 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -17,6 +17,7 @@ #include <fstream> #include "base/arena_allocator.h" +#include "base/macros.h" #include "builder.h" #include "code_generator.h" #include "dex/dex_file.h" @@ -28,9 +29,9 @@ #include "pretty_printer.h" #include "ssa_liveness_analysis.h" -namespace art { +namespace art HIDDEN { -class LinearizeTest : public OptimizingUnitTest { +class LinearizeTest : public CommonCompilerTest, public OptimizingUnitTestHelper { protected: template <size_t number_of_blocks> void TestCode(const std::vector<uint16_t>& data, diff --git a/compiler/optimizing/live_interval_test.cc b/compiler/optimizing/live_interval_test.cc index c60386d7b7..b5d1336d4a 100644 --- a/compiler/optimizing/live_interval_test.cc +++ b/compiler/optimizing/live_interval_test.cc @@ -15,12 +15,13 @@ */ #include "base/arena_allocator.h" +#include "base/macros.h" #include "optimizing_unit_test.h" #include "ssa_liveness_analysis.h" #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { TEST(LiveInterval, GetStart) { ArenaPoolAndAllocator pool; diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index bb8a4dc08e..fb1a23eef4 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -15,6 +15,7 @@ */ #include "base/arena_allocator.h" +#include "base/macros.h" #include "builder.h" #include "code_generator.h" #include "dex/dex_file.h" @@ -25,9 +26,9 @@ #include "prepare_for_register_allocation.h" #include "ssa_liveness_analysis.h" -namespace art { +namespace art HIDDEN { -class LiveRangesTest : public OptimizingUnitTest { +class LiveRangesTest : public CommonCompilerTest, public OptimizingUnitTestHelper { protected: HGraph* BuildGraph(const std::vector<uint16_t>& data); diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index ba3787e9be..0b421cf9e6 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -15,6 +15,7 @@ */ #include "base/arena_allocator.h" +#include "base/macros.h" #include "builder.h" #include "code_generator.h" #include "dex/dex_file.h" @@ -25,9 +26,9 @@ #include "prepare_for_register_allocation.h" #include "ssa_liveness_analysis.h" -namespace art { +namespace art HIDDEN { -class LivenessTest : public OptimizingUnitTest { +class LivenessTest : public CommonCompilerTest, public OptimizingUnitTestHelper { protected: void TestCode(const std::vector<uint16_t>& data, 
const char* expected); }; diff --git a/compiler/optimizing/load_store_analysis.cc b/compiler/optimizing/load_store_analysis.cc index 3fe42aff2e..f1c50ac03c 100644 --- a/compiler/optimizing/load_store_analysis.cc +++ b/compiler/optimizing/load_store_analysis.cc @@ -19,7 +19,7 @@ #include "base/scoped_arena_allocator.h" #include "optimizing/escape.h" -namespace art { +namespace art HIDDEN { // A cap for the number of heap locations to prevent pathological time/space consumption. // The number of heap locations for most of the methods stays below this threshold. @@ -283,14 +283,6 @@ bool LoadStoreAnalysis::Run() { heap_location_collector_.CleanUp(); return false; } - if (heap_location_collector_.HasVolatile() || heap_location_collector_.HasMonitorOps()) { - // Don't do load/store elimination if the method has volatile field accesses or - // monitor operations, for now. - // TODO: do it right. - heap_location_collector_.CleanUp(); - return false; - } - heap_location_collector_.BuildAliasingMatrix(); heap_location_collector_.DumpReferenceStats(stats_); return true; diff --git a/compiler/optimizing/load_store_analysis.h b/compiler/optimizing/load_store_analysis.h index 4975bae2a2..c46a5b9cc1 100644 --- a/compiler/optimizing/load_store_analysis.h +++ b/compiler/optimizing/load_store_analysis.h @@ -20,6 +20,7 @@ #include "base/arena_allocator.h" #include "base/arena_bit_vector.h" #include "base/bit_vector-inl.h" +#include "base/macros.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" #include "base/stl_util.h" @@ -28,7 +29,7 @@ #include "nodes.h" #include "optimizing/optimizing_compiler_stats.h" -namespace art { +namespace art HIDDEN { enum class LoadStoreAnalysisType { kBasic, @@ -170,14 +171,16 @@ class HeapLocation : public ArenaObject<kArenaAllocLSA> { size_t offset, HInstruction* index, size_t vector_length, - int16_t declaring_class_def_index) + int16_t declaring_class_def_index, + bool is_vec_op) : ref_info_(ref_info), type_(DataType::ToSigned(type)), offset_(offset), index_(index), vector_length_(vector_length), declaring_class_def_index_(declaring_class_def_index), - has_aliased_locations_(false) { + has_aliased_locations_(false), + is_vec_op_(is_vec_op) { DCHECK(ref_info != nullptr); DCHECK((offset == kInvalidFieldOffset && index != nullptr) || (offset != kInvalidFieldOffset && index == nullptr)); @@ -188,6 +191,7 @@ class HeapLocation : public ArenaObject<kArenaAllocLSA> { size_t GetOffset() const { return offset_; } HInstruction* GetIndex() const { return index_; } size_t GetVectorLength() const { return vector_length_; } + bool IsVecOp() const { return is_vec_op_; } // Returns the definition of declaring class' dex index. // It's kDeclaringClassDefIndexForArrays for an array element. @@ -226,11 +230,12 @@ class HeapLocation : public ArenaObject<kArenaAllocLSA> { // Declaring class's def's dex index. // Invalid when this HeapLocation is not field access. const int16_t declaring_class_def_index_; - // Has aliased heap locations in the method, due to either the // reference is aliased or the array element is aliased via different // index names. bool has_aliased_locations_; + // Whether this HeapLocation represents a vector operation. 
+ bool is_vec_op_; DISALLOW_COPY_AND_ASSIGN(HeapLocation); }; @@ -253,8 +258,6 @@ class HeapLocationCollector : public HGraphVisitor { heap_locations_(allocator->Adapter(kArenaAllocLSA)), aliasing_matrix_(allocator, kInitialAliasingMatrixBitVectorSize, true, kArenaAllocLSA), has_heap_stores_(false), - has_volatile_(false), - has_monitor_operations_(false), lse_type_(lse_type) { aliasing_matrix_.ClearAllBits(); } @@ -319,7 +322,8 @@ class HeapLocationCollector : public HGraphVisitor { field->GetFieldOffset().SizeValue(), nullptr, HeapLocation::kScalar, - field->GetDeclaringClassDefIndex()); + field->GetDeclaringClassDefIndex(), + /*is_vec_op=*/false); } size_t GetArrayHeapLocation(HInstruction* instruction) const { @@ -328,10 +332,10 @@ class HeapLocationCollector : public HGraphVisitor { HInstruction* index = instruction->InputAt(1); DataType::Type type = instruction->GetType(); size_t vector_length = HeapLocation::kScalar; + const bool is_vec_op = instruction->IsVecStore() || instruction->IsVecLoad(); if (instruction->IsArraySet()) { type = instruction->AsArraySet()->GetComponentType(); - } else if (instruction->IsVecStore() || - instruction->IsVecLoad()) { + } else if (is_vec_op) { HVecOperation* vec_op = instruction->AsVecOperation(); type = vec_op->GetPackedType(); vector_length = vec_op->GetVectorLength(); @@ -343,21 +347,14 @@ class HeapLocationCollector : public HGraphVisitor { HeapLocation::kInvalidFieldOffset, index, vector_length, - HeapLocation::kDeclaringClassDefIndexForArrays); + HeapLocation::kDeclaringClassDefIndexForArrays, + is_vec_op); } bool HasHeapStores() const { return has_heap_stores_; } - bool HasVolatile() const { - return has_volatile_; - } - - bool HasMonitorOps() const { - return has_monitor_operations_; - } - // Find and return the heap location index in heap_locations_. // NOTE: When heap locations are created, potentially aliasing/overlapping // accesses are given different indexes. 
This find function also @@ -373,7 +370,8 @@ class HeapLocationCollector : public HGraphVisitor { size_t offset, HInstruction* index, size_t vector_length, - int16_t declaring_class_def_index) const { + int16_t declaring_class_def_index, + bool is_vec_op) const { DataType::Type lookup_type = DataType::ToSigned(type); for (size_t i = 0; i < heap_locations_.size(); i++) { HeapLocation* loc = heap_locations_[i]; @@ -382,7 +380,8 @@ class HeapLocationCollector : public HGraphVisitor { loc->GetOffset() == offset && loc->GetIndex() == index && loc->GetVectorLength() == vector_length && - loc->GetDeclaringClassDefIndex() == declaring_class_def_index) { + loc->GetDeclaringClassDefIndex() == declaring_class_def_index && + loc->IsVecOp() == is_vec_op) { return i; } } @@ -527,22 +526,20 @@ class HeapLocationCollector : public HGraphVisitor { size_t offset, HInstruction* index, size_t vector_length, - int16_t declaring_class_def_index) { + int16_t declaring_class_def_index, + bool is_vec_op) { HInstruction* original_ref = HuntForOriginalReference(ref); ReferenceInfo* ref_info = GetOrCreateReferenceInfo(original_ref); size_t heap_location_idx = FindHeapLocationIndex( - ref_info, type, offset, index, vector_length, declaring_class_def_index); + ref_info, type, offset, index, vector_length, declaring_class_def_index, is_vec_op); if (heap_location_idx == kHeapLocationNotFound) { - HeapLocation* heap_loc = new (allocator_) - HeapLocation(ref_info, type, offset, index, vector_length, declaring_class_def_index); + HeapLocation* heap_loc = new (allocator_) HeapLocation( + ref_info, type, offset, index, vector_length, declaring_class_def_index, is_vec_op); heap_locations_.push_back(heap_loc); } } void VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) { - if (field_info.IsVolatile()) { - has_volatile_ = true; - } DataType::Type type = field_info.GetFieldType(); const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex(); const size_t offset = field_info.GetFieldOffset().SizeValue(); @@ -551,19 +548,22 @@ class HeapLocationCollector : public HGraphVisitor { offset, nullptr, HeapLocation::kScalar, - declaring_class_def_index); + declaring_class_def_index, + /*is_vec_op=*/false); } void VisitArrayAccess(HInstruction* array, HInstruction* index, DataType::Type type, - size_t vector_length) { + size_t vector_length, + bool is_vec_op) { MaybeCreateHeapLocation(array, type, HeapLocation::kInvalidFieldOffset, index, vector_length, - HeapLocation::kDeclaringClassDefIndexForArrays); + HeapLocation::kDeclaringClassDefIndexForArrays, + is_vec_op); } void VisitPredicatedInstanceFieldGet(HPredicatedInstanceFieldGet* instruction) override { @@ -597,7 +597,7 @@ class HeapLocationCollector : public HGraphVisitor { HInstruction* array = instruction->InputAt(0); HInstruction* index = instruction->InputAt(1); DataType::Type type = instruction->GetType(); - VisitArrayAccess(array, index, type, HeapLocation::kScalar); + VisitArrayAccess(array, index, type, HeapLocation::kScalar, /*is_vec_op=*/false); CreateReferenceInfoForReferenceType(instruction); } @@ -605,7 +605,7 @@ class HeapLocationCollector : public HGraphVisitor { HInstruction* array = instruction->InputAt(0); HInstruction* index = instruction->InputAt(1); DataType::Type type = instruction->GetComponentType(); - VisitArrayAccess(array, index, type, HeapLocation::kScalar); + VisitArrayAccess(array, index, type, HeapLocation::kScalar, /*is_vec_op=*/false); has_heap_stores_ = true; } @@ -613,7 +613,7 @@ class HeapLocationCollector : public 
HGraphVisitor { HInstruction* array = instruction->InputAt(0); HInstruction* index = instruction->InputAt(1); DataType::Type type = instruction->GetPackedType(); - VisitArrayAccess(array, index, type, instruction->GetVectorLength()); + VisitArrayAccess(array, index, type, instruction->GetVectorLength(), /*is_vec_op=*/true); CreateReferenceInfoForReferenceType(instruction); } @@ -621,7 +621,7 @@ class HeapLocationCollector : public HGraphVisitor { HInstruction* array = instruction->InputAt(0); HInstruction* index = instruction->InputAt(1); DataType::Type type = instruction->GetPackedType(); - VisitArrayAccess(array, index, type, instruction->GetVectorLength()); + VisitArrayAccess(array, index, type, instruction->GetVectorLength(), /*is_vec_op=*/true); has_heap_stores_ = true; } @@ -637,18 +637,12 @@ class HeapLocationCollector : public HGraphVisitor { CreateReferenceInfoForReferenceType(instruction); } - void VisitMonitorOperation(HMonitorOperation* monitor ATTRIBUTE_UNUSED) override { - has_monitor_operations_ = true; - } - ScopedArenaAllocator* allocator_; ScopedArenaVector<ReferenceInfo*> ref_info_array_; // All references used for heap accesses. ScopedArenaVector<HeapLocation*> heap_locations_; // All heap locations. ArenaBitVector aliasing_matrix_; // aliasing info between each pair of locations. bool has_heap_stores_; // If there is no heap stores, LSE acts as GVN with better // alias analysis and won't be as effective. - bool has_volatile_; // If there are volatile field accesses. - bool has_monitor_operations_; // If there are monitor operations. LoadStoreAnalysisType lse_type_; DISALLOW_COPY_AND_ASSIGN(HeapLocationCollector); diff --git a/compiler/optimizing/load_store_analysis_test.cc b/compiler/optimizing/load_store_analysis_test.cc index 3c26c8d6ce..865febbd31 100644 --- a/compiler/optimizing/load_store_analysis_test.cc +++ b/compiler/optimizing/load_store_analysis_test.cc @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "base/macros.h" #include "load_store_analysis.h" #include <array> @@ -36,7 +37,7 @@ #include "optimizing_unit_test.h" #include "scoped_thread_state_change.h" -namespace art { +namespace art HIDDEN { class LoadStoreAnalysisTest : public CommonCompilerTest, public OptimizingUnitTestHelper { public: @@ -117,12 +118,13 @@ TEST_F(LoadStoreAnalysisTest, ArrayHeapLocations) { size_t field = HeapLocation::kInvalidFieldOffset; size_t vec = HeapLocation::kScalar; size_t class_def = HeapLocation::kDeclaringClassDefIndexForArrays; + const bool is_vec_op = false; size_t loc1 = heap_location_collector.FindHeapLocationIndex( - ref, type, field, c1, vec, class_def); + ref, type, field, c1, vec, class_def, is_vec_op); size_t loc2 = heap_location_collector.FindHeapLocationIndex( - ref, type, field, c2, vec, class_def); + ref, type, field, c2, vec, class_def, is_vec_op); size_t loc3 = heap_location_collector.FindHeapLocationIndex( - ref, type, field, index, vec, class_def); + ref, type, field, index, vec, class_def, is_vec_op); // must find this reference info for array in HeapLocationCollector. 
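The `load_store_analysis` hunks above add an `is_vec_op` flag to the heap-location key, so a scalar access and a vector access to the same array index no longer collapse into one location. A reduced model of that lookup; the struct below is a stand-in, not ART's `HeapLocation`:

```c++
#include <cstddef>
#include <cstdint>
#include <vector>

struct HeapLocationKey {
  const void* ref;
  int type;
  size_t offset;
  const void* index;
  size_t vector_length;
  int16_t declaring_class_def_index;
  bool is_vec_op;  // new in this change: part of the location's identity
};

constexpr size_t kHeapLocationNotFound = static_cast<size_t>(-1);

size_t FindHeapLocationIndex(const std::vector<HeapLocationKey>& locations,
                             const HeapLocationKey& key) {
  for (size_t i = 0; i < locations.size(); ++i) {
    const HeapLocationKey& loc = locations[i];
    if (loc.ref == key.ref && loc.type == key.type && loc.offset == key.offset &&
        loc.index == key.index && loc.vector_length == key.vector_length &&
        loc.declaring_class_def_index == key.declaring_class_def_index &&
        loc.is_vec_op == key.is_vec_op) {
      return i;
    }
  }
  return kHeapLocationNotFound;
}
```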
ASSERT_TRUE(ref != nullptr); // must find these heap locations; @@ -142,7 +144,7 @@ TEST_F(LoadStoreAnalysisTest, ArrayHeapLocations) { ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc3)); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc3)); - EXPECT_TRUE(CheckGraph(graph_)); + EXPECT_TRUE(CheckGraph()); } TEST_F(LoadStoreAnalysisTest, FieldHeapLocations) { @@ -223,15 +225,14 @@ TEST_F(LoadStoreAnalysisTest, FieldHeapLocations) { // accesses to different fields of the same object should not alias. ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); - EXPECT_TRUE(CheckGraph(graph_)); + EXPECT_TRUE(CheckGraph()); } TEST_F(LoadStoreAnalysisTest, ArrayIndexAliasingTest) { CreateGraph(); - HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph_); - graph_->AddBlock(entry); - graph_->SetEntryBlock(entry); - graph_->BuildDominatorTree(); + AdjacencyListGraph blks( + SetupFromAdjacencyList("entry", "exit", {{"entry", "body"}, {"body", "exit"}})); + HBasicBlock* body = blks.Get("body"); HInstruction* array = new (GetAllocator()) HParameterValue( graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference); @@ -261,23 +262,25 @@ TEST_F(LoadStoreAnalysisTest, ArrayIndexAliasingTest) { HInstruction* arr_set8 = new (GetAllocator()) HArraySet(array, sub_neg1, c0, DataType::Type::kInt32, 0); - entry->AddInstruction(array); - entry->AddInstruction(index); - entry->AddInstruction(add0); - entry->AddInstruction(add1); - entry->AddInstruction(sub0); - entry->AddInstruction(sub1); - entry->AddInstruction(sub_neg1); - entry->AddInstruction(rev_sub1); - - entry->AddInstruction(arr_set1); // array[0] = c0 - entry->AddInstruction(arr_set2); // array[1] = c0 - entry->AddInstruction(arr_set3); // array[i+0] = c0 - entry->AddInstruction(arr_set4); // array[i+1] = c0 - entry->AddInstruction(arr_set5); // array[i-0] = c0 - entry->AddInstruction(arr_set6); // array[i-1] = c0 - entry->AddInstruction(arr_set7); // array[1-i] = c0 - entry->AddInstruction(arr_set8); // array[i-(-1)] = c0 + body->AddInstruction(array); + body->AddInstruction(index); + body->AddInstruction(add0); + body->AddInstruction(add1); + body->AddInstruction(sub0); + body->AddInstruction(sub1); + body->AddInstruction(sub_neg1); + body->AddInstruction(rev_sub1); + + body->AddInstruction(arr_set1); // array[0] = c0 + body->AddInstruction(arr_set2); // array[1] = c0 + body->AddInstruction(arr_set3); // array[i+0] = c0 + body->AddInstruction(arr_set4); // array[i+1] = c0 + body->AddInstruction(arr_set5); // array[i-0] = c0 + body->AddInstruction(arr_set6); // array[i-1] = c0 + body->AddInstruction(arr_set7); // array[1-i] = c0 + body->AddInstruction(arr_set8); // array[i-(-1)] = c0 + + body->AddInstruction(new (GetAllocator()) HReturnVoid()); ScopedArenaAllocator allocator(graph_->GetArenaStack()); LoadStoreAnalysis lsa(graph_, nullptr, &allocator, LoadStoreAnalysisType::kBasic); @@ -317,7 +320,7 @@ TEST_F(LoadStoreAnalysisTest, ArrayIndexAliasingTest) { loc2 = heap_location_collector.GetArrayHeapLocation(arr_set8); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); - EXPECT_TRUE(CheckGraphSkipRefTypeInfoChecks(graph_)); + EXPECT_TRUE(CheckGraph()); } TEST_F(LoadStoreAnalysisTest, ArrayAliasingTest) { @@ -891,7 +894,8 @@ TEST_F(LoadStoreAnalysisTest, PartialEscape) { {}, InvokeType::kStatic, { nullptr, 0 }, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, + !graph_->IsDebuggable()); HInstruction* goto_left = new (GetAllocator()) HGoto(); 
call_left->AsInvoke()->SetRawInputAt(0, new_inst); left->AddInstruction(call_left); @@ -1000,7 +1004,8 @@ TEST_F(LoadStoreAnalysisTest, PartialEscape2) { {}, InvokeType::kStatic, { nullptr, 0 }, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, + !graph_->IsDebuggable()); HInstruction* goto_left = new (GetAllocator()) HGoto(); call_left->AsInvoke()->SetRawInputAt(0, new_inst); left->AddInstruction(call_left); @@ -1123,7 +1128,8 @@ TEST_F(LoadStoreAnalysisTest, PartialEscape3) { {}, InvokeType::kStatic, { nullptr, 0 }, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, + !graph_->IsDebuggable()); HInstruction* goto_left = new (GetAllocator()) HGoto(); call_left->AsInvoke()->SetRawInputAt(0, new_inst); left->AddInstruction(call_left); @@ -1403,7 +1409,8 @@ TEST_F(LoadStoreAnalysisTest, TotalEscapeAdjacentNoPredicated) { {}, InvokeType::kStatic, {nullptr, 0}, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, + !graph_->IsDebuggable()); HInstruction* goto_left = new (GetAllocator()) HGoto(); call_left->AsInvoke()->SetRawInputAt(0, new_inst); left->AddInstruction(call_left); @@ -1504,7 +1511,8 @@ TEST_F(LoadStoreAnalysisTest, TotalEscapeAdjacent) { {}, InvokeType::kStatic, { nullptr, 0 }, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, + !graph_->IsDebuggable()); HInstruction* goto_left = new (GetAllocator()) HGoto(); call_left->AsInvoke()->SetRawInputAt(0, new_inst); left->AddInstruction(call_left); @@ -1615,7 +1623,8 @@ TEST_F(LoadStoreAnalysisTest, TotalEscape) { {}, InvokeType::kStatic, { nullptr, 0 }, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, + !graph_->IsDebuggable()); HInstruction* goto_left = new (GetAllocator()) HGoto(); call_left->AsInvoke()->SetRawInputAt(0, new_inst); left->AddInstruction(call_left); @@ -1631,7 +1640,8 @@ TEST_F(LoadStoreAnalysisTest, TotalEscape) { {}, InvokeType::kStatic, { nullptr, 0 }, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, + !graph_->IsDebuggable()); HInstruction* write_right = new (GetAllocator()) HInstanceFieldSet(new_inst, c0, nullptr, @@ -1800,7 +1810,8 @@ TEST_F(LoadStoreAnalysisTest, DoubleDiamondEscape) { {}, InvokeType::kStatic, { nullptr, 0 }, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, + !graph_->IsDebuggable()); HInstruction* goto_left = new (GetAllocator()) HGoto(); call_left->AsInvoke()->SetRawInputAt(0, new_inst); high_left->AddInstruction(call_left); @@ -1856,7 +1867,8 @@ TEST_F(LoadStoreAnalysisTest, DoubleDiamondEscape) { {}, InvokeType::kStatic, { nullptr, 0 }, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, + !graph_->IsDebuggable()); HInstruction* goto_low_left = new (GetAllocator()) HGoto(); call_low_left->AsInvoke()->SetRawInputAt(0, new_inst); low_left->AddInstruction(call_low_left); @@ -2013,7 +2025,8 @@ TEST_F(LoadStoreAnalysisTest, PartialPhiPropagation1) { {}, InvokeType::kStatic, {nullptr, 0}, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, + !graph_->IsDebuggable()); HInstruction* goto_left_merge = new (GetAllocator()) HGoto(); 
left_phi->SetRawInputAt(0, obj_param); left_phi->SetRawInputAt(1, new_inst); diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 9b8f07e969..9cabb12a9f 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -319,7 +319,7 @@ * a hash map to the HeapLocationCollector. */ -namespace art { +namespace art HIDDEN { #define LSE_VLOG \ if (::art::LoadStoreElimination::kVerboseLoggingMode && VLOG_IS_ON(compiler)) LOG(INFO) @@ -855,25 +855,6 @@ class LSEVisitor final : private HGraphDelegateVisitor { } } - // `instruction` is being removed. Try to see if the null check on it - // can be removed. This can happen if the same value is set in two branches - // but not in dominators. Such as: - // int[] a = foo(); - // if () { - // a[0] = 2; - // } else { - // a[0] = 2; - // } - // // a[0] can now be replaced with constant 2, and the null check on it can be removed. - void TryRemovingNullCheck(HInstruction* instruction) { - HInstruction* prev = instruction->GetPrevious(); - if ((prev != nullptr) && prev->IsNullCheck() && (prev == instruction->InputAt(0))) { - // Previous instruction is a null check for this instruction. Remove the null check. - prev->ReplaceWith(prev->InputAt(0)); - prev->GetBlock()->RemoveInstruction(prev); - } - } - HInstruction* GetDefaultValue(DataType::Type type) { switch (type) { case DataType::Type::kReference: @@ -993,13 +974,63 @@ class LSEVisitor final : private HGraphDelegateVisitor { << " but LSE should be the only source of predicated-ifield-gets!"; } + void HandleAcquireLoad(HInstruction* instruction) { + DCHECK((instruction->IsInstanceFieldGet() && instruction->AsInstanceFieldGet()->IsVolatile()) || + (instruction->IsStaticFieldGet() && instruction->AsStaticFieldGet()->IsVolatile()) || + (instruction->IsMonitorOperation() && instruction->AsMonitorOperation()->IsEnter())) + << "Unexpected instruction " << instruction->GetId() << ": " << instruction->DebugName(); + + // Acquire operations e.g. MONITOR_ENTER change the thread's view of the memory, so we must + // invalidate all current values. + ScopedArenaVector<ValueRecord>& heap_values = + heap_values_for_[instruction->GetBlock()->GetBlockId()]; + for (size_t i = 0u, size = heap_values.size(); i != size; ++i) { + KeepStores(heap_values[i].stored_by); + heap_values[i].stored_by = Value::PureUnknown(); + heap_values[i].value = Value::PartialUnknown(heap_values[i].value); + } + + // Note that there's no need to record the load as subsequent acquire loads shouldn't be + // eliminated either. + } + + void HandleReleaseStore(HInstruction* instruction) { + DCHECK((instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->IsVolatile()) || + (instruction->IsStaticFieldSet() && instruction->AsStaticFieldSet()->IsVolatile()) || + (instruction->IsMonitorOperation() && !instruction->AsMonitorOperation()->IsEnter())) + << "Unexpected instruction " << instruction->GetId() << ": " << instruction->DebugName(); + + // Release operations e.g. MONITOR_EXIT do not affect this thread's view of the memory, but + // they will push the modifications for other threads to see. Therefore, we must keep the + // stores but there's no need to clobber the value. 
+ ScopedArenaVector<ValueRecord>& heap_values = + heap_values_for_[instruction->GetBlock()->GetBlockId()]; + for (size_t i = 0u, size = heap_values.size(); i != size; ++i) { + KeepStores(heap_values[i].stored_by); + heap_values[i].stored_by = Value::PureUnknown(); + } + + // Note that there's no need to record the store as subsequent release store shouldn't be + // eliminated either. + } + void VisitInstanceFieldGet(HInstanceFieldGet* instruction) override { + if (instruction->IsVolatile()) { + HandleAcquireLoad(instruction); + return; + } + HInstruction* object = instruction->InputAt(0); const FieldInfo& field = instruction->GetFieldInfo(); VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(object, &field)); } void VisitInstanceFieldSet(HInstanceFieldSet* instruction) override { + if (instruction->IsVolatile()) { + HandleReleaseStore(instruction); + return; + } + HInstruction* object = instruction->InputAt(0); const FieldInfo& field = instruction->GetFieldInfo(); HInstruction* value = instruction->InputAt(1); @@ -1008,12 +1039,22 @@ class LSEVisitor final : private HGraphDelegateVisitor { } void VisitStaticFieldGet(HStaticFieldGet* instruction) override { + if (instruction->IsVolatile()) { + HandleAcquireLoad(instruction); + return; + } + HInstruction* cls = instruction->InputAt(0); const FieldInfo& field = instruction->GetFieldInfo(); VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(cls, &field)); } void VisitStaticFieldSet(HStaticFieldSet* instruction) override { + if (instruction->IsVolatile()) { + HandleReleaseStore(instruction); + return; + } + HInstruction* cls = instruction->InputAt(0); const FieldInfo& field = instruction->GetFieldInfo(); HInstruction* value = instruction->InputAt(1); @@ -1021,6 +1062,14 @@ class LSEVisitor final : private HGraphDelegateVisitor { VisitSetLocation(instruction, idx, value); } + void VisitMonitorOperation(HMonitorOperation* monitor_op) override { + if (monitor_op->IsEnter()) { + HandleAcquireLoad(monitor_op); + } else { + HandleReleaseStore(monitor_op); + } + } + void VisitArrayGet(HArrayGet* instruction) override { VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction)); } @@ -1040,8 +1089,8 @@ class LSEVisitor final : private HGraphDelegateVisitor { } void VisitDeoptimize(HDeoptimize* instruction) override { - // If we are in a try catch, even singletons are observable. - const bool in_try_catch = instruction->GetBlock()->GetTryCatchInformation() != nullptr; + // If we are in a try, even singletons are observable. + const bool inside_a_try = instruction->GetBlock()->IsTryBlock(); HBasicBlock* block = instruction->GetBlock(); ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()]; for (size_t i = 0u, size = heap_values.size(); i != size; ++i) { @@ -1053,7 +1102,7 @@ class LSEVisitor final : private HGraphDelegateVisitor { // for singletons that don't escape in the deoptimization environment. bool observable = true; ReferenceInfo* info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo(); - if (!in_try_catch && info->IsSingleton()) { + if (!inside_a_try && info->IsSingleton()) { HInstruction* reference = info->GetReference(); // Finalizable objects always escape. 
const bool finalizable_object = @@ -1099,10 +1148,8 @@ class LSEVisitor final : private HGraphDelegateVisitor { void HandleThrowingInstruction(HInstruction* instruction) { DCHECK(instruction->CanThrow()); - // If we are inside of a try catch, singletons can become visible since we may not exit the - // method. - HandleExit(instruction->GetBlock(), - instruction->GetBlock()->GetTryCatchInformation() != nullptr); + // If we are inside of a try, singletons can become visible since we may not exit the method. + HandleExit(instruction->GetBlock(), instruction->GetBlock()->IsTryBlock()); } void VisitMethodEntryHook(HMethodEntryHook* method_entry) override { @@ -1137,6 +1184,14 @@ class LSEVisitor final : private HGraphDelegateVisitor { } } + void VisitLoadMethodHandle(HLoadMethodHandle* load_method_handle) override { + HandleThrowingInstruction(load_method_handle); + } + + void VisitLoadMethodType(HLoadMethodType* load_method_type) override { + HandleThrowingInstruction(load_method_type); + } + void VisitStringBuilderAppend(HStringBuilderAppend* sb_append) override { HandleThrowingInstruction(sb_append); } @@ -1149,18 +1204,11 @@ class LSEVisitor final : private HGraphDelegateVisitor { HandleThrowingInstruction(check_cast); } - void VisitMonitorOperation(HMonitorOperation* monitor_op) override { - if (monitor_op->CanThrow()) { - HandleThrowingInstruction(monitor_op); - } - } - void HandleInvoke(HInstruction* instruction) { // If `instruction` can throw we have to presume all stores are visible. const bool can_throw = instruction->CanThrow(); - // If we are in a try catch, even singletons are observable. - const bool can_throw_in_try_catch = - can_throw && instruction->GetBlock()->GetTryCatchInformation() != nullptr; + // If we are in a try, even singletons are observable. + const bool can_throw_inside_a_try = can_throw && instruction->GetBlock()->IsTryBlock(); SideEffects side_effects = instruction->GetSideEffects(); ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[instruction->GetBlock()->GetBlockId()]; @@ -1186,7 +1234,7 @@ class LSEVisitor final : private HGraphDelegateVisitor { return cohort.PrecedesBlock(blk); }); }; - if (!can_throw_in_try_catch && + if (!can_throw_inside_a_try && (ref_info->IsSingleton() || // partial and we aren't currently escaping and we haven't escaped yet. (ref_info->IsPartialSingleton() && partial_singleton_did_not_escape(ref_info, blk)))) { @@ -1235,8 +1283,8 @@ class LSEVisitor final : private HGraphDelegateVisitor { } void VisitNewInstance(HNewInstance* new_instance) override { - // If we are in a try catch, even singletons are observable. - const bool in_try_catch = new_instance->GetBlock()->GetTryCatchInformation() != nullptr; + // If we are in a try, even singletons are observable. + const bool inside_a_try = new_instance->GetBlock()->IsTryBlock(); ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_instance); if (ref_info == nullptr) { // new_instance isn't used for field accesses. No need to process it. @@ -1265,7 +1313,7 @@ class LSEVisitor final : private HGraphDelegateVisitor { heap_values[i].value = Value::ForInstruction(new_instance->GetLoadClass()); heap_values[i].stored_by = Value::PureUnknown(); } - } else if (in_try_catch || IsEscapingObject(info, block, i)) { + } else if (inside_a_try || IsEscapingObject(info, block, i)) { // Since NewInstance can throw, we presume all previous stores could be visible. 
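Several hunks above replace the "are we in a try catch" test with HBasicBlock::IsTryBlock(): only blocks covered by a try entry need the conservative treatment, because a throwing instruction there can transfer control to a catch handler in the same method, making stores to otherwise non-escaping objects observable. A hand-written C++ analogue (ordinary C++ exceptions, not ART IR) of why such a store cannot be dropped:

#include <stdexcept>

int ObservableThroughCatch(bool do_throw) {
  int box[1] = {0};  // Never escapes this function (a "singleton").
  try {
    box[0] = 42;     // A naive analysis could call this store dead...
    if (do_throw) {
      throw std::runtime_error("boom");  // ...but control may reach the handler below.
    }
    return 0;
  } catch (const std::runtime_error&) {
    return box[0];   // The store is observed here, so it must be kept.
  }
}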
KeepStores(heap_values[i].stored_by); heap_values[i].stored_by = Value::PureUnknown(); @@ -1274,8 +1322,8 @@ class LSEVisitor final : private HGraphDelegateVisitor { } void VisitNewArray(HNewArray* new_array) override { - // If we are in a try catch, even singletons are observable. - const bool in_try_catch = new_array->GetBlock()->GetTryCatchInformation() != nullptr; + // If we are in a try, even singletons are observable. + const bool inside_a_try = new_array->GetBlock()->IsTryBlock(); ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_array); if (ref_info == nullptr) { // new_array isn't used for array accesses. No need to process it. @@ -1300,7 +1348,7 @@ class LSEVisitor final : private HGraphDelegateVisitor { // Array elements are set to default heap values. heap_values[i].value = Value::Default(); heap_values[i].stored_by = Value::PureUnknown(); - } else if (in_try_catch || IsEscapingObject(info, block, i)) { + } else if (inside_a_try || IsEscapingObject(info, block, i)) { // Since NewArray can throw, we presume all previous stores could be visible. KeepStores(heap_values[i].stored_by); heap_values[i].stored_by = Value::PureUnknown(); @@ -1704,8 +1752,7 @@ void LSEVisitor::MergePredecessorRecords(HBasicBlock* block) { ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()]; DCHECK(heap_values.empty()); size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations(); - if (block->GetPredecessors().empty() || (block->GetTryCatchInformation() != nullptr && - block->GetTryCatchInformation()->IsCatchBlock())) { + if (block->GetPredecessors().empty() || block->IsCatchBlock()) { DCHECK_IMPLIES(block->GetPredecessors().empty(), block->IsEntryBlock()); heap_values.resize(num_heap_locations, {/*value=*/Value::PureUnknown(), /*stored_by=*/Value::PureUnknown()}); @@ -1764,7 +1811,6 @@ static HInstruction* FindOrConstructNonLoopPhi( if (type == DataType::Type::kReference) { // Update reference type information. Pass invalid handles, these are not used for Phis. ReferenceTypePropagation rtp_fixup(block->GetGraph(), - Handle<mirror::ClassLoader>(), Handle<mirror::DexCache>(), /* is_first_run= */ false); rtp_fixup.Visit(phi); @@ -1877,7 +1923,6 @@ void LSEVisitor::VisitGetLocation(HInstruction* instruction, size_t idx) { } HInstruction* heap_value = FindSubstitute(record.value.GetInstruction()); AddRemovedLoad(instruction, heap_value); - TryRemovingNullCheck(instruction); } } @@ -2068,9 +2113,15 @@ bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithDefault( HInstruction* replacement = GetDefaultValue(type); for (uint32_t phi_placeholder_index : visited.Indexes()) { DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid()); - phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement); + PhiPlaceholder curr = GetPhiPlaceholderAt(phi_placeholder_index); + HeapLocation* hl = heap_location_collector_.GetHeapLocation(curr.GetHeapLocation()); + // We use both vector and non vector operations to analyze the information. However, we replace + // only non vector operations in this code path. 
+ if (!hl->IsVecOp()) { + phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement); + phi_placeholders_to_materialize->ClearBit(phi_placeholder_index); + } } - phi_placeholders_to_materialize->Subtract(&visited); return true; } @@ -2125,9 +2176,15 @@ bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithSingleInput( DCHECK(replacement != nullptr); for (uint32_t phi_placeholder_index : visited.Indexes()) { DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid()); - phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement); + PhiPlaceholder curr = GetPhiPlaceholderAt(phi_placeholder_index); + HeapLocation* hl = heap_location_collector_.GetHeapLocation(curr.GetHeapLocation()); + // We use both vector and non vector operations to analyze the information. However, we replace + // only vector operations in this code path. + if (hl->IsVecOp()) { + phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement); + phi_placeholders_to_materialize->ClearBit(phi_placeholder_index); + } } - phi_placeholders_to_materialize->Subtract(&visited); return true; } @@ -2352,7 +2409,6 @@ bool LSEVisitor::MaterializeLoopPhis(ArrayRef<const size_t> phi_placeholder_inde } // Update reference type information. Pass invalid handles, these are not used for Phis. ReferenceTypePropagation rtp_fixup(GetGraph(), - Handle<mirror::ClassLoader>(), Handle<mirror::DexCache>(), /* is_first_run= */ false); rtp_fixup.Visit(ArrayRef<HInstruction* const>(phis)); @@ -2639,7 +2695,6 @@ void LSEVisitor::ProcessLoopPhiWithUnknownInput(PhiPlaceholder loop_phi_with_unk record.value = local_heap_values[idx]; HInstruction* heap_value = local_heap_values[idx].GetInstruction(); AddRemovedLoad(load_or_store, heap_value); - TryRemovingNullCheck(load_or_store); } } } @@ -2698,7 +2753,6 @@ void LSEVisitor::ProcessLoadsRequiringLoopPhis() { record.value = Replacement(record.value); HInstruction* heap_value = record.value.GetInstruction(); AddRemovedLoad(load, heap_value); - TryRemovingNullCheck(load); } } } @@ -3013,7 +3067,6 @@ class PartialLoadStoreEliminationHelper { return; } ReferenceTypePropagation rtp_fixup(GetGraph(), - Handle<mirror::ClassLoader>(), Handle<mirror::DexCache>(), /* is_first_run= */ false); rtp_fixup.Visit(ArrayRef<HInstruction* const>(new_ref_phis_)); @@ -3333,7 +3386,7 @@ class PartialLoadStoreEliminationHelper { ins->GetBlock()->InsertInstructionBefore(new_fget, ins); if (ins->GetType() == DataType::Type::kReference) { // Reference info is the same - new_fget->SetReferenceTypeInfo(ins->GetReferenceTypeInfo()); + new_fget->SetReferenceTypeInfoIfValid(ins->GetReferenceTypeInfo()); } // In this phase, substitute instructions are used only for the predicated get // default values which are used only if the partial singleton did not escape, diff --git a/compiler/optimizing/load_store_elimination.h b/compiler/optimizing/load_store_elimination.h index 6ad2eb2c51..42de803ebd 100644 --- a/compiler/optimizing/load_store_elimination.h +++ b/compiler/optimizing/load_store_elimination.h @@ -17,9 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_LOAD_STORE_ELIMINATION_H_ #define ART_COMPILER_OPTIMIZING_LOAD_STORE_ELIMINATION_H_ +#include "base/macros.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { class SideEffectsAnalysis; diff --git a/compiler/optimizing/load_store_elimination_test.cc b/compiler/optimizing/load_store_elimination_test.cc index 02dc939878..1ee109980f 100644 --- 
a/compiler/optimizing/load_store_elimination_test.cc +++ b/compiler/optimizing/load_store_elimination_test.cc @@ -36,7 +36,9 @@ #include "optimizing_unit_test.h" #include "scoped_thread_state_change.h" -namespace art { +namespace art HIDDEN { + +static constexpr bool kDebugLseTests = false; #define CHECK_SUBROUTINE_FAILURE() \ do { \ @@ -54,12 +56,16 @@ class LoadStoreEliminationTestBase : public SuperTest, public OptimizingUnitTest void SetUp() override { SuperTest::SetUp(); - gLogVerbosity.compiler = true; + if (kDebugLseTests) { + gLogVerbosity.compiler = true; + } } void TearDown() override { SuperTest::TearDown(); - gLogVerbosity.compiler = false; + if (kDebugLseTests) { + gLogVerbosity.compiler = false; + } } void PerformLSE(bool with_partial = true) { @@ -67,15 +73,40 @@ class LoadStoreEliminationTestBase : public SuperTest, public OptimizingUnitTest LoadStoreElimination lse(graph_, /*stats=*/nullptr); lse.Run(with_partial); std::ostringstream oss; - EXPECT_TRUE(CheckGraphSkipRefTypeInfoChecks(oss)) << oss.str(); + EXPECT_TRUE(CheckGraph(oss)) << oss.str(); } - void PerformLSEWithPartial() { - PerformLSE(true); + void PerformLSEWithPartial(const AdjacencyListGraph& blks) { + // PerformLSE expects this to be empty. + graph_->ClearDominanceInformation(); + if (kDebugLseTests) { + LOG(INFO) << "Pre LSE " << blks; + } + PerformLSE(/*with_partial=*/ true); + if (kDebugLseTests) { + LOG(INFO) << "Post LSE " << blks; + } } - void PerformLSENoPartial() { - PerformLSE(false); + void PerformLSENoPartial(const AdjacencyListGraph& blks) { + // PerformLSE expects this to be empty. + graph_->ClearDominanceInformation(); + if (kDebugLseTests) { + LOG(INFO) << "Pre LSE " << blks; + } + PerformLSE(/*with_partial=*/ false); + if (kDebugLseTests) { + LOG(INFO) << "Post LSE " << blks; + } + } + + void PerformSimplifications(const AdjacencyListGraph& blks) { + InstructionSimplifier simp(graph_, /*codegen=*/nullptr); + simp.Run(); + + if (kDebugLseTests) { + LOG(INFO) << "Post simplification " << blks; + } } // Create instructions shared among tests. 
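The test-harness change above folds the repeated "clear dominance, log, run LSE, log" boilerplate into PerformLSEWithPartial/PerformLSENoPartial/PerformSimplifications helpers whose logging is gated by the new kDebugLseTests constant, so verbose compiler output stays off unless a developer flips the flag locally. A generic sketch of that pattern follows; the names are invented and the template only assumes the graph type prints via ToString().

#include <iostream>
#include <string>

// Compile-time switch: flip to true locally when debugging a failing test.
static constexpr bool kDebugMyPassTests = false;

// Hypothetical helper mirroring the PerformLSEWithPartial(blks) shape:
// optionally log the graph, run the pass, optionally log again.
template <typename Graph, typename Pass>
void RunPassWithOptionalLogging(Graph& graph, Pass&& pass, const std::string& name) {
  if (kDebugMyPassTests) {
    std::cout << "Pre " << name << ": " << graph.ToString() << '\n';
  }
  pass(graph);
  if (kDebugMyPassTests) {
    std::cout << "Post " << name << ": " << graph.ToString() << '\n';
  }
}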
@@ -542,6 +573,7 @@ TEST_F(LoadStoreEliminationTest, SameHeapValue2) { AddVecStore(entry_block_, array_, j_); HInstruction* vstore = AddVecStore(entry_block_, array_, i_); + graph_->SetHasSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vstore)); @@ -557,6 +589,7 @@ TEST_F(LoadStoreEliminationTest, SameHeapValue3) { AddVecStore(entry_block_, array_, i_add1_); HInstruction* vstore = AddVecStore(entry_block_, array_, i_); + graph_->SetHasSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vstore)); @@ -601,6 +634,7 @@ TEST_F(LoadStoreEliminationTest, OverlappingLoadStore) { AddArraySet(entry_block_, array_, i_, c1); HInstruction* vload5 = AddVecLoad(entry_block_, array_, i_); + graph_->SetHasSIMD(true); PerformLSE(); ASSERT_TRUE(IsRemoved(load1)); @@ -634,6 +668,7 @@ TEST_F(LoadStoreEliminationTest, StoreAfterLoopWithoutSideEffects) { // a[j] = 1; HInstruction* array_set = AddArraySet(return_block_, array_, j_, c1); + graph_->SetHasSIMD(true); PerformLSE(); ASSERT_TRUE(IsRemoved(array_set)); @@ -671,6 +706,7 @@ TEST_F(LoadStoreEliminationTest, StoreAfterSIMDLoopWithSideEffects) { // a[j] = 0; HInstruction* a_set = AddArraySet(return_block_, array_, j_, c0); + graph_->SetHasSIMD(true); PerformLSE(); ASSERT_TRUE(IsRemoved(vload)); @@ -709,6 +745,7 @@ TEST_F(LoadStoreEliminationTest, LoadAfterSIMDLoopWithSideEffects) { // x = a[j]; HInstruction* load = AddArrayGet(return_block_, array_, j_); + graph_->SetHasSIMD(true); PerformLSE(); ASSERT_TRUE(IsRemoved(vload)); @@ -749,6 +786,7 @@ TEST_F(LoadStoreEliminationTest, MergePredecessorVecStores) { // down: a[i,... i + 3] = [1,...1] HInstruction* vstore4 = AddVecStore(down, array_, i_, vdata); + graph_->SetHasSIMD(true); PerformLSE(); ASSERT_TRUE(IsRemoved(vstore2)); @@ -839,6 +877,7 @@ TEST_F(LoadStoreEliminationTest, RedundantVStoreVLoadInLoop) { HInstruction* vstore2 = AddVecStore(loop_, array_b, phi_, vload->AsVecLoad()); HInstruction* vstore3 = AddVecStore(loop_, array_a, phi_, vstore1->InputAt(2)); + graph_->SetHasSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vstore1)); @@ -894,7 +933,7 @@ TEST_F(LoadStoreEliminationTest, StoreAfterLoopWithSideEffects2) { // loop: // array2[i] = array[i] // array[0] = 2 - HInstruction* store1 = AddArraySet(entry_block_, array_, c0, c2); + HInstruction* store1 = AddArraySet(pre_header_, array_, c0, c2); HInstruction* load = AddArrayGet(loop_, array_, phi_); HInstruction* store2 = AddArraySet(loop_, array2, phi_, load); @@ -926,6 +965,7 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValueInLoopWithoutWriteSideEffects) HInstruction* vload = AddVecLoad(loop_, array_a, phi_); HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad()); + graph_->SetHasSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vload)); @@ -949,6 +989,7 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValue) { HInstruction* vload = AddVecLoad(pre_header_, array_a, c0); HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad()); + graph_->SetHasSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vload)); @@ -1025,6 +1066,7 @@ TEST_F(LoadStoreEliminationTest, VLoadAndLoadDefaultValueInLoopWithoutWriteSideE HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad()); HInstruction* store = AddArraySet(return_block_, array_, c0, load); + graph_->SetHasSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vload)); @@ -1055,6 +1097,7 @@ TEST_F(LoadStoreEliminationTest, VLoadAndLoadDefaultValue) { HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad()); HInstruction* 
store = AddArraySet(return_block_, array_, c0, load); + graph_->SetHasSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vload)); @@ -1086,6 +1129,7 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValueAndVLoadInLoopWithoutWriteSide HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1->AsVecLoad()); HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2->AsVecLoad()); + graph_->SetHasSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vload1)); @@ -1116,6 +1160,7 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValueAndVLoad) { HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1->AsVecLoad()); HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2->AsVecLoad()); + graph_->SetHasSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vload1)); @@ -2024,10 +2069,7 @@ TEST_F(LoadStoreEliminationTest, PartialUnknownMerge) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSENoPartial(); + PerformLSENoPartial(blks); EXPECT_INS_RETAINED(read_bottom); EXPECT_INS_RETAINED(write_c1); @@ -2174,9 +2216,8 @@ TEST_F(LoadStoreEliminationTest, PartialLoadPreserved) { HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom); exit->AddInstruction(read_bottom); exit->AddInstruction(return_exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - PerformLSENoPartial(); + + PerformLSENoPartial(blks); EXPECT_INS_RETAINED(read_bottom) << *read_bottom; EXPECT_INS_RETAINED(write_right) << *write_right; @@ -2266,9 +2307,8 @@ TEST_F(LoadStoreEliminationTest, PartialLoadPreserved2) { HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom); exit->AddInstruction(read_bottom); exit->AddInstruction(return_exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - PerformLSENoPartial(); + + PerformLSENoPartial(blks); EXPECT_INS_RETAINED(read_bottom); EXPECT_INS_RETAINED(write_right_first); @@ -2499,11 +2539,7 @@ TEST_F(LoadStoreEliminationTest, PartialPhiPropagation) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); HPredicatedInstanceFieldGet* pred_get = FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_); @@ -2656,11 +2692,7 @@ TEST_P(OrderDependentTestGroup, PredicatedUse) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_RETAINED(call_left_left); EXPECT_INS_REMOVED(read1); @@ -2814,11 +2846,7 @@ TEST_P(OrderDependentTestGroup, PredicatedEnvUse) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); HNewInstance* moved_new_inst1; HInstanceFieldSet* moved_set1; @@ -2954,11 +2982,7 @@ TEST_P(OrderDependentTestGroup, FieldSetOrderEnv) { SetupExit(exit); - // PerformLSE expects this to be empty. 
- graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_REMOVED(write_entry1); EXPECT_INS_REMOVED(write_entry2); @@ -3115,11 +3139,7 @@ TEST_P(OrderDependentTestGroup, MaterializationMovedUse) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_REMOVED(new_inst1); EXPECT_INS_REMOVED(new_inst2); @@ -3205,11 +3225,7 @@ TEST_F(LoadStoreEliminationTest, MovePredicatedAlloc) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); HNewInstance* moved_new_inst = nullptr; HInstanceFieldSet* moved_set = nullptr; @@ -3320,11 +3336,7 @@ TEST_F(LoadStoreEliminationTest, MutiPartialLoadStore) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); HNewInstance* moved_new_inst = nullptr; HInstanceFieldSet* moved_set = nullptr; @@ -3497,11 +3509,7 @@ TEST_F(LoadStoreEliminationTest, MutiPartialLoadStore2) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); HNewInstance* moved_new_inst = nullptr; HInstanceFieldSet* moved_set = nullptr; @@ -3639,11 +3647,7 @@ TEST_F(LoadStoreEliminationTest, MovePredicatedAlloc2) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); HNewInstance* moved_new_inst; HInstanceFieldSet* moved_set; @@ -3746,11 +3750,7 @@ TEST_F(LoadStoreEliminationTest, MovePredicatedAlloc3) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); // Each escaping switch path gets its own materialization block. // Blocks: @@ -3877,11 +3877,7 @@ TEST_F(LoadStoreEliminationTest, MovePredicatedAlloc4) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_REMOVED(read_early); EXPECT_EQ(return_early->InputAt(0), c0); @@ -4013,11 +4009,7 @@ TEST_F(LoadStoreEliminationTest, MovePredicatedAlloc5) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); // Normal LSE can get rid of these two. EXPECT_INS_REMOVED(store_one); @@ -4504,9 +4496,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoadPreserved3) { SetupExit(exit); - // PerformLSE expects this to be empty. 
- graph_->ClearDominanceInformation(); - PerformLSENoPartial(); + PerformLSENoPartial(blks); EXPECT_INS_RETAINED(write_left_pre) << *write_left_pre; EXPECT_INS_RETAINED(read_return) << *read_return; @@ -4612,9 +4602,7 @@ TEST_F(LoadStoreEliminationTest, DISABLED_PartialLoadPreserved4) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - PerformLSENoPartial(); + PerformLSENoPartial(blks); EXPECT_INS_RETAINED(read_return); EXPECT_INS_RETAINED(write_right); @@ -4700,9 +4688,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoadPreserved5) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - PerformLSENoPartial(); + PerformLSENoPartial(blks); EXPECT_INS_RETAINED(read_bottom); EXPECT_INS_RETAINED(write_right); @@ -4785,12 +4771,7 @@ TEST_F(LoadStoreEliminationTest, DISABLED_PartialLoadPreserved6) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - - LOG(INFO) << "Pre LSE " << blks; - PerformLSENoPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSENoPartial(blks); EXPECT_INS_REMOVED(read_bottom); EXPECT_INS_REMOVED(write_right); @@ -4829,8 +4810,9 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonBeforeCohort) { CreateGraph(/*handles=*/&vshs); AdjacencyListGraph blks(SetupFromAdjacencyList("entry", "exit", - {{"entry", "critical_break"}, - {"entry", "partial"}, + {{"entry", "first_block"}, + {"first_block", "critical_break"}, + {"first_block", "partial"}, {"partial", "merge"}, {"critical_break", "merge"}, {"merge", "left"}, @@ -4839,7 +4821,7 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonBeforeCohort) { {"right", "breturn"}, {"breturn", "exit"}})); #define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) - GET_BLOCK(entry); + GET_BLOCK(first_block); GET_BLOCK(merge); GET_BLOCK(partial); GET_BLOCK(critical_break); @@ -4858,12 +4840,12 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonBeforeCohort) { HInstruction* write_entry = MakeIFieldSet(new_inst, c3, MemberOffset(32)); ComparisonInstructions cmp_instructions = GetComparisonInstructions(new_inst); HInstruction* if_inst = new (GetAllocator()) HIf(cmp_instructions.cmp_); - entry->AddInstruction(cls); - entry->AddInstruction(new_inst); - entry->AddInstruction(write_entry); - cmp_instructions.AddSetup(entry); - entry->AddInstruction(cmp_instructions.cmp_); - entry->AddInstruction(if_inst); + first_block->AddInstruction(cls); + first_block->AddInstruction(new_inst); + first_block->AddInstruction(write_entry); + cmp_instructions.AddSetup(first_block); + first_block->AddInstruction(cmp_instructions.cmp_); + first_block->AddInstruction(if_inst); ManuallyBuildEnvFor(cls, {}); cmp_instructions.AddEnvironment(cls->GetEnvironment()); new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); @@ -4897,12 +4879,7 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonBeforeCohort) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); std::vector<HPhi*> merges; HPredicatedInstanceFieldGet* pred_get; @@ -5026,11 +5003,7 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonInCohortBeforeEscape) { SetupExit(exit); - // PerformLSE expects this to be empty. 
- graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); std::vector<HPhi*> merges; HInstanceFieldSet* init_set = @@ -5157,11 +5130,7 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonAfterCohort) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); std::vector<HPhi*> merges; HInstanceFieldSet* init_set = @@ -5290,12 +5259,7 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonInCohortAfterEscape) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); std::vector<HPhi*> merges; std::vector<HInstanceFieldSet*> sets; @@ -5424,12 +5388,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedStore1) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_RETAINED(write_bottom); EXPECT_TRUE(write_bottom->AsInstanceFieldSet()->GetIsPredicatedSet()); @@ -5539,11 +5498,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedStore2) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_RETAINED(write_bottom); EXPECT_TRUE(write_bottom->AsInstanceFieldSet()->GetIsPredicatedSet()) << *write_bottom; @@ -5627,11 +5582,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedLoad1) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_REMOVED(read_bottom); EXPECT_INS_REMOVED(write_right); @@ -5748,11 +5699,7 @@ TEST_F(LoadStoreEliminationTest, MultiPredicatedLoad1) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_REMOVED(read_bottom1); EXPECT_INS_REMOVED(read_bottom2); @@ -5901,11 +5848,7 @@ TEST_F(LoadStoreEliminationTest, MultiPredicatedLoad2) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_REMOVED(read_bottom1); EXPECT_INS_REMOVED(read_bottom2); @@ -6078,11 +6021,7 @@ TEST_F(LoadStoreEliminationTest, MultiPredicatedLoad3) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_REMOVED(early_exit_left_read); EXPECT_INS_REMOVED(early_exit_right_read); @@ -6212,11 +6151,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedLoad4) { SetupExit(exit); - // PerformLSE expects this to be empty. 
- graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_REMOVED(read_bottom); EXPECT_INS_REMOVED(read_right); @@ -6334,11 +6269,7 @@ TEST_F(LoadStoreEliminationTest, MultiPredicatedLoad4) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_REMOVED(read_bottom); EXPECT_INS_REMOVED(read_early_return); @@ -6447,11 +6378,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedLoad2) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_REMOVED(read_bottom); EXPECT_INS_REMOVED(write_right); @@ -6585,11 +6512,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedLoad3) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_REMOVED(read_bottom); EXPECT_INS_REMOVED(write_right); @@ -6688,11 +6611,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedLoadDefaultValue) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_REMOVED(read_bottom); EXPECT_INS_RETAINED(write_left); @@ -6861,11 +6780,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis1) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); HPredicatedInstanceFieldGet* pred_get = FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); @@ -7045,11 +6960,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis2) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); HPredicatedInstanceFieldGet* pred_get = FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); @@ -7196,11 +7107,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis3) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); HPredicatedInstanceFieldGet* pred_get = FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); @@ -7344,11 +7251,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis4) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); HPredicatedInstanceFieldGet* pred_get = FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); @@ -7492,11 +7395,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis5) { SetupExit(exit); - // PerformLSE expects this to be empty. 
- graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); HPredicatedInstanceFieldGet* pred_get = FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); @@ -7657,11 +7556,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis6) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSEWithPartial(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); HPredicatedInstanceFieldGet* pred_get = FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); @@ -7757,17 +7652,10 @@ TEST_F(LoadStoreEliminationTest, SimplifyTest) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSE(); + PerformLSEWithPartial(blks); // Run the code-simplifier too - LOG(INFO) << "Pre simplification " << blks; - InstructionSimplifier simp(graph_, /*codegen=*/nullptr); - simp.Run(); - - LOG(INFO) << "Post LSE " << blks; + PerformSimplifications(blks); EXPECT_INS_REMOVED(write_right); EXPECT_INS_REMOVED(write_start); @@ -7851,17 +7739,10 @@ TEST_F(LoadStoreEliminationTest, SimplifyTest2) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSE(); + PerformLSEWithPartial(blks); // Run the code-simplifier too - LOG(INFO) << "Pre simplification " << blks; - InstructionSimplifier simp(graph_, /*codegen=*/nullptr); - simp.Run(); - - LOG(INFO) << "Post LSE " << blks; + PerformSimplifications(blks); EXPECT_INS_REMOVED(write_right); EXPECT_INS_REMOVED(write_start); @@ -7961,17 +7842,10 @@ TEST_F(LoadStoreEliminationTest, SimplifyTest3) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSE(); + PerformLSEWithPartial(blks); // Run the code-simplifier too - LOG(INFO) << "Pre simplification " << blks; - InstructionSimplifier simp(graph_, /*codegen=*/nullptr); - simp.Run(); - - LOG(INFO) << "Post LSE " << blks; + PerformSimplifications(blks); EXPECT_INS_REMOVED(write_case2); EXPECT_INS_REMOVED(write_case3); @@ -8069,17 +7943,10 @@ TEST_F(LoadStoreEliminationTest, SimplifyTest4) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSE(); + PerformLSEWithPartial(blks); // Run the code-simplifier too - LOG(INFO) << "Pre simplification " << blks; - InstructionSimplifier simp(graph_, /*codegen=*/nullptr); - simp.Run(); - - LOG(INFO) << "Post LSE " << blks; + PerformSimplifications(blks); EXPECT_INS_REMOVED(write_case2); EXPECT_INS_REMOVED(write_case3); @@ -8225,11 +8092,7 @@ TEST_F(LoadStoreEliminationTest, PartialIrreducibleLoop) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSE(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_TRUE(loop_header->IsLoopHeader()); EXPECT_TRUE(loop_header->GetLoopInformation()->IsIrreducible()); @@ -8382,11 +8245,7 @@ TEST_P(UsesOrderDependentTestGroup, RecordPredicatedReplacements1) { SetupExit(exit); - // PerformLSE expects this to be empty. 
- graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSE(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_RETAINED(cls); EXPECT_INS_REMOVED(new_inst); @@ -8544,11 +8403,7 @@ TEST_P(UsesOrderDependentTestGroup, RecordPredicatedReplacements2) { SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSE(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_RETAINED(cls); EXPECT_INS_REMOVED(new_inst); @@ -8752,11 +8607,7 @@ TEST_P(UsesOrderDependentTestGroupForThreeItems, RecordPredicatedReplacements3) SetupExit(exit); - // PerformLSE expects this to be empty. - graph_->ClearDominanceInformation(); - LOG(INFO) << "Pre LSE " << blks; - PerformLSE(); - LOG(INFO) << "Post LSE " << blks; + PerformLSEWithPartial(blks); EXPECT_INS_RETAINED(cls); EXPECT_INS_REMOVED(new_inst); diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc index 5879c6fa07..f40b7f4f0c 100644 --- a/compiler/optimizing/locations.cc +++ b/compiler/optimizing/locations.cc @@ -21,7 +21,7 @@ #include "code_generator.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { // Verify that Location is trivially copyable. static_assert(std::is_trivially_copyable<Location>::value, "Location should be trivially copyable"); @@ -57,7 +57,7 @@ LocationSummary::LocationSummary(HInstruction* instruction, Location Location::RegisterOrConstant(HInstruction* instruction) { return instruction->IsConstant() - ? Location::ConstantLocation(instruction->AsConstant()) + ? Location::ConstantLocation(instruction) : Location::RequiresRegister(); } @@ -85,16 +85,23 @@ Location Location::FpuRegisterOrInt32Constant(HInstruction* instruction) { Location Location::ByteRegisterOrConstant(int reg, HInstruction* instruction) { return instruction->IsConstant() - ? Location::ConstantLocation(instruction->AsConstant()) + ? Location::ConstantLocation(instruction) : Location::RegisterLocation(reg); } Location Location::FpuRegisterOrConstant(HInstruction* instruction) { return instruction->IsConstant() - ? Location::ConstantLocation(instruction->AsConstant()) + ? Location::ConstantLocation(instruction) : Location::RequiresFpuRegister(); } +void Location::DCheckInstructionIsConstant(HInstruction* instruction) { + DCHECK(instruction != nullptr); + DCHECK(instruction->IsConstant()); + DCHECK_EQ(reinterpret_cast<uintptr_t>(instruction), + reinterpret_cast<uintptr_t>(instruction->AsConstant())); +} + std::ostream& operator<<(std::ostream& os, const Location& location) { os << location.DebugString(); if (location.IsRegister() || location.IsFpuRegister()) { diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index acaea71a49..7ee076f442 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -22,9 +22,10 @@ #include "base/bit_field.h" #include "base/bit_utils.h" #include "base/bit_vector.h" +#include "base/macros.h" #include "base/value_object.h" -namespace art { +namespace art HIDDEN { class HConstant; class HInstruction; @@ -102,8 +103,12 @@ class Location : public ValueObject { return (value_ & kLocationConstantMask) == kConstant; } - static Location ConstantLocation(HConstant* constant) { + static Location ConstantLocation(HInstruction* constant) { DCHECK(constant != nullptr); + if (kIsDebugBuild) { + // Call out-of-line helper to avoid circular dependency with `nodes.h`. 
+ DCheckInstructionIsConstant(constant); + } return Location(kConstant | reinterpret_cast<uintptr_t>(constant)); } @@ -425,6 +430,8 @@ class Location : public ValueObject { return PayloadField::Decode(value_); } + static void DCheckInstructionIsConstant(HInstruction* instruction); + using KindField = BitField<Kind, 0, kBitsForKind>; using PayloadField = BitField<uintptr_t, kBitsForKind, kBitsForPayload>; diff --git a/compiler/optimizing/loop_analysis.cc b/compiler/optimizing/loop_analysis.cc index 76bd8493b2..95e81533da 100644 --- a/compiler/optimizing/loop_analysis.cc +++ b/compiler/optimizing/loop_analysis.cc @@ -20,7 +20,7 @@ #include "code_generator.h" #include "induction_var_range.h" -namespace art { +namespace art HIDDEN { void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info, LoopAnalysisInfo* analysis_results, diff --git a/compiler/optimizing/loop_analysis.h b/compiler/optimizing/loop_analysis.h index fbf1516f64..cec00fecf4 100644 --- a/compiler/optimizing/loop_analysis.h +++ b/compiler/optimizing/loop_analysis.h @@ -17,9 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_ #define ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_ +#include "base/macros.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { class CodeGenerator; class InductionVarRange; diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 2d7c20825c..7a52502562 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -27,7 +27,7 @@ #include "mirror/array-inl.h" #include "mirror/string.h" -namespace art { +namespace art HIDDEN { // Enables vectorization (SIMDization) in the loop optimizer. static constexpr bool kEnableVectorization = true; @@ -507,9 +507,8 @@ bool HLoopOptimization::Run() { graph_->SetHasLoops(false); // no more loops } - // Detach. + // Detach allocator. loop_allocator_ = nullptr; - last_loop_ = top_loop_ = nullptr; return did_loop_opt; } @@ -530,11 +529,7 @@ bool HLoopOptimization::LocalRun() { AddLoop(block->GetLoopInformation()); } } - - // TODO(solanes): How can `top_loop_` be null if `graph_->HasLoops()` is true? - if (top_loop_ == nullptr) { - return false; - } + DCHECK(top_loop_ != nullptr); // Traverse the loop hierarchy inner-to-outer and optimize. Traversal can use // temporary data structures using the phase-local allocator. All new HIR @@ -681,6 +676,50 @@ void HLoopOptimization::CalculateAndSetTryCatchKind(LoopNode* node) { } // +// This optimization applies to loops with plain simple operations +// (I.e. no calls to java code or runtime) with a known small trip_count * instr_count +// value. +// +bool HLoopOptimization::TryToRemoveSuspendCheckFromLoopHeader(LoopAnalysisInfo* analysis_info, + bool generate_code) { + if (!graph_->SuspendChecksAreAllowedToNoOp()) { + return false; + } + + int64_t trip_count = analysis_info->GetTripCount(); + + if (trip_count == LoopAnalysisInfo::kUnknownTripCount) { + return false; + } + + int64_t instruction_count = analysis_info->GetNumberOfInstructions(); + int64_t total_instruction_count = trip_count * instruction_count; + + // The inclusion of the HasInstructionsPreventingScalarOpts() prevents this + // optimization from being applied to loops that have calls. 
+ bool can_optimize = + total_instruction_count <= HLoopOptimization::kMaxTotalInstRemoveSuspendCheck && + !analysis_info->HasInstructionsPreventingScalarOpts(); + + if (!can_optimize) { + return false; + } + + // If we should do the optimization, disable codegen for the SuspendCheck. + if (generate_code) { + HLoopInformation* loop_info = analysis_info->GetLoopInfo(); + HBasicBlock* header = loop_info->GetHeader(); + HSuspendCheck* instruction = header->GetLoopInformation()->GetSuspendCheck(); + // As other optimizations depend on SuspendCheck + // (e.g: CHAGuardVisitor::HoistGuard), disable its codegen instead of + // removing the SuspendCheck instruction. + instruction->SetIsNoOp(true); + } + + return true; +} + +// // Optimization. // @@ -824,7 +863,7 @@ bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) { } bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { - return TryOptimizeInnerLoopFinite(node) || TryPeelingAndUnrolling(node); + return TryOptimizeInnerLoopFinite(node) || TryLoopScalarOpts(node); } // @@ -928,7 +967,7 @@ bool HLoopOptimization::TryFullUnrolling(LoopAnalysisInfo* analysis_info, bool g return true; } -bool HLoopOptimization::TryPeelingAndUnrolling(LoopNode* node) { +bool HLoopOptimization::TryLoopScalarOpts(LoopNode* node) { HLoopInformation* loop_info = node->loop_info; int64_t trip_count = LoopAnalysis::GetLoopTripCount(loop_info, &induction_range_); LoopAnalysisInfo analysis_info(loop_info); @@ -941,10 +980,16 @@ bool HLoopOptimization::TryPeelingAndUnrolling(LoopNode* node) { if (!TryFullUnrolling(&analysis_info, /*generate_code*/ false) && !TryPeelingForLoopInvariantExitsElimination(&analysis_info, /*generate_code*/ false) && - !TryUnrollingForBranchPenaltyReduction(&analysis_info, /*generate_code*/ false)) { + !TryUnrollingForBranchPenaltyReduction(&analysis_info, /*generate_code*/ false) && + !TryToRemoveSuspendCheckFromLoopHeader(&analysis_info, /*generate_code*/ false)) { return false; } + // Try the suspend check removal even for non-clonable loops. Also this + // optimization doesn't interfere with other scalar loop optimizations so it can + // be done prior to them. + bool removed_suspend_check = TryToRemoveSuspendCheckFromLoopHeader(&analysis_info); + // Run 'IsLoopClonable' the last as it might be time-consuming. 
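The new TryToRemoveSuspendCheckFromLoopHeader only fires when the loop's total work, trip_count * instruction_count, is known and does not exceed kMaxTotalInstRemoveSuspendCheck (128), and when the loop contains no instructions (such as calls) that block scalar optimizations; when code generation is requested, the header's HSuspendCheck is marked as a no-op rather than removed, so passes that depend on its presence still see it. A standalone sketch of the decision logic, with simplified inputs in place of LoopAnalysisInfo:

#include <cstdint>

constexpr int64_t kMaxTotalInstructions = 128;  // Mirrors kMaxTotalInstRemoveSuspendCheck.
constexpr int64_t kUnknownTripCount = -1;       // Stand-in for an unknown trip count.

// Simplified inputs; the real pass reads these from LoopAnalysisInfo.
struct LoopSummary {
  int64_t trip_count;
  int64_t instruction_count;
  bool has_calls_or_other_blockers;
};

// True when the loop is small enough that skipping its suspend check should not
// noticeably delay thread suspension for GC (a sketch of the patch's heuristic).
bool CanElideSuspendCheck(const LoopSummary& loop) {
  if (loop.trip_count == kUnknownTripCount) {
    return false;  // Unknown or unbounded loops keep their suspend check.
  }
  if (loop.has_calls_or_other_blockers) {
    return false;  // Calls can run arbitrary code; keep the check.
  }
  return loop.trip_count * loop.instruction_count <= kMaxTotalInstructions;
}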
if (!LoopClonerHelper::IsLoopClonable(loop_info)) { return false; @@ -952,7 +997,7 @@ bool HLoopOptimization::TryPeelingAndUnrolling(LoopNode* node) { return TryFullUnrolling(&analysis_info) || TryPeelingForLoopInvariantExitsElimination(&analysis_info) || - TryUnrollingForBranchPenaltyReduction(&analysis_info); + TryUnrollingForBranchPenaltyReduction(&analysis_info) || removed_suspend_check; } // diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index b17861648f..6dd778ba74 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_ #define ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_ +#include "base/macros.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" #include "induction_var_range.h" @@ -25,7 +26,7 @@ #include "optimization.h" #include "superblock_cloner.h" -namespace art { +namespace art HIDDEN { class CompilerOptions; class ArchNoOptsLoopHelper; @@ -47,6 +48,11 @@ class HLoopOptimization : public HOptimization { static constexpr const char* kLoopOptimizationPassName = "loop_optimization"; + // The maximum number of total instructions (trip_count * instruction_count), + // where the optimization of removing SuspendChecks from the loop header could + // be performed. + static constexpr int64_t kMaxTotalInstRemoveSuspendCheck = 128; + private: /** * A single loop inside the loop hierarchy representation. @@ -179,8 +185,19 @@ class HLoopOptimization : public HOptimization { // should be actually applied. bool TryFullUnrolling(LoopAnalysisInfo* analysis_info, bool generate_code = true); - // Tries to apply scalar loop peeling and unrolling. - bool TryPeelingAndUnrolling(LoopNode* node); + // Tries to remove SuspendCheck for plain loops with a low trip count. The + // SuspendCheck in the codegen makes sure that the thread can be interrupted + // during execution for GC. Not being able to do so might decrease the + // responsiveness of GC when a very long loop or a long recursion is being + // executed. However, for plain loops with a small trip count, the removal of + // SuspendCheck should not affect the GC's responsiveness by a large margin. + // Consequently, since the thread won't be interrupted for plain loops, it is + // assumed that the performance might increase by removing SuspendCheck. + bool TryToRemoveSuspendCheckFromLoopHeader(LoopAnalysisInfo* analysis_info, + bool generate_code = true); + + // Tries to apply scalar loop optimizations. + bool TryLoopScalarOpts(LoopNode* node); // // Vectorization analysis and synthesis. diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc index bda25283f5..7f694fb655 100644 --- a/compiler/optimizing/loop_optimization_test.cc +++ b/compiler/optimizing/loop_optimization_test.cc @@ -14,12 +14,13 @@ * limitations under the License. */ +#include "base/macros.h" #include "code_generator.h" #include "driver/compiler_options.h" #include "loop_optimization.h" #include "optimizing_unit_test.h" -namespace art { +namespace art HIDDEN { /** * Fixture class for the loop optimization tests. These unit tests focus @@ -94,10 +95,7 @@ class LoopOptimizationTest : public OptimizingUnitTest { void PerformAnalysis() { graph_->BuildDominatorTree(); iva_->Run(); - // Do not release the loop hierarchy. 
- ScopedArenaAllocator loop_allocator(GetArenaStack()); - loop_opt_->loop_allocator_ = &loop_allocator; - loop_opt_->LocalRun(); + loop_opt_->Run(); } /** Constructs string representation of computed loop hierarchy. */ diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index d35ed1c543..3790058879 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -40,7 +40,7 @@ #include "scoped_thread_state_change-inl.h" #include "ssa_builder.h" -namespace art { +namespace art HIDDEN { // Enable floating-point static evaluation during constant folding // only if all floating-point operations and constants evaluate in the @@ -150,30 +150,54 @@ static void RemoveAsUser(HInstruction* instruction) { RemoveEnvironmentUses(instruction); } -void HGraph::RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const { +void HGraph::RemoveDeadBlocksInstructionsAsUsersAndDisconnect(const ArenaBitVector& visited) const { for (size_t i = 0; i < blocks_.size(); ++i) { if (!visited.IsBitSet(i)) { HBasicBlock* block = blocks_[i]; if (block == nullptr) continue; + + // Remove as user. for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { RemoveAsUser(it.Current()); } for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { RemoveAsUser(it.Current()); } + + // Remove non-catch phi uses, and disconnect the block. + block->DisconnectFromSuccessors(&visited); + } + } +} + +// This method assumes `insn` has been removed from all users with the exception of catch +// phis because of missing exceptional edges in the graph. It removes the +// instruction from catch phi uses, together with inputs of other catch phis in +// the catch block at the same index, as these must be dead too. +static void RemoveCatchPhiUsesOfDeadInstruction(HInstruction* insn) { + DCHECK(!insn->HasEnvironmentUses()); + while (insn->HasNonEnvironmentUses()) { + const HUseListNode<HInstruction*>& use = insn->GetUses().front(); + size_t use_index = use.GetIndex(); + HBasicBlock* user_block = use.GetUser()->GetBlock(); + DCHECK(use.GetUser()->IsPhi()); + DCHECK(user_block->IsCatchBlock()); + for (HInstructionIterator phi_it(user_block->GetPhis()); !phi_it.Done(); phi_it.Advance()) { + phi_it.Current()->AsPhi()->RemoveInputAt(use_index); } } } void HGraph::RemoveDeadBlocks(const ArenaBitVector& visited) { + DCHECK(reverse_post_order_.empty()) << "We shouldn't have dominance information."; for (size_t i = 0; i < blocks_.size(); ++i) { if (!visited.IsBitSet(i)) { HBasicBlock* block = blocks_[i]; if (block == nullptr) continue; - // We only need to update the successor, which might be live. - for (HBasicBlock* successor : block->GetSuccessors()) { - successor->RemovePredecessor(block); - } + + // Remove all remaining uses (which should be only catch phi uses), and the instructions. + block->RemoveCatchPhiUsesAndInstruction(/* building_dominator_tree = */ true); + // Remove the block from the list of blocks, so that further analyses // never see it. blocks_[i] = nullptr; @@ -200,7 +224,8 @@ GraphAnalysisResult HGraph::BuildDominatorTree() { // (2) Remove instructions and phis from blocks not visited during // the initial DFS as users from other instructions, so that // users can be safely removed before uses later. - RemoveInstructionsAsUsersFromDeadBlocks(visited); + // Also disconnect the block from its successors, updating the successor's phis if needed. 
+ RemoveDeadBlocksInstructionsAsUsersAndDisconnect(visited); // (3) Remove blocks not visited during the initial DFS. // Step (5) requires dead blocks to be removed from the @@ -237,6 +262,7 @@ void HGraph::ClearDominanceInformation() { } void HGraph::ClearLoopInformation() { + SetHasLoops(false); SetHasIrreducibleLoops(false); for (HBasicBlock* block : GetActiveBlocks()) { block->SetLoopInformation(nullptr); @@ -544,6 +570,15 @@ void HGraph::SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor) { } } +HBasicBlock* HGraph::SplitEdgeAndUpdateRPO(HBasicBlock* block, HBasicBlock* successor) { + HBasicBlock* new_block = SplitEdge(block, successor); + // In the RPO we have {... , block, ... , successor}. We want to insert `new_block` right after + // `block` to have a consistent RPO without recomputing the whole graph's RPO. + reverse_post_order_.insert( + reverse_post_order_.begin() + IndexOfElement(reverse_post_order_, block) + 1, new_block); + return new_block; +} + // Reorder phi inputs to match reordering of the block's predecessors. static void FixPhisAfterPredecessorsReodering(HBasicBlock* block, size_t first, size_t second) { for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { @@ -653,7 +688,7 @@ void HGraph::TransformLoopToSinglePreheaderFormat(HBasicBlock* header) { 0, header_phi->GetType()); if (header_phi->GetType() == DataType::Type::kReference) { - preheader_phi->SetReferenceTypeInfo(header_phi->GetReferenceTypeInfo()); + preheader_phi->SetReferenceTypeInfoIfValid(header_phi->GetReferenceTypeInfo()); } preheader->AddPhi(preheader_phi); @@ -708,6 +743,8 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { void HGraph::ComputeTryBlockInformation() { // Iterate in reverse post order to propagate try membership information from // predecessors to their successors. + bool graph_has_try_catch = false; + for (HBasicBlock* block : GetReversePostOrder()) { if (block->IsEntryBlock() || block->IsCatchBlock()) { // Catch blocks after simplification have only exceptional predecessors @@ -722,6 +759,7 @@ void HGraph::ComputeTryBlockInformation() { DCHECK_IMPLIES(block->IsLoopHeader(), !block->GetLoopInformation()->IsBackEdge(*first_predecessor)); const HTryBoundary* try_entry = first_predecessor->ComputeTryEntryOfSuccessors(); + graph_has_try_catch |= try_entry != nullptr; if (try_entry != nullptr && (block->GetTryCatchInformation() == nullptr || try_entry != &block->GetTryCatchInformation()->GetTryEntry())) { @@ -730,6 +768,8 @@ void HGraph::ComputeTryBlockInformation() { block->SetTryCatchInformation(new (allocator_) TryCatchInformation(*try_entry)); } } + + SetHasTryCatch(graph_has_try_catch); } void HGraph::SimplifyCFG() { @@ -1459,6 +1499,10 @@ bool HInstructionList::FoundBefore(const HInstruction* instruction1, UNREACHABLE(); } +bool HInstruction::Dominates(HInstruction* other_instruction) const { + return other_instruction == this || StrictlyDominates(other_instruction); +} + bool HInstruction::StrictlyDominates(HInstruction* other_instruction) const { if (other_instruction == this) { // An instruction does not strictly dominate itself. 
@@ -1518,14 +1562,19 @@ void HInstruction::ReplaceWith(HInstruction* other) { DCHECK(env_uses_.empty()); } -void HInstruction::ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement) { +void HInstruction::ReplaceUsesDominatedBy(HInstruction* dominator, + HInstruction* replacement, + bool strictly_dominated) { const HUseList<HInstruction*>& uses = GetUses(); for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) { HInstruction* user = it->GetUser(); size_t index = it->GetIndex(); // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput(). ++it; - if (dominator->StrictlyDominates(user)) { + const bool dominated = + strictly_dominated ? dominator->StrictlyDominates(user) : dominator->Dominates(user); + + if (dominated) { user->ReplaceInput(replacement, index); } else if (user->IsPhi() && !user->AsPhi()->IsCatchPhi()) { // If the input flows from a block dominated by `dominator`, we can replace it. @@ -2108,8 +2157,9 @@ void HInstruction::MoveBeforeFirstUserAndOutOfLoops() { MoveBefore(insert_pos); } -HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor) { - DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented."; +HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor, bool require_graph_not_in_ssa_form) { + DCHECK_IMPLIES(require_graph_not_in_ssa_form, !graph_->IsInSsaForm()) + << "Support for SSA form not implemented."; DCHECK_EQ(cursor->GetBlock(), this); HBasicBlock* new_block = @@ -2376,24 +2426,6 @@ void HInstructionList::Add(const HInstructionList& instruction_list) { } } -// Should be called on instructions in a dead block in post order. This method -// assumes `insn` has been removed from all users with the exception of catch -// phis because of missing exceptional edges in the graph. It removes the -// instruction from catch phi uses, together with inputs of other catch phis in -// the catch block at the same index, as these must be dead too. -static void RemoveUsesOfDeadInstruction(HInstruction* insn) { - DCHECK(!insn->HasEnvironmentUses()); - while (insn->HasNonEnvironmentUses()) { - const HUseListNode<HInstruction*>& use = insn->GetUses().front(); - size_t use_index = use.GetIndex(); - HBasicBlock* user_block = use.GetUser()->GetBlock(); - DCHECK(use.GetUser()->IsPhi() && user_block->IsCatchBlock()); - for (HInstructionIterator phi_it(user_block->GetPhis()); !phi_it.Done(); phi_it.Advance()) { - phi_it.Current()->AsPhi()->RemoveInputAt(use_index); - } - } -} - void HBasicBlock::DisconnectAndDelete() { // Dominators must be removed after all the blocks they dominate. This way // a loop header is removed last, a requirement for correct loop information @@ -2418,52 +2450,14 @@ void HBasicBlock::DisconnectAndDelete() { } // (2) Disconnect the block from its successors and update their phis. - for (HBasicBlock* successor : successors_) { - // Delete this block from the list of predecessors. - size_t this_index = successor->GetPredecessorIndexOf(this); - successor->predecessors_.erase(successor->predecessors_.begin() + this_index); - - // Check that `successor` has other predecessors, otherwise `this` is the - // dominator of `successor` which violates the order DCHECKed at the top. - DCHECK(!successor->predecessors_.empty()); - - // Remove this block's entries in the successor's phis. Skip exceptional - // successors because catch phi inputs do not correspond to predecessor - // blocks but throwing instructions. The inputs of the catch phis will be - // updated in step (3). 
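The hunk above introduces HInstruction::Dominates() as the self-inclusive variant of StrictlyDominates(), and ReplaceUsesDominatedBy() now chooses between the two through its strictly_dominated parameter. A toy illustration of the relationship, using an invented Node type with an immediate-dominator pointer rather than ART's instruction class:

// Toy dominance helpers; Node is an invented stand-in for illustration.
struct Node {
  const Node* idom = nullptr;  // Immediate dominator in a toy dominator tree.
};

bool StrictlyDominates(const Node* a, const Node* b) {
  for (const Node* cur = b->idom; cur != nullptr; cur = cur->idom) {
    if (cur == a) return true;
  }
  return false;
}

bool Dominates(const Node* a, const Node* b) {
  return a == b || StrictlyDominates(a, b);  // Matches the new definition in the patch.
}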
- if (!successor->IsCatchBlock()) { - if (successor->predecessors_.size() == 1u) { - // The successor has just one predecessor left. Replace phis with the only - // remaining input. - for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) { - HPhi* phi = phi_it.Current()->AsPhi(); - phi->ReplaceWith(phi->InputAt(1 - this_index)); - successor->RemovePhi(phi); - } - } else { - for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) { - phi_it.Current()->AsPhi()->RemoveInputAt(this_index); - } - } - } - } - successors_.clear(); + DisconnectFromSuccessors(); // (3) Remove instructions and phis. Instructions should have no remaining uses // except in catch phis. If an instruction is used by a catch phi at `index`, // remove `index`-th input of all phis in the catch block since they are // guaranteed dead. Note that we may miss dead inputs this way but the // graph will always remain consistent. - for (HBackwardInstructionIterator it(GetInstructions()); !it.Done(); it.Advance()) { - HInstruction* insn = it.Current(); - RemoveUsesOfDeadInstruction(insn); - RemoveInstruction(insn); - } - for (HInstructionIterator it(GetPhis()); !it.Done(); it.Advance()) { - HPhi* insn = it.Current()->AsPhi(); - RemoveUsesOfDeadInstruction(insn); - RemovePhi(insn); - } + RemoveCatchPhiUsesAndInstruction(/* building_dominator_tree = */ false); // (4) Disconnect the block from its predecessors and update their // control-flow instructions. @@ -2537,6 +2531,70 @@ void HBasicBlock::DisconnectAndDelete() { SetGraph(nullptr); } +void HBasicBlock::DisconnectFromSuccessors(const ArenaBitVector* visited) { + for (HBasicBlock* successor : successors_) { + // Delete this block from the list of predecessors. + size_t this_index = successor->GetPredecessorIndexOf(this); + successor->predecessors_.erase(successor->predecessors_.begin() + this_index); + + if (visited != nullptr && !visited->IsBitSet(successor->GetBlockId())) { + // `successor` itself is dead. Therefore, there is no need to update its phis. + continue; + } + + DCHECK(!successor->predecessors_.empty()); + + // Remove this block's entries in the successor's phis. Skips exceptional + // successors because catch phi inputs do not correspond to predecessor + // blocks but throwing instructions. They are removed in `RemoveCatchPhiUses`. + if (!successor->IsCatchBlock()) { + if (successor->predecessors_.size() == 1u) { + // The successor has just one predecessor left. Replace phis with the only + // remaining input. + for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) { + HPhi* phi = phi_it.Current()->AsPhi(); + phi->ReplaceWith(phi->InputAt(1 - this_index)); + successor->RemovePhi(phi); + } + } else { + for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) { + phi_it.Current()->AsPhi()->RemoveInputAt(this_index); + } + } + } + } + successors_.clear(); +} + +void HBasicBlock::RemoveCatchPhiUsesAndInstruction(bool building_dominator_tree) { + for (HBackwardInstructionIterator it(GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* insn = it.Current(); + RemoveCatchPhiUsesOfDeadInstruction(insn); + + // If we are building the dominator tree, we removed all input records previously. + // `RemoveInstruction` will try to remove them again but that's not something we support and we + // will crash. We check here since we won't be checking that in RemoveInstruction. 
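DisconnectFromSuccessors above maintains the invariant that a block's phi inputs stay parallel to its predecessor list: the input at the removed predecessor's index is dropped, and a phi left with a single remaining input is replaced by that input. A standalone sketch of only this invariant, using a toy Phi with integer inputs; catch blocks and the `visited` bit vector are ignored here:

```cpp
#include <cstdio>
#include <vector>

// Toy phi: inputs are kept parallel to the owning block's predecessor list.
struct Phi {
  std::vector<int> inputs;
  bool replaced = false;   // "replaced by its only remaining input"
  int replacement = -1;
};

// Model of the phi update for one successor when the predecessor at
// `this_index` is disconnected.
void OnPredecessorRemoved(std::vector<Phi>& phis, size_t this_index) {
  for (Phi& phi : phis) {
    if (phi.inputs.size() == 2) {
      // Only one predecessor will remain: the phi degenerates to that input.
      phi.replacement = phi.inputs[1 - this_index];
      phi.replaced = true;
      phi.inputs.clear();
    } else {
      phi.inputs.erase(phi.inputs.begin() + this_index);
    }
  }
}

int main() {
  std::vector<Phi> phis = {{{10, 20, 30}}, {{7, 8, 9}}};
  OnPredecessorRemoved(phis, /*this_index=*/1);  // drop the middle predecessor
  std::printf("%zu inputs left, first is %d\n",
              phis[0].inputs.size(), phis[0].inputs[0]);  // 2 inputs left, first is 10
  return 0;
}
```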
+ if (building_dominator_tree) { + DCHECK(insn->GetUses().empty()); + DCHECK(insn->GetEnvUses().empty()); + } + RemoveInstruction(insn, /* ensure_safety= */ !building_dominator_tree); + } + for (HInstructionIterator it(GetPhis()); !it.Done(); it.Advance()) { + HPhi* insn = it.Current()->AsPhi(); + RemoveCatchPhiUsesOfDeadInstruction(insn); + + // If we are building the dominator tree, we removed all input records previously. + // `RemovePhi` will try to remove them again but that's not something we support and we + // will crash. We check here since we won't be checking that in RemovePhi. + if (building_dominator_tree) { + DCHECK(insn->GetUses().empty()); + DCHECK(insn->GetEnvUses().empty()); + } + RemovePhi(insn, /* ensure_safety= */ !building_dominator_tree); + } +} + void HBasicBlock::MergeInstructionsWith(HBasicBlock* other) { DCHECK(EndsWithControlFlowInstruction()); RemoveInstruction(GetLastInstruction()); @@ -2660,7 +2718,8 @@ void HGraph::DeleteDeadEmptyBlock(HBasicBlock* block) { void HGraph::UpdateLoopAndTryInformationOfNewBlock(HBasicBlock* block, HBasicBlock* reference, - bool replace_if_back_edge) { + bool replace_if_back_edge, + bool has_more_specific_try_catch_info) { if (block->IsLoopHeader()) { // Clear the information of which blocks are contained in that loop. Since the // information is stored as a bit vector based on block ids, we have to update @@ -2687,11 +2746,16 @@ void HGraph::UpdateLoopAndTryInformationOfNewBlock(HBasicBlock* block, } } - // Copy TryCatchInformation if `reference` is a try block, not if it is a catch block. - TryCatchInformation* try_catch_info = reference->IsTryBlock() - ? reference->GetTryCatchInformation() - : nullptr; - block->SetTryCatchInformation(try_catch_info); + DCHECK_IMPLIES(has_more_specific_try_catch_info, !reference->IsTryBlock()) + << "We don't allow to inline try catches inside of other try blocks."; + + // Update the TryCatchInformation, if we are not inlining a try catch. + if (!has_more_specific_try_catch_info) { + // Copy TryCatchInformation if `reference` is a try block, not if it is a catch block. + TryCatchInformation* try_catch_info = + reference->IsTryBlock() ? reference->GetTryCatchInformation() : nullptr; + block->SetTryCatchInformation(try_catch_info); + } } HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { @@ -2730,9 +2794,15 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { if (HasTryCatch()) { outer_graph->SetHasTryCatch(true); } + if (HasMonitorOperations()) { + outer_graph->SetHasMonitorOperations(true); + } if (HasSIMD()) { outer_graph->SetHasSIMD(true); } + if (HasAlwaysThrowingInvokes()) { + outer_graph->SetHasAlwaysThrowingInvokes(true); + } HInstruction* return_value = nullptr; if (GetBlocks().size() == 3) { @@ -2771,6 +2841,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { HBasicBlock* first = entry_block_->GetSuccessors()[0]; DCHECK(!first->IsInLoop()); + DCHECK(first->GetTryCatchInformation() == nullptr); at->MergeWithInlined(first); exit_block_->ReplaceWith(to); @@ -2801,12 +2872,14 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { // and (4) to the blocks that apply. 
for (HBasicBlock* current : GetReversePostOrder()) { if (current != exit_block_ && current != entry_block_ && current != first) { - DCHECK(current->GetTryCatchInformation() == nullptr); DCHECK(current->GetGraph() == this); current->SetGraph(outer_graph); outer_graph->AddBlock(current); outer_graph->reverse_post_order_[++index_of_at] = current; - UpdateLoopAndTryInformationOfNewBlock(current, at, /* replace_if_back_edge= */ false); + UpdateLoopAndTryInformationOfNewBlock(current, + at, + /* replace_if_back_edge= */ false, + current->GetTryCatchInformation() != nullptr); } } @@ -2820,25 +2893,62 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { // Update all predecessors of the exit block (now the `to` block) // to not `HReturn` but `HGoto` instead. Special case throwing blocks - // to now get the outer graph exit block as successor. Note that the inliner - // currently doesn't support inlining methods with try/catch. + // to now get the outer graph exit block as successor. HPhi* return_value_phi = nullptr; bool rerun_dominance = false; bool rerun_loop_analysis = false; for (size_t pred = 0; pred < to->GetPredecessors().size(); ++pred) { HBasicBlock* predecessor = to->GetPredecessors()[pred]; HInstruction* last = predecessor->GetLastInstruction(); + + // At this point we might either have: + // A) Return/ReturnVoid/Throw as the last instruction, or + // B) `Return/ReturnVoid/Throw->TryBoundary` as the last instruction chain + + const bool saw_try_boundary = last->IsTryBoundary(); + if (saw_try_boundary) { + DCHECK(predecessor->IsSingleTryBoundary()); + DCHECK(!last->AsTryBoundary()->IsEntry()); + predecessor = predecessor->GetSinglePredecessor(); + last = predecessor->GetLastInstruction(); + } + if (last->IsThrow()) { - DCHECK(!at->IsTryBlock()); - predecessor->ReplaceSuccessor(to, outer_graph->GetExitBlock()); + if (at->IsTryBlock()) { + DCHECK(!saw_try_boundary) << "We don't support inlining of try blocks into try blocks."; + // Create a TryBoundary of kind:exit and point it to the Exit block. + HBasicBlock* new_block = outer_graph->SplitEdge(predecessor, to); + new_block->AddInstruction( + new (allocator) HTryBoundary(HTryBoundary::BoundaryKind::kExit, last->GetDexPc())); + new_block->ReplaceSuccessor(to, outer_graph->GetExitBlock()); + + // Copy information from the predecessor. + new_block->SetLoopInformation(predecessor->GetLoopInformation()); + TryCatchInformation* try_catch_info = predecessor->GetTryCatchInformation(); + new_block->SetTryCatchInformation(try_catch_info); + for (HBasicBlock* xhandler : + try_catch_info->GetTryEntry().GetBlock()->GetExceptionalSuccessors()) { + new_block->AddSuccessor(xhandler); + } + DCHECK(try_catch_info->GetTryEntry().HasSameExceptionHandlersAs( + *new_block->GetLastInstruction()->AsTryBoundary())); + } else { + // We either have `Throw->TryBoundary` or `Throw`. We want to point the whole chain to the + // exit, so we recompute `predecessor` + predecessor = to->GetPredecessors()[pred]; + predecessor->ReplaceSuccessor(to, outer_graph->GetExitBlock()); + } + --pred; // We need to re-run dominance information, as the exit block now has - // a new dominator. + // a new predecessor and potential new dominator. + // TODO(solanes): See if it's worth it to hand-modify the domination chain instead of + // rerunning the dominance for the whole graph. rerun_dominance = true; if (predecessor->GetLoopInformation() != nullptr) { - // The exit block and blocks post dominated by the exit block do not belong - // to any loop. 
Because we do not compute the post dominators, we need to re-run - // loop analysis to get the loop information correct. + // The loop information might have changed e.g. `predecessor` might not be in a loop + // anymore. We only do this if `predecessor` has loop information as it is impossible for + // predecessor to end up in a loop if it wasn't in one before. rerun_loop_analysis = true; } } else { @@ -2863,6 +2973,19 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { } predecessor->AddInstruction(new (allocator) HGoto(last->GetDexPc())); predecessor->RemoveInstruction(last); + + if (saw_try_boundary) { + predecessor = to->GetPredecessors()[pred]; + DCHECK(predecessor->EndsWithTryBoundary()); + DCHECK_EQ(predecessor->GetNormalSuccessors().size(), 1u); + if (predecessor->GetSuccessors()[0]->GetPredecessors().size() > 1) { + outer_graph->SplitCriticalEdge(predecessor, to); + rerun_dominance = true; + if (predecessor->GetLoopInformation() != nullptr) { + rerun_loop_analysis = true; + } + } + } } } if (rerun_loop_analysis) { @@ -3047,6 +3170,7 @@ HBasicBlock* HGraph::TransformLoopForVectorization(HBasicBlock* header, HSuspendCheck* suspend_check = new (allocator_) HSuspendCheck(header->GetDexPc()); new_header->AddInstruction(suspend_check); new_body->AddInstruction(new (allocator_) HGoto()); + DCHECK(loop->GetSuspendCheck() != nullptr); suspend_check->CopyEnvironmentFromWithLoopPhiAdjustment( loop->GetSuspendCheck()->GetEnvironment(), header); @@ -3091,6 +3215,12 @@ void HInstruction::SetReferenceTypeInfo(ReferenceTypeInfo rti) { SetPackedFlag<kFlagReferenceTypeIsExact>(rti.IsExact()); } +void HInstruction::SetReferenceTypeInfoIfValid(ReferenceTypeInfo rti) { + if (rti.IsValid()) { + SetReferenceTypeInfo(rti); + } +} + bool HBoundType::InstructionDataEquals(const HInstruction* other) const { const HBoundType* other_bt = other->AsBoundType(); ScopedObjectAccess soa(Thread::Current()); @@ -3441,8 +3571,8 @@ static inline IntrinsicExceptions GetExceptionsIntrinsic(Intrinsics i) { return kCanThrow; } -void HInvoke::SetResolvedMethod(ArtMethod* method) { - if (method != nullptr && method->IsIntrinsic()) { +void HInvoke::SetResolvedMethod(ArtMethod* method, bool enable_intrinsic_opt) { + if (method != nullptr && method->IsIntrinsic() && enable_intrinsic_opt) { Intrinsics intrinsic = static_cast<Intrinsics>(method->GetIntrinsic()); SetIntrinsic(intrinsic, NeedsEnvironmentIntrinsic(intrinsic), diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 7a0059f616..28112d176a 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -29,6 +29,7 @@ #include "base/array_ref.h" #include "base/intrusive_forward_list.h" #include "base/iteration_range.h" +#include "base/macros.h" #include "base/mutex.h" #include "base/quasi_atomic.h" #include "base/stl_util.h" @@ -51,7 +52,7 @@ #include "mirror/method_type.h" #include "offsets.h" -namespace art { +namespace art HIDDEN { class ArenaStack; class CodeGenerator; @@ -406,6 +407,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { has_loops_(false), has_irreducible_loops_(false), has_direct_critical_native_call_(false), + has_always_throwing_invokes_(false), dead_reference_safe_(dead_reference_safe), debuggable_(debuggable), current_instruction_id_(start_instruction_id), @@ -485,9 +487,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { // Update the loop and try membership of `block`, which was spawned from `reference`. 
// In case `reference` is a back edge, `replace_if_back_edge` notifies whether `block` // should be the new back edge. + // `has_more_specific_try_catch_info` will be set to true when inlining a try catch. void UpdateLoopAndTryInformationOfNewBlock(HBasicBlock* block, HBasicBlock* reference, - bool replace_if_back_edge); + bool replace_if_back_edge, + bool has_more_specific_try_catch_info = false); // Need to add a couple of blocks to test if the loop body is entered and // put deoptimization instructions, etc. @@ -510,6 +514,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { HBasicBlock* SplitEdge(HBasicBlock* block, HBasicBlock* successor); void SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor); + + // Splits the edge between `block` and `successor` and then updates the graph's RPO to keep + // consistency without recomputing the whole graph. + HBasicBlock* SplitEdgeAndUpdateRPO(HBasicBlock* block, HBasicBlock* successor); + void OrderLoopHeaderPredecessors(HBasicBlock* header); // Transform a loop into a format with a single preheader. @@ -678,6 +687,13 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { return cha_single_implementation_list_; } + // In case of OSR we intend to use SuspendChecks as an entry point to the + // function; for debuggable graphs we might deoptimize to interpreter from + // SuspendChecks. In these cases we should always generate code for them. + bool SuspendChecksAreAllowedToNoOp() const { + return !IsDebuggable() && !IsCompilingOsr(); + } + void AddCHASingleImplementationDependency(ArtMethod* method) { cha_single_implementation_list_.insert(method); } @@ -704,6 +720,9 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { bool HasDirectCriticalNativeCall() const { return has_direct_critical_native_call_; } void SetHasDirectCriticalNativeCall(bool value) { has_direct_critical_native_call_ = value; } + bool HasAlwaysThrowingInvokes() const { return has_always_throwing_invokes_; } + void SetHasAlwaysThrowingInvokes(bool value) { has_always_throwing_invokes_ = value; } + ArtMethod* GetArtMethod() const { return art_method_; } void SetArtMethod(ArtMethod* method) { art_method_ = method; } @@ -719,12 +738,12 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { return ReferenceTypeInfo::Create(handle_cache_.GetObjectClassHandle(), /* is_exact= */ false); } - uint32_t GetNumberOfCHAGuards() { return number_of_cha_guards_; } + uint32_t GetNumberOfCHAGuards() const { return number_of_cha_guards_; } void SetNumberOfCHAGuards(uint32_t num) { number_of_cha_guards_ = num; } void IncrementNumberOfCHAGuards() { number_of_cha_guards_++; } private: - void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const; + void RemoveDeadBlocksInstructionsAsUsersAndDisconnect(const ArenaBitVector& visited) const; void RemoveDeadBlocks(const ArenaBitVector& visited); template <class InstructionType, typename ValueType> @@ -792,14 +811,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { size_t temporaries_vreg_slots_; // Flag whether there are bounds checks in the graph. We can skip - // BCE if it's false. It's only best effort to keep it up to date in - // the presence of code elimination so there might be false positives. + // BCE if it's false. bool has_bounds_checks_; // Flag whether there are try/catch blocks in the graph. We will skip - // try/catch-related passes if it's false. It's only best effort to keep - // it up to date in the presence of code elimination so there might be - // false positives. 
+ // try/catch-related passes if it's false. bool has_try_catch_; // Flag whether there are any HMonitorOperation in the graph. If yes this will mandate @@ -812,20 +828,19 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { bool has_simd_; // Flag whether there are any loops in the graph. We can skip loop - // optimization if it's false. It's only best effort to keep it up - // to date in the presence of code elimination so there might be false - // positives. + // optimization if it's false. bool has_loops_; - // Flag whether there are any irreducible loops in the graph. It's only - // best effort to keep it up to date in the presence of code elimination - // so there might be false positives. + // Flag whether there are any irreducible loops in the graph. bool has_irreducible_loops_; // Flag whether there are any direct calls to native code registered // for @CriticalNative methods. bool has_direct_critical_native_call_; + // Flag whether the graph contains invokes that always throw. + bool has_always_throwing_invokes_; + // Is the code known to be robust against eliminating dead references // and the effects of early finalization? If false, dead reference variables // are kept if they might be visible to the garbage collector. @@ -1291,7 +1306,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> { // graph, create a Goto at the end of the former block and will create an edge // between the blocks. It will not, however, update the reverse post order or // loop and try/catch information. - HBasicBlock* SplitBefore(HInstruction* cursor); + HBasicBlock* SplitBefore(HInstruction* cursor, bool require_graph_not_in_ssa_form = true); // Split the block into two blocks just before `cursor`. Returns the newly // created block. Note that this method just updates raw block information, @@ -1332,6 +1347,20 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> { // are safely updated. void DisconnectAndDelete(); + // Disconnects `this` from all its successors and updates their phis, if the successors have them. + // If `visited` is provided, it will use the information to know if a successor is reachable and + // skip updating those phis. + void DisconnectFromSuccessors(const ArenaBitVector* visited = nullptr); + + // Removes the catch phi uses of the instructions in `this`, and then remove the instruction + // itself. If `building_dominator_tree` is true, it will not remove the instruction as user, since + // we do it in a previous step. This is a special case for building up the dominator tree: we want + // to eliminate uses before inputs but we don't have domination information, so we remove all + // connections from input/uses first before removing any instruction. + // This method assumes the instructions have been removed from all users with the exception of + // catch phis because of missing exceptional edges in the graph. + void RemoveCatchPhiUsesAndInstruction(bool building_dominator_tree); + void AddInstruction(HInstruction* instruction); // Insert `instruction` before/after an existing instruction `cursor`. 
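SuspendChecksAreAllowedToNoOp() in the hunk above captures when a suspend check may be turned into a no-op while still staying in the graph (OSR uses suspend checks as entry points and debuggable graphs may deoptimize to the interpreter at them). The following is only an illustrative sketch of how a pass could consult it; Graph, SuspendCheck and MaybeElideSuspendCheck are stand-in names, not ART's API:

```cpp
#include <cstdio>

// Illustrative stand-ins for HGraph / HSuspendCheck.
struct Graph {
  bool debuggable = false;
  bool compiling_osr = false;
  // Mirrors the predicate added to HGraph in the hunk above.
  bool SuspendChecksAreAllowedToNoOp() const { return !debuggable && !compiling_osr; }
};

struct SuspendCheck {
  bool is_no_op = false;  // The instruction stays in the graph either way.
};

// Hypothetical pass step: mark a suspend check as a no-op only when the graph allows it.
void MaybeElideSuspendCheck(const Graph& graph, SuspendCheck& check) {
  if (graph.SuspendChecksAreAllowedToNoOp()) {
    check.is_no_op = true;  // Codegen may then skip emitting code for it.
  }
}

int main() {
  Graph jit_osr{/*debuggable=*/false, /*compiling_osr=*/true};
  Graph aot{/*debuggable=*/false, /*compiling_osr=*/false};
  SuspendCheck a, b;
  MaybeElideSuspendCheck(jit_osr, a);
  MaybeElideSuspendCheck(aot, b);
  std::printf("osr: %d, aot: %d\n", a.is_no_op, b.is_no_op);  // osr: 0, aot: 1
  return 0;
}
```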
void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor); @@ -1540,10 +1569,10 @@ class HLoopInformationOutwardIterator : public ValueObject { M(Min, BinaryOperation) \ M(MonitorOperation, Instruction) \ M(Mul, BinaryOperation) \ - M(NativeDebugInfo, Instruction) \ M(Neg, UnaryOperation) \ M(NewArray, Instruction) \ M(NewInstance, Instruction) \ + M(Nop, Instruction) \ M(Not, UnaryOperation) \ M(NotEqual, Condition) \ M(NullConstant, Instruction) \ @@ -2348,7 +2377,10 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { return GetType() == DataType::Type::kReference; } + // Sets the ReferenceTypeInfo. The RTI must be valid. void SetReferenceTypeInfo(ReferenceTypeInfo rti); + // Same as above, but we only set it if it's valid. Otherwise, we don't change the current RTI. + void SetReferenceTypeInfoIfValid(ReferenceTypeInfo rti); ReferenceTypeInfo GetReferenceTypeInfo() const { DCHECK_EQ(GetType(), DataType::Type::kReference); @@ -2408,7 +2440,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { !CanThrow() && !IsSuspendCheck() && !IsControlFlow() && - !IsNativeDebugInfo() && + !IsNop() && !IsParameterValue() && // If we added an explicit barrier then we should keep it. !IsMemoryBarrier() && @@ -2419,9 +2451,12 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { return IsRemovable() && !HasUses(); } - // Does this instruction strictly dominate `other_instruction`? - // Returns false if this instruction and `other_instruction` are the same. - // Aborts if this instruction and `other_instruction` are both phis. + // Does this instruction dominate `other_instruction`? + // Aborts if this instruction and `other_instruction` are different phis. + bool Dominates(HInstruction* other_instruction) const; + + // Same but with `strictly dominates` i.e. returns false if this instruction and + // `other_instruction` are the same. bool StrictlyDominates(HInstruction* other_instruction) const; int GetId() const { return id_; } @@ -2486,7 +2521,9 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { void SetLocations(LocationSummary* locations) { locations_ = locations; } void ReplaceWith(HInstruction* instruction); - void ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement); + void ReplaceUsesDominatedBy(HInstruction* dominator, + HInstruction* replacement, + bool strictly_dominated = true); void ReplaceEnvUsesDominatedBy(HInstruction* dominator, HInstruction* replacement); void ReplaceInput(HInstruction* replacement, size_t index); @@ -3730,7 +3767,7 @@ class HClassTableGet final : public HExpression<1> { static constexpr size_t kNumberOfClassTableGetPackedBits = kFieldTableKind + kFieldTableKindSize; static_assert(kNumberOfClassTableGetPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); - using TableKindField = BitField<TableKind, kFieldTableKind, kFieldTableKind>; + using TableKindField = BitField<TableKind, kFieldTableKind, kFieldTableKindSize>; // The index of the ArtMethod in the table. 
const size_t index_; @@ -4700,7 +4737,7 @@ class HInvoke : public HVariableInputSizeInstruction { void SetAlwaysThrows(bool always_throws) { SetPackedFlag<kFlagAlwaysThrows>(always_throws); } - bool AlwaysThrows() const override { return GetPackedFlag<kFlagAlwaysThrows>(); } + bool AlwaysThrows() const override final { return GetPackedFlag<kFlagAlwaysThrows>(); } bool CanBeMoved() const override { return IsIntrinsic() && !DoesAnyWrite(); } @@ -4719,7 +4756,7 @@ class HInvoke : public HVariableInputSizeInstruction { bool IsIntrinsic() const { return intrinsic_ != Intrinsics::kNone; } ArtMethod* GetResolvedMethod() const { return resolved_method_; } - void SetResolvedMethod(ArtMethod* method); + void SetResolvedMethod(ArtMethod* method, bool enable_intrinsic_opt); MethodReference GetMethodReference() const { return method_reference_; } @@ -4748,7 +4785,8 @@ class HInvoke : public HVariableInputSizeInstruction { MethodReference method_reference, ArtMethod* resolved_method, MethodReference resolved_method_reference, - InvokeType invoke_type) + InvokeType invoke_type, + bool enable_intrinsic_opt) : HVariableInputSizeInstruction( kind, return_type, @@ -4764,7 +4802,7 @@ class HInvoke : public HVariableInputSizeInstruction { intrinsic_optimizations_(0) { SetPackedField<InvokeTypeField>(invoke_type); SetPackedFlag<kFlagCanThrow>(true); - SetResolvedMethod(resolved_method); + SetResolvedMethod(resolved_method, enable_intrinsic_opt); } DEFAULT_COPY_CONSTRUCTOR(Invoke); @@ -4797,7 +4835,8 @@ class HInvokeUnresolved final : public HInvoke { method_reference, nullptr, MethodReference(nullptr, 0u), - invoke_type) { + invoke_type, + /* enable_intrinsic_opt= */ false) { } bool IsClonable() const override { return true; } @@ -4820,7 +4859,8 @@ class HInvokePolymorphic final : public HInvoke { // to pass intrinsic information to the HInvokePolymorphic node. 
ArtMethod* resolved_method, MethodReference resolved_method_reference, - dex::ProtoIndex proto_idx) + dex::ProtoIndex proto_idx, + bool enable_intrinsic_opt) : HInvoke(kInvokePolymorphic, allocator, number_of_arguments, @@ -4830,7 +4870,8 @@ class HInvokePolymorphic final : public HInvoke { method_reference, resolved_method, resolved_method_reference, - kPolymorphic), + kPolymorphic, + enable_intrinsic_opt), proto_idx_(proto_idx) { } @@ -4852,7 +4893,8 @@ class HInvokeCustom final : public HInvoke { uint32_t call_site_index, DataType::Type return_type, uint32_t dex_pc, - MethodReference method_reference) + MethodReference method_reference, + bool enable_intrinsic_opt) : HInvoke(kInvokeCustom, allocator, number_of_arguments, @@ -4862,7 +4904,8 @@ class HInvokeCustom final : public HInvoke { method_reference, /* resolved_method= */ nullptr, MethodReference(nullptr, 0u), - kStatic), + kStatic, + enable_intrinsic_opt), call_site_index_(call_site_index) { } @@ -4909,7 +4952,8 @@ class HInvokeStaticOrDirect final : public HInvoke { DispatchInfo dispatch_info, InvokeType invoke_type, MethodReference resolved_method_reference, - ClinitCheckRequirement clinit_check_requirement) + ClinitCheckRequirement clinit_check_requirement, + bool enable_intrinsic_opt) : HInvoke(kInvokeStaticOrDirect, allocator, number_of_arguments, @@ -4922,7 +4966,8 @@ class HInvokeStaticOrDirect final : public HInvoke { method_reference, resolved_method, resolved_method_reference, - invoke_type), + invoke_type, + enable_intrinsic_opt), dispatch_info_(dispatch_info) { SetPackedField<ClinitCheckRequirementField>(clinit_check_requirement); } @@ -5134,7 +5179,8 @@ class HInvokeVirtual final : public HInvoke { MethodReference method_reference, ArtMethod* resolved_method, MethodReference resolved_method_reference, - uint32_t vtable_index) + uint32_t vtable_index, + bool enable_intrinsic_opt) : HInvoke(kInvokeVirtual, allocator, number_of_arguments, @@ -5144,7 +5190,8 @@ class HInvokeVirtual final : public HInvoke { method_reference, resolved_method, resolved_method_reference, - kVirtual), + kVirtual, + enable_intrinsic_opt), vtable_index_(vtable_index) { } @@ -5196,7 +5243,8 @@ class HInvokeInterface final : public HInvoke { ArtMethod* resolved_method, MethodReference resolved_method_reference, uint32_t imt_index, - MethodLoadKind load_kind) + MethodLoadKind load_kind, + bool enable_intrinsic_opt) : HInvoke(kInvokeInterface, allocator, number_of_arguments + (NeedsCurrentMethod(load_kind) ? 1 : 0), @@ -5206,7 +5254,8 @@ class HInvokeInterface final : public HInvoke { method_reference, resolved_method, resolved_method_reference, - kInterface), + kInterface, + enable_intrinsic_opt), imt_index_(imt_index), hidden_argument_load_kind_(load_kind) { } @@ -5321,7 +5370,7 @@ class HNewArray final : public HExpression<2> { kFieldComponentSizeShift + kFieldComponentSizeShiftSize; static_assert(kNumberOfNewArrayPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); using ComponentSizeShiftField = - BitField<size_t, kFieldComponentSizeShift, kFieldComponentSizeShift>; + BitField<size_t, kFieldComponentSizeShift, kFieldComponentSizeShiftSize>; }; class HAdd final : public HBinaryOperation { @@ -6362,6 +6411,27 @@ class HPredicatedInstanceFieldGet final : public HExpression<2> { const FieldInfo field_info_; }; +enum class WriteBarrierKind { + // Emit the write barrier, with a runtime optimization which checks if the value that it is being + // set is null. 
+ kEmitWithNullCheck, + // Emit the write barrier, without the runtime null check optimization. This could be set because: + // A) It is a write barrier for an ArraySet (which does the optimization with the type check, so + // it never does the optimization at the write barrier stage) + // B) We know that the input can't be null + // C) This write barrier is actually several write barriers coalesced into one. Potentially we + // could ask if every value is null for a runtime optimization at the cost of compile time / code + // size. At the time of writing it was deemed not worth the effort. + kEmitNoNullCheck, + // Skip emitting the write barrier. This could be set because: + // A) The write barrier is not needed (e.g. it is not a reference, or the value is the null + // constant) + // B) This write barrier was coalesced into another one so there's no need to emit it. + kDontEmit, + kLast = kDontEmit +}; +std::ostream& operator<<(std::ostream& os, WriteBarrierKind rhs); + class HInstanceFieldSet final : public HExpression<2> { public: HInstanceFieldSet(HInstruction* object, @@ -6386,6 +6456,7 @@ class HInstanceFieldSet final : public HExpression<2> { dex_file) { SetPackedFlag<kFlagValueCanBeNull>(true); SetPackedFlag<kFlagIsPredicatedSet>(false); + SetPackedField<WriteBarrierKindField>(WriteBarrierKind::kEmitWithNullCheck); SetRawInputAt(0, object); SetRawInputAt(1, value); } @@ -6406,6 +6477,12 @@ class HInstanceFieldSet final : public HExpression<2> { void ClearValueCanBeNull() { SetPackedFlag<kFlagValueCanBeNull>(false); } bool GetIsPredicatedSet() const { return GetPackedFlag<kFlagIsPredicatedSet>(); } void SetIsPredicatedSet(bool value = true) { SetPackedFlag<kFlagIsPredicatedSet>(value); } + WriteBarrierKind GetWriteBarrierKind() { return GetPackedField<WriteBarrierKindField>(); } + void SetWriteBarrierKind(WriteBarrierKind kind) { + DCHECK(kind != WriteBarrierKind::kEmitWithNullCheck) + << "We shouldn't go back to the original value."; + SetPackedField<WriteBarrierKindField>(kind); + } DECLARE_INSTRUCTION(InstanceFieldSet); @@ -6415,11 +6492,17 @@ class HInstanceFieldSet final : public HExpression<2> { private: static constexpr size_t kFlagValueCanBeNull = kNumberOfGenericPackedBits; static constexpr size_t kFlagIsPredicatedSet = kFlagValueCanBeNull + 1; - static constexpr size_t kNumberOfInstanceFieldSetPackedBits = kFlagIsPredicatedSet + 1; + static constexpr size_t kWriteBarrierKind = kFlagIsPredicatedSet + 1; + static constexpr size_t kWriteBarrierKindSize = + MinimumBitsToStore(static_cast<size_t>(WriteBarrierKind::kLast)); + static constexpr size_t kNumberOfInstanceFieldSetPackedBits = + kWriteBarrierKind + kWriteBarrierKindSize; static_assert(kNumberOfInstanceFieldSetPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); const FieldInfo field_info_; + using WriteBarrierKindField = + BitField<WriteBarrierKind, kWriteBarrierKind, kWriteBarrierKindSize>; }; class HArrayGet final : public HExpression<2> { @@ -6540,6 +6623,8 @@ class HArraySet final : public HExpression<3> { SetPackedFlag<kFlagNeedsTypeCheck>(value->GetType() == DataType::Type::kReference); SetPackedFlag<kFlagValueCanBeNull>(true); SetPackedFlag<kFlagStaticTypeOfArrayIsObjectArray>(false); + // ArraySets never do the null check optimization at the write barrier stage. 
+ SetPackedField<WriteBarrierKindField>(WriteBarrierKind::kEmitNoNullCheck); SetRawInputAt(0, array); SetRawInputAt(1, index); SetRawInputAt(2, value); @@ -6560,8 +6645,10 @@ class HArraySet final : public HExpression<3> { return false; } - void ClearNeedsTypeCheck() { + void ClearTypeCheck() { SetPackedFlag<kFlagNeedsTypeCheck>(false); + // Clear the `CanTriggerGC` flag too as we can only trigger a GC when doing a type check. + SetSideEffects(GetSideEffects().Exclusion(SideEffects::CanTriggerGC())); } void ClearValueCanBeNull() { @@ -6610,6 +6697,16 @@ class HArraySet final : public HExpression<3> { : SideEffects::None(); } + WriteBarrierKind GetWriteBarrierKind() { return GetPackedField<WriteBarrierKindField>(); } + + void SetWriteBarrierKind(WriteBarrierKind kind) { + DCHECK(kind != WriteBarrierKind::kEmitNoNullCheck) + << "We shouldn't go back to the original value."; + DCHECK(kind != WriteBarrierKind::kEmitWithNullCheck) + << "We never do the null check optimization for ArraySets."; + SetPackedField<WriteBarrierKindField>(kind); + } + DECLARE_INSTRUCTION(ArraySet); protected: @@ -6625,11 +6722,16 @@ class HArraySet final : public HExpression<3> { // Cached information for the reference_type_info_ so that codegen // does not need to inspect the static type. static constexpr size_t kFlagStaticTypeOfArrayIsObjectArray = kFlagValueCanBeNull + 1; - static constexpr size_t kNumberOfArraySetPackedBits = - kFlagStaticTypeOfArrayIsObjectArray + 1; + static constexpr size_t kWriteBarrierKind = kFlagStaticTypeOfArrayIsObjectArray + 1; + static constexpr size_t kWriteBarrierKindSize = + MinimumBitsToStore(static_cast<size_t>(WriteBarrierKind::kLast)); + static constexpr size_t kNumberOfArraySetPackedBits = kWriteBarrierKind + kWriteBarrierKindSize; static_assert(kNumberOfArraySetPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); using ExpectedComponentTypeField = BitField<DataType::Type, kFieldExpectedComponentType, kFieldExpectedComponentTypeSize>; + + using WriteBarrierKindField = + BitField<WriteBarrierKind, kWriteBarrierKind, kWriteBarrierKindSize>; }; class HArrayLength final : public HExpression<1> { @@ -6714,9 +6816,10 @@ class HBoundsCheck final : public HExpression<2> { class HSuspendCheck final : public HExpression<0> { public: - explicit HSuspendCheck(uint32_t dex_pc = kNoDexPc) + explicit HSuspendCheck(uint32_t dex_pc = kNoDexPc, bool is_no_op = false) : HExpression(kSuspendCheck, SideEffects::CanTriggerGC(), dex_pc), slow_path_(nullptr) { + SetPackedFlag<kFlagIsNoOp>(is_no_op); } bool IsClonable() const override { return true; } @@ -6725,6 +6828,10 @@ class HSuspendCheck final : public HExpression<0> { return true; } + void SetIsNoOp(bool is_no_op) { SetPackedFlag<kFlagIsNoOp>(is_no_op); } + bool IsNoOp() const { return GetPackedFlag<kFlagIsNoOp>(); } + + void SetSlowPath(SlowPathCode* slow_path) { slow_path_ = slow_path; } SlowPathCode* GetSlowPath() const { return slow_path_; } @@ -6733,28 +6840,42 @@ class HSuspendCheck final : public HExpression<0> { protected: DEFAULT_COPY_CONSTRUCTOR(SuspendCheck); + // True if the HSuspendCheck should not emit any code during codegen. It is + // not possible to simply remove this instruction to disable codegen, as + // other optimizations (e.g: CHAGuardVisitor::HoistGuard) depend on + // HSuspendCheck being present in every loop. 
+ static constexpr size_t kFlagIsNoOp = kNumberOfGenericPackedBits; + static constexpr size_t kNumberOfSuspendCheckPackedBits = kFlagIsNoOp + 1; + static_assert(kNumberOfSuspendCheckPackedBits <= HInstruction::kMaxNumberOfPackedBits, + "Too many packed fields."); + private: // Only used for code generation, in order to share the same slow path between back edges // of a same loop. SlowPathCode* slow_path_; }; -// Pseudo-instruction which provides the native debugger with mapping information. -// It ensures that we can generate line number and local variables at this point. -class HNativeDebugInfo : public HExpression<0> { +// Pseudo-instruction which doesn't generate any code. +// If `emit_environment` is true, it can be used to generate an environment. It is used, for +// example, to provide the native debugger with mapping information. It ensures that we can generate +// line number and local variables at this point. +class HNop : public HExpression<0> { public: - explicit HNativeDebugInfo(uint32_t dex_pc) - : HExpression<0>(kNativeDebugInfo, SideEffects::None(), dex_pc) { + explicit HNop(uint32_t dex_pc, bool needs_environment) + : HExpression<0>(kNop, SideEffects::None(), dex_pc), needs_environment_(needs_environment) { } bool NeedsEnvironment() const override { - return true; + return needs_environment_; } - DECLARE_INSTRUCTION(NativeDebugInfo); + DECLARE_INSTRUCTION(Nop); protected: - DEFAULT_COPY_CONSTRUCTOR(NativeDebugInfo); + DEFAULT_COPY_CONSTRUCTOR(Nop); + + private: + bool needs_environment_; }; /** @@ -7222,6 +7343,10 @@ class HLoadMethodHandle final : public HInstruction { return SideEffects::CanTriggerGC(); } + bool CanThrow() const override { return true; } + + bool NeedsEnvironment() const override { return true; } + DECLARE_INSTRUCTION(LoadMethodHandle); protected: @@ -7266,6 +7391,10 @@ class HLoadMethodType final : public HInstruction { return SideEffects::CanTriggerGC(); } + bool CanThrow() const override { return true; } + + bool NeedsEnvironment() const override { return true; } + DECLARE_INSTRUCTION(LoadMethodType); protected: @@ -7400,6 +7529,7 @@ class HStaticFieldSet final : public HExpression<2> { declaring_class_def_index, dex_file) { SetPackedFlag<kFlagValueCanBeNull>(true); + SetPackedField<WriteBarrierKindField>(WriteBarrierKind::kEmitWithNullCheck); SetRawInputAt(0, cls); SetRawInputAt(1, value); } @@ -7415,6 +7545,13 @@ class HStaticFieldSet final : public HExpression<2> { bool GetValueCanBeNull() const { return GetPackedFlag<kFlagValueCanBeNull>(); } void ClearValueCanBeNull() { SetPackedFlag<kFlagValueCanBeNull>(false); } + WriteBarrierKind GetWriteBarrierKind() { return GetPackedField<WriteBarrierKindField>(); } + void SetWriteBarrierKind(WriteBarrierKind kind) { + DCHECK(kind != WriteBarrierKind::kEmitWithNullCheck) + << "We shouldn't go back to the original value."; + SetPackedField<WriteBarrierKindField>(kind); + } + DECLARE_INSTRUCTION(StaticFieldSet); protected: @@ -7422,25 +7559,34 @@ class HStaticFieldSet final : public HExpression<2> { private: static constexpr size_t kFlagValueCanBeNull = kNumberOfGenericPackedBits; - static constexpr size_t kNumberOfStaticFieldSetPackedBits = kFlagValueCanBeNull + 1; + static constexpr size_t kWriteBarrierKind = kFlagValueCanBeNull + 1; + static constexpr size_t kWriteBarrierKindSize = + MinimumBitsToStore(static_cast<size_t>(WriteBarrierKind::kLast)); + static constexpr size_t kNumberOfStaticFieldSetPackedBits = + kWriteBarrierKind + kWriteBarrierKindSize; static_assert(kNumberOfStaticFieldSetPackedBits <= 
kMaxNumberOfPackedBits, "Too many packed fields."); const FieldInfo field_info_; + using WriteBarrierKindField = + BitField<WriteBarrierKind, kWriteBarrierKind, kWriteBarrierKindSize>; }; class HStringBuilderAppend final : public HVariableInputSizeInstruction { public: HStringBuilderAppend(HIntConstant* format, uint32_t number_of_arguments, + bool has_fp_args, ArenaAllocator* allocator, uint32_t dex_pc) : HVariableInputSizeInstruction( kStringBuilderAppend, DataType::Type::kReference, - // The runtime call may read memory from inputs. It never writes outside - // of the newly allocated result object (or newly allocated helper objects). - SideEffects::AllReads().Union(SideEffects::CanTriggerGC()), + SideEffects::CanTriggerGC().Union( + // The runtime call may read memory from inputs. It never writes outside + // of the newly allocated result object or newly allocated helper objects, + // except for float/double arguments where we reuse thread-local helper objects. + has_fp_args ? SideEffects::AllWritesAndReads() : SideEffects::AllReads()), dex_pc, allocator, number_of_arguments + /* format */ 1u, @@ -8393,7 +8539,7 @@ class HIntermediateAddress final : public HExpression<2> { #include "nodes_x86.h" #endif -namespace art { +namespace art HIDDEN { class OptimizingCompilerStats; @@ -8457,7 +8603,7 @@ HInstruction* ReplaceInstrOrPhiByClone(HInstruction* instr); // Create a clone for each clonable instructions/phis and replace the original with the clone. // // Used for testing individual instruction cloner. -class CloneAndReplaceInstructionVisitor : public HGraphDelegateVisitor { +class CloneAndReplaceInstructionVisitor final : public HGraphDelegateVisitor { public: explicit CloneAndReplaceInstructionVisitor(HGraph* graph) : HGraphDelegateVisitor(graph), instr_replaced_by_clones_count_(0) {} diff --git a/compiler/optimizing/nodes_shared.cc b/compiler/optimizing/nodes_shared.cc index eca97d7a70..b3a7ad9a05 100644 --- a/compiler/optimizing/nodes_shared.cc +++ b/compiler/optimizing/nodes_shared.cc @@ -23,7 +23,7 @@ #include "instruction_simplifier_shared.h" -namespace art { +namespace art HIDDEN { using helpers::CanFitInShifterOperand; diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h index 7dcac1787e..27e610328f 100644 --- a/compiler/optimizing/nodes_shared.h +++ b/compiler/optimizing/nodes_shared.h @@ -22,7 +22,7 @@ // (defining `HInstruction` and co). #include "nodes.h" -namespace art { +namespace art HIDDEN { class HMultiplyAccumulate final : public HExpression<3> { public: diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc index 34f0e9b1e1..29210fe10f 100644 --- a/compiler/optimizing/nodes_test.cc +++ b/compiler/optimizing/nodes_test.cc @@ -17,11 +17,12 @@ #include "nodes.h" #include "base/arena_allocator.h" +#include "base/macros.h" #include "optimizing_unit_test.h" #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { class NodeTest : public OptimizingUnitTest {}; diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h index a2cd86dc33..73f6c40a0d 100644 --- a/compiler/optimizing/nodes_vector.h +++ b/compiler/optimizing/nodes_vector.h @@ -21,7 +21,7 @@ // is included in the header file nodes.h itself. However it gives editing tools better context. #include "nodes.h" -namespace art { +namespace art HIDDEN { // Memory alignment, represented as an offset relative to a base, where 0 <= offset < base, // and base is a power of two. 
For example, the value Alignment(16, 0) means memory is diff --git a/compiler/optimizing/nodes_vector_test.cc b/compiler/optimizing/nodes_vector_test.cc index b0a665d704..e0a48db84f 100644 --- a/compiler/optimizing/nodes_vector_test.cc +++ b/compiler/optimizing/nodes_vector_test.cc @@ -15,10 +15,11 @@ */ #include "base/arena_allocator.h" +#include "base/macros.h" #include "nodes.h" #include "optimizing_unit_test.h" -namespace art { +namespace art HIDDEN { /** * Fixture class for testing vector nodes. diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h index 8e8fbc1581..e246390aa5 100644 --- a/compiler/optimizing/nodes_x86.h +++ b/compiler/optimizing/nodes_x86.h @@ -17,7 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_NODES_X86_H_ #define ART_COMPILER_OPTIMIZING_NODES_X86_H_ -namespace art { +namespace art HIDDEN { // Compute the address of the method for X86 Constant area support. class HX86ComputeBaseMethodAddress final : public HExpression<0> { diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc index 2cac38b715..12e9a1046d 100644 --- a/compiler/optimizing/optimization.cc +++ b/compiler/optimizing/optimization.cc @@ -55,10 +55,11 @@ #include "select_generator.h" #include "sharpening.h" #include "side_effects_analysis.h" +#include "write_barrier_elimination.h" // Decide between default or alternative pass name. -namespace art { +namespace art HIDDEN { const char* OptimizationPassName(OptimizationPass pass) { switch (pass) { @@ -76,6 +77,7 @@ const char* OptimizationPassName(OptimizationPass pass) { return BoundsCheckElimination::kBoundsCheckEliminationPassName; case OptimizationPass::kLoadStoreElimination: return LoadStoreElimination::kLoadStoreEliminationPassName; + case OptimizationPass::kAggressiveConstantFolding: case OptimizationPass::kConstantFolding: return HConstantFolding::kConstantFoldingPassName; case OptimizationPass::kDeadCodeElimination: @@ -95,6 +97,8 @@ const char* OptimizationPassName(OptimizationPass pass) { return ConstructorFenceRedundancyElimination::kCFREPassName; case OptimizationPass::kScheduling: return HInstructionScheduling::kInstructionSchedulingPassName; + case OptimizationPass::kWriteBarrierElimination: + return WriteBarrierElimination::kWBEPassName; #ifdef ART_ENABLE_CODEGEN_arm case OptimizationPass::kInstructionSimplifierArm: return arm::InstructionSimplifierArm::kInstructionSimplifierArmPassName; @@ -194,7 +198,8 @@ ArenaVector<HOptimization*> ConstructOptimizations( opt = most_recent_side_effects = new (allocator) SideEffectsAnalysis(graph, pass_name); break; case OptimizationPass::kInductionVarAnalysis: - opt = most_recent_induction = new (allocator) HInductionVarAnalysis(graph, pass_name); + opt = most_recent_induction = + new (allocator) HInductionVarAnalysis(graph, stats, pass_name); break; // // Passes that need prior analysis. @@ -221,7 +226,11 @@ ArenaVector<HOptimization*> ConstructOptimizations( // Regular passes. 
// case OptimizationPass::kConstantFolding: - opt = new (allocator) HConstantFolding(graph, pass_name); + opt = new (allocator) HConstantFolding(graph, stats, pass_name); + break; + case OptimizationPass::kAggressiveConstantFolding: + opt = new (allocator) + HConstantFolding(graph, stats, pass_name, /* use_all_optimizations_ = */ true); break; case OptimizationPass::kDeadCodeElimination: opt = new (allocator) HDeadCodeElimination(graph, stats, pass_name); @@ -239,6 +248,7 @@ ArenaVector<HOptimization*> ConstructOptimizations( /* total_number_of_instructions= */ 0, /* parent= */ nullptr, /* depth= */ 0, + /* try_catch_inlining_allowed= */ true, pass_name); break; } @@ -267,6 +277,9 @@ ArenaVector<HOptimization*> ConstructOptimizations( case OptimizationPass::kLoadStoreElimination: opt = new (allocator) LoadStoreElimination(graph, stats, pass_name); break; + case OptimizationPass::kWriteBarrierElimination: + opt = new (allocator) WriteBarrierElimination(graph, stats, pass_name); + break; case OptimizationPass::kScheduling: opt = new (allocator) HInstructionScheduling( graph, codegen->GetCompilerOptions().GetInstructionSet(), codegen, pass_name); diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h index 2113df0c81..134e3cdc7a 100644 --- a/compiler/optimizing/optimization.h +++ b/compiler/optimizing/optimization.h @@ -18,10 +18,11 @@ #define ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_ #include "base/arena_object.h" +#include "base/macros.h" #include "nodes.h" #include "optimizing_compiler_stats.h" -namespace art { +namespace art HIDDEN { class CodeGenerator; class DexCompilationUnit; @@ -42,7 +43,7 @@ class HOptimization : public ArenaObject<kArenaAllocOptimization> { // Return the name of the pass. Pass names for a single HOptimization should be of form // <optimization_name> or <optimization_name>$<pass_name> for common <optimization_name> prefix. - // Example: 'instruction_simplifier', 'instruction_simplifier$after_bce', + // Example: 'instruction_simplifier', 'instruction_simplifier$before_codegen', // 'instruction_simplifier$before_codegen'. const char* GetPassName() const { return pass_name_; } @@ -66,6 +67,7 @@ class HOptimization : public ArenaObject<kArenaAllocOptimization> { // field is preferred over a string lookup at places where performance matters. // TODO: generate this table and lookup methods below automatically? enum class OptimizationPass { + kAggressiveConstantFolding, kAggressiveInstructionSimplifier, kBoundsCheckElimination, kCHAGuardOptimization, @@ -83,6 +85,7 @@ enum class OptimizationPass { kScheduling, kSelectGenerator, kSideEffectsAnalysis, + kWriteBarrierElimination, #ifdef ART_ENABLE_CODEGEN_arm kInstructionSimplifierArm, kCriticalNativeAbiFixupArm, diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index bad540e03c..f12e748941 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -18,6 +18,7 @@ #include <vector> #include "arch/instruction_set.h" +#include "base/macros.h" #include "base/runtime_debug.h" #include "cfi_test.h" #include "driver/compiler_options.h" @@ -32,7 +33,7 @@ namespace vixl32 = vixl::aarch32; -namespace art { +namespace art HIDDEN { // Run the tests only on host. #ifndef ART_TARGET_ANDROID @@ -167,9 +168,20 @@ TEST_ISA(kThumb2) // barrier configuration, and as such is removed from the set of // callee-save registers in the ARM64 code generator of the Optimizing // compiler. 
-#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER) -TEST_ISA(kArm64) -#endif +// +// We can't use compile-time macros for read-barrier as the introduction +// of userfaultfd-GC has made it a runtime choice. +TEST_F(OptimizingCFITest, kArm64) { + if (kUseBakerReadBarrier && gUseReadBarrier) { + std::vector<uint8_t> expected_asm( + expected_asm_kArm64, + expected_asm_kArm64 + arraysize(expected_asm_kArm64)); + std::vector<uint8_t> expected_cfi( + expected_cfi_kArm64, + expected_cfi_kArm64 + arraysize(expected_cfi_kArm64)); + TestImpl(InstructionSet::kArm64, "kArm64", expected_asm, expected_cfi); + } +} #endif #ifdef ART_ENABLE_CODEGEN_x86 diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 6eb3d01e42..00eb6e5c42 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -33,12 +33,11 @@ #include "base/timing_logger.h" #include "builder.h" #include "code_generator.h" -#include "compiled_method.h" #include "compiler.h" #include "debug/elf_debug_writer.h" #include "debug/method_debug_info.h" #include "dex/dex_file_types.h" -#include "driver/compiled_method_storage.h" +#include "driver/compiled_code_storage.h" #include "driver/compiler_options.h" #include "driver/dex_compilation_unit.h" #include "graph_checker.h" @@ -52,6 +51,7 @@ #include "linker/linker_patch.h" #include "nodes.h" #include "oat_quick_method_header.h" +#include "optimizing/write_barrier_elimination.h" #include "prepare_for_register_allocation.h" #include "reference_type_propagation.h" #include "register_allocator_linear_scan.h" @@ -62,7 +62,7 @@ #include "stack_map_stream.h" #include "utils/assembler.h" -namespace art { +namespace art HIDDEN { static constexpr size_t kArenaAllocatorMemoryReportThreshold = 8 * MB; @@ -269,7 +269,7 @@ class PassScope : public ValueObject { class OptimizingCompiler final : public Compiler { public: explicit OptimizingCompiler(const CompilerOptions& compiler_options, - CompiledMethodStorage* storage); + CompiledCodeStorage* storage); ~OptimizingCompiler() override; bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file) const override; @@ -359,11 +359,11 @@ class OptimizingCompiler final : public Compiler { const DexCompilationUnit& dex_compilation_unit, PassObserver* pass_observer) const; - private: // Create a 'CompiledMethod' for an optimized graph. CompiledMethod* Emit(ArenaAllocator* allocator, CodeVectorAllocator* code_allocator, CodeGenerator* codegen, + bool is_intrinsic, const dex::CodeItem* item) const; // Try compiling a method and return the code generator used for @@ -413,7 +413,7 @@ class OptimizingCompiler final : public Compiler { static const int kMaximumCompilationTimeBeforeWarning = 100; /* ms */ OptimizingCompiler::OptimizingCompiler(const CompilerOptions& compiler_options, - CompiledMethodStorage* storage) + CompiledCodeStorage* storage) : Compiler(compiler_options, storage, kMaximumCompilationTimeBeforeWarning) { // Enable C1visualizer output. const std::string& cfg_file_name = compiler_options.GetDumpCfgFileName(); @@ -568,6 +568,9 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph, } #endif default: + UNUSED(graph); + UNUSED(dex_compilation_unit); + UNUSED(pass_observer); return false; } } @@ -653,7 +656,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, OptDef(OptimizationPass::kGlobalValueNumbering), // Simplification (TODO: only if GVN occurred). 
OptDef(OptimizationPass::kSelectGenerator), - OptDef(OptimizationPass::kConstantFolding, + OptDef(OptimizationPass::kAggressiveConstantFolding, "constant_folding$after_gvn"), OptDef(OptimizationPass::kInstructionSimplifier, "instruction_simplifier$after_gvn"), @@ -668,20 +671,27 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, OptDef(OptimizationPass::kLoopOptimization), // Simplification. OptDef(OptimizationPass::kConstantFolding, - "constant_folding$after_bce"), + "constant_folding$after_loop_opt"), OptDef(OptimizationPass::kAggressiveInstructionSimplifier, - "instruction_simplifier$after_bce"), + "instruction_simplifier$after_loop_opt"), + OptDef(OptimizationPass::kDeadCodeElimination, + "dead_code_elimination$after_loop_opt"), // Other high-level optimizations. OptDef(OptimizationPass::kLoadStoreElimination), OptDef(OptimizationPass::kCHAGuardOptimization), - OptDef(OptimizationPass::kDeadCodeElimination, - "dead_code_elimination$final"), OptDef(OptimizationPass::kCodeSinking), + // Simplification. + OptDef(OptimizationPass::kConstantFolding, + "constant_folding$before_codegen"), // The codegen has a few assumptions that only the instruction simplifier // can satisfy. For example, the code generator does not expect to see a // HTypeConversion from a type to the same type. OptDef(OptimizationPass::kAggressiveInstructionSimplifier, "instruction_simplifier$before_codegen"), + // Simplification may result in dead code that should be removed prior to + // code generation. + OptDef(OptimizationPass::kDeadCodeElimination, + "dead_code_elimination$before_codegen"), // Eliminate constructor fences after code sinking to avoid // complicated sinking logic to split a fence with many inputs. OptDef(OptimizationPass::kConstructorFenceRedundancyElimination) @@ -711,18 +721,19 @@ static ArenaVector<linker::LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* allocator, CodeVectorAllocator* code_allocator, CodeGenerator* codegen, + bool is_intrinsic, const dex::CodeItem* code_item_for_osr_check) const { ArenaVector<linker::LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen); ScopedArenaVector<uint8_t> stack_map = codegen->BuildStackMaps(code_item_for_osr_check); - CompiledMethodStorage* storage = GetCompiledMethodStorage(); - CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod( - storage, + CompiledCodeStorage* storage = GetCompiledCodeStorage(); + CompiledMethod* compiled_method = storage->CreateCompiledMethod( codegen->GetInstructionSet(), code_allocator->GetMemory(), ArrayRef<const uint8_t>(stack_map), ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), - ArrayRef<const linker::LinkerPatch>(linker_patches)); + ArrayRef<const linker::LinkerPatch>(linker_patches), + is_intrinsic); for (const linker::LinkerPatch& patch : linker_patches) { if (codegen->NeedsThunkCode(patch) && storage->GetThunkCode(patch).empty()) { @@ -891,6 +902,8 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, RunBaselineOptimizations(graph, codegen.get(), dex_compilation_unit, &pass_observer); } else { RunOptimizations(graph, codegen.get(), dex_compilation_unit, &pass_observer); + PassScope scope(WriteBarrierElimination::kWBEPassName, &pass_observer); + WriteBarrierElimination(graph, compilation_stats_.get()).Run(); } RegisterAllocator::Strategy regalloc_strategy = @@ -984,6 +997,10 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( optimizations); RunArchOptimizations(graph, 
codegen.get(), dex_compilation_unit, &pass_observer); + { + PassScope scope(WriteBarrierElimination::kWBEPassName, &pass_observer); + WriteBarrierElimination(graph, compilation_stats_.get()).Run(); + } AllocateRegisters(graph, codegen.get(), @@ -1079,10 +1096,8 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item, compiled_method = Emit(&allocator, &code_allocator, codegen.get(), + compiled_intrinsic, compiled_intrinsic ? nullptr : code_item); - if (compiled_intrinsic) { - compiled_method->MarkAsIntrinsic(); - } if (kArenaAllocatorCountAllocations) { codegen.reset(); // Release codegen's ScopedArenaAllocator for memory accounting. @@ -1115,17 +1130,18 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item, static ScopedArenaVector<uint8_t> CreateJniStackMap(ScopedArenaAllocator* allocator, const JniCompiledMethod& jni_compiled_method, - size_t code_size) { + size_t code_size, + bool debuggable) { // StackMapStream is quite large, so allocate it using the ScopedArenaAllocator // to stay clear of the frame size limit. std::unique_ptr<StackMapStream> stack_map_stream( new (allocator) StackMapStream(allocator, jni_compiled_method.GetInstructionSet())); - stack_map_stream->BeginMethod( - jni_compiled_method.GetFrameSize(), - jni_compiled_method.GetCoreSpillMask(), - jni_compiled_method.GetFpSpillMask(), - /* num_dex_registers= */ 0, - /* baseline= */ false); + stack_map_stream->BeginMethod(jni_compiled_method.GetFrameSize(), + jni_compiled_method.GetCoreSpillMask(), + jni_compiled_method.GetFpSpillMask(), + /* num_dex_registers= */ 0, + /* baseline= */ false, + debuggable); stack_map_stream->EndMethod(code_size); return stack_map_stream->Encode(); } @@ -1172,12 +1188,11 @@ CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags, method, &handles)); if (codegen != nullptr) { - CompiledMethod* compiled_method = Emit(&allocator, - &code_allocator, - codegen.get(), - /* item= */ nullptr); - compiled_method->MarkAsIntrinsic(); - return compiled_method; + return Emit(&allocator, + &code_allocator, + codegen.get(), + /*is_intrinsic=*/ true, + /*item=*/ nullptr); } } } @@ -1187,19 +1202,22 @@ CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags, MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kCompiledNativeStub); ScopedArenaAllocator stack_map_allocator(&arena_stack); // Will hold the stack map. 
- ScopedArenaVector<uint8_t> stack_map = CreateJniStackMap( - &stack_map_allocator, jni_compiled_method, jni_compiled_method.GetCode().size()); - return CompiledMethod::SwapAllocCompiledMethod( - GetCompiledMethodStorage(), + ScopedArenaVector<uint8_t> stack_map = + CreateJniStackMap(&stack_map_allocator, + jni_compiled_method, + jni_compiled_method.GetCode().size(), + compiler_options.GetDebuggable() && compiler_options.IsJitCompiler()); + return GetCompiledCodeStorage()->CreateCompiledMethod( jni_compiled_method.GetInstructionSet(), jni_compiled_method.GetCode(), ArrayRef<const uint8_t>(stack_map), jni_compiled_method.GetCfi(), - /* patches= */ ArrayRef<const linker::LinkerPatch>()); + /*patches=*/ ArrayRef<const linker::LinkerPatch>(), + /*is_intrinsic=*/ false); } Compiler* CreateOptimizingCompiler(const CompilerOptions& compiler_options, - CompiledMethodStorage* storage) { + CompiledCodeStorage* storage) { return new OptimizingCompiler(compiler_options, storage); } @@ -1233,6 +1251,19 @@ bool OptimizingCompiler::JitCompile(Thread* self, ArenaAllocator allocator(runtime->GetJitArenaPool()); if (UNLIKELY(method->IsNative())) { + // Use GenericJniTrampoline for critical native methods in debuggable runtimes. We don't + // support calling method entry / exit hooks for critical native methods yet. + // TODO(mythria): Add support for calling method entry / exit hooks in JITed stubs for critical + // native methods too. + if (compiler_options.GetDebuggable() && method->IsCriticalNative()) { + DCHECK(compiler_options.IsJitCompiler()); + return false; + } + // Java debuggable runtimes should set compiler options to debuggable, so that we either + // generate method entry / exit hooks or skip JITing. For critical native methods we don't + // generate method entry / exit hooks so we shouldn't JIT them in debuggable runtimes. + DCHECK_IMPLIES(method->IsCriticalNative(), !runtime->IsJavaDebuggable()); + JniCompiledMethod jni_compiled_method = ArtQuickJniCompileMethod( compiler_options, access_flags, method_idx, *dex_file, &allocator); std::vector<Handle<mirror::Object>> roots; @@ -1241,8 +1272,11 @@ bool OptimizingCompiler::JitCompile(Thread* self, ArenaStack arena_stack(runtime->GetJitArenaPool()); // StackMapStream is large and it does not fit into this frame, so we need helper method. ScopedArenaAllocator stack_map_allocator(&arena_stack); // Will hold the stack map. 
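On the CreateJniStackMap() change: the helper now takes a debuggable flag and forwards it as the new last argument of StackMapStream::BeginMethod(). Both the AOT JniCompile path above and the JIT path right below derive the flag identically; a condensed sketch using only names that appear in this diff:

    // Stack maps for JNI stubs are tagged as debuggable only when this compiler is
    // the JIT and the runtime asked for debuggable code.
    const bool debuggable = compiler_options.GetDebuggable() && compiler_options.IsJitCompiler();
    ScopedArenaVector<uint8_t> stack_map = CreateJniStackMap(&stack_map_allocator,
                                                             jni_compiled_method,
                                                             jni_compiled_method.GetCode().size(),
                                                             debuggable);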
- ScopedArenaVector<uint8_t> stack_map = CreateJniStackMap( - &stack_map_allocator, jni_compiled_method, jni_compiled_method.GetCode().size()); + ScopedArenaVector<uint8_t> stack_map = + CreateJniStackMap(&stack_map_allocator, + jni_compiled_method, + jni_compiled_method.GetCode().size(), + compiler_options.GetDebuggable() && compiler_options.IsJitCompiler()); ArrayRef<const uint8_t> reserved_code; ArrayRef<const uint8_t> reserved_data; diff --git a/compiler/optimizing/optimizing_compiler.h b/compiler/optimizing/optimizing_compiler.h index cd6d684590..737ffd034a 100644 --- a/compiler/optimizing/optimizing_compiler.h +++ b/compiler/optimizing/optimizing_compiler.h @@ -18,18 +18,19 @@ #define ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_H_ #include "base/globals.h" +#include "base/macros.h" #include "base/mutex.h" -namespace art { +namespace art HIDDEN { class ArtMethod; +class CompiledCodeStorage; class Compiler; -class CompiledMethodStorage; class CompilerOptions; class DexFile; Compiler* CreateOptimizingCompiler(const CompilerOptions& compiler_options, - CompiledMethodStorage* storage); + CompiledCodeStorage* storage); bool EncodeArtMethodInInlineInfo(ArtMethod* method); diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index d458e42608..a1d0a5a845 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -26,8 +26,9 @@ #include "base/atomic.h" #include "base/globals.h" +#include "base/macros.h" -namespace art { +namespace art HIDDEN { enum class MethodCompilationStat { kAttemptBytecodeCompilation = 0, @@ -46,6 +47,7 @@ enum class MethodCompilationStat { kUnresolvedFieldNotAFastAccess, kRemovedCheckedCast, kRemovedDeadInstruction, + kRemovedTry, kRemovedNullCheck, kNotCompiledSkipped, kNotCompiledInvalidBytecode, @@ -59,6 +61,7 @@ enum class MethodCompilationStat { kNotCompiledSpaceFilter, kNotCompiledUnhandledInstruction, kNotCompiledUnsupportedIsa, + kNotCompiledInliningIrreducibleLoop, kNotCompiledIrreducibleLoopAndStringInit, kNotCompiledPhiEquivalentInOsr, kInlinedMonomorphicCall, @@ -73,11 +76,13 @@ enum class MethodCompilationStat { kLoopVectorizedIdiom, kSelectGenerated, kRemovedInstanceOf, + kPropagatedIfValue, kInlinedInvokeVirtualOrInterface, kInlinedLastInvokeVirtualOrInterface, kImplicitNullCheckGenerated, kExplicitNullCheckGenerated, kSimplifyIf, + kSimplifyIfAddedPhi, kSimplifyThrowingInvoke, kInstructionSunk, kNotInlinedUnresolvedEntrypoint, @@ -88,16 +93,19 @@ enum class MethodCompilationStat { kNotInlinedEnvironmentBudget, kNotInlinedInstructionBudget, kNotInlinedLoopWithoutExit, - kNotInlinedIrreducibleLoop, + kNotInlinedIrreducibleLoopCallee, + kNotInlinedIrreducibleLoopCaller, kNotInlinedAlwaysThrows, kNotInlinedInfiniteLoop, - kNotInlinedTryCatchCaller, kNotInlinedTryCatchCallee, + kNotInlinedTryCatchDisabled, kNotInlinedRegisterAllocator, kNotInlinedCannotBuild, + kNotInlinedNeverInlineAnnotation, kNotInlinedNotCompilable, kNotInlinedNotVerified, kNotInlinedCodeItem, + kNotInlinedEndsWithThrow, kNotInlinedWont, kNotInlinedRecursiveBudget, kNotInlinedPolymorphicRecursiveBudget, @@ -105,12 +113,15 @@ enum class MethodCompilationStat { kNotInlinedUnresolved, kNotInlinedPolymorphic, kNotInlinedCustom, + kNotVarAnalyzedPathological, kTryInline, kConstructorFenceGeneratedNew, kConstructorFenceGeneratedFinal, kConstructorFenceRemovedLSE, kConstructorFenceRemovedPFRA, kConstructorFenceRemovedCFRE, + kPossibleWriteBarrier, + kRemovedWriteBarrier, 
kBitstringTypeCheck, kJitOutOfMemoryForCommit, kFullLSEAllocationRemoved, diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h index e83688039a..2e05c41f01 100644 --- a/compiler/optimizing/optimizing_unit_test.h +++ b/compiler/optimizing/optimizing_unit_test.h @@ -25,6 +25,7 @@ #include <vector> #include <variant> +#include "base/macros.h" #include "base/indenter.h" #include "base/malloc_arena_pool.h" #include "base/scoped_arena_allocator.h" @@ -46,7 +47,7 @@ #include "ssa_builder.h" #include "ssa_liveness_analysis.h" -namespace art { +namespace art HIDDEN { #define NUM_INSTRUCTIONS(...) \ (sizeof((uint16_t[]) {__VA_ARGS__}) /sizeof(uint16_t)) @@ -240,13 +241,14 @@ class OptimizingUnitTestHelper { // Create the dex file based on the fake data. Call the constructor so that we can use virtual // functions. Don't use the arena for the StandardDexFile otherwise the dex location leaks. - dex_files_.emplace_back(new StandardDexFile( - dex_data, - sizeof(StandardDexFile::Header), - "no_location", - /*location_checksum*/ 0, - /*oat_dex_file*/ nullptr, - /*container*/ nullptr)); + auto container = + std::make_shared<MemoryDexFileContainer>(dex_data, sizeof(StandardDexFile::Header)); + dex_files_.emplace_back(new StandardDexFile(dex_data, + sizeof(StandardDexFile::Header), + "no_location", + /*location_checksum*/ 0, + /*oat_dex_file*/ nullptr, + std::move(container))); graph_ = new (allocator) HGraph( allocator, @@ -260,9 +262,10 @@ class OptimizingUnitTestHelper { // Create a control-flow graph from Dex instructions. HGraph* CreateCFG(const std::vector<uint16_t>& data, - DataType::Type return_type = DataType::Type::kInt32, - VariableSizedHandleScope* handles = nullptr) { - HGraph* graph = CreateGraph(handles); + DataType::Type return_type = DataType::Type::kInt32) { + ScopedObjectAccess soa(Thread::Current()); + VariableSizedHandleScope handles(soa.Self()); + HGraph* graph = CreateGraph(&handles); // The code item data might not aligned to 4 bytes, copy it to ensure that. const size_t code_item_size = data.size() * sizeof(data.front()); @@ -278,7 +281,7 @@ class OptimizingUnitTestHelper { /* class_linker= */ nullptr, graph->GetDexFile(), code_item, - /* class_def_index= */ DexFile::kDexNoIndex16, + /* class_def_idx= */ DexFile::kDexNoIndex16, /* method_idx= */ dex::kDexNoIndex, /* access_flags= */ 0u, /* verified_method= */ nullptr, @@ -320,25 +323,10 @@ class OptimizingUnitTestHelper { // Run GraphChecker with all checks. // // Return: the status whether the run is successful. - bool CheckGraph(HGraph* graph, std::ostream& oss = std::cerr) { - return CheckGraph(graph, /*check_ref_type_info=*/true, oss); - } - bool CheckGraph(std::ostream& oss = std::cerr) { return CheckGraph(graph_, oss); } - // Run GraphChecker with all checks except reference type information checks. - // - // Return: the status whether the run is successful. 
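With the CreateCFG() change above, tests no longer pass a VariableSizedHandleScope*; the helper enters a ScopedObjectAccess and builds its own scope, so a fixture only needs a runtime to be up. A hypothetical test body under that assumption (the code-item macros are the usual ones from this header and the bytecode itself is irrelevant; this is an illustration, not a test from this series):

    // Build a trivial method and run the checker; no handle-scope plumbing needed.
    const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM(
        Instruction::CONST_4 | 0 | 0,
        Instruction::RETURN_VOID);
    HGraph* graph = CreateCFG(data);
    ASSERT_TRUE(CheckGraph(graph));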
- bool CheckGraphSkipRefTypeInfoChecks(HGraph* graph, std::ostream& oss = std::cerr) { - return CheckGraph(graph, /*check_ref_type_info=*/false, oss); - } - - bool CheckGraphSkipRefTypeInfoChecks(std::ostream& oss = std::cerr) { - return CheckGraphSkipRefTypeInfoChecks(graph_, oss); - } - HEnvironment* ManuallyBuildEnvFor(HInstruction* instruction, ArenaVector<HInstruction*>* current_locals) { HEnvironment* environment = new (GetAllocator()) HEnvironment( @@ -473,7 +461,8 @@ class OptimizingUnitTestHelper { HInvokeStaticOrDirect::DispatchInfo{}, InvokeType::kStatic, /* resolved_method_reference= */ method_reference, - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, + !graph_->IsDebuggable()); for (auto [ins, idx] : ZipCount(MakeIterationRange(args))) { res->SetRawInputAt(idx, ins); } @@ -531,9 +520,8 @@ class OptimizingUnitTestHelper { } protected: - bool CheckGraph(HGraph* graph, bool check_ref_type_info, std::ostream& oss) { + bool CheckGraph(HGraph* graph, std::ostream& oss) { GraphChecker checker(graph); - checker.SetRefTypeInfoCheckEnabled(check_ref_type_info); checker.Run(); checker.Dump(oss); return checker.IsValid(); @@ -559,7 +547,7 @@ class OptimizingUnitTestHelper { class OptimizingUnitTest : public CommonArtTest, public OptimizingUnitTestHelper {}; // Naive string diff data type. -typedef std::list<std::pair<std::string, std::string>> diff_t; +using diff_t = std::list<std::pair<std::string, std::string>>; // An alias for the empty string used to make it clear that a line is // removed in a diff. @@ -586,7 +574,7 @@ inline std::ostream& operator<<(std::ostream& oss, const AdjacencyListGraph& alg return alg.Dump(oss); } -class PatternMatchGraphVisitor : public HGraphVisitor { +class PatternMatchGraphVisitor final : public HGraphVisitor { private: struct HandlerWrapper { public: diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc index 2036b4a370..9fc4cc86bf 100644 --- a/compiler/optimizing/parallel_move_resolver.cc +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -19,7 +19,7 @@ #include "base/stl_util.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) { // Perform a linear sweep of the moves to add them to the initial list of diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h index 5fadcab402..17d5122542 100644 --- a/compiler/optimizing/parallel_move_resolver.h +++ b/compiler/optimizing/parallel_move_resolver.h @@ -18,11 +18,12 @@ #define ART_COMPILER_OPTIMIZING_PARALLEL_MOVE_RESOLVER_H_ #include "base/arena_containers.h" +#include "base/macros.h" #include "base/value_object.h" #include "data_type.h" #include "locations.h" -namespace art { +namespace art HIDDEN { class HParallelMove; class MoveOperands; diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index a8ab6cdd0c..a1c05e9cad 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -15,6 +15,7 @@ */ #include "base/arena_allocator.h" +#include "base/macros.h" #include "base/malloc_arena_pool.h" #include "nodes.h" #include "parallel_move_resolver.h" @@ -22,7 +23,7 @@ #include "gtest/gtest-typed-test.h" #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { constexpr int kScratchRegisterStartIndexForTest = 100; diff --git 
a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index 17f37f05c5..d3da3d3ce1 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -18,13 +18,13 @@
 #include "code_generator_x86.h"
 #include "intrinsics_x86.h"
 
-namespace art {
+namespace art HIDDEN {
 namespace x86 {
 
 /**
  * Finds instructions that need the constant area base as an input.
  */
-class PCRelativeHandlerVisitor : public HGraphVisitor {
+class PCRelativeHandlerVisitor final : public HGraphVisitor {
  public:
   PCRelativeHandlerVisitor(HGraph* graph, CodeGenerator* codegen)
       : HGraphVisitor(graph),
diff --git a/compiler/optimizing/pc_relative_fixups_x86.h b/compiler/optimizing/pc_relative_fixups_x86.h
index 3b470a6502..45578d8050 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.h
+++ b/compiler/optimizing/pc_relative_fixups_x86.h
@@ -17,10 +17,11 @@
 #ifndef ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_X86_H_
 #define ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_X86_H_
 
+#include "base/macros.h"
 #include "nodes.h"
 #include "optimization.h"
 
-namespace art {
+namespace art HIDDEN {
 
 class CodeGenerator;
 
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index c2f3d0e741..398b10abf3 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -22,7 +22,7 @@
 #include "optimizing_compiler_stats.h"
 #include "well_known_classes.h"
 
-namespace art {
+namespace art HIDDEN {
 
 void PrepareForRegisterAllocation::Run() {
   // Order does not matter.
@@ -83,7 +83,7 @@ void PrepareForRegisterAllocation::VisitBoundsCheck(HBoundsCheck* check) {
   if (check->IsStringCharAt()) {
     // Add a fake environment for String.charAt() inline info as we want the exception
     // to appear as being thrown from there. Skip if we're compiling String.charAt() itself.
-    ArtMethod* char_at_method = jni::DecodeArtMethod(WellKnownClasses::java_lang_String_charAt);
+    ArtMethod* char_at_method = WellKnownClasses::java_lang_String_charAt;
     if (GetGraph()->GetArtMethod() != char_at_method) {
       ArenaAllocator* allocator = GetGraph()->GetAllocator();
       HEnvironment* environment = new (allocator) HEnvironment(allocator,
@@ -109,7 +109,7 @@ void PrepareForRegisterAllocation::VisitArraySet(HArraySet* instruction) {
   if (value->IsNullConstant()) {
     DCHECK_EQ(value->GetType(), DataType::Type::kReference);
     if (instruction->NeedsTypeCheck()) {
-      instruction->ClearNeedsTypeCheck();
+      instruction->ClearTypeCheck();
     }
   }
 }
@@ -295,15 +295,16 @@ bool PrepareForRegisterAllocation::CanMoveClinitCheck(HInstruction* input,
     return false;
   }
 
-  // In debug mode, check that we have not inserted a throwing instruction
-  // or an instruction with side effects between input and user.
-  if (kIsDebugBuild) {
-    for (HInstruction* between = input->GetNext(); between != user; between = between->GetNext()) {
-      CHECK(between != nullptr);  // User must be after input in the same block.
-      CHECK(!between->CanThrow()) << *between << " User: " << *user;
-      CHECK(!between->HasSideEffects()) << *between << " User: " << *user;
+  // If there is an instruction between them that can throw or has side effects, we cannot move
+  // the responsibility.
+  for (HInstruction* between = input->GetNext(); between != user; between = between->GetNext()) {
+    DCHECK(between != nullptr) << " User must be after input in the same block. 
input: " << *input + << ", user: " << *user; + if (between->CanThrow() || between->HasSideEffects()) { + return false; } } + return true; } diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h index e0bb76eb22..0426f8470b 100644 --- a/compiler/optimizing/prepare_for_register_allocation.h +++ b/compiler/optimizing/prepare_for_register_allocation.h @@ -17,9 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_PREPARE_FOR_REGISTER_ALLOCATION_H_ #define ART_COMPILER_OPTIMIZING_PREPARE_FOR_REGISTER_ALLOCATION_H_ +#include "base/macros.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { class CompilerOptions; class OptimizingCompilerStats; diff --git a/compiler/optimizing/pretty_printer.h b/compiler/optimizing/pretty_printer.h index 8ef9ce4e8b..77ddb97707 100644 --- a/compiler/optimizing/pretty_printer.h +++ b/compiler/optimizing/pretty_printer.h @@ -19,9 +19,10 @@ #include "android-base/stringprintf.h" +#include "base/macros.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { class HPrettyPrinter : public HGraphVisitor { public: diff --git a/compiler/optimizing/pretty_printer_test.cc b/compiler/optimizing/pretty_printer_test.cc index 6ef386b4a5..90d5f8f08f 100644 --- a/compiler/optimizing/pretty_printer_test.cc +++ b/compiler/optimizing/pretty_printer_test.cc @@ -17,6 +17,7 @@ #include "pretty_printer.h" #include "base/arena_allocator.h" +#include "base/macros.h" #include "builder.h" #include "dex/dex_file.h" #include "dex/dex_instruction.h" @@ -25,9 +26,9 @@ #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { -class PrettyPrinterTest : public OptimizingUnitTest { +class PrettyPrinterTest : public CommonCompilerTest, public OptimizingUnitTestHelper { protected: void TestCode(const std::vector<uint16_t>& data, const char* expected); }; diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index e6024b08cb..91bae5f49b 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -29,7 +29,7 @@ #include "mirror/dex_cache.h" #include "scoped_thread_state_change-inl.h" -namespace art { +namespace art HIDDEN { static inline ObjPtr<mirror::DexCache> FindDexCacheWithHint( Thread* self, const DexFile& dex_file, Handle<mirror::DexCache> hint_dex_cache) @@ -41,18 +41,14 @@ static inline ObjPtr<mirror::DexCache> FindDexCacheWithHint( } } -class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor { +class ReferenceTypePropagation::RTPVisitor final : public HGraphDelegateVisitor { public: - RTPVisitor(HGraph* graph, - Handle<mirror::ClassLoader> class_loader, - Handle<mirror::DexCache> hint_dex_cache, - bool is_first_run) - : HGraphDelegateVisitor(graph), - class_loader_(class_loader), - hint_dex_cache_(hint_dex_cache), - allocator_(graph->GetArenaStack()), - worklist_(allocator_.Adapter(kArenaAllocReferenceTypePropagation)), - is_first_run_(is_first_run) { + RTPVisitor(HGraph* graph, Handle<mirror::DexCache> hint_dex_cache, bool is_first_run) + : HGraphDelegateVisitor(graph), + hint_dex_cache_(hint_dex_cache), + allocator_(graph->GetArenaStack()), + worklist_(allocator_.Adapter(kArenaAllocReferenceTypePropagation)), + is_first_run_(is_first_run) { worklist_.reserve(kDefaultWorklistSize); } @@ -110,7 +106,6 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor { static constexpr size_t kDefaultWorklistSize = 8; - Handle<mirror::ClassLoader> class_loader_; 
Handle<mirror::DexCache> hint_dex_cache_; // Use local allocator for allocating memory. @@ -122,63 +117,18 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor { }; ReferenceTypePropagation::ReferenceTypePropagation(HGraph* graph, - Handle<mirror::ClassLoader> class_loader, Handle<mirror::DexCache> hint_dex_cache, bool is_first_run, const char* name) - : HOptimization(graph, name), - class_loader_(class_loader), - hint_dex_cache_(hint_dex_cache), - is_first_run_(is_first_run) { -} - -void ReferenceTypePropagation::ValidateTypes() { - // TODO: move this to the graph checker. Note: There may be no Thread for gtests. - if (kIsDebugBuild && Thread::Current() != nullptr) { - ScopedObjectAccess soa(Thread::Current()); - for (HBasicBlock* block : graph_->GetReversePostOrder()) { - for (HInstructionIterator iti(block->GetInstructions()); !iti.Done(); iti.Advance()) { - HInstruction* instr = iti.Current(); - if (instr->GetType() == DataType::Type::kReference) { - DCHECK(instr->GetReferenceTypeInfo().IsValid()) - << "Invalid RTI for instruction: " << instr->DebugName(); - if (instr->IsBoundType()) { - DCHECK(instr->AsBoundType()->GetUpperBound().IsValid()); - } else if (instr->IsLoadClass()) { - HLoadClass* cls = instr->AsLoadClass(); - DCHECK(cls->GetReferenceTypeInfo().IsExact()); - DCHECK_IMPLIES(cls->GetLoadedClassRTI().IsValid(), cls->GetLoadedClassRTI().IsExact()); - } else if (instr->IsNullCheck()) { - DCHECK(instr->GetReferenceTypeInfo().IsEqual(instr->InputAt(0)->GetReferenceTypeInfo())) - << "NullCheck " << instr->GetReferenceTypeInfo() - << "Input(0) " << instr->InputAt(0)->GetReferenceTypeInfo(); - } - } else if (instr->IsInstanceOf()) { - HInstanceOf* iof = instr->AsInstanceOf(); - DCHECK_IMPLIES(iof->GetTargetClassRTI().IsValid(), iof->GetTargetClassRTI().IsExact()); - } else if (instr->IsCheckCast()) { - HCheckCast* check = instr->AsCheckCast(); - DCHECK_IMPLIES(check->GetTargetClassRTI().IsValid(), - check->GetTargetClassRTI().IsExact()); - } - } - } - } -} + : HOptimization(graph, name), hint_dex_cache_(hint_dex_cache), is_first_run_(is_first_run) {} void ReferenceTypePropagation::Visit(HInstruction* instruction) { - RTPVisitor visitor(graph_, - class_loader_, - hint_dex_cache_, - is_first_run_); + RTPVisitor visitor(graph_, hint_dex_cache_, is_first_run_); instruction->Accept(&visitor); } void ReferenceTypePropagation::Visit(ArrayRef<HInstruction* const> instructions) { - RTPVisitor visitor(graph_, - class_loader_, - hint_dex_cache_, - is_first_run_); + RTPVisitor visitor(graph_, hint_dex_cache_, is_first_run_); for (HInstruction* instruction : instructions) { if (instruction->IsPhi()) { // Need to force phis to recalculate null-ness. @@ -349,7 +299,10 @@ static void BoundTypeForClassCheck(HInstruction* check) { } bool ReferenceTypePropagation::Run() { - RTPVisitor visitor(graph_, class_loader_, hint_dex_cache_, is_first_run_); + DCHECK(Thread::Current() != nullptr) + << "ReferenceTypePropagation requires the use of Thread::Current(). Make sure you have a " + << "Runtime initialized before calling this optimization pass"; + RTPVisitor visitor(graph_, hint_dex_cache_, is_first_run_); // To properly propagate type info we need to visit in the dominator-based order. // Reverse post order guarantees a node's dominators are visited first. 
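The new DCHECK at the top of ReferenceTypePropagation::Run() above makes the pass's dependency on a live runtime explicit, which is also why several gtest fixtures later in this series switch their base class. The recurring pattern, sketched here with a made-up test name:

    // CommonCompilerTest brings up a runtime (and thus Thread::Current()), while
    // OptimizingUnitTestHelper supplies the graph-building utilities.
    class MyReferenceTypeTest : public CommonCompilerTest, public OptimizingUnitTestHelper {};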
@@ -359,7 +312,6 @@ bool ReferenceTypePropagation::Run() { } visitor.ProcessWorklist(); - ValidateTypes(); return true; } @@ -446,10 +398,13 @@ static bool MatchIfInstanceOf(HIf* ifInstruction, if (rhs->AsIntConstant()->IsTrue()) { // Case (1a) *trueBranch = ifInstruction->IfTrueSuccessor(); - } else { + } else if (rhs->AsIntConstant()->IsFalse()) { // Case (2a) - DCHECK(rhs->AsIntConstant()->IsFalse()) << rhs->AsIntConstant()->GetValue(); *trueBranch = ifInstruction->IfFalseSuccessor(); + } else { + // Sometimes we see a comparison of instance-of with a constant which is neither 0 nor 1. + // In those cases, we cannot do the match if+instance-of. + return false; } *instanceOf = lhs->AsInstanceOf(); return true; @@ -463,10 +418,13 @@ static bool MatchIfInstanceOf(HIf* ifInstruction, if (rhs->AsIntConstant()->IsFalse()) { // Case (1b) *trueBranch = ifInstruction->IfTrueSuccessor(); - } else { + } else if (rhs->AsIntConstant()->IsTrue()) { // Case (2b) - DCHECK(rhs->AsIntConstant()->IsTrue()) << rhs->AsIntConstant()->GetValue(); *trueBranch = ifInstruction->IfFalseSuccessor(); + } else { + // Sometimes we see a comparison of instance-of with a constant which is neither 0 nor 1. + // In those cases, we cannot do the match if+instance-of. + return false; } *instanceOf = lhs->AsInstanceOf(); return true; @@ -583,7 +541,7 @@ void ReferenceTypePropagation::RTPVisitor::UpdateReferenceTypeInfo(HInstruction* ScopedObjectAccess soa(Thread::Current()); ObjPtr<mirror::DexCache> dex_cache = FindDexCacheWithHint(soa.Self(), dex_file, hint_dex_cache_); ObjPtr<mirror::Class> klass = Runtime::Current()->GetClassLinker()->LookupResolvedType( - type_idx, dex_cache, class_loader_.Get()); + type_idx, dex_cache, dex_cache->GetClassLoader()); SetClassAsTypeInfo(instr, klass, is_exact); } diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h index 889a8465e0..655f62b3da 100644 --- a/compiler/optimizing/reference_type_propagation.h +++ b/compiler/optimizing/reference_type_propagation.h @@ -18,12 +18,13 @@ #define ART_COMPILER_OPTIMIZING_REFERENCE_TYPE_PROPAGATION_H_ #include "base/arena_containers.h" +#include "base/macros.h" #include "mirror/class-inl.h" #include "nodes.h" #include "obj_ptr.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { /** * Propagates reference types to instructions. @@ -31,7 +32,6 @@ namespace art { class ReferenceTypePropagation : public HOptimization { public: ReferenceTypePropagation(HGraph* graph, - Handle<mirror::ClassLoader> class_loader, Handle<mirror::DexCache> hint_dex_cache, bool is_first_run, const char* name = kReferenceTypePropagationPassName); @@ -71,10 +71,6 @@ class ReferenceTypePropagation : public HOptimization { HandleCache* handle_cache) REQUIRES_SHARED(Locks::mutator_lock_); - void ValidateTypes(); - - Handle<mirror::ClassLoader> class_loader_; - // Note: hint_dex_cache_ is usually, but not necessarily, the dex cache associated with // graph_->GetDexFile(). Since we may look up also in other dex files, it's used only // as a hint, to reduce the number of calls to the costly ClassLinker::FindDexCache(). 
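Summarizing the reference_type_propagation changes above: the pass no longer stores a Handle<mirror::ClassLoader>; the loader is recovered from the dex cache at the point of the lookup, which is why both RTPVisitor and the pass constructor lose an argument. The resulting usage, using only calls that appear in this diff:

    // Constructing and running the pass with the reduced signature (see the
    // ssa_builder.cc hunk later in this diff).
    ReferenceTypePropagation(graph, hint_dex_cache, /* is_first_run= */ true).Run();

    // Inside UpdateReferenceTypeInfo, the class loader now comes from the dex cache itself.
    ObjPtr<mirror::Class> klass = Runtime::Current()->GetClassLinker()->LookupResolvedType(
        type_idx, dex_cache, dex_cache->GetClassLoader());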
diff --git a/compiler/optimizing/reference_type_propagation_test.cc b/compiler/optimizing/reference_type_propagation_test.cc index d1bcab083c..2b012fcd67 100644 --- a/compiler/optimizing/reference_type_propagation_test.cc +++ b/compiler/optimizing/reference_type_propagation_test.cc @@ -19,6 +19,7 @@ #include <random> #include "base/arena_allocator.h" +#include "base/macros.h" #include "base/transform_array_ref.h" #include "base/transform_iterator.h" #include "builder.h" @@ -26,7 +27,7 @@ #include "object_lock.h" #include "optimizing_unit_test.h" -namespace art { +namespace art HIDDEN { // TODO It would be good to use the following but there is a miniscule amount of // chance for flakiness so we'll just use a set seed instead. @@ -47,11 +48,8 @@ class ReferenceTypePropagationTestBase : public SuperTest, public OptimizingUnit void SetupPropagation(VariableSizedHandleScope* handles) { graph_ = CreateGraph(handles); - propagation_ = new (GetAllocator()) ReferenceTypePropagation(graph_, - Handle<mirror::ClassLoader>(), - Handle<mirror::DexCache>(), - true, - "test_prop"); + propagation_ = new (GetAllocator()) + ReferenceTypePropagation(graph_, Handle<mirror::DexCache>(), true, "test_prop"); } // Relay method to merge type in reference type propagation. diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc index 875c633889..53e11f2c3d 100644 --- a/compiler/optimizing/register_allocation_resolver.cc +++ b/compiler/optimizing/register_allocation_resolver.cc @@ -21,7 +21,7 @@ #include "linear_order.h" #include "ssa_liveness_analysis.h" -namespace art { +namespace art HIDDEN { RegisterAllocationResolver::RegisterAllocationResolver(CodeGenerator* codegen, const SsaLivenessAnalysis& liveness) diff --git a/compiler/optimizing/register_allocation_resolver.h b/compiler/optimizing/register_allocation_resolver.h index 278371777d..f4782eb48e 100644 --- a/compiler/optimizing/register_allocation_resolver.h +++ b/compiler/optimizing/register_allocation_resolver.h @@ -18,10 +18,11 @@ #define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATION_RESOLVER_H_ #include "base/array_ref.h" +#include "base/macros.h" #include "base/value_object.h" #include "data_type.h" -namespace art { +namespace art HIDDEN { class ArenaAllocator; class CodeGenerator; diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index a9c217fc4f..e4c2d74908 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -27,7 +27,7 @@ #include "register_allocator_linear_scan.h" #include "ssa_liveness_analysis.h" -namespace art { +namespace art HIDDEN { RegisterAllocator::RegisterAllocator(ScopedArenaAllocator* allocator, CodeGenerator* codegen, diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index 4d226875bf..453e339cba 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -22,7 +22,7 @@ #include "base/arena_object.h" #include "base/macros.h" -namespace art { +namespace art HIDDEN { class CodeGenerator; class HBasicBlock; diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc index 684aaf5750..a7c891d4e7 100644 --- a/compiler/optimizing/register_allocator_graph_color.cc +++ b/compiler/optimizing/register_allocator_graph_color.cc @@ -22,7 +22,7 @@ #include "ssa_liveness_analysis.h" #include "thread-current-inl.h" -namespace art { +namespace 
art HIDDEN { // Highest number of registers that we support for any platform. This can be used for std::bitset, // for example, which needs to know its size at compile time. diff --git a/compiler/optimizing/register_allocator_graph_color.h b/compiler/optimizing/register_allocator_graph_color.h index e5b86eacee..0e10152049 100644 --- a/compiler/optimizing/register_allocator_graph_color.h +++ b/compiler/optimizing/register_allocator_graph_color.h @@ -24,7 +24,7 @@ #include "base/scoped_arena_containers.h" #include "register_allocator.h" -namespace art { +namespace art HIDDEN { class CodeGenerator; class HBasicBlock; diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc index 833c24d5bb..fcdaa2d34f 100644 --- a/compiler/optimizing/register_allocator_linear_scan.cc +++ b/compiler/optimizing/register_allocator_linear_scan.cc @@ -26,7 +26,7 @@ #include "register_allocation_resolver.h" #include "ssa_liveness_analysis.h" -namespace art { +namespace art HIDDEN { static constexpr size_t kMaxLifetimePosition = -1; static constexpr size_t kDefaultNumberOfSpillSlots = 4; diff --git a/compiler/optimizing/register_allocator_linear_scan.h b/compiler/optimizing/register_allocator_linear_scan.h index 9a1e0d7f10..c71a9e9ff1 100644 --- a/compiler/optimizing/register_allocator_linear_scan.h +++ b/compiler/optimizing/register_allocator_linear_scan.h @@ -22,7 +22,7 @@ #include "base/scoped_arena_containers.h" #include "register_allocator.h" -namespace art { +namespace art HIDDEN { class CodeGenerator; class HBasicBlock; diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 682315545d..d316aa5dc2 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -18,6 +18,7 @@ #include "arch/x86/instruction_set_features_x86.h" #include "base/arena_allocator.h" +#include "base/macros.h" #include "builder.h" #include "code_generator.h" #include "code_generator_x86.h" @@ -31,17 +32,17 @@ #include "ssa_liveness_analysis.h" #include "ssa_phi_elimination.h" -namespace art { +namespace art HIDDEN { using Strategy = RegisterAllocator::Strategy; // Note: the register allocator tests rely on the fact that constants have live // intervals and registers get allocated to them. -class RegisterAllocatorTest : public OptimizingUnitTest { +class RegisterAllocatorTest : public CommonCompilerTest, public OptimizingUnitTestHelper { protected: void SetUp() override { - OptimizingUnitTest::SetUp(); + CommonCompilerTest::SetUp(); // This test is using the x86 ISA. compiler_options_ = CommonCompilerTest::CreateCompilerOptions(InstructionSet::kX86, "default"); } diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc index 8f18ccff5f..116f52605e 100644 --- a/compiler/optimizing/scheduler.cc +++ b/compiler/optimizing/scheduler.cc @@ -32,7 +32,7 @@ #include "scheduler_arm.h" #endif -namespace art { +namespace art HIDDEN { void SchedulingGraph::AddDependency(SchedulingNode* node, SchedulingNode* dependency, @@ -718,9 +718,10 @@ bool HScheduler::IsSchedulable(const HInstruction* instruction) const { // HLoadException // HMemoryBarrier // HMonitorOperation - // HNativeDebugInfo + // HNop // HThrow // HTryBoundary + // All volatile field access e.g. HInstanceFieldGet // TODO: Some of the instructions above may be safe to schedule (maybe as // scheduling barriers). 
return instruction->IsArrayGet() || diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h index f7180a02d7..299fbc93f3 100644 --- a/compiler/optimizing/scheduler.h +++ b/compiler/optimizing/scheduler.h @@ -19,6 +19,7 @@ #include <fstream> +#include "base/macros.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" #include "base/stl_util.h" @@ -28,7 +29,7 @@ #include "nodes.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { // General description of instruction scheduling. // diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc index 965e1bd9f4..3f931c4c49 100644 --- a/compiler/optimizing/scheduler_arm.cc +++ b/compiler/optimizing/scheduler_arm.cc @@ -23,7 +23,7 @@ #include "mirror/array-inl.h" #include "mirror/string.h" -namespace art { +namespace art HIDDEN { namespace arm { using helpers::Int32ConstantFrom; @@ -669,7 +669,7 @@ void SchedulingLatencyVisitorARM::VisitArrayGet(HArrayGet* instruction) { } case DataType::Type::kReference: { - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { last_visited_latency_ = kArmLoadWithBakerReadBarrierLatency; } else { if (index->IsConstant()) { @@ -937,7 +937,7 @@ void SchedulingLatencyVisitorARM::HandleFieldGetLatencies(HInstruction* instruct break; case DataType::Type::kReference: - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (gUseReadBarrier && kUseBakerReadBarrier) { last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency; last_visited_latency_ = kArmMemoryLoadLatency; } else { diff --git a/compiler/optimizing/scheduler_arm.h b/compiler/optimizing/scheduler_arm.h index d11222d9f4..0da21c187f 100644 --- a/compiler/optimizing/scheduler_arm.h +++ b/compiler/optimizing/scheduler_arm.h @@ -17,14 +17,12 @@ #ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ #define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ +#include "base/macros.h" #include "code_generator_arm_vixl.h" #include "scheduler.h" -namespace art { +namespace art HIDDEN { namespace arm { -// TODO: Replace CodeGeneratorARMType with CodeGeneratorARMVIXL everywhere? -typedef CodeGeneratorARMVIXL CodeGeneratorARMType; - // AArch32 instruction latencies. // We currently assume that all ARM CPUs share the same instruction latency list. // The following latencies were tuned based on performance experiments and @@ -49,10 +47,10 @@ static constexpr uint32_t kArmNopLatency = 2; static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18; static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46; -class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor { +class SchedulingLatencyVisitorARM final : public SchedulingLatencyVisitor { public: explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen) - : codegen_(down_cast<CodeGeneratorARMType*>(codegen)) {} + : codegen_(down_cast<CodeGeneratorARMVIXL*>(codegen)) {} // Default visitor for instructions not handled specifically below. void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override { @@ -133,7 +131,7 @@ class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor { // The latency setting for each HInstruction depends on how CodeGenerator may generate code, // latency visitors may query CodeGenerator for such information for accurate latency settings. 
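In the ARM scheduler hunks above, the compile-time constant kEmitCompilerReadBarrier is replaced by gUseReadBarrier; judging by the rename, the read-barrier configuration now lives in a global rather than a build-time constant, but the latency selection itself is unchanged. Condensed from the hunks (simplified; the real code also distinguishes constant indices and internal latencies):

    // Reference loads pay the Baker read-barrier latency when Baker read barriers
    // are in use, and the plain memory-load latency otherwise.
    if (gUseReadBarrier && kUseBakerReadBarrier) {
      last_visited_latency_ = kArmLoadWithBakerReadBarrierLatency;
    } else {
      last_visited_latency_ = kArmMemoryLoadLatency;
    }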
- CodeGeneratorARMType* codegen_; + CodeGeneratorARMVIXL* codegen_; }; class HSchedulerARM : public HScheduler { diff --git a/compiler/optimizing/scheduler_arm64.cc b/compiler/optimizing/scheduler_arm64.cc index 4f504c2100..3071afd951 100644 --- a/compiler/optimizing/scheduler_arm64.cc +++ b/compiler/optimizing/scheduler_arm64.cc @@ -20,7 +20,7 @@ #include "mirror/array-inl.h" #include "mirror/string.h" -namespace art { +namespace art HIDDEN { namespace arm64 { void SchedulingLatencyVisitorARM64::VisitBinaryOperation(HBinaryOperation* instr) { diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h index ba5a743545..ec41577e9d 100644 --- a/compiler/optimizing/scheduler_arm64.h +++ b/compiler/optimizing/scheduler_arm64.h @@ -17,9 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_ #define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_ +#include "base/macros.h" #include "scheduler.h" -namespace art { +namespace art HIDDEN { namespace arm64 { static constexpr uint32_t kArm64MemoryLoadLatency = 5; @@ -55,7 +56,7 @@ static constexpr uint32_t kArm64SIMDDivDoubleLatency = 60; static constexpr uint32_t kArm64SIMDDivFloatLatency = 30; static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10; -class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor { +class SchedulingLatencyVisitorARM64 final : public SchedulingLatencyVisitor { public: // Default visitor for instructions not handled specifically below. void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override { diff --git a/compiler/optimizing/scheduler_test.cc b/compiler/optimizing/scheduler_test.cc index a1cc202a89..165bfe3d94 100644 --- a/compiler/optimizing/scheduler_test.cc +++ b/compiler/optimizing/scheduler_test.cc @@ -17,6 +17,7 @@ #include "scheduler.h" #include "base/arena_allocator.h" +#include "base/macros.h" #include "builder.h" #include "codegen_test_utils.h" #include "common_compiler_test.h" @@ -34,7 +35,7 @@ #include "scheduler_arm.h" #endif -namespace art { +namespace art HIDDEN { // Return all combinations of ISA and code generator that are executable on // hardware, or on simulator, and that we'd like to test. @@ -65,7 +66,7 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() { return v; } -class SchedulerTest : public OptimizingUnitTest { +class SchedulerTest : public CommonCompilerTest, public OptimizingUnitTestHelper { public: SchedulerTest() : graph_(CreateGraph()) { } diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc index 54053820ca..6a10440d11 100644 --- a/compiler/optimizing/select_generator.cc +++ b/compiler/optimizing/select_generator.cc @@ -16,10 +16,10 @@ #include "select_generator.h" -#include "base/scoped_arena_containers.h" +#include "optimizing/nodes.h" #include "reference_type_propagation.h" -namespace art { +namespace art HIDDEN { static constexpr size_t kMaxInstructionsInBranch = 1u; @@ -69,156 +69,277 @@ static bool BlocksMergeTogether(HBasicBlock* block1, HBasicBlock* block2) { return block1->GetSingleSuccessor() == block2->GetSingleSuccessor(); } -// Returns nullptr if `block` has either no phis or there is more than one phi -// with different inputs at `index1` and `index2`. Otherwise returns that phi. -static HPhi* GetSingleChangedPhi(HBasicBlock* block, size_t index1, size_t index2) { +// Returns nullptr if `block` has either no phis or there is more than one phi. Otherwise returns +// that phi. 
+static HPhi* GetSinglePhi(HBasicBlock* block, size_t index1, size_t index2) { DCHECK_NE(index1, index2); HPhi* select_phi = nullptr; for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { HPhi* phi = it.Current()->AsPhi(); - if (phi->InputAt(index1) != phi->InputAt(index2)) { - if (select_phi == nullptr) { - // First phi with different inputs for the two indices found. - select_phi = phi; - } else { - // More than one phis has different inputs for the two indices. - return nullptr; - } + if (select_phi == nullptr) { + // First phi found. + select_phi = phi; + } else { + // More than one phi found, return null. + return nullptr; } } return select_phi; } -bool HSelectGenerator::Run() { - bool didSelect = false; - // Select cache with local allocator. - ScopedArenaAllocator allocator(graph_->GetArenaStack()); - ScopedArenaSafeMap<HInstruction*, HSelect*> cache( - std::less<HInstruction*>(), allocator.Adapter(kArenaAllocSelectGenerator)); +bool HSelectGenerator::TryGenerateSelectSimpleDiamondPattern( + HBasicBlock* block, ScopedArenaSafeMap<HInstruction*, HSelect*>* cache) { + DCHECK(block->GetLastInstruction()->IsIf()); + HIf* if_instruction = block->GetLastInstruction()->AsIf(); + HBasicBlock* true_block = if_instruction->IfTrueSuccessor(); + HBasicBlock* false_block = if_instruction->IfFalseSuccessor(); + DCHECK_NE(true_block, false_block); - // Iterate in post order in the unlikely case that removing one occurrence of - // the selection pattern empties a branch block of another occurrence. - for (HBasicBlock* block : graph_->GetPostOrder()) { - if (!block->EndsWithIf()) continue; + if (!IsSimpleBlock(true_block) || + !IsSimpleBlock(false_block) || + !BlocksMergeTogether(true_block, false_block)) { + return false; + } + HBasicBlock* merge_block = true_block->GetSingleSuccessor(); - // Find elements of the diamond pattern. - HIf* if_instruction = block->GetLastInstruction()->AsIf(); - HBasicBlock* true_block = if_instruction->IfTrueSuccessor(); - HBasicBlock* false_block = if_instruction->IfFalseSuccessor(); - DCHECK_NE(true_block, false_block); + // If the branches are not empty, move instructions in front of the If. + // TODO(dbrazdil): This puts an instruction between If and its condition. + // Implement moving of conditions to first users if possible. + while (!true_block->IsSingleGoto() && !true_block->IsSingleReturn()) { + HInstruction* instr = true_block->GetFirstInstruction(); + DCHECK(!instr->CanThrow()); + instr->MoveBefore(if_instruction); + } + while (!false_block->IsSingleGoto() && !false_block->IsSingleReturn()) { + HInstruction* instr = false_block->GetFirstInstruction(); + DCHECK(!instr->CanThrow()); + instr->MoveBefore(if_instruction); + } + DCHECK(true_block->IsSingleGoto() || true_block->IsSingleReturn()); + DCHECK(false_block->IsSingleGoto() || false_block->IsSingleReturn()); - if (!IsSimpleBlock(true_block) || - !IsSimpleBlock(false_block) || - !BlocksMergeTogether(true_block, false_block)) { - continue; - } - HBasicBlock* merge_block = true_block->GetSingleSuccessor(); - - // If the branches are not empty, move instructions in front of the If. - // TODO(dbrazdil): This puts an instruction between If and its condition. - // Implement moving of conditions to first users if possible. 
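For readers new to this pass, the shape that TryGenerateSelectSimpleDiamondPattern (split out of the old Run() body above) looks for is the classic one-phi diamond. A self-contained illustration of the source pattern, in plain C++ rather than ART's HIR:

    // An if/else whose arms each produce one value and meet at a single merge point.
    // In HIR that merge point carries one phi, which the pass replaces with
    // HSelect(condition, true_value, false_value) before flattening the diamond.
    int Max(int a, int b) {
      int result;
      if (a > b) {
        result = a;
      } else {
        result = b;
      }
      return result;  // Becomes: return select(a > b, a, b);
    }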
- while (!true_block->IsSingleGoto() && !true_block->IsSingleReturn()) { - HInstruction* instr = true_block->GetFirstInstruction(); - DCHECK(!instr->CanThrow()); - instr->MoveBefore(if_instruction); - } - while (!false_block->IsSingleGoto() && !false_block->IsSingleReturn()) { - HInstruction* instr = false_block->GetFirstInstruction(); - DCHECK(!instr->CanThrow()); - instr->MoveBefore(if_instruction); - } - DCHECK(true_block->IsSingleGoto() || true_block->IsSingleReturn()); - DCHECK(false_block->IsSingleGoto() || false_block->IsSingleReturn()); - - // Find the resulting true/false values. - size_t predecessor_index_true = merge_block->GetPredecessorIndexOf(true_block); - size_t predecessor_index_false = merge_block->GetPredecessorIndexOf(false_block); - DCHECK_NE(predecessor_index_true, predecessor_index_false); - - bool both_successors_return = true_block->IsSingleReturn() && false_block->IsSingleReturn(); - HPhi* phi = GetSingleChangedPhi(merge_block, predecessor_index_true, predecessor_index_false); - - HInstruction* true_value = nullptr; - HInstruction* false_value = nullptr; - if (both_successors_return) { - true_value = true_block->GetFirstInstruction()->InputAt(0); - false_value = false_block->GetFirstInstruction()->InputAt(0); - } else if (phi != nullptr) { - true_value = phi->InputAt(predecessor_index_true); - false_value = phi->InputAt(predecessor_index_false); - } else { - continue; - } - DCHECK(both_successors_return || phi != nullptr); - - // Create the Select instruction and insert it in front of the If. - HInstruction* condition = if_instruction->InputAt(0); - HSelect* select = new (graph_->GetAllocator()) HSelect(condition, - true_value, - false_value, - if_instruction->GetDexPc()); - if (both_successors_return) { - if (true_value->GetType() == DataType::Type::kReference) { - DCHECK(false_value->GetType() == DataType::Type::kReference); - ReferenceTypePropagation::FixUpInstructionType(select, graph_->GetHandleCache()); - } - } else if (phi->GetType() == DataType::Type::kReference) { - select->SetReferenceTypeInfo(phi->GetReferenceTypeInfo()); - } - block->InsertInstructionBefore(select, if_instruction); + // Find the resulting true/false values. + size_t predecessor_index_true = merge_block->GetPredecessorIndexOf(true_block); + size_t predecessor_index_false = merge_block->GetPredecessorIndexOf(false_block); + DCHECK_NE(predecessor_index_true, predecessor_index_false); - // Remove the true branch which removes the corresponding Phi - // input if needed. If left only with the false branch, the Phi is - // automatically removed. - if (both_successors_return) { - false_block->GetFirstInstruction()->ReplaceInput(select, 0); - } else { - phi->ReplaceInput(select, predecessor_index_false); + bool both_successors_return = true_block->IsSingleReturn() && false_block->IsSingleReturn(); + // TODO(solanes): Extend to support multiple phis? e.g. 
+ // int a, b; + // if (bool) { + // a = 0; b = 1; + // } else { + // a = 1; b = 2; + // } + // // use a and b + HPhi* phi = GetSinglePhi(merge_block, predecessor_index_true, predecessor_index_false); + + HInstruction* true_value = nullptr; + HInstruction* false_value = nullptr; + if (both_successors_return) { + true_value = true_block->GetFirstInstruction()->InputAt(0); + false_value = false_block->GetFirstInstruction()->InputAt(0); + } else if (phi != nullptr) { + true_value = phi->InputAt(predecessor_index_true); + false_value = phi->InputAt(predecessor_index_false); + } else { + return false; + } + DCHECK(both_successors_return || phi != nullptr); + + // Create the Select instruction and insert it in front of the If. + HInstruction* condition = if_instruction->InputAt(0); + HSelect* select = new (graph_->GetAllocator()) HSelect(condition, + true_value, + false_value, + if_instruction->GetDexPc()); + if (both_successors_return) { + if (true_value->GetType() == DataType::Type::kReference) { + DCHECK(false_value->GetType() == DataType::Type::kReference); + ReferenceTypePropagation::FixUpInstructionType(select, graph_->GetHandleCache()); } + } else if (phi->GetType() == DataType::Type::kReference) { + select->SetReferenceTypeInfoIfValid(phi->GetReferenceTypeInfo()); + } + block->InsertInstructionBefore(select, if_instruction); - bool only_two_predecessors = (merge_block->GetPredecessors().size() == 2u); - true_block->DisconnectAndDelete(); + // Remove the true branch which removes the corresponding Phi + // input if needed. If left only with the false branch, the Phi is + // automatically removed. + if (both_successors_return) { + false_block->GetFirstInstruction()->ReplaceInput(select, 0); + } else { + phi->ReplaceInput(select, predecessor_index_false); + } + + bool only_two_predecessors = (merge_block->GetPredecessors().size() == 2u); + true_block->DisconnectAndDelete(); + + // Merge remaining blocks which are now connected with Goto. + DCHECK_EQ(block->GetSingleSuccessor(), false_block); + block->MergeWith(false_block); + if (!both_successors_return && only_two_predecessors) { + DCHECK_EQ(only_two_predecessors, phi->GetBlock() == nullptr); + DCHECK_EQ(block->GetSingleSuccessor(), merge_block); + block->MergeWith(merge_block); + } - // Merge remaining blocks which are now connected with Goto. - DCHECK_EQ(block->GetSingleSuccessor(), false_block); - block->MergeWith(false_block); - if (!both_successors_return && only_two_predecessors) { - DCHECK_EQ(only_two_predecessors, phi->GetBlock() == nullptr); - DCHECK_EQ(block->GetSingleSuccessor(), merge_block); - block->MergeWith(merge_block); + MaybeRecordStat(stats_, MethodCompilationStat::kSelectGenerated); + + // Very simple way of finding common subexpressions in the generated HSelect statements + // (since this runs after GVN). Lookup by condition, and reuse latest one if possible + // (due to post order, latest select is most likely replacement). If needed, we could + // improve this by e.g. using the operands in the map as well. + auto it = cache->find(condition); + if (it == cache->end()) { + cache->Put(condition, select); + } else { + // Found cached value. See if latest can replace cached in the HIR. 
+ HSelect* cached_select = it->second; + DCHECK_EQ(cached_select->GetCondition(), select->GetCondition()); + if (cached_select->GetTrueValue() == select->GetTrueValue() && + cached_select->GetFalseValue() == select->GetFalseValue() && + select->StrictlyDominates(cached_select)) { + cached_select->ReplaceWith(select); + cached_select->GetBlock()->RemoveInstruction(cached_select); } + it->second = select; // always cache latest + } + + // No need to update dominance information, as we are simplifying + // a simple diamond shape, where the join block is merged with the + // entry block. Any following blocks would have had the join block + // as a dominator, and `MergeWith` handles changing that to the + // entry block + return true; +} - MaybeRecordStat(stats_, MethodCompilationStat::kSelectGenerated); +HBasicBlock* HSelectGenerator::TryFixupDoubleDiamondPattern(HBasicBlock* block) { + DCHECK(block->GetLastInstruction()->IsIf()); + HIf* if_instruction = block->GetLastInstruction()->AsIf(); + HBasicBlock* true_block = if_instruction->IfTrueSuccessor(); + HBasicBlock* false_block = if_instruction->IfFalseSuccessor(); + DCHECK_NE(true_block, false_block); - // Very simple way of finding common subexpressions in the generated HSelect statements - // (since this runs after GVN). Lookup by condition, and reuse latest one if possible - // (due to post order, latest select is most likely replacement). If needed, we could - // improve this by e.g. using the operands in the map as well. - auto it = cache.find(condition); - if (it == cache.end()) { - cache.Put(condition, select); + // One branch must be a single goto, and the other one the inner if. + if (true_block->IsSingleGoto() == false_block->IsSingleGoto()) { + return nullptr; + } + + HBasicBlock* single_goto = true_block->IsSingleGoto() ? true_block : false_block; + HBasicBlock* inner_if_block = true_block->IsSingleGoto() ? false_block : true_block; + + // The innner if branch has to be a block with just a comparison and an if. + if (!inner_if_block->EndsWithIf() || + inner_if_block->GetLastInstruction()->AsIf()->InputAt(0) != + inner_if_block->GetFirstInstruction() || + inner_if_block->GetLastInstruction()->GetPrevious() != + inner_if_block->GetFirstInstruction() || + !inner_if_block->GetFirstInstruction()->IsCondition()) { + return nullptr; + } + + HIf* inner_if_instruction = inner_if_block->GetLastInstruction()->AsIf(); + HBasicBlock* inner_if_true_block = inner_if_instruction->IfTrueSuccessor(); + HBasicBlock* inner_if_false_block = inner_if_instruction->IfFalseSuccessor(); + if (!inner_if_true_block->IsSingleGoto() || !inner_if_false_block->IsSingleGoto()) { + return nullptr; + } + + // One must merge into the outer condition and the other must not. + if (BlocksMergeTogether(single_goto, inner_if_true_block) == + BlocksMergeTogether(single_goto, inner_if_false_block)) { + return nullptr; + } + + // First merge merges the outer if with one of the inner if branches. The block must be a Phi and + // a Goto. + HBasicBlock* first_merge = single_goto->GetSingleSuccessor(); + if (first_merge->GetNumberOfPredecessors() != 2 || + first_merge->GetPhis().CountSize() != 1 || + !first_merge->GetLastInstruction()->IsGoto() || + first_merge->GetFirstInstruction() != first_merge->GetLastInstruction()) { + return nullptr; + } + + HPhi* first_phi = first_merge->GetFirstPhi()->AsPhi(); + + // Second merge is first_merge and the remainder branch merging. It must be phi + goto, or phi + + // return. Depending on the first merge, we define the second merge. 
+ HBasicBlock* merges_into_second_merge = + BlocksMergeTogether(single_goto, inner_if_true_block) + ? inner_if_false_block + : inner_if_true_block; + if (!BlocksMergeTogether(first_merge, merges_into_second_merge)) { + return nullptr; + } + + HBasicBlock* second_merge = merges_into_second_merge->GetSingleSuccessor(); + if (second_merge->GetNumberOfPredecessors() != 2 || + second_merge->GetPhis().CountSize() != 1 || + !(second_merge->GetLastInstruction()->IsGoto() || + second_merge->GetLastInstruction()->IsReturn()) || + second_merge->GetFirstInstruction() != second_merge->GetLastInstruction()) { + return nullptr; + } + + size_t index = second_merge->GetPredecessorIndexOf(merges_into_second_merge); + HPhi* second_phi = second_merge->GetFirstPhi()->AsPhi(); + + // Merge the phis. + first_phi->AddInput(second_phi->InputAt(index)); + merges_into_second_merge->ReplaceSuccessor(second_merge, first_merge); + second_phi->ReplaceWith(first_phi); + second_merge->RemovePhi(second_phi); + + // Sort out the new domination before merging the blocks + DCHECK_EQ(second_merge->GetSinglePredecessor(), first_merge); + second_merge->GetDominator()->RemoveDominatedBlock(second_merge); + second_merge->SetDominator(first_merge); + first_merge->AddDominatedBlock(second_merge); + first_merge->MergeWith(second_merge); + + // No need to update dominance information. There's a chance that `merges_into_second_merge` + // doesn't come before `first_merge` but we don't need to fix it since `merges_into_second_merge` + // will disappear from the graph altogether when doing the follow-up + // TryGenerateSelectSimpleDiamondPattern. + + return inner_if_block; +} + +bool HSelectGenerator::Run() { + bool did_select = false; + // Select cache with local allocator. + ScopedArenaAllocator allocator(graph_->GetArenaStack()); + ScopedArenaSafeMap<HInstruction*, HSelect*> cache(std::less<HInstruction*>(), + allocator.Adapter(kArenaAllocSelectGenerator)); + + // Iterate in post order in the unlikely case that removing one occurrence of + // the selection pattern empties a branch block of another occurrence. + for (HBasicBlock* block : graph_->GetPostOrder()) { + if (!block->EndsWithIf()) { + continue; + } + + if (TryGenerateSelectSimpleDiamondPattern(block, &cache)) { + did_select = true; } else { - // Found cached value. See if latest can replace cached in the HIR. - HSelect* cached = it->second; - DCHECK_EQ(cached->GetCondition(), select->GetCondition()); - if (cached->GetTrueValue() == select->GetTrueValue() && - cached->GetFalseValue() == select->GetFalseValue() && - select->StrictlyDominates(cached)) { - cached->ReplaceWith(select); - cached->GetBlock()->RemoveInstruction(cached); + // Try to fix up the odd version of the double diamond pattern. If we could do it, it means + // that we can generate two selects. + HBasicBlock* inner_if_block = TryFixupDoubleDiamondPattern(block); + if (inner_if_block != nullptr) { + // Generate the selects now since `inner_if_block` should be after `block` in PostOrder. + bool result = TryGenerateSelectSimpleDiamondPattern(inner_if_block, &cache); + DCHECK(result); + result = TryGenerateSelectSimpleDiamondPattern(block, &cache); + DCHECK(result); + did_select = true; } - it->second = select; // always cache latest } - - // No need to update dominance information, as we are simplifying - // a simple diamond shape, where the join block is merged with the - // entry block. 
Any following blocks would have had the join block
-    // as a dominator, and `MergeWith` handles changing that to the
-    // entry block.
-    didSelect = true;
   }
-  return didSelect;
+
+  return did_select;
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/select_generator.h b/compiler/optimizing/select_generator.h
index 30ac8a86eb..7aa0803d89 100644
--- a/compiler/optimizing/select_generator.h
+++ b/compiler/optimizing/select_generator.h
@@ -57,9 +57,12 @@
 #ifndef ART_COMPILER_OPTIMIZING_SELECT_GENERATOR_H_
 #define ART_COMPILER_OPTIMIZING_SELECT_GENERATOR_H_
 
+#include "base/macros.h"
+#include "base/scoped_arena_containers.h"
 #include "optimization.h"
+#include "optimizing/nodes.h"
 
-namespace art {
+namespace art HIDDEN {
 
 class HSelectGenerator : public HOptimization {
  public:
@@ -72,6 +75,43 @@ class HSelectGenerator : public HOptimization {
   static constexpr const char* kSelectGeneratorPassName = "select_generator";
 
  private:
+  bool TryGenerateSelectSimpleDiamondPattern(HBasicBlock* block,
+                                             ScopedArenaSafeMap<HInstruction*, HSelect*>* cache);
+
+  // When generating code for nested ternary operators (e.g. `return (x > 100) ? 100 : ((x < -100) ?
+  // -100 : x);`), a dexer can generate a double diamond pattern but it is not a clear cut one due
+  // to the merging of the blocks. `TryFixupDoubleDiamondPattern` recognizes that pattern and fixes
+  // up the graph to have a clean double diamond that `TryGenerateSelectSimpleDiamondPattern` can
+  // use to generate selects.
+  //
+  // In ASCII, it turns:
+  //
+  //      1 (outer if)
+  //     / \
+  //    2   3 (inner if)
+  //    |  / \
+  //    | 4   5
+  //     \/   |
+  //      6   |
+  //       \  |
+  //        7
+  //        |
+  //        8
+  // into:
+  //      1 (outer if)
+  //     / \
+  //    2   3 (inner if)
+  //    |  / \
+  //    | 4   5
+  //     \/  /
+  //      6
+  //      |
+  //      8
+  //
+  // In short, block 7 disappears and we merge 6 and 7. Now we have a diamond with {3,4,5,6}, and
+  // when that gets resolved we get another one with the outer if.
+  HBasicBlock* TryFixupDoubleDiamondPattern(HBasicBlock* block);
+
   DISALLOW_COPY_AND_ASSIGN(HSelectGenerator);
 };
 
diff --git a/compiler/optimizing/select_generator_test.cc b/compiler/optimizing/select_generator_test.cc
index b18d41abbb..fc9e150d92 100644
--- a/compiler/optimizing/select_generator_test.cc
+++ b/compiler/optimizing/select_generator_test.cc
@@ -17,12 +17,13 @@
 #include "select_generator.h"
 
 #include "base/arena_allocator.h"
+#include "base/macros.h"
 #include "builder.h"
 #include "nodes.h"
 #include "optimizing_unit_test.h"
 #include "side_effects_analysis.h"
 
-namespace art {
+namespace art HIDDEN {
 
 class SelectGeneratorTest : public OptimizingUnitTest {
  protected:
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index 17cf3d3477..277edff33e 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -34,7 +34,7 @@
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
 
-namespace art {
+namespace art HIDDEN {
 
 static bool IsInBootImage(ArtMethod* method) {
   gc::Heap* heap = Runtime::Current()->GetHeap();
@@ -63,9 +63,9 @@ HInvokeStaticOrDirect::DispatchInfo HSharpening::SharpenLoadMethod(
     bool for_interface_call,
     CodeGenerator* codegen) {
   if (kIsDebugBuild) {
-    ScopedObjectAccess soa(Thread::Current());  // Required for GetDeclaringClass below.
+    ScopedObjectAccess soa(Thread::Current());  // Required for `IsStringConstructor()` below.
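To make the select_generator.h comment above concrete: the nested ternary it cites compiles to the "double diamond" shape, and once TryFixupDoubleDiamondPattern has merged the two join blocks, each diamond collapses into its own select. Plain C++ illustration (not ART code):

    // return (x > 100) ? 100 : ((x < -100) ? -100 : x);
    // After the fixup, the inner diamond becomes HSelect(x < -100, -100, x) and the
    // outer one becomes HSelect(x > 100, 100, <inner select>).
    int Clamp(int x) {
      return (x > 100) ? 100 : ((x < -100) ? -100 : x);
    }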
DCHECK(callee != nullptr); - DCHECK(!(callee->IsConstructor() && callee->GetDeclaringClass()->IsStringClass())); + DCHECK(!callee->IsStringConstructor()); } MethodLoadKind method_load_kind; diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h index 975366918c..6dfe904f27 100644 --- a/compiler/optimizing/sharpening.h +++ b/compiler/optimizing/sharpening.h @@ -17,10 +17,11 @@ #ifndef ART_COMPILER_OPTIMIZING_SHARPENING_H_ #define ART_COMPILER_OPTIMIZING_SHARPENING_H_ +#include "base/macros.h" #include "nodes.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { class CodeGenerator; class DexCompilationUnit; diff --git a/compiler/optimizing/side_effects_analysis.cc b/compiler/optimizing/side_effects_analysis.cc index ba97b43de9..56719b100e 100644 --- a/compiler/optimizing/side_effects_analysis.cc +++ b/compiler/optimizing/side_effects_analysis.cc @@ -16,7 +16,7 @@ #include "side_effects_analysis.h" -namespace art { +namespace art HIDDEN { bool SideEffectsAnalysis::Run() { // Inlining might have created more blocks, so we need to increase the size diff --git a/compiler/optimizing/side_effects_analysis.h b/compiler/optimizing/side_effects_analysis.h index 56a01e63f1..47fcdc5d1b 100644 --- a/compiler/optimizing/side_effects_analysis.h +++ b/compiler/optimizing/side_effects_analysis.h @@ -18,10 +18,11 @@ #define ART_COMPILER_OPTIMIZING_SIDE_EFFECTS_ANALYSIS_H_ #include "base/arena_containers.h" +#include "base/macros.h" #include "nodes.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { class SideEffectsAnalysis : public HOptimization { public: diff --git a/compiler/optimizing/side_effects_test.cc b/compiler/optimizing/side_effects_test.cc index 268798ca7d..f2b781dfa4 100644 --- a/compiler/optimizing/side_effects_test.cc +++ b/compiler/optimizing/side_effects_test.cc @@ -16,10 +16,11 @@ #include <gtest/gtest.h> +#include "base/macros.h" #include "data_type.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { // Only runtime types other than void are allowed. static const DataType::Type kTestTypes[] = { diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index 67ee83c9dd..a658252e69 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -27,7 +27,7 @@ #include "scoped_thread_state_change-inl.h" #include "ssa_phi_elimination.h" -namespace art { +namespace art HIDDEN { void SsaBuilder::FixNullConstantType() { // The order doesn't matter here. @@ -538,7 +538,6 @@ GraphAnalysisResult SsaBuilder::BuildSsa() { // Compute type of reference type instructions. The pass assumes that // NullConstant has been fixed up. ReferenceTypePropagation(graph_, - class_loader_, dex_cache_, /* is_first_run= */ true).Run(); diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index a7d4e0ebd3..99a5469932 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -17,12 +17,13 @@ #ifndef ART_COMPILER_OPTIMIZING_SSA_BUILDER_H_ #define ART_COMPILER_OPTIMIZING_SSA_BUILDER_H_ +#include "base/macros.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" #include "nodes.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { /** * Transforms a graph into SSA form. 
The liveness guarantees of diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index 18942a1823..317e0999d7 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -21,7 +21,7 @@ #include "linear_order.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { void SsaLivenessAnalysis::Analyze() { // Compute the linear order directly in the graph's data structure diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index 7f31585f34..cc2b49cf22 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -21,11 +21,12 @@ #include "base/intrusive_forward_list.h" #include "base/iteration_range.h" +#include "base/macros.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { class CodeGenerator; class SsaLivenessAnalysis; diff --git a/compiler/optimizing/ssa_liveness_analysis_test.cc b/compiler/optimizing/ssa_liveness_analysis_test.cc index a477893d57..2df0f34c7d 100644 --- a/compiler/optimizing/ssa_liveness_analysis_test.cc +++ b/compiler/optimizing/ssa_liveness_analysis_test.cc @@ -20,12 +20,13 @@ #include "arch/instruction_set_features.h" #include "base/arena_allocator.h" #include "base/arena_containers.h" +#include "base/macros.h" #include "code_generator.h" #include "driver/compiler_options.h" #include "nodes.h" #include "optimizing_unit_test.h" -namespace art { +namespace art HIDDEN { class SsaLivenessAnalysisTest : public OptimizingUnitTest { protected: diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc index 8fd6962500..ce343dffec 100644 --- a/compiler/optimizing/ssa_phi_elimination.cc +++ b/compiler/optimizing/ssa_phi_elimination.cc @@ -21,7 +21,7 @@ #include "base/scoped_arena_containers.h" #include "base/bit_vector-inl.h" -namespace art { +namespace art HIDDEN { bool SsaDeadPhiElimination::Run() { MarkDeadPhis(); diff --git a/compiler/optimizing/ssa_phi_elimination.h b/compiler/optimizing/ssa_phi_elimination.h index c5cc752ffc..f606f928fa 100644 --- a/compiler/optimizing/ssa_phi_elimination.h +++ b/compiler/optimizing/ssa_phi_elimination.h @@ -17,10 +17,11 @@ #ifndef ART_COMPILER_OPTIMIZING_SSA_PHI_ELIMINATION_H_ #define ART_COMPILER_OPTIMIZING_SSA_PHI_ELIMINATION_H_ +#include "base/macros.h" #include "nodes.h" #include "optimization.h" -namespace art { +namespace art HIDDEN { /** * Optimization phase that removes dead phis from the graph. 
Dead phis are unused diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc index e679893af2..980493db34 100644 --- a/compiler/optimizing/ssa_test.cc +++ b/compiler/optimizing/ssa_test.cc @@ -17,6 +17,7 @@ #include "android-base/stringprintf.h" #include "base/arena_allocator.h" +#include "base/macros.h" #include "builder.h" #include "dex/dex_file.h" #include "dex/dex_instruction.h" @@ -27,9 +28,9 @@ #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { -class SsaTest : public OptimizingUnitTest { +class SsaTest : public CommonCompilerTest, public OptimizingUnitTestHelper { protected: void TestCode(const std::vector<uint16_t>& data, const char* expected); }; diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index f55bbee1c8..1a368ed347 100644 --- a/compiler/optimizing/stack_map_stream.cc +++ b/compiler/optimizing/stack_map_stream.cc @@ -20,6 +20,7 @@ #include <vector> #include "art_method-inl.h" +#include "base/globals.h" #include "base/stl_util.h" #include "class_linker.h" #include "dex/dex_file.h" @@ -32,7 +33,7 @@ #include "scoped_thread_state_change-inl.h" #include "stack_map.h" -namespace art { +namespace art HIDDEN { constexpr static bool kVerifyStackMaps = kIsDebugBuild; @@ -49,7 +50,8 @@ void StackMapStream::BeginMethod(size_t frame_size_in_bytes, size_t core_spill_mask, size_t fp_spill_mask, uint32_t num_dex_registers, - bool baseline) { + bool baseline, + bool debuggable) { DCHECK(!in_method_) << "Mismatched Begin/End calls"; in_method_ = true; DCHECK_EQ(packed_frame_size_, 0u) << "BeginMethod was already called"; @@ -60,6 +62,7 @@ void StackMapStream::BeginMethod(size_t frame_size_in_bytes, fp_spill_mask_ = fp_spill_mask; num_dex_registers_ = num_dex_registers; baseline_ = baseline; + debuggable_ = debuggable; if (kVerifyStackMaps) { dchecks_.emplace_back([=](const CodeInfo& code_info) { @@ -99,16 +102,21 @@ void StackMapStream::EndMethod(size_t code_size) { } } -void StackMapStream::BeginStackMapEntry(uint32_t dex_pc, - uint32_t native_pc_offset, - uint32_t register_mask, - BitVector* stack_mask, - StackMap::Kind kind, - bool needs_vreg_info) { +void StackMapStream::BeginStackMapEntry( + uint32_t dex_pc, + uint32_t native_pc_offset, + uint32_t register_mask, + BitVector* stack_mask, + StackMap::Kind kind, + bool needs_vreg_info, + const std::vector<uint32_t>& dex_pc_list_for_catch_verification) { DCHECK(in_method_) << "Call BeginMethod first"; DCHECK(!in_stack_map_) << "Mismatched Begin/End calls"; in_stack_map_ = true; + DCHECK_IMPLIES(!dex_pc_list_for_catch_verification.empty(), kind == StackMap::Kind::Catch); + DCHECK_IMPLIES(!dex_pc_list_for_catch_verification.empty(), kIsDebugBuild); + current_stack_map_ = BitTableBuilder<StackMap>::Entry(); current_stack_map_[StackMap::kKind] = static_cast<uint32_t>(kind); current_stack_map_[StackMap::kPackedNativePc] = @@ -149,7 +157,8 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc, instruction_set_); CHECK_EQ(stack_map.Row(), stack_map_index); } else if (kind == StackMap::Kind::Catch) { - StackMap stack_map = code_info.GetCatchStackMapForDexPc(dex_pc); + StackMap stack_map = code_info.GetCatchStackMapForDexPc( + ArrayRef<const uint32_t>(dex_pc_list_for_catch_verification)); CHECK_EQ(stack_map.Row(), stack_map_index); } StackMap stack_map = code_info.GetStackMapAt(stack_map_index); @@ -367,6 +376,7 @@ ScopedArenaVector<uint8_t> StackMapStream::Encode() { uint32_t flags = (inline_infos_.size() > 0) ? 
CodeInfo::kHasInlineInfo : 0; flags |= baseline_ ? CodeInfo::kIsBaseline : 0; + flags |= debuggable_ ? CodeInfo::kIsDebuggable : 0; DCHECK_LE(flags, kVarintMax); // Ensure flags can be read directly as byte. uint32_t bit_table_flags = 0; ForEachBitTable([&bit_table_flags](size_t i, auto bit_table) { diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 27145a174c..643af2da94 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -21,6 +21,7 @@ #include "base/arena_bit_vector.h" #include "base/bit_table.h" #include "base/bit_vector-inl.h" +#include "base/macros.h" #include "base/memory_region.h" #include "base/scoped_arena_containers.h" #include "base/value_object.h" @@ -28,7 +29,7 @@ #include "nodes.h" #include "stack_map.h" -namespace art { +namespace art HIDDEN { class CodeGenerator; @@ -64,15 +65,19 @@ class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> { size_t core_spill_mask, size_t fp_spill_mask, uint32_t num_dex_registers, - bool baseline = false); + bool baseline, + bool debuggable); void EndMethod(size_t code_size); - void BeginStackMapEntry(uint32_t dex_pc, - uint32_t native_pc_offset, - uint32_t register_mask = 0, - BitVector* sp_mask = nullptr, - StackMap::Kind kind = StackMap::Kind::Default, - bool needs_vreg_info = true); + void BeginStackMapEntry( + uint32_t dex_pc, + uint32_t native_pc_offset, + uint32_t register_mask = 0, + BitVector* sp_mask = nullptr, + StackMap::Kind kind = StackMap::Kind::Default, + bool needs_vreg_info = true, + const std::vector<uint32_t>& dex_pc_list_for_catch_verification = std::vector<uint32_t>()); + void EndStackMapEntry(); void AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t value) { @@ -125,6 +130,7 @@ class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> { uint32_t fp_spill_mask_ = 0; uint32_t num_dex_registers_ = 0; bool baseline_; + bool debuggable_; BitTableBuilder<StackMap> stack_maps_; BitTableBuilder<RegisterMask> register_masks_; BitmapTableBuilder stack_masks_; diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index f6a739e15a..a2c30e7681 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -18,12 +18,13 @@ #include "art_method.h" #include "base/arena_bit_vector.h" +#include "base/macros.h" #include "base/malloc_arena_pool.h" #include "stack_map_stream.h" #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { // Check that the stack mask of given stack map is identical // to the given bit vector. Returns true if they are same. 
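// Illustrative usage sketch for the updated StackMapStream interface above (hypothetical values,
// not part of the patch): `baseline` and `debuggable` are now explicit arguments of BeginMethod(),
// and a Catch-kind entry may carry the dex pcs of its handlers, which are only consulted for
// debug-build verification against CodeInfo::GetCatchStackMapForDexPc(). The helper assumes a
// `stream` constructed the same way as in the tests that follow.
static void BuildExampleStackMaps(StackMapStream* stream) {
  stream->BeginMethod(/* frame_size_in_bytes= */ 32,
                      /* core_spill_mask= */ 0,
                      /* fp_spill_mask= */ 0,
                      /* num_dex_registers= */ 2,
                      /* baseline= */ false,
                      /* debuggable= */ true);  // Encoded as the CodeInfo::kIsDebuggable flag.

  // Only fill in the catch dex pc list in debug builds, matching the DCHECK_IMPLIES above.
  std::vector<uint32_t> catch_dex_pcs;
  if (kIsDebugBuild) {
    catch_dex_pcs = {0x10, 0x2a};  // Hypothetical handler dex pcs.
  }
  stream->BeginStackMapEntry(/* dex_pc= */ 0x10,
                             /* native_pc_offset= */ 64,
                             /* register_mask= */ 0u,
                             /* stack_mask= */ nullptr,
                             StackMap::Kind::Catch,
                             /* needs_vreg_info= */ true,
                             catch_dex_pcs);
  stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, 0);  // vreg 0
  stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, -2);   // vreg 1
  stream->EndStackMapEntry();

  stream->EndMethod(/* code_size= */ 128);
}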
@@ -52,7 +53,12 @@ TEST(StackMapTest, Test1) { ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); - stream.BeginMethod(32, 0, 0, 2); + stream.BeginMethod(/* frame_size_in_bytes= */ 32, + /* core_spill_mask= */ 0, + /* fp_spill_mask= */ 0, + /* num_dex_registers= */ 2, + /* baseline= */ false, + /* debuggable= */ false); ArenaBitVector sp_mask(&allocator, 0, false); size_t number_of_dex_registers = 2; @@ -106,7 +112,12 @@ TEST(StackMapTest, Test2) { ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); - stream.BeginMethod(32, 0, 0, 2); + stream.BeginMethod(/* frame_size_in_bytes= */ 32, + /* core_spill_mask= */ 0, + /* fp_spill_mask= */ 0, + /* num_dex_registers= */ 2, + /* baseline= */ false, + /* debuggable= */ false); ArtMethod art_method; ArenaBitVector sp_mask1(&allocator, 0, true); @@ -300,7 +311,12 @@ TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) { ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); - stream.BeginMethod(32, 0, 0, 2); + stream.BeginMethod(/* frame_size_in_bytes= */ 32, + /* core_spill_mask= */ 0, + /* fp_spill_mask= */ 0, + /* num_dex_registers= */ 2, + /* baseline= */ false, + /* debuggable= */ false); ArtMethod art_method; ArenaBitVector sp_mask1(&allocator, 0, true); @@ -363,7 +379,12 @@ TEST(StackMapTest, TestNonLiveDexRegisters) { ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); - stream.BeginMethod(32, 0, 0, 2); + stream.BeginMethod(/* frame_size_in_bytes= */ 32, + /* core_spill_mask= */ 0, + /* fp_spill_mask= */ 0, + /* num_dex_registers= */ 2, + /* baseline= */ false, + /* debuggable= */ false); ArenaBitVector sp_mask(&allocator, 0, false); uint32_t number_of_dex_registers = 2; @@ -411,7 +432,12 @@ TEST(StackMapTest, TestShareDexRegisterMap) { ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); - stream.BeginMethod(32, 0, 0, 2); + stream.BeginMethod(/* frame_size_in_bytes= */ 32, + /* core_spill_mask= */ 0, + /* fp_spill_mask= */ 0, + /* num_dex_registers= */ 2, + /* baseline= */ false, + /* debuggable= */ false); ArenaBitVector sp_mask(&allocator, 0, false); uint32_t number_of_dex_registers = 2; @@ -467,7 +493,12 @@ TEST(StackMapTest, TestNoDexRegisterMap) { ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); - stream.BeginMethod(32, 0, 0, 1); + stream.BeginMethod(/* frame_size_in_bytes= */ 32, + /* core_spill_mask= */ 0, + /* fp_spill_mask= */ 0, + /* num_dex_registers= */ 1, + /* baseline= */ false, + /* debuggable= */ false); ArenaBitVector sp_mask(&allocator, 0, false); stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask); @@ -512,7 +543,12 @@ TEST(StackMapTest, InlineTest) { ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); - stream.BeginMethod(32, 0, 0, 2); + stream.BeginMethod(/* frame_size_in_bytes= */ 32, + /* core_spill_mask= */ 0, + /* fp_spill_mask= */ 0, + /* num_dex_registers= */ 2, + /* baseline= */ false, + /* debuggable= */ false); ArtMethod art_method; ArenaBitVector sp_mask1(&allocator, 0, true); @@ -702,7 +738,12 @@ TEST(StackMapTest, TestDeduplicateStackMask) { ArenaStack arena_stack(&pool); ScopedArenaAllocator 
allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); - stream.BeginMethod(32, 0, 0, 0); + stream.BeginMethod(/* frame_size_in_bytes= */ 32, + /* core_spill_mask= */ 0, + /* fp_spill_mask= */ 0, + /* num_dex_registers= */ 0, + /* baseline= */ false, + /* debuggable= */ false); ArenaBitVector sp_mask(&allocator, 0, true); sp_mask.SetBit(1); diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc index a5f919c31c..7c0097c6f6 100644 --- a/compiler/optimizing/superblock_cloner.cc +++ b/compiler/optimizing/superblock_cloner.cc @@ -22,7 +22,7 @@ #include <sstream> -namespace art { +namespace art HIDDEN { using HBasicBlockMap = SuperblockCloner::HBasicBlockMap; using HInstructionMap = SuperblockCloner::HInstructionMap; @@ -633,7 +633,7 @@ void SuperblockCloner::ConstructSubgraphClosedSSA() { HPhi* phi = new (arena_) HPhi(arena_, kNoRegNumber, 0, value->GetType()); if (value->GetType() == DataType::Type::kReference) { - phi->SetReferenceTypeInfo(value->GetReferenceTypeInfo()); + phi->SetReferenceTypeInfoIfValid(value->GetReferenceTypeInfo()); } exit_block->AddPhi(phi); diff --git a/compiler/optimizing/superblock_cloner.h b/compiler/optimizing/superblock_cloner.h index 1f6ee74fbd..421701fb19 100644 --- a/compiler/optimizing/superblock_cloner.h +++ b/compiler/optimizing/superblock_cloner.h @@ -20,9 +20,10 @@ #include "base/arena_bit_vector.h" #include "base/arena_containers.h" #include "base/bit_vector-inl.h" +#include "base/macros.h" #include "nodes.h" -namespace art { +namespace art HIDDEN { class InductionVarRange; diff --git a/compiler/optimizing/superblock_cloner_test.cc b/compiler/optimizing/superblock_cloner_test.cc index d8d68b7763..ea2563ea7d 100644 --- a/compiler/optimizing/superblock_cloner_test.cc +++ b/compiler/optimizing/superblock_cloner_test.cc @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "base/macros.h" #include "graph_checker.h" #include "nodes.h" #include "optimizing_unit_test.h" @@ -21,7 +22,7 @@ #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { using HBasicBlockMap = SuperblockCloner::HBasicBlockMap; using HInstructionMap = SuperblockCloner::HInstructionMap; diff --git a/compiler/optimizing/suspend_check_test.cc b/compiler/optimizing/suspend_check_test.cc index 33823e2a11..76e7e0c32c 100644 --- a/compiler/optimizing/suspend_check_test.cc +++ b/compiler/optimizing/suspend_check_test.cc @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "base/macros.h" #include "builder.h" #include "dex/dex_instruction.h" #include "nodes.h" @@ -22,13 +23,13 @@ #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { /** * Check that the HGraphBuilder adds suspend checks to backward branches. */ -class SuspendCheckTest : public OptimizingUnitTest { +class SuspendCheckTest : public CommonCompilerTest, public OptimizingUnitTestHelper { protected: void TestCode(const std::vector<uint16_t>& data); }; diff --git a/compiler/optimizing/write_barrier_elimination.cc b/compiler/optimizing/write_barrier_elimination.cc new file mode 100644 index 0000000000..eb70b670fe --- /dev/null +++ b/compiler/optimizing/write_barrier_elimination.cc @@ -0,0 +1,161 @@ +/* + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "write_barrier_elimination.h" + +#include "base/arena_allocator.h" +#include "base/scoped_arena_allocator.h" +#include "base/scoped_arena_containers.h" +#include "optimizing/nodes.h" + +namespace art HIDDEN { + +class WBEVisitor final : public HGraphVisitor { + public: + WBEVisitor(HGraph* graph, OptimizingCompilerStats* stats) + : HGraphVisitor(graph), + scoped_allocator_(graph->GetArenaStack()), + current_write_barriers_(scoped_allocator_.Adapter(kArenaAllocWBE)), + stats_(stats) {} + + void VisitBasicBlock(HBasicBlock* block) override { + // We clear the map to perform this optimization only in the same block. Doing it across blocks + // would entail non-trivial merging of states. + current_write_barriers_.clear(); + HGraphVisitor::VisitBasicBlock(block); + } + + void VisitInstanceFieldSet(HInstanceFieldSet* instruction) override { + DCHECK(!instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC())); + + if (instruction->GetFieldType() != DataType::Type::kReference || + instruction->GetValue()->IsNullConstant()) { + instruction->SetWriteBarrierKind(WriteBarrierKind::kDontEmit); + return; + } + + MaybeRecordStat(stats_, MethodCompilationStat::kPossibleWriteBarrier); + HInstruction* obj = HuntForOriginalReference(instruction->InputAt(0)); + auto it = current_write_barriers_.find(obj); + if (it != current_write_barriers_.end()) { + DCHECK(it->second->IsInstanceFieldSet()); + DCHECK(it->second->AsInstanceFieldSet()->GetWriteBarrierKind() != + WriteBarrierKind::kDontEmit); + DCHECK_EQ(it->second->GetBlock(), instruction->GetBlock()); + it->second->AsInstanceFieldSet()->SetWriteBarrierKind(WriteBarrierKind::kEmitNoNullCheck); + instruction->SetWriteBarrierKind(WriteBarrierKind::kDontEmit); + MaybeRecordStat(stats_, MethodCompilationStat::kRemovedWriteBarrier); + } else { + const bool inserted = current_write_barriers_.insert({obj, instruction}).second; + DCHECK(inserted); + DCHECK(instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit); + } + } + + void VisitStaticFieldSet(HStaticFieldSet* instruction) override { + DCHECK(!instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC())); + + if (instruction->GetFieldType() != DataType::Type::kReference || + instruction->GetValue()->IsNullConstant()) { + instruction->SetWriteBarrierKind(WriteBarrierKind::kDontEmit); + return; + } + + MaybeRecordStat(stats_, MethodCompilationStat::kPossibleWriteBarrier); + HInstruction* cls = HuntForOriginalReference(instruction->InputAt(0)); + auto it = current_write_barriers_.find(cls); + if (it != current_write_barriers_.end()) { + DCHECK(it->second->IsStaticFieldSet()); + DCHECK(it->second->AsStaticFieldSet()->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit); + DCHECK_EQ(it->second->GetBlock(), instruction->GetBlock()); + it->second->AsStaticFieldSet()->SetWriteBarrierKind(WriteBarrierKind::kEmitNoNullCheck); + instruction->SetWriteBarrierKind(WriteBarrierKind::kDontEmit); + MaybeRecordStat(stats_, MethodCompilationStat::kRemovedWriteBarrier); + } else { + const bool inserted = current_write_barriers_.insert({cls, instruction}).second; + 
DCHECK(inserted); + DCHECK(instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit); + } + } + + void VisitArraySet(HArraySet* instruction) override { + if (instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC())) { + ClearCurrentValues(); + } + + if (instruction->GetComponentType() != DataType::Type::kReference || + instruction->GetValue()->IsNullConstant()) { + instruction->SetWriteBarrierKind(WriteBarrierKind::kDontEmit); + return; + } + + HInstruction* arr = HuntForOriginalReference(instruction->InputAt(0)); + MaybeRecordStat(stats_, MethodCompilationStat::kPossibleWriteBarrier); + auto it = current_write_barriers_.find(arr); + if (it != current_write_barriers_.end()) { + DCHECK(it->second->IsArraySet()); + DCHECK(it->second->AsArraySet()->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit); + DCHECK_EQ(it->second->GetBlock(), instruction->GetBlock()); + // We never skip the null check in ArraySets so that value is already set. + DCHECK(it->second->AsArraySet()->GetWriteBarrierKind() == WriteBarrierKind::kEmitNoNullCheck); + instruction->SetWriteBarrierKind(WriteBarrierKind::kDontEmit); + MaybeRecordStat(stats_, MethodCompilationStat::kRemovedWriteBarrier); + } else { + const bool inserted = current_write_barriers_.insert({arr, instruction}).second; + DCHECK(inserted); + DCHECK(instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit); + } + } + + void VisitInstruction(HInstruction* instruction) override { + if (instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC())) { + ClearCurrentValues(); + } + } + + private: + void ClearCurrentValues() { current_write_barriers_.clear(); } + + HInstruction* HuntForOriginalReference(HInstruction* ref) const { + // An original reference can be transformed by instructions like: + // i0 NewArray + // i1 HInstruction(i0) <-- NullCheck, BoundType, IntermediateAddress. + // i2 ArraySet(i1, index, value) + DCHECK(ref != nullptr); + while (ref->IsNullCheck() || ref->IsBoundType() || ref->IsIntermediateAddress()) { + ref = ref->InputAt(0); + } + return ref; + } + + ScopedArenaAllocator scoped_allocator_; + + // Stores a map of <Receiver, InstructionWhereTheWriteBarrierIs>. + // `InstructionWhereTheWriteBarrierIs` is used for DCHECKs only. + ScopedArenaHashMap<HInstruction*, HInstruction*> current_write_barriers_; + + OptimizingCompilerStats* const stats_; + + DISALLOW_COPY_AND_ASSIGN(WBEVisitor); +}; + +bool WriteBarrierElimination::Run() { + WBEVisitor wbe_visitor(graph_, stats_); + wbe_visitor.VisitReversePostOrder(); + return true; +} + +} // namespace art diff --git a/compiler/optimizing/write_barrier_elimination.h b/compiler/optimizing/write_barrier_elimination.h new file mode 100644 index 0000000000..a3769e7421 --- /dev/null +++ b/compiler/optimizing/write_barrier_elimination.h @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_WRITE_BARRIER_ELIMINATION_H_
+#define ART_COMPILER_OPTIMIZING_WRITE_BARRIER_ELIMINATION_H_
+
+#include "base/macros.h"
+#include "optimization.h"
+
+namespace art HIDDEN {
+
+// Eliminates unnecessary write barriers from InstanceFieldSet, StaticFieldSet, and ArraySet.
+//
+// We can eliminate redundant write barriers as we don't need several for the same receiver. For
+// example:
+//   MyObject o;
+//   o.inner_obj = io;
+//   o.inner_obj2 = io2;
+//   o.inner_obj3 = io3;
+// We can keep the write barrier for `inner_obj` and remove the other two.
+//
+// In order to do this, we set the WriteBarrierKind of the instruction. The instruction's kind is
+// set to kEmitNoNullCheck (if this write barrier coalesced other write barriers, we don't want to
+// perform the null check optimization), or to kDontEmit (if the write barrier as a whole is not
+// needed).
+class WriteBarrierElimination : public HOptimization {
+ public:
+  WriteBarrierElimination(HGraph* graph,
+                          OptimizingCompilerStats* stats,
+                          const char* name = kWBEPassName)
+      : HOptimization(graph, name, stats) {}
+
+  bool Run() override;
+
+  static constexpr const char* kWBEPassName = "write_barrier_elimination";
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(WriteBarrierElimination);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_WRITE_BARRIER_ELIMINATION_H_
diff --git a/compiler/optimizing/x86_memory_gen.cc b/compiler/optimizing/x86_memory_gen.cc index b1abcf6747..e266618980 100644 --- a/compiler/optimizing/x86_memory_gen.cc +++ b/compiler/optimizing/x86_memory_gen.cc @@ -18,13 +18,13 @@
 #include "code_generator.h"
 #include "driver/compiler_options.h"
 
-namespace art {
+namespace art HIDDEN {
 namespace x86 {
 
 /**
  * Replace instructions with memory operand forms.
  */
-class MemoryOperandVisitor : public HGraphVisitor {
+class MemoryOperandVisitor final : public HGraphVisitor {
  public:
   MemoryOperandVisitor(HGraph* graph, bool do_implicit_null_checks)
       : HGraphVisitor(graph),
diff --git a/compiler/optimizing/x86_memory_gen.h b/compiler/optimizing/x86_memory_gen.h index 3f4178d58a..1cae1a5d3a 100644 --- a/compiler/optimizing/x86_memory_gen.h +++ b/compiler/optimizing/x86_memory_gen.h @@ -17,10 +17,11 @@
 #ifndef ART_COMPILER_OPTIMIZING_X86_MEMORY_GEN_H_
 #define ART_COMPILER_OPTIMIZING_X86_MEMORY_GEN_H_
 
+#include "base/macros.h"
 #include "nodes.h"
 #include "optimization.h"
 
-namespace art {
+namespace art HIDDEN {
 
 class CodeGenerator;
 namespace x86 {
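// Illustrative sketch of the coalescing idea behind the new write barrier elimination pass above
// (standalone C++, not ART code). The enum and types are local stand-ins: `kEmitWithNullCheck`
// models the default kind, while `kEmitNoNullCheck` and `kDontEmit` mirror the kinds the pass
// assigns. As in the pass, coalescing is done per receiver and per block only, since merging
// states across blocks would need non-trivial bookkeeping.
#include <string>
#include <unordered_map>
#include <vector>

enum class BarrierKind { kEmitWithNullCheck, kEmitNoNullCheck, kDontEmit };

struct RefStore {
  std::string receiver;  // Stand-in for the HIR receiver (object, class or array).
  BarrierKind barrier = BarrierKind::kEmitWithNullCheck;
};

// Keeps only the first barrier per receiver within a block. Later stores to the same receiver are
// marked kDontEmit, and the kept barrier loses the null-check fast path because it now also covers
// stores whose values it knows nothing about.
void CoalesceBarriersInBlock(std::vector<RefStore>& stores) {
  std::unordered_map<std::string, RefStore*> first_barrier_for_receiver;
  for (RefStore& store : stores) {
    auto it = first_barrier_for_receiver.find(store.receiver);
    if (it != first_barrier_for_receiver.end()) {
      it->second->barrier = BarrierKind::kEmitNoNullCheck;
      store.barrier = BarrierKind::kDontEmit;
    } else {
      first_barrier_for_receiver.emplace(store.receiver, &store);
    }
  }
}

int main() {
  // The header comment's example: three reference stores into the same object `o`.
  std::vector<RefStore> block = {{"o"}, {"o"}, {"o"}};
  CoalesceBarriersInBlock(block);
  // block[0] keeps its barrier (now as kEmitNoNullCheck); block[1] and block[2] become kDontEmit.
  return block[0].barrier == BarrierKind::kEmitNoNullCheck ? 0 : 1;
}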