Diffstat (limited to 'compiler')
138 files changed, 7897 insertions, 2993 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk
index b87201ad8d..beb34dce37 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -21,6 +21,7 @@ include art/build/Android.common_build.mk
 LIBART_COMPILER_SRC_FILES := \
   compiled_method.cc \
   dex/global_value_numbering.cc \
+  dex/gvn_dead_code_elimination.cc \
   dex/local_value_numbering.cc \
   dex/quick/arm/assemble_arm.cc \
   dex/quick/arm/call_arm.cc \
@@ -100,6 +101,7 @@ LIBART_COMPILER_SRC_FILES := \
   optimizing/inliner.cc \
   optimizing/instruction_simplifier.cc \
   optimizing/intrinsics.cc \
+  optimizing/intrinsics_arm.cc \
   optimizing/intrinsics_arm64.cc \
   optimizing/intrinsics_x86_64.cc \
   optimizing/licm.cc \
@@ -117,7 +119,6 @@ LIBART_COMPILER_SRC_FILES := \
   optimizing/primitive_type_propagation.cc \
   optimizing/reference_type_propagation.cc \
   trampolines/trampoline_compiler.cc \
-  utils/arena_allocator.cc \
   utils/arena_bit_vector.cc \
   utils/arm/assembler_arm.cc \
   utils/arm/assembler_arm32.cc \
@@ -135,7 +136,6 @@ LIBART_COMPILER_SRC_FILES := \
   utils/x86/managed_register_x86.cc \
   utils/x86_64/assembler_x86_64.cc \
   utils/x86_64/managed_register_x86_64.cc \
-  utils/scoped_arena_allocator.cc \
   utils/swap_space.cc \
   buffered_output_stream.cc \
   compiler.cc \
diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h
index 7685200261..93d83c6fd4 100644
--- a/compiler/dex/bb_optimizations.h
+++ b/compiler/dex/bb_optimizations.h
@@ -240,6 +240,41 @@ class GlobalValueNumberingPass : public PassME {
 };
 
 /**
+ * @class DeadCodeEliminationPass
+ * @brief Performs the GVN-based dead code elimination pass.
+ */
+class DeadCodeEliminationPass : public PassME {
+ public:
+  DeadCodeEliminationPass() : PassME("DCE", kPreOrderDFSTraversal, "4_post_dce_cfg") {
+  }
+
+  bool Gate(const PassDataHolder* data) const OVERRIDE {
+    DCHECK(data != nullptr);
+    CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+    DCHECK(c_unit != nullptr);
+    return c_unit->mir_graph->EliminateDeadCodeGate();
+  }
+
+  bool Worker(PassDataHolder* data) const {
+    DCHECK(data != nullptr);
+    PassMEDataHolder* pass_me_data_holder = down_cast<PassMEDataHolder*>(data);
+    CompilationUnit* c_unit = pass_me_data_holder->c_unit;
+    DCHECK(c_unit != nullptr);
+    BasicBlock* bb = pass_me_data_holder->bb;
+    DCHECK(bb != nullptr);
+    return c_unit->mir_graph->EliminateDeadCode(bb);
+  }
+
+  void End(PassDataHolder* data) const OVERRIDE {
+    DCHECK(data != nullptr);
+    CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
+    DCHECK(c_unit != nullptr);
+    c_unit->mir_graph->EliminateDeadCodeEnd();
+    down_cast<PassMEDataHolder*>(data)->dirty = !c_unit->mir_graph->MirSsaRepUpToDate();
+  }
+};
+
+/**
  * @class BBCombine
  * @brief Perform the basic block combination pass.
*/ diff --git a/compiler/dex/compiler_ir.h b/compiler/dex/compiler_ir.h index 0c46d4347d..dceea240fa 100644 --- a/compiler/dex/compiler_ir.h +++ b/compiler/dex/compiler_ir.h @@ -21,11 +21,11 @@ #include <string> #include <vector> +#include "base/arena_allocator.h" +#include "base/scoped_arena_allocator.h" #include "base/timing_logger.h" #include "invoke_type.h" #include "safe_map.h" -#include "utils/arena_allocator.h" -#include "utils/scoped_arena_allocator.h" namespace art { diff --git a/compiler/dex/dex_flags.h b/compiler/dex/dex_flags.h index eaf272bb55..e8eb40ccd2 100644 --- a/compiler/dex/dex_flags.h +++ b/compiler/dex/dex_flags.h @@ -27,6 +27,7 @@ enum OptControlVector { kNullCheckElimination, kClassInitCheckElimination, kGlobalValueNumbering, + kGvnDeadCodeElimination, kLocalValueNumbering, kPromoteRegs, kTrackLiveTemps, diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc index f7968c225a..7e916bee4a 100644 --- a/compiler/dex/dex_to_dex_compiler.cc +++ b/compiler/dex/dex_to_dex_compiler.cc @@ -238,7 +238,7 @@ void DexCompiler::CompileInstanceFieldAccess(Instruction* inst, bool is_volatile; bool fast_path = driver_.ComputeInstanceFieldInfo(field_idx, &unit_, is_put, &field_offset, &is_volatile); - if (fast_path && !is_volatile && IsUint(16, field_offset.Int32Value())) { + if (fast_path && !is_volatile && IsUint<16>(field_offset.Int32Value())) { VLOG(compiler) << "Quickening " << Instruction::Name(inst->Opcode()) << " to " << Instruction::Name(new_opcode) << " by replacing field index " << field_idx @@ -274,7 +274,7 @@ void DexCompiler::CompileInvokeVirtual(Instruction* inst, &target_method, &vtable_idx, &direct_code, &direct_method); if (fast_path && original_invoke_type == invoke_type) { - if (vtable_idx >= 0 && IsUint(16, vtable_idx)) { + if (vtable_idx >= 0 && IsUint<16>(vtable_idx)) { VLOG(compiler) << "Quickening " << Instruction::Name(inst->Opcode()) << "(" << PrettyMethod(method_idx, GetDexFile(), true) << ")" << " to " << Instruction::Name(new_opcode) diff --git a/compiler/dex/global_value_numbering.cc b/compiler/dex/global_value_numbering.cc index a8fd8122ff..ab3c946897 100644 --- a/compiler/dex/global_value_numbering.cc +++ b/compiler/dex/global_value_numbering.cc @@ -28,7 +28,7 @@ GlobalValueNumbering::GlobalValueNumbering(CompilationUnit* cu, ScopedArenaAlloc allocator_(allocator), bbs_processed_(0u), max_bbs_to_process_(kMaxBbsToProcessMultiplyFactor * mir_graph_->GetNumReachableBlocks()), - last_value_(0u), + last_value_(kNullValue), modifications_allowed_(true), mode_(mode), global_value_map_(std::less<uint64_t>(), allocator->Adapter()), @@ -128,7 +128,11 @@ bool GlobalValueNumbering::FinishBasicBlock(BasicBlock* bb) { merge_lvns_.clear(); bool change = (lvns_[bb->id] == nullptr) || !lvns_[bb->id]->Equals(*work_lvn_); - if (change) { + if (mode_ == kModeGvn) { + // In GVN mode, keep the latest LVN even if Equals() indicates no change. This is + // to keep the correct values of fields that do not contribute to Equals() as long + // as they depend only on predecessor LVNs' fields that do contribute to Equals(). + // Currently, that's LVN::merge_map_ used by LVN::GetStartingVregValueNumberImpl(). std::unique_ptr<const LocalValueNumbering> old_lvn(lvns_[bb->id]); lvns_[bb->id] = work_lvn_.release(); } else { @@ -178,7 +182,7 @@ bool GlobalValueNumbering::NullCheckedInAllPredecessors( } // IF_EQZ/IF_NEZ checks some sreg, see if that sreg contains the value_name. 
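The new DeadCodeEliminationPass above hooks into the MIR pass machinery through three callbacks: Gate() is consulted once per method, Worker() runs once per basic block, and End() runs once afterwards, marking the pass data dirty when the SSA representation needs rebuilding. A rough, self-contained sketch of that contract; the types and the driver below are toy stand-ins, not the real PassME/PassDriver API:

#include <iostream>
#include <vector>

// Toy stand-ins for the MIR pass machinery (hypothetical names, for illustration only).
struct Block { int id; };
struct Method {
  std::vector<Block> blocks;
  bool ssa_up_to_date;
};

class Pass {
 public:
  virtual ~Pass() {}
  virtual bool Gate(const Method& method) const = 0;   // Run this pass for the method at all?
  virtual bool Worker(Method& method, Block& bb) = 0;  // Process one basic block.
  virtual void End(Method& method) = 0;                // Finalize; may invalidate SSA.
};

class ToyDeadCodeEliminationPass : public Pass {
 public:
  bool Gate(const Method& method) const override { return !method.blocks.empty(); }
  bool Worker(Method& method, Block& bb) override {
    (void)method;
    std::cout << "DCE worker on block " << bb.id << "\n";
    return false;  // "Changed" flag, analogous to EliminateDeadCode(bb).
  }
  void End(Method& method) override {
    method.ssa_up_to_date = false;  // Analogous to setting 'dirty' when MirSsaRepUpToDate() fails.
  }
};

void RunPass(Pass& pass, Method& method) {
  if (!pass.Gate(method)) return;                   // Gate() checked once per method.
  for (Block& bb : method.blocks) pass.Worker(method, bb);
  pass.End(method);                                 // End() runs once after all blocks.
}

int main() {
  Method method{{{0}, {1}, {2}}, true};
  ToyDeadCodeEliminationPass dce;
  RunPass(dce, method);
  std::cout << "ssa_up_to_date=" << method.ssa_up_to_date << "\n";
}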
int s_reg = pred_bb->last_mir_insn->ssa_rep->uses[0]; - if (!pred_lvn->IsSregValue(s_reg, value_name)) { + if (pred_lvn->GetSregValue(s_reg) != value_name) { return false; } } diff --git a/compiler/dex/global_value_numbering.h b/compiler/dex/global_value_numbering.h index cdafc68070..6fa658c0cc 100644 --- a/compiler/dex/global_value_numbering.h +++ b/compiler/dex/global_value_numbering.h @@ -17,12 +17,12 @@ #ifndef ART_COMPILER_DEX_GLOBAL_VALUE_NUMBERING_H_ #define ART_COMPILER_DEX_GLOBAL_VALUE_NUMBERING_H_ +#include "base/arena_object.h" #include "base/logging.h" #include "base/macros.h" #include "mir_graph.h" #include "compiler_ir.h" #include "dex_flags.h" -#include "utils/arena_object.h" namespace art { @@ -31,6 +31,9 @@ class MirFieldInfo; class GlobalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> { public: + static constexpr uint16_t kNoValue = 0xffffu; + static constexpr uint16_t kNullValue = 1u; + enum Mode { kModeGvn, kModeGvnPostProcessing, @@ -51,6 +54,14 @@ class GlobalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> { GlobalValueNumbering(CompilationUnit* cu, ScopedArenaAllocator* allocator, Mode mode); ~GlobalValueNumbering(); + CompilationUnit* GetCompilationUnit() const { + return cu_; + } + + MIRGraph* GetMirGraph() const { + return mir_graph_; + } + // Prepare LVN for the basic block. LocalValueNumbering* PrepareBasicBlock(BasicBlock* bb, ScopedArenaAllocator* allocator = nullptr); @@ -70,9 +81,10 @@ class GlobalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> { return modifications_allowed_ && Good(); } - private: - static constexpr uint16_t kNoValue = 0xffffu; + // Retrieve the LVN with GVN results for a given BasicBlock. + const LocalValueNumbering* GetLvn(BasicBlockId bb_id) const; + private: // Allocate a new value name. uint16_t NewValueName(); @@ -88,7 +100,7 @@ class GlobalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> { uint16_t LookupValue(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) { uint16_t res; uint64_t key = BuildKey(op, operand1, operand2, modifier); - ValueMap::iterator lb = global_value_map_.lower_bound(key); + auto lb = global_value_map_.lower_bound(key); if (lb != global_value_map_.end() && lb->first == key) { res = lb->second; } else { @@ -99,10 +111,10 @@ class GlobalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> { } // Look up a value in the global value map, don't add a new entry if there was none before. - uint16_t FindValue(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) { + uint16_t FindValue(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) const { uint16_t res; uint64_t key = BuildKey(op, operand1, operand2, modifier); - ValueMap::iterator lb = global_value_map_.lower_bound(key); + auto lb = global_value_map_.lower_bound(key); if (lb != global_value_map_.end() && lb->first == key) { res = lb->second; } else { @@ -111,18 +123,6 @@ class GlobalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> { return res; } - // Check if the exact value is stored in the global value map. - bool HasValue(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier, - uint16_t value) const { - DCHECK(value != 0u || !Good()); - DCHECK_LE(value, last_value_); - // This is equivalent to value == LookupValue(op, operand1, operand2, modifier) - // except that it doesn't add an entry to the global value map if it's not there. 
- uint64_t key = BuildKey(op, operand1, operand2, modifier); - ValueMap::const_iterator it = global_value_map_.find(key); - return (it != global_value_map_.end() && it->second == value); - } - // Get an instance field id. uint16_t GetIFieldId(MIR* mir) { return GetMirGraph()->GetGvnIFieldId(mir); @@ -200,14 +200,6 @@ class GlobalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> { bool DivZeroCheckedInAllPredecessors(const ScopedArenaVector<uint16_t>& merge_names) const; - CompilationUnit* GetCompilationUnit() const { - return cu_; - } - - MIRGraph* GetMirGraph() const { - return mir_graph_; - } - ScopedArenaAllocator* Allocator() const { return allocator_; } @@ -255,6 +247,13 @@ class GlobalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> { }; std::ostream& operator<<(std::ostream& os, const GlobalValueNumbering::Mode& rhs); +inline const LocalValueNumbering* GlobalValueNumbering::GetLvn(BasicBlockId bb_id) const { + DCHECK_EQ(mode_, kModeGvnPostProcessing); + DCHECK_LT(bb_id, lvns_.size()); + DCHECK(lvns_[bb_id] != nullptr); + return lvns_[bb_id]; +} + inline void GlobalValueNumbering::StartPostProcessing() { DCHECK(Good()); DCHECK_EQ(mode_, kModeGvn); @@ -271,8 +270,7 @@ template <typename Container> // Container of MirIFieldLoweringInfo or MirSFiel uint16_t* GlobalValueNumbering::PrepareGvnFieldIds(ScopedArenaAllocator* allocator, const Container& field_infos) { size_t size = field_infos.size(); - uint16_t* field_ids = reinterpret_cast<uint16_t*>(allocator->Alloc(size * sizeof(uint16_t), - kArenaAllocMisc)); + uint16_t* field_ids = allocator->AllocArray<uint16_t>(size, kArenaAllocMisc); for (size_t i = 0u; i != size; ++i) { size_t idx = i; const MirFieldInfo& cur_info = field_infos[i]; diff --git a/compiler/dex/global_value_numbering_test.cc b/compiler/dex/global_value_numbering_test.cc index f71b7ae359..54e34eaa81 100644 --- a/compiler/dex/global_value_numbering_test.cc +++ b/compiler/dex/global_value_numbering_test.cc @@ -134,8 +134,8 @@ class GlobalValueNumberingTest : public testing::Test { { bb, opcode, 0u, 0u, 2, { src, src + 1 }, 2, { reg, reg + 1 } } #define DEF_PHI2(bb, reg, src1, src2) \ { bb, static_cast<Instruction::Code>(kMirOpPhi), 0, 0u, 2u, { src1, src2 }, 1, { reg } } -#define DEF_DIV_REM(bb, opcode, result, dividend, divisor) \ - { bb, opcode, 0u, 0u, 2, { dividend, divisor }, 1, { result } } +#define DEF_BINOP(bb, opcode, result, src1, src2) \ + { bb, opcode, 0u, 0u, 2, { src1, src2 }, 1, { result } } void DoPrepareIFields(const IFieldDef* defs, size_t count) { cu_.mir_graph->ifield_lowering_infos_.clear(); @@ -229,7 +229,7 @@ class GlobalValueNumberingTest : public testing::Test { void DoPrepareMIRs(const MIRDef* defs, size_t count) { mir_count_ = count; - mirs_ = reinterpret_cast<MIR*>(cu_.arena.Alloc(sizeof(MIR) * count, kArenaAllocMIR)); + mirs_ = cu_.arena.AllocArray<MIR>(count, kArenaAllocMIR); ssa_reps_.resize(count); for (size_t i = 0u; i != count; ++i) { const MIRDef* def = &defs[i]; @@ -251,8 +251,8 @@ class GlobalValueNumberingTest : public testing::Test { ASSERT_EQ(cu_.mir_graph->sfield_lowering_infos_[def->field_info].MemAccessType(), SGetOrSPutMemAccessType(def->opcode)); } else if (def->opcode == static_cast<Instruction::Code>(kMirOpPhi)) { - mir->meta.phi_incoming = static_cast<BasicBlockId*>( - allocator_->Alloc(def->num_uses * sizeof(BasicBlockId), kArenaAllocDFInfo)); + mir->meta.phi_incoming = + allocator_->AllocArray<BasicBlockId>(def->num_uses, kArenaAllocDFInfo); ASSERT_EQ(def->num_uses, bb->predecessors.size()); 
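LookupValue() and FindValue() above differ only in whether a miss allocates a new value name; both key the global map on a 64-bit key derived from four 16-bit components. A minimal standalone model of that scheme; the exact packing done by the real BuildKey() is an assumption here:

#include <cstdint>
#include <iostream>
#include <map>

// Toy value-name table in the spirit of LookupValue()/FindValue().
class ToyValueNames {
 public:
  enum : uint16_t { kNoValue = 0xffffu, kNullValue = 1u };

  uint16_t LookupValue(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) {
    uint64_t key = BuildKey(op, operand1, operand2, modifier);
    auto lb = map_.lower_bound(key);
    if (lb != map_.end() && lb->first == key) {
      return lb->second;               // Known expression: reuse its value name.
    }
    uint16_t res = ++last_value_;      // Unknown: allocate a fresh value name.
    map_.insert(lb, {key, res});       // Hinted insert, as in the original.
    return res;
  }

  uint16_t FindValue(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) const {
    auto it = map_.find(BuildKey(op, operand1, operand2, modifier));
    return (it != map_.end()) ? it->second : static_cast<uint16_t>(kNoValue);  // Never allocates.
  }

 private:
  static uint64_t BuildKey(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) {
    // Assumed packing: four 16-bit fields into one 64-bit key.
    return (uint64_t{op} << 48) | (uint64_t{operand1} << 32) | (uint64_t{operand2} << 16) | modifier;
  }

  std::map<uint64_t, uint16_t> map_;
  uint16_t last_value_ = kNullValue;   // Mirrors last_value_(kNullValue) in the patch.
};

int main() {
  ToyValueNames names;
  uint16_t v = names.LookupValue(1, 2, 3, 0);
  std::cout << v << " " << names.FindValue(1, 2, 3, 0) << " " << names.FindValue(7, 7, 7, 7) << "\n";
}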
std::copy(bb->predecessors.begin(), bb->predecessors.end(), mir->meta.phi_incoming); } @@ -267,7 +267,6 @@ class GlobalValueNumberingTest : public testing::Test { mir->offset = i; // LVN uses offset only for debug output mir->optimization_flags = 0u; } - mirs_[count - 1u].next = nullptr; DexFile::CodeItem* code_item = static_cast<DexFile::CodeItem*>( cu_.arena.Alloc(sizeof(DexFile::CodeItem), kArenaAllocMisc)); code_item->insns_size_in_code_units_ = 2u * count; @@ -279,6 +278,20 @@ class GlobalValueNumberingTest : public testing::Test { DoPrepareMIRs(defs, count); } + void DoPrepareVregToSsaMapExit(BasicBlockId bb_id, const int32_t* map, size_t count) { + BasicBlock* bb = cu_.mir_graph->GetBasicBlock(bb_id); + ASSERT_TRUE(bb != nullptr); + ASSERT_TRUE(bb->data_flow_info != nullptr); + bb->data_flow_info->vreg_to_ssa_map_exit = + cu_.arena.AllocArray<int32_t>(count, kArenaAllocDFInfo); + std::copy_n(map, count, bb->data_flow_info->vreg_to_ssa_map_exit); + } + + template <size_t count> + void PrepareVregToSsaMapExit(BasicBlockId bb_id, const int32_t (&map)[count]) { + DoPrepareVregToSsaMapExit(bb_id, map, count); + } + void PerformGVN() { DoPerformGVN<LoopRepeatingTopologicalSortIterator>(); } @@ -294,9 +307,9 @@ class GlobalValueNumberingTest : public testing::Test { cu_.mir_graph->ComputeDominators(); cu_.mir_graph->ComputeTopologicalSortOrder(); cu_.mir_graph->SSATransformationEnd(); - cu_.mir_graph->temp_.gvn.ifield_ids_ = GlobalValueNumbering::PrepareGvnFieldIds( + cu_.mir_graph->temp_.gvn.ifield_ids = GlobalValueNumbering::PrepareGvnFieldIds( allocator_.get(), cu_.mir_graph->ifield_lowering_infos_); - cu_.mir_graph->temp_.gvn.sfield_ids_ = GlobalValueNumbering::PrepareGvnFieldIds( + cu_.mir_graph->temp_.gvn.sfield_ids = GlobalValueNumbering::PrepareGvnFieldIds( allocator_.get(), cu_.mir_graph->sfield_lowering_infos_); ASSERT_TRUE(gvn_ == nullptr); gvn_.reset(new (allocator_.get()) GlobalValueNumbering(&cu_, allocator_.get(), @@ -348,6 +361,10 @@ class GlobalValueNumberingTest : public testing::Test { cu_.mir_graph.reset(new MIRGraph(&cu_, &cu_.arena)); cu_.access_flags = kAccStatic; // Don't let "this" interfere with this test. allocator_.reset(ScopedArenaAllocator::Create(&cu_.arena_stack)); + // By default, the zero-initialized reg_location_[.] with ref == false tells LVN that + // 0 constants are integral, not references. Nothing else is used by LVN/GVN. + cu_.mir_graph->reg_location_ = + cu_.arena.AllocArray<RegLocation>(kMaxSsaRegs, kArenaAllocRegAlloc); // Bind all possible sregs to live vregs for test purposes. 
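The PrepareVregToSsaMapExit() helper above takes a reference to a sized array so the element count is deduced at compile time instead of being passed by hand. A tiny standalone illustration of that idiom, with a made-up print helper:

#include <cstddef>
#include <cstdint>
#include <iostream>

// The array length is deduced from the reference type, as in the test helper.
template <size_t count>
void PrintMap(const char* name, const int32_t (&map)[count]) {
  std::cout << name << ": " << count << " vreg(s):";
  for (size_t i = 0; i != count; ++i) std::cout << " " << map[i];
  std::cout << "\n";
}

int main() {
  const int32_t prologue_vreg_to_ssa_map_exit[] = { 0 };
  const int32_t loop_vreg_to_ssa_map_exit[] = { 3 };
  PrintMap("prologue", prologue_vreg_to_ssa_map_exit);  // count deduced as 1
  PrintMap("loop", loop_vreg_to_ssa_map_exit);          // count deduced as 1
}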
live_in_v_->SetInitialBits(kMaxSsaRegs); cu_.mir_graph->ssa_base_vregs_.reserve(kMaxSsaRegs); @@ -1570,6 +1587,40 @@ TEST_F(GlobalValueNumberingTestLoop, Phi) { EXPECT_NE(value_names_[4], value_names_[3]); } +TEST_F(GlobalValueNumberingTestLoop, IFieldLoopVariable) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_CONST(3, Instruction::CONST, 0u, 0), + DEF_IPUT(3, Instruction::IPUT, 0u, 100u, 0u), + DEF_IGET(4, Instruction::IGET, 2u, 100u, 0u), + DEF_BINOP(4, Instruction::ADD_INT, 3u, 2u, 101u), + DEF_IPUT(4, Instruction::IPUT, 3u, 100u, 0u), + }; + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN(); + ASSERT_EQ(arraysize(mirs), value_names_.size()); + EXPECT_NE(value_names_[2], value_names_[0]); + EXPECT_NE(value_names_[3], value_names_[0]); + EXPECT_NE(value_names_[3], value_names_[2]); + + + // Set up vreg_to_ssa_map_exit for prologue and loop and set post-processing mode + // as needed for GetStartingVregValueNumber(). + const int32_t prologue_vreg_to_ssa_map_exit[] = { 0 }; + const int32_t loop_vreg_to_ssa_map_exit[] = { 3 }; + PrepareVregToSsaMapExit(3, prologue_vreg_to_ssa_map_exit); + PrepareVregToSsaMapExit(4, loop_vreg_to_ssa_map_exit); + gvn_->StartPostProcessing(); + + // Check that vreg 0 has the same value number as the result of IGET 2u. + const LocalValueNumbering* loop = gvn_->GetLvn(4); + EXPECT_EQ(value_names_[2], loop->GetStartingVregValueNumber(0)); +} + TEST_F(GlobalValueNumberingTestCatch, IFields) { static const IFieldDef ifields[] = { { 0u, 1u, 0u, false, kDexMemAccessWord }, @@ -2225,18 +2276,18 @@ TEST_F(GlobalValueNumberingTest, NormalPathToCatchEntry) { TEST_F(GlobalValueNumberingTestDiamond, DivZeroCheckDiamond) { static const MIRDef mirs[] = { - DEF_DIV_REM(3u, Instruction::DIV_INT, 1u, 20u, 21u), - DEF_DIV_REM(3u, Instruction::DIV_INT, 2u, 24u, 21u), - DEF_DIV_REM(3u, Instruction::DIV_INT, 3u, 20u, 23u), - DEF_DIV_REM(4u, Instruction::DIV_INT, 4u, 24u, 22u), - DEF_DIV_REM(4u, Instruction::DIV_INT, 9u, 24u, 25u), - DEF_DIV_REM(5u, Instruction::DIV_INT, 5u, 24u, 21u), - DEF_DIV_REM(5u, Instruction::DIV_INT, 10u, 24u, 26u), + DEF_BINOP(3u, Instruction::DIV_INT, 1u, 20u, 21u), + DEF_BINOP(3u, Instruction::DIV_INT, 2u, 24u, 21u), + DEF_BINOP(3u, Instruction::DIV_INT, 3u, 20u, 23u), + DEF_BINOP(4u, Instruction::DIV_INT, 4u, 24u, 22u), + DEF_BINOP(4u, Instruction::DIV_INT, 9u, 24u, 25u), + DEF_BINOP(5u, Instruction::DIV_INT, 5u, 24u, 21u), + DEF_BINOP(5u, Instruction::DIV_INT, 10u, 24u, 26u), DEF_PHI2(6u, 27u, 25u, 26u), - DEF_DIV_REM(6u, Instruction::DIV_INT, 12u, 20u, 27u), - DEF_DIV_REM(6u, Instruction::DIV_INT, 6u, 24u, 21u), - DEF_DIV_REM(6u, Instruction::DIV_INT, 7u, 20u, 23u), - DEF_DIV_REM(6u, Instruction::DIV_INT, 8u, 20u, 22u), + DEF_BINOP(6u, Instruction::DIV_INT, 12u, 20u, 27u), + DEF_BINOP(6u, Instruction::DIV_INT, 6u, 24u, 21u), + DEF_BINOP(6u, Instruction::DIV_INT, 7u, 20u, 23u), + DEF_BINOP(6u, Instruction::DIV_INT, 8u, 20u, 22u), }; static const bool expected_ignore_div_zero_check[] = { diff --git a/compiler/dex/gvn_dead_code_elimination.cc b/compiler/dex/gvn_dead_code_elimination.cc new file mode 100644 index 0000000000..2e7f0328d2 --- /dev/null +++ b/compiler/dex/gvn_dead_code_elimination.cc @@ -0,0 +1,1391 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <sstream> + +#include "gvn_dead_code_elimination.h" + +#include "base/bit_vector-inl.h" +#include "base/macros.h" +#include "compiler_enums.h" +#include "dataflow_iterator-inl.h" +#include "dex_instruction.h" +#include "dex/mir_graph.h" +#include "local_value_numbering.h" +#include "utils/arena_bit_vector.h" + +namespace art { + +constexpr uint16_t GvnDeadCodeElimination::kNoValue; +constexpr uint16_t GvnDeadCodeElimination::kNPos; + +inline uint16_t GvnDeadCodeElimination::MIRData::PrevChange(int v_reg) const { + DCHECK(has_def); + DCHECK(v_reg == vreg_def || v_reg == vreg_def + 1); + return (v_reg == vreg_def) ? prev_value.change : prev_value_high.change; +} + +inline void GvnDeadCodeElimination::MIRData::SetPrevChange(int v_reg, uint16_t change) { + DCHECK(has_def); + DCHECK(v_reg == vreg_def || v_reg == vreg_def + 1); + if (v_reg == vreg_def) { + prev_value.change = change; + } else { + prev_value_high.change = change; + } +} + +inline void GvnDeadCodeElimination::MIRData::RemovePrevChange(int v_reg, MIRData* prev_data) { + DCHECK_NE(PrevChange(v_reg), kNPos); + DCHECK(v_reg == prev_data->vreg_def || v_reg == prev_data->vreg_def + 1); + if (vreg_def == v_reg) { + if (prev_data->vreg_def == v_reg) { + prev_value = prev_data->prev_value; + low_def_over_high_word = prev_data->low_def_over_high_word; + } else { + prev_value = prev_data->prev_value_high; + low_def_over_high_word = + prev_data->prev_value_high.value != kNPos && !prev_data->high_def_over_low_word; + } + } else { + if (prev_data->vreg_def == v_reg) { + prev_value_high = prev_data->prev_value; + high_def_over_low_word = + prev_data->prev_value.value != kNPos && !prev_data->low_def_over_high_word; + } else { + prev_value_high = prev_data->prev_value_high; + high_def_over_low_word = prev_data->high_def_over_low_word; + } + } +} + +GvnDeadCodeElimination::VRegChains::VRegChains(uint32_t num_vregs, ScopedArenaAllocator* alloc) + : num_vregs_(num_vregs), + vreg_data_(alloc->AllocArray<VRegValue>(num_vregs, kArenaAllocMisc)), + mir_data_(alloc->Adapter()) { + mir_data_.reserve(100); +} + +inline void GvnDeadCodeElimination::VRegChains::Reset() { + DCHECK(mir_data_.empty()); + std::fill_n(vreg_data_, num_vregs_, VRegValue()); +} + +void GvnDeadCodeElimination::VRegChains::AddMIRWithDef(MIR* mir, int v_reg, bool wide, + uint16_t new_value) { + uint16_t pos = mir_data_.size(); + mir_data_.emplace_back(mir); + MIRData* data = &mir_data_.back(); + data->has_def = true; + data->wide_def = wide; + data->vreg_def = v_reg; + + if (vreg_data_[v_reg].change != kNPos && + mir_data_[vreg_data_[v_reg].change].vreg_def + 1 == v_reg) { + data->low_def_over_high_word = true; + } + data->prev_value = vreg_data_[v_reg]; + DCHECK_LT(static_cast<size_t>(v_reg), num_vregs_); + vreg_data_[v_reg].value = new_value; + vreg_data_[v_reg].change = pos; + + if (wide) { + if (vreg_data_[v_reg + 1].change != kNPos && + mir_data_[vreg_data_[v_reg + 1].change].vreg_def == v_reg + 1) { + data->high_def_over_low_word = true; + } + data->prev_value_high = vreg_data_[v_reg + 1]; + DCHECK_LT(static_cast<size_t>(v_reg + 1), num_vregs_); + 
vreg_data_[v_reg + 1].value = new_value; + vreg_data_[v_reg + 1].change = pos; + } +} + +inline void GvnDeadCodeElimination::VRegChains::AddMIRWithoutDef(MIR* mir) { + mir_data_.emplace_back(mir); +} + +void GvnDeadCodeElimination::VRegChains::RemoveLastMIRData() { + MIRData* data = LastMIRData(); + if (data->has_def) { + DCHECK_EQ(vreg_data_[data->vreg_def].change, NumMIRs() - 1u); + vreg_data_[data->vreg_def] = data->prev_value; + if (data->wide_def) { + DCHECK_EQ(vreg_data_[data->vreg_def + 1].change, NumMIRs() - 1u); + vreg_data_[data->vreg_def + 1] = data->prev_value_high; + } + } + mir_data_.pop_back(); +} + +void GvnDeadCodeElimination::VRegChains::RemoveTrailingNops() { + // There's at least one NOP to drop. There may be more. + MIRData* last_data = LastMIRData(); + DCHECK(!last_data->must_keep && !last_data->has_def); + do { + DCHECK_EQ(static_cast<int>(last_data->mir->dalvikInsn.opcode), static_cast<int>(kMirOpNop)); + mir_data_.pop_back(); + if (mir_data_.empty()) { + break; + } + last_data = LastMIRData(); + } while (!last_data->must_keep && !last_data->has_def); +} + +inline size_t GvnDeadCodeElimination::VRegChains::NumMIRs() const { + return mir_data_.size(); +} + +inline GvnDeadCodeElimination::MIRData* GvnDeadCodeElimination::VRegChains::GetMIRData(size_t pos) { + DCHECK_LT(pos, mir_data_.size()); + return &mir_data_[pos]; +} + +inline GvnDeadCodeElimination::MIRData* GvnDeadCodeElimination::VRegChains::LastMIRData() { + DCHECK(!mir_data_.empty()); + return &mir_data_.back(); +} + +uint32_t GvnDeadCodeElimination::VRegChains::NumVRegs() const { + return num_vregs_; +} + +void GvnDeadCodeElimination::VRegChains::InsertInitialValueHigh(int v_reg, uint16_t value) { + DCHECK_NE(value, kNoValue); + DCHECK_LT(static_cast<size_t>(v_reg), num_vregs_); + uint16_t change = vreg_data_[v_reg].change; + if (change == kNPos) { + vreg_data_[v_reg].value = value; + } else { + while (true) { + MIRData* data = &mir_data_[change]; + DCHECK(data->vreg_def == v_reg || data->vreg_def + 1 == v_reg); + if (data->vreg_def == v_reg) { // Low word, use prev_value. + if (data->prev_value.change == kNPos) { + DCHECK_EQ(data->prev_value.value, kNoValue); + data->prev_value.value = value; + data->low_def_over_high_word = true; + break; + } + change = data->prev_value.change; + } else { // High word, use prev_value_high. + if (data->prev_value_high.change == kNPos) { + DCHECK_EQ(data->prev_value_high.value, kNoValue); + data->prev_value_high.value = value; + break; + } + change = data->prev_value_high.change; + } + } + } +} + +void GvnDeadCodeElimination::VRegChains::UpdateInitialVRegValue(int v_reg, bool wide, + const LocalValueNumbering* lvn) { + DCHECK_LT(static_cast<size_t>(v_reg), num_vregs_); + if (!wide) { + if (vreg_data_[v_reg].value == kNoValue) { + uint16_t old_value = lvn->GetStartingVregValueNumber(v_reg); + if (old_value == kNoValue) { + // Maybe there was a wide value in v_reg before. Do not check for wide value in v_reg-1, + // that will be done only if we see a definition of v_reg-1, otherwise it's unnecessary. 
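VRegChains above keeps, for every Dalvik vreg, the current value name plus the index of the MIR that last changed it; each recorded definition stores the previous {value, change} pair so the history can be walked backwards or unwound. A much-simplified standalone model of that chain, narrow (non-wide) defs only:

#include <cstdint>
#include <iostream>
#include <vector>

constexpr uint16_t kNoValue = 0xffffu;
constexpr uint16_t kNPos = 0xffffu;

struct VRegValue { uint16_t value; uint16_t change; };
struct DefRecord { int v_reg; VRegValue prev_value; };

class ToyVRegChains {
 public:
  explicit ToyVRegChains(size_t num_vregs) : vreg_data_(num_vregs, VRegValue{kNoValue, kNPos}) {}

  void AddMIRWithDef(int v_reg, uint16_t new_value) {
    uint16_t pos = static_cast<uint16_t>(defs_.size());
    defs_.push_back(DefRecord{v_reg, vreg_data_[v_reg]});  // Remember the previous value/change.
    vreg_data_[v_reg] = VRegValue{new_value, pos};
  }

  void RemoveLastMIRData() {                               // Unwind the most recent definition.
    vreg_data_[defs_.back().v_reg] = defs_.back().prev_value;
    defs_.pop_back();
  }

  uint16_t CurrentValue(int v_reg) const { return vreg_data_[v_reg].value; }
  uint16_t LastChange(int v_reg) const { return vreg_data_[v_reg].change; }

 private:
  std::vector<VRegValue> vreg_data_;
  std::vector<DefRecord> defs_;
};

int main() {
  ToyVRegChains chains(4);
  chains.AddMIRWithDef(0, 10);  // change #0: v0 := value 10
  chains.AddMIRWithDef(0, 11);  // change #1: v0 := value 11, remembers {10, #0}
  std::cout << chains.CurrentValue(0) << " @ change " << chains.LastChange(0) << "\n";  // 11 @ 1
  chains.RemoveLastMIRData();
  std::cout << chains.CurrentValue(0) << " @ change " << chains.LastChange(0) << "\n";  // 10 @ 0
}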
+ old_value = lvn->GetStartingVregValueNumberWide(v_reg); + if (old_value != kNoValue) { + InsertInitialValueHigh(v_reg + 1, old_value); + } + } + vreg_data_[v_reg].value = old_value; + } + } else { + DCHECK_LT(static_cast<size_t>(v_reg + 1), num_vregs_); + bool check_high = true; + if (vreg_data_[v_reg].value == kNoValue) { + uint16_t old_value = lvn->GetStartingVregValueNumberWide(v_reg); + if (old_value != kNoValue) { + InsertInitialValueHigh(v_reg + 1, old_value); + check_high = false; // High word has been processed. + } else { + // Maybe there was a narrow value before. Do not check for wide value in v_reg-1, + // that will be done only if we see a definition of v_reg-1, otherwise it's unnecessary. + old_value = lvn->GetStartingVregValueNumber(v_reg); + } + vreg_data_[v_reg].value = old_value; + } + if (check_high && vreg_data_[v_reg + 1].value == kNoValue) { + uint16_t old_value = lvn->GetStartingVregValueNumber(v_reg + 1); + if (old_value == kNoValue && static_cast<size_t>(v_reg + 2) < num_vregs_) { + // Maybe there was a wide value before. + old_value = lvn->GetStartingVregValueNumberWide(v_reg + 1); + if (old_value != kNoValue) { + InsertInitialValueHigh(v_reg + 2, old_value); + } + } + vreg_data_[v_reg + 1].value = old_value; + } + } +} + +inline uint16_t GvnDeadCodeElimination::VRegChains::LastChange(int v_reg) { + DCHECK_LT(static_cast<size_t>(v_reg), num_vregs_); + return vreg_data_[v_reg].change; +} + +inline uint16_t GvnDeadCodeElimination::VRegChains::CurrentValue(int v_reg) { + DCHECK_LT(static_cast<size_t>(v_reg), num_vregs_); + return vreg_data_[v_reg].value; +} + +uint16_t GvnDeadCodeElimination::VRegChains::FindKillHead(int v_reg, uint16_t cutoff) { + uint16_t current_value = this->CurrentValue(v_reg); + DCHECK_NE(current_value, kNoValue); + uint16_t change = LastChange(v_reg); + DCHECK_LT(change, mir_data_.size()); + DCHECK_GE(change, cutoff); + bool match_high_word = (mir_data_[change].vreg_def != v_reg); + do { + MIRData* data = &mir_data_[change]; + DCHECK(data->vreg_def == v_reg || data->vreg_def + 1 == v_reg); + if (data->vreg_def == v_reg) { // Low word, use prev_value. + if (data->prev_value.value == current_value && + match_high_word == data->low_def_over_high_word) { + break; + } + change = data->prev_value.change; + } else { // High word, use prev_value_high. + if (data->prev_value_high.value == current_value && + match_high_word != data->high_def_over_low_word) { + break; + } + change = data->prev_value_high.change; + } + if (change < cutoff) { + change = kNPos; + } + } while (change != kNPos); + return change; +} + +uint16_t GvnDeadCodeElimination::VRegChains::FindFirstChangeAfter(int v_reg, + uint16_t change) const { + DCHECK_LT(static_cast<size_t>(v_reg), num_vregs_); + DCHECK_LT(change, mir_data_.size()); + uint16_t result = kNPos; + uint16_t search_change = vreg_data_[v_reg].change; + while (search_change != kNPos && search_change > change) { + result = search_change; + search_change = mir_data_[search_change].PrevChange(v_reg); + } + return result; +} + +void GvnDeadCodeElimination::VRegChains::ReplaceChange(uint16_t old_change, uint16_t new_change) { + const MIRData* old_data = GetMIRData(old_change); + DCHECK(old_data->has_def); + int count = old_data->wide_def ? 
2 : 1; + for (int v_reg = old_data->vreg_def, end = old_data->vreg_def + count; v_reg != end; ++v_reg) { + uint16_t next_change = FindFirstChangeAfter(v_reg, old_change); + if (next_change == kNPos) { + DCHECK_EQ(vreg_data_[v_reg].change, old_change); + vreg_data_[v_reg].change = new_change; + } else { + DCHECK_EQ(mir_data_[next_change].PrevChange(v_reg), old_change); + mir_data_[next_change].SetPrevChange(v_reg, new_change); + } + } +} + +void GvnDeadCodeElimination::VRegChains::RemoveChange(uint16_t change) { + MIRData* data = &mir_data_[change]; + DCHECK(data->has_def); + int count = data->wide_def ? 2 : 1; + for (int v_reg = data->vreg_def, end = data->vreg_def + count; v_reg != end; ++v_reg) { + uint16_t next_change = FindFirstChangeAfter(v_reg, change); + if (next_change == kNPos) { + DCHECK_EQ(vreg_data_[v_reg].change, change); + vreg_data_[v_reg] = (data->vreg_def == v_reg) ? data->prev_value : data->prev_value_high; + } else { + DCHECK_EQ(mir_data_[next_change].PrevChange(v_reg), change); + mir_data_[next_change].RemovePrevChange(v_reg, data); + } + } +} + +inline bool GvnDeadCodeElimination::VRegChains::IsTopChange(uint16_t change) const { + DCHECK_LT(change, mir_data_.size()); + const MIRData* data = &mir_data_[change]; + DCHECK(data->has_def); + DCHECK_LT(data->wide_def ? data->vreg_def + 1u : data->vreg_def, num_vregs_); + return vreg_data_[data->vreg_def].change == change && + (!data->wide_def || vreg_data_[data->vreg_def + 1u].change == change); +} + +bool GvnDeadCodeElimination::VRegChains::IsSRegUsed(uint16_t first_change, uint16_t last_change, + int s_reg) const { + DCHECK_LE(first_change, last_change); + DCHECK_LE(last_change, mir_data_.size()); + for (size_t c = first_change; c != last_change; ++c) { + SSARepresentation* ssa_rep = mir_data_[c].mir->ssa_rep; + for (int i = 0; i != ssa_rep->num_uses; ++i) { + if (ssa_rep->uses[i] == s_reg) { + return true; + } + } + } + return false; +} + +void GvnDeadCodeElimination::VRegChains::RenameSRegUses(uint16_t first_change, uint16_t last_change, + int old_s_reg, int new_s_reg, bool wide) { + for (size_t c = first_change; c != last_change; ++c) { + SSARepresentation* ssa_rep = mir_data_[c].mir->ssa_rep; + for (int i = 0; i != ssa_rep->num_uses; ++i) { + if (ssa_rep->uses[i] == old_s_reg) { + ssa_rep->uses[i] = new_s_reg; + if (wide) { + ++i; + DCHECK_LT(i, ssa_rep->num_uses); + ssa_rep->uses[i] = new_s_reg + 1; + } + } + } + } +} + +void GvnDeadCodeElimination::VRegChains::RenameVRegUses(uint16_t first_change, uint16_t last_change, + int old_s_reg, int old_v_reg, + int new_s_reg, int new_v_reg) { + for (size_t c = first_change; c != last_change; ++c) { + MIR* mir = mir_data_[c].mir; + if (IsInstructionBinOp2Addr(mir->dalvikInsn.opcode) && + mir->ssa_rep->uses[0] == old_s_reg && old_v_reg != new_v_reg) { + // Rewrite binop_2ADDR with plain binop before doing the register rename. + ChangeBinOp2AddrToPlainBinOp(mir); + } + uint64_t df_attr = MIRGraph::GetDataFlowAttributes(mir); + size_t use = 0u; +#define REPLACE_VREG(REG) \ + if ((df_attr & DF_U##REG) != 0) { \ + if (mir->ssa_rep->uses[use] == old_s_reg) { \ + DCHECK_EQ(mir->dalvikInsn.v##REG, static_cast<uint32_t>(old_v_reg)); \ + mir->dalvikInsn.v##REG = new_v_reg; \ + mir->ssa_rep->uses[use] = new_s_reg; \ + if ((df_attr & DF_##REG##_WIDE) != 0) { \ + DCHECK_EQ(mir->ssa_rep->uses[use + 1], old_s_reg + 1); \ + mir->ssa_rep->uses[use + 1] = new_s_reg + 1; \ + } \ + } \ + use += ((df_attr & DF_##REG##_WIDE) != 0) ? 
2 : 1; \ + } + REPLACE_VREG(A) + REPLACE_VREG(B) + REPLACE_VREG(C) +#undef REPLACE_VREG + // We may encounter an out-of-order Phi which we need to ignore, otherwise we should + // only be asked to rename registers specified by DF_UA, DF_UB and DF_UC. + DCHECK_EQ(use, + static_cast<int>(mir->dalvikInsn.opcode) == kMirOpPhi + ? 0u + : static_cast<size_t>(mir->ssa_rep->num_uses)); + } +} + +GvnDeadCodeElimination::GvnDeadCodeElimination(const GlobalValueNumbering* gvn, + ScopedArenaAllocator* alloc) + : gvn_(gvn), + mir_graph_(gvn_->GetMirGraph()), + vreg_chains_(mir_graph_->GetNumOfCodeAndTempVRs(), alloc), + bb_(nullptr), + lvn_(nullptr), + no_uses_all_since_(0u), + unused_vregs_(new (alloc) ArenaBitVector(alloc, vreg_chains_.NumVRegs(), false)), + vregs_to_kill_(new (alloc) ArenaBitVector(alloc, vreg_chains_.NumVRegs(), false)), + kill_heads_(alloc->AllocArray<uint16_t>(vreg_chains_.NumVRegs(), kArenaAllocMisc)), + changes_to_kill_(alloc->Adapter()), + dependent_vregs_(new (alloc) ArenaBitVector(alloc, vreg_chains_.NumVRegs(), false)) { + changes_to_kill_.reserve(16u); +} + +void GvnDeadCodeElimination::Apply(BasicBlock* bb) { + bb_ = bb; + lvn_ = gvn_->GetLvn(bb->id); + + RecordPass(); + BackwardPass(); + + DCHECK_EQ(no_uses_all_since_, 0u); + lvn_ = nullptr; + bb_ = nullptr; +} + +void GvnDeadCodeElimination::RecordPass() { + // Record MIRs with vreg definition data, eliminate single instructions. + vreg_chains_.Reset(); + DCHECK_EQ(no_uses_all_since_, 0u); + for (MIR* mir = bb_->first_mir_insn; mir != nullptr; mir = mir->next) { + if (RecordMIR(mir)) { + RecordPassTryToKillOverwrittenMoveOrMoveSrc(); + RecordPassTryToKillLastMIR(); + } + } +} + +void GvnDeadCodeElimination::BackwardPass() { + // Now process MIRs in reverse order, trying to eliminate them. + unused_vregs_->ClearAllBits(); // Implicitly depend on all vregs at the end of BB. + while (vreg_chains_.NumMIRs() != 0u) { + if (BackwardPassTryToKillLastMIR()) { + continue; + } + BackwardPassProcessLastMIR(); + } +} + +void GvnDeadCodeElimination::KillMIR(MIRData* data) { + DCHECK(!data->must_keep); + DCHECK(!data->uses_all_vregs); + DCHECK(data->has_def); + DCHECK(data->mir->ssa_rep->num_defs == 1 || data->mir->ssa_rep->num_defs == 2); + + KillMIR(data->mir); + data->has_def = false; + data->is_move = false; + data->is_move_src = false; +} + +void GvnDeadCodeElimination::KillMIR(MIR* mir) { + mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop); + mir->ssa_rep->num_uses = 0; + mir->ssa_rep->num_defs = 0; +} + +void GvnDeadCodeElimination::ChangeBinOp2AddrToPlainBinOp(MIR* mir) { + mir->dalvikInsn.vC = mir->dalvikInsn.vB; + mir->dalvikInsn.vB = mir->dalvikInsn.vA; + mir->dalvikInsn.opcode = static_cast<Instruction::Code>( + mir->dalvikInsn.opcode - Instruction::ADD_INT_2ADDR + Instruction::ADD_INT); +} + +MIR* GvnDeadCodeElimination::CreatePhi(int s_reg, bool fp) { + int v_reg = mir_graph_->SRegToVReg(s_reg); + MIR* phi = mir_graph_->NewMIR(); + phi->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpPhi); + phi->dalvikInsn.vA = v_reg; + phi->offset = bb_->start_offset; + phi->m_unit_index = 0; // Arbitrarily assign all Phi nodes to outermost method. 
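ChangeBinOp2AddrToPlainBinOp() above relies on the *_2ADDR opcodes being laid out in the same order as their three-operand forms, so the rewrite is just "copy vA/vB into vB/vC and shift the opcode by a constant offset". A standalone sketch of that trick with a toy opcode enum; the parallel layout is exactly the assumption being exploited:

#include <cstdint>
#include <iostream>

// Toy opcode space with the parallel layout the rewrite depends on.
enum Opcode : uint16_t {
  ADD_INT, SUB_INT, MUL_INT,                   // binop vA, vB, vC
  ADD_INT_2ADDR, SUB_INT_2ADDR, MUL_INT_2ADDR  // binop/2addr vA, vB (vA is dest and left input)
};

struct Insn { Opcode opcode; uint32_t vA, vB, vC; };

void ChangeBinOp2AddrToPlainBinOp(Insn* insn) {
  insn->vC = insn->vB;                         // Right-hand input moves to vC.
  insn->vB = insn->vA;                         // Left-hand input (== dest) moves to vB.
  insn->opcode = static_cast<Opcode>(insn->opcode - ADD_INT_2ADDR + ADD_INT);
}

int main() {
  Insn insn{SUB_INT_2ADDR, /*vA=*/1, /*vB=*/2, /*vC=*/0};
  ChangeBinOp2AddrToPlainBinOp(&insn);
  std::cout << insn.opcode << " v" << insn.vA << ", v" << insn.vB << ", v" << insn.vC << "\n";
  // Prints "1 v1, v1, v2", i.e. SUB_INT v1, v1, v2.
}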
+ + phi->ssa_rep = static_cast<struct SSARepresentation *>(mir_graph_->GetArena()->Alloc( + sizeof(SSARepresentation), kArenaAllocDFInfo)); + + mir_graph_->AllocateSSADefData(phi, 1); + phi->ssa_rep->defs[0] = s_reg; + phi->ssa_rep->fp_def[0] = fp; + + size_t num_uses = bb_->predecessors.size(); + mir_graph_->AllocateSSAUseData(phi, num_uses); + std::fill_n(phi->ssa_rep->fp_use, num_uses, fp); + size_t idx = 0u; + for (BasicBlockId pred_id : bb_->predecessors) { + BasicBlock* pred_bb = mir_graph_->GetBasicBlock(pred_id); + DCHECK(pred_bb != nullptr); + phi->ssa_rep->uses[idx] = pred_bb->data_flow_info->vreg_to_ssa_map_exit[v_reg]; + DCHECK_NE(phi->ssa_rep->uses[idx], INVALID_SREG); + idx++; + } + + phi->meta.phi_incoming = static_cast<BasicBlockId*>(mir_graph_->GetArena()->Alloc( + sizeof(BasicBlockId) * num_uses, kArenaAllocDFInfo)); + std::copy(bb_->predecessors.begin(), bb_->predecessors.end(), phi->meta.phi_incoming); + bb_->PrependMIR(phi); + return phi; +} + +MIR* GvnDeadCodeElimination::RenameSRegDefOrCreatePhi(uint16_t def_change, uint16_t last_change, + MIR* mir_to_kill) { + DCHECK(mir_to_kill->ssa_rep->num_defs == 1 || mir_to_kill->ssa_rep->num_defs == 2); + bool wide = (mir_to_kill->ssa_rep->num_defs != 1); + int new_s_reg = mir_to_kill->ssa_rep->defs[0]; + + // Just before we kill mir_to_kill, we need to replace the previous SSA reg assigned to the + // same dalvik reg to keep consistency with subsequent instructions. However, if there's no + // defining MIR for that dalvik reg, the preserved valus must come from its predecessors + // and we need to create a new Phi (a degenerate Phi if there's only a single predecessor). + if (def_change == kNPos) { + bool fp = mir_to_kill->ssa_rep->fp_def[0]; + if (wide) { + DCHECK_EQ(new_s_reg + 1, mir_to_kill->ssa_rep->defs[1]); + DCHECK_EQ(fp, mir_to_kill->ssa_rep->fp_def[1]); + DCHECK_EQ(mir_graph_->SRegToVReg(new_s_reg) + 1, mir_graph_->SRegToVReg(new_s_reg + 1)); + CreatePhi(new_s_reg + 1, fp); // High word Phi. + } + return CreatePhi(new_s_reg, fp); + } else { + DCHECK_LT(def_change, last_change); + DCHECK_LE(last_change, vreg_chains_.NumMIRs()); + MIRData* def_data = vreg_chains_.GetMIRData(def_change); + DCHECK(def_data->has_def); + int old_s_reg = def_data->mir->ssa_rep->defs[0]; + DCHECK_NE(old_s_reg, new_s_reg); + DCHECK_EQ(mir_graph_->SRegToVReg(old_s_reg), mir_graph_->SRegToVReg(new_s_reg)); + def_data->mir->ssa_rep->defs[0] = new_s_reg; + if (wide) { + if (static_cast<int>(def_data->mir->dalvikInsn.opcode) == kMirOpPhi) { + // Currently the high word Phi is always located after the low word Phi. 
+ MIR* phi_high = def_data->mir->next; + DCHECK(phi_high != nullptr && static_cast<int>(phi_high->dalvikInsn.opcode) == kMirOpPhi); + DCHECK_EQ(phi_high->ssa_rep->defs[0], old_s_reg + 1); + phi_high->ssa_rep->defs[0] = new_s_reg + 1; + } else { + DCHECK_EQ(def_data->mir->ssa_rep->defs[1], old_s_reg + 1); + def_data->mir->ssa_rep->defs[1] = new_s_reg + 1; + } + } + vreg_chains_.RenameSRegUses(def_change + 1u, last_change, old_s_reg, new_s_reg, wide); + return nullptr; + } +} + + +void GvnDeadCodeElimination::BackwardPassProcessLastMIR() { + MIRData* data = vreg_chains_.LastMIRData(); + if (data->uses_all_vregs) { + DCHECK(data->must_keep); + unused_vregs_->ClearAllBits(); + DCHECK_EQ(no_uses_all_since_, vreg_chains_.NumMIRs()); + --no_uses_all_since_; + while (no_uses_all_since_ != 0u && + !vreg_chains_.GetMIRData(no_uses_all_since_ - 1u)->uses_all_vregs) { + --no_uses_all_since_; + } + } else { + if (data->has_def) { + unused_vregs_->SetBit(data->vreg_def); + if (data->wide_def) { + unused_vregs_->SetBit(data->vreg_def + 1); + } + } + for (int i = 0, num_uses = data->mir->ssa_rep->num_uses; i != num_uses; ++i) { + int v_reg = mir_graph_->SRegToVReg(data->mir->ssa_rep->uses[i]); + unused_vregs_->ClearBit(v_reg); + } + } + vreg_chains_.RemoveLastMIRData(); +} + +void GvnDeadCodeElimination::RecordPassKillMoveByRenamingSrcDef(uint16_t src_change, + uint16_t move_change) { + DCHECK_LT(src_change, move_change); + MIRData* src_data = vreg_chains_.GetMIRData(src_change); + MIRData* move_data = vreg_chains_.GetMIRData(move_change); + DCHECK(src_data->is_move_src); + DCHECK_EQ(src_data->wide_def, move_data->wide_def); + DCHECK(move_data->prev_value.change == kNPos || move_data->prev_value.change <= src_change); + DCHECK(!move_data->wide_def || move_data->prev_value_high.change == kNPos || + move_data->prev_value_high.change <= src_change); + + int old_s_reg = src_data->mir->ssa_rep->defs[0]; + // NOTE: old_s_reg may differ from move_data->mir->ssa_rep->uses[0]; value names must match. + int new_s_reg = move_data->mir->ssa_rep->defs[0]; + DCHECK_NE(old_s_reg, new_s_reg); + + if (IsInstructionBinOp2Addr(src_data->mir->dalvikInsn.opcode) && + src_data->vreg_def != move_data->vreg_def) { + // Rewrite binop_2ADDR with plain binop before doing the register rename. + ChangeBinOp2AddrToPlainBinOp(src_data->mir); + } + // Remove src_change from the vreg chain(s). + vreg_chains_.RemoveChange(src_change); + // Replace the move_change with the src_change, copying all necessary data. + src_data->is_move_src = move_data->is_move_src; + src_data->low_def_over_high_word = move_data->low_def_over_high_word; + src_data->high_def_over_low_word = move_data->high_def_over_low_word; + src_data->vreg_def = move_data->vreg_def; + src_data->prev_value = move_data->prev_value; + src_data->prev_value_high = move_data->prev_value_high; + src_data->mir->dalvikInsn.vA = move_data->vreg_def; + src_data->mir->ssa_rep->defs[0] = new_s_reg; + if (move_data->wide_def) { + DCHECK_EQ(src_data->mir->ssa_rep->defs[1], old_s_reg + 1); + src_data->mir->ssa_rep->defs[1] = new_s_reg + 1; + } + vreg_chains_.ReplaceChange(move_change, src_change); + + // Rename uses and kill the move. 
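RecordPassKillMoveByRenamingSrcDef() above removes a MOVE by making the instruction that produced the source vreg write directly into the MOVE's destination and then renaming later uses. A self-contained toy version of the same idea on a flat instruction list; the Insn type and the bookkeeping-free renaming are simplifications, with none of the SSA or wide-register handling of the real code:

#include <iostream>
#include <string>
#include <vector>

// Toy three-address IR (hypothetical, for illustration only).
struct Insn { std::string op; int dst; int src0; int src1; };

// Redirect the defining instruction to the MOVE's destination, rename the remaining uses,
// and turn the MOVE itself into a NOP.
void KillMoveByRenamingSrcDef(std::vector<Insn>& code, size_t move_idx) {
  int old_dst = code[move_idx].src0;  // vreg written by the defining instruction.
  int new_dst = code[move_idx].dst;   // vreg the MOVE was copying into.
  for (size_t i = 0; i != move_idx; ++i) {
    if (code[i].dst == old_dst) code[i].dst = new_dst;    // Redirect the definition.
  }
  for (size_t i = move_idx + 1; i != code.size(); ++i) {  // Rename later uses.
    if (code[i].src0 == old_dst) code[i].src0 = new_dst;
    if (code[i].src1 == old_dst) code[i].src1 = new_dst;
  }
  code[move_idx] = Insn{"nop", -1, -1, -1};  // As KillMIR() does, the MOVE becomes a NOP.
}

int main() {
  std::vector<Insn> code = {
    Insn{"add",  1, 2, 3},   // v1 = v2 + v3
    Insn{"move", 0, 1, -1},  // v0 = v1   <- the copy to eliminate
    Insn{"use",  -1, 0, -1}, // reads v0
  };
  KillMoveByRenamingSrcDef(code, 1);
  for (const Insn& insn : code) {
    std::cout << insn.op << " " << insn.dst << ", " << insn.src0 << ", " << insn.src1 << "\n";
  }
}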
+ vreg_chains_.RenameVRegUses(src_change + 1u, vreg_chains_.NumMIRs(), + old_s_reg, mir_graph_->SRegToVReg(old_s_reg), + new_s_reg, mir_graph_->SRegToVReg(new_s_reg)); + KillMIR(move_data); +} + +void GvnDeadCodeElimination::RecordPassTryToKillOverwrittenMoveOrMoveSrc(uint16_t check_change) { + MIRData* data = vreg_chains_.GetMIRData(check_change); + DCHECK(data->is_move || data->is_move_src); + int32_t dest_s_reg = data->mir->ssa_rep->defs[0]; + + if (data->is_move) { + // Check if source vreg has changed since the MOVE. + int32_t src_s_reg = data->mir->ssa_rep->uses[0]; + uint32_t src_v_reg = mir_graph_->SRegToVReg(src_s_reg); + uint16_t src_change = vreg_chains_.FindFirstChangeAfter(src_v_reg, check_change); + bool wide = data->wide_def; + if (wide) { + uint16_t src_change_high = vreg_chains_.FindFirstChangeAfter(src_v_reg + 1, check_change); + if (src_change_high != kNPos && (src_change == kNPos || src_change_high < src_change)) { + src_change = src_change_high; + } + } + if (src_change == kNPos || + !vreg_chains_.IsSRegUsed(src_change + 1u, vreg_chains_.NumMIRs(), dest_s_reg)) { + // We can simply change all uses of dest to src. + size_t rename_end = (src_change != kNPos) ? src_change + 1u : vreg_chains_.NumMIRs(); + vreg_chains_.RenameVRegUses(check_change + 1u, rename_end, + dest_s_reg, mir_graph_->SRegToVReg(dest_s_reg), + src_s_reg, mir_graph_->SRegToVReg(src_s_reg)); + + // Now, remove the MOVE from the vreg chain(s) and kill it. + vreg_chains_.RemoveChange(check_change); + KillMIR(data); + return; + } + } + + if (data->is_move_src) { + // Try to find a MOVE to a vreg that wasn't changed since check_change. + uint16_t value_name = + data->wide_def ? lvn_->GetSregValueWide(dest_s_reg) : lvn_->GetSregValue(dest_s_reg); + for (size_t c = check_change + 1u, size = vreg_chains_.NumMIRs(); c != size; ++c) { + MIRData* d = vreg_chains_.GetMIRData(c); + if (d->is_move && d->wide_def == data->wide_def && + (d->prev_value.change == kNPos || d->prev_value.change <= check_change) && + (!d->wide_def || + d->prev_value_high.change == kNPos || d->prev_value_high.change <= check_change)) { + // Compare value names to find move to move. + int32_t src_s_reg = d->mir->ssa_rep->uses[0]; + uint16_t src_name = + (d->wide_def ? lvn_->GetSregValueWide(src_s_reg) : lvn_->GetSregValue(src_s_reg)); + if (value_name == src_name) { + RecordPassKillMoveByRenamingSrcDef(check_change, c); + return; + } + } + } + } +} + +void GvnDeadCodeElimination::RecordPassTryToKillOverwrittenMoveOrMoveSrc() { + // Check if we're overwriting a the result of a move or the definition of a source of a move. + // For MOVE_WIDE, we may be overwriting partially; if that's the case, check that the other + // word wasn't previously overwritten - we would have tried to rename back then. + MIRData* data = vreg_chains_.LastMIRData(); + if (!data->has_def) { + return; + } + // NOTE: Instructions such as new-array implicitly use all vregs (if they throw) but they can + // define a move source which can be renamed. Therefore we allow the checked change to be the + // change before no_uses_all_since_. This has no effect on moves as they never use all vregs. + if (data->prev_value.change != kNPos && data->prev_value.change + 1u >= no_uses_all_since_) { + MIRData* check_data = vreg_chains_.GetMIRData(data->prev_value.change); + bool try_to_kill = false; + if (!check_data->is_move && !check_data->is_move_src) { + DCHECK(!try_to_kill); + } else if (!check_data->wide_def) { + // Narrow move; always fully overwritten by the last MIR. 
+ try_to_kill = true; + } else if (data->low_def_over_high_word) { + // Overwriting only the high word; is the low word still valid? + DCHECK_EQ(check_data->vreg_def + 1u, data->vreg_def); + if (vreg_chains_.LastChange(check_data->vreg_def) == data->prev_value.change) { + try_to_kill = true; + } + } else if (!data->wide_def) { + // Overwriting only the low word, is the high word still valid? + if (vreg_chains_.LastChange(data->vreg_def + 1) == data->prev_value.change) { + try_to_kill = true; + } + } else { + // Overwriting both words; was the high word still from the same move? + if (data->prev_value_high.change == data->prev_value.change) { + try_to_kill = true; + } + } + if (try_to_kill) { + RecordPassTryToKillOverwrittenMoveOrMoveSrc(data->prev_value.change); + } + } + if (data->wide_def && data->high_def_over_low_word && + data->prev_value_high.change != kNPos && + data->prev_value_high.change + 1u >= no_uses_all_since_) { + MIRData* check_data = vreg_chains_.GetMIRData(data->prev_value_high.change); + bool try_to_kill = false; + if (!check_data->is_move && !check_data->is_move_src) { + DCHECK(!try_to_kill); + } else if (!check_data->wide_def) { + // Narrow move; always fully overwritten by the last MIR. + try_to_kill = true; + } else if (vreg_chains_.LastChange(check_data->vreg_def + 1) == + data->prev_value_high.change) { + // High word is still valid. + try_to_kill = true; + } + if (try_to_kill) { + RecordPassTryToKillOverwrittenMoveOrMoveSrc(data->prev_value_high.change); + } + } +} + +void GvnDeadCodeElimination::RecordPassTryToKillLastMIR() { + MIRData* last_data = vreg_chains_.LastMIRData(); + if (last_data->must_keep) { + return; + } + if (UNLIKELY(!last_data->has_def)) { + // Must be an eliminated MOVE. Drop its data and data of all eliminated MIRs before it. + vreg_chains_.RemoveTrailingNops(); + return; + } + + // Try to kill a sequence of consecutive definitions of the same vreg. Allow mixing + // wide and non-wide defs; consider high word dead if low word has been overwritten. + uint16_t current_value = vreg_chains_.CurrentValue(last_data->vreg_def); + uint16_t change = vreg_chains_.NumMIRs() - 1u; + MIRData* data = last_data; + while (data->prev_value.value != current_value) { + --change; + if (data->prev_value.change == kNPos || data->prev_value.change != change) { + return; + } + data = vreg_chains_.GetMIRData(data->prev_value.change); + if (data->must_keep || !data->has_def || data->vreg_def != last_data->vreg_def) { + return; + } + } + + bool wide = last_data->wide_def; + if (wide) { + // Check that the low word is valid. + if (data->low_def_over_high_word) { + return; + } + // Check that the high word is valid. + MIRData* high_data = data; + if (!high_data->wide_def) { + uint16_t high_change = vreg_chains_.FindFirstChangeAfter(data->vreg_def + 1, change); + DCHECK_NE(high_change, kNPos); + high_data = vreg_chains_.GetMIRData(high_change); + DCHECK_EQ(high_data->vreg_def, data->vreg_def); + } + if (high_data->prev_value_high.value != current_value || high_data->high_def_over_low_word) { + return; + } + } + + MIR* phi = RenameSRegDefOrCreatePhi(data->prev_value.change, change, last_data->mir); + for (size_t i = 0, count = vreg_chains_.NumMIRs() - change; i != count; ++i) { + KillMIR(vreg_chains_.LastMIRData()->mir); + vreg_chains_.RemoveLastMIRData(); + } + if (phi != nullptr) { + // Though the Phi has been added to the beginning, we can put the MIRData at the end. 
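RecordPassTryToKillLastMIR() above walks back through adjacent definitions of the same vreg and kills the whole trailing run when, per GVN value names, the value it ends up with is one the vreg already held right before the run started (RenameSRegDefOrCreatePhi then keeps the SSA form consistent). A toy value-number version of just the "is this run redundant?" check, ignoring wide defs, must_keep and the SSA repair:

#include <cstdint>
#include <iostream>
#include <vector>

struct Def { int v_reg; uint16_t value; uint16_t prev_value; };  // prev_value = value before this def.

// Return the index of the first def in a trailing run of defs of the same vreg that can be
// killed because the run's final value equals the value held before that def; -1 if none.
int FindKillableRunStart(const std::vector<Def>& defs) {
  if (defs.empty()) return -1;
  const Def& last = defs.back();
  uint16_t wanted = last.value;                  // Value the run must turn out to be redundant against.
  for (int i = static_cast<int>(defs.size()) - 1; i >= 0; --i) {
    if (defs[i].v_reg != last.v_reg) return -1;  // Only a run of consecutive defs of one vreg.
    if (defs[i].prev_value == wanted) return i;  // The vreg already held this value before defs[i].
  }
  return -1;
}

int main() {
  // v0 := value 7; v0 := value 9; v0 := value 7 again  ->  the last two defs are dead.
  std::vector<Def> defs = { {0, 7, 5}, {0, 9, 7}, {0, 7, 9} };
  std::cout << FindKillableRunStart(defs) << "\n";  // Prints 1.
}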
+ vreg_chains_.AddMIRWithDef(phi, phi->dalvikInsn.vA, wide, current_value); + // Reset the previous value to avoid eventually eliminating the Phi itself (unless unused). + last_data = vreg_chains_.LastMIRData(); + last_data->prev_value.value = kNoValue; + last_data->prev_value_high.value = kNoValue; + } +} + +uint16_t GvnDeadCodeElimination::FindChangesToKill(uint16_t first_change, uint16_t last_change) { + // Process dependencies for changes in range [first_change, last_change) and record all + // changes that we need to kill. Return kNPos if there's a dependent change that must be + // kept unconditionally; otherwise the end of the range processed before encountering + // a change that defines a dalvik reg that we need to keep (last_change on full success). + changes_to_kill_.clear(); + dependent_vregs_->ClearAllBits(); + for (size_t change = first_change; change != last_change; ++change) { + MIRData* data = vreg_chains_.GetMIRData(change); + DCHECK(!data->uses_all_vregs); + bool must_not_depend = data->must_keep; + bool depends = false; + // Check if the MIR defines a vreg we're trying to eliminate. + if (data->has_def && vregs_to_kill_->IsBitSet(data->vreg_def)) { + if (change < kill_heads_[data->vreg_def]) { + must_not_depend = true; + } else { + depends = true; + } + } + if (data->has_def && data->wide_def && vregs_to_kill_->IsBitSet(data->vreg_def + 1)) { + if (change < kill_heads_[data->vreg_def + 1]) { + must_not_depend = true; + } else { + depends = true; + } + } + if (!depends) { + // Check for dependency through SSA reg uses. + SSARepresentation* ssa_rep = data->mir->ssa_rep; + for (int i = 0; i != ssa_rep->num_uses; ++i) { + if (dependent_vregs_->IsBitSet(mir_graph_->SRegToVReg(ssa_rep->uses[i]))) { + depends = true; + break; + } + } + } + // Now check if we can eliminate the insn if we need to. + if (depends && must_not_depend) { + return kNPos; + } + if (depends && data->has_def && + vreg_chains_.IsTopChange(change) && !vregs_to_kill_->IsBitSet(data->vreg_def) && + !unused_vregs_->IsBitSet(data->vreg_def) && + (!data->wide_def || !unused_vregs_->IsBitSet(data->vreg_def + 1))) { + // This is a top change but neither unnecessary nor one of the top kill changes. + return change; + } + // Finally, update the data. + if (depends) { + changes_to_kill_.push_back(change); + if (data->has_def) { + dependent_vregs_->SetBit(data->vreg_def); + if (data->wide_def) { + dependent_vregs_->SetBit(data->vreg_def + 1); + } + } + } else { + if (data->has_def) { + dependent_vregs_->ClearBit(data->vreg_def); + if (data->wide_def) { + dependent_vregs_->ClearBit(data->vreg_def + 1); + } + } + } + } + return last_change; +} + +void GvnDeadCodeElimination::BackwardPassTryToKillRevertVRegs() { +} + +bool GvnDeadCodeElimination::BackwardPassTryToKillLastMIR() { + MIRData* last_data = vreg_chains_.LastMIRData(); + if (last_data->must_keep) { + return false; + } + DCHECK(!last_data->uses_all_vregs); + if (!last_data->has_def) { + // Previously eliminated. + DCHECK_EQ(static_cast<int>(last_data->mir->dalvikInsn.opcode), static_cast<int>(kMirOpNop)); + vreg_chains_.RemoveTrailingNops(); + return true; + } + if (unused_vregs_->IsBitSet(last_data->vreg_def) || + (last_data->wide_def && unused_vregs_->IsBitSet(last_data->vreg_def + 1))) { + if (last_data->wide_def) { + // For wide defs, one of the vregs may still be considered needed, fix that. 
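The backward pass above amounts to reverse dead-store elimination on vregs: a removable definition into a vreg currently marked unused is killed, every use clears the corresponding bit, and instructions flagged uses_all_vregs reset the whole set. A standalone miniature of that loop, narrow defs only and with no must_keep handling:

#include <iostream>
#include <string>
#include <vector>

struct Insn {
  std::string text;
  int def_vreg;                // -1 if the instruction defines nothing.
  std::vector<int> use_vregs;
  bool uses_all_vregs;         // e.g. may throw: implicitly keeps every vreg live.
};

void BackwardPass(std::vector<Insn>& code, size_t num_vregs) {
  std::vector<bool> unused(num_vregs, false);    // At block end, every vreg counts as used.
  for (size_t i = code.size(); i != 0; ) {
    Insn& insn = code[--i];
    if (insn.uses_all_vregs) { unused.assign(num_vregs, false); continue; }
    if (insn.def_vreg >= 0 && unused[insn.def_vreg]) {
      insn.text = "nop";                         // Dead: the result is overwritten before any use.
      insn.def_vreg = -1;
      insn.use_vregs.clear();
      continue;
    }
    if (insn.def_vreg >= 0) unused[insn.def_vreg] = true;  // Earlier defs of this vreg are dead...
    for (int v : insn.use_vregs) unused[v] = false;        // ...unless something in between uses it.
  }
}

int main() {
  std::vector<Insn> code = {
    {"v0 = const 1", 0, {}, false},
    {"v0 = const 2", 0, {}, false},   // Makes the first const dead.
    {"return v0",   -1, {0}, true},
  };
  BackwardPass(code, 2);
  for (const Insn& insn : code) std::cout << insn.text << "\n";
}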
+ unused_vregs_->SetBit(last_data->vreg_def); + unused_vregs_->SetBit(last_data->vreg_def + 1); + } + KillMIR(last_data->mir); + vreg_chains_.RemoveLastMIRData(); + return true; + } + + vregs_to_kill_->ClearAllBits(); + size_t num_mirs = vreg_chains_.NumMIRs(); + DCHECK_NE(num_mirs, 0u); + uint16_t kill_change = num_mirs - 1u; + uint16_t start = num_mirs; + size_t num_killed_top_changes = 0u; + while (num_killed_top_changes != kMaxNumTopChangesToKill && + kill_change != kNPos && kill_change != num_mirs) { + ++num_killed_top_changes; + + DCHECK(vreg_chains_.IsTopChange(kill_change)); + MIRData* data = vreg_chains_.GetMIRData(kill_change); + int count = data->wide_def ? 2 : 1; + for (int v_reg = data->vreg_def, end = data->vreg_def + count; v_reg != end; ++v_reg) { + uint16_t kill_head = vreg_chains_.FindKillHead(v_reg, no_uses_all_since_); + if (kill_head == kNPos) { + return false; + } + kill_heads_[v_reg] = kill_head; + vregs_to_kill_->SetBit(v_reg); + start = std::min(start, kill_head); + } + DCHECK_LT(start, vreg_chains_.NumMIRs()); + + kill_change = FindChangesToKill(start, num_mirs); + } + + if (kill_change != num_mirs) { + return false; + } + + // Kill all MIRs marked as dependent. + for (uint32_t v_reg : vregs_to_kill_->Indexes()) { + // Rename s_regs or create Phi only once for each MIR (only for low word). + MIRData* data = vreg_chains_.GetMIRData(vreg_chains_.LastChange(v_reg)); + DCHECK(data->has_def); + if (data->vreg_def == v_reg) { + MIRData* kill_head_data = vreg_chains_.GetMIRData(kill_heads_[v_reg]); + RenameSRegDefOrCreatePhi(kill_head_data->PrevChange(v_reg), num_mirs, data->mir); + } else { + DCHECK_EQ(data->vreg_def + 1u, v_reg); + DCHECK_EQ(vreg_chains_.GetMIRData(kill_heads_[v_reg - 1u])->PrevChange(v_reg - 1u), + vreg_chains_.GetMIRData(kill_heads_[v_reg])->PrevChange(v_reg)); + } + } + unused_vregs_->Union(vregs_to_kill_); + for (auto it = changes_to_kill_.rbegin(), end = changes_to_kill_.rend(); it != end; ++it) { + MIRData* data = vreg_chains_.GetMIRData(*it); + DCHECK(!data->must_keep); + DCHECK(data->has_def); + vreg_chains_.RemoveChange(*it); + KillMIR(data); + } + + vreg_chains_.RemoveTrailingNops(); + return true; +} + +bool GvnDeadCodeElimination::RecordMIR(MIR* mir) { + bool must_keep = false; + bool uses_all_vregs = false; + bool is_move = false; + uint16_t opcode = mir->dalvikInsn.opcode; + switch (opcode) { + case kMirOpPhi: { + // We can't recognize wide variables in Phi from num_defs == 2 as we've got two Phis instead. + DCHECK_EQ(mir->ssa_rep->num_defs, 1); + int s_reg = mir->ssa_rep->defs[0]; + bool wide = false; + uint16_t new_value = lvn_->GetSregValue(s_reg); + if (new_value == kNoValue) { + wide = true; + new_value = lvn_->GetSregValueWide(s_reg); + if (new_value == kNoValue) { + return false; // Ignore the high word Phi. + } + } + + int v_reg = mir_graph_->SRegToVReg(s_reg); + DCHECK_EQ(vreg_chains_.CurrentValue(v_reg), kNoValue); // No previous def for v_reg. + if (wide) { + DCHECK_EQ(vreg_chains_.CurrentValue(v_reg + 1), kNoValue); + } + vreg_chains_.AddMIRWithDef(mir, v_reg, wide, new_value); + return true; // Avoid the common processing. + } + + case kMirOpNop: + case Instruction::NOP: + // Don't record NOPs. 
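The big RecordMIR() switch that follows sorts every opcode into a few buckets: freely removable, must_keep (a real side effect such as a volatile access), and must_keep plus uses_all_vregs (may throw or transfer control, so every vreg is implicitly live for a possible catch handler or interpreter state). A compact sketch of that classification shape with toy categories, not the full opcode table:

#include <iostream>

enum class ToyOp { kConst, kAddInt, kMoveException, kSgetVolatile, kInvokeStatic, kIfEqz };

struct Classification { bool must_keep; bool uses_all_vregs; };

Classification Classify(ToyOp op) {
  switch (op) {
    case ToyOp::kConst:
    case ToyOp::kAddInt:
      return {false, false};    // Pure computation: removable if the result goes unused.
    case ToyOp::kMoveException:
    case ToyOp::kSgetVolatile:
      return {true, false};     // Side effect, but no implicit dependence on all vregs.
    case ToyOp::kInvokeStatic:
    case ToyOp::kIfEqz:
    default:
      return {true, true};      // May throw or branch: keep it, and keep all vregs live.
  }
}

int main() {
  Classification c = Classify(ToyOp::kAddInt);
  std::cout << c.must_keep << " " << c.uses_all_vregs << "\n";  // Prints "0 0".
}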
+ return false; + + case kMirOpCheck: + must_keep = true; + uses_all_vregs = true; + break; + + case Instruction::RETURN_VOID: + case Instruction::RETURN: + case Instruction::RETURN_OBJECT: + case Instruction::RETURN_WIDE: + case Instruction::GOTO: + case Instruction::GOTO_16: + case Instruction::GOTO_32: + case Instruction::PACKED_SWITCH: + case Instruction::SPARSE_SWITCH: + case Instruction::IF_EQ: + case Instruction::IF_NE: + case Instruction::IF_LT: + case Instruction::IF_GE: + case Instruction::IF_GT: + case Instruction::IF_LE: + case Instruction::IF_EQZ: + case Instruction::IF_NEZ: + case Instruction::IF_LTZ: + case Instruction::IF_GEZ: + case Instruction::IF_GTZ: + case Instruction::IF_LEZ: + case kMirOpFusedCmplFloat: + case kMirOpFusedCmpgFloat: + case kMirOpFusedCmplDouble: + case kMirOpFusedCmpgDouble: + case kMirOpFusedCmpLong: + must_keep = true; + uses_all_vregs = true; // Keep the implicit dependencies on all vregs. + break; + + case Instruction::CONST_CLASS: + case Instruction::CONST_STRING: + case Instruction::CONST_STRING_JUMBO: + // NOTE: While we're currently treating CONST_CLASS, CONST_STRING and CONST_STRING_JUMBO + // as throwing but we could conceivably try and eliminate those exceptions if we're + // retrieving the class/string repeatedly. + must_keep = true; + uses_all_vregs = true; + break; + + case Instruction::MONITOR_ENTER: + case Instruction::MONITOR_EXIT: + // We can actually try and optimize across the acquire operation of MONITOR_ENTER, + // the value names provided by GVN reflect the possible changes to memory visibility. + // NOTE: In ART, MONITOR_ENTER and MONITOR_EXIT can throw only NPE. + must_keep = true; + uses_all_vregs = (mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0; + break; + + case Instruction::INVOKE_DIRECT: + case Instruction::INVOKE_DIRECT_RANGE: + case Instruction::INVOKE_VIRTUAL: + case Instruction::INVOKE_VIRTUAL_RANGE: + case Instruction::INVOKE_SUPER: + case Instruction::INVOKE_SUPER_RANGE: + case Instruction::INVOKE_INTERFACE: + case Instruction::INVOKE_INTERFACE_RANGE: + case Instruction::INVOKE_STATIC: + case Instruction::INVOKE_STATIC_RANGE: + case Instruction::CHECK_CAST: + case Instruction::THROW: + case Instruction::FILLED_NEW_ARRAY: + case Instruction::FILLED_NEW_ARRAY_RANGE: + case Instruction::FILL_ARRAY_DATA: + must_keep = true; + uses_all_vregs = true; + break; + + case Instruction::NEW_INSTANCE: + case Instruction::NEW_ARRAY: + must_keep = true; + uses_all_vregs = true; + break; + + case kMirOpNullCheck: + DCHECK_EQ(mir->ssa_rep->num_uses, 1); + if ((mir->optimization_flags & MIR_IGNORE_NULL_CHECK) != 0) { + mir->ssa_rep->num_uses = 0; + mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop); + return false; + } + must_keep = true; + uses_all_vregs = true; + break; + + case Instruction::MOVE_RESULT: + case Instruction::MOVE_RESULT_OBJECT: + case Instruction::MOVE_RESULT_WIDE: + break; + + case Instruction::INSTANCE_OF: + break; + + case Instruction::MOVE_EXCEPTION: + must_keep = true; + break; + + case kMirOpCopy: + case Instruction::MOVE: + case Instruction::MOVE_FROM16: + case Instruction::MOVE_16: + case Instruction::MOVE_WIDE: + case Instruction::MOVE_WIDE_FROM16: + case Instruction::MOVE_WIDE_16: + case Instruction::MOVE_OBJECT: + case Instruction::MOVE_OBJECT_FROM16: + case Instruction::MOVE_OBJECT_16: { + is_move = true; + // If the MIR defining src vreg is known, allow renaming all uses of src vreg to dest vreg + // while updating the defining MIR to directly define dest vreg. 
However, changing Phi's + // def this way doesn't work without changing MIRs in other BBs. + int src_v_reg = mir_graph_->SRegToVReg(mir->ssa_rep->uses[0]); + int src_change = vreg_chains_.LastChange(src_v_reg); + if (src_change != kNPos) { + MIRData* src_data = vreg_chains_.GetMIRData(src_change); + if (static_cast<int>(src_data->mir->dalvikInsn.opcode) != kMirOpPhi) { + src_data->is_move_src = true; + } + } + break; + } + + case Instruction::CONST_4: + case Instruction::CONST_16: + case Instruction::CONST: + case Instruction::CONST_HIGH16: + case Instruction::CONST_WIDE_16: + case Instruction::CONST_WIDE_32: + case Instruction::CONST_WIDE: + case Instruction::CONST_WIDE_HIGH16: + case Instruction::ARRAY_LENGTH: + case Instruction::CMPL_FLOAT: + case Instruction::CMPG_FLOAT: + case Instruction::CMPL_DOUBLE: + case Instruction::CMPG_DOUBLE: + case Instruction::CMP_LONG: + case Instruction::NEG_INT: + case Instruction::NOT_INT: + case Instruction::NEG_LONG: + case Instruction::NOT_LONG: + case Instruction::NEG_FLOAT: + case Instruction::NEG_DOUBLE: + case Instruction::INT_TO_LONG: + case Instruction::INT_TO_FLOAT: + case Instruction::INT_TO_DOUBLE: + case Instruction::LONG_TO_INT: + case Instruction::LONG_TO_FLOAT: + case Instruction::LONG_TO_DOUBLE: + case Instruction::FLOAT_TO_INT: + case Instruction::FLOAT_TO_LONG: + case Instruction::FLOAT_TO_DOUBLE: + case Instruction::DOUBLE_TO_INT: + case Instruction::DOUBLE_TO_LONG: + case Instruction::DOUBLE_TO_FLOAT: + case Instruction::INT_TO_BYTE: + case Instruction::INT_TO_CHAR: + case Instruction::INT_TO_SHORT: + case Instruction::ADD_INT: + case Instruction::SUB_INT: + case Instruction::MUL_INT: + case Instruction::AND_INT: + case Instruction::OR_INT: + case Instruction::XOR_INT: + case Instruction::SHL_INT: + case Instruction::SHR_INT: + case Instruction::USHR_INT: + case Instruction::ADD_LONG: + case Instruction::SUB_LONG: + case Instruction::MUL_LONG: + case Instruction::AND_LONG: + case Instruction::OR_LONG: + case Instruction::XOR_LONG: + case Instruction::SHL_LONG: + case Instruction::SHR_LONG: + case Instruction::USHR_LONG: + case Instruction::ADD_FLOAT: + case Instruction::SUB_FLOAT: + case Instruction::MUL_FLOAT: + case Instruction::DIV_FLOAT: + case Instruction::REM_FLOAT: + case Instruction::ADD_DOUBLE: + case Instruction::SUB_DOUBLE: + case Instruction::MUL_DOUBLE: + case Instruction::DIV_DOUBLE: + case Instruction::REM_DOUBLE: + case Instruction::ADD_INT_2ADDR: + case Instruction::SUB_INT_2ADDR: + case Instruction::MUL_INT_2ADDR: + case Instruction::AND_INT_2ADDR: + case Instruction::OR_INT_2ADDR: + case Instruction::XOR_INT_2ADDR: + case Instruction::SHL_INT_2ADDR: + case Instruction::SHR_INT_2ADDR: + case Instruction::USHR_INT_2ADDR: + case Instruction::ADD_LONG_2ADDR: + case Instruction::SUB_LONG_2ADDR: + case Instruction::MUL_LONG_2ADDR: + case Instruction::AND_LONG_2ADDR: + case Instruction::OR_LONG_2ADDR: + case Instruction::XOR_LONG_2ADDR: + case Instruction::SHL_LONG_2ADDR: + case Instruction::SHR_LONG_2ADDR: + case Instruction::USHR_LONG_2ADDR: + case Instruction::ADD_FLOAT_2ADDR: + case Instruction::SUB_FLOAT_2ADDR: + case Instruction::MUL_FLOAT_2ADDR: + case Instruction::DIV_FLOAT_2ADDR: + case Instruction::REM_FLOAT_2ADDR: + case Instruction::ADD_DOUBLE_2ADDR: + case Instruction::SUB_DOUBLE_2ADDR: + case Instruction::MUL_DOUBLE_2ADDR: + case Instruction::DIV_DOUBLE_2ADDR: + case Instruction::REM_DOUBLE_2ADDR: + case Instruction::ADD_INT_LIT16: + case Instruction::RSUB_INT: + case Instruction::MUL_INT_LIT16: + case 
Instruction::AND_INT_LIT16: + case Instruction::OR_INT_LIT16: + case Instruction::XOR_INT_LIT16: + case Instruction::ADD_INT_LIT8: + case Instruction::RSUB_INT_LIT8: + case Instruction::MUL_INT_LIT8: + case Instruction::AND_INT_LIT8: + case Instruction::OR_INT_LIT8: + case Instruction::XOR_INT_LIT8: + case Instruction::SHL_INT_LIT8: + case Instruction::SHR_INT_LIT8: + case Instruction::USHR_INT_LIT8: + break; + + case Instruction::DIV_INT: + case Instruction::REM_INT: + case Instruction::DIV_LONG: + case Instruction::REM_LONG: + case Instruction::DIV_INT_2ADDR: + case Instruction::REM_INT_2ADDR: + case Instruction::DIV_LONG_2ADDR: + case Instruction::REM_LONG_2ADDR: + if ((mir->optimization_flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) { + must_keep = true; + uses_all_vregs = true; + } + break; + + case Instruction::DIV_INT_LIT16: + case Instruction::REM_INT_LIT16: + case Instruction::DIV_INT_LIT8: + case Instruction::REM_INT_LIT8: + if (mir->dalvikInsn.vC == 0) { // Explicit division by 0? + must_keep = true; + uses_all_vregs = true; + } + break; + + case Instruction::AGET_OBJECT: + case Instruction::AGET: + case Instruction::AGET_WIDE: + case Instruction::AGET_BOOLEAN: + case Instruction::AGET_BYTE: + case Instruction::AGET_CHAR: + case Instruction::AGET_SHORT: + if ((mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0 || + (mir->optimization_flags & MIR_IGNORE_RANGE_CHECK) == 0) { + must_keep = true; + uses_all_vregs = true; + } + break; + + case Instruction::APUT_OBJECT: + case Instruction::APUT: + case Instruction::APUT_WIDE: + case Instruction::APUT_BYTE: + case Instruction::APUT_BOOLEAN: + case Instruction::APUT_SHORT: + case Instruction::APUT_CHAR: + must_keep = true; + if ((mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0 || + (mir->optimization_flags & MIR_IGNORE_RANGE_CHECK) == 0) { + uses_all_vregs = true; + } + break; + + case Instruction::IGET_OBJECT: + case Instruction::IGET: + case Instruction::IGET_WIDE: + case Instruction::IGET_BOOLEAN: + case Instruction::IGET_BYTE: + case Instruction::IGET_CHAR: + case Instruction::IGET_SHORT: { + const MirIFieldLoweringInfo& info = mir_graph_->GetIFieldLoweringInfo(mir); + if ((mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0 || + !info.IsResolved() || !info.FastGet()) { + must_keep = true; + uses_all_vregs = true; + } else if (info.IsVolatile()) { + must_keep = true; + } + break; + } + + case Instruction::IPUT_OBJECT: + case Instruction::IPUT: + case Instruction::IPUT_WIDE: + case Instruction::IPUT_BOOLEAN: + case Instruction::IPUT_BYTE: + case Instruction::IPUT_CHAR: + case Instruction::IPUT_SHORT: { + must_keep = true; + const MirIFieldLoweringInfo& info = mir_graph_->GetIFieldLoweringInfo(mir); + if ((mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0 || + !info.IsResolved() || !info.FastPut()) { + uses_all_vregs = true; + } + break; + } + + case Instruction::SGET_OBJECT: + case Instruction::SGET: + case Instruction::SGET_WIDE: + case Instruction::SGET_BOOLEAN: + case Instruction::SGET_BYTE: + case Instruction::SGET_CHAR: + case Instruction::SGET_SHORT: { + const MirSFieldLoweringInfo& info = mir_graph_->GetSFieldLoweringInfo(mir); + if ((mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) == 0 || + !info.IsResolved() || !info.FastGet()) { + must_keep = true; + uses_all_vregs = true; + } else if (info.IsVolatile()) { + must_keep = true; + } + break; + } + + case Instruction::SPUT_OBJECT: + case Instruction::SPUT: + case Instruction::SPUT_WIDE: + case Instruction::SPUT_BOOLEAN: + case Instruction::SPUT_BYTE: + case 
Instruction::SPUT_CHAR: + case Instruction::SPUT_SHORT: { + must_keep = true; + const MirSFieldLoweringInfo& info = mir_graph_->GetSFieldLoweringInfo(mir); + if ((mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) == 0 || + !info.IsResolved() || !info.FastPut()) { + uses_all_vregs = true; + } + break; + } + + default: + LOG(FATAL) << "Unexpected opcode: " << opcode; + UNREACHABLE(); + break; + } + + if (mir->ssa_rep->num_defs != 0) { + DCHECK(mir->ssa_rep->num_defs == 1 || mir->ssa_rep->num_defs == 2); + bool wide = (mir->ssa_rep->num_defs == 2); + int s_reg = mir->ssa_rep->defs[0]; + int v_reg = mir_graph_->SRegToVReg(s_reg); + uint16_t new_value = wide ? lvn_->GetSregValueWide(s_reg) : lvn_->GetSregValue(s_reg); + DCHECK_NE(new_value, kNoValue); + + vreg_chains_.UpdateInitialVRegValue(v_reg, wide, lvn_); + vreg_chains_.AddMIRWithDef(mir, v_reg, wide, new_value); + if (is_move) { + // Allow renaming all uses of dest vreg to src vreg. + vreg_chains_.LastMIRData()->is_move = true; + } + } else { + vreg_chains_.AddMIRWithoutDef(mir); + DCHECK(!is_move) << opcode; + } + + if (must_keep) { + MIRData* last_data = vreg_chains_.LastMIRData(); + last_data->must_keep = true; + if (uses_all_vregs) { + last_data->uses_all_vregs = true; + no_uses_all_since_ = vreg_chains_.NumMIRs(); + } + } else { + DCHECK_NE(mir->ssa_rep->num_defs, 0) << opcode; + DCHECK(!uses_all_vregs) << opcode; + } + return true; +} + +} // namespace art diff --git a/compiler/dex/gvn_dead_code_elimination.h b/compiler/dex/gvn_dead_code_elimination.h new file mode 100644 index 0000000000..9a19f29970 --- /dev/null +++ b/compiler/dex/gvn_dead_code_elimination.h @@ -0,0 +1,166 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DEX_GVN_DEAD_CODE_ELIMINATION_H_ +#define ART_COMPILER_DEX_GVN_DEAD_CODE_ELIMINATION_H_ + +#include "base/arena_object.h" +#include "base/scoped_arena_containers.h" +#include "global_value_numbering.h" + +namespace art { + +class ArenaBitVector; +class BasicBlock; +class LocalValueNumbering; +class MIR; +class MIRGraph; + +/** + * @class DeadCodeElimination + * @details Eliminate dead code based on the results of global value numbering. + * Also get rid of MOVE insns when we can use the source instead of destination + * without affecting the vreg values at safepoints; this is useful in methods + * with a large number of vregs that frequently move values to and from low vregs + * to accommodate insns that can work only with the low 16 or 256 vregs. + */ +class GvnDeadCodeElimination : public DeletableArenaObject<kArenaAllocMisc> { + public: + GvnDeadCodeElimination(const GlobalValueNumbering* gvn, ScopedArenaAllocator* alloc); + + // Apply the DCE to a basic block. 
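Editor's aside (illustrative, not part of the original patch): the constructor above and the Apply() declaration that follows are the entire public surface of this pass. A minimal driver sketch, modeled on PerformDCE() in the new gvn_dead_code_elimination_test.cc further down; cu and gvn are placeholder names for an existing CompilationUnit* and a finished GlobalValueNumbering run (the test performs GVN and its code modifications first, see PerformGVN_DCE()):

  std::unique_ptr<ScopedArenaAllocator> allocator(ScopedArenaAllocator::Create(&cu->arena_stack));
  std::unique_ptr<GvnDeadCodeElimination> dce(
      new (allocator.get()) GvnDeadCodeElimination(gvn, allocator.get()));
  PreOrderDfsIterator iterator(cu->mir_graph.get());
  for (BasicBlock* bb = iterator.Next(); bb != nullptr; bb = iterator.Next()) {
    if (bb->block_type == kDalvikByteCode) {
      dce->Apply(bb);  // NOPs dead MIRs and renames s_regs/v_regs within the block.
    }
  }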
+ void Apply(BasicBlock* bb); + + private: + static constexpr uint16_t kNoValue = GlobalValueNumbering::kNoValue; + static constexpr uint16_t kNPos = 0xffffu; + static constexpr size_t kMaxNumTopChangesToKill = 2; + + struct VRegValue { + VRegValue() : value(kNoValue), change(kNPos) { } + + // Value name as reported by GVN, kNoValue if not available. + uint16_t value; + // Index of the change in mir_data_ that defined the value, kNPos if initial value for the BB. + uint16_t change; + }; + + struct MIRData { + explicit MIRData(MIR* m) + : mir(m), uses_all_vregs(false), must_keep(false), is_move(false), is_move_src(false), + has_def(false), wide_def(false), + low_def_over_high_word(false), high_def_over_low_word(false), vreg_def(0u), + prev_value(), prev_value_high() { + } + + uint16_t PrevChange(int v_reg) const; + void SetPrevChange(int v_reg, uint16_t change); + void RemovePrevChange(int v_reg, MIRData* prev_data); + + MIR* mir; + bool uses_all_vregs : 1; // If mir uses all vregs, uses in mir->ssa_rep are irrelevant. + bool must_keep : 1; + bool is_move : 1; + bool is_move_src : 1; + bool has_def : 1; + bool wide_def : 1; + bool low_def_over_high_word : 1; + bool high_def_over_low_word : 1; + uint16_t vreg_def; + VRegValue prev_value; + VRegValue prev_value_high; // For wide defs. + }; + + class VRegChains { + public: + VRegChains(uint32_t num_vregs, ScopedArenaAllocator* alloc); + + void Reset(); + + void AddMIRWithDef(MIR* mir, int v_reg, bool wide, uint16_t new_value); + void AddMIRWithoutDef(MIR* mir); + void RemoveLastMIRData(); + void RemoveTrailingNops(); + + size_t NumMIRs() const; + MIRData* GetMIRData(size_t pos); + MIRData* LastMIRData(); + + uint32_t NumVRegs() const; + void InsertInitialValueHigh(int v_reg, uint16_t value); + void UpdateInitialVRegValue(int v_reg, bool wide, const LocalValueNumbering* lvn); + uint16_t LastChange(int v_reg); + uint16_t CurrentValue(int v_reg); + + uint16_t FindKillHead(int v_reg, uint16_t cutoff); + uint16_t FindFirstChangeAfter(int v_reg, uint16_t change) const; + void ReplaceChange(uint16_t old_change, uint16_t new_change); + void RemoveChange(uint16_t change); + bool IsTopChange(uint16_t change) const; + bool IsSRegUsed(uint16_t first_change, uint16_t last_change, int s_reg) const; + void RenameSRegUses(uint16_t first_change, uint16_t last_change, + int old_s_reg, int new_s_reg, bool wide); + void RenameVRegUses(uint16_t first_change, uint16_t last_change, + int old_s_reg, int old_v_reg, int new_s_reg, int new_v_reg); + + private: + const uint32_t num_vregs_; + VRegValue* const vreg_data_; + ScopedArenaVector<MIRData> mir_data_; + }; + + void RecordPass(); + void BackwardPass(); + + void KillMIR(MIRData* data); + static void KillMIR(MIR* mir); + static void ChangeBinOp2AddrToPlainBinOp(MIR* mir); + MIR* CreatePhi(int s_reg, bool fp); + MIR* RenameSRegDefOrCreatePhi(uint16_t def_change, uint16_t last_change, MIR* mir_to_kill); + + // Update state variables going backwards through a MIR. 
+ void BackwardPassProcessLastMIR(); + + uint16_t FindChangesToKill(uint16_t first_change, uint16_t last_change); + void BackwardPassTryToKillRevertVRegs(); + bool BackwardPassTryToKillLastMIR(); + + void RecordPassKillMoveByRenamingSrcDef(uint16_t src_change, uint16_t move_change); + void RecordPassTryToKillOverwrittenMoveOrMoveSrc(uint16_t check_change); + void RecordPassTryToKillOverwrittenMoveOrMoveSrc(); + void RecordPassTryToKillLastMIR(); + + bool RecordMIR(MIR* mir); + + const GlobalValueNumbering* const gvn_; + MIRGraph* const mir_graph_; + + VRegChains vreg_chains_; + BasicBlock* bb_; + const LocalValueNumbering* lvn_; + size_t no_uses_all_since_; // The change index after the last change with uses_all_vregs set. + + // Data used when processing MIRs in reverse order. + ArenaBitVector* unused_vregs_; // vregs that are not needed later. + ArenaBitVector* vregs_to_kill_; // vregs that revert to a previous value. + uint16_t* kill_heads_; // For each vreg in vregs_to_kill_, the first change to kill. + ScopedArenaVector<uint16_t> changes_to_kill_; + ArenaBitVector* dependent_vregs_; +}; + +} // namespace art + +#endif // ART_COMPILER_DEX_GVN_DEAD_CODE_ELIMINATION_H_ diff --git a/compiler/dex/gvn_dead_code_elimination_test.cc b/compiler/dex/gvn_dead_code_elimination_test.cc new file mode 100644 index 0000000000..954e9f1d37 --- /dev/null +++ b/compiler/dex/gvn_dead_code_elimination_test.cc @@ -0,0 +1,1800 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "dataflow_iterator-inl.h" +#include "dex/mir_field_info.h" +#include "global_value_numbering.h" +#include "gvn_dead_code_elimination.h" +#include "local_value_numbering.h" +#include "gtest/gtest.h" + +namespace art { + +class GvnDeadCodeEliminationTest : public testing::Test { + protected: + static constexpr uint16_t kNoValue = GlobalValueNumbering::kNoValue; + + struct IFieldDef { + uint16_t field_idx; + uintptr_t declaring_dex_file; + uint16_t declaring_field_idx; + bool is_volatile; + DexMemAccessType type; + }; + + struct SFieldDef { + uint16_t field_idx; + uintptr_t declaring_dex_file; + uint16_t declaring_field_idx; + bool is_volatile; + DexMemAccessType type; + }; + + struct BBDef { + static constexpr size_t kMaxSuccessors = 4; + static constexpr size_t kMaxPredecessors = 4; + + BBType type; + size_t num_successors; + BasicBlockId successors[kMaxPredecessors]; + size_t num_predecessors; + BasicBlockId predecessors[kMaxPredecessors]; + }; + + struct MIRDef { + static constexpr size_t kMaxSsaDefs = 2; + static constexpr size_t kMaxSsaUses = 4; + + BasicBlockId bbid; + Instruction::Code opcode; + int64_t value; + uint32_t field_info; + size_t num_uses; + int32_t uses[kMaxSsaUses]; + size_t num_defs; + int32_t defs[kMaxSsaDefs]; + }; + +#define DEF_SUCC0() \ + 0u, { } +#define DEF_SUCC1(s1) \ + 1u, { s1 } +#define DEF_SUCC2(s1, s2) \ + 2u, { s1, s2 } +#define DEF_SUCC3(s1, s2, s3) \ + 3u, { s1, s2, s3 } +#define DEF_SUCC4(s1, s2, s3, s4) \ + 4u, { s1, s2, s3, s4 } +#define DEF_PRED0() \ + 0u, { } +#define DEF_PRED1(p1) \ + 1u, { p1 } +#define DEF_PRED2(p1, p2) \ + 2u, { p1, p2 } +#define DEF_PRED3(p1, p2, p3) \ + 3u, { p1, p2, p3 } +#define DEF_PRED4(p1, p2, p3, p4) \ + 4u, { p1, p2, p3, p4 } +#define DEF_BB(type, succ, pred) \ + { type, succ, pred } + +#define DEF_CONST(bb, opcode, reg, value) \ + { bb, opcode, value, 0u, 0, { }, 1, { reg } } +#define DEF_CONST_WIDE(bb, opcode, reg, value) \ + { bb, opcode, value, 0u, 0, { }, 2, { reg, reg + 1 } } +#define DEF_CONST_STRING(bb, opcode, reg, index) \ + { bb, opcode, index, 0u, 0, { }, 1, { reg } } +#define DEF_IGET(bb, opcode, reg, obj, field_info) \ + { bb, opcode, 0u, field_info, 1, { obj }, 1, { reg } } +#define DEF_IGET_WIDE(bb, opcode, reg, obj, field_info) \ + { bb, opcode, 0u, field_info, 1, { obj }, 2, { reg, reg + 1 } } +#define DEF_IPUT(bb, opcode, reg, obj, field_info) \ + { bb, opcode, 0u, field_info, 2, { reg, obj }, 0, { } } +#define DEF_IPUT_WIDE(bb, opcode, reg, obj, field_info) \ + { bb, opcode, 0u, field_info, 3, { reg, reg + 1, obj }, 0, { } } +#define DEF_SGET(bb, opcode, reg, field_info) \ + { bb, opcode, 0u, field_info, 0, { }, 1, { reg } } +#define DEF_SGET_WIDE(bb, opcode, reg, field_info) \ + { bb, opcode, 0u, field_info, 0, { }, 2, { reg, reg + 1 } } +#define DEF_SPUT(bb, opcode, reg, field_info) \ + { bb, opcode, 0u, field_info, 1, { reg }, 0, { } } +#define DEF_SPUT_WIDE(bb, opcode, reg, field_info) \ + { bb, opcode, 0u, field_info, 2, { reg, reg + 1 }, 0, { } } +#define DEF_AGET(bb, opcode, reg, obj, idx) \ + { bb, opcode, 0u, 0u, 2, { obj, idx }, 1, { reg } } +#define DEF_AGET_WIDE(bb, opcode, reg, obj, idx) \ + { bb, opcode, 0u, 0u, 2, { obj, idx }, 2, { reg, reg + 1 } } +#define DEF_APUT(bb, opcode, reg, obj, idx) \ + { bb, opcode, 0u, 0u, 3, { reg, obj, idx }, 0, { } } +#define DEF_APUT_WIDE(bb, opcode, reg, obj, idx) \ + { bb, opcode, 0u, 0u, 4, { reg, reg + 1, obj, idx }, 0, { } } +#define DEF_INVOKE1(bb, opcode, reg) \ + { bb, opcode, 0u, 0u, 1, { reg }, 0, { } } +#define 
DEF_UNIQUE_REF(bb, opcode, reg) \ + { bb, opcode, 0u, 0u, 0, { }, 1, { reg } } // CONST_CLASS, CONST_STRING, NEW_ARRAY, ... +#define DEF_IFZ(bb, opcode, reg) \ + { bb, opcode, 0u, 0u, 1, { reg }, 0, { } } +#define DEF_MOVE(bb, opcode, reg, src) \ + { bb, opcode, 0u, 0u, 1, { src }, 1, { reg } } +#define DEF_MOVE_WIDE(bb, opcode, reg, src) \ + { bb, opcode, 0u, 0u, 2, { src, src + 1 }, 2, { reg, reg + 1 } } +#define DEF_PHI2(bb, reg, src1, src2) \ + { bb, static_cast<Instruction::Code>(kMirOpPhi), 0, 0u, 2u, { src1, src2 }, 1, { reg } } +#define DEF_UNOP(bb, opcode, result, src1) \ + { bb, opcode, 0u, 0u, 1, { src1 }, 1, { result } } +#define DEF_BINOP(bb, opcode, result, src1, src2) \ + { bb, opcode, 0u, 0u, 2, { src1, src2 }, 1, { result } } + + void DoPrepareIFields(const IFieldDef* defs, size_t count) { + cu_.mir_graph->ifield_lowering_infos_.clear(); + cu_.mir_graph->ifield_lowering_infos_.reserve(count); + for (size_t i = 0u; i != count; ++i) { + const IFieldDef* def = &defs[i]; + MirIFieldLoweringInfo field_info(def->field_idx, def->type); + if (def->declaring_dex_file != 0u) { + field_info.declaring_dex_file_ = reinterpret_cast<const DexFile*>(def->declaring_dex_file); + field_info.declaring_field_idx_ = def->declaring_field_idx; + field_info.flags_ = + MirIFieldLoweringInfo::kFlagFastGet | MirIFieldLoweringInfo::kFlagFastPut | + (field_info.flags_ & ~(def->is_volatile ? 0u : MirIFieldLoweringInfo::kFlagIsVolatile)); + } + cu_.mir_graph->ifield_lowering_infos_.push_back(field_info); + } + } + + template <size_t count> + void PrepareIFields(const IFieldDef (&defs)[count]) { + DoPrepareIFields(defs, count); + } + + void DoPrepareSFields(const SFieldDef* defs, size_t count) { + cu_.mir_graph->sfield_lowering_infos_.clear(); + cu_.mir_graph->sfield_lowering_infos_.reserve(count); + for (size_t i = 0u; i != count; ++i) { + const SFieldDef* def = &defs[i]; + MirSFieldLoweringInfo field_info(def->field_idx, def->type); + // Mark even unresolved fields as initialized. + field_info.flags_ |= MirSFieldLoweringInfo::kFlagClassIsInitialized; + // NOTE: MirSFieldLoweringInfo::kFlagClassIsInDexCache isn't used by GVN. + if (def->declaring_dex_file != 0u) { + field_info.declaring_dex_file_ = reinterpret_cast<const DexFile*>(def->declaring_dex_file); + field_info.declaring_field_idx_ = def->declaring_field_idx; + field_info.flags_ = + MirSFieldLoweringInfo::kFlagFastGet | MirSFieldLoweringInfo::kFlagFastPut | + (field_info.flags_ & ~(def->is_volatile ? 0u : MirSFieldLoweringInfo::kFlagIsVolatile)); + } + cu_.mir_graph->sfield_lowering_infos_.push_back(field_info); + } + } + + template <size_t count> + void PrepareSFields(const SFieldDef (&defs)[count]) { + DoPrepareSFields(defs, count); + } + + void DoPrepareBasicBlocks(const BBDef* defs, size_t count) { + cu_.mir_graph->block_id_map_.clear(); + cu_.mir_graph->block_list_.clear(); + ASSERT_LT(3u, count); // null, entry, exit and at least one bytecode block. + ASSERT_EQ(kNullBlock, defs[0].type); + ASSERT_EQ(kEntryBlock, defs[1].type); + ASSERT_EQ(kExitBlock, defs[2].type); + for (size_t i = 0u; i != count; ++i) { + const BBDef* def = &defs[i]; + BasicBlock* bb = cu_.mir_graph->CreateNewBB(def->type); + if (def->num_successors <= 2) { + bb->successor_block_list_type = kNotUsed; + bb->fall_through = (def->num_successors >= 1) ? def->successors[0] : 0u; + bb->taken = (def->num_successors >= 2) ? 
def->successors[1] : 0u; + } else { + bb->successor_block_list_type = kPackedSwitch; + bb->fall_through = 0u; + bb->taken = 0u; + bb->successor_blocks.reserve(def->num_successors); + for (size_t j = 0u; j != def->num_successors; ++j) { + SuccessorBlockInfo* successor_block_info = + static_cast<SuccessorBlockInfo*>(cu_.arena.Alloc(sizeof(SuccessorBlockInfo), + kArenaAllocSuccessor)); + successor_block_info->block = j; + successor_block_info->key = 0u; // Not used by class init check elimination. + bb->successor_blocks.push_back(successor_block_info); + } + } + bb->predecessors.assign(def->predecessors, def->predecessors + def->num_predecessors); + if (def->type == kDalvikByteCode || def->type == kEntryBlock || def->type == kExitBlock) { + bb->data_flow_info = static_cast<BasicBlockDataFlow*>( + cu_.arena.Alloc(sizeof(BasicBlockDataFlow), kArenaAllocDFInfo)); + bb->data_flow_info->live_in_v = live_in_v_; + bb->data_flow_info->vreg_to_ssa_map_exit = nullptr; + } + } + ASSERT_EQ(count, cu_.mir_graph->block_list_.size()); + cu_.mir_graph->entry_block_ = cu_.mir_graph->block_list_[1]; + ASSERT_EQ(kEntryBlock, cu_.mir_graph->entry_block_->block_type); + cu_.mir_graph->exit_block_ = cu_.mir_graph->block_list_[2]; + ASSERT_EQ(kExitBlock, cu_.mir_graph->exit_block_->block_type); + } + + template <size_t count> + void PrepareBasicBlocks(const BBDef (&defs)[count]) { + DoPrepareBasicBlocks(defs, count); + } + + int SRegToVReg(int32_t s_reg, bool wide) { + int v_reg = cu_.mir_graph->SRegToVReg(s_reg); + CHECK_LT(static_cast<size_t>(v_reg), num_vregs_); + if (wide) { + CHECK_LT(static_cast<size_t>(v_reg + 1), num_vregs_); + } + return v_reg; + } + + int SRegToVReg(int32_t* uses, size_t* use, bool wide) { + int v_reg = SRegToVReg(uses[*use], wide); + if (wide) { + CHECK_EQ(uses[*use] + 1, uses[*use + 1]); + *use += 2u; + } else { + *use += 1u; + } + return v_reg; + } + + void DoPrepareMIRs(const MIRDef* defs, size_t count) { + mir_count_ = count; + mirs_ = reinterpret_cast<MIR*>(cu_.arena.Alloc(sizeof(MIR) * count, kArenaAllocMIR)); + ssa_reps_.resize(count); + for (size_t i = 0u; i != count; ++i) { + const MIRDef* def = &defs[i]; + MIR* mir = &mirs_[i]; + ASSERT_LT(def->bbid, cu_.mir_graph->block_list_.size()); + BasicBlock* bb = cu_.mir_graph->block_list_[def->bbid]; + bb->AppendMIR(mir); + mir->dalvikInsn.opcode = def->opcode; + mir->dalvikInsn.vB = static_cast<int32_t>(def->value); + mir->dalvikInsn.vB_wide = def->value; + if (IsInstructionIGetOrIPut(def->opcode)) { + ASSERT_LT(def->field_info, cu_.mir_graph->ifield_lowering_infos_.size()); + mir->meta.ifield_lowering_info = def->field_info; + ASSERT_EQ(cu_.mir_graph->ifield_lowering_infos_[def->field_info].MemAccessType(), + IGetOrIPutMemAccessType(def->opcode)); + } else if (IsInstructionSGetOrSPut(def->opcode)) { + ASSERT_LT(def->field_info, cu_.mir_graph->sfield_lowering_infos_.size()); + mir->meta.sfield_lowering_info = def->field_info; + ASSERT_EQ(cu_.mir_graph->sfield_lowering_infos_[def->field_info].MemAccessType(), + SGetOrSPutMemAccessType(def->opcode)); + } else if (def->opcode == static_cast<Instruction::Code>(kMirOpPhi)) { + mir->meta.phi_incoming = + allocator_->AllocArray<BasicBlockId>(def->num_uses, kArenaAllocDFInfo); + ASSERT_EQ(def->num_uses, bb->predecessors.size()); + std::copy(bb->predecessors.begin(), bb->predecessors.end(), mir->meta.phi_incoming); + } + mir->ssa_rep = &ssa_reps_[i]; + cu_.mir_graph->AllocateSSAUseData(mir, def->num_uses); + std::copy_n(def->uses, def->num_uses, mir->ssa_rep->uses); + // Keep 
mir->ssa_rep->fp_use[.] zero-initialized (false). Not used by DCE, only copied. + cu_.mir_graph->AllocateSSADefData(mir, def->num_defs); + std::copy_n(def->defs, def->num_defs, mir->ssa_rep->defs); + // Keep mir->ssa_rep->fp_def[.] zero-initialized (false). Not used by DCE, only copied. + mir->dalvikInsn.opcode = def->opcode; + mir->offset = i; // LVN uses offset only for debug output + mir->optimization_flags = 0u; + uint64_t df_attrs = MIRGraph::GetDataFlowAttributes(mir); + if ((df_attrs & DF_DA) != 0) { + CHECK_NE(def->num_defs, 0u); + mir->dalvikInsn.vA = SRegToVReg(def->defs[0], (df_attrs & DF_A_WIDE) != 0); + bb->data_flow_info->vreg_to_ssa_map_exit[mir->dalvikInsn.vA] = def->defs[0]; + if ((df_attrs & DF_A_WIDE) != 0) { + CHECK_EQ(def->defs[0] + 1, def->defs[1]); + bb->data_flow_info->vreg_to_ssa_map_exit[mir->dalvikInsn.vA + 1u] = def->defs[0] + 1; + } + } + if ((df_attrs & (DF_UA | DF_UB | DF_UC)) != 0) { + size_t use = 0; + if ((df_attrs & DF_UA) != 0) { + mir->dalvikInsn.vA = SRegToVReg(mir->ssa_rep->uses, &use, (df_attrs & DF_A_WIDE) != 0); + } + if ((df_attrs & DF_UB) != 0) { + mir->dalvikInsn.vB = SRegToVReg(mir->ssa_rep->uses, &use, (df_attrs & DF_B_WIDE) != 0); + } + if ((df_attrs & DF_UC) != 0) { + mir->dalvikInsn.vC = SRegToVReg(mir->ssa_rep->uses, &use, (df_attrs & DF_C_WIDE) != 0); + } + DCHECK_EQ(def->num_uses, use); + } + } + DexFile::CodeItem* code_item = static_cast<DexFile::CodeItem*>( + cu_.arena.Alloc(sizeof(DexFile::CodeItem), kArenaAllocMisc)); + code_item->insns_size_in_code_units_ = 2u * count; + code_item->registers_size_ = kMaxVRegs; + cu_.mir_graph->current_code_item_ = code_item; + } + + template <size_t count> + void PrepareMIRs(const MIRDef (&defs)[count]) { + DoPrepareMIRs(defs, count); + } + + template <size_t count> + void PrepareSRegToVRegMap(const int (&map)[count]) { + cu_.mir_graph->ssa_base_vregs_.assign(map, map + count); + num_vregs_ = *std::max_element(map, map + count) + 1u; + AllNodesIterator iterator(cu_.mir_graph.get()); + for (BasicBlock* bb = iterator.Next(); bb != nullptr; bb = iterator.Next()) { + if (bb->data_flow_info != nullptr) { + bb->data_flow_info->vreg_to_ssa_map_exit = static_cast<int32_t*>( + cu_.arena.Alloc(sizeof(int32_t) * num_vregs_, kArenaAllocDFInfo)); + std::fill_n(bb->data_flow_info->vreg_to_ssa_map_exit, num_vregs_, INVALID_SREG); + } + } + } + + void PerformGVN() { + cu_.mir_graph->SSATransformationStart(); + cu_.mir_graph->ComputeDFSOrders(); + cu_.mir_graph->ComputeDominators(); + cu_.mir_graph->ComputeTopologicalSortOrder(); + cu_.mir_graph->SSATransformationEnd(); + cu_.mir_graph->temp_.gvn.ifield_ids = GlobalValueNumbering::PrepareGvnFieldIds( + allocator_.get(), cu_.mir_graph->ifield_lowering_infos_); + cu_.mir_graph->temp_.gvn.sfield_ids = GlobalValueNumbering::PrepareGvnFieldIds( + allocator_.get(), cu_.mir_graph->sfield_lowering_infos_); + ASSERT_TRUE(gvn_ == nullptr); + gvn_.reset(new (allocator_.get()) GlobalValueNumbering(&cu_, allocator_.get(), + GlobalValueNumbering::kModeGvn)); + value_names_.resize(mir_count_, 0xffffu); + LoopRepeatingTopologicalSortIterator iterator(cu_.mir_graph.get()); + bool change = false; + for (BasicBlock* bb = iterator.Next(change); bb != nullptr; bb = iterator.Next(change)) { + LocalValueNumbering* lvn = gvn_->PrepareBasicBlock(bb); + if (lvn != nullptr) { + for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) { + value_names_[mir - mirs_] = lvn->GetValueNumber(mir); + } + } + change = (lvn != nullptr) && gvn_->FinishBasicBlock(bb); + 
ASSERT_TRUE(gvn_->Good()); + } + } + + void PerformGVNCodeModifications() { + ASSERT_TRUE(gvn_ != nullptr); + ASSERT_TRUE(gvn_->Good()); + gvn_->StartPostProcessing(); + TopologicalSortIterator iterator(cu_.mir_graph.get()); + for (BasicBlock* bb = iterator.Next(); bb != nullptr; bb = iterator.Next()) { + LocalValueNumbering* lvn = gvn_->PrepareBasicBlock(bb); + if (lvn != nullptr) { + for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) { + uint16_t value_name = lvn->GetValueNumber(mir); + ASSERT_EQ(value_name, value_names_[mir - mirs_]); + } + } + bool change = (lvn != nullptr) && gvn_->FinishBasicBlock(bb); + ASSERT_FALSE(change); + ASSERT_TRUE(gvn_->Good()); + } + } + + void FillVregToSsaRegExitMaps() { + // Fill in vreg_to_ssa_map_exit for each BB. + PreOrderDfsIterator iterator(cu_.mir_graph.get()); + for (BasicBlock* bb = iterator.Next(); bb != nullptr; bb = iterator.Next()) { + if (bb->block_type == kDalvikByteCode) { + CHECK(!bb->predecessors.empty()); + BasicBlock* pred_bb = cu_.mir_graph->GetBasicBlock(bb->predecessors[0]); + for (size_t v_reg = 0; v_reg != num_vregs_; ++v_reg) { + if (bb->data_flow_info->vreg_to_ssa_map_exit[v_reg] == INVALID_SREG) { + bb->data_flow_info->vreg_to_ssa_map_exit[v_reg] = + pred_bb->data_flow_info->vreg_to_ssa_map_exit[v_reg]; + } + } + } + } + } + + void PerformDCE() { + FillVregToSsaRegExitMaps(); + cu_.mir_graph->GetNumOfCodeAndTempVRs(); + dce_.reset(new (allocator_.get()) GvnDeadCodeElimination(gvn_.get(), allocator_.get())); + PreOrderDfsIterator iterator(cu_.mir_graph.get()); + for (BasicBlock* bb = iterator.Next(); bb != nullptr; bb = iterator.Next()) { + if (bb->block_type == kDalvikByteCode) { + dce_->Apply(bb); + } + } + } + + void PerformGVN_DCE() { + PerformGVN(); + PerformGVNCodeModifications(); // Eliminate null/range checks. + PerformDCE(); + } + + template <size_t count> + void ExpectValueNamesNE(const size_t (&indexes)[count]) { + for (size_t i1 = 0; i1 != count; ++i1) { + size_t idx1 = indexes[i1]; + for (size_t i2 = i1 + 1; i2 != count; ++i2) { + size_t idx2 = indexes[i2]; + EXPECT_NE(value_names_[idx1], value_names_[idx2]) << idx1 << " " << idx2; + } + } + } + + template <size_t count> + void ExpectNoNullCheck(const size_t (&indexes)[count]) { + for (size_t i = 0; i != count; ++i) { + size_t idx = indexes[i]; + EXPECT_EQ(MIR_IGNORE_NULL_CHECK, mirs_[idx].optimization_flags & MIR_IGNORE_NULL_CHECK) + << idx; + } + size_t num_no_null_ck = 0u; + for (size_t i = 0; i != mir_count_; ++i) { + if ((mirs_[i].optimization_flags & MIR_IGNORE_NULL_CHECK) != 0) { + ++num_no_null_ck; + } + } + EXPECT_EQ(count, num_no_null_ck); + } + + GvnDeadCodeEliminationTest() + : pool_(), + cu_(&pool_, kRuntimeISA, nullptr, nullptr), + num_vregs_(0u), + mir_count_(0u), + mirs_(nullptr), + ssa_reps_(), + allocator_(), + gvn_(), + dce_(), + value_names_(), + live_in_v_(new (&cu_.arena) ArenaBitVector(&cu_.arena, kMaxSsaRegs, false, kBitMapMisc)) { + cu_.mir_graph.reset(new MIRGraph(&cu_, &cu_.arena)); + cu_.access_flags = kAccStatic; // Don't let "this" interfere with this test. + allocator_.reset(ScopedArenaAllocator::Create(&cu_.arena_stack)); + // By default, the zero-initialized reg_location_[.] with ref == false tells LVN that + // 0 constants are integral, not references. Nothing else is used by LVN/GVN. + cu_.mir_graph->reg_location_ = static_cast<RegLocation*>(cu_.arena.Alloc( + kMaxSsaRegs * sizeof(cu_.mir_graph->reg_location_[0]), kArenaAllocRegAlloc)); + // Bind all possible sregs to live vregs for test purposes. 
+ live_in_v_->SetInitialBits(kMaxSsaRegs); + cu_.mir_graph->ssa_base_vregs_.reserve(kMaxSsaRegs); + cu_.mir_graph->ssa_subscripts_.reserve(kMaxSsaRegs); + for (unsigned int i = 0; i < kMaxSsaRegs; i++) { + cu_.mir_graph->ssa_base_vregs_.push_back(i); + cu_.mir_graph->ssa_subscripts_.push_back(0); + } + // Set shorty for a void-returning method without arguments. + cu_.shorty = "V"; + } + + static constexpr size_t kMaxSsaRegs = 16384u; + static constexpr size_t kMaxVRegs = 256u; + + ArenaPool pool_; + CompilationUnit cu_; + size_t num_vregs_; + size_t mir_count_; + MIR* mirs_; + std::vector<SSARepresentation> ssa_reps_; + std::unique_ptr<ScopedArenaAllocator> allocator_; + std::unique_ptr<GlobalValueNumbering> gvn_; + std::unique_ptr<GvnDeadCodeElimination> dce_; + std::vector<uint16_t> value_names_; + ArenaBitVector* live_in_v_; +}; + +constexpr uint16_t GvnDeadCodeEliminationTest::kNoValue; + +class GvnDeadCodeEliminationTestSimple : public GvnDeadCodeEliminationTest { + public: + GvnDeadCodeEliminationTestSimple(); + + private: + static const BBDef kSimpleBbs[]; +}; + +const GvnDeadCodeEliminationTest::BBDef GvnDeadCodeEliminationTestSimple::kSimpleBbs[] = { + DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()), + DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()), + DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(3)), + DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(1)), +}; + +GvnDeadCodeEliminationTestSimple::GvnDeadCodeEliminationTestSimple() + : GvnDeadCodeEliminationTest() { + PrepareBasicBlocks(kSimpleBbs); +} + +class GvnDeadCodeEliminationTestDiamond : public GvnDeadCodeEliminationTest { + public: + GvnDeadCodeEliminationTestDiamond(); + + private: + static const BBDef kDiamondBbs[]; +}; + +const GvnDeadCodeEliminationTest::BBDef GvnDeadCodeEliminationTestDiamond::kDiamondBbs[] = { + DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()), + DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()), + DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(6)), + DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 5), DEF_PRED1(1)), // Block #3, top of the diamond. + DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED1(3)), // Block #4, left side. + DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED1(3)), // Block #5, right side. + DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED2(4, 5)), // Block #6, bottom. +}; + +GvnDeadCodeEliminationTestDiamond::GvnDeadCodeEliminationTestDiamond() + : GvnDeadCodeEliminationTest() { + PrepareBasicBlocks(kDiamondBbs); +} + +class GvnDeadCodeEliminationTestLoop : public GvnDeadCodeEliminationTest { + public: + GvnDeadCodeEliminationTestLoop(); + + private: + static const BBDef kLoopBbs[]; +}; + +const GvnDeadCodeEliminationTest::BBDef GvnDeadCodeEliminationTestLoop::kLoopBbs[] = { + DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()), + DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()), + DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(5)), + DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)), + DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 4), DEF_PRED2(3, 4)), // "taken" loops to self. 
+ DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(4)), +}; + +GvnDeadCodeEliminationTestLoop::GvnDeadCodeEliminationTestLoop() + : GvnDeadCodeEliminationTest() { + PrepareBasicBlocks(kLoopBbs); +} + +TEST_F(GvnDeadCodeEliminationTestSimple, Rename1) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + { 1u, 1u, 1u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_IGET(3, Instruction::IGET, 1u, 0u, 0u), + DEF_MOVE(3, Instruction::MOVE_OBJECT, 2u, 0u), + DEF_IGET(3, Instruction::IGET, 3u, 2u, 1u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 2 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 3 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[0], value_names_[2]); + + const size_t no_null_ck_indexes[] = { 1, 3 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, true, false + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the IGET uses the s_reg 0, v_reg 0, defined by mirs_[0]. + ASSERT_EQ(1, mirs_[3].ssa_rep->num_uses); + EXPECT_EQ(0, mirs_[3].ssa_rep->uses[0]); + EXPECT_EQ(0u, mirs_[3].dalvikInsn.vB); +} + +TEST_F(GvnDeadCodeEliminationTestSimple, Rename2) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + { 1u, 1u, 1u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_IGET(3, Instruction::IGET, 1u, 0u, 0u), + DEF_MOVE(3, Instruction::MOVE_OBJECT, 2u, 0u), + DEF_IGET(3, Instruction::IGET, 3u, 2u, 1u), + DEF_CONST(3, Instruction::CONST, 4u, 1000), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 2 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 3, 4 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[0], value_names_[2]); + + const size_t no_null_ck_indexes[] = { 1, 3 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, true, false, false + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the IGET uses the s_reg 0, v_reg 0, defined by mirs_[0]. 
+ ASSERT_EQ(1, mirs_[3].ssa_rep->num_uses); + EXPECT_EQ(0, mirs_[3].ssa_rep->uses[0]); + EXPECT_EQ(0u, mirs_[3].dalvikInsn.vB); +} + +TEST_F(GvnDeadCodeEliminationTestSimple, Rename3) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + { 1u, 1u, 1u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_IGET(3, Instruction::IGET, 1u, 0u, 0u), + DEF_MOVE(3, Instruction::MOVE_OBJECT, 2u, 0u), + DEF_IGET(3, Instruction::IGET, 3u, 2u, 1u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 0 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 3 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[0], value_names_[2]); + + const size_t no_null_ck_indexes[] = { 1, 3 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, true, false + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the NEW_INSTANCE defines the s_reg 2, v_reg 2, originally defined by the move. + ASSERT_EQ(1, mirs_[0].ssa_rep->num_defs); + EXPECT_EQ(2, mirs_[0].ssa_rep->defs[0]); + EXPECT_EQ(2u, mirs_[0].dalvikInsn.vA); + // Check that the first IGET is using the s_reg 2, v_reg 2. + ASSERT_EQ(1, mirs_[1].ssa_rep->num_uses); + EXPECT_EQ(2, mirs_[1].ssa_rep->uses[0]); + EXPECT_EQ(2u, mirs_[1].dalvikInsn.vB); +} + +TEST_F(GvnDeadCodeEliminationTestSimple, Rename4) { + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_MOVE(3, Instruction::MOVE_OBJECT, 1u, 0u), + DEF_MOVE(3, Instruction::MOVE_OBJECT, 2u, 1u), + DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 3u, 1000u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 0, 1 /* high word */ }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 3 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[0], value_names_[1]); + EXPECT_EQ(value_names_[0], value_names_[2]); + + static const bool eliminated[] = { + false, true, true, false + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the NEW_INSTANCE defines the s_reg 2, v_reg 2, originally defined by the move 2u. 
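// (Editor's note, illustrative rather than part of the patch: at the dalvik level Rename4 turns
//      new-instance v0; move-object v1, v0; move-object v2, v1; const-wide v0, #1000
//  into
//      new-instance v2; const-wide v0, #1000
//  i.e. both chained moves are killed and the NEW_INSTANCE is renamed straight to the final
//  destination v2, which is what the expectations below confirm.)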
+ ASSERT_EQ(1, mirs_[0].ssa_rep->num_defs); + EXPECT_EQ(2, mirs_[0].ssa_rep->defs[0]); + EXPECT_EQ(2u, mirs_[0].dalvikInsn.vA); +} + +TEST_F(GvnDeadCodeEliminationTestSimple, Rename5) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_IGET(3, Instruction::IGET, 1u, 0u, 0u), + DEF_UNOP(3, Instruction::INT_TO_FLOAT, 2u, 1u), + DEF_MOVE(3, Instruction::MOVE_OBJECT, 3u, 0u), + DEF_MOVE(3, Instruction::MOVE_OBJECT, 4u, 3u), + DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 5u, 1000u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 1, 3, 0, 1 /* high word */ }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 5 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[0], value_names_[3]); + EXPECT_EQ(value_names_[0], value_names_[4]); + + static const bool eliminated[] = { + false, false, false, true, true, false + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the NEW_INSTANCE defines the s_reg 4, v_reg 3, originally defined by the move 4u. + ASSERT_EQ(1, mirs_[0].ssa_rep->num_defs); + EXPECT_EQ(4, mirs_[0].ssa_rep->defs[0]); + EXPECT_EQ(3u, mirs_[0].dalvikInsn.vA); +} + +TEST_F(GvnDeadCodeEliminationTestSimple, Rename6) { + static const MIRDef mirs[] = { + DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 0u, 1000u), + DEF_MOVE_WIDE(3, Instruction::MOVE_WIDE, 2u, 0u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1 /* high word */, 1, 2 /* high word */ }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + EXPECT_EQ(value_names_[0], value_names_[1]); + + static const bool eliminated[] = { + false, true + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the CONST_WIDE defines the s_reg 2, v_reg 1, originally defined by the move 2u. 
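// (Editor's note, illustrative rather than part of the patch: at the dalvik level Rename6 turns
//      const-wide v0, #1000; move-wide v1, v0
//  into
//      const-wide v1, #1000
//  i.e. the wide value is renamed as a pair from v0/v1 to v1/v2 even though the two pairs overlap,
//  which is what the four checks below pin down: the defs move to s_regs 2 and 3 and vA becomes 1.)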
+ ASSERT_EQ(2, mirs_[0].ssa_rep->num_defs); + EXPECT_EQ(2, mirs_[0].ssa_rep->defs[0]); + EXPECT_EQ(3, mirs_[0].ssa_rep->defs[1]); + EXPECT_EQ(1u, mirs_[0].dalvikInsn.vA); +} + +TEST_F(GvnDeadCodeEliminationTestSimple, Rename7) { + static const MIRDef mirs[] = { + DEF_CONST(3, Instruction::CONST, 0u, 1000u), + DEF_MOVE(3, Instruction::MOVE, 1u, 0u), + DEF_BINOP(3, Instruction::ADD_INT, 2u, 0u, 1u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 0 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + EXPECT_NE(value_names_[0], value_names_[2]); + EXPECT_EQ(value_names_[0], value_names_[1]); + + static const bool eliminated[] = { + false, true, false + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the CONST defines the s_reg 1, v_reg 1, originally defined by the move 1u. + ASSERT_EQ(1, mirs_[0].ssa_rep->num_defs); + EXPECT_EQ(1, mirs_[0].ssa_rep->defs[0]); + EXPECT_EQ(1u, mirs_[0].dalvikInsn.vA); + // Check that the ADD_INT inputs are both s_reg1, vreg 1. + ASSERT_EQ(2, mirs_[2].ssa_rep->num_uses); + EXPECT_EQ(1, mirs_[2].ssa_rep->uses[0]); + EXPECT_EQ(1, mirs_[2].ssa_rep->uses[1]); + EXPECT_EQ(1u, mirs_[2].dalvikInsn.vB); + EXPECT_EQ(1u, mirs_[2].dalvikInsn.vC); +} + +TEST_F(GvnDeadCodeEliminationTestSimple, Rename8) { + static const MIRDef mirs[] = { + DEF_CONST(3, Instruction::CONST, 0u, 1000u), + DEF_MOVE(3, Instruction::MOVE, 1u, 0u), + DEF_BINOP(3, Instruction::ADD_INT_2ADDR, 2u, 0u, 1u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 0 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + EXPECT_NE(value_names_[0], value_names_[2]); + EXPECT_EQ(value_names_[0], value_names_[1]); + + static const bool eliminated[] = { + false, true, false + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the CONST defines the s_reg 1, v_reg 1, originally defined by the move 1u. + ASSERT_EQ(1, mirs_[0].ssa_rep->num_defs); + EXPECT_EQ(1, mirs_[0].ssa_rep->defs[0]); + EXPECT_EQ(1u, mirs_[0].dalvikInsn.vA); + // Check that the ADD_INT_2ADDR was replaced by ADD_INT and inputs are both s_reg 1, vreg 1. 
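// (Editor's note, illustrative rather than part of the patch: the 2addr encoding requires the
//  destination to double as the first source register, so once the MOVE is killed and the constant
//  is renamed to v1, the insn can no longer stay in 2addr form. At the dalvik level
//      const v0, #1000; move v1, v0; add-int/2addr v0, v1
//  becomes
//      const v1, #1000; add-int v0, v1, v1
//  via ChangeBinOp2AddrToPlainBinOp() declared in the header, which the expectations below verify.)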
+ EXPECT_EQ(Instruction::ADD_INT, mirs_[2].dalvikInsn.opcode); + ASSERT_EQ(2, mirs_[2].ssa_rep->num_uses); + EXPECT_EQ(1, mirs_[2].ssa_rep->uses[0]); + EXPECT_EQ(1, mirs_[2].ssa_rep->uses[1]); + EXPECT_EQ(1u, mirs_[2].dalvikInsn.vB); + EXPECT_EQ(1u, mirs_[2].dalvikInsn.vC); +} + +TEST_F(GvnDeadCodeEliminationTestSimple, Rename9) { + static const MIRDef mirs[] = { + DEF_CONST(3, Instruction::CONST, 0u, 1000u), + DEF_BINOP(3, Instruction::ADD_INT_2ADDR, 1u, 0u, 0u), + DEF_MOVE(3, Instruction::MOVE, 2u, 1u), + DEF_CONST(3, Instruction::CONST, 3u, 3000u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 0, 1, 0 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 3 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[1], value_names_[2]); + + static const bool eliminated[] = { + false, false, true, false + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the ADD_INT_2ADDR was replaced by ADD_INT and output is in s_reg 2, vreg 1. + EXPECT_EQ(Instruction::ADD_INT, mirs_[1].dalvikInsn.opcode); + ASSERT_EQ(2, mirs_[1].ssa_rep->num_uses); + EXPECT_EQ(0, mirs_[1].ssa_rep->uses[0]); + EXPECT_EQ(0, mirs_[1].ssa_rep->uses[1]); + EXPECT_EQ(0u, mirs_[1].dalvikInsn.vB); + EXPECT_EQ(0u, mirs_[1].dalvikInsn.vC); + ASSERT_EQ(1, mirs_[1].ssa_rep->num_defs); + EXPECT_EQ(2, mirs_[1].ssa_rep->defs[0]); + EXPECT_EQ(1u, mirs_[1].dalvikInsn.vA); +} + +TEST_F(GvnDeadCodeEliminationTestSimple, NoRename1) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + { 1u, 1u, 1u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_IGET(3, Instruction::IGET, 1u, 0u, 0u), + DEF_UNOP(3, Instruction::INT_TO_FLOAT, 2u, 1u), + DEF_MOVE(3, Instruction::MOVE_OBJECT, 3u, 0u), + DEF_CONST(3, Instruction::CONST, 4u, 1000), + DEF_IGET(3, Instruction::IGET, 5u, 3u, 1u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 1, 0, 1 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 4, 5 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[0], value_names_[3]); + + const size_t no_null_ck_indexes[] = { 1, 5 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, false, false, false, false + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } +} + +TEST_F(GvnDeadCodeEliminationTestSimple, NoRename2) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + { 1u, 1u, 1u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_IGET(3, Instruction::IGET, 1u, 0u, 0u), + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 2u), + DEF_MOVE(3, Instruction::MOVE_OBJECT, 3u, 0u), + DEF_CONST(3, 
Instruction::CONST, 4u, 1000), + DEF_IGET(3, Instruction::IGET, 5u, 3u, 1u), + DEF_CONST(3, Instruction::CONST, 6u, 2000), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 2, 0, 3, 2 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 4, 5, 6 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[0], value_names_[3]); + + const size_t no_null_ck_indexes[] = { 1, 5 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, false, false, false, false, false + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } +} + +TEST_F(GvnDeadCodeEliminationTestSimple, NoRename3) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + { 1u, 1u, 1u, false, kDexMemAccessWord }, + { 2u, 1u, 2u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_IGET(3, Instruction::IGET, 1u, 0u, 0u), + DEF_IGET(3, Instruction::IGET, 2u, 0u, 2u), + DEF_BINOP(3, Instruction::ADD_INT, 3u, 1u, 2u), + DEF_MOVE(3, Instruction::MOVE_OBJECT, 4u, 0u), + DEF_IGET(3, Instruction::IGET, 5u, 4u, 1u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 2, 0 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 3, 5 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[0], value_names_[4]); + + const size_t no_null_ck_indexes[] = { 1, 2, 5 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, false, false, false, false + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } +} + +TEST_F(GvnDeadCodeEliminationTestSimple, Simple1) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessObject }, + { 1u, 1u, 1u, false, kDexMemAccessObject }, + { 2u, 1u, 2u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_IGET(3, Instruction::IGET_OBJECT, 1u, 0u, 0u), + DEF_IGET(3, Instruction::IGET_OBJECT, 2u, 1u, 1u), + DEF_IGET(3, Instruction::IGET, 3u, 2u, 2u), + DEF_IGET(3, Instruction::IGET_OBJECT, 4u, 0u, 0u), + DEF_IGET(3, Instruction::IGET_OBJECT, 5u, 4u, 1u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 1, 2 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + EXPECT_NE(value_names_[0], value_names_[1]); + EXPECT_NE(value_names_[0], value_names_[2]); + EXPECT_NE(value_names_[0], value_names_[3]); + EXPECT_NE(value_names_[1], value_names_[2]); + EXPECT_NE(value_names_[1], value_names_[3]); + EXPECT_NE(value_names_[2], value_names_[3]); + EXPECT_EQ(value_names_[1], value_names_[4]); + 
EXPECT_EQ(value_names_[2], value_names_[5]); + + EXPECT_EQ(MIR_IGNORE_NULL_CHECK, mirs_[4].optimization_flags & MIR_IGNORE_NULL_CHECK); + EXPECT_EQ(MIR_IGNORE_NULL_CHECK, mirs_[5].optimization_flags & MIR_IGNORE_NULL_CHECK); + + static const bool eliminated[] = { + false, false, false, false, true, true + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the sregs have been renamed correctly. + ASSERT_EQ(1, mirs_[1].ssa_rep->num_defs); + EXPECT_EQ(4, mirs_[1].ssa_rep->defs[0]); + ASSERT_EQ(1, mirs_[1].ssa_rep->num_uses); + EXPECT_EQ(0, mirs_[1].ssa_rep->uses[0]); + ASSERT_EQ(1, mirs_[2].ssa_rep->num_defs); + EXPECT_EQ(5, mirs_[2].ssa_rep->defs[0]); + ASSERT_EQ(1, mirs_[2].ssa_rep->num_uses); + EXPECT_EQ(4, mirs_[2].ssa_rep->uses[0]); + ASSERT_EQ(1, mirs_[3].ssa_rep->num_defs); + EXPECT_EQ(3, mirs_[3].ssa_rep->defs[0]); + ASSERT_EQ(1, mirs_[3].ssa_rep->num_uses); + EXPECT_EQ(5, mirs_[3].ssa_rep->uses[0]); +} + +TEST_F(GvnDeadCodeEliminationTestSimple, Simple2) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_CONST(3, Instruction::CONST, 1u, 1000), + DEF_IGET(3, Instruction::IGET, 2u, 0u, 0u), + DEF_BINOP(3, Instruction::ADD_INT_2ADDR, 3u, 2u, 1u), + DEF_UNOP(3, Instruction::INT_TO_FLOAT, 4u, 3u), + DEF_IGET(3, Instruction::IGET, 5u, 0u, 0u), + DEF_BINOP(3, Instruction::ADD_INT_2ADDR, 6u, 5u, 1u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 2, 3, 2, 2 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 3 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[2], value_names_[5]); + EXPECT_EQ(value_names_[3], value_names_[6]); + + const size_t no_null_ck_indexes[] = { 2, 5 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, false, false, false, true, true + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the sregs have been renamed correctly. 
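// (Editor's note, illustrative rather than part of the patch: GVN assigns the second IGET/ADD pair
//  the same value names as the first, so both are NOPed. To keep v2 holding the expected value at
//  the end of the block, the surviving ADD's def is renamed from s_reg 3 to s_reg 6, the last SSA
//  name assigned to v2 in the block, and the INT_TO_FLOAT use is renamed to match; that is what
//  the checks below verify.)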
+ ASSERT_EQ(1, mirs_[3].ssa_rep->num_defs); + EXPECT_EQ(6, mirs_[3].ssa_rep->defs[0]); + ASSERT_EQ(2, mirs_[3].ssa_rep->num_uses); + EXPECT_EQ(2, mirs_[3].ssa_rep->uses[0]); + EXPECT_EQ(1, mirs_[3].ssa_rep->uses[1]); + ASSERT_EQ(1, mirs_[4].ssa_rep->num_defs); + EXPECT_EQ(4, mirs_[4].ssa_rep->defs[0]); + ASSERT_EQ(1, mirs_[4].ssa_rep->num_uses); + EXPECT_EQ(6, mirs_[4].ssa_rep->uses[0]); +} + +TEST_F(GvnDeadCodeEliminationTestSimple, Simple3) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_CONST(3, Instruction::CONST, 1u, 1000), + DEF_CONST(3, Instruction::CONST, 2u, 2000), + DEF_CONST(3, Instruction::CONST, 3u, 3000), + DEF_IGET(3, Instruction::IGET, 4u, 0u, 0u), + DEF_BINOP(3, Instruction::ADD_INT, 5u, 4u, 1u), + DEF_BINOP(3, Instruction::MUL_INT, 6u, 5u, 2u), + DEF_BINOP(3, Instruction::SUB_INT, 7u, 6u, 3u), + DEF_UNOP(3, Instruction::INT_TO_FLOAT, 8u, 7u), + DEF_IGET(3, Instruction::IGET, 9u, 0u, 0u), + DEF_BINOP(3, Instruction::ADD_INT, 10u, 9u, 1u), + DEF_BINOP(3, Instruction::MUL_INT, 11u, 10u, 2u), // Simple elimination of ADD+MUL + DEF_BINOP(3, Instruction::SUB_INT, 12u, 11u, 3u), // allows simple elimination of IGET+SUB. + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 4, 5, 5, 4, 6, 4, 5, 5, 4 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[4], value_names_[9]); + EXPECT_EQ(value_names_[5], value_names_[10]); + EXPECT_EQ(value_names_[6], value_names_[11]); + EXPECT_EQ(value_names_[7], value_names_[12]); + + const size_t no_null_ck_indexes[] = { 4, 9 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, false, false, false, false, false, false, false, true, true, true, true + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the sregs have been renamed correctly. 
+ ASSERT_EQ(1, mirs_[6].ssa_rep->num_defs); + EXPECT_EQ(11, mirs_[6].ssa_rep->defs[0]); // 6 -> 11 + ASSERT_EQ(2, mirs_[6].ssa_rep->num_uses); + EXPECT_EQ(5, mirs_[6].ssa_rep->uses[0]); + EXPECT_EQ(2, mirs_[6].ssa_rep->uses[1]); + ASSERT_EQ(1, mirs_[7].ssa_rep->num_defs); + EXPECT_EQ(12, mirs_[7].ssa_rep->defs[0]); // 7 -> 12 + ASSERT_EQ(2, mirs_[7].ssa_rep->num_uses); + EXPECT_EQ(11, mirs_[7].ssa_rep->uses[0]); // 6 -> 11 + EXPECT_EQ(3, mirs_[7].ssa_rep->uses[1]); + ASSERT_EQ(1, mirs_[8].ssa_rep->num_defs); + EXPECT_EQ(8, mirs_[8].ssa_rep->defs[0]); + ASSERT_EQ(1, mirs_[8].ssa_rep->num_uses); + EXPECT_EQ(12, mirs_[8].ssa_rep->uses[0]); // 7 -> 12 +} + +TEST_F(GvnDeadCodeEliminationTestSimple, Simple4) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 1u, INT64_C(1)), + DEF_BINOP(3, Instruction::LONG_TO_FLOAT, 3u, 1u, 2u), + DEF_IGET(3, Instruction::IGET, 4u, 0u, 0u), + DEF_UNOP(3, Instruction::INT_TO_FLOAT, 5u, 4u), + DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 6u, INT64_C(1)), + DEF_BINOP(3, Instruction::LONG_TO_FLOAT, 8u, 6u, 7u), + DEF_IGET(3, Instruction::IGET, 9u, 0u, 0u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 1, 2, 3, 1, 2, 1, 2 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 3, 4 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[1], value_names_[5]); + EXPECT_EQ(value_names_[2], value_names_[6]); + EXPECT_EQ(value_names_[3], value_names_[7]); + + const size_t no_null_ck_indexes[] = { 3, 7 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + // Simple elimination of CONST_WIDE+LONG_TO_FLOAT allows simple elimination of IGET. + false, false, false, false, false, true, true, true + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the sregs have been renamed correctly.
+ ASSERT_EQ(1, mirs_[2].ssa_rep->num_defs); + EXPECT_EQ(8, mirs_[2].ssa_rep->defs[0]); // 3 -> 8 + ASSERT_EQ(2, mirs_[2].ssa_rep->num_uses); + EXPECT_EQ(1, mirs_[2].ssa_rep->uses[0]); + EXPECT_EQ(2, mirs_[2].ssa_rep->uses[1]); + ASSERT_EQ(1, mirs_[3].ssa_rep->num_defs); + EXPECT_EQ(9, mirs_[3].ssa_rep->defs[0]); // 4 -> 9 + ASSERT_EQ(1, mirs_[3].ssa_rep->num_uses); + EXPECT_EQ(0, mirs_[3].ssa_rep->uses[0]); + ASSERT_EQ(1, mirs_[4].ssa_rep->num_defs); + EXPECT_EQ(5, mirs_[4].ssa_rep->defs[0]); + ASSERT_EQ(1, mirs_[4].ssa_rep->num_uses); + EXPECT_EQ(9, mirs_[4].ssa_rep->uses[0]); // 4 -> 9 +} + +TEST_F(GvnDeadCodeEliminationTestSimple, KillChain1) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_CONST(3, Instruction::CONST, 1u, 1000), + DEF_CONST(3, Instruction::CONST, 2u, 2000), + DEF_CONST(3, Instruction::CONST, 3u, 3000), + DEF_IGET(3, Instruction::IGET, 4u, 0u, 0u), + DEF_BINOP(3, Instruction::ADD_INT, 5u, 4u, 1u), + DEF_BINOP(3, Instruction::MUL_INT, 6u, 5u, 2u), + DEF_BINOP(3, Instruction::SUB_INT, 7u, 6u, 3u), + DEF_UNOP(3, Instruction::INT_TO_FLOAT, 8u, 7u), + DEF_IGET(3, Instruction::IGET, 9u, 0u, 0u), + DEF_BINOP(3, Instruction::ADD_INT, 10u, 9u, 1u), + DEF_BINOP(3, Instruction::MUL_INT, 11u, 10u, 2u), + DEF_BINOP(3, Instruction::SUB_INT, 12u, 11u, 3u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 4, 5, 4, 5, 6, 4, 5, 4, 5 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[4], value_names_[9]); + EXPECT_EQ(value_names_[5], value_names_[10]); + EXPECT_EQ(value_names_[6], value_names_[11]); + EXPECT_EQ(value_names_[7], value_names_[12]); + + const size_t no_null_ck_indexes[] = { 4, 9 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, false, false, false, false, false, false, false, true, true, true, true + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the sregs have been renamed correctly. 
+ ASSERT_EQ(1, mirs_[6].ssa_rep->num_defs); + EXPECT_EQ(11, mirs_[6].ssa_rep->defs[0]); // 6 -> 11 + ASSERT_EQ(2, mirs_[6].ssa_rep->num_uses); + EXPECT_EQ(5, mirs_[6].ssa_rep->uses[0]); + EXPECT_EQ(2, mirs_[6].ssa_rep->uses[1]); + ASSERT_EQ(1, mirs_[7].ssa_rep->num_defs); + EXPECT_EQ(12, mirs_[7].ssa_rep->defs[0]); // 7 -> 12 + ASSERT_EQ(2, mirs_[7].ssa_rep->num_uses); + EXPECT_EQ(11, mirs_[7].ssa_rep->uses[0]); // 6 -> 11 + EXPECT_EQ(3, mirs_[7].ssa_rep->uses[1]); + ASSERT_EQ(1, mirs_[8].ssa_rep->num_defs); + EXPECT_EQ(8, mirs_[8].ssa_rep->defs[0]); + ASSERT_EQ(1, mirs_[8].ssa_rep->num_uses); + EXPECT_EQ(12, mirs_[8].ssa_rep->uses[0]); // 7 -> 12 +} + +TEST_F(GvnDeadCodeEliminationTestSimple, KillChain2) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_CONST(3, Instruction::CONST, 1u, 1000), + DEF_CONST(3, Instruction::CONST, 2u, 2000), + DEF_CONST(3, Instruction::CONST, 3u, 3000), + DEF_IGET(3, Instruction::IGET, 4u, 0u, 0u), + DEF_BINOP(3, Instruction::ADD_INT, 5u, 4u, 1u), + DEF_BINOP(3, Instruction::MUL_INT, 6u, 5u, 2u), + DEF_BINOP(3, Instruction::SUB_INT, 7u, 6u, 3u), + DEF_UNOP(3, Instruction::INT_TO_FLOAT, 8u, 7u), + DEF_IGET(3, Instruction::IGET, 9u, 0u, 0u), + DEF_BINOP(3, Instruction::ADD_INT, 10u, 9u, 1u), + DEF_BINOP(3, Instruction::MUL_INT, 11u, 10u, 2u), + DEF_BINOP(3, Instruction::SUB_INT, 12u, 11u, 3u), + DEF_CONST(3, Instruction::CONST, 13u, 4000), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 4, 5, 5, 4, 6, 4, 7, 7, 4, 7 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 13 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[4], value_names_[9]); + EXPECT_EQ(value_names_[5], value_names_[10]); + EXPECT_EQ(value_names_[6], value_names_[11]); + EXPECT_EQ(value_names_[7], value_names_[12]); + + const size_t no_null_ck_indexes[] = { 4, 9 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, false, false, false, false, false, false, false, true, true, true, true, false + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the sregs have been renamed correctly. 
+ ASSERT_EQ(1, mirs_[7].ssa_rep->num_defs); + EXPECT_EQ(12, mirs_[7].ssa_rep->defs[0]); // 7 -> 12 + ASSERT_EQ(2, mirs_[7].ssa_rep->num_uses); + EXPECT_EQ(6, mirs_[7].ssa_rep->uses[0]); + EXPECT_EQ(3, mirs_[7].ssa_rep->uses[1]); + ASSERT_EQ(1, mirs_[8].ssa_rep->num_defs); + EXPECT_EQ(8, mirs_[8].ssa_rep->defs[0]); + ASSERT_EQ(1, mirs_[8].ssa_rep->num_uses); + EXPECT_EQ(12, mirs_[8].ssa_rep->uses[0]); // 7 -> 12 +} + +TEST_F(GvnDeadCodeEliminationTestSimple, KillChain3) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_CONST(3, Instruction::CONST, 1u, 1000), + DEF_CONST(3, Instruction::CONST, 2u, 2000), + DEF_CONST(3, Instruction::CONST, 3u, 3000), + DEF_IGET(3, Instruction::IGET, 4u, 0u, 0u), + DEF_BINOP(3, Instruction::ADD_INT, 5u, 4u, 1u), + DEF_BINOP(3, Instruction::MUL_INT, 6u, 5u, 2u), + DEF_BINOP(3, Instruction::SUB_INT, 7u, 6u, 3u), + DEF_UNOP(3, Instruction::INT_TO_FLOAT, 8u, 7u), + DEF_IGET(3, Instruction::IGET, 9u, 0u, 0u), + DEF_BINOP(3, Instruction::ADD_INT, 10u, 9u, 1u), + DEF_BINOP(3, Instruction::MUL_INT, 11u, 10u, 2u), + DEF_CONST(3, Instruction::CONST, 12u, 4000), + DEF_BINOP(3, Instruction::SUB_INT, 13u, 11u, 3u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 4, 5, 5, 4, 6, 4, 7, 4, 7, 4 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 12 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[4], value_names_[9]); + EXPECT_EQ(value_names_[5], value_names_[10]); + EXPECT_EQ(value_names_[6], value_names_[11]); + EXPECT_EQ(value_names_[7], value_names_[13]); + + const size_t no_null_ck_indexes[] = { 4, 9 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, false, false, false, false, false, false, false, true, true, true, false, true + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that the sregs have been renamed correctly. + ASSERT_EQ(1, mirs_[7].ssa_rep->num_defs); + EXPECT_EQ(13, mirs_[7].ssa_rep->defs[0]); // 7 -> 13 + ASSERT_EQ(2, mirs_[7].ssa_rep->num_uses); + EXPECT_EQ(6, mirs_[7].ssa_rep->uses[0]); + EXPECT_EQ(3, mirs_[7].ssa_rep->uses[1]); + ASSERT_EQ(1, mirs_[8].ssa_rep->num_defs); + EXPECT_EQ(8, mirs_[8].ssa_rep->defs[0]); + ASSERT_EQ(1, mirs_[8].ssa_rep->num_uses); + EXPECT_EQ(13, mirs_[8].ssa_rep->uses[0]); // 7 -> 13 +} + +TEST_F(GvnDeadCodeEliminationTestSimple, KeepChain1) { + // KillChain2 without the final CONST. 
+ static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_CONST(3, Instruction::CONST, 1u, 1000), + DEF_CONST(3, Instruction::CONST, 2u, 2000), + DEF_CONST(3, Instruction::CONST, 3u, 3000), + DEF_IGET(3, Instruction::IGET, 4u, 0u, 0u), + DEF_BINOP(3, Instruction::ADD_INT, 5u, 4u, 1u), + DEF_BINOP(3, Instruction::MUL_INT, 6u, 5u, 2u), + DEF_BINOP(3, Instruction::SUB_INT, 7u, 6u, 3u), + DEF_UNOP(3, Instruction::INT_TO_FLOAT, 8u, 7u), + DEF_IGET(3, Instruction::IGET, 9u, 0u, 0u), + DEF_BINOP(3, Instruction::ADD_INT, 10u, 9u, 1u), + DEF_BINOP(3, Instruction::MUL_INT, 11u, 10u, 2u), + DEF_BINOP(3, Instruction::SUB_INT, 12u, 11u, 3u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 4, 5, 5, 4, 6, 4, 7, 7, 4 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[4], value_names_[9]); + EXPECT_EQ(value_names_[5], value_names_[10]); + EXPECT_EQ(value_names_[6], value_names_[11]); + EXPECT_EQ(value_names_[7], value_names_[12]); + + const size_t no_null_ck_indexes[] = { 4, 9 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, false, false, false, false, false, false, false, false, false, false, false + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } +} + +TEST_F(GvnDeadCodeEliminationTestSimple, KeepChain2) { + // KillChain1 with MIRs in the middle of the chain. 
+ static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_CONST(3, Instruction::CONST, 1u, 1000), + DEF_CONST(3, Instruction::CONST, 2u, 2000), + DEF_CONST(3, Instruction::CONST, 3u, 3000), + DEF_IGET(3, Instruction::IGET, 4u, 0u, 0u), + DEF_BINOP(3, Instruction::ADD_INT, 5u, 4u, 1u), + DEF_BINOP(3, Instruction::MUL_INT, 6u, 5u, 2u), + DEF_BINOP(3, Instruction::SUB_INT, 7u, 6u, 3u), + DEF_UNOP(3, Instruction::INT_TO_FLOAT, 8u, 7u), + DEF_IGET(3, Instruction::IGET, 9u, 0u, 0u), + DEF_BINOP(3, Instruction::ADD_INT, 10u, 9u, 1u), + DEF_CONST(3, Instruction::CONST, 11u, 4000), + DEF_UNOP(3, Instruction::INT_TO_FLOAT, 12u, 11u), + DEF_BINOP(3, Instruction::MUL_INT, 13u, 10u, 2u), + DEF_BINOP(3, Instruction::SUB_INT, 14u, 13u, 3u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 4, 5, 4, 5, 6, 4, 5, 4, 7, 4, 5 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[4], value_names_[9]); + EXPECT_EQ(value_names_[5], value_names_[10]); + EXPECT_EQ(value_names_[6], value_names_[13]); + EXPECT_EQ(value_names_[7], value_names_[14]); + + const size_t no_null_ck_indexes[] = { 4, 9 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } +} + +TEST_F(GvnDeadCodeEliminationTestDiamond, CreatePhi1) { + static const MIRDef mirs[] = { + DEF_CONST(3, Instruction::CONST, 0u, 1000), + DEF_CONST(4, Instruction::CONST, 1u, 1000), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 0 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + EXPECT_EQ(value_names_[0], value_names_[1]); + + static const bool eliminated[] = { + false, true, + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that we've created a single-input Phi to replace the CONST 1u.
+ BasicBlock* bb4 = cu_.mir_graph->GetBasicBlock(4); + MIR* phi = bb4->first_mir_insn; + ASSERT_TRUE(phi != nullptr); + ASSERT_EQ(kMirOpPhi, static_cast<int>(phi->dalvikInsn.opcode)); + ASSERT_EQ(1, phi->ssa_rep->num_uses); + EXPECT_EQ(0, phi->ssa_rep->uses[0]); + ASSERT_EQ(1, phi->ssa_rep->num_defs); + EXPECT_EQ(1, phi->ssa_rep->defs[0]); + EXPECT_EQ(0u, phi->dalvikInsn.vA); +} + +TEST_F(GvnDeadCodeEliminationTestDiamond, CreatePhi2) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_CONST(4, Instruction::CONST, 1u, 1000), + DEF_IPUT(4, Instruction::IPUT, 1u, 0u, 0u), + DEF_CONST(5, Instruction::CONST, 3u, 2000), + DEF_IPUT(5, Instruction::IPUT, 3u, 0u, 0u), + DEF_IGET(6, Instruction::IGET, 5u, 0u, 0u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2 /* dummy */, 1, 2 /* dummy */, 1 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 3, 5 }; + ExpectValueNamesNE(diff_indexes); + + const size_t no_null_ck_indexes[] = { 2, 4, 5 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, false, false, false, true, + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that we've created a two-input Phi to replace the IGET 5u. + BasicBlock* bb6 = cu_.mir_graph->GetBasicBlock(6); + MIR* phi = bb6->first_mir_insn; + ASSERT_TRUE(phi != nullptr); + ASSERT_EQ(kMirOpPhi, static_cast<int>(phi->dalvikInsn.opcode)); + ASSERT_EQ(2, phi->ssa_rep->num_uses); + EXPECT_EQ(1, phi->ssa_rep->uses[0]); + EXPECT_EQ(3, phi->ssa_rep->uses[1]); + ASSERT_EQ(1, phi->ssa_rep->num_defs); + EXPECT_EQ(5, phi->ssa_rep->defs[0]); + EXPECT_EQ(1u, phi->dalvikInsn.vA); +} + +TEST_F(GvnDeadCodeEliminationTestDiamond, KillChainInAnotherBlock1) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessObject }, // linked list + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_IGET(3, Instruction::IGET_OBJECT, 1u, 0u, 0u), + DEF_IGET(3, Instruction::IGET_OBJECT, 2u, 1u, 0u), + DEF_IGET(3, Instruction::IGET_OBJECT, 3u, 2u, 0u), + DEF_IGET(3, Instruction::IGET_OBJECT, 4u, 3u, 0u), + DEF_IFZ(3, Instruction::IF_NEZ, 4u), + DEF_IGET(4, Instruction::IGET_OBJECT, 6u, 0u, 0u), + DEF_IGET(4, Instruction::IGET_OBJECT, 7u, 6u, 0u), + DEF_IGET(4, Instruction::IGET_OBJECT, 8u, 7u, 0u), + DEF_IGET(4, Instruction::IGET_OBJECT, 9u, 8u, 0u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 1, 2, 3 /* dummy */, 1, 2, 1, 2 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 3, 4 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[1], value_names_[6]); + EXPECT_EQ(value_names_[2], value_names_[7]); + EXPECT_EQ(value_names_[3], value_names_[8]); + EXPECT_EQ(value_names_[4], value_names_[9]); + + const size_t no_null_ck_indexes[] = { 1, 6, 7, 8, 9 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool 
eliminated[] = { + false, false, false, false, false, false, true, true, true, true, + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that we've created two single-input Phis to replace the IGET 8u and IGET 9u; + // the IGET 6u and IGET 7u were killed without a replacement. + BasicBlock* bb4 = cu_.mir_graph->GetBasicBlock(4); + MIR* phi1 = bb4->first_mir_insn; + ASSERT_TRUE(phi1 != nullptr); + ASSERT_EQ(kMirOpPhi, static_cast<int>(phi1->dalvikInsn.opcode)); + MIR* phi2 = phi1->next; + ASSERT_TRUE(phi2 != nullptr); + ASSERT_EQ(kMirOpPhi, static_cast<int>(phi2->dalvikInsn.opcode)); + ASSERT_TRUE(phi2->next == &mirs_[6]); + if (phi1->dalvikInsn.vA == 2u) { + std::swap(phi1, phi2); + } + ASSERT_EQ(1, phi1->ssa_rep->num_uses); + EXPECT_EQ(3, phi1->ssa_rep->uses[0]); + ASSERT_EQ(1, phi1->ssa_rep->num_defs); + EXPECT_EQ(8, phi1->ssa_rep->defs[0]); + EXPECT_EQ(1u, phi1->dalvikInsn.vA); + ASSERT_EQ(1, phi2->ssa_rep->num_uses); + EXPECT_EQ(4, phi2->ssa_rep->uses[0]); + ASSERT_EQ(1, phi2->ssa_rep->num_defs); + EXPECT_EQ(9, phi2->ssa_rep->defs[0]); + EXPECT_EQ(2u, phi2->dalvikInsn.vA); +} + +TEST_F(GvnDeadCodeEliminationTestDiamond, KillChainInAnotherBlock2) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessObject }, // linked list + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_IGET(3, Instruction::IGET_OBJECT, 1u, 0u, 0u), + DEF_IGET(3, Instruction::IGET_OBJECT, 2u, 1u, 0u), + DEF_IGET(3, Instruction::IGET_OBJECT, 3u, 2u, 0u), + DEF_IGET(3, Instruction::IGET_OBJECT, 4u, 3u, 0u), + DEF_IFZ(3, Instruction::IF_NEZ, 4u), + DEF_IGET(4, Instruction::IGET_OBJECT, 6u, 0u, 0u), + DEF_IGET(4, Instruction::IGET_OBJECT, 7u, 6u, 0u), + DEF_IGET(4, Instruction::IGET_OBJECT, 8u, 7u, 0u), + DEF_CONST(4, Instruction::CONST, 9u, 1000), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 1, 2, 3 /* dummy */, 1, 2, 1, 2 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 3, 4, 9 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[1], value_names_[6]); + EXPECT_EQ(value_names_[2], value_names_[7]); + EXPECT_EQ(value_names_[3], value_names_[8]); + + const size_t no_null_ck_indexes[] = { 1, 6, 7, 8 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, false, false, false, false, true, true, true, false, + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that we've created a single-input Phi to replace the IGET 8u; + // the IGET 6u and IGET 7u were killed without a replacement. 
+ BasicBlock* bb4 = cu_.mir_graph->GetBasicBlock(4); + MIR* phi = bb4->first_mir_insn; + ASSERT_TRUE(phi != nullptr); + ASSERT_EQ(kMirOpPhi, static_cast<int>(phi->dalvikInsn.opcode)); + ASSERT_TRUE(phi->next == &mirs_[6]); + ASSERT_EQ(1, phi->ssa_rep->num_uses); + EXPECT_EQ(3, phi->ssa_rep->uses[0]); + ASSERT_EQ(1, phi->ssa_rep->num_defs); + EXPECT_EQ(8, phi->ssa_rep->defs[0]); + EXPECT_EQ(1u, phi->dalvikInsn.vA); +} + +TEST_F(GvnDeadCodeEliminationTestLoop, IFieldLoopVariable) { + static const IFieldDef ifields[] = { + { 0u, 1u, 0u, false, kDexMemAccessWord }, + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u), + DEF_CONST(3, Instruction::CONST, 1u, 1), + DEF_CONST(3, Instruction::CONST, 2u, 0), + DEF_IPUT(3, Instruction::IPUT, 2u, 0u, 0u), + DEF_IGET(4, Instruction::IGET, 4u, 0u, 0u), + DEF_BINOP(4, Instruction::ADD_INT, 5u, 4u, 1u), + DEF_IPUT(4, Instruction::IPUT, 5u, 0u, 0u), + }; + + static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3 /* dummy */, 2, 2 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 4, 5 }; + ExpectValueNamesNE(diff_indexes); + + const size_t no_null_ck_indexes[] = { 3, 4, 6 }; + ExpectNoNullCheck(no_null_ck_indexes); + + static const bool eliminated[] = { + false, false, false, false, true, false, false, + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } + // Check that we've created a two-input Phi to replace the IGET 4u. + BasicBlock* bb4 = cu_.mir_graph->GetBasicBlock(4); + MIR* phi = bb4->first_mir_insn; + ASSERT_TRUE(phi != nullptr); + ASSERT_EQ(kMirOpPhi, static_cast<int>(phi->dalvikInsn.opcode)); + ASSERT_TRUE(phi->next == &mirs_[4]); + ASSERT_EQ(2, phi->ssa_rep->num_uses); + EXPECT_EQ(2, phi->ssa_rep->uses[0]); + EXPECT_EQ(5, phi->ssa_rep->uses[1]); + ASSERT_EQ(1, phi->ssa_rep->num_defs); + EXPECT_EQ(4, phi->ssa_rep->defs[0]); + EXPECT_EQ(2u, phi->dalvikInsn.vA); +} + +} // namespace art diff --git a/compiler/dex/local_value_numbering.cc b/compiler/dex/local_value_numbering.cc index 114346dd5a..99b6683b26 100644 --- a/compiler/dex/local_value_numbering.cc +++ b/compiler/dex/local_value_numbering.cc @@ -901,9 +901,9 @@ void LocalValueNumbering::MergeAliasingValues(const typename Map::value_type& en // Calculate merged values for the intersection. for (auto& load_value_entry : my_values->load_value_map) { uint16_t location = load_value_entry.first; - bool same_values = true; - uint16_t value_name = kNoValue; merge_names_.clear(); + uint16_t value_name = kNoValue; + bool same_values = true; for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) { value_name = Versions::LookupMergeValue(gvn_, lvn, key, location); same_values = same_values && (merge_names_.empty() || value_name == merge_names_.back()); @@ -937,6 +937,10 @@ void LocalValueNumbering::MergeAliasingValues(const typename Map::value_type& en void LocalValueNumbering::Merge(MergeType merge_type) { DCHECK_GE(gvn_->merge_lvns_.size(), 2u); + // Always reserve space in merge_names_. Even if we don't use it in Merge() we may need it + // in GetStartingVregValueNumberImpl() when the merge_names_'s allocator is not the top.
+ merge_names_.reserve(gvn_->merge_lvns_.size()); + IntersectSregValueMaps<&LocalValueNumbering::sreg_value_map_>(); IntersectSregValueMaps<&LocalValueNumbering::sreg_wide_value_map_>(); if (merge_type == kReturnMerge) { @@ -1169,8 +1173,8 @@ uint16_t LocalValueNumbering::HandlePhi(MIR* mir) { int first_s_reg = uses[pos]; bool wide = (first_lvn->sreg_wide_value_map_.count(first_s_reg) != 0u); // Iterate over *merge_lvns_ and skip incoming sregs for BBs without associated LVN. - uint16_t value_name = kNoValue; merge_names_.clear(); + uint16_t value_name = kNoValue; bool same_values = true; for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) { DCHECK_LT(pos, mir->ssa_rep->num_uses); @@ -1210,6 +1214,31 @@ uint16_t LocalValueNumbering::HandlePhi(MIR* mir) { return value_name; } +uint16_t LocalValueNumbering::HandleConst(MIR* mir, uint32_t value) { + RegLocation raw_dest = gvn_->GetMirGraph()->GetRawDest(mir); + uint16_t res; + if (value == 0u && raw_dest.ref) { + res = GlobalValueNumbering::kNullValue; + } else { + Instruction::Code op = raw_dest.fp ? Instruction::CONST_HIGH16 : Instruction::CONST; + res = gvn_->LookupValue(op, Low16Bits(value), High16Bits(value), 0); + } + SetOperandValue(mir->ssa_rep->defs[0], res); + return res; +} + +uint16_t LocalValueNumbering::HandleConstWide(MIR* mir, uint64_t value) { + RegLocation raw_dest = gvn_->GetMirGraph()->GetRawDest(mir); + Instruction::Code op = raw_dest.fp ? Instruction::CONST_HIGH16 : Instruction::CONST; + uint32_t low_word = Low32Bits(value); + uint32_t high_word = High32Bits(value); + uint16_t low_res = gvn_->LookupValue(op, Low16Bits(low_word), High16Bits(low_word), 1); + uint16_t high_res = gvn_->LookupValue(op, Low16Bits(high_word), High16Bits(high_word), 2); + uint16_t res = gvn_->LookupValue(op, low_res, high_res, 3); + SetOperandValueWide(mir->ssa_rep->defs[0], res); + return res; +} + uint16_t LocalValueNumbering::HandleAGet(MIR* mir, uint16_t opcode) { uint16_t array = GetOperandValue(mir->ssa_rep->uses[0]); HandleNullCheck(mir, array); @@ -1592,12 +1621,18 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { break; case Instruction::MOVE_EXCEPTION: case Instruction::NEW_INSTANCE: - case Instruction::CONST_CLASS: case Instruction::NEW_ARRAY: // 1 result, treat as unique each time, use result s_reg - will be unique. res = MarkNonAliasingNonNull(mir); SetOperandValue(mir->ssa_rep->defs[0], res); break; + case Instruction::CONST_CLASS: + DCHECK_EQ(Low16Bits(mir->dalvikInsn.vB), mir->dalvikInsn.vB); + res = gvn_->LookupValue(Instruction::CONST_CLASS, mir->dalvikInsn.vB, 0, 0); + SetOperandValue(mir->ssa_rep->defs[0], res); + null_checked_.insert(res); + non_aliasing_refs_.insert(res); + break; case Instruction::CONST_STRING: case Instruction::CONST_STRING_JUMBO: // These strings are internalized, so assign value based on the string pool index. 
@@ -1641,53 +1676,29 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { SetOperandValueWide(mir->ssa_rep->defs[0], res); break; + case Instruction::CONST_HIGH16: + res = HandleConst(mir, mir->dalvikInsn.vB << 16); + break; case Instruction::CONST: case Instruction::CONST_4: case Instruction::CONST_16: - res = gvn_->LookupValue(Instruction::CONST, Low16Bits(mir->dalvikInsn.vB), - High16Bits(mir->dalvikInsn.vB), 0); - SetOperandValue(mir->ssa_rep->defs[0], res); - break; - - case Instruction::CONST_HIGH16: - res = gvn_->LookupValue(Instruction::CONST, 0, mir->dalvikInsn.vB, 0); - SetOperandValue(mir->ssa_rep->defs[0], res); + res = HandleConst(mir, mir->dalvikInsn.vB); break; case Instruction::CONST_WIDE_16: - case Instruction::CONST_WIDE_32: { - uint16_t low_res = gvn_->LookupValue(Instruction::CONST, Low16Bits(mir->dalvikInsn.vB), - High16Bits(mir->dalvikInsn.vB >> 16), 1); - uint16_t high_res; - if (mir->dalvikInsn.vB & 0x80000000) { - high_res = gvn_->LookupValue(Instruction::CONST, 0xffff, 0xffff, 2); - } else { - high_res = gvn_->LookupValue(Instruction::CONST, 0, 0, 2); - } - res = gvn_->LookupValue(Instruction::CONST, low_res, high_res, 3); - SetOperandValueWide(mir->ssa_rep->defs[0], res); - } + case Instruction::CONST_WIDE_32: + res = HandleConstWide( + mir, + mir->dalvikInsn.vB + + ((mir->dalvikInsn.vB & 0x80000000) != 0 ? UINT64_C(0xffffffff00000000) : 0u)); break; - case Instruction::CONST_WIDE: { - uint32_t low_word = Low32Bits(mir->dalvikInsn.vB_wide); - uint32_t high_word = High32Bits(mir->dalvikInsn.vB_wide); - uint16_t low_res = gvn_->LookupValue(Instruction::CONST, Low16Bits(low_word), - High16Bits(low_word), 1); - uint16_t high_res = gvn_->LookupValue(Instruction::CONST, Low16Bits(high_word), - High16Bits(high_word), 2); - res = gvn_->LookupValue(Instruction::CONST, low_res, high_res, 3); - SetOperandValueWide(mir->ssa_rep->defs[0], res); - } + case Instruction::CONST_WIDE: + res = HandleConstWide(mir, mir->dalvikInsn.vB_wide); break; - case Instruction::CONST_WIDE_HIGH16: { - uint16_t low_res = gvn_->LookupValue(Instruction::CONST, 0, 0, 1); - uint16_t high_res = gvn_->LookupValue(Instruction::CONST, 0, - Low16Bits(mir->dalvikInsn.vB), 2); - res = gvn_->LookupValue(Instruction::CONST, low_res, high_res, 3); - SetOperandValueWide(mir->ssa_rep->defs[0], res); - } + case Instruction::CONST_WIDE_HIGH16: + res = HandleConstWide(mir, static_cast<uint64_t>(mir->dalvikInsn.vB) << 48); break; case Instruction::ARRAY_LENGTH: { @@ -1956,4 +1967,55 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { return res; } +uint16_t LocalValueNumbering::GetEndingVregValueNumberImpl(int v_reg, bool wide) const { + const BasicBlock* bb = gvn_->GetBasicBlock(Id()); + DCHECK(bb != nullptr); + int s_reg = bb->data_flow_info->vreg_to_ssa_map_exit[v_reg]; + if (s_reg == INVALID_SREG) { + return kNoValue; + } + if (wide) { + int high_s_reg = bb->data_flow_info->vreg_to_ssa_map_exit[v_reg + 1]; + if (high_s_reg != s_reg + 1) { + return kNoValue; // High word has been overwritten. 
+ } + return GetSregValueWide(s_reg); + } else { + return GetSregValue(s_reg); + } +} + +uint16_t LocalValueNumbering::GetStartingVregValueNumberImpl(int v_reg, bool wide) const { + DCHECK_EQ(gvn_->mode_, GlobalValueNumbering::kModeGvnPostProcessing); + DCHECK(gvn_->CanModify()); + const BasicBlock* bb = gvn_->GetBasicBlock(Id()); + DCHECK(bb != nullptr); + DCHECK_NE(bb->predecessors.size(), 0u); + if (bb->predecessors.size() == 1u) { + return gvn_->GetLvn(bb->predecessors[0])->GetEndingVregValueNumberImpl(v_reg, wide); + } + merge_names_.clear(); + uint16_t value_name = kNoValue; + bool same_values = true; + for (BasicBlockId pred_id : bb->predecessors) { + value_name = gvn_->GetLvn(pred_id)->GetEndingVregValueNumberImpl(v_reg, wide); + if (value_name == kNoValue) { + return kNoValue; + } + same_values = same_values && (merge_names_.empty() || value_name == merge_names_.back()); + merge_names_.push_back(value_name); + } + if (same_values) { + // value_name already contains the result. + } else { + auto lb = merge_map_.lower_bound(merge_names_); + if (lb != merge_map_.end() && !merge_map_.key_comp()(merge_names_, lb->first)) { + value_name = lb->second; + } else { + value_name = kNoValue; // We never assigned a value name to this set of merged names. + } + } + return value_name; +} + } // namespace art diff --git a/compiler/dex/local_value_numbering.h b/compiler/dex/local_value_numbering.h index aef8c6df0c..97ea05a914 100644 --- a/compiler/dex/local_value_numbering.h +++ b/compiler/dex/local_value_numbering.h @@ -19,9 +19,9 @@ #include <memory> +#include "base/arena_object.h" #include "base/logging.h" #include "global_value_numbering.h" -#include "utils/arena_object.h" #include "utils/dex_instruction_utils.h" namespace art { @@ -52,13 +52,22 @@ class LocalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> { return div_zero_checked_.find(value_name) != div_zero_checked_.end(); } - bool IsSregValue(uint16_t s_reg, uint16_t value_name) const { - auto it = sreg_value_map_.find(s_reg); - if (it != sreg_value_map_.end()) { - return it->second == value_name; - } else { - return gvn_->HasValue(kNoValue, s_reg, kNoValue, kNoValue, value_name); - } + uint16_t GetSregValue(uint16_t s_reg) const { + return GetSregValueImpl(s_reg, &sreg_value_map_); + } + + uint16_t GetSregValueWide(uint16_t s_reg) const { + return GetSregValueImpl(s_reg, &sreg_wide_value_map_); + } + + // Get the starting value number for a given dalvik register. + uint16_t GetStartingVregValueNumber(int v_reg) const { + return GetStartingVregValueNumberImpl(v_reg, false); + } + + // Get the starting value number for a given wide dalvik register. + uint16_t GetStartingVregValueNumberWide(int v_reg) const { + return GetStartingVregValueNumberImpl(v_reg, true); } enum MergeType { @@ -80,6 +89,20 @@ class LocalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> { // Key is s_reg, value is value name. 
typedef ScopedArenaSafeMap<uint16_t, uint16_t> SregValueMap; + uint16_t GetEndingVregValueNumberImpl(int v_reg, bool wide) const; + uint16_t GetStartingVregValueNumberImpl(int v_reg, bool wide) const; + + uint16_t GetSregValueImpl(int s_reg, const SregValueMap* map) const { + uint16_t res = kNoValue; + auto lb = map->find(s_reg); + if (lb != map->end()) { + res = lb->second; + } else { + res = gvn_->FindValue(kNoValue, s_reg, kNoValue, kNoValue); + } + return res; + } + void SetOperandValueImpl(uint16_t s_reg, uint16_t value, SregValueMap* map) { DCHECK_EQ(map->count(s_reg), 0u) << PrettyMethod(gvn_->cu_->method_idx, *gvn_->cu_->dex_file) << " LVN id: " << id_ << ", s_reg: " << s_reg; @@ -285,6 +308,8 @@ class LocalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> { void HandleEscapingRef(uint16_t base); void HandleInvokeArgs(const MIR* mir, const LocalValueNumbering* mir_lvn); uint16_t HandlePhi(MIR* mir); + uint16_t HandleConst(MIR* mir, uint32_t value); + uint16_t HandleConstWide(MIR* mir, uint64_t value); uint16_t HandleAGet(MIR* mir, uint16_t opcode); void HandleAPut(MIR* mir, uint16_t opcode); uint16_t HandleIGet(MIR* mir, uint16_t opcode); @@ -370,9 +395,9 @@ class LocalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> { ValueNameSet div_zero_checked_; // Reuse one vector for all merges to avoid leaking too much memory on the ArenaStack. - ScopedArenaVector<BasicBlockId> merge_names_; + mutable ScopedArenaVector<uint16_t> merge_names_; // Map to identify when different locations merge the same values. - ScopedArenaSafeMap<ScopedArenaVector<BasicBlockId>, uint16_t> merge_map_; + ScopedArenaSafeMap<ScopedArenaVector<uint16_t>, uint16_t> merge_map_; // New memory version for merge, kNoValue if all memory versions matched. uint16_t merge_new_memory_version_; diff --git a/compiler/dex/local_value_numbering_test.cc b/compiler/dex/local_value_numbering_test.cc index c89489287f..d1c3a6b4ba 100644 --- a/compiler/dex/local_value_numbering_test.cc +++ b/compiler/dex/local_value_numbering_test.cc @@ -136,7 +136,7 @@ class LocalValueNumberingTest : public testing::Test { void DoPrepareMIRs(const MIRDef* defs, size_t count) { mir_count_ = count; - mirs_ = reinterpret_cast<MIR*>(cu_.arena.Alloc(sizeof(MIR) * count, kArenaAllocMIR)); + mirs_ = cu_.arena.AllocArray<MIR>(count, kArenaAllocMIR); ssa_reps_.resize(count); for (size_t i = 0u; i != count; ++i) { const MIRDef* def = &defs[i]; @@ -185,9 +185,9 @@ class LocalValueNumberingTest : public testing::Test { } void PerformLVN() { - cu_.mir_graph->temp_.gvn.ifield_ids_ = GlobalValueNumbering::PrepareGvnFieldIds( + cu_.mir_graph->temp_.gvn.ifield_ids = GlobalValueNumbering::PrepareGvnFieldIds( allocator_.get(), cu_.mir_graph->ifield_lowering_infos_); - cu_.mir_graph->temp_.gvn.sfield_ids_ = GlobalValueNumbering::PrepareGvnFieldIds( + cu_.mir_graph->temp_.gvn.sfield_ids = GlobalValueNumbering::PrepareGvnFieldIds( allocator_.get(), cu_.mir_graph->sfield_lowering_infos_); gvn_.reset(new (allocator_.get()) GlobalValueNumbering(&cu_, allocator_.get(), GlobalValueNumbering::kModeLvn)); @@ -211,8 +211,14 @@ class LocalValueNumberingTest : public testing::Test { value_names_() { cu_.mir_graph.reset(new MIRGraph(&cu_, &cu_.arena)); allocator_.reset(ScopedArenaAllocator::Create(&cu_.arena_stack)); + // By default, the zero-initialized reg_location_[.] with ref == false tells LVN that + // 0 constants are integral, not references. Nothing else is used by LVN/GVN. 
+ cu_.mir_graph->reg_location_ = static_cast<RegLocation*>(cu_.arena.Alloc( + kMaxSsaRegs * sizeof(cu_.mir_graph->reg_location_[0]), kArenaAllocRegAlloc)); } + static constexpr size_t kMaxSsaRegs = 16384u; + ArenaPool pool_; CompilationUnit cu_; size_t mir_count_; @@ -772,4 +778,116 @@ TEST_F(LocalValueNumberingTest, DivZeroCheck) { } } +TEST_F(LocalValueNumberingTest, ConstWide) { + static const MIRDef mirs[] = { + // Core reg constants. + DEF_CONST(Instruction::CONST_WIDE_16, 0u, 0), + DEF_CONST(Instruction::CONST_WIDE_16, 1u, 1), + DEF_CONST(Instruction::CONST_WIDE_16, 2u, -1), + DEF_CONST(Instruction::CONST_WIDE_32, 3u, 1 << 16), + DEF_CONST(Instruction::CONST_WIDE_32, 4u, -1 << 16), + DEF_CONST(Instruction::CONST_WIDE_32, 5u, (1 << 16) + 1), + DEF_CONST(Instruction::CONST_WIDE_32, 6u, (1 << 16) - 1), + DEF_CONST(Instruction::CONST_WIDE_32, 7u, -(1 << 16) + 1), + DEF_CONST(Instruction::CONST_WIDE_32, 8u, -(1 << 16) - 1), + DEF_CONST(Instruction::CONST_WIDE, 9u, INT64_C(1) << 32), + DEF_CONST(Instruction::CONST_WIDE, 10u, INT64_C(-1) << 32), + DEF_CONST(Instruction::CONST_WIDE, 11u, (INT64_C(1) << 32) + 1), + DEF_CONST(Instruction::CONST_WIDE, 12u, (INT64_C(1) << 32) - 1), + DEF_CONST(Instruction::CONST_WIDE, 13u, (INT64_C(-1) << 32) + 1), + DEF_CONST(Instruction::CONST_WIDE, 14u, (INT64_C(-1) << 32) - 1), + DEF_CONST(Instruction::CONST_WIDE_HIGH16, 15u, 1), // Effectively 1 << 48. + DEF_CONST(Instruction::CONST_WIDE_HIGH16, 16u, 0xffff), // Effectively -1 << 48. + DEF_CONST(Instruction::CONST_WIDE, 17u, (INT64_C(1) << 48) + 1), + DEF_CONST(Instruction::CONST_WIDE, 18u, (INT64_C(1) << 48) - 1), + DEF_CONST(Instruction::CONST_WIDE, 19u, (INT64_C(-1) << 48) + 1), + DEF_CONST(Instruction::CONST_WIDE, 20u, (INT64_C(-1) << 48) - 1), + // FP reg constants. + DEF_CONST(Instruction::CONST_WIDE_16, 21u, 0), + DEF_CONST(Instruction::CONST_WIDE_16, 22u, 1), + DEF_CONST(Instruction::CONST_WIDE_16, 23u, -1), + DEF_CONST(Instruction::CONST_WIDE_32, 24u, 1 << 16), + DEF_CONST(Instruction::CONST_WIDE_32, 25u, -1 << 16), + DEF_CONST(Instruction::CONST_WIDE_32, 26u, (1 << 16) + 1), + DEF_CONST(Instruction::CONST_WIDE_32, 27u, (1 << 16) - 1), + DEF_CONST(Instruction::CONST_WIDE_32, 28u, -(1 << 16) + 1), + DEF_CONST(Instruction::CONST_WIDE_32, 29u, -(1 << 16) - 1), + DEF_CONST(Instruction::CONST_WIDE, 30u, INT64_C(1) << 32), + DEF_CONST(Instruction::CONST_WIDE, 31u, INT64_C(-1) << 32), + DEF_CONST(Instruction::CONST_WIDE, 32u, (INT64_C(1) << 32) + 1), + DEF_CONST(Instruction::CONST_WIDE, 33u, (INT64_C(1) << 32) - 1), + DEF_CONST(Instruction::CONST_WIDE, 34u, (INT64_C(-1) << 32) + 1), + DEF_CONST(Instruction::CONST_WIDE, 35u, (INT64_C(-1) << 32) - 1), + DEF_CONST(Instruction::CONST_WIDE_HIGH16, 36u, 1), // Effectively 1 << 48. + DEF_CONST(Instruction::CONST_WIDE_HIGH16, 37u, 0xffff), // Effectively -1 << 48. 
+ DEF_CONST(Instruction::CONST_WIDE, 38u, (INT64_C(1) << 48) + 1), + DEF_CONST(Instruction::CONST_WIDE, 39u, (INT64_C(1) << 48) - 1), + DEF_CONST(Instruction::CONST_WIDE, 40u, (INT64_C(-1) << 48) + 1), + DEF_CONST(Instruction::CONST_WIDE, 41u, (INT64_C(-1) << 48) - 1), + }; + + PrepareMIRs(mirs); + for (size_t i = arraysize(mirs) / 2u; i != arraysize(mirs); ++i) { + cu_.mir_graph->reg_location_[mirs_[i].ssa_rep->defs[0]].fp = true; + } + PerformLVN(); + for (size_t i = 0u; i != mir_count_; ++i) { + for (size_t j = i + 1u; j != mir_count_; ++j) { + EXPECT_NE(value_names_[i], value_names_[j]) << i << " " << j; + } + } +} + +TEST_F(LocalValueNumberingTest, Const) { + static const MIRDef mirs[] = { + // Core reg constants. + DEF_CONST(Instruction::CONST_4, 0u, 0), + DEF_CONST(Instruction::CONST_4, 1u, 1), + DEF_CONST(Instruction::CONST_4, 2u, -1), + DEF_CONST(Instruction::CONST_16, 3u, 1 << 4), + DEF_CONST(Instruction::CONST_16, 4u, -1 << 4), + DEF_CONST(Instruction::CONST_16, 5u, (1 << 4) + 1), + DEF_CONST(Instruction::CONST_16, 6u, (1 << 4) - 1), + DEF_CONST(Instruction::CONST_16, 7u, -(1 << 4) + 1), + DEF_CONST(Instruction::CONST_16, 8u, -(1 << 4) - 1), + DEF_CONST(Instruction::CONST_HIGH16, 9u, 1), // Effectively 1 << 16. + DEF_CONST(Instruction::CONST_HIGH16, 10u, 0xffff), // Effectively -1 << 16. + DEF_CONST(Instruction::CONST, 11u, (1 << 16) + 1), + DEF_CONST(Instruction::CONST, 12u, (1 << 16) - 1), + DEF_CONST(Instruction::CONST, 13u, (-1 << 16) + 1), + DEF_CONST(Instruction::CONST, 14u, (-1 << 16) - 1), + // FP reg constants. + DEF_CONST(Instruction::CONST_4, 15u, 0), + DEF_CONST(Instruction::CONST_4, 16u, 1), + DEF_CONST(Instruction::CONST_4, 17u, -1), + DEF_CONST(Instruction::CONST_16, 18u, 1 << 4), + DEF_CONST(Instruction::CONST_16, 19u, -1 << 4), + DEF_CONST(Instruction::CONST_16, 20u, (1 << 4) + 1), + DEF_CONST(Instruction::CONST_16, 21u, (1 << 4) - 1), + DEF_CONST(Instruction::CONST_16, 22u, -(1 << 4) + 1), + DEF_CONST(Instruction::CONST_16, 23u, -(1 << 4) - 1), + DEF_CONST(Instruction::CONST_HIGH16, 24u, 1), // Effectively 1 << 16. + DEF_CONST(Instruction::CONST_HIGH16, 25u, 0xffff), // Effectively -1 << 16. + DEF_CONST(Instruction::CONST, 26u, (1 << 16) + 1), + DEF_CONST(Instruction::CONST, 27u, (1 << 16) - 1), + DEF_CONST(Instruction::CONST, 28u, (-1 << 16) + 1), + DEF_CONST(Instruction::CONST, 29u, (-1 << 16) - 1), + // null reference constant. 
+ DEF_CONST(Instruction::CONST_4, 30u, 0), + }; + + PrepareMIRs(mirs); + static_assert((arraysize(mirs) & 1) != 0, "missing null or unmatched fp/core"); + cu_.mir_graph->reg_location_[arraysize(mirs) - 1].ref = true; + for (size_t i = arraysize(mirs) / 2u; i != arraysize(mirs) - 1; ++i) { + cu_.mir_graph->reg_location_[mirs_[i].ssa_rep->defs[0]].fp = true; + } + PerformLVN(); + for (size_t i = 0u; i != mir_count_; ++i) { + for (size_t j = i + 1u; j != mir_count_; ++j) { + EXPECT_NE(value_names_[i], value_names_[j]) << i << " " << j; + } + } +} + } // namespace art diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc index 473196b98a..31dbc60594 100644 --- a/compiler/dex/mir_analysis.cc +++ b/compiler/dex/mir_analysis.cc @@ -18,6 +18,7 @@ #include <memory> #include "base/logging.h" +#include "base/scoped_arena_containers.h" #include "dataflow_iterator-inl.h" #include "compiler_ir.h" #include "dex_flags.h" @@ -29,7 +30,6 @@ #include "driver/compiler_driver.h" #include "driver/compiler_options.h" #include "driver/dex_compilation_unit.h" -#include "utils/scoped_arena_containers.h" namespace art { @@ -1206,10 +1206,8 @@ void MIRGraph::DoCacheFieldLoweringInfo() { // All IGET/IPUT/SGET/SPUT instructions take 2 code units and there must also be a RETURN. const uint32_t max_refs = (GetNumDalvikInsns() - 1u) / 2u; ScopedArenaAllocator allocator(&cu_->arena_stack); - uint16_t* field_idxs = - reinterpret_cast<uint16_t*>(allocator.Alloc(max_refs * sizeof(uint16_t), kArenaAllocMisc)); - DexMemAccessType* field_types = reinterpret_cast<DexMemAccessType*>( - allocator.Alloc(max_refs * sizeof(DexMemAccessType), kArenaAllocMisc)); + uint16_t* field_idxs = allocator.AllocArray<uint16_t>(max_refs, kArenaAllocMisc); + DexMemAccessType* field_types = allocator.AllocArray<DexMemAccessType>(max_refs, kArenaAllocMisc); // Find IGET/IPUT/SGET/SPUT insns, store IGET/IPUT fields at the beginning, SGET/SPUT at the end. size_t ifield_pos = 0u; @@ -1328,8 +1326,8 @@ void MIRGraph::DoCacheMethodLoweringInfo() { // multi_index_container with one ordered index and one sequential index. ScopedArenaSet<MapEntry, MapEntryComparator> invoke_map(MapEntryComparator(), allocator.Adapter()); - const MapEntry** sequential_entries = reinterpret_cast<const MapEntry**>( - allocator.Alloc(max_refs * sizeof(sequential_entries[0]), kArenaAllocMisc)); + const MapEntry** sequential_entries = + allocator.AllocArray<const MapEntry*>(max_refs, kArenaAllocMisc); // Find INVOKE insns and their devirtualization targets. 
AllNodesIterator iter(this); diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc index f09d1ae6d0..f9f7e22b03 100644 --- a/compiler/dex/mir_dataflow.cc +++ b/compiler/dex/mir_dataflow.cc @@ -910,11 +910,6 @@ const uint64_t MIRGraph::oat_data_flow_attributes_[kMirOpLast] = { DF_FORMAT_EXTENDED, }; -/* Return the base virtual register for a SSA name */ -int MIRGraph::SRegToVReg(int ssa_reg) const { - return ssa_base_vregs_[ssa_reg]; -} - /* Any register that is used before being defined is considered live-in */ void MIRGraph::HandleLiveInUse(ArenaBitVector* use_v, ArenaBitVector* def_v, ArenaBitVector* live_in_v, int dalvik_reg_id) { @@ -1084,9 +1079,9 @@ void MIRGraph::AllocateSSAUseData(MIR *mir, int num_uses) { mir->ssa_rep->num_uses = num_uses; if (mir->ssa_rep->num_uses_allocated < num_uses) { - mir->ssa_rep->uses = static_cast<int*>(arena_->Alloc(sizeof(int) * num_uses, kArenaAllocDFInfo)); + mir->ssa_rep->uses = arena_->AllocArray<int32_t>(num_uses, kArenaAllocDFInfo); // NOTE: will be filled in during type & size inference pass - mir->ssa_rep->fp_use = static_cast<bool*>(arena_->Alloc(sizeof(bool) * num_uses, kArenaAllocDFInfo)); + mir->ssa_rep->fp_use = arena_->AllocArray<bool>(num_uses, kArenaAllocDFInfo); } } @@ -1094,10 +1089,8 @@ void MIRGraph::AllocateSSADefData(MIR *mir, int num_defs) { mir->ssa_rep->num_defs = num_defs; if (mir->ssa_rep->num_defs_allocated < num_defs) { - mir->ssa_rep->defs = static_cast<int*>(arena_->Alloc(sizeof(int) * num_defs, - kArenaAllocDFInfo)); - mir->ssa_rep->fp_def = static_cast<bool*>(arena_->Alloc(sizeof(bool) * num_defs, - kArenaAllocDFInfo)); + mir->ssa_rep->defs = arena_->AllocArray<int32_t>(num_defs, kArenaAllocDFInfo); + mir->ssa_rep->fp_def = arena_->AllocArray<bool>(num_defs, kArenaAllocDFInfo); } } @@ -1198,11 +1191,30 @@ void MIRGraph::DataFlowSSAFormatExtended(MIR* mir) { /* Entry function to convert a block into SSA representation */ bool MIRGraph::DoSSAConversion(BasicBlock* bb) { - MIR* mir; - if (bb->data_flow_info == NULL) return false; - for (mir = bb->first_mir_insn; mir != NULL; mir = mir->next) { + /* + * Pruned SSA form: Insert phi nodes for each dalvik register marked in phi_node_blocks + * only if the dalvik register is in the live-in set. + */ + BasicBlockId bb_id = bb->id; + for (int dalvik_reg = GetNumOfCodeAndTempVRs() - 1; dalvik_reg >= 0; dalvik_reg--) { + if (temp_.ssa.phi_node_blocks[dalvik_reg]->IsBitSet(bb_id)) { + if (!bb->data_flow_info->live_in_v->IsBitSet(dalvik_reg)) { + /* Variable will be clobbered before being used - no need for phi */ + vreg_to_ssa_map_[dalvik_reg] = INVALID_SREG; + continue; + } + MIR *phi = NewMIR(); + phi->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpPhi); + phi->dalvikInsn.vA = dalvik_reg; + phi->offset = bb->start_offset; + phi->m_unit_index = 0; // Arbitrarily assign all Phi nodes to outermost method. + bb->PrependMIR(phi); + } + } + + for (MIR* mir = bb->first_mir_insn; mir != NULL; mir = mir->next) { mir->ssa_rep = static_cast<struct SSARepresentation *>(arena_->Alloc(sizeof(SSARepresentation), kArenaAllocDFInfo)); @@ -1315,8 +1327,7 @@ bool MIRGraph::DoSSAConversion(BasicBlock* bb) { * predecessor blocks. 
*/ bb->data_flow_info->vreg_to_ssa_map_exit = - static_cast<int*>(arena_->Alloc(sizeof(int) * GetNumOfCodeAndTempVRs(), - kArenaAllocDFInfo)); + arena_->AllocArray<int32_t>(GetNumOfCodeAndTempVRs(), kArenaAllocDFInfo); memcpy(bb->data_flow_info->vreg_to_ssa_map_exit, vreg_to_ssa_map_, sizeof(int) * GetNumOfCodeAndTempVRs()); @@ -1368,13 +1379,9 @@ void MIRGraph::CompilerInitializeSSAConversion() { * Initialize the DalvikToSSAMap map. There is one entry for each * Dalvik register, and the SSA names for those are the same. */ - vreg_to_ssa_map_ = - static_cast<int*>(arena_->Alloc(sizeof(int) * num_reg, - kArenaAllocDFInfo)); + vreg_to_ssa_map_ = arena_->AllocArray<int32_t>(num_reg, kArenaAllocDFInfo); /* Keep track of the higest def for each dalvik reg */ - ssa_last_defs_ = - static_cast<int*>(arena_->Alloc(sizeof(int) * num_reg, - kArenaAllocDFInfo)); + ssa_last_defs_ = arena_->AllocArray<int>(num_reg, kArenaAllocDFInfo); for (unsigned int i = 0; i < num_reg; i++) { vreg_to_ssa_map_[i] = i; diff --git a/compiler/dex/mir_field_info.h b/compiler/dex/mir_field_info.h index ff427f88d0..98b2da8299 100644 --- a/compiler/dex/mir_field_info.h +++ b/compiler/dex/mir_field_info.h @@ -149,6 +149,7 @@ class MirIFieldLoweringInfo : public MirFieldInfo { friend class NullCheckEliminationTest; friend class GlobalValueNumberingTest; + friend class GvnDeadCodeEliminationTest; friend class LocalValueNumberingTest; }; @@ -223,6 +224,7 @@ class MirSFieldLoweringInfo : public MirFieldInfo { friend class ClassInitCheckEliminationTest; friend class GlobalValueNumberingTest; + friend class GvnDeadCodeEliminationTest; friend class LocalValueNumberingTest; }; diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index 0f7d45df79..76b5e44df0 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -24,6 +24,7 @@ #include "base/logging.h" #include "base/stl_util.h" #include "base/stringprintf.h" +#include "base/scoped_arena_containers.h" #include "compiler_ir.h" #include "dex_file-inl.h" #include "dex_flags.h" @@ -34,7 +35,6 @@ #include "leb128.h" #include "pass_driver_me_post_opt.h" #include "stack.h" -#include "utils/scoped_arena_containers.h" namespace art { @@ -113,7 +113,6 @@ MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena) entry_block_(NULL), exit_block_(NULL), current_code_item_(NULL), - dex_pc_to_block_map_(arena->Adapter()), m_units_(arena->Adapter()), method_stack_(arena->Adapter()), current_method_(kInvalidEntry), @@ -268,31 +267,14 @@ BasicBlock* MIRGraph::SplitBlock(DexOffset code_offset, DCHECK(insn != orig_block->first_mir_insn); DCHECK(insn == bottom_block->first_mir_insn); DCHECK_EQ(insn->offset, bottom_block->start_offset); - DCHECK_EQ(dex_pc_to_block_map_[insn->offset], orig_block->id); // Scan the "bottom" instructions, remapping them to the // newly created "bottom" block. MIR* p = insn; p->bb = bottom_block->id; - dex_pc_to_block_map_[p->offset] = bottom_block->id; while (p != bottom_block->last_mir_insn) { p = p->next; DCHECK(p != nullptr); p->bb = bottom_block->id; - int opcode = p->dalvikInsn.opcode; - /* - * Some messiness here to ensure that we only enter real opcodes and only the - * first half of a potentially throwing instruction that has been split into - * CHECK and work portions. Since the 2nd half of a split operation is always - * the first in a BasicBlock, we can't hit it here. 
- */ - if ((opcode == kMirOpCheck) || !MIR::DecodedInstruction::IsPseudoMirOp(opcode)) { - BasicBlockId mapped_id = dex_pc_to_block_map_[p->offset]; - // At first glance the instructions should all be mapped to orig_block. - // However, multiple instructions may correspond to the same dex, hence an earlier - // instruction may have already moved the mapping for dex to bottom_block. - DCHECK((mapped_id == orig_block->id) || (mapped_id == bottom_block->id)); - dex_pc_to_block_map_[p->offset] = bottom_block->id; - } } return bottom_block; @@ -307,12 +289,13 @@ BasicBlock* MIRGraph::SplitBlock(DexOffset code_offset, * Utilizes a map for fast lookup of the typical cases. */ BasicBlock* MIRGraph::FindBlock(DexOffset code_offset, bool create, - BasicBlock** immed_pred_block_p) { + BasicBlock** immed_pred_block_p, + ScopedArenaVector<uint16_t>* dex_pc_to_block_map) { if (code_offset >= current_code_item_->insns_size_in_code_units_) { return nullptr; } - int block_id = dex_pc_to_block_map_[code_offset]; + int block_id = (*dex_pc_to_block_map)[code_offset]; BasicBlock* bb = GetBasicBlock(block_id); if ((bb != nullptr) && (bb->start_offset == code_offset)) { @@ -327,19 +310,46 @@ BasicBlock* MIRGraph::FindBlock(DexOffset code_offset, bool create, if (bb != nullptr) { // The target exists somewhere in an existing block. - return SplitBlock(code_offset, bb, bb == *immed_pred_block_p ? immed_pred_block_p : nullptr); + BasicBlock* bottom_block = SplitBlock(code_offset, bb, bb == *immed_pred_block_p ? immed_pred_block_p : nullptr); + DCHECK(bottom_block != nullptr); + MIR* p = bottom_block->first_mir_insn; + BasicBlock* orig_block = bb; + DCHECK_EQ((*dex_pc_to_block_map)[p->offset], orig_block->id); + // Scan the "bottom" instructions, remapping them to the + // newly created "bottom" block. + (*dex_pc_to_block_map)[p->offset] = bottom_block->id; + while (p != bottom_block->last_mir_insn) { + p = p->next; + DCHECK(p != nullptr); + int opcode = p->dalvikInsn.opcode; + /* + * Some messiness here to ensure that we only enter real opcodes and only the + * first half of a potentially throwing instruction that has been split into + * CHECK and work portions. Since the 2nd half of a split operation is always + * the first in a BasicBlock, we can't hit it here. + */ + if ((opcode == kMirOpCheck) || !MIR::DecodedInstruction::IsPseudoMirOp(opcode)) { + BasicBlockId mapped_id = (*dex_pc_to_block_map)[p->offset]; + // At first glance the instructions should all be mapped to orig_block. + // However, multiple instructions may correspond to the same dex, hence an earlier + // instruction may have already moved the mapping for dex to bottom_block. + DCHECK((mapped_id == orig_block->id) || (mapped_id == bottom_block->id)); + (*dex_pc_to_block_map)[p->offset] = bottom_block->id; + } + } + return bottom_block; } // Create a new block. 
bb = CreateNewBB(kDalvikByteCode); bb->start_offset = code_offset; - dex_pc_to_block_map_[bb->start_offset] = bb->id; + (*dex_pc_to_block_map)[bb->start_offset] = bb->id; return bb; } /* Identify code range in try blocks and set up the empty catch blocks */ -void MIRGraph::ProcessTryCatchBlocks() { +void MIRGraph::ProcessTryCatchBlocks(ScopedArenaVector<uint16_t>* dex_pc_to_block_map) { int tries_size = current_code_item_->tries_size_; DexOffset offset; @@ -364,7 +374,7 @@ void MIRGraph::ProcessTryCatchBlocks() { CatchHandlerIterator iterator(handlers_ptr); for (; iterator.HasNext(); iterator.Next()) { uint32_t address = iterator.GetHandlerAddress(); - FindBlock(address, true /*create*/, /* immed_pred_block_p */ nullptr); + FindBlock(address, true /*create*/, /* immed_pred_block_p */ nullptr, dex_pc_to_block_map); } handlers_ptr = iterator.EndDataPointer(); } @@ -439,7 +449,8 @@ bool MIRGraph::IsBadMonitorExitCatch(NarrowDexOffset monitor_exit_offset, /* Process instructions with the kBranch flag */ BasicBlock* MIRGraph::ProcessCanBranch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width, int flags, const uint16_t* code_ptr, - const uint16_t* code_end) { + const uint16_t* code_end, + ScopedArenaVector<uint16_t>* dex_pc_to_block_map) { DexOffset target = cur_offset; switch (insn->dalvikInsn.opcode) { case Instruction::GOTO: @@ -470,7 +481,8 @@ BasicBlock* MIRGraph::ProcessCanBranch(BasicBlock* cur_block, MIR* insn, DexOffs } CountBranch(target); BasicBlock* taken_block = FindBlock(target, /* create */ true, - /* immed_pred_block_p */ &cur_block); + /* immed_pred_block_p */ &cur_block, + dex_pc_to_block_map); cur_block->taken = taken_block->id; taken_block->predecessors.push_back(cur_block->id); @@ -480,18 +492,20 @@ BasicBlock* MIRGraph::ProcessCanBranch(BasicBlock* cur_block, MIR* insn, DexOffs /* create */ true, /* immed_pred_block_p */ - &cur_block); + &cur_block, + dex_pc_to_block_map); cur_block->fall_through = fallthrough_block->id; fallthrough_block->predecessors.push_back(cur_block->id); } else if (code_ptr < code_end) { - FindBlock(cur_offset + width, /* create */ true, /* immed_pred_block_p */ nullptr); + FindBlock(cur_offset + width, /* create */ true, /* immed_pred_block_p */ nullptr, dex_pc_to_block_map); } return cur_block; } /* Process instructions with the kSwitch flag */ BasicBlock* MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, - int width, int flags) { + int width, int flags, + ScopedArenaVector<uint16_t>* dex_pc_to_block_map) { UNUSED(flags); const uint16_t* switch_data = reinterpret_cast<const uint16_t*>(GetCurrentInsns() + cur_offset + insn->dalvikInsn.vB); @@ -545,7 +559,8 @@ BasicBlock* MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffs for (i = 0; i < size; i++) { BasicBlock* case_block = FindBlock(cur_offset + target_table[i], /* create */ true, - /* immed_pred_block_p */ &cur_block); + /* immed_pred_block_p */ &cur_block, + dex_pc_to_block_map); SuccessorBlockInfo* successor_block_info = static_cast<SuccessorBlockInfo*>(arena_->Alloc(sizeof(SuccessorBlockInfo), kArenaAllocSuccessor)); @@ -559,7 +574,8 @@ BasicBlock* MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffs /* Fall-through case */ BasicBlock* fallthrough_block = FindBlock(cur_offset + width, /* create */ true, - /* immed_pred_block_p */ nullptr); + /* immed_pred_block_p */ nullptr, + dex_pc_to_block_map); cur_block->fall_through = fallthrough_block->id; fallthrough_block->predecessors.push_back(cur_block->id); return 
cur_block; @@ -568,7 +584,8 @@ BasicBlock* MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffs /* Process instructions with the kThrow flag */ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width, int flags, ArenaBitVector* try_block_addr, - const uint16_t* code_ptr, const uint16_t* code_end) { + const uint16_t* code_ptr, const uint16_t* code_end, + ScopedArenaVector<uint16_t>* dex_pc_to_block_map) { UNUSED(flags); bool in_try_block = try_block_addr->IsBitSet(cur_offset); bool is_throw = (insn->dalvikInsn.opcode == Instruction::THROW); @@ -585,7 +602,8 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffse for (; iterator.HasNext(); iterator.Next()) { BasicBlock* catch_block = FindBlock(iterator.GetHandlerAddress(), false /* create */, - nullptr /* immed_pred_block_p */); + nullptr /* immed_pred_block_p */, + dex_pc_to_block_map); if (insn->dalvikInsn.opcode == Instruction::MONITOR_EXIT && IsBadMonitorExitCatch(insn->offset, catch_block->start_offset)) { // Don't allow monitor-exit to catch its own exception, http://b/15745363 . @@ -620,7 +638,7 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffse cur_block->explicit_throw = true; if (code_ptr < code_end) { // Force creation of new block following THROW via side-effect. - FindBlock(cur_offset + width, /* create */ true, /* immed_pred_block_p */ nullptr); + FindBlock(cur_offset + width, /* create */ true, /* immed_pred_block_p */ nullptr, dex_pc_to_block_map); } if (!in_try_block) { // Don't split a THROW that can't rethrow - we're done. @@ -652,7 +670,7 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffse * not automatically terminated after the work portion, and may * contain following instructions. * - * Note also that the dex_pc_to_block_map_ entry for the potentially + * Note also that the dex_pc_to_block_map entry for the potentially * throwing instruction will refer to the original basic block. */ BasicBlock* new_block = CreateNewBB(kDalvikByteCode); @@ -687,7 +705,11 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ // TODO: need to rework expansion of block list & try_block_addr when inlining activated. // TUNING: use better estimate of basic blocks for following resize. block_list_.reserve(block_list_.size() + current_code_item_->insns_size_in_code_units_); - dex_pc_to_block_map_.resize(dex_pc_to_block_map_.size() + current_code_item_->insns_size_in_code_units_); + // FindBlock lookup cache. + ScopedArenaAllocator allocator(&cu_->arena_stack); + ScopedArenaVector<uint16_t> dex_pc_to_block_map(allocator.Adapter()); + dex_pc_to_block_map.resize(dex_pc_to_block_map.size() + + current_code_item_->insns_size_in_code_units_); // TODO: replace with explicit resize routine. Using automatic extension side effect for now. try_block_addr_->SetBit(current_code_item_->insns_size_in_code_units_); @@ -728,7 +750,7 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ cur_block->predecessors.push_back(entry_block_->id); /* Identify code range in try blocks and set up the empty catch blocks */ - ProcessTryCatchBlocks(); + ProcessTryCatchBlocks(&dex_pc_to_block_map); uint64_t merged_df_flags = 0u; @@ -777,20 +799,21 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ DCHECK(cur_block->taken == NullBasicBlockId); // Unreachable instruction, mark for no continuation and end basic block. 
flags &= ~Instruction::kContinue; - FindBlock(current_offset_ + width, /* create */ true, /* immed_pred_block_p */ nullptr); + FindBlock(current_offset_ + width, /* create */ true, + /* immed_pred_block_p */ nullptr, &dex_pc_to_block_map); } } else { cur_block->AppendMIR(insn); } // Associate the starting dex_pc for this opcode with its containing basic block. - dex_pc_to_block_map_[insn->offset] = cur_block->id; + dex_pc_to_block_map[insn->offset] = cur_block->id; code_ptr += width; if (flags & Instruction::kBranch) { cur_block = ProcessCanBranch(cur_block, insn, current_offset_, - width, flags, code_ptr, code_end); + width, flags, code_ptr, code_end, &dex_pc_to_block_map); } else if (flags & Instruction::kReturn) { cur_block->terminated_by_return = true; cur_block->fall_through = exit_block_->id; @@ -804,13 +827,15 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ * Create a fallthrough block for real instructions * (incl. NOP). */ - FindBlock(current_offset_ + width, /* create */ true, /* immed_pred_block_p */ nullptr); + FindBlock(current_offset_ + width, /* create */ true, + /* immed_pred_block_p */ nullptr, &dex_pc_to_block_map); } } else if (flags & Instruction::kThrow) { cur_block = ProcessCanThrow(cur_block, insn, current_offset_, width, flags, try_block_addr_, - code_ptr, code_end); + code_ptr, code_end, &dex_pc_to_block_map); } else if (flags & Instruction::kSwitch) { - cur_block = ProcessCanSwitch(cur_block, insn, current_offset_, width, flags); + cur_block = ProcessCanSwitch(cur_block, insn, current_offset_, width, + flags, &dex_pc_to_block_map); } if (verify_flags & Instruction::kVerifyVarArgRange || verify_flags & Instruction::kVerifyVarArgRangeNonZero) { @@ -828,7 +853,8 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ } current_offset_ += width; BasicBlock* next_block = FindBlock(current_offset_, /* create */ false, - /* immed_pred_block_p */ nullptr); + /* immed_pred_block_p */ nullptr, + &dex_pc_to_block_map); if (next_block) { /* * The next instruction could be the target of a previously parsed @@ -1573,7 +1599,7 @@ char* MIRGraph::GetDalvikDisassembly(const MIR* mir) { } } int length = str.length() + 1; - ret = static_cast<char*>(arena_->Alloc(length, kArenaAllocDFInfo)); + ret = arena_->AllocArray<char>(length, kArenaAllocDFInfo); strncpy(ret, str.c_str(), length); return ret; } @@ -1710,9 +1736,9 @@ CallInfo* MIRGraph::NewMemCallInfo(BasicBlock* bb, MIR* mir, InvokeType type, move_result_mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop); } info->num_arg_words = mir->ssa_rep->num_uses; - info->args = (info->num_arg_words == 0) ? NULL : static_cast<RegLocation*> - (arena_->Alloc(sizeof(RegLocation) * info->num_arg_words, kArenaAllocMisc)); - for (int i = 0; i < info->num_arg_words; i++) { + info->args = (info->num_arg_words == 0) ? 
nullptr : + arena_->AllocArray<RegLocation>(info->num_arg_words, kArenaAllocMisc); + for (size_t i = 0; i < info->num_arg_words; i++) { info->args[i] = GetRawSrc(mir, i); } info->opt_flags = mir->optimization_flags; @@ -1742,7 +1768,7 @@ BasicBlock* MIRGraph::NewMemBB(BBType block_type, int block_id) { void MIRGraph::InitializeConstantPropagation() { is_constant_v_ = new (arena_) ArenaBitVector(arena_, GetNumSSARegs(), false); - constant_values_ = static_cast<int*>(arena_->Alloc(sizeof(int) * GetNumSSARegs(), kArenaAllocDFInfo)); + constant_values_ = arena_->AllocArray<int>(GetNumSSARegs(), kArenaAllocDFInfo); } void MIRGraph::InitializeMethodUses() { @@ -1772,7 +1798,8 @@ void MIRGraph::SSATransformationEnd() { temp_.ssa.num_vregs = 0u; temp_.ssa.work_live_vregs = nullptr; - temp_.ssa.def_block_matrix = nullptr; + DCHECK(temp_.ssa.def_block_matrix == nullptr); + temp_.ssa.phi_node_blocks = nullptr; DCHECK(temp_scoped_alloc_.get() != nullptr); temp_scoped_alloc_.reset(); @@ -2532,4 +2559,13 @@ const uint16_t* MIRGraph::GetInsns(int m_unit_index) const { return m_units_[m_unit_index]->GetCodeItem()->insns_; } +void MIRGraph::SetPuntToInterpreter(bool val) { + punt_to_interpreter_ = val; + if (val) { + // Disable all subsequent optimizations. They may not be safe to run. (For example, + // LVN/GVN assumes there are no conflicts found by the type inference pass.) + cu_->disable_opt = ~static_cast<decltype(cu_->disable_opt)>(0); + } +} + } // namespace art diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index 5def19128c..e5abd3be51 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -19,17 +19,17 @@ #include <stdint.h> +#include "base/arena_containers.h" +#include "base/scoped_arena_containers.h" #include "dex_file.h" #include "dex_instruction.h" #include "dex_types.h" #include "invoke_type.h" #include "mir_field_info.h" #include "mir_method_info.h" -#include "utils/arena_bit_vector.h" -#include "utils/arena_containers.h" -#include "utils/scoped_arena_containers.h" #include "reg_location.h" #include "reg_storage.h" +#include "utils/arena_bit_vector.h" namespace art { @@ -37,6 +37,7 @@ struct CompilationUnit; class DexCompilationUnit; class DexFileMethodInliner; class GlobalValueNumbering; +class GvnDeadCodeElimination; // Forward declaration. class MIRGraph; @@ -497,19 +498,19 @@ class ChildBlockIterator { * more efficient invoke code generation. */ struct CallInfo { - int num_arg_words; // Note: word count, not arg count. - RegLocation* args; // One for each word of arguments. - RegLocation result; // Eventual target of MOVE_RESULT. + size_t num_arg_words; // Note: word count, not arg count. + RegLocation* args; // One for each word of arguments. + RegLocation result; // Eventual target of MOVE_RESULT. int opt_flags; InvokeType type; uint32_t dex_idx; - uint32_t index; // Method idx for invokes, type idx for FilledNewArray. + uint32_t index; // Method idx for invokes, type idx for FilledNewArray. uintptr_t direct_code; uintptr_t direct_method; - RegLocation target; // Target of following move_result. + RegLocation target; // Target of following move_result. bool skip_this; bool is_range; - DexOffset offset; // Offset in code units. + DexOffset offset; // Offset in code units. 
MIR* mir; }; @@ -542,8 +543,9 @@ class MIRGraph { uint32_t method_idx, jobject class_loader, const DexFile& dex_file); /* Find existing block */ - BasicBlock* FindBlock(DexOffset code_offset) { - return FindBlock(code_offset, false, NULL); + BasicBlock* FindBlock(DexOffset code_offset, + ScopedArenaVector<uint16_t>* dex_pc_to_block_map) { + return FindBlock(code_offset, false, nullptr, dex_pc_to_block_map); } const uint16_t* GetCurrentInsns() const { @@ -625,8 +627,7 @@ class MIRGraph { } void EnableOpcodeCounting() { - opcode_count_ = static_cast<int*>(arena_->Alloc(kNumPackedOpcodes * sizeof(int), - kArenaAllocMisc)); + opcode_count_ = arena_->AllocArray<int>(kNumPackedOpcodes, kArenaAllocMisc); } void ShowOpcodeStats(); @@ -1052,7 +1053,12 @@ class MIRGraph { void DumpCheckStats(); MIR* FindMoveResult(BasicBlock* bb, MIR* mir); - int SRegToVReg(int ssa_reg) const; + + /* Return the base virtual register for a SSA name */ + int SRegToVReg(int ssa_reg) const { + return ssa_base_vregs_[ssa_reg]; + } + void VerifyDataflow(); void CheckForDominanceFrontier(BasicBlock* dom_bb, const BasicBlock* succ_bb); bool EliminateNullChecksGate(); @@ -1065,6 +1071,9 @@ class MIRGraph { bool ApplyGlobalValueNumberingGate(); bool ApplyGlobalValueNumbering(BasicBlock* bb); void ApplyGlobalValueNumberingEnd(); + bool EliminateDeadCodeGate(); + bool EliminateDeadCode(BasicBlock* bb); + void EliminateDeadCodeEnd(); bool EliminateSuspendChecksGate(); bool EliminateSuspendChecks(BasicBlock* bb); void EliminateSuspendChecksEnd(); @@ -1072,15 +1081,15 @@ class MIRGraph { uint16_t GetGvnIFieldId(MIR* mir) const { DCHECK(IsInstructionIGetOrIPut(mir->dalvikInsn.opcode)); DCHECK_LT(mir->meta.ifield_lowering_info, ifield_lowering_infos_.size()); - DCHECK(temp_.gvn.ifield_ids_ != nullptr); - return temp_.gvn.ifield_ids_[mir->meta.ifield_lowering_info]; + DCHECK(temp_.gvn.ifield_ids != nullptr); + return temp_.gvn.ifield_ids[mir->meta.ifield_lowering_info]; } uint16_t GetGvnSFieldId(MIR* mir) const { DCHECK(IsInstructionSGetOrSPut(mir->dalvikInsn.opcode)); DCHECK_LT(mir->meta.sfield_lowering_info, sfield_lowering_infos_.size()); - DCHECK(temp_.gvn.sfield_ids_ != nullptr); - return temp_.gvn.sfield_ids_[mir->meta.sfield_lowering_info]; + DCHECK(temp_.gvn.sfield_ids != nullptr); + return temp_.gvn.sfield_ids[mir->meta.sfield_lowering_info]; } /* @@ -1115,9 +1124,7 @@ class MIRGraph { return punt_to_interpreter_; } - void SetPuntToInterpreter(bool val) { - punt_to_interpreter_ = val; - } + void SetPuntToInterpreter(bool val); void DisassembleExtendedInstr(const MIR* mir, std::string* decoded_mir); char* GetDalvikDisassembly(const MIR* mir); @@ -1200,7 +1207,7 @@ class MIRGraph { void ComputeDominators(); void CompilerInitializeSSAConversion(); virtual void InitializeBasicBlockDataFlow(); - void InsertPhiNodes(); + void FindPhiNodeBlocks(); void DoDFSPreOrderSSARename(BasicBlock* block); bool DfsOrdersUpToDate() const { @@ -1249,16 +1256,20 @@ class MIRGraph { bool ContentIsInsn(const uint16_t* code_ptr); BasicBlock* SplitBlock(DexOffset code_offset, BasicBlock* orig_block, BasicBlock** immed_pred_block_p); - BasicBlock* FindBlock(DexOffset code_offset, bool create, BasicBlock** immed_pred_block_p); - void ProcessTryCatchBlocks(); + BasicBlock* FindBlock(DexOffset code_offset, bool create, BasicBlock** immed_pred_block_p, + ScopedArenaVector<uint16_t>* dex_pc_to_block_map); + void ProcessTryCatchBlocks(ScopedArenaVector<uint16_t>* dex_pc_to_block_map); bool IsBadMonitorExitCatch(NarrowDexOffset monitor_exit_offset, 
NarrowDexOffset catch_offset); BasicBlock* ProcessCanBranch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width, - int flags, const uint16_t* code_ptr, const uint16_t* code_end); + int flags, const uint16_t* code_ptr, const uint16_t* code_end, + ScopedArenaVector<uint16_t>* dex_pc_to_block_map); BasicBlock* ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width, - int flags); + int flags, + ScopedArenaVector<uint16_t>* dex_pc_to_block_map); BasicBlock* ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width, int flags, ArenaBitVector* try_block_addr, const uint16_t* code_ptr, - const uint16_t* code_end); + const uint16_t* code_end, + ScopedArenaVector<uint16_t>* dex_pc_to_block_map); int AddNewSReg(int v_reg); void HandleSSAUse(int* uses, int dalvik_reg, int reg_index); void DataFlowSSAFormat35C(MIR* mir); @@ -1280,7 +1291,8 @@ class MIRGraph { * @param mir The mir to check. * @return Returns 'true' if the given MIR might throw an exception. */ - bool CanThrow(MIR* mir); + bool CanThrow(MIR* mir) const; + /** * @brief Combine multiply and add/sub MIRs into corresponding extended MAC MIR. * @param mul_mir The multiply MIR to be combined. @@ -1319,7 +1331,7 @@ class MIRGraph { ArenaVector<int> ssa_base_vregs_; ArenaVector<int> ssa_subscripts_; // Map original Dalvik virtual reg i to the current SSA name. - int* vreg_to_ssa_map_; // length == method->registers_size + int32_t* vreg_to_ssa_map_; // length == method->registers_size int* ssa_last_defs_; // length == method->registers_size ArenaBitVector* is_constant_v_; // length == num_ssa_reg int* constant_values_; // length == num_ssa_reg @@ -1373,12 +1385,14 @@ class MIRGraph { size_t num_vregs; ArenaBitVector* work_live_vregs; ArenaBitVector** def_block_matrix; // num_vregs x num_blocks_. + ArenaBitVector** phi_node_blocks; // num_vregs x num_blocks_. } ssa; // Global value numbering. struct { GlobalValueNumbering* gvn; - uint16_t* ifield_ids_; // Part of GVN/LVN but cached here for LVN to avoid recalculation. - uint16_t* sfield_ids_; // Ditto. + uint16_t* ifield_ids; // Part of GVN/LVN but cached here for LVN to avoid recalculation. + uint16_t* sfield_ids; // Ditto. + GvnDeadCodeElimination* dce; } gvn; // Suspend check elimination. struct { @@ -1391,7 +1405,6 @@ class MIRGraph { BasicBlock* entry_block_; BasicBlock* exit_block_; const DexFile::CodeItem* current_code_item_; - ArenaVector<uint16_t> dex_pc_to_block_map_; // FindBlock lookup cache. 
ArenaVector<DexCompilationUnit*> m_units_; // List of methods included in this graph typedef std::pair<int, int> MIRLocation; // Insert point, (m_unit_ index, offset) ArenaVector<MIRLocation> method_stack_; // Include stack @@ -1433,6 +1446,7 @@ class MIRGraph { friend class SuspendCheckEliminationTest; friend class NullCheckEliminationTest; friend class GlobalValueNumberingTest; + friend class GvnDeadCodeEliminationTest; friend class LocalValueNumberingTest; friend class TopologicalSortOrderTest; }; diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc index 8718191069..fd67d4ebec 100644 --- a/compiler/dex/mir_optimization.cc +++ b/compiler/dex/mir_optimization.cc @@ -16,17 +16,18 @@ #include "base/bit_vector-inl.h" #include "base/logging.h" +#include "base/scoped_arena_containers.h" #include "dataflow_iterator-inl.h" #include "dex_flags.h" #include "driver/compiler_driver.h" #include "driver/dex_compilation_unit.h" #include "global_value_numbering.h" +#include "gvn_dead_code_elimination.h" #include "local_value_numbering.h" #include "mir_field_info.h" #include "quick/dex_file_method_inliner.h" #include "quick/dex_file_to_method_inliner_map.h" #include "stack.h" -#include "utils/scoped_arena_containers.h" namespace art { @@ -632,8 +633,7 @@ bool MIRGraph::BasicBlockOpt(BasicBlock* bb) { } else { DCHECK_EQ(SelectKind(if_true), kSelectMove); DCHECK_EQ(SelectKind(if_false), kSelectMove); - int* src_ssa = - static_cast<int*>(arena_->Alloc(sizeof(int) * 3, kArenaAllocDFInfo)); + int32_t* src_ssa = arena_->AllocArray<int32_t>(3, kArenaAllocDFInfo); src_ssa[0] = mir->ssa_rep->uses[0]; src_ssa[1] = if_true->ssa_rep->uses[0]; src_ssa[2] = if_false->ssa_rep->uses[0]; @@ -641,15 +641,12 @@ bool MIRGraph::BasicBlockOpt(BasicBlock* bb) { mir->ssa_rep->num_uses = 3; } mir->ssa_rep->num_defs = 1; - mir->ssa_rep->defs = - static_cast<int*>(arena_->Alloc(sizeof(int) * 1, kArenaAllocDFInfo)); - mir->ssa_rep->fp_def = - static_cast<bool*>(arena_->Alloc(sizeof(bool) * 1, kArenaAllocDFInfo)); + mir->ssa_rep->defs = arena_->AllocArray<int32_t>(1, kArenaAllocDFInfo); + mir->ssa_rep->fp_def = arena_->AllocArray<bool>(1, kArenaAllocDFInfo); mir->ssa_rep->fp_def[0] = if_true->ssa_rep->fp_def[0]; // Match type of uses to def. - mir->ssa_rep->fp_use = - static_cast<bool*>(arena_->Alloc(sizeof(bool) * mir->ssa_rep->num_uses, - kArenaAllocDFInfo)); + mir->ssa_rep->fp_use = arena_->AllocArray<bool>(mir->ssa_rep->num_uses, + kArenaAllocDFInfo); for (int i = 0; i < mir->ssa_rep->num_uses; i++) { mir->ssa_rep->fp_use[i] = mir->ssa_rep->fp_def[0]; } @@ -900,8 +897,8 @@ bool MIRGraph::EliminateNullChecksGate() { temp_.nce.num_vregs = GetNumOfCodeAndTempVRs(); temp_.nce.work_vregs_to_check = new (temp_scoped_alloc_.get()) ArenaBitVector( temp_scoped_alloc_.get(), temp_.nce.num_vregs, false, kBitMapNullCheck); - temp_.nce.ending_vregs_to_check_matrix = static_cast<ArenaBitVector**>( - temp_scoped_alloc_->Alloc(sizeof(ArenaBitVector*) * GetNumBlocks(), kArenaAllocMisc)); + temp_.nce.ending_vregs_to_check_matrix = + temp_scoped_alloc_->AllocArray<ArenaBitVector*>(GetNumBlocks(), kArenaAllocMisc); std::fill_n(temp_.nce.ending_vregs_to_check_matrix, GetNumBlocks(), nullptr); // reset MIR_MARK @@ -1133,8 +1130,7 @@ bool MIRGraph::EliminateClassInitChecksGate() { // Each insn we use here has at least 2 code units, offset/2 will be a unique index. 
const size_t end = (GetNumDalvikInsns() + 1u) / 2u; - temp_.cice.indexes = static_cast<uint16_t*>( - temp_scoped_alloc_->Alloc(end * sizeof(*temp_.cice.indexes), kArenaAllocGrowableArray)); + temp_.cice.indexes = temp_scoped_alloc_->AllocArray<uint16_t>(end, kArenaAllocGrowableArray); std::fill_n(temp_.cice.indexes, end, 0xffffu); uint32_t unique_class_count = 0u; @@ -1215,8 +1211,8 @@ bool MIRGraph::EliminateClassInitChecksGate() { temp_.cice.num_class_bits = 2u * unique_class_count; temp_.cice.work_classes_to_check = new (temp_scoped_alloc_.get()) ArenaBitVector( temp_scoped_alloc_.get(), temp_.cice.num_class_bits, false, kBitMapClInitCheck); - temp_.cice.ending_classes_to_check_matrix = static_cast<ArenaBitVector**>( - temp_scoped_alloc_->Alloc(sizeof(ArenaBitVector*) * GetNumBlocks(), kArenaAllocMisc)); + temp_.cice.ending_classes_to_check_matrix = + temp_scoped_alloc_->AllocArray<ArenaBitVector*>(GetNumBlocks(), kArenaAllocMisc); std::fill_n(temp_.cice.ending_classes_to_check_matrix, GetNumBlocks(), nullptr); DCHECK_GT(temp_.cice.num_class_bits, 0u); return true; @@ -1338,9 +1334,9 @@ bool MIRGraph::ApplyGlobalValueNumberingGate() { DCHECK(temp_scoped_alloc_ == nullptr); temp_scoped_alloc_.reset(ScopedArenaAllocator::Create(&cu_->arena_stack)); - temp_.gvn.ifield_ids_ = + temp_.gvn.ifield_ids = GlobalValueNumbering::PrepareGvnFieldIds(temp_scoped_alloc_.get(), ifield_lowering_infos_); - temp_.gvn.sfield_ids_ = + temp_.gvn.sfield_ids = GlobalValueNumbering::PrepareGvnFieldIds(temp_scoped_alloc_.get(), sfield_lowering_infos_); DCHECK(temp_.gvn.gvn == nullptr); temp_.gvn.gvn = new (temp_scoped_alloc_.get()) GlobalValueNumbering( @@ -1364,8 +1360,8 @@ void MIRGraph::ApplyGlobalValueNumberingEnd() { // Perform modifications. DCHECK(temp_.gvn.gvn != nullptr); if (temp_.gvn.gvn->Good()) { + temp_.gvn.gvn->StartPostProcessing(); if (max_nested_loops_ != 0u) { - temp_.gvn.gvn->StartPostProcessing(); TopologicalSortIterator iter(this); for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) { ScopedArenaAllocator allocator(&cu_->arena_stack); // Reclaim memory after each LVN. @@ -1383,12 +1379,45 @@ void MIRGraph::ApplyGlobalValueNumberingEnd() { cu_->disable_opt |= (1u << kLocalValueNumbering); } else { LOG(WARNING) << "GVN failed for " << PrettyMethod(cu_->method_idx, *cu_->dex_file); + cu_->disable_opt |= (1u << kGvnDeadCodeElimination); } + if ((cu_->disable_opt & (1 << kGvnDeadCodeElimination)) != 0) { + EliminateDeadCodeEnd(); + } // else preserve GVN data for CSE. +} + +bool MIRGraph::EliminateDeadCodeGate() { + if ((cu_->disable_opt & (1 << kGvnDeadCodeElimination)) != 0) { + return false; + } + DCHECK(temp_scoped_alloc_ != nullptr); + temp_.gvn.dce = new (temp_scoped_alloc_.get()) GvnDeadCodeElimination(temp_.gvn.gvn, + temp_scoped_alloc_.get()); + return true; +} + +bool MIRGraph::EliminateDeadCode(BasicBlock* bb) { + DCHECK(temp_scoped_alloc_ != nullptr); + DCHECK(temp_.gvn.gvn != nullptr); + if (bb->block_type != kDalvikByteCode) { + return false; + } + DCHECK(temp_.gvn.dce != nullptr); + temp_.gvn.dce->Apply(bb); + return false; // No need to repeat. 
+} + +void MIRGraph::EliminateDeadCodeEnd() { + DCHECK_EQ(temp_.gvn.dce != nullptr, (cu_->disable_opt & (1 << kGvnDeadCodeElimination)) == 0); + if (temp_.gvn.dce != nullptr) { + delete temp_.gvn.dce; + temp_.gvn.dce = nullptr; + } delete temp_.gvn.gvn; temp_.gvn.gvn = nullptr; - temp_.gvn.ifield_ids_ = nullptr; - temp_.gvn.sfield_ids_ = nullptr; + temp_.gvn.ifield_ids = nullptr; + temp_.gvn.sfield_ids = nullptr; DCHECK(temp_scoped_alloc_ != nullptr); temp_scoped_alloc_.reset(); } @@ -1441,8 +1470,8 @@ void MIRGraph::InlineSpecialMethodsStart() { temp_.smi.processed_indexes = new (temp_scoped_alloc_.get()) ArenaBitVector( temp_scoped_alloc_.get(), temp_.smi.num_indexes, false, kBitMapMisc); temp_.smi.processed_indexes->ClearAllBits(); - temp_.smi.lowering_infos = static_cast<uint16_t*>(temp_scoped_alloc_->Alloc( - temp_.smi.num_indexes * sizeof(*temp_.smi.lowering_infos), kArenaAllocGrowableArray)); + temp_.smi.lowering_infos = + temp_scoped_alloc_->AllocArray<uint16_t>(temp_.smi.num_indexes, kArenaAllocGrowableArray); } void MIRGraph::InlineSpecialMethods(BasicBlock* bb) { @@ -1558,9 +1587,9 @@ bool MIRGraph::BuildExtendedBBList(class BasicBlock* bb) { void MIRGraph::BasicBlockOptimizationStart() { if ((cu_->disable_opt & (1 << kLocalValueNumbering)) == 0) { temp_scoped_alloc_.reset(ScopedArenaAllocator::Create(&cu_->arena_stack)); - temp_.gvn.ifield_ids_ = + temp_.gvn.ifield_ids = GlobalValueNumbering::PrepareGvnFieldIds(temp_scoped_alloc_.get(), ifield_lowering_infos_); - temp_.gvn.sfield_ids_ = + temp_.gvn.sfield_ids = GlobalValueNumbering::PrepareGvnFieldIds(temp_scoped_alloc_.get(), sfield_lowering_infos_); } } @@ -1586,8 +1615,8 @@ void MIRGraph::BasicBlockOptimization() { void MIRGraph::BasicBlockOptimizationEnd() { // Clean up after LVN. - temp_.gvn.ifield_ids_ = nullptr; - temp_.gvn.sfield_ids_ = nullptr; + temp_.gvn.ifield_ids = nullptr; + temp_.gvn.sfield_ids = nullptr; temp_scoped_alloc_.reset(); } @@ -1603,8 +1632,7 @@ bool MIRGraph::EliminateSuspendChecksGate() { temp_.sce.inliner = cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file); } - suspend_checks_in_loops_ = static_cast<uint32_t*>( - arena_->Alloc(GetNumBlocks() * sizeof(*suspend_checks_in_loops_), kArenaAllocMisc)); + suspend_checks_in_loops_ = arena_->AllocArray<uint32_t>(GetNumBlocks(), kArenaAllocMisc); return true; } @@ -1690,7 +1718,7 @@ void MIRGraph::EliminateSuspendChecksEnd() { temp_.sce.inliner = nullptr; } -bool MIRGraph::CanThrow(MIR* mir) { +bool MIRGraph::CanThrow(MIR* mir) const { if ((mir->dalvikInsn.FlagsOf() & Instruction::kThrow) == 0) { return false; } @@ -1724,7 +1752,6 @@ bool MIRGraph::CanThrow(MIR* mir) { // Non-throwing only if range check has been eliminated. return ((opt_flags & MIR_IGNORE_RANGE_CHECK) == 0); } else if (mir->dalvikInsn.opcode == Instruction::ARRAY_LENGTH || - mir->dalvikInsn.opcode == Instruction::FILL_ARRAY_DATA || static_cast<int>(mir->dalvikInsn.opcode) == kMirOpNullCheck) { // No more checks for these (null check was processed above). 
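// FILL_ARRAY_DATA is not part of this non-throwing list: even with the null check
// eliminated the fill itself can presumably still throw (e.g. if the data is longer
// than the array), so it no longer takes this early non-throwing exit.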
return false; diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc index 199bc27481..be05b80d83 100644 --- a/compiler/dex/mir_optimization_test.cc +++ b/compiler/dex/mir_optimization_test.cc @@ -280,7 +280,7 @@ class MirOptimizationTest : public testing::Test { void DoPrepareMIRs(const MIRDef* defs, size_t count) { mir_count_ = count; - mirs_ = reinterpret_cast<MIR*>(cu_.arena.Alloc(sizeof(MIR) * count, kArenaAllocMIR)); + mirs_ = cu_.arena.AllocArray<MIR>(count, kArenaAllocMIR); uint64_t merged_df_flags = 0u; for (size_t i = 0u; i != count; ++i) { const MIRDef* def = &defs[i]; diff --git a/compiler/dex/pass_driver_me_opts.cc b/compiler/dex/pass_driver_me_opts.cc index 8c8bde63ea..320d06aa06 100644 --- a/compiler/dex/pass_driver_me_opts.cc +++ b/compiler/dex/pass_driver_me_opts.cc @@ -45,6 +45,7 @@ void PassDriverMEOpts::SetupPasses(PassManager* pass_manager) { pass_manager->AddPass(new BBCombine); pass_manager->AddPass(new CodeLayout); pass_manager->AddPass(new GlobalValueNumberingPass); + pass_manager->AddPass(new DeadCodeEliminationPass); pass_manager->AddPass(new ConstantPropagation); pass_manager->AddPass(new MethodUseCount); pass_manager->AddPass(new BBOptimizations); diff --git a/compiler/dex/pass_driver_me_post_opt.cc b/compiler/dex/pass_driver_me_post_opt.cc index 4e1322702f..a8b8a54033 100644 --- a/compiler/dex/pass_driver_me_post_opt.cc +++ b/compiler/dex/pass_driver_me_post_opt.cc @@ -37,7 +37,7 @@ void PassDriverMEPostOpt::SetupPasses(PassManager* pass_manager) { pass_manager->AddPass(new InitializeSSATransformation); pass_manager->AddPass(new ClearPhiInstructions); pass_manager->AddPass(new DefBlockMatrix); - pass_manager->AddPass(new CreatePhiNodes); + pass_manager->AddPass(new FindPhiNodeBlocksPass); pass_manager->AddPass(new SSAConversion); pass_manager->AddPass(new PhiNodeOperands); pass_manager->AddPass(new PerformInitRegLocations); diff --git a/compiler/dex/post_opt_passes.h b/compiler/dex/post_opt_passes.h index a3dbc5a273..1ab862503b 100644 --- a/compiler/dex/post_opt_passes.h +++ b/compiler/dex/post_opt_passes.h @@ -189,19 +189,19 @@ class DefBlockMatrix : public PassMEMirSsaRep { }; /** - * @class CreatePhiNodes - * @brief Pass to create the phi nodes after SSA calculation + * @class FindPhiNodeBlocksPass + * @brief Pass to find out where we need to insert the phi nodes for the SSA conversion. 
*/ -class CreatePhiNodes : public PassMEMirSsaRep { +class FindPhiNodeBlocksPass : public PassMEMirSsaRep { public: - CreatePhiNodes() : PassMEMirSsaRep("CreatePhiNodes", kNoNodes) { + FindPhiNodeBlocksPass() : PassMEMirSsaRep("FindPhiNodeBlocks", kNoNodes) { } void Start(PassDataHolder* data) const { DCHECK(data != nullptr); CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit; DCHECK(c_unit != nullptr); - c_unit->mir_graph.get()->InsertPhiNodes(); + c_unit->mir_graph.get()->FindPhiNodeBlocks(); } }; diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc index f15b727857..9cf005bc48 100644 --- a/compiler/dex/quick/arm/call_arm.cc +++ b/compiler/dex/quick/arm/call_arm.cc @@ -52,16 +52,13 @@ namespace art { */ void ArmMir2Lir::GenLargeSparseSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src) { const uint16_t* table = mir_graph_->GetTable(mir, table_offset); - if (cu_->verbose) { - DumpSparseSwitchTable(table); - } // Add the table to the list - we'll process it later SwitchTable *tab_rec = static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData)); + tab_rec->switch_mir = mir; tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; uint32_t size = table[1]; - tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR)); switch_tables_.push_back(tab_rec); // Get the switch value @@ -100,17 +97,13 @@ void ArmMir2Lir::GenLargeSparseSwitch(MIR* mir, uint32_t table_offset, RegLocati void ArmMir2Lir::GenLargePackedSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src) { const uint16_t* table = mir_graph_->GetTable(mir, table_offset); - if (cu_->verbose) { - DumpPackedSwitchTable(table); - } // Add the table to the list - we'll process it later SwitchTable *tab_rec = static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData)); + tab_rec->switch_mir = mir; tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; uint32_t size = table[1]; - tab_rec->targets = - static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR)); switch_tables_.push_back(tab_rec); // Get the switch value @@ -491,6 +484,28 @@ void ArmMir2Lir::GenSpecialExitSequence() { NewLIR1(kThumbBx, rs_rARM_LR.GetReg()); } +void ArmMir2Lir::GenSpecialEntryForSuspend() { + // Keep 16-byte stack alignment - push r0, i.e. ArtMethod*, r5, r6, lr. + DCHECK(!IsTemp(rs_r5)); + DCHECK(!IsTemp(rs_r6)); + core_spill_mask_ = + (1u << rs_r5.GetRegNum()) | (1u << rs_r6.GetRegNum()) | (1u << rs_rARM_LR.GetRegNum()); + num_core_spills_ = 3u; + fp_spill_mask_ = 0u; + num_fp_spills_ = 0u; + frame_size_ = 16u; + core_vmap_table_.clear(); + fp_vmap_table_.clear(); + NewLIR1(kThumbPush, (1u << rs_r0.GetRegNum()) | // ArtMethod* + (core_spill_mask_ & ~(1u << rs_rARM_LR.GetRegNum())) | // Spills other than LR. + (1u << 8)); // LR encoded for 16-bit push. +} + +void ArmMir2Lir::GenSpecialExitForSuspend() { + // Pop the frame. (ArtMethod* no longer needed but restore it anyway.) + NewLIR1(kThumb2Pop, (1u << rs_r0.GetRegNum()) | core_spill_mask_); // 32-bit because of LR. +} + static bool ArmUseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) { // Emit relative calls only within a dex file due to the limited range of the BL insn. 
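// (For reference, the Thumb-2 BL immediate reaches only about +/-16 MiB.)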
return cu->dex_file == target_method.dex_file; diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h index 025e69f0ba..67fabbddfe 100644 --- a/compiler/dex/quick/arm/codegen_arm.h +++ b/compiler/dex/quick/arm/codegen_arm.h @@ -18,9 +18,9 @@ #define ART_COMPILER_DEX_QUICK_ARM_CODEGEN_ARM_H_ #include "arm_lir.h" +#include "base/arena_containers.h" #include "base/logging.h" #include "dex/quick/mir_to_lir.h" -#include "utils/arena_containers.h" namespace art { @@ -167,7 +167,9 @@ class ArmMir2Lir FINAL : public Mir2Lir { void GenDivZeroCheckWide(RegStorage reg); void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method); void GenExitSequence(); - void GenSpecialExitSequence(); + void GenSpecialExitSequence() OVERRIDE; + void GenSpecialEntryForSuspend() OVERRIDE; + void GenSpecialExitForSuspend() OVERRIDE; void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double); void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir); void GenSelect(BasicBlock* bb, MIR* mir); diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc index 6492442b94..24e8fdff80 100644 --- a/compiler/dex/quick/arm64/call_arm64.cc +++ b/compiler/dex/quick/arm64/call_arm64.cc @@ -51,16 +51,13 @@ namespace art { */ void Arm64Mir2Lir::GenLargeSparseSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src) { const uint16_t* table = mir_graph_->GetTable(mir, table_offset); - if (cu_->verbose) { - DumpSparseSwitchTable(table); - } // Add the table to the list - we'll process it later SwitchTable *tab_rec = static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData)); + tab_rec->switch_mir = mir; tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; uint32_t size = table[1]; - tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR)); switch_tables_.push_back(tab_rec); // Get the switch value @@ -103,17 +100,13 @@ void Arm64Mir2Lir::GenLargeSparseSwitch(MIR* mir, uint32_t table_offset, RegLoca void Arm64Mir2Lir::GenLargePackedSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src) { const uint16_t* table = mir_graph_->GetTable(mir, table_offset); - if (cu_->verbose) { - DumpPackedSwitchTable(table); - } // Add the table to the list - we'll process it later SwitchTable *tab_rec = static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData)); + tab_rec->switch_mir = mir; tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; uint32_t size = table[1]; - tab_rec->targets = - static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR)); switch_tables_.push_back(tab_rec); // Get the switch value @@ -399,10 +392,26 @@ void Arm64Mir2Lir::GenSpecialExitSequence() { NewLIR0(kA64Ret); } +void Arm64Mir2Lir::GenSpecialEntryForSuspend() { + // Keep 16-byte stack alignment - push x0, i.e. ArtMethod*, lr. + core_spill_mask_ = (1u << rs_xLR.GetRegNum()); + num_core_spills_ = 1u; + fp_spill_mask_ = 0u; + num_fp_spills_ = 0u; + frame_size_ = 16u; + core_vmap_table_.clear(); + fp_vmap_table_.clear(); + NewLIR4(WIDE(kA64StpPre4rrXD), rs_x0.GetReg(), rs_xLR.GetReg(), rs_sp.GetReg(), -frame_size_ / 8); +} + +void Arm64Mir2Lir::GenSpecialExitForSuspend() { + // Pop the frame. (ArtMethod* no longer needed but restore it anyway.) 
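// The scaled immediate of 64-bit STP/LDP pre-/post-indexing is encoded in 8-byte units,
// hence frame_size_ / 8 below and in the matching entry sequence above.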
+ NewLIR4(WIDE(kA64LdpPost4rrXD), rs_x0.GetReg(), rs_xLR.GetReg(), rs_sp.GetReg(), frame_size_ / 8); +} + static bool Arm64UseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) { - UNUSED(cu, target_method); - // Always emit relative calls. - return true; + // Emit relative calls anywhere in the image or within a dex file otherwise. + return cu->compiler_driver->IsImage() || cu->dex_file == target_method.dex_file; } /* diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h index 49ca625096..d5f0536691 100644 --- a/compiler/dex/quick/arm64/codegen_arm64.h +++ b/compiler/dex/quick/arm64/codegen_arm64.h @@ -169,6 +169,8 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) OVERRIDE; void GenExitSequence() OVERRIDE; void GenSpecialExitSequence() OVERRIDE; + void GenSpecialEntryForSuspend() OVERRIDE; + void GenSpecialExitForSuspend() OVERRIDE; void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double) OVERRIDE; void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) OVERRIDE; void GenSelect(BasicBlock* bb, MIR* mir) OVERRIDE; diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index 04113dba81..0be9fd4781 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -456,37 +456,29 @@ LIR* Mir2Lir::AddWideData(LIR* *constant_list_p, int val_lo, int val_hi) { return AddWordData(constant_list_p, val_lo); } -static void Push32(std::vector<uint8_t>&buf, int data) { - buf.push_back(data & 0xff); - buf.push_back((data >> 8) & 0xff); - buf.push_back((data >> 16) & 0xff); - buf.push_back((data >> 24) & 0xff); -} - /** * @brief Push a compressed reference which needs patching at link/patchoat-time. * @details This needs to be kept consistent with the code which actually does the patching in * oat_writer.cc and in the patchoat tool. */ -static void PushUnpatchedReference(std::vector<uint8_t>&buf) { +static void PushUnpatchedReference(CodeBuffer* buf) { // Note that we can safely initialize the patches to zero. The code deduplication mechanism takes // the patches into account when determining whether two pieces of codes are functionally // equivalent. Push32(buf, UINT32_C(0)); } -static void AlignBuffer(std::vector<uint8_t>&buf, size_t offset) { - while (buf.size() < offset) { - buf.push_back(0); - } +static void AlignBuffer(CodeBuffer* buf, size_t offset) { + DCHECK_LE(buf->size(), offset); + buf->insert(buf->end(), offset - buf->size(), 0u); } /* Write the literal pool to the output stream */ void Mir2Lir::InstallLiteralPools() { - AlignBuffer(code_buffer_, data_offset_); + AlignBuffer(&code_buffer_, data_offset_); LIR* data_lir = literal_list_; while (data_lir != nullptr) { - Push32(code_buffer_, data_lir->operands[0]); + Push32(&code_buffer_, data_lir->operands[0]); data_lir = NEXT_LIR(data_lir); } // TODO: patches_.reserve() as needed. 
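As a rough sketch (not the ART sources; the names only mirror the helpers above, and CodeBuffer is assumed to be an arena-backed byte vector), Push32 appends a 32-bit value in little-endian byte order and AlignBuffer zero-pads the buffer forward to a target offset:

  #include <cstddef>
  #include <cstdint>
  #include <vector>

  static void Push32(std::vector<uint8_t>* buf, uint32_t data) {
    buf->push_back(data & 0xff);           // Least significant byte first.
    buf->push_back((data >> 8) & 0xff);
    buf->push_back((data >> 16) & 0xff);
    buf->push_back((data >> 24) & 0xff);
  }

  static void AlignBuffer(std::vector<uint8_t>* buf, size_t offset) {
    // Alignment only ever grows the buffer; the real code DCHECKs size() <= offset.
    buf->insert(buf->end(), offset - buf->size(), 0u);
  }

Writing the literal pool, switch tables and fill-array data then reduces to an AlignBuffer call followed by a sequence of Push32/push_back calls, as in the hunks above and below.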
@@ -498,7 +490,7 @@ void Mir2Lir::InstallLiteralPools() { reinterpret_cast<const DexFile*>(UnwrapPointer(data_lir->operands[1])); patches_.push_back(LinkerPatch::CodePatch(code_buffer_.size(), target_dex_file, target_method_idx)); - PushUnpatchedReference(code_buffer_); + PushUnpatchedReference(&code_buffer_); data_lir = NEXT_LIR(data_lir); } data_lir = method_literal_list_; @@ -508,7 +500,7 @@ void Mir2Lir::InstallLiteralPools() { reinterpret_cast<const DexFile*>(UnwrapPointer(data_lir->operands[1])); patches_.push_back(LinkerPatch::MethodPatch(code_buffer_.size(), target_dex_file, target_method_idx)); - PushUnpatchedReference(code_buffer_); + PushUnpatchedReference(&code_buffer_); data_lir = NEXT_LIR(data_lir); } // Push class literals. @@ -519,7 +511,7 @@ void Mir2Lir::InstallLiteralPools() { reinterpret_cast<const DexFile*>(UnwrapPointer(data_lir->operands[1])); patches_.push_back(LinkerPatch::TypePatch(code_buffer_.size(), class_dex_file, target_type_idx)); - PushUnpatchedReference(code_buffer_); + PushUnpatchedReference(&code_buffer_); data_lir = NEXT_LIR(data_lir); } } @@ -527,7 +519,7 @@ void Mir2Lir::InstallLiteralPools() { /* Write the switch tables to the output stream */ void Mir2Lir::InstallSwitchTables() { for (Mir2Lir::SwitchTable* tab_rec : switch_tables_) { - AlignBuffer(code_buffer_, tab_rec->offset); + AlignBuffer(&code_buffer_, tab_rec->offset); /* * For Arm, our reference point is the address of the bx * instruction that does the launch, so we have to subtract @@ -557,29 +549,49 @@ void Mir2Lir::InstallSwitchTables() { LOG(INFO) << "Switch table for offset 0x" << std::hex << bx_offset; } if (tab_rec->table[0] == Instruction::kSparseSwitchSignature) { - const int32_t* keys = reinterpret_cast<const int32_t*>(&(tab_rec->table[2])); - for (int elems = 0; elems < tab_rec->table[1]; elems++) { - int disp = tab_rec->targets[elems]->offset - bx_offset; + DCHECK(tab_rec->switch_mir != nullptr); + BasicBlock* bb = mir_graph_->GetBasicBlock(tab_rec->switch_mir->bb); + DCHECK(bb != nullptr); + int elems = 0; + for (SuccessorBlockInfo* successor_block_info : bb->successor_blocks) { + int key = successor_block_info->key; + int target = successor_block_info->block; + LIR* boundary_lir = InsertCaseLabel(target, key); + DCHECK(boundary_lir != nullptr); + int disp = boundary_lir->offset - bx_offset; + Push32(&code_buffer_, key); + Push32(&code_buffer_, disp); if (cu_->verbose) { LOG(INFO) << " Case[" << elems << "] key: 0x" - << std::hex << keys[elems] << ", disp: 0x" + << std::hex << key << ", disp: 0x" << std::hex << disp; } - Push32(code_buffer_, keys[elems]); - Push32(code_buffer_, - tab_rec->targets[elems]->offset - bx_offset); + elems++; } + DCHECK_EQ(elems, tab_rec->table[1]); } else { DCHECK_EQ(static_cast<int>(tab_rec->table[0]), static_cast<int>(Instruction::kPackedSwitchSignature)); - for (int elems = 0; elems < tab_rec->table[1]; elems++) { - int disp = tab_rec->targets[elems]->offset - bx_offset; + DCHECK(tab_rec->switch_mir != nullptr); + BasicBlock* bb = mir_graph_->GetBasicBlock(tab_rec->switch_mir->bb); + DCHECK(bb != nullptr); + int elems = 0; + int low_key = s4FromSwitchData(&tab_rec->table[2]); + for (SuccessorBlockInfo* successor_block_info : bb->successor_blocks) { + int key = successor_block_info->key; + DCHECK_EQ(elems + low_key, key); + int target = successor_block_info->block; + LIR* boundary_lir = InsertCaseLabel(target, key); + DCHECK(boundary_lir != nullptr); + int disp = boundary_lir->offset - bx_offset; + Push32(&code_buffer_, disp); if (cu_->verbose) { 
LOG(INFO) << " Case[" << elems << "] disp: 0x" << std::hex << disp; } - Push32(code_buffer_, tab_rec->targets[elems]->offset - bx_offset); + elems++; } + DCHECK_EQ(elems, tab_rec->table[1]); } } } @@ -587,7 +599,7 @@ void Mir2Lir::InstallSwitchTables() { /* Write the fill array dta to the output stream */ void Mir2Lir::InstallFillArrayData() { for (Mir2Lir::FillArrayData* tab_rec : fill_array_data_) { - AlignBuffer(code_buffer_, tab_rec->offset); + AlignBuffer(&code_buffer_, tab_rec->offset); for (int i = 0; i < (tab_rec->size + 1) / 2; i++) { code_buffer_.push_back(tab_rec->table[i] & 0xFF); code_buffer_.push_back((tab_rec->table[i] >> 8) & 0xFF); @@ -830,58 +842,25 @@ int Mir2Lir::AssignFillArrayDataOffset(CodeOffset offset) { * branch table during the assembly phase. All resource flags * are set to prevent code motion. KeyVal is just there for debugging. */ -LIR* Mir2Lir::InsertCaseLabel(DexOffset vaddr, int keyVal) { - LIR* boundary_lir = &block_label_list_[mir_graph_->FindBlock(vaddr)->id]; +LIR* Mir2Lir::InsertCaseLabel(uint32_t bbid, int keyVal) { + LIR* boundary_lir = &block_label_list_[bbid]; LIR* res = boundary_lir; if (cu_->verbose) { // Only pay the expense if we're pretty-printing. LIR* new_label = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocLIR)); - new_label->dalvik_offset = vaddr; + BasicBlock* bb = mir_graph_->GetBasicBlock(bbid); + DCHECK(bb != nullptr); + new_label->dalvik_offset = bb->start_offset; new_label->opcode = kPseudoCaseLabel; new_label->operands[0] = keyVal; new_label->flags.fixup = kFixupLabel; DCHECK(!new_label->flags.use_def_invalid); new_label->u.m.def_mask = &kEncodeAll; InsertLIRAfter(boundary_lir, new_label); - res = new_label; } return res; } -void Mir2Lir::MarkPackedCaseLabels(Mir2Lir::SwitchTable* tab_rec) { - const uint16_t* table = tab_rec->table; - DexOffset base_vaddr = tab_rec->vaddr; - const int32_t *targets = reinterpret_cast<const int32_t*>(&table[4]); - int entries = table[1]; - int low_key = s4FromSwitchData(&table[2]); - for (int i = 0; i < entries; i++) { - tab_rec->targets[i] = InsertCaseLabel(base_vaddr + targets[i], i + low_key); - } -} - -void Mir2Lir::MarkSparseCaseLabels(Mir2Lir::SwitchTable* tab_rec) { - const uint16_t* table = tab_rec->table; - DexOffset base_vaddr = tab_rec->vaddr; - int entries = table[1]; - const int32_t* keys = reinterpret_cast<const int32_t*>(&table[2]); - const int32_t* targets = &keys[entries]; - for (int i = 0; i < entries; i++) { - tab_rec->targets[i] = InsertCaseLabel(base_vaddr + targets[i], keys[i]); - } -} - -void Mir2Lir::ProcessSwitchTables() { - for (Mir2Lir::SwitchTable* tab_rec : switch_tables_) { - if (tab_rec->table[0] == Instruction::kPackedSwitchSignature) { - MarkPackedCaseLabels(tab_rec); - } else if (tab_rec->table[0] == Instruction::kSparseSwitchSignature) { - MarkSparseCaseLabels(tab_rec); - } else { - LOG(FATAL) << "Invalid switch table"; - } - } -} - void Mir2Lir::DumpSparseSwitchTable(const uint16_t* table) { /* * Sparse switch data format: @@ -988,8 +967,11 @@ Mir2Lir::Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena estimated_native_code_size_(0), reg_pool_(nullptr), live_sreg_(0), + code_buffer_(mir_graph->GetArena()->Adapter()), + encoded_mapping_table_(mir_graph->GetArena()->Adapter()), core_vmap_table_(mir_graph->GetArena()->Adapter()), fp_vmap_table_(mir_graph->GetArena()->Adapter()), + native_gc_map_(mir_graph->GetArena()->Adapter()), patches_(mir_graph->GetArena()->Adapter()), num_core_spills_(0), num_fp_spills_(0), @@ -1032,9 +1014,6 @@ 
void Mir2Lir::Materialize() { /* Method is not empty */ if (first_lir_insn_) { - // mark the targets of switch statement case labels - ProcessSwitchTables(); - /* Convert LIR into machine code. */ AssembleLIR(); diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc index 9f53b89186..3c9b7a3ed3 100644 --- a/compiler/dex/quick/gen_common.cc +++ b/compiler/dex/quick/gen_common.cc @@ -413,7 +413,7 @@ void Mir2Lir::GenNewArray(uint32_t type_idx, RegLocation rl_dest, * Current code also throws internal unimp if not 'L', '[' or 'I'. */ void Mir2Lir::GenFilledNewArray(CallInfo* info) { - int elems = info->num_arg_words; + size_t elems = info->num_arg_words; int type_idx = info->index; FlushAllRegs(); /* Everything to home location */ QuickEntrypointEnum target; @@ -450,7 +450,7 @@ void Mir2Lir::GenFilledNewArray(CallInfo* info) { * of any regs in the source range that have been promoted to * home location. */ - for (int i = 0; i < elems; i++) { + for (size_t i = 0; i < elems; i++) { RegLocation loc = UpdateLoc(info->args[i]); if (loc.location == kLocPhysReg) { ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); @@ -493,7 +493,7 @@ void Mir2Lir::GenFilledNewArray(CallInfo* info) { OpRegRegImm(kOpAdd, r_dst, ref_reg, mirror::Array::DataOffset(component_size).Int32Value()); // Set up the loop counter (known to be > 0) - LoadConstant(r_idx, elems - 1); + LoadConstant(r_idx, static_cast<int>(elems - 1)); // Generate the copy loop. Going backwards for convenience LIR* loop_head_target = NewLIR0(kPseudoTargetLabel); // Copy next element @@ -515,9 +515,9 @@ void Mir2Lir::GenFilledNewArray(CallInfo* info) { FreeTemp(r_dst); FreeTemp(r_src); } else { - DCHECK_LE(elems, 5); // Usually but not necessarily non-range. + DCHECK_LE(elems, 5u); // Usually but not necessarily non-range. // TUNING: interleave - for (int i = 0; i < elems; i++) { + for (size_t i = 0; i < elems; i++) { RegLocation rl_arg; if (info->args[i].ref) { rl_arg = LoadValue(info->args[i], kRefReg); @@ -537,7 +537,7 @@ void Mir2Lir::GenFilledNewArray(CallInfo* info) { } if (elems != 0 && info->args[0].ref) { // If there is at least one potentially non-null value, unconditionally mark the GC card. - for (int i = 0; i < elems; i++) { + for (size_t i = 0; i < elems; i++) { if (!mir_graph_->IsConstantNullRef(info->args[i])) { UnconditionallyMarkGCCard(ref_reg); break; @@ -2158,7 +2158,7 @@ void Mir2Lir::GenConversionCall(QuickEntrypointEnum trampoline, RegLocation rl_d } } -class SuspendCheckSlowPath : public Mir2Lir::LIRSlowPath { +class Mir2Lir::SuspendCheckSlowPath : public Mir2Lir::LIRSlowPath { public: SuspendCheckSlowPath(Mir2Lir* m2l, LIR* branch, LIR* cont) : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, cont) { diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index bb5b0cdd22..8e3df7c7a2 100755 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -405,9 +405,10 @@ void Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { */ ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); RegLocation* t_loc = nullptr; + EnsureInitializedArgMappingToPhysicalReg(); for (uint32_t i = 0; i < mir_graph_->GetNumOfInVRs(); i += t_loc->wide ? 
2 : 1) { // get reg corresponding to input - RegStorage reg = GetArgMappingToPhysicalReg(i); + RegStorage reg = in_to_reg_storage_mapping_.GetReg(i); t_loc = &ArgLocs[i]; // If the wide input appeared as single, flush it and go @@ -661,7 +662,7 @@ void Mir2Lir::GenDalvikArgsFlushPromoted(CallInfo* info, int start) { } ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); // Scan the rest of the args - if in phys_reg flush to memory - for (int next_arg = start; next_arg < info->num_arg_words;) { + for (size_t next_arg = start; next_arg < info->num_arg_words;) { RegLocation loc = info->args[next_arg]; if (loc.wide) { loc = UpdateLocWide(loc); @@ -719,10 +720,10 @@ int Mir2Lir::GenDalvikArgs(CallInfo* info, int call_state, uint32_t vtable_idx, uintptr_t direct_code, uintptr_t direct_method, InvokeType type, bool skip_this) { // If no arguments, just return. - if (info->num_arg_words == 0) + if (info->num_arg_words == 0u) return call_state; - const int start_index = skip_this ? 1 : 0; + const size_t start_index = skip_this ? 1 : 0; // Get architecture dependent mapping between output VRs and physical registers // basing on shorty of method to call. @@ -733,13 +734,13 @@ int Mir2Lir::GenDalvikArgs(CallInfo* info, int call_state, in_to_reg_storage_mapping.Initialize(&shorty_iterator, GetResetedInToRegStorageMapper()); } - int stack_map_start = std::max(in_to_reg_storage_mapping.GetMaxMappedIn() + 1, start_index); + size_t stack_map_start = std::max(in_to_reg_storage_mapping.GetEndMappedIn(), start_index); if ((stack_map_start < info->num_arg_words) && info->args[stack_map_start].high_word) { // It is possible that the last mapped reg is 32 bit while arg is 64-bit. // It will be handled together with low part mapped to register. stack_map_start++; } - int regs_left_to_pass_via_stack = info->num_arg_words - stack_map_start; + size_t regs_left_to_pass_via_stack = info->num_arg_words - stack_map_start; // If it is a range case we can try to copy remaining VRs (not mapped to physical registers) // using more optimal algorithm. @@ -755,11 +756,10 @@ int Mir2Lir::GenDalvikArgs(CallInfo* info, int call_state, RegStorage regRef = TargetReg(kArg3, kRef); RegStorage regSingle = TargetReg(kArg3, kNotWide); RegStorage regWide = TargetReg(kArg2, kWide); - for (int i = start_index; - i < stack_map_start + regs_left_to_pass_via_stack; i++) { + for (size_t i = start_index; i < stack_map_start + regs_left_to_pass_via_stack; i++) { RegLocation rl_arg = info->args[i]; rl_arg = UpdateRawLoc(rl_arg); - RegStorage reg = in_to_reg_storage_mapping.Get(i); + RegStorage reg = in_to_reg_storage_mapping.GetReg(i); if (!reg.Valid()) { int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set); { @@ -799,10 +799,10 @@ int Mir2Lir::GenDalvikArgs(CallInfo* info, int call_state, } // Finish with VRs mapped to physical registers. - for (int i = start_index; i < stack_map_start; i++) { + for (size_t i = start_index; i < stack_map_start; i++) { RegLocation rl_arg = info->args[i]; rl_arg = UpdateRawLoc(rl_arg); - RegStorage reg = in_to_reg_storage_mapping.Get(i); + RegStorage reg = in_to_reg_storage_mapping.GetReg(i); if (reg.Valid()) { if (rl_arg.wide) { // if reg is not 64-bit (it is half of 64-bit) then handle it separately. 
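A small illustrative sketch (hypothetical helper, not part of the Mir2Lir API) of how the hunks above split the outgoing argument words: words below the end of the in-to-register mapping stay in physical registers, the rest are flushed to the outs area on the stack, and a wide pair straddling the boundary is kept together with its register-mapped low half:

  #include <algorithm>
  #include <cstddef>

  // Returns the index of the first argument word that is passed via the stack.
  //   num_arg_words      - plays the role of info->num_arg_words
  //   skip_this          - the first word is handled separately when set
  //   end_mapped_in      - end of the register-mapped range (cf. GetEndMappedIn() above)
  //   high_word_at_split - whether the word at the split is the high half of a wide pair
  size_t FirstStackArgWord(size_t num_arg_words, bool skip_this, size_t end_mapped_in,
                           bool high_word_at_split) {
    const size_t start_index = skip_this ? 1u : 0u;
    size_t stack_map_start = std::max(end_mapped_in, start_index);
    if (stack_map_start < num_arg_words && high_word_at_split) {
      // The low half got the last register, so the high half is handled with it.
      ++stack_map_start;
    }
    return stack_map_start;  // Words [stack_map_start, num_arg_words) go via the stack.
  }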
@@ -852,12 +852,11 @@ int Mir2Lir::GenDalvikArgs(CallInfo* info, int call_state, return call_state; } -RegStorage Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) { +void Mir2Lir::EnsureInitializedArgMappingToPhysicalReg() { if (!in_to_reg_storage_mapping_.IsInitialized()) { ShortyIterator shorty_iterator(cu_->shorty, cu_->invoke_type == kStatic); in_to_reg_storage_mapping_.Initialize(&shorty_iterator, GetResetedInToRegStorageMapper()); } - return in_to_reg_storage_mapping_.Get(arg_num); } RegLocation Mir2Lir::InlineTarget(CallInfo* info) { diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc index 9f36e35f5e..db844bcde9 100644 --- a/compiler/dex/quick/gen_loadstore.cc +++ b/compiler/dex/quick/gen_loadstore.cc @@ -44,7 +44,9 @@ LIR* Mir2Lir::LoadConstant(RegStorage r_dest, int value) { void Mir2Lir::Workaround7250540(RegLocation rl_dest, RegStorage zero_reg) { if (rl_dest.fp) { int pmap_index = SRegToPMap(rl_dest.s_reg_low); - if (promotion_map_[pmap_index].fp_location == kLocPhysReg) { + const bool is_fp_promoted = promotion_map_[pmap_index].fp_location == kLocPhysReg; + const bool is_core_promoted = promotion_map_[pmap_index].core_location == kLocPhysReg; + if (is_fp_promoted || is_core_promoted) { // Now, determine if this vreg is ever used as a reference. If not, we're done. bool used_as_reference = false; int base_vreg = mir_graph_->SRegToVReg(rl_dest.s_reg_low); @@ -61,7 +63,7 @@ void Mir2Lir::Workaround7250540(RegLocation rl_dest, RegStorage zero_reg) { temp_reg = AllocTemp(); LoadConstant(temp_reg, 0); } - if (promotion_map_[pmap_index].core_location == kLocPhysReg) { + if (is_core_promoted) { // Promoted - just copy in a zero OpRegCopy(RegStorage::Solo32(promotion_map_[pmap_index].core_reg), temp_reg); } else { diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc index ccfdaf60bb..d9471f6fd1 100644 --- a/compiler/dex/quick/mips/call_mips.cc +++ b/compiler/dex/quick/mips/call_mips.cc @@ -68,17 +68,13 @@ bool MipsMir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& s */ void MipsMir2Lir::GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) { const uint16_t* table = mir_graph_->GetTable(mir, table_offset); - if (cu_->verbose) { - DumpSparseSwitchTable(table); - } // Add the table to the list - we'll process it later SwitchTable* tab_rec = static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData)); + tab_rec->switch_mir = mir; tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; int elements = table[1]; - tab_rec->targets = - static_cast<LIR**>(arena_->Alloc(elements * sizeof(LIR*), kArenaAllocLIR)); switch_tables_.push_back(tab_rec); // The table is composed of 8-byte key/disp pairs @@ -145,17 +141,13 @@ void MipsMir2Lir::GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLoca */ void MipsMir2Lir::GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) { const uint16_t* table = mir_graph_->GetTable(mir, table_offset); - if (cu_->verbose) { - DumpPackedSwitchTable(table); - } // Add the table to the list - we'll process it later SwitchTable* tab_rec = static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData)); + tab_rec->switch_mir = mir; tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; int size = table[1]; - tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), - kArenaAllocLIR)); switch_tables_.push_back(tab_rec); // Get the switch value @@ -323,6 +315,26 @@ void 
MipsMir2Lir::GenSpecialExitSequence() { OpReg(kOpBx, rs_rRA); } +void MipsMir2Lir::GenSpecialEntryForSuspend() { + // Keep 16-byte stack alignment - push A0, i.e. ArtMethod*, 2 filler words and RA. + core_spill_mask_ = (1u << rs_rRA.GetRegNum()); + num_core_spills_ = 1u; + fp_spill_mask_ = 0u; + num_fp_spills_ = 0u; + frame_size_ = 16u; + core_vmap_table_.clear(); + fp_vmap_table_.clear(); + OpRegImm(kOpSub, rs_rMIPS_SP, frame_size_); + Store32Disp(rs_rMIPS_SP, frame_size_ - 4, rs_rRA); + Store32Disp(rs_rMIPS_SP, 0, rs_rA0); +} + +void MipsMir2Lir::GenSpecialExitForSuspend() { + // Pop the frame. Don't pop ArtMethod*, it's no longer needed. + Load32Disp(rs_rMIPS_SP, frame_size_ - 4, rs_rRA); + OpRegImm(kOpAdd, rs_rMIPS_SP, frame_size_); +} + /* * Bit of a hack here - in the absence of a real scheduling pass, * emit the next instruction in static & direct invoke sequences. diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h index a37fe40cfa..e1b43ca848 100644 --- a/compiler/dex/quick/mips/codegen_mips.h +++ b/compiler/dex/quick/mips/codegen_mips.h @@ -141,7 +141,9 @@ class MipsMir2Lir FINAL : public Mir2Lir { void GenDivZeroCheckWide(RegStorage reg); void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method); void GenExitSequence(); - void GenSpecialExitSequence(); + void GenSpecialExitSequence() OVERRIDE; + void GenSpecialEntryForSuspend() OVERRIDE; + void GenSpecialExitForSuspend() OVERRIDE; void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double); void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir); void GenSelect(BasicBlock* bb, MIR* mir); diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc index 6f6bf68fea..ec6edabdbd 100644 --- a/compiler/dex/quick/mips/utility_mips.cc +++ b/compiler/dex/quick/mips/utility_mips.cc @@ -56,7 +56,8 @@ LIR* MipsMir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) { } bool MipsMir2Lir::InexpensiveConstantInt(int32_t value) { - return ((value == 0) || IsUint(16, value) || ((value < 0) && (value >= -32768))); + // For encodings, see LoadConstantNoClobber below. + return ((value == 0) || IsUint<16>(value) || IsInt<16>(value)); } bool MipsMir2Lir::InexpensiveConstantFloat(int32_t value) { @@ -96,9 +97,11 @@ LIR* MipsMir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) { /* See if the value can be constructed cheaply */ if (value == 0) { res = NewLIR2(kMipsMove, r_dest.GetReg(), rZERO); - } else if ((value > 0) && (value <= 65535)) { + } else if (IsUint<16>(value)) { + // Use OR with (unsigned) immediate to encode 16b unsigned int. res = NewLIR3(kMipsOri, r_dest.GetReg(), rZERO, value); - } else if ((value < 0) && (value >= -32768)) { + } else if (IsInt<16>(value)) { + // Use ADD with (signed) immediate to encode 16b signed int. res = NewLIR3(kMipsAddiu, r_dest.GetReg(), rZERO, value); } else { res = NewLIR2(kMipsLui, r_dest.GetReg(), value >> 16); diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index 274e078399..34e5e25efe 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -24,6 +24,69 @@ namespace art { +class Mir2Lir::SpecialSuspendCheckSlowPath : public Mir2Lir::LIRSlowPath { + public: + SpecialSuspendCheckSlowPath(Mir2Lir* m2l, LIR* branch, LIR* cont) + : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, cont), + num_used_args_(0u) { + } + + void PreserveArg(int in_position) { + // Avoid duplicates. 
+ for (size_t i = 0; i != num_used_args_; ++i) { + if (used_args_[i] == in_position) { + return; + } + } + DCHECK_LT(num_used_args_, kMaxArgsToPreserve); + used_args_[num_used_args_] = in_position; + ++num_used_args_; + } + + void Compile() OVERRIDE { + m2l_->ResetRegPool(); + m2l_->ResetDefTracking(); + GenerateTargetLabel(kPseudoSuspendTarget); + + m2l_->LockCallTemps(); + + // Generate frame. + m2l_->GenSpecialEntryForSuspend(); + + // Spill all args. + for (size_t i = 0, end = m2l_->in_to_reg_storage_mapping_.GetEndMappedIn(); i < end; + i += m2l_->in_to_reg_storage_mapping_.GetShorty(i).IsWide() ? 2u : 1u) { + m2l_->SpillArg(i); + } + + m2l_->FreeCallTemps(); + + // Do the actual suspend call to runtime. + m2l_->CallRuntimeHelper(kQuickTestSuspend, true); + + m2l_->LockCallTemps(); + + // Unspill used regs. (Don't unspill unused args.) + for (size_t i = 0; i != num_used_args_; ++i) { + m2l_->UnspillArg(used_args_[i]); + } + + // Pop the frame. + m2l_->GenSpecialExitForSuspend(); + + // Branch to the continue label. + DCHECK(cont_ != nullptr); + m2l_->OpUnconditionalBranch(cont_); + + m2l_->FreeCallTemps(); + } + + private: + static constexpr size_t kMaxArgsToPreserve = 2u; + size_t num_used_args_; + int used_args_[kMaxArgsToPreserve]; +}; + RegisterClass Mir2Lir::ShortyToRegClass(char shorty_type) { RegisterClass res; switch (shorty_type) { @@ -54,15 +117,15 @@ RegisterClass Mir2Lir::LocToRegClass(RegLocation loc) { return res; } -void Mir2Lir::LockArg(int in_position, bool) { - RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position); +void Mir2Lir::LockArg(size_t in_position) { + RegStorage reg_arg = in_to_reg_storage_mapping_.GetReg(in_position); if (reg_arg.Valid()) { LockTemp(reg_arg); } } -RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) { +RegStorage Mir2Lir::LoadArg(size_t in_position, RegisterClass reg_class, bool wide) { ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set); @@ -82,7 +145,7 @@ RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) offset += sizeof(uint64_t); } - RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position); + RegStorage reg_arg = in_to_reg_storage_mapping_.GetReg(in_position); // TODO: REVISIT: This adds a spill of low part while we could just copy it. if (reg_arg.Valid() && wide && (reg_arg.GetWideKind() == kNotWide)) { @@ -112,7 +175,7 @@ RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) return reg_arg; } -void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) { +void Mir2Lir::LoadArgDirect(size_t in_position, RegLocation rl_dest) { DCHECK_EQ(rl_dest.location, kLocPhysReg); ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set); @@ -132,7 +195,7 @@ void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) { offset += sizeof(uint64_t); } - RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position); + RegStorage reg_arg = in_to_reg_storage_mapping_.GetReg(in_position); // TODO: REVISIT: This adds a spill of low part while we could just copy it. 
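For orientation, a sketch (not from the patch) of the offset convention shared by these helpers: LoadArg/LoadArgDirect read an incoming VR at StackVisitor::GetOutVROffset(...) from SP, while the SpillArg/UnspillArg pair introduced below runs after GenSpecialEntryForSuspend has pushed a frame and therefore adds frame_size_ to the same per-argument offset. The stand-in OutVROffset() here is illustrative only; the real computation is ISA-specific.

#include <cstddef>

// Illustrative stand-in: byte offset of incoming VR `in_position` in the out area.
// The real value comes from StackVisitor::GetOutVROffset() and is ISA-specific.
int OutVROffset(size_t in_position) {
  return static_cast<int>(in_position) * 4;  // assume 4-byte VR slots
}

// LoadArg/LoadArgDirect: special methods normally run frameless, so the
// incoming VRs sit directly at their out-area offsets from SP.
int ArgOffsetOnEntry(size_t in_position) {
  return OutVROffset(in_position);
}

// SpillArg/UnspillArg: the suspend path has pushed a frame of `frame_size`
// bytes, so the same VRs are now `frame_size` bytes further from SP.
int ArgOffsetAfterSuspendEntry(size_t in_position, int frame_size) {
  return frame_size + OutVROffset(in_position);
}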
if (reg_arg.Valid() && rl_dest.wide && (reg_arg.GetWideKind() == kNotWide)) { @@ -153,6 +216,41 @@ void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) { } } +void Mir2Lir::SpillArg(size_t in_position) { + RegStorage reg_arg = in_to_reg_storage_mapping_.GetReg(in_position); + + if (reg_arg.Valid()) { + int offset = frame_size_ + StackVisitor::GetOutVROffset(in_position, cu_->instruction_set); + ShortyArg arg = in_to_reg_storage_mapping_.GetShorty(in_position); + OpSize size = arg.IsRef() ? kReference : + (arg.IsWide() && reg_arg.GetWideKind() == kWide) ? k64 : k32; + StoreBaseDisp(TargetPtrReg(kSp), offset, reg_arg, size, kNotVolatile); + } +} + +void Mir2Lir::UnspillArg(size_t in_position) { + RegStorage reg_arg = in_to_reg_storage_mapping_.GetReg(in_position); + + if (reg_arg.Valid()) { + int offset = frame_size_ + StackVisitor::GetOutVROffset(in_position, cu_->instruction_set); + ShortyArg arg = in_to_reg_storage_mapping_.GetShorty(in_position); + OpSize size = arg.IsRef() ? kReference : + (arg.IsWide() && reg_arg.GetWideKind() == kWide) ? k64 : k32; + LoadBaseDisp(TargetPtrReg(kSp), offset, reg_arg, size, kNotVolatile); + } +} + +Mir2Lir::SpecialSuspendCheckSlowPath* Mir2Lir::GenSpecialSuspendTest() { + LockCallTemps(); + LIR* branch = OpTestSuspend(nullptr); + FreeCallTemps(); + LIR* cont = NewLIR0(kPseudoTargetLabel); + SpecialSuspendCheckSlowPath* slow_path = + new (arena_) SpecialSuspendCheckSlowPath(this, branch, cont); + AddSlowPath(slow_path); + return slow_path; +} + bool Mir2Lir::GenSpecialIGet(MIR* mir, const InlineMethod& special) { // FastInstance() already checked by DexFileMethodInliner. const InlineIGetIPutData& data = special.d.ifield_data; @@ -161,13 +259,16 @@ bool Mir2Lir::GenSpecialIGet(MIR* mir, const InlineMethod& special) { return false; } - OpSize size = k32; + OpSize size; switch (data.op_variant) { - case InlineMethodAnalyser::IGetVariant(Instruction::IGET_OBJECT): - size = kReference; + case InlineMethodAnalyser::IGetVariant(Instruction::IGET): + size = in_to_reg_storage_mapping_.GetShorty(data.src_arg).IsFP() ? kSingle : k32; break; case InlineMethodAnalyser::IGetVariant(Instruction::IGET_WIDE): - size = k64; + size = in_to_reg_storage_mapping_.GetShorty(data.src_arg).IsFP() ? 
kDouble : k64; + break; + case InlineMethodAnalyser::IGetVariant(Instruction::IGET_OBJECT): + size = kReference; break; case InlineMethodAnalyser::IGetVariant(Instruction::IGET_SHORT): size = kSignedHalf; @@ -181,11 +282,18 @@ bool Mir2Lir::GenSpecialIGet(MIR* mir, const InlineMethod& special) { case InlineMethodAnalyser::IGetVariant(Instruction::IGET_BOOLEAN): size = kUnsignedByte; break; + default: + LOG(FATAL) << "Unknown variant: " << data.op_variant; + UNREACHABLE(); } // Point of no return - no aborts after this - GenPrintLabel(mir); + if (!kLeafOptimization) { + auto* slow_path = GenSpecialSuspendTest(); + slow_path->PreserveArg(data.object_arg); + } LockArg(data.object_arg); + GenPrintLabel(mir); RegStorage reg_obj = LoadArg(data.object_arg, kRefReg); RegisterClass reg_class = RegClassForFieldLoadStore(size, data.is_volatile); RegisterClass ret_reg_class = ShortyToRegClass(cu_->shorty[0]); @@ -223,13 +331,16 @@ bool Mir2Lir::GenSpecialIPut(MIR* mir, const InlineMethod& special) { return false; } - OpSize size = k32; + OpSize size; switch (data.op_variant) { - case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_OBJECT): - size = kReference; + case InlineMethodAnalyser::IPutVariant(Instruction::IPUT): + size = in_to_reg_storage_mapping_.GetShorty(data.src_arg).IsFP() ? kSingle : k32; break; case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_WIDE): - size = k64; + size = in_to_reg_storage_mapping_.GetShorty(data.src_arg).IsFP() ? kDouble : k64; + break; + case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_OBJECT): + size = kReference; break; case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_SHORT): size = kSignedHalf; @@ -243,12 +354,20 @@ bool Mir2Lir::GenSpecialIPut(MIR* mir, const InlineMethod& special) { case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_BOOLEAN): size = kUnsignedByte; break; + default: + LOG(FATAL) << "Unknown variant: " << data.op_variant; + UNREACHABLE(); } // Point of no return - no aborts after this - GenPrintLabel(mir); + if (!kLeafOptimization) { + auto* slow_path = GenSpecialSuspendTest(); + slow_path->PreserveArg(data.object_arg); + slow_path->PreserveArg(data.src_arg); + } LockArg(data.object_arg); - LockArg(data.src_arg, IsWide(size)); + LockArg(data.src_arg); + GenPrintLabel(mir); RegStorage reg_obj = LoadArg(data.object_arg, kRefReg); RegisterClass reg_class = RegClassForFieldLoadStore(size, data.is_volatile); RegStorage reg_src = LoadArg(data.src_arg, reg_class, IsWide(size)); @@ -269,8 +388,12 @@ bool Mir2Lir::GenSpecialIdentity(MIR* mir, const InlineMethod& special) { bool wide = (data.is_wide != 0u); // Point of no return - no aborts after this + if (!kLeafOptimization) { + auto* slow_path = GenSpecialSuspendTest(); + slow_path->PreserveArg(data.arg); + } + LockArg(data.arg); GenPrintLabel(mir); - LockArg(data.arg, wide); RegisterClass reg_class = ShortyToRegClass(cu_->shorty[0]); RegLocation rl_dest = wide ? 
GetReturnWide(reg_class) : GetReturn(reg_class); LoadArgDirect(data.arg, rl_dest); @@ -285,15 +408,22 @@ bool Mir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& speci current_dalvik_offset_ = mir->offset; MIR* return_mir = nullptr; bool successful = false; + EnsureInitializedArgMappingToPhysicalReg(); switch (special.opcode) { case kInlineOpNop: successful = true; DCHECK_EQ(mir->dalvikInsn.opcode, Instruction::RETURN_VOID); + if (!kLeafOptimization) { + GenSpecialSuspendTest(); + } return_mir = mir; break; case kInlineOpNonWideConst: { successful = true; + if (!kLeafOptimization) { + GenSpecialSuspendTest(); + } RegLocation rl_dest = GetReturn(ShortyToRegClass(cu_->shorty[0])); GenPrintLabel(mir); LoadConstant(rl_dest.reg, static_cast<int>(special.d.data)); @@ -333,13 +463,17 @@ bool Mir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& speci } GenSpecialExitSequence(); - core_spill_mask_ = 0; - num_core_spills_ = 0; - fp_spill_mask_ = 0; - num_fp_spills_ = 0; - frame_size_ = 0; - core_vmap_table_.clear(); - fp_vmap_table_.clear(); + if (!kLeafOptimization) { + HandleSlowPaths(); + } else { + core_spill_mask_ = 0; + num_core_spills_ = 0; + fp_spill_mask_ = 0; + num_fp_spills_ = 0; + frame_size_ = 0; + core_vmap_table_.clear(); + fp_vmap_table_.clear(); + } } return successful; @@ -1195,9 +1329,7 @@ void Mir2Lir::MethodMIR2LIR() { cu_->NewTimingSplit("MIR2LIR"); // Hold the labels of each block. - block_label_list_ = - static_cast<LIR*>(arena_->Alloc(sizeof(LIR) * mir_graph_->GetNumBlocks(), - kArenaAllocLIR)); + block_label_list_ = arena_->AllocArray<LIR>(mir_graph_->GetNumBlocks(), kArenaAllocLIR); PreOrderDfsIterator iter(mir_graph_); BasicBlock* curr_bb = iter.Next(); @@ -1289,31 +1421,41 @@ void Mir2Lir::InToRegStorageMapping::Initialize(ShortyIterator* shorty, InToRegStorageMapper* mapper) { DCHECK(mapper != nullptr); DCHECK(shorty != nullptr); - max_mapped_in_ = -1; - has_arguments_on_stack_ = false; + DCHECK(!IsInitialized()); + DCHECK_EQ(end_mapped_in_, 0u); + DCHECK(!has_arguments_on_stack_); while (shorty->Next()) { ShortyArg arg = shorty->GetArg(); RegStorage reg = mapper->GetNextReg(arg); + mapping_.emplace_back(arg, reg); + if (arg.IsWide()) { + mapping_.emplace_back(ShortyArg(kInvalidShorty), RegStorage::InvalidReg()); + } if (reg.Valid()) { - mapping_.Put(count_, reg); - max_mapped_in_ = count_; - // If the VR is wide and was mapped as wide then account for it. - if (arg.IsWide() && reg.Is64Bit()) { - max_mapped_in_++; + end_mapped_in_ = mapping_.size(); + // If the VR is wide but wasn't mapped as wide then account for it. + if (arg.IsWide() && !reg.Is64Bit()) { + --end_mapped_in_; } } else { has_arguments_on_stack_ = true; } - count_ += arg.IsWide() ? 2 : 1; } initialized_ = true; } -RegStorage Mir2Lir::InToRegStorageMapping::Get(int in_position) { +RegStorage Mir2Lir::InToRegStorageMapping::GetReg(size_t in_position) { + DCHECK(IsInitialized()); + DCHECK_LT(in_position, mapping_.size()); + DCHECK_NE(mapping_[in_position].first.GetType(), kInvalidShorty); + return mapping_[in_position].second; +} + +Mir2Lir::ShortyArg Mir2Lir::InToRegStorageMapping::GetShorty(size_t in_position) { DCHECK(IsInitialized()); - DCHECK_LT(in_position, count_); - auto res = mapping_.find(in_position); - return res != mapping_.end() ? 
res->second : RegStorage::InvalidReg(); + DCHECK_LT(static_cast<size_t>(in_position), mapping_.size()); + DCHECK_NE(mapping_[in_position].first.GetType(), kInvalidShorty); + return mapping_[in_position].first; } } // namespace art diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index 888c34eb24..6f3f057038 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -17,6 +17,9 @@ #ifndef ART_COMPILER_DEX_QUICK_MIR_TO_LIR_H_ #define ART_COMPILER_DEX_QUICK_MIR_TO_LIR_H_ +#include "base/arena_allocator.h" +#include "base/arena_containers.h" +#include "base/arena_object.h" #include "compiled_method.h" #include "dex/compiler_enums.h" #include "dex/dex_flags.h" @@ -29,9 +32,6 @@ #include "leb128.h" #include "safe_map.h" #include "utils/array_ref.h" -#include "utils/arena_allocator.h" -#include "utils/arena_containers.h" -#include "utils/arena_object.h" #include "utils/stack_checks.h" namespace art { @@ -146,7 +146,7 @@ typedef int (*NextCallInsn)(CompilationUnit*, CallInfo*, int, uint32_t method_idx, uintptr_t direct_code, uintptr_t direct_method, InvokeType type); -typedef std::vector<uint8_t> CodeBuffer; +typedef ArenaVector<uint8_t> CodeBuffer; typedef uint32_t CodeOffset; // Native code offset in bytes. struct UseDefMasks { @@ -224,7 +224,7 @@ class Mir2Lir { struct SwitchTable : EmbeddedData { LIR* anchor; // Reference instruction for relative offsets. - LIR** targets; // Array of case targets. + MIR* switch_mir; // The switch mir. }; /* Static register use counts */ @@ -515,6 +515,9 @@ class Mir2Lir { LIR* const cont_; }; + class SuspendCheckSlowPath; + class SpecialSuspendCheckSlowPath; + // Helper class for changing mem_ref_type_ until the end of current scope. See mem_ref_type_. class ScopedMemRefType { public: @@ -592,7 +595,7 @@ class Mir2Lir { // strdup(), but allocates from the arena. char* ArenaStrdup(const char* str) { size_t len = strlen(str) + 1; - char* res = reinterpret_cast<char*>(arena_->Alloc(len, kArenaAllocMisc)); + char* res = arena_->AllocArray<char>(len, kArenaAllocMisc); if (res != NULL) { strncpy(res, str, len); } @@ -653,7 +656,6 @@ class Mir2Lir { LIR* ScanLiteralPoolClass(LIR* data_target, const DexFile& dex_file, uint32_t type_idx); LIR* AddWordData(LIR* *constant_list_p, int value); LIR* AddWideData(LIR* *constant_list_p, int val_lo, int val_hi); - void ProcessSwitchTables(); void DumpSparseSwitchTable(const uint16_t* table); void DumpPackedSwitchTable(const uint16_t* table); void MarkBoundary(DexOffset offset, const char* inst_str); @@ -671,9 +673,7 @@ class Mir2Lir { int AssignLiteralOffset(CodeOffset offset); int AssignSwitchTablesOffset(CodeOffset offset); int AssignFillArrayDataOffset(CodeOffset offset); - virtual LIR* InsertCaseLabel(DexOffset vaddr, int keyVal); - virtual void MarkPackedCaseLabels(Mir2Lir::SwitchTable* tab_rec); - void MarkSparseCaseLabels(Mir2Lir::SwitchTable* tab_rec); + LIR* InsertCaseLabel(uint32_t bbid, int keyVal); // Handle bookkeeping to convert a wide RegLocation to a narrow RegLocation. No code generated. 
virtual RegLocation NarrowRegLoc(RegLocation loc); @@ -1206,7 +1206,7 @@ class Mir2Lir { } } - RegStorage GetArgMappingToPhysicalReg(int arg_num); + void EnsureInitializedArgMappingToPhysicalReg(); virtual RegLocation GetReturnAlt() = 0; virtual RegLocation GetReturnWideAlt() = 0; virtual RegLocation LocCReturn() = 0; @@ -1573,6 +1573,16 @@ class Mir2Lir { virtual void GenSpecialExitSequence() = 0; /** + * @brief Used to generate stack frame for suspend path of special methods. + */ + virtual void GenSpecialEntryForSuspend() = 0; + + /** + * @brief Used to pop the stack frame for suspend path of special methods. + */ + virtual void GenSpecialExitForSuspend() = 0; + + /** * @brief Used to generate code for special methods that are known to be * small enough to work in frameless mode. * @param bb The basic block of the first MIR. @@ -1593,9 +1603,8 @@ class Mir2Lir { * @brief Used to lock register if argument at in_position was passed that way. * @details Does nothing if the argument is passed via stack. * @param in_position The argument number whose register to lock. - * @param wide Whether the argument is wide. */ - void LockArg(int in_position, bool wide = false); + void LockArg(size_t in_position); /** * @brief Used to load VR argument to a physical register. @@ -1605,14 +1614,33 @@ class Mir2Lir { * @param wide Whether the argument is 64-bit or not. * @return Returns the register (or register pair) for the loaded argument. */ - RegStorage LoadArg(int in_position, RegisterClass reg_class, bool wide = false); + RegStorage LoadArg(size_t in_position, RegisterClass reg_class, bool wide = false); /** * @brief Used to load a VR argument directly to a specified register location. * @param in_position The argument number to place in register. * @param rl_dest The register location where to place argument. */ - void LoadArgDirect(int in_position, RegLocation rl_dest); + void LoadArgDirect(size_t in_position, RegLocation rl_dest); + + /** + * @brief Used to spill register if argument at in_position was passed that way. + * @details Does nothing if the argument is passed via stack. + * @param in_position The argument number whose register to spill. + */ + void SpillArg(size_t in_position); + + /** + * @brief Used to unspill register if argument at in_position was passed that way. + * @details Does nothing if the argument is passed via stack. + * @param in_position The argument number whose register to spill. + */ + void UnspillArg(size_t in_position); + + /** + * @brief Generate suspend test in a special method. + */ + SpecialSuspendCheckSlowPath* GenSpecialSuspendTest(); /** * @brief Used to generate LIR for special getter method. @@ -1745,10 +1773,10 @@ class Mir2Lir { // The source mapping table data (pc -> dex). More entries than in encoded_mapping_table_ DefaultSrcMap src_mapping_table_; // The encoding mapping table data (dex -> pc offset and pc offset -> dex) with a size prefix. 
- std::vector<uint8_t> encoded_mapping_table_; + ArenaVector<uint8_t> encoded_mapping_table_; ArenaVector<uint32_t> core_vmap_table_; ArenaVector<uint32_t> fp_vmap_table_; - std::vector<uint8_t> native_gc_map_; + ArenaVector<uint8_t> native_gc_map_; ArenaVector<LinkerPatch> patches_; int num_core_spills_; int num_fp_spills_; @@ -1805,21 +1833,22 @@ class Mir2Lir { class InToRegStorageMapping { public: explicit InToRegStorageMapping(ArenaAllocator* arena) - : mapping_(std::less<int>(), arena->Adapter()), count_(0), - max_mapped_in_(0), has_arguments_on_stack_(false), initialized_(false) {} + : mapping_(arena->Adapter()), + end_mapped_in_(0u), has_arguments_on_stack_(false), initialized_(false) {} void Initialize(ShortyIterator* shorty, InToRegStorageMapper* mapper); /** - * @return the index of last VR mapped to physical register. In other words - * any VR starting from (return value + 1) index is mapped to memory. + * @return the past-the-end index of VRs mapped to physical registers. + * In other words any VR starting from this index is mapped to memory. */ - int GetMaxMappedIn() { return max_mapped_in_; } + size_t GetEndMappedIn() { return end_mapped_in_; } bool HasArgumentsOnStack() { return has_arguments_on_stack_; } - RegStorage Get(int in_position); + RegStorage GetReg(size_t in_position); + ShortyArg GetShorty(size_t in_position); bool IsInitialized() { return initialized_; } private: - ArenaSafeMap<int, RegStorage> mapping_; - int count_; - int max_mapped_in_; + static constexpr char kInvalidShorty = '-'; + ArenaVector<std::pair<ShortyArg, RegStorage>> mapping_; + size_t end_mapped_in_; bool has_arguments_on_stack_; bool initialized_; }; diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc index 909077eca2..19c2a5a3a3 100644 --- a/compiler/dex/quick/quick_compiler.cc +++ b/compiler/dex/quick/quick_compiler.cc @@ -560,6 +560,7 @@ static uint32_t kCompilerOptimizerDisableFlags = 0 | // Disable specific optimi // (1 << kNullCheckElimination) | // (1 << kClassInitCheckElimination) | // (1 << kGlobalValueNumbering) | + (1 << kGvnDeadCodeElimination) | // (1 << kLocalValueNumbering) | // (1 << kPromoteRegs) | // (1 << kTrackLiveTemps) | diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc index 8efafb23fe..67fb8040f7 100644 --- a/compiler/dex/quick/ralloc_util.cc +++ b/compiler/dex/quick/ralloc_util.cc @@ -1191,8 +1191,7 @@ void Mir2Lir::DoPromotion() { int num_regs = mir_graph_->GetNumOfCodeAndTempVRs(); const int promotion_threshold = 1; // Allocate the promotion map - one entry for each Dalvik vReg or compiler temp - promotion_map_ = static_cast<PromotionMap*> - (arena_->Alloc(num_regs * sizeof(promotion_map_[0]), kArenaAllocRegAlloc)); + promotion_map_ = arena_->AllocArray<PromotionMap>(num_regs, kArenaAllocRegAlloc); // Allow target code to add any special registers AdjustSpillMask(); @@ -1210,12 +1209,8 @@ void Mir2Lir::DoPromotion() { */ size_t core_reg_count_size = WideGPRsAreAliases() ? num_regs : num_regs * 2; size_t fp_reg_count_size = WideFPRsAreAliases() ? 
num_regs : num_regs * 2; - RefCounts *core_regs = - static_cast<RefCounts*>(arena_->Alloc(sizeof(RefCounts) * core_reg_count_size, - kArenaAllocRegAlloc)); - RefCounts *fp_regs = - static_cast<RefCounts *>(arena_->Alloc(sizeof(RefCounts) * fp_reg_count_size, - kArenaAllocRegAlloc)); + RefCounts *core_regs = arena_->AllocArray<RefCounts>(core_reg_count_size, kArenaAllocRegAlloc); + RefCounts *fp_regs = arena_->AllocArray<RefCounts>(fp_reg_count_size, kArenaAllocRegAlloc); // Set ssa names for original Dalvik registers for (int i = 0; i < num_regs; i++) { core_regs[i].s_reg = fp_regs[i].s_reg = i; diff --git a/compiler/dex/quick/resource_mask.cc b/compiler/dex/quick/resource_mask.cc index 8a27ecb94f..57e8af32a2 100644 --- a/compiler/dex/quick/resource_mask.cc +++ b/compiler/dex/quick/resource_mask.cc @@ -18,8 +18,8 @@ #include "resource_mask.h" +#include "base/arena_allocator.h" #include "base/logging.h" -#include "utils/arena_allocator.h" #include "utils.h" namespace art { diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc index aa0972f861..c3db3a64e5 100644 --- a/compiler/dex/quick/x86/call_x86.cc +++ b/compiler/dex/quick/x86/call_x86.cc @@ -37,84 +37,6 @@ void X86Mir2Lir::GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocat } /* - * We override InsertCaseLabel, because the first parameter represents - * a basic block id, instead of a dex offset. - */ -LIR* X86Mir2Lir::InsertCaseLabel(DexOffset bbid, int keyVal) { - LIR* boundary_lir = &block_label_list_[bbid]; - LIR* res = boundary_lir; - if (cu_->verbose) { - // Only pay the expense if we're pretty-printing. - LIR* new_label = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocLIR)); - BasicBlock* bb = mir_graph_->GetBasicBlock(bbid); - DCHECK(bb != nullptr); - new_label->dalvik_offset = bb->start_offset;; - new_label->opcode = kPseudoCaseLabel; - new_label->operands[0] = keyVal; - new_label->flags.fixup = kFixupLabel; - DCHECK(!new_label->flags.use_def_invalid); - new_label->u.m.def_mask = &kEncodeAll; - InsertLIRAfter(boundary_lir, new_label); - res = new_label; - } - return res; -} - -void X86Mir2Lir::MarkPackedCaseLabels(Mir2Lir::SwitchTable* tab_rec) { - const uint16_t* table = tab_rec->table; - const int32_t *targets = reinterpret_cast<const int32_t*>(&table[4]); - int entries = table[1]; - int low_key = s4FromSwitchData(&table[2]); - for (int i = 0; i < entries; i++) { - // The value at targets[i] is a basic block id, instead of a dex offset. - tab_rec->targets[i] = InsertCaseLabel(targets[i], i + low_key); - } -} - -/* - * We convert and create a new packed switch table that stores - * basic block ids to targets[] by examining successor blocks. - * Note that the original packed switch table stores dex offsets to targets[]. - */ -const uint16_t* X86Mir2Lir::ConvertPackedSwitchTable(MIR* mir, const uint16_t* table) { - /* - * The original packed switch data format: - * ushort ident = 0x0100 magic value - * ushort size number of entries in the table - * int first_key first (and lowest) switch case value - * int targets[size] branch targets, relative to switch opcode - * - * Total size is (4+size*2) 16-bit code units. - * - * Note that the new packed switch data format is the same as the original - * format, except that targets[] are basic block ids. - * - */ - BasicBlock* bb = mir_graph_->GetBasicBlock(mir->bb); - DCHECK(bb != nullptr); - // Get the number of entries. 
- int entries = table[1]; - const int32_t* as_int32 = reinterpret_cast<const int32_t*>(&table[2]); - int32_t starting_key = as_int32[0]; - // Create a new table. - int size = sizeof(uint16_t) * (4 + entries * 2); - uint16_t* new_table = reinterpret_cast<uint16_t*>(arena_->Alloc(size, kArenaAllocMisc)); - // Copy ident, size, and first_key to the new table. - memcpy(new_table, table, sizeof(uint16_t) * 4); - // Get the new targets. - int32_t* new_targets = reinterpret_cast<int32_t*>(&new_table[4]); - // Find out targets for each entry. - int i = 0; - for (SuccessorBlockInfo* successor_block_info : bb->successor_blocks) { - DCHECK_EQ(starting_key + i, successor_block_info->key); - // Save target basic block id. - new_targets[i++] = successor_block_info->block; - } - DCHECK_EQ(i, entries); - return new_table; -} - -/* * Code pattern will look something like: * * mov r_val, .. @@ -131,16 +53,14 @@ const uint16_t* X86Mir2Lir::ConvertPackedSwitchTable(MIR* mir, const uint16_t* t * done: */ void X86Mir2Lir::GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) { - const uint16_t* old_table = mir_graph_->GetTable(mir, table_offset); - const uint16_t* table = ConvertPackedSwitchTable(mir, old_table); + const uint16_t* table = mir_graph_->GetTable(mir, table_offset); // Add the table to the list - we'll process it later SwitchTable* tab_rec = static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData)); + tab_rec->switch_mir = mir; tab_rec->table = table; tab_rec->vaddr = current_dalvik_offset_; int size = table[1]; - tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), - kArenaAllocLIR)); switch_tables_.push_back(tab_rec); // Get the switch value @@ -352,6 +272,41 @@ void X86Mir2Lir::GenSpecialExitSequence() { NewLIR0(kX86Ret); } +void X86Mir2Lir::GenSpecialEntryForSuspend() { + // Keep 16-byte stack alignment, there's already the return address, so + // - for 32-bit push EAX, i.e. ArtMethod*, ESI, EDI, + // - for 64-bit push RAX, i.e. ArtMethod*. + if (!cu_->target64) { + DCHECK(!IsTemp(rs_rSI)); + DCHECK(!IsTemp(rs_rDI)); + core_spill_mask_ = + (1u << rs_rDI.GetRegNum()) | (1u << rs_rSI.GetRegNum()) | (1u << rs_rRET.GetRegNum()); + num_core_spills_ = 3u; + } else { + core_spill_mask_ = (1u << rs_rRET.GetRegNum()); + num_core_spills_ = 1u; + } + fp_spill_mask_ = 0u; + num_fp_spills_ = 0u; + frame_size_ = 16u; + core_vmap_table_.clear(); + fp_vmap_table_.clear(); + if (!cu_->target64) { + NewLIR1(kX86Push32R, rs_rDI.GetReg()); + NewLIR1(kX86Push32R, rs_rSI.GetReg()); + } + NewLIR1(kX86Push32R, TargetReg(kArg0, kRef).GetReg()); // ArtMethod* +} + +void X86Mir2Lir::GenSpecialExitForSuspend() { + // Pop the frame. (ArtMethod* no longer needed but restore it anyway.) 
+ NewLIR1(kX86Pop32R, TargetReg(kArg0, kRef).GetReg()); // ArtMethod* + if (!cu_->target64) { + NewLIR1(kX86Pop32R, rs_rSI.GetReg()); + NewLIR1(kX86Pop32R, rs_rDI.GetReg()); + } +} + void X86Mir2Lir::GenImplicitNullCheck(RegStorage reg, int opt_flags) { if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) { return; diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 811d4f5d7b..20163b4b76 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -259,6 +259,8 @@ class X86Mir2Lir : public Mir2Lir { void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) OVERRIDE; void GenExitSequence() OVERRIDE; void GenSpecialExitSequence() OVERRIDE; + void GenSpecialEntryForSuspend() OVERRIDE; + void GenSpecialExitForSuspend() OVERRIDE; void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double) OVERRIDE; void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) OVERRIDE; void GenSelect(BasicBlock* bb, MIR* mir) OVERRIDE; @@ -271,11 +273,8 @@ class X86Mir2Lir : public Mir2Lir { int first_bit, int second_bit) OVERRIDE; void GenNegDouble(RegLocation rl_dest, RegLocation rl_src) OVERRIDE; void GenNegFloat(RegLocation rl_dest, RegLocation rl_src) OVERRIDE; - const uint16_t* ConvertPackedSwitchTable(MIR* mir, const uint16_t* table); void GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE; void GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE; - LIR* InsertCaseLabel(DexOffset vaddr, int keyVal) OVERRIDE; - void MarkPackedCaseLabels(Mir2Lir::SwitchTable* tab_rec) OVERRIDE; /** * @brief Implement instanceof a final class with x86 specific code. diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index 4fe7a43a85..91168c78bd 100755 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -863,22 +863,29 @@ bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { RegLocation rl_src1 = info->args[0]; RegLocation rl_src2 = info->args[2]; RegLocation rl_dest = InlineTargetWide(info); - int res_vreg, src1_vreg, src2_vreg; if (rl_dest.s_reg_low == INVALID_SREG) { // Result is unused, the code is dead. Inlining successful, no code generated. return true; } + if (PartiallyIntersects(rl_src1, rl_dest) && + PartiallyIntersects(rl_src2, rl_dest)) { + // A special case which we don't want to handle. + // This is when src1 is mapped on v0 and v1, + // src2 is mapped on v2, v3, + // result is mapped on v1, v2 + return false; + } + + /* * If the result register is the same as the second element, then we * need to be careful. The reason is that the first copy will * inadvertently clobber the second element with the first one thus * yielding the wrong result. Thus we do a swap in that case. */ - res_vreg = mir_graph_->SRegToVReg(rl_dest.s_reg_low); - src2_vreg = mir_graph_->SRegToVReg(rl_src2.s_reg_low); - if (res_vreg == src2_vreg) { + if (Intersects(rl_src2, rl_dest)) { std::swap(rl_src1, rl_src2); } @@ -893,19 +900,30 @@ bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { * nothing else to do because they are equal and we have already * moved one into the result. 
*/ - src1_vreg = mir_graph_->SRegToVReg(rl_src1.s_reg_low); - src2_vreg = mir_graph_->SRegToVReg(rl_src2.s_reg_low); - if (src1_vreg == src2_vreg) { + if (mir_graph_->SRegToVReg(rl_src1.s_reg_low) == + mir_graph_->SRegToVReg(rl_src2.s_reg_low)) { StoreValueWide(rl_dest, rl_result); return true; } // Free registers to make some room for the second operand. - // But don't try to free ourselves or promoted registers. - if (res_vreg != src1_vreg && - IsTemp(rl_src1.reg.GetLow()) && IsTemp(rl_src1.reg.GetHigh())) { - FreeTemp(rl_src1.reg); + // But don't try to free part of a source which intersects + // part of result or promoted registers. + + if (IsTemp(rl_src1.reg.GetLow()) && + (rl_src1.reg.GetLowReg() != rl_result.reg.GetHighReg()) && + (rl_src1.reg.GetLowReg() != rl_result.reg.GetLowReg())) { + // Is low part temporary and doesn't intersect any parts of result? + FreeTemp(rl_src1.reg.GetLow()); } + + if (IsTemp(rl_src1.reg.GetHigh()) && + (rl_src1.reg.GetHighReg() != rl_result.reg.GetLowReg()) && + (rl_src1.reg.GetHighReg() != rl_result.reg.GetHighReg())) { + // Is high part temporary and doesn't intersect any parts of result? + FreeTemp(rl_src1.reg.GetHigh()); + } + rl_src2 = LoadValueWide(rl_src2, kCoreReg); // Do we have a free register for intermediate calculations? @@ -939,12 +957,15 @@ bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { // Let's put pop 'edi' here to break a bit the dependency chain. if (tmp == rs_rDI) { NewLIR1(kX86Pop32R, tmp.GetReg()); + } else { + FreeTemp(tmp); } // Conditionally move the other integer into the destination register. ConditionCode cc = is_min ? kCondGe : kCondLt; OpCondRegReg(kOpCmov, cc, rl_result.reg.GetLow(), rl_src2.reg.GetLow()); OpCondRegReg(kOpCmov, cc, rl_result.reg.GetHigh(), rl_src2.reg.GetHigh()); + FreeTemp(rl_src2.reg); StoreValueWide(rl_dest, rl_result); return true; } diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index c4adb09248..8f97d1e7c8 100755 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -1051,10 +1051,10 @@ void X86Mir2Lir::InstallLiteralPools() { } for (LIR *p = const_vectors_; p != nullptr; p = p->next) { - PushWord(&code_buffer_, p->operands[0]); - PushWord(&code_buffer_, p->operands[1]); - PushWord(&code_buffer_, p->operands[2]); - PushWord(&code_buffer_, p->operands[3]); + Push32(&code_buffer_, p->operands[0]); + Push32(&code_buffer_, p->operands[1]); + Push32(&code_buffer_, p->operands[2]); + Push32(&code_buffer_, p->operands[3]); } } diff --git a/compiler/dex/ssa_transformation.cc b/compiler/dex/ssa_transformation.cc index 6bd49de989..197f66d017 100644 --- a/compiler/dex/ssa_transformation.cc +++ b/compiler/dex/ssa_transformation.cc @@ -16,9 +16,9 @@ #include "base/bit_vector-inl.h" #include "base/logging.h" +#include "base/scoped_arena_containers.h" #include "compiler_ir.h" #include "dataflow_iterator-inl.h" -#include "utils/scoped_arena_containers.h" #define NOTVISITED (-1) @@ -137,8 +137,8 @@ void MIRGraph::ComputeDefBlockMatrix() { /* Allocate num_registers bit vector pointers */ DCHECK(temp_scoped_alloc_ != nullptr); DCHECK(temp_.ssa.def_block_matrix == nullptr); - temp_.ssa.def_block_matrix = static_cast<ArenaBitVector**>( - temp_scoped_alloc_->Alloc(sizeof(ArenaBitVector*) * num_registers, kArenaAllocDFInfo)); + temp_.ssa.def_block_matrix = + temp_scoped_alloc_->AllocArray<ArenaBitVector*>(num_registers, kArenaAllocDFInfo); int i; /* Initialize num_register vectors with num_blocks bits each */ @@ 
-363,8 +363,7 @@ void MIRGraph::ComputeDominators() { /* Initialize & Clear i_dom_list */ if (max_num_reachable_blocks_ < num_reachable_blocks_) { - i_dom_list_ = static_cast<int*>(arena_->Alloc(sizeof(int) * num_reachable_blocks, - kArenaAllocDFInfo)); + i_dom_list_ = arena_->AllocArray<int>(num_reachable_blocks, kArenaAllocDFInfo); } for (int i = 0; i < num_reachable_blocks; i++) { i_dom_list_[i] = NOTVISITED; @@ -463,24 +462,28 @@ bool MIRGraph::ComputeBlockLiveIns(BasicBlock* bb) { return false; } -/* Insert phi nodes to for each variable to the dominance frontiers */ -void MIRGraph::InsertPhiNodes() { - int dalvik_reg; - ArenaBitVector* phi_blocks = new (temp_scoped_alloc_.get()) ArenaBitVector( - temp_scoped_alloc_.get(), GetNumBlocks(), false, kBitMapPhi); - ArenaBitVector* input_blocks = new (temp_scoped_alloc_.get()) ArenaBitVector( - temp_scoped_alloc_.get(), GetNumBlocks(), false, kBitMapInputBlocks); - +/* For each dalvik reg, find blocks that need phi nodes according to the dominance frontiers. */ +void MIRGraph::FindPhiNodeBlocks() { RepeatingPostOrderDfsIterator iter(this); bool change = false; for (BasicBlock* bb = iter.Next(false); bb != NULL; bb = iter.Next(change)) { change = ComputeBlockLiveIns(bb); } + ArenaBitVector* phi_blocks = new (temp_scoped_alloc_.get()) ArenaBitVector( + temp_scoped_alloc_.get(), GetNumBlocks(), false, kBitMapBMatrix); + + // Reuse the def_block_matrix storage for phi_node_blocks. + ArenaBitVector** def_block_matrix = temp_.ssa.def_block_matrix; + ArenaBitVector** phi_node_blocks = def_block_matrix; + DCHECK(temp_.ssa.phi_node_blocks == nullptr); + temp_.ssa.phi_node_blocks = phi_node_blocks; + temp_.ssa.def_block_matrix = nullptr; + /* Iterate through each Dalvik register */ - for (dalvik_reg = GetNumOfCodeAndTempVRs() - 1; dalvik_reg >= 0; dalvik_reg--) { - input_blocks->Copy(temp_.ssa.def_block_matrix[dalvik_reg]); + for (int dalvik_reg = GetNumOfCodeAndTempVRs() - 1; dalvik_reg >= 0; dalvik_reg--) { phi_blocks->ClearAllBits(); + ArenaBitVector* input_blocks = def_block_matrix[dalvik_reg]; do { // TUNING: When we repeat this, we could skip indexes from the previous pass. for (uint32_t idx : input_blocks->Indexes()) { @@ -491,23 +494,8 @@ void MIRGraph::InsertPhiNodes() { } } while (input_blocks->Union(phi_blocks)); - /* - * Insert a phi node for dalvik_reg in the phi_blocks if the Dalvik - * register is in the live-in set. - */ - for (uint32_t idx : phi_blocks->Indexes()) { - BasicBlock* phi_bb = GetBasicBlock(idx); - /* Variable will be clobbered before being used - no need for phi */ - if (!phi_bb->data_flow_info->live_in_v->IsBitSet(dalvik_reg)) { - continue; - } - MIR *phi = NewMIR(); - phi->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpPhi); - phi->dalvikInsn.vA = dalvik_reg; - phi->offset = phi_bb->start_offset; - phi->m_unit_index = 0; // Arbitrarily assign all Phi nodes to outermost method. - phi_bb->PrependMIR(phi); - } + def_block_matrix[dalvik_reg] = phi_blocks; + phi_blocks = input_blocks; // Reuse the bit vector in next iteration. 
} } @@ -528,9 +516,7 @@ bool MIRGraph::InsertPhiNodeOperands(BasicBlock* bb) { size_t num_uses = bb->predecessors.size(); AllocateSSAUseData(mir, num_uses); int* uses = mir->ssa_rep->uses; - BasicBlockId* incoming = - static_cast<BasicBlockId*>(arena_->Alloc(sizeof(BasicBlockId) * num_uses, - kArenaAllocDFInfo)); + BasicBlockId* incoming = arena_->AllocArray<BasicBlockId>(num_uses, kArenaAllocDFInfo); mir->meta.phi_incoming = incoming; int idx = 0; for (BasicBlockId pred_id : bb->predecessors) { @@ -553,12 +539,12 @@ void MIRGraph::DoDFSPreOrderSSARename(BasicBlock* block) { /* Process this block */ DoSSAConversion(block); - int map_size = sizeof(int) * GetNumOfCodeAndTempVRs(); /* Save SSA map snapshot */ ScopedArenaAllocator allocator(&cu_->arena_stack); - int* saved_ssa_map = - static_cast<int*>(allocator.Alloc(map_size, kArenaAllocDalvikToSSAMap)); + uint32_t num_vregs = GetNumOfCodeAndTempVRs(); + int32_t* saved_ssa_map = allocator.AllocArray<int32_t>(num_vregs, kArenaAllocDalvikToSSAMap); + size_t map_size = sizeof(saved_ssa_map[0]) * num_vregs; memcpy(saved_ssa_map, vreg_to_ssa_map_, map_size); if (block->fall_through != NullBasicBlockId) { diff --git a/compiler/dex/vreg_analysis.cc b/compiler/dex/vreg_analysis.cc index f70850a332..b620969ae2 100644 --- a/compiler/dex/vreg_analysis.cc +++ b/compiler/dex/vreg_analysis.cc @@ -440,8 +440,7 @@ void MIRGraph::InitRegLocations() { // the temp allocation initializes reg location as well (in order to deal with // case when it will be called after this pass). int max_regs = GetNumSSARegs() + GetMaxPossibleCompilerTemps(); - RegLocation* loc = static_cast<RegLocation*>(arena_->Alloc(max_regs * sizeof(*loc), - kArenaAllocRegAlloc)); + RegLocation* loc = arena_->AllocArray<RegLocation>(max_regs, kArenaAllocRegAlloc); for (int i = 0; i < GetNumSSARegs(); i++) { loc[i] = fresh_loc; loc[i].s_reg_low = i; diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 2d8c9d4a9e..b8a893649b 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -1285,7 +1285,15 @@ void CompilerDriver::GetCodeAndMethodForDirectCall(InvokeType* type, InvokeType *stats_flags |= kFlagDirectCallToBoot | kFlagDirectMethodToBoot; } if (!use_dex_cache && force_relocations) { - if (!IsImage() || !IsImageClass(method->GetDeclaringClassDescriptor())) { + bool is_in_image; + if (IsImage()) { + is_in_image = IsImageClass(method->GetDeclaringClassDescriptor()); + } else { + is_in_image = instruction_set_ != kX86 && instruction_set_ != kX86_64 && + Runtime::Current()->GetHeap()->FindSpaceFromObject(method->GetDeclaringClass(), + false)->IsImageSpace(); + } + if (!is_in_image) { // We can only branch directly to Methods that are resolved in the DexCache. // Otherwise we won't invoke the resolution trampoline. 
use_dex_cache = true; diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index 2fca2e52f4..b7562442d7 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -22,6 +22,7 @@ #include <vector> #include "arch/instruction_set.h" +#include "base/arena_allocator.h" #include "base/mutex.h" #include "base/timing_logger.h" #include "class_reference.h" @@ -38,7 +39,6 @@ #include "runtime.h" #include "safe_map.h" #include "thread_pool.h" -#include "utils/arena_allocator.h" #include "utils/dedupe_set.h" #include "utils/swap_space.h" #include "utils.h" diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc index 9ec4f281cb..401d5a951d 100644 --- a/compiler/elf_writer_quick.cc +++ b/compiler/elf_writer_quick.cc @@ -90,19 +90,19 @@ std::vector<uint8_t>* ConstructCIEFrameX86(bool is_x86_64) { // Length (will be filled in later in this routine). if (is_x86_64) { - PushWord(cfi_info, 0xffffffff); // Indicates 64bit - PushWord(cfi_info, 0); - PushWord(cfi_info, 0); + Push32(cfi_info, 0xffffffff); // Indicates 64bit + Push32(cfi_info, 0); + Push32(cfi_info, 0); } else { - PushWord(cfi_info, 0); + Push32(cfi_info, 0); } // CIE id: always 0. if (is_x86_64) { - PushWord(cfi_info, 0); - PushWord(cfi_info, 0); + Push32(cfi_info, 0); + Push32(cfi_info, 0); } else { - PushWord(cfi_info, 0); + Push32(cfi_info, 0); } // Version: always 1. @@ -318,7 +318,7 @@ class LineTableGenerator FINAL : public Leb128Encoder { PushByte(data_, 0); // extended opcode: PushByte(data_, 1 + 4); // length: opcode_size + address_size PushByte(data_, DW_LNE_set_address); - PushWord(data_, addr); + Push32(data_, addr); } void SetLine(unsigned line) { @@ -507,13 +507,13 @@ static void FillInCFIInformation(OatWriter* oat_writer, // Start the debug_info section with the header information // 'unit_length' will be filled in later. int cunit_length = dbg_info->size(); - PushWord(dbg_info, 0); + Push32(dbg_info, 0); // 'version' - 3. PushHalf(dbg_info, 3); // Offset into .debug_abbrev section (always 0). - PushWord(dbg_info, 0); + Push32(dbg_info, 0); // Address size: 4. PushByte(dbg_info, 4); @@ -523,7 +523,7 @@ static void FillInCFIInformation(OatWriter* oat_writer, PushByte(dbg_info, 1); // The producer is Android dex2oat. - PushWord(dbg_info, producer_str_offset); + Push32(dbg_info, producer_str_offset); // The language is Java. PushByte(dbg_info, DW_LANG_Java); @@ -532,8 +532,8 @@ static void FillInCFIInformation(OatWriter* oat_writer, uint32_t cunit_low_pc = 0 - 1; uint32_t cunit_high_pc = 0; int cunit_low_pc_pos = dbg_info->size(); - PushWord(dbg_info, 0); - PushWord(dbg_info, 0); + Push32(dbg_info, 0); + Push32(dbg_info, 0); if (dbg_line == nullptr) { for (size_t i = 0; i < method_info.size(); ++i) { @@ -546,9 +546,9 @@ static void FillInCFIInformation(OatWriter* oat_writer, PushByte(dbg_info, 2); // Enter name, low_pc, high_pc. - PushWord(dbg_info, PushStr(dbg_str, dbg.method_name_)); - PushWord(dbg_info, dbg.low_pc_ + text_section_offset); - PushWord(dbg_info, dbg.high_pc_ + text_section_offset); + Push32(dbg_info, PushStr(dbg_str, dbg.method_name_)); + Push32(dbg_info, dbg.low_pc_ + text_section_offset); + Push32(dbg_info, dbg.high_pc_ + text_section_offset); } } else { // TODO: in gdb info functions <regexp> - reports Java functions, but @@ -559,15 +559,15 @@ static void FillInCFIInformation(OatWriter* oat_writer, // method ranges. 
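A side note on the PushWord-to-Push32 rename used throughout this file and in target_x86.cc above: the behaviour is unchanged, both append a 32-bit value to the byte buffer least-significant byte first. Below is a self-contained sketch of such a helper, written as a template so it also accepts the ArenaVector-based CodeBuffer; the real declaration elsewhere in the tree may differ.

#include <cstdint>
#include <vector>

// Append a 32-bit value, least significant byte first.
template <typename Vector>
void Push32Sketch(Vector* buf, int32_t data) {
  uint32_t value = static_cast<uint32_t>(data);
  buf->push_back(static_cast<uint8_t>(value & 0xff));
  buf->push_back(static_cast<uint8_t>((value >> 8) & 0xff));
  buf->push_back(static_cast<uint8_t>((value >> 16) & 0xff));
  buf->push_back(static_cast<uint8_t>((value >> 24) & 0xff));
}

// Usage mirroring the DWARF writer above.
void Example(std::vector<uint8_t>* cfi_info) {
  Push32Sketch(cfi_info, 0);                                  // length placeholder
  Push32Sketch(cfi_info, static_cast<int32_t>(0xffffffffu));  // 64-bit DWARF initial length marker
}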
// Line number table offset - PushWord(dbg_info, dbg_line->size()); + Push32(dbg_info, dbg_line->size()); size_t lnt_length = dbg_line->size(); - PushWord(dbg_line, 0); + Push32(dbg_line, 0); PushHalf(dbg_line, 4); // LNT Version DWARF v4 => 4 size_t lnt_hdr_length = dbg_line->size(); - PushWord(dbg_line, 0); // TODO: 64-bit uses 8-byte here + Push32(dbg_line, 0); // TODO: 64-bit uses 8-byte here PushByte(dbg_line, 1); // minimum_instruction_length (ubyte) PushByte(dbg_line, 1); // maximum_operations_per_instruction (ubyte) = always 1 @@ -629,9 +629,9 @@ static void FillInCFIInformation(OatWriter* oat_writer, PushByte(dbg_info, 2); // Enter name, low_pc, high_pc. - PushWord(dbg_info, PushStr(dbg_str, dbg.method_name_)); - PushWord(dbg_info, dbg.low_pc_ + text_section_offset); - PushWord(dbg_info, dbg.high_pc_ + text_section_offset); + Push32(dbg_info, PushStr(dbg_str, dbg.method_name_)); + Push32(dbg_info, dbg.low_pc_ + text_section_offset); + Push32(dbg_info, dbg.high_pc_ + text_section_offset); GetLineInfoForJava(dbg.dbgstream_, dbg.compiled_method_->GetSrcMappingTable(), &pc2java_map, dbg.low_pc_); diff --git a/compiler/gc_map_builder.h b/compiler/gc_map_builder.h index bc8ad41608..4c36ef733c 100644 --- a/compiler/gc_map_builder.h +++ b/compiler/gc_map_builder.h @@ -26,15 +26,17 @@ namespace art { class GcMapBuilder { public: - GcMapBuilder(std::vector<uint8_t>* table, size_t entries, uint32_t max_native_offset, + template <typename Alloc> + GcMapBuilder(std::vector<uint8_t, Alloc>* table, size_t entries, uint32_t max_native_offset, size_t references_width) : entries_(entries), references_width_(entries != 0u ? references_width : 0u), native_offset_width_(entries != 0 && max_native_offset != 0 ? sizeof(max_native_offset) - CLZ(max_native_offset) / 8u : 0u), - in_use_(entries), table_(table) { + in_use_(entries) { // Resize table and set up header. table->resize((EntryWidth() * entries) + sizeof(uint32_t)); + table_ = table->data(); CHECK_LT(native_offset_width_, 1U << 3); (*table)[0] = native_offset_width_ & 7; CHECK_LT(references_width_, 1U << 13); @@ -65,7 +67,7 @@ class GcMapBuilder { uint32_t native_offset = 0; size_t table_offset = (table_index * EntryWidth()) + sizeof(uint32_t); for (size_t i = 0; i < native_offset_width_; i++) { - native_offset |= (*table_)[table_offset + i] << (i * 8); + native_offset |= table_[table_offset + i] << (i * 8); } return native_offset; } @@ -73,13 +75,13 @@ class GcMapBuilder { void SetCodeOffset(size_t table_index, uint32_t native_offset) { size_t table_offset = (table_index * EntryWidth()) + sizeof(uint32_t); for (size_t i = 0; i < native_offset_width_; i++) { - (*table_)[table_offset + i] = (native_offset >> (i * 8)) & 0xFF; + table_[table_offset + i] = (native_offset >> (i * 8)) & 0xFF; } } void SetReferences(size_t table_index, const uint8_t* references) { size_t table_offset = (table_index * EntryWidth()) + sizeof(uint32_t); - memcpy(&(*table_)[table_offset + native_offset_width_], references, references_width_); + memcpy(&table_[table_offset + native_offset_width_], references, references_width_); } size_t EntryWidth() const { @@ -95,7 +97,7 @@ class GcMapBuilder { // Entries that are in use. std::vector<bool> in_use_; // The table we're building. 
- std::vector<uint8_t>* const table_; + uint8_t* table_; }; } // namespace art diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index b2342491fa..c588e1a53d 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -273,13 +273,7 @@ void ImageWriter::SetImageBinSlot(mirror::Object* object, BinSlot bin_slot) { void ImageWriter::AssignImageBinSlot(mirror::Object* object) { DCHECK(object != nullptr); - size_t object_size; - if (object->IsArtMethod()) { - // Methods are sized based on the target pointer size. - object_size = mirror::ArtMethod::InstanceSize(target_ptr_size_); - } else { - object_size = object->SizeOf(); - } + size_t object_size = object->SizeOf(); // The magic happens here. We segregate objects into different bins based // on how likely they are to get dirty at runtime. @@ -569,6 +563,7 @@ void ImageWriter::ComputeEagerResolvedStringsCallback(Object* obj, void* arg ATT } mirror::String* string = obj->AsString(); const uint16_t* utf16_string = string->GetCharArray()->GetData() + string->GetOffset(); + size_t utf16_length = static_cast<size_t>(string->GetLength()); ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); ReaderMutexLock mu(Thread::Current(), *class_linker->DexLock()); size_t dex_cache_count = class_linker->GetDexCacheCount(); @@ -576,10 +571,10 @@ void ImageWriter::ComputeEagerResolvedStringsCallback(Object* obj, void* arg ATT DexCache* dex_cache = class_linker->GetDexCache(i); const DexFile& dex_file = *dex_cache->GetDexFile(); const DexFile::StringId* string_id; - if (UNLIKELY(string->GetLength() == 0)) { + if (UNLIKELY(utf16_length == 0)) { string_id = dex_file.FindStringId(""); } else { - string_id = dex_file.FindStringId(utf16_string); + string_id = dex_file.FindStringId(utf16_string, utf16_length); } if (string_id != nullptr) { // This string occurs in this dex file, assign the dex cache entry. @@ -931,7 +926,7 @@ void ImageWriter::CopyAndFixupObjectsCallback(Object* obj, void* arg) { if (obj->IsArtMethod()) { // Size without pointer fields since we don't want to overrun the buffer if target art method // is 32 bits but source is 64 bits. - n = mirror::ArtMethod::SizeWithoutPointerFields(sizeof(void*)); + n = mirror::ArtMethod::SizeWithoutPointerFields(image_writer->target_ptr_size_); } else { n = obj->SizeOf(); } @@ -1016,10 +1011,6 @@ void ImageWriter::FixupObject(Object* orig, Object* copy) { } if (orig->IsArtMethod<kVerifyNone>()) { FixupMethod(orig->AsArtMethod<kVerifyNone>(), down_cast<ArtMethod*>(copy)); - } else if (orig->IsClass() && orig->AsClass()->IsArtMethodClass()) { - // Set the right size for the target. - size_t size = mirror::ArtMethod::InstanceSize(target_ptr_size_); - down_cast<mirror::Class*>(copy)->SetObjectSizeWithoutChecks(size); } } @@ -1031,7 +1022,9 @@ const uint8_t* ImageWriter::GetQuickCode(mirror::ArtMethod* method, bool* quick_ // trampoline. // Quick entrypoint: - const uint8_t* quick_code = GetOatAddress(method->GetQuickOatCodeOffset()); + uint32_t quick_oat_code_offset = PointerToLowMemUInt32( + method->GetEntryPointFromQuickCompiledCodePtrSize(target_ptr_size_)); + const uint8_t* quick_code = GetOatAddress(quick_oat_code_offset); *quick_is_interpreted = false; if (quick_code != nullptr && (!method->IsStatic() || method->IsConstructor() || method->GetDeclaringClass()->IsInitialized())) { @@ -1082,11 +1075,12 @@ void ImageWriter::FixupMethod(ArtMethod* orig, ArtMethod* copy) { // locations. 
// Copy all of the fields from the runtime methods to the target methods first since we did a // bytewise copy earlier. - copy->SetEntryPointFromInterpreterPtrSize<kVerifyNone>(orig->GetEntryPointFromInterpreter(), - target_ptr_size_); - copy->SetEntryPointFromJniPtrSize<kVerifyNone>(orig->GetEntryPointFromJni(), target_ptr_size_); + copy->SetEntryPointFromInterpreterPtrSize<kVerifyNone>( + orig->GetEntryPointFromInterpreterPtrSize(target_ptr_size_), target_ptr_size_); + copy->SetEntryPointFromJniPtrSize<kVerifyNone>( + orig->GetEntryPointFromJniPtrSize(target_ptr_size_), target_ptr_size_); copy->SetEntryPointFromQuickCompiledCodePtrSize<kVerifyNone>( - orig->GetEntryPointFromQuickCompiledCode(), target_ptr_size_); + orig->GetEntryPointFromQuickCompiledCodePtrSize(target_ptr_size_), target_ptr_size_); // The resolution method has a special trampoline to call. Runtime* runtime = Runtime::Current(); diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index 3c36ffa4e9..9c0157e885 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -899,7 +899,8 @@ class OatWriter::InitMapMethodVisitor : public OatDexMethodVisitor { class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor { public: InitImageMethodVisitor(OatWriter* writer, size_t offset) - : OatDexMethodVisitor(writer, offset) { + : OatDexMethodVisitor(writer, offset), + pointer_size_(GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet())) { } bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) @@ -932,10 +933,14 @@ class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor { std::string dump = exc->Dump(); LOG(FATAL) << dump; } - method->SetQuickOatCodeOffset(offsets.code_offset_); + method->SetEntryPointFromQuickCompiledCodePtrSize(reinterpret_cast<void*>(offsets.code_offset_), + pointer_size_); return true; } + + protected: + const size_t pointer_size_; }; class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { @@ -1103,10 +1108,18 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { if (UNLIKELY(target_offset == 0)) { mirror::ArtMethod* target = GetTargetMethod(patch); DCHECK(target != nullptr); - DCHECK_EQ(target->GetQuickOatCodeOffset(), 0u); - target_offset = target->IsNative() - ? writer_->oat_header_->GetQuickGenericJniTrampolineOffset() - : writer_->oat_header_->GetQuickToInterpreterBridgeOffset(); + size_t size = GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet()); + const void* oat_code_offset = target->GetEntryPointFromQuickCompiledCodePtrSize(size); + if (oat_code_offset != 0) { + DCHECK(!Runtime::Current()->GetClassLinker()->IsQuickResolutionStub(oat_code_offset)); + DCHECK(!Runtime::Current()->GetClassLinker()->IsQuickToInterpreterBridge(oat_code_offset)); + DCHECK(!Runtime::Current()->GetClassLinker()->IsQuickGenericJniStub(oat_code_offset)); + target_offset = PointerToLowMemUInt32(oat_code_offset); + } else { + target_offset = target->IsNative() + ? writer_->oat_header_->GetQuickGenericJniTrampolineOffset() + : writer_->oat_header_->GetQuickToInterpreterBridgeOffset(); + } } return target_offset; } @@ -1138,10 +1151,9 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { void PatchCodeAddress(std::vector<uint8_t>* code, uint32_t offset, uint32_t target_offset) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - // NOTE: Direct calls across oat files don't use linker patches. 
- DCHECK(writer_->image_writer_ != nullptr); - uint32_t address = PointerToLowMemUInt32(writer_->image_writer_->GetOatFileBegin() + - writer_->oat_data_offset_ + target_offset); + uint32_t address = writer_->image_writer_ == nullptr ? target_offset : + PointerToLowMemUInt32(writer_->image_writer_->GetOatFileBegin() + + writer_->oat_data_offset_ + target_offset); DCHECK_LE(offset + 4, code->size()); uint8_t* data = &(*code)[offset]; data[0] = address & 0xffu; diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index d6c3515726..811a3bdf0c 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -14,9 +14,9 @@ * limitations under the License. */ +#include "base/arena_containers.h" #include "bounds_check_elimination.h" #include "nodes.h" -#include "utils/arena_containers.h" namespace art { @@ -28,18 +28,11 @@ class MonotonicValueRange; */ class ValueBound : public ValueObject { public: - ValueBound(HInstruction* instruction, int constant) { + ValueBound(HInstruction* instruction, int32_t constant) { if (instruction != nullptr && instruction->IsIntConstant()) { - // Normalizing ValueBound with constant instruction. - int instr_const = instruction->AsIntConstant()->GetValue(); - if (constant >= 0 && (instr_const <= INT_MAX - constant)) { - // No overflow. - instruction_ = nullptr; - constant_ = instr_const + constant; - return; - } - if (constant < 0 && (instr_const >= INT_MIN - constant)) { - // No underflow. + // Normalize ValueBound with constant instruction. + int32_t instr_const = instruction->AsIntConstant()->GetValue(); + if (!WouldAddOverflowOrUnderflow(instr_const, constant)) { instruction_ = nullptr; constant_ = instr_const + constant; return; @@ -49,6 +42,41 @@ class ValueBound : public ValueObject { constant_ = constant; } + // Return whether (left + right) overflows or underflows. + static bool WouldAddOverflowOrUnderflow(int32_t left, int32_t right) { + if (right == 0) { + return false; + } + if ((right > 0) && (left <= INT_MAX - right)) { + // No overflow. + return false; + } + if ((right < 0) && (left >= INT_MIN - right)) { + // No underflow. + return false; + } + return true; + } + + static bool IsAddOrSubAConstant(HInstruction* instruction, + HInstruction** left_instruction, + int* right_constant) { + if (instruction->IsAdd() || instruction->IsSub()) { + HBinaryOperation* bin_op = instruction->AsBinaryOperation(); + HInstruction* left = bin_op->GetLeft(); + HInstruction* right = bin_op->GetRight(); + if (right->IsIntConstant()) { + *left_instruction = left; + int32_t c = right->AsIntConstant()->GetValue(); + *right_constant = instruction->IsAdd() ? c : -c; + return true; + } + } + *left_instruction = nullptr; + *right_constant = 0; + return false; + } + // Try to detect useful value bound format from an instruction, e.g. // a constant or array length related value. static ValueBound DetectValueBoundFromValue(HInstruction* instruction, bool* found) { @@ -63,13 +91,12 @@ class ValueBound : public ValueObject { return ValueBound(instruction, 0); } // Try to detect (array.length + c) format. 
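// A standalone sketch of the overflow/underflow test introduced just above
// (illustrative only): left + right is representable as int32_t exactly when neither
// check below fires, so the constant parts of two bounds can be folded safely.
#include <climits>
#include <cstdint>

static bool WouldAddOverflowOrUnderflow(int32_t left, int32_t right) {
  if (right > 0 && left > INT_MAX - right) return true;   // would overflow
  if (right < 0 && left < INT_MIN - right) return true;   // would underflow
  return false;                                           // right == 0 also lands here
}

// Usage: (array.length + c) is folded with an extra constant k only when
// WouldAddOverflowOrUnderflow(c, k) is false; otherwise the bound stays symbolic.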
- if (instruction->IsAdd()) { - HAdd* add = instruction->AsAdd(); - HInstruction* left = add->GetLeft(); - HInstruction* right = add->GetRight(); - if (left->IsArrayLength() && right->IsIntConstant()) { + HInstruction *left; + int32_t right; + if (IsAddOrSubAConstant(instruction, &left, &right)) { + if (left->IsArrayLength()) { *found = true; - return ValueBound(left, right->AsIntConstant()->GetValue()); + return ValueBound(left, right); } } @@ -79,10 +106,13 @@ class ValueBound : public ValueObject { } HInstruction* GetInstruction() const { return instruction_; } - int GetConstant() const { return constant_; } + int32_t GetConstant() const { return constant_; } - bool IsRelativeToArrayLength() const { - return instruction_ != nullptr && instruction_->IsArrayLength(); + bool IsRelatedToArrayLength() const { + // Some bounds are created with HNewArray* as the instruction instead + // of HArrayLength*. They are treated the same. + return (instruction_ != nullptr) && + (instruction_->IsArrayLength() || instruction_->IsNewArray()); } bool IsConstant() const { @@ -96,54 +126,45 @@ class ValueBound : public ValueObject { return instruction_ == bound.instruction_ && constant_ == bound.constant_; } - // Returns if it's certain bound1 >= bound2. - bool GreaterThanOrEqual(ValueBound bound) const { - if (instruction_ == bound.instruction_) { - if (instruction_ == nullptr) { - // Pure constant. - return constant_ >= bound.constant_; - } - // There might be overflow/underflow. Be conservative for now. - return false; + static HInstruction* FromArrayLengthToNewArrayIfPossible(HInstruction* instruction) { + // Null check on the NewArray should have been eliminated by instruction + // simplifier already. + if (instruction->IsArrayLength() && instruction->InputAt(0)->IsNewArray()) { + return instruction->InputAt(0)->AsNewArray(); } - // Not comparable. Just return false. - return false; + return instruction; } - // Returns if it's certain bound1 <= bound2. - bool LessThanOrEqual(ValueBound bound) const { - if (instruction_ == bound.instruction_) { - if (instruction_ == nullptr) { - // Pure constant. - return constant_ <= bound.constant_; - } - if (IsRelativeToArrayLength()) { - // Array length is guaranteed to be no less than 0. - // No overflow/underflow can happen if both constants are negative. - if (constant_ <= 0 && bound.constant_ <= 0) { - return constant_ <= bound.constant_; - } - // There might be overflow/underflow. Be conservative for now. - return false; - } + static bool Equal(HInstruction* instruction1, HInstruction* instruction2) { + if (instruction1 == instruction2) { + return true; } - // In case the array length is some constant, we can - // still compare. - if (IsConstant() && bound.IsRelativeToArrayLength()) { - HInstruction* array = bound.GetInstruction()->AsArrayLength()->InputAt(0); - if (array->IsNullCheck()) { - array = array->AsNullCheck()->InputAt(0); - } - if (array->IsNewArray()) { - HInstruction* len = array->InputAt(0); - if (len->IsIntConstant()) { - int len_const = len->AsIntConstant()->GetValue(); - return constant_ <= len_const + bound.GetConstant(); - } - } + if (instruction1 == nullptr || instruction2 == nullptr) { + return false; } + // Some bounds are created with HNewArray* as the instruction instead + // of HArrayLength*. They are treated the same. 
+ instruction1 = FromArrayLengthToNewArrayIfPossible(instruction1); + instruction2 = FromArrayLengthToNewArrayIfPossible(instruction2); + return instruction1 == instruction2; + } + + // Returns if it's certain this->bound >= `bound`. + bool GreaterThanOrEqualTo(ValueBound bound) const { + if (Equal(instruction_, bound.instruction_)) { + return constant_ >= bound.constant_; + } + // Not comparable. Just return false. + return false; + } + + // Returns if it's certain this->bound <= `bound`. + bool LessThanOrEqualTo(ValueBound bound) const { + if (Equal(instruction_, bound.instruction_)) { + return constant_ <= bound.constant_; + } // Not comparable. Just return false. return false; } @@ -151,10 +172,11 @@ class ValueBound : public ValueObject { // Try to narrow lower bound. Returns the greatest of the two if possible. // Pick one if they are not comparable. static ValueBound NarrowLowerBound(ValueBound bound1, ValueBound bound2) { - if (bound1.instruction_ == bound2.instruction_) { - // Same instruction, compare the constant part. - return ValueBound(bound1.instruction_, - std::max(bound1.constant_, bound2.constant_)); + if (bound1.GreaterThanOrEqualTo(bound2)) { + return bound1; + } + if (bound2.GreaterThanOrEqualTo(bound1)) { + return bound2; } // Not comparable. Just pick one. We may lose some info, but that's ok. @@ -165,58 +187,71 @@ class ValueBound : public ValueObject { // Try to narrow upper bound. Returns the lowest of the two if possible. // Pick one if they are not comparable. static ValueBound NarrowUpperBound(ValueBound bound1, ValueBound bound2) { - if (bound1.instruction_ == bound2.instruction_) { - // Same instruction, compare the constant part. - return ValueBound(bound1.instruction_, - std::min(bound1.constant_, bound2.constant_)); + if (bound1.LessThanOrEqualTo(bound2)) { + return bound1; + } + if (bound2.LessThanOrEqualTo(bound1)) { + return bound2; } // Not comparable. Just pick one. We may lose some info, but that's ok. // Favor array length as upper bound. - return bound1.IsRelativeToArrayLength() ? bound1 : bound2; + return bound1.IsRelatedToArrayLength() ? bound1 : bound2; } - // Add a constant to a ValueBound. If the constant part of the ValueBound - // overflows/underflows, then we can't accurately represent it. For correctness, - // just return Max/Min() depending on whether the returned ValueBound is used for - // lower/upper bound. - ValueBound Add(int c, bool* overflow_or_underflow) const { - *overflow_or_underflow = false; + // Add a constant to a ValueBound. + // `overflow` or `underflow` will return whether the resulting bound may + // overflow or underflow an int. + ValueBound Add(int32_t c, bool* overflow, bool* underflow) const { + *overflow = *underflow = false; if (c == 0) { return *this; } - int new_constant; + int32_t new_constant; if (c > 0) { if (constant_ > INT_MAX - c) { - // Constant part overflows. - *overflow_or_underflow = true; + *overflow = true; return Max(); - } else { - new_constant = constant_ + c; } + + new_constant = constant_ + c; + // (array.length + non-positive-constant) won't overflow an int. + if (IsConstant() || (IsRelatedToArrayLength() && new_constant <= 0)) { + return ValueBound(instruction_, new_constant); + } + // Be conservative. + *overflow = true; + return Max(); } else { if (constant_ < INT_MIN - c) { - // Constant part underflows. 
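// A small self-contained sketch of the comparison rule above (illustrative types, not
// the ART classes): two bounds of the form "instr + c" are only ordered when their
// symbolic parts are the same instruction (after the HNewArray/HArrayLength
// normalization); the comparison then reduces to comparing the constants, and
// anything else is "not comparable", which conservatively answers false.
#include <cstdint>

struct Bound {
  const void* instr;  // nullptr means a pure constant bound
  int32_t c;
};

static bool GreaterThanOrEqualTo(const Bound& a, const Bound& b) {
  return a.instr == b.instr && a.c >= b.c;
}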
- *overflow_or_underflow = true; - return Max(); - } else { - new_constant = constant_ + c; + *underflow = true; + return Min(); } + + new_constant = constant_ + c; + // Regardless of the value new_constant, (array.length+new_constant) will + // never underflow since array.length is no less than 0. + if (IsConstant() || IsRelatedToArrayLength()) { + return ValueBound(instruction_, new_constant); + } + // Be conservative. + *underflow = true; + return Min(); } return ValueBound(instruction_, new_constant); } private: HInstruction* instruction_; - int constant_; + int32_t constant_; }; /** * Represent a range of lower bound and upper bound, both being inclusive. * Currently a ValueRange may be generated as a result of the following: * comparisons related to array bounds, array bounds check, add/sub on top - * of an existing value range, or a loop phi corresponding to an + * of an existing value range, NewArray or a loop phi corresponding to an * incrementing/decrementing array index (MonotonicValueRange). */ class ValueRange : public ArenaObject<kArenaAllocMisc> { @@ -241,8 +276,8 @@ class ValueRange : public ArenaObject<kArenaAllocMisc> { return true; } DCHECK(!other_range->IsMonotonicValueRange()); - return lower_.GreaterThanOrEqual(other_range->lower_) && - upper_.LessThanOrEqual(other_range->upper_); + return lower_.GreaterThanOrEqualTo(other_range->lower_) && + upper_.LessThanOrEqualTo(other_range->upper_); } // Returns the intersection of this and range. @@ -263,29 +298,24 @@ class ValueRange : public ArenaObject<kArenaAllocMisc> { ValueBound::NarrowUpperBound(upper_, range->upper_)); } - // Shift a range by a constant. If either bound can't be represented - // as (instruction+c) format due to possible overflow/underflow, - // return the full integer range. - ValueRange* Add(int constant) const { - bool overflow_or_underflow; - ValueBound lower = lower_.Add(constant, &overflow_or_underflow); - if (overflow_or_underflow) { - // We can't accurately represent the bounds anymore. - return FullIntRange(); + // Shift a range by a constant. + ValueRange* Add(int32_t constant) const { + bool overflow, underflow; + ValueBound lower = lower_.Add(constant, &overflow, &underflow); + if (underflow) { + // Lower bound underflow will wrap around to positive values + // and invalidate the upper bound. + return nullptr; } - ValueBound upper = upper_.Add(constant, &overflow_or_underflow); - if (overflow_or_underflow) { - // We can't accurately represent the bounds anymore. - return FullIntRange(); + ValueBound upper = upper_.Add(constant, &overflow, &underflow); + if (overflow) { + // Upper bound overflow will wrap around to negative values + // and invalidate the lower bound. + return nullptr; } return new (allocator_) ValueRange(allocator_, lower, upper); } - // Return [INT_MIN, INT_MAX]. - ValueRange* FullIntRange() const { - return new (allocator_) ValueRange(allocator_, ValueBound::Min(), ValueBound::Max()); - } - private: ArenaAllocator* const allocator_; const ValueBound lower_; // inclusive @@ -304,7 +334,7 @@ class MonotonicValueRange : public ValueRange { public: MonotonicValueRange(ArenaAllocator* allocator, HInstruction* initial, - int increment, + int32_t increment, ValueBound bound) // To be conservative, give it full range [INT_MIN, INT_MAX] in case it's // used as a regular value range, due to possible overflow/underflow. @@ -343,23 +373,17 @@ class MonotonicValueRange : public ValueRange { // make assumptions about the max array length, e.g. 
due to the max heap size, // divided by the element size (such as 4 bytes for each integer array), we can // lower this number and rule out some possible overflows. - int max_array_len = INT_MAX; - - int upper = INT_MAX; - if (range->GetUpper().IsConstant()) { - upper = range->GetUpper().GetConstant(); - } else if (range->GetUpper().IsRelativeToArrayLength()) { - int constant = range->GetUpper().GetConstant(); - if (constant <= 0) { - // Normal case. e.g. <= array.length - 1, <= array.length - 2, etc. - upper = max_array_len + constant; - } else { - // There might be overflow. Give up narrowing. - return this; - } - } else { - // There might be overflow. Give up narrowing. - return this; + int32_t max_array_len = INT_MAX; + + // max possible integer value of range's upper value. + int32_t upper = INT_MAX; + // Try to lower upper. + ValueBound upper_bound = range->GetUpper(); + if (upper_bound.IsConstant()) { + upper = upper_bound.GetConstant(); + } else if (upper_bound.IsRelatedToArrayLength() && upper_bound.GetConstant() <= 0) { + // Normal case. e.g. <= array.length - 1. + upper = max_array_len + upper_bound.GetConstant(); } // If we can prove for the last number in sequence of initial_, @@ -368,13 +392,13 @@ class MonotonicValueRange : public ValueRange { // then this MonoticValueRange is narrowed to a normal value range. // Be conservative first, assume last number in the sequence hits upper. - int last_num_in_sequence = upper; + int32_t last_num_in_sequence = upper; if (initial_->IsIntConstant()) { - int initial_constant = initial_->AsIntConstant()->GetValue(); + int32_t initial_constant = initial_->AsIntConstant()->GetValue(); if (upper <= initial_constant) { last_num_in_sequence = upper; } else { - // Cast to int64_t for the substraction part to avoid int overflow. + // Cast to int64_t for the substraction part to avoid int32_t overflow. last_num_in_sequence = initial_constant + ((int64_t)upper - (int64_t)initial_constant) / increment_ * increment_; } @@ -392,23 +416,22 @@ class MonotonicValueRange : public ValueRange { ValueBound upper = ValueBound::NarrowUpperBound(bound_, range->GetUpper()); // Need to take care of underflow. Try to prove underflow won't happen - // for common cases. Basically need to be able to prove for any value - // that's >= range->GetLower(), it won't be positive with value+increment. + // for common cases. if (range->GetLower().IsConstant()) { - int constant = range->GetLower().GetConstant(); + int32_t constant = range->GetLower().GetConstant(); if (constant >= INT_MIN - increment_) { return new (GetAllocator()) ValueRange(GetAllocator(), range->GetLower(), upper); } } - // There might be underflow. Give up narrowing. + // For non-constant lower bound, just assume might be underflow. Give up narrowing. return this; } } private: HInstruction* const initial_; - const int increment_; + const int32_t increment_; ValueBound bound_; // Additional value bound info for initial_; DISALLOW_COPY_AND_ASSIGN(MonotonicValueRange); @@ -446,13 +469,26 @@ class BCEVisitor : public HGraphVisitor { return nullptr; } - // Narrow the value range of 'instruction' at the end of 'basic_block' with 'range', - // and push the narrowed value range to 'successor'. + // Narrow the value range of `instruction` at the end of `basic_block` with `range`, + // and push the narrowed value range to `successor`. 
void ApplyRangeFromComparison(HInstruction* instruction, HBasicBlock* basic_block, - HBasicBlock* successor, ValueRange* range) { + HBasicBlock* successor, ValueRange* range) { ValueRange* existing_range = LookupValueRange(instruction, basic_block); - ValueRange* narrowed_range = (existing_range == nullptr) ? - range : existing_range->Narrow(range); + if (existing_range == nullptr) { + if (range != nullptr) { + GetValueRangeMap(successor)->Overwrite(instruction->GetId(), range); + } + return; + } + if (existing_range->IsMonotonicValueRange()) { + DCHECK(instruction->IsLoopHeaderPhi()); + // Make sure the comparison is in the loop header so each increment is + // checked with a comparison. + if (instruction->GetBlock() != basic_block) { + return; + } + } + ValueRange* narrowed_range = existing_range->Narrow(range); if (narrowed_range != nullptr) { GetValueRangeMap(successor)->Overwrite(instruction->GetId(), narrowed_range); } @@ -472,10 +508,12 @@ class BCEVisitor : public HGraphVisitor { bool found; ValueBound bound = ValueBound::DetectValueBoundFromValue(right, &found); + // Each comparison can establish a lower bound and an upper bound + // for the left hand side. ValueBound lower = bound; ValueBound upper = bound; if (!found) { - // No constant or array.length+c bound found. + // No constant or array.length+c format bound found. // For i<j, we can still use j's upper bound as i's upper bound. Same for lower. ValueRange* range = LookupValueRange(right, block); if (range != nullptr) { @@ -487,13 +525,13 @@ class BCEVisitor : public HGraphVisitor { } } - bool overflow_or_underflow; + bool overflow, underflow; if (cond == kCondLT || cond == kCondLE) { if (!upper.Equals(ValueBound::Max())) { - int compensation = (cond == kCondLT) ? -1 : 0; // upper bound is inclusive - ValueBound new_upper = upper.Add(compensation, &overflow_or_underflow); - if (overflow_or_underflow) { - new_upper = ValueBound::Max(); + int32_t compensation = (cond == kCondLT) ? -1 : 0; // upper bound is inclusive + ValueBound new_upper = upper.Add(compensation, &overflow, &underflow); + if (overflow || underflow) { + return; } ValueRange* new_range = new (GetGraph()->GetArena()) ValueRange(GetGraph()->GetArena(), ValueBound::Min(), new_upper); @@ -501,11 +539,11 @@ class BCEVisitor : public HGraphVisitor { } // array.length as a lower bound isn't considered useful. - if (!lower.Equals(ValueBound::Min()) && !lower.IsRelativeToArrayLength()) { - int compensation = (cond == kCondLE) ? 1 : 0; // lower bound is inclusive - ValueBound new_lower = lower.Add(compensation, &overflow_or_underflow); - if (overflow_or_underflow) { - new_lower = ValueBound::Min(); + if (!lower.Equals(ValueBound::Min()) && !lower.IsRelatedToArrayLength()) { + int32_t compensation = (cond == kCondLE) ? 1 : 0; // lower bound is inclusive + ValueBound new_lower = lower.Add(compensation, &overflow, &underflow); + if (overflow || underflow) { + return; } ValueRange* new_range = new (GetGraph()->GetArena()) ValueRange(GetGraph()->GetArena(), new_lower, ValueBound::Max()); @@ -513,11 +551,11 @@ class BCEVisitor : public HGraphVisitor { } } else if (cond == kCondGT || cond == kCondGE) { // array.length as a lower bound isn't considered useful. - if (!lower.Equals(ValueBound::Min()) && !lower.IsRelativeToArrayLength()) { - int compensation = (cond == kCondGT) ? 
1 : 0; // lower bound is inclusive - ValueBound new_lower = lower.Add(compensation, &overflow_or_underflow); - if (overflow_or_underflow) { - new_lower = ValueBound::Min(); + if (!lower.Equals(ValueBound::Min()) && !lower.IsRelatedToArrayLength()) { + int32_t compensation = (cond == kCondGT) ? 1 : 0; // lower bound is inclusive + ValueBound new_lower = lower.Add(compensation, &overflow, &underflow); + if (overflow || underflow) { + return; } ValueRange* new_range = new (GetGraph()->GetArena()) ValueRange(GetGraph()->GetArena(), new_lower, ValueBound::Max()); @@ -525,10 +563,10 @@ class BCEVisitor : public HGraphVisitor { } if (!upper.Equals(ValueBound::Max())) { - int compensation = (cond == kCondGE) ? -1 : 0; // upper bound is inclusive - ValueBound new_upper = upper.Add(compensation, &overflow_or_underflow); - if (overflow_or_underflow) { - new_upper = ValueBound::Max(); + int32_t compensation = (cond == kCondGE) ? -1 : 0; // upper bound is inclusive + ValueBound new_upper = upper.Add(compensation, &overflow, &underflow); + if (overflow || underflow) { + return; } ValueRange* new_range = new (GetGraph()->GetArena()) ValueRange(GetGraph()->GetArena(), ValueBound::Min(), new_upper); @@ -541,41 +579,56 @@ class BCEVisitor : public HGraphVisitor { HBasicBlock* block = bounds_check->GetBlock(); HInstruction* index = bounds_check->InputAt(0); HInstruction* array_length = bounds_check->InputAt(1); - ValueRange* index_range = LookupValueRange(index, block); - - if (index_range != nullptr) { - ValueBound lower = ValueBound(nullptr, 0); // constant 0 - ValueBound upper = ValueBound(array_length, -1); // array_length - 1 - ValueRange* array_range = new (GetGraph()->GetArena()) - ValueRange(GetGraph()->GetArena(), lower, upper); - if (index_range->FitsIn(array_range)) { - ReplaceBoundsCheck(bounds_check, index); + DCHECK(array_length->IsIntConstant() || array_length->IsArrayLength()); + + if (!index->IsIntConstant()) { + ValueRange* index_range = LookupValueRange(index, block); + if (index_range != nullptr) { + ValueBound lower = ValueBound(nullptr, 0); // constant 0 + ValueBound upper = ValueBound(array_length, -1); // array_length - 1 + ValueRange* array_range = new (GetGraph()->GetArena()) + ValueRange(GetGraph()->GetArena(), lower, upper); + if (index_range->FitsIn(array_range)) { + ReplaceBoundsCheck(bounds_check, index); + return; + } + } + } else { + int32_t constant = index->AsIntConstant()->GetValue(); + if (constant < 0) { + // Will always throw exception. + return; + } + if (array_length->IsIntConstant()) { + if (constant < array_length->AsIntConstant()->GetValue()) { + ReplaceBoundsCheck(bounds_check, index); + } return; } - } - if (index->IsIntConstant()) { - ValueRange* array_length_range = LookupValueRange(array_length, block); - int constant = index->AsIntConstant()->GetValue(); - if (array_length_range != nullptr && - array_length_range->GetLower().IsConstant()) { - if (constant < array_length_range->GetLower().GetConstant()) { + DCHECK(array_length->IsArrayLength()); + ValueRange* existing_range = LookupValueRange(array_length, block); + if (existing_range != nullptr) { + ValueBound lower = existing_range->GetLower(); + DCHECK(lower.IsConstant()); + if (constant < lower.GetConstant()) { ReplaceBoundsCheck(bounds_check, index); return; + } else { + // Existing range isn't strong enough to eliminate the bounds check. + // Fall through to update the array_length range with info from this + // bounds check. 
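// A sketch of the compensation step used in VisitIf above (standalone, illustrative
// names): for "i < b" the largest value i can take is b - 1, so the inclusive upper
// bound carries a compensation of -1; for "i <= b" it is b itself. If applying the
// compensation would wrap around, no range is recorded at all, matching the early
// return on overflow/underflow in the patch.
#include <climits>
#include <cstdint>

static bool InclusiveUpperFromCompare(bool is_strict_less_than, int32_t b, int32_t* upper) {
  int32_t compensation = is_strict_less_than ? -1 : 0;
  if (is_strict_less_than && b == INT_MIN) {
    return false;  // b - 1 would underflow: give up instead of clamping.
  }
  *upper = b + compensation;
  return true;
}
// Example: i < 10 yields an inclusive upper bound of 9; i <= 10 yields 10.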
} } // Once we have an array access like 'array[5] = 1', we record array.length >= 6. + // We currently don't do it for non-constant index since a valid array[i] can't prove + // a valid array[i-1] yet due to the lower bound side. ValueBound lower = ValueBound(nullptr, constant + 1); ValueBound upper = ValueBound::Max(); ValueRange* range = new (GetGraph()->GetArena()) ValueRange(GetGraph()->GetArena(), lower, upper); - ValueRange* existing_range = LookupValueRange(array_length, block); - ValueRange* new_range = range; - if (existing_range != nullptr) { - new_range = range->Narrow(existing_range); - } - GetValueRangeMap(block)->Overwrite(array_length->GetId(), new_range); + GetValueRangeMap(block)->Overwrite(array_length->GetId(), range); } } @@ -588,14 +641,12 @@ class BCEVisitor : public HGraphVisitor { if (phi->IsLoopHeaderPhi() && phi->GetType() == Primitive::kPrimInt) { DCHECK_EQ(phi->InputCount(), 2U); HInstruction* instruction = phi->InputAt(1); - if (instruction->IsAdd()) { - HAdd* add = instruction->AsAdd(); - HInstruction* left = add->GetLeft(); - HInstruction* right = add->GetRight(); - if (left == phi && right->IsIntConstant()) { + HInstruction *left; + int32_t increment; + if (ValueBound::IsAddOrSubAConstant(instruction, &left, &increment)) { + if (left == phi) { HInstruction* initial_value = phi->InputAt(0); ValueRange* range = nullptr; - int increment = right->AsIntConstant()->GetValue(); if (increment == 0) { // Add constant 0. It's really a fixed value. range = new (GetGraph()->GetArena()) ValueRange( @@ -676,29 +727,122 @@ class BCEVisitor : public HGraphVisitor { // Here we are interested in the typical triangular case of nested loops, // such as the inner loop 'for (int j=0; j<array.length-i; j++)' where i // is the index for outer loop. In this case, we know j is bounded by array.length-1. + + // Try to handle (array.length - i) or (array.length + c - i) format. + HInstruction* left_of_left; // left input of left. + int32_t right_const = 0; + if (ValueBound::IsAddOrSubAConstant(left, &left_of_left, &right_const)) { + left = left_of_left; + } + // The value of left input of the sub equals (left + right_const). + if (left->IsArrayLength()) { HInstruction* array_length = left->AsArrayLength(); ValueRange* right_range = LookupValueRange(right, sub->GetBlock()); if (right_range != nullptr) { ValueBound lower = right_range->GetLower(); ValueBound upper = right_range->GetUpper(); - if (lower.IsConstant() && upper.IsRelativeToArrayLength()) { + if (lower.IsConstant() && upper.IsRelatedToArrayLength()) { HInstruction* upper_inst = upper.GetInstruction(); - if (upper_inst->IsArrayLength() && - upper_inst->AsArrayLength() == array_length) { - // (array.length - v) where v is in [c1, array.length + c2] - // gets [-c2, array.length - c1] as its value range. - ValueRange* range = new (GetGraph()->GetArena()) ValueRange( - GetGraph()->GetArena(), - ValueBound(nullptr, - upper.GetConstant()), - ValueBound(array_length, - lower.GetConstant())); - GetValueRangeMap(sub->GetBlock())->Overwrite(sub->GetId(), range); + // Make sure it's the same array. + if (ValueBound::Equal(array_length, upper_inst)) { + int32_t c0 = right_const; + int32_t c1 = lower.GetConstant(); + int32_t c2 = upper.GetConstant(); + // (array.length + c0 - v) where v is in [c1, array.length + c2] + // gets [c0 - c2, array.length + c0 - c1] as its value range. 
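// A worked sketch of the range arithmetic described in the comment above
// (illustrative, not ART code): if v is known to lie in [c1, array.length + c2], then
//   array.length + c0 - v   lies in   [c0 - c2, array.length + c0 - c1],
// since substituting the largest v gives the smallest result and vice versa.
// Example with c0 = 0, c1 = 0, c2 = -1 (v in [0, array.length - 1]):
//   array.length - v is in [1, array.length], so (array.length - v) - 1 is a valid index.
#include <cstdint>

struct LengthRelativeRange {
  int32_t lo_const;  // absolute lower bound constant
  int32_t hi_const;  // upper bound constant, relative to array.length
};

static LengthRelativeRange SubFromLengthPlusC0(int32_t c0, int32_t c1, int32_t c2) {
  // Assumes the overflow pre-checks from the patch already passed.
  return LengthRelativeRange{c0 - c2, c0 - c1};
}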
+ if (!ValueBound::WouldAddOverflowOrUnderflow(c0, -c2) && + !ValueBound::WouldAddOverflowOrUnderflow(c0, -c1)) { + if ((c0 - c1) <= 0) { + // array.length + (c0 - c1) won't overflow/underflow. + ValueRange* range = new (GetGraph()->GetArena()) ValueRange( + GetGraph()->GetArena(), + ValueBound(nullptr, right_const - upper.GetConstant()), + ValueBound(array_length, right_const - lower.GetConstant())); + GetValueRangeMap(sub->GetBlock())->Overwrite(sub->GetId(), range); + } + } } } } } } + void FindAndHandlePartialArrayLength(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsShr() || instruction->IsUShr()); + HInstruction* right = instruction->GetRight(); + int32_t right_const; + if (right->IsIntConstant()) { + right_const = right->AsIntConstant()->GetValue(); + // Detect division by two or more. + if ((instruction->IsDiv() && right_const <= 1) || + (instruction->IsShr() && right_const < 1) || + (instruction->IsUShr() && right_const < 1)) { + return; + } + } else { + return; + } + + // Try to handle array.length/2 or (array.length-1)/2 format. + HInstruction* left = instruction->GetLeft(); + HInstruction* left_of_left; // left input of left. + int32_t c = 0; + if (ValueBound::IsAddOrSubAConstant(left, &left_of_left, &c)) { + left = left_of_left; + } + // The value of left input of instruction equals (left + c). + + // (array_length + 1) or smaller divided by two or more + // always generate a value in [INT_MIN, array_length]. + // This is true even if array_length is INT_MAX. + if (left->IsArrayLength() && c <= 1) { + if (instruction->IsUShr() && c < 0) { + // Make sure for unsigned shift, left side is not negative. + // e.g. if array_length is 2, ((array_length - 3) >>> 2) is way bigger + // than array_length. + return; + } + ValueRange* range = new (GetGraph()->GetArena()) ValueRange( + GetGraph()->GetArena(), + ValueBound(nullptr, INT_MIN), + ValueBound(left, 0)); + GetValueRangeMap(instruction->GetBlock())->Overwrite(instruction->GetId(), range); + } + } + + void VisitDiv(HDiv* div) { + FindAndHandlePartialArrayLength(div); + } + + void VisitShr(HShr* shr) { + FindAndHandlePartialArrayLength(shr); + } + + void VisitUShr(HUShr* ushr) { + FindAndHandlePartialArrayLength(ushr); + } + + void VisitNewArray(HNewArray* new_array) { + HInstruction* len = new_array->InputAt(0); + if (!len->IsIntConstant()) { + HInstruction *left; + int32_t right_const; + if (ValueBound::IsAddOrSubAConstant(len, &left, &right_const)) { + // (left + right_const) is used as size to new the array. + // We record "-right_const <= left <= new_array - right_const"; + ValueBound lower = ValueBound(nullptr, -right_const); + // We use new_array for the bound instead of new_array.length, + // which isn't available as an instruction yet. new_array will + // be treated the same as new_array.length when it's used in a ValueBound. 
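// A hedged check of the claim behind FindAndHandlePartialArrayLength above: for any
// array length len >= 0, a constant c <= 1 and a divisor d >= 2, (len + c) / d never
// exceeds len, so the quotient can safely use [INT_MIN, len] as its range. The
// brute-force loop below (small ranges only, illustrative) exercises that claim.
#include <cassert>
#include <cstdint>

int main() {
  for (int64_t len = 0; len <= 1000; ++len) {
    for (int64_t c = -5; c <= 1; ++c) {
      for (int64_t d = 2; d <= 8; ++d) {
        assert((len + c) / d <= len);
      }
    }
  }
  return 0;
}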
+ ValueBound upper = ValueBound(new_array, -right_const); + ValueRange* range = new (GetGraph()->GetArena()) + ValueRange(GetGraph()->GetArena(), lower, upper); + GetValueRangeMap(new_array->GetBlock())->Overwrite(left->GetId(), range); + } + } + } + std::vector<std::unique_ptr<ArenaSafeMap<int, ValueRange*>>> maps_; DISALLOW_COPY_AND_ASSIGN(BCEVisitor); diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index 3dcb08d195..a298413d14 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -14,19 +14,22 @@ * limitations under the License. */ +#include "base/arena_allocator.h" #include "bounds_check_elimination.h" #include "builder.h" #include "gvn.h" +#include "instruction_simplifier.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "side_effects_analysis.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" namespace art { -static void RunGvn(HGraph* graph) { +static void RunSimplifierAndGvn(HGraph* graph) { + InstructionSimplifier simplify(graph); + simplify.Run(); SideEffectsAnalysis side_effects(graph); side_effects.Run(); GVNOptimization(graph, side_effects).Run(); @@ -127,7 +130,7 @@ TEST(BoundsCheckEliminationTest, NarrowingRangeArrayBoundsElimination) { block3->AddSuccessor(block4); // False successor graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); ASSERT_FALSE(IsRemoved(bounds_check2)); @@ -202,7 +205,7 @@ TEST(BoundsCheckEliminationTest, OverflowArrayBoundsElimination) { block3->AddSuccessor(exit); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -277,7 +280,7 @@ TEST(BoundsCheckEliminationTest, UnderflowArrayBoundsElimination) { block3->AddSuccessor(exit); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -351,7 +354,7 @@ TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) { exit->AddInstruction(new (&allocator) HExit()); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); ASSERT_FALSE(IsRemoved(bounds_check5)); @@ -397,7 +400,6 @@ static HGraph* BuildSSAGraph1(ArenaAllocator* allocator, loop_body->AddSuccessor(loop_header); HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt); - phi->AddInput(constant_initial); HInstruction* null_check = new (allocator) HNullCheck(parameter, 0); HInstruction* array_length = new (allocator) HArrayLength(null_check); HInstruction* cmp = nullptr; @@ -413,6 +415,7 @@ static HGraph* BuildSSAGraph1(ArenaAllocator* allocator, loop_header->AddInstruction(array_length); loop_header->AddInstruction(cmp); loop_header->AddInstruction(if_inst); + phi->AddInput(constant_initial); null_check = new (allocator) HNullCheck(parameter, 0); array_length = new (allocator) HArrayLength(null_check); @@ -450,7 +453,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // HArrayLength which uses the null check as its input. 
graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_after_gvn(graph); bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -458,7 +461,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=1; i<array.length; i++) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 1); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -466,7 +469,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=-1; i<array.length; i++) { array[i] = 10; // Can't eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, -1, 1); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph); bounds_check_elimination_with_initial_minus_1.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -474,7 +477,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=0; i<=array.length; i++) { array[i] = 10; // Can't eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1, kCondGT); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); bounds_check_elimination_with_greater_than.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -483,7 +486,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // array[i] = 10; // Can't eliminate due to overflow concern. } graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 2); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_increment_2(graph); bounds_check_elimination_with_increment_2.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -491,7 +494,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=1; i<array.length; i += 2) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 2); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_increment_2_from_1(graph); bounds_check_elimination_with_increment_2_from_1.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -541,7 +544,6 @@ static HGraph* BuildSSAGraph2(ArenaAllocator* allocator, loop_body->AddSuccessor(loop_header); HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt); - phi->AddInput(array_length); HInstruction* cmp = nullptr; if (cond == kCondLE) { cmp = new (allocator) HLessThanOrEqual(phi, constant_initial); @@ -553,6 +555,7 @@ static HGraph* BuildSSAGraph2(ArenaAllocator* allocator, loop_header->AddPhi(phi); loop_header->AddInstruction(cmp); loop_header->AddInstruction(if_inst); + phi->AddInput(array_length); HInstruction* add = new (allocator) HAdd(Primitive::kPrimInt, phi, constant_minus_1); null_check = new (allocator) HNullCheck(parameter, 0); @@ -591,7 +594,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // HArrayLength which uses the null check as its input. 
graph = BuildSSAGraph2(&allocator, &bounds_check, 0); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_after_gvn(graph); bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -599,7 +602,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>1; i--) { array[i-1] = 10; // Can eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, 1); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -607,7 +610,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>-1; i--) { array[i-1] = 10; // Can't eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, -1); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph); bounds_check_elimination_with_initial_minus_1.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -615,7 +618,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>=0; i--) { array[i-1] = 10; // Can't eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -1, kCondLT); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_less_than(graph); bounds_check_elimination_with_less_than.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -623,13 +626,13 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>0; i-=2) { array[i-1] = 10; // Can eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -2); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_increment_minus_2(graph); bounds_check_elimination_increment_minus_2.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); } -// int[] array = new array[10]; +// int[] array = new int[10]; // for (int i=0; i<10; i+=increment) { array[i] = 10; } static HGraph* BuildSSAGraph3(ArenaAllocator* allocator, HInstruction** bounds_check, @@ -669,7 +672,6 @@ static HGraph* BuildSSAGraph3(ArenaAllocator* allocator, loop_body->AddSuccessor(loop_header); HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt); - phi->AddInput(constant_initial); HInstruction* cmp = nullptr; if (cond == kCondGE) { cmp = new (allocator) HGreaterThanOrEqual(phi, constant_10); @@ -681,6 +683,7 @@ static HGraph* BuildSSAGraph3(ArenaAllocator* allocator, loop_header->AddPhi(phi); loop_header->AddInstruction(cmp); loop_header->AddInstruction(if_inst); + phi->AddInput(constant_initial); HNullCheck* null_check = new (allocator) HNullCheck(new_array, 0); HArrayLength* array_length = new (allocator) HArrayLength(null_check); @@ -705,39 +708,39 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination3) { ArenaPool pool; ArenaAllocator allocator(&pool); - // int[] array = new array[10]; + // int[] array = new int[10]; // for (int i=0; i<10; i++) { array[i] = 10; // Can eliminate. 
} HInstruction* bounds_check = nullptr; HGraph* graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGE); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_after_gvn(graph); bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); - // int[] array = new array[10]; + // int[] array = new int[10]; // for (int i=1; i<10; i++) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 1, kCondGE); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); - // int[] array = new array[10]; + // int[] array = new int[10]; // for (int i=0; i<=10; i++) { array[i] = 10; // Can't eliminate. } graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGT); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); bounds_check_elimination_with_greater_than.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); - // int[] array = new array[10]; + // int[] array = new int[10]; // for (int i=1; i<10; i+=8) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 8, kCondGE); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_increment_8(graph); bounds_check_elimination_increment_8.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -782,7 +785,6 @@ static HGraph* BuildSSAGraph4(ArenaAllocator* allocator, loop_body->AddSuccessor(loop_header); HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt); - phi->AddInput(constant_initial); HInstruction* null_check = new (allocator) HNullCheck(parameter, 0); HInstruction* array_length = new (allocator) HArrayLength(null_check); HInstruction* cmp = nullptr; @@ -797,6 +799,7 @@ static HGraph* BuildSSAGraph4(ArenaAllocator* allocator, loop_header->AddInstruction(array_length); loop_header->AddInstruction(cmp); loop_header->AddInstruction(if_inst); + phi->AddInput(constant_initial); null_check = new (allocator) HNullCheck(parameter, 0); array_length = new (allocator) HArrayLength(null_check); @@ -838,7 +841,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) { // HArrayLength which uses the null check as its input. graph = BuildSSAGraph4(&allocator, &bounds_check, 0); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_after_gvn(graph); bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -846,7 +849,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) { // for (int i=1; i<array.length; i++) { array[array.length-i-1] = 10; // Can eliminate. } graph = BuildSSAGraph4(&allocator, &bounds_check, 1); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -854,7 +857,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) { // for (int i=0; i<=array.length; i++) { array[array.length-i] = 10; // Can't eliminate. 
} graph = BuildSSAGraph4(&allocator, &bounds_check, 0, kCondGT); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); bounds_check_elimination_with_greater_than.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -901,7 +904,6 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { HBasicBlock* outer_header = new (&allocator) HBasicBlock(graph); graph->AddBlock(outer_header); HPhi* phi_i = new (&allocator) HPhi(&allocator, 0, 0, Primitive::kPrimInt); - phi_i->AddInput(constant_0); HNullCheck* null_check = new (&allocator) HNullCheck(parameter, 0); HArrayLength* array_length = new (&allocator) HArrayLength(null_check); HAdd* add = new (&allocator) HAdd(Primitive::kPrimInt, array_length, constant_minus_1); @@ -913,11 +915,11 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { outer_header->AddInstruction(add); outer_header->AddInstruction(cmp); outer_header->AddInstruction(if_inst); + phi_i->AddInput(constant_0); HBasicBlock* inner_header = new (&allocator) HBasicBlock(graph); graph->AddBlock(inner_header); HPhi* phi_j = new (&allocator) HPhi(&allocator, 0, 0, Primitive::kPrimInt); - phi_j->AddInput(constant_0); null_check = new (&allocator) HNullCheck(parameter, 0); array_length = new (&allocator) HArrayLength(null_check); HSub* sub = new (&allocator) HSub(Primitive::kPrimInt, array_length, phi_i); @@ -931,6 +933,7 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { inner_header->AddInstruction(add); inner_header->AddInstruction(cmp); inner_header->AddInstruction(if_inst); + phi_j->AddInput(constant_0); HBasicBlock* inner_body_compare = new (&allocator) HBasicBlock(graph); graph->AddBlock(inner_body_compare); @@ -1030,7 +1033,7 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { outer_body_add->AddSuccessor(outer_header); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); // gvn should remove the same bounds check. ASSERT_FALSE(IsRemoved(bounds_check1)); ASSERT_FALSE(IsRemoved(bounds_check2)); diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index c5101363ee..3e4a6169d9 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -17,13 +17,13 @@ #ifndef ART_COMPILER_OPTIMIZING_BUILDER_H_ #define ART_COMPILER_OPTIMIZING_BUILDER_H_ +#include "base/arena_object.h" #include "dex_file.h" #include "dex_file-inl.h" #include "driver/compiler_driver.h" #include "driver/dex_compilation_unit.h" #include "optimizing_compiler_stats.h" #include "primitive.h" -#include "utils/arena_object.h" #include "utils/growable_array.h" #include "nodes.h" diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index fd4e391470..2a57fdc929 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -40,9 +40,17 @@ size_t CodeGenerator::GetCacheOffset(uint32_t index) { return mirror::ObjectArray<mirror::Object>::OffsetOfElement(index).SizeValue(); } -void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) { - DCHECK_EQ(frame_size_, kUninitializedFrameSize); +static bool IsSingleGoto(HBasicBlock* block) { + HLoopInformation* loop_info = block->GetLoopInformation(); + // TODO: Remove the null check b/19084197. 
+ return (block->GetFirstInstruction() != nullptr) + && (block->GetFirstInstruction() == block->GetLastInstruction()) + && block->GetLastInstruction()->IsGoto() + // Back edges generate the suspend check. + && (loop_info == nullptr || !loop_info->IsBackEdge(block)); +} +void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) { Initialize(); if (!is_leaf) { MarkNotLeaf(); @@ -58,19 +66,43 @@ void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) { CompileInternal(allocator, /* is_baseline */ true); } +bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const { + DCHECK_EQ(block_order_->Get(current_block_index_), current); + return GetNextBlockToEmit() == FirstNonEmptyBlock(next); +} + +HBasicBlock* CodeGenerator::GetNextBlockToEmit() const { + for (size_t i = current_block_index_ + 1; i < block_order_->Size(); ++i) { + HBasicBlock* block = block_order_->Get(i); + if (!IsSingleGoto(block)) { + return block; + } + } + return nullptr; +} + +HBasicBlock* CodeGenerator::FirstNonEmptyBlock(HBasicBlock* block) const { + while (IsSingleGoto(block)) { + block = block->GetSuccessors().Get(0); + } + return block; +} + void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) { - HGraphVisitor* location_builder = GetLocationBuilder(); HGraphVisitor* instruction_visitor = GetInstructionVisitor(); DCHECK_EQ(current_block_index_, 0u); GenerateFrameEntry(); for (size_t e = block_order_->Size(); current_block_index_ < e; ++current_block_index_) { HBasicBlock* block = block_order_->Get(current_block_index_); + // Don't generate code for an empty block. Its predecessors will branch to its successor + // directly. Also, the label of that block will not be emitted, so this helps catch + // errors where we reference that label. + if (IsSingleGoto(block)) continue; Bind(block); for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* current = it.Current(); if (is_baseline) { - current->Accept(location_builder); - InitLocations(current); + InitLocationsBaseline(current); } current->Accept(instruction_visitor); } @@ -88,7 +120,6 @@ void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) void CodeGenerator::CompileOptimized(CodeAllocator* allocator) { // The register allocator already called `InitializeCodeGeneration`, // where the frame size has been computed. - DCHECK_NE(frame_size_, kUninitializedFrameSize); DCHECK(block_order_ != nullptr); Initialize(); CompileInternal(allocator, /* is_baseline */ false); @@ -138,13 +169,22 @@ void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots, ComputeSpillMask(); first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize; - SetFrameSize(RoundUp( - number_of_spill_slots * kVRegSize - + number_of_out_slots * kVRegSize - + maximum_number_of_live_core_registers * GetWordSize() - + maximum_number_of_live_fp_registers * GetFloatingPointSpillSlotSize() - + FrameEntrySpillSize(), - kStackAlignment)); + if (number_of_spill_slots == 0 + && !HasAllocatedCalleeSaveRegisters() + && IsLeafMethod() + && !RequiresCurrentMethod()) { + DCHECK_EQ(maximum_number_of_live_core_registers, 0u); + DCHECK_EQ(maximum_number_of_live_fp_registers, 0u); + SetFrameSize(CallPushesPC() ? 
GetWordSize() : 0); + } else { + SetFrameSize(RoundUp( + number_of_spill_slots * kVRegSize + + number_of_out_slots * kVRegSize + + maximum_number_of_live_core_registers * GetWordSize() + + maximum_number_of_live_fp_registers * GetFloatingPointSpillSlotSize() + + FrameEntrySpillSize(), + kStackAlignment)); + } } Location CodeGenerator::GetTemporaryLocation(HTemporary* temp) const { @@ -294,7 +334,8 @@ void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const { } } -void CodeGenerator::InitLocations(HInstruction* instruction) { +void CodeGenerator::InitLocationsBaseline(HInstruction* instruction) { + AllocateLocations(instruction); if (instruction->GetLocations() == nullptr) { if (instruction->IsTemporary()) { HInstruction* previous = instruction->GetPrevious(); @@ -320,10 +361,17 @@ void CodeGenerator::InitLocations(HInstruction* instruction) { } } -bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const { - DCHECK_EQ(block_order_->Get(current_block_index_), current); - return (current_block_index_ < block_order_->Size() - 1) - && (block_order_->Get(current_block_index_ + 1) == next); +void CodeGenerator::AllocateLocations(HInstruction* instruction) { + instruction->Accept(GetLocationBuilder()); + LocationSummary* locations = instruction->GetLocations(); + if (!instruction->IsSuspendCheckEntry()) { + if (locations != nullptr && locations->CanCall()) { + MarkNotLeaf(); + } + if (instruction->NeedsCurrentMethod()) { + SetRequiresCurrentMethod(); + } + } } CodeGenerator* CodeGenerator::Create(HGraph* graph, @@ -572,7 +620,7 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) { Location location = locations->GetEnvironmentAt(i); switch (location.GetKind()) { case Location::kConstant: { - DCHECK(current == location.GetConstant()); + DCHECK_EQ(current, location.GetConstant()); if (current->IsLongConstant()) { int64_t value = current->AsLongConstant()->GetValue(); stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, Low32Bits(value)); @@ -588,6 +636,8 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) { } else if (current->IsIntConstant()) { int32_t value = current->AsIntConstant()->GetValue(); stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, value); + } else if (current->IsNullConstant()) { + stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, 0); } else { DCHECK(current->IsFloatConstant()); int32_t value = bit_cast<float, int32_t>(current->AsFloatConstant()->GetValue()); diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index ab63b911b2..f46a36d02f 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -30,7 +30,6 @@ namespace art { static size_t constexpr kVRegSize = 4; -static size_t constexpr kUninitializedFrameSize = 0; // Binary encoding of 2^32 for type double. 
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000); @@ -92,6 +91,8 @@ class CodeGenerator { HGraph* GetGraph() const { return graph_; } + HBasicBlock* GetNextBlockToEmit() const; + HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const; bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const; size_t GetStackSlotOfParameter(HParameterValue* parameter) const { @@ -107,8 +108,6 @@ class CodeGenerator { virtual void GenerateFrameExit() = 0; virtual void Bind(HBasicBlock* block) = 0; virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) = 0; - virtual HGraphVisitor* GetLocationBuilder() = 0; - virtual HGraphVisitor* GetInstructionVisitor() = 0; virtual Assembler* GetAssembler() = 0; virtual size_t GetWordSize() const = 0; virtual size_t GetFloatingPointSpillSlotSize() const = 0; @@ -196,6 +195,15 @@ class CodeGenerator { void MarkNotLeaf() { is_leaf_ = false; + requires_current_method_ = true; + } + + void SetRequiresCurrentMethod() { + requires_current_method_ = true; + } + + bool RequiresCurrentMethod() const { + return requires_current_method_; } // Clears the spill slots taken by loop phis in the `LocationSummary` of the @@ -228,6 +236,41 @@ class CodeGenerator { allocated_registers_.Add(location); } + void AllocateLocations(HInstruction* instruction); + + // Tells whether the stack frame of the compiled method is + // considered "empty", that is either actually having a size of zero, + // or just containing the saved return address register. + bool HasEmptyFrame() const { + return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0); + } + + static int32_t GetInt32ValueOf(HConstant* constant) { + if (constant->IsIntConstant()) { + return constant->AsIntConstant()->GetValue(); + } else if (constant->IsNullConstant()) { + return 0; + } else { + DCHECK(constant->IsFloatConstant()); + return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue()); + } + } + + static int64_t GetInt64ValueOf(HConstant* constant) { + if (constant->IsIntConstant()) { + return constant->AsIntConstant()->GetValue(); + } else if (constant->IsNullConstant()) { + return 0; + } else if (constant->IsFloatConstant()) { + return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue()); + } else if (constant->IsLongConstant()) { + return constant->AsLongConstant()->GetValue(); + } else { + DCHECK(constant->IsDoubleConstant()); + return bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue()); + } + } + protected: CodeGenerator(HGraph* graph, size_t number_of_core_registers, @@ -236,7 +279,7 @@ class CodeGenerator { uint32_t core_callee_save_mask, uint32_t fpu_callee_save_mask, const CompilerOptions& compiler_options) - : frame_size_(kUninitializedFrameSize), + : frame_size_(0), core_spill_mask_(0), fpu_spill_mask_(0), first_register_slot_in_slow_path_(0), @@ -255,6 +298,7 @@ class CodeGenerator { block_order_(nullptr), current_block_index_(0), is_leaf_(true), + requires_current_method_(false), stack_map_stream_(graph->GetArena()) {} // Register allocation logic. @@ -269,11 +313,12 @@ class CodeGenerator { virtual Location GetStackLocation(HLoadLocal* load) const = 0; virtual ParallelMoveResolver* GetMoveResolver() = 0; + virtual HGraphVisitor* GetLocationBuilder() = 0; + virtual HGraphVisitor* GetInstructionVisitor() = 0; // Returns the location of the first spilled entry for floating point registers, // relative to the stack pointer. 
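// A standalone sketch of what GetInt32ValueOf/GetInt64ValueOf above compute
// (illustrative tagged-union stand-in for HConstant; the real code dispatches on the
// HInstruction kind): null constants become 0, and float/double constants are
// reinterpreted as their raw IEEE-754 bit patterns.
#include <cstdint>
#include <cstring>

enum class ConstKind { kInt, kNull, kFloat, kLong, kDouble };
struct Constant { ConstKind kind; int64_t i; float f; double d; };

static int64_t GetInt64ValueOf(const Constant& c) {
  switch (c.kind) {
    case ConstKind::kInt:
    case ConstKind::kLong:   return c.i;
    case ConstKind::kNull:   return 0;
    case ConstKind::kFloat:  { int32_t bits; std::memcpy(&bits, &c.f, sizeof(bits)); return bits; }
    case ConstKind::kDouble: { int64_t bits; std::memcpy(&bits, &c.d, sizeof(bits)); return bits; }
  }
  return 0;
}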
uint32_t GetFpuSpillStart() const { - DCHECK_NE(frame_size_, kUninitializedFrameSize); return GetFrameSize() - FrameEntrySpillSize(); } @@ -289,6 +334,25 @@ class CodeGenerator { return GetFpuSpillSize() + GetCoreSpillSize(); } + bool HasAllocatedCalleeSaveRegisters() const { + // We check the core registers against 1 because it always comprises the return PC. + return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1) + || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0); + } + + bool CallPushesPC() const { + InstructionSet instruction_set = GetInstructionSet(); + return instruction_set == kX86 || instruction_set == kX86_64; + } + + // Arm64 has its own type for a label, so we need to templatize this method + // to share the logic. + template <typename T> + T* CommonGetLabelOf(T* raw_pointer_to_labels_array, HBasicBlock* block) const { + block = FirstNonEmptyBlock(block); + return raw_pointer_to_labels_array + block->GetBlockId(); + } + // Frame size required for this method. uint32_t frame_size_; uint32_t core_spill_mask_; @@ -311,7 +375,7 @@ class CodeGenerator { const uint32_t fpu_callee_save_mask_; private: - void InitLocations(HInstruction* instruction); + void InitLocationsBaseline(HInstruction* instruction); size_t GetStackOffsetOfSavedRegister(size_t index); void CompileInternal(CodeAllocator* allocator, bool is_baseline); @@ -328,8 +392,12 @@ class CodeGenerator { // we are generating code for. size_t current_block_index_; + // Whether the method is a leaf method. bool is_leaf_; + // Whether an instruction in the graph accesses the current method. + bool requires_current_method_; + StackMapStream stack_map_stream_; DISALLOW_COPY_AND_ASSIGN(CodeGenerator); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 78fd181dcf..e864ae1cec 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -19,6 +19,8 @@ #include "arch/arm/instruction_set_features_arm.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" +#include "intrinsics.h" +#include "intrinsics_arm.h" #include "mirror/array-inl.h" #include "mirror/art_method.h" #include "mirror/class.h" @@ -32,11 +34,6 @@ namespace art { namespace arm { -static DRegister FromLowSToD(SRegister reg) { - DCHECK_EQ(reg % 2, 0); - return static_cast<DRegister>(reg / 2); -} - static bool ExpectedPairLayout(Location location) { // We expected this for both core and fpu register pairs. return ((location.low() & 1) == 0) && (location.low() + 1 == location.high()); @@ -58,6 +55,10 @@ static constexpr Register kCoreCalleeSaves[] = static constexpr SRegister kFpuCalleeSaves[] = { S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31 }; +// D31 cannot be split into two S registers, and the register allocator only works on +// S registers. Therefore there is no need to block it. 
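// A minimal sketch of the label redirection done by CommonGetLabelOf above
// (illustrative Block type, not the ART HBasicBlock): blocks containing only a Goto
// emit no code and no label, so any branch to such a block must instead target the
// first successor that does emit code.
struct Block {
  bool single_goto;       // block contains exactly one instruction: a Goto
  Block* sole_successor;  // valid when single_goto is true
  int id;
};

static const Block* FirstNonEmptyBlock(const Block* block) {
  while (block->single_goto) {
    block = block->sole_successor;
  }
  return block;
}

template <typename Label>
static Label* GetLabelOf(Label* labels, const Block* block) {
  // Same indexing trick as CommonGetLabelOf: labels are stored per block id,
  // but lookups are redirected past empty blocks.
  return labels + FirstNonEmptyBlock(block)->id;
}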
+static constexpr DRegister DTMP = D31; + class InvokeRuntimeCallingConvention : public CallingConvention<Register, SRegister> { public: InvokeRuntimeCallingConvention() @@ -73,20 +74,6 @@ class InvokeRuntimeCallingConvention : public CallingConvention<Register, SRegis #define __ reinterpret_cast<ArmAssembler*>(codegen->GetAssembler())-> #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmWordSize, x).Int32Value() -class SlowPathCodeARM : public SlowPathCode { - public: - SlowPathCodeARM() : entry_label_(), exit_label_() {} - - Label* GetEntryLabel() { return &entry_label_; } - Label* GetExitLabel() { return &exit_label_; } - - private: - Label entry_label_; - Label exit_label_; - - DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARM); -}; - class NullCheckSlowPathARM : public SlowPathCodeARM { public: explicit NullCheckSlowPathARM(HNullCheck* instruction) : instruction_(instruction) {} @@ -396,10 +383,6 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, move_resolver_(graph->GetArena(), this), assembler_(true), isa_features_(isa_features) { - // Save one extra register for baseline. Note that on thumb2, there is no easy - // instruction to restore just the PC, so this actually helps both baseline - // and non-baseline to save and restore at least two registers at entry and exit. - AddAllocatedRegister(Location::RegisterLocation(kCoreSavedRegisterForBaseline)); // Save the PC register to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(PC)); } @@ -508,6 +491,10 @@ static uint32_t LeastSignificantBit(uint32_t mask) { void CodeGeneratorARM::ComputeSpillMask() { core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_; + // Save one extra register for baseline. Note that on thumb2, there is no easy + // instruction to restore just the PC, so this actually helps both baseline + // and non-baseline to save and restore at least two registers at entry and exit. 
+ core_spill_mask_ |= (1 << kCoreSavedRegisterForBaseline); DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved"; fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_; // We use vpush and vpop for saving and restoring floating point registers, which take @@ -529,6 +516,10 @@ void CodeGeneratorARM::GenerateFrameEntry() { DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); __ Bind(&frame_entry_label_); + if (HasEmptyFrame()) { + return; + } + if (!skip_overflow_check) { __ AddConstant(IP, SP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm))); __ LoadFromOffset(kLoadWord, IP, IP, 0); @@ -547,6 +538,10 @@ void CodeGeneratorARM::GenerateFrameEntry() { } void CodeGeneratorARM::GenerateFrameExit() { + if (HasEmptyFrame()) { + __ bx(LR); + return; + } __ AddConstant(SP, GetFrameSize() - FrameEntrySpillSize()); if (fpu_spill_mask_ != 0) { SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_)); @@ -784,8 +779,8 @@ void CodeGeneratorARM::Move(HInstruction* instruction, Location location, HInstr if (locations != nullptr && locations->Out().IsConstant()) { HConstant* const_to_move = locations->Out().GetConstant(); - if (const_to_move->IsIntConstant()) { - int32_t value = const_to_move->AsIntConstant()->GetValue(); + if (const_to_move->IsIntConstant() || const_to_move->IsNullConstant()) { + int32_t value = GetInt32ValueOf(const_to_move); if (location.IsRegister()) { __ LoadImmediate(location.AsRegister<Register>(), value); } else { @@ -952,8 +947,8 @@ void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) { __ cmp(left, ShifterOperand(locations->InAt(1).AsRegister<Register>())); } else { DCHECK(locations->InAt(1).IsConstant()); - int32_t value = - locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + HConstant* constant = locations->InAt(1).GetConstant(); + int32_t value = CodeGenerator::GetInt32ValueOf(constant); ShifterOperand operand; if (GetAssembler()->ShifterOperandCanHold(R0, left, CMP, value, &operand)) { __ cmp(left, operand); @@ -1114,6 +1109,17 @@ void InstructionCodeGeneratorARM::VisitIntConstant(HIntConstant* constant) { UNUSED(constant); } +void LocationsBuilderARM::VisitNullConstant(HNullConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorARM::VisitNullConstant(HNullConstant* constant) { + // Will be generated at use site. 
+ UNUSED(constant); +} + void LocationsBuilderARM::VisitLongConstant(HLongConstant* constant) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); @@ -1168,44 +1174,37 @@ void InstructionCodeGeneratorARM::VisitReturn(HReturn* ret) { } void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(), + codegen_->GetInstructionSetFeatures()); + if (intrinsic.TryDispatch(invoke)) { + return; + } + HandleInvoke(invoke); } void CodeGeneratorARM::LoadCurrentMethod(Register reg) { + DCHECK(RequiresCurrentMethod()); __ LoadFromOffset(kLoadWord, reg, SP, kCurrentMethodStackOffset); } -void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); - - // TODO: Implement all kinds of calls: - // 1) boot -> boot - // 2) app -> boot - // 3) app -> app - // - // Currently we implement the app -> app logic, which looks up in the resolve cache. +static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM* codegen) { + if (invoke->GetLocations()->Intrinsified()) { + IntrinsicCodeGeneratorARM intrinsic(codegen); + intrinsic.Dispatch(invoke); + return true; + } + return false; +} - // temp = method; - codegen_->LoadCurrentMethod(temp); - if (!invoke->IsRecursive()) { - // temp = temp->dex_cache_resolved_methods_; - __ LoadFromOffset( - kLoadWord, temp, temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()); - // temp = temp[index_in_cache] - __ LoadFromOffset( - kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())); - // LR = temp[offset_of_quick_compiled_code] - __ LoadFromOffset(kLoadWord, LR, temp, - mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kArmWordSize).Int32Value()); - // LR() - __ blx(LR); - } else { - __ bl(codegen_->GetFrameEntryLabel()); +void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + if (TryGenerateIntrinsicCode(invoke, codegen_)) { + return; } - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); - DCHECK(!codegen_->IsLeafMethod()); + Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); + + codegen_->GenerateStaticOrDirectCall(invoke, temp); } void LocationsBuilderARM::HandleInvoke(HInvoke* invoke) { @@ -1223,10 +1222,20 @@ void LocationsBuilderARM::HandleInvoke(HInvoke* invoke) { } void LocationsBuilderARM::VisitInvokeVirtual(HInvokeVirtual* invoke) { + IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(), + codegen_->GetInstructionSetFeatures()); + if (intrinsic.TryDispatch(invoke)) { + return; + } + HandleInvoke(invoke); } void InstructionCodeGeneratorARM::VisitInvokeVirtual(HInvokeVirtual* invoke) { + if (TryGenerateIntrinsicCode(invoke, codegen_)) { + return; + } + Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); uint32_t method_offset = mirror::Class::EmbeddedVTableOffset().Uint32Value() + invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry); @@ -3366,16 +3375,44 @@ void ParallelMoveResolverARM::EmitMove(size_t index) { __ StoreSToOffset(source.AsFpuRegister<SRegister>(), SP, destination.GetStackIndex()); } } else if (source.IsDoubleStackSlot()) { - DCHECK(destination.IsDoubleStackSlot()) << destination; - __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex()); - __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); - __ 
LoadFromOffset(kLoadWord, IP, SP, source.GetHighStackIndex(kArmWordSize)); - __ StoreToOffset(kStoreWord, IP, SP, destination.GetHighStackIndex(kArmWordSize)); + if (destination.IsDoubleStackSlot()) { + __ LoadDFromOffset(DTMP, SP, source.GetStackIndex()); + __ StoreDToOffset(DTMP, SP, destination.GetStackIndex()); + } else if (destination.IsRegisterPair()) { + DCHECK(ExpectedPairLayout(destination)); + __ LoadFromOffset( + kLoadWordPair, destination.AsRegisterPairLow<Register>(), SP, source.GetStackIndex()); + } else { + DCHECK(destination.IsFpuRegisterPair()) << destination; + __ LoadDFromOffset(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()), + SP, + source.GetStackIndex()); + } + } else if (source.IsRegisterPair()) { + if (destination.IsRegisterPair()) { + __ Mov(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairLow<Register>()); + __ Mov(destination.AsRegisterPairHigh<Register>(), source.AsRegisterPairHigh<Register>()); + } else { + DCHECK(destination.IsDoubleStackSlot()) << destination; + DCHECK(ExpectedPairLayout(source)); + __ StoreToOffset( + kStoreWordPair, source.AsRegisterPairLow<Register>(), SP, destination.GetStackIndex()); + } + } else if (source.IsFpuRegisterPair()) { + if (destination.IsFpuRegisterPair()) { + __ vmovd(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()), + FromLowSToD(source.AsFpuRegisterPairLow<SRegister>())); + } else { + DCHECK(destination.IsDoubleStackSlot()) << destination; + __ StoreDToOffset(FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()), + SP, + destination.GetStackIndex()); + } } else { DCHECK(source.IsConstant()) << source; - HInstruction* constant = source.GetConstant(); - if (constant->IsIntConstant()) { - int32_t value = constant->AsIntConstant()->GetValue(); + HConstant* constant = source.GetConstant(); + if (constant->IsIntConstant() || constant->IsNullConstant()) { + int32_t value = CodeGenerator::GetInt32ValueOf(constant); if (destination.IsRegister()) { __ LoadImmediate(destination.AsRegister<Register>(), value); } else { @@ -3385,17 +3422,11 @@ void ParallelMoveResolverARM::EmitMove(size_t index) { } } else if (constant->IsLongConstant()) { int64_t value = constant->AsLongConstant()->GetValue(); - if (destination.IsRegister()) { - // In the presence of long or double constants, the parallel move resolver will - // split the move into two, but keeps the same constant for both moves. Here, - // we use the low or high part depending on which register this move goes to. 
- if (destination.reg() % 2 == 0) { - __ LoadImmediate(destination.AsRegister<Register>(), Low32Bits(value)); - } else { - __ LoadImmediate(destination.AsRegister<Register>(), High32Bits(value)); - } + if (destination.IsRegisterPair()) { + __ LoadImmediate(destination.AsRegisterPairLow<Register>(), Low32Bits(value)); + __ LoadImmediate(destination.AsRegisterPairHigh<Register>(), High32Bits(value)); } else { - DCHECK(destination.IsDoubleStackSlot()); + DCHECK(destination.IsDoubleStackSlot()) << destination; __ LoadImmediate(IP, Low32Bits(value)); __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); __ LoadImmediate(IP, High32Bits(value)); @@ -3403,20 +3434,11 @@ void ParallelMoveResolverARM::EmitMove(size_t index) { } } else if (constant->IsDoubleConstant()) { double value = constant->AsDoubleConstant()->GetValue(); - uint64_t int_value = bit_cast<uint64_t, double>(value); - if (destination.IsFpuRegister()) { - // In the presence of long or double constants, the parallel move resolver will - // split the move into two, but keeps the same constant for both moves. Here, - // we use the low or high part depending on which register this move goes to. - if (destination.reg() % 2 == 0) { - __ LoadSImmediate(destination.AsFpuRegister<SRegister>(), - bit_cast<float, uint32_t>(Low32Bits(int_value))); - } else { - __ LoadSImmediate(destination.AsFpuRegister<SRegister>(), - bit_cast<float, uint32_t>(High32Bits(int_value))); - } + if (destination.IsFpuRegisterPair()) { + __ LoadDImmediate(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()), value); } else { - DCHECK(destination.IsDoubleStackSlot()); + DCHECK(destination.IsDoubleStackSlot()) << destination; + uint64_t int_value = bit_cast<uint64_t, double>(value); __ LoadImmediate(IP, Low32Bits(int_value)); __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); __ LoadImmediate(IP, High32Bits(int_value)); @@ -3474,6 +3496,40 @@ void ParallelMoveResolverARM::EmitSwap(size_t index) { __ vmovrs(IP, source.AsFpuRegister<SRegister>()); __ vmovs(source.AsFpuRegister<SRegister>(), destination.AsFpuRegister<SRegister>()); __ vmovsr(destination.AsFpuRegister<SRegister>(), IP); + } else if (source.IsRegisterPair() && destination.IsRegisterPair()) { + __ vmovdrr(DTMP, source.AsRegisterPairLow<Register>(), source.AsRegisterPairHigh<Register>()); + __ Mov(source.AsRegisterPairLow<Register>(), destination.AsRegisterPairLow<Register>()); + __ Mov(source.AsRegisterPairHigh<Register>(), destination.AsRegisterPairHigh<Register>()); + __ vmovrrd(destination.AsRegisterPairLow<Register>(), + destination.AsRegisterPairHigh<Register>(), + DTMP); + } else if (source.IsRegisterPair() || destination.IsRegisterPair()) { + Register low_reg = source.IsRegisterPair() + ? source.AsRegisterPairLow<Register>() + : destination.AsRegisterPairLow<Register>(); + int mem = source.IsRegisterPair() + ? destination.GetStackIndex() + : source.GetStackIndex(); + DCHECK(ExpectedPairLayout(source.IsRegisterPair() ? 
source : destination)); + __ vmovdrr(DTMP, low_reg, static_cast<Register>(low_reg + 1)); + __ LoadFromOffset(kLoadWordPair, low_reg, SP, mem); + __ StoreDToOffset(DTMP, SP, mem); + } else if (source.IsFpuRegisterPair() && destination.IsFpuRegisterPair()) { + DRegister first = FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()); + DRegister second = FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()); + __ vmovd(DTMP, first); + __ vmovd(first, second); + __ vmovd(second, DTMP); + } else if (source.IsFpuRegisterPair() || destination.IsFpuRegisterPair()) { + DRegister reg = source.IsFpuRegisterPair() + ? FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()) + : FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()); + int mem = source.IsFpuRegisterPair() + ? destination.GetStackIndex() + : source.GetStackIndex(); + __ vmovd(DTMP, reg); + __ LoadDFromOffset(reg, SP, mem); + __ StoreDToOffset(DTMP, SP, mem); } else if (source.IsFpuRegister() || destination.IsFpuRegister()) { SRegister reg = source.IsFpuRegister() ? source.AsFpuRegister<SRegister>() : destination.AsFpuRegister<SRegister>(); @@ -3482,7 +3538,7 @@ void ParallelMoveResolverARM::EmitSwap(size_t index) { : source.GetStackIndex(); __ vmovrs(IP, reg); - __ LoadFromOffset(kLoadWord, IP, SP, mem); + __ LoadSFromOffset(reg, SP, mem); __ StoreToOffset(kStoreWord, IP, SP, mem); } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) { Exchange(source.GetStackIndex(), destination.GetStackIndex()); @@ -3776,5 +3832,50 @@ void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instr } } +void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Register temp) { + DCHECK_EQ(temp, kArtMethodRegister); + + // TODO: Implement all kinds of calls: + // 1) boot -> boot + // 2) app -> boot + // 3) app -> app + // + // Currently we implement the app -> app logic, which looks up in the resolve cache. + + // temp = method; + LoadCurrentMethod(temp); + if (!invoke->IsRecursive()) { + // temp = temp->dex_cache_resolved_methods_; + __ LoadFromOffset( + kLoadWord, temp, temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()); + // temp = temp[index_in_cache] + __ LoadFromOffset( + kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())); + // LR = temp[offset_of_quick_compiled_code] + __ LoadFromOffset(kLoadWord, LR, temp, + mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kArmWordSize).Int32Value()); + // LR() + __ blx(LR); + } else { + __ bl(GetFrameEntryLabel()); + } + + RecordPcInfo(invoke, invoke->GetDexPc()); + DCHECK(!IsLeafMethod()); +} + +void LocationsBuilderARM::VisitBoundType(HBoundType* instruction) { + // Nothing to do, this should be removed during prepare for register allocator. + UNUSED(instruction); + LOG(FATAL) << "Unreachable"; +} + +void InstructionCodeGeneratorARM::VisitBoundType(HBoundType* instruction) { + // Nothing to do, this should be removed during prepare for register allocator. 
+ UNUSED(instruction); + LOG(FATAL) << "Unreachable"; +} + } // namespace arm } // namespace art diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 4b03dffd38..f1a3729c13 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -39,6 +39,14 @@ static constexpr SRegister kParameterFpuRegisters[] = { S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15 }; static constexpr size_t kParameterFpuRegistersLength = arraysize(kParameterFpuRegisters); +static constexpr Register kArtMethodRegister = R0; + +static constexpr DRegister FromLowSToD(SRegister reg) { + return DCHECK_CONSTEXPR(reg % 2 == 0, , D0) + static_cast<DRegister>(reg / 2); +} + + class InvokeDexCallingConvention : public CallingConvention<Register, SRegister> { public: InvokeDexCallingConvention() @@ -90,6 +98,20 @@ class ParallelMoveResolverARM : public ParallelMoveResolver { DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARM); }; +class SlowPathCodeARM : public SlowPathCode { + public: + SlowPathCodeARM() : entry_label_(), exit_label_() {} + + Label* GetEntryLabel() { return &entry_label_; } + Label* GetExitLabel() { return &exit_label_; } + + private: + Label entry_label_; + Label exit_label_; + + DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARM); +}; + class LocationsBuilderARM : public HGraphVisitor { public: LocationsBuilderARM(HGraph* graph, CodeGeneratorARM* codegen) @@ -230,7 +252,7 @@ class CodeGeneratorARM : public CodeGenerator { void MarkGCCard(Register temp, Register card, Register object, Register value); Label* GetLabelOf(HBasicBlock* block) const { - return block_labels_.GetRawStorage() + block->GetBlockId(); + return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block); } void Initialize() OVERRIDE { @@ -249,6 +271,8 @@ class CodeGeneratorARM : public CodeGenerator { Label* GetFrameEntryLabel() { return &frame_entry_label_; } + void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Register temp); + private: // Labels for each block that will be compiled. GrowableArray<Label> block_labels_; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 3bc23fe4f3..0d7864fa35 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -402,15 +402,15 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, const CompilerOptions& com kNumberOfAllocatableRegisters, kNumberOfAllocatableFPRegisters, kNumberOfAllocatableRegisterPairs, - (1 << LR), - 0, + callee_saved_core_registers.list(), + callee_saved_fp_registers.list(), compiler_options), block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this) { // Save the link register (containing the return address) to mimic Quick. 
- AddAllocatedRegister(Location::RegisterLocation(LR)); + AddAllocatedRegister(LocationFrom(lr)); } #undef __ @@ -448,27 +448,32 @@ void CodeGeneratorARM64::GenerateFrameEntry() { UseScratchRegisterScope temps(GetVIXLAssembler()); Register temp = temps.AcquireX(); DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); - __ Add(temp, sp, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64))); + __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64))); __ Ldr(wzr, MemOperand(temp, 0)); RecordPcInfo(nullptr, 0); } - int frame_size = GetFrameSize(); - __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex)); - __ PokeCPURegList(GetFramePreservedRegisters(), frame_size - FrameEntrySpillSize()); - - // Stack layout: - // sp[frame_size - 8] : lr. - // ... : other preserved registers. - // sp[frame_size - regs_size]: first preserved register. - // ... : reserved frame space. - // sp[0] : current method. + if (!HasEmptyFrame()) { + int frame_size = GetFrameSize(); + // Stack layout: + // sp[frame_size - 8] : lr. + // ... : other preserved core registers. + // ... : other preserved fp registers. + // ... : reserved frame space. + // sp[0] : current method. + __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex)); + __ PokeCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); + __ PokeCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); + } } void CodeGeneratorARM64::GenerateFrameExit() { - int frame_size = GetFrameSize(); - __ PeekCPURegList(GetFramePreservedRegisters(), frame_size - FrameEntrySpillSize()); - __ Drop(frame_size); + if (!HasEmptyFrame()) { + int frame_size = GetFrameSize(); + __ PeekCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); + __ PeekCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); + __ Drop(frame_size); + } } void CodeGeneratorARM64::Bind(HBasicBlock* block) { @@ -486,18 +491,21 @@ void CodeGeneratorARM64::Move(HInstruction* instruction, Primitive::Type type = instruction->GetType(); DCHECK_NE(type, Primitive::kPrimVoid); - if (instruction->IsIntConstant() || instruction->IsLongConstant()) { - int64_t value = instruction->IsIntConstant() ? instruction->AsIntConstant()->GetValue() - : instruction->AsLongConstant()->GetValue(); + if (instruction->IsIntConstant() + || instruction->IsLongConstant() + || instruction->IsNullConstant()) { + int64_t value = GetInt64ValueOf(instruction->AsConstant()); if (location.IsRegister()) { Register dst = RegisterFrom(location, type); - DCHECK((instruction->IsIntConstant() && dst.Is32Bits()) || + DCHECK(((instruction->IsIntConstant() || instruction->IsNullConstant()) && dst.Is32Bits()) || (instruction->IsLongConstant() && dst.Is64Bits())); __ Mov(dst, value); } else { DCHECK(location.IsStackSlot() || location.IsDoubleStackSlot()); UseScratchRegisterScope temps(GetVIXLAssembler()); - Register temp = instruction->IsIntConstant() ? temps.AcquireW() : temps.AcquireX(); + Register temp = (instruction->IsIntConstant() || instruction->IsNullConstant()) + ? 
temps.AcquireW() + : temps.AcquireX(); __ Mov(temp, value); __ Str(temp, StackOperandFrom(location)); } @@ -555,26 +563,38 @@ void CodeGeneratorARM64::MarkGCCard(Register object, Register value) { __ Bind(&done); } -void CodeGeneratorARM64::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSED) const { - // Block reserved registers: - // ip0 (VIXL temporary) - // ip1 (VIXL temporary) - // tr - // lr - // sp is not part of the allocatable registers, so we don't need to block it. - // TODO: Avoid blocking callee-saved registers, and instead preserve them - // where necessary. +void CodeGeneratorARM64::SetupBlockedRegisters(bool is_baseline) const { + // Blocked core registers: + // lr : Runtime reserved. + // tr : Runtime reserved. + // xSuspend : Runtime reserved. TODO: Unblock this when the runtime stops using it. + // ip1 : VIXL core temp. + // ip0 : VIXL core temp. + // + // Blocked fp registers: + // d31 : VIXL fp temp. CPURegList reserved_core_registers = vixl_reserved_core_registers; reserved_core_registers.Combine(runtime_reserved_core_registers); - reserved_core_registers.Combine(quick_callee_saved_registers); while (!reserved_core_registers.IsEmpty()) { blocked_core_registers_[reserved_core_registers.PopLowestIndex().code()] = true; } + CPURegList reserved_fp_registers = vixl_reserved_fp_registers; - reserved_fp_registers.Combine(CPURegList::GetCalleeSavedFP()); - while (!reserved_core_registers.IsEmpty()) { + while (!reserved_fp_registers.IsEmpty()) { blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().code()] = true; } + + if (is_baseline) { + CPURegList reserved_core_baseline_registers = callee_saved_core_registers; + while (!reserved_core_baseline_registers.IsEmpty()) { + blocked_core_registers_[reserved_core_baseline_registers.PopLowestIndex().code()] = true; + } + + CPURegList reserved_fp_baseline_registers = callee_saved_fp_registers; + while (!reserved_fp_baseline_registers.IsEmpty()) { + blocked_fpu_registers_[reserved_fp_baseline_registers.PopLowestIndex().code()] = true; + } + } } Location CodeGeneratorARM64::AllocateFreeRegister(Primitive::Type type) const { @@ -626,10 +646,12 @@ void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg } void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) { - if (constant->IsIntConstant() || constant->IsLongConstant()) { - __ Mov(Register(destination), - constant->IsIntConstant() ? constant->AsIntConstant()->GetValue() - : constant->AsLongConstant()->GetValue()); + if (constant->IsIntConstant()) { + __ Mov(Register(destination), constant->AsIntConstant()->GetValue()); + } else if (constant->IsLongConstant()) { + __ Mov(Register(destination), constant->AsLongConstant()->GetValue()); + } else if (constant->IsNullConstant()) { + __ Mov(Register(destination), 0); } else if (constant->IsFloatConstant()) { __ Fmov(FPRegister(destination), constant->AsFloatConstant()->GetValue()); } else { @@ -643,6 +665,8 @@ static bool CoherentConstantAndType(Location constant, Primitive::Type type) { DCHECK(constant.IsConstant()); HConstant* cst = constant.GetConstant(); return (cst->IsIntConstant() && type == Primitive::kPrimInt) || + // Null is mapped to a core W register, which we associate with kPrimInt. 
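+         // (It is materialized as the 32-bit value 0; see GetInt64ValueOf() in code_generator.h.)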
+ (cst->IsNullConstant() && type == Primitive::kPrimInt) || (cst->IsLongConstant() && type == Primitive::kPrimLong) || (cst->IsFloatConstant() && type == Primitive::kPrimFloat) || (cst->IsDoubleConstant() && type == Primitive::kPrimDouble); @@ -663,7 +687,9 @@ void CodeGeneratorARM64::MoveLocation(Location destination, Location source, Pri if (unspecified_type) { HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr; if (source.IsStackSlot() || - (src_cst != nullptr && (src_cst->IsIntConstant() || src_cst->IsFloatConstant()))) { + (src_cst != nullptr && (src_cst->IsIntConstant() + || src_cst->IsFloatConstant() + || src_cst->IsNullConstant()))) { // For stack slots and 32bit constants, a 64bit type is appropriate. type = destination.IsRegister() ? Primitive::kPrimInt : Primitive::kPrimFloat; } else { @@ -709,7 +735,7 @@ void CodeGeneratorARM64::MoveLocation(Location destination, Location source, Pri UseScratchRegisterScope temps(GetVIXLAssembler()); HConstant* src_cst = source.GetConstant(); CPURegister temp; - if (src_cst->IsIntConstant()) { + if (src_cst->IsIntConstant() || src_cst->IsNullConstant()) { temp = temps.AcquireW(); } else if (src_cst->IsLongConstant()) { temp = temps.AcquireX(); @@ -947,6 +973,7 @@ void CodeGeneratorARM64::StoreRelease(Primitive::Type type, } void CodeGeneratorARM64::LoadCurrentMethod(vixl::Register current_method) { + DCHECK(RequiresCurrentMethod()); DCHECK(current_method.IsW()); __ Ldr(current_method, MemOperand(sp, kCurrentMethodStackOffset)); } @@ -1370,7 +1397,13 @@ void LocationsBuilderARM64::VisitCompare(HCompare* compare) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); + HInstruction* right = compare->InputAt(1); + if ((right->IsFloatConstant() && (right->AsFloatConstant()->GetValue() == 0.0f)) || + (right->IsDoubleConstant() && (right->AsDoubleConstant()->GetValue() == 0.0))) { + locations->SetInAt(1, Location::ConstantLocation(right->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresFpuRegister()); + } locations->SetOut(Location::RequiresRegister()); break; } @@ -1400,9 +1433,17 @@ void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) { case Primitive::kPrimDouble: { Register result = OutputRegister(compare); FPRegister left = InputFPRegisterAt(compare, 0); - FPRegister right = InputFPRegisterAt(compare, 1); - - __ Fcmp(left, right); + if (compare->GetLocations()->InAt(1).IsConstant()) { + if (kIsDebugBuild) { + HInstruction* right = compare->GetLocations()->InAt(1).GetConstant(); + DCHECK((right->IsFloatConstant() && (right->AsFloatConstant()->GetValue() == 0.0f)) || + (right->IsDoubleConstant() && (right->AsDoubleConstant()->GetValue() == 0.0))); + } + // 0.0 is the only immediate that can be encoded directly in a FCMP instruction. + __ Fcmp(left, 0.0); + } else { + __ Fcmp(left, InputFPRegisterAt(compare, 1)); + } if (compare->IsGtBias()) { __ Cset(result, ne); } else { @@ -1752,6 +1793,16 @@ void InstructionCodeGeneratorARM64::VisitIntConstant(HIntConstant* constant) { UNUSED(constant); } +void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorARM64::VisitNullConstant(HNullConstant* constant) { + // Will be generated at use site. 
+ UNUSED(constant); +} + void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall); @@ -2545,6 +2596,18 @@ void InstructionCodeGeneratorARM64::VisitXor(HXor* instruction) { HandleBinaryOp(instruction); } +void LocationsBuilderARM64::VisitBoundType(HBoundType* instruction) { + // Nothing to do, this should be removed during prepare for register allocator. + UNUSED(instruction); + LOG(FATAL) << "Unreachable"; +} + +void InstructionCodeGeneratorARM64::VisitBoundType(HBoundType* instruction) { + // Nothing to do, this should be removed during prepare for register allocator. + UNUSED(instruction); + LOG(FATAL) << "Unreachable"; +} + #undef __ #undef QUICK_ENTRY_POINT diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 9a99dcccea..afb7fc3718 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -50,14 +50,24 @@ static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegi const vixl::Register tr = vixl::x18; // Thread Register static const vixl::Register kArtMethodRegister = vixl::w0; // Method register on invoke. +const vixl::Register kQuickSuspendRegister = vixl::x19; const vixl::CPURegList vixl_reserved_core_registers(vixl::ip0, vixl::ip1); const vixl::CPURegList vixl_reserved_fp_registers(vixl::d31); -const vixl::CPURegList runtime_reserved_core_registers(tr, vixl::lr); -const vixl::CPURegList quick_callee_saved_registers(vixl::CPURegister::kRegister, - vixl::kXRegSize, - kArm64CalleeSaveRefSpills); +// TODO: When the runtime does not use kQuickSuspendRegister as a suspend +// counter remove it from the reserved registers list. +const vixl::CPURegList runtime_reserved_core_registers(tr, kQuickSuspendRegister, vixl::lr); + +// Callee-saved registers defined by AAPCS64. 
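+// AAPCS64 only guarantees the low 64 bits of v8-v15 across calls, hence the FP list
+// below is expressed in D registers; the core list spans x19-x28 plus x29 (FP) and
+// x30 (LR).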
+const vixl::CPURegList callee_saved_core_registers(vixl::CPURegister::kRegister, + vixl::kXRegSize, + vixl::x19.code(), + vixl::x30.code()); +const vixl::CPURegList callee_saved_fp_registers(vixl::CPURegister::kFPRegister, + vixl::kDRegSize, + vixl::d8.code(), + vixl::d15.code()); Location ARM64ReturnLocation(Primitive::Type return_type); class SlowPathCodeARM64 : public SlowPathCode { @@ -191,16 +201,20 @@ class CodeGeneratorARM64 : public CodeGenerator { void GenerateFrameEntry() OVERRIDE; void GenerateFrameExit() OVERRIDE; - static const vixl::CPURegList& GetFramePreservedRegisters() { - static const vixl::CPURegList frame_preserved_regs = - vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize, vixl::lr.Bit()); - return frame_preserved_regs; + vixl::CPURegList GetFramePreservedCoreRegisters() const { + return vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize, + core_spill_mask_); + } + + vixl::CPURegList GetFramePreservedFPRegisters() const { + return vixl::CPURegList(vixl::CPURegister::kFPRegister, vixl::kDRegSize, + fpu_spill_mask_); } void Bind(HBasicBlock* block) OVERRIDE; vixl::Label* GetLabelOf(HBasicBlock* block) const { - return block_labels_ + block->GetBlockId(); + return CommonGetLabelOf<vixl::Label>(block_labels_, block); } void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 98f93a418a..1101569174 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -470,12 +470,16 @@ void CodeGeneratorX86::GenerateFrameEntry() { RecordPcInfo(nullptr, 0); } - __ subl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); - __ movl(Address(ESP, kCurrentMethodStackOffset), EAX); + if (!HasEmptyFrame()) { + __ subl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); + __ movl(Address(ESP, kCurrentMethodStackOffset), EAX); + } } void CodeGeneratorX86::GenerateFrameExit() { - __ addl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); + if (!HasEmptyFrame()) { + __ addl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); + } } void CodeGeneratorX86::Bind(HBasicBlock* block) { @@ -483,6 +487,7 @@ void CodeGeneratorX86::Bind(HBasicBlock* block) { } void CodeGeneratorX86::LoadCurrentMethod(Register reg) { + DCHECK(RequiresCurrentMethod()); __ movl(reg, Address(ESP, kCurrentMethodStackOffset)); } @@ -597,13 +602,7 @@ void CodeGeneratorX86::Move32(Location destination, Location source) { __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>()); } else if (source.IsConstant()) { HConstant* constant = source.GetConstant(); - int32_t value; - if (constant->IsIntConstant()) { - value = constant->AsIntConstant()->GetValue(); - } else { - DCHECK(constant->IsFloatConstant()); - value = bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue()); - } + int32_t value = GetInt32ValueOf(constant); __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value)); } else { DCHECK(source.IsStackSlot()); @@ -669,8 +668,8 @@ void CodeGeneratorX86::Move(HInstruction* instruction, Location location, HInstr if (locations != nullptr && locations->Out().IsConstant()) { HConstant* const_to_move = locations->Out().GetConstant(); - if (const_to_move->IsIntConstant()) { - Immediate imm(const_to_move->AsIntConstant()->GetValue()); + if (const_to_move->IsIntConstant() || const_to_move->IsNullConstant()) { + Immediate imm(GetInt32ValueOf(const_to_move)); 
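      // GetInt32ValueOf() (see code_generator.h) folds a null constant to 0, so null
      // constants take the same integer-immediate paths below, e.g.
      //   __ movl(location.AsRegister<Register>(), Immediate(0));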
if (location.IsRegister()) { __ movl(location.AsRegister<Register>(), imm); } else if (location.IsStackSlot()) { @@ -920,7 +919,7 @@ void InstructionCodeGeneratorX86::VisitCondition(HCondition* comp) { locations->InAt(1).AsRegister<Register>()); } else if (locations->InAt(1).IsConstant()) { HConstant* instruction = locations->InAt(1).GetConstant(); - Immediate imm(instruction->AsIntConstant()->GetValue()); + Immediate imm(CodeGenerator::GetInt32ValueOf(instruction)); __ cmpl(locations->InAt(0).AsRegister<Register>(), imm); } else { __ cmpl(locations->InAt(0).AsRegister<Register>(), @@ -989,6 +988,17 @@ void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant) { UNUSED(constant); } +void LocationsBuilderX86::VisitNullConstant(HNullConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorX86::VisitNullConstant(HNullConstant* constant) { + // Will be generated at use site. + UNUSED(constant); +} + void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); @@ -1799,7 +1809,7 @@ void LocationsBuilderX86::VisitAdd(HAdd* add) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::Any()); + locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -1843,8 +1853,6 @@ void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) { case Primitive::kPrimFloat: { if (second.IsFpuRegister()) { __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); - } else { - __ addss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex())); } break; } @@ -1852,8 +1860,6 @@ void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) { case Primitive::kPrimDouble: { if (second.IsFpuRegister()) { __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); - } else { - __ addsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex())); } break; } @@ -3495,8 +3501,8 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { } } else if (source.IsConstant()) { HConstant* constant = source.GetConstant(); - if (constant->IsIntConstant()) { - Immediate imm(constant->AsIntConstant()->GetValue()); + if (constant->IsIntConstant() || constant->IsNullConstant()) { + Immediate imm(CodeGenerator::GetInt32ValueOf(constant)); if (destination.IsRegister()) { __ movl(destination.AsRegister<Register>(), imm); } else { @@ -3904,5 +3910,17 @@ void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instr } } +void LocationsBuilderX86::VisitBoundType(HBoundType* instruction) { + // Nothing to do, this should be removed during prepare for register allocator. + UNUSED(instruction); + LOG(FATAL) << "Unreachable"; +} + +void InstructionCodeGeneratorX86::VisitBoundType(HBoundType* instruction) { + // Nothing to do, this should be removed during prepare for register allocator. 
+ UNUSED(instruction); + LOG(FATAL) << "Unreachable"; +} + } // namespace x86 } // namespace art diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 107ddafea4..f5a9b7d1f7 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -234,7 +234,7 @@ class CodeGeneratorX86 : public CodeGenerator { void LoadCurrentMethod(Register reg); Label* GetLabelOf(HBasicBlock* block) const { - return block_labels_.GetRawStorage() + block->GetBlockId(); + return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block); } void Initialize() OVERRIDE { diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 2ff53a0603..41a19e11f0 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -487,6 +487,10 @@ void CodeGeneratorX86_64::GenerateFrameEntry() { RecordPcInfo(nullptr, 0); } + if (HasEmptyFrame()) { + return; + } + for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) { Register reg = kCoreCalleeSaves[i]; if (allocated_registers_.ContainsCoreRegister(reg)) { @@ -509,6 +513,9 @@ void CodeGeneratorX86_64::GenerateFrameEntry() { } void CodeGeneratorX86_64::GenerateFrameExit() { + if (HasEmptyFrame()) { + return; + } uint32_t xmm_spill_location = GetFpuSpillStart(); size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize(); for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { @@ -533,6 +540,7 @@ void CodeGeneratorX86_64::Bind(HBasicBlock* block) { } void CodeGeneratorX86_64::LoadCurrentMethod(CpuRegister reg) { + DCHECK(RequiresCurrentMethod()); __ movl(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset)); } @@ -599,13 +607,7 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) { source.AsFpuRegister<XmmRegister>()); } else if (source.IsConstant()) { HConstant* constant = source.GetConstant(); - int32_t value; - if (constant->IsFloatConstant()) { - value = bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue()); - } else { - DCHECK(constant->IsIntConstant()); - value = constant->AsIntConstant()->GetValue(); - } + int32_t value = GetInt32ValueOf(constant); __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value)); } else { DCHECK(source.IsStackSlot()) << source; @@ -649,8 +651,8 @@ void CodeGeneratorX86_64::Move(HInstruction* instruction, if (locations != nullptr && locations->Out().IsConstant()) { HConstant* const_to_move = locations->Out().GetConstant(); - if (const_to_move->IsIntConstant()) { - Immediate imm(const_to_move->AsIntConstant()->GetValue()); + if (const_to_move->IsIntConstant() || const_to_move->IsNullConstant()) { + Immediate imm(GetInt32ValueOf(const_to_move)); if (location.IsRegister()) { __ movl(location.AsRegister<CpuRegister>(), imm); } else if (location.IsStackSlot()) { @@ -790,7 +792,7 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { // Materialized condition, compare against 0. 
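// (For a register operand, `test reg, reg` sets ZF/SF exactly like `cmp reg, 0` while
// needing no immediate byte, so the change below prefers testl; the stack-slot operand
// keeps cmpl against Immediate(0).)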
Location lhs = if_instr->GetLocations()->InAt(0); if (lhs.IsRegister()) { - __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(0)); + __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); } else { __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0)); @@ -806,8 +808,12 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { if (rhs.IsRegister()) { __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>()); } else if (rhs.IsConstant()) { - __ cmpl(lhs.AsRegister<CpuRegister>(), - Immediate(rhs.GetConstant()->AsIntConstant()->GetValue())); + int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); + if (constant == 0) { + __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); + } else { + __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant)); + } } else { __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex())); @@ -883,15 +889,19 @@ void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* comp) { CpuRegister reg = locations->Out().AsRegister<CpuRegister>(); // Clear register: setcc only sets the low byte. __ xorq(reg, reg); - if (locations->InAt(1).IsRegister()) { - __ cmpl(locations->InAt(0).AsRegister<CpuRegister>(), - locations->InAt(1).AsRegister<CpuRegister>()); - } else if (locations->InAt(1).IsConstant()) { - __ cmpl(locations->InAt(0).AsRegister<CpuRegister>(), - Immediate(locations->InAt(1).GetConstant()->AsIntConstant()->GetValue())); + Location lhs = locations->InAt(0); + Location rhs = locations->InAt(1); + if (rhs.IsRegister()) { + __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>()); + } else if (rhs.IsConstant()) { + int32_t constant = rhs.GetConstant()->AsIntConstant()->GetValue(); + if (constant == 0) { + __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); + } else { + __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant)); + } } else { - __ cmpl(locations->InAt(0).AsRegister<CpuRegister>(), - Address(CpuRegister(RSP), locations->InAt(1).GetStackIndex())); + __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex())); } __ setcc(X86_64Condition(comp->GetCondition()), reg); } @@ -1018,6 +1028,17 @@ void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant) { UNUSED(constant); } +void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant) { + // Will be generated at use site. 
+ UNUSED(constant); +} + void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); @@ -1840,8 +1861,8 @@ void LocationsBuilderX86_64::VisitAdd(HAdd* add) { switch (add->GetResultType()) { case Primitive::kPrimInt: { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::Any()); - locations->SetOut(Location::SameAsFirstInput()); + locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } @@ -1869,16 +1890,27 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) { LocationSummary* locations = add->GetLocations(); Location first = locations->InAt(0); Location second = locations->InAt(1); - DCHECK(first.Equals(locations->Out())); + Location out = locations->Out(); switch (add->GetResultType()) { case Primitive::kPrimInt: { if (second.IsRegister()) { - __ addl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); + if (out.AsRegister<Register>() == first.AsRegister<Register>()) { + __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); + } else { + __ leal(out.AsRegister<CpuRegister>(), Address( + first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0)); + } } else if (second.IsConstant()) { - Immediate imm(second.GetConstant()->AsIntConstant()->GetValue()); - __ addl(first.AsRegister<CpuRegister>(), imm); + if (out.AsRegister<Register>() == first.AsRegister<Register>()) { + __ addl(out.AsRegister<CpuRegister>(), + Immediate(second.GetConstant()->AsIntConstant()->GetValue())); + } else { + __ leal(out.AsRegister<CpuRegister>(), Address( + first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue())); + } } else { + DCHECK(first.Equals(locations->Out())); __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex())); } break; @@ -2754,7 +2786,7 @@ void InstructionCodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instr Location obj = locations->InAt(0); if (obj.IsRegister()) { - __ cmpl(obj.AsRegister<CpuRegister>(), Immediate(0)); + __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>()); } else if (obj.IsStackSlot()) { __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0)); } else { @@ -3236,13 +3268,17 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { } } else if (source.IsConstant()) { HConstant* constant = source.GetConstant(); - if (constant->IsIntConstant()) { - Immediate imm(constant->AsIntConstant()->GetValue()); + if (constant->IsIntConstant() || constant->IsNullConstant()) { + int32_t value = CodeGenerator::GetInt32ValueOf(constant); if (destination.IsRegister()) { - __ movl(destination.AsRegister<CpuRegister>(), imm); + if (value == 0) { + __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>()); + } else { + __ movl(destination.AsRegister<CpuRegister>(), Immediate(value)); + } } else { DCHECK(destination.IsStackSlot()) << destination; - __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm); + __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value)); } } else if (constant->IsLongConstant()) { int64_t value = constant->AsLongConstant()->GetValue(); @@ -3675,5 +3711,17 @@ void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* in } } +void 
LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction) { + // Nothing to do, this should be removed during prepare for register allocator. + UNUSED(instruction); + LOG(FATAL) << "Unreachable"; +} + +void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction) { + // Nothing to do, this should be removed during prepare for register allocator. + UNUSED(instruction); + LOG(FATAL) << "Unreachable"; +} + } // namespace x86_64 } // namespace art diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index dbdbf869db..707c9992c0 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -232,7 +232,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { void LoadCurrentMethod(CpuRegister reg); Label* GetLabelOf(HBasicBlock* block) const { - return block_labels_.GetRawStorage() + block->GetBlockId(); + return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block); } void Initialize() OVERRIDE { diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc index b246c6f98d..7623e421fd 100644 --- a/compiler/optimizing/dominator_test.cc +++ b/compiler/optimizing/dominator_test.cc @@ -14,11 +14,11 @@ * limitations under the License. */ +#include "base/arena_allocator.h" #include "builder.h" #include "dex_instruction.h" #include "nodes.h" #include "optimizing_unit_test.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc index e05d9b3b0f..2bfecc696a 100644 --- a/compiler/optimizing/find_loops_test.cc +++ b/compiler/optimizing/find_loops_test.cc @@ -14,13 +14,13 @@ * limitations under the License. */ +#include "base/arena_allocator.h" #include "builder.h" #include "dex_file.h" #include "dex_instruction.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "ssa_liveness_analysis.h" -#include "utils/arena_allocator.h" #include "pretty_printer.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 4ebb1363cc..a7f1f74e27 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -160,6 +160,22 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { instruction->GetId())); } } + + // Ensure 'instruction' has pointers to its inputs' use entries. 
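+  // That is, the HUseListNode recorded in InputRecordAt(i) must still be an element of
+  // InputAt(i)->GetUses(); a pass that rewires inputs without updating the recorded use
+  // node is reported here.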
+ for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) { + HUserRecord<HInstruction*> input_record = instruction->InputRecordAt(i); + HInstruction* input = input_record.GetInstruction(); + HUseListNode<HInstruction*>* use_node = input_record.GetUseNode(); + if (use_node == nullptr || !input->GetUses().Contains(use_node)) { + AddError(StringPrintf("Instruction %s:%d has an invalid pointer to use entry " + "at input %u (%s:%d).", + instruction->DebugName(), + instruction->GetId(), + static_cast<unsigned>(i), + input->DebugName(), + input->GetId())); + } + } } void SSAChecker::VisitBasicBlock(HBasicBlock* block) { @@ -285,6 +301,19 @@ void SSAChecker::VisitInstruction(HInstruction* instruction) { } } +static Primitive::Type PrimitiveKind(Primitive::Type type) { + switch (type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimShort: + case Primitive::kPrimChar: + case Primitive::kPrimInt: + return Primitive::kPrimInt; + default: + return type; + } +} + void SSAChecker::VisitPhi(HPhi* phi) { VisitInstruction(phi); @@ -321,18 +350,17 @@ void SSAChecker::VisitPhi(HPhi* phi) { } } } -} - -static Primitive::Type PrimitiveKind(Primitive::Type type) { - switch (type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimChar: - case Primitive::kPrimInt: - return Primitive::kPrimInt; - default: - return type; + // Ensure that the inputs have the same primitive kind as the phi. + for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { + HInstruction* input = phi->InputAt(i); + if (PrimitiveKind(input->GetType()) != PrimitiveKind(phi->GetType())) { + AddError(StringPrintf( + "Input %d at index %zu of phi %d from block %d does not have the " + "same type as the phi: %s versus %s", + input->GetId(), i, phi->GetId(), phi->GetBlock()->GetBlockId(), + Primitive::PrettyDescriptor(input->GetType()), + Primitive::PrettyDescriptor(phi->GetType()))); + } } } diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc index c59f8366fa..4742e4d073 100644 --- a/compiler/optimizing/graph_test.cc +++ b/compiler/optimizing/graph_test.cc @@ -14,12 +14,12 @@ * limitations under the License. 
*/ +#include "base/arena_allocator.h" #include "base/stringprintf.h" #include "builder.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "pretty_printer.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 835bca688f..c59273753e 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -184,6 +184,10 @@ class HGraphVisualizerPrinter : public HGraphVisitor { output_ << " " << instruction->GetValue(); } + void VisitPhi(HPhi* phi) OVERRIDE { + output_ << " " << phi->GetRegNumber(); + } + void PrintInstruction(HInstruction* instruction) { output_ << instruction->DebugName(); instruction->Accept(this); diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index 89bba2d9f6..cb448c883f 100644 --- a/compiler/optimizing/gvn.cc +++ b/compiler/optimizing/gvn.cc @@ -270,7 +270,7 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { set = new (allocator_) ValueSet(allocator_); } else { HBasicBlock* dominator = block->GetDominator(); - set = sets_.Get(dominator->GetBlockId())->Copy(); + set = sets_.Get(dominator->GetBlockId()); if (dominator->GetSuccessors().Size() != 1 || dominator->GetSuccessors().Get(0) != block) { // We have to copy if the dominator has other successors, or `block` is not a successor // of the dominator. diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc index 4a48fee2fb..a81d49aa0c 100644 --- a/compiler/optimizing/gvn_test.cc +++ b/compiler/optimizing/gvn_test.cc @@ -14,12 +14,12 @@ * limitations under the License. */ +#include "base/arena_allocator.h" #include "builder.h" #include "gvn.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "side_effects_analysis.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 32f6972c84..d55a3ca00b 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -159,7 +159,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, SsaDeadPhiElimination dead_phi(callee_graph); HDeadCodeElimination dce(callee_graph); HConstantFolding fold(callee_graph); - InstructionSimplifier simplify(callee_graph); + InstructionSimplifier simplify(callee_graph, stats_); HOptimization* optimizations[] = { &redundant_phi, @@ -176,7 +176,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, if (depth_ + 1 < kDepthLimit) { HInliner inliner( - callee_graph, outer_compilation_unit_, compiler_driver_, outer_stats_, depth_ + 1); + callee_graph, outer_compilation_unit_, compiler_driver_, stats_, depth_ + 1); inliner.Run(); } @@ -221,7 +221,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, // after optimizations get a unique id. 
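  // In other words, the caller graph's instruction id counter is bumped past every id the
  // inlined graph used, keeping ids unique after the two graphs are merged.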
graph_->SetCurrentInstructionId(callee_graph->GetNextInstructionId()); VLOG(compiler) << "Successfully inlined " << PrettyMethod(method_index, outer_dex_file); - outer_stats_->RecordStat(kInlinedInvoke); + MaybeRecordStat(kInlinedInvoke); return true; } diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index 07d893e7c9..8e9cf837df 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -35,10 +35,9 @@ class HInliner : public HOptimization { CompilerDriver* compiler_driver, OptimizingCompilerStats* stats, size_t depth = 0) - : HOptimization(outer_graph, true, "inliner"), + : HOptimization(outer_graph, true, "inliner", stats), outer_compilation_unit_(outer_compilation_unit), compiler_driver_(compiler_driver), - outer_stats_(stats), depth_(depth) {} void Run() OVERRIDE; @@ -48,7 +47,6 @@ class HInliner : public HOptimization { const DexCompilationUnit& outer_compilation_unit_; CompilerDriver* const compiler_driver_; - OptimizingCompilerStats* const outer_stats_; const size_t depth_; DISALLOW_COPY_AND_ASSIGN(HInliner); diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 17c8f337ca..fd99070780 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -16,11 +16,15 @@ #include "instruction_simplifier.h" +#include "mirror/class-inl.h" +#include "scoped_thread_state_change.h" + namespace art { class InstructionSimplifierVisitor : public HGraphVisitor { public: - explicit InstructionSimplifierVisitor(HGraph* graph) : HGraphVisitor(graph) {} + InstructionSimplifierVisitor(HGraph* graph, OptimizingCompilerStats* stats) + : HGraphVisitor(graph), stats_(stats) {} private: void VisitSuspendCheck(HSuspendCheck* check) OVERRIDE; @@ -28,10 +32,14 @@ class InstructionSimplifierVisitor : public HGraphVisitor { void VisitArraySet(HArraySet* equal) OVERRIDE; void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; void VisitNullCheck(HNullCheck* instruction) OVERRIDE; + void VisitArrayLength(HArrayLength* instruction) OVERRIDE; + void VisitCheckCast(HCheckCast* instruction) OVERRIDE; + + OptimizingCompilerStats* stats_; }; void InstructionSimplifier::Run() { - InstructionSimplifierVisitor visitor(graph_); + InstructionSimplifierVisitor visitor(graph_, stats_); visitor.VisitInsertionOrder(); } @@ -40,6 +48,28 @@ void InstructionSimplifierVisitor::VisitNullCheck(HNullCheck* null_check) { if (!obj->CanBeNull()) { null_check->ReplaceWith(obj); null_check->GetBlock()->RemoveInstruction(null_check); + if (stats_ != nullptr) { + stats_->RecordStat(MethodCompilationStat::kRemovedNullCheck); + } + } +} + +void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { + HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass(); + if (!load_class->IsResolved()) { + // If the class couldn't be resolve it's not safe to compare against it. It's + // default type would be Top which might be wider that the actual class type + // and thus producing wrong results. 
+ return; + } + ReferenceTypeInfo obj_rti = check_cast->InputAt(0)->GetReferenceTypeInfo(); + ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); + ScopedObjectAccess soa(Thread::Current()); + if (class_rti.IsSupertypeOf(obj_rti)) { + check_cast->GetBlock()->RemoveInstruction(check_cast); + if (stats_ != nullptr) { + stats_->RecordStat(MethodCompilationStat::kRemovedCheckedCast); + } } } @@ -75,6 +105,18 @@ void InstructionSimplifierVisitor::VisitEqual(HEqual* equal) { } } +void InstructionSimplifierVisitor::VisitArrayLength(HArrayLength* instruction) { + HInstruction* input = instruction->InputAt(0); + // If the array is a NewArray with constant size, replace the array length + // with the constant instruction. This helps the bounds check elimination phase. + if (input->IsNewArray()) { + input = input->InputAt(0); + if (input->IsIntConstant()) { + instruction->ReplaceWith(input); + } + } +} + void InstructionSimplifierVisitor::VisitArraySet(HArraySet* instruction) { HInstruction* value = instruction->GetValue(); if (value->GetType() != Primitive::kPrimNot) return; diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h index bca6697d05..a7ff755aed 100644 --- a/compiler/optimizing/instruction_simplifier.h +++ b/compiler/optimizing/instruction_simplifier.h @@ -19,6 +19,7 @@ #include "nodes.h" #include "optimization.h" +#include "optimizing_compiler_stats.h" namespace art { @@ -27,8 +28,10 @@ namespace art { */ class InstructionSimplifier : public HOptimization { public: - explicit InstructionSimplifier(HGraph* graph, const char* name = "instruction_simplifier") - : HOptimization(graph, true, name) {} + InstructionSimplifier(HGraph* graph, + OptimizingCompilerStats* stats = nullptr, + const char* name = "instruction_simplifier") + : HOptimization(graph, true, name, stats) {} void Run() OVERRIDE; }; diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc new file mode 100644 index 0000000000..a82d80af13 --- /dev/null +++ b/compiler/optimizing/intrinsics_arm.cc @@ -0,0 +1,883 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "intrinsics_arm.h" + +#include "arch/arm/instruction_set_features_arm.h" +#include "code_generator_arm.h" +#include "entrypoints/quick/quick_entrypoints.h" +#include "intrinsics.h" +#include "mirror/array-inl.h" +#include "mirror/art_method.h" +#include "mirror/string.h" +#include "thread.h" +#include "utils/arm/assembler_arm.h" + +namespace art { + +namespace arm { + +ArmAssembler* IntrinsicCodeGeneratorARM::GetAssembler() { + return codegen_->GetAssembler(); +} + +ArenaAllocator* IntrinsicCodeGeneratorARM::GetAllocator() { + return codegen_->GetGraph()->GetArena(); +} + +#define __ codegen->GetAssembler()-> + +static void MoveFromReturnRegister(Location trg, Primitive::Type type, CodeGeneratorARM* codegen) { + if (!trg.IsValid()) { + DCHECK(type == Primitive::kPrimVoid); + return; + } + + DCHECK_NE(type, Primitive::kPrimVoid); + + if (Primitive::IsIntegralType(type)) { + if (type == Primitive::kPrimLong) { + Register trg_reg_lo = trg.AsRegisterPairLow<Register>(); + Register trg_reg_hi = trg.AsRegisterPairHigh<Register>(); + Register res_reg_lo = R0; + Register res_reg_hi = R1; + if (trg_reg_lo != res_reg_hi) { + if (trg_reg_lo != res_reg_lo) { + __ mov(trg_reg_lo, ShifterOperand(res_reg_lo)); + __ mov(trg_reg_hi, ShifterOperand(res_reg_hi)); + } else { + DCHECK_EQ(trg_reg_lo + 1, trg_reg_hi); + } + } else { + __ mov(trg_reg_hi, ShifterOperand(res_reg_hi)); + __ mov(trg_reg_lo, ShifterOperand(res_reg_lo)); + } + } else { + Register trg_reg = trg.AsRegister<Register>(); + Register res_reg = R0; + if (trg_reg != res_reg) { + __ mov(trg_reg, ShifterOperand(res_reg)); + } + } + } else { + UNIMPLEMENTED(FATAL) << "Floating-point return."; + } +} + +static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorARM* codegen) { + if (invoke->InputCount() == 0) { + return; + } + + LocationSummary* locations = invoke->GetLocations(); + InvokeDexCallingConventionVisitor calling_convention_visitor; + + // We're moving potentially two or more locations to locations that could overlap, so we need + // a parallel move resolver. + HParallelMove parallel_move(arena); + + for (size_t i = 0; i < invoke->InputCount(); i++) { + HInstruction* input = invoke->InputAt(i); + Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType()); + Location actual_loc = locations->InAt(i); + + parallel_move.AddMove(actual_loc, cc_loc, nullptr); + } + + codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); +} + +// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified +// call. This will copy the arguments into the positions for a regular call. +// +// Note: The actual parameters are required to be in the locations given by the invoke's location +// summary. If an intrinsic modifies those locations before a slowpath call, they must be +// restored! 
+class IntrinsicSlowPathARM : public SlowPathCodeARM { + public: + explicit IntrinsicSlowPathARM(HInvoke* invoke) : invoke_(invoke) { } + + void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE { + CodeGeneratorARM* codegen = down_cast<CodeGeneratorARM*>(codegen_in); + __ Bind(GetEntryLabel()); + + codegen->SaveLiveRegisters(invoke_->GetLocations()); + + MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen); + + if (invoke_->IsInvokeStaticOrDirect()) { + codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister); + } else { + UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented"; + UNREACHABLE(); + } + + // Copy the result back to the expected output. + Location out = invoke_->GetLocations()->Out(); + if (out.IsValid()) { + DCHECK(out.IsRegister()); // TODO: Replace this when we support output in memory. + DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg())); + MoveFromReturnRegister(out, invoke_->GetType(), codegen); + } + + codegen->RestoreLiveRegisters(invoke_->GetLocations()); + __ b(GetExitLabel()); + } + + private: + // The instruction where this slow path is happening. + HInvoke* const invoke_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARM); +}; + +#undef __ + +bool IntrinsicLocationsBuilderARM::TryDispatch(HInvoke* invoke) { + Dispatch(invoke); + LocationSummary* res = invoke->GetLocations(); + return res != nullptr && res->Intrinsified(); +} + +#define __ assembler-> + +static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); +} + +static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) { + Location input = locations->InAt(0); + Location output = locations->Out(); + if (is64bit) { + __ vmovrrd(output.AsRegisterPairLow<Register>(), + output.AsRegisterPairHigh<Register>(), + FromLowSToD(input.AsFpuRegisterPairLow<SRegister>())); + } else { + __ vmovrs(output.AsRegister<Register>(), input.AsFpuRegister<SRegister>()); + } +} + +static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) { + Location input = locations->InAt(0); + Location output = locations->Out(); + if (is64bit) { + __ vmovdrr(FromLowSToD(output.AsFpuRegisterPairLow<SRegister>()), + input.AsRegisterPairLow<Register>(), + input.AsRegisterPairHigh<Register>()); + } else { + __ vmovsr(output.AsFpuRegister<SRegister>(), input.AsRegister<Register>()); + } +} + +void IntrinsicLocationsBuilderARM::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), true, GetAssembler()); +} +void IntrinsicCodeGeneratorARM::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + MoveIntToFP(invoke->GetLocations(), true, GetAssembler()); +} + +void 
IntrinsicLocationsBuilderARM::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitFloatIntBitsToFloat(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), false, GetAssembler()); +} +void IntrinsicCodeGeneratorARM::VisitFloatIntBitsToFloat(HInvoke* invoke) { + MoveIntToFP(invoke->GetLocations(), false, GetAssembler()); +} + +static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); +} + +static void MathAbsFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) { + Location in = locations->InAt(0); + Location out = locations->Out(); + + if (is64bit) { + __ vabsd(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), + FromLowSToD(in.AsFpuRegisterPairLow<SRegister>())); + } else { + __ vabss(out.AsFpuRegister<SRegister>(), in.AsFpuRegister<SRegister>()); + } +} + +void IntrinsicLocationsBuilderARM::VisitMathAbsDouble(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMathAbsDouble(HInvoke* invoke) { + MathAbsFP(invoke->GetLocations(), true, GetAssembler()); +} + +void IntrinsicLocationsBuilderARM::VisitMathAbsFloat(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMathAbsFloat(HInvoke* invoke) { + MathAbsFP(invoke->GetLocations(), false, GetAssembler()); +} + +static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + + locations->AddTemp(Location::RequiresRegister()); +} + +static void GenAbsInteger(LocationSummary* locations, + bool is64bit, + ArmAssembler* assembler) { + Location in = locations->InAt(0); + Location output = locations->Out(); + + Register mask = locations->GetTemp(0).AsRegister<Register>(); + + if (is64bit) { + Register in_reg_lo = in.AsRegisterPairLow<Register>(); + Register in_reg_hi = in.AsRegisterPairHigh<Register>(); + Register out_reg_lo = output.AsRegisterPairLow<Register>(); + Register out_reg_hi = output.AsRegisterPairHigh<Register>(); + + DCHECK_NE(out_reg_lo, in_reg_hi) << "Diagonal overlap unexpected."; + + __ Asr(mask, in_reg_hi, 31); + __ adds(out_reg_lo, in_reg_lo, ShifterOperand(mask)); + __ adc(out_reg_hi, in_reg_hi, ShifterOperand(mask)); + __ eor(out_reg_lo, mask, ShifterOperand(out_reg_lo)); + __ eor(out_reg_hi, mask, ShifterOperand(out_reg_hi)); + } else { + Register in_reg = in.AsRegister<Register>(); + Register out_reg = output.AsRegister<Register>(); + + __ Asr(mask, in_reg, 31); + __ add(out_reg, in_reg, ShifterOperand(mask)); + __ eor(out_reg, mask, 
ShifterOperand(out_reg)); + } +} + +void IntrinsicLocationsBuilderARM::VisitMathAbsInt(HInvoke* invoke) { + CreateIntToIntPlusTemp(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMathAbsInt(HInvoke* invoke) { + GenAbsInteger(invoke->GetLocations(), false, GetAssembler()); +} + + +void IntrinsicLocationsBuilderARM::VisitMathAbsLong(HInvoke* invoke) { + CreateIntToIntPlusTemp(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMathAbsLong(HInvoke* invoke) { + GenAbsInteger(invoke->GetLocations(), true, GetAssembler()); +} + +static void GenMinMax(LocationSummary* locations, + bool is_min, + ArmAssembler* assembler) { + Register op1 = locations->InAt(0).AsRegister<Register>(); + Register op2 = locations->InAt(1).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + __ cmp(op1, ShifterOperand(op2)); + + __ it((is_min) ? Condition::LT : Condition::GT, kItElse); + __ mov(out, ShifterOperand(op1), is_min ? Condition::LT : Condition::GT); + __ mov(out, ShifterOperand(op2), is_min ? Condition::GE : Condition::LE); +} + +static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void IntrinsicLocationsBuilderARM::VisitMathMinIntInt(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMathMinIntInt(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), true, GetAssembler()); +} + +void IntrinsicLocationsBuilderARM::VisitMathMaxIntInt(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMathMaxIntInt(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), false, GetAssembler()); +} + +void IntrinsicLocationsBuilderARM::VisitMathSqrt(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMathSqrt(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + ArmAssembler* assembler = GetAssembler(); + __ vsqrtd(FromLowSToD(locations->Out().AsFpuRegisterPairLow<SRegister>()), + FromLowSToD(locations->InAt(0).AsFpuRegisterPairLow<SRegister>())); +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPeekByte(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPeekByte(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + // Ignore upper 4B of long address. + __ ldrsb(invoke->GetLocations()->Out().AsRegister<Register>(), + Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>())); +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPeekIntNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPeekIntNative(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + // Ignore upper 4B of long address. 
+ __ ldr(invoke->GetLocations()->Out().AsRegister<Register>(), + Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>())); +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPeekLongNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPeekLongNative(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + // Ignore upper 4B of long address. + Register addr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>(); + // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor + // exception. So we can't use ldrd as addr may be unaligned. + Register lo = invoke->GetLocations()->Out().AsRegisterPairLow<Register>(); + Register hi = invoke->GetLocations()->Out().AsRegisterPairHigh<Register>(); + if (addr == lo) { + __ ldr(hi, Address(addr, 4)); + __ ldr(lo, Address(addr, 0)); + } else { + __ ldr(lo, Address(addr, 0)); + __ ldr(hi, Address(addr, 4)); + } +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPeekShortNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPeekShortNative(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + // Ignore upper 4B of long address. + __ ldrsh(invoke->GetLocations()->Out().AsRegister<Register>(), + Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>())); +} + +static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPokeByte(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPokeByte(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + __ strb(invoke->GetLocations()->InAt(1).AsRegister<Register>(), + Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>())); +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPokeIntNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPokeIntNative(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + __ str(invoke->GetLocations()->InAt(1).AsRegister<Register>(), + Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>())); +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPokeLongNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPokeLongNative(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + // Ignore upper 4B of long address. + Register addr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>(); + // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor + // exception. So we can't use ldrd as addr may be unaligned. 
+ __ str(invoke->GetLocations()->InAt(1).AsRegisterPairLow<Register>(), Address(addr, 0)); + __ str(invoke->GetLocations()->InAt(1).AsRegisterPairHigh<Register>(), Address(addr, 4)); +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPokeShortNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPokeShortNative(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + __ strh(invoke->GetLocations()->InAt(1).AsRegister<Register>(), + Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>())); +} + +void IntrinsicLocationsBuilderARM::VisitThreadCurrentThread(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetOut(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorARM::VisitThreadCurrentThread(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + __ LoadFromOffset(kLoadWord, + invoke->GetLocations()->Out().AsRegister<Register>(), + TR, + Thread::PeerOffset<kArmPointerSize>().Int32Value()); +} + +static void GenUnsafeGet(HInvoke* invoke, + Primitive::Type type, + bool is_volatile, + CodeGeneratorARM* codegen) { + LocationSummary* locations = invoke->GetLocations(); + DCHECK((type == Primitive::kPrimInt) || + (type == Primitive::kPrimLong) || + (type == Primitive::kPrimNot)); + ArmAssembler* assembler = codegen->GetAssembler(); + Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer. + Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); // Long offset, lo part only. + + if (type == Primitive::kPrimLong) { + Register trg_lo = locations->Out().AsRegisterPairLow<Register>(); + __ add(IP, base, ShifterOperand(offset)); + if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) { + Register trg_hi = locations->Out().AsRegisterPairHigh<Register>(); + __ ldrexd(trg_lo, trg_hi, IP); + } else { + __ ldrd(trg_lo, Address(IP)); + } + } else { + Register trg = locations->Out().AsRegister<Register>(); + __ ldr(trg, Address(base, offset)); + } + + if (is_volatile) { + __ dmb(ISH); + } +} + +static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 
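For reference, the volatile path of GenUnsafeGet above is an ordinary load followed by a full dmb ISH barrier (ldrexd stands in for the 64-bit load when the core lacks an atomic ldrd). A minimal C++ sketch of the 32-bit case, with a hypothetical helper name that is not part of the patch:

#include <atomic>
#include <cstdint>

// Mirrors the sequence emitted for Unsafe.getIntVolatile: a plain load, then a full fence
// that keeps later memory accesses from being reordered before the load.
int32_t UnsafeGetIntVolatileSketch(const std::atomic<int32_t>* addr) {
  int32_t value = addr->load(std::memory_order_relaxed);  // ldr trg, [base, offset]
  std::atomic_thread_fence(std::memory_order_seq_cst);    // dmb ish
  return value;
}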
+ locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void IntrinsicLocationsBuilderARM::VisitUnsafeGet(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafeGetVolatile(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafeGetLong(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafeGetLongVolatile(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafeGetObject(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitUnsafeGet(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafeGetVolatile(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafeGetLong(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafeGetLongVolatile(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafeGetObject(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_); +} + +static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, + const ArmInstructionSetFeatures& features, + Primitive::Type type, + bool is_volatile, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RequiresRegister()); + + if (type == Primitive::kPrimLong) { + // Potentially need temps for ldrexd-strexd loop. + if (is_volatile && !features.HasAtomicLdrdAndStrd()) { + locations->AddTemp(Location::RequiresRegister()); // Temp_lo. + locations->AddTemp(Location::RequiresRegister()); // Temp_hi. + } + } else if (type == Primitive::kPrimNot) { + // Temps for card-marking. + locations->AddTemp(Location::RequiresRegister()); // Temp. + locations->AddTemp(Location::RequiresRegister()); // Card. 
+ } +} + +void IntrinsicLocationsBuilderARM::VisitUnsafePut(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, false, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, false, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, true, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutObject(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, false, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutObjectOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, false, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutObjectVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, true, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutLong(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, false, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutLongOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, false, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutLongVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, true, invoke); +} + +static void GenUnsafePut(LocationSummary* locations, + Primitive::Type type, + bool is_volatile, + bool is_ordered, + CodeGeneratorARM* codegen) { + ArmAssembler* assembler = codegen->GetAssembler(); + + Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer. + Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); // Long offset, lo part only. 
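The two temps reserved above for reference stores feed the card-marking write barrier: when Unsafe.putObject* writes a reference into an object, the card covering that object is dirtied so the garbage collector knows to re-scan it. A minimal conceptual sketch follows; the shift and marker value are placeholders, not ART's actual constants:

#include <cstddef>
#include <cstdint>

// One card-table byte covers a fixed-size window of the heap. Dirtying the byte for the
// object that received the store records "a reference field in this window changed".
void MarkCardSketch(uint8_t* card_table,
                    uintptr_t stored_into_object,
                    size_t card_shift,      // log2 of the assumed card size.
                    uint8_t dirty_value) {  // Assumed dirty-card marker.
  card_table[stored_into_object >> card_shift] = dirty_value;
}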
+ Register value; + + if (is_volatile || is_ordered) { + __ dmb(ISH); + } + + if (type == Primitive::kPrimLong) { + Register value_lo = locations->InAt(3).AsRegisterPairLow<Register>(); + value = value_lo; + if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) { + Register temp_lo = locations->GetTemp(0).AsRegister<Register>(); + Register temp_hi = locations->GetTemp(1).AsRegister<Register>(); + Register value_hi = locations->InAt(3).AsRegisterPairHigh<Register>(); + + __ add(IP, base, ShifterOperand(offset)); + Label loop_head; + __ Bind(&loop_head); + __ ldrexd(temp_lo, temp_hi, IP); + __ strexd(temp_lo, value_lo, value_hi, IP); + __ cmp(temp_lo, ShifterOperand(0)); + __ b(&loop_head, NE); + } else { + __ add(IP, base, ShifterOperand(offset)); + __ strd(value_lo, Address(IP)); + } + } else { + value = locations->InAt(3).AsRegister<Register>(); + __ str(value, Address(base, offset)); + } + + if (is_volatile) { + __ dmb(ISH); + } + + if (type == Primitive::kPrimNot) { + Register temp = locations->GetTemp(0).AsRegister<Register>(); + Register card = locations->GetTemp(1).AsRegister<Register>(); + codegen->MarkGCCard(temp, card, base, value); + } +} + +void IntrinsicCodeGeneratorARM::VisitUnsafePut(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, true, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutObject(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutObjectOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, true, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutObjectVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutLong(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutLongOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, true, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutLongVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, false, codegen_); +} + +static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RequiresRegister()); + locations->SetInAt(4, Location::RequiresRegister()); + + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + + locations->AddTemp(Location::RequiresRegister()); // Pointer. + locations->AddTemp(Location::RequiresRegister()); // Temp 1. + locations->AddTemp(Location::RequiresRegister()); // Temp 2. 
+} + +static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM* codegen) { + DCHECK_NE(type, Primitive::kPrimLong); + + ArmAssembler* assembler = codegen->GetAssembler(); + + Register out = locations->Out().AsRegister<Register>(); // Boolean result. + + Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer. + Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); // Offset (discard high 4B). + Register expected_lo = locations->InAt(3).AsRegister<Register>(); // Expected. + Register value_lo = locations->InAt(4).AsRegister<Register>(); // Value. + + Register tmp_ptr = locations->GetTemp(0).AsRegister<Register>(); // Pointer to actual memory. + Register tmp_lo = locations->GetTemp(1).AsRegister<Register>(); // Value in memory. + + if (type == Primitive::kPrimNot) { + // Mark card for object assuming new value is stored. Worst case we will mark an unchanged + // object and scan the receiver at the next GC for nothing. + codegen->MarkGCCard(tmp_ptr, tmp_lo, base, value_lo); + } + + // Prevent reordering with prior memory operations. + __ dmb(ISH); + + __ add(tmp_ptr, base, ShifterOperand(offset)); + + // do { + // tmp = [r_ptr] - expected; + // } while (tmp == 0 && failure([r_ptr] <- r_new_value)); + // result = tmp != 0; + + Label loop_head; + __ Bind(&loop_head); + + __ ldrex(tmp_lo, tmp_ptr); + + __ subs(tmp_lo, tmp_lo, ShifterOperand(expected_lo)); + + __ it(EQ, ItState::kItT); + __ strex(tmp_lo, value_lo, tmp_ptr, EQ); + __ cmp(tmp_lo, ShifterOperand(1), EQ); + + __ b(&loop_head, EQ); + + __ dmb(ISH); + + __ rsbs(out, tmp_lo, ShifterOperand(1)); + __ it(CC); + __ mov(out, ShifterOperand(0), CC); +} + +void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke ATTRIBUTE_UNUSED) { + CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke ATTRIBUTE_UNUSED) { + CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke); +} +void IntrinsicCodeGeneratorARM::VisitUnsafeCASInt(HInvoke* invoke) { + GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafeCASObject(HInvoke* invoke) { + GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_); +} + +void IntrinsicLocationsBuilderARM::VisitStringCharAt(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCallOnSlowPath, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorARM::VisitStringCharAt(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + // Location of reference to data array + const MemberOffset value_offset = mirror::String::ValueOffset(); + // Location of count + const MemberOffset count_offset = mirror::String::CountOffset(); + // Starting offset within data array + const MemberOffset offset_offset = mirror::String::OffsetOffset(); + // Start of char data with array_ + const MemberOffset data_offset = mirror::Array::DataOffset(sizeof(uint16_t)); + + Register obj = locations->InAt(0).AsRegister<Register>(); // String object pointer. + Register idx = locations->InAt(1).AsRegister<Register>(); // Index of character. 
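The ldrex/strex loop in GenCas above implements a strong compare-and-swap: it retries only when the expected value was observed but the store-exclusive failed, and the result register ends up as 1 on success and 0 otherwise. In portable C++ terms (hypothetical helper, shown only to pin down the semantics):

#include <atomic>
#include <cstdint>

// Unsafe.compareAndSwapInt: true iff *addr held `expected` and was replaced by `new_value`.
// On ARM, compare_exchange_strong compiles to the same kind of LL/SC retry loop.
bool CompareAndSwapIntSketch(std::atomic<int32_t>* addr, int32_t expected, int32_t new_value) {
  return addr->compare_exchange_strong(expected, new_value, std::memory_order_seq_cst);
}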
+ Register out = locations->Out().AsRegister<Register>(); // Result character. + + Register temp = locations->GetTemp(0).AsRegister<Register>(); + Register array_temp = locations->GetTemp(1).AsRegister<Register>(); + + // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth + // the cost. + // TODO: For simplicity, the index parameter is requested in a register, so different from Quick + // we will not optimize the code for constants (which would save a register). + + SlowPathCodeARM* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke); + codegen_->AddSlowPath(slow_path); + + __ ldr(temp, Address(obj, count_offset.Int32Value())); // temp = str.length. + codegen_->MaybeRecordImplicitNullCheck(invoke); + __ cmp(idx, ShifterOperand(temp)); + __ b(slow_path->GetEntryLabel(), CS); + + // Index computation. + __ ldr(temp, Address(obj, offset_offset.Int32Value())); // temp := str.offset. + __ ldr(array_temp, Address(obj, value_offset.Int32Value())); // array_temp := str.offset. + __ add(temp, temp, ShifterOperand(idx)); + DCHECK_EQ(data_offset.Int32Value() % 2, 0); // We'll compensate by shifting. + __ add(temp, temp, ShifterOperand(data_offset.Int32Value() / 2)); + + // Load the value. + __ ldrh(out, Address(array_temp, temp, LSL, 1)); // out := array_temp[temp]. + + __ Bind(slow_path->GetExitLabel()); +} + +// Unimplemented intrinsics. + +#define UNIMPLEMENTED_INTRINSIC(Name) \ +void IntrinsicLocationsBuilderARM::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ +} \ +void IntrinsicCodeGeneratorARM::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ +} + +UNIMPLEMENTED_INTRINSIC(IntegerReverse) +UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes) +UNIMPLEMENTED_INTRINSIC(LongReverse) +UNIMPLEMENTED_INTRINSIC(LongReverseBytes) +UNIMPLEMENTED_INTRINSIC(ShortReverseBytes) +UNIMPLEMENTED_INTRINSIC(MathMinDoubleDouble) +UNIMPLEMENTED_INTRINSIC(MathMinFloatFloat) +UNIMPLEMENTED_INTRINSIC(MathMaxDoubleDouble) +UNIMPLEMENTED_INTRINSIC(MathMaxFloatFloat) +UNIMPLEMENTED_INTRINSIC(MathMinLongLong) +UNIMPLEMENTED_INTRINSIC(MathMaxLongLong) +UNIMPLEMENTED_INTRINSIC(MathCeil) // Could be done by changing rounding mode, maybe? +UNIMPLEMENTED_INTRINSIC(MathFloor) // Could be done by changing rounding mode, maybe? +UNIMPLEMENTED_INTRINSIC(MathRint) +UNIMPLEMENTED_INTRINSIC(MathRoundDouble) // Could be done by changing rounding mode, maybe? +UNIMPLEMENTED_INTRINSIC(MathRoundFloat) // Could be done by changing rounding mode, maybe? +UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) // High register pressure. +UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) +UNIMPLEMENTED_INTRINSIC(StringCompareTo) +UNIMPLEMENTED_INTRINSIC(StringIsEmpty) // Might not want to do these two anyways, inlining should +UNIMPLEMENTED_INTRINSIC(StringLength) // be good enough here. +UNIMPLEMENTED_INTRINSIC(StringIndexOf) +UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) +UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) + +} // namespace arm +} // namespace art diff --git a/compiler/optimizing/intrinsics_arm.h b/compiler/optimizing/intrinsics_arm.h new file mode 100644 index 0000000000..8bfb7d4686 --- /dev/null +++ b/compiler/optimizing/intrinsics_arm.h @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_H_ +#define ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_H_ + +#include "intrinsics.h" + +namespace art { + +class ArenaAllocator; +class ArmInstructionSetFeatures; +class HInvokeStaticOrDirect; +class HInvokeVirtual; + +namespace arm { + +class ArmAssembler; +class CodeGeneratorARM; + +class IntrinsicLocationsBuilderARM FINAL : public IntrinsicVisitor { + public: + explicit IntrinsicLocationsBuilderARM(ArenaAllocator* arena, + const ArmInstructionSetFeatures& features) + : arena_(arena), features_(features) {} + + // Define visitor methods. + +#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ + void Visit ## Name(HInvoke* invoke) OVERRIDE; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + + // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether + // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to + // the invoke. + bool TryDispatch(HInvoke* invoke); + + private: + ArenaAllocator* arena_; + + const ArmInstructionSetFeatures& features_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderARM); +}; + +class IntrinsicCodeGeneratorARM FINAL : public IntrinsicVisitor { + public: + explicit IntrinsicCodeGeneratorARM(CodeGeneratorARM* codegen) : codegen_(codegen) {} + + // Define visitor methods. + +#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ + void Visit ## Name(HInvoke* invoke) OVERRIDE; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + + private: + ArmAssembler* GetAssembler(); + + ArenaAllocator* GetAllocator(); + + CodeGeneratorARM* codegen_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorARM); +}; + +} // namespace arm +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_H_ diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 7a3d7d8389..8874edc341 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -300,7 +300,6 @@ void IntrinsicCodeGeneratorARM64::VisitLongReverse(HInvoke* invoke) { } static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { - // We only support FP registers here. LocationSummary* locations = new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); @@ -924,7 +923,6 @@ void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitStringCharAt(HInvoke* invoke) { - // The inputs plus one temp. 
LocationSummary* locations = new (arena_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified); diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc index 10f24d8148..bf9b8e59c5 100644 --- a/compiler/optimizing/licm.cc +++ b/compiler/optimizing/licm.cc @@ -66,8 +66,7 @@ static void UpdateLoopPhisIn(HEnvironment* environment, HLoopInformation* info) for (size_t i = 0, e = environment->Size(); i < e; ++i) { HInstruction* input = environment->GetInstructionAt(i); if (input != nullptr && IsPhiOf(input, info->GetHeader())) { - HUseListNode<HEnvironment*>* env_use = environment->GetInstructionEnvUseAt(i); - input->RemoveEnvironmentUser(env_use); + environment->RemoveAsUserOfInput(i); HInstruction* incoming = input->InputAt(0); environment->SetRawEnvAt(i, incoming); incoming->AddEnvUseAt(environment, i); diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index eb27965c79..f22b7a7e82 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -16,6 +16,7 @@ #include <fstream> +#include "base/arena_allocator.h" #include "base/stringprintf.h" #include "builder.h" #include "code_generator.h" @@ -29,7 +30,6 @@ #include "pretty_printer.h" #include "ssa_builder.h" #include "ssa_liveness_analysis.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/live_interval_test.cc b/compiler/optimizing/live_interval_test.cc index ac8759c805..28000c18f8 100644 --- a/compiler/optimizing/live_interval_test.cc +++ b/compiler/optimizing/live_interval_test.cc @@ -14,9 +14,9 @@ * limitations under the License. */ +#include "base/arena_allocator.h" #include "optimizing_unit_test.h" #include "ssa_liveness_analysis.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index 0558b85b47..17914e8206 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "base/arena_allocator.h" #include "builder.h" #include "code_generator.h" #include "code_generator_x86.h" @@ -24,7 +25,6 @@ #include "optimizing_unit_test.h" #include "prepare_for_register_allocation.h" #include "ssa_liveness_analysis.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index c9be570c73..907eff162f 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include "base/arena_allocator.h" #include "builder.h" #include "code_generator.h" #include "code_generator_x86.h" @@ -24,7 +25,6 @@ #include "optimizing_unit_test.h" #include "prepare_for_register_allocation.h" #include "ssa_liveness_analysis.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc index 990d662d86..4ac1fe8573 100644 --- a/compiler/optimizing/locations.cc +++ b/compiler/optimizing/locations.cc @@ -64,6 +64,13 @@ Location Location::ByteRegisterOrConstant(int reg, HInstruction* instruction) { std::ostream& operator<<(std::ostream& os, const Location& location) { os << location.DebugString(); + if (location.IsRegister() || location.IsFpuRegister()) { + os << location.reg(); + } else if (location.IsPair()) { + os << location.low() << ":" << location.high(); + } else if (location.IsStackSlot() || location.IsDoubleStackSlot()) { + os << location.GetStackIndex(); + } return os; } diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index bf27c5cf7a..198cc15cce 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -17,10 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_LOCATIONS_H_ #define ART_COMPILER_OPTIMIZING_LOCATIONS_H_ +#include "base/arena_object.h" #include "base/bit_field.h" #include "base/bit_vector.h" #include "base/value_object.h" -#include "utils/arena_object.h" #include "utils/growable_array.h" namespace art { @@ -151,6 +151,10 @@ class Location : public ValueObject { return GetKind() == kFpuRegisterPair; } + bool IsRegisterKind() const { + return IsRegister() || IsFpuRegister() || IsRegisterPair() || IsFpuRegisterPair(); + } + int reg() const { DCHECK(IsRegister() || IsFpuRegister()); return GetPayload(); @@ -268,6 +272,20 @@ class Location : public ValueObject { return value_ == other.value_; } + bool Contains(Location other) const { + if (Equals(other)) { + return true; + } else if (IsFpuRegisterPair() && other.IsFpuRegister()) { + return other.reg() == low() || other.reg() == high(); + } else if (IsRegisterPair() && other.IsRegister()) { + return other.reg() == low() || other.reg() == high(); + } else if (IsDoubleStackSlot() && other.IsStackSlot()) { + return (GetStackIndex() == other.GetStackIndex()) + || (GetStackIndex() + 4 == other.GetStackIndex()); + } + return false; + } + const char* DebugString() const { switch (GetKind()) { case kInvalid: return "I"; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index cd36598171..93787b8bfd 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -18,6 +18,7 @@ #include "ssa_builder.h" #include "utils/growable_array.h" +#include "scoped_thread_state_change.h" namespace art { @@ -33,17 +34,14 @@ void HGraph::FindBackEdges(ArenaBitVector* visited) { static void RemoveAsUser(HInstruction* instruction) { for (size_t i = 0; i < instruction->InputCount(); i++) { - instruction->InputAt(i)->RemoveUser(instruction, i); + instruction->RemoveAsUserOfInput(i); } HEnvironment* environment = instruction->GetEnvironment(); if (environment != nullptr) { for (size_t i = 0, e = environment->Size(); i < e; ++i) { - HUseListNode<HEnvironment*>* vreg_env_use = environment->GetInstructionEnvUseAt(i); - if (vreg_env_use != nullptr) { - HInstruction* vreg = environment->GetInstructionAt(i); - DCHECK(vreg != nullptr); - vreg->RemoveEnvironmentUser(vreg_env_use); + if (environment->GetInstructionAt(i) != nullptr) { + environment->RemoveAsUserOfInput(i); } 
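A small usage sketch for the Location::Contains() helper added to locations.h above: a register pair overlaps either of its halves, and a double stack slot overlaps both 4-byte slots it spans. The driver function is hypothetical; the factory calls are the ones locations.h already provides:

// Assumes art::Location and ART's CHECK() macro.
void LocationContainsSketch() {
  Location pair = Location::RegisterPairLocation(0, 1);
  CHECK(pair.Contains(Location::RegisterLocation(0)));  // Low half of the pair.
  CHECK(pair.Contains(Location::RegisterLocation(1)));  // High half of the pair.

  Location wide_slot = Location::DoubleStackSlot(16);
  CHECK(wide_slot.Contains(Location::StackSlot(16)));   // First word.
  CHECK(wide_slot.Contains(Location::StackSlot(20)));   // Second word, 4 bytes higher.
}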
} } @@ -63,22 +61,19 @@ void HGraph::RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visit } } -void HGraph::RemoveBlock(HBasicBlock* block) const { - for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) { - block->GetSuccessors().Get(j)->RemovePredecessor(block); - } - for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - block->RemovePhi(it.Current()->AsPhi()); - } - for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { - block->RemoveInstruction(it.Current()); - } -} - void HGraph::RemoveDeadBlocks(const ArenaBitVector& visited) const { for (size_t i = 0; i < blocks_.Size(); ++i) { if (!visited.IsBitSet(i)) { - RemoveBlock(blocks_.Get(i)); + HBasicBlock* block = blocks_.Get(i); + for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) { + block->GetSuccessors().Get(j)->RemovePredecessor(block); + } + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + block->RemovePhi(it.Current()->AsPhi(), /*ensure_safety=*/ false); + } + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + block->RemoveInstruction(it.Current(), /*ensure_safety=*/ false); + } } } } @@ -292,6 +287,15 @@ bool HGraph::AnalyzeNaturalLoops() const { return true; } +HNullConstant* HGraph::GetNullConstant() { + if (cached_null_constant_ == nullptr) { + cached_null_constant_ = new (arena_) HNullConstant(); + entry_block_->InsertInstructionBefore(cached_null_constant_, + entry_block_->GetLastInstruction()); + } + return cached_null_constant_; +} + void HLoopInformation::Add(HBasicBlock* block) { blocks_.SetBit(block->GetBlockId()); } @@ -429,22 +433,24 @@ void HBasicBlock::InsertPhiAfter(HPhi* phi, HPhi* cursor) { static void Remove(HInstructionList* instruction_list, HBasicBlock* block, - HInstruction* instruction) { + HInstruction* instruction, + bool ensure_safety) { DCHECK_EQ(block, instruction->GetBlock()); - DCHECK(instruction->GetUses().IsEmpty()); - DCHECK(instruction->GetEnvUses().IsEmpty()); instruction->SetBlock(nullptr); instruction_list->RemoveInstruction(instruction); - - RemoveAsUser(instruction); + if (ensure_safety) { + DCHECK(instruction->GetUses().IsEmpty()); + DCHECK(instruction->GetEnvUses().IsEmpty()); + RemoveAsUser(instruction); + } } -void HBasicBlock::RemoveInstruction(HInstruction* instruction) { - Remove(&instructions_, this, instruction); +void HBasicBlock::RemoveInstruction(HInstruction* instruction, bool ensure_safety) { + Remove(&instructions_, this, instruction, ensure_safety); } -void HBasicBlock::RemovePhi(HPhi* phi) { - Remove(&phis_, this, phi); +void HBasicBlock::RemovePhi(HPhi* phi, bool ensure_safety) { + Remove(&phis_, this, phi, ensure_safety); } void HEnvironment::CopyFrom(HEnvironment* env) { @@ -457,15 +463,9 @@ void HEnvironment::CopyFrom(HEnvironment* env) { } } -template <typename T> -static void RemoveFromUseList(T user, size_t input_index, HUseList<T>* list) { - HUseListNode<T>* current; - for (HUseIterator<HInstruction*> use_it(*list); !use_it.Done(); use_it.Advance()) { - current = use_it.Current(); - if (current->GetUser() == user && current->GetIndex() == input_index) { - list->Remove(current); - } - } +void HEnvironment::RemoveAsUserOfInput(size_t index) const { + const HUserRecord<HEnvironment*> user_record = vregs_.Get(index); + user_record.GetInstruction()->RemoveEnvironmentUser(user_record.GetUseNode()); } HInstruction* HInstruction::GetNextDisregardingMoves() const { @@ -484,14 +484,6 @@ HInstruction* 
HInstruction::GetPreviousDisregardingMoves() const { return previous; } -void HInstruction::RemoveUser(HInstruction* user, size_t input_index) { - RemoveFromUseList(user, input_index, &uses_); -} - -void HInstruction::RemoveEnvironmentUser(HUseListNode<HEnvironment*>* use) { - env_uses_.Remove(use); -} - void HInstructionList::AddInstruction(HInstruction* instruction) { if (first_instruction_ == nullptr) { DCHECK(last_instruction_ == nullptr); @@ -602,7 +594,7 @@ void HInstruction::ReplaceWith(HInstruction* other) { } void HInstruction::ReplaceInput(HInstruction* replacement, size_t index) { - InputAt(index)->RemoveUser(this, index); + RemoveAsUserOfInput(index); SetRawInputAt(index, replacement); replacement->AddUseAt(this, index); } @@ -613,7 +605,7 @@ size_t HInstruction::EnvironmentSize() const { void HPhi::AddInput(HInstruction* input) { DCHECK(input->GetBlock() != nullptr); - inputs_.Add(input); + inputs_.Add(HUserRecord<HInstruction*>(input)); input->AddUseAt(this, inputs_.Size() - 1); } @@ -990,4 +982,14 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { invoke->GetBlock()->RemoveInstruction(invoke); } +std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs) { + ScopedObjectAccess soa(Thread::Current()); + os << "[" + << " is_top=" << rhs.IsTop() + << " type=" << (rhs.IsTop() ? "?" : PrettyClass(rhs.GetTypeHandle().Get())) + << " is_exact=" << rhs.IsExact() + << " ]"; + return os; +} + } // namespace art diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 30d869d026..de448cc483 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -17,23 +17,28 @@ #ifndef ART_COMPILER_OPTIMIZING_NODES_H_ #define ART_COMPILER_OPTIMIZING_NODES_H_ +#include "base/arena_object.h" #include "entrypoints/quick/quick_entrypoints_enum.h" +#include "handle.h" +#include "handle_scope.h" #include "invoke_type.h" #include "locations.h" +#include "mirror/class.h" #include "offsets.h" #include "primitive.h" -#include "utils/arena_object.h" #include "utils/arena_bit_vector.h" #include "utils/growable_array.h" namespace art { +class GraphChecker; class HBasicBlock; class HEnvironment; class HInstruction; class HIntConstant; class HInvoke; class HGraphVisitor; +class HNullConstant; class HPhi; class HSuspendCheck; class LiveInterval; @@ -194,6 +199,8 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { return reverse_post_order_; } + HNullConstant* GetNullConstant(); + private: HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const; void VisitBlockForDominatorTree(HBasicBlock* block, @@ -205,7 +212,6 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { ArenaBitVector* visiting); void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const; void RemoveDeadBlocks(const ArenaBitVector& visited) const; - void RemoveBlock(HBasicBlock* block) const; ArenaAllocator* const arena_; @@ -233,6 +239,9 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { // The current id to assign to a newly added instruction. See HInstruction.id_. int32_t current_instruction_id_; + // Cached null constant that might be created when building SSA form. 
+ HNullConstant* cached_null_constant_; + ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1); DISALLOW_COPY_AND_ASSIGN(HGraph); }; @@ -481,14 +490,17 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { void ReplaceWith(HBasicBlock* other); void AddInstruction(HInstruction* instruction); - void RemoveInstruction(HInstruction* instruction); void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor); // Replace instruction `initial` with `replacement` within this block. void ReplaceAndRemoveInstructionWith(HInstruction* initial, HInstruction* replacement); void AddPhi(HPhi* phi); void InsertPhiAfter(HPhi* instruction, HPhi* cursor); - void RemovePhi(HPhi* phi); + // RemoveInstruction and RemovePhi delete a given instruction from the respective + // instruction list. With 'ensure_safety' set to true, it verifies that the + // instruction is not in use and removes it from the use lists of its inputs. + void RemoveInstruction(HInstruction* instruction, bool ensure_safety = true); + void RemovePhi(HPhi* phi, bool ensure_safety = true); bool IsLoopHeader() const { return (loop_information_ != nullptr) && (loop_information_->GetHeader() == this); @@ -574,6 +586,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { M(ArrayLength, Instruction) \ M(ArraySet, Instruction) \ M(BoundsCheck, Instruction) \ + M(BoundType, Instruction) \ M(CheckCast, Instruction) \ M(ClinitCheck, Instruction) \ M(Compare, BinaryOperation) \ @@ -610,6 +623,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { M(NewInstance, Instruction) \ M(Not, UnaryOperation) \ M(NotEqual, Condition) \ + M(NullConstant, Instruction) \ M(NullCheck, Instruction) \ M(Or, BinaryOperation) \ M(ParallelMove, Instruction) \ @@ -704,6 +718,9 @@ class HUseList : public ValueObject { } void Remove(HUseListNode<T>* node) { + DCHECK(node != nullptr); + DCHECK(Contains(node)); + if (node->prev_ != nullptr) { node->prev_->next_ = node->next_; } @@ -715,6 +732,18 @@ class HUseList : public ValueObject { } } + bool Contains(const HUseListNode<T>* node) const { + if (node == nullptr) { + return false; + } + for (HUseListNode<T>* current = first_; current != nullptr; current = current->GetNext()) { + if (current == node) { + return true; + } + } + return false; + } + bool IsEmpty() const { return first_ == nullptr; } @@ -750,6 +779,33 @@ class HUseIterator : public ValueObject { friend class HValue; }; +// This class is used by HEnvironment and HInstruction classes to record the +// instructions they use and pointers to the corresponding HUseListNodes kept +// by the used instructions. +template <typename T> +class HUserRecord : public ValueObject { + public: + HUserRecord() : instruction_(nullptr), use_node_(nullptr) {} + explicit HUserRecord(HInstruction* instruction) : instruction_(instruction), use_node_(nullptr) {} + + HUserRecord(const HUserRecord<T>& old_record, HUseListNode<T>* use_node) + : instruction_(old_record.instruction_), use_node_(use_node) { + DCHECK(instruction_ != nullptr); + DCHECK(use_node_ != nullptr); + DCHECK(old_record.use_node_ == nullptr); + } + + HInstruction* GetInstruction() const { return instruction_; } + HUseListNode<T>* GetUseNode() const { return use_node_; } + + private: + // Instruction used by the user. + HInstruction* instruction_; + + // Corresponding entry in the use list kept by 'instruction_'. + HUseListNode<T>* use_node_; +}; + // Represents the side effects an instruction may have. 
class SideEffects : public ValueObject { public: @@ -820,50 +876,118 @@ class HEnvironment : public ArenaObject<kArenaAllocMisc> { : vregs_(arena, number_of_vregs) { vregs_.SetSize(number_of_vregs); for (size_t i = 0; i < number_of_vregs; i++) { - vregs_.Put(i, VRegInfo(nullptr, nullptr)); + vregs_.Put(i, HUserRecord<HEnvironment*>()); } } void CopyFrom(HEnvironment* env); void SetRawEnvAt(size_t index, HInstruction* instruction) { - vregs_.Put(index, VRegInfo(instruction, nullptr)); + vregs_.Put(index, HUserRecord<HEnvironment*>(instruction)); } + HInstruction* GetInstructionAt(size_t index) const { + return vregs_.Get(index).GetInstruction(); + } + + void RemoveAsUserOfInput(size_t index) const; + + size_t Size() const { return vregs_.Size(); } + + private: // Record instructions' use entries of this environment for constant-time removal. + // It should only be called by HInstruction when a new environment use is added. void RecordEnvUse(HUseListNode<HEnvironment*>* env_use) { DCHECK(env_use->GetUser() == this); size_t index = env_use->GetIndex(); - VRegInfo info = vregs_.Get(index); - DCHECK(info.vreg_ != nullptr); - DCHECK(info.node_ == nullptr); - vregs_.Put(index, VRegInfo(info.vreg_, env_use)); + vregs_.Put(index, HUserRecord<HEnvironment*>(vregs_.Get(index), env_use)); } - HInstruction* GetInstructionAt(size_t index) const { - return vregs_.Get(index).vreg_; + GrowableArray<HUserRecord<HEnvironment*> > vregs_; + + friend HInstruction; + + DISALLOW_COPY_AND_ASSIGN(HEnvironment); +}; + +class ReferenceTypeInfo : ValueObject { + public: + typedef Handle<mirror::Class> TypeHandle; + + static ReferenceTypeInfo Create(TypeHandle type_handle, bool is_exact) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + if (type_handle->IsObjectClass()) { + // Override the type handle to be consistent with the case when we get to + // Top but don't have the Object class available. It avoids having to guess + // what value the type_handle has when it's Top. + return ReferenceTypeInfo(TypeHandle(), is_exact, true); + } else { + return ReferenceTypeInfo(type_handle, is_exact, false); + } } - HUseListNode<HEnvironment*>* GetInstructionEnvUseAt(size_t index) const { - return vregs_.Get(index).node_; + static ReferenceTypeInfo CreateTop(bool is_exact) { + return ReferenceTypeInfo(TypeHandle(), is_exact, true); } - size_t Size() const { return vregs_.Size(); } + bool IsExact() const { return is_exact_; } + bool IsTop() const { return is_top_; } - private: - struct VRegInfo { - HInstruction* vreg_; - HUseListNode<HEnvironment*>* node_; + Handle<mirror::Class> GetTypeHandle() const { return type_handle_; } - VRegInfo(HInstruction* instruction, HUseListNode<HEnvironment*>* env_use) - : vreg_(instruction), node_(env_use) {} - }; + bool IsSupertypeOf(ReferenceTypeInfo rti) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + if (IsTop()) { + // Top (equivalent for java.lang.Object) is supertype of anything. + return true; + } + if (rti.IsTop()) { + // If we get here `this` is not Top() so it can't be a supertype. + return false; + } + return GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get()); + } - GrowableArray<VRegInfo> vregs_; + // Returns true if the type information provide the same amount of details. + // Note that it does not mean that the instructions have the same actual type + // (e.g. tops are equal but they can be the result of a merge). 
+ bool IsEqual(ReferenceTypeInfo rti) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + if (IsExact() != rti.IsExact()) { + return false; + } + if (IsTop() && rti.IsTop()) { + // `Top` means java.lang.Object, so the types are equivalent. + return true; + } + if (IsTop() || rti.IsTop()) { + // If only one is top or object than they are not equivalent. + // NB: We need this extra check because the type_handle of `Top` is invalid + // and we cannot inspect its reference. + return false; + } - DISALLOW_COPY_AND_ASSIGN(HEnvironment); + // Finally check the types. + return GetTypeHandle().Get() == rti.GetTypeHandle().Get(); + } + + private: + ReferenceTypeInfo() : ReferenceTypeInfo(TypeHandle(), false, true) {} + ReferenceTypeInfo(TypeHandle type_handle, bool is_exact, bool is_top) + : type_handle_(type_handle), is_exact_(is_exact), is_top_(is_top) {} + + // The class of the object. + TypeHandle type_handle_; + // Whether or not the type is exact or a superclass of the actual type. + // Whether or not we have any information about this type. + bool is_exact_; + // A true value here means that the object type should be java.lang.Object. + // We don't have access to the corresponding mirror object every time so this + // flag acts as a substitute. When true, the TypeHandle refers to a null + // pointer and should not be used. + bool is_top_; }; +std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs); + class HInstruction : public ArenaObject<kArenaAllocMisc> { public: explicit HInstruction(SideEffects side_effects) @@ -876,7 +1000,8 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { locations_(nullptr), live_interval_(nullptr), lifetime_position_(kNoLifetime), - side_effects_(side_effects) {} + side_effects_(side_effects), + reference_type_info_(ReferenceTypeInfo::CreateTop(/* is_exact */ false)) {} virtual ~HInstruction() {} @@ -899,13 +1024,15 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { bool IsLoopHeaderPhi() { return IsPhi() && block_->IsLoopHeader(); } virtual size_t InputCount() const = 0; - virtual HInstruction* InputAt(size_t i) const = 0; + HInstruction* InputAt(size_t i) const { return InputRecordAt(i).GetInstruction(); } virtual void Accept(HGraphVisitor* visitor) = 0; virtual const char* DebugName() const = 0; virtual Primitive::Type GetType() const { return Primitive::kPrimVoid; } - virtual void SetRawInputAt(size_t index, HInstruction* input) = 0; + void SetRawInputAt(size_t index, HInstruction* input) { + SetRawInputRecordAt(index, HUserRecord<HInstruction*>(input)); + } virtual bool NeedsEnvironment() const { return false; } virtual bool IsControlFlow() const { return false; } @@ -914,12 +1041,24 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { // Does not apply for all instructions, but having this at top level greatly // simplifies the null check elimination. 
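ReferenceTypeInfo models java.lang.Object as a dedicated Top flag with no usable handle, which is why IsSupertypeOf and IsEqual dispose of the Top cases before ever touching the handle. A self-contained toy model of that lattice, using an invented three-class table in place of mirror::Class and handles:

// Toy model of the Top/exact reference-type lattice; not the ART implementation.
#include <cassert>

enum class TypeId { kObject, kNumber, kInteger };  // Integer <: Number <: Object

static bool IsSubclassOf(TypeId sub, TypeId super) {
  if (super == TypeId::kObject) return true;
  if (super == TypeId::kNumber) return sub == TypeId::kNumber || sub == TypeId::kInteger;
  return sub == super;
}

class TypeInfo {
 public:
  static TypeInfo Create(TypeId type, bool is_exact) {
    // Like ReferenceTypeInfo::Create: Object degenerates to Top.
    return (type == TypeId::kObject) ? TypeInfo(type, is_exact, /*is_top=*/true)
                                     : TypeInfo(type, is_exact, /*is_top=*/false);
  }
  static TypeInfo CreateTop(bool is_exact) { return TypeInfo(TypeId::kObject, is_exact, true); }

  bool IsTop() const { return is_top_; }
  bool IsExact() const { return is_exact_; }

  bool IsSupertypeOf(const TypeInfo& other) const {
    if (IsTop()) return true;          // Top (Object) is a supertype of everything.
    if (other.IsTop()) return false;   // Nothing below Top is a supertype of Top.
    return IsSubclassOf(other.type_, type_);
  }

  bool IsEqual(const TypeInfo& other) const {
    if (IsExact() != other.IsExact()) return false;
    if (IsTop() && other.IsTop()) return true;    // Both stand for Object.
    if (IsTop() || other.IsTop()) return false;   // Only one is Top: no valid handle to compare.
    return type_ == other.type_;
  }

 private:
  TypeInfo(TypeId type, bool is_exact, bool is_top)
      : type_(type), is_exact_(is_exact), is_top_(is_top) {}
  TypeId type_;
  bool is_exact_;
  bool is_top_;
};

int main() {
  TypeInfo number = TypeInfo::Create(TypeId::kNumber, /*is_exact=*/false);
  TypeInfo integer = TypeInfo::Create(TypeId::kInteger, /*is_exact=*/true);
  assert(TypeInfo::CreateTop(false).IsSupertypeOf(integer));
  assert(number.IsSupertypeOf(integer));
  assert(!integer.IsSupertypeOf(number));
  assert(!number.IsEqual(integer));
  return 0;
}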
- virtual bool CanBeNull() const { return true; } + virtual bool CanBeNull() const { + DCHECK_EQ(GetType(), Primitive::kPrimNot) << "CanBeNull only applies to reference types"; + return true; + } virtual bool CanDoImplicitNullCheck() const { return false; } + void SetReferenceTypeInfo(ReferenceTypeInfo reference_type_info) { + reference_type_info_ = reference_type_info; + } + + ReferenceTypeInfo GetReferenceTypeInfo() const { return reference_type_info_; } + void AddUseAt(HInstruction* user, size_t index) { - uses_.AddUse(user, index, GetBlock()->GetGraph()->GetArena()); + DCHECK(user != nullptr); + HUseListNode<HInstruction*>* use = + uses_.AddUse(user, index, GetBlock()->GetGraph()->GetArena()); + user->SetRawInputRecordAt(index, HUserRecord<HInstruction*>(user->InputRecordAt(index), use)); } void AddEnvUseAt(HEnvironment* user, size_t index) { @@ -929,11 +1068,13 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { user->RecordEnvUse(env_use); } - void RemoveUser(HInstruction* user, size_t index); - void RemoveEnvironmentUser(HUseListNode<HEnvironment*>* use); + void RemoveAsUserOfInput(size_t input) { + HUserRecord<HInstruction*> input_use = InputRecordAt(input); + input_use.GetInstruction()->uses_.Remove(input_use.GetUseNode()); + } - const HUseList<HInstruction*>& GetUses() { return uses_; } - const HUseList<HEnvironment*>& GetEnvUses() { return env_uses_; } + const HUseList<HInstruction*>& GetUses() const { return uses_; } + const HUseList<HEnvironment*>& GetEnvUses() const { return env_uses_; } bool HasUses() const { return !uses_.IsEmpty() || !env_uses_.IsEmpty(); } bool HasEnvironmentUses() const { return !env_uses_.IsEmpty(); } @@ -1015,7 +1156,25 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { void SetLiveInterval(LiveInterval* interval) { live_interval_ = interval; } bool HasLiveInterval() const { return live_interval_ != nullptr; } + bool IsSuspendCheckEntry() const { return IsSuspendCheck() && GetBlock()->IsEntryBlock(); } + + // Returns whether the code generation of the instruction will require to have access + // to the current method. Such instructions are: + // (1): Instructions that require an environment, as calling the runtime requires + // to walk the stack and have the current method stored at a specific stack address. + // (2): Object literals like classes and strings, that are loaded from the dex cache + // fields of the current method. + bool NeedsCurrentMethod() const { + return NeedsEnvironment() || IsLoadClass() || IsLoadString(); + } + + protected: + virtual const HUserRecord<HInstruction*> InputRecordAt(size_t i) const = 0; + virtual void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) = 0; + private: + void RemoveEnvironmentUser(HUseListNode<HEnvironment*>* use_node) { env_uses_.Remove(use_node); } + HInstruction* previous_; HInstruction* next_; HBasicBlock* block_; @@ -1050,7 +1209,12 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { const SideEffects side_effects_; + // TODO: for primitive types this should be marked as invalid. 
+ ReferenceTypeInfo reference_type_info_; + + friend class GraphChecker; friend class HBasicBlock; + friend class HEnvironment; friend class HGraph; friend class HInstructionList; @@ -1170,15 +1334,16 @@ class HTemplateInstruction: public HInstruction { virtual ~HTemplateInstruction() {} virtual size_t InputCount() const { return N; } - virtual HInstruction* InputAt(size_t i) const { return inputs_[i]; } protected: - virtual void SetRawInputAt(size_t i, HInstruction* instruction) { - inputs_[i] = instruction; + const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE { return inputs_[i]; } + + void SetRawInputRecordAt(size_t i, const HUserRecord<HInstruction*>& input) OVERRIDE { + inputs_[i] = input; } private: - EmbeddedArray<HInstruction*, N> inputs_; + EmbeddedArray<HUserRecord<HInstruction*>, N> inputs_; friend class SsaBuilder; }; @@ -1663,6 +1828,22 @@ class HDoubleConstant : public HConstant { DISALLOW_COPY_AND_ASSIGN(HDoubleConstant); }; +class HNullConstant : public HConstant { + public: + HNullConstant() : HConstant(Primitive::kPrimNot) {} + + bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + return true; + } + + size_t ComputeHashCode() const OVERRIDE { return 0; } + + DECLARE_INSTRUCTION(NullConstant); + + private: + DISALLOW_COPY_AND_ASSIGN(HNullConstant); +}; + // Constants of the type int. Those can be from Dex instructions, or // synthesized (for example with the if-eqz instruction). class HIntConstant : public HConstant { @@ -1718,7 +1899,6 @@ std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic); class HInvoke : public HInstruction { public: virtual size_t InputCount() const { return inputs_.Size(); } - virtual HInstruction* InputAt(size_t i) const { return inputs_.Get(i); } // Runtime needs to walk the stack, so Dex -> Dex calls need to // know their environment. 
@@ -1728,10 +1908,6 @@ class HInvoke : public HInstruction { SetRawInputAt(index, argument); } - virtual void SetRawInputAt(size_t index, HInstruction* input) { - inputs_.Put(index, input); - } - virtual Primitive::Type GetType() const { return return_type_; } uint32_t GetDexPc() const { return dex_pc_; } @@ -1763,7 +1939,12 @@ class HInvoke : public HInstruction { inputs_.SetSize(number_of_arguments); } - GrowableArray<HInstruction*> inputs_; + const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE { return inputs_.Get(i); } + void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) OVERRIDE { + inputs_.Put(index, input); + } + + GrowableArray<HUserRecord<HInstruction*> > inputs_; const Primitive::Type return_type_; const uint32_t dex_pc_; const uint32_t dex_method_index_; @@ -2259,11 +2440,6 @@ class HPhi : public HInstruction { } size_t InputCount() const OVERRIDE { return inputs_.Size(); } - HInstruction* InputAt(size_t i) const OVERRIDE { return inputs_.Get(i); } - - void SetRawInputAt(size_t index, HInstruction* input) OVERRIDE { - inputs_.Put(index, input); - } void AddInput(HInstruction* input); @@ -2282,8 +2458,15 @@ class HPhi : public HInstruction { DECLARE_INSTRUCTION(Phi); + protected: + const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE { return inputs_.Get(i); } + + void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) OVERRIDE { + inputs_.Put(index, input); + } + private: - GrowableArray<HInstruction*> inputs_; + GrowableArray<HUserRecord<HInstruction*> > inputs_; const uint32_t reg_number_; Primitive::Type type_; bool is_live_; @@ -2608,7 +2791,8 @@ class HLoadClass : public HExpression<0> { type_index_(type_index), is_referrers_class_(is_referrers_class), dex_pc_(dex_pc), - generate_clinit_check_(false) {} + generate_clinit_check_(false), + loaded_class_rti_(ReferenceTypeInfo::CreateTop(/* is_exact */ false)) {} bool CanBeMoved() const OVERRIDE { return true; } @@ -2646,6 +2830,20 @@ class HLoadClass : public HExpression<0> { return !is_referrers_class_; } + ReferenceTypeInfo GetLoadedClassRTI() { + return loaded_class_rti_; + } + + void SetLoadedClassRTI(ReferenceTypeInfo rti) { + // Make sure we only set exact types (the loaded class should never be merged). + DCHECK(rti.IsExact()); + loaded_class_rti_ = rti; + } + + bool IsResolved() { + return loaded_class_rti_.IsExact(); + } + DECLARE_INSTRUCTION(LoadClass); private: @@ -2656,6 +2854,8 @@ class HLoadClass : public HExpression<0> { // Used for code generation. bool generate_clinit_check_; + ReferenceTypeInfo loaded_class_rti_; + DISALLOW_COPY_AND_ASSIGN(HLoadClass); }; @@ -2858,6 +3058,32 @@ class HInstanceOf : public HExpression<2> { DISALLOW_COPY_AND_ASSIGN(HInstanceOf); }; +class HBoundType : public HExpression<1> { + public: + HBoundType(HInstruction* input, ReferenceTypeInfo bound_type) + : HExpression(Primitive::kPrimNot, SideEffects::None()), + bound_type_(bound_type) { + SetRawInputAt(0, input); + } + + const ReferenceTypeInfo& GetBoundType() const { return bound_type_; } + + bool CanBeNull() const OVERRIDE { + // `null instanceof ClassX` always return false so we can't be null. + return false; + } + + DECLARE_INSTRUCTION(BoundType); + + private: + // Encodes the most upper class that this instruction can have. In other words + // it is always the case that GetBoundType().IsSupertypeOf(GetReferenceType()). 
+ // It is used to bound the type in cases like `if (x instanceof ClassX) {}` + const ReferenceTypeInfo bound_type_; + + DISALLOW_COPY_AND_ASSIGN(HBoundType); +}; + class HCheckCast : public HTemplateInstruction<2> { public: HCheckCast(HInstruction* object, @@ -2959,7 +3185,7 @@ class MoveOperands : public ArenaObject<kArenaAllocMisc> { // True if this blocks a move from the given location. bool Blocks(Location loc) const { - return !IsEliminated() && source_.Equals(loc); + return !IsEliminated() && (source_.Contains(loc) || loc.Contains(source_)); } // A move is redundant if it's been eliminated, if its source and @@ -3000,46 +3226,19 @@ class HParallelMove : public HTemplateInstruction<0> { void AddMove(Location source, Location destination, HInstruction* instruction) { DCHECK(source.IsValid()); DCHECK(destination.IsValid()); - // The parallel move resolver does not handle pairs. So we decompose the - // pair locations into two moves. - if (source.IsPair() && destination.IsPair()) { - AddMove(source.ToLow(), destination.ToLow(), instruction); - AddMove(source.ToHigh(), destination.ToHigh(), nullptr); - } else if (source.IsPair()) { - DCHECK(destination.IsDoubleStackSlot()) << destination; - AddMove(source.ToLow(), Location::StackSlot(destination.GetStackIndex()), instruction); - AddMove(source.ToHigh(), Location::StackSlot(destination.GetHighStackIndex(4)), nullptr); - } else if (destination.IsPair()) { - if (source.IsConstant()) { - // We put the same constant in the move. The code generator will handle which - // low or high part to use. - AddMove(source, destination.ToLow(), instruction); - AddMove(source, destination.ToHigh(), nullptr); - } else { - DCHECK(source.IsDoubleStackSlot()); - AddMove(Location::StackSlot(source.GetStackIndex()), destination.ToLow(), instruction); - // TODO: rewrite GetHighStackIndex to not require a word size. It's supposed to - // always be 4. - static constexpr int kHighOffset = 4; - AddMove(Location::StackSlot(source.GetHighStackIndex(kHighOffset)), - destination.ToHigh(), - nullptr); - } - } else { - if (kIsDebugBuild) { - if (instruction != nullptr) { - for (size_t i = 0, e = moves_.Size(); i < e; ++i) { - DCHECK_NE(moves_.Get(i).GetInstruction(), instruction) - << "Doing parallel moves for the same instruction."; - } - } + if (kIsDebugBuild) { + if (instruction != nullptr) { for (size_t i = 0, e = moves_.Size(); i < e; ++i) { - DCHECK(!destination.Equals(moves_.Get(i).GetDestination())) - << "Same destination for two moves in a parallel move."; + DCHECK_NE(moves_.Get(i).GetInstruction(), instruction) + << "Doing parallel moves for the same instruction."; } } - moves_.Add(MoveOperands(source, destination, instruction)); + for (size_t i = 0, e = moves_.Size(); i < e; ++i) { + DCHECK(!destination.Equals(moves_.Get(i).GetDestination())) + << "Same destination for two moves in a parallel move."; + } } + moves_.Add(MoveOperands(source, destination, instruction)); } MoveOperands* MoveOperandsAt(size_t index) const { diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc index 5dbdc74924..4cf22d3b2e 100644 --- a/compiler/optimizing/nodes_test.cc +++ b/compiler/optimizing/nodes_test.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ +#include "base/arena_allocator.h" #include "nodes.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc index b99f6784f7..b13e07eb22 100644 --- a/compiler/optimizing/optimization.cc +++ b/compiler/optimizing/optimization.cc @@ -21,6 +21,12 @@ namespace art { +void HOptimization::MaybeRecordStat(MethodCompilationStat compilation_stat) const { + if (stats_ != nullptr) { + stats_->RecordStat(compilation_stat); + } +} + void HOptimization::Check() { if (kIsDebugBuild) { if (is_in_ssa_form_) { diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h index d9e082a7f3..af39e092c7 100644 --- a/compiler/optimizing/optimization.h +++ b/compiler/optimizing/optimization.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_ #include "nodes.h" +#include "optimizing_compiler_stats.h" namespace art { @@ -34,8 +35,10 @@ class HOptimization : public ValueObject { public: HOptimization(HGraph* graph, bool is_in_ssa_form, - const char* pass_name) + const char* pass_name, + OptimizingCompilerStats* stats = nullptr) : graph_(graph), + stats_(stats), is_in_ssa_form_(is_in_ssa_form), pass_name_(pass_name) {} @@ -51,7 +54,11 @@ class HOptimization : public ValueObject { void Check(); protected: + void MaybeRecordStat(MethodCompilationStat compilation_stat) const; + HGraph* const graph_; + // Used to record stats about the optimization. + OptimizingCompilerStats* const stats_; private: // Does the analyzed graph use the SSA form? diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index c518f33f53..2fef8c7b3a 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -19,6 +19,7 @@ #include <fstream> #include <stdint.h> +#include "base/arena_allocator.h" #include "base/dumpable.h" #include "base/timing_logger.h" #include "bounds_check_elimination.h" @@ -47,7 +48,6 @@ #include "ssa_phi_elimination.h" #include "ssa_liveness_analysis.h" #include "reference_type_propagation.h" -#include "utils/arena_allocator.h" namespace art { @@ -201,6 +201,7 @@ class OptimizingCompiler FINAL : public Compiler { CompiledMethod* CompileOptimized(HGraph* graph, CodeGenerator* codegen, CompilerDriver* driver, + const DexFile& dex_file, const DexCompilationUnit& dex_compilation_unit, PassInfoPrinter* pass_info) const; @@ -293,13 +294,15 @@ static void RunOptimizations(HOptimization* optimizations[], static void RunOptimizations(HGraph* graph, CompilerDriver* driver, OptimizingCompilerStats* stats, + const DexFile& dex_file, const DexCompilationUnit& dex_compilation_unit, - PassInfoPrinter* pass_info_printer) { + PassInfoPrinter* pass_info_printer, + StackHandleScopeCollection* handles) { SsaRedundantPhiElimination redundant_phi(graph); SsaDeadPhiElimination dead_phi(graph); HDeadCodeElimination dce(graph); HConstantFolding fold1(graph); - InstructionSimplifier simplify1(graph); + InstructionSimplifier simplify1(graph, stats); HInliner inliner(graph, dex_compilation_unit, driver, stats); @@ -308,8 +311,8 @@ static void RunOptimizations(HGraph* graph, GVNOptimization gvn(graph, side_effects); LICM licm(graph, side_effects); BoundsCheckElimination bce(graph); - ReferenceTypePropagation type_propagation(graph); - InstructionSimplifier simplify2(graph, "instruction_simplifier_after_types"); + ReferenceTypePropagation type_propagation(graph, dex_file, dex_compilation_unit, handles); + InstructionSimplifier 
simplify2(graph, stats, "instruction_simplifier_after_types"); IntrinsicsRecognizer intrinsics(graph, dex_compilation_unit.GetDexFile(), driver); @@ -348,10 +351,12 @@ static ArrayRef<const uint8_t> AlignVectorSize(std::vector<uint8_t>& vector) { CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, CodeGenerator* codegen, CompilerDriver* compiler_driver, + const DexFile& dex_file, const DexCompilationUnit& dex_compilation_unit, PassInfoPrinter* pass_info_printer) const { - RunOptimizations( - graph, compiler_driver, &compilation_stats_, dex_compilation_unit, pass_info_printer); + StackHandleScopeCollection handles(Thread::Current()); + RunOptimizations(graph, compiler_driver, &compilation_stats_, + dex_file, dex_compilation_unit, pass_info_printer, &handles); PrepareForRegisterAllocation(graph).Run(); SsaLivenessAnalysis liveness(*graph, codegen); @@ -376,7 +381,10 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, compiler_driver, codegen->GetInstructionSet(), ArrayRef<const uint8_t>(allocator.GetMemory()), - codegen->GetFrameSize(), + // Follow Quick's behavior and set the frame size to zero if it is + // considered "empty" (see the definition of + // art::CodeGenerator::HasEmptyFrame). + codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(), codegen->GetCoreSpillMask(), codegen->GetFpuSpillMask(), ArrayRef<const uint8_t>(stack_map)); @@ -400,17 +408,21 @@ CompiledMethod* OptimizingCompiler::CompileBaseline( codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit); compilation_stats_.RecordStat(MethodCompilationStat::kCompiledBaseline); - return CompiledMethod::SwapAllocCompiledMethod(compiler_driver, - codegen->GetInstructionSet(), - ArrayRef<const uint8_t>(allocator.GetMemory()), - codegen->GetFrameSize(), - codegen->GetCoreSpillMask(), - codegen->GetFpuSpillMask(), - &src_mapping_table, - AlignVectorSize(mapping_table), - AlignVectorSize(vmap_table), - AlignVectorSize(gc_map), - ArrayRef<const uint8_t>()); + return CompiledMethod::SwapAllocCompiledMethod( + compiler_driver, + codegen->GetInstructionSet(), + ArrayRef<const uint8_t>(allocator.GetMemory()), + // Follow Quick's behavior and set the frame size to zero if it is + // considered "empty" (see the definition of + // art::CodeGenerator::HasEmptyFrame). + codegen->HasEmptyFrame() ? 
0 : codegen->GetFrameSize(), + codegen->GetCoreSpillMask(), + codegen->GetFpuSpillMask(), + &src_mapping_table, + AlignVectorSize(mapping_table), + AlignVectorSize(vmap_table), + AlignVectorSize(gc_map), + ArrayRef<const uint8_t>()); } CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, @@ -508,6 +520,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, return CompileOptimized(graph, codegen.get(), compiler_driver, + dex_file, dex_compilation_unit, &pass_info_printer); } else if (shouldOptimize && RegisterAllocator::Supports(instruction_set)) { diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index cc2723df99..3ebf0f8cd2 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -43,6 +43,8 @@ enum MethodCompilationStat { kNotCompiledCantAccesType, kNotOptimizedRegisterAllocator, kNotCompiledUnhandledInstruction, + kRemovedCheckedCast, + kRemovedNullCheck, kLastStat }; @@ -96,6 +98,8 @@ class OptimizingCompilerStats { case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType"; case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator"; case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction"; + case kRemovedCheckedCast: return "kRemovedCheckedCast"; + case kRemovedNullCheck: return "kRemovedNullCheck"; default: LOG(FATAL) << "invalid stat"; } return ""; diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc index debe466560..7d0641ec13 100644 --- a/compiler/optimizing/parallel_move_resolver.cc +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -57,17 +57,49 @@ void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) { // unallocated, or the move was already eliminated). for (size_t i = 0; i < parallel_move->NumMoves(); ++i) { MoveOperands* move = parallel_move->MoveOperandsAt(i); - // The parallel move resolver algorithm does not work with register pairs. - DCHECK(!move->GetSource().IsPair()); - DCHECK(!move->GetDestination().IsPair()); if (!move->IsRedundant()) { moves_.Add(move); } } } +// Update the source of `move`, knowing that `updated_location` has been swapped +// with `new_source`. Note that `updated_location` can be a pair, therefore if +// `move` is non-pair, we need to extract which register to use. +static void UpdateSourceOf(MoveOperands* move, Location updated_location, Location new_source) { + Location source = move->GetSource(); + if (new_source.GetKind() == source.GetKind()) { + DCHECK(updated_location.Equals(source)); + move->SetSource(new_source); + } else if (new_source.IsStackSlot() + || new_source.IsDoubleStackSlot() + || source.IsStackSlot() + || source.IsDoubleStackSlot()) { + // Stack slots never take part of a pair/non-pair swap. 
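UpdateSourceOf above covers the case the old resolver could not express: once pair (0,1) has been swapped with (2,3), a still-pending move that read the single register 2 must be rewritten to read 0, which is exactly what the parallel_move_test expectation "(0,1 <-> 2,3) (7 -> 1) (0 -> 7)" further below exercises. A simplified standalone sketch of that remapping, with PairLoc and plain ints standing in for Location:

// Simplified sketch of rewriting a blocked move's source after a pair swap.
// A move whose source is the whole pair would simply take the whole
// replacement pair; only the single-register halves need mapping.
#include <cassert>

struct PairLoc { int low; int high; };

// `updated` (a register pair) was swapped with `replacement` (another pair).
// A pending move that read one half of `updated` must now read the matching
// half of `replacement`.
static int RemapSingle(int source_reg, PairLoc updated, PairLoc replacement) {
  if (source_reg == updated.low) return replacement.low;
  assert(source_reg == updated.high);
  return replacement.high;
}

int main() {
  // Swap pair (0,1) <-> (2,3); a later move "(2 -> 7)" must become "(0 -> 7)".
  PairLoc updated{2, 3};
  PairLoc replacement{0, 1};
  assert(RemapSingle(2, updated, replacement) == 0);
  assert(RemapSingle(3, updated, replacement) == 1);
  return 0;
}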
+ DCHECK(updated_location.Equals(source)); + move->SetSource(new_source); + } else if (source.IsRegister()) { + DCHECK(new_source.IsRegisterPair()) << new_source; + DCHECK(updated_location.IsRegisterPair()) << updated_location; + if (updated_location.low() == source.reg()) { + move->SetSource(Location::RegisterLocation(new_source.low())); + } else { + DCHECK_EQ(updated_location.high(), source.reg()); + move->SetSource(Location::RegisterLocation(new_source.high())); + } + } else if (source.IsFpuRegister()) { + DCHECK(new_source.IsFpuRegisterPair()) << new_source; + DCHECK(updated_location.IsFpuRegisterPair()) << updated_location; + if (updated_location.low() == source.reg()) { + move->SetSource(Location::FpuRegisterLocation(new_source.low())); + } else { + DCHECK_EQ(updated_location.high(), source.reg()); + move->SetSource(Location::FpuRegisterLocation(new_source.high())); + } + } +} -void ParallelMoveResolver::PerformMove(size_t index) { +MoveOperands* ParallelMoveResolver::PerformMove(size_t index) { // Each call to this function performs a move and deletes it from the move // graph. We first recursively perform any move blocking this one. We // mark a move as "pending" on entry to PerformMove in order to detect @@ -75,35 +107,59 @@ void ParallelMoveResolver::PerformMove(size_t index) { // which means that a call to PerformMove could change any source operand // in the move graph. - DCHECK(!moves_.Get(index)->IsPending()); - DCHECK(!moves_.Get(index)->IsRedundant()); + MoveOperands* move = moves_.Get(index); + DCHECK(!move->IsPending()); + if (move->IsRedundant()) { + // Because we swap register pairs first, following, un-pending + // moves may become redundant. + move->Eliminate(); + return nullptr; + } // Clear this move's destination to indicate a pending move. The actual // destination is saved in a stack-allocated local. Recursion may allow // multiple moves to be pending. - DCHECK(!moves_.Get(index)->GetSource().IsInvalid()); - Location destination = moves_.Get(index)->MarkPending(); + DCHECK(!move->GetSource().IsInvalid()); + Location destination = move->MarkPending(); // Perform a depth-first traversal of the move graph to resolve // dependencies. Any unperformed, unpending move with a source the same // as this one's destination blocks this one so recursively perform all // such moves. + MoveOperands* required_swap = nullptr; for (size_t i = 0; i < moves_.Size(); ++i) { const MoveOperands& other_move = *moves_.Get(i); if (other_move.Blocks(destination) && !other_move.IsPending()) { // Though PerformMove can change any source operand in the move graph, - // this call cannot create a blocking move via a swap (this loop does - // not miss any). Assume there is a non-blocking move with source A + // calling `PerformMove` cannot create a blocking move via a swap + // (this loop does not miss any). + // For example, assume there is a non-blocking move with source A // and this move is blocked on source B and there is a swap of A and // B. Then A and B must be involved in the same cycle (or they would // not be swapped). Since this move's destination is B and there is // only a single incoming edge to an operand, this move must also be // involved in the same cycle. In that case, the blocking move will // be created but will be "pending" when we return from PerformMove. - PerformMove(i); + required_swap = PerformMove(i); + + if (required_swap == move) { + // If this move is required to swap, we do so without looking + // at the next moves. 
Swapping is not blocked by anything, it just + // updates other moves's source. + break; + } else if (required_swap == moves_.Get(i)) { + // If `other_move` was swapped, we iterate again to find a new + // potential cycle. + required_swap = nullptr; + i = 0; + } else if (required_swap != nullptr) { + // A move is required to swap. We walk back the cycle to find the + // move by just returning from this `PerforrmMove`. + moves_.Get(index)->ClearPending(destination); + return required_swap; + } } } - MoveOperands* move = moves_.Get(index); // We are about to resolve this move and don't need it marked as // pending, so restore its destination. @@ -113,19 +169,30 @@ void ParallelMoveResolver::PerformMove(size_t index) { // so it may now be the last move in the cycle. If so remove it. if (move->GetSource().Equals(destination)) { move->Eliminate(); - return; + DCHECK(required_swap == nullptr); + return nullptr; } // The move may be blocked on a (at most one) pending move, in which case // we have a cycle. Search for such a blocking move and perform a swap to // resolve it. bool do_swap = false; - for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& other_move = *moves_.Get(i); - if (other_move.Blocks(destination)) { - DCHECK(other_move.IsPending()); - do_swap = true; - break; + if (required_swap != nullptr) { + DCHECK_EQ(required_swap, move); + do_swap = true; + } else { + for (size_t i = 0; i < moves_.Size(); ++i) { + const MoveOperands& other_move = *moves_.Get(i); + if (other_move.Blocks(destination)) { + DCHECK(other_move.IsPending()); + if (!destination.IsPair() && other_move.GetSource().IsPair()) { + // We swap pairs before swapping non-pairs. Go back from the + // cycle by returning the pair that must be swapped. + return moves_.Get(i); + } + do_swap = true; + break; + } } } @@ -140,15 +207,21 @@ void ParallelMoveResolver::PerformMove(size_t index) { for (size_t i = 0; i < moves_.Size(); ++i) { const MoveOperands& other_move = *moves_.Get(i); if (other_move.Blocks(source)) { - moves_.Get(i)->SetSource(swap_destination); + UpdateSourceOf(moves_.Get(i), source, swap_destination); } else if (other_move.Blocks(swap_destination)) { - moves_.Get(i)->SetSource(source); + UpdateSourceOf(moves_.Get(i), swap_destination, source); } } + // If the swap was required because of a pair in the middle of a cycle, + // we return the swapped move, so that the caller knows it needs to re-iterate + // its dependency loop. + return required_swap; } else { // This move is not blocked. EmitMove(index); move->Eliminate(); + DCHECK(required_swap == nullptr); + return nullptr; } } diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h index 7ec1dd2deb..3fa1b37afd 100644 --- a/compiler/optimizing/parallel_move_resolver.h +++ b/compiler/optimizing/parallel_move_resolver.h @@ -83,7 +83,15 @@ class ParallelMoveResolver : public ValueObject { // Perform the move at the moves_ index in question (possibly requiring // other moves to satisfy dependencies). - void PerformMove(size_t index); + // + // Return whether another move in the dependency cycle needs to swap. This + // is to handle pair swaps, where we want the pair to swap first to avoid + // building pairs that are unexpected by the code generator. For example, if + // we were to swap R1 with R2, we would need to update all locations using + // R2 to R1. So a (R2,R3) pair register could become (R1,R3). 
We could make + // the code generator understand such pairs, but it's easier and cleaner to + // just not create such pairs and exchange pairs in priority. + MoveOperands* PerformMove(size_t index); DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolver); }; diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index 28b5697bbd..44a3da2817 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -14,9 +14,9 @@ * limitations under the License. */ +#include "base/arena_allocator.h" #include "nodes.h" #include "parallel_move_resolver.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" @@ -165,7 +165,7 @@ TEST(ParallelMoveTest, Pairs) { Location::RegisterPairLocation(2, 3), nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(2 -> 4) (0 -> 2) (1 -> 3)", resolver.GetMessage().c_str()); + ASSERT_STREQ("(2 -> 4) (0,1 -> 2,3)", resolver.GetMessage().c_str()); } { @@ -180,7 +180,7 @@ TEST(ParallelMoveTest, Pairs) { Location::RegisterLocation(4), nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(2 -> 4) (0 -> 2) (1 -> 3)", resolver.GetMessage().c_str()); + ASSERT_STREQ("(2 -> 4) (0,1 -> 2,3)", resolver.GetMessage().c_str()); } { @@ -195,7 +195,89 @@ TEST(ParallelMoveTest, Pairs) { Location::RegisterLocation(0), nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(2 <-> 0) (1 -> 3)", resolver.GetMessage().c_str()); + ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str()); + } + { + TestParallelMoveResolver resolver(&allocator); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::RegisterLocation(2), + Location::RegisterLocation(7), + nullptr); + moves->AddMove( + Location::RegisterLocation(7), + Location::RegisterLocation(1), + nullptr); + moves->AddMove( + Location::RegisterPairLocation(0, 1), + Location::RegisterPairLocation(2, 3), + nullptr); + resolver.EmitNativeCode(moves); + ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str()); + } + { + TestParallelMoveResolver resolver(&allocator); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::RegisterLocation(2), + Location::RegisterLocation(7), + nullptr); + moves->AddMove( + Location::RegisterPairLocation(0, 1), + Location::RegisterPairLocation(2, 3), + nullptr); + moves->AddMove( + Location::RegisterLocation(7), + Location::RegisterLocation(1), + nullptr); + resolver.EmitNativeCode(moves); + ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str()); + } + { + TestParallelMoveResolver resolver(&allocator); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::RegisterPairLocation(0, 1), + Location::RegisterPairLocation(2, 3), + nullptr); + moves->AddMove( + Location::RegisterLocation(2), + Location::RegisterLocation(7), + nullptr); + moves->AddMove( + Location::RegisterLocation(7), + Location::RegisterLocation(1), + nullptr); + resolver.EmitNativeCode(moves); + ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str()); + } + { + TestParallelMoveResolver resolver(&allocator); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::RegisterPairLocation(0, 1), + Location::RegisterPairLocation(2, 3), + nullptr); + moves->AddMove( + Location::RegisterPairLocation(2, 3), + Location::RegisterPairLocation(0, 1), + nullptr); + resolver.EmitNativeCode(moves); + ASSERT_STREQ("(2,3 <-> 0,1)", 
resolver.GetMessage().c_str()); + } + { + TestParallelMoveResolver resolver(&allocator); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::RegisterPairLocation(2, 3), + Location::RegisterPairLocation(0, 1), + nullptr); + moves->AddMove( + Location::RegisterPairLocation(0, 1), + Location::RegisterPairLocation(2, 3), + nullptr); + resolver.EmitNativeCode(moves); + ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str()); } } diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index 12acd0884a..2d9a2bf330 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -42,6 +42,11 @@ void PrepareForRegisterAllocation::VisitBoundsCheck(HBoundsCheck* check) { check->ReplaceWith(check->InputAt(0)); } +void PrepareForRegisterAllocation::VisitBoundType(HBoundType* bound_type) { + bound_type->ReplaceWith(bound_type->InputAt(0)); + bound_type->GetBlock()->RemoveInstruction(bound_type); +} + void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) { HLoadClass* cls = check->GetLoadClass(); check->ReplaceWith(cls); diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h index 0fdb65ffe0..0f697fbc25 100644 --- a/compiler/optimizing/prepare_for_register_allocation.h +++ b/compiler/optimizing/prepare_for_register_allocation.h @@ -36,6 +36,7 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor { virtual void VisitNullCheck(HNullCheck* check) OVERRIDE; virtual void VisitDivZeroCheck(HDivZeroCheck* check) OVERRIDE; virtual void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE; + virtual void VisitBoundType(HBoundType* bound_type) OVERRIDE; virtual void VisitClinitCheck(HClinitCheck* check) OVERRIDE; virtual void VisitCondition(HCondition* condition) OVERRIDE; diff --git a/compiler/optimizing/pretty_printer_test.cc b/compiler/optimizing/pretty_printer_test.cc index 9cf8235d85..293fde978e 100644 --- a/compiler/optimizing/pretty_printer_test.cc +++ b/compiler/optimizing/pretty_printer_test.cc @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "base/arena_allocator.h" #include "base/stringprintf.h" #include "builder.h" #include "dex_file.h" @@ -21,7 +22,6 @@ #include "nodes.h" #include "optimizing_unit_test.h" #include "pretty_printer.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/primitive_type_propagation.cc b/compiler/optimizing/primitive_type_propagation.cc index 7e274f6ebf..fe23fcf326 100644 --- a/compiler/optimizing/primitive_type_propagation.cc +++ b/compiler/optimizing/primitive_type_propagation.cc @@ -40,6 +40,7 @@ static Primitive::Type MergeTypes(Primitive::Type existing, Primitive::Type new_ // Re-compute and update the type of the instruction. Returns // whether or not the type was changed. bool PrimitiveTypePropagation::UpdateType(HPhi* phi) { + DCHECK(phi->IsLive()); Primitive::Type existing = phi->GetType(); Primitive::Type new_type = existing; @@ -49,15 +50,20 @@ bool PrimitiveTypePropagation::UpdateType(HPhi* phi) { } phi->SetType(new_type); - if (new_type == Primitive::kPrimDouble || new_type == Primitive::kPrimFloat) { + if (new_type == Primitive::kPrimDouble + || new_type == Primitive::kPrimFloat + || new_type == Primitive::kPrimNot) { // If the phi is of floating point type, we need to update its inputs to that // type. 
For inputs that are phis, we need to recompute their types. for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { HInstruction* input = phi->InputAt(i); if (input->GetType() != new_type) { - HInstruction* equivalent = SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type); + HInstruction* equivalent = (new_type == Primitive::kPrimNot) + ? SsaBuilder::GetReferenceTypeEquivalent(input) + : SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type); phi->ReplaceInput(equivalent, i); if (equivalent->IsPhi()) { + equivalent->AsPhi()->SetLive(); AddToWorklist(equivalent->AsPhi()); } } @@ -78,15 +84,9 @@ void PrimitiveTypePropagation::VisitBasicBlock(HBasicBlock* block) { if (block->IsLoopHeader()) { for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { HPhi* phi = it.Current()->AsPhi(); - // Set the initial type for the phi. Use the non back edge input for reaching - // a fixed point faster. - Primitive::Type phi_type = phi->GetType(); - // We merge with the existing type, that has been set by the SSA builder. - DCHECK(phi_type == Primitive::kPrimVoid - || phi_type == Primitive::kPrimFloat - || phi_type == Primitive::kPrimDouble); - phi->SetType(MergeTypes(phi->InputAt(0)->GetType(), phi->GetType())); - AddToWorklist(phi); + if (phi->IsLive()) { + AddToWorklist(phi); + } } } else { for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { @@ -95,7 +95,10 @@ void PrimitiveTypePropagation::VisitBasicBlock(HBasicBlock* block) { // doing a reverse post-order visit, therefore either the phi users are // non-loop phi and will be visited later in the visit, or are loop-phis, // and they are already in the work list. - UpdateType(it.Current()->AsPhi()); + HPhi* phi = it.Current()->AsPhi(); + if (phi->IsLive()) { + UpdateType(phi); + } } } } @@ -110,13 +113,14 @@ void PrimitiveTypePropagation::ProcessWorklist() { } void PrimitiveTypePropagation::AddToWorklist(HPhi* instruction) { + DCHECK(instruction->IsLive()); worklist_.Add(instruction); } void PrimitiveTypePropagation::AddDependentInstructionsToWorklist(HPhi* instruction) { for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) { HPhi* phi = it.Current()->GetUser()->AsPhi(); - if (phi != nullptr) { + if (phi != nullptr && phi->IsLive()) { AddToWorklist(phi); } } diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 24e6837f45..76b8d7eacf 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -16,16 +16,17 @@ #include "reference_type_propagation.h" +#include "class_linker.h" +#include "mirror/class-inl.h" +#include "mirror/dex_cache.h" +#include "scoped_thread_state_change.h" + namespace art { -// TODO: Only do the analysis on reference types. We currently have to handle -// the `null` constant, that is represented as a `HIntConstant` and therefore -// has the Primitive::kPrimInt type. +// TODO: handle: a !=/== null. void ReferenceTypePropagation::Run() { - // Compute null status for instructions. - - // To properly propagate not-null info we need to visit in the dominator-based order. + // To properly propagate type info we need to visit in the dominator-based order. // Reverse post order guarantees a node's dominators are visited first. // We take advantage of this order in `VisitBasicBlock`. 
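The comment above relies on a standard property: in a reverse post-order of the CFG (back edges aside), a block is only visited after all of its dominators, so non-loop phis already see propagated inputs when their block is reached. A tiny self-contained sketch of computing such an order for a made-up diamond CFG; the Block struct and DFS helper are illustrative, not the HGraph API:

// Standalone sketch: reverse post-order puts dominators before dominated blocks.
#include <algorithm>
#include <cassert>
#include <vector>

struct Block { std::vector<int> successors; };

static void PostOrder(const std::vector<Block>& cfg, int id,
                      std::vector<bool>* visited, std::vector<int>* out) {
  (*visited)[id] = true;
  for (int succ : cfg[id].successors) {
    if (!(*visited)[succ]) PostOrder(cfg, succ, visited, out);
  }
  out->push_back(id);
}

int main() {
  // Diamond: 0 -> {1, 2}, 1 -> 3, 2 -> 3. Block 0 dominates everything,
  // and neither 1 nor 2 dominates the join block 3.
  std::vector<Block> cfg = {{{1, 2}}, {{3}}, {{3}}, {{}}};
  std::vector<bool> visited(cfg.size(), false);
  std::vector<int> order;
  PostOrder(cfg, /*entry=*/0, &visited, &order);
  std::reverse(order.begin(), order.end());  // reverse post-order

  // The entry (the only dominator of 1, 2 and 3) comes first, and the join
  // block is visited after both of its predecessors.
  assert(order.front() == 0);
  assert(order.back() == 3);
  return 0;
}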
for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { @@ -34,9 +35,210 @@ void ReferenceTypePropagation::Run() { ProcessWorklist(); } +void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) { + // TODO: handle other instructions that give type info + // (NewArray/Call/Field accesses/array accesses) + + // Initialize exact types first for faster convergence. + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instr = it.Current(); + if (instr->IsNewInstance()) { + VisitNewInstance(instr->AsNewInstance()); + } else if (instr->IsLoadClass()) { + VisitLoadClass(instr->AsLoadClass()); + } + } + + // Handle Phis. + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + VisitPhi(it.Current()->AsPhi()); + } + + // Add extra nodes to bound types. + BoundTypeForIfInstanceOf(block); +} + +// Detects if `block` is the True block for the pattern +// `if (x instanceof ClassX) { }` +// If that's the case insert an HBoundType instruction to bound the type of `x` +// to `ClassX` in the scope of the dominated blocks. +void ReferenceTypePropagation::BoundTypeForIfInstanceOf(HBasicBlock* block) { + HInstruction* lastInstruction = block->GetLastInstruction(); + if (!lastInstruction->IsIf()) { + return; + } + HInstruction* ifInput = lastInstruction->InputAt(0); + // TODO: Handle more patterns here: HIf(bool) HIf(HNotEqual). + if (!ifInput->IsEqual()) { + return; + } + HInstruction* instanceOf = ifInput->InputAt(0); + HInstruction* comp_value = ifInput->InputAt(1); + if (!instanceOf->IsInstanceOf() || !comp_value->IsIntConstant()) { + return; + } + + HInstruction* obj = instanceOf->InputAt(0); + HLoadClass* load_class = instanceOf->InputAt(1)->AsLoadClass(); + + ReferenceTypeInfo obj_rti = obj->GetReferenceTypeInfo(); + ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); + HBoundType* bound_type = new (graph_->GetArena()) HBoundType(obj, class_rti); + + // Narrow the type as much as possible. + { + ScopedObjectAccess soa(Thread::Current()); + if (!load_class->IsResolved() || class_rti.IsSupertypeOf(obj_rti)) { + bound_type->SetReferenceTypeInfo(obj_rti); + } else { + bound_type->SetReferenceTypeInfo( + ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ false)); + } + } + + block->InsertInstructionBefore(bound_type, lastInstruction); + // Pick the right successor based on the value we compare against. + HIntConstant* comp_value_int = comp_value->AsIntConstant(); + HBasicBlock* instanceOfTrueBlock = comp_value_int->GetValue() == 0 + ? lastInstruction->AsIf()->IfFalseSuccessor() + : lastInstruction->AsIf()->IfTrueSuccessor(); + + for (HUseIterator<HInstruction*> it(obj->GetUses()); !it.Done(); it.Advance()) { + HInstruction* user = it.Current()->GetUser(); + if (instanceOfTrueBlock->Dominates(user->GetBlock())) { + user->ReplaceInput(bound_type, it.Current()->GetIndex()); + } + } +} + +void ReferenceTypePropagation::VisitNewInstance(HNewInstance* instr) { + ScopedObjectAccess soa(Thread::Current()); + mirror::DexCache* dex_cache = dex_compilation_unit_.GetClassLinker()->FindDexCache(dex_file_); + // Get type from dex cache assuming it was populated by the verifier. 
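BoundTypeForIfInstanceOf only rewires uses sitting in blocks dominated by the instanceof-true successor, so the narrowed HBoundType never leaks onto paths where the test may have failed. A rough standalone sketch of that dominated-use rewrite, with an invented immediate-dominator array and Use records in place of HBasicBlock::Dominates and the use iterator:

// Rough sketch of restricting the bound-type rewrite to dominated users.
#include <cassert>
#include <vector>

// Walk up the immediate-dominator chain (-1 marks the entry block).
static bool Dominates(const std::vector<int>& idom, int dominator, int block) {
  for (int b = block; b != -1; b = idom[b]) {
    if (b == dominator) return true;
  }
  return false;
}

struct Use { int user_block; int value; };  // value: 0 = original obj, 1 = bound node

int main() {
  // Entry 0 dominates both 1 (the instanceof-true block) and 2 (the merge);
  // block 1 dominates only itself.
  std::vector<int> idom = {-1, 0, 0};
  std::vector<Use> uses = {{1, 0}, {2, 0}};  // two uses of obj

  const int instanceof_true_block = 1;
  for (Use& use : uses) {
    if (Dominates(idom, instanceof_true_block, use.user_block)) {
      use.value = 1;  // rewire this input to the HBoundType-like node
    }
  }
  assert(uses[0].value == 1);  // inside the guarded region: narrowed
  assert(uses[1].value == 0);  // at the merge point: unchanged
  return 0;
}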
+ mirror::Class* resolved_class = dex_cache->GetResolvedType(instr->GetTypeIndex()); + if (resolved_class != nullptr) { + MutableHandle<mirror::Class> handle = handles_->NewHandle(resolved_class); + instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(handle, true)); + } +} + +void ReferenceTypePropagation::VisitLoadClass(HLoadClass* instr) { + ScopedObjectAccess soa(Thread::Current()); + mirror::DexCache* dex_cache = dex_compilation_unit_.GetClassLinker()->FindDexCache(dex_file_); + // Get type from dex cache assuming it was populated by the verifier. + mirror::Class* resolved_class = dex_cache->GetResolvedType(instr->GetTypeIndex()); + if (resolved_class != nullptr) { + Handle<mirror::Class> handle = handles_->NewHandle(resolved_class); + instr->SetLoadedClassRTI(ReferenceTypeInfo::Create(handle, /* is_exact */ true)); + } + Handle<mirror::Class> class_handle = handles_->NewHandle(mirror::Class::GetJavaLangClass()); + instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(class_handle, /* is_exact */ true)); +} + +void ReferenceTypePropagation::VisitPhi(HPhi* phi) { + if (phi->GetType() != Primitive::kPrimNot) { + return; + } + + if (phi->GetBlock()->IsLoopHeader()) { + // Set the initial type for the phi. Use the non back edge input for reaching + // a fixed point faster. + AddToWorklist(phi); + phi->SetCanBeNull(phi->InputAt(0)->CanBeNull()); + phi->SetReferenceTypeInfo(phi->InputAt(0)->GetReferenceTypeInfo()); + } else { + // Eagerly compute the type of the phi, for quicker convergence. Note + // that we don't need to add users to the worklist because we are + // doing a reverse post-order visit, therefore either the phi users are + // non-loop phi and will be visited later in the visit, or are loop-phis, + // and they are already in the work list. + UpdateNullability(phi); + UpdateReferenceTypeInfo(phi); + } +} + +ReferenceTypeInfo ReferenceTypePropagation::MergeTypes(const ReferenceTypeInfo& a, + const ReferenceTypeInfo& b) { + bool is_exact = a.IsExact() && b.IsExact(); + bool is_top = a.IsTop() || b.IsTop(); + Handle<mirror::Class> type_handle; + + if (!is_top) { + if (a.GetTypeHandle().Get() == b.GetTypeHandle().Get()) { + type_handle = a.GetTypeHandle(); + } else if (a.IsSupertypeOf(b)) { + type_handle = a.GetTypeHandle(); + is_exact = false; + } else if (b.IsSupertypeOf(a)) { + type_handle = b.GetTypeHandle(); + is_exact = false; + } else { + // TODO: Find a common super class. + is_top = true; + is_exact = false; + } + } + + return is_top + ? ReferenceTypeInfo::CreateTop(is_exact) + : ReferenceTypeInfo::Create(type_handle, is_exact); +} + +bool ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr) { + ScopedObjectAccess soa(Thread::Current()); + + ReferenceTypeInfo previous_rti = instr->GetReferenceTypeInfo(); + if (instr->IsBoundType()) { + UpdateBoundType(instr->AsBoundType()); + } else if (instr->IsPhi()) { + UpdatePhi(instr->AsPhi()); + } else { + LOG(FATAL) << "Invalid instruction (should not get here)"; + } + + return !previous_rti.IsEqual(instr->GetReferenceTypeInfo()); +} + +void ReferenceTypePropagation::UpdateBoundType(HBoundType* instr) { + ReferenceTypeInfo new_rti = instr->InputAt(0)->GetReferenceTypeInfo(); + // Be sure that we don't go over the bounded type. 
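MergeTypes above is a join on the same small lattice: identical classes keep their type, a supertype relationship keeps the supertype but drops exactness, and unrelated classes fall back to Top until a common-superclass search is implemented. A compact toy model of those rules, again over an invented class table rather than mirror::Class:

// Toy model of ReferenceTypeInfo merging; classes and handles are stand-ins.
#include <cassert>

enum class TypeId { kObject, kNumber, kInteger, kString };  // Integer <: Number; String, Number <: Object

static bool IsSubclassOf(TypeId sub, TypeId super) {
  if (super == TypeId::kObject) return true;
  if (super == TypeId::kNumber) return sub == TypeId::kNumber || sub == TypeId::kInteger;
  return sub == super;
}

struct Rti {
  TypeId type;
  bool is_exact;
  bool is_top;  // "Top" stands for java.lang.Object with no usable handle
};

static Rti Merge(const Rti& a, const Rti& b) {
  bool is_exact = a.is_exact && b.is_exact;
  if (a.is_top || b.is_top) return {TypeId::kObject, is_exact, true};
  if (a.type == b.type) return {a.type, is_exact, false};
  if (IsSubclassOf(b.type, a.type)) return {a.type, false, false};  // a is the supertype
  if (IsSubclassOf(a.type, b.type)) return {b.type, false, false};  // b is the supertype
  // No common-superclass search yet: give up and go to Top, losing exactness.
  return {TypeId::kObject, false, true};
}

int main() {
  Rti number{TypeId::kNumber, false, false};
  Rti integer{TypeId::kInteger, true, false};
  Rti string{TypeId::kString, true, false};

  Rti m1 = Merge(number, integer);
  assert(m1.type == TypeId::kNumber && !m1.is_exact && !m1.is_top);

  Rti m2 = Merge(integer, string);  // unrelated classes -> Top for now
  assert(m2.is_top && !m2.is_exact);
  return 0;
}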
+ ReferenceTypeInfo bound_rti = instr->GetBoundType(); + if (!bound_rti.IsSupertypeOf(new_rti)) { + new_rti = bound_rti; + } + instr->SetReferenceTypeInfo(new_rti); +} + +void ReferenceTypePropagation::UpdatePhi(HPhi* instr) { + ReferenceTypeInfo new_rti = instr->InputAt(0)->GetReferenceTypeInfo(); + if (new_rti.IsTop() && !new_rti.IsExact()) { + // Early return if we are Top and inexact. + instr->SetReferenceTypeInfo(new_rti); + return; + } + for (size_t i = 1; i < instr->InputCount(); i++) { + new_rti = MergeTypes(new_rti, instr->InputAt(i)->GetReferenceTypeInfo()); + if (new_rti.IsTop()) { + if (!new_rti.IsExact()) { + break; + } else { + continue; + } + } + } + instr->SetReferenceTypeInfo(new_rti); +} + // Re-computes and updates the nullability of the instruction. Returns whether or // not the nullability was changed. -bool ReferenceTypePropagation::UpdateNullability(HPhi* phi) { +bool ReferenceTypePropagation::UpdateNullability(HInstruction* instr) { + DCHECK(instr->IsPhi() || instr->IsBoundType()); + + if (!instr->IsPhi()) { + return false; + } + + HPhi* phi = instr->AsPhi(); bool existing_can_be_null = phi->CanBeNull(); bool new_can_be_null = false; for (size_t i = 0; i < phi->InputCount(); i++) { @@ -47,48 +249,26 @@ bool ReferenceTypePropagation::UpdateNullability(HPhi* phi) { return existing_can_be_null != new_can_be_null; } - -void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) { - if (block->IsLoopHeader()) { - for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - // Set the initial type for the phi. Use the non back edge input for reaching - // a fixed point faster. - HPhi* phi = it.Current()->AsPhi(); - AddToWorklist(phi); - phi->SetCanBeNull(phi->InputAt(0)->CanBeNull()); - } - } else { - for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - // Eagerly compute the type of the phi, for quicker convergence. Note - // that we don't need to add users to the worklist because we are - // doing a reverse post-order visit, therefore either the phi users are - // non-loop phi and will be visited later in the visit, or are loop-phis, - // and they are already in the work list. 
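UpdateNullability is the simplest of the fixed-point updates driven by this pass: a phi can be null iff any of its inputs can, and the worklist loop further below only re-queues dependent phis and bound types when the answer actually changes. A minimal standalone worklist of that shape, using an invented Node graph instead of HPhi:

// Standalone sketch of the nullability fixed point over phi-like nodes.
#include <cassert>
#include <vector>

struct Node {
  std::vector<int> inputs;  // indices of input nodes
  std::vector<int> users;   // indices of dependent phi-like nodes
  bool can_be_null = false;
};

static bool UpdateNullability(std::vector<Node>& nodes, int id) {
  bool old_value = nodes[id].can_be_null;
  bool new_value = false;
  for (int input : nodes[id].inputs) {
    new_value = new_value || nodes[input].can_be_null;
  }
  nodes[id].can_be_null = new_value;
  return old_value != new_value;  // report whether anything changed
}

int main() {
  // 0: a value known to be non-null, 1: a possibly-null value,
  // 2: phi(0, 1), 3: phi(2, 0).
  std::vector<Node> nodes(4);
  nodes[1].can_be_null = true;
  nodes[2].inputs = {0, 1}; nodes[2].users = {3};
  nodes[3].inputs = {2, 0};

  std::vector<int> worklist = {2, 3};
  while (!worklist.empty()) {
    int id = worklist.back();
    worklist.pop_back();
    if (UpdateNullability(nodes, id)) {
      // Only re-queue users when the answer changed, as in ProcessWorklist.
      for (int user : nodes[id].users) worklist.push_back(user);
    }
  }
  assert(nodes[2].can_be_null);
  assert(nodes[3].can_be_null);
  return 0;
}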
- UpdateNullability(it.Current()->AsPhi()); - } - } -} - void ReferenceTypePropagation::ProcessWorklist() { while (!worklist_.IsEmpty()) { - HPhi* instruction = worklist_.Pop(); - if (UpdateNullability(instruction)) { + HInstruction* instruction = worklist_.Pop(); + if (UpdateNullability(instruction) || UpdateReferenceTypeInfo(instruction)) { AddDependentInstructionsToWorklist(instruction); } } } -void ReferenceTypePropagation::AddToWorklist(HPhi* instruction) { +void ReferenceTypePropagation::AddToWorklist(HInstruction* instruction) { + DCHECK_EQ(instruction->GetType(), Primitive::kPrimNot) << instruction->GetType(); worklist_.Add(instruction); } -void ReferenceTypePropagation::AddDependentInstructionsToWorklist(HPhi* instruction) { +void ReferenceTypePropagation::AddDependentInstructionsToWorklist(HInstruction* instruction) { for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) { - HPhi* phi = it.Current()->GetUser()->AsPhi(); - if (phi != nullptr) { - AddToWorklist(phi); + HInstruction* user = it.Current()->GetUser(); + if (user->IsPhi() || user->IsBoundType()) { + AddToWorklist(user); } } } - } // namespace art diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h index a74319d0c5..e346dbfc6c 100644 --- a/compiler/optimizing/reference_type_propagation.h +++ b/compiler/optimizing/reference_type_propagation.h @@ -17,31 +17,57 @@ #ifndef ART_COMPILER_OPTIMIZING_REFERENCE_TYPE_PROPAGATION_H_ #define ART_COMPILER_OPTIMIZING_REFERENCE_TYPE_PROPAGATION_H_ +#include "driver/dex_compilation_unit.h" +#include "handle_scope-inl.h" #include "nodes.h" #include "optimization.h" +#include "optimizing_compiler_stats.h" namespace art { /** * Propagates reference types to instructions. - * TODO: Currently only nullability is computed. 
*/ class ReferenceTypePropagation : public HOptimization { public: - explicit ReferenceTypePropagation(HGraph* graph) + ReferenceTypePropagation(HGraph* graph, + const DexFile& dex_file, + const DexCompilationUnit& dex_compilation_unit, + StackHandleScopeCollection* handles) : HOptimization(graph, true, "reference_type_propagation"), + dex_file_(dex_file), + dex_compilation_unit_(dex_compilation_unit), + handles_(handles), worklist_(graph->GetArena(), kDefaultWorklistSize) {} void Run() OVERRIDE; private: + void VisitNewInstance(HNewInstance* new_instance); + void VisitLoadClass(HLoadClass* load_class); + void VisitPhi(HPhi* phi); void VisitBasicBlock(HBasicBlock* block); + + void UpdateBoundType(HBoundType* bound_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void UpdatePhi(HPhi* phi) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + + void BoundTypeForIfInstanceOf(HBasicBlock* block); + void ProcessWorklist(); - void AddToWorklist(HPhi* phi); - void AddDependentInstructionsToWorklist(HPhi* phi); - bool UpdateNullability(HPhi* phi); + void AddToWorklist(HInstruction* instr); + void AddDependentInstructionsToWorklist(HInstruction* instr); + + bool UpdateNullability(HInstruction* instr); + bool UpdateReferenceTypeInfo(HInstruction* instr); + + ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a, const ReferenceTypeInfo& b) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + + const DexFile& dex_file_; + const DexCompilationUnit& dex_compilation_unit_; + StackHandleScopeCollection* handles_; - GrowableArray<HPhi*> worklist_; + GrowableArray<HInstruction*> worklist_; static constexpr size_t kDefaultWorklistSize = 8; diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 3809720cb4..54e62a5b2c 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -48,7 +48,10 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, physical_core_register_intervals_(allocator, codegen->GetNumberOfCoreRegisters()), physical_fp_register_intervals_(allocator, codegen->GetNumberOfFloatingPointRegisters()), temp_intervals_(allocator, 4), - spill_slots_(allocator, kDefaultNumberOfSpillSlots), + int_spill_slots_(allocator, kDefaultNumberOfSpillSlots), + long_spill_slots_(allocator, kDefaultNumberOfSpillSlots), + float_spill_slots_(allocator, kDefaultNumberOfSpillSlots), + double_spill_slots_(allocator, kDefaultNumberOfSpillSlots), safepoints_(allocator, 0), processing_core_registers_(false), number_of_registers_(-1), @@ -252,8 +255,13 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { && (instruction->GetType() != Primitive::kPrimFloat); if (locations->CanCall()) { - if (!instruction->IsSuspendCheck()) { - codegen_->MarkNotLeaf(); + if (codegen_->IsLeafMethod()) { + // TODO: We do this here because we do not want the suspend check to artificially + // create live registers. We should find another place, but this is currently the + // simplest. 
+ DCHECK(instruction->IsSuspendCheckEntry()); + instruction->GetBlock()->RemoveInstruction(instruction); + return; } safepoints_.Add(instruction); if (locations->OnlyCallsOnSlowPath()) { @@ -433,7 +441,7 @@ bool RegisterAllocator::ValidateInternal(bool log_fatal_on_failure) const { } } - return ValidateIntervals(intervals, spill_slots_.Size(), reserved_out_slots_, *codegen_, + return ValidateIntervals(intervals, GetNumberOfSpillSlots(), reserved_out_slots_, *codegen_, allocator_, processing_core_registers_, log_fatal_on_failure); } @@ -1128,41 +1136,62 @@ void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) { } size_t end = last_sibling->GetEnd(); + GrowableArray<size_t>* spill_slots = nullptr; + switch (interval->GetType()) { + case Primitive::kPrimDouble: + spill_slots = &double_spill_slots_; + break; + case Primitive::kPrimLong: + spill_slots = &long_spill_slots_; + break; + case Primitive::kPrimFloat: + spill_slots = &float_spill_slots_; + break; + case Primitive::kPrimNot: + case Primitive::kPrimInt: + case Primitive::kPrimChar: + case Primitive::kPrimByte: + case Primitive::kPrimBoolean: + case Primitive::kPrimShort: + spill_slots = &int_spill_slots_; + break; + case Primitive::kPrimVoid: + LOG(FATAL) << "Unexpected type for interval " << interval->GetType(); + } + // Find an available spill slot. size_t slot = 0; - for (size_t e = spill_slots_.Size(); slot < e; ++slot) { - // We check if it is less rather than less or equal because the parallel move - // resolver does not work when a single spill slot needs to be exchanged with - // a double spill slot. The strict comparison avoids needing to exchange these - // locations at the same lifetime position. - if (spill_slots_.Get(slot) < parent->GetStart() - && (slot == (e - 1) || spill_slots_.Get(slot + 1) < parent->GetStart())) { + for (size_t e = spill_slots->Size(); slot < e; ++slot) { + if (spill_slots->Get(slot) <= parent->GetStart() + && (slot == (e - 1) || spill_slots->Get(slot + 1) <= parent->GetStart())) { break; } } if (parent->NeedsTwoSpillSlots()) { - if (slot == spill_slots_.Size()) { + if (slot == spill_slots->Size()) { // We need a new spill slot. - spill_slots_.Add(end); - spill_slots_.Add(end); - } else if (slot == spill_slots_.Size() - 1) { - spill_slots_.Put(slot, end); - spill_slots_.Add(end); + spill_slots->Add(end); + spill_slots->Add(end); + } else if (slot == spill_slots->Size() - 1) { + spill_slots->Put(slot, end); + spill_slots->Add(end); } else { - spill_slots_.Put(slot, end); - spill_slots_.Put(slot + 1, end); + spill_slots->Put(slot, end); + spill_slots->Put(slot + 1, end); } } else { - if (slot == spill_slots_.Size()) { + if (slot == spill_slots->Size()) { // We need a new spill slot. - spill_slots_.Add(end); + spill_slots->Add(end); } else { - spill_slots_.Put(slot, end); + spill_slots->Put(slot, end); } } - parent->SetSpillSlot((slot + reserved_out_slots_) * kVRegSize); + // Note that the exact spill slot location will be computed when we resolve, + // that is when we know the number of spill slots for each type. 
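With one spill pool per type, the slot index stored on the interval is only meaningful within its pool; the Resolve() change in the following hunk turns it into a frame slot by skipping the pools laid out below it plus the reserved out slots, then scales by kVRegSize. A small standalone sketch of that arithmetic, with made-up pool sizes and an illustrative 4-byte vreg size:

// Sketch of turning a per-type spill slot index into a stack slot number,
// following the documented layout: [double][long][float][int-or-ref][outs][art method].
#include <cassert>
#include <cstdint>

enum class SpillKind { kIntOrRef, kFloat, kLong, kDouble };

struct SpillPools {
  uint32_t reserved_out_slots;  // argument slots reserved for calls
  uint32_t num_int;             // int/ref pool size
  uint32_t num_float;
  uint32_t num_long;
  uint32_t num_double;
};

static uint32_t FrameSlot(const SpillPools& pools, SpillKind kind, uint32_t slot) {
  // Skip everything laid out closer to the ART method than this pool.
  switch (kind) {
    case SpillKind::kDouble:   slot += pools.num_long;   [[fallthrough]];
    case SpillKind::kLong:     slot += pools.num_float;  [[fallthrough]];
    case SpillKind::kFloat:    slot += pools.num_int;    [[fallthrough]];
    case SpillKind::kIntOrRef: slot += pools.reserved_out_slots; break;
  }
  return slot;
}

int main() {
  constexpr uint32_t kVRegSize = 4;  // illustrative vreg size in bytes
  SpillPools pools{/*outs=*/2, /*int=*/3, /*float=*/1, /*long=*/2, /*double=*/2};
  // The first float slot sits above the outs and the int/ref pool.
  assert(FrameSlot(pools, SpillKind::kFloat, 0) == 2 + 3);
  // A double slot at index 2 of its pool sits above all other pools and the outs.
  assert(FrameSlot(pools, SpillKind::kDouble, 2) == 2 + 3 + 1 + 2 + 2);
  // Byte offset within the frame, as the resolved SetSpillSlot value is stored.
  assert(FrameSlot(pools, SpillKind::kDouble, 0) * kVRegSize == 32);
  return 0;
}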
+ parent->SetSpillSlot(slot); } static bool IsValidDestination(Location destination) { @@ -1511,7 +1540,7 @@ void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval, } void RegisterAllocator::Resolve() { - codegen_->InitializeCodeGeneration(spill_slots_.Size(), + codegen_->InitializeCodeGeneration(GetNumberOfSpillSlots(), maximum_number_of_live_core_registers_, maximum_number_of_live_fp_registers_, reserved_out_slots_, @@ -1537,6 +1566,39 @@ void RegisterAllocator::Resolve() { } else if (current->HasSpillSlot()) { current->SetSpillSlot(current->GetSpillSlot() + codegen_->GetFrameSize()); } + } else if (current->HasSpillSlot()) { + // Adjust the stack slot, now that we know the number of them for each type. + // The way this implementation lays out the stack is the following: + // [parameter slots ] + // [double spill slots ] + // [long spill slots ] + // [float spill slots ] + // [int/ref values ] + // [maximum out values ] (number of arguments for calls) + // [art method ]. + uint32_t slot = current->GetSpillSlot(); + switch (current->GetType()) { + case Primitive::kPrimDouble: + slot += long_spill_slots_.Size(); + FALLTHROUGH_INTENDED; + case Primitive::kPrimLong: + slot += float_spill_slots_.Size(); + FALLTHROUGH_INTENDED; + case Primitive::kPrimFloat: + slot += int_spill_slots_.Size(); + FALLTHROUGH_INTENDED; + case Primitive::kPrimNot: + case Primitive::kPrimInt: + case Primitive::kPrimChar: + case Primitive::kPrimByte: + case Primitive::kPrimBoolean: + case Primitive::kPrimShort: + slot += reserved_out_slots_; + break; + case Primitive::kPrimVoid: + LOG(FATAL) << "Unexpected type for interval " << current->GetType(); + } + current->SetSpillSlot(slot * kVRegSize); } Location source = current->ToLocation(); diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index b8f70bdc18..ff2f106b74 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -75,7 +75,10 @@ class RegisterAllocator { } size_t GetNumberOfSpillSlots() const { - return spill_slots_.Size(); + return int_spill_slots_.Size() + + long_spill_slots_.Size() + + float_spill_slots_.Size() + + double_spill_slots_.Size(); } private: @@ -171,8 +174,14 @@ class RegisterAllocator { // where an instruction requires a temporary. GrowableArray<LiveInterval*> temp_intervals_; - // The spill slots allocated for live intervals. - GrowableArray<size_t> spill_slots_; + // The spill slots allocated for live intervals. We ensure spill slots + // are typed to avoid (1) doing moves and swaps between two different kinds + // of registers, and (2) swapping between a single stack slot and a double + // stack slot. This simplifies the parallel move resolver. + GrowableArray<size_t> int_spill_slots_; + GrowableArray<size_t> long_spill_slots_; + GrowableArray<size_t> float_spill_slots_; + GrowableArray<size_t> double_spill_slots_; // Instructions that need a safepoint. GrowableArray<HInstruction*> safepoints_; diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 0cc00c0fde..e5d06a9f8b 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include "base/arena_allocator.h" #include "builder.h" #include "code_generator.h" #include "code_generator_x86.h" @@ -25,7 +26,6 @@ #include "register_allocator.h" #include "ssa_liveness_analysis.h" #include "ssa_phi_elimination.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index c9a21aa681..3dc75059b2 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -42,20 +42,33 @@ void SsaBuilder::BuildSsa() { } } - // 3) Remove dead phis. This will remove phis that are only used by environments: + // 3) Mark dead phis. This will mark phis that are only used by environments: // at the DEX level, the type of these phis does not need to be consistent, but // our code generator will complain if the inputs of a phi do not have the same - // type (modulo the special case of `null`). - SsaDeadPhiElimination dead_phis(GetGraph()); - dead_phis.Run(); + // type. The marking allows the type propagation to know which phis it needs + // to handle. We mark but do not eliminate: the elimination will be done in + // step 5). + { + SsaDeadPhiElimination dead_phis(GetGraph()); + dead_phis.MarkDeadPhis(); + } // 4) Propagate types of phis. At this point, phis are typed void in the general - // case, or float or double when we created a floating-point equivalent. So we + // case, or float/double/reference when we created an equivalent phi. So we // need to propagate the types across phis to give them a correct type. PrimitiveTypePropagation type_propagation(GetGraph()); type_propagation.Run(); - // 5) Clear locals. + // 5) Step 4) changes inputs of phis which may lead to dead phis again. We re-run + // the algorithm and this time elimimates them. + // TODO: Make this work with debug info and reference liveness. We currently + // eagerly remove phis used in environments. + { + SsaDeadPhiElimination dead_phis(GetGraph()); + dead_phis.Run(); + } + + // 6) Clear locals. // TODO: Move this to a dead code eliminator phase. for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions()); !it.Done(); @@ -185,15 +198,24 @@ static HDoubleConstant* GetDoubleEquivalent(HLongConstant* constant) { /** * Because of Dex format, we might end up having the same phi being - * used for non floating point operations and floating point operations. Because - * we want the graph to be correctly typed (and thereafter avoid moves between + * used for non floating point operations and floating point / reference operations. + * Because we want the graph to be correctly typed (and thereafter avoid moves between * floating point registers and core registers), we need to create a copy of the - * phi with a floating point type. + * phi with a floating point / reference type. */ -static HPhi* GetFloatOrDoubleEquivalentOfPhi(HPhi* phi, Primitive::Type type) { - // We place the floating point phi next to this phi. +static HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type) { + // We place the floating point /reference phi next to this phi. HInstruction* next = phi->GetNext(); - if (next == nullptr || (next->AsPhi()->GetRegNumber() != phi->GetRegNumber())) { + if (next != nullptr + && next->AsPhi()->GetRegNumber() == phi->GetRegNumber() + && next->GetType() != type) { + // Move to the next phi to see if it is the one we are looking for. 
+ next = next->GetNext(); + } + + if (next == nullptr + || (next->AsPhi()->GetRegNumber() != phi->GetRegNumber()) + || (next->GetType() != type)) { ArenaAllocator* allocator = phi->GetBlock()->GetGraph()->GetArena(); HPhi* new_phi = new (allocator) HPhi(allocator, phi->GetRegNumber(), phi->InputCount(), type); for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { @@ -223,7 +245,7 @@ HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* user, } else if (value->IsIntConstant()) { return GetFloatEquivalent(value->AsIntConstant()); } else if (value->IsPhi()) { - return GetFloatOrDoubleEquivalentOfPhi(value->AsPhi(), type); + return GetFloatDoubleOrReferenceEquivalentOfPhi(value->AsPhi(), type); } else { // For other instructions, we assume the verifier has checked that the dex format is correctly // typed and the value in a dex register will not be used for both floating point and @@ -234,12 +256,25 @@ HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* user, } } +HInstruction* SsaBuilder::GetReferenceTypeEquivalent(HInstruction* value) { + if (value->IsIntConstant()) { + DCHECK_EQ(value->AsIntConstant()->GetValue(), 0); + return value->GetBlock()->GetGraph()->GetNullConstant(); + } else { + DCHECK(value->IsPhi()); + return GetFloatDoubleOrReferenceEquivalentOfPhi(value->AsPhi(), Primitive::kPrimNot); + } +} + void SsaBuilder::VisitLoadLocal(HLoadLocal* load) { HInstruction* value = current_locals_->GetInstructionAt(load->GetLocal()->GetRegNumber()); - if (load->GetType() != value->GetType() - && (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble)) { - // If the operation requests a specific type, we make sure its input is of that type. - value = GetFloatOrDoubleEquivalent(load, value, load->GetType()); + // If the operation requests a specific type, we make sure its input is of that type. + if (load->GetType() != value->GetType()) { + if (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble) { + value = GetFloatOrDoubleEquivalent(load, value, load->GetType()); + } else if (load->GetType() == Primitive::kPrimNot) { + value = GetReferenceTypeEquivalent(value); + } } load->ReplaceWith(value); load->GetBlock()->RemoveInstruction(load); diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 2eec87b618..148e9590c3 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -58,6 +58,8 @@ class SsaBuilder : public HGraphVisitor { HInstruction* instruction, Primitive::Type type); + static HInstruction* GetReferenceTypeEquivalent(HInstruction* instruction); + private: // Locals for the current block being visited. HEnvironment* current_locals_; diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index 1b06315fce..bebb73ba22 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -115,14 +115,13 @@ void SsaLivenessAnalysis::NumberInstructions() { // to differentiate between the start and end of an instruction. Adding 2 to // the lifetime position for each instruction ensures the start of an // instruction is different than the end of the previous instruction. 
- HGraphVisitor* location_builder = codegen_->GetLocationBuilder(); for (HLinearOrderIterator it(*this); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); block->SetLifetimeStart(lifetime_position); for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { HInstruction* current = inst_it.Current(); - current->Accept(location_builder); + codegen_->AllocateLocations(current); LocationSummary* locations = current->GetLocations(); if (locations != nullptr && locations->Out().IsValid()) { instructions_from_ssa_index_.Add(current); @@ -140,7 +139,7 @@ void SsaLivenessAnalysis::NumberInstructions() { for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done(); inst_it.Advance()) { HInstruction* current = inst_it.Current(); - current->Accept(codegen_->GetLocationBuilder()); + codegen_->AllocateLocations(current); LocationSummary* locations = current->GetLocations(); if (locations != nullptr && locations->Out().IsValid()) { instructions_from_ssa_index_.Add(current); @@ -312,7 +311,12 @@ bool SsaLivenessAnalysis::UpdateLiveIn(const HBasicBlock& block) { return live_in->UnionIfNotIn(live_out, kill); } +static int RegisterOrLowRegister(Location location) { + return location.IsPair() ? location.low() : location.reg(); +} + int LiveInterval::FindFirstRegisterHint(size_t* free_until) const { + DCHECK(!IsHighInterval()); if (GetParent() == this && defined_by_ != nullptr) { // This is the first interval for the instruction. Try to find // a register based on its definition. @@ -334,8 +338,12 @@ int LiveInterval::FindFirstRegisterHint(size_t* free_until) const { if (user->IsPhi()) { // If the phi has a register, try to use the same. Location phi_location = user->GetLiveInterval()->ToLocation(); - if (SameRegisterKind(phi_location) && free_until[phi_location.reg()] >= use_position) { - return phi_location.reg(); + if (phi_location.IsRegisterKind()) { + DCHECK(SameRegisterKind(phi_location)); + int reg = RegisterOrLowRegister(phi_location); + if (free_until[reg] >= use_position) { + return reg; + } } const GrowableArray<HBasicBlock*>& predecessors = user->GetBlock()->GetPredecessors(); // If the instruction dies at the phi assignment, we can try having the @@ -348,8 +356,11 @@ int LiveInterval::FindFirstRegisterHint(size_t* free_until) const { HInstruction* input = user->InputAt(i); Location location = input->GetLiveInterval()->GetLocationAt( predecessors.Get(i)->GetLifetimeEnd() - 1); - if (location.IsRegister() && free_until[location.reg()] >= use_position) { - return location.reg(); + if (location.IsRegisterKind()) { + int reg = RegisterOrLowRegister(location); + if (free_until[reg] >= use_position) { + return reg; + } } } } @@ -360,8 +371,12 @@ int LiveInterval::FindFirstRegisterHint(size_t* free_until) const { // We use the user's lifetime position - 1 (and not `use_position`) because the // register is blocked at the beginning of the user. size_t position = user->GetLifetimePosition() - 1; - if (SameRegisterKind(expected) && free_until[expected.reg()] >= position) { - return expected.reg(); + if (expected.IsRegisterKind()) { + DCHECK(SameRegisterKind(expected)); + int reg = RegisterOrLowRegister(expected); + if (free_until[reg] >= position) { + return reg; + } } } } @@ -383,8 +398,9 @@ int LiveInterval::FindHintAtDefinition() const { // If the input dies at the end of the predecessor, we know its register can // be reused. 
Location input_location = input_interval.ToLocation(); - if (SameRegisterKind(input_location)) { - return input_location.reg(); + if (input_location.IsRegisterKind()) { + DCHECK(SameRegisterKind(input_location)); + return RegisterOrLowRegister(input_location); } } } @@ -399,8 +415,9 @@ int LiveInterval::FindHintAtDefinition() const { // If the input dies at the start of this instruction, we know its register can // be reused. Location location = input_interval.ToLocation(); - if (SameRegisterKind(location)) { - return location.reg(); + if (location.IsRegisterKind()) { + DCHECK(SameRegisterKind(location)); + return RegisterOrLowRegister(location); } } } @@ -409,9 +426,19 @@ int LiveInterval::FindHintAtDefinition() const { } bool LiveInterval::SameRegisterKind(Location other) const { - return IsFloatingPoint() - ? other.IsFpuRegister() - : other.IsRegister(); + if (IsFloatingPoint()) { + if (IsLowInterval() || IsHighInterval()) { + return other.IsFpuRegisterPair(); + } else { + return other.IsFpuRegister(); + } + } else { + if (IsLowInterval() || IsHighInterval()) { + return other.IsRegisterPair(); + } else { + return other.IsRegister(); + } + } } bool LiveInterval::NeedsTwoSpillSlots() const { diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc index fd30c1bc76..2f2e2d1fab 100644 --- a/compiler/optimizing/ssa_phi_elimination.cc +++ b/compiler/optimizing/ssa_phi_elimination.cc @@ -19,6 +19,11 @@ namespace art { void SsaDeadPhiElimination::Run() { + MarkDeadPhis(); + EliminateDeadPhis(); +} + +void SsaDeadPhiElimination::MarkDeadPhis() { // Add to the worklist phis referenced by non-phi instructions. for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); @@ -49,7 +54,9 @@ void SsaDeadPhiElimination::Run() { } } } +} +void SsaDeadPhiElimination::EliminateDeadPhis() { // Remove phis that are not live. Visit in post order so that phis // that are not inputs of loop phis can be removed when they have // no users left (dead phis might use dead phis). @@ -57,31 +64,33 @@ void SsaDeadPhiElimination::Run() { HBasicBlock* block = it.Current(); HInstruction* current = block->GetFirstPhi(); HInstruction* next = nullptr; + HPhi* phi; while (current != nullptr) { + phi = current->AsPhi(); next = current->GetNext(); - if (current->AsPhi()->IsDead()) { - if (current->HasUses()) { - for (HUseIterator<HInstruction*> use_it(current->GetUses()); !use_it.Done(); + if (phi->IsDead()) { + // Make sure the phi is only used by other dead phis. + if (kIsDebugBuild) { + for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) { - HUseListNode<HInstruction*>* user_node = use_it.Current(); - HInstruction* user = user_node->GetUser(); + HInstruction* user = use_it.Current()->GetUser(); DCHECK(user->IsLoopHeaderPhi()) << user->GetId(); DCHECK(user->AsPhi()->IsDead()) << user->GetId(); - // Just put itself as an input. The phi will be removed in this loop anyway. - user->SetRawInputAt(user_node->GetIndex(), user); - current->RemoveUser(user, user_node->GetIndex()); } } - if (current->HasEnvironmentUses()) { - for (HUseIterator<HEnvironment*> use_it(current->GetEnvUses()); !use_it.Done(); - use_it.Advance()) { - HUseListNode<HEnvironment*>* user_node = use_it.Current(); - HEnvironment* user = user_node->GetUser(); - user->SetRawEnvAt(user_node->GetIndex(), nullptr); - current->RemoveEnvironmentUser(user_node); - } + // Remove the phi from use lists of its inputs. 
+ for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { + phi->RemoveAsUserOfInput(i); + } + // Remove the phi from environments that use it. + for (HUseIterator<HEnvironment*> use_it(phi->GetEnvUses()); !use_it.Done(); + use_it.Advance()) { + HUseListNode<HEnvironment*>* user_node = use_it.Current(); + HEnvironment* user = user_node->GetUser(); + user->SetRawEnvAt(user_node->GetIndex(), nullptr); } - block->RemovePhi(current->AsPhi()); + // Delete it from the instruction list. + block->RemovePhi(phi, /*ensure_safety=*/ false); } current = next; } diff --git a/compiler/optimizing/ssa_phi_elimination.h b/compiler/optimizing/ssa_phi_elimination.h index b7899712d6..88a5279e14 100644 --- a/compiler/optimizing/ssa_phi_elimination.h +++ b/compiler/optimizing/ssa_phi_elimination.h @@ -34,6 +34,9 @@ class SsaDeadPhiElimination : public HOptimization { void Run() OVERRIDE; + void MarkDeadPhis(); + void EliminateDeadPhis(); + private: GrowableArray<HPhi*> worklist_; diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc index 7e90b37fe6..7fc1ec6dd1 100644 --- a/compiler/optimizing/ssa_test.cc +++ b/compiler/optimizing/ssa_test.cc @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "base/arena_allocator.h" #include "base/stringprintf.h" #include "builder.h" #include "dex_file.h" @@ -22,7 +23,6 @@ #include "optimizing_unit_test.h" #include "pretty_printer.h" #include "ssa_builder.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 3974e53e6f..5283d5dcca 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -166,18 +166,23 @@ class StackMapStream : public ValueObject { stack_map.SetStackMask(*entry.sp_mask); } - // Set the register map. - MemoryRegion register_region = dex_register_maps_region.Subregion( - next_dex_register_map_offset, - DexRegisterMap::kFixedSize + entry.num_dex_registers * DexRegisterMap::SingleEntrySize()); - next_dex_register_map_offset += register_region.size(); - DexRegisterMap dex_register_map(register_region); - stack_map.SetDexRegisterMapOffset(register_region.start() - memory_start); - - for (size_t j = 0; j < entry.num_dex_registers; ++j) { - DexRegisterEntry register_entry = - dex_register_maps_.Get(j + entry.dex_register_maps_start_index); - dex_register_map.SetRegisterInfo(j, register_entry.kind, register_entry.value); + if (entry.num_dex_registers != 0) { + // Set the register map. + MemoryRegion register_region = dex_register_maps_region.Subregion( + next_dex_register_map_offset, + DexRegisterMap::kFixedSize + + entry.num_dex_registers * DexRegisterMap::SingleEntrySize()); + next_dex_register_map_offset += register_region.size(); + DexRegisterMap dex_register_map(register_region); + stack_map.SetDexRegisterMapOffset(register_region.start() - memory_start); + + for (size_t j = 0; j < entry.num_dex_registers; ++j) { + DexRegisterEntry register_entry = + dex_register_maps_.Get(j + entry.dex_register_maps_start_index); + dex_register_map.SetRegisterInfo(j, register_entry.kind, register_entry.value); + } + } else { + stack_map.SetDexRegisterMapOffset(StackMap::kNoDexRegisterMap); } // Set the inlining info. 
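A minimal reading aid, not part of the change above: with the StackMapStream hunk just shown, an entry with zero dex registers now stores StackMap::kNoDexRegisterMap instead of an empty map, so consumers are expected to guard the lookup. The sketch below assumes a CodeInfo `code_info`, a StackMap `stack_map` and a count `number_of_dex_registers` obtained the way stack_map_test.cc (further down) obtains them; `number_of_dex_registers` is a hypothetical name for the literal count the tests pass.

  if (stack_map.HasDexRegisterMap()) {
    // Only decode when a dex register map was actually emitted for this stack map;
    // entries without one carry StackMap::kNoDexRegisterMap as their offset.
    DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers);
    // Inspect individual vreg locations as the tests do, e.g. dex_registers.GetLocationKind(0).
  }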
@@ -196,7 +201,7 @@ class StackMapStream : public ValueObject { inline_info.SetMethodReferenceIndexAtDepth(j, inline_entry.method_index); } } else { - stack_map.SetInlineDescriptorOffset(InlineInfo::kNoInlineInfo); + stack_map.SetInlineDescriptorOffset(StackMap::kNoInlineInfo); } } } diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index 5ee6ae049c..744fb45fff 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -61,6 +61,7 @@ TEST(StackMapTest, Test1) { MemoryRegion stack_mask = stack_map.GetStackMask(); ASSERT_TRUE(SameBits(stack_mask, sp_mask)); + ASSERT_TRUE(stack_map.HasDexRegisterMap()); DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, 2); ASSERT_EQ(DexRegisterMap::kInStack, dex_registers.GetLocationKind(0)); ASSERT_EQ(DexRegisterMap::kConstant, dex_registers.GetLocationKind(1)); @@ -107,6 +108,7 @@ TEST(StackMapTest, Test2) { MemoryRegion stack_mask = stack_map.GetStackMask(); ASSERT_TRUE(SameBits(stack_mask, sp_mask1)); + ASSERT_TRUE(stack_map.HasDexRegisterMap()); DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, 2); ASSERT_EQ(DexRegisterMap::kInStack, dex_registers.GetLocationKind(0)); ASSERT_EQ(DexRegisterMap::kConstant, dex_registers.GetLocationKind(1)); diff --git a/compiler/utils/arena_allocator.cc b/compiler/utils/arena_allocator.cc deleted file mode 100644 index a80ad938a6..0000000000 --- a/compiler/utils/arena_allocator.cc +++ /dev/null @@ -1,296 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include <algorithm> -#include <iomanip> -#include <numeric> - -#include "arena_allocator.h" -#include "base/logging.h" -#include "base/mutex.h" -#include "thread-inl.h" -#include <memcheck/memcheck.h> - -namespace art { - -// Memmap is a bit slower than malloc according to my measurements. 
-static constexpr bool kUseMemMap = false; -static constexpr bool kUseMemSet = true && kUseMemMap; -static constexpr size_t kValgrindRedZoneBytes = 8; -constexpr size_t Arena::kDefaultSize; - -template <bool kCount> -const char* const ArenaAllocatorStatsImpl<kCount>::kAllocNames[] = { - "Misc ", - "BasicBlock ", - "BBList " - "BBPreds ", - "DfsPreOrd ", - "DfsPostOrd ", - "DomPostOrd ", - "TopoOrd ", - "Lowering ", - "LIR ", - "LIR masks ", - "SwitchTbl ", - "FillArray ", - "SlowPaths ", - "MIR ", - "DataFlow ", - "GrowList ", - "GrowBitMap ", - "SSA2Dalvik ", - "Dalvik2SSA ", - "DebugInfo ", - "Successor ", - "RegAlloc ", - "Data ", - "Preds ", - "STL ", -}; - -template <bool kCount> -ArenaAllocatorStatsImpl<kCount>::ArenaAllocatorStatsImpl() - : num_allocations_(0u) { - std::fill_n(alloc_stats_, arraysize(alloc_stats_), 0u); -} - -template <bool kCount> -void ArenaAllocatorStatsImpl<kCount>::Copy(const ArenaAllocatorStatsImpl& other) { - num_allocations_ = other.num_allocations_; - std::copy(other.alloc_stats_, other.alloc_stats_ + arraysize(alloc_stats_), alloc_stats_); -} - -template <bool kCount> -void ArenaAllocatorStatsImpl<kCount>::RecordAlloc(size_t bytes, ArenaAllocKind kind) { - alloc_stats_[kind] += bytes; - ++num_allocations_; -} - -template <bool kCount> -size_t ArenaAllocatorStatsImpl<kCount>::NumAllocations() const { - return num_allocations_; -} - -template <bool kCount> -size_t ArenaAllocatorStatsImpl<kCount>::BytesAllocated() const { - const size_t init = 0u; // Initial value of the correct type. - return std::accumulate(alloc_stats_, alloc_stats_ + arraysize(alloc_stats_), init); -} - -template <bool kCount> -void ArenaAllocatorStatsImpl<kCount>::Dump(std::ostream& os, const Arena* first, - ssize_t lost_bytes_adjustment) const { - size_t malloc_bytes = 0u; - size_t lost_bytes = 0u; - size_t num_arenas = 0u; - for (const Arena* arena = first; arena != nullptr; arena = arena->next_) { - malloc_bytes += arena->Size(); - lost_bytes += arena->RemainingSpace(); - ++num_arenas; - } - // The lost_bytes_adjustment is used to make up for the fact that the current arena - // may not have the bytes_allocated_ updated correctly. - lost_bytes += lost_bytes_adjustment; - const size_t bytes_allocated = BytesAllocated(); - os << " MEM: used: " << bytes_allocated << ", allocated: " << malloc_bytes - << ", lost: " << lost_bytes << "\n"; - size_t num_allocations = NumAllocations(); - if (num_allocations != 0) { - os << "Number of arenas allocated: " << num_arenas << ", Number of allocations: " - << num_allocations << ", avg size: " << bytes_allocated / num_allocations << "\n"; - } - os << "===== Allocation by kind\n"; - static_assert(arraysize(kAllocNames) == kNumArenaAllocKinds, "arraysize of kAllocNames"); - for (int i = 0; i < kNumArenaAllocKinds; i++) { - os << kAllocNames[i] << std::setw(10) << alloc_stats_[i] << "\n"; - } -} - -// Explicitly instantiate the used implementation. 
-template class ArenaAllocatorStatsImpl<kArenaAllocatorCountAllocations>; - -Arena::Arena(size_t size) - : bytes_allocated_(0), - map_(nullptr), - next_(nullptr) { - if (kUseMemMap) { - std::string error_msg; - map_ = MemMap::MapAnonymous("dalvik-arena", NULL, size, PROT_READ | PROT_WRITE, false, - &error_msg); - CHECK(map_ != nullptr) << error_msg; - memory_ = map_->Begin(); - size_ = map_->Size(); - } else { - memory_ = reinterpret_cast<uint8_t*>(calloc(1, size)); - size_ = size; - } -} - -Arena::~Arena() { - if (kUseMemMap) { - delete map_; - } else { - free(reinterpret_cast<void*>(memory_)); - } -} - -void Arena::Reset() { - if (bytes_allocated_) { - if (kUseMemSet || !kUseMemMap) { - memset(Begin(), 0, bytes_allocated_); - } else { - map_->MadviseDontNeedAndZero(); - } - bytes_allocated_ = 0; - } -} - -ArenaPool::ArenaPool() - : lock_("Arena pool lock"), - free_arenas_(nullptr) { -} - -ArenaPool::~ArenaPool() { - while (free_arenas_ != nullptr) { - auto* arena = free_arenas_; - free_arenas_ = free_arenas_->next_; - delete arena; - } -} - -Arena* ArenaPool::AllocArena(size_t size) { - Thread* self = Thread::Current(); - Arena* ret = nullptr; - { - MutexLock lock(self, lock_); - if (free_arenas_ != nullptr && LIKELY(free_arenas_->Size() >= size)) { - ret = free_arenas_; - free_arenas_ = free_arenas_->next_; - } - } - if (ret == nullptr) { - ret = new Arena(size); - } - ret->Reset(); - return ret; -} - -size_t ArenaPool::GetBytesAllocated() const { - size_t total = 0; - MutexLock lock(Thread::Current(), lock_); - for (Arena* arena = free_arenas_; arena != nullptr; arena = arena->next_) { - total += arena->GetBytesAllocated(); - } - return total; -} - -void ArenaPool::FreeArenaChain(Arena* first) { - if (UNLIKELY(RUNNING_ON_VALGRIND > 0)) { - for (Arena* arena = first; arena != nullptr; arena = arena->next_) { - VALGRIND_MAKE_MEM_UNDEFINED(arena->memory_, arena->bytes_allocated_); - } - } - if (first != nullptr) { - Arena* last = first; - while (last->next_ != nullptr) { - last = last->next_; - } - Thread* self = Thread::Current(); - MutexLock lock(self, lock_); - last->next_ = free_arenas_; - free_arenas_ = first; - } -} - -size_t ArenaAllocator::BytesAllocated() const { - return ArenaAllocatorStats::BytesAllocated(); -} - -ArenaAllocator::ArenaAllocator(ArenaPool* pool) - : pool_(pool), - begin_(nullptr), - end_(nullptr), - ptr_(nullptr), - arena_head_(nullptr), - running_on_valgrind_(RUNNING_ON_VALGRIND > 0) { -} - -void ArenaAllocator::UpdateBytesAllocated() { - if (arena_head_ != nullptr) { - // Update how many bytes we have allocated into the arena so that the arena pool knows how - // much memory to zero out. - arena_head_->bytes_allocated_ = ptr_ - begin_; - } -} - -void* ArenaAllocator::AllocValgrind(size_t bytes, ArenaAllocKind kind) { - size_t rounded_bytes = RoundUp(bytes + kValgrindRedZoneBytes, 8); - if (UNLIKELY(ptr_ + rounded_bytes > end_)) { - // Obtain a new block. - ObtainNewArenaForAllocation(rounded_bytes); - if (UNLIKELY(ptr_ == nullptr)) { - return nullptr; - } - } - ArenaAllocatorStats::RecordAlloc(rounded_bytes, kind); - uint8_t* ret = ptr_; - ptr_ += rounded_bytes; - // Check that the memory is already zeroed out. - for (uint8_t* ptr = ret; ptr < ptr_; ++ptr) { - CHECK_EQ(*ptr, 0U); - } - VALGRIND_MAKE_MEM_NOACCESS(ret + bytes, rounded_bytes - bytes); - return ret; -} - -ArenaAllocator::~ArenaAllocator() { - // Reclaim all the arenas by giving them back to the thread pool. 
- UpdateBytesAllocated(); - pool_->FreeArenaChain(arena_head_); -} - -void ArenaAllocator::ObtainNewArenaForAllocation(size_t allocation_size) { - UpdateBytesAllocated(); - Arena* new_arena = pool_->AllocArena(std::max(Arena::kDefaultSize, allocation_size)); - new_arena->next_ = arena_head_; - arena_head_ = new_arena; - // Update our internal data structures. - ptr_ = begin_ = new_arena->Begin(); - end_ = new_arena->End(); -} - -MemStats::MemStats(const char* name, const ArenaAllocatorStats* stats, const Arena* first_arena, - ssize_t lost_bytes_adjustment) - : name_(name), - stats_(stats), - first_arena_(first_arena), - lost_bytes_adjustment_(lost_bytes_adjustment) { -} - -void MemStats::Dump(std::ostream& os) const { - os << name_ << " stats:\n"; - stats_->Dump(os, first_arena_, lost_bytes_adjustment_); -} - -// Dump memory usage stats. -MemStats ArenaAllocator::GetMemStats() const { - ssize_t lost_bytes_adjustment = - (arena_head_ == nullptr) ? 0 : (end_ - ptr_) - arena_head_->RemainingSpace(); - return MemStats("ArenaAllocator", this, arena_head_, lost_bytes_adjustment); -} - -} // namespace art diff --git a/compiler/utils/arena_allocator.h b/compiler/utils/arena_allocator.h deleted file mode 100644 index 7f5bc9ac4c..0000000000 --- a/compiler/utils/arena_allocator.h +++ /dev/null @@ -1,237 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_UTILS_ARENA_ALLOCATOR_H_ -#define ART_COMPILER_UTILS_ARENA_ALLOCATOR_H_ - -#include <stdint.h> -#include <stddef.h> - -#include "base/macros.h" -#include "base/mutex.h" -#include "mem_map.h" -#include "utils.h" -#include "utils/debug_stack.h" - -namespace art { - -class Arena; -class ArenaPool; -class ArenaAllocator; -class ArenaStack; -class ScopedArenaAllocator; -class MemStats; - -template <typename T> -class ArenaAllocatorAdapter; - -static constexpr bool kArenaAllocatorCountAllocations = false; - -// Type of allocation for memory tuning. 
-enum ArenaAllocKind { - kArenaAllocMisc, - kArenaAllocBB, - kArenaAllocBBList, - kArenaAllocBBPredecessors, - kArenaAllocDfsPreOrder, - kArenaAllocDfsPostOrder, - kArenaAllocDomPostOrder, - kArenaAllocTopologicalSortOrder, - kArenaAllocLoweringInfo, - kArenaAllocLIR, - kArenaAllocLIRResourceMask, - kArenaAllocSwitchTable, - kArenaAllocFillArrayData, - kArenaAllocSlowPaths, - kArenaAllocMIR, - kArenaAllocDFInfo, - kArenaAllocGrowableArray, - kArenaAllocGrowableBitMap, - kArenaAllocSSAToDalvikMap, - kArenaAllocDalvikToSSAMap, - kArenaAllocDebugInfo, - kArenaAllocSuccessor, - kArenaAllocRegAlloc, - kArenaAllocData, - kArenaAllocPredecessors, - kArenaAllocSTL, - kNumArenaAllocKinds -}; - -template <bool kCount> -class ArenaAllocatorStatsImpl; - -template <> -class ArenaAllocatorStatsImpl<false> { - public: - ArenaAllocatorStatsImpl() = default; - ArenaAllocatorStatsImpl(const ArenaAllocatorStatsImpl& other) = default; - ArenaAllocatorStatsImpl& operator = (const ArenaAllocatorStatsImpl& other) = delete; - - void Copy(const ArenaAllocatorStatsImpl& other) { UNUSED(other); } - void RecordAlloc(size_t bytes, ArenaAllocKind kind) { UNUSED(bytes, kind); } - size_t NumAllocations() const { return 0u; } - size_t BytesAllocated() const { return 0u; } - void Dump(std::ostream& os, const Arena* first, ssize_t lost_bytes_adjustment) const { - UNUSED(os); UNUSED(first); UNUSED(lost_bytes_adjustment); - } -}; - -template <bool kCount> -class ArenaAllocatorStatsImpl { - public: - ArenaAllocatorStatsImpl(); - ArenaAllocatorStatsImpl(const ArenaAllocatorStatsImpl& other) = default; - ArenaAllocatorStatsImpl& operator = (const ArenaAllocatorStatsImpl& other) = delete; - - void Copy(const ArenaAllocatorStatsImpl& other); - void RecordAlloc(size_t bytes, ArenaAllocKind kind); - size_t NumAllocations() const; - size_t BytesAllocated() const; - void Dump(std::ostream& os, const Arena* first, ssize_t lost_bytes_adjustment) const; - - private: - size_t num_allocations_; - // TODO: Use std::array<size_t, kNumArenaAllocKinds> from C++11 when we upgrade the STL. - size_t alloc_stats_[kNumArenaAllocKinds]; // Bytes used by various allocation kinds. 
- - static const char* const kAllocNames[]; -}; - -typedef ArenaAllocatorStatsImpl<kArenaAllocatorCountAllocations> ArenaAllocatorStats; - -class Arena { - public: - static constexpr size_t kDefaultSize = 128 * KB; - explicit Arena(size_t size = kDefaultSize); - ~Arena(); - void Reset(); - uint8_t* Begin() { - return memory_; - } - - uint8_t* End() { - return memory_ + size_; - } - - size_t Size() const { - return size_; - } - - size_t RemainingSpace() const { - return Size() - bytes_allocated_; - } - - size_t GetBytesAllocated() const { - return bytes_allocated_; - } - - private: - size_t bytes_allocated_; - uint8_t* memory_; - size_t size_; - MemMap* map_; - Arena* next_; - friend class ArenaPool; - friend class ArenaAllocator; - friend class ArenaStack; - friend class ScopedArenaAllocator; - template <bool kCount> friend class ArenaAllocatorStatsImpl; - DISALLOW_COPY_AND_ASSIGN(Arena); -}; - -class ArenaPool { - public: - ArenaPool(); - ~ArenaPool(); - Arena* AllocArena(size_t size) LOCKS_EXCLUDED(lock_); - void FreeArenaChain(Arena* first) LOCKS_EXCLUDED(lock_); - size_t GetBytesAllocated() const LOCKS_EXCLUDED(lock_); - - private: - mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; - Arena* free_arenas_ GUARDED_BY(lock_); - DISALLOW_COPY_AND_ASSIGN(ArenaPool); -}; - -class ArenaAllocator : private DebugStackRefCounter, private ArenaAllocatorStats { - public: - explicit ArenaAllocator(ArenaPool* pool); - ~ArenaAllocator(); - - // Get adapter for use in STL containers. See arena_containers.h . - ArenaAllocatorAdapter<void> Adapter(ArenaAllocKind kind = kArenaAllocSTL); - - // Returns zeroed memory. - void* Alloc(size_t bytes, ArenaAllocKind kind) ALWAYS_INLINE { - if (UNLIKELY(running_on_valgrind_)) { - return AllocValgrind(bytes, kind); - } - bytes = RoundUp(bytes, 8); - if (UNLIKELY(ptr_ + bytes > end_)) { - // Obtain a new block. - ObtainNewArenaForAllocation(bytes); - if (UNLIKELY(ptr_ == nullptr)) { - return nullptr; - } - } - ArenaAllocatorStats::RecordAlloc(bytes, kind); - uint8_t* ret = ptr_; - ptr_ += bytes; - return ret; - } - - template <typename T> T* AllocArray(size_t length) { - return static_cast<T*>(Alloc(length * sizeof(T), kArenaAllocMisc)); - } - - void* AllocValgrind(size_t bytes, ArenaAllocKind kind); - void ObtainNewArenaForAllocation(size_t allocation_size); - size_t BytesAllocated() const; - MemStats GetMemStats() const; - - private: - void UpdateBytesAllocated(); - - ArenaPool* pool_; - uint8_t* begin_; - uint8_t* end_; - uint8_t* ptr_; - Arena* arena_head_; - bool running_on_valgrind_; - - template <typename U> - friend class ArenaAllocatorAdapter; - - DISALLOW_COPY_AND_ASSIGN(ArenaAllocator); -}; // ArenaAllocator - -class MemStats { - public: - MemStats(const char* name, const ArenaAllocatorStats* stats, const Arena* first_arena, - ssize_t lost_bytes_adjustment = 0); - void Dump(std::ostream& os) const; - - private: - const char* const name_; - const ArenaAllocatorStats* const stats_; - const Arena* const first_arena_; - const ssize_t lost_bytes_adjustment_; -}; // MemStats - -} // namespace art - -#endif // ART_COMPILER_UTILS_ARENA_ALLOCATOR_H_ diff --git a/compiler/utils/arena_allocator_test.cc b/compiler/utils/arena_allocator_test.cc index 71565407a2..706552739f 100644 --- a/compiler/utils/arena_allocator_test.cc +++ b/compiler/utils/arena_allocator_test.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ +#include "base/arena_allocator.h" #include "gtest/gtest.h" -#include "utils/arena_allocator.h" #include "utils/arena_bit_vector.h" namespace art { diff --git a/compiler/utils/arena_bit_vector.cc b/compiler/utils/arena_bit_vector.cc index f17e5a92a4..ddc0c818c2 100644 --- a/compiler/utils/arena_bit_vector.cc +++ b/compiler/utils/arena_bit_vector.cc @@ -14,9 +14,10 @@ * limitations under the License. */ -#include "arena_allocator.h" #include "arena_bit_vector.h" +#include "base/arena_allocator.h" + namespace art { template <typename ArenaAlloc> diff --git a/compiler/utils/arena_bit_vector.h b/compiler/utils/arena_bit_vector.h index 34f1ca9129..f2a74527da 100644 --- a/compiler/utils/arena_bit_vector.h +++ b/compiler/utils/arena_bit_vector.h @@ -17,7 +17,7 @@ #ifndef ART_COMPILER_UTILS_ARENA_BIT_VECTOR_H_ #define ART_COMPILER_UTILS_ARENA_BIT_VECTOR_H_ -#include "arena_object.h" +#include "base/arena_object.h" #include "base/bit_vector.h" namespace art { @@ -35,14 +35,10 @@ enum OatBitMapKind { kBitMapDominators, kBitMapIDominated, kBitMapDomFrontier, - kBitMapPhi, - kBitMapTmpBlocks, - kBitMapInputBlocks, kBitMapRegisterV, kBitMapTempSSARegisterV, kBitMapNullCheck, kBitMapClInitCheck, - kBitMapTmpBlockV, kBitMapPredecessors, kNumBitMapKinds }; diff --git a/compiler/utils/arena_containers.h b/compiler/utils/arena_containers.h deleted file mode 100644 index 825259157a..0000000000 --- a/compiler/utils/arena_containers.h +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_UTILS_ARENA_CONTAINERS_H_ -#define ART_COMPILER_UTILS_ARENA_CONTAINERS_H_ - -#include <deque> -#include <queue> -#include <set> -#include <vector> - -#include "utils/arena_allocator.h" -#include "safe_map.h" - -namespace art { - -// Adapter for use of ArenaAllocator in STL containers. -// Use ArenaAllocator::Adapter() to create an adapter to pass to container constructors. -// For example, -// struct Foo { -// explicit Foo(ArenaAllocator* allocator) -// : foo_vector(allocator->Adapter(kArenaAllocMisc)), -// foo_map(std::less<int>(), allocator->Adapter()) { -// } -// ArenaVector<int> foo_vector; -// ArenaSafeMap<int, int> foo_map; -// }; -template <typename T> -class ArenaAllocatorAdapter; - -template <typename T> -using ArenaDeque = std::deque<T, ArenaAllocatorAdapter<T>>; - -template <typename T> -using ArenaQueue = std::queue<T, ArenaDeque<T>>; - -template <typename T> -using ArenaVector = std::vector<T, ArenaAllocatorAdapter<T>>; - -template <typename T, typename Comparator = std::less<T>> -using ArenaSet = std::set<T, Comparator, ArenaAllocatorAdapter<T>>; - -template <typename K, typename V, typename Comparator = std::less<K>> -using ArenaSafeMap = - SafeMap<K, V, Comparator, ArenaAllocatorAdapter<std::pair<const K, V>>>; - -// Implementation details below. 
- -template <bool kCount> -class ArenaAllocatorAdapterKindImpl; - -template <> -class ArenaAllocatorAdapterKindImpl<false> { - public: - // Not tracking allocations, ignore the supplied kind and arbitrarily provide kArenaAllocSTL. - explicit ArenaAllocatorAdapterKindImpl(ArenaAllocKind kind) { UNUSED(kind); } - ArenaAllocatorAdapterKindImpl& operator=(const ArenaAllocatorAdapterKindImpl& other) = default; - ArenaAllocKind Kind() { return kArenaAllocSTL; } -}; - -template <bool kCount> -class ArenaAllocatorAdapterKindImpl { - public: - explicit ArenaAllocatorAdapterKindImpl(ArenaAllocKind kind) : kind_(kind) { } - ArenaAllocatorAdapterKindImpl& operator=(const ArenaAllocatorAdapterKindImpl& other) = default; - ArenaAllocKind Kind() { return kind_; } - - private: - ArenaAllocKind kind_; -}; - -typedef ArenaAllocatorAdapterKindImpl<kArenaAllocatorCountAllocations> ArenaAllocatorAdapterKind; - -template <> -class ArenaAllocatorAdapter<void> - : private DebugStackReference, private ArenaAllocatorAdapterKind { - public: - typedef void value_type; - typedef void* pointer; - typedef const void* const_pointer; - - template <typename U> - struct rebind { - typedef ArenaAllocatorAdapter<U> other; - }; - - explicit ArenaAllocatorAdapter(ArenaAllocator* arena_allocator, - ArenaAllocKind kind = kArenaAllocSTL) - : DebugStackReference(arena_allocator), - ArenaAllocatorAdapterKind(kind), - arena_allocator_(arena_allocator) { - } - template <typename U> - ArenaAllocatorAdapter(const ArenaAllocatorAdapter<U>& other) - : DebugStackReference(other), - ArenaAllocatorAdapterKind(other), - arena_allocator_(other.arena_allocator_) { - } - ArenaAllocatorAdapter(const ArenaAllocatorAdapter& other) = default; - ArenaAllocatorAdapter& operator=(const ArenaAllocatorAdapter& other) = default; - ~ArenaAllocatorAdapter() = default; - - private: - ArenaAllocator* arena_allocator_; - - template <typename U> - friend class ArenaAllocatorAdapter; -}; - -template <typename T> -class ArenaAllocatorAdapter : private DebugStackReference, private ArenaAllocatorAdapterKind { - public: - typedef T value_type; - typedef T* pointer; - typedef T& reference; - typedef const T* const_pointer; - typedef const T& const_reference; - typedef size_t size_type; - typedef ptrdiff_t difference_type; - - template <typename U> - struct rebind { - typedef ArenaAllocatorAdapter<U> other; - }; - - explicit ArenaAllocatorAdapter(ArenaAllocator* arena_allocator, ArenaAllocKind kind) - : DebugStackReference(arena_allocator), - ArenaAllocatorAdapterKind(kind), - arena_allocator_(arena_allocator) { - } - template <typename U> - ArenaAllocatorAdapter(const ArenaAllocatorAdapter<U>& other) - : DebugStackReference(other), - ArenaAllocatorAdapterKind(other), - arena_allocator_(other.arena_allocator_) { - } - ArenaAllocatorAdapter(const ArenaAllocatorAdapter& other) = default; - ArenaAllocatorAdapter& operator=(const ArenaAllocatorAdapter& other) = default; - ~ArenaAllocatorAdapter() = default; - - size_type max_size() const { - return static_cast<size_type>(-1) / sizeof(T); - } - - pointer address(reference x) const { return &x; } - const_pointer address(const_reference x) const { return &x; } - - pointer allocate(size_type n, ArenaAllocatorAdapter<void>::pointer hint = nullptr) { - UNUSED(hint); - DCHECK_LE(n, max_size()); - return reinterpret_cast<T*>(arena_allocator_->Alloc(n * sizeof(T), - ArenaAllocatorAdapterKind::Kind())); - } - void deallocate(pointer p, size_type n) { - UNUSED(p, n); - } - - void construct(pointer p, const_reference val) { - new 
(static_cast<void*>(p)) value_type(val); - } - void destroy(pointer p) { - p->~value_type(); - } - - private: - ArenaAllocator* arena_allocator_; - - template <typename U> - friend class ArenaAllocatorAdapter; - - template <typename U> - friend bool operator==(const ArenaAllocatorAdapter<U>& lhs, - const ArenaAllocatorAdapter<U>& rhs); -}; - -template <typename T> -inline bool operator==(const ArenaAllocatorAdapter<T>& lhs, - const ArenaAllocatorAdapter<T>& rhs) { - return lhs.arena_allocator_ == rhs.arena_allocator_; -} - -template <typename T> -inline bool operator!=(const ArenaAllocatorAdapter<T>& lhs, - const ArenaAllocatorAdapter<T>& rhs) { - return !(lhs == rhs); -} - -inline ArenaAllocatorAdapter<void> ArenaAllocator::Adapter(ArenaAllocKind kind) { - return ArenaAllocatorAdapter<void>(this, kind); -} - -} // namespace art - -#endif // ART_COMPILER_UTILS_ARENA_CONTAINERS_H_ diff --git a/compiler/utils/arena_object.h b/compiler/utils/arena_object.h deleted file mode 100644 index d64c419954..0000000000 --- a/compiler/utils/arena_object.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_UTILS_ARENA_OBJECT_H_ -#define ART_COMPILER_UTILS_ARENA_OBJECT_H_ - -#include "arena_allocator.h" -#include "base/logging.h" -#include "scoped_arena_allocator.h" - -namespace art { - -// Parent for arena allocated objects giving appropriate new and delete operators. -template<enum ArenaAllocKind kAllocKind> -class ArenaObject { - public: - // Allocate a new ArenaObject of 'size' bytes in the Arena. - void* operator new(size_t size, ArenaAllocator* allocator) { - return allocator->Alloc(size, kAllocKind); - } - - static void* operator new(size_t size, ScopedArenaAllocator* arena) { - return arena->Alloc(size, kAllocKind); - } - - void operator delete(void*, size_t) { - LOG(FATAL) << "UNREACHABLE"; - UNREACHABLE(); - } -}; - - -// Parent for arena allocated objects that get deleted, gives appropriate new and delete operators. -// Currently this is used by the quick compiler for debug reference counting arena allocations. -template<enum ArenaAllocKind kAllocKind> -class DeletableArenaObject { - public: - // Allocate a new ArenaObject of 'size' bytes in the Arena. - void* operator new(size_t size, ArenaAllocator* allocator) { - return allocator->Alloc(size, kAllocKind); - } - - static void* operator new(size_t size, ScopedArenaAllocator* arena) { - return arena->Alloc(size, kAllocKind); - } - - void operator delete(void*, size_t) { - // Nop. 
- } -}; - -} // namespace art - -#endif // ART_COMPILER_UTILS_ARENA_OBJECT_H_ diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc index 05287732c5..a52e6eb30f 100644 --- a/compiler/utils/arm/assembler_arm.cc +++ b/compiler/utils/arm/assembler_arm.cc @@ -166,7 +166,7 @@ uint32_t ShifterOperand::encodingThumb() const { } uint32_t Address::encodingArm() const { - CHECK(IsAbsoluteUint(12, offset_)); + CHECK(IsAbsoluteUint<12>(offset_)); uint32_t encoding; if (is_immed_offset_) { if (offset_ < 0) { @@ -245,6 +245,7 @@ uint32_t Address::encodingThumb(bool is_32bit) const { // This is very like the ARM encoding except the offset is 10 bits. uint32_t Address::encodingThumbLdrdStrd() const { + DCHECK(IsImmediate()); uint32_t encoding; uint32_t am = am_; // If P is 0 then W must be 1 (Different from ARM). @@ -277,11 +278,12 @@ uint32_t Address::encoding3() const { // Encoding for vfp load/store addressing. uint32_t Address::vencoding() const { + CHECK(IsAbsoluteUint<10>(offset_)); // In the range -1020 to +1020. + CHECK_ALIGNED(offset_, 2); // Multiple of 4. + const uint32_t offset_mask = (1 << 12) - 1; uint32_t encoding = encodingArm(); uint32_t offset = encoding & offset_mask; - CHECK(IsAbsoluteUint(10, offset)); // In the range -1020 to +1020. - CHECK_ALIGNED(offset, 2); // Multiple of 4. CHECK((am_ == Offset) || (am_ == NegOffset)); uint32_t vencoding_value = (encoding & (0xf << kRnShift)) | (offset >> 2); if (am_ == Offset) { @@ -297,13 +299,13 @@ bool Address::CanHoldLoadOffsetArm(LoadOperandType type, int offset) { case kLoadSignedHalfword: case kLoadUnsignedHalfword: case kLoadWordPair: - return IsAbsoluteUint(8, offset); // Addressing mode 3. + return IsAbsoluteUint<8>(offset); // Addressing mode 3. case kLoadUnsignedByte: case kLoadWord: - return IsAbsoluteUint(12, offset); // Addressing mode 2. + return IsAbsoluteUint<12>(offset); // Addressing mode 2. case kLoadSWord: case kLoadDWord: - return IsAbsoluteUint(10, offset); // VFP addressing mode. + return IsAbsoluteUint<10>(offset); // VFP addressing mode. default: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); @@ -315,13 +317,13 @@ bool Address::CanHoldStoreOffsetArm(StoreOperandType type, int offset) { switch (type) { case kStoreHalfword: case kStoreWordPair: - return IsAbsoluteUint(8, offset); // Addressing mode 3. + return IsAbsoluteUint<8>(offset); // Addressing mode 3. case kStoreByte: case kStoreWord: - return IsAbsoluteUint(12, offset); // Addressing mode 2. + return IsAbsoluteUint<12>(offset); // Addressing mode 2. case kStoreSWord: case kStoreDWord: - return IsAbsoluteUint(10, offset); // VFP addressing mode. + return IsAbsoluteUint<10>(offset); // VFP addressing mode. default: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); @@ -335,12 +337,12 @@ bool Address::CanHoldLoadOffsetThumb(LoadOperandType type, int offset) { case kLoadUnsignedHalfword: case kLoadUnsignedByte: case kLoadWord: - return IsAbsoluteUint(12, offset); + return IsAbsoluteUint<12>(offset); case kLoadSWord: case kLoadDWord: - return IsAbsoluteUint(10, offset); // VFP addressing mode. + return IsAbsoluteUint<10>(offset); // VFP addressing mode. 
case kLoadWordPair: - return IsAbsoluteUint(10, offset); + return IsAbsoluteUint<10>(offset); default: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); @@ -353,12 +355,12 @@ bool Address::CanHoldStoreOffsetThumb(StoreOperandType type, int offset) { switch (type) { case kStoreHalfword: case kStoreByte: case kStoreWord: - return IsAbsoluteUint(12, offset); + return IsAbsoluteUint<12>(offset); case kStoreSWord: case kStoreDWord: - return IsAbsoluteUint(10, offset); // VFP addressing mode. + return IsAbsoluteUint<10>(offset); // VFP addressing mode. case kStoreWordPair: - return IsAbsoluteUint(10, offset); + return IsAbsoluteUint<10>(offset); default: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h index d9122764d0..8730f52eca 100644 --- a/compiler/utils/arm/assembler_arm.h +++ b/compiler/utils/arm/assembler_arm.h @@ -536,8 +536,44 @@ class ArmAssembler : public Assembler { virtual void LoadImmediate(Register rd, int32_t value, Condition cond = AL) = 0; void LoadSImmediate(SRegister sd, float value, Condition cond = AL) { if (!vmovs(sd, value, cond)) { - LoadImmediate(IP, bit_cast<int32_t, float>(value), cond); - vmovsr(sd, IP, cond); + int32_t int_value = bit_cast<int32_t, float>(value); + if (int_value == bit_cast<int32_t, float>(0.0f)) { + // 0.0 is quite common, so we special case it by loading + // 2.0 in `sd` and then subtracting it. + bool success = vmovs(sd, 2.0, cond); + CHECK(success); + vsubs(sd, sd, sd, cond); + } else { + LoadImmediate(IP, int_value, cond); + vmovsr(sd, IP, cond); + } + } } + + void LoadDImmediate(DRegister sd, double value, Condition cond = AL) { + if (!vmovd(sd, value, cond)) { + uint64_t int_value = bit_cast<uint64_t, double>(value); + if (int_value == bit_cast<uint64_t, double>(0.0)) { + // 0.0 is quite common, so we special case it by loading + // 2.0 in `sd` and then subtracting it. + bool success = vmovd(sd, 2.0, cond); + CHECK(success); + vsubd(sd, sd, sd, cond); + } else { + if (sd < 16) { + SRegister low = static_cast<SRegister>(sd << 1); + SRegister high = static_cast<SRegister>(low + 1); + LoadSImmediate(low, bit_cast<float, uint32_t>(Low32Bits(int_value)), cond); + if (High32Bits(int_value) == Low32Bits(int_value)) { + vmovs(high, low); + } else { + LoadSImmediate(high, bit_cast<float, uint32_t>(High32Bits(int_value)), cond); + } + } else { + LOG(FATAL) << "Unimplemented loading of double into a D register " + << "that cannot be split into two S registers"; + } + } } } diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc index 8d1fb60725..95796916b4 100644 --- a/compiler/utils/arm/assembler_arm32.cc +++ b/compiler/utils/arm/assembler_arm32.cc @@ -1254,7 +1254,7 @@ void Arm32Assembler::vmstat(Condition cond) { // VMRS APSR_nzcv, FPSCR void Arm32Assembler::svc(uint32_t imm24) { - CHECK(IsUint(24, imm24)) << imm24; + CHECK(IsUint<24>(imm24)) << imm24; int32_t encoding = (AL << kConditionShift) | B27 | B26 | B25 | B24 | imm24; Emit(encoding); } diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index 5383c28f82..6d0571e263 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -2080,7 +2080,7 @@ void Thumb2Assembler::vmstat(Condition cond) { // VMRS APSR_nzcv, FPSCR.
void Thumb2Assembler::svc(uint32_t imm8) { - CHECK(IsUint(8, imm8)) << imm8; + CHECK(IsUint<8>(imm8)) << imm8; int16_t encoding = B15 | B14 | B12 | B11 | B10 | B9 | B8 | imm8; @@ -2089,7 +2089,7 @@ void Thumb2Assembler::svc(uint32_t imm8) { void Thumb2Assembler::bkpt(uint16_t imm8) { - CHECK(IsUint(8, imm8)) << imm8; + CHECK(IsUint<8>(imm8)) << imm8; int16_t encoding = B15 | B13 | B12 | B11 | B10 | B9 | imm8; diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc index e571e72402..ebea9d4262 100644 --- a/compiler/utils/arm/assembler_thumb2_test.cc +++ b/compiler/utils/arm/assembler_thumb2_test.cc @@ -198,6 +198,18 @@ TEST_F(AssemblerThumb2Test, strexd) { DriverStr(expected, "strexd"); } +TEST_F(AssemblerThumb2Test, LdrdStrd) { + GetAssembler()->ldrd(arm::R0, arm::Address(arm::R2, 8)); + GetAssembler()->ldrd(arm::R0, arm::Address(arm::R12)); + GetAssembler()->strd(arm::R0, arm::Address(arm::R2, 8)); + + const char* expected = + "ldrd r0, r1, [r2, #8]\n" + "ldrd r0, r1, [r12]\n" + "strd r0, r1, [r2, #8]\n"; + DriverStr(expected, "ldrdstrd"); +} + TEST_F(AssemblerThumb2Test, eor) { #define __ GetAssembler()-> __ eor(arm::R1, arm::R1, arm::ShifterOperand(arm::R0)); diff --git a/compiler/utils/debug_stack.h b/compiler/utils/debug_stack.h deleted file mode 100644 index 1bb0624187..0000000000 --- a/compiler/utils/debug_stack.h +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_UTILS_DEBUG_STACK_H_ -#define ART_COMPILER_UTILS_DEBUG_STACK_H_ - -#include "base/logging.h" -#include "base/macros.h" -#include "globals.h" - -namespace art { - -// Helper classes for reference counting to enforce construction/destruction order and -// usage of the top element of a stack in debug mode with no overhead in release mode. - -// Reference counter. No references allowed in destructor or in explicitly called CheckNoRefs(). -template <bool kIsDebug> -class DebugStackRefCounterImpl; -// Reference. Allows an explicit check that it's the top reference. -template <bool kIsDebug> -class DebugStackReferenceImpl; -// Indirect top reference. Checks that the reference is the top reference when used. -template <bool kIsDebug> -class DebugStackIndirectTopRefImpl; - -typedef DebugStackRefCounterImpl<kIsDebugBuild> DebugStackRefCounter; -typedef DebugStackReferenceImpl<kIsDebugBuild> DebugStackReference; -typedef DebugStackIndirectTopRefImpl<kIsDebugBuild> DebugStackIndirectTopRef; - -// Non-debug mode specializations. This should be optimized away. 
- -template <> -class DebugStackRefCounterImpl<false> { - public: - size_t IncrementRefCount() { return 0u; } - void DecrementRefCount() { } - size_t GetRefCount() const { return 0u; } - void CheckNoRefs() const { } -}; - -template <> -class DebugStackReferenceImpl<false> { - public: - explicit DebugStackReferenceImpl(DebugStackRefCounterImpl<false>* counter) { UNUSED(counter); } - DebugStackReferenceImpl(const DebugStackReferenceImpl& other) = default; - DebugStackReferenceImpl& operator=(const DebugStackReferenceImpl& other) = default; - void CheckTop() { } -}; - -template <> -class DebugStackIndirectTopRefImpl<false> { - public: - explicit DebugStackIndirectTopRefImpl(DebugStackReferenceImpl<false>* ref) { UNUSED(ref); } - DebugStackIndirectTopRefImpl(const DebugStackIndirectTopRefImpl& other) = default; - DebugStackIndirectTopRefImpl& operator=(const DebugStackIndirectTopRefImpl& other) = default; - void CheckTop() { } -}; - -// Debug mode versions. - -template <bool kIsDebug> -class DebugStackRefCounterImpl { - public: - DebugStackRefCounterImpl() : ref_count_(0u) { } - ~DebugStackRefCounterImpl() { CheckNoRefs(); } - size_t IncrementRefCount() { return ++ref_count_; } - void DecrementRefCount() { --ref_count_; } - size_t GetRefCount() const { return ref_count_; } - void CheckNoRefs() const { CHECK_EQ(ref_count_, 0u); } - - private: - size_t ref_count_; -}; - -template <bool kIsDebug> -class DebugStackReferenceImpl { - public: - explicit DebugStackReferenceImpl(DebugStackRefCounterImpl<kIsDebug>* counter) - : counter_(counter), ref_count_(counter->IncrementRefCount()) { - } - DebugStackReferenceImpl(const DebugStackReferenceImpl& other) - : counter_(other.counter_), ref_count_(counter_->IncrementRefCount()) { - } - DebugStackReferenceImpl& operator=(const DebugStackReferenceImpl& other) { - CHECK(counter_ == other.counter_); - return *this; - } - ~DebugStackReferenceImpl() { counter_->DecrementRefCount(); } - void CheckTop() { CHECK_EQ(counter_->GetRefCount(), ref_count_); } - - private: - DebugStackRefCounterImpl<true>* counter_; - size_t ref_count_; -}; - -template <bool kIsDebug> -class DebugStackIndirectTopRefImpl { - public: - explicit DebugStackIndirectTopRefImpl(DebugStackReferenceImpl<kIsDebug>* ref) - : ref_(ref) { - CheckTop(); - } - DebugStackIndirectTopRefImpl(const DebugStackIndirectTopRefImpl& other) - : ref_(other.ref_) { - CheckTop(); - } - DebugStackIndirectTopRefImpl& operator=(const DebugStackIndirectTopRefImpl& other) { - CHECK(ref_ == other.ref_); - CheckTop(); - return *this; - } - ~DebugStackIndirectTopRefImpl() { - CheckTop(); - } - void CheckTop() { - ref_->CheckTop(); - } - - private: - DebugStackReferenceImpl<kIsDebug>* ref_; -}; - -} // namespace art - -#endif // ART_COMPILER_UTILS_DEBUG_STACK_H_ diff --git a/compiler/utils/dex_instruction_utils.h b/compiler/utils/dex_instruction_utils.h index 2c6e525e1d..bb2c592f13 100644 --- a/compiler/utils/dex_instruction_utils.h +++ b/compiler/utils/dex_instruction_utils.h @@ -110,6 +110,10 @@ constexpr bool IsInstructionAGetOrAPut(Instruction::Code code) { return Instruction::AGET <= code && code <= Instruction::APUT_SHORT; } +constexpr bool IsInstructionBinOp2Addr(Instruction::Code code) { + return Instruction::ADD_INT_2ADDR <= code && code <= Instruction::REM_DOUBLE_2ADDR; +} + // TODO: Remove the #if guards below when we fully migrate to C++14. 
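The `debug_stack.h` deletion above removes only the copy in `compiler/utils/`; the include update later in this diff (`swap_space.h` switching to `base/debug_stack.h`) shows the header living under `base/` instead. Its core trick is the `template <bool kIsDebug>` split where the release specialization is an empty shell the compiler optimizes away. A minimal sketch of that pattern with illustrative names (`CheckedCounterImpl` is not an ART class):

```cpp
// Illustrative sketch of the debug/release specialization pattern used by
// DebugStackRefCounterImpl: the <false> version is a pile of no-ops, the
// <true> version carries the real reference counting and checks.
#include <cassert>
#include <cstddef>

#ifdef NDEBUG
static constexpr bool kIsDebugBuildSketch = false;
#else
static constexpr bool kIsDebugBuildSketch = true;
#endif

template <bool kIsDebug>
class CheckedCounterImpl;

// Release build: everything compiles down to nothing.
template <>
class CheckedCounterImpl<false> {
 public:
  void Increment() {}
  void Decrement() {}
  void CheckNoRefs() const {}
};

// Debug build: real counting, hard failure if references are still live.
template <>
class CheckedCounterImpl<true> {
 public:
  void Increment() { ++count_; }
  void Decrement() { --count_; }
  void CheckNoRefs() const { assert(count_ == 0u); }

 private:
  size_t count_ = 0u;
};

using CheckedCounter = CheckedCounterImpl<kIsDebugBuildSketch>;

int main() {
  CheckedCounter counter;
  counter.Increment();
  counter.Decrement();
  counter.CheckNoRefs();  // would fire in a debug build if a Decrement() were missing
  return 0;
}
```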
constexpr bool IsInvokeInstructionRange(Instruction::Code opcode) { diff --git a/compiler/utils/dwarf_cfi.cc b/compiler/utils/dwarf_cfi.cc index 83e5f5ad39..a7e09c6517 100644 --- a/compiler/utils/dwarf_cfi.cc +++ b/compiler/utils/dwarf_cfi.cc @@ -37,7 +37,7 @@ void DW_CFA_advance_loc(std::vector<uint8_t>* buf, uint32_t increment) { } else { // Four byte delta. buf->push_back(0x04); - PushWord(buf, increment); + Push32(buf, increment); } } @@ -68,35 +68,35 @@ void DW_CFA_restore_state(std::vector<uint8_t>* buf) { void WriteFDEHeader(std::vector<uint8_t>* buf, bool is_64bit) { // 'length' (filled in by other functions). if (is_64bit) { - PushWord(buf, 0xffffffff); // Indicates 64bit - PushWord(buf, 0); - PushWord(buf, 0); + Push32(buf, 0xffffffff); // Indicates 64bit + Push32(buf, 0); + Push32(buf, 0); } else { - PushWord(buf, 0); + Push32(buf, 0); } // 'CIE_pointer' (filled in by linker). if (is_64bit) { - PushWord(buf, 0); - PushWord(buf, 0); + Push32(buf, 0); + Push32(buf, 0); } else { - PushWord(buf, 0); + Push32(buf, 0); } // 'initial_location' (filled in by linker). if (is_64bit) { - PushWord(buf, 0); - PushWord(buf, 0); + Push32(buf, 0); + Push32(buf, 0); } else { - PushWord(buf, 0); + Push32(buf, 0); } // 'address_range' (filled in by other functions). if (is_64bit) { - PushWord(buf, 0); - PushWord(buf, 0); + Push32(buf, 0); + Push32(buf, 0); } else { - PushWord(buf, 0); + Push32(buf, 0); } // Augmentation length: 0 diff --git a/compiler/utils/growable_array.h b/compiler/utils/growable_array.h index 6af4853e09..821e28b4a0 100644 --- a/compiler/utils/growable_array.h +++ b/compiler/utils/growable_array.h @@ -20,7 +20,7 @@ #include <stdint.h> #include <stddef.h> -#include "arena_object.h" +#include "base/arena_object.h" namespace art { @@ -33,16 +33,14 @@ class GrowableArray : public ArenaObject<kArenaAllocGrowableArray> { : arena_(arena), num_allocated_(init_length), num_used_(0) { - elem_list_ = static_cast<T*>(arena_->Alloc(sizeof(T) * init_length, - kArenaAllocGrowableArray)); + elem_list_ = arena_->AllocArray<T>(init_length, kArenaAllocGrowableArray); } GrowableArray(ArenaAllocator* arena, size_t init_length, T initial_data) : arena_(arena), num_allocated_(init_length), num_used_(init_length) { - elem_list_ = static_cast<T*>(arena_->Alloc(sizeof(T) * init_length, - kArenaAllocGrowableArray)); + elem_list_ = arena_->AllocArray<T>(init_length, kArenaAllocGrowableArray); for (size_t i = 0; i < init_length; ++i) { elem_list_[i] = initial_data; } @@ -58,8 +56,7 @@ class GrowableArray : public ArenaObject<kArenaAllocGrowableArray> { if (new_length > target_length) { target_length = new_length; } - T* new_array = static_cast<T*>(arena_->Alloc(sizeof(T) * target_length, - kArenaAllocGrowableArray)); + T* new_array = arena_->AllocArray<T>(target_length, kArenaAllocGrowableArray); memcpy(new_array, elem_list_, sizeof(T) * num_allocated_); num_allocated_ = target_length; elem_list_ = new_array; diff --git a/compiler/utils/scoped_arena_allocator.cc b/compiler/utils/scoped_arena_allocator.cc deleted file mode 100644 index d9e0619de6..0000000000 --- a/compiler/utils/scoped_arena_allocator.cc +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
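The `growable_array.h` hunks above replace the repeated `static_cast<T*>(arena_->Alloc(sizeof(T) * n, kind))` pattern with the typed `arena_->AllocArray<T>(n, kind)` call. The shape of such a wrapper over a raw byte allocator, sketched with toy names (`ByteArena` is not ART's `ArenaAllocator`):

```cpp
// Illustrative sketch: a typed AllocArray<T>() convenience wrapper over a raw
// byte allocator, hiding the sizeof multiplication and the cast at call sites.
#include <cstddef>
#include <cstdlib>
#include <cstring>

class ByteArena {
 public:
  void* Alloc(size_t bytes) {
    // Toy backing store; a real arena bump-allocates from large blocks and
    // hands out zero-initialized memory.
    void* ptr = std::malloc(bytes);
    if (ptr != nullptr) {
      std::memset(ptr, 0, bytes);
    }
    return ptr;
  }

  template <typename T>
  T* AllocArray(size_t length) {
    return static_cast<T*>(Alloc(length * sizeof(T)));
  }
};

int main() {
  ByteArena arena;
  int* values = arena.AllocArray<int>(16);  // typed, no cast at the call site
  if (values != nullptr) {
    values[0] = 42;
  }
  std::free(values);  // only because this toy arena is malloc-backed
  return 0;
}
```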
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "scoped_arena_allocator.h" - -#include "utils/arena_allocator.h" -#include <memcheck/memcheck.h> - -namespace art { - -static constexpr size_t kValgrindRedZoneBytes = 8; - -ArenaStack::ArenaStack(ArenaPool* arena_pool) - : DebugStackRefCounter(), - stats_and_pool_(arena_pool), - bottom_arena_(nullptr), - top_arena_(nullptr), - top_ptr_(nullptr), - top_end_(nullptr), - running_on_valgrind_(RUNNING_ON_VALGRIND > 0) { -} - -ArenaStack::~ArenaStack() { - DebugStackRefCounter::CheckNoRefs(); - stats_and_pool_.pool->FreeArenaChain(bottom_arena_); -} - -void ArenaStack::Reset() { - DebugStackRefCounter::CheckNoRefs(); - stats_and_pool_.pool->FreeArenaChain(bottom_arena_); - bottom_arena_ = nullptr; - top_arena_ = nullptr; - top_ptr_ = nullptr; - top_end_ = nullptr; -} - -MemStats ArenaStack::GetPeakStats() const { - DebugStackRefCounter::CheckNoRefs(); - return MemStats("ArenaStack peak", static_cast<const TaggedStats<Peak>*>(&stats_and_pool_), - bottom_arena_); -} - -uint8_t* ArenaStack::AllocateFromNextArena(size_t rounded_bytes) { - UpdateBytesAllocated(); - size_t allocation_size = std::max(Arena::kDefaultSize, rounded_bytes); - if (UNLIKELY(top_arena_ == nullptr)) { - top_arena_ = bottom_arena_ = stats_and_pool_.pool->AllocArena(allocation_size); - top_arena_->next_ = nullptr; - } else if (top_arena_->next_ != nullptr && top_arena_->next_->Size() >= allocation_size) { - top_arena_ = top_arena_->next_; - } else { - Arena* tail = top_arena_->next_; - top_arena_->next_ = stats_and_pool_.pool->AllocArena(allocation_size); - top_arena_ = top_arena_->next_; - top_arena_->next_ = tail; - } - top_end_ = top_arena_->End(); - // top_ptr_ shall be updated by ScopedArenaAllocator. - return top_arena_->Begin(); -} - -void ArenaStack::UpdatePeakStatsAndRestore(const ArenaAllocatorStats& restore_stats) { - if (PeakStats()->BytesAllocated() < CurrentStats()->BytesAllocated()) { - PeakStats()->Copy(*CurrentStats()); - } - CurrentStats()->Copy(restore_stats); -} - -void ArenaStack::UpdateBytesAllocated() { - if (top_arena_ != nullptr) { - // Update how many bytes we have allocated into the arena so that the arena pool knows how - // much memory to zero out. Though ScopedArenaAllocator doesn't guarantee the memory is - // zero-initialized, the Arena may be reused by ArenaAllocator which does guarantee this. 
- size_t allocated = static_cast<size_t>(top_ptr_ - top_arena_->Begin()); - if (top_arena_->bytes_allocated_ < allocated) { - top_arena_->bytes_allocated_ = allocated; - } - } -} - -void* ArenaStack::AllocValgrind(size_t bytes, ArenaAllocKind kind) { - size_t rounded_bytes = RoundUp(bytes + kValgrindRedZoneBytes, 8); - uint8_t* ptr = top_ptr_; - if (UNLIKELY(static_cast<size_t>(top_end_ - ptr) < rounded_bytes)) { - ptr = AllocateFromNextArena(rounded_bytes); - CHECK(ptr != nullptr) << "Failed to allocate memory"; - } - CurrentStats()->RecordAlloc(bytes, kind); - top_ptr_ = ptr + rounded_bytes; - VALGRIND_MAKE_MEM_UNDEFINED(ptr, bytes); - VALGRIND_MAKE_MEM_NOACCESS(ptr + bytes, rounded_bytes - bytes); - return ptr; -} - -ScopedArenaAllocator::ScopedArenaAllocator(ArenaStack* arena_stack) - : DebugStackReference(arena_stack), - DebugStackRefCounter(), - ArenaAllocatorStats(*arena_stack->CurrentStats()), - arena_stack_(arena_stack), - mark_arena_(arena_stack->top_arena_), - mark_ptr_(arena_stack->top_ptr_), - mark_end_(arena_stack->top_end_) { -} - -ScopedArenaAllocator::~ScopedArenaAllocator() { - DoReset(); -} - -void ScopedArenaAllocator::Reset() { - DoReset(); - // If this allocator was Create()d, we need to move the arena_stack_->top_ptr_ past *this. - if (mark_ptr_ == reinterpret_cast<uint8_t*>(this)) { - arena_stack_->top_ptr_ = mark_ptr_ + RoundUp(sizeof(ScopedArenaAllocator), 8); - } -} - -void ScopedArenaAllocator::DoReset() { - DebugStackReference::CheckTop(); - DebugStackRefCounter::CheckNoRefs(); - arena_stack_->UpdatePeakStatsAndRestore(*this); - arena_stack_->UpdateBytesAllocated(); - if (LIKELY(mark_arena_ != nullptr)) { - arena_stack_->top_arena_ = mark_arena_; - arena_stack_->top_ptr_ = mark_ptr_; - arena_stack_->top_end_ = mark_end_; - } else if (arena_stack_->bottom_arena_ != nullptr) { - mark_arena_ = arena_stack_->top_arena_ = arena_stack_->bottom_arena_; - mark_ptr_ = arena_stack_->top_ptr_ = mark_arena_->Begin(); - mark_end_ = arena_stack_->top_end_ = mark_arena_->End(); - } -} - -} // namespace art diff --git a/compiler/utils/scoped_arena_allocator.h b/compiler/utils/scoped_arena_allocator.h deleted file mode 100644 index 523f158969..0000000000 --- a/compiler/utils/scoped_arena_allocator.h +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_UTILS_SCOPED_ARENA_ALLOCATOR_H_ -#define ART_COMPILER_UTILS_SCOPED_ARENA_ALLOCATOR_H_ - -#include "base/logging.h" -#include "base/macros.h" -#include "utils/arena_allocator.h" -#include "utils/debug_stack.h" -#include "globals.h" - -namespace art { - -class ArenaStack; -class ScopedArenaAllocator; - -template <typename T> -class ScopedArenaAllocatorAdapter; - -// Holds a list of Arenas for use by ScopedArenaAllocator stack. 
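Both `Alloc()` and `AllocValgrind()` in the deleted allocator above keep the top pointer 8-byte aligned via `RoundUp`, with `AllocValgrind()` first padding the request by `kValgrindRedZoneBytes` so the red zone can be marked inaccessible. The arithmetic is the usual power-of-two round-up; a small sketch (with `RoundUpSketch` standing in for ART's `RoundUp`):

```cpp
// Illustrative sketch: power-of-two round-up as used for the 8-byte alignment
// and the valgrind red-zone padding in the allocator above.
#include <cstddef>

constexpr size_t RoundUpSketch(size_t x, size_t n) {
  // n must be a power of two.
  return (x + n - 1) & ~(n - 1);
}

static constexpr size_t kRedZoneBytes = 8;  // mirrors kValgrindRedZoneBytes above

static_assert(RoundUpSketch(1, 8) == 8, "rounds up to the alignment");
static_assert(RoundUpSketch(8, 8) == 8, "already aligned");
static_assert(RoundUpSketch(13 + kRedZoneBytes, 8) == 24, "payload plus red zone, aligned");

int main() { return 0; }
```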
-class ArenaStack : private DebugStackRefCounter { - public: - explicit ArenaStack(ArenaPool* arena_pool); - ~ArenaStack(); - - void Reset(); - - size_t PeakBytesAllocated() { - return PeakStats()->BytesAllocated(); - } - - MemStats GetPeakStats() const; - - private: - struct Peak; - struct Current; - template <typename Tag> struct TaggedStats : ArenaAllocatorStats { }; - struct StatsAndPool : TaggedStats<Peak>, TaggedStats<Current> { - explicit StatsAndPool(ArenaPool* arena_pool) : pool(arena_pool) { } - ArenaPool* const pool; - }; - - ArenaAllocatorStats* PeakStats() { - return static_cast<TaggedStats<Peak>*>(&stats_and_pool_); - } - - ArenaAllocatorStats* CurrentStats() { - return static_cast<TaggedStats<Current>*>(&stats_and_pool_); - } - - // Private - access via ScopedArenaAllocator or ScopedArenaAllocatorAdapter. - void* Alloc(size_t bytes, ArenaAllocKind kind) ALWAYS_INLINE { - if (UNLIKELY(running_on_valgrind_)) { - return AllocValgrind(bytes, kind); - } - size_t rounded_bytes = RoundUp(bytes, 8); - uint8_t* ptr = top_ptr_; - if (UNLIKELY(static_cast<size_t>(top_end_ - ptr) < rounded_bytes)) { - ptr = AllocateFromNextArena(rounded_bytes); - } - CurrentStats()->RecordAlloc(bytes, kind); - top_ptr_ = ptr + rounded_bytes; - return ptr; - } - - uint8_t* AllocateFromNextArena(size_t rounded_bytes); - void UpdatePeakStatsAndRestore(const ArenaAllocatorStats& restore_stats); - void UpdateBytesAllocated(); - void* AllocValgrind(size_t bytes, ArenaAllocKind kind); - - StatsAndPool stats_and_pool_; - Arena* bottom_arena_; - Arena* top_arena_; - uint8_t* top_ptr_; - uint8_t* top_end_; - - const bool running_on_valgrind_; - - friend class ScopedArenaAllocator; - template <typename T> - friend class ScopedArenaAllocatorAdapter; - - DISALLOW_COPY_AND_ASSIGN(ArenaStack); -}; - -class ScopedArenaAllocator - : private DebugStackReference, private DebugStackRefCounter, private ArenaAllocatorStats { - public: - // Create a ScopedArenaAllocator directly on the ArenaStack when the scope of - // the allocator is not exactly a C++ block scope. For example, an optimization - // pass can create the scoped allocator in Start() and destroy it in End(). - static ScopedArenaAllocator* Create(ArenaStack* arena_stack) { - void* addr = arena_stack->Alloc(sizeof(ScopedArenaAllocator), kArenaAllocMisc); - ScopedArenaAllocator* allocator = new(addr) ScopedArenaAllocator(arena_stack); - allocator->mark_ptr_ = reinterpret_cast<uint8_t*>(addr); - return allocator; - } - - explicit ScopedArenaAllocator(ArenaStack* arena_stack); - ~ScopedArenaAllocator(); - - void Reset(); - - void* Alloc(size_t bytes, ArenaAllocKind kind) ALWAYS_INLINE { - DebugStackReference::CheckTop(); - return arena_stack_->Alloc(bytes, kind); - } - - // Get adapter for use in STL containers. See scoped_arena_containers.h . - ScopedArenaAllocatorAdapter<void> Adapter(ArenaAllocKind kind = kArenaAllocSTL); - - // Allow a delete-expression to destroy but not deallocate allocators created by Create(). 
- static void operator delete(void* ptr) { UNUSED(ptr); } - - private: - ArenaStack* const arena_stack_; - Arena* mark_arena_; - uint8_t* mark_ptr_; - uint8_t* mark_end_; - - void DoReset(); - - template <typename T> - friend class ScopedArenaAllocatorAdapter; - - DISALLOW_COPY_AND_ASSIGN(ScopedArenaAllocator); -}; - -} // namespace art - -#endif // ART_COMPILER_UTILS_SCOPED_ARENA_ALLOCATOR_H_ diff --git a/compiler/utils/scoped_arena_containers.h b/compiler/utils/scoped_arena_containers.h deleted file mode 100644 index df93b273d1..0000000000 --- a/compiler/utils/scoped_arena_containers.h +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_UTILS_SCOPED_ARENA_CONTAINERS_H_ -#define ART_COMPILER_UTILS_SCOPED_ARENA_CONTAINERS_H_ - -#include <deque> -#include <queue> -#include <set> -#include <vector> - -#include "utils/arena_containers.h" // For ArenaAllocatorAdapterKind. -#include "utils/scoped_arena_allocator.h" -#include "safe_map.h" - -namespace art { - -// Adapter for use of ScopedArenaAllocator in STL containers. -// Use ScopedArenaAllocator::Adapter() to create an adapter to pass to container constructors. -// For example, -// void foo(ScopedArenaAllocator* allocator) { -// ScopedArenaVector<int> foo_vector(allocator->Adapter(kArenaAllocMisc)); -// ScopedArenaSafeMap<int, int> foo_map(std::less<int>(), allocator->Adapter()); -// // Use foo_vector and foo_map... -// } -template <typename T> -class ScopedArenaAllocatorAdapter; - -template <typename T> -using ScopedArenaDeque = std::deque<T, ScopedArenaAllocatorAdapter<T>>; - -template <typename T> -using ScopedArenaQueue = std::queue<T, ScopedArenaDeque<T>>; - -template <typename T> -using ScopedArenaVector = std::vector<T, ScopedArenaAllocatorAdapter<T>>; - -template <typename T, typename Comparator = std::less<T>> -using ScopedArenaSet = std::set<T, Comparator, ScopedArenaAllocatorAdapter<T>>; - -template <typename K, typename V, typename Comparator = std::less<K>> -using ScopedArenaSafeMap = - SafeMap<K, V, Comparator, ScopedArenaAllocatorAdapter<std::pair<const K, V>>>; - -// Implementation details below. 
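The deleted `scoped_arena_containers.h` above wires `ScopedArenaAllocator` into STL containers through a standard allocator adapter: `value_type`, `allocate()`, `deallocate()`, a rebinding converting constructor, and equality operators. A reduced sketch of that interface with made-up names (`CountingAllocator`, `g_allocated`), malloc-backed instead of arena-backed:

```cpp
// Illustrative sketch: the C++11 minimum an allocator adapter such as
// ScopedArenaAllocatorAdapter must provide to be usable with std::vector.
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <vector>

static size_t g_allocated = 0;  // stands in for the arena's allocation stats

template <typename T>
struct CountingAllocator {
  using value_type = T;

  CountingAllocator() = default;
  template <typename U>
  CountingAllocator(const CountingAllocator<U>&) {}  // rebinding conversion

  T* allocate(size_t n) {
    g_allocated += n * sizeof(T);
    return static_cast<T*>(std::malloc(n * sizeof(T)));
  }
  void deallocate(T* p, size_t) { std::free(p); }
};

template <typename T, typename U>
bool operator==(const CountingAllocator<T>&, const CountingAllocator<U>&) { return true; }
template <typename T, typename U>
bool operator!=(const CountingAllocator<T>&, const CountingAllocator<U>&) { return false; }

int main() {
  std::vector<int, CountingAllocator<int>> values;
  values.push_back(1);
  values.push_back(2);
  std::printf("bytes requested so far: %zu\n", g_allocated);
  return 0;
}
```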
- -template <> -class ScopedArenaAllocatorAdapter<void> - : private DebugStackReference, private DebugStackIndirectTopRef, - private ArenaAllocatorAdapterKind { - public: - typedef void value_type; - typedef void* pointer; - typedef const void* const_pointer; - - template <typename U> - struct rebind { - typedef ScopedArenaAllocatorAdapter<U> other; - }; - - explicit ScopedArenaAllocatorAdapter(ScopedArenaAllocator* arena_allocator, - ArenaAllocKind kind = kArenaAllocSTL) - : DebugStackReference(arena_allocator), - DebugStackIndirectTopRef(arena_allocator), - ArenaAllocatorAdapterKind(kind), - arena_stack_(arena_allocator->arena_stack_) { - } - template <typename U> - ScopedArenaAllocatorAdapter(const ScopedArenaAllocatorAdapter<U>& other) - : DebugStackReference(other), - DebugStackIndirectTopRef(other), - ArenaAllocatorAdapterKind(other), - arena_stack_(other.arena_stack_) { - } - ScopedArenaAllocatorAdapter(const ScopedArenaAllocatorAdapter& other) = default; - ScopedArenaAllocatorAdapter& operator=(const ScopedArenaAllocatorAdapter& other) = default; - ~ScopedArenaAllocatorAdapter() = default; - - private: - ArenaStack* arena_stack_; - - template <typename U> - friend class ScopedArenaAllocatorAdapter; -}; - -template <typename T> -class ScopedArenaAllocatorAdapter - : private DebugStackReference, private DebugStackIndirectTopRef, - private ArenaAllocatorAdapterKind { - public: - typedef T value_type; - typedef T* pointer; - typedef T& reference; - typedef const T* const_pointer; - typedef const T& const_reference; - typedef size_t size_type; - typedef ptrdiff_t difference_type; - - template <typename U> - struct rebind { - typedef ScopedArenaAllocatorAdapter<U> other; - }; - - explicit ScopedArenaAllocatorAdapter(ScopedArenaAllocator* arena_allocator, - ArenaAllocKind kind = kArenaAllocSTL) - : DebugStackReference(arena_allocator), - DebugStackIndirectTopRef(arena_allocator), - ArenaAllocatorAdapterKind(kind), - arena_stack_(arena_allocator->arena_stack_) { - } - template <typename U> - ScopedArenaAllocatorAdapter(const ScopedArenaAllocatorAdapter<U>& other) - : DebugStackReference(other), - DebugStackIndirectTopRef(other), - ArenaAllocatorAdapterKind(other), - arena_stack_(other.arena_stack_) { - } - ScopedArenaAllocatorAdapter(const ScopedArenaAllocatorAdapter& other) = default; - ScopedArenaAllocatorAdapter& operator=(const ScopedArenaAllocatorAdapter& other) = default; - ~ScopedArenaAllocatorAdapter() = default; - - size_type max_size() const { - return static_cast<size_type>(-1) / sizeof(T); - } - - pointer address(reference x) const { return &x; } - const_pointer address(const_reference x) const { return &x; } - - pointer allocate(size_type n, ScopedArenaAllocatorAdapter<void>::pointer hint = nullptr) { - UNUSED(hint); - DCHECK_LE(n, max_size()); - DebugStackIndirectTopRef::CheckTop(); - return reinterpret_cast<T*>(arena_stack_->Alloc(n * sizeof(T), - ArenaAllocatorAdapterKind::Kind())); - } - void deallocate(pointer p, size_type n) { - UNUSED(p); - UNUSED(n); - DebugStackIndirectTopRef::CheckTop(); - } - - void construct(pointer p, const_reference val) { - // Don't CheckTop(), allow reusing existing capacity of a vector/deque below the top. - new (static_cast<void*>(p)) value_type(val); - } - void destroy(pointer p) { - // Don't CheckTop(), allow reusing existing capacity of a vector/deque below the top. 
- p->~value_type(); - } - - private: - ArenaStack* arena_stack_; - - template <typename U> - friend class ScopedArenaAllocatorAdapter; - - template <typename U> - friend bool operator==(const ScopedArenaAllocatorAdapter<U>& lhs, - const ScopedArenaAllocatorAdapter<U>& rhs); -}; - -template <typename T> -inline bool operator==(const ScopedArenaAllocatorAdapter<T>& lhs, - const ScopedArenaAllocatorAdapter<T>& rhs) { - return lhs.arena_stack_ == rhs.arena_stack_; -} - -template <typename T> -inline bool operator!=(const ScopedArenaAllocatorAdapter<T>& lhs, - const ScopedArenaAllocatorAdapter<T>& rhs) { - return !(lhs == rhs); -} - -inline ScopedArenaAllocatorAdapter<void> ScopedArenaAllocator::Adapter(ArenaAllocKind kind) { - return ScopedArenaAllocatorAdapter<void>(this, kind); -} - -} // namespace art - -#endif // ART_COMPILER_UTILS_SCOPED_ARENA_CONTAINERS_H_ diff --git a/compiler/utils/swap_space.h b/compiler/utils/swap_space.h index 2d0d77af78..1f8f5da6cd 100644 --- a/compiler/utils/swap_space.h +++ b/compiler/utils/swap_space.h @@ -23,12 +23,12 @@ #include <stdint.h> #include <stddef.h> +#include "base/debug_stack.h" #include "base/logging.h" #include "base/macros.h" #include "base/mutex.h" #include "mem_map.h" #include "utils.h" -#include "utils/debug_stack.h" namespace art { diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 03744e4149..8f4208b417 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -1290,7 +1290,7 @@ void X86Assembler::j(Condition condition, Label* label) { static const int kLongSize = 6; int offset = label->Position() - buffer_.Size(); CHECK_LE(offset, 0); - if (IsInt(8, offset - kShortSize)) { + if (IsInt<8>(offset - kShortSize)) { EmitUint8(0x70 + condition); EmitUint8((offset - kShortSize) & 0xFF); } else { @@ -1325,7 +1325,7 @@ void X86Assembler::jmp(Label* label) { static const int kLongSize = 5; int offset = label->Position() - buffer_.Size(); CHECK_LE(offset, 0); - if (IsInt(8, offset - kShortSize)) { + if (IsInt<8>(offset - kShortSize)) { EmitUint8(0xEB); EmitUint8((offset - kShortSize) & 0xFF); } else { diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 3a44ace649..2dde90744e 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -35,10 +35,10 @@ class Immediate : public ValueObject { int32_t value() const { return value_; } - bool is_int8() const { return IsInt(8, value_); } - bool is_uint8() const { return IsUint(8, value_); } - bool is_int16() const { return IsInt(16, value_); } - bool is_uint16() const { return IsUint(16, value_); } + bool is_int8() const { return IsInt<8>(value_); } + bool is_uint8() const { return IsUint<8>(value_); } + bool is_int16() const { return IsInt<16>(value_); } + bool is_uint16() const { return IsUint<16>(value_); } private: const int32_t value_; diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 7e8e769249..f2704b72a4 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -328,6 +328,14 @@ void X86_64Assembler::leaq(CpuRegister dst, const Address& src) { } +void X86_64Assembler::leal(CpuRegister dst, const Address& src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(dst, src); + EmitUint8(0x8D); + EmitOperand(dst.LowBits(), src); +} + + void X86_64Assembler::movaps(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity 
ensured(&buffer_); EmitOptionalRex32(dst, src); @@ -1507,7 +1515,7 @@ void X86_64Assembler::imull(CpuRegister reg, const Immediate& imm) { // See whether imm can be represented as a sign-extended 8bit value. int32_t v32 = static_cast<int32_t>(imm.value()); - if (IsInt32(8, v32)) { + if (IsInt<8>(v32)) { // Sign-extension works. EmitUint8(0x6B); EmitOperand(reg.LowBits(), Operand(reg)); @@ -1547,7 +1555,7 @@ void X86_64Assembler::imulq(CpuRegister reg, const Immediate& imm) { // See whether imm can be represented as a sign-extended 8bit value. int64_t v64 = imm.value(); - if (IsInt64(8, v64)) { + if (IsInt<8>(v64)) { // Sign-extension works. EmitUint8(0x6B); EmitOperand(reg.LowBits(), Operand(reg)); @@ -1697,7 +1705,7 @@ void X86_64Assembler::notq(CpuRegister reg) { void X86_64Assembler::enter(const Immediate& imm) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xC8); - CHECK(imm.is_uint16()); + CHECK(imm.is_uint16()) << imm.value(); EmitUint8(imm.value() & 0xFF); EmitUint8((imm.value() >> 8) & 0xFF); EmitUint8(0x00); @@ -1751,7 +1759,7 @@ void X86_64Assembler::j(Condition condition, Label* label) { static const int kLongSize = 6; int offset = label->Position() - buffer_.Size(); CHECK_LE(offset, 0); - if (IsInt(8, offset - kShortSize)) { + if (IsInt<8>(offset - kShortSize)) { EmitUint8(0x70 + condition); EmitUint8((offset - kShortSize) & 0xFF); } else { @@ -1788,7 +1796,7 @@ void X86_64Assembler::jmp(Label* label) { static const int kLongSize = 5; int offset = label->Position() - buffer_.Size(); CHECK_LE(offset, 0); - if (IsInt(8, offset - kShortSize)) { + if (IsInt<8>(offset - kShortSize)) { EmitUint8(0xEB); EmitUint8((offset - kShortSize) & 0xFF); } else { diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 2fc251b07a..5dfcf4541b 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -42,15 +42,11 @@ class Immediate : public ValueObject { int64_t value() const { return value_; } - bool is_int8() const { return IsInt(8, value_); } - bool is_uint8() const { return IsUint(8, value_); } - bool is_int16() const { return IsInt(16, value_); } - bool is_uint16() const { return IsUint(16, value_); } - bool is_int32() const { - // This does not work on 32b machines: return IsInt(32, value_); - int64_t limit = static_cast<int64_t>(1) << 31; - return (-limit <= value_) && (value_ < limit); - } + bool is_int8() const { return IsInt<8>(value_); } + bool is_uint8() const { return IsUint<8>(value_); } + bool is_int16() const { return IsInt<16>(value_); } + bool is_uint16() const { return IsUint<16>(value_); } + bool is_int32() const { return IsInt<32>(value_); } private: const int64_t value_; @@ -296,6 +292,7 @@ class X86_64Assembler FINAL : public Assembler { void movw(const Address& dst, const Immediate& imm); void leaq(CpuRegister dst, const Address& src); + void leal(CpuRegister dst, const Address& src); void movaps(XmmRegister dst, XmmRegister src); diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index 6df4144004..00f508b23f 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -44,10 +44,10 @@ static constexpr size_t kRandomIterations = 100000; // Hosts are pretty powerfu TEST(AssemblerX86_64, SignExtension) { // 32bit. 
for (int32_t i = 0; i < 128; i++) { - EXPECT_TRUE(IsInt32(8, i)) << i; + EXPECT_TRUE(IsInt<8>(i)) << i; } for (int32_t i = 128; i < 255; i++) { - EXPECT_FALSE(IsInt32(8, i)) << i; + EXPECT_FALSE(IsInt<8>(i)) << i; } // Do some higher ones randomly. std::random_device rd; @@ -55,54 +55,65 @@ TEST(AssemblerX86_64, SignExtension) { std::uniform_int_distribution<int32_t> uniform_dist(256, INT32_MAX); for (size_t i = 0; i < kRandomIterations; i++) { int32_t value = uniform_dist(e1); - EXPECT_FALSE(IsInt32(8, value)) << value; + EXPECT_FALSE(IsInt<8>(value)) << value; } // Negative ones. for (int32_t i = -1; i >= -128; i--) { - EXPECT_TRUE(IsInt32(8, i)) << i; + EXPECT_TRUE(IsInt<8>(i)) << i; } for (int32_t i = -129; i > -256; i--) { - EXPECT_FALSE(IsInt32(8, i)) << i; + EXPECT_FALSE(IsInt<8>(i)) << i; } // Do some lower ones randomly. std::uniform_int_distribution<int32_t> uniform_dist2(INT32_MIN, -256); for (size_t i = 0; i < 100; i++) { int32_t value = uniform_dist2(e1); - EXPECT_FALSE(IsInt32(8, value)) << value; + EXPECT_FALSE(IsInt<8>(value)) << value; } // 64bit. for (int64_t i = 0; i < 128; i++) { - EXPECT_TRUE(IsInt64(8, i)) << i; + EXPECT_TRUE(IsInt<8>(i)) << i; } for (int32_t i = 128; i < 255; i++) { - EXPECT_FALSE(IsInt64(8, i)) << i; + EXPECT_FALSE(IsInt<8>(i)) << i; } // Do some higher ones randomly. std::uniform_int_distribution<int64_t> uniform_dist3(256, INT64_MAX); for (size_t i = 0; i < 100; i++) { int64_t value = uniform_dist3(e1); - EXPECT_FALSE(IsInt64(8, value)) << value; + EXPECT_FALSE(IsInt<8>(value)) << value; } // Negative ones. for (int64_t i = -1; i >= -128; i--) { - EXPECT_TRUE(IsInt64(8, i)) << i; + EXPECT_TRUE(IsInt<8>(i)) << i; } for (int64_t i = -129; i > -256; i--) { - EXPECT_FALSE(IsInt64(8, i)) << i; + EXPECT_FALSE(IsInt<8>(i)) << i; } // Do some lower ones randomly. std::uniform_int_distribution<int64_t> uniform_dist4(INT64_MIN, -256); for (size_t i = 0; i < kRandomIterations; i++) { int64_t value = uniform_dist4(e1); - EXPECT_FALSE(IsInt64(8, value)) << value; + EXPECT_FALSE(IsInt<8>(value)) << value; } + + int64_t value = INT64_C(0x1200000010); + x86_64::Immediate imm(value); + EXPECT_FALSE(imm.is_int8()); + EXPECT_FALSE(imm.is_int16()); + EXPECT_FALSE(imm.is_int32()); + value = INT64_C(0x8000000000000001); + x86_64::Immediate imm2(value); + EXPECT_FALSE(imm2.is_int8()); + EXPECT_FALSE(imm2.is_int16()); + EXPECT_FALSE(imm2.is_int32()); } struct X86_64CpuRegisterCompare { |
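The recurring change in this diff — `IsInt(n, x)`, `IsUint(n, x)`, `IsInt32`, and `IsInt64` becoming the templated `IsInt<n>(x)` / `IsUint<n>(x)` — is also what lets `Immediate::is_int32()` drop its hand-rolled limit computation. A sketch of what such compile-time-width checks amount to (illustrative only, not ART's actual helpers, and limited here to widths up to 32 bits):

```cpp
// Illustrative sketch of templated bit-width predicates in the spirit of
// IsInt<N>() / IsUint<N>(). Widths are capped at 32 so every shift on int64_t
// below is trivially well-defined; the real helpers also handle 64-bit widths.
#include <cstddef>
#include <cstdint>

template <size_t kBits>  // precondition: 1 <= kBits <= 32
constexpr bool IsIntSketch(int64_t value) {
  return value >= -(INT64_C(1) << (kBits - 1)) && value < (INT64_C(1) << (kBits - 1));
}

template <size_t kBits>  // precondition: 1 <= kBits <= 32
constexpr bool IsUintSketch(int64_t value) {
  return value >= 0 && value <= (INT64_C(1) << kBits) - 1;
}

// Mirrors the sign-extension test expectations above: signed 8-bit is [-128, 127].
static_assert(IsIntSketch<8>(127), "127 fits in 8 signed bits");
static_assert(!IsIntSketch<8>(128), "128 does not");
static_assert(IsIntSketch<8>(-128), "-128 fits");
static_assert(!IsIntSketch<8>(-129), "-129 does not");

// And the new Immediate tests: 0x1200000010 needs more than 32 bits.
static_assert(!IsIntSketch<32>(INT64_C(0x1200000010)), "wider than int32");
static_assert(IsUintSketch<16>(0xFFFF), "fits in uint16");

int main() { return 0; }
```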