Diffstat (limited to 'compiler')
-rw-r--r--  compiler/Android.mk | 4
-rw-r--r--  compiler/dex/bb_optimizations.h | 35
-rw-r--r--  compiler/dex/compiler_ir.h | 4
-rw-r--r--  compiler/dex/dex_flags.h | 1
-rw-r--r--  compiler/dex/dex_to_dex_compiler.cc | 4
-rw-r--r--  compiler/dex/global_value_numbering.cc | 10
-rw-r--r--  compiler/dex/global_value_numbering.h | 54
-rw-r--r--  compiler/dex/global_value_numbering_test.cc | 89
-rw-r--r--  compiler/dex/gvn_dead_code_elimination.cc | 1391
-rw-r--r--  compiler/dex/gvn_dead_code_elimination.h | 166
-rw-r--r--  compiler/dex/gvn_dead_code_elimination_test.cc | 1800
-rw-r--r--  compiler/dex/local_value_numbering.cc | 144
-rw-r--r--  compiler/dex/local_value_numbering.h | 45
-rw-r--r--  compiler/dex/local_value_numbering_test.cc | 124
-rw-r--r--  compiler/dex/mir_analysis.cc | 12
-rw-r--r--  compiler/dex/mir_dataflow.cc | 51
-rw-r--r--  compiler/dex/mir_field_info.h | 2
-rw-r--r--  compiler/dex/mir_graph.cc | 138
-rw-r--r--  compiler/dex/mir_graph.h | 78
-rw-r--r--  compiler/dex/mir_optimization.cc | 89
-rw-r--r--  compiler/dex/mir_optimization_test.cc | 2
-rw-r--r--  compiler/dex/pass_driver_me_opts.cc | 1
-rw-r--r--  compiler/dex/pass_driver_me_post_opt.cc | 2
-rw-r--r--  compiler/dex/post_opt_passes.h | 10
-rw-r--r--  compiler/dex/quick/arm/call_arm.cc | 33
-rw-r--r--  compiler/dex/quick/arm/codegen_arm.h | 6
-rw-r--r--  compiler/dex/quick/arm64/call_arm64.cc | 33
-rw-r--r--  compiler/dex/quick/arm64/codegen_arm64.h | 2
-rw-r--r--  compiler/dex/quick/codegen_util.cc | 119
-rw-r--r--  compiler/dex/quick/gen_common.cc | 14
-rwxr-xr-x  compiler/dex/quick/gen_invoke.cc | 25
-rw-r--r--  compiler/dex/quick/gen_loadstore.cc | 6
-rw-r--r--  compiler/dex/quick/mips/call_mips.cc | 32
-rw-r--r--  compiler/dex/quick/mips/codegen_mips.h | 4
-rw-r--r--  compiler/dex/quick/mips/utility_mips.cc | 9
-rw-r--r--  compiler/dex/quick/mir_to_lir.cc | 222
-rw-r--r--  compiler/dex/quick/mir_to_lir.h | 81
-rw-r--r--  compiler/dex/quick/quick_compiler.cc | 1
-rw-r--r--  compiler/dex/quick/ralloc_util.cc | 11
-rw-r--r--  compiler/dex/quick/resource_mask.cc | 2
-rw-r--r--  compiler/dex/quick/x86/call_x86.cc | 119
-rw-r--r--  compiler/dex/quick/x86/codegen_x86.h | 5
-rwxr-xr-x  compiler/dex/quick/x86/int_x86.cc | 43
-rwxr-xr-x  compiler/dex/quick/x86/target_x86.cc | 8
-rw-r--r--  compiler/dex/ssa_transformation.cc | 62
-rw-r--r--  compiler/dex/vreg_analysis.cc | 3
-rw-r--r--  compiler/driver/compiler_driver.cc | 10
-rw-r--r--  compiler/driver/compiler_driver.h | 2
-rw-r--r--  compiler/elf_writer_quick.cc | 44
-rw-r--r--  compiler/gc_map_builder.h | 14
-rw-r--r--  compiler/image_writer.cc | 32
-rw-r--r--  compiler/oat_writer.cc | 32
-rw-r--r--  compiler/optimizing/bounds_check_elimination.cc | 540
-rw-r--r--  compiler/optimizing/bounds_check_elimination_test.cc | 75
-rw-r--r--  compiler/optimizing/builder.h | 2
-rw-r--r--  compiler/optimizing/code_generator.cc | 88
-rw-r--r--  compiler/optimizing/code_generator.h | 80
-rw-r--r--  compiler/optimizing/code_generator_arm.cc | 275
-rw-r--r--  compiler/optimizing/code_generator_arm.h | 26
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc | 151
-rw-r--r--  compiler/optimizing/code_generator_arm64.h | 32
-rw-r--r--  compiler/optimizing/code_generator_x86.cc | 58
-rw-r--r--  compiler/optimizing/code_generator_x86.h | 2
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc | 110
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h | 2
-rw-r--r--  compiler/optimizing/dominator_test.cc | 2
-rw-r--r--  compiler/optimizing/find_loops_test.cc | 2
-rw-r--r--  compiler/optimizing/graph_checker.cc | 52
-rw-r--r--  compiler/optimizing/graph_test.cc | 2
-rw-r--r--  compiler/optimizing/graph_visualizer.cc | 4
-rw-r--r--  compiler/optimizing/gvn.cc | 2
-rw-r--r--  compiler/optimizing/gvn_test.cc | 2
-rw-r--r--  compiler/optimizing/inliner.cc | 6
-rw-r--r--  compiler/optimizing/inliner.h | 4
-rw-r--r--  compiler/optimizing/instruction_simplifier.cc | 46
-rw-r--r--  compiler/optimizing/instruction_simplifier.h | 7
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc | 883
-rw-r--r--  compiler/optimizing/intrinsics_arm.h | 88
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc | 2
-rw-r--r--  compiler/optimizing/licm.cc | 3
-rw-r--r--  compiler/optimizing/linearize_test.cc | 2
-rw-r--r--  compiler/optimizing/live_interval_test.cc | 2
-rw-r--r--  compiler/optimizing/live_ranges_test.cc | 2
-rw-r--r--  compiler/optimizing/liveness_test.cc | 2
-rw-r--r--  compiler/optimizing/locations.cc | 7
-rw-r--r--  compiler/optimizing/locations.h | 20
-rw-r--r--  compiler/optimizing/nodes.cc | 96
-rw-r--r--  compiler/optimizing/nodes.h | 373
-rw-r--r--  compiler/optimizing/nodes_test.cc | 2
-rw-r--r--  compiler/optimizing/optimization.cc | 6
-rw-r--r--  compiler/optimizing/optimization.h | 9
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc | 51
-rw-r--r--  compiler/optimizing/optimizing_compiler_stats.h | 4
-rw-r--r--  compiler/optimizing/parallel_move_resolver.cc | 115
-rw-r--r--  compiler/optimizing/parallel_move_resolver.h | 10
-rw-r--r--  compiler/optimizing/parallel_move_test.cc | 90
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.cc | 5
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.h | 1
-rw-r--r--  compiler/optimizing/pretty_printer_test.cc | 2
-rw-r--r--  compiler/optimizing/primitive_type_propagation.cc | 30
-rw-r--r--  compiler/optimizing/reference_type_propagation.cc | 254
-rw-r--r--  compiler/optimizing/reference_type_propagation.h | 38
-rw-r--r--  compiler/optimizing/register_allocator.cc | 110
-rw-r--r--  compiler/optimizing/register_allocator.h | 15
-rw-r--r--  compiler/optimizing/register_allocator_test.cc | 2
-rw-r--r--  compiler/optimizing/ssa_builder.cc | 69
-rw-r--r--  compiler/optimizing/ssa_builder.h | 2
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.cc | 59
-rw-r--r--  compiler/optimizing/ssa_phi_elimination.cc | 43
-rw-r--r--  compiler/optimizing/ssa_phi_elimination.h | 3
-rw-r--r--  compiler/optimizing/ssa_test.cc | 2
-rw-r--r--  compiler/optimizing/stack_map_stream.h | 31
-rw-r--r--  compiler/optimizing/stack_map_test.cc | 2
-rw-r--r--  compiler/utils/arena_allocator.cc | 296
-rw-r--r--  compiler/utils/arena_allocator.h | 237
-rw-r--r--  compiler/utils/arena_allocator_test.cc | 2
-rw-r--r--  compiler/utils/arena_bit_vector.cc | 3
-rw-r--r--  compiler/utils/arena_bit_vector.h | 6
-rw-r--r--  compiler/utils/arena_containers.h | 207
-rw-r--r--  compiler/utils/arena_object.h | 67
-rw-r--r--  compiler/utils/arm/assembler_arm.cc | 32
-rw-r--r--  compiler/utils/arm/assembler_arm.h | 40
-rw-r--r--  compiler/utils/arm/assembler_arm32.cc | 2
-rw-r--r--  compiler/utils/arm/assembler_thumb2.cc | 4
-rw-r--r--  compiler/utils/arm/assembler_thumb2_test.cc | 12
-rw-r--r--  compiler/utils/debug_stack.h | 138
-rw-r--r--  compiler/utils/dex_instruction_utils.h | 4
-rw-r--r--  compiler/utils/dwarf_cfi.cc | 28
-rw-r--r--  compiler/utils/growable_array.h | 11
-rw-r--r--  compiler/utils/scoped_arena_allocator.cc | 146
-rw-r--r--  compiler/utils/scoped_arena_allocator.h | 145
-rw-r--r--  compiler/utils/scoped_arena_containers.h | 193
-rw-r--r--  compiler/utils/swap_space.h | 2
-rw-r--r--  compiler/utils/x86/assembler_x86.cc | 4
-rw-r--r--  compiler/utils/x86/assembler_x86.h | 8
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64.cc | 18
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64.h | 15
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64_test.cc | 35
138 files changed, 7897 insertions, 2993 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk
index b87201ad8d..beb34dce37 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -21,6 +21,7 @@ include art/build/Android.common_build.mk
LIBART_COMPILER_SRC_FILES := \
compiled_method.cc \
dex/global_value_numbering.cc \
+ dex/gvn_dead_code_elimination.cc \
dex/local_value_numbering.cc \
dex/quick/arm/assemble_arm.cc \
dex/quick/arm/call_arm.cc \
@@ -100,6 +101,7 @@ LIBART_COMPILER_SRC_FILES := \
optimizing/inliner.cc \
optimizing/instruction_simplifier.cc \
optimizing/intrinsics.cc \
+ optimizing/intrinsics_arm.cc \
optimizing/intrinsics_arm64.cc \
optimizing/intrinsics_x86_64.cc \
optimizing/licm.cc \
@@ -117,7 +119,6 @@ LIBART_COMPILER_SRC_FILES := \
optimizing/primitive_type_propagation.cc \
optimizing/reference_type_propagation.cc \
trampolines/trampoline_compiler.cc \
- utils/arena_allocator.cc \
utils/arena_bit_vector.cc \
utils/arm/assembler_arm.cc \
utils/arm/assembler_arm32.cc \
@@ -135,7 +136,6 @@ LIBART_COMPILER_SRC_FILES := \
utils/x86/managed_register_x86.cc \
utils/x86_64/assembler_x86_64.cc \
utils/x86_64/managed_register_x86_64.cc \
- utils/scoped_arena_allocator.cc \
utils/swap_space.cc \
buffered_output_stream.cc \
compiler.cc \
diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h
index 7685200261..93d83c6fd4 100644
--- a/compiler/dex/bb_optimizations.h
+++ b/compiler/dex/bb_optimizations.h
@@ -240,6 +240,41 @@ class GlobalValueNumberingPass : public PassME {
};
/**
+ * @class DeadCodeEliminationPass
+ * @brief Performs the GVN-based dead code elimination pass.
+ */
+class DeadCodeEliminationPass : public PassME {
+ public:
+ DeadCodeEliminationPass() : PassME("DCE", kPreOrderDFSTraversal, "4_post_dce_cfg") {
+ }
+
+ bool Gate(const PassDataHolder* data) const OVERRIDE {
+ DCHECK(data != nullptr);
+ CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit;
+ DCHECK(c_unit != nullptr);
+ return c_unit->mir_graph->EliminateDeadCodeGate();
+ }
+
+ bool Worker(PassDataHolder* data) const {
+ DCHECK(data != nullptr);
+ PassMEDataHolder* pass_me_data_holder = down_cast<PassMEDataHolder*>(data);
+ CompilationUnit* c_unit = pass_me_data_holder->c_unit;
+ DCHECK(c_unit != nullptr);
+ BasicBlock* bb = pass_me_data_holder->bb;
+ DCHECK(bb != nullptr);
+ return c_unit->mir_graph->EliminateDeadCode(bb);
+ }
+
+ void End(PassDataHolder* data) const OVERRIDE {
+ DCHECK(data != nullptr);
+ CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
+ DCHECK(c_unit != nullptr);
+ c_unit->mir_graph->EliminateDeadCodeEnd();
+ down_cast<PassMEDataHolder*>(data)->dirty = !c_unit->mir_graph->MirSsaRepUpToDate();
+ }
+};
+
+/**
* @class BBCombine
* @brief Perform the basic block combination pass.
*/
diff --git a/compiler/dex/compiler_ir.h b/compiler/dex/compiler_ir.h
index 0c46d4347d..dceea240fa 100644
--- a/compiler/dex/compiler_ir.h
+++ b/compiler/dex/compiler_ir.h
@@ -21,11 +21,11 @@
#include <string>
#include <vector>
+#include "base/arena_allocator.h"
+#include "base/scoped_arena_allocator.h"
#include "base/timing_logger.h"
#include "invoke_type.h"
#include "safe_map.h"
-#include "utils/arena_allocator.h"
-#include "utils/scoped_arena_allocator.h"
namespace art {
diff --git a/compiler/dex/dex_flags.h b/compiler/dex/dex_flags.h
index eaf272bb55..e8eb40ccd2 100644
--- a/compiler/dex/dex_flags.h
+++ b/compiler/dex/dex_flags.h
@@ -27,6 +27,7 @@ enum OptControlVector {
kNullCheckElimination,
kClassInitCheckElimination,
kGlobalValueNumbering,
+ kGvnDeadCodeElimination,
kLocalValueNumbering,
kPromoteRegs,
kTrackLiveTemps,
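The new kGvnDeadCodeElimination entry extends OptControlVector, whose enumerators are used as bit positions in the compiler's disable-optimizations mask, so the DCE pass can be switched off independently of GVN itself. A hedged sketch of that bit test with illustrative names (the real enum and mask live in ART's dex_flags.h and CompilationUnit):

#include <cstdint>

// Illustration only: enumerators act as bit positions in a "disable optimizations" mask.
enum OptControlVectorSketch : uint32_t {
  kGlobalValueNumberingSketch = 0,
  kGvnDeadCodeEliminationSketch = 1,
};

inline bool IsOptEnabled(uint32_t disable_opt_mask, uint32_t opt_bit) {
  return (disable_opt_mask & (UINT32_C(1) << opt_bit)) == 0u;
}

// A gate would then bail out early when the DCE bit is set in the mask:
//   if (!IsOptEnabled(disable_opt, kGvnDeadCodeEliminationSketch)) return false;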
diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc
index f7968c225a..7e916bee4a 100644
--- a/compiler/dex/dex_to_dex_compiler.cc
+++ b/compiler/dex/dex_to_dex_compiler.cc
@@ -238,7 +238,7 @@ void DexCompiler::CompileInstanceFieldAccess(Instruction* inst,
bool is_volatile;
bool fast_path = driver_.ComputeInstanceFieldInfo(field_idx, &unit_, is_put,
&field_offset, &is_volatile);
- if (fast_path && !is_volatile && IsUint(16, field_offset.Int32Value())) {
+ if (fast_path && !is_volatile && IsUint<16>(field_offset.Int32Value())) {
VLOG(compiler) << "Quickening " << Instruction::Name(inst->Opcode())
<< " to " << Instruction::Name(new_opcode)
<< " by replacing field index " << field_idx
@@ -274,7 +274,7 @@ void DexCompiler::CompileInvokeVirtual(Instruction* inst,
&target_method, &vtable_idx,
&direct_code, &direct_method);
if (fast_path && original_invoke_type == invoke_type) {
- if (vtable_idx >= 0 && IsUint(16, vtable_idx)) {
+ if (vtable_idx >= 0 && IsUint<16>(vtable_idx)) {
VLOG(compiler) << "Quickening " << Instruction::Name(inst->Opcode())
<< "(" << PrettyMethod(method_idx, GetDexFile(), true) << ")"
<< " to " << Instruction::Name(new_opcode)
diff --git a/compiler/dex/global_value_numbering.cc b/compiler/dex/global_value_numbering.cc
index a8fd8122ff..ab3c946897 100644
--- a/compiler/dex/global_value_numbering.cc
+++ b/compiler/dex/global_value_numbering.cc
@@ -28,7 +28,7 @@ GlobalValueNumbering::GlobalValueNumbering(CompilationUnit* cu, ScopedArenaAlloc
allocator_(allocator),
bbs_processed_(0u),
max_bbs_to_process_(kMaxBbsToProcessMultiplyFactor * mir_graph_->GetNumReachableBlocks()),
- last_value_(0u),
+ last_value_(kNullValue),
modifications_allowed_(true),
mode_(mode),
global_value_map_(std::less<uint64_t>(), allocator->Adapter()),
@@ -128,7 +128,11 @@ bool GlobalValueNumbering::FinishBasicBlock(BasicBlock* bb) {
merge_lvns_.clear();
bool change = (lvns_[bb->id] == nullptr) || !lvns_[bb->id]->Equals(*work_lvn_);
- if (change) {
+ if (mode_ == kModeGvn) {
+ // In GVN mode, keep the latest LVN even if Equals() indicates no change. This is
+ // to keep the correct values of fields that do not contribute to Equals() as long
+ // as they depend only on predecessor LVNs' fields that do contribute to Equals().
+ // Currently, that's LVN::merge_map_ used by LVN::GetStartingVregValueNumberImpl().
std::unique_ptr<const LocalValueNumbering> old_lvn(lvns_[bb->id]);
lvns_[bb->id] = work_lvn_.release();
} else {
@@ -178,7 +182,7 @@ bool GlobalValueNumbering::NullCheckedInAllPredecessors(
}
// IF_EQZ/IF_NEZ checks some sreg, see if that sreg contains the value_name.
int s_reg = pred_bb->last_mir_insn->ssa_rep->uses[0];
- if (!pred_lvn->IsSregValue(s_reg, value_name)) {
+ if (pred_lvn->GetSregValue(s_reg) != value_name) {
return false;
}
}
diff --git a/compiler/dex/global_value_numbering.h b/compiler/dex/global_value_numbering.h
index cdafc68070..6fa658c0cc 100644
--- a/compiler/dex/global_value_numbering.h
+++ b/compiler/dex/global_value_numbering.h
@@ -17,12 +17,12 @@
#ifndef ART_COMPILER_DEX_GLOBAL_VALUE_NUMBERING_H_
#define ART_COMPILER_DEX_GLOBAL_VALUE_NUMBERING_H_
+#include "base/arena_object.h"
#include "base/logging.h"
#include "base/macros.h"
#include "mir_graph.h"
#include "compiler_ir.h"
#include "dex_flags.h"
-#include "utils/arena_object.h"
namespace art {
@@ -31,6 +31,9 @@ class MirFieldInfo;
class GlobalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> {
public:
+ static constexpr uint16_t kNoValue = 0xffffu;
+ static constexpr uint16_t kNullValue = 1u;
+
enum Mode {
kModeGvn,
kModeGvnPostProcessing,
@@ -51,6 +54,14 @@ class GlobalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> {
GlobalValueNumbering(CompilationUnit* cu, ScopedArenaAllocator* allocator, Mode mode);
~GlobalValueNumbering();
+ CompilationUnit* GetCompilationUnit() const {
+ return cu_;
+ }
+
+ MIRGraph* GetMirGraph() const {
+ return mir_graph_;
+ }
+
// Prepare LVN for the basic block.
LocalValueNumbering* PrepareBasicBlock(BasicBlock* bb,
ScopedArenaAllocator* allocator = nullptr);
@@ -70,9 +81,10 @@ class GlobalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> {
return modifications_allowed_ && Good();
}
- private:
- static constexpr uint16_t kNoValue = 0xffffu;
+ // Retrieve the LVN with GVN results for a given BasicBlock.
+ const LocalValueNumbering* GetLvn(BasicBlockId bb_id) const;
+ private:
// Allocate a new value name.
uint16_t NewValueName();
@@ -88,7 +100,7 @@ class GlobalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> {
uint16_t LookupValue(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) {
uint16_t res;
uint64_t key = BuildKey(op, operand1, operand2, modifier);
- ValueMap::iterator lb = global_value_map_.lower_bound(key);
+ auto lb = global_value_map_.lower_bound(key);
if (lb != global_value_map_.end() && lb->first == key) {
res = lb->second;
} else {
@@ -99,10 +111,10 @@ class GlobalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> {
}
// Look up a value in the global value map, don't add a new entry if there was none before.
- uint16_t FindValue(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) {
+ uint16_t FindValue(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) const {
uint16_t res;
uint64_t key = BuildKey(op, operand1, operand2, modifier);
- ValueMap::iterator lb = global_value_map_.lower_bound(key);
+ auto lb = global_value_map_.lower_bound(key);
if (lb != global_value_map_.end() && lb->first == key) {
res = lb->second;
} else {
@@ -111,18 +123,6 @@ class GlobalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> {
return res;
}
- // Check if the exact value is stored in the global value map.
- bool HasValue(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier,
- uint16_t value) const {
- DCHECK(value != 0u || !Good());
- DCHECK_LE(value, last_value_);
- // This is equivalent to value == LookupValue(op, operand1, operand2, modifier)
- // except that it doesn't add an entry to the global value map if it's not there.
- uint64_t key = BuildKey(op, operand1, operand2, modifier);
- ValueMap::const_iterator it = global_value_map_.find(key);
- return (it != global_value_map_.end() && it->second == value);
- }
-
// Get an instance field id.
uint16_t GetIFieldId(MIR* mir) {
return GetMirGraph()->GetGvnIFieldId(mir);
@@ -200,14 +200,6 @@ class GlobalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> {
bool DivZeroCheckedInAllPredecessors(const ScopedArenaVector<uint16_t>& merge_names) const;
- CompilationUnit* GetCompilationUnit() const {
- return cu_;
- }
-
- MIRGraph* GetMirGraph() const {
- return mir_graph_;
- }
-
ScopedArenaAllocator* Allocator() const {
return allocator_;
}
@@ -255,6 +247,13 @@ class GlobalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> {
};
std::ostream& operator<<(std::ostream& os, const GlobalValueNumbering::Mode& rhs);
+inline const LocalValueNumbering* GlobalValueNumbering::GetLvn(BasicBlockId bb_id) const {
+ DCHECK_EQ(mode_, kModeGvnPostProcessing);
+ DCHECK_LT(bb_id, lvns_.size());
+ DCHECK(lvns_[bb_id] != nullptr);
+ return lvns_[bb_id];
+}
+
inline void GlobalValueNumbering::StartPostProcessing() {
DCHECK(Good());
DCHECK_EQ(mode_, kModeGvn);
@@ -271,8 +270,7 @@ template <typename Container> // Container of MirIFieldLoweringInfo or MirSFiel
uint16_t* GlobalValueNumbering::PrepareGvnFieldIds(ScopedArenaAllocator* allocator,
const Container& field_infos) {
size_t size = field_infos.size();
- uint16_t* field_ids = reinterpret_cast<uint16_t*>(allocator->Alloc(size * sizeof(uint16_t),
- kArenaAllocMisc));
+ uint16_t* field_ids = allocator->AllocArray<uint16_t>(size, kArenaAllocMisc);
for (size_t i = 0u; i != size; ++i) {
size_t idx = i;
const MirFieldInfo& cur_info = field_infos[i];
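LookupValue() and FindValue() above rely on the lower_bound idiom: a single search yields either the existing entry or the exact insertion point, and LookupValue() then inserts with that iterator as a hint so the map is never searched twice. A minimal sketch of the same idiom with plain std::map and generic names (not ART's ValueMap/SafeMap types):

#include <cstdint>
#include <map>

// Returns the value name for 'key', assigning and inserting a fresh one if absent.
inline uint16_t LookupOrAssign(std::map<uint64_t, uint16_t>* value_map,
                               uint64_t key, uint16_t* last_value) {
  auto lb = value_map->lower_bound(key);
  if (lb != value_map->end() && lb->first == key) {
    return lb->second;                // Already known: reuse the existing value name.
  }
  uint16_t res = ++*last_value;       // Allocate a new value name.
  value_map->insert(lb, {key, res});  // Hinted insert right at the lower_bound position.
  return res;
}

FindValue() is the read-only counterpart: it performs the same search but never inserts, which is why the diff can mark it const.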
diff --git a/compiler/dex/global_value_numbering_test.cc b/compiler/dex/global_value_numbering_test.cc
index f71b7ae359..54e34eaa81 100644
--- a/compiler/dex/global_value_numbering_test.cc
+++ b/compiler/dex/global_value_numbering_test.cc
@@ -134,8 +134,8 @@ class GlobalValueNumberingTest : public testing::Test {
{ bb, opcode, 0u, 0u, 2, { src, src + 1 }, 2, { reg, reg + 1 } }
#define DEF_PHI2(bb, reg, src1, src2) \
{ bb, static_cast<Instruction::Code>(kMirOpPhi), 0, 0u, 2u, { src1, src2 }, 1, { reg } }
-#define DEF_DIV_REM(bb, opcode, result, dividend, divisor) \
- { bb, opcode, 0u, 0u, 2, { dividend, divisor }, 1, { result } }
+#define DEF_BINOP(bb, opcode, result, src1, src2) \
+ { bb, opcode, 0u, 0u, 2, { src1, src2 }, 1, { result } }
void DoPrepareIFields(const IFieldDef* defs, size_t count) {
cu_.mir_graph->ifield_lowering_infos_.clear();
@@ -229,7 +229,7 @@ class GlobalValueNumberingTest : public testing::Test {
void DoPrepareMIRs(const MIRDef* defs, size_t count) {
mir_count_ = count;
- mirs_ = reinterpret_cast<MIR*>(cu_.arena.Alloc(sizeof(MIR) * count, kArenaAllocMIR));
+ mirs_ = cu_.arena.AllocArray<MIR>(count, kArenaAllocMIR);
ssa_reps_.resize(count);
for (size_t i = 0u; i != count; ++i) {
const MIRDef* def = &defs[i];
@@ -251,8 +251,8 @@ class GlobalValueNumberingTest : public testing::Test {
ASSERT_EQ(cu_.mir_graph->sfield_lowering_infos_[def->field_info].MemAccessType(),
SGetOrSPutMemAccessType(def->opcode));
} else if (def->opcode == static_cast<Instruction::Code>(kMirOpPhi)) {
- mir->meta.phi_incoming = static_cast<BasicBlockId*>(
- allocator_->Alloc(def->num_uses * sizeof(BasicBlockId), kArenaAllocDFInfo));
+ mir->meta.phi_incoming =
+ allocator_->AllocArray<BasicBlockId>(def->num_uses, kArenaAllocDFInfo);
ASSERT_EQ(def->num_uses, bb->predecessors.size());
std::copy(bb->predecessors.begin(), bb->predecessors.end(), mir->meta.phi_incoming);
}
@@ -267,7 +267,6 @@ class GlobalValueNumberingTest : public testing::Test {
mir->offset = i; // LVN uses offset only for debug output
mir->optimization_flags = 0u;
}
- mirs_[count - 1u].next = nullptr;
DexFile::CodeItem* code_item = static_cast<DexFile::CodeItem*>(
cu_.arena.Alloc(sizeof(DexFile::CodeItem), kArenaAllocMisc));
code_item->insns_size_in_code_units_ = 2u * count;
@@ -279,6 +278,20 @@ class GlobalValueNumberingTest : public testing::Test {
DoPrepareMIRs(defs, count);
}
+ void DoPrepareVregToSsaMapExit(BasicBlockId bb_id, const int32_t* map, size_t count) {
+ BasicBlock* bb = cu_.mir_graph->GetBasicBlock(bb_id);
+ ASSERT_TRUE(bb != nullptr);
+ ASSERT_TRUE(bb->data_flow_info != nullptr);
+ bb->data_flow_info->vreg_to_ssa_map_exit =
+ cu_.arena.AllocArray<int32_t>(count, kArenaAllocDFInfo);
+ std::copy_n(map, count, bb->data_flow_info->vreg_to_ssa_map_exit);
+ }
+
+ template <size_t count>
+ void PrepareVregToSsaMapExit(BasicBlockId bb_id, const int32_t (&map)[count]) {
+ DoPrepareVregToSsaMapExit(bb_id, map, count);
+ }
+
void PerformGVN() {
DoPerformGVN<LoopRepeatingTopologicalSortIterator>();
}
@@ -294,9 +307,9 @@ class GlobalValueNumberingTest : public testing::Test {
cu_.mir_graph->ComputeDominators();
cu_.mir_graph->ComputeTopologicalSortOrder();
cu_.mir_graph->SSATransformationEnd();
- cu_.mir_graph->temp_.gvn.ifield_ids_ = GlobalValueNumbering::PrepareGvnFieldIds(
+ cu_.mir_graph->temp_.gvn.ifield_ids = GlobalValueNumbering::PrepareGvnFieldIds(
allocator_.get(), cu_.mir_graph->ifield_lowering_infos_);
- cu_.mir_graph->temp_.gvn.sfield_ids_ = GlobalValueNumbering::PrepareGvnFieldIds(
+ cu_.mir_graph->temp_.gvn.sfield_ids = GlobalValueNumbering::PrepareGvnFieldIds(
allocator_.get(), cu_.mir_graph->sfield_lowering_infos_);
ASSERT_TRUE(gvn_ == nullptr);
gvn_.reset(new (allocator_.get()) GlobalValueNumbering(&cu_, allocator_.get(),
@@ -348,6 +361,10 @@ class GlobalValueNumberingTest : public testing::Test {
cu_.mir_graph.reset(new MIRGraph(&cu_, &cu_.arena));
cu_.access_flags = kAccStatic; // Don't let "this" interfere with this test.
allocator_.reset(ScopedArenaAllocator::Create(&cu_.arena_stack));
+ // By default, the zero-initialized reg_location_[.] with ref == false tells LVN that
+ // 0 constants are integral, not references. Nothing else is used by LVN/GVN.
+ cu_.mir_graph->reg_location_ =
+ cu_.arena.AllocArray<RegLocation>(kMaxSsaRegs, kArenaAllocRegAlloc);
// Bind all possible sregs to live vregs for test purposes.
live_in_v_->SetInitialBits(kMaxSsaRegs);
cu_.mir_graph->ssa_base_vregs_.reserve(kMaxSsaRegs);
@@ -1570,6 +1587,40 @@ TEST_F(GlobalValueNumberingTestLoop, Phi) {
EXPECT_NE(value_names_[4], value_names_[3]);
}
+TEST_F(GlobalValueNumberingTestLoop, IFieldLoopVariable) {
+ static const IFieldDef ifields[] = {
+ { 0u, 1u, 0u, false, kDexMemAccessWord },
+ };
+ static const MIRDef mirs[] = {
+ DEF_CONST(3, Instruction::CONST, 0u, 0),
+ DEF_IPUT(3, Instruction::IPUT, 0u, 100u, 0u),
+ DEF_IGET(4, Instruction::IGET, 2u, 100u, 0u),
+ DEF_BINOP(4, Instruction::ADD_INT, 3u, 2u, 101u),
+ DEF_IPUT(4, Instruction::IPUT, 3u, 100u, 0u),
+ };
+
+ PrepareIFields(ifields);
+ PrepareMIRs(mirs);
+ PerformGVN();
+ ASSERT_EQ(arraysize(mirs), value_names_.size());
+ EXPECT_NE(value_names_[2], value_names_[0]);
+ EXPECT_NE(value_names_[3], value_names_[0]);
+ EXPECT_NE(value_names_[3], value_names_[2]);
+
+
+ // Set up vreg_to_ssa_map_exit for prologue and loop and set post-processing mode
+ // as needed for GetStartingVregValueNumber().
+ const int32_t prologue_vreg_to_ssa_map_exit[] = { 0 };
+ const int32_t loop_vreg_to_ssa_map_exit[] = { 3 };
+ PrepareVregToSsaMapExit(3, prologue_vreg_to_ssa_map_exit);
+ PrepareVregToSsaMapExit(4, loop_vreg_to_ssa_map_exit);
+ gvn_->StartPostProcessing();
+
+ // Check that vreg 0 has the same value number as the result of IGET 2u.
+ const LocalValueNumbering* loop = gvn_->GetLvn(4);
+ EXPECT_EQ(value_names_[2], loop->GetStartingVregValueNumber(0));
+}
+
TEST_F(GlobalValueNumberingTestCatch, IFields) {
static const IFieldDef ifields[] = {
{ 0u, 1u, 0u, false, kDexMemAccessWord },
@@ -2225,18 +2276,18 @@ TEST_F(GlobalValueNumberingTest, NormalPathToCatchEntry) {
TEST_F(GlobalValueNumberingTestDiamond, DivZeroCheckDiamond) {
static const MIRDef mirs[] = {
- DEF_DIV_REM(3u, Instruction::DIV_INT, 1u, 20u, 21u),
- DEF_DIV_REM(3u, Instruction::DIV_INT, 2u, 24u, 21u),
- DEF_DIV_REM(3u, Instruction::DIV_INT, 3u, 20u, 23u),
- DEF_DIV_REM(4u, Instruction::DIV_INT, 4u, 24u, 22u),
- DEF_DIV_REM(4u, Instruction::DIV_INT, 9u, 24u, 25u),
- DEF_DIV_REM(5u, Instruction::DIV_INT, 5u, 24u, 21u),
- DEF_DIV_REM(5u, Instruction::DIV_INT, 10u, 24u, 26u),
+ DEF_BINOP(3u, Instruction::DIV_INT, 1u, 20u, 21u),
+ DEF_BINOP(3u, Instruction::DIV_INT, 2u, 24u, 21u),
+ DEF_BINOP(3u, Instruction::DIV_INT, 3u, 20u, 23u),
+ DEF_BINOP(4u, Instruction::DIV_INT, 4u, 24u, 22u),
+ DEF_BINOP(4u, Instruction::DIV_INT, 9u, 24u, 25u),
+ DEF_BINOP(5u, Instruction::DIV_INT, 5u, 24u, 21u),
+ DEF_BINOP(5u, Instruction::DIV_INT, 10u, 24u, 26u),
DEF_PHI2(6u, 27u, 25u, 26u),
- DEF_DIV_REM(6u, Instruction::DIV_INT, 12u, 20u, 27u),
- DEF_DIV_REM(6u, Instruction::DIV_INT, 6u, 24u, 21u),
- DEF_DIV_REM(6u, Instruction::DIV_INT, 7u, 20u, 23u),
- DEF_DIV_REM(6u, Instruction::DIV_INT, 8u, 20u, 22u),
+ DEF_BINOP(6u, Instruction::DIV_INT, 12u, 20u, 27u),
+ DEF_BINOP(6u, Instruction::DIV_INT, 6u, 24u, 21u),
+ DEF_BINOP(6u, Instruction::DIV_INT, 7u, 20u, 23u),
+ DEF_BINOP(6u, Instruction::DIV_INT, 8u, 20u, 22u),
};
static const bool expected_ignore_div_zero_check[] = {
diff --git a/compiler/dex/gvn_dead_code_elimination.cc b/compiler/dex/gvn_dead_code_elimination.cc
new file mode 100644
index 0000000000..2e7f0328d2
--- /dev/null
+++ b/compiler/dex/gvn_dead_code_elimination.cc
@@ -0,0 +1,1391 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <sstream>
+
+#include "gvn_dead_code_elimination.h"
+
+#include "base/bit_vector-inl.h"
+#include "base/macros.h"
+#include "compiler_enums.h"
+#include "dataflow_iterator-inl.h"
+#include "dex_instruction.h"
+#include "dex/mir_graph.h"
+#include "local_value_numbering.h"
+#include "utils/arena_bit_vector.h"
+
+namespace art {
+
+constexpr uint16_t GvnDeadCodeElimination::kNoValue;
+constexpr uint16_t GvnDeadCodeElimination::kNPos;
+
+inline uint16_t GvnDeadCodeElimination::MIRData::PrevChange(int v_reg) const {
+ DCHECK(has_def);
+ DCHECK(v_reg == vreg_def || v_reg == vreg_def + 1);
+ return (v_reg == vreg_def) ? prev_value.change : prev_value_high.change;
+}
+
+inline void GvnDeadCodeElimination::MIRData::SetPrevChange(int v_reg, uint16_t change) {
+ DCHECK(has_def);
+ DCHECK(v_reg == vreg_def || v_reg == vreg_def + 1);
+ if (v_reg == vreg_def) {
+ prev_value.change = change;
+ } else {
+ prev_value_high.change = change;
+ }
+}
+
+inline void GvnDeadCodeElimination::MIRData::RemovePrevChange(int v_reg, MIRData* prev_data) {
+ DCHECK_NE(PrevChange(v_reg), kNPos);
+ DCHECK(v_reg == prev_data->vreg_def || v_reg == prev_data->vreg_def + 1);
+ if (vreg_def == v_reg) {
+ if (prev_data->vreg_def == v_reg) {
+ prev_value = prev_data->prev_value;
+ low_def_over_high_word = prev_data->low_def_over_high_word;
+ } else {
+ prev_value = prev_data->prev_value_high;
+ low_def_over_high_word =
+ prev_data->prev_value_high.value != kNPos && !prev_data->high_def_over_low_word;
+ }
+ } else {
+ if (prev_data->vreg_def == v_reg) {
+ prev_value_high = prev_data->prev_value;
+ high_def_over_low_word =
+ prev_data->prev_value.value != kNPos && !prev_data->low_def_over_high_word;
+ } else {
+ prev_value_high = prev_data->prev_value_high;
+ high_def_over_low_word = prev_data->high_def_over_low_word;
+ }
+ }
+}
+
+GvnDeadCodeElimination::VRegChains::VRegChains(uint32_t num_vregs, ScopedArenaAllocator* alloc)
+ : num_vregs_(num_vregs),
+ vreg_data_(alloc->AllocArray<VRegValue>(num_vregs, kArenaAllocMisc)),
+ mir_data_(alloc->Adapter()) {
+ mir_data_.reserve(100);
+}
+
+inline void GvnDeadCodeElimination::VRegChains::Reset() {
+ DCHECK(mir_data_.empty());
+ std::fill_n(vreg_data_, num_vregs_, VRegValue());
+}
+
+void GvnDeadCodeElimination::VRegChains::AddMIRWithDef(MIR* mir, int v_reg, bool wide,
+ uint16_t new_value) {
+ uint16_t pos = mir_data_.size();
+ mir_data_.emplace_back(mir);
+ MIRData* data = &mir_data_.back();
+ data->has_def = true;
+ data->wide_def = wide;
+ data->vreg_def = v_reg;
+
+ if (vreg_data_[v_reg].change != kNPos &&
+ mir_data_[vreg_data_[v_reg].change].vreg_def + 1 == v_reg) {
+ data->low_def_over_high_word = true;
+ }
+ data->prev_value = vreg_data_[v_reg];
+ DCHECK_LT(static_cast<size_t>(v_reg), num_vregs_);
+ vreg_data_[v_reg].value = new_value;
+ vreg_data_[v_reg].change = pos;
+
+ if (wide) {
+ if (vreg_data_[v_reg + 1].change != kNPos &&
+ mir_data_[vreg_data_[v_reg + 1].change].vreg_def == v_reg + 1) {
+ data->high_def_over_low_word = true;
+ }
+ data->prev_value_high = vreg_data_[v_reg + 1];
+ DCHECK_LT(static_cast<size_t>(v_reg + 1), num_vregs_);
+ vreg_data_[v_reg + 1].value = new_value;
+ vreg_data_[v_reg + 1].change = pos;
+ }
+}
+
+inline void GvnDeadCodeElimination::VRegChains::AddMIRWithoutDef(MIR* mir) {
+ mir_data_.emplace_back(mir);
+}
+
+void GvnDeadCodeElimination::VRegChains::RemoveLastMIRData() {
+ MIRData* data = LastMIRData();
+ if (data->has_def) {
+ DCHECK_EQ(vreg_data_[data->vreg_def].change, NumMIRs() - 1u);
+ vreg_data_[data->vreg_def] = data->prev_value;
+ if (data->wide_def) {
+ DCHECK_EQ(vreg_data_[data->vreg_def + 1].change, NumMIRs() - 1u);
+ vreg_data_[data->vreg_def + 1] = data->prev_value_high;
+ }
+ }
+ mir_data_.pop_back();
+}
+
+void GvnDeadCodeElimination::VRegChains::RemoveTrailingNops() {
+ // There's at least one NOP to drop. There may be more.
+ MIRData* last_data = LastMIRData();
+ DCHECK(!last_data->must_keep && !last_data->has_def);
+ do {
+ DCHECK_EQ(static_cast<int>(last_data->mir->dalvikInsn.opcode), static_cast<int>(kMirOpNop));
+ mir_data_.pop_back();
+ if (mir_data_.empty()) {
+ break;
+ }
+ last_data = LastMIRData();
+ } while (!last_data->must_keep && !last_data->has_def);
+}
+
+inline size_t GvnDeadCodeElimination::VRegChains::NumMIRs() const {
+ return mir_data_.size();
+}
+
+inline GvnDeadCodeElimination::MIRData* GvnDeadCodeElimination::VRegChains::GetMIRData(size_t pos) {
+ DCHECK_LT(pos, mir_data_.size());
+ return &mir_data_[pos];
+}
+
+inline GvnDeadCodeElimination::MIRData* GvnDeadCodeElimination::VRegChains::LastMIRData() {
+ DCHECK(!mir_data_.empty());
+ return &mir_data_.back();
+}
+
+uint32_t GvnDeadCodeElimination::VRegChains::NumVRegs() const {
+ return num_vregs_;
+}
+
+void GvnDeadCodeElimination::VRegChains::InsertInitialValueHigh(int v_reg, uint16_t value) {
+ DCHECK_NE(value, kNoValue);
+ DCHECK_LT(static_cast<size_t>(v_reg), num_vregs_);
+ uint16_t change = vreg_data_[v_reg].change;
+ if (change == kNPos) {
+ vreg_data_[v_reg].value = value;
+ } else {
+ while (true) {
+ MIRData* data = &mir_data_[change];
+ DCHECK(data->vreg_def == v_reg || data->vreg_def + 1 == v_reg);
+ if (data->vreg_def == v_reg) { // Low word, use prev_value.
+ if (data->prev_value.change == kNPos) {
+ DCHECK_EQ(data->prev_value.value, kNoValue);
+ data->prev_value.value = value;
+ data->low_def_over_high_word = true;
+ break;
+ }
+ change = data->prev_value.change;
+ } else { // High word, use prev_value_high.
+ if (data->prev_value_high.change == kNPos) {
+ DCHECK_EQ(data->prev_value_high.value, kNoValue);
+ data->prev_value_high.value = value;
+ break;
+ }
+ change = data->prev_value_high.change;
+ }
+ }
+ }
+}
+
+void GvnDeadCodeElimination::VRegChains::UpdateInitialVRegValue(int v_reg, bool wide,
+ const LocalValueNumbering* lvn) {
+ DCHECK_LT(static_cast<size_t>(v_reg), num_vregs_);
+ if (!wide) {
+ if (vreg_data_[v_reg].value == kNoValue) {
+ uint16_t old_value = lvn->GetStartingVregValueNumber(v_reg);
+ if (old_value == kNoValue) {
+ // Maybe there was a wide value in v_reg before. Do not check for wide value in v_reg-1,
+ // that will be done only if we see a definition of v_reg-1, otherwise it's unnecessary.
+ old_value = lvn->GetStartingVregValueNumberWide(v_reg);
+ if (old_value != kNoValue) {
+ InsertInitialValueHigh(v_reg + 1, old_value);
+ }
+ }
+ vreg_data_[v_reg].value = old_value;
+ }
+ } else {
+ DCHECK_LT(static_cast<size_t>(v_reg + 1), num_vregs_);
+ bool check_high = true;
+ if (vreg_data_[v_reg].value == kNoValue) {
+ uint16_t old_value = lvn->GetStartingVregValueNumberWide(v_reg);
+ if (old_value != kNoValue) {
+ InsertInitialValueHigh(v_reg + 1, old_value);
+ check_high = false; // High word has been processed.
+ } else {
+ // Maybe there was a narrow value before. Do not check for wide value in v_reg-1,
+ // that will be done only if we see a definition of v_reg-1, otherwise it's unnecessary.
+ old_value = lvn->GetStartingVregValueNumber(v_reg);
+ }
+ vreg_data_[v_reg].value = old_value;
+ }
+ if (check_high && vreg_data_[v_reg + 1].value == kNoValue) {
+ uint16_t old_value = lvn->GetStartingVregValueNumber(v_reg + 1);
+ if (old_value == kNoValue && static_cast<size_t>(v_reg + 2) < num_vregs_) {
+ // Maybe there was a wide value before.
+ old_value = lvn->GetStartingVregValueNumberWide(v_reg + 1);
+ if (old_value != kNoValue) {
+ InsertInitialValueHigh(v_reg + 2, old_value);
+ }
+ }
+ vreg_data_[v_reg + 1].value = old_value;
+ }
+ }
+}
+
+inline uint16_t GvnDeadCodeElimination::VRegChains::LastChange(int v_reg) {
+ DCHECK_LT(static_cast<size_t>(v_reg), num_vregs_);
+ return vreg_data_[v_reg].change;
+}
+
+inline uint16_t GvnDeadCodeElimination::VRegChains::CurrentValue(int v_reg) {
+ DCHECK_LT(static_cast<size_t>(v_reg), num_vregs_);
+ return vreg_data_[v_reg].value;
+}
+
+uint16_t GvnDeadCodeElimination::VRegChains::FindKillHead(int v_reg, uint16_t cutoff) {
+ uint16_t current_value = this->CurrentValue(v_reg);
+ DCHECK_NE(current_value, kNoValue);
+ uint16_t change = LastChange(v_reg);
+ DCHECK_LT(change, mir_data_.size());
+ DCHECK_GE(change, cutoff);
+ bool match_high_word = (mir_data_[change].vreg_def != v_reg);
+ do {
+ MIRData* data = &mir_data_[change];
+ DCHECK(data->vreg_def == v_reg || data->vreg_def + 1 == v_reg);
+ if (data->vreg_def == v_reg) { // Low word, use prev_value.
+ if (data->prev_value.value == current_value &&
+ match_high_word == data->low_def_over_high_word) {
+ break;
+ }
+ change = data->prev_value.change;
+ } else { // High word, use prev_value_high.
+ if (data->prev_value_high.value == current_value &&
+ match_high_word != data->high_def_over_low_word) {
+ break;
+ }
+ change = data->prev_value_high.change;
+ }
+ if (change < cutoff) {
+ change = kNPos;
+ }
+ } while (change != kNPos);
+ return change;
+}
+
+uint16_t GvnDeadCodeElimination::VRegChains::FindFirstChangeAfter(int v_reg,
+ uint16_t change) const {
+ DCHECK_LT(static_cast<size_t>(v_reg), num_vregs_);
+ DCHECK_LT(change, mir_data_.size());
+ uint16_t result = kNPos;
+ uint16_t search_change = vreg_data_[v_reg].change;
+ while (search_change != kNPos && search_change > change) {
+ result = search_change;
+ search_change = mir_data_[search_change].PrevChange(v_reg);
+ }
+ return result;
+}
+
+void GvnDeadCodeElimination::VRegChains::ReplaceChange(uint16_t old_change, uint16_t new_change) {
+ const MIRData* old_data = GetMIRData(old_change);
+ DCHECK(old_data->has_def);
+ int count = old_data->wide_def ? 2 : 1;
+ for (int v_reg = old_data->vreg_def, end = old_data->vreg_def + count; v_reg != end; ++v_reg) {
+ uint16_t next_change = FindFirstChangeAfter(v_reg, old_change);
+ if (next_change == kNPos) {
+ DCHECK_EQ(vreg_data_[v_reg].change, old_change);
+ vreg_data_[v_reg].change = new_change;
+ } else {
+ DCHECK_EQ(mir_data_[next_change].PrevChange(v_reg), old_change);
+ mir_data_[next_change].SetPrevChange(v_reg, new_change);
+ }
+ }
+}
+
+void GvnDeadCodeElimination::VRegChains::RemoveChange(uint16_t change) {
+ MIRData* data = &mir_data_[change];
+ DCHECK(data->has_def);
+ int count = data->wide_def ? 2 : 1;
+ for (int v_reg = data->vreg_def, end = data->vreg_def + count; v_reg != end; ++v_reg) {
+ uint16_t next_change = FindFirstChangeAfter(v_reg, change);
+ if (next_change == kNPos) {
+ DCHECK_EQ(vreg_data_[v_reg].change, change);
+ vreg_data_[v_reg] = (data->vreg_def == v_reg) ? data->prev_value : data->prev_value_high;
+ } else {
+ DCHECK_EQ(mir_data_[next_change].PrevChange(v_reg), change);
+ mir_data_[next_change].RemovePrevChange(v_reg, data);
+ }
+ }
+}
+
+inline bool GvnDeadCodeElimination::VRegChains::IsTopChange(uint16_t change) const {
+ DCHECK_LT(change, mir_data_.size());
+ const MIRData* data = &mir_data_[change];
+ DCHECK(data->has_def);
+ DCHECK_LT(data->wide_def ? data->vreg_def + 1u : data->vreg_def, num_vregs_);
+ return vreg_data_[data->vreg_def].change == change &&
+ (!data->wide_def || vreg_data_[data->vreg_def + 1u].change == change);
+}
+
+bool GvnDeadCodeElimination::VRegChains::IsSRegUsed(uint16_t first_change, uint16_t last_change,
+ int s_reg) const {
+ DCHECK_LE(first_change, last_change);
+ DCHECK_LE(last_change, mir_data_.size());
+ for (size_t c = first_change; c != last_change; ++c) {
+ SSARepresentation* ssa_rep = mir_data_[c].mir->ssa_rep;
+ for (int i = 0; i != ssa_rep->num_uses; ++i) {
+ if (ssa_rep->uses[i] == s_reg) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+void GvnDeadCodeElimination::VRegChains::RenameSRegUses(uint16_t first_change, uint16_t last_change,
+ int old_s_reg, int new_s_reg, bool wide) {
+ for (size_t c = first_change; c != last_change; ++c) {
+ SSARepresentation* ssa_rep = mir_data_[c].mir->ssa_rep;
+ for (int i = 0; i != ssa_rep->num_uses; ++i) {
+ if (ssa_rep->uses[i] == old_s_reg) {
+ ssa_rep->uses[i] = new_s_reg;
+ if (wide) {
+ ++i;
+ DCHECK_LT(i, ssa_rep->num_uses);
+ ssa_rep->uses[i] = new_s_reg + 1;
+ }
+ }
+ }
+ }
+}
+
+void GvnDeadCodeElimination::VRegChains::RenameVRegUses(uint16_t first_change, uint16_t last_change,
+ int old_s_reg, int old_v_reg,
+ int new_s_reg, int new_v_reg) {
+ for (size_t c = first_change; c != last_change; ++c) {
+ MIR* mir = mir_data_[c].mir;
+ if (IsInstructionBinOp2Addr(mir->dalvikInsn.opcode) &&
+ mir->ssa_rep->uses[0] == old_s_reg && old_v_reg != new_v_reg) {
+ // Rewrite binop_2ADDR with plain binop before doing the register rename.
+ ChangeBinOp2AddrToPlainBinOp(mir);
+ }
+ uint64_t df_attr = MIRGraph::GetDataFlowAttributes(mir);
+ size_t use = 0u;
+#define REPLACE_VREG(REG) \
+ if ((df_attr & DF_U##REG) != 0) { \
+ if (mir->ssa_rep->uses[use] == old_s_reg) { \
+ DCHECK_EQ(mir->dalvikInsn.v##REG, static_cast<uint32_t>(old_v_reg)); \
+ mir->dalvikInsn.v##REG = new_v_reg; \
+ mir->ssa_rep->uses[use] = new_s_reg; \
+ if ((df_attr & DF_##REG##_WIDE) != 0) { \
+ DCHECK_EQ(mir->ssa_rep->uses[use + 1], old_s_reg + 1); \
+ mir->ssa_rep->uses[use + 1] = new_s_reg + 1; \
+ } \
+ } \
+ use += ((df_attr & DF_##REG##_WIDE) != 0) ? 2 : 1; \
+ }
+ REPLACE_VREG(A)
+ REPLACE_VREG(B)
+ REPLACE_VREG(C)
+#undef REPLACE_VREG
+ // We may encounter an out-of-order Phi which we need to ignore, otherwise we should
+ // only be asked to rename registers specified by DF_UA, DF_UB and DF_UC.
+ DCHECK_EQ(use,
+ static_cast<int>(mir->dalvikInsn.opcode) == kMirOpPhi
+ ? 0u
+ : static_cast<size_t>(mir->ssa_rep->num_uses));
+ }
+}
+
+GvnDeadCodeElimination::GvnDeadCodeElimination(const GlobalValueNumbering* gvn,
+ ScopedArenaAllocator* alloc)
+ : gvn_(gvn),
+ mir_graph_(gvn_->GetMirGraph()),
+ vreg_chains_(mir_graph_->GetNumOfCodeAndTempVRs(), alloc),
+ bb_(nullptr),
+ lvn_(nullptr),
+ no_uses_all_since_(0u),
+ unused_vregs_(new (alloc) ArenaBitVector(alloc, vreg_chains_.NumVRegs(), false)),
+ vregs_to_kill_(new (alloc) ArenaBitVector(alloc, vreg_chains_.NumVRegs(), false)),
+ kill_heads_(alloc->AllocArray<uint16_t>(vreg_chains_.NumVRegs(), kArenaAllocMisc)),
+ changes_to_kill_(alloc->Adapter()),
+ dependent_vregs_(new (alloc) ArenaBitVector(alloc, vreg_chains_.NumVRegs(), false)) {
+ changes_to_kill_.reserve(16u);
+}
+
+void GvnDeadCodeElimination::Apply(BasicBlock* bb) {
+ bb_ = bb;
+ lvn_ = gvn_->GetLvn(bb->id);
+
+ RecordPass();
+ BackwardPass();
+
+ DCHECK_EQ(no_uses_all_since_, 0u);
+ lvn_ = nullptr;
+ bb_ = nullptr;
+}
+
+void GvnDeadCodeElimination::RecordPass() {
+ // Record MIRs with vreg definition data, eliminate single instructions.
+ vreg_chains_.Reset();
+ DCHECK_EQ(no_uses_all_since_, 0u);
+ for (MIR* mir = bb_->first_mir_insn; mir != nullptr; mir = mir->next) {
+ if (RecordMIR(mir)) {
+ RecordPassTryToKillOverwrittenMoveOrMoveSrc();
+ RecordPassTryToKillLastMIR();
+ }
+ }
+}
+
+void GvnDeadCodeElimination::BackwardPass() {
+ // Now process MIRs in reverse order, trying to eliminate them.
+ unused_vregs_->ClearAllBits(); // Implicitly depend on all vregs at the end of BB.
+ while (vreg_chains_.NumMIRs() != 0u) {
+ if (BackwardPassTryToKillLastMIR()) {
+ continue;
+ }
+ BackwardPassProcessLastMIR();
+ }
+}
+
+void GvnDeadCodeElimination::KillMIR(MIRData* data) {
+ DCHECK(!data->must_keep);
+ DCHECK(!data->uses_all_vregs);
+ DCHECK(data->has_def);
+ DCHECK(data->mir->ssa_rep->num_defs == 1 || data->mir->ssa_rep->num_defs == 2);
+
+ KillMIR(data->mir);
+ data->has_def = false;
+ data->is_move = false;
+ data->is_move_src = false;
+}
+
+void GvnDeadCodeElimination::KillMIR(MIR* mir) {
+ mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop);
+ mir->ssa_rep->num_uses = 0;
+ mir->ssa_rep->num_defs = 0;
+}
+
+void GvnDeadCodeElimination::ChangeBinOp2AddrToPlainBinOp(MIR* mir) {
+ mir->dalvikInsn.vC = mir->dalvikInsn.vB;
+ mir->dalvikInsn.vB = mir->dalvikInsn.vA;
+ mir->dalvikInsn.opcode = static_cast<Instruction::Code>(
+ mir->dalvikInsn.opcode - Instruction::ADD_INT_2ADDR + Instruction::ADD_INT);
+}
+
+MIR* GvnDeadCodeElimination::CreatePhi(int s_reg, bool fp) {
+ int v_reg = mir_graph_->SRegToVReg(s_reg);
+ MIR* phi = mir_graph_->NewMIR();
+ phi->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpPhi);
+ phi->dalvikInsn.vA = v_reg;
+ phi->offset = bb_->start_offset;
+ phi->m_unit_index = 0; // Arbitrarily assign all Phi nodes to outermost method.
+
+ phi->ssa_rep = static_cast<struct SSARepresentation *>(mir_graph_->GetArena()->Alloc(
+ sizeof(SSARepresentation), kArenaAllocDFInfo));
+
+ mir_graph_->AllocateSSADefData(phi, 1);
+ phi->ssa_rep->defs[0] = s_reg;
+ phi->ssa_rep->fp_def[0] = fp;
+
+ size_t num_uses = bb_->predecessors.size();
+ mir_graph_->AllocateSSAUseData(phi, num_uses);
+ std::fill_n(phi->ssa_rep->fp_use, num_uses, fp);
+ size_t idx = 0u;
+ for (BasicBlockId pred_id : bb_->predecessors) {
+ BasicBlock* pred_bb = mir_graph_->GetBasicBlock(pred_id);
+ DCHECK(pred_bb != nullptr);
+ phi->ssa_rep->uses[idx] = pred_bb->data_flow_info->vreg_to_ssa_map_exit[v_reg];
+ DCHECK_NE(phi->ssa_rep->uses[idx], INVALID_SREG);
+ idx++;
+ }
+
+ phi->meta.phi_incoming = static_cast<BasicBlockId*>(mir_graph_->GetArena()->Alloc(
+ sizeof(BasicBlockId) * num_uses, kArenaAllocDFInfo));
+ std::copy(bb_->predecessors.begin(), bb_->predecessors.end(), phi->meta.phi_incoming);
+ bb_->PrependMIR(phi);
+ return phi;
+}
+
+MIR* GvnDeadCodeElimination::RenameSRegDefOrCreatePhi(uint16_t def_change, uint16_t last_change,
+ MIR* mir_to_kill) {
+ DCHECK(mir_to_kill->ssa_rep->num_defs == 1 || mir_to_kill->ssa_rep->num_defs == 2);
+ bool wide = (mir_to_kill->ssa_rep->num_defs != 1);
+ int new_s_reg = mir_to_kill->ssa_rep->defs[0];
+
+ // Just before we kill mir_to_kill, we need to replace the previous SSA reg assigned to the
+ // same dalvik reg to keep consistency with subsequent instructions. However, if there's no
+ // defining MIR for that dalvik reg, the preserved value must come from its predecessors
+ // and we need to create a new Phi (a degenerate Phi if there's only a single predecessor).
+ if (def_change == kNPos) {
+ bool fp = mir_to_kill->ssa_rep->fp_def[0];
+ if (wide) {
+ DCHECK_EQ(new_s_reg + 1, mir_to_kill->ssa_rep->defs[1]);
+ DCHECK_EQ(fp, mir_to_kill->ssa_rep->fp_def[1]);
+ DCHECK_EQ(mir_graph_->SRegToVReg(new_s_reg) + 1, mir_graph_->SRegToVReg(new_s_reg + 1));
+ CreatePhi(new_s_reg + 1, fp); // High word Phi.
+ }
+ return CreatePhi(new_s_reg, fp);
+ } else {
+ DCHECK_LT(def_change, last_change);
+ DCHECK_LE(last_change, vreg_chains_.NumMIRs());
+ MIRData* def_data = vreg_chains_.GetMIRData(def_change);
+ DCHECK(def_data->has_def);
+ int old_s_reg = def_data->mir->ssa_rep->defs[0];
+ DCHECK_NE(old_s_reg, new_s_reg);
+ DCHECK_EQ(mir_graph_->SRegToVReg(old_s_reg), mir_graph_->SRegToVReg(new_s_reg));
+ def_data->mir->ssa_rep->defs[0] = new_s_reg;
+ if (wide) {
+ if (static_cast<int>(def_data->mir->dalvikInsn.opcode) == kMirOpPhi) {
+ // Currently the high word Phi is always located after the low word Phi.
+ MIR* phi_high = def_data->mir->next;
+ DCHECK(phi_high != nullptr && static_cast<int>(phi_high->dalvikInsn.opcode) == kMirOpPhi);
+ DCHECK_EQ(phi_high->ssa_rep->defs[0], old_s_reg + 1);
+ phi_high->ssa_rep->defs[0] = new_s_reg + 1;
+ } else {
+ DCHECK_EQ(def_data->mir->ssa_rep->defs[1], old_s_reg + 1);
+ def_data->mir->ssa_rep->defs[1] = new_s_reg + 1;
+ }
+ }
+ vreg_chains_.RenameSRegUses(def_change + 1u, last_change, old_s_reg, new_s_reg, wide);
+ return nullptr;
+ }
+}
+
+
+void GvnDeadCodeElimination::BackwardPassProcessLastMIR() {
+ MIRData* data = vreg_chains_.LastMIRData();
+ if (data->uses_all_vregs) {
+ DCHECK(data->must_keep);
+ unused_vregs_->ClearAllBits();
+ DCHECK_EQ(no_uses_all_since_, vreg_chains_.NumMIRs());
+ --no_uses_all_since_;
+ while (no_uses_all_since_ != 0u &&
+ !vreg_chains_.GetMIRData(no_uses_all_since_ - 1u)->uses_all_vregs) {
+ --no_uses_all_since_;
+ }
+ } else {
+ if (data->has_def) {
+ unused_vregs_->SetBit(data->vreg_def);
+ if (data->wide_def) {
+ unused_vregs_->SetBit(data->vreg_def + 1);
+ }
+ }
+ for (int i = 0, num_uses = data->mir->ssa_rep->num_uses; i != num_uses; ++i) {
+ int v_reg = mir_graph_->SRegToVReg(data->mir->ssa_rep->uses[i]);
+ unused_vregs_->ClearBit(v_reg);
+ }
+ }
+ vreg_chains_.RemoveLastMIRData();
+}
+
+void GvnDeadCodeElimination::RecordPassKillMoveByRenamingSrcDef(uint16_t src_change,
+ uint16_t move_change) {
+ DCHECK_LT(src_change, move_change);
+ MIRData* src_data = vreg_chains_.GetMIRData(src_change);
+ MIRData* move_data = vreg_chains_.GetMIRData(move_change);
+ DCHECK(src_data->is_move_src);
+ DCHECK_EQ(src_data->wide_def, move_data->wide_def);
+ DCHECK(move_data->prev_value.change == kNPos || move_data->prev_value.change <= src_change);
+ DCHECK(!move_data->wide_def || move_data->prev_value_high.change == kNPos ||
+ move_data->prev_value_high.change <= src_change);
+
+ int old_s_reg = src_data->mir->ssa_rep->defs[0];
+ // NOTE: old_s_reg may differ from move_data->mir->ssa_rep->uses[0]; value names must match.
+ int new_s_reg = move_data->mir->ssa_rep->defs[0];
+ DCHECK_NE(old_s_reg, new_s_reg);
+
+ if (IsInstructionBinOp2Addr(src_data->mir->dalvikInsn.opcode) &&
+ src_data->vreg_def != move_data->vreg_def) {
+ // Rewrite binop_2ADDR with plain binop before doing the register rename.
+ ChangeBinOp2AddrToPlainBinOp(src_data->mir);
+ }
+ // Remove src_change from the vreg chain(s).
+ vreg_chains_.RemoveChange(src_change);
+ // Replace the move_change with the src_change, copying all necessary data.
+ src_data->is_move_src = move_data->is_move_src;
+ src_data->low_def_over_high_word = move_data->low_def_over_high_word;
+ src_data->high_def_over_low_word = move_data->high_def_over_low_word;
+ src_data->vreg_def = move_data->vreg_def;
+ src_data->prev_value = move_data->prev_value;
+ src_data->prev_value_high = move_data->prev_value_high;
+ src_data->mir->dalvikInsn.vA = move_data->vreg_def;
+ src_data->mir->ssa_rep->defs[0] = new_s_reg;
+ if (move_data->wide_def) {
+ DCHECK_EQ(src_data->mir->ssa_rep->defs[1], old_s_reg + 1);
+ src_data->mir->ssa_rep->defs[1] = new_s_reg + 1;
+ }
+ vreg_chains_.ReplaceChange(move_change, src_change);
+
+ // Rename uses and kill the move.
+ vreg_chains_.RenameVRegUses(src_change + 1u, vreg_chains_.NumMIRs(),
+ old_s_reg, mir_graph_->SRegToVReg(old_s_reg),
+ new_s_reg, mir_graph_->SRegToVReg(new_s_reg));
+ KillMIR(move_data);
+}
+
+void GvnDeadCodeElimination::RecordPassTryToKillOverwrittenMoveOrMoveSrc(uint16_t check_change) {
+ MIRData* data = vreg_chains_.GetMIRData(check_change);
+ DCHECK(data->is_move || data->is_move_src);
+ int32_t dest_s_reg = data->mir->ssa_rep->defs[0];
+
+ if (data->is_move) {
+ // Check if source vreg has changed since the MOVE.
+ int32_t src_s_reg = data->mir->ssa_rep->uses[0];
+ uint32_t src_v_reg = mir_graph_->SRegToVReg(src_s_reg);
+ uint16_t src_change = vreg_chains_.FindFirstChangeAfter(src_v_reg, check_change);
+ bool wide = data->wide_def;
+ if (wide) {
+ uint16_t src_change_high = vreg_chains_.FindFirstChangeAfter(src_v_reg + 1, check_change);
+ if (src_change_high != kNPos && (src_change == kNPos || src_change_high < src_change)) {
+ src_change = src_change_high;
+ }
+ }
+ if (src_change == kNPos ||
+ !vreg_chains_.IsSRegUsed(src_change + 1u, vreg_chains_.NumMIRs(), dest_s_reg)) {
+ // We can simply change all uses of dest to src.
+ size_t rename_end = (src_change != kNPos) ? src_change + 1u : vreg_chains_.NumMIRs();
+ vreg_chains_.RenameVRegUses(check_change + 1u, rename_end,
+ dest_s_reg, mir_graph_->SRegToVReg(dest_s_reg),
+ src_s_reg, mir_graph_->SRegToVReg(src_s_reg));
+
+ // Now, remove the MOVE from the vreg chain(s) and kill it.
+ vreg_chains_.RemoveChange(check_change);
+ KillMIR(data);
+ return;
+ }
+ }
+
+ if (data->is_move_src) {
+ // Try to find a MOVE to a vreg that wasn't changed since check_change.
+ uint16_t value_name =
+ data->wide_def ? lvn_->GetSregValueWide(dest_s_reg) : lvn_->GetSregValue(dest_s_reg);
+ for (size_t c = check_change + 1u, size = vreg_chains_.NumMIRs(); c != size; ++c) {
+ MIRData* d = vreg_chains_.GetMIRData(c);
+ if (d->is_move && d->wide_def == data->wide_def &&
+ (d->prev_value.change == kNPos || d->prev_value.change <= check_change) &&
+ (!d->wide_def ||
+ d->prev_value_high.change == kNPos || d->prev_value_high.change <= check_change)) {
+ // Compare value names to find move to move.
+ int32_t src_s_reg = d->mir->ssa_rep->uses[0];
+ uint16_t src_name =
+ (d->wide_def ? lvn_->GetSregValueWide(src_s_reg) : lvn_->GetSregValue(src_s_reg));
+ if (value_name == src_name) {
+ RecordPassKillMoveByRenamingSrcDef(check_change, c);
+ return;
+ }
+ }
+ }
+ }
+}
+
+void GvnDeadCodeElimination::RecordPassTryToKillOverwrittenMoveOrMoveSrc() {
+ // Check if we're overwriting the result of a move or the definition of a source of a move.
+ // For MOVE_WIDE, we may be overwriting partially; if that's the case, check that the other
+ // word wasn't previously overwritten - we would have tried to rename back then.
+ MIRData* data = vreg_chains_.LastMIRData();
+ if (!data->has_def) {
+ return;
+ }
+ // NOTE: Instructions such as new-array implicitly use all vregs (if they throw) but they can
+ // define a move source which can be renamed. Therefore we allow the checked change to be the
+ // change before no_uses_all_since_. This has no effect on moves as they never use all vregs.
+ if (data->prev_value.change != kNPos && data->prev_value.change + 1u >= no_uses_all_since_) {
+ MIRData* check_data = vreg_chains_.GetMIRData(data->prev_value.change);
+ bool try_to_kill = false;
+ if (!check_data->is_move && !check_data->is_move_src) {
+ DCHECK(!try_to_kill);
+ } else if (!check_data->wide_def) {
+ // Narrow move; always fully overwritten by the last MIR.
+ try_to_kill = true;
+ } else if (data->low_def_over_high_word) {
+ // Overwriting only the high word; is the low word still valid?
+ DCHECK_EQ(check_data->vreg_def + 1u, data->vreg_def);
+ if (vreg_chains_.LastChange(check_data->vreg_def) == data->prev_value.change) {
+ try_to_kill = true;
+ }
+ } else if (!data->wide_def) {
+ // Overwriting only the low word, is the high word still valid?
+ if (vreg_chains_.LastChange(data->vreg_def + 1) == data->prev_value.change) {
+ try_to_kill = true;
+ }
+ } else {
+ // Overwriting both words; was the high word still from the same move?
+ if (data->prev_value_high.change == data->prev_value.change) {
+ try_to_kill = true;
+ }
+ }
+ if (try_to_kill) {
+ RecordPassTryToKillOverwrittenMoveOrMoveSrc(data->prev_value.change);
+ }
+ }
+ if (data->wide_def && data->high_def_over_low_word &&
+ data->prev_value_high.change != kNPos &&
+ data->prev_value_high.change + 1u >= no_uses_all_since_) {
+ MIRData* check_data = vreg_chains_.GetMIRData(data->prev_value_high.change);
+ bool try_to_kill = false;
+ if (!check_data->is_move && !check_data->is_move_src) {
+ DCHECK(!try_to_kill);
+ } else if (!check_data->wide_def) {
+ // Narrow move; always fully overwritten by the last MIR.
+ try_to_kill = true;
+ } else if (vreg_chains_.LastChange(check_data->vreg_def + 1) ==
+ data->prev_value_high.change) {
+ // High word is still valid.
+ try_to_kill = true;
+ }
+ if (try_to_kill) {
+ RecordPassTryToKillOverwrittenMoveOrMoveSrc(data->prev_value_high.change);
+ }
+ }
+}
+
+void GvnDeadCodeElimination::RecordPassTryToKillLastMIR() {
+ MIRData* last_data = vreg_chains_.LastMIRData();
+ if (last_data->must_keep) {
+ return;
+ }
+ if (UNLIKELY(!last_data->has_def)) {
+ // Must be an eliminated MOVE. Drop its data and data of all eliminated MIRs before it.
+ vreg_chains_.RemoveTrailingNops();
+ return;
+ }
+
+ // Try to kill a sequence of consecutive definitions of the same vreg. Allow mixing
+ // wide and non-wide defs; consider high word dead if low word has been overwritten.
+ uint16_t current_value = vreg_chains_.CurrentValue(last_data->vreg_def);
+ uint16_t change = vreg_chains_.NumMIRs() - 1u;
+ MIRData* data = last_data;
+ while (data->prev_value.value != current_value) {
+ --change;
+ if (data->prev_value.change == kNPos || data->prev_value.change != change) {
+ return;
+ }
+ data = vreg_chains_.GetMIRData(data->prev_value.change);
+ if (data->must_keep || !data->has_def || data->vreg_def != last_data->vreg_def) {
+ return;
+ }
+ }
+
+ bool wide = last_data->wide_def;
+ if (wide) {
+ // Check that the low word is valid.
+ if (data->low_def_over_high_word) {
+ return;
+ }
+ // Check that the high word is valid.
+ MIRData* high_data = data;
+ if (!high_data->wide_def) {
+ uint16_t high_change = vreg_chains_.FindFirstChangeAfter(data->vreg_def + 1, change);
+ DCHECK_NE(high_change, kNPos);
+ high_data = vreg_chains_.GetMIRData(high_change);
+ DCHECK_EQ(high_data->vreg_def, data->vreg_def);
+ }
+ if (high_data->prev_value_high.value != current_value || high_data->high_def_over_low_word) {
+ return;
+ }
+ }
+
+ MIR* phi = RenameSRegDefOrCreatePhi(data->prev_value.change, change, last_data->mir);
+ for (size_t i = 0, count = vreg_chains_.NumMIRs() - change; i != count; ++i) {
+ KillMIR(vreg_chains_.LastMIRData()->mir);
+ vreg_chains_.RemoveLastMIRData();
+ }
+ if (phi != nullptr) {
+ // Though the Phi has been added to the beginning, we can put the MIRData at the end.
+ vreg_chains_.AddMIRWithDef(phi, phi->dalvikInsn.vA, wide, current_value);
+ // Reset the previous value to avoid eventually eliminating the Phi itself (unless unused).
+ last_data = vreg_chains_.LastMIRData();
+ last_data->prev_value.value = kNoValue;
+ last_data->prev_value_high.value = kNoValue;
+ }
+}
+
+uint16_t GvnDeadCodeElimination::FindChangesToKill(uint16_t first_change, uint16_t last_change) {
+ // Process dependencies for changes in range [first_change, last_change) and record all
+ // changes that we need to kill. Return kNPos if there's a dependent change that must be
+ // kept unconditionally; otherwise the end of the range processed before encountering
+ // a change that defines a dalvik reg that we need to keep (last_change on full success).
+ changes_to_kill_.clear();
+ dependent_vregs_->ClearAllBits();
+ for (size_t change = first_change; change != last_change; ++change) {
+ MIRData* data = vreg_chains_.GetMIRData(change);
+ DCHECK(!data->uses_all_vregs);
+ bool must_not_depend = data->must_keep;
+ bool depends = false;
+ // Check if the MIR defines a vreg we're trying to eliminate.
+ if (data->has_def && vregs_to_kill_->IsBitSet(data->vreg_def)) {
+ if (change < kill_heads_[data->vreg_def]) {
+ must_not_depend = true;
+ } else {
+ depends = true;
+ }
+ }
+ if (data->has_def && data->wide_def && vregs_to_kill_->IsBitSet(data->vreg_def + 1)) {
+ if (change < kill_heads_[data->vreg_def + 1]) {
+ must_not_depend = true;
+ } else {
+ depends = true;
+ }
+ }
+ if (!depends) {
+ // Check for dependency through SSA reg uses.
+ SSARepresentation* ssa_rep = data->mir->ssa_rep;
+ for (int i = 0; i != ssa_rep->num_uses; ++i) {
+ if (dependent_vregs_->IsBitSet(mir_graph_->SRegToVReg(ssa_rep->uses[i]))) {
+ depends = true;
+ break;
+ }
+ }
+ }
+ // Now check if we can eliminate the insn if we need to.
+ if (depends && must_not_depend) {
+ return kNPos;
+ }
+ if (depends && data->has_def &&
+ vreg_chains_.IsTopChange(change) && !vregs_to_kill_->IsBitSet(data->vreg_def) &&
+ !unused_vregs_->IsBitSet(data->vreg_def) &&
+ (!data->wide_def || !unused_vregs_->IsBitSet(data->vreg_def + 1))) {
+ // This is a top change but neither unnecessary nor one of the top kill changes.
+ return change;
+ }
+ // Finally, update the data.
+ if (depends) {
+ changes_to_kill_.push_back(change);
+ if (data->has_def) {
+ dependent_vregs_->SetBit(data->vreg_def);
+ if (data->wide_def) {
+ dependent_vregs_->SetBit(data->vreg_def + 1);
+ }
+ }
+ } else {
+ if (data->has_def) {
+ dependent_vregs_->ClearBit(data->vreg_def);
+ if (data->wide_def) {
+ dependent_vregs_->ClearBit(data->vreg_def + 1);
+ }
+ }
+ }
+ }
+ return last_change;
+}
+
+void GvnDeadCodeElimination::BackwardPassTryToKillRevertVRegs() {
+}
+
+bool GvnDeadCodeElimination::BackwardPassTryToKillLastMIR() {
+ MIRData* last_data = vreg_chains_.LastMIRData();
+ if (last_data->must_keep) {
+ return false;
+ }
+ DCHECK(!last_data->uses_all_vregs);
+ if (!last_data->has_def) {
+ // Previously eliminated.
+ DCHECK_EQ(static_cast<int>(last_data->mir->dalvikInsn.opcode), static_cast<int>(kMirOpNop));
+ vreg_chains_.RemoveTrailingNops();
+ return true;
+ }
+ if (unused_vregs_->IsBitSet(last_data->vreg_def) ||
+ (last_data->wide_def && unused_vregs_->IsBitSet(last_data->vreg_def + 1))) {
+ if (last_data->wide_def) {
+ // For wide defs, one of the vregs may still be considered needed; fix that.
+ unused_vregs_->SetBit(last_data->vreg_def);
+ unused_vregs_->SetBit(last_data->vreg_def + 1);
+ }
+ KillMIR(last_data->mir);
+ vreg_chains_.RemoveLastMIRData();
+ return true;
+ }
+
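+ // The last def is not trivially unused. Try to revert its vreg(s) to their previous values:
+ // find the kill head for each vreg and collect the dependent changes that would have to be
+ // killed with it, allowing up to kMaxNumTopChangesToKill top changes to be reverted.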
+ vregs_to_kill_->ClearAllBits();
+ size_t num_mirs = vreg_chains_.NumMIRs();
+ DCHECK_NE(num_mirs, 0u);
+ uint16_t kill_change = num_mirs - 1u;
+ uint16_t start = num_mirs;
+ size_t num_killed_top_changes = 0u;
+ while (num_killed_top_changes != kMaxNumTopChangesToKill &&
+ kill_change != kNPos && kill_change != num_mirs) {
+ ++num_killed_top_changes;
+
+ DCHECK(vreg_chains_.IsTopChange(kill_change));
+ MIRData* data = vreg_chains_.GetMIRData(kill_change);
+ int count = data->wide_def ? 2 : 1;
+ for (int v_reg = data->vreg_def, end = data->vreg_def + count; v_reg != end; ++v_reg) {
+ uint16_t kill_head = vreg_chains_.FindKillHead(v_reg, no_uses_all_since_);
+ if (kill_head == kNPos) {
+ return false;
+ }
+ kill_heads_[v_reg] = kill_head;
+ vregs_to_kill_->SetBit(v_reg);
+ start = std::min(start, kill_head);
+ }
+ DCHECK_LT(start, vreg_chains_.NumMIRs());
+
+ kill_change = FindChangesToKill(start, num_mirs);
+ }
+
+ if (kill_change != num_mirs) {
+ return false;
+ }
+
+ // Kill all MIRs marked as dependent.
+ for (uint32_t v_reg : vregs_to_kill_->Indexes()) {
+ // Rename s_regs or create Phi only once for each MIR (only for low word).
+ MIRData* data = vreg_chains_.GetMIRData(vreg_chains_.LastChange(v_reg));
+ DCHECK(data->has_def);
+ if (data->vreg_def == v_reg) {
+ MIRData* kill_head_data = vreg_chains_.GetMIRData(kill_heads_[v_reg]);
+ RenameSRegDefOrCreatePhi(kill_head_data->PrevChange(v_reg), num_mirs, data->mir);
+ } else {
+ DCHECK_EQ(data->vreg_def + 1u, v_reg);
+ DCHECK_EQ(vreg_chains_.GetMIRData(kill_heads_[v_reg - 1u])->PrevChange(v_reg - 1u),
+ vreg_chains_.GetMIRData(kill_heads_[v_reg])->PrevChange(v_reg));
+ }
+ }
+ unused_vregs_->Union(vregs_to_kill_);
+ for (auto it = changes_to_kill_.rbegin(), end = changes_to_kill_.rend(); it != end; ++it) {
+ MIRData* data = vreg_chains_.GetMIRData(*it);
+ DCHECK(!data->must_keep);
+ DCHECK(data->has_def);
+ vreg_chains_.RemoveChange(*it);
+ KillMIR(data);
+ }
+
+ vreg_chains_.RemoveTrailingNops();
+ return true;
+}
+
+bool GvnDeadCodeElimination::RecordMIR(MIR* mir) {
+ bool must_keep = false;
+ bool uses_all_vregs = false;
+ bool is_move = false;
+ uint16_t opcode = mir->dalvikInsn.opcode;
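+ // Classify the instruction: whether it must be kept, whether it implicitly depends on the
+ // values of all vregs (it can throw or branch, so the vregs must be correct at that point),
+ // and whether it's a MOVE that we may try to eliminate by renaming.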
+ switch (opcode) {
+ case kMirOpPhi: {
+ // We can't recognize wide variables in a Phi from num_defs == 2 because a wide value
+ // gets two separate Phis instead.
+ DCHECK_EQ(mir->ssa_rep->num_defs, 1);
+ int s_reg = mir->ssa_rep->defs[0];
+ bool wide = false;
+ uint16_t new_value = lvn_->GetSregValue(s_reg);
+ if (new_value == kNoValue) {
+ wide = true;
+ new_value = lvn_->GetSregValueWide(s_reg);
+ if (new_value == kNoValue) {
+ return false; // Ignore the high word Phi.
+ }
+ }
+
+ int v_reg = mir_graph_->SRegToVReg(s_reg);
+ DCHECK_EQ(vreg_chains_.CurrentValue(v_reg), kNoValue); // No previous def for v_reg.
+ if (wide) {
+ DCHECK_EQ(vreg_chains_.CurrentValue(v_reg + 1), kNoValue);
+ }
+ vreg_chains_.AddMIRWithDef(mir, v_reg, wide, new_value);
+ return true; // Avoid the common processing.
+ }
+
+ case kMirOpNop:
+ case Instruction::NOP:
+ // Don't record NOPs.
+ return false;
+
+ case kMirOpCheck:
+ must_keep = true;
+ uses_all_vregs = true;
+ break;
+
+ case Instruction::RETURN_VOID:
+ case Instruction::RETURN:
+ case Instruction::RETURN_OBJECT:
+ case Instruction::RETURN_WIDE:
+ case Instruction::GOTO:
+ case Instruction::GOTO_16:
+ case Instruction::GOTO_32:
+ case Instruction::PACKED_SWITCH:
+ case Instruction::SPARSE_SWITCH:
+ case Instruction::IF_EQ:
+ case Instruction::IF_NE:
+ case Instruction::IF_LT:
+ case Instruction::IF_GE:
+ case Instruction::IF_GT:
+ case Instruction::IF_LE:
+ case Instruction::IF_EQZ:
+ case Instruction::IF_NEZ:
+ case Instruction::IF_LTZ:
+ case Instruction::IF_GEZ:
+ case Instruction::IF_GTZ:
+ case Instruction::IF_LEZ:
+ case kMirOpFusedCmplFloat:
+ case kMirOpFusedCmpgFloat:
+ case kMirOpFusedCmplDouble:
+ case kMirOpFusedCmpgDouble:
+ case kMirOpFusedCmpLong:
+ must_keep = true;
+ uses_all_vregs = true; // Keep the implicit dependencies on all vregs.
+ break;
+
+ case Instruction::CONST_CLASS:
+ case Instruction::CONST_STRING:
+ case Instruction::CONST_STRING_JUMBO:
+ // NOTE: We currently treat CONST_CLASS, CONST_STRING and CONST_STRING_JUMBO as throwing,
+ // but we could conceivably try to eliminate those exceptions if we're retrieving the
+ // class/string repeatedly.
+ must_keep = true;
+ uses_all_vregs = true;
+ break;
+
+ case Instruction::MONITOR_ENTER:
+ case Instruction::MONITOR_EXIT:
+ // We can actually try to optimize across the acquire operation of MONITOR_ENTER;
+ // the value names provided by GVN reflect the possible changes to memory visibility.
+ // NOTE: In ART, MONITOR_ENTER and MONITOR_EXIT can throw only NPE.
+ must_keep = true;
+ uses_all_vregs = (mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0;
+ break;
+
+ case Instruction::INVOKE_DIRECT:
+ case Instruction::INVOKE_DIRECT_RANGE:
+ case Instruction::INVOKE_VIRTUAL:
+ case Instruction::INVOKE_VIRTUAL_RANGE:
+ case Instruction::INVOKE_SUPER:
+ case Instruction::INVOKE_SUPER_RANGE:
+ case Instruction::INVOKE_INTERFACE:
+ case Instruction::INVOKE_INTERFACE_RANGE:
+ case Instruction::INVOKE_STATIC:
+ case Instruction::INVOKE_STATIC_RANGE:
+ case Instruction::CHECK_CAST:
+ case Instruction::THROW:
+ case Instruction::FILLED_NEW_ARRAY:
+ case Instruction::FILLED_NEW_ARRAY_RANGE:
+ case Instruction::FILL_ARRAY_DATA:
+ must_keep = true;
+ uses_all_vregs = true;
+ break;
+
+ case Instruction::NEW_INSTANCE:
+ case Instruction::NEW_ARRAY:
+ must_keep = true;
+ uses_all_vregs = true;
+ break;
+
+ case kMirOpNullCheck:
+ DCHECK_EQ(mir->ssa_rep->num_uses, 1);
+ if ((mir->optimization_flags & MIR_IGNORE_NULL_CHECK) != 0) {
+ mir->ssa_rep->num_uses = 0;
+ mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop);
+ return false;
+ }
+ must_keep = true;
+ uses_all_vregs = true;
+ break;
+
+ case Instruction::MOVE_RESULT:
+ case Instruction::MOVE_RESULT_OBJECT:
+ case Instruction::MOVE_RESULT_WIDE:
+ break;
+
+ case Instruction::INSTANCE_OF:
+ break;
+
+ case Instruction::MOVE_EXCEPTION:
+ must_keep = true;
+ break;
+
+ case kMirOpCopy:
+ case Instruction::MOVE:
+ case Instruction::MOVE_FROM16:
+ case Instruction::MOVE_16:
+ case Instruction::MOVE_WIDE:
+ case Instruction::MOVE_WIDE_FROM16:
+ case Instruction::MOVE_WIDE_16:
+ case Instruction::MOVE_OBJECT:
+ case Instruction::MOVE_OBJECT_FROM16:
+ case Instruction::MOVE_OBJECT_16: {
+ is_move = true;
+ // If the MIR defining src vreg is known, allow renaming all uses of src vreg to dest vreg
+ // while updating the defining MIR to directly define dest vreg. However, changing Phi's
+ // def this way doesn't work without changing MIRs in other BBs.
+ int src_v_reg = mir_graph_->SRegToVReg(mir->ssa_rep->uses[0]);
+ int src_change = vreg_chains_.LastChange(src_v_reg);
+ if (src_change != kNPos) {
+ MIRData* src_data = vreg_chains_.GetMIRData(src_change);
+ if (static_cast<int>(src_data->mir->dalvikInsn.opcode) != kMirOpPhi) {
+ src_data->is_move_src = true;
+ }
+ }
+ break;
+ }
+
+ case Instruction::CONST_4:
+ case Instruction::CONST_16:
+ case Instruction::CONST:
+ case Instruction::CONST_HIGH16:
+ case Instruction::CONST_WIDE_16:
+ case Instruction::CONST_WIDE_32:
+ case Instruction::CONST_WIDE:
+ case Instruction::CONST_WIDE_HIGH16:
+ case Instruction::ARRAY_LENGTH:
+ case Instruction::CMPL_FLOAT:
+ case Instruction::CMPG_FLOAT:
+ case Instruction::CMPL_DOUBLE:
+ case Instruction::CMPG_DOUBLE:
+ case Instruction::CMP_LONG:
+ case Instruction::NEG_INT:
+ case Instruction::NOT_INT:
+ case Instruction::NEG_LONG:
+ case Instruction::NOT_LONG:
+ case Instruction::NEG_FLOAT:
+ case Instruction::NEG_DOUBLE:
+ case Instruction::INT_TO_LONG:
+ case Instruction::INT_TO_FLOAT:
+ case Instruction::INT_TO_DOUBLE:
+ case Instruction::LONG_TO_INT:
+ case Instruction::LONG_TO_FLOAT:
+ case Instruction::LONG_TO_DOUBLE:
+ case Instruction::FLOAT_TO_INT:
+ case Instruction::FLOAT_TO_LONG:
+ case Instruction::FLOAT_TO_DOUBLE:
+ case Instruction::DOUBLE_TO_INT:
+ case Instruction::DOUBLE_TO_LONG:
+ case Instruction::DOUBLE_TO_FLOAT:
+ case Instruction::INT_TO_BYTE:
+ case Instruction::INT_TO_CHAR:
+ case Instruction::INT_TO_SHORT:
+ case Instruction::ADD_INT:
+ case Instruction::SUB_INT:
+ case Instruction::MUL_INT:
+ case Instruction::AND_INT:
+ case Instruction::OR_INT:
+ case Instruction::XOR_INT:
+ case Instruction::SHL_INT:
+ case Instruction::SHR_INT:
+ case Instruction::USHR_INT:
+ case Instruction::ADD_LONG:
+ case Instruction::SUB_LONG:
+ case Instruction::MUL_LONG:
+ case Instruction::AND_LONG:
+ case Instruction::OR_LONG:
+ case Instruction::XOR_LONG:
+ case Instruction::SHL_LONG:
+ case Instruction::SHR_LONG:
+ case Instruction::USHR_LONG:
+ case Instruction::ADD_FLOAT:
+ case Instruction::SUB_FLOAT:
+ case Instruction::MUL_FLOAT:
+ case Instruction::DIV_FLOAT:
+ case Instruction::REM_FLOAT:
+ case Instruction::ADD_DOUBLE:
+ case Instruction::SUB_DOUBLE:
+ case Instruction::MUL_DOUBLE:
+ case Instruction::DIV_DOUBLE:
+ case Instruction::REM_DOUBLE:
+ case Instruction::ADD_INT_2ADDR:
+ case Instruction::SUB_INT_2ADDR:
+ case Instruction::MUL_INT_2ADDR:
+ case Instruction::AND_INT_2ADDR:
+ case Instruction::OR_INT_2ADDR:
+ case Instruction::XOR_INT_2ADDR:
+ case Instruction::SHL_INT_2ADDR:
+ case Instruction::SHR_INT_2ADDR:
+ case Instruction::USHR_INT_2ADDR:
+ case Instruction::ADD_LONG_2ADDR:
+ case Instruction::SUB_LONG_2ADDR:
+ case Instruction::MUL_LONG_2ADDR:
+ case Instruction::AND_LONG_2ADDR:
+ case Instruction::OR_LONG_2ADDR:
+ case Instruction::XOR_LONG_2ADDR:
+ case Instruction::SHL_LONG_2ADDR:
+ case Instruction::SHR_LONG_2ADDR:
+ case Instruction::USHR_LONG_2ADDR:
+ case Instruction::ADD_FLOAT_2ADDR:
+ case Instruction::SUB_FLOAT_2ADDR:
+ case Instruction::MUL_FLOAT_2ADDR:
+ case Instruction::DIV_FLOAT_2ADDR:
+ case Instruction::REM_FLOAT_2ADDR:
+ case Instruction::ADD_DOUBLE_2ADDR:
+ case Instruction::SUB_DOUBLE_2ADDR:
+ case Instruction::MUL_DOUBLE_2ADDR:
+ case Instruction::DIV_DOUBLE_2ADDR:
+ case Instruction::REM_DOUBLE_2ADDR:
+ case Instruction::ADD_INT_LIT16:
+ case Instruction::RSUB_INT:
+ case Instruction::MUL_INT_LIT16:
+ case Instruction::AND_INT_LIT16:
+ case Instruction::OR_INT_LIT16:
+ case Instruction::XOR_INT_LIT16:
+ case Instruction::ADD_INT_LIT8:
+ case Instruction::RSUB_INT_LIT8:
+ case Instruction::MUL_INT_LIT8:
+ case Instruction::AND_INT_LIT8:
+ case Instruction::OR_INT_LIT8:
+ case Instruction::XOR_INT_LIT8:
+ case Instruction::SHL_INT_LIT8:
+ case Instruction::SHR_INT_LIT8:
+ case Instruction::USHR_INT_LIT8:
+ break;
+
+ case Instruction::DIV_INT:
+ case Instruction::REM_INT:
+ case Instruction::DIV_LONG:
+ case Instruction::REM_LONG:
+ case Instruction::DIV_INT_2ADDR:
+ case Instruction::REM_INT_2ADDR:
+ case Instruction::DIV_LONG_2ADDR:
+ case Instruction::REM_LONG_2ADDR:
+ if ((mir->optimization_flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) {
+ must_keep = true;
+ uses_all_vregs = true;
+ }
+ break;
+
+ case Instruction::DIV_INT_LIT16:
+ case Instruction::REM_INT_LIT16:
+ case Instruction::DIV_INT_LIT8:
+ case Instruction::REM_INT_LIT8:
+ if (mir->dalvikInsn.vC == 0) { // Explicit division by 0?
+ must_keep = true;
+ uses_all_vregs = true;
+ }
+ break;
+
+ case Instruction::AGET_OBJECT:
+ case Instruction::AGET:
+ case Instruction::AGET_WIDE:
+ case Instruction::AGET_BOOLEAN:
+ case Instruction::AGET_BYTE:
+ case Instruction::AGET_CHAR:
+ case Instruction::AGET_SHORT:
+ if ((mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0 ||
+ (mir->optimization_flags & MIR_IGNORE_RANGE_CHECK) == 0) {
+ must_keep = true;
+ uses_all_vregs = true;
+ }
+ break;
+
+ case Instruction::APUT_OBJECT:
+ case Instruction::APUT:
+ case Instruction::APUT_WIDE:
+ case Instruction::APUT_BYTE:
+ case Instruction::APUT_BOOLEAN:
+ case Instruction::APUT_SHORT:
+ case Instruction::APUT_CHAR:
+ must_keep = true;
+ if ((mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0 ||
+ (mir->optimization_flags & MIR_IGNORE_RANGE_CHECK) == 0) {
+ uses_all_vregs = true;
+ }
+ break;
+
+ case Instruction::IGET_OBJECT:
+ case Instruction::IGET:
+ case Instruction::IGET_WIDE:
+ case Instruction::IGET_BOOLEAN:
+ case Instruction::IGET_BYTE:
+ case Instruction::IGET_CHAR:
+ case Instruction::IGET_SHORT: {
+ const MirIFieldLoweringInfo& info = mir_graph_->GetIFieldLoweringInfo(mir);
+ if ((mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0 ||
+ !info.IsResolved() || !info.FastGet()) {
+ must_keep = true;
+ uses_all_vregs = true;
+ } else if (info.IsVolatile()) {
+ must_keep = true;
+ }
+ break;
+ }
+
+ case Instruction::IPUT_OBJECT:
+ case Instruction::IPUT:
+ case Instruction::IPUT_WIDE:
+ case Instruction::IPUT_BOOLEAN:
+ case Instruction::IPUT_BYTE:
+ case Instruction::IPUT_CHAR:
+ case Instruction::IPUT_SHORT: {
+ must_keep = true;
+ const MirIFieldLoweringInfo& info = mir_graph_->GetIFieldLoweringInfo(mir);
+ if ((mir->optimization_flags & MIR_IGNORE_NULL_CHECK) == 0 ||
+ !info.IsResolved() || !info.FastPut()) {
+ uses_all_vregs = true;
+ }
+ break;
+ }
+
+ case Instruction::SGET_OBJECT:
+ case Instruction::SGET:
+ case Instruction::SGET_WIDE:
+ case Instruction::SGET_BOOLEAN:
+ case Instruction::SGET_BYTE:
+ case Instruction::SGET_CHAR:
+ case Instruction::SGET_SHORT: {
+ const MirSFieldLoweringInfo& info = mir_graph_->GetSFieldLoweringInfo(mir);
+ if ((mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) == 0 ||
+ !info.IsResolved() || !info.FastGet()) {
+ must_keep = true;
+ uses_all_vregs = true;
+ } else if (info.IsVolatile()) {
+ must_keep = true;
+ }
+ break;
+ }
+
+ case Instruction::SPUT_OBJECT:
+ case Instruction::SPUT:
+ case Instruction::SPUT_WIDE:
+ case Instruction::SPUT_BOOLEAN:
+ case Instruction::SPUT_BYTE:
+ case Instruction::SPUT_CHAR:
+ case Instruction::SPUT_SHORT: {
+ must_keep = true;
+ const MirSFieldLoweringInfo& info = mir_graph_->GetSFieldLoweringInfo(mir);
+ if ((mir->optimization_flags & MIR_CLASS_IS_INITIALIZED) == 0 ||
+ !info.IsResolved() || !info.FastPut()) {
+ uses_all_vregs = true;
+ }
+ break;
+ }
+
+ default:
+ LOG(FATAL) << "Unexpected opcode: " << opcode;
+ UNREACHABLE();
+ break;
+ }
+
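+ // Record the MIR and its def (if any) in the vreg chains.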
+ if (mir->ssa_rep->num_defs != 0) {
+ DCHECK(mir->ssa_rep->num_defs == 1 || mir->ssa_rep->num_defs == 2);
+ bool wide = (mir->ssa_rep->num_defs == 2);
+ int s_reg = mir->ssa_rep->defs[0];
+ int v_reg = mir_graph_->SRegToVReg(s_reg);
+ uint16_t new_value = wide ? lvn_->GetSregValueWide(s_reg) : lvn_->GetSregValue(s_reg);
+ DCHECK_NE(new_value, kNoValue);
+
+ vreg_chains_.UpdateInitialVRegValue(v_reg, wide, lvn_);
+ vreg_chains_.AddMIRWithDef(mir, v_reg, wide, new_value);
+ if (is_move) {
+ // Allow renaming all uses of dest vreg to src vreg.
+ vreg_chains_.LastMIRData()->is_move = true;
+ }
+ } else {
+ vreg_chains_.AddMIRWithoutDef(mir);
+ DCHECK(!is_move) << opcode;
+ }
+
+ if (must_keep) {
+ MIRData* last_data = vreg_chains_.LastMIRData();
+ last_data->must_keep = true;
+ if (uses_all_vregs) {
+ last_data->uses_all_vregs = true;
+ no_uses_all_since_ = vreg_chains_.NumMIRs();
+ }
+ } else {
+ DCHECK_NE(mir->ssa_rep->num_defs, 0) << opcode;
+ DCHECK(!uses_all_vregs) << opcode;
+ }
+ return true;
+}
+
+} // namespace art
diff --git a/compiler/dex/gvn_dead_code_elimination.h b/compiler/dex/gvn_dead_code_elimination.h
new file mode 100644
index 0000000000..9a19f29970
--- /dev/null
+++ b/compiler/dex/gvn_dead_code_elimination.h
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEX_GVN_DEAD_CODE_ELIMINATION_H_
+#define ART_COMPILER_DEX_GVN_DEAD_CODE_ELIMINATION_H_
+
+#include "base/arena_object.h"
+#include "base/scoped_arena_containers.h"
+#include "global_value_numbering.h"
+
+namespace art {
+
+class ArenaBitVector;
+class BasicBlock;
+class LocalValueNumbering;
+class MIR;
+class MIRGraph;
+
+/**
+ * @class GvnDeadCodeElimination
+ * @details Eliminate dead code based on the results of global value numbering.
+ * Also get rid of MOVE insns when we can use the source instead of destination
+ * without affecting the vreg values at safepoints; this is useful in methods
+ * with a large number of vregs that frequently move values to and from low vregs
+ * to accommodate insns that can work only with the low 16 or 256 vregs.
+ */
+class GvnDeadCodeElimination : public DeletableArenaObject<kArenaAllocMisc> {
+ public:
+ GvnDeadCodeElimination(const GlobalValueNumbering* gvn, ScopedArenaAllocator* alloc);
+
+ // Apply the DCE to a basic block.
+ void Apply(BasicBlock* bb);
+
+ private:
+ static constexpr uint16_t kNoValue = GlobalValueNumbering::kNoValue;
+ static constexpr uint16_t kNPos = 0xffffu;
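+ // Maximum number of top vreg changes the backward pass may try to revert at once.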
+ static constexpr size_t kMaxNumTopChangesToKill = 2;
+
+ struct VRegValue {
+ VRegValue() : value(kNoValue), change(kNPos) { }
+
+ // Value name as reported by GVN, kNoValue if not available.
+ uint16_t value;
+ // Index of the change in mir_data_ that defined the value, kNPos if initial value for the BB.
+ uint16_t change;
+ };
+
+ struct MIRData {
+ explicit MIRData(MIR* m)
+ : mir(m), uses_all_vregs(false), must_keep(false), is_move(false), is_move_src(false),
+ has_def(false), wide_def(false),
+ low_def_over_high_word(false), high_def_over_low_word(false), vreg_def(0u),
+ prev_value(), prev_value_high() {
+ }
+
+ uint16_t PrevChange(int v_reg) const;
+ void SetPrevChange(int v_reg, uint16_t change);
+ void RemovePrevChange(int v_reg, MIRData* prev_data);
+
+ MIR* mir;
+ bool uses_all_vregs : 1; // If mir uses all vregs, uses in mir->ssa_rep are irrelevant.
+ bool must_keep : 1;
+ bool is_move : 1;
+ bool is_move_src : 1;
+ bool has_def : 1;
+ bool wide_def : 1;
+ bool low_def_over_high_word : 1;
+ bool high_def_over_low_word : 1;
+ uint16_t vreg_def;
+ VRegValue prev_value;
+ VRegValue prev_value_high; // For wide defs.
+ };
+
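+ // Tracks, for each vreg, the chain of changes (MIRData) recorded in the current basic block.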
+ class VRegChains {
+ public:
+ VRegChains(uint32_t num_vregs, ScopedArenaAllocator* alloc);
+
+ void Reset();
+
+ void AddMIRWithDef(MIR* mir, int v_reg, bool wide, uint16_t new_value);
+ void AddMIRWithoutDef(MIR* mir);
+ void RemoveLastMIRData();
+ void RemoveTrailingNops();
+
+ size_t NumMIRs() const;
+ MIRData* GetMIRData(size_t pos);
+ MIRData* LastMIRData();
+
+ uint32_t NumVRegs() const;
+ void InsertInitialValueHigh(int v_reg, uint16_t value);
+ void UpdateInitialVRegValue(int v_reg, bool wide, const LocalValueNumbering* lvn);
+ uint16_t LastChange(int v_reg);
+ uint16_t CurrentValue(int v_reg);
+
+ uint16_t FindKillHead(int v_reg, uint16_t cutoff);
+ uint16_t FindFirstChangeAfter(int v_reg, uint16_t change) const;
+ void ReplaceChange(uint16_t old_change, uint16_t new_change);
+ void RemoveChange(uint16_t change);
+ bool IsTopChange(uint16_t change) const;
+ bool IsSRegUsed(uint16_t first_change, uint16_t last_change, int s_reg) const;
+ void RenameSRegUses(uint16_t first_change, uint16_t last_change,
+ int old_s_reg, int new_s_reg, bool wide);
+ void RenameVRegUses(uint16_t first_change, uint16_t last_change,
+ int old_s_reg, int old_v_reg, int new_s_reg, int new_v_reg);
+
+ private:
+ const uint32_t num_vregs_;
+ VRegValue* const vreg_data_;
+ ScopedArenaVector<MIRData> mir_data_;
+ };
+
+ void RecordPass();
+ void BackwardPass();
+
+ void KillMIR(MIRData* data);
+ static void KillMIR(MIR* mir);
+ static void ChangeBinOp2AddrToPlainBinOp(MIR* mir);
+ MIR* CreatePhi(int s_reg, bool fp);
+ MIR* RenameSRegDefOrCreatePhi(uint16_t def_change, uint16_t last_change, MIR* mir_to_kill);
+
+ // Update state variables going backwards through a MIR.
+ void BackwardPassProcessLastMIR();
+
+ uint16_t FindChangesToKill(uint16_t first_change, uint16_t last_change);
+ void BackwardPassTryToKillRevertVRegs();
+ bool BackwardPassTryToKillLastMIR();
+
+ void RecordPassKillMoveByRenamingSrcDef(uint16_t src_change, uint16_t move_change);
+ void RecordPassTryToKillOverwrittenMoveOrMoveSrc(uint16_t check_change);
+ void RecordPassTryToKillOverwrittenMoveOrMoveSrc();
+ void RecordPassTryToKillLastMIR();
+
+ bool RecordMIR(MIR* mir);
+
+ const GlobalValueNumbering* const gvn_;
+ MIRGraph* const mir_graph_;
+
+ VRegChains vreg_chains_;
+ BasicBlock* bb_;
+ const LocalValueNumbering* lvn_;
+ size_t no_uses_all_since_; // The change index after the last change with uses_all_vregs set.
+
+ // Data used when processing MIRs in reverse order.
+ ArenaBitVector* unused_vregs_; // vregs that are not needed later.
+ ArenaBitVector* vregs_to_kill_; // vregs that revert to a previous value.
+ uint16_t* kill_heads_; // For each vreg in vregs_to_kill_, the first change to kill.
+ ScopedArenaVector<uint16_t> changes_to_kill_;
+ ArenaBitVector* dependent_vregs_;
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_DEX_GVN_DEAD_CODE_ELIMINATION_H_
diff --git a/compiler/dex/gvn_dead_code_elimination_test.cc b/compiler/dex/gvn_dead_code_elimination_test.cc
new file mode 100644
index 0000000000..954e9f1d37
--- /dev/null
+++ b/compiler/dex/gvn_dead_code_elimination_test.cc
@@ -0,0 +1,1800 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dataflow_iterator-inl.h"
+#include "dex/mir_field_info.h"
+#include "global_value_numbering.h"
+#include "gvn_dead_code_elimination.h"
+#include "local_value_numbering.h"
+#include "gtest/gtest.h"
+
+namespace art {
+
+class GvnDeadCodeEliminationTest : public testing::Test {
+ protected:
+ static constexpr uint16_t kNoValue = GlobalValueNumbering::kNoValue;
+
+ struct IFieldDef {
+ uint16_t field_idx;
+ uintptr_t declaring_dex_file;
+ uint16_t declaring_field_idx;
+ bool is_volatile;
+ DexMemAccessType type;
+ };
+
+ struct SFieldDef {
+ uint16_t field_idx;
+ uintptr_t declaring_dex_file;
+ uint16_t declaring_field_idx;
+ bool is_volatile;
+ DexMemAccessType type;
+ };
+
+ struct BBDef {
+ static constexpr size_t kMaxSuccessors = 4;
+ static constexpr size_t kMaxPredecessors = 4;
+
+ BBType type;
+ size_t num_successors;
+ BasicBlockId successors[kMaxPredecessors];
+ size_t num_predecessors;
+ BasicBlockId predecessors[kMaxPredecessors];
+ };
+
+ struct MIRDef {
+ static constexpr size_t kMaxSsaDefs = 2;
+ static constexpr size_t kMaxSsaUses = 4;
+
+ BasicBlockId bbid;
+ Instruction::Code opcode;
+ int64_t value;
+ uint32_t field_info;
+ size_t num_uses;
+ int32_t uses[kMaxSsaUses];
+ size_t num_defs;
+ int32_t defs[kMaxSsaDefs];
+ };
+
+#define DEF_SUCC0() \
+ 0u, { }
+#define DEF_SUCC1(s1) \
+ 1u, { s1 }
+#define DEF_SUCC2(s1, s2) \
+ 2u, { s1, s2 }
+#define DEF_SUCC3(s1, s2, s3) \
+ 3u, { s1, s2, s3 }
+#define DEF_SUCC4(s1, s2, s3, s4) \
+ 4u, { s1, s2, s3, s4 }
+#define DEF_PRED0() \
+ 0u, { }
+#define DEF_PRED1(p1) \
+ 1u, { p1 }
+#define DEF_PRED2(p1, p2) \
+ 2u, { p1, p2 }
+#define DEF_PRED3(p1, p2, p3) \
+ 3u, { p1, p2, p3 }
+#define DEF_PRED4(p1, p2, p3, p4) \
+ 4u, { p1, p2, p3, p4 }
+#define DEF_BB(type, succ, pred) \
+ { type, succ, pred }
+
+#define DEF_CONST(bb, opcode, reg, value) \
+ { bb, opcode, value, 0u, 0, { }, 1, { reg } }
+#define DEF_CONST_WIDE(bb, opcode, reg, value) \
+ { bb, opcode, value, 0u, 0, { }, 2, { reg, reg + 1 } }
+#define DEF_CONST_STRING(bb, opcode, reg, index) \
+ { bb, opcode, index, 0u, 0, { }, 1, { reg } }
+#define DEF_IGET(bb, opcode, reg, obj, field_info) \
+ { bb, opcode, 0u, field_info, 1, { obj }, 1, { reg } }
+#define DEF_IGET_WIDE(bb, opcode, reg, obj, field_info) \
+ { bb, opcode, 0u, field_info, 1, { obj }, 2, { reg, reg + 1 } }
+#define DEF_IPUT(bb, opcode, reg, obj, field_info) \
+ { bb, opcode, 0u, field_info, 2, { reg, obj }, 0, { } }
+#define DEF_IPUT_WIDE(bb, opcode, reg, obj, field_info) \
+ { bb, opcode, 0u, field_info, 3, { reg, reg + 1, obj }, 0, { } }
+#define DEF_SGET(bb, opcode, reg, field_info) \
+ { bb, opcode, 0u, field_info, 0, { }, 1, { reg } }
+#define DEF_SGET_WIDE(bb, opcode, reg, field_info) \
+ { bb, opcode, 0u, field_info, 0, { }, 2, { reg, reg + 1 } }
+#define DEF_SPUT(bb, opcode, reg, field_info) \
+ { bb, opcode, 0u, field_info, 1, { reg }, 0, { } }
+#define DEF_SPUT_WIDE(bb, opcode, reg, field_info) \
+ { bb, opcode, 0u, field_info, 2, { reg, reg + 1 }, 0, { } }
+#define DEF_AGET(bb, opcode, reg, obj, idx) \
+ { bb, opcode, 0u, 0u, 2, { obj, idx }, 1, { reg } }
+#define DEF_AGET_WIDE(bb, opcode, reg, obj, idx) \
+ { bb, opcode, 0u, 0u, 2, { obj, idx }, 2, { reg, reg + 1 } }
+#define DEF_APUT(bb, opcode, reg, obj, idx) \
+ { bb, opcode, 0u, 0u, 3, { reg, obj, idx }, 0, { } }
+#define DEF_APUT_WIDE(bb, opcode, reg, obj, idx) \
+ { bb, opcode, 0u, 0u, 4, { reg, reg + 1, obj, idx }, 0, { } }
+#define DEF_INVOKE1(bb, opcode, reg) \
+ { bb, opcode, 0u, 0u, 1, { reg }, 0, { } }
+#define DEF_UNIQUE_REF(bb, opcode, reg) \
+ { bb, opcode, 0u, 0u, 0, { }, 1, { reg } } // CONST_CLASS, CONST_STRING, NEW_ARRAY, ...
+#define DEF_IFZ(bb, opcode, reg) \
+ { bb, opcode, 0u, 0u, 1, { reg }, 0, { } }
+#define DEF_MOVE(bb, opcode, reg, src) \
+ { bb, opcode, 0u, 0u, 1, { src }, 1, { reg } }
+#define DEF_MOVE_WIDE(bb, opcode, reg, src) \
+ { bb, opcode, 0u, 0u, 2, { src, src + 1 }, 2, { reg, reg + 1 } }
+#define DEF_PHI2(bb, reg, src1, src2) \
+ { bb, static_cast<Instruction::Code>(kMirOpPhi), 0, 0u, 2u, { src1, src2 }, 1, { reg } }
+#define DEF_UNOP(bb, opcode, result, src1) \
+ { bb, opcode, 0u, 0u, 1, { src1 }, 1, { result } }
+#define DEF_BINOP(bb, opcode, result, src1, src2) \
+ { bb, opcode, 0u, 0u, 2, { src1, src2 }, 1, { result } }
+
+ void DoPrepareIFields(const IFieldDef* defs, size_t count) {
+ cu_.mir_graph->ifield_lowering_infos_.clear();
+ cu_.mir_graph->ifield_lowering_infos_.reserve(count);
+ for (size_t i = 0u; i != count; ++i) {
+ const IFieldDef* def = &defs[i];
+ MirIFieldLoweringInfo field_info(def->field_idx, def->type);
+ if (def->declaring_dex_file != 0u) {
+ field_info.declaring_dex_file_ = reinterpret_cast<const DexFile*>(def->declaring_dex_file);
+ field_info.declaring_field_idx_ = def->declaring_field_idx;
+ field_info.flags_ =
+ MirIFieldLoweringInfo::kFlagFastGet | MirIFieldLoweringInfo::kFlagFastPut |
+ (field_info.flags_ & ~(def->is_volatile ? 0u : MirIFieldLoweringInfo::kFlagIsVolatile));
+ }
+ cu_.mir_graph->ifield_lowering_infos_.push_back(field_info);
+ }
+ }
+
+ template <size_t count>
+ void PrepareIFields(const IFieldDef (&defs)[count]) {
+ DoPrepareIFields(defs, count);
+ }
+
+ void DoPrepareSFields(const SFieldDef* defs, size_t count) {
+ cu_.mir_graph->sfield_lowering_infos_.clear();
+ cu_.mir_graph->sfield_lowering_infos_.reserve(count);
+ for (size_t i = 0u; i != count; ++i) {
+ const SFieldDef* def = &defs[i];
+ MirSFieldLoweringInfo field_info(def->field_idx, def->type);
+ // Mark even unresolved fields as initialized.
+ field_info.flags_ |= MirSFieldLoweringInfo::kFlagClassIsInitialized;
+ // NOTE: MirSFieldLoweringInfo::kFlagClassIsInDexCache isn't used by GVN.
+ if (def->declaring_dex_file != 0u) {
+ field_info.declaring_dex_file_ = reinterpret_cast<const DexFile*>(def->declaring_dex_file);
+ field_info.declaring_field_idx_ = def->declaring_field_idx;
+ field_info.flags_ =
+ MirSFieldLoweringInfo::kFlagFastGet | MirSFieldLoweringInfo::kFlagFastPut |
+ (field_info.flags_ & ~(def->is_volatile ? 0u : MirSFieldLoweringInfo::kFlagIsVolatile));
+ }
+ cu_.mir_graph->sfield_lowering_infos_.push_back(field_info);
+ }
+ }
+
+ template <size_t count>
+ void PrepareSFields(const SFieldDef (&defs)[count]) {
+ DoPrepareSFields(defs, count);
+ }
+
+ void DoPrepareBasicBlocks(const BBDef* defs, size_t count) {
+ cu_.mir_graph->block_id_map_.clear();
+ cu_.mir_graph->block_list_.clear();
+ ASSERT_LT(3u, count); // null, entry, exit and at least one bytecode block.
+ ASSERT_EQ(kNullBlock, defs[0].type);
+ ASSERT_EQ(kEntryBlock, defs[1].type);
+ ASSERT_EQ(kExitBlock, defs[2].type);
+ for (size_t i = 0u; i != count; ++i) {
+ const BBDef* def = &defs[i];
+ BasicBlock* bb = cu_.mir_graph->CreateNewBB(def->type);
+ if (def->num_successors <= 2) {
+ bb->successor_block_list_type = kNotUsed;
+ bb->fall_through = (def->num_successors >= 1) ? def->successors[0] : 0u;
+ bb->taken = (def->num_successors >= 2) ? def->successors[1] : 0u;
+ } else {
+ bb->successor_block_list_type = kPackedSwitch;
+ bb->fall_through = 0u;
+ bb->taken = 0u;
+ bb->successor_blocks.reserve(def->num_successors);
+ for (size_t j = 0u; j != def->num_successors; ++j) {
+ SuccessorBlockInfo* successor_block_info =
+ static_cast<SuccessorBlockInfo*>(cu_.arena.Alloc(sizeof(SuccessorBlockInfo),
+ kArenaAllocSuccessor));
+ successor_block_info->block = j;
+ successor_block_info->key = 0u; // Not used by GVN/DCE.
+ bb->successor_blocks.push_back(successor_block_info);
+ }
+ }
+ bb->predecessors.assign(def->predecessors, def->predecessors + def->num_predecessors);
+ if (def->type == kDalvikByteCode || def->type == kEntryBlock || def->type == kExitBlock) {
+ bb->data_flow_info = static_cast<BasicBlockDataFlow*>(
+ cu_.arena.Alloc(sizeof(BasicBlockDataFlow), kArenaAllocDFInfo));
+ bb->data_flow_info->live_in_v = live_in_v_;
+ bb->data_flow_info->vreg_to_ssa_map_exit = nullptr;
+ }
+ }
+ ASSERT_EQ(count, cu_.mir_graph->block_list_.size());
+ cu_.mir_graph->entry_block_ = cu_.mir_graph->block_list_[1];
+ ASSERT_EQ(kEntryBlock, cu_.mir_graph->entry_block_->block_type);
+ cu_.mir_graph->exit_block_ = cu_.mir_graph->block_list_[2];
+ ASSERT_EQ(kExitBlock, cu_.mir_graph->exit_block_->block_type);
+ }
+
+ template <size_t count>
+ void PrepareBasicBlocks(const BBDef (&defs)[count]) {
+ DoPrepareBasicBlocks(defs, count);
+ }
+
+ int SRegToVReg(int32_t s_reg, bool wide) {
+ int v_reg = cu_.mir_graph->SRegToVReg(s_reg);
+ CHECK_LT(static_cast<size_t>(v_reg), num_vregs_);
+ if (wide) {
+ CHECK_LT(static_cast<size_t>(v_reg + 1), num_vregs_);
+ }
+ return v_reg;
+ }
+
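+ // Consume one use s_reg (or two for a wide use) and return the corresponding v_reg.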
+ int SRegToVReg(int32_t* uses, size_t* use, bool wide) {
+ int v_reg = SRegToVReg(uses[*use], wide);
+ if (wide) {
+ CHECK_EQ(uses[*use] + 1, uses[*use + 1]);
+ *use += 2u;
+ } else {
+ *use += 1u;
+ }
+ return v_reg;
+ }
+
+ void DoPrepareMIRs(const MIRDef* defs, size_t count) {
+ mir_count_ = count;
+ mirs_ = reinterpret_cast<MIR*>(cu_.arena.Alloc(sizeof(MIR) * count, kArenaAllocMIR));
+ ssa_reps_.resize(count);
+ for (size_t i = 0u; i != count; ++i) {
+ const MIRDef* def = &defs[i];
+ MIR* mir = &mirs_[i];
+ ASSERT_LT(def->bbid, cu_.mir_graph->block_list_.size());
+ BasicBlock* bb = cu_.mir_graph->block_list_[def->bbid];
+ bb->AppendMIR(mir);
+ mir->dalvikInsn.opcode = def->opcode;
+ mir->dalvikInsn.vB = static_cast<int32_t>(def->value);
+ mir->dalvikInsn.vB_wide = def->value;
+ if (IsInstructionIGetOrIPut(def->opcode)) {
+ ASSERT_LT(def->field_info, cu_.mir_graph->ifield_lowering_infos_.size());
+ mir->meta.ifield_lowering_info = def->field_info;
+ ASSERT_EQ(cu_.mir_graph->ifield_lowering_infos_[def->field_info].MemAccessType(),
+ IGetOrIPutMemAccessType(def->opcode));
+ } else if (IsInstructionSGetOrSPut(def->opcode)) {
+ ASSERT_LT(def->field_info, cu_.mir_graph->sfield_lowering_infos_.size());
+ mir->meta.sfield_lowering_info = def->field_info;
+ ASSERT_EQ(cu_.mir_graph->sfield_lowering_infos_[def->field_info].MemAccessType(),
+ SGetOrSPutMemAccessType(def->opcode));
+ } else if (def->opcode == static_cast<Instruction::Code>(kMirOpPhi)) {
+ mir->meta.phi_incoming =
+ allocator_->AllocArray<BasicBlockId>(def->num_uses, kArenaAllocDFInfo);
+ ASSERT_EQ(def->num_uses, bb->predecessors.size());
+ std::copy(bb->predecessors.begin(), bb->predecessors.end(), mir->meta.phi_incoming);
+ }
+ mir->ssa_rep = &ssa_reps_[i];
+ cu_.mir_graph->AllocateSSAUseData(mir, def->num_uses);
+ std::copy_n(def->uses, def->num_uses, mir->ssa_rep->uses);
+ // Keep mir->ssa_rep->fp_use[.] zero-initialized (false). Not used by DCE, only copied.
+ cu_.mir_graph->AllocateSSADefData(mir, def->num_defs);
+ std::copy_n(def->defs, def->num_defs, mir->ssa_rep->defs);
+ // Keep mir->ssa_rep->fp_def[.] zero-initialized (false). Not used by DCE, only copied.
+ mir->dalvikInsn.opcode = def->opcode;
+ mir->offset = i; // LVN uses offset only for debug output
+ mir->optimization_flags = 0u;
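+ // Fill in vA/vB/vC from the SSA defs/uses according to the instruction's dataflow
+ // attributes and record defs in the block's vreg-to-ssa exit map.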
+ uint64_t df_attrs = MIRGraph::GetDataFlowAttributes(mir);
+ if ((df_attrs & DF_DA) != 0) {
+ CHECK_NE(def->num_defs, 0u);
+ mir->dalvikInsn.vA = SRegToVReg(def->defs[0], (df_attrs & DF_A_WIDE) != 0);
+ bb->data_flow_info->vreg_to_ssa_map_exit[mir->dalvikInsn.vA] = def->defs[0];
+ if ((df_attrs & DF_A_WIDE) != 0) {
+ CHECK_EQ(def->defs[0] + 1, def->defs[1]);
+ bb->data_flow_info->vreg_to_ssa_map_exit[mir->dalvikInsn.vA + 1u] = def->defs[0] + 1;
+ }
+ }
+ if ((df_attrs & (DF_UA | DF_UB | DF_UC)) != 0) {
+ size_t use = 0;
+ if ((df_attrs & DF_UA) != 0) {
+ mir->dalvikInsn.vA = SRegToVReg(mir->ssa_rep->uses, &use, (df_attrs & DF_A_WIDE) != 0);
+ }
+ if ((df_attrs & DF_UB) != 0) {
+ mir->dalvikInsn.vB = SRegToVReg(mir->ssa_rep->uses, &use, (df_attrs & DF_B_WIDE) != 0);
+ }
+ if ((df_attrs & DF_UC) != 0) {
+ mir->dalvikInsn.vC = SRegToVReg(mir->ssa_rep->uses, &use, (df_attrs & DF_C_WIDE) != 0);
+ }
+ DCHECK_EQ(def->num_uses, use);
+ }
+ }
+ DexFile::CodeItem* code_item = static_cast<DexFile::CodeItem*>(
+ cu_.arena.Alloc(sizeof(DexFile::CodeItem), kArenaAllocMisc));
+ code_item->insns_size_in_code_units_ = 2u * count;
+ code_item->registers_size_ = kMaxVRegs;
+ cu_.mir_graph->current_code_item_ = code_item;
+ }
+
+ template <size_t count>
+ void PrepareMIRs(const MIRDef (&defs)[count]) {
+ DoPrepareMIRs(defs, count);
+ }
+
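+ // Set up the s_reg -> v_reg mapping and allocate vreg-to-ssa exit maps for each block.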
+ template <size_t count>
+ void PrepareSRegToVRegMap(const int (&map)[count]) {
+ cu_.mir_graph->ssa_base_vregs_.assign(map, map + count);
+ num_vregs_ = *std::max_element(map, map + count) + 1u;
+ AllNodesIterator iterator(cu_.mir_graph.get());
+ for (BasicBlock* bb = iterator.Next(); bb != nullptr; bb = iterator.Next()) {
+ if (bb->data_flow_info != nullptr) {
+ bb->data_flow_info->vreg_to_ssa_map_exit = static_cast<int32_t*>(
+ cu_.arena.Alloc(sizeof(int32_t) * num_vregs_, kArenaAllocDFInfo));
+ std::fill_n(bb->data_flow_info->vreg_to_ssa_map_exit, num_vregs_, INVALID_SREG);
+ }
+ }
+ }
+
+ void PerformGVN() {
+ cu_.mir_graph->SSATransformationStart();
+ cu_.mir_graph->ComputeDFSOrders();
+ cu_.mir_graph->ComputeDominators();
+ cu_.mir_graph->ComputeTopologicalSortOrder();
+ cu_.mir_graph->SSATransformationEnd();
+ cu_.mir_graph->temp_.gvn.ifield_ids = GlobalValueNumbering::PrepareGvnFieldIds(
+ allocator_.get(), cu_.mir_graph->ifield_lowering_infos_);
+ cu_.mir_graph->temp_.gvn.sfield_ids = GlobalValueNumbering::PrepareGvnFieldIds(
+ allocator_.get(), cu_.mir_graph->sfield_lowering_infos_);
+ ASSERT_TRUE(gvn_ == nullptr);
+ gvn_.reset(new (allocator_.get()) GlobalValueNumbering(&cu_, allocator_.get(),
+ GlobalValueNumbering::kModeGvn));
+ value_names_.resize(mir_count_, 0xffffu);
+ LoopRepeatingTopologicalSortIterator iterator(cu_.mir_graph.get());
+ bool change = false;
+ for (BasicBlock* bb = iterator.Next(change); bb != nullptr; bb = iterator.Next(change)) {
+ LocalValueNumbering* lvn = gvn_->PrepareBasicBlock(bb);
+ if (lvn != nullptr) {
+ for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
+ value_names_[mir - mirs_] = lvn->GetValueNumber(mir);
+ }
+ }
+ change = (lvn != nullptr) && gvn_->FinishBasicBlock(bb);
+ ASSERT_TRUE(gvn_->Good());
+ }
+ }
+
+ void PerformGVNCodeModifications() {
+ ASSERT_TRUE(gvn_ != nullptr);
+ ASSERT_TRUE(gvn_->Good());
+ gvn_->StartPostProcessing();
+ TopologicalSortIterator iterator(cu_.mir_graph.get());
+ for (BasicBlock* bb = iterator.Next(); bb != nullptr; bb = iterator.Next()) {
+ LocalValueNumbering* lvn = gvn_->PrepareBasicBlock(bb);
+ if (lvn != nullptr) {
+ for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) {
+ uint16_t value_name = lvn->GetValueNumber(mir);
+ ASSERT_EQ(value_name, value_names_[mir - mirs_]);
+ }
+ }
+ bool change = (lvn != nullptr) && gvn_->FinishBasicBlock(bb);
+ ASSERT_FALSE(change);
+ ASSERT_TRUE(gvn_->Good());
+ }
+ }
+
+ void FillVregToSsaRegExitMaps() {
+ // Fill in vreg_to_ssa_map_exit for each BB.
+ PreOrderDfsIterator iterator(cu_.mir_graph.get());
+ for (BasicBlock* bb = iterator.Next(); bb != nullptr; bb = iterator.Next()) {
+ if (bb->block_type == kDalvikByteCode) {
+ CHECK(!bb->predecessors.empty());
+ BasicBlock* pred_bb = cu_.mir_graph->GetBasicBlock(bb->predecessors[0]);
+ for (size_t v_reg = 0; v_reg != num_vregs_; ++v_reg) {
+ if (bb->data_flow_info->vreg_to_ssa_map_exit[v_reg] == INVALID_SREG) {
+ bb->data_flow_info->vreg_to_ssa_map_exit[v_reg] =
+ pred_bb->data_flow_info->vreg_to_ssa_map_exit[v_reg];
+ }
+ }
+ }
+ }
+ }
+
+ void PerformDCE() {
+ FillVregToSsaRegExitMaps();
+ cu_.mir_graph->GetNumOfCodeAndTempVRs();
+ dce_.reset(new (allocator_.get()) GvnDeadCodeElimination(gvn_.get(), allocator_.get()));
+ PreOrderDfsIterator iterator(cu_.mir_graph.get());
+ for (BasicBlock* bb = iterator.Next(); bb != nullptr; bb = iterator.Next()) {
+ if (bb->block_type == kDalvikByteCode) {
+ dce_->Apply(bb);
+ }
+ }
+ }
+
+ void PerformGVN_DCE() {
+ PerformGVN();
+ PerformGVNCodeModifications(); // Eliminate null/range checks.
+ PerformDCE();
+ }
+
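+ // Check that the value names at the given MIR indexes are pairwise distinct.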
+ template <size_t count>
+ void ExpectValueNamesNE(const size_t (&indexes)[count]) {
+ for (size_t i1 = 0; i1 != count; ++i1) {
+ size_t idx1 = indexes[i1];
+ for (size_t i2 = i1 + 1; i2 != count; ++i2) {
+ size_t idx2 = indexes[i2];
+ EXPECT_NE(value_names_[idx1], value_names_[idx2]) << idx1 << " " << idx2;
+ }
+ }
+ }
+
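+ // Check that exactly the MIRs at the given indexes have their null checks marked as eliminated.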
+ template <size_t count>
+ void ExpectNoNullCheck(const size_t (&indexes)[count]) {
+ for (size_t i = 0; i != count; ++i) {
+ size_t idx = indexes[i];
+ EXPECT_EQ(MIR_IGNORE_NULL_CHECK, mirs_[idx].optimization_flags & MIR_IGNORE_NULL_CHECK)
+ << idx;
+ }
+ size_t num_no_null_ck = 0u;
+ for (size_t i = 0; i != mir_count_; ++i) {
+ if ((mirs_[i].optimization_flags & MIR_IGNORE_NULL_CHECK) != 0) {
+ ++num_no_null_ck;
+ }
+ }
+ EXPECT_EQ(count, num_no_null_ck);
+ }
+
+ GvnDeadCodeEliminationTest()
+ : pool_(),
+ cu_(&pool_, kRuntimeISA, nullptr, nullptr),
+ num_vregs_(0u),
+ mir_count_(0u),
+ mirs_(nullptr),
+ ssa_reps_(),
+ allocator_(),
+ gvn_(),
+ dce_(),
+ value_names_(),
+ live_in_v_(new (&cu_.arena) ArenaBitVector(&cu_.arena, kMaxSsaRegs, false, kBitMapMisc)) {
+ cu_.mir_graph.reset(new MIRGraph(&cu_, &cu_.arena));
+ cu_.access_flags = kAccStatic; // Don't let "this" interfere with this test.
+ allocator_.reset(ScopedArenaAllocator::Create(&cu_.arena_stack));
+ // By default, the zero-initialized reg_location_[.] with ref == false tells LVN that
+ // 0 constants are integral, not references. Nothing else is used by LVN/GVN.
+ cu_.mir_graph->reg_location_ = static_cast<RegLocation*>(cu_.arena.Alloc(
+ kMaxSsaRegs * sizeof(cu_.mir_graph->reg_location_[0]), kArenaAllocRegAlloc));
+ // Bind all possible sregs to live vregs for test purposes.
+ live_in_v_->SetInitialBits(kMaxSsaRegs);
+ cu_.mir_graph->ssa_base_vregs_.reserve(kMaxSsaRegs);
+ cu_.mir_graph->ssa_subscripts_.reserve(kMaxSsaRegs);
+ for (unsigned int i = 0; i < kMaxSsaRegs; i++) {
+ cu_.mir_graph->ssa_base_vregs_.push_back(i);
+ cu_.mir_graph->ssa_subscripts_.push_back(0);
+ }
+ // Set shorty for a void-returning method without arguments.
+ cu_.shorty = "V";
+ }
+
+ static constexpr size_t kMaxSsaRegs = 16384u;
+ static constexpr size_t kMaxVRegs = 256u;
+
+ ArenaPool pool_;
+ CompilationUnit cu_;
+ size_t num_vregs_;
+ size_t mir_count_;
+ MIR* mirs_;
+ std::vector<SSARepresentation> ssa_reps_;
+ std::unique_ptr<ScopedArenaAllocator> allocator_;
+ std::unique_ptr<GlobalValueNumbering> gvn_;
+ std::unique_ptr<GvnDeadCodeElimination> dce_;
+ std::vector<uint16_t> value_names_;
+ ArenaBitVector* live_in_v_;
+};
+
+constexpr uint16_t GvnDeadCodeEliminationTest::kNoValue;
+
+class GvnDeadCodeEliminationTestSimple : public GvnDeadCodeEliminationTest {
+ public:
+ GvnDeadCodeEliminationTestSimple();
+
+ private:
+ static const BBDef kSimpleBbs[];
+};
+
+const GvnDeadCodeEliminationTest::BBDef GvnDeadCodeEliminationTestSimple::kSimpleBbs[] = {
+ DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+ DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+ DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(3)),
+ DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(1)),
+};
+
+GvnDeadCodeEliminationTestSimple::GvnDeadCodeEliminationTestSimple()
+ : GvnDeadCodeEliminationTest() {
+ PrepareBasicBlocks(kSimpleBbs);
+}
+
+class GvnDeadCodeEliminationTestDiamond : public GvnDeadCodeEliminationTest {
+ public:
+ GvnDeadCodeEliminationTestDiamond();
+
+ private:
+ static const BBDef kDiamondBbs[];
+};
+
+const GvnDeadCodeEliminationTest::BBDef GvnDeadCodeEliminationTestDiamond::kDiamondBbs[] = {
+ DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+ DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+ DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(6)),
+ DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 5), DEF_PRED1(1)), // Block #3, top of the diamond.
+ DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED1(3)), // Block #4, left side.
+ DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED1(3)), // Block #5, right side.
+ DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED2(4, 5)), // Block #6, bottom.
+};
+
+GvnDeadCodeEliminationTestDiamond::GvnDeadCodeEliminationTestDiamond()
+ : GvnDeadCodeEliminationTest() {
+ PrepareBasicBlocks(kDiamondBbs);
+}
+
+class GvnDeadCodeEliminationTestLoop : public GvnDeadCodeEliminationTest {
+ public:
+ GvnDeadCodeEliminationTestLoop();
+
+ private:
+ static const BBDef kLoopBbs[];
+};
+
+const GvnDeadCodeEliminationTest::BBDef GvnDeadCodeEliminationTestLoop::kLoopBbs[] = {
+ DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()),
+ DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()),
+ DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(5)),
+ DEF_BB(kDalvikByteCode, DEF_SUCC1(4), DEF_PRED1(1)),
+ DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 4), DEF_PRED2(3, 4)), // "taken" loops to self.
+ DEF_BB(kDalvikByteCode, DEF_SUCC1(2), DEF_PRED1(4)),
+};
+
+GvnDeadCodeEliminationTestLoop::GvnDeadCodeEliminationTestLoop()
+ : GvnDeadCodeEliminationTest() {
+ PrepareBasicBlocks(kLoopBbs);
+}
+
+TEST_F(GvnDeadCodeEliminationTestSimple, Rename1) {
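+ // The MOVE_OBJECT copies a reference that remains available in its source vreg, so it is
+ // eliminated and the second IGET is renamed to use the original s_reg 0 / v_reg 0.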
+ static const IFieldDef ifields[] = {
+ { 0u, 1u, 0u, false, kDexMemAccessWord },
+ { 1u, 1u, 1u, false, kDexMemAccessWord },
+ };
+ static const MIRDef mirs[] = {
+ DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
+ DEF_IGET(3, Instruction::IGET, 1u, 0u, 0u),
+ DEF_MOVE(3, Instruction::MOVE_OBJECT, 2u, 0u),
+ DEF_IGET(3, Instruction::IGET, 3u, 2u, 1u),
+ };
+
+ static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 2 };
+ PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+ PrepareIFields(ifields);
+ PrepareMIRs(mirs);
+ PerformGVN_DCE();
+
+ ASSERT_EQ(arraysize(mirs), value_names_.size());
+ static const size_t diff_indexes[] = { 0, 1, 3 };
+ ExpectValueNamesNE(diff_indexes);
+ EXPECT_EQ(value_names_[0], value_names_[2]);
+
+ const size_t no_null_ck_indexes[] = { 1, 3 };
+ ExpectNoNullCheck(no_null_ck_indexes);
+
+ static const bool eliminated[] = {
+ false, false, true, false
+ };
+ static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+ for (size_t i = 0; i != arraysize(eliminated); ++i) {
+ bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+ EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+ }
+ // Check that the IGET uses the s_reg 0, v_reg 0, defined by mirs_[0].
+ ASSERT_EQ(1, mirs_[3].ssa_rep->num_uses);
+ EXPECT_EQ(0, mirs_[3].ssa_rep->uses[0]);
+ EXPECT_EQ(0u, mirs_[3].dalvikInsn.vB);
+}
+
+TEST_F(GvnDeadCodeEliminationTestSimple, Rename2) {
+ static const IFieldDef ifields[] = {
+ { 0u, 1u, 0u, false, kDexMemAccessWord },
+ { 1u, 1u, 1u, false, kDexMemAccessWord },
+ };
+ static const MIRDef mirs[] = {
+ DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
+ DEF_IGET(3, Instruction::IGET, 1u, 0u, 0u),
+ DEF_MOVE(3, Instruction::MOVE_OBJECT, 2u, 0u),
+ DEF_IGET(3, Instruction::IGET, 3u, 2u, 1u),
+ DEF_CONST(3, Instruction::CONST, 4u, 1000),
+ };
+
+ static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 2 };
+ PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+ PrepareIFields(ifields);
+ PrepareMIRs(mirs);
+ PerformGVN_DCE();
+
+ ASSERT_EQ(arraysize(mirs), value_names_.size());
+ static const size_t diff_indexes[] = { 0, 1, 3, 4 };
+ ExpectValueNamesNE(diff_indexes);
+ EXPECT_EQ(value_names_[0], value_names_[2]);
+
+ const size_t no_null_ck_indexes[] = { 1, 3 };
+ ExpectNoNullCheck(no_null_ck_indexes);
+
+ static const bool eliminated[] = {
+ false, false, true, false, false
+ };
+ static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+ for (size_t i = 0; i != arraysize(eliminated); ++i) {
+ bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+ EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+ }
+ // Check that the IGET uses the s_reg 0, v_reg 0, defined by mirs_[0].
+ ASSERT_EQ(1, mirs_[3].ssa_rep->num_uses);
+ EXPECT_EQ(0, mirs_[3].ssa_rep->uses[0]);
+ EXPECT_EQ(0u, mirs_[3].dalvikInsn.vB);
+}
+
+TEST_F(GvnDeadCodeEliminationTestSimple, Rename3) {
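+ // Unlike Rename1, the MOVE's source v0 is later overwritten (by the last IGET), so the
+ // NEW_INSTANCE is renamed to define the MOVE's destination v2 and the MOVE is eliminated.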
+ static const IFieldDef ifields[] = {
+ { 0u, 1u, 0u, false, kDexMemAccessWord },
+ { 1u, 1u, 1u, false, kDexMemAccessWord },
+ };
+ static const MIRDef mirs[] = {
+ DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
+ DEF_IGET(3, Instruction::IGET, 1u, 0u, 0u),
+ DEF_MOVE(3, Instruction::MOVE_OBJECT, 2u, 0u),
+ DEF_IGET(3, Instruction::IGET, 3u, 2u, 1u),
+ };
+
+ static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 0 };
+ PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+ PrepareIFields(ifields);
+ PrepareMIRs(mirs);
+ PerformGVN_DCE();
+
+ ASSERT_EQ(arraysize(mirs), value_names_.size());
+ static const size_t diff_indexes[] = { 0, 1, 3 };
+ ExpectValueNamesNE(diff_indexes);
+ EXPECT_EQ(value_names_[0], value_names_[2]);
+
+ const size_t no_null_ck_indexes[] = { 1, 3 };
+ ExpectNoNullCheck(no_null_ck_indexes);
+
+ static const bool eliminated[] = {
+ false, false, true, false
+ };
+ static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+ for (size_t i = 0; i != arraysize(eliminated); ++i) {
+ bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+ EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+ }
+ // Check that the NEW_INSTANCE defines the s_reg 2, v_reg 2, originally defined by the move.
+ ASSERT_EQ(1, mirs_[0].ssa_rep->num_defs);
+ EXPECT_EQ(2, mirs_[0].ssa_rep->defs[0]);
+ EXPECT_EQ(2u, mirs_[0].dalvikInsn.vA);
+ // Check that the first IGET is using the s_reg 2, v_reg 2.
+ ASSERT_EQ(1, mirs_[1].ssa_rep->num_uses);
+ EXPECT_EQ(2, mirs_[1].ssa_rep->uses[0]);
+ EXPECT_EQ(2u, mirs_[1].dalvikInsn.vB);
+}
+
+TEST_F(GvnDeadCodeEliminationTestSimple, Rename4) {
+ static const MIRDef mirs[] = {
+ DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
+ DEF_MOVE(3, Instruction::MOVE_OBJECT, 1u, 0u),
+ DEF_MOVE(3, Instruction::MOVE_OBJECT, 2u, 1u),
+ DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 3u, 1000u),
+ };
+
+ static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 0, 1 /* high word */ };
+ PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+ PrepareMIRs(mirs);
+ PerformGVN_DCE();
+
+ ASSERT_EQ(arraysize(mirs), value_names_.size());
+ static const size_t diff_indexes[] = { 0, 3 };
+ ExpectValueNamesNE(diff_indexes);
+ EXPECT_EQ(value_names_[0], value_names_[1]);
+ EXPECT_EQ(value_names_[0], value_names_[2]);
+
+ static const bool eliminated[] = {
+ false, true, true, false
+ };
+ static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+ for (size_t i = 0; i != arraysize(eliminated); ++i) {
+ bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+ EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+ }
+ // Check that the NEW_INSTANCE defines the s_reg 2, v_reg 2, originally defined by the move 2u.
+ ASSERT_EQ(1, mirs_[0].ssa_rep->num_defs);
+ EXPECT_EQ(2, mirs_[0].ssa_rep->defs[0]);
+ EXPECT_EQ(2u, mirs_[0].dalvikInsn.vA);
+}
+
+TEST_F(GvnDeadCodeEliminationTestSimple, Rename5) {
+ static const IFieldDef ifields[] = {
+ { 0u, 1u, 0u, false, kDexMemAccessWord },
+ };
+ static const MIRDef mirs[] = {
+ DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
+ DEF_IGET(3, Instruction::IGET, 1u, 0u, 0u),
+ DEF_UNOP(3, Instruction::INT_TO_FLOAT, 2u, 1u),
+ DEF_MOVE(3, Instruction::MOVE_OBJECT, 3u, 0u),
+ DEF_MOVE(3, Instruction::MOVE_OBJECT, 4u, 3u),
+ DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 5u, 1000u),
+ };
+
+ static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 1, 3, 0, 1 /* high word */ };
+ PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+ PrepareIFields(ifields);
+ PrepareMIRs(mirs);
+ PerformGVN_DCE();
+
+ ASSERT_EQ(arraysize(mirs), value_names_.size());
+ static const size_t diff_indexes[] = { 0, 1, 2, 5 };
+ ExpectValueNamesNE(diff_indexes);
+ EXPECT_EQ(value_names_[0], value_names_[3]);
+ EXPECT_EQ(value_names_[0], value_names_[4]);
+
+ static const bool eliminated[] = {
+ false, false, false, true, true, false
+ };
+ static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+ for (size_t i = 0; i != arraysize(eliminated); ++i) {
+ bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+ EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+ }
+ // Check that the NEW_INSTANCE defines the s_reg 4, v_reg 3, originally defined by the move 4u.
+ ASSERT_EQ(1, mirs_[0].ssa_rep->num_defs);
+ EXPECT_EQ(4, mirs_[0].ssa_rep->defs[0]);
+ EXPECT_EQ(3u, mirs_[0].dalvikInsn.vA);
+}
+
+TEST_F(GvnDeadCodeEliminationTestSimple, Rename6) {
+ static const MIRDef mirs[] = {
+ DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 0u, 1000u),
+ DEF_MOVE_WIDE(3, Instruction::MOVE_WIDE, 2u, 0u),
+ };
+
+ static const int32_t sreg_to_vreg_map[] = { 0, 1 /* high word */, 1, 2 /* high word */ };
+ PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+ PrepareMIRs(mirs);
+ PerformGVN_DCE();
+
+ ASSERT_EQ(arraysize(mirs), value_names_.size());
+ EXPECT_EQ(value_names_[0], value_names_[1]);
+
+ static const bool eliminated[] = {
+ false, true
+ };
+ static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+ for (size_t i = 0; i != arraysize(eliminated); ++i) {
+ bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+ EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+ }
+ // Check that the CONST_WIDE defines the s_reg 2, v_reg 1, originally defined by the move 2u.
+ ASSERT_EQ(2, mirs_[0].ssa_rep->num_defs);
+ EXPECT_EQ(2, mirs_[0].ssa_rep->defs[0]);
+ EXPECT_EQ(3, mirs_[0].ssa_rep->defs[1]);
+ EXPECT_EQ(1u, mirs_[0].dalvikInsn.vA);
+}
+
+TEST_F(GvnDeadCodeEliminationTestSimple, Rename7) {
+ static const MIRDef mirs[] = {
+ DEF_CONST(3, Instruction::CONST, 0u, 1000u),
+ DEF_MOVE(3, Instruction::MOVE, 1u, 0u),
+ DEF_BINOP(3, Instruction::ADD_INT, 2u, 0u, 1u),
+ };
+
+ static const int32_t sreg_to_vreg_map[] = { 0, 1, 0 };
+ PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+ PrepareMIRs(mirs);
+ PerformGVN_DCE();
+
+ ASSERT_EQ(arraysize(mirs), value_names_.size());
+ EXPECT_NE(value_names_[0], value_names_[2]);
+ EXPECT_EQ(value_names_[0], value_names_[1]);
+
+ static const bool eliminated[] = {
+ false, true, false
+ };
+ static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+ for (size_t i = 0; i != arraysize(eliminated); ++i) {
+ bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+ EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+ }
+ // Check that the CONST defines the s_reg 1, v_reg 1, originally defined by the move 1u.
+ ASSERT_EQ(1, mirs_[0].ssa_rep->num_defs);
+ EXPECT_EQ(1, mirs_[0].ssa_rep->defs[0]);
+ EXPECT_EQ(1u, mirs_[0].dalvikInsn.vA);
+ // Check that the ADD_INT inputs are both s_reg 1, v_reg 1.
+ ASSERT_EQ(2, mirs_[2].ssa_rep->num_uses);
+ EXPECT_EQ(1, mirs_[2].ssa_rep->uses[0]);
+ EXPECT_EQ(1, mirs_[2].ssa_rep->uses[1]);
+ EXPECT_EQ(1u, mirs_[2].dalvikInsn.vB);
+ EXPECT_EQ(1u, mirs_[2].dalvikInsn.vC);
+}
+
+TEST_F(GvnDeadCodeEliminationTestSimple, Rename8) {
+ static const MIRDef mirs[] = {
+ DEF_CONST(3, Instruction::CONST, 0u, 1000u),
+ DEF_MOVE(3, Instruction::MOVE, 1u, 0u),
+ DEF_BINOP(3, Instruction::ADD_INT_2ADDR, 2u, 0u, 1u),
+ };
+
+ static const int32_t sreg_to_vreg_map[] = { 0, 1, 0 };
+ PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+ PrepareMIRs(mirs);
+ PerformGVN_DCE();
+
+ ASSERT_EQ(arraysize(mirs), value_names_.size());
+ EXPECT_NE(value_names_[0], value_names_[2]);
+ EXPECT_EQ(value_names_[0], value_names_[1]);
+
+ static const bool eliminated[] = {
+ false, true, false
+ };
+ static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+ for (size_t i = 0; i != arraysize(eliminated); ++i) {
+ bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+ EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+ }
+ // Check that the CONST defines the s_reg 1, v_reg 1, originally defined by the move 1u.
+ ASSERT_EQ(1, mirs_[0].ssa_rep->num_defs);
+ EXPECT_EQ(1, mirs_[0].ssa_rep->defs[0]);
+ EXPECT_EQ(1u, mirs_[0].dalvikInsn.vA);
+ // Check that the ADD_INT_2ADDR was replaced by ADD_INT and inputs are both s_reg 1, vreg 1.
+ EXPECT_EQ(Instruction::ADD_INT, mirs_[2].dalvikInsn.opcode);
+ ASSERT_EQ(2, mirs_[2].ssa_rep->num_uses);
+ EXPECT_EQ(1, mirs_[2].ssa_rep->uses[0]);
+ EXPECT_EQ(1, mirs_[2].ssa_rep->uses[1]);
+ EXPECT_EQ(1u, mirs_[2].dalvikInsn.vB);
+ EXPECT_EQ(1u, mirs_[2].dalvikInsn.vC);
+}
+
+TEST_F(GvnDeadCodeEliminationTestSimple, Rename9) {
+ static const MIRDef mirs[] = {
+ DEF_CONST(3, Instruction::CONST, 0u, 1000u),
+ DEF_BINOP(3, Instruction::ADD_INT_2ADDR, 1u, 0u, 0u),
+ DEF_MOVE(3, Instruction::MOVE, 2u, 1u),
+ DEF_CONST(3, Instruction::CONST, 3u, 3000u),
+ };
+
+ static const int32_t sreg_to_vreg_map[] = { 0, 0, 1, 0 };
+ PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+ PrepareMIRs(mirs);
+ PerformGVN_DCE();
+
+ ASSERT_EQ(arraysize(mirs), value_names_.size());
+ static const size_t diff_indexes[] = { 0, 1, 3 };
+ ExpectValueNamesNE(diff_indexes);
+ EXPECT_EQ(value_names_[1], value_names_[2]);
+
+ static const bool eliminated[] = {
+ false, false, true, false
+ };
+ static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+ for (size_t i = 0; i != arraysize(eliminated); ++i) {
+ bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+ EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+ }
+ // Check that the ADD_INT_2ADDR was replaced by ADD_INT and output is in s_reg 2, vreg 1.
+ EXPECT_EQ(Instruction::ADD_INT, mirs_[1].dalvikInsn.opcode);
+ ASSERT_EQ(2, mirs_[1].ssa_rep->num_uses);
+ EXPECT_EQ(0, mirs_[1].ssa_rep->uses[0]);
+ EXPECT_EQ(0, mirs_[1].ssa_rep->uses[1]);
+ EXPECT_EQ(0u, mirs_[1].dalvikInsn.vB);
+ EXPECT_EQ(0u, mirs_[1].dalvikInsn.vC);
+ ASSERT_EQ(1, mirs_[1].ssa_rep->num_defs);
+ EXPECT_EQ(2, mirs_[1].ssa_rep->defs[0]);
+ EXPECT_EQ(1u, mirs_[1].dalvikInsn.vA);
+}
+
+TEST_F(GvnDeadCodeEliminationTestSimple, NoRename1) {
+ static const IFieldDef ifields[] = {
+ { 0u, 1u, 0u, false, kDexMemAccessWord },
+ { 1u, 1u, 1u, false, kDexMemAccessWord },
+ };
+ static const MIRDef mirs[] = {
+ DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
+ DEF_IGET(3, Instruction::IGET, 1u, 0u, 0u),
+ DEF_UNOP(3, Instruction::INT_TO_FLOAT, 2u, 1u),
+ DEF_MOVE(3, Instruction::MOVE_OBJECT, 3u, 0u),
+ DEF_CONST(3, Instruction::CONST, 4u, 1000),
+ DEF_IGET(3, Instruction::IGET, 5u, 3u, 1u),
+ };
+
+ static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 1, 0, 1 };
+ PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+ PrepareIFields(ifields);
+ PrepareMIRs(mirs);
+ PerformGVN_DCE();
+
+ ASSERT_EQ(arraysize(mirs), value_names_.size());
+ static const size_t diff_indexes[] = { 0, 1, 2, 4, 5 };
+ ExpectValueNamesNE(diff_indexes);
+ EXPECT_EQ(value_names_[0], value_names_[3]);
+
+ const size_t no_null_ck_indexes[] = { 1, 5 };
+ ExpectNoNullCheck(no_null_ck_indexes);
+
+ static const bool eliminated[] = {
+ false, false, false, false, false, false
+ };
+ static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+ for (size_t i = 0; i != arraysize(eliminated); ++i) {
+ bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+ EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+ }
+}
+
+TEST_F(GvnDeadCodeEliminationTestSimple, NoRename2) {
+ static const IFieldDef ifields[] = {
+ { 0u, 1u, 0u, false, kDexMemAccessWord },
+ { 1u, 1u, 1u, false, kDexMemAccessWord },
+ };
+ static const MIRDef mirs[] = {
+ DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
+ DEF_IGET(3, Instruction::IGET, 1u, 0u, 0u),
+ DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 2u),
+ DEF_MOVE(3, Instruction::MOVE_OBJECT, 3u, 0u),
+ DEF_CONST(3, Instruction::CONST, 4u, 1000),
+ DEF_IGET(3, Instruction::IGET, 5u, 3u, 1u),
+ DEF_CONST(3, Instruction::CONST, 6u, 2000),
+ };
+
+ static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 2, 0, 3, 2 };
+ PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+ PrepareIFields(ifields);
+ PrepareMIRs(mirs);
+ PerformGVN_DCE();
+
+ ASSERT_EQ(arraysize(mirs), value_names_.size());
+ static const size_t diff_indexes[] = { 0, 1, 2, 4, 5, 6 };
+ ExpectValueNamesNE(diff_indexes);
+ EXPECT_EQ(value_names_[0], value_names_[3]);
+
+ const size_t no_null_ck_indexes[] = { 1, 5 };
+ ExpectNoNullCheck(no_null_ck_indexes);
+
+ static const bool eliminated[] = {
+ false, false, false, false, false, false, false
+ };
+ static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+ for (size_t i = 0; i != arraysize(eliminated); ++i) {
+ bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+ EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+ }
+}
+
+TEST_F(GvnDeadCodeEliminationTestSimple, NoRename3) {
+ static const IFieldDef ifields[] = {
+ { 0u, 1u, 0u, false, kDexMemAccessWord },
+ { 1u, 1u, 1u, false, kDexMemAccessWord },
+ { 2u, 1u, 2u, false, kDexMemAccessWord },
+ };
+ static const MIRDef mirs[] = {
+ DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
+ DEF_IGET(3, Instruction::IGET, 1u, 0u, 0u),
+ DEF_IGET(3, Instruction::IGET, 2u, 0u, 2u),
+ DEF_BINOP(3, Instruction::ADD_INT, 3u, 1u, 2u),
+ DEF_MOVE(3, Instruction::MOVE_OBJECT, 4u, 0u),
+ DEF_IGET(3, Instruction::IGET, 5u, 4u, 1u),
+ };
+
+ static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 2, 0 };
+ PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+ PrepareIFields(ifields);
+ PrepareMIRs(mirs);
+ PerformGVN_DCE();
+
+ ASSERT_EQ(arraysize(mirs), value_names_.size());
+ static const size_t diff_indexes[] = { 0, 1, 2, 3, 5 };
+ ExpectValueNamesNE(diff_indexes);
+ EXPECT_EQ(value_names_[0], value_names_[4]);
+
+ const size_t no_null_ck_indexes[] = { 1, 2, 5 };
+ ExpectNoNullCheck(no_null_ck_indexes);
+
+ static const bool eliminated[] = {
+ false, false, false, false, false, false
+ };
+ static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+ for (size_t i = 0; i != arraysize(eliminated); ++i) {
+ bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+ EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+ }
+}
+
+TEST_F(GvnDeadCodeEliminationTestSimple, Simple1) {
+ static const IFieldDef ifields[] = {
+ { 0u, 1u, 0u, false, kDexMemAccessObject },
+ { 1u, 1u, 1u, false, kDexMemAccessObject },
+ { 2u, 1u, 2u, false, kDexMemAccessWord },
+ };
+ static const MIRDef mirs[] = {
+ DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
+ DEF_IGET(3, Instruction::IGET_OBJECT, 1u, 0u, 0u),
+ DEF_IGET(3, Instruction::IGET_OBJECT, 2u, 1u, 1u),
+ DEF_IGET(3, Instruction::IGET, 3u, 2u, 2u),
+ DEF_IGET(3, Instruction::IGET_OBJECT, 4u, 0u, 0u),
+ DEF_IGET(3, Instruction::IGET_OBJECT, 5u, 4u, 1u),
+ };
+
+ static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 1, 2 };
+ PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+ PrepareIFields(ifields);
+ PrepareMIRs(mirs);
+ PerformGVN_DCE();
+
+ ASSERT_EQ(arraysize(mirs), value_names_.size());
+ EXPECT_NE(value_names_[0], value_names_[1]);
+ EXPECT_NE(value_names_[0], value_names_[2]);
+ EXPECT_NE(value_names_[0], value_names_[3]);
+ EXPECT_NE(value_names_[1], value_names_[2]);
+ EXPECT_NE(value_names_[1], value_names_[3]);
+ EXPECT_NE(value_names_[2], value_names_[3]);
+ EXPECT_EQ(value_names_[1], value_names_[4]);
+ EXPECT_EQ(value_names_[2], value_names_[5]);
+
+ EXPECT_EQ(MIR_IGNORE_NULL_CHECK, mirs_[4].optimization_flags & MIR_IGNORE_NULL_CHECK);
+ EXPECT_EQ(MIR_IGNORE_NULL_CHECK, mirs_[5].optimization_flags & MIR_IGNORE_NULL_CHECK);
+
+ static const bool eliminated[] = {
+ false, false, false, false, true, true
+ };
+ static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+ for (size_t i = 0; i != arraysize(eliminated); ++i) {
+ bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+ EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+ }
+ // Check that the sregs have been renamed correctly.
+ ASSERT_EQ(1, mirs_[1].ssa_rep->num_defs);
+ EXPECT_EQ(4, mirs_[1].ssa_rep->defs[0]);
+ ASSERT_EQ(1, mirs_[1].ssa_rep->num_uses);
+ EXPECT_EQ(0, mirs_[1].ssa_rep->uses[0]);
+ ASSERT_EQ(1, mirs_[2].ssa_rep->num_defs);
+ EXPECT_EQ(5, mirs_[2].ssa_rep->defs[0]);
+ ASSERT_EQ(1, mirs_[2].ssa_rep->num_uses);
+ EXPECT_EQ(4, mirs_[2].ssa_rep->uses[0]);
+ ASSERT_EQ(1, mirs_[3].ssa_rep->num_defs);
+ EXPECT_EQ(3, mirs_[3].ssa_rep->defs[0]);
+ ASSERT_EQ(1, mirs_[3].ssa_rep->num_uses);
+ EXPECT_EQ(5, mirs_[3].ssa_rep->uses[0]);
+}
+
+TEST_F(GvnDeadCodeEliminationTestSimple, Simple2) {
+ static const IFieldDef ifields[] = {
+ { 0u, 1u, 0u, false, kDexMemAccessWord },
+ };
+ static const MIRDef mirs[] = {
+ DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
+ DEF_CONST(3, Instruction::CONST, 1u, 1000),
+ DEF_IGET(3, Instruction::IGET, 2u, 0u, 0u),
+ DEF_BINOP(3, Instruction::ADD_INT_2ADDR, 3u, 2u, 1u),
+ DEF_UNOP(3, Instruction::INT_TO_FLOAT, 4u, 3u),
+ DEF_IGET(3, Instruction::IGET, 5u, 0u, 0u),
+ DEF_BINOP(3, Instruction::ADD_INT_2ADDR, 6u, 5u, 1u),
+ };
+
+ static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 2, 3, 2, 2 };
+ PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+ PrepareIFields(ifields);
+ PrepareMIRs(mirs);
+ PerformGVN_DCE();
+
+ ASSERT_EQ(arraysize(mirs), value_names_.size());
+ static const size_t diff_indexes[] = { 0, 1, 2, 3 };
+ ExpectValueNamesNE(diff_indexes);
+ EXPECT_EQ(value_names_[2], value_names_[5]);
+ EXPECT_EQ(value_names_[3], value_names_[6]);
+
+ const size_t no_null_ck_indexes[] = { 2, 5 };
+ ExpectNoNullCheck(no_null_ck_indexes);
+
+ static const bool eliminated[] = {
+ false, false, false, false, false, true, true
+ };
+ static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+ for (size_t i = 0; i != arraysize(eliminated); ++i) {
+ bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+ EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+ }
+ // Check that the sregs have been renamed correctly.
+ ASSERT_EQ(1, mirs_[3].ssa_rep->num_defs);
+ EXPECT_EQ(6, mirs_[3].ssa_rep->defs[0]);
+ ASSERT_EQ(2, mirs_[3].ssa_rep->num_uses);
+ EXPECT_EQ(2, mirs_[3].ssa_rep->uses[0]);
+ EXPECT_EQ(1, mirs_[3].ssa_rep->uses[1]);
+ ASSERT_EQ(1, mirs_[4].ssa_rep->num_defs);
+ EXPECT_EQ(4, mirs_[4].ssa_rep->defs[0]);
+ ASSERT_EQ(1, mirs_[4].ssa_rep->num_uses);
+ EXPECT_EQ(6, mirs_[4].ssa_rep->uses[0]);
+}
+
+TEST_F(GvnDeadCodeEliminationTestSimple, Simple3) {
+ static const IFieldDef ifields[] = {
+ { 0u, 1u, 0u, false, kDexMemAccessWord },
+ };
+ static const MIRDef mirs[] = {
+ DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
+ DEF_CONST(3, Instruction::CONST, 1u, 1000),
+ DEF_CONST(3, Instruction::CONST, 2u, 2000),
+ DEF_CONST(3, Instruction::CONST, 3u, 3000),
+ DEF_IGET(3, Instruction::IGET, 4u, 0u, 0u),
+ DEF_BINOP(3, Instruction::ADD_INT, 5u, 4u, 1u),
+ DEF_BINOP(3, Instruction::MUL_INT, 6u, 5u, 2u),
+ DEF_BINOP(3, Instruction::SUB_INT, 7u, 6u, 3u),
+ DEF_UNOP(3, Instruction::INT_TO_FLOAT, 8u, 7u),
+ DEF_IGET(3, Instruction::IGET, 9u, 0u, 0u),
+ DEF_BINOP(3, Instruction::ADD_INT, 10u, 9u, 1u),
+ DEF_BINOP(3, Instruction::MUL_INT, 11u, 10u, 2u), // Simple elimination of ADD+MUL
+ DEF_BINOP(3, Instruction::SUB_INT, 12u, 11u, 3u), // allows simple elimination of IGET+SUB.
+ };
+
+ static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 4, 5, 5, 4, 6, 4, 5, 5, 4 };
+ PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+ PrepareIFields(ifields);
+ PrepareMIRs(mirs);
+ PerformGVN_DCE();
+
+ ASSERT_EQ(arraysize(mirs), value_names_.size());
+ static const size_t diff_indexes[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8 };
+ ExpectValueNamesNE(diff_indexes);
+ EXPECT_EQ(value_names_[4], value_names_[9]);
+ EXPECT_EQ(value_names_[5], value_names_[10]);
+ EXPECT_EQ(value_names_[6], value_names_[11]);
+ EXPECT_EQ(value_names_[7], value_names_[12]);
+
+ const size_t no_null_ck_indexes[] = { 4, 9 };
+ ExpectNoNullCheck(no_null_ck_indexes);
+
+ static const bool eliminated[] = {
+ false, false, false, false, false, false, false, false, false, true, true, true, true
+ };
+ static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+ for (size_t i = 0; i != arraysize(eliminated); ++i) {
+ bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+ EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+ }
+ // Check that the sregs have been renamed correctly.
+ ASSERT_EQ(1, mirs_[6].ssa_rep->num_defs);
+ EXPECT_EQ(11, mirs_[6].ssa_rep->defs[0]); // 6 -> 11
+ ASSERT_EQ(2, mirs_[6].ssa_rep->num_uses);
+ EXPECT_EQ(5, mirs_[6].ssa_rep->uses[0]);
+ EXPECT_EQ(2, mirs_[6].ssa_rep->uses[1]);
+ ASSERT_EQ(1, mirs_[7].ssa_rep->num_defs);
+ EXPECT_EQ(12, mirs_[7].ssa_rep->defs[0]); // 7 -> 12
+ ASSERT_EQ(2, mirs_[7].ssa_rep->num_uses);
+ EXPECT_EQ(11, mirs_[7].ssa_rep->uses[0]); // 6 -> 11
+ EXPECT_EQ(3, mirs_[7].ssa_rep->uses[1]);
+ ASSERT_EQ(1, mirs_[8].ssa_rep->num_defs);
+ EXPECT_EQ(8, mirs_[8].ssa_rep->defs[0]);
+ ASSERT_EQ(1, mirs_[8].ssa_rep->num_uses);
+ EXPECT_EQ(12, mirs_[8].ssa_rep->uses[0]); // 7 -> 12
+}
+
+TEST_F(GvnDeadCodeEliminationTestSimple, Simple4) {
+ static const IFieldDef ifields[] = {
+ { 0u, 1u, 0u, false, kDexMemAccessWord },
+ };
+ static const MIRDef mirs[] = {
+ DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
+ DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 1u, INT64_C(1)),
+ DEF_BINOP(3, Instruction::LONG_TO_FLOAT, 3u, 1u, 2u),
+ DEF_IGET(3, Instruction::IGET, 4u, 0u, 0u),
+ DEF_UNOP(3, Instruction::INT_TO_FLOAT, 5u, 4u),
+ DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 6u, INT64_C(1)),
+ DEF_BINOP(3, Instruction::LONG_TO_FLOAT, 8u, 6u, 7u),
+ DEF_IGET(3, Instruction::IGET, 9u, 0u, 0u),
+ };
+
+ static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 1, 2, 3, 1, 2, 1, 2 };
+ PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+ PrepareIFields(ifields);
+ PrepareMIRs(mirs);
+ PerformGVN_DCE();
+
+ ASSERT_EQ(arraysize(mirs), value_names_.size());
+ static const size_t diff_indexes[] = { 0, 1, 2, 3, 4 };
+ ExpectValueNamesNE(diff_indexes);
+ EXPECT_EQ(value_names_[1], value_names_[5]);
+ EXPECT_EQ(value_names_[2], value_names_[6]);
+ EXPECT_EQ(value_names_[3], value_names_[7]);
+
+ const size_t no_null_ck_indexes[] = { 3, 7 };
+ ExpectNoNullCheck(no_null_ck_indexes);
+
+ static const bool eliminated[] = {
+ // Simple elimination of CONST_WIDE+LONG_TO_FLOAT allows simple elimination of IGET.
+ false, false, false, false, false, true, true, true
+ };
+ static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+ for (size_t i = 0; i != arraysize(eliminated); ++i) {
+ bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+ EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+ }
+ // Check that the sregs have been renamed correctly.
+ ASSERT_EQ(1, mirs_[2].ssa_rep->num_defs);
+ EXPECT_EQ(8, mirs_[2].ssa_rep->defs[0]); // 3 -> 8
+ ASSERT_EQ(2, mirs_[2].ssa_rep->num_uses);
+ EXPECT_EQ(1, mirs_[2].ssa_rep->uses[0]);
+ EXPECT_EQ(2, mirs_[2].ssa_rep->uses[1]);
+ ASSERT_EQ(1, mirs_[3].ssa_rep->num_defs);
+ EXPECT_EQ(9, mirs_[3].ssa_rep->defs[0]); // 4 -> 9
+ ASSERT_EQ(1, mirs_[3].ssa_rep->num_uses);
+ EXPECT_EQ(0, mirs_[3].ssa_rep->uses[0]);
+ ASSERT_EQ(1, mirs_[4].ssa_rep->num_defs);
+ EXPECT_EQ(5, mirs_[4].ssa_rep->defs[0]);
+ ASSERT_EQ(1, mirs_[4].ssa_rep->num_uses);
+ EXPECT_EQ(9, mirs_[4].ssa_rep->uses[0]); // 4 -> 9
+}
+
+TEST_F(GvnDeadCodeEliminationTestSimple, KillChain1) {
+ static const IFieldDef ifields[] = {
+ { 0u, 1u, 0u, false, kDexMemAccessWord },
+ };
+ static const MIRDef mirs[] = {
+ DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
+ DEF_CONST(3, Instruction::CONST, 1u, 1000),
+ DEF_CONST(3, Instruction::CONST, 2u, 2000),
+ DEF_CONST(3, Instruction::CONST, 3u, 3000),
+ DEF_IGET(3, Instruction::IGET, 4u, 0u, 0u),
+ DEF_BINOP(3, Instruction::ADD_INT, 5u, 4u, 1u),
+ DEF_BINOP(3, Instruction::MUL_INT, 6u, 5u, 2u),
+ DEF_BINOP(3, Instruction::SUB_INT, 7u, 6u, 3u),
+ DEF_UNOP(3, Instruction::INT_TO_FLOAT, 8u, 7u),
+ DEF_IGET(3, Instruction::IGET, 9u, 0u, 0u),
+ DEF_BINOP(3, Instruction::ADD_INT, 10u, 9u, 1u),
+ DEF_BINOP(3, Instruction::MUL_INT, 11u, 10u, 2u),
+ DEF_BINOP(3, Instruction::SUB_INT, 12u, 11u, 3u),
+ };
+
+ static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 4, 5, 4, 5, 6, 4, 5, 4, 5 };
+ PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+ PrepareIFields(ifields);
+ PrepareMIRs(mirs);
+ PerformGVN_DCE();
+
+ ASSERT_EQ(arraysize(mirs), value_names_.size());
+ static const size_t diff_indexes[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8 };
+ ExpectValueNamesNE(diff_indexes);
+ EXPECT_EQ(value_names_[4], value_names_[9]);
+ EXPECT_EQ(value_names_[5], value_names_[10]);
+ EXPECT_EQ(value_names_[6], value_names_[11]);
+ EXPECT_EQ(value_names_[7], value_names_[12]);
+
+ const size_t no_null_ck_indexes[] = { 4, 9 };
+ ExpectNoNullCheck(no_null_ck_indexes);
+
+ static const bool eliminated[] = {
+ false, false, false, false, false, false, false, false, false, true, true, true, true
+ };
+ static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+ for (size_t i = 0; i != arraysize(eliminated); ++i) {
+ bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+ EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+ }
+ // Check that the sregs have been renamed correctly.
+ ASSERT_EQ(1, mirs_[6].ssa_rep->num_defs);
+ EXPECT_EQ(11, mirs_[6].ssa_rep->defs[0]); // 6 -> 11
+ ASSERT_EQ(2, mirs_[6].ssa_rep->num_uses);
+ EXPECT_EQ(5, mirs_[6].ssa_rep->uses[0]);
+ EXPECT_EQ(2, mirs_[6].ssa_rep->uses[1]);
+ ASSERT_EQ(1, mirs_[7].ssa_rep->num_defs);
+ EXPECT_EQ(12, mirs_[7].ssa_rep->defs[0]); // 7 -> 12
+ ASSERT_EQ(2, mirs_[7].ssa_rep->num_uses);
+ EXPECT_EQ(11, mirs_[7].ssa_rep->uses[0]); // 6 -> 11
+ EXPECT_EQ(3, mirs_[7].ssa_rep->uses[1]);
+ ASSERT_EQ(1, mirs_[8].ssa_rep->num_defs);
+ EXPECT_EQ(8, mirs_[8].ssa_rep->defs[0]);
+ ASSERT_EQ(1, mirs_[8].ssa_rep->num_uses);
+ EXPECT_EQ(12, mirs_[8].ssa_rep->uses[0]); // 7 -> 12
+}
+
+TEST_F(GvnDeadCodeEliminationTestSimple, KillChain2) {
+ static const IFieldDef ifields[] = {
+ { 0u, 1u, 0u, false, kDexMemAccessWord },
+ };
+ static const MIRDef mirs[] = {
+ DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
+ DEF_CONST(3, Instruction::CONST, 1u, 1000),
+ DEF_CONST(3, Instruction::CONST, 2u, 2000),
+ DEF_CONST(3, Instruction::CONST, 3u, 3000),
+ DEF_IGET(3, Instruction::IGET, 4u, 0u, 0u),
+ DEF_BINOP(3, Instruction::ADD_INT, 5u, 4u, 1u),
+ DEF_BINOP(3, Instruction::MUL_INT, 6u, 5u, 2u),
+ DEF_BINOP(3, Instruction::SUB_INT, 7u, 6u, 3u),
+ DEF_UNOP(3, Instruction::INT_TO_FLOAT, 8u, 7u),
+ DEF_IGET(3, Instruction::IGET, 9u, 0u, 0u),
+ DEF_BINOP(3, Instruction::ADD_INT, 10u, 9u, 1u),
+ DEF_BINOP(3, Instruction::MUL_INT, 11u, 10u, 2u),
+ DEF_BINOP(3, Instruction::SUB_INT, 12u, 11u, 3u),
+ DEF_CONST(3, Instruction::CONST, 13u, 4000),
+ };
+
+ static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 4, 5, 5, 4, 6, 4, 7, 7, 4, 7 };
+ PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+ PrepareIFields(ifields);
+ PrepareMIRs(mirs);
+ PerformGVN_DCE();
+
+ ASSERT_EQ(arraysize(mirs), value_names_.size());
+ static const size_t diff_indexes[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 13 };
+ ExpectValueNamesNE(diff_indexes);
+ EXPECT_EQ(value_names_[4], value_names_[9]);
+ EXPECT_EQ(value_names_[5], value_names_[10]);
+ EXPECT_EQ(value_names_[6], value_names_[11]);
+ EXPECT_EQ(value_names_[7], value_names_[12]);
+
+ const size_t no_null_ck_indexes[] = { 4, 9 };
+ ExpectNoNullCheck(no_null_ck_indexes);
+
+ static const bool eliminated[] = {
+ false, false, false, false, false, false, false, false, false, true, true, true, true, false
+ };
+ static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+ for (size_t i = 0; i != arraysize(eliminated); ++i) {
+ bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+ EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+ }
+ // Check that the sregs have been renamed correctly.
+ ASSERT_EQ(1, mirs_[7].ssa_rep->num_defs);
+ EXPECT_EQ(12, mirs_[7].ssa_rep->defs[0]); // 7 -> 12
+ ASSERT_EQ(2, mirs_[7].ssa_rep->num_uses);
+ EXPECT_EQ(6, mirs_[7].ssa_rep->uses[0]);
+ EXPECT_EQ(3, mirs_[7].ssa_rep->uses[1]);
+ ASSERT_EQ(1, mirs_[8].ssa_rep->num_defs);
+ EXPECT_EQ(8, mirs_[8].ssa_rep->defs[0]);
+ ASSERT_EQ(1, mirs_[8].ssa_rep->num_uses);
+ EXPECT_EQ(12, mirs_[8].ssa_rep->uses[0]); // 7 -> 12
+}
+
+TEST_F(GvnDeadCodeEliminationTestSimple, KillChain3) {
+ static const IFieldDef ifields[] = {
+ { 0u, 1u, 0u, false, kDexMemAccessWord },
+ };
+ static const MIRDef mirs[] = {
+ DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
+ DEF_CONST(3, Instruction::CONST, 1u, 1000),
+ DEF_CONST(3, Instruction::CONST, 2u, 2000),
+ DEF_CONST(3, Instruction::CONST, 3u, 3000),
+ DEF_IGET(3, Instruction::IGET, 4u, 0u, 0u),
+ DEF_BINOP(3, Instruction::ADD_INT, 5u, 4u, 1u),
+ DEF_BINOP(3, Instruction::MUL_INT, 6u, 5u, 2u),
+ DEF_BINOP(3, Instruction::SUB_INT, 7u, 6u, 3u),
+ DEF_UNOP(3, Instruction::INT_TO_FLOAT, 8u, 7u),
+ DEF_IGET(3, Instruction::IGET, 9u, 0u, 0u),
+ DEF_BINOP(3, Instruction::ADD_INT, 10u, 9u, 1u),
+ DEF_BINOP(3, Instruction::MUL_INT, 11u, 10u, 2u),
+ DEF_CONST(3, Instruction::CONST, 12u, 4000),
+ DEF_BINOP(3, Instruction::SUB_INT, 13u, 11u, 3u),
+ };
+
+ static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 4, 5, 5, 4, 6, 4, 7, 4, 7, 4 };
+ PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+ PrepareIFields(ifields);
+ PrepareMIRs(mirs);
+ PerformGVN_DCE();
+
+ ASSERT_EQ(arraysize(mirs), value_names_.size());
+ static const size_t diff_indexes[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 12 };
+ ExpectValueNamesNE(diff_indexes);
+ EXPECT_EQ(value_names_[4], value_names_[9]);
+ EXPECT_EQ(value_names_[5], value_names_[10]);
+ EXPECT_EQ(value_names_[6], value_names_[11]);
+ EXPECT_EQ(value_names_[7], value_names_[13]);
+
+ const size_t no_null_ck_indexes[] = { 4, 9 };
+ ExpectNoNullCheck(no_null_ck_indexes);
+
+ static const bool eliminated[] = {
+ false, false, false, false, false, false, false, false, false, true, true, true, false, true
+ };
+ static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+ for (size_t i = 0; i != arraysize(eliminated); ++i) {
+ bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+ EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+ }
+ // Check that the sregs have been renamed correctly.
+ ASSERT_EQ(1, mirs_[7].ssa_rep->num_defs);
+ EXPECT_EQ(13, mirs_[7].ssa_rep->defs[0]); // 7 -> 13
+ ASSERT_EQ(2, mirs_[7].ssa_rep->num_uses);
+ EXPECT_EQ(6, mirs_[7].ssa_rep->uses[0]);
+ EXPECT_EQ(3, mirs_[7].ssa_rep->uses[1]);
+ ASSERT_EQ(1, mirs_[8].ssa_rep->num_defs);
+ EXPECT_EQ(8, mirs_[8].ssa_rep->defs[0]);
+ ASSERT_EQ(1, mirs_[8].ssa_rep->num_uses);
+ EXPECT_EQ(13, mirs_[8].ssa_rep->uses[0]); // 7 -> 13
+}
+
+TEST_F(GvnDeadCodeEliminationTestSimple, KeepChain1) {
+ // KillChain2 without the final CONST.
+ static const IFieldDef ifields[] = {
+ { 0u, 1u, 0u, false, kDexMemAccessWord },
+ };
+ static const MIRDef mirs[] = {
+ DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
+ DEF_CONST(3, Instruction::CONST, 1u, 1000),
+ DEF_CONST(3, Instruction::CONST, 2u, 2000),
+ DEF_CONST(3, Instruction::CONST, 3u, 3000),
+ DEF_IGET(3, Instruction::IGET, 4u, 0u, 0u),
+ DEF_BINOP(3, Instruction::ADD_INT, 5u, 4u, 1u),
+ DEF_BINOP(3, Instruction::MUL_INT, 6u, 5u, 2u),
+ DEF_BINOP(3, Instruction::SUB_INT, 7u, 6u, 3u),
+ DEF_UNOP(3, Instruction::INT_TO_FLOAT, 8u, 7u),
+ DEF_IGET(3, Instruction::IGET, 9u, 0u, 0u),
+ DEF_BINOP(3, Instruction::ADD_INT, 10u, 9u, 1u),
+ DEF_BINOP(3, Instruction::MUL_INT, 11u, 10u, 2u),
+ DEF_BINOP(3, Instruction::SUB_INT, 12u, 11u, 3u),
+ };
+
+ static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 4, 5, 5, 4, 6, 4, 7, 7, 4 };
+ PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+ PrepareIFields(ifields);
+ PrepareMIRs(mirs);
+ PerformGVN_DCE();
+
+ ASSERT_EQ(arraysize(mirs), value_names_.size());
+ static const size_t diff_indexes[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8 };
+ ExpectValueNamesNE(diff_indexes);
+ EXPECT_EQ(value_names_[4], value_names_[9]);
+ EXPECT_EQ(value_names_[5], value_names_[10]);
+ EXPECT_EQ(value_names_[6], value_names_[11]);
+ EXPECT_EQ(value_names_[7], value_names_[12]);
+
+ const size_t no_null_ck_indexes[] = { 4, 9 };
+ ExpectNoNullCheck(no_null_ck_indexes);
+
+ static const bool eliminated[] = {
+ false, false, false, false, false, false, false, false, false, false, false, false, false
+ };
+ static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+ for (size_t i = 0; i != arraysize(eliminated); ++i) {
+ bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+ EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+ }
+}
+
+TEST_F(GvnDeadCodeEliminationTestSimple, KeepChain2) {
+ // KillChain1 with MIRs in the middle of the chain.
+ static const IFieldDef ifields[] = {
+ { 0u, 1u, 0u, false, kDexMemAccessWord },
+ };
+ static const MIRDef mirs[] = {
+ DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
+ DEF_CONST(3, Instruction::CONST, 1u, 1000),
+ DEF_CONST(3, Instruction::CONST, 2u, 2000),
+ DEF_CONST(3, Instruction::CONST, 3u, 3000),
+ DEF_IGET(3, Instruction::IGET, 4u, 0u, 0u),
+ DEF_BINOP(3, Instruction::ADD_INT, 5u, 4u, 1u),
+ DEF_BINOP(3, Instruction::MUL_INT, 6u, 5u, 2u),
+ DEF_BINOP(3, Instruction::SUB_INT, 7u, 6u, 3u),
+ DEF_UNOP(3, Instruction::INT_TO_FLOAT, 8u, 7u),
+ DEF_IGET(3, Instruction::IGET, 9u, 0u, 0u),
+ DEF_BINOP(3, Instruction::ADD_INT, 10u, 9u, 1u),
+ DEF_CONST(3, Instruction::CONST, 11u, 4000),
+ DEF_UNOP(3, Instruction::INT_TO_FLOAT, 12u, 11u),
+ DEF_BINOP(3, Instruction::MUL_INT, 13u, 10u, 2u),
+ DEF_BINOP(3, Instruction::SUB_INT, 14u, 13u, 3u),
+ };
+
+ static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3, 4, 5, 4, 5, 6, 4, 5, 4, 7, 4, 5 };
+ PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+ PrepareIFields(ifields);
+ PrepareMIRs(mirs);
+ PerformGVN_DCE();
+
+ ASSERT_EQ(arraysize(mirs), value_names_.size());
+ static const size_t diff_indexes[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8 };
+ ExpectValueNamesNE(diff_indexes);
+ EXPECT_EQ(value_names_[4], value_names_[9]);
+ EXPECT_EQ(value_names_[5], value_names_[10]);
+ EXPECT_EQ(value_names_[6], value_names_[13]);
+ EXPECT_EQ(value_names_[7], value_names_[14]);
+
+ const size_t no_null_ck_indexes[] = { 4, 9 };
+ ExpectNoNullCheck(no_null_ck_indexes);
+
+ static const bool eliminated[] = {
+ false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false
+ };
+ static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+ for (size_t i = 0; i != arraysize(eliminated); ++i) {
+ bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+ EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+ }
+}
+
+TEST_F(GvnDeadCodeEliminationTestDiamond, CreatePhi1) {
+ static const MIRDef mirs[] = {
+ DEF_CONST(3, Instruction::CONST, 0u, 1000),
+ DEF_CONST(4, Instruction::CONST, 1u, 1000),
+ };
+
+ static const int32_t sreg_to_vreg_map[] = { 0, 0 };
+ PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+ PrepareMIRs(mirs);
+ PerformGVN_DCE();
+
+ ASSERT_EQ(arraysize(mirs), value_names_.size());
+ EXPECT_EQ(value_names_[0], value_names_[1]);
+
+ static const bool eliminated[] = {
+ false, true,
+ };
+ static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+ for (size_t i = 0; i != arraysize(eliminated); ++i) {
+ bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+ EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+ }
+ // Check that we've created a single-input Phi to replace the CONST 1u.
+ BasicBlock* bb4 = cu_.mir_graph->GetBasicBlock(4);
+ MIR* phi = bb4->first_mir_insn;
+ ASSERT_TRUE(phi != nullptr);
+ ASSERT_EQ(kMirOpPhi, static_cast<int>(phi->dalvikInsn.opcode));
+ ASSERT_EQ(1, phi->ssa_rep->num_uses);
+ EXPECT_EQ(0, phi->ssa_rep->uses[0]);
+ ASSERT_EQ(1, phi->ssa_rep->num_defs);
+ EXPECT_EQ(1, phi->ssa_rep->defs[0]);
+ EXPECT_EQ(0u, phi->dalvikInsn.vA);
+}
+
+TEST_F(GvnDeadCodeEliminationTestDiamond, CreatePhi2) {
+ static const IFieldDef ifields[] = {
+ { 0u, 1u, 0u, false, kDexMemAccessWord },
+ };
+ static const MIRDef mirs[] = {
+ DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
+ DEF_CONST(4, Instruction::CONST, 1u, 1000),
+ DEF_IPUT(4, Instruction::IPUT, 1u, 0u, 0u),
+ DEF_CONST(5, Instruction::CONST, 3u, 2000),
+ DEF_IPUT(5, Instruction::IPUT, 3u, 0u, 0u),
+ DEF_IGET(6, Instruction::IGET, 5u, 0u, 0u),
+ };
+
+ static const int32_t sreg_to_vreg_map[] = { 0, 1, 2 /* dummy */, 1, 2 /* dummy */, 1 };
+ PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+ PrepareIFields(ifields);
+ PrepareMIRs(mirs);
+ PerformGVN_DCE();
+
+ ASSERT_EQ(arraysize(mirs), value_names_.size());
+ static const size_t diff_indexes[] = { 0, 1, 3, 5 };
+ ExpectValueNamesNE(diff_indexes);
+
+ const size_t no_null_ck_indexes[] = { 2, 4, 5 };
+ ExpectNoNullCheck(no_null_ck_indexes);
+
+ static const bool eliminated[] = {
+ false, false, false, false, false, true,
+ };
+ static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+ for (size_t i = 0; i != arraysize(eliminated); ++i) {
+ bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+ EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+ }
+ // Check that we've created a two-input Phi to replace the IGET 5u.
+ BasicBlock* bb6 = cu_.mir_graph->GetBasicBlock(6);
+ MIR* phi = bb6->first_mir_insn;
+ ASSERT_TRUE(phi != nullptr);
+ ASSERT_EQ(kMirOpPhi, static_cast<int>(phi->dalvikInsn.opcode));
+ ASSERT_EQ(2, phi->ssa_rep->num_uses);
+ EXPECT_EQ(1, phi->ssa_rep->uses[0]);
+ EXPECT_EQ(3, phi->ssa_rep->uses[1]);
+ ASSERT_EQ(1, phi->ssa_rep->num_defs);
+ EXPECT_EQ(5, phi->ssa_rep->defs[0]);
+ EXPECT_EQ(1u, phi->dalvikInsn.vA);
+}
+
+TEST_F(GvnDeadCodeEliminationTestDiamond, KillChainInAnotherBlock1) {
+ static const IFieldDef ifields[] = {
+ { 0u, 1u, 0u, false, kDexMemAccessObject }, // linked list
+ };
+ static const MIRDef mirs[] = {
+ DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
+ DEF_IGET(3, Instruction::IGET_OBJECT, 1u, 0u, 0u),
+ DEF_IGET(3, Instruction::IGET_OBJECT, 2u, 1u, 0u),
+ DEF_IGET(3, Instruction::IGET_OBJECT, 3u, 2u, 0u),
+ DEF_IGET(3, Instruction::IGET_OBJECT, 4u, 3u, 0u),
+ DEF_IFZ(3, Instruction::IF_NEZ, 4u),
+ DEF_IGET(4, Instruction::IGET_OBJECT, 6u, 0u, 0u),
+ DEF_IGET(4, Instruction::IGET_OBJECT, 7u, 6u, 0u),
+ DEF_IGET(4, Instruction::IGET_OBJECT, 8u, 7u, 0u),
+ DEF_IGET(4, Instruction::IGET_OBJECT, 9u, 8u, 0u),
+ };
+
+ static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 1, 2, 3 /* dummy */, 1, 2, 1, 2 };
+ PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+ PrepareIFields(ifields);
+ PrepareMIRs(mirs);
+ PerformGVN_DCE();
+
+ ASSERT_EQ(arraysize(mirs), value_names_.size());
+ static const size_t diff_indexes[] = { 0, 1, 2, 3, 4 };
+ ExpectValueNamesNE(diff_indexes);
+ EXPECT_EQ(value_names_[1], value_names_[6]);
+ EXPECT_EQ(value_names_[2], value_names_[7]);
+ EXPECT_EQ(value_names_[3], value_names_[8]);
+ EXPECT_EQ(value_names_[4], value_names_[9]);
+
+ const size_t no_null_ck_indexes[] = { 1, 6, 7, 8, 9 };
+ ExpectNoNullCheck(no_null_ck_indexes);
+
+ static const bool eliminated[] = {
+ false, false, false, false, false, false, true, true, true, true,
+ };
+ static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+ for (size_t i = 0; i != arraysize(eliminated); ++i) {
+ bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+ EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+ }
+ // Check that we've created two single-input Phis to replace the IGET 8u and IGET 9u;
+ // the IGET 6u and IGET 7u were killed without a replacement.
+ BasicBlock* bb4 = cu_.mir_graph->GetBasicBlock(4);
+ MIR* phi1 = bb4->first_mir_insn;
+ ASSERT_TRUE(phi1 != nullptr);
+ ASSERT_EQ(kMirOpPhi, static_cast<int>(phi1->dalvikInsn.opcode));
+ MIR* phi2 = phi1->next;
+ ASSERT_TRUE(phi2 != nullptr);
+ ASSERT_EQ(kMirOpPhi, static_cast<int>(phi2->dalvikInsn.opcode));
+ ASSERT_TRUE(phi2->next == &mirs_[6]);
+ if (phi1->dalvikInsn.vA == 2u) {
+ std::swap(phi1, phi2);
+ }
+ ASSERT_EQ(1, phi1->ssa_rep->num_uses);
+ EXPECT_EQ(3, phi1->ssa_rep->uses[0]);
+ ASSERT_EQ(1, phi1->ssa_rep->num_defs);
+ EXPECT_EQ(8, phi1->ssa_rep->defs[0]);
+ EXPECT_EQ(1u, phi1->dalvikInsn.vA);
+ ASSERT_EQ(1, phi2->ssa_rep->num_uses);
+ EXPECT_EQ(4, phi2->ssa_rep->uses[0]);
+ ASSERT_EQ(1, phi2->ssa_rep->num_defs);
+ EXPECT_EQ(9, phi2->ssa_rep->defs[0]);
+ EXPECT_EQ(2u, phi2->dalvikInsn.vA);
+}
+
+TEST_F(GvnDeadCodeEliminationTestDiamond, KillChainInAnotherBlock2) {
+ static const IFieldDef ifields[] = {
+ { 0u, 1u, 0u, false, kDexMemAccessObject }, // linked list
+ };
+ static const MIRDef mirs[] = {
+ DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
+ DEF_IGET(3, Instruction::IGET_OBJECT, 1u, 0u, 0u),
+ DEF_IGET(3, Instruction::IGET_OBJECT, 2u, 1u, 0u),
+ DEF_IGET(3, Instruction::IGET_OBJECT, 3u, 2u, 0u),
+ DEF_IGET(3, Instruction::IGET_OBJECT, 4u, 3u, 0u),
+ DEF_IFZ(3, Instruction::IF_NEZ, 4u),
+ DEF_IGET(4, Instruction::IGET_OBJECT, 6u, 0u, 0u),
+ DEF_IGET(4, Instruction::IGET_OBJECT, 7u, 6u, 0u),
+ DEF_IGET(4, Instruction::IGET_OBJECT, 8u, 7u, 0u),
+ DEF_CONST(4, Instruction::CONST, 9u, 1000),
+ };
+
+ static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 1, 2, 3 /* dummy */, 1, 2, 1, 2 };
+ PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+ PrepareIFields(ifields);
+ PrepareMIRs(mirs);
+ PerformGVN_DCE();
+
+ ASSERT_EQ(arraysize(mirs), value_names_.size());
+ static const size_t diff_indexes[] = { 0, 1, 2, 3, 4, 9 };
+ ExpectValueNamesNE(diff_indexes);
+ EXPECT_EQ(value_names_[1], value_names_[6]);
+ EXPECT_EQ(value_names_[2], value_names_[7]);
+ EXPECT_EQ(value_names_[3], value_names_[8]);
+
+ const size_t no_null_ck_indexes[] = { 1, 6, 7, 8 };
+ ExpectNoNullCheck(no_null_ck_indexes);
+
+ static const bool eliminated[] = {
+ false, false, false, false, false, false, true, true, true, false,
+ };
+ static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+ for (size_t i = 0; i != arraysize(eliminated); ++i) {
+ bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+ EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+ }
+ // Check that we've created a single-input Phi to replace the IGET 8u;
+ // the IGET 6u and IGET 7u were killed without a replacement.
+ BasicBlock* bb4 = cu_.mir_graph->GetBasicBlock(4);
+ MIR* phi = bb4->first_mir_insn;
+ ASSERT_TRUE(phi != nullptr);
+ ASSERT_EQ(kMirOpPhi, static_cast<int>(phi->dalvikInsn.opcode));
+ ASSERT_TRUE(phi->next == &mirs_[6]);
+ ASSERT_EQ(1, phi->ssa_rep->num_uses);
+ EXPECT_EQ(3, phi->ssa_rep->uses[0]);
+ ASSERT_EQ(1, phi->ssa_rep->num_defs);
+ EXPECT_EQ(8, phi->ssa_rep->defs[0]);
+ EXPECT_EQ(1u, phi->dalvikInsn.vA);
+}
+
+TEST_F(GvnDeadCodeEliminationTestLoop, IFieldLoopVariable) {
+ static const IFieldDef ifields[] = {
+ { 0u, 1u, 0u, false, kDexMemAccessWord },
+ };
+ static const MIRDef mirs[] = {
+ DEF_UNIQUE_REF(3, Instruction::NEW_INSTANCE, 0u),
+ DEF_CONST(3, Instruction::CONST, 1u, 1),
+ DEF_CONST(3, Instruction::CONST, 2u, 0),
+ DEF_IPUT(3, Instruction::IPUT, 2u, 0u, 0u),
+ DEF_IGET(4, Instruction::IGET, 4u, 0u, 0u),
+ DEF_BINOP(4, Instruction::ADD_INT, 5u, 4u, 1u),
+ DEF_IPUT(4, Instruction::IPUT, 5u, 0u, 0u),
+ };
+
+ static const int32_t sreg_to_vreg_map[] = { 0, 1, 2, 3 /* dummy */, 2, 2 };
+ PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+ PrepareIFields(ifields);
+ PrepareMIRs(mirs);
+ PerformGVN_DCE();
+
+ ASSERT_EQ(arraysize(mirs), value_names_.size());
+ static const size_t diff_indexes[] = { 0, 1, 2, 4, 5 };
+ ExpectValueNamesNE(diff_indexes);
+
+ const size_t no_null_ck_indexes[] = { 3, 4, 6 };
+ ExpectNoNullCheck(no_null_ck_indexes);
+
+ static const bool eliminated[] = {
+ false, false, false, false, true, false, false,
+ };
+ static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+ for (size_t i = 0; i != arraysize(eliminated); ++i) {
+ bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+ EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+ }
+ // Check that we've created a two-input Phi to replace the IGET 4u.
+ BasicBlock* bb4 = cu_.mir_graph->GetBasicBlock(4);
+ MIR* phi = bb4->first_mir_insn;
+ ASSERT_TRUE(phi != nullptr);
+ ASSERT_EQ(kMirOpPhi, static_cast<int>(phi->dalvikInsn.opcode));
+ ASSERT_TRUE(phi->next == &mirs_[4]);
+ ASSERT_EQ(2, phi->ssa_rep->num_uses);
+ EXPECT_EQ(2, phi->ssa_rep->uses[0]);
+ EXPECT_EQ(5, phi->ssa_rep->uses[1]);
+ ASSERT_EQ(1, phi->ssa_rep->num_defs);
+ EXPECT_EQ(4, phi->ssa_rep->defs[0]);
+ EXPECT_EQ(2u, phi->dalvikInsn.vA);
+}
+
+} // namespace art
diff --git a/compiler/dex/local_value_numbering.cc b/compiler/dex/local_value_numbering.cc
index 114346dd5a..99b6683b26 100644
--- a/compiler/dex/local_value_numbering.cc
+++ b/compiler/dex/local_value_numbering.cc
@@ -901,9 +901,9 @@ void LocalValueNumbering::MergeAliasingValues(const typename Map::value_type& en
// Calculate merged values for the intersection.
for (auto& load_value_entry : my_values->load_value_map) {
uint16_t location = load_value_entry.first;
- bool same_values = true;
- uint16_t value_name = kNoValue;
merge_names_.clear();
+ uint16_t value_name = kNoValue;
+ bool same_values = true;
for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
value_name = Versions::LookupMergeValue(gvn_, lvn, key, location);
same_values = same_values && (merge_names_.empty() || value_name == merge_names_.back());
@@ -937,6 +937,10 @@ void LocalValueNumbering::MergeAliasingValues(const typename Map::value_type& en
void LocalValueNumbering::Merge(MergeType merge_type) {
DCHECK_GE(gvn_->merge_lvns_.size(), 2u);
+ // Always reserve space in merge_names_. Even if we don't use it in Merge(), we may need it
+ // in GetStartingVregValueNumberImpl() when merge_names_'s allocator is not the topmost one.
+ merge_names_.reserve(gvn_->merge_lvns_.size());
+
IntersectSregValueMaps<&LocalValueNumbering::sreg_value_map_>();
IntersectSregValueMaps<&LocalValueNumbering::sreg_wide_value_map_>();
if (merge_type == kReturnMerge) {
@@ -1169,8 +1173,8 @@ uint16_t LocalValueNumbering::HandlePhi(MIR* mir) {
int first_s_reg = uses[pos];
bool wide = (first_lvn->sreg_wide_value_map_.count(first_s_reg) != 0u);
// Iterate over *merge_lvns_ and skip incoming sregs for BBs without associated LVN.
- uint16_t value_name = kNoValue;
merge_names_.clear();
+ uint16_t value_name = kNoValue;
bool same_values = true;
for (const LocalValueNumbering* lvn : gvn_->merge_lvns_) {
DCHECK_LT(pos, mir->ssa_rep->num_uses);
@@ -1210,6 +1214,31 @@ uint16_t LocalValueNumbering::HandlePhi(MIR* mir) {
return value_name;
}
+uint16_t LocalValueNumbering::HandleConst(MIR* mir, uint32_t value) {
+ RegLocation raw_dest = gvn_->GetMirGraph()->GetRawDest(mir);
+ uint16_t res;
+ if (value == 0u && raw_dest.ref) {
+ res = GlobalValueNumbering::kNullValue;
+ } else {
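+ // Assumed intent, inferred from the test expectations: use a different opcode key for FP
+ // destinations so an FP constant never shares a value name with a core constant of the same bits.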
+ Instruction::Code op = raw_dest.fp ? Instruction::CONST_HIGH16 : Instruction::CONST;
+ res = gvn_->LookupValue(op, Low16Bits(value), High16Bits(value), 0);
+ }
+ SetOperandValue(mir->ssa_rep->defs[0], res);
+ return res;
+}
+
+uint16_t LocalValueNumbering::HandleConstWide(MIR* mir, uint64_t value) {
+ RegLocation raw_dest = gvn_->GetMirGraph()->GetRawDest(mir);
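+ // As in HandleConst(), FP destinations use a different opcode key; the wide value name is
+ // combined from the value names of the low and high 32-bit words.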
+ Instruction::Code op = raw_dest.fp ? Instruction::CONST_HIGH16 : Instruction::CONST;
+ uint32_t low_word = Low32Bits(value);
+ uint32_t high_word = High32Bits(value);
+ uint16_t low_res = gvn_->LookupValue(op, Low16Bits(low_word), High16Bits(low_word), 1);
+ uint16_t high_res = gvn_->LookupValue(op, Low16Bits(high_word), High16Bits(high_word), 2);
+ uint16_t res = gvn_->LookupValue(op, low_res, high_res, 3);
+ SetOperandValueWide(mir->ssa_rep->defs[0], res);
+ return res;
+}
+
uint16_t LocalValueNumbering::HandleAGet(MIR* mir, uint16_t opcode) {
uint16_t array = GetOperandValue(mir->ssa_rep->uses[0]);
HandleNullCheck(mir, array);
@@ -1592,12 +1621,18 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) {
break;
case Instruction::MOVE_EXCEPTION:
case Instruction::NEW_INSTANCE:
- case Instruction::CONST_CLASS:
case Instruction::NEW_ARRAY:
// 1 result, treat as unique each time, use result s_reg - will be unique.
res = MarkNonAliasingNonNull(mir);
SetOperandValue(mir->ssa_rep->defs[0], res);
break;
+ case Instruction::CONST_CLASS:
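+ // A class reference is keyed by its type index; it is never null and is treated as non-aliasing.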
+ DCHECK_EQ(Low16Bits(mir->dalvikInsn.vB), mir->dalvikInsn.vB);
+ res = gvn_->LookupValue(Instruction::CONST_CLASS, mir->dalvikInsn.vB, 0, 0);
+ SetOperandValue(mir->ssa_rep->defs[0], res);
+ null_checked_.insert(res);
+ non_aliasing_refs_.insert(res);
+ break;
case Instruction::CONST_STRING:
case Instruction::CONST_STRING_JUMBO:
// These strings are internalized, so assign value based on the string pool index.
@@ -1641,53 +1676,29 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) {
SetOperandValueWide(mir->ssa_rep->defs[0], res);
break;
+ case Instruction::CONST_HIGH16:
+ res = HandleConst(mir, mir->dalvikInsn.vB << 16);
+ break;
case Instruction::CONST:
case Instruction::CONST_4:
case Instruction::CONST_16:
- res = gvn_->LookupValue(Instruction::CONST, Low16Bits(mir->dalvikInsn.vB),
- High16Bits(mir->dalvikInsn.vB), 0);
- SetOperandValue(mir->ssa_rep->defs[0], res);
- break;
-
- case Instruction::CONST_HIGH16:
- res = gvn_->LookupValue(Instruction::CONST, 0, mir->dalvikInsn.vB, 0);
- SetOperandValue(mir->ssa_rep->defs[0], res);
+ res = HandleConst(mir, mir->dalvikInsn.vB);
break;
case Instruction::CONST_WIDE_16:
- case Instruction::CONST_WIDE_32: {
- uint16_t low_res = gvn_->LookupValue(Instruction::CONST, Low16Bits(mir->dalvikInsn.vB),
- High16Bits(mir->dalvikInsn.vB >> 16), 1);
- uint16_t high_res;
- if (mir->dalvikInsn.vB & 0x80000000) {
- high_res = gvn_->LookupValue(Instruction::CONST, 0xffff, 0xffff, 2);
- } else {
- high_res = gvn_->LookupValue(Instruction::CONST, 0, 0, 2);
- }
- res = gvn_->LookupValue(Instruction::CONST, low_res, high_res, 3);
- SetOperandValueWide(mir->ssa_rep->defs[0], res);
- }
+ case Instruction::CONST_WIDE_32:
+ res = HandleConstWide(
+ mir,
+ mir->dalvikInsn.vB +
+ ((mir->dalvikInsn.vB & 0x80000000) != 0 ? UINT64_C(0xffffffff00000000) : 0u));
break;
- case Instruction::CONST_WIDE: {
- uint32_t low_word = Low32Bits(mir->dalvikInsn.vB_wide);
- uint32_t high_word = High32Bits(mir->dalvikInsn.vB_wide);
- uint16_t low_res = gvn_->LookupValue(Instruction::CONST, Low16Bits(low_word),
- High16Bits(low_word), 1);
- uint16_t high_res = gvn_->LookupValue(Instruction::CONST, Low16Bits(high_word),
- High16Bits(high_word), 2);
- res = gvn_->LookupValue(Instruction::CONST, low_res, high_res, 3);
- SetOperandValueWide(mir->ssa_rep->defs[0], res);
- }
+ case Instruction::CONST_WIDE:
+ res = HandleConstWide(mir, mir->dalvikInsn.vB_wide);
break;
- case Instruction::CONST_WIDE_HIGH16: {
- uint16_t low_res = gvn_->LookupValue(Instruction::CONST, 0, 0, 1);
- uint16_t high_res = gvn_->LookupValue(Instruction::CONST, 0,
- Low16Bits(mir->dalvikInsn.vB), 2);
- res = gvn_->LookupValue(Instruction::CONST, low_res, high_res, 3);
- SetOperandValueWide(mir->ssa_rep->defs[0], res);
- }
+ case Instruction::CONST_WIDE_HIGH16:
+ res = HandleConstWide(mir, static_cast<uint64_t>(mir->dalvikInsn.vB) << 48);
break;
case Instruction::ARRAY_LENGTH: {
@@ -1956,4 +1967,55 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) {
return res;
}
+uint16_t LocalValueNumbering::GetEndingVregValueNumberImpl(int v_reg, bool wide) const {
+ const BasicBlock* bb = gvn_->GetBasicBlock(Id());
+ DCHECK(bb != nullptr);
+ int s_reg = bb->data_flow_info->vreg_to_ssa_map_exit[v_reg];
+ if (s_reg == INVALID_SREG) {
+ return kNoValue;
+ }
+ if (wide) {
+ int high_s_reg = bb->data_flow_info->vreg_to_ssa_map_exit[v_reg + 1];
+ if (high_s_reg != s_reg + 1) {
+ return kNoValue; // High word has been overwritten.
+ }
+ return GetSregValueWide(s_reg);
+ } else {
+ return GetSregValue(s_reg);
+ }
+}
+
+uint16_t LocalValueNumbering::GetStartingVregValueNumberImpl(int v_reg, bool wide) const {
+ DCHECK_EQ(gvn_->mode_, GlobalValueNumbering::kModeGvnPostProcessing);
+ DCHECK(gvn_->CanModify());
+ const BasicBlock* bb = gvn_->GetBasicBlock(Id());
+ DCHECK(bb != nullptr);
+ DCHECK_NE(bb->predecessors.size(), 0u);
+ if (bb->predecessors.size() == 1u) {
+ return gvn_->GetLvn(bb->predecessors[0])->GetEndingVregValueNumberImpl(v_reg, wide);
+ }
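+ // Multiple predecessors: the v_reg has a well-defined value only if all predecessors agree
+ // on the value name, or if this exact combination was already assigned a name in merge_map_.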
+ merge_names_.clear();
+ uint16_t value_name = kNoValue;
+ bool same_values = true;
+ for (BasicBlockId pred_id : bb->predecessors) {
+ value_name = gvn_->GetLvn(pred_id)->GetEndingVregValueNumberImpl(v_reg, wide);
+ if (value_name == kNoValue) {
+ return kNoValue;
+ }
+ same_values = same_values && (merge_names_.empty() || value_name == merge_names_.back());
+ merge_names_.push_back(value_name);
+ }
+ if (same_values) {
+ // value_name already contains the result.
+ } else {
+ auto lb = merge_map_.lower_bound(merge_names_);
+ if (lb != merge_map_.end() && !merge_map_.key_comp()(merge_names_, lb->first)) {
+ value_name = lb->second;
+ } else {
+ value_name = kNoValue; // We never assigned a value name to this set of merged names.
+ }
+ }
+ return value_name;
+}
+
} // namespace art
diff --git a/compiler/dex/local_value_numbering.h b/compiler/dex/local_value_numbering.h
index aef8c6df0c..97ea05a914 100644
--- a/compiler/dex/local_value_numbering.h
+++ b/compiler/dex/local_value_numbering.h
@@ -19,9 +19,9 @@
#include <memory>
+#include "base/arena_object.h"
#include "base/logging.h"
#include "global_value_numbering.h"
-#include "utils/arena_object.h"
#include "utils/dex_instruction_utils.h"
namespace art {
@@ -52,13 +52,22 @@ class LocalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> {
return div_zero_checked_.find(value_name) != div_zero_checked_.end();
}
- bool IsSregValue(uint16_t s_reg, uint16_t value_name) const {
- auto it = sreg_value_map_.find(s_reg);
- if (it != sreg_value_map_.end()) {
- return it->second == value_name;
- } else {
- return gvn_->HasValue(kNoValue, s_reg, kNoValue, kNoValue, value_name);
- }
+ uint16_t GetSregValue(uint16_t s_reg) const {
+ return GetSregValueImpl(s_reg, &sreg_value_map_);
+ }
+
+ uint16_t GetSregValueWide(uint16_t s_reg) const {
+ return GetSregValueImpl(s_reg, &sreg_wide_value_map_);
+ }
+
+ // Get the starting value number for a given dalvik register.
+ uint16_t GetStartingVregValueNumber(int v_reg) const {
+ return GetStartingVregValueNumberImpl(v_reg, false);
+ }
+
+ // Get the starting value number for a given wide dalvik register.
+ uint16_t GetStartingVregValueNumberWide(int v_reg) const {
+ return GetStartingVregValueNumberImpl(v_reg, true);
}
enum MergeType {
@@ -80,6 +89,20 @@ class LocalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> {
// Key is s_reg, value is value name.
typedef ScopedArenaSafeMap<uint16_t, uint16_t> SregValueMap;
+ uint16_t GetEndingVregValueNumberImpl(int v_reg, bool wide) const;
+ uint16_t GetStartingVregValueNumberImpl(int v_reg, bool wide) const;
+
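+ // Return the value name for an s_reg: prefer the local map, otherwise fall back to a value
+ // name the GVN may have recorded for this s_reg.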
+ uint16_t GetSregValueImpl(int s_reg, const SregValueMap* map) const {
+ uint16_t res = kNoValue;
+ auto lb = map->find(s_reg);
+ if (lb != map->end()) {
+ res = lb->second;
+ } else {
+ res = gvn_->FindValue(kNoValue, s_reg, kNoValue, kNoValue);
+ }
+ return res;
+ }
+
void SetOperandValueImpl(uint16_t s_reg, uint16_t value, SregValueMap* map) {
DCHECK_EQ(map->count(s_reg), 0u) << PrettyMethod(gvn_->cu_->method_idx, *gvn_->cu_->dex_file)
<< " LVN id: " << id_ << ", s_reg: " << s_reg;
@@ -285,6 +308,8 @@ class LocalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> {
void HandleEscapingRef(uint16_t base);
void HandleInvokeArgs(const MIR* mir, const LocalValueNumbering* mir_lvn);
uint16_t HandlePhi(MIR* mir);
+ uint16_t HandleConst(MIR* mir, uint32_t value);
+ uint16_t HandleConstWide(MIR* mir, uint64_t value);
uint16_t HandleAGet(MIR* mir, uint16_t opcode);
void HandleAPut(MIR* mir, uint16_t opcode);
uint16_t HandleIGet(MIR* mir, uint16_t opcode);
@@ -370,9 +395,9 @@ class LocalValueNumbering : public DeletableArenaObject<kArenaAllocMisc> {
ValueNameSet div_zero_checked_;
// Reuse one vector for all merges to avoid leaking too much memory on the ArenaStack.
- ScopedArenaVector<BasicBlockId> merge_names_;
+ mutable ScopedArenaVector<uint16_t> merge_names_;
// Map to identify when different locations merge the same values.
- ScopedArenaSafeMap<ScopedArenaVector<BasicBlockId>, uint16_t> merge_map_;
+ ScopedArenaSafeMap<ScopedArenaVector<uint16_t>, uint16_t> merge_map_;
// New memory version for merge, kNoValue if all memory versions matched.
uint16_t merge_new_memory_version_;
diff --git a/compiler/dex/local_value_numbering_test.cc b/compiler/dex/local_value_numbering_test.cc
index c89489287f..d1c3a6b4ba 100644
--- a/compiler/dex/local_value_numbering_test.cc
+++ b/compiler/dex/local_value_numbering_test.cc
@@ -136,7 +136,7 @@ class LocalValueNumberingTest : public testing::Test {
void DoPrepareMIRs(const MIRDef* defs, size_t count) {
mir_count_ = count;
- mirs_ = reinterpret_cast<MIR*>(cu_.arena.Alloc(sizeof(MIR) * count, kArenaAllocMIR));
+ mirs_ = cu_.arena.AllocArray<MIR>(count, kArenaAllocMIR);
ssa_reps_.resize(count);
for (size_t i = 0u; i != count; ++i) {
const MIRDef* def = &defs[i];
@@ -185,9 +185,9 @@ class LocalValueNumberingTest : public testing::Test {
}
void PerformLVN() {
- cu_.mir_graph->temp_.gvn.ifield_ids_ = GlobalValueNumbering::PrepareGvnFieldIds(
+ cu_.mir_graph->temp_.gvn.ifield_ids = GlobalValueNumbering::PrepareGvnFieldIds(
allocator_.get(), cu_.mir_graph->ifield_lowering_infos_);
- cu_.mir_graph->temp_.gvn.sfield_ids_ = GlobalValueNumbering::PrepareGvnFieldIds(
+ cu_.mir_graph->temp_.gvn.sfield_ids = GlobalValueNumbering::PrepareGvnFieldIds(
allocator_.get(), cu_.mir_graph->sfield_lowering_infos_);
gvn_.reset(new (allocator_.get()) GlobalValueNumbering(&cu_, allocator_.get(),
GlobalValueNumbering::kModeLvn));
@@ -211,8 +211,14 @@ class LocalValueNumberingTest : public testing::Test {
value_names_() {
cu_.mir_graph.reset(new MIRGraph(&cu_, &cu_.arena));
allocator_.reset(ScopedArenaAllocator::Create(&cu_.arena_stack));
+ // By default, the zero-initialized reg_location_[.] with ref == false tells LVN that
+ // 0 constants are integral, not references. Nothing else is used by LVN/GVN.
+ cu_.mir_graph->reg_location_ = static_cast<RegLocation*>(cu_.arena.Alloc(
+ kMaxSsaRegs * sizeof(cu_.mir_graph->reg_location_[0]), kArenaAllocRegAlloc));
}
+ static constexpr size_t kMaxSsaRegs = 16384u;
+
ArenaPool pool_;
CompilationUnit cu_;
size_t mir_count_;
@@ -772,4 +778,116 @@ TEST_F(LocalValueNumberingTest, DivZeroCheck) {
}
}
+TEST_F(LocalValueNumberingTest, ConstWide) {
+ static const MIRDef mirs[] = {
+ // Core reg constants.
+ DEF_CONST(Instruction::CONST_WIDE_16, 0u, 0),
+ DEF_CONST(Instruction::CONST_WIDE_16, 1u, 1),
+ DEF_CONST(Instruction::CONST_WIDE_16, 2u, -1),
+ DEF_CONST(Instruction::CONST_WIDE_32, 3u, 1 << 16),
+ DEF_CONST(Instruction::CONST_WIDE_32, 4u, -1 << 16),
+ DEF_CONST(Instruction::CONST_WIDE_32, 5u, (1 << 16) + 1),
+ DEF_CONST(Instruction::CONST_WIDE_32, 6u, (1 << 16) - 1),
+ DEF_CONST(Instruction::CONST_WIDE_32, 7u, -(1 << 16) + 1),
+ DEF_CONST(Instruction::CONST_WIDE_32, 8u, -(1 << 16) - 1),
+ DEF_CONST(Instruction::CONST_WIDE, 9u, INT64_C(1) << 32),
+ DEF_CONST(Instruction::CONST_WIDE, 10u, INT64_C(-1) << 32),
+ DEF_CONST(Instruction::CONST_WIDE, 11u, (INT64_C(1) << 32) + 1),
+ DEF_CONST(Instruction::CONST_WIDE, 12u, (INT64_C(1) << 32) - 1),
+ DEF_CONST(Instruction::CONST_WIDE, 13u, (INT64_C(-1) << 32) + 1),
+ DEF_CONST(Instruction::CONST_WIDE, 14u, (INT64_C(-1) << 32) - 1),
+ DEF_CONST(Instruction::CONST_WIDE_HIGH16, 15u, 1), // Effectively 1 << 48.
+ DEF_CONST(Instruction::CONST_WIDE_HIGH16, 16u, 0xffff), // Effectively -1 << 48.
+ DEF_CONST(Instruction::CONST_WIDE, 17u, (INT64_C(1) << 48) + 1),
+ DEF_CONST(Instruction::CONST_WIDE, 18u, (INT64_C(1) << 48) - 1),
+ DEF_CONST(Instruction::CONST_WIDE, 19u, (INT64_C(-1) << 48) + 1),
+ DEF_CONST(Instruction::CONST_WIDE, 20u, (INT64_C(-1) << 48) - 1),
+ // FP reg constants.
+ DEF_CONST(Instruction::CONST_WIDE_16, 21u, 0),
+ DEF_CONST(Instruction::CONST_WIDE_16, 22u, 1),
+ DEF_CONST(Instruction::CONST_WIDE_16, 23u, -1),
+ DEF_CONST(Instruction::CONST_WIDE_32, 24u, 1 << 16),
+ DEF_CONST(Instruction::CONST_WIDE_32, 25u, -1 << 16),
+ DEF_CONST(Instruction::CONST_WIDE_32, 26u, (1 << 16) + 1),
+ DEF_CONST(Instruction::CONST_WIDE_32, 27u, (1 << 16) - 1),
+ DEF_CONST(Instruction::CONST_WIDE_32, 28u, -(1 << 16) + 1),
+ DEF_CONST(Instruction::CONST_WIDE_32, 29u, -(1 << 16) - 1),
+ DEF_CONST(Instruction::CONST_WIDE, 30u, INT64_C(1) << 32),
+ DEF_CONST(Instruction::CONST_WIDE, 31u, INT64_C(-1) << 32),
+ DEF_CONST(Instruction::CONST_WIDE, 32u, (INT64_C(1) << 32) + 1),
+ DEF_CONST(Instruction::CONST_WIDE, 33u, (INT64_C(1) << 32) - 1),
+ DEF_CONST(Instruction::CONST_WIDE, 34u, (INT64_C(-1) << 32) + 1),
+ DEF_CONST(Instruction::CONST_WIDE, 35u, (INT64_C(-1) << 32) - 1),
+ DEF_CONST(Instruction::CONST_WIDE_HIGH16, 36u, 1), // Effectively 1 << 48.
+ DEF_CONST(Instruction::CONST_WIDE_HIGH16, 37u, 0xffff), // Effectively -1 << 48.
+ DEF_CONST(Instruction::CONST_WIDE, 38u, (INT64_C(1) << 48) + 1),
+ DEF_CONST(Instruction::CONST_WIDE, 39u, (INT64_C(1) << 48) - 1),
+ DEF_CONST(Instruction::CONST_WIDE, 40u, (INT64_C(-1) << 48) + 1),
+ DEF_CONST(Instruction::CONST_WIDE, 41u, (INT64_C(-1) << 48) - 1),
+ };
+
+ PrepareMIRs(mirs);
+ for (size_t i = arraysize(mirs) / 2u; i != arraysize(mirs); ++i) {
+ cu_.mir_graph->reg_location_[mirs_[i].ssa_rep->defs[0]].fp = true;
+ }
+ PerformLVN();
+ for (size_t i = 0u; i != mir_count_; ++i) {
+ for (size_t j = i + 1u; j != mir_count_; ++j) {
+ EXPECT_NE(value_names_[i], value_names_[j]) << i << " " << j;
+ }
+ }
+}
+
+TEST_F(LocalValueNumberingTest, Const) {
+ static const MIRDef mirs[] = {
+ // Core reg constants.
+ DEF_CONST(Instruction::CONST_4, 0u, 0),
+ DEF_CONST(Instruction::CONST_4, 1u, 1),
+ DEF_CONST(Instruction::CONST_4, 2u, -1),
+ DEF_CONST(Instruction::CONST_16, 3u, 1 << 4),
+ DEF_CONST(Instruction::CONST_16, 4u, -1 << 4),
+ DEF_CONST(Instruction::CONST_16, 5u, (1 << 4) + 1),
+ DEF_CONST(Instruction::CONST_16, 6u, (1 << 4) - 1),
+ DEF_CONST(Instruction::CONST_16, 7u, -(1 << 4) + 1),
+ DEF_CONST(Instruction::CONST_16, 8u, -(1 << 4) - 1),
+ DEF_CONST(Instruction::CONST_HIGH16, 9u, 1), // Effectively 1 << 16.
+ DEF_CONST(Instruction::CONST_HIGH16, 10u, 0xffff), // Effectively -1 << 16.
+ DEF_CONST(Instruction::CONST, 11u, (1 << 16) + 1),
+ DEF_CONST(Instruction::CONST, 12u, (1 << 16) - 1),
+ DEF_CONST(Instruction::CONST, 13u, (-1 << 16) + 1),
+ DEF_CONST(Instruction::CONST, 14u, (-1 << 16) - 1),
+ // FP reg constants.
+ DEF_CONST(Instruction::CONST_4, 15u, 0),
+ DEF_CONST(Instruction::CONST_4, 16u, 1),
+ DEF_CONST(Instruction::CONST_4, 17u, -1),
+ DEF_CONST(Instruction::CONST_16, 18u, 1 << 4),
+ DEF_CONST(Instruction::CONST_16, 19u, -1 << 4),
+ DEF_CONST(Instruction::CONST_16, 20u, (1 << 4) + 1),
+ DEF_CONST(Instruction::CONST_16, 21u, (1 << 4) - 1),
+ DEF_CONST(Instruction::CONST_16, 22u, -(1 << 4) + 1),
+ DEF_CONST(Instruction::CONST_16, 23u, -(1 << 4) - 1),
+ DEF_CONST(Instruction::CONST_HIGH16, 24u, 1), // Effectively 1 << 16.
+ DEF_CONST(Instruction::CONST_HIGH16, 25u, 0xffff), // Effectively -1 << 16.
+ DEF_CONST(Instruction::CONST, 26u, (1 << 16) + 1),
+ DEF_CONST(Instruction::CONST, 27u, (1 << 16) - 1),
+ DEF_CONST(Instruction::CONST, 28u, (-1 << 16) + 1),
+ DEF_CONST(Instruction::CONST, 29u, (-1 << 16) - 1),
+ // null reference constant.
+ DEF_CONST(Instruction::CONST_4, 30u, 0),
+ };
+
+ PrepareMIRs(mirs);
+ static_assert((arraysize(mirs) & 1) != 0, "missing null or unmatched fp/core");
+ cu_.mir_graph->reg_location_[arraysize(mirs) - 1].ref = true;
+ for (size_t i = arraysize(mirs) / 2u; i != arraysize(mirs) - 1; ++i) {
+ cu_.mir_graph->reg_location_[mirs_[i].ssa_rep->defs[0]].fp = true;
+ }
+ PerformLVN();
+ for (size_t i = 0u; i != mir_count_; ++i) {
+ for (size_t j = i + 1u; j != mir_count_; ++j) {
+ EXPECT_NE(value_names_[i], value_names_[j]) << i << " " << j;
+ }
+ }
+}
+
} // namespace art
diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc
index 473196b98a..31dbc60594 100644
--- a/compiler/dex/mir_analysis.cc
+++ b/compiler/dex/mir_analysis.cc
@@ -18,6 +18,7 @@
#include <memory>
#include "base/logging.h"
+#include "base/scoped_arena_containers.h"
#include "dataflow_iterator-inl.h"
#include "compiler_ir.h"
#include "dex_flags.h"
@@ -29,7 +30,6 @@
#include "driver/compiler_driver.h"
#include "driver/compiler_options.h"
#include "driver/dex_compilation_unit.h"
-#include "utils/scoped_arena_containers.h"
namespace art {
@@ -1206,10 +1206,8 @@ void MIRGraph::DoCacheFieldLoweringInfo() {
// All IGET/IPUT/SGET/SPUT instructions take 2 code units and there must also be a RETURN.
const uint32_t max_refs = (GetNumDalvikInsns() - 1u) / 2u;
ScopedArenaAllocator allocator(&cu_->arena_stack);
- uint16_t* field_idxs =
- reinterpret_cast<uint16_t*>(allocator.Alloc(max_refs * sizeof(uint16_t), kArenaAllocMisc));
- DexMemAccessType* field_types = reinterpret_cast<DexMemAccessType*>(
- allocator.Alloc(max_refs * sizeof(DexMemAccessType), kArenaAllocMisc));
+ uint16_t* field_idxs = allocator.AllocArray<uint16_t>(max_refs, kArenaAllocMisc);
+ DexMemAccessType* field_types = allocator.AllocArray<DexMemAccessType>(max_refs, kArenaAllocMisc);
// Find IGET/IPUT/SGET/SPUT insns, store IGET/IPUT fields at the beginning, SGET/SPUT at the end.
size_t ifield_pos = 0u;
@@ -1328,8 +1326,8 @@ void MIRGraph::DoCacheMethodLoweringInfo() {
// multi_index_container with one ordered index and one sequential index.
ScopedArenaSet<MapEntry, MapEntryComparator> invoke_map(MapEntryComparator(),
allocator.Adapter());
- const MapEntry** sequential_entries = reinterpret_cast<const MapEntry**>(
- allocator.Alloc(max_refs * sizeof(sequential_entries[0]), kArenaAllocMisc));
+ const MapEntry** sequential_entries =
+ allocator.AllocArray<const MapEntry*>(max_refs, kArenaAllocMisc);
// Find INVOKE insns and their devirtualization targets.
AllNodesIterator iter(this);
diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc
index f09d1ae6d0..f9f7e22b03 100644
--- a/compiler/dex/mir_dataflow.cc
+++ b/compiler/dex/mir_dataflow.cc
@@ -910,11 +910,6 @@ const uint64_t MIRGraph::oat_data_flow_attributes_[kMirOpLast] = {
DF_FORMAT_EXTENDED,
};
-/* Return the base virtual register for a SSA name */
-int MIRGraph::SRegToVReg(int ssa_reg) const {
- return ssa_base_vregs_[ssa_reg];
-}
-
/* Any register that is used before being defined is considered live-in */
void MIRGraph::HandleLiveInUse(ArenaBitVector* use_v, ArenaBitVector* def_v,
ArenaBitVector* live_in_v, int dalvik_reg_id) {
@@ -1084,9 +1079,9 @@ void MIRGraph::AllocateSSAUseData(MIR *mir, int num_uses) {
mir->ssa_rep->num_uses = num_uses;
if (mir->ssa_rep->num_uses_allocated < num_uses) {
- mir->ssa_rep->uses = static_cast<int*>(arena_->Alloc(sizeof(int) * num_uses, kArenaAllocDFInfo));
+ mir->ssa_rep->uses = arena_->AllocArray<int32_t>(num_uses, kArenaAllocDFInfo);
// NOTE: will be filled in during type & size inference pass
- mir->ssa_rep->fp_use = static_cast<bool*>(arena_->Alloc(sizeof(bool) * num_uses, kArenaAllocDFInfo));
+ mir->ssa_rep->fp_use = arena_->AllocArray<bool>(num_uses, kArenaAllocDFInfo);
}
}
@@ -1094,10 +1089,8 @@ void MIRGraph::AllocateSSADefData(MIR *mir, int num_defs) {
mir->ssa_rep->num_defs = num_defs;
if (mir->ssa_rep->num_defs_allocated < num_defs) {
- mir->ssa_rep->defs = static_cast<int*>(arena_->Alloc(sizeof(int) * num_defs,
- kArenaAllocDFInfo));
- mir->ssa_rep->fp_def = static_cast<bool*>(arena_->Alloc(sizeof(bool) * num_defs,
- kArenaAllocDFInfo));
+ mir->ssa_rep->defs = arena_->AllocArray<int32_t>(num_defs, kArenaAllocDFInfo);
+ mir->ssa_rep->fp_def = arena_->AllocArray<bool>(num_defs, kArenaAllocDFInfo);
}
}
@@ -1198,11 +1191,30 @@ void MIRGraph::DataFlowSSAFormatExtended(MIR* mir) {
/* Entry function to convert a block into SSA representation */
bool MIRGraph::DoSSAConversion(BasicBlock* bb) {
- MIR* mir;
-
if (bb->data_flow_info == NULL) return false;
- for (mir = bb->first_mir_insn; mir != NULL; mir = mir->next) {
+ /*
+ * Pruned SSA form: Insert phi nodes for each dalvik register marked in phi_node_blocks
+ * only if the dalvik register is in the live-in set.
+ */
+ BasicBlockId bb_id = bb->id;
+ for (int dalvik_reg = GetNumOfCodeAndTempVRs() - 1; dalvik_reg >= 0; dalvik_reg--) {
+ if (temp_.ssa.phi_node_blocks[dalvik_reg]->IsBitSet(bb_id)) {
+ if (!bb->data_flow_info->live_in_v->IsBitSet(dalvik_reg)) {
+ /* Variable will be clobbered before being used - no need for phi */
+ vreg_to_ssa_map_[dalvik_reg] = INVALID_SREG;
+ continue;
+ }
+ MIR *phi = NewMIR();
+ phi->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpPhi);
+ phi->dalvikInsn.vA = dalvik_reg;
+ phi->offset = bb->start_offset;
+ phi->m_unit_index = 0; // Arbitrarily assign all Phi nodes to outermost method.
+ bb->PrependMIR(phi);
+ }
+ }
+
+ for (MIR* mir = bb->first_mir_insn; mir != NULL; mir = mir->next) {
mir->ssa_rep =
static_cast<struct SSARepresentation *>(arena_->Alloc(sizeof(SSARepresentation),
kArenaAllocDFInfo));
@@ -1315,8 +1327,7 @@ bool MIRGraph::DoSSAConversion(BasicBlock* bb) {
* predecessor blocks.
*/
bb->data_flow_info->vreg_to_ssa_map_exit =
- static_cast<int*>(arena_->Alloc(sizeof(int) * GetNumOfCodeAndTempVRs(),
- kArenaAllocDFInfo));
+ arena_->AllocArray<int32_t>(GetNumOfCodeAndTempVRs(), kArenaAllocDFInfo);
memcpy(bb->data_flow_info->vreg_to_ssa_map_exit, vreg_to_ssa_map_,
sizeof(int) * GetNumOfCodeAndTempVRs());
@@ -1368,13 +1379,9 @@ void MIRGraph::CompilerInitializeSSAConversion() {
* Initialize the DalvikToSSAMap map. There is one entry for each
* Dalvik register, and the SSA names for those are the same.
*/
- vreg_to_ssa_map_ =
- static_cast<int*>(arena_->Alloc(sizeof(int) * num_reg,
- kArenaAllocDFInfo));
+ vreg_to_ssa_map_ = arena_->AllocArray<int32_t>(num_reg, kArenaAllocDFInfo);
/* Keep track of the highest def for each dalvik reg */
- ssa_last_defs_ =
- static_cast<int*>(arena_->Alloc(sizeof(int) * num_reg,
- kArenaAllocDFInfo));
+ ssa_last_defs_ = arena_->AllocArray<int>(num_reg, kArenaAllocDFInfo);
for (unsigned int i = 0; i < num_reg; i++) {
vreg_to_ssa_map_[i] = i;
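
The rewritten DoSSAConversion above implements pruned SSA: a phi for virtual register v is planted in block b only when b is in the precomputed phi-placement set for v (temp_.ssa.phi_node_blocks) and v is live-in at b; if the register is clobbered before any use, the map entry is invalidated and no phi is created. A standalone sketch of that filtering step, with std::set standing in for the bit vectors (types and names are illustrative):

#include <cstdint>
#include <set>
#include <vector>

using BlockId = uint16_t;

struct PhiRequest {
  int vreg;
  BlockId block;
};

// phi_node_blocks[v] = blocks where the iterated dominance frontier says a
// phi for v may be needed; live_in[b] = vregs live on entry to block b.
std::vector<PhiRequest> PrunePhis(const std::vector<std::set<BlockId>>& phi_node_blocks,
                                  const std::vector<std::set<int>>& live_in) {
  std::vector<PhiRequest> result;
  for (int v = 0; v != static_cast<int>(phi_node_blocks.size()); ++v) {
    for (BlockId b : phi_node_blocks[v]) {
      // Pruned SSA: skip the phi if v is dead on entry to b.
      if (live_in[b].count(v) != 0) {
        result.push_back(PhiRequest{v, b});
      }
    }
  }
  return result;
}
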
diff --git a/compiler/dex/mir_field_info.h b/compiler/dex/mir_field_info.h
index ff427f88d0..98b2da8299 100644
--- a/compiler/dex/mir_field_info.h
+++ b/compiler/dex/mir_field_info.h
@@ -149,6 +149,7 @@ class MirIFieldLoweringInfo : public MirFieldInfo {
friend class NullCheckEliminationTest;
friend class GlobalValueNumberingTest;
+ friend class GvnDeadCodeEliminationTest;
friend class LocalValueNumberingTest;
};
@@ -223,6 +224,7 @@ class MirSFieldLoweringInfo : public MirFieldInfo {
friend class ClassInitCheckEliminationTest;
friend class GlobalValueNumberingTest;
+ friend class GvnDeadCodeEliminationTest;
friend class LocalValueNumberingTest;
};
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 0f7d45df79..76b5e44df0 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -24,6 +24,7 @@
#include "base/logging.h"
#include "base/stl_util.h"
#include "base/stringprintf.h"
+#include "base/scoped_arena_containers.h"
#include "compiler_ir.h"
#include "dex_file-inl.h"
#include "dex_flags.h"
@@ -34,7 +35,6 @@
#include "leb128.h"
#include "pass_driver_me_post_opt.h"
#include "stack.h"
-#include "utils/scoped_arena_containers.h"
namespace art {
@@ -113,7 +113,6 @@ MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena)
entry_block_(NULL),
exit_block_(NULL),
current_code_item_(NULL),
- dex_pc_to_block_map_(arena->Adapter()),
m_units_(arena->Adapter()),
method_stack_(arena->Adapter()),
current_method_(kInvalidEntry),
@@ -268,31 +267,14 @@ BasicBlock* MIRGraph::SplitBlock(DexOffset code_offset,
DCHECK(insn != orig_block->first_mir_insn);
DCHECK(insn == bottom_block->first_mir_insn);
DCHECK_EQ(insn->offset, bottom_block->start_offset);
- DCHECK_EQ(dex_pc_to_block_map_[insn->offset], orig_block->id);
// Scan the "bottom" instructions, remapping them to the
// newly created "bottom" block.
MIR* p = insn;
p->bb = bottom_block->id;
- dex_pc_to_block_map_[p->offset] = bottom_block->id;
while (p != bottom_block->last_mir_insn) {
p = p->next;
DCHECK(p != nullptr);
p->bb = bottom_block->id;
- int opcode = p->dalvikInsn.opcode;
- /*
- * Some messiness here to ensure that we only enter real opcodes and only the
- * first half of a potentially throwing instruction that has been split into
- * CHECK and work portions. Since the 2nd half of a split operation is always
- * the first in a BasicBlock, we can't hit it here.
- */
- if ((opcode == kMirOpCheck) || !MIR::DecodedInstruction::IsPseudoMirOp(opcode)) {
- BasicBlockId mapped_id = dex_pc_to_block_map_[p->offset];
- // At first glance the instructions should all be mapped to orig_block.
- // However, multiple instructions may correspond to the same dex, hence an earlier
- // instruction may have already moved the mapping for dex to bottom_block.
- DCHECK((mapped_id == orig_block->id) || (mapped_id == bottom_block->id));
- dex_pc_to_block_map_[p->offset] = bottom_block->id;
- }
}
return bottom_block;
@@ -307,12 +289,13 @@ BasicBlock* MIRGraph::SplitBlock(DexOffset code_offset,
* Utilizes a map for fast lookup of the typical cases.
*/
BasicBlock* MIRGraph::FindBlock(DexOffset code_offset, bool create,
- BasicBlock** immed_pred_block_p) {
+ BasicBlock** immed_pred_block_p,
+ ScopedArenaVector<uint16_t>* dex_pc_to_block_map) {
if (code_offset >= current_code_item_->insns_size_in_code_units_) {
return nullptr;
}
- int block_id = dex_pc_to_block_map_[code_offset];
+ int block_id = (*dex_pc_to_block_map)[code_offset];
BasicBlock* bb = GetBasicBlock(block_id);
if ((bb != nullptr) && (bb->start_offset == code_offset)) {
@@ -327,19 +310,46 @@ BasicBlock* MIRGraph::FindBlock(DexOffset code_offset, bool create,
if (bb != nullptr) {
// The target exists somewhere in an existing block.
- return SplitBlock(code_offset, bb, bb == *immed_pred_block_p ? immed_pred_block_p : nullptr);
+ BasicBlock* bottom_block = SplitBlock(code_offset, bb, bb == *immed_pred_block_p ? immed_pred_block_p : nullptr);
+ DCHECK(bottom_block != nullptr);
+ MIR* p = bottom_block->first_mir_insn;
+ BasicBlock* orig_block = bb;
+ DCHECK_EQ((*dex_pc_to_block_map)[p->offset], orig_block->id);
+ // Scan the "bottom" instructions, remapping them to the
+ // newly created "bottom" block.
+ (*dex_pc_to_block_map)[p->offset] = bottom_block->id;
+ while (p != bottom_block->last_mir_insn) {
+ p = p->next;
+ DCHECK(p != nullptr);
+ int opcode = p->dalvikInsn.opcode;
+ /*
+ * Some messiness here to ensure that we only enter real opcodes and only the
+ * first half of a potentially throwing instruction that has been split into
+ * CHECK and work portions. Since the 2nd half of a split operation is always
+ * the first in a BasicBlock, we can't hit it here.
+ */
+ if ((opcode == kMirOpCheck) || !MIR::DecodedInstruction::IsPseudoMirOp(opcode)) {
+ BasicBlockId mapped_id = (*dex_pc_to_block_map)[p->offset];
+ // At first glance the instructions should all be mapped to orig_block.
+ // However, multiple instructions may correspond to the same dex, hence an earlier
+ // instruction may have already moved the mapping for dex to bottom_block.
+ DCHECK((mapped_id == orig_block->id) || (mapped_id == bottom_block->id));
+ (*dex_pc_to_block_map)[p->offset] = bottom_block->id;
+ }
+ }
+ return bottom_block;
}
// Create a new block.
bb = CreateNewBB(kDalvikByteCode);
bb->start_offset = code_offset;
- dex_pc_to_block_map_[bb->start_offset] = bb->id;
+ (*dex_pc_to_block_map)[bb->start_offset] = bb->id;
return bb;
}
/* Identify code range in try blocks and set up the empty catch blocks */
-void MIRGraph::ProcessTryCatchBlocks() {
+void MIRGraph::ProcessTryCatchBlocks(ScopedArenaVector<uint16_t>* dex_pc_to_block_map) {
int tries_size = current_code_item_->tries_size_;
DexOffset offset;
@@ -364,7 +374,7 @@ void MIRGraph::ProcessTryCatchBlocks() {
CatchHandlerIterator iterator(handlers_ptr);
for (; iterator.HasNext(); iterator.Next()) {
uint32_t address = iterator.GetHandlerAddress();
- FindBlock(address, true /*create*/, /* immed_pred_block_p */ nullptr);
+ FindBlock(address, true /*create*/, /* immed_pred_block_p */ nullptr, dex_pc_to_block_map);
}
handlers_ptr = iterator.EndDataPointer();
}
@@ -439,7 +449,8 @@ bool MIRGraph::IsBadMonitorExitCatch(NarrowDexOffset monitor_exit_offset,
/* Process instructions with the kBranch flag */
BasicBlock* MIRGraph::ProcessCanBranch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset,
int width, int flags, const uint16_t* code_ptr,
- const uint16_t* code_end) {
+ const uint16_t* code_end,
+ ScopedArenaVector<uint16_t>* dex_pc_to_block_map) {
DexOffset target = cur_offset;
switch (insn->dalvikInsn.opcode) {
case Instruction::GOTO:
@@ -470,7 +481,8 @@ BasicBlock* MIRGraph::ProcessCanBranch(BasicBlock* cur_block, MIR* insn, DexOffs
}
CountBranch(target);
BasicBlock* taken_block = FindBlock(target, /* create */ true,
- /* immed_pred_block_p */ &cur_block);
+ /* immed_pred_block_p */ &cur_block,
+ dex_pc_to_block_map);
cur_block->taken = taken_block->id;
taken_block->predecessors.push_back(cur_block->id);
@@ -480,18 +492,20 @@ BasicBlock* MIRGraph::ProcessCanBranch(BasicBlock* cur_block, MIR* insn, DexOffs
/* create */
true,
/* immed_pred_block_p */
- &cur_block);
+ &cur_block,
+ dex_pc_to_block_map);
cur_block->fall_through = fallthrough_block->id;
fallthrough_block->predecessors.push_back(cur_block->id);
} else if (code_ptr < code_end) {
- FindBlock(cur_offset + width, /* create */ true, /* immed_pred_block_p */ nullptr);
+ FindBlock(cur_offset + width, /* create */ true, /* immed_pred_block_p */ nullptr, dex_pc_to_block_map);
}
return cur_block;
}
/* Process instructions with the kSwitch flag */
BasicBlock* MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset,
- int width, int flags) {
+ int width, int flags,
+ ScopedArenaVector<uint16_t>* dex_pc_to_block_map) {
UNUSED(flags);
const uint16_t* switch_data =
reinterpret_cast<const uint16_t*>(GetCurrentInsns() + cur_offset + insn->dalvikInsn.vB);
@@ -545,7 +559,8 @@ BasicBlock* MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffs
for (i = 0; i < size; i++) {
BasicBlock* case_block = FindBlock(cur_offset + target_table[i], /* create */ true,
- /* immed_pred_block_p */ &cur_block);
+ /* immed_pred_block_p */ &cur_block,
+ dex_pc_to_block_map);
SuccessorBlockInfo* successor_block_info =
static_cast<SuccessorBlockInfo*>(arena_->Alloc(sizeof(SuccessorBlockInfo),
kArenaAllocSuccessor));
@@ -559,7 +574,8 @@ BasicBlock* MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffs
/* Fall-through case */
BasicBlock* fallthrough_block = FindBlock(cur_offset + width, /* create */ true,
- /* immed_pred_block_p */ nullptr);
+ /* immed_pred_block_p */ nullptr,
+ dex_pc_to_block_map);
cur_block->fall_through = fallthrough_block->id;
fallthrough_block->predecessors.push_back(cur_block->id);
return cur_block;
@@ -568,7 +584,8 @@ BasicBlock* MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffs
/* Process instructions with the kThrow flag */
BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset,
int width, int flags, ArenaBitVector* try_block_addr,
- const uint16_t* code_ptr, const uint16_t* code_end) {
+ const uint16_t* code_ptr, const uint16_t* code_end,
+ ScopedArenaVector<uint16_t>* dex_pc_to_block_map) {
UNUSED(flags);
bool in_try_block = try_block_addr->IsBitSet(cur_offset);
bool is_throw = (insn->dalvikInsn.opcode == Instruction::THROW);
@@ -585,7 +602,8 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffse
for (; iterator.HasNext(); iterator.Next()) {
BasicBlock* catch_block = FindBlock(iterator.GetHandlerAddress(), false /* create */,
- nullptr /* immed_pred_block_p */);
+ nullptr /* immed_pred_block_p */,
+ dex_pc_to_block_map);
if (insn->dalvikInsn.opcode == Instruction::MONITOR_EXIT &&
IsBadMonitorExitCatch(insn->offset, catch_block->start_offset)) {
// Don't allow monitor-exit to catch its own exception, http://b/15745363 .
@@ -620,7 +638,7 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffse
cur_block->explicit_throw = true;
if (code_ptr < code_end) {
// Force creation of new block following THROW via side-effect.
- FindBlock(cur_offset + width, /* create */ true, /* immed_pred_block_p */ nullptr);
+ FindBlock(cur_offset + width, /* create */ true, /* immed_pred_block_p */ nullptr, dex_pc_to_block_map);
}
if (!in_try_block) {
// Don't split a THROW that can't rethrow - we're done.
@@ -652,7 +670,7 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffse
* not automatically terminated after the work portion, and may
* contain following instructions.
*
- * Note also that the dex_pc_to_block_map_ entry for the potentially
+ * Note also that the dex_pc_to_block_map entry for the potentially
* throwing instruction will refer to the original basic block.
*/
BasicBlock* new_block = CreateNewBB(kDalvikByteCode);
@@ -687,7 +705,11 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_
// TODO: need to rework expansion of block list & try_block_addr when inlining activated.
// TUNING: use better estimate of basic blocks for following resize.
block_list_.reserve(block_list_.size() + current_code_item_->insns_size_in_code_units_);
- dex_pc_to_block_map_.resize(dex_pc_to_block_map_.size() + current_code_item_->insns_size_in_code_units_);
+ // FindBlock lookup cache.
+ ScopedArenaAllocator allocator(&cu_->arena_stack);
+ ScopedArenaVector<uint16_t> dex_pc_to_block_map(allocator.Adapter());
+ dex_pc_to_block_map.resize(dex_pc_to_block_map.size() +
+ current_code_item_->insns_size_in_code_units_);
// TODO: replace with explicit resize routine. Using automatic extension side effect for now.
try_block_addr_->SetBit(current_code_item_->insns_size_in_code_units_);
@@ -728,7 +750,7 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_
cur_block->predecessors.push_back(entry_block_->id);
/* Identify code range in try blocks and set up the empty catch blocks */
- ProcessTryCatchBlocks();
+ ProcessTryCatchBlocks(&dex_pc_to_block_map);
uint64_t merged_df_flags = 0u;
@@ -777,20 +799,21 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_
DCHECK(cur_block->taken == NullBasicBlockId);
// Unreachable instruction, mark for no continuation and end basic block.
flags &= ~Instruction::kContinue;
- FindBlock(current_offset_ + width, /* create */ true, /* immed_pred_block_p */ nullptr);
+ FindBlock(current_offset_ + width, /* create */ true,
+ /* immed_pred_block_p */ nullptr, &dex_pc_to_block_map);
}
} else {
cur_block->AppendMIR(insn);
}
// Associate the starting dex_pc for this opcode with its containing basic block.
- dex_pc_to_block_map_[insn->offset] = cur_block->id;
+ dex_pc_to_block_map[insn->offset] = cur_block->id;
code_ptr += width;
if (flags & Instruction::kBranch) {
cur_block = ProcessCanBranch(cur_block, insn, current_offset_,
- width, flags, code_ptr, code_end);
+ width, flags, code_ptr, code_end, &dex_pc_to_block_map);
} else if (flags & Instruction::kReturn) {
cur_block->terminated_by_return = true;
cur_block->fall_through = exit_block_->id;
@@ -804,13 +827,15 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_
* Create a fallthrough block for real instructions
* (incl. NOP).
*/
- FindBlock(current_offset_ + width, /* create */ true, /* immed_pred_block_p */ nullptr);
+ FindBlock(current_offset_ + width, /* create */ true,
+ /* immed_pred_block_p */ nullptr, &dex_pc_to_block_map);
}
} else if (flags & Instruction::kThrow) {
cur_block = ProcessCanThrow(cur_block, insn, current_offset_, width, flags, try_block_addr_,
- code_ptr, code_end);
+ code_ptr, code_end, &dex_pc_to_block_map);
} else if (flags & Instruction::kSwitch) {
- cur_block = ProcessCanSwitch(cur_block, insn, current_offset_, width, flags);
+ cur_block = ProcessCanSwitch(cur_block, insn, current_offset_, width,
+ flags, &dex_pc_to_block_map);
}
if (verify_flags & Instruction::kVerifyVarArgRange ||
verify_flags & Instruction::kVerifyVarArgRangeNonZero) {
@@ -828,7 +853,8 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_
}
current_offset_ += width;
BasicBlock* next_block = FindBlock(current_offset_, /* create */ false,
- /* immed_pred_block_p */ nullptr);
+ /* immed_pred_block_p */ nullptr,
+ &dex_pc_to_block_map);
if (next_block) {
/*
* The next instruction could be the target of a previously parsed
@@ -1573,7 +1599,7 @@ char* MIRGraph::GetDalvikDisassembly(const MIR* mir) {
}
}
int length = str.length() + 1;
- ret = static_cast<char*>(arena_->Alloc(length, kArenaAllocDFInfo));
+ ret = arena_->AllocArray<char>(length, kArenaAllocDFInfo);
strncpy(ret, str.c_str(), length);
return ret;
}
@@ -1710,9 +1736,9 @@ CallInfo* MIRGraph::NewMemCallInfo(BasicBlock* bb, MIR* mir, InvokeType type,
move_result_mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop);
}
info->num_arg_words = mir->ssa_rep->num_uses;
- info->args = (info->num_arg_words == 0) ? NULL : static_cast<RegLocation*>
- (arena_->Alloc(sizeof(RegLocation) * info->num_arg_words, kArenaAllocMisc));
- for (int i = 0; i < info->num_arg_words; i++) {
+ info->args = (info->num_arg_words == 0) ? nullptr :
+ arena_->AllocArray<RegLocation>(info->num_arg_words, kArenaAllocMisc);
+ for (size_t i = 0; i < info->num_arg_words; i++) {
info->args[i] = GetRawSrc(mir, i);
}
info->opt_flags = mir->optimization_flags;
@@ -1742,7 +1768,7 @@ BasicBlock* MIRGraph::NewMemBB(BBType block_type, int block_id) {
void MIRGraph::InitializeConstantPropagation() {
is_constant_v_ = new (arena_) ArenaBitVector(arena_, GetNumSSARegs(), false);
- constant_values_ = static_cast<int*>(arena_->Alloc(sizeof(int) * GetNumSSARegs(), kArenaAllocDFInfo));
+ constant_values_ = arena_->AllocArray<int>(GetNumSSARegs(), kArenaAllocDFInfo);
}
void MIRGraph::InitializeMethodUses() {
@@ -1772,7 +1798,8 @@ void MIRGraph::SSATransformationEnd() {
temp_.ssa.num_vregs = 0u;
temp_.ssa.work_live_vregs = nullptr;
- temp_.ssa.def_block_matrix = nullptr;
+ DCHECK(temp_.ssa.def_block_matrix == nullptr);
+ temp_.ssa.phi_node_blocks = nullptr;
DCHECK(temp_scoped_alloc_.get() != nullptr);
temp_scoped_alloc_.reset();
@@ -2532,4 +2559,13 @@ const uint16_t* MIRGraph::GetInsns(int m_unit_index) const {
return m_units_[m_unit_index]->GetCodeItem()->insns_;
}
+void MIRGraph::SetPuntToInterpreter(bool val) {
+ punt_to_interpreter_ = val;
+ if (val) {
+ // Disable all subsequent optimizations. They may not be safe to run. (For example,
+ // LVN/GVN assumes there are no conflicts found by the type inference pass.)
+ cu_->disable_opt = ~static_cast<decltype(cu_->disable_opt)>(0);
+ }
+}
+
} // namespace art
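
The new out-of-line SetPuntToInterpreter above makes punting a one-way switch that also sets every bit of cu_->disable_opt, so passes that rely on earlier analysis results (the comment calls out LVN/GVN after type inference) cannot run on a method that will be interpreted anyway. A small sketch of the same disable-mask pattern, with made-up option names:

#include <cstdint>

// Illustrative optimization indices; each gets one bit in the disable mask.
enum OptIndex { kLocalValueNumbering = 0, kGlobalValueNumbering = 1, kGvnDeadCodeElimination = 2 };

struct Unit {
  uint32_t disable_opt = 0u;
  bool punt_to_interpreter = false;
};

inline void SetPuntToInterpreter(Unit* cu, bool val) {
  cu->punt_to_interpreter = val;
  if (val) {
    // Disable everything: ~0 of the mask's own type sets all bits at once.
    cu->disable_opt = ~static_cast<decltype(cu->disable_opt)>(0);
  }
}

inline bool OptEnabled(const Unit& cu, OptIndex opt) {
  return (cu.disable_opt & (UINT32_C(1) << opt)) == 0u;
}
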
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 5def19128c..e5abd3be51 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -19,17 +19,17 @@
#include <stdint.h>
+#include "base/arena_containers.h"
+#include "base/scoped_arena_containers.h"
#include "dex_file.h"
#include "dex_instruction.h"
#include "dex_types.h"
#include "invoke_type.h"
#include "mir_field_info.h"
#include "mir_method_info.h"
-#include "utils/arena_bit_vector.h"
-#include "utils/arena_containers.h"
-#include "utils/scoped_arena_containers.h"
#include "reg_location.h"
#include "reg_storage.h"
+#include "utils/arena_bit_vector.h"
namespace art {
@@ -37,6 +37,7 @@ struct CompilationUnit;
class DexCompilationUnit;
class DexFileMethodInliner;
class GlobalValueNumbering;
+class GvnDeadCodeElimination;
// Forward declaration.
class MIRGraph;
@@ -497,19 +498,19 @@ class ChildBlockIterator {
* more efficient invoke code generation.
*/
struct CallInfo {
- int num_arg_words; // Note: word count, not arg count.
- RegLocation* args; // One for each word of arguments.
- RegLocation result; // Eventual target of MOVE_RESULT.
+ size_t num_arg_words; // Note: word count, not arg count.
+ RegLocation* args; // One for each word of arguments.
+ RegLocation result; // Eventual target of MOVE_RESULT.
int opt_flags;
InvokeType type;
uint32_t dex_idx;
- uint32_t index; // Method idx for invokes, type idx for FilledNewArray.
+ uint32_t index; // Method idx for invokes, type idx for FilledNewArray.
uintptr_t direct_code;
uintptr_t direct_method;
- RegLocation target; // Target of following move_result.
+ RegLocation target; // Target of following move_result.
bool skip_this;
bool is_range;
- DexOffset offset; // Offset in code units.
+ DexOffset offset; // Offset in code units.
MIR* mir;
};
@@ -542,8 +543,9 @@ class MIRGraph {
uint32_t method_idx, jobject class_loader, const DexFile& dex_file);
/* Find existing block */
- BasicBlock* FindBlock(DexOffset code_offset) {
- return FindBlock(code_offset, false, NULL);
+ BasicBlock* FindBlock(DexOffset code_offset,
+ ScopedArenaVector<uint16_t>* dex_pc_to_block_map) {
+ return FindBlock(code_offset, false, nullptr, dex_pc_to_block_map);
}
const uint16_t* GetCurrentInsns() const {
@@ -625,8 +627,7 @@ class MIRGraph {
}
void EnableOpcodeCounting() {
- opcode_count_ = static_cast<int*>(arena_->Alloc(kNumPackedOpcodes * sizeof(int),
- kArenaAllocMisc));
+ opcode_count_ = arena_->AllocArray<int>(kNumPackedOpcodes, kArenaAllocMisc);
}
void ShowOpcodeStats();
@@ -1052,7 +1053,12 @@ class MIRGraph {
void DumpCheckStats();
MIR* FindMoveResult(BasicBlock* bb, MIR* mir);
- int SRegToVReg(int ssa_reg) const;
+
+ /* Return the base virtual register for a SSA name */
+ int SRegToVReg(int ssa_reg) const {
+ return ssa_base_vregs_[ssa_reg];
+ }
+
void VerifyDataflow();
void CheckForDominanceFrontier(BasicBlock* dom_bb, const BasicBlock* succ_bb);
bool EliminateNullChecksGate();
@@ -1065,6 +1071,9 @@ class MIRGraph {
bool ApplyGlobalValueNumberingGate();
bool ApplyGlobalValueNumbering(BasicBlock* bb);
void ApplyGlobalValueNumberingEnd();
+ bool EliminateDeadCodeGate();
+ bool EliminateDeadCode(BasicBlock* bb);
+ void EliminateDeadCodeEnd();
bool EliminateSuspendChecksGate();
bool EliminateSuspendChecks(BasicBlock* bb);
void EliminateSuspendChecksEnd();
@@ -1072,15 +1081,15 @@ class MIRGraph {
uint16_t GetGvnIFieldId(MIR* mir) const {
DCHECK(IsInstructionIGetOrIPut(mir->dalvikInsn.opcode));
DCHECK_LT(mir->meta.ifield_lowering_info, ifield_lowering_infos_.size());
- DCHECK(temp_.gvn.ifield_ids_ != nullptr);
- return temp_.gvn.ifield_ids_[mir->meta.ifield_lowering_info];
+ DCHECK(temp_.gvn.ifield_ids != nullptr);
+ return temp_.gvn.ifield_ids[mir->meta.ifield_lowering_info];
}
uint16_t GetGvnSFieldId(MIR* mir) const {
DCHECK(IsInstructionSGetOrSPut(mir->dalvikInsn.opcode));
DCHECK_LT(mir->meta.sfield_lowering_info, sfield_lowering_infos_.size());
- DCHECK(temp_.gvn.sfield_ids_ != nullptr);
- return temp_.gvn.sfield_ids_[mir->meta.sfield_lowering_info];
+ DCHECK(temp_.gvn.sfield_ids != nullptr);
+ return temp_.gvn.sfield_ids[mir->meta.sfield_lowering_info];
}
/*
@@ -1115,9 +1124,7 @@ class MIRGraph {
return punt_to_interpreter_;
}
- void SetPuntToInterpreter(bool val) {
- punt_to_interpreter_ = val;
- }
+ void SetPuntToInterpreter(bool val);
void DisassembleExtendedInstr(const MIR* mir, std::string* decoded_mir);
char* GetDalvikDisassembly(const MIR* mir);
@@ -1200,7 +1207,7 @@ class MIRGraph {
void ComputeDominators();
void CompilerInitializeSSAConversion();
virtual void InitializeBasicBlockDataFlow();
- void InsertPhiNodes();
+ void FindPhiNodeBlocks();
void DoDFSPreOrderSSARename(BasicBlock* block);
bool DfsOrdersUpToDate() const {
@@ -1249,16 +1256,20 @@ class MIRGraph {
bool ContentIsInsn(const uint16_t* code_ptr);
BasicBlock* SplitBlock(DexOffset code_offset, BasicBlock* orig_block,
BasicBlock** immed_pred_block_p);
- BasicBlock* FindBlock(DexOffset code_offset, bool create, BasicBlock** immed_pred_block_p);
- void ProcessTryCatchBlocks();
+ BasicBlock* FindBlock(DexOffset code_offset, bool create, BasicBlock** immed_pred_block_p,
+ ScopedArenaVector<uint16_t>* dex_pc_to_block_map);
+ void ProcessTryCatchBlocks(ScopedArenaVector<uint16_t>* dex_pc_to_block_map);
bool IsBadMonitorExitCatch(NarrowDexOffset monitor_exit_offset, NarrowDexOffset catch_offset);
BasicBlock* ProcessCanBranch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width,
- int flags, const uint16_t* code_ptr, const uint16_t* code_end);
+ int flags, const uint16_t* code_ptr, const uint16_t* code_end,
+ ScopedArenaVector<uint16_t>* dex_pc_to_block_map);
BasicBlock* ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width,
- int flags);
+ int flags,
+ ScopedArenaVector<uint16_t>* dex_pc_to_block_map);
BasicBlock* ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width,
int flags, ArenaBitVector* try_block_addr, const uint16_t* code_ptr,
- const uint16_t* code_end);
+ const uint16_t* code_end,
+ ScopedArenaVector<uint16_t>* dex_pc_to_block_map);
int AddNewSReg(int v_reg);
void HandleSSAUse(int* uses, int dalvik_reg, int reg_index);
void DataFlowSSAFormat35C(MIR* mir);
@@ -1280,7 +1291,8 @@ class MIRGraph {
* @param mir The mir to check.
* @return Returns 'true' if the given MIR might throw an exception.
*/
- bool CanThrow(MIR* mir);
+ bool CanThrow(MIR* mir) const;
+
/**
* @brief Combine multiply and add/sub MIRs into corresponding extended MAC MIR.
* @param mul_mir The multiply MIR to be combined.
@@ -1319,7 +1331,7 @@ class MIRGraph {
ArenaVector<int> ssa_base_vregs_;
ArenaVector<int> ssa_subscripts_;
// Map original Dalvik virtual reg i to the current SSA name.
- int* vreg_to_ssa_map_; // length == method->registers_size
+ int32_t* vreg_to_ssa_map_; // length == method->registers_size
int* ssa_last_defs_; // length == method->registers_size
ArenaBitVector* is_constant_v_; // length == num_ssa_reg
int* constant_values_; // length == num_ssa_reg
@@ -1373,12 +1385,14 @@ class MIRGraph {
size_t num_vregs;
ArenaBitVector* work_live_vregs;
ArenaBitVector** def_block_matrix; // num_vregs x num_blocks_.
+ ArenaBitVector** phi_node_blocks; // num_vregs x num_blocks_.
} ssa;
// Global value numbering.
struct {
GlobalValueNumbering* gvn;
- uint16_t* ifield_ids_; // Part of GVN/LVN but cached here for LVN to avoid recalculation.
- uint16_t* sfield_ids_; // Ditto.
+ uint16_t* ifield_ids; // Part of GVN/LVN but cached here for LVN to avoid recalculation.
+ uint16_t* sfield_ids; // Ditto.
+ GvnDeadCodeElimination* dce;
} gvn;
// Suspend check elimination.
struct {
@@ -1391,7 +1405,6 @@ class MIRGraph {
BasicBlock* entry_block_;
BasicBlock* exit_block_;
const DexFile::CodeItem* current_code_item_;
- ArenaVector<uint16_t> dex_pc_to_block_map_; // FindBlock lookup cache.
ArenaVector<DexCompilationUnit*> m_units_; // List of methods included in this graph
typedef std::pair<int, int> MIRLocation; // Insert point, (m_unit_ index, offset)
ArenaVector<MIRLocation> method_stack_; // Include stack
@@ -1433,6 +1446,7 @@ class MIRGraph {
friend class SuspendCheckEliminationTest;
friend class NullCheckEliminationTest;
friend class GlobalValueNumberingTest;
+ friend class GvnDeadCodeEliminationTest;
friend class LocalValueNumberingTest;
friend class TopologicalSortOrderTest;
};
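
One detail worth noting in the header changes above: CallInfo::num_arg_words is now size_t and, as its comment stresses, counts Dalvik words rather than arguments, since a long or double argument occupies two words. A tiny sketch of that counting under an illustrative type tag (not ART's RegLocation):

#include <cstddef>
#include <vector>

enum class ArgType { kInt, kRef, kFloat, kLong, kDouble };

// Wide types (long/double) take two Dalvik words; everything else takes one.
inline size_t CountArgWords(const std::vector<ArgType>& args) {
  size_t words = 0u;
  for (ArgType t : args) {
    words += (t == ArgType::kLong || t == ArgType::kDouble) ? 2u : 1u;
  }
  return words;
}

// Example: (int, long, float) -> 1 + 2 + 1 = 4 words but only 3 arguments.
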
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index 8718191069..fd67d4ebec 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -16,17 +16,18 @@
#include "base/bit_vector-inl.h"
#include "base/logging.h"
+#include "base/scoped_arena_containers.h"
#include "dataflow_iterator-inl.h"
#include "dex_flags.h"
#include "driver/compiler_driver.h"
#include "driver/dex_compilation_unit.h"
#include "global_value_numbering.h"
+#include "gvn_dead_code_elimination.h"
#include "local_value_numbering.h"
#include "mir_field_info.h"
#include "quick/dex_file_method_inliner.h"
#include "quick/dex_file_to_method_inliner_map.h"
#include "stack.h"
-#include "utils/scoped_arena_containers.h"
namespace art {
@@ -632,8 +633,7 @@ bool MIRGraph::BasicBlockOpt(BasicBlock* bb) {
} else {
DCHECK_EQ(SelectKind(if_true), kSelectMove);
DCHECK_EQ(SelectKind(if_false), kSelectMove);
- int* src_ssa =
- static_cast<int*>(arena_->Alloc(sizeof(int) * 3, kArenaAllocDFInfo));
+ int32_t* src_ssa = arena_->AllocArray<int32_t>(3, kArenaAllocDFInfo);
src_ssa[0] = mir->ssa_rep->uses[0];
src_ssa[1] = if_true->ssa_rep->uses[0];
src_ssa[2] = if_false->ssa_rep->uses[0];
@@ -641,15 +641,12 @@ bool MIRGraph::BasicBlockOpt(BasicBlock* bb) {
mir->ssa_rep->num_uses = 3;
}
mir->ssa_rep->num_defs = 1;
- mir->ssa_rep->defs =
- static_cast<int*>(arena_->Alloc(sizeof(int) * 1, kArenaAllocDFInfo));
- mir->ssa_rep->fp_def =
- static_cast<bool*>(arena_->Alloc(sizeof(bool) * 1, kArenaAllocDFInfo));
+ mir->ssa_rep->defs = arena_->AllocArray<int32_t>(1, kArenaAllocDFInfo);
+ mir->ssa_rep->fp_def = arena_->AllocArray<bool>(1, kArenaAllocDFInfo);
mir->ssa_rep->fp_def[0] = if_true->ssa_rep->fp_def[0];
// Match type of uses to def.
- mir->ssa_rep->fp_use =
- static_cast<bool*>(arena_->Alloc(sizeof(bool) * mir->ssa_rep->num_uses,
- kArenaAllocDFInfo));
+ mir->ssa_rep->fp_use = arena_->AllocArray<bool>(mir->ssa_rep->num_uses,
+ kArenaAllocDFInfo);
for (int i = 0; i < mir->ssa_rep->num_uses; i++) {
mir->ssa_rep->fp_use[i] = mir->ssa_rep->fp_def[0];
}
@@ -900,8 +897,8 @@ bool MIRGraph::EliminateNullChecksGate() {
temp_.nce.num_vregs = GetNumOfCodeAndTempVRs();
temp_.nce.work_vregs_to_check = new (temp_scoped_alloc_.get()) ArenaBitVector(
temp_scoped_alloc_.get(), temp_.nce.num_vregs, false, kBitMapNullCheck);
- temp_.nce.ending_vregs_to_check_matrix = static_cast<ArenaBitVector**>(
- temp_scoped_alloc_->Alloc(sizeof(ArenaBitVector*) * GetNumBlocks(), kArenaAllocMisc));
+ temp_.nce.ending_vregs_to_check_matrix =
+ temp_scoped_alloc_->AllocArray<ArenaBitVector*>(GetNumBlocks(), kArenaAllocMisc);
std::fill_n(temp_.nce.ending_vregs_to_check_matrix, GetNumBlocks(), nullptr);
// reset MIR_MARK
@@ -1133,8 +1130,7 @@ bool MIRGraph::EliminateClassInitChecksGate() {
// Each insn we use here has at least 2 code units, offset/2 will be a unique index.
const size_t end = (GetNumDalvikInsns() + 1u) / 2u;
- temp_.cice.indexes = static_cast<uint16_t*>(
- temp_scoped_alloc_->Alloc(end * sizeof(*temp_.cice.indexes), kArenaAllocGrowableArray));
+ temp_.cice.indexes = temp_scoped_alloc_->AllocArray<uint16_t>(end, kArenaAllocGrowableArray);
std::fill_n(temp_.cice.indexes, end, 0xffffu);
uint32_t unique_class_count = 0u;
@@ -1215,8 +1211,8 @@ bool MIRGraph::EliminateClassInitChecksGate() {
temp_.cice.num_class_bits = 2u * unique_class_count;
temp_.cice.work_classes_to_check = new (temp_scoped_alloc_.get()) ArenaBitVector(
temp_scoped_alloc_.get(), temp_.cice.num_class_bits, false, kBitMapClInitCheck);
- temp_.cice.ending_classes_to_check_matrix = static_cast<ArenaBitVector**>(
- temp_scoped_alloc_->Alloc(sizeof(ArenaBitVector*) * GetNumBlocks(), kArenaAllocMisc));
+ temp_.cice.ending_classes_to_check_matrix =
+ temp_scoped_alloc_->AllocArray<ArenaBitVector*>(GetNumBlocks(), kArenaAllocMisc);
std::fill_n(temp_.cice.ending_classes_to_check_matrix, GetNumBlocks(), nullptr);
DCHECK_GT(temp_.cice.num_class_bits, 0u);
return true;
@@ -1338,9 +1334,9 @@ bool MIRGraph::ApplyGlobalValueNumberingGate() {
DCHECK(temp_scoped_alloc_ == nullptr);
temp_scoped_alloc_.reset(ScopedArenaAllocator::Create(&cu_->arena_stack));
- temp_.gvn.ifield_ids_ =
+ temp_.gvn.ifield_ids =
GlobalValueNumbering::PrepareGvnFieldIds(temp_scoped_alloc_.get(), ifield_lowering_infos_);
- temp_.gvn.sfield_ids_ =
+ temp_.gvn.sfield_ids =
GlobalValueNumbering::PrepareGvnFieldIds(temp_scoped_alloc_.get(), sfield_lowering_infos_);
DCHECK(temp_.gvn.gvn == nullptr);
temp_.gvn.gvn = new (temp_scoped_alloc_.get()) GlobalValueNumbering(
@@ -1364,8 +1360,8 @@ void MIRGraph::ApplyGlobalValueNumberingEnd() {
// Perform modifications.
DCHECK(temp_.gvn.gvn != nullptr);
if (temp_.gvn.gvn->Good()) {
+ temp_.gvn.gvn->StartPostProcessing();
if (max_nested_loops_ != 0u) {
- temp_.gvn.gvn->StartPostProcessing();
TopologicalSortIterator iter(this);
for (BasicBlock* bb = iter.Next(); bb != nullptr; bb = iter.Next()) {
ScopedArenaAllocator allocator(&cu_->arena_stack); // Reclaim memory after each LVN.
@@ -1383,12 +1379,45 @@ void MIRGraph::ApplyGlobalValueNumberingEnd() {
cu_->disable_opt |= (1u << kLocalValueNumbering);
} else {
LOG(WARNING) << "GVN failed for " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
+ cu_->disable_opt |= (1u << kGvnDeadCodeElimination);
}
+ if ((cu_->disable_opt & (1 << kGvnDeadCodeElimination)) != 0) {
+ EliminateDeadCodeEnd();
+ } // else preserve GVN data for CSE.
+}
+
+bool MIRGraph::EliminateDeadCodeGate() {
+ if ((cu_->disable_opt & (1 << kGvnDeadCodeElimination)) != 0) {
+ return false;
+ }
+ DCHECK(temp_scoped_alloc_ != nullptr);
+ temp_.gvn.dce = new (temp_scoped_alloc_.get()) GvnDeadCodeElimination(temp_.gvn.gvn,
+ temp_scoped_alloc_.get());
+ return true;
+}
+
+bool MIRGraph::EliminateDeadCode(BasicBlock* bb) {
+ DCHECK(temp_scoped_alloc_ != nullptr);
+ DCHECK(temp_.gvn.gvn != nullptr);
+ if (bb->block_type != kDalvikByteCode) {
+ return false;
+ }
+ DCHECK(temp_.gvn.dce != nullptr);
+ temp_.gvn.dce->Apply(bb);
+ return false; // No need to repeat.
+}
+
+void MIRGraph::EliminateDeadCodeEnd() {
+ DCHECK_EQ(temp_.gvn.dce != nullptr, (cu_->disable_opt & (1 << kGvnDeadCodeElimination)) == 0);
+ if (temp_.gvn.dce != nullptr) {
+ delete temp_.gvn.dce;
+ temp_.gvn.dce = nullptr;
+ }
delete temp_.gvn.gvn;
temp_.gvn.gvn = nullptr;
- temp_.gvn.ifield_ids_ = nullptr;
- temp_.gvn.sfield_ids_ = nullptr;
+ temp_.gvn.ifield_ids = nullptr;
+ temp_.gvn.sfield_ids = nullptr;
DCHECK(temp_scoped_alloc_ != nullptr);
temp_scoped_alloc_.reset();
}
@@ -1441,8 +1470,8 @@ void MIRGraph::InlineSpecialMethodsStart() {
temp_.smi.processed_indexes = new (temp_scoped_alloc_.get()) ArenaBitVector(
temp_scoped_alloc_.get(), temp_.smi.num_indexes, false, kBitMapMisc);
temp_.smi.processed_indexes->ClearAllBits();
- temp_.smi.lowering_infos = static_cast<uint16_t*>(temp_scoped_alloc_->Alloc(
- temp_.smi.num_indexes * sizeof(*temp_.smi.lowering_infos), kArenaAllocGrowableArray));
+ temp_.smi.lowering_infos =
+ temp_scoped_alloc_->AllocArray<uint16_t>(temp_.smi.num_indexes, kArenaAllocGrowableArray);
}
void MIRGraph::InlineSpecialMethods(BasicBlock* bb) {
@@ -1558,9 +1587,9 @@ bool MIRGraph::BuildExtendedBBList(class BasicBlock* bb) {
void MIRGraph::BasicBlockOptimizationStart() {
if ((cu_->disable_opt & (1 << kLocalValueNumbering)) == 0) {
temp_scoped_alloc_.reset(ScopedArenaAllocator::Create(&cu_->arena_stack));
- temp_.gvn.ifield_ids_ =
+ temp_.gvn.ifield_ids =
GlobalValueNumbering::PrepareGvnFieldIds(temp_scoped_alloc_.get(), ifield_lowering_infos_);
- temp_.gvn.sfield_ids_ =
+ temp_.gvn.sfield_ids =
GlobalValueNumbering::PrepareGvnFieldIds(temp_scoped_alloc_.get(), sfield_lowering_infos_);
}
}
@@ -1586,8 +1615,8 @@ void MIRGraph::BasicBlockOptimization() {
void MIRGraph::BasicBlockOptimizationEnd() {
// Clean up after LVN.
- temp_.gvn.ifield_ids_ = nullptr;
- temp_.gvn.sfield_ids_ = nullptr;
+ temp_.gvn.ifield_ids = nullptr;
+ temp_.gvn.sfield_ids = nullptr;
temp_scoped_alloc_.reset();
}
@@ -1603,8 +1632,7 @@ bool MIRGraph::EliminateSuspendChecksGate() {
temp_.sce.inliner =
cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file);
}
- suspend_checks_in_loops_ = static_cast<uint32_t*>(
- arena_->Alloc(GetNumBlocks() * sizeof(*suspend_checks_in_loops_), kArenaAllocMisc));
+ suspend_checks_in_loops_ = arena_->AllocArray<uint32_t>(GetNumBlocks(), kArenaAllocMisc);
return true;
}
@@ -1690,7 +1718,7 @@ void MIRGraph::EliminateSuspendChecksEnd() {
temp_.sce.inliner = nullptr;
}
-bool MIRGraph::CanThrow(MIR* mir) {
+bool MIRGraph::CanThrow(MIR* mir) const {
if ((mir->dalvikInsn.FlagsOf() & Instruction::kThrow) == 0) {
return false;
}
@@ -1724,7 +1752,6 @@ bool MIRGraph::CanThrow(MIR* mir) {
// Non-throwing only if range check has been eliminated.
return ((opt_flags & MIR_IGNORE_RANGE_CHECK) == 0);
} else if (mir->dalvikInsn.opcode == Instruction::ARRAY_LENGTH ||
- mir->dalvikInsn.opcode == Instruction::FILL_ARRAY_DATA ||
static_cast<int>(mir->dalvikInsn.opcode) == kMirOpNullCheck) {
// No more checks for these (null check was processed above).
return false;
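
EliminateDeadCodeGate, EliminateDeadCode and EliminateDeadCodeEnd above follow the Gate / per-block worker / End shape used by the other MIR passes: the gate allocates pass-scoped state (here a GvnDeadCodeElimination object on the scoped arena, reusing the still-live GVN), the worker runs once per basic block and returns false because no iteration to a fixed point is needed, and End releases the state. A generic skeleton of that protocol, independent of ART's pass manager (names are illustrative):

#include <memory>
#include <vector>

struct BasicBlock { bool is_bytecode; };

class DeadCodePass {
 public:
  // Gate: decide whether to run at all and set up per-pass state.
  bool Gate(bool dce_disabled) {
    if (dce_disabled) {
      return false;
    }
    state_ = std::make_unique<int>(0);  // Stand-in for the real DCE object.
    return true;
  }

  // Worker: invoked once per block; returning false means "no repeat needed".
  bool Worker(BasicBlock* bb) {
    if (!bb->is_bytecode) {
      return false;
    }
    ++*state_;  // Apply the transformation to this block.
    return false;
  }

  // End: release pass-scoped state.
  void End() { state_.reset(); }

 private:
  std::unique_ptr<int> state_;
};

// Driver sketch: gate once, sweep all blocks, then finish.
inline void RunPass(DeadCodePass* pass, std::vector<BasicBlock>& blocks, bool dce_disabled) {
  if (pass->Gate(dce_disabled)) {
    for (BasicBlock& bb : blocks) {
      pass->Worker(&bb);
    }
    pass->End();
  }
}

The pass_driver_me_opts.cc change later in this patch registers the new pass immediately after GlobalValueNumberingPass for the same reason: the gate expects GVN's data to still be alive.
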
diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc
index 199bc27481..be05b80d83 100644
--- a/compiler/dex/mir_optimization_test.cc
+++ b/compiler/dex/mir_optimization_test.cc
@@ -280,7 +280,7 @@ class MirOptimizationTest : public testing::Test {
void DoPrepareMIRs(const MIRDef* defs, size_t count) {
mir_count_ = count;
- mirs_ = reinterpret_cast<MIR*>(cu_.arena.Alloc(sizeof(MIR) * count, kArenaAllocMIR));
+ mirs_ = cu_.arena.AllocArray<MIR>(count, kArenaAllocMIR);
uint64_t merged_df_flags = 0u;
for (size_t i = 0u; i != count; ++i) {
const MIRDef* def = &defs[i];
diff --git a/compiler/dex/pass_driver_me_opts.cc b/compiler/dex/pass_driver_me_opts.cc
index 8c8bde63ea..320d06aa06 100644
--- a/compiler/dex/pass_driver_me_opts.cc
+++ b/compiler/dex/pass_driver_me_opts.cc
@@ -45,6 +45,7 @@ void PassDriverMEOpts::SetupPasses(PassManager* pass_manager) {
pass_manager->AddPass(new BBCombine);
pass_manager->AddPass(new CodeLayout);
pass_manager->AddPass(new GlobalValueNumberingPass);
+ pass_manager->AddPass(new DeadCodeEliminationPass);
pass_manager->AddPass(new ConstantPropagation);
pass_manager->AddPass(new MethodUseCount);
pass_manager->AddPass(new BBOptimizations);
diff --git a/compiler/dex/pass_driver_me_post_opt.cc b/compiler/dex/pass_driver_me_post_opt.cc
index 4e1322702f..a8b8a54033 100644
--- a/compiler/dex/pass_driver_me_post_opt.cc
+++ b/compiler/dex/pass_driver_me_post_opt.cc
@@ -37,7 +37,7 @@ void PassDriverMEPostOpt::SetupPasses(PassManager* pass_manager) {
pass_manager->AddPass(new InitializeSSATransformation);
pass_manager->AddPass(new ClearPhiInstructions);
pass_manager->AddPass(new DefBlockMatrix);
- pass_manager->AddPass(new CreatePhiNodes);
+ pass_manager->AddPass(new FindPhiNodeBlocksPass);
pass_manager->AddPass(new SSAConversion);
pass_manager->AddPass(new PhiNodeOperands);
pass_manager->AddPass(new PerformInitRegLocations);
diff --git a/compiler/dex/post_opt_passes.h b/compiler/dex/post_opt_passes.h
index a3dbc5a273..1ab862503b 100644
--- a/compiler/dex/post_opt_passes.h
+++ b/compiler/dex/post_opt_passes.h
@@ -189,19 +189,19 @@ class DefBlockMatrix : public PassMEMirSsaRep {
};
/**
- * @class CreatePhiNodes
- * @brief Pass to create the phi nodes after SSA calculation
+ * @class FindPhiNodeBlocksPass
+ * @brief Pass to find out where we need to insert the phi nodes for the SSA conversion.
*/
-class CreatePhiNodes : public PassMEMirSsaRep {
+class FindPhiNodeBlocksPass : public PassMEMirSsaRep {
public:
- CreatePhiNodes() : PassMEMirSsaRep("CreatePhiNodes", kNoNodes) {
+ FindPhiNodeBlocksPass() : PassMEMirSsaRep("FindPhiNodeBlocks", kNoNodes) {
}
void Start(PassDataHolder* data) const {
DCHECK(data != nullptr);
CompilationUnit* c_unit = down_cast<PassMEDataHolder*>(data)->c_unit;
DCHECK(c_unit != nullptr);
- c_unit->mir_graph.get()->InsertPhiNodes();
+ c_unit->mir_graph.get()->FindPhiNodeBlocks();
}
};
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index f15b727857..9cf005bc48 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -52,16 +52,13 @@ namespace art {
*/
void ArmMir2Lir::GenLargeSparseSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src) {
const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
- if (cu_->verbose) {
- DumpSparseSwitchTable(table);
- }
// Add the table to the list - we'll process it later
SwitchTable *tab_rec =
static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
+ tab_rec->switch_mir = mir;
tab_rec->table = table;
tab_rec->vaddr = current_dalvik_offset_;
uint32_t size = table[1];
- tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR));
switch_tables_.push_back(tab_rec);
// Get the switch value
@@ -100,17 +97,13 @@ void ArmMir2Lir::GenLargeSparseSwitch(MIR* mir, uint32_t table_offset, RegLocati
void ArmMir2Lir::GenLargePackedSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src) {
const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
- if (cu_->verbose) {
- DumpPackedSwitchTable(table);
- }
// Add the table to the list - we'll process it later
SwitchTable *tab_rec =
static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
+ tab_rec->switch_mir = mir;
tab_rec->table = table;
tab_rec->vaddr = current_dalvik_offset_;
uint32_t size = table[1];
- tab_rec->targets =
- static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR));
switch_tables_.push_back(tab_rec);
// Get the switch value
@@ -491,6 +484,28 @@ void ArmMir2Lir::GenSpecialExitSequence() {
NewLIR1(kThumbBx, rs_rARM_LR.GetReg());
}
+void ArmMir2Lir::GenSpecialEntryForSuspend() {
+ // Keep 16-byte stack alignment - push r0, i.e. ArtMethod*, r5, r6, lr.
+ DCHECK(!IsTemp(rs_r5));
+ DCHECK(!IsTemp(rs_r6));
+ core_spill_mask_ =
+ (1u << rs_r5.GetRegNum()) | (1u << rs_r6.GetRegNum()) | (1u << rs_rARM_LR.GetRegNum());
+ num_core_spills_ = 3u;
+ fp_spill_mask_ = 0u;
+ num_fp_spills_ = 0u;
+ frame_size_ = 16u;
+ core_vmap_table_.clear();
+ fp_vmap_table_.clear();
+ NewLIR1(kThumbPush, (1u << rs_r0.GetRegNum()) | // ArtMethod*
+ (core_spill_mask_ & ~(1u << rs_rARM_LR.GetRegNum())) | // Spills other than LR.
+ (1u << 8)); // LR encoded for 16-bit push.
+}
+
+void ArmMir2Lir::GenSpecialExitForSuspend() {
+ // Pop the frame. (ArtMethod* no longer needed but restore it anyway.)
+ NewLIR1(kThumb2Pop, (1u << rs_r0.GetRegNum()) | core_spill_mask_); // 32-bit because of LR.
+}
+
static bool ArmUseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) {
// Emit relative calls only within a dex file due to the limited range of the BL insn.
return cu->dex_file == target_method.dex_file;
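
GenSpecialEntryForSuspend above assembles the operand of a 16-bit Thumb PUSH by hand: bits 0 through 7 select r0 through r7 and bit 8 stands for LR, which is why the ARM register number of LR (14) is masked out of core_spill_mask_ and replaced with 1 << 8. A small sketch of that encoding step, assuming the usual ARM numbering (helper names are illustrative):

#include <cassert>
#include <cstdint>

constexpr uint32_t kArmRegLr = 14u;

// Build the register-list operand of a Thumb-1 PUSH from a core spill mask
// expressed in ARM register numbers (bit N set => rN is spilled).
inline uint32_t ThumbPushOperand(uint32_t core_spill_mask, bool push_r0_method) {
  uint32_t low_regs = core_spill_mask & 0xffu;             // r0-r7 map straight through.
  uint32_t lr_bit = (core_spill_mask & (1u << kArmRegLr)) != 0u ? (1u << 8) : 0u;
  // r8-r12 cannot be encoded in the 16-bit PUSH at all.
  assert((core_spill_mask & ~((1u << kArmRegLr) | 0xffu)) == 0u);
  uint32_t method_bit = push_r0_method ? 1u : 0u;          // ArtMethod* arrives in r0.
  return method_bit | low_regs | lr_bit;
}

// For the entry sequence above: mask = r5 | r6 | lr, plus r0 for the method,
// giving 0b1'0110'0001, i.e. 0x161.
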
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 025e69f0ba..67fabbddfe 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -18,9 +18,9 @@
#define ART_COMPILER_DEX_QUICK_ARM_CODEGEN_ARM_H_
#include "arm_lir.h"
+#include "base/arena_containers.h"
#include "base/logging.h"
#include "dex/quick/mir_to_lir.h"
-#include "utils/arena_containers.h"
namespace art {
@@ -167,7 +167,9 @@ class ArmMir2Lir FINAL : public Mir2Lir {
void GenDivZeroCheckWide(RegStorage reg);
void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method);
void GenExitSequence();
- void GenSpecialExitSequence();
+ void GenSpecialExitSequence() OVERRIDE;
+ void GenSpecialEntryForSuspend() OVERRIDE;
+ void GenSpecialExitForSuspend() OVERRIDE;
void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double);
void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
void GenSelect(BasicBlock* bb, MIR* mir);
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 6492442b94..24e8fdff80 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -51,16 +51,13 @@ namespace art {
*/
void Arm64Mir2Lir::GenLargeSparseSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src) {
const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
- if (cu_->verbose) {
- DumpSparseSwitchTable(table);
- }
// Add the table to the list - we'll process it later
SwitchTable *tab_rec =
static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
+ tab_rec->switch_mir = mir;
tab_rec->table = table;
tab_rec->vaddr = current_dalvik_offset_;
uint32_t size = table[1];
- tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR));
switch_tables_.push_back(tab_rec);
// Get the switch value
@@ -103,17 +100,13 @@ void Arm64Mir2Lir::GenLargeSparseSwitch(MIR* mir, uint32_t table_offset, RegLoca
void Arm64Mir2Lir::GenLargePackedSwitch(MIR* mir, uint32_t table_offset, RegLocation rl_src) {
const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
- if (cu_->verbose) {
- DumpPackedSwitchTable(table);
- }
// Add the table to the list - we'll process it later
SwitchTable *tab_rec =
static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
+ tab_rec->switch_mir = mir;
tab_rec->table = table;
tab_rec->vaddr = current_dalvik_offset_;
uint32_t size = table[1];
- tab_rec->targets =
- static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*), kArenaAllocLIR));
switch_tables_.push_back(tab_rec);
// Get the switch value
@@ -399,10 +392,26 @@ void Arm64Mir2Lir::GenSpecialExitSequence() {
NewLIR0(kA64Ret);
}
+void Arm64Mir2Lir::GenSpecialEntryForSuspend() {
+ // Keep 16-byte stack alignment - push x0, i.e. ArtMethod*, lr.
+ core_spill_mask_ = (1u << rs_xLR.GetRegNum());
+ num_core_spills_ = 1u;
+ fp_spill_mask_ = 0u;
+ num_fp_spills_ = 0u;
+ frame_size_ = 16u;
+ core_vmap_table_.clear();
+ fp_vmap_table_.clear();
+ NewLIR4(WIDE(kA64StpPre4rrXD), rs_x0.GetReg(), rs_xLR.GetReg(), rs_sp.GetReg(), -frame_size_ / 8);
+}
+
+void Arm64Mir2Lir::GenSpecialExitForSuspend() {
+ // Pop the frame. (ArtMethod* no longer needed but restore it anyway.)
+ NewLIR4(WIDE(kA64LdpPost4rrXD), rs_x0.GetReg(), rs_xLR.GetReg(), rs_sp.GetReg(), frame_size_ / 8);
+}
+
static bool Arm64UseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) {
- UNUSED(cu, target_method);
- // Always emit relative calls.
- return true;
+ // Emit relative calls anywhere in the image or within a dex file otherwise.
+ return cu->compiler_driver->IsImage() || cu->dex_file == target_method.dex_file;
}
/*
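
Both suspend-only entry sequences in this commit pick frame_size_ = 16 by hand: the ARM version pushes r0 (the ArtMethod*), r5, r6 and LR (four 4-byte slots), while the AArch64 version stores x0 and LR with a pre-indexed STP (two 8-byte slots), and either way the result already satisfies the 16-byte stack alignment both ABIs require. A tiny rounding sketch of the invariant such code relies on (not ART's actual frame layout logic):

#include <cstddef>

// Round 'bytes' up to the next multiple of 'alignment' (a power of two).
constexpr size_t RoundUpFrame(size_t bytes, size_t alignment) {
  return (bytes + alignment - 1u) & ~(alignment - 1u);
}

// ARM:     4 slots * 4 bytes = 16 -> already aligned.
// AArch64: 2 slots * 8 bytes = 16 -> already aligned.
static_assert(RoundUpFrame(4u * 4u, 16u) == 16u, "arm suspend frame");
static_assert(RoundUpFrame(2u * 8u, 16u) == 16u, "arm64 suspend frame");
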
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index 49ca625096..d5f0536691 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -169,6 +169,8 @@ class Arm64Mir2Lir FINAL : public Mir2Lir {
void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) OVERRIDE;
void GenExitSequence() OVERRIDE;
void GenSpecialExitSequence() OVERRIDE;
+ void GenSpecialEntryForSuspend() OVERRIDE;
+ void GenSpecialExitForSuspend() OVERRIDE;
void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double) OVERRIDE;
void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) OVERRIDE;
void GenSelect(BasicBlock* bb, MIR* mir) OVERRIDE;
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 04113dba81..0be9fd4781 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -456,37 +456,29 @@ LIR* Mir2Lir::AddWideData(LIR* *constant_list_p, int val_lo, int val_hi) {
return AddWordData(constant_list_p, val_lo);
}
-static void Push32(std::vector<uint8_t>&buf, int data) {
- buf.push_back(data & 0xff);
- buf.push_back((data >> 8) & 0xff);
- buf.push_back((data >> 16) & 0xff);
- buf.push_back((data >> 24) & 0xff);
-}
-
/**
* @brief Push a compressed reference which needs patching at link/patchoat-time.
* @details This needs to be kept consistent with the code which actually does the patching in
* oat_writer.cc and in the patchoat tool.
*/
-static void PushUnpatchedReference(std::vector<uint8_t>&buf) {
+static void PushUnpatchedReference(CodeBuffer* buf) {
// Note that we can safely initialize the patches to zero. The code deduplication mechanism takes
// the patches into account when determining whether two pieces of code are functionally
// equivalent.
Push32(buf, UINT32_C(0));
}
-static void AlignBuffer(std::vector<uint8_t>&buf, size_t offset) {
- while (buf.size() < offset) {
- buf.push_back(0);
- }
+static void AlignBuffer(CodeBuffer* buf, size_t offset) {
+ DCHECK_LE(buf->size(), offset);
+ buf->insert(buf->end(), offset - buf->size(), 0u);
}
/* Write the literal pool to the output stream */
void Mir2Lir::InstallLiteralPools() {
- AlignBuffer(code_buffer_, data_offset_);
+ AlignBuffer(&code_buffer_, data_offset_);
LIR* data_lir = literal_list_;
while (data_lir != nullptr) {
- Push32(code_buffer_, data_lir->operands[0]);
+ Push32(&code_buffer_, data_lir->operands[0]);
data_lir = NEXT_LIR(data_lir);
}
// TODO: patches_.reserve() as needed.
@@ -498,7 +490,7 @@ void Mir2Lir::InstallLiteralPools() {
reinterpret_cast<const DexFile*>(UnwrapPointer(data_lir->operands[1]));
patches_.push_back(LinkerPatch::CodePatch(code_buffer_.size(),
target_dex_file, target_method_idx));
- PushUnpatchedReference(code_buffer_);
+ PushUnpatchedReference(&code_buffer_);
data_lir = NEXT_LIR(data_lir);
}
data_lir = method_literal_list_;
@@ -508,7 +500,7 @@ void Mir2Lir::InstallLiteralPools() {
reinterpret_cast<const DexFile*>(UnwrapPointer(data_lir->operands[1]));
patches_.push_back(LinkerPatch::MethodPatch(code_buffer_.size(),
target_dex_file, target_method_idx));
- PushUnpatchedReference(code_buffer_);
+ PushUnpatchedReference(&code_buffer_);
data_lir = NEXT_LIR(data_lir);
}
// Push class literals.
@@ -519,7 +511,7 @@ void Mir2Lir::InstallLiteralPools() {
reinterpret_cast<const DexFile*>(UnwrapPointer(data_lir->operands[1]));
patches_.push_back(LinkerPatch::TypePatch(code_buffer_.size(),
class_dex_file, target_type_idx));
- PushUnpatchedReference(code_buffer_);
+ PushUnpatchedReference(&code_buffer_);
data_lir = NEXT_LIR(data_lir);
}
}
@@ -527,7 +519,7 @@ void Mir2Lir::InstallLiteralPools() {
/* Write the switch tables to the output stream */
void Mir2Lir::InstallSwitchTables() {
for (Mir2Lir::SwitchTable* tab_rec : switch_tables_) {
- AlignBuffer(code_buffer_, tab_rec->offset);
+ AlignBuffer(&code_buffer_, tab_rec->offset);
/*
* For Arm, our reference point is the address of the bx
* instruction that does the launch, so we have to subtract
@@ -557,29 +549,49 @@ void Mir2Lir::InstallSwitchTables() {
LOG(INFO) << "Switch table for offset 0x" << std::hex << bx_offset;
}
if (tab_rec->table[0] == Instruction::kSparseSwitchSignature) {
- const int32_t* keys = reinterpret_cast<const int32_t*>(&(tab_rec->table[2]));
- for (int elems = 0; elems < tab_rec->table[1]; elems++) {
- int disp = tab_rec->targets[elems]->offset - bx_offset;
+ DCHECK(tab_rec->switch_mir != nullptr);
+ BasicBlock* bb = mir_graph_->GetBasicBlock(tab_rec->switch_mir->bb);
+ DCHECK(bb != nullptr);
+ int elems = 0;
+ for (SuccessorBlockInfo* successor_block_info : bb->successor_blocks) {
+ int key = successor_block_info->key;
+ int target = successor_block_info->block;
+ LIR* boundary_lir = InsertCaseLabel(target, key);
+ DCHECK(boundary_lir != nullptr);
+ int disp = boundary_lir->offset - bx_offset;
+ Push32(&code_buffer_, key);
+ Push32(&code_buffer_, disp);
if (cu_->verbose) {
LOG(INFO) << " Case[" << elems << "] key: 0x"
- << std::hex << keys[elems] << ", disp: 0x"
+ << std::hex << key << ", disp: 0x"
<< std::hex << disp;
}
- Push32(code_buffer_, keys[elems]);
- Push32(code_buffer_,
- tab_rec->targets[elems]->offset - bx_offset);
+ elems++;
}
+ DCHECK_EQ(elems, tab_rec->table[1]);
} else {
DCHECK_EQ(static_cast<int>(tab_rec->table[0]),
static_cast<int>(Instruction::kPackedSwitchSignature));
- for (int elems = 0; elems < tab_rec->table[1]; elems++) {
- int disp = tab_rec->targets[elems]->offset - bx_offset;
+ DCHECK(tab_rec->switch_mir != nullptr);
+ BasicBlock* bb = mir_graph_->GetBasicBlock(tab_rec->switch_mir->bb);
+ DCHECK(bb != nullptr);
+ int elems = 0;
+ int low_key = s4FromSwitchData(&tab_rec->table[2]);
+ for (SuccessorBlockInfo* successor_block_info : bb->successor_blocks) {
+ int key = successor_block_info->key;
+ DCHECK_EQ(elems + low_key, key);
+ int target = successor_block_info->block;
+ LIR* boundary_lir = InsertCaseLabel(target, key);
+ DCHECK(boundary_lir != nullptr);
+ int disp = boundary_lir->offset - bx_offset;
+ Push32(&code_buffer_, disp);
if (cu_->verbose) {
LOG(INFO) << " Case[" << elems << "] disp: 0x"
<< std::hex << disp;
}
- Push32(code_buffer_, tab_rec->targets[elems]->offset - bx_offset);
+ elems++;
}
+ DCHECK_EQ(elems, tab_rec->table[1]);
}
}
}
@@ -587,7 +599,7 @@ void Mir2Lir::InstallSwitchTables() {
/* Write the fill array data to the output stream */
void Mir2Lir::InstallFillArrayData() {
for (Mir2Lir::FillArrayData* tab_rec : fill_array_data_) {
- AlignBuffer(code_buffer_, tab_rec->offset);
+ AlignBuffer(&code_buffer_, tab_rec->offset);
for (int i = 0; i < (tab_rec->size + 1) / 2; i++) {
code_buffer_.push_back(tab_rec->table[i] & 0xFF);
code_buffer_.push_back((tab_rec->table[i] >> 8) & 0xFF);
@@ -830,58 +842,25 @@ int Mir2Lir::AssignFillArrayDataOffset(CodeOffset offset) {
* branch table during the assembly phase. All resource flags
* are set to prevent code motion. KeyVal is just there for debugging.
*/
-LIR* Mir2Lir::InsertCaseLabel(DexOffset vaddr, int keyVal) {
- LIR* boundary_lir = &block_label_list_[mir_graph_->FindBlock(vaddr)->id];
+LIR* Mir2Lir::InsertCaseLabel(uint32_t bbid, int keyVal) {
+ LIR* boundary_lir = &block_label_list_[bbid];
LIR* res = boundary_lir;
if (cu_->verbose) {
// Only pay the expense if we're pretty-printing.
LIR* new_label = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocLIR));
- new_label->dalvik_offset = vaddr;
+ BasicBlock* bb = mir_graph_->GetBasicBlock(bbid);
+ DCHECK(bb != nullptr);
+ new_label->dalvik_offset = bb->start_offset;
new_label->opcode = kPseudoCaseLabel;
new_label->operands[0] = keyVal;
new_label->flags.fixup = kFixupLabel;
DCHECK(!new_label->flags.use_def_invalid);
new_label->u.m.def_mask = &kEncodeAll;
InsertLIRAfter(boundary_lir, new_label);
- res = new_label;
}
return res;
}
-void Mir2Lir::MarkPackedCaseLabels(Mir2Lir::SwitchTable* tab_rec) {
- const uint16_t* table = tab_rec->table;
- DexOffset base_vaddr = tab_rec->vaddr;
- const int32_t *targets = reinterpret_cast<const int32_t*>(&table[4]);
- int entries = table[1];
- int low_key = s4FromSwitchData(&table[2]);
- for (int i = 0; i < entries; i++) {
- tab_rec->targets[i] = InsertCaseLabel(base_vaddr + targets[i], i + low_key);
- }
-}
-
-void Mir2Lir::MarkSparseCaseLabels(Mir2Lir::SwitchTable* tab_rec) {
- const uint16_t* table = tab_rec->table;
- DexOffset base_vaddr = tab_rec->vaddr;
- int entries = table[1];
- const int32_t* keys = reinterpret_cast<const int32_t*>(&table[2]);
- const int32_t* targets = &keys[entries];
- for (int i = 0; i < entries; i++) {
- tab_rec->targets[i] = InsertCaseLabel(base_vaddr + targets[i], keys[i]);
- }
-}
-
-void Mir2Lir::ProcessSwitchTables() {
- for (Mir2Lir::SwitchTable* tab_rec : switch_tables_) {
- if (tab_rec->table[0] == Instruction::kPackedSwitchSignature) {
- MarkPackedCaseLabels(tab_rec);
- } else if (tab_rec->table[0] == Instruction::kSparseSwitchSignature) {
- MarkSparseCaseLabels(tab_rec);
- } else {
- LOG(FATAL) << "Invalid switch table";
- }
- }
-}
-
void Mir2Lir::DumpSparseSwitchTable(const uint16_t* table) {
/*
* Sparse switch data format:
@@ -988,8 +967,11 @@ Mir2Lir::Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena
estimated_native_code_size_(0),
reg_pool_(nullptr),
live_sreg_(0),
+ code_buffer_(mir_graph->GetArena()->Adapter()),
+ encoded_mapping_table_(mir_graph->GetArena()->Adapter()),
core_vmap_table_(mir_graph->GetArena()->Adapter()),
fp_vmap_table_(mir_graph->GetArena()->Adapter()),
+ native_gc_map_(mir_graph->GetArena()->Adapter()),
patches_(mir_graph->GetArena()->Adapter()),
num_core_spills_(0),
num_fp_spills_(0),
@@ -1032,9 +1014,6 @@ void Mir2Lir::Materialize() {
/* Method is not empty */
if (first_lir_insn_) {
- // mark the targets of switch statement case labels
- ProcessSwitchTables();
-
/* Convert LIR into machine code. */
AssembleLIR();
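
The InstallSwitchTables() loop above now walks the switch block's successors and emits, per case, the displacement of the case label relative to the table's anchor instruction. A minimal standalone sketch of that emission step, assuming a little-endian byte buffer like code_buffer_; Push32LE and EmitSwitchDisps are illustrative names, not ART APIs:

#include <cstdint>
#include <vector>

// Append a 32-bit value in little-endian byte order.
static void Push32LE(std::vector<uint8_t>* buf, uint32_t value) {
  buf->push_back(value & 0xFFu);
  buf->push_back((value >> 8) & 0xFFu);
  buf->push_back((value >> 16) & 0xFFu);
  buf->push_back((value >> 24) & 0xFFu);
}

// Emit one displacement per case target, relative to the switch's anchor
// instruction, mirroring "disp = boundary_lir->offset - bx_offset" above.
static void EmitSwitchDisps(std::vector<uint8_t>* buf,
                            const std::vector<uint32_t>& case_target_offsets,
                            uint32_t anchor_offset) {
  for (uint32_t target : case_target_offsets) {
    Push32LE(buf, target - anchor_offset);
  }
}
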
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 9f53b89186..3c9b7a3ed3 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -413,7 +413,7 @@ void Mir2Lir::GenNewArray(uint32_t type_idx, RegLocation rl_dest,
* Current code also throws internal unimp if not 'L', '[' or 'I'.
*/
void Mir2Lir::GenFilledNewArray(CallInfo* info) {
- int elems = info->num_arg_words;
+ size_t elems = info->num_arg_words;
int type_idx = info->index;
FlushAllRegs(); /* Everything to home location */
QuickEntrypointEnum target;
@@ -450,7 +450,7 @@ void Mir2Lir::GenFilledNewArray(CallInfo* info) {
* of any regs in the source range that have been promoted to
* home location.
*/
- for (int i = 0; i < elems; i++) {
+ for (size_t i = 0; i < elems; i++) {
RegLocation loc = UpdateLoc(info->args[i]);
if (loc.location == kLocPhysReg) {
ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
@@ -493,7 +493,7 @@ void Mir2Lir::GenFilledNewArray(CallInfo* info) {
OpRegRegImm(kOpAdd, r_dst, ref_reg,
mirror::Array::DataOffset(component_size).Int32Value());
// Set up the loop counter (known to be > 0)
- LoadConstant(r_idx, elems - 1);
+ LoadConstant(r_idx, static_cast<int>(elems - 1));
// Generate the copy loop. Going backwards for convenience
LIR* loop_head_target = NewLIR0(kPseudoTargetLabel);
// Copy next element
@@ -515,9 +515,9 @@ void Mir2Lir::GenFilledNewArray(CallInfo* info) {
FreeTemp(r_dst);
FreeTemp(r_src);
} else {
- DCHECK_LE(elems, 5); // Usually but not necessarily non-range.
+ DCHECK_LE(elems, 5u); // Usually but not necessarily non-range.
// TUNING: interleave
- for (int i = 0; i < elems; i++) {
+ for (size_t i = 0; i < elems; i++) {
RegLocation rl_arg;
if (info->args[i].ref) {
rl_arg = LoadValue(info->args[i], kRefReg);
@@ -537,7 +537,7 @@ void Mir2Lir::GenFilledNewArray(CallInfo* info) {
}
if (elems != 0 && info->args[0].ref) {
// If there is at least one potentially non-null value, unconditionally mark the GC card.
- for (int i = 0; i < elems; i++) {
+ for (size_t i = 0; i < elems; i++) {
if (!mir_graph_->IsConstantNullRef(info->args[i])) {
UnconditionallyMarkGCCard(ref_reg);
break;
@@ -2158,7 +2158,7 @@ void Mir2Lir::GenConversionCall(QuickEntrypointEnum trampoline, RegLocation rl_d
}
}
-class SuspendCheckSlowPath : public Mir2Lir::LIRSlowPath {
+class Mir2Lir::SuspendCheckSlowPath : public Mir2Lir::LIRSlowPath {
public:
SuspendCheckSlowPath(Mir2Lir* m2l, LIR* branch, LIR* cont)
: LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, cont) {
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index bb5b0cdd22..8e3df7c7a2 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -405,9 +405,10 @@ void Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) {
*/
ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
RegLocation* t_loc = nullptr;
+ EnsureInitializedArgMappingToPhysicalReg();
for (uint32_t i = 0; i < mir_graph_->GetNumOfInVRs(); i += t_loc->wide ? 2 : 1) {
// get reg corresponding to input
- RegStorage reg = GetArgMappingToPhysicalReg(i);
+ RegStorage reg = in_to_reg_storage_mapping_.GetReg(i);
t_loc = &ArgLocs[i];
// If the wide input appeared as single, flush it and go
@@ -661,7 +662,7 @@ void Mir2Lir::GenDalvikArgsFlushPromoted(CallInfo* info, int start) {
}
ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
// Scan the rest of the args - if in phys_reg flush to memory
- for (int next_arg = start; next_arg < info->num_arg_words;) {
+ for (size_t next_arg = start; next_arg < info->num_arg_words;) {
RegLocation loc = info->args[next_arg];
if (loc.wide) {
loc = UpdateLocWide(loc);
@@ -719,10 +720,10 @@ int Mir2Lir::GenDalvikArgs(CallInfo* info, int call_state,
uint32_t vtable_idx, uintptr_t direct_code, uintptr_t direct_method,
InvokeType type, bool skip_this) {
// If no arguments, just return.
- if (info->num_arg_words == 0)
+ if (info->num_arg_words == 0u)
return call_state;
- const int start_index = skip_this ? 1 : 0;
+ const size_t start_index = skip_this ? 1 : 0;
// Get architecture dependent mapping between output VRs and physical registers
// basing on shorty of method to call.
@@ -733,13 +734,13 @@ int Mir2Lir::GenDalvikArgs(CallInfo* info, int call_state,
in_to_reg_storage_mapping.Initialize(&shorty_iterator, GetResetedInToRegStorageMapper());
}
- int stack_map_start = std::max(in_to_reg_storage_mapping.GetMaxMappedIn() + 1, start_index);
+ size_t stack_map_start = std::max(in_to_reg_storage_mapping.GetEndMappedIn(), start_index);
if ((stack_map_start < info->num_arg_words) && info->args[stack_map_start].high_word) {
// It is possible that the last mapped reg is 32 bit while arg is 64-bit.
// It will be handled together with low part mapped to register.
stack_map_start++;
}
- int regs_left_to_pass_via_stack = info->num_arg_words - stack_map_start;
+ size_t regs_left_to_pass_via_stack = info->num_arg_words - stack_map_start;
// If it is a range case we can try to copy remaining VRs (not mapped to physical registers)
// using a more optimal algorithm.
@@ -755,11 +756,10 @@ int Mir2Lir::GenDalvikArgs(CallInfo* info, int call_state,
RegStorage regRef = TargetReg(kArg3, kRef);
RegStorage regSingle = TargetReg(kArg3, kNotWide);
RegStorage regWide = TargetReg(kArg2, kWide);
- for (int i = start_index;
- i < stack_map_start + regs_left_to_pass_via_stack; i++) {
+ for (size_t i = start_index; i < stack_map_start + regs_left_to_pass_via_stack; i++) {
RegLocation rl_arg = info->args[i];
rl_arg = UpdateRawLoc(rl_arg);
- RegStorage reg = in_to_reg_storage_mapping.Get(i);
+ RegStorage reg = in_to_reg_storage_mapping.GetReg(i);
if (!reg.Valid()) {
int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set);
{
@@ -799,10 +799,10 @@ int Mir2Lir::GenDalvikArgs(CallInfo* info, int call_state,
}
// Finish with VRs mapped to physical registers.
- for (int i = start_index; i < stack_map_start; i++) {
+ for (size_t i = start_index; i < stack_map_start; i++) {
RegLocation rl_arg = info->args[i];
rl_arg = UpdateRawLoc(rl_arg);
- RegStorage reg = in_to_reg_storage_mapping.Get(i);
+ RegStorage reg = in_to_reg_storage_mapping.GetReg(i);
if (reg.Valid()) {
if (rl_arg.wide) {
// if reg is not 64-bit (it is half of 64-bit) then handle it separately.
@@ -852,12 +852,11 @@ int Mir2Lir::GenDalvikArgs(CallInfo* info, int call_state,
return call_state;
}
-RegStorage Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
+void Mir2Lir::EnsureInitializedArgMappingToPhysicalReg() {
if (!in_to_reg_storage_mapping_.IsInitialized()) {
ShortyIterator shorty_iterator(cu_->shorty, cu_->invoke_type == kStatic);
in_to_reg_storage_mapping_.Initialize(&shorty_iterator, GetResetedInToRegStorageMapper());
}
- return in_to_reg_storage_mapping_.Get(arg_num);
}
RegLocation Mir2Lir::InlineTarget(CallInfo* info) {
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
index 9f36e35f5e..db844bcde9 100644
--- a/compiler/dex/quick/gen_loadstore.cc
+++ b/compiler/dex/quick/gen_loadstore.cc
@@ -44,7 +44,9 @@ LIR* Mir2Lir::LoadConstant(RegStorage r_dest, int value) {
void Mir2Lir::Workaround7250540(RegLocation rl_dest, RegStorage zero_reg) {
if (rl_dest.fp) {
int pmap_index = SRegToPMap(rl_dest.s_reg_low);
- if (promotion_map_[pmap_index].fp_location == kLocPhysReg) {
+ const bool is_fp_promoted = promotion_map_[pmap_index].fp_location == kLocPhysReg;
+ const bool is_core_promoted = promotion_map_[pmap_index].core_location == kLocPhysReg;
+ if (is_fp_promoted || is_core_promoted) {
// Now, determine if this vreg is ever used as a reference. If not, we're done.
bool used_as_reference = false;
int base_vreg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);
@@ -61,7 +63,7 @@ void Mir2Lir::Workaround7250540(RegLocation rl_dest, RegStorage zero_reg) {
temp_reg = AllocTemp();
LoadConstant(temp_reg, 0);
}
- if (promotion_map_[pmap_index].core_location == kLocPhysReg) {
+ if (is_core_promoted) {
// Promoted - just copy in a zero
OpRegCopy(RegStorage::Solo32(promotion_map_[pmap_index].core_reg), temp_reg);
} else {
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index ccfdaf60bb..d9471f6fd1 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -68,17 +68,13 @@ bool MipsMir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& s
*/
void MipsMir2Lir::GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
- if (cu_->verbose) {
- DumpSparseSwitchTable(table);
- }
// Add the table to the list - we'll process it later
SwitchTable* tab_rec =
static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
+ tab_rec->switch_mir = mir;
tab_rec->table = table;
tab_rec->vaddr = current_dalvik_offset_;
int elements = table[1];
- tab_rec->targets =
- static_cast<LIR**>(arena_->Alloc(elements * sizeof(LIR*), kArenaAllocLIR));
switch_tables_.push_back(tab_rec);
// The table is composed of 8-byte key/disp pairs
@@ -145,17 +141,13 @@ void MipsMir2Lir::GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLoca
*/
void MipsMir2Lir::GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
- if (cu_->verbose) {
- DumpPackedSwitchTable(table);
- }
// Add the table to the list - we'll process it later
SwitchTable* tab_rec =
static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
+ tab_rec->switch_mir = mir;
tab_rec->table = table;
tab_rec->vaddr = current_dalvik_offset_;
int size = table[1];
- tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*),
- kArenaAllocLIR));
switch_tables_.push_back(tab_rec);
// Get the switch value
@@ -323,6 +315,26 @@ void MipsMir2Lir::GenSpecialExitSequence() {
OpReg(kOpBx, rs_rRA);
}
+void MipsMir2Lir::GenSpecialEntryForSuspend() {
+ // Keep 16-byte stack alignment - push A0, i.e. ArtMethod*, 2 filler words and RA.
+ core_spill_mask_ = (1u << rs_rRA.GetRegNum());
+ num_core_spills_ = 1u;
+ fp_spill_mask_ = 0u;
+ num_fp_spills_ = 0u;
+ frame_size_ = 16u;
+ core_vmap_table_.clear();
+ fp_vmap_table_.clear();
+ OpRegImm(kOpSub, rs_rMIPS_SP, frame_size_);
+ Store32Disp(rs_rMIPS_SP, frame_size_ - 4, rs_rRA);
+ Store32Disp(rs_rMIPS_SP, 0, rs_rA0);
+}
+
+void MipsMir2Lir::GenSpecialExitForSuspend() {
+ // Pop the frame. Don't pop ArtMethod*, it's no longer needed.
+ Load32Disp(rs_rMIPS_SP, frame_size_ - 4, rs_rRA);
+ OpRegImm(kOpAdd, rs_rMIPS_SP, frame_size_);
+}
+
/*
* Bit of a hack here - in the absence of a real scheduling pass,
* emit the next instruction in static & direct invoke sequences.
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index a37fe40cfa..e1b43ca848 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -141,7 +141,9 @@ class MipsMir2Lir FINAL : public Mir2Lir {
void GenDivZeroCheckWide(RegStorage reg);
void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method);
void GenExitSequence();
- void GenSpecialExitSequence();
+ void GenSpecialExitSequence() OVERRIDE;
+ void GenSpecialEntryForSuspend() OVERRIDE;
+ void GenSpecialExitForSuspend() OVERRIDE;
void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double);
void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir);
void GenSelect(BasicBlock* bb, MIR* mir);
diff --git a/compiler/dex/quick/mips/utility_mips.cc b/compiler/dex/quick/mips/utility_mips.cc
index 6f6bf68fea..ec6edabdbd 100644
--- a/compiler/dex/quick/mips/utility_mips.cc
+++ b/compiler/dex/quick/mips/utility_mips.cc
@@ -56,7 +56,8 @@ LIR* MipsMir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
}
bool MipsMir2Lir::InexpensiveConstantInt(int32_t value) {
- return ((value == 0) || IsUint(16, value) || ((value < 0) && (value >= -32768)));
+ // For encodings, see LoadConstantNoClobber below.
+ return ((value == 0) || IsUint<16>(value) || IsInt<16>(value));
}
bool MipsMir2Lir::InexpensiveConstantFloat(int32_t value) {
@@ -96,9 +97,11 @@ LIR* MipsMir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) {
/* See if the value can be constructed cheaply */
if (value == 0) {
res = NewLIR2(kMipsMove, r_dest.GetReg(), rZERO);
- } else if ((value > 0) && (value <= 65535)) {
+ } else if (IsUint<16>(value)) {
+ // Use OR with (unsigned) immediate to encode 16b unsigned int.
res = NewLIR3(kMipsOri, r_dest.GetReg(), rZERO, value);
- } else if ((value < 0) && (value >= -32768)) {
+ } else if (IsInt<16>(value)) {
+ // Use ADD with (signed) immediate to encode 16b signed int.
res = NewLIR3(kMipsAddiu, r_dest.GetReg(), rZERO, value);
} else {
res = NewLIR2(kMipsLui, r_dest.GetReg(), value >> 16);
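
The utility_mips.cc hunk replaces the hand-rolled range checks with IsUint<16>/IsInt<16>, making the encoding choice explicit: ORI zero-extends a 16-bit immediate, ADDIU sign-extends one, and anything else needs LUI plus ORI. A hedged standalone sketch of the same classification using plain templates (IsUintN, IsIntN and ClassifyConstant are illustrative, not the ART helpers):

#include <cstddef>
#include <cstdint>

template <size_t kBits>
static bool IsUintN(int64_t value) {
  return value >= 0 && value < (INT64_C(1) << kBits);
}

template <size_t kBits>
static bool IsIntN(int64_t value) {
  return value >= -(INT64_C(1) << (kBits - 1)) && value < (INT64_C(1) << (kBits - 1));
}

enum class MipsConstKind { kZero, kOri, kAddiu, kLuiOri };

// Mirrors the decision in LoadConstantNoClobber: ORI zero-extends a 16-bit
// immediate, ADDIU sign-extends one, anything else needs LUI + ORI.
static MipsConstKind ClassifyConstant(int32_t value) {
  if (value == 0) return MipsConstKind::kZero;
  if (IsUintN<16>(value)) return MipsConstKind::kOri;
  if (IsIntN<16>(value)) return MipsConstKind::kAddiu;
  return MipsConstKind::kLuiOri;
}
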
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 274e078399..34e5e25efe 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -24,6 +24,69 @@
namespace art {
+class Mir2Lir::SpecialSuspendCheckSlowPath : public Mir2Lir::LIRSlowPath {
+ public:
+ SpecialSuspendCheckSlowPath(Mir2Lir* m2l, LIR* branch, LIR* cont)
+ : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch, cont),
+ num_used_args_(0u) {
+ }
+
+ void PreserveArg(int in_position) {
+ // Avoid duplicates.
+ for (size_t i = 0; i != num_used_args_; ++i) {
+ if (used_args_[i] == in_position) {
+ return;
+ }
+ }
+ DCHECK_LT(num_used_args_, kMaxArgsToPreserve);
+ used_args_[num_used_args_] = in_position;
+ ++num_used_args_;
+ }
+
+ void Compile() OVERRIDE {
+ m2l_->ResetRegPool();
+ m2l_->ResetDefTracking();
+ GenerateTargetLabel(kPseudoSuspendTarget);
+
+ m2l_->LockCallTemps();
+
+ // Generate frame.
+ m2l_->GenSpecialEntryForSuspend();
+
+ // Spill all args.
+ for (size_t i = 0, end = m2l_->in_to_reg_storage_mapping_.GetEndMappedIn(); i < end;
+ i += m2l_->in_to_reg_storage_mapping_.GetShorty(i).IsWide() ? 2u : 1u) {
+ m2l_->SpillArg(i);
+ }
+
+ m2l_->FreeCallTemps();
+
+ // Do the actual suspend call to runtime.
+ m2l_->CallRuntimeHelper(kQuickTestSuspend, true);
+
+ m2l_->LockCallTemps();
+
+ // Unspill used regs. (Don't unspill unused args.)
+ for (size_t i = 0; i != num_used_args_; ++i) {
+ m2l_->UnspillArg(used_args_[i]);
+ }
+
+ // Pop the frame.
+ m2l_->GenSpecialExitForSuspend();
+
+ // Branch to the continue label.
+ DCHECK(cont_ != nullptr);
+ m2l_->OpUnconditionalBranch(cont_);
+
+ m2l_->FreeCallTemps();
+ }
+
+ private:
+ static constexpr size_t kMaxArgsToPreserve = 2u;
+ size_t num_used_args_;
+ int used_args_[kMaxArgsToPreserve];
+};
+
RegisterClass Mir2Lir::ShortyToRegClass(char shorty_type) {
RegisterClass res;
switch (shorty_type) {
@@ -54,15 +117,15 @@ RegisterClass Mir2Lir::LocToRegClass(RegLocation loc) {
return res;
}
-void Mir2Lir::LockArg(int in_position, bool) {
- RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position);
+void Mir2Lir::LockArg(size_t in_position) {
+ RegStorage reg_arg = in_to_reg_storage_mapping_.GetReg(in_position);
if (reg_arg.Valid()) {
LockTemp(reg_arg);
}
}
-RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) {
+RegStorage Mir2Lir::LoadArg(size_t in_position, RegisterClass reg_class, bool wide) {
ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set);
@@ -82,7 +145,7 @@ RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide)
offset += sizeof(uint64_t);
}
- RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position);
+ RegStorage reg_arg = in_to_reg_storage_mapping_.GetReg(in_position);
// TODO: REVISIT: This adds a spill of low part while we could just copy it.
if (reg_arg.Valid() && wide && (reg_arg.GetWideKind() == kNotWide)) {
@@ -112,7 +175,7 @@ RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide)
return reg_arg;
}
-void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) {
+void Mir2Lir::LoadArgDirect(size_t in_position, RegLocation rl_dest) {
DCHECK_EQ(rl_dest.location, kLocPhysReg);
ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set);
@@ -132,7 +195,7 @@ void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) {
offset += sizeof(uint64_t);
}
- RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position);
+ RegStorage reg_arg = in_to_reg_storage_mapping_.GetReg(in_position);
// TODO: REVISIT: This adds a spill of low part while we could just copy it.
if (reg_arg.Valid() && rl_dest.wide && (reg_arg.GetWideKind() == kNotWide)) {
@@ -153,6 +216,41 @@ void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) {
}
}
+void Mir2Lir::SpillArg(size_t in_position) {
+ RegStorage reg_arg = in_to_reg_storage_mapping_.GetReg(in_position);
+
+ if (reg_arg.Valid()) {
+ int offset = frame_size_ + StackVisitor::GetOutVROffset(in_position, cu_->instruction_set);
+ ShortyArg arg = in_to_reg_storage_mapping_.GetShorty(in_position);
+ OpSize size = arg.IsRef() ? kReference :
+ (arg.IsWide() && reg_arg.GetWideKind() == kWide) ? k64 : k32;
+ StoreBaseDisp(TargetPtrReg(kSp), offset, reg_arg, size, kNotVolatile);
+ }
+}
+
+void Mir2Lir::UnspillArg(size_t in_position) {
+ RegStorage reg_arg = in_to_reg_storage_mapping_.GetReg(in_position);
+
+ if (reg_arg.Valid()) {
+ int offset = frame_size_ + StackVisitor::GetOutVROffset(in_position, cu_->instruction_set);
+ ShortyArg arg = in_to_reg_storage_mapping_.GetShorty(in_position);
+ OpSize size = arg.IsRef() ? kReference :
+ (arg.IsWide() && reg_arg.GetWideKind() == kWide) ? k64 : k32;
+ LoadBaseDisp(TargetPtrReg(kSp), offset, reg_arg, size, kNotVolatile);
+ }
+}
+
+Mir2Lir::SpecialSuspendCheckSlowPath* Mir2Lir::GenSpecialSuspendTest() {
+ LockCallTemps();
+ LIR* branch = OpTestSuspend(nullptr);
+ FreeCallTemps();
+ LIR* cont = NewLIR0(kPseudoTargetLabel);
+ SpecialSuspendCheckSlowPath* slow_path =
+ new (arena_) SpecialSuspendCheckSlowPath(this, branch, cont);
+ AddSlowPath(slow_path);
+ return slow_path;
+}
+
bool Mir2Lir::GenSpecialIGet(MIR* mir, const InlineMethod& special) {
// FastInstance() already checked by DexFileMethodInliner.
const InlineIGetIPutData& data = special.d.ifield_data;
@@ -161,13 +259,16 @@ bool Mir2Lir::GenSpecialIGet(MIR* mir, const InlineMethod& special) {
return false;
}
- OpSize size = k32;
+ OpSize size;
switch (data.op_variant) {
- case InlineMethodAnalyser::IGetVariant(Instruction::IGET_OBJECT):
- size = kReference;
+ case InlineMethodAnalyser::IGetVariant(Instruction::IGET):
+ size = in_to_reg_storage_mapping_.GetShorty(data.src_arg).IsFP() ? kSingle : k32;
break;
case InlineMethodAnalyser::IGetVariant(Instruction::IGET_WIDE):
- size = k64;
+ size = in_to_reg_storage_mapping_.GetShorty(data.src_arg).IsFP() ? kDouble : k64;
+ break;
+ case InlineMethodAnalyser::IGetVariant(Instruction::IGET_OBJECT):
+ size = kReference;
break;
case InlineMethodAnalyser::IGetVariant(Instruction::IGET_SHORT):
size = kSignedHalf;
@@ -181,11 +282,18 @@ bool Mir2Lir::GenSpecialIGet(MIR* mir, const InlineMethod& special) {
case InlineMethodAnalyser::IGetVariant(Instruction::IGET_BOOLEAN):
size = kUnsignedByte;
break;
+ default:
+ LOG(FATAL) << "Unknown variant: " << data.op_variant;
+ UNREACHABLE();
}
// Point of no return - no aborts after this
- GenPrintLabel(mir);
+ if (!kLeafOptimization) {
+ auto* slow_path = GenSpecialSuspendTest();
+ slow_path->PreserveArg(data.object_arg);
+ }
LockArg(data.object_arg);
+ GenPrintLabel(mir);
RegStorage reg_obj = LoadArg(data.object_arg, kRefReg);
RegisterClass reg_class = RegClassForFieldLoadStore(size, data.is_volatile);
RegisterClass ret_reg_class = ShortyToRegClass(cu_->shorty[0]);
@@ -223,13 +331,16 @@ bool Mir2Lir::GenSpecialIPut(MIR* mir, const InlineMethod& special) {
return false;
}
- OpSize size = k32;
+ OpSize size;
switch (data.op_variant) {
- case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_OBJECT):
- size = kReference;
+ case InlineMethodAnalyser::IPutVariant(Instruction::IPUT):
+ size = in_to_reg_storage_mapping_.GetShorty(data.src_arg).IsFP() ? kSingle : k32;
break;
case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_WIDE):
- size = k64;
+ size = in_to_reg_storage_mapping_.GetShorty(data.src_arg).IsFP() ? kDouble : k64;
+ break;
+ case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_OBJECT):
+ size = kReference;
break;
case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_SHORT):
size = kSignedHalf;
@@ -243,12 +354,20 @@ bool Mir2Lir::GenSpecialIPut(MIR* mir, const InlineMethod& special) {
case InlineMethodAnalyser::IPutVariant(Instruction::IPUT_BOOLEAN):
size = kUnsignedByte;
break;
+ default:
+ LOG(FATAL) << "Unknown variant: " << data.op_variant;
+ UNREACHABLE();
}
// Point of no return - no aborts after this
- GenPrintLabel(mir);
+ if (!kLeafOptimization) {
+ auto* slow_path = GenSpecialSuspendTest();
+ slow_path->PreserveArg(data.object_arg);
+ slow_path->PreserveArg(data.src_arg);
+ }
LockArg(data.object_arg);
- LockArg(data.src_arg, IsWide(size));
+ LockArg(data.src_arg);
+ GenPrintLabel(mir);
RegStorage reg_obj = LoadArg(data.object_arg, kRefReg);
RegisterClass reg_class = RegClassForFieldLoadStore(size, data.is_volatile);
RegStorage reg_src = LoadArg(data.src_arg, reg_class, IsWide(size));
@@ -269,8 +388,12 @@ bool Mir2Lir::GenSpecialIdentity(MIR* mir, const InlineMethod& special) {
bool wide = (data.is_wide != 0u);
// Point of no return - no aborts after this
+ if (!kLeafOptimization) {
+ auto* slow_path = GenSpecialSuspendTest();
+ slow_path->PreserveArg(data.arg);
+ }
+ LockArg(data.arg);
GenPrintLabel(mir);
- LockArg(data.arg, wide);
RegisterClass reg_class = ShortyToRegClass(cu_->shorty[0]);
RegLocation rl_dest = wide ? GetReturnWide(reg_class) : GetReturn(reg_class);
LoadArgDirect(data.arg, rl_dest);
@@ -285,15 +408,22 @@ bool Mir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& speci
current_dalvik_offset_ = mir->offset;
MIR* return_mir = nullptr;
bool successful = false;
+ EnsureInitializedArgMappingToPhysicalReg();
switch (special.opcode) {
case kInlineOpNop:
successful = true;
DCHECK_EQ(mir->dalvikInsn.opcode, Instruction::RETURN_VOID);
+ if (!kLeafOptimization) {
+ GenSpecialSuspendTest();
+ }
return_mir = mir;
break;
case kInlineOpNonWideConst: {
successful = true;
+ if (!kLeafOptimization) {
+ GenSpecialSuspendTest();
+ }
RegLocation rl_dest = GetReturn(ShortyToRegClass(cu_->shorty[0]));
GenPrintLabel(mir);
LoadConstant(rl_dest.reg, static_cast<int>(special.d.data));
@@ -333,13 +463,17 @@ bool Mir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& speci
}
GenSpecialExitSequence();
- core_spill_mask_ = 0;
- num_core_spills_ = 0;
- fp_spill_mask_ = 0;
- num_fp_spills_ = 0;
- frame_size_ = 0;
- core_vmap_table_.clear();
- fp_vmap_table_.clear();
+ if (!kLeafOptimization) {
+ HandleSlowPaths();
+ } else {
+ core_spill_mask_ = 0;
+ num_core_spills_ = 0;
+ fp_spill_mask_ = 0;
+ num_fp_spills_ = 0;
+ frame_size_ = 0;
+ core_vmap_table_.clear();
+ fp_vmap_table_.clear();
+ }
}
return successful;
@@ -1195,9 +1329,7 @@ void Mir2Lir::MethodMIR2LIR() {
cu_->NewTimingSplit("MIR2LIR");
// Hold the labels of each block.
- block_label_list_ =
- static_cast<LIR*>(arena_->Alloc(sizeof(LIR) * mir_graph_->GetNumBlocks(),
- kArenaAllocLIR));
+ block_label_list_ = arena_->AllocArray<LIR>(mir_graph_->GetNumBlocks(), kArenaAllocLIR);
PreOrderDfsIterator iter(mir_graph_);
BasicBlock* curr_bb = iter.Next();
@@ -1289,31 +1421,41 @@ void Mir2Lir::InToRegStorageMapping::Initialize(ShortyIterator* shorty,
InToRegStorageMapper* mapper) {
DCHECK(mapper != nullptr);
DCHECK(shorty != nullptr);
- max_mapped_in_ = -1;
- has_arguments_on_stack_ = false;
+ DCHECK(!IsInitialized());
+ DCHECK_EQ(end_mapped_in_, 0u);
+ DCHECK(!has_arguments_on_stack_);
while (shorty->Next()) {
ShortyArg arg = shorty->GetArg();
RegStorage reg = mapper->GetNextReg(arg);
+ mapping_.emplace_back(arg, reg);
+ if (arg.IsWide()) {
+ mapping_.emplace_back(ShortyArg(kInvalidShorty), RegStorage::InvalidReg());
+ }
if (reg.Valid()) {
- mapping_.Put(count_, reg);
- max_mapped_in_ = count_;
- // If the VR is wide and was mapped as wide then account for it.
- if (arg.IsWide() && reg.Is64Bit()) {
- max_mapped_in_++;
+ end_mapped_in_ = mapping_.size();
+ // If the VR is wide but wasn't mapped as wide then account for it.
+ if (arg.IsWide() && !reg.Is64Bit()) {
+ --end_mapped_in_;
}
} else {
has_arguments_on_stack_ = true;
}
- count_ += arg.IsWide() ? 2 : 1;
}
initialized_ = true;
}
-RegStorage Mir2Lir::InToRegStorageMapping::Get(int in_position) {
+RegStorage Mir2Lir::InToRegStorageMapping::GetReg(size_t in_position) {
+ DCHECK(IsInitialized());
+ DCHECK_LT(in_position, mapping_.size());
+ DCHECK_NE(mapping_[in_position].first.GetType(), kInvalidShorty);
+ return mapping_[in_position].second;
+}
+
+Mir2Lir::ShortyArg Mir2Lir::InToRegStorageMapping::GetShorty(size_t in_position) {
DCHECK(IsInitialized());
- DCHECK_LT(in_position, count_);
- auto res = mapping_.find(in_position);
- return res != mapping_.end() ? res->second : RegStorage::InvalidReg();
+ DCHECK_LT(static_cast<size_t>(in_position), mapping_.size());
+ DCHECK_NE(mapping_[in_position].first.GetType(), kInvalidShorty);
+ return mapping_[in_position].first;
}
} // namespace art
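
The rewritten InToRegStorageMapping keeps one (shorty arg, register) pair per VR slot, pads the high half of a wide argument with an invalid entry, and reports a past-the-end index from GetEndMappedIn(). A simplified standalone sketch of that layout; FakeArg, FakeReg and InMapping are stand-ins for the ART types, and Is64Bit() is stubbed out:

#include <cstddef>
#include <utility>
#include <vector>

struct FakeReg { int id = -1; bool Valid() const { return id >= 0; } bool Is64Bit() const { return false; } };
struct FakeArg { char type; bool IsWide() const { return type == 'J' || type == 'D'; } };

class InMapping {
 public:
  void Add(FakeArg arg, FakeReg reg) {
    mapping_.emplace_back(arg, reg);
    if (arg.IsWide()) {
      mapping_.emplace_back(FakeArg{'-'}, FakeReg{});  // filler for the high VR slot
    }
    if (reg.Valid()) {
      end_mapped_in_ = mapping_.size();
      if (arg.IsWide() && !reg.Is64Bit()) {
        --end_mapped_in_;  // only the low half actually landed in a register
      }
    }
  }
  // Past-the-end index: every VR slot at or beyond this index lives on the stack.
  size_t GetEndMappedIn() const { return end_mapped_in_; }
  FakeReg GetReg(size_t in_position) const { return mapping_[in_position].second; }

 private:
  std::vector<std::pair<FakeArg, FakeReg>> mapping_;
  size_t end_mapped_in_ = 0u;
};
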
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 888c34eb24..6f3f057038 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -17,6 +17,9 @@
#ifndef ART_COMPILER_DEX_QUICK_MIR_TO_LIR_H_
#define ART_COMPILER_DEX_QUICK_MIR_TO_LIR_H_
+#include "base/arena_allocator.h"
+#include "base/arena_containers.h"
+#include "base/arena_object.h"
#include "compiled_method.h"
#include "dex/compiler_enums.h"
#include "dex/dex_flags.h"
@@ -29,9 +32,6 @@
#include "leb128.h"
#include "safe_map.h"
#include "utils/array_ref.h"
-#include "utils/arena_allocator.h"
-#include "utils/arena_containers.h"
-#include "utils/arena_object.h"
#include "utils/stack_checks.h"
namespace art {
@@ -146,7 +146,7 @@ typedef int (*NextCallInsn)(CompilationUnit*, CallInfo*, int,
uint32_t method_idx, uintptr_t direct_code,
uintptr_t direct_method, InvokeType type);
-typedef std::vector<uint8_t> CodeBuffer;
+typedef ArenaVector<uint8_t> CodeBuffer;
typedef uint32_t CodeOffset; // Native code offset in bytes.
struct UseDefMasks {
@@ -224,7 +224,7 @@ class Mir2Lir {
struct SwitchTable : EmbeddedData {
LIR* anchor; // Reference instruction for relative offsets.
- LIR** targets; // Array of case targets.
+ MIR* switch_mir; // The switch mir.
};
/* Static register use counts */
@@ -515,6 +515,9 @@ class Mir2Lir {
LIR* const cont_;
};
+ class SuspendCheckSlowPath;
+ class SpecialSuspendCheckSlowPath;
+
// Helper class for changing mem_ref_type_ until the end of current scope. See mem_ref_type_.
class ScopedMemRefType {
public:
@@ -592,7 +595,7 @@ class Mir2Lir {
// strdup(), but allocates from the arena.
char* ArenaStrdup(const char* str) {
size_t len = strlen(str) + 1;
- char* res = reinterpret_cast<char*>(arena_->Alloc(len, kArenaAllocMisc));
+ char* res = arena_->AllocArray<char>(len, kArenaAllocMisc);
if (res != NULL) {
strncpy(res, str, len);
}
@@ -653,7 +656,6 @@ class Mir2Lir {
LIR* ScanLiteralPoolClass(LIR* data_target, const DexFile& dex_file, uint32_t type_idx);
LIR* AddWordData(LIR* *constant_list_p, int value);
LIR* AddWideData(LIR* *constant_list_p, int val_lo, int val_hi);
- void ProcessSwitchTables();
void DumpSparseSwitchTable(const uint16_t* table);
void DumpPackedSwitchTable(const uint16_t* table);
void MarkBoundary(DexOffset offset, const char* inst_str);
@@ -671,9 +673,7 @@ class Mir2Lir {
int AssignLiteralOffset(CodeOffset offset);
int AssignSwitchTablesOffset(CodeOffset offset);
int AssignFillArrayDataOffset(CodeOffset offset);
- virtual LIR* InsertCaseLabel(DexOffset vaddr, int keyVal);
- virtual void MarkPackedCaseLabels(Mir2Lir::SwitchTable* tab_rec);
- void MarkSparseCaseLabels(Mir2Lir::SwitchTable* tab_rec);
+ LIR* InsertCaseLabel(uint32_t bbid, int keyVal);
// Handle bookkeeping to convert a wide RegLocation to a narrow RegLocation. No code generated.
virtual RegLocation NarrowRegLoc(RegLocation loc);
@@ -1206,7 +1206,7 @@ class Mir2Lir {
}
}
- RegStorage GetArgMappingToPhysicalReg(int arg_num);
+ void EnsureInitializedArgMappingToPhysicalReg();
virtual RegLocation GetReturnAlt() = 0;
virtual RegLocation GetReturnWideAlt() = 0;
virtual RegLocation LocCReturn() = 0;
@@ -1573,6 +1573,16 @@ class Mir2Lir {
virtual void GenSpecialExitSequence() = 0;
/**
+ * @brief Used to generate stack frame for suspend path of special methods.
+ */
+ virtual void GenSpecialEntryForSuspend() = 0;
+
+ /**
+ * @brief Used to pop the stack frame for suspend path of special methods.
+ */
+ virtual void GenSpecialExitForSuspend() = 0;
+
+ /**
* @brief Used to generate code for special methods that are known to be
* small enough to work in frameless mode.
* @param bb The basic block of the first MIR.
@@ -1593,9 +1603,8 @@ class Mir2Lir {
* @brief Used to lock register if argument at in_position was passed that way.
* @details Does nothing if the argument is passed via stack.
* @param in_position The argument number whose register to lock.
- * @param wide Whether the argument is wide.
*/
- void LockArg(int in_position, bool wide = false);
+ void LockArg(size_t in_position);
/**
* @brief Used to load VR argument to a physical register.
@@ -1605,14 +1614,33 @@ class Mir2Lir {
* @param wide Whether the argument is 64-bit or not.
* @return Returns the register (or register pair) for the loaded argument.
*/
- RegStorage LoadArg(int in_position, RegisterClass reg_class, bool wide = false);
+ RegStorage LoadArg(size_t in_position, RegisterClass reg_class, bool wide = false);
/**
* @brief Used to load a VR argument directly to a specified register location.
* @param in_position The argument number to place in register.
* @param rl_dest The register location where to place argument.
*/
- void LoadArgDirect(int in_position, RegLocation rl_dest);
+ void LoadArgDirect(size_t in_position, RegLocation rl_dest);
+
+ /**
+ * @brief Used to spill register if argument at in_position was passed that way.
+ * @details Does nothing if the argument is passed via stack.
+ * @param in_position The argument number whose register to spill.
+ */
+ void SpillArg(size_t in_position);
+
+ /**
+ * @brief Used to unspill register if argument at in_position was passed that way.
+ * @details Does nothing if the argument is passed via stack.
+ * @param in_position The argument number whose register to unspill.
+ */
+ void UnspillArg(size_t in_position);
+
+ /**
+ * @brief Generate suspend test in a special method.
+ */
+ SpecialSuspendCheckSlowPath* GenSpecialSuspendTest();
/**
* @brief Used to generate LIR for special getter method.
@@ -1745,10 +1773,10 @@ class Mir2Lir {
// The source mapping table data (pc -> dex). More entries than in encoded_mapping_table_
DefaultSrcMap src_mapping_table_;
// The encoding mapping table data (dex -> pc offset and pc offset -> dex) with a size prefix.
- std::vector<uint8_t> encoded_mapping_table_;
+ ArenaVector<uint8_t> encoded_mapping_table_;
ArenaVector<uint32_t> core_vmap_table_;
ArenaVector<uint32_t> fp_vmap_table_;
- std::vector<uint8_t> native_gc_map_;
+ ArenaVector<uint8_t> native_gc_map_;
ArenaVector<LinkerPatch> patches_;
int num_core_spills_;
int num_fp_spills_;
@@ -1805,21 +1833,22 @@ class Mir2Lir {
class InToRegStorageMapping {
public:
explicit InToRegStorageMapping(ArenaAllocator* arena)
- : mapping_(std::less<int>(), arena->Adapter()), count_(0),
- max_mapped_in_(0), has_arguments_on_stack_(false), initialized_(false) {}
+ : mapping_(arena->Adapter()),
+ end_mapped_in_(0u), has_arguments_on_stack_(false), initialized_(false) {}
void Initialize(ShortyIterator* shorty, InToRegStorageMapper* mapper);
/**
- * @return the index of last VR mapped to physical register. In other words
- * any VR starting from (return value + 1) index is mapped to memory.
+ * @return the past-the-end index of VRs mapped to physical registers.
+ * In other words, any VR starting from this index is mapped to memory.
*/
- int GetMaxMappedIn() { return max_mapped_in_; }
+ size_t GetEndMappedIn() { return end_mapped_in_; }
bool HasArgumentsOnStack() { return has_arguments_on_stack_; }
- RegStorage Get(int in_position);
+ RegStorage GetReg(size_t in_position);
+ ShortyArg GetShorty(size_t in_position);
bool IsInitialized() { return initialized_; }
private:
- ArenaSafeMap<int, RegStorage> mapping_;
- int count_;
- int max_mapped_in_;
+ static constexpr char kInvalidShorty = '-';
+ ArenaVector<std::pair<ShortyArg, RegStorage>> mapping_;
+ size_t end_mapped_in_;
bool has_arguments_on_stack_;
bool initialized_;
};
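
SpillArg()/UnspillArg() declared above choose the memory access size from the argument's shorty: references use the reference size, wide arguments that actually landed in a 64-bit register use a 64-bit access, and everything else uses 32 bits. A hedged sketch of that selection with simplified stand-in types (AccessSize and ChooseSpillSize are illustrative names):

enum class AccessSize { k32, k64, kReference };

// wide_in_reg: the wide argument was mapped to a single 64-bit register (not split).
static AccessSize ChooseSpillSize(bool is_ref, bool is_wide, bool wide_in_reg) {
  if (is_ref) {
    return AccessSize::kReference;
  }
  return (is_wide && wide_in_reg) ? AccessSize::k64 : AccessSize::k32;
}
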
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 909077eca2..19c2a5a3a3 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -560,6 +560,7 @@ static uint32_t kCompilerOptimizerDisableFlags = 0 | // Disable specific optimi
// (1 << kNullCheckElimination) |
// (1 << kClassInitCheckElimination) |
// (1 << kGlobalValueNumbering) |
+ (1 << kGvnDeadCodeElimination) |
// (1 << kLocalValueNumbering) |
// (1 << kPromoteRegs) |
// (1 << kTrackLiveTemps) |
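
The quick_compiler.cc change disables the new GVN dead code elimination pass by setting its bit in kCompilerOptimizerDisableFlags; each optimization is gated on a single bit of that mask. A minimal sketch of such a check (the enum value shown is illustrative, not the real bit position):

#include <cstdint>

enum Opt { kGvnDeadCodeElimination = 3 };  // illustrative bit position

static bool IsOptDisabled(uint32_t disable_flags, Opt opt) {
  return (disable_flags & (UINT32_C(1) << opt)) != 0u;
}
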
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index 8efafb23fe..67fb8040f7 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -1191,8 +1191,7 @@ void Mir2Lir::DoPromotion() {
int num_regs = mir_graph_->GetNumOfCodeAndTempVRs();
const int promotion_threshold = 1;
// Allocate the promotion map - one entry for each Dalvik vReg or compiler temp
- promotion_map_ = static_cast<PromotionMap*>
- (arena_->Alloc(num_regs * sizeof(promotion_map_[0]), kArenaAllocRegAlloc));
+ promotion_map_ = arena_->AllocArray<PromotionMap>(num_regs, kArenaAllocRegAlloc);
// Allow target code to add any special registers
AdjustSpillMask();
@@ -1210,12 +1209,8 @@ void Mir2Lir::DoPromotion() {
*/
size_t core_reg_count_size = WideGPRsAreAliases() ? num_regs : num_regs * 2;
size_t fp_reg_count_size = WideFPRsAreAliases() ? num_regs : num_regs * 2;
- RefCounts *core_regs =
- static_cast<RefCounts*>(arena_->Alloc(sizeof(RefCounts) * core_reg_count_size,
- kArenaAllocRegAlloc));
- RefCounts *fp_regs =
- static_cast<RefCounts *>(arena_->Alloc(sizeof(RefCounts) * fp_reg_count_size,
- kArenaAllocRegAlloc));
+ RefCounts *core_regs = arena_->AllocArray<RefCounts>(core_reg_count_size, kArenaAllocRegAlloc);
+ RefCounts *fp_regs = arena_->AllocArray<RefCounts>(fp_reg_count_size, kArenaAllocRegAlloc);
// Set ssa names for original Dalvik registers
for (int i = 0; i < num_regs; i++) {
core_regs[i].s_reg = fp_regs[i].s_reg = i;
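
Several allocations in this and the surrounding hunks move from Alloc(n * sizeof(T)) plus a cast to a typed AllocArray<T>(n, kind) helper. A standalone sketch of what such a helper looks like over a simple bump-pointer arena; BumpArena is an illustration, not ART's ArenaAllocator:

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

class BumpArena {
 public:
  explicit BumpArena(size_t capacity) : storage_(capacity), pos_(0u) {}

  void* Alloc(size_t bytes) {
    size_t aligned = (bytes + 7u) & ~size_t(7u);  // keep 8-byte alignment
    if (pos_ + aligned > storage_.size()) return nullptr;
    void* result = &storage_[pos_];
    pos_ += aligned;
    std::memset(result, 0, aligned);  // arena memory is handed out zero-initialized
    return result;
  }

  // Typed array allocation, e.g. arena.AllocArray<PromotionMap>(num_regs).
  template <typename T>
  T* AllocArray(size_t count) {
    return static_cast<T*>(Alloc(count * sizeof(T)));
  }

 private:
  std::vector<uint8_t> storage_;
  size_t pos_;
};
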
diff --git a/compiler/dex/quick/resource_mask.cc b/compiler/dex/quick/resource_mask.cc
index 8a27ecb94f..57e8af32a2 100644
--- a/compiler/dex/quick/resource_mask.cc
+++ b/compiler/dex/quick/resource_mask.cc
@@ -18,8 +18,8 @@
#include "resource_mask.h"
+#include "base/arena_allocator.h"
#include "base/logging.h"
-#include "utils/arena_allocator.h"
#include "utils.h"
namespace art {
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index aa0972f861..c3db3a64e5 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -37,84 +37,6 @@ void X86Mir2Lir::GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocat
}
/*
- * We override InsertCaseLabel, because the first parameter represents
- * a basic block id, instead of a dex offset.
- */
-LIR* X86Mir2Lir::InsertCaseLabel(DexOffset bbid, int keyVal) {
- LIR* boundary_lir = &block_label_list_[bbid];
- LIR* res = boundary_lir;
- if (cu_->verbose) {
- // Only pay the expense if we're pretty-printing.
- LIR* new_label = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocLIR));
- BasicBlock* bb = mir_graph_->GetBasicBlock(bbid);
- DCHECK(bb != nullptr);
- new_label->dalvik_offset = bb->start_offset;;
- new_label->opcode = kPseudoCaseLabel;
- new_label->operands[0] = keyVal;
- new_label->flags.fixup = kFixupLabel;
- DCHECK(!new_label->flags.use_def_invalid);
- new_label->u.m.def_mask = &kEncodeAll;
- InsertLIRAfter(boundary_lir, new_label);
- res = new_label;
- }
- return res;
-}
-
-void X86Mir2Lir::MarkPackedCaseLabels(Mir2Lir::SwitchTable* tab_rec) {
- const uint16_t* table = tab_rec->table;
- const int32_t *targets = reinterpret_cast<const int32_t*>(&table[4]);
- int entries = table[1];
- int low_key = s4FromSwitchData(&table[2]);
- for (int i = 0; i < entries; i++) {
- // The value at targets[i] is a basic block id, instead of a dex offset.
- tab_rec->targets[i] = InsertCaseLabel(targets[i], i + low_key);
- }
-}
-
-/*
- * We convert and create a new packed switch table that stores
- * basic block ids to targets[] by examining successor blocks.
- * Note that the original packed switch table stores dex offsets to targets[].
- */
-const uint16_t* X86Mir2Lir::ConvertPackedSwitchTable(MIR* mir, const uint16_t* table) {
- /*
- * The original packed switch data format:
- * ushort ident = 0x0100 magic value
- * ushort size number of entries in the table
- * int first_key first (and lowest) switch case value
- * int targets[size] branch targets, relative to switch opcode
- *
- * Total size is (4+size*2) 16-bit code units.
- *
- * Note that the new packed switch data format is the same as the original
- * format, except that targets[] are basic block ids.
- *
- */
- BasicBlock* bb = mir_graph_->GetBasicBlock(mir->bb);
- DCHECK(bb != nullptr);
- // Get the number of entries.
- int entries = table[1];
- const int32_t* as_int32 = reinterpret_cast<const int32_t*>(&table[2]);
- int32_t starting_key = as_int32[0];
- // Create a new table.
- int size = sizeof(uint16_t) * (4 + entries * 2);
- uint16_t* new_table = reinterpret_cast<uint16_t*>(arena_->Alloc(size, kArenaAllocMisc));
- // Copy ident, size, and first_key to the new table.
- memcpy(new_table, table, sizeof(uint16_t) * 4);
- // Get the new targets.
- int32_t* new_targets = reinterpret_cast<int32_t*>(&new_table[4]);
- // Find out targets for each entry.
- int i = 0;
- for (SuccessorBlockInfo* successor_block_info : bb->successor_blocks) {
- DCHECK_EQ(starting_key + i, successor_block_info->key);
- // Save target basic block id.
- new_targets[i++] = successor_block_info->block;
- }
- DCHECK_EQ(i, entries);
- return new_table;
-}
-
-/*
* Code pattern will look something like:
*
* mov r_val, ..
@@ -131,16 +53,14 @@ const uint16_t* X86Mir2Lir::ConvertPackedSwitchTable(MIR* mir, const uint16_t* t
* done:
*/
void X86Mir2Lir::GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
- const uint16_t* old_table = mir_graph_->GetTable(mir, table_offset);
- const uint16_t* table = ConvertPackedSwitchTable(mir, old_table);
+ const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
// Add the table to the list - we'll process it later
SwitchTable* tab_rec =
static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
+ tab_rec->switch_mir = mir;
tab_rec->table = table;
tab_rec->vaddr = current_dalvik_offset_;
int size = table[1];
- tab_rec->targets = static_cast<LIR**>(arena_->Alloc(size * sizeof(LIR*),
- kArenaAllocLIR));
switch_tables_.push_back(tab_rec);
// Get the switch value
@@ -352,6 +272,41 @@ void X86Mir2Lir::GenSpecialExitSequence() {
NewLIR0(kX86Ret);
}
+void X86Mir2Lir::GenSpecialEntryForSuspend() {
+ // Keep 16-byte stack alignment, there's already the return address, so
+ // - for 32-bit push EAX, i.e. ArtMethod*, ESI, EDI,
+ // - for 64-bit push RAX, i.e. ArtMethod*.
+ if (!cu_->target64) {
+ DCHECK(!IsTemp(rs_rSI));
+ DCHECK(!IsTemp(rs_rDI));
+ core_spill_mask_ =
+ (1u << rs_rDI.GetRegNum()) | (1u << rs_rSI.GetRegNum()) | (1u << rs_rRET.GetRegNum());
+ num_core_spills_ = 3u;
+ } else {
+ core_spill_mask_ = (1u << rs_rRET.GetRegNum());
+ num_core_spills_ = 1u;
+ }
+ fp_spill_mask_ = 0u;
+ num_fp_spills_ = 0u;
+ frame_size_ = 16u;
+ core_vmap_table_.clear();
+ fp_vmap_table_.clear();
+ if (!cu_->target64) {
+ NewLIR1(kX86Push32R, rs_rDI.GetReg());
+ NewLIR1(kX86Push32R, rs_rSI.GetReg());
+ }
+ NewLIR1(kX86Push32R, TargetReg(kArg0, kRef).GetReg()); // ArtMethod*
+}
+
+void X86Mir2Lir::GenSpecialExitForSuspend() {
+ // Pop the frame. (ArtMethod* no longer needed but restore it anyway.)
+ NewLIR1(kX86Pop32R, TargetReg(kArg0, kRef).GetReg()); // ArtMethod*
+ if (!cu_->target64) {
+ NewLIR1(kX86Pop32R, rs_rSI.GetReg());
+ NewLIR1(kX86Pop32R, rs_rDI.GetReg());
+ }
+}
+
void X86Mir2Lir::GenImplicitNullCheck(RegStorage reg, int opt_flags) {
if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && (opt_flags & MIR_IGNORE_NULL_CHECK)) {
return;
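
GenSpecialEntryForSuspend() above records the pushed core registers in core_spill_mask_ and keeps the frame 16-byte aligned. A hedged standalone sketch of building such a mask and frame size for 4-byte pushes (SuspendFrame and BuildSuspendFrame are illustrative names, and the layout is the 32-bit case):

#include <cstdint>

struct SuspendFrame {
  uint32_t core_spill_mask = 0u;
  uint32_t num_core_spills = 0u;
  uint32_t frame_size = 0u;
};

static SuspendFrame BuildSuspendFrame(const int* spilled_reg_nums, int count,
                                      uint32_t return_address_bytes) {
  SuspendFrame frame;
  for (int i = 0; i < count; ++i) {
    frame.core_spill_mask |= UINT32_C(1) << spilled_reg_nums[i];
    ++frame.num_core_spills;
  }
  // Round the pushes plus the return address up to the 16-byte alignment the ABI expects.
  uint32_t used = frame.num_core_spills * 4u + return_address_bytes;
  frame.frame_size = (used + 15u) & ~UINT32_C(15);
  return frame;
}
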
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 811d4f5d7b..20163b4b76 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -259,6 +259,8 @@ class X86Mir2Lir : public Mir2Lir {
void GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) OVERRIDE;
void GenExitSequence() OVERRIDE;
void GenSpecialExitSequence() OVERRIDE;
+ void GenSpecialEntryForSuspend() OVERRIDE;
+ void GenSpecialExitForSuspend() OVERRIDE;
void GenFusedFPCmpBranch(BasicBlock* bb, MIR* mir, bool gt_bias, bool is_double) OVERRIDE;
void GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) OVERRIDE;
void GenSelect(BasicBlock* bb, MIR* mir) OVERRIDE;
@@ -271,11 +273,8 @@ class X86Mir2Lir : public Mir2Lir {
int first_bit, int second_bit) OVERRIDE;
void GenNegDouble(RegLocation rl_dest, RegLocation rl_src) OVERRIDE;
void GenNegFloat(RegLocation rl_dest, RegLocation rl_src) OVERRIDE;
- const uint16_t* ConvertPackedSwitchTable(MIR* mir, const uint16_t* table);
void GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE;
void GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE;
- LIR* InsertCaseLabel(DexOffset vaddr, int keyVal) OVERRIDE;
- void MarkPackedCaseLabels(Mir2Lir::SwitchTable* tab_rec) OVERRIDE;
/**
* @brief Implement instanceof a final class with x86 specific code.
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 4fe7a43a85..91168c78bd 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -863,22 +863,29 @@ bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
RegLocation rl_src1 = info->args[0];
RegLocation rl_src2 = info->args[2];
RegLocation rl_dest = InlineTargetWide(info);
- int res_vreg, src1_vreg, src2_vreg;
if (rl_dest.s_reg_low == INVALID_SREG) {
// Result is unused, the code is dead. Inlining successful, no code generated.
return true;
}
+ if (PartiallyIntersects(rl_src1, rl_dest) &&
+ PartiallyIntersects(rl_src2, rl_dest)) {
+ // A special case we don't want to handle:
+ // src1 is mapped to v0 and v1,
+ // src2 is mapped to v2 and v3,
+ // and the result is mapped to v1 and v2.
+ return false;
+ }
+
/*
* If the result register is the same as the second element, then we
* need to be careful. The reason is that the first copy will
* inadvertently clobber the second element with the first one thus
* yielding the wrong result. Thus we do a swap in that case.
*/
- res_vreg = mir_graph_->SRegToVReg(rl_dest.s_reg_low);
- src2_vreg = mir_graph_->SRegToVReg(rl_src2.s_reg_low);
- if (res_vreg == src2_vreg) {
+ if (Intersects(rl_src2, rl_dest)) {
std::swap(rl_src1, rl_src2);
}
@@ -893,19 +900,30 @@ bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
* nothing else to do because they are equal and we have already
* moved one into the result.
*/
- src1_vreg = mir_graph_->SRegToVReg(rl_src1.s_reg_low);
- src2_vreg = mir_graph_->SRegToVReg(rl_src2.s_reg_low);
- if (src1_vreg == src2_vreg) {
+ if (mir_graph_->SRegToVReg(rl_src1.s_reg_low) ==
+ mir_graph_->SRegToVReg(rl_src2.s_reg_low)) {
StoreValueWide(rl_dest, rl_result);
return true;
}
// Free registers to make some room for the second operand.
- // But don't try to free ourselves or promoted registers.
- if (res_vreg != src1_vreg &&
- IsTemp(rl_src1.reg.GetLow()) && IsTemp(rl_src1.reg.GetHigh())) {
- FreeTemp(rl_src1.reg);
+ // But don't try to free part of a source which intersects
+ // part of result or promoted registers.
+
+ if (IsTemp(rl_src1.reg.GetLow()) &&
+ (rl_src1.reg.GetLowReg() != rl_result.reg.GetHighReg()) &&
+ (rl_src1.reg.GetLowReg() != rl_result.reg.GetLowReg())) {
+ // Is low part temporary and doesn't intersect any parts of result?
+ FreeTemp(rl_src1.reg.GetLow());
}
+
+ if (IsTemp(rl_src1.reg.GetHigh()) &&
+ (rl_src1.reg.GetHighReg() != rl_result.reg.GetLowReg()) &&
+ (rl_src1.reg.GetHighReg() != rl_result.reg.GetHighReg())) {
+ // Is high part temporary and doesn't intersect any parts of result?
+ FreeTemp(rl_src1.reg.GetHigh());
+ }
+
rl_src2 = LoadValueWide(rl_src2, kCoreReg);
// Do we have a free register for intermediate calculations?
@@ -939,12 +957,15 @@ bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
// Let's put pop 'edi' here to break a bit the dependency chain.
if (tmp == rs_rDI) {
NewLIR1(kX86Pop32R, tmp.GetReg());
+ } else {
+ FreeTemp(tmp);
}
// Conditionally move the other integer into the destination register.
ConditionCode cc = is_min ? kCondGe : kCondLt;
OpCondRegReg(kOpCmov, cc, rl_result.reg.GetLow(), rl_src2.reg.GetLow());
OpCondRegReg(kOpCmov, cc, rl_result.reg.GetHigh(), rl_src2.reg.GetHigh());
+ FreeTemp(rl_src2.reg);
StoreValueWide(rl_dest, rl_result);
return true;
}
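
The new early-out in GenInlinedMinMax() relies on Intersects()/PartiallyIntersects() over wide locations, i.e. values occupying the VR pair [v, v+1]. A standalone sketch of those predicates over low-VR indices; the names mirror the ART helpers but the signatures here are simplified:

// A wide value occupies two consecutive virtual registers: [low_vreg, low_vreg + 1].
static bool Intersects(int low_a, int low_b) {
  // Any overlap between [low_a, low_a + 1] and [low_b, low_b + 1].
  return low_a <= low_b + 1 && low_b <= low_a + 1;
}

static bool PartiallyIntersects(int low_a, int low_b) {
  // Overlapping but not mapped to exactly the same VR pair.
  return Intersects(low_a, low_b) && low_a != low_b;
}
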
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index c4adb09248..8f97d1e7c8 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -1051,10 +1051,10 @@ void X86Mir2Lir::InstallLiteralPools() {
}
for (LIR *p = const_vectors_; p != nullptr; p = p->next) {
- PushWord(&code_buffer_, p->operands[0]);
- PushWord(&code_buffer_, p->operands[1]);
- PushWord(&code_buffer_, p->operands[2]);
- PushWord(&code_buffer_, p->operands[3]);
+ Push32(&code_buffer_, p->operands[0]);
+ Push32(&code_buffer_, p->operands[1]);
+ Push32(&code_buffer_, p->operands[2]);
+ Push32(&code_buffer_, p->operands[3]);
}
}
diff --git a/compiler/dex/ssa_transformation.cc b/compiler/dex/ssa_transformation.cc
index 6bd49de989..197f66d017 100644
--- a/compiler/dex/ssa_transformation.cc
+++ b/compiler/dex/ssa_transformation.cc
@@ -16,9 +16,9 @@
#include "base/bit_vector-inl.h"
#include "base/logging.h"
+#include "base/scoped_arena_containers.h"
#include "compiler_ir.h"
#include "dataflow_iterator-inl.h"
-#include "utils/scoped_arena_containers.h"
#define NOTVISITED (-1)
@@ -137,8 +137,8 @@ void MIRGraph::ComputeDefBlockMatrix() {
/* Allocate num_registers bit vector pointers */
DCHECK(temp_scoped_alloc_ != nullptr);
DCHECK(temp_.ssa.def_block_matrix == nullptr);
- temp_.ssa.def_block_matrix = static_cast<ArenaBitVector**>(
- temp_scoped_alloc_->Alloc(sizeof(ArenaBitVector*) * num_registers, kArenaAllocDFInfo));
+ temp_.ssa.def_block_matrix =
+ temp_scoped_alloc_->AllocArray<ArenaBitVector*>(num_registers, kArenaAllocDFInfo);
int i;
/* Initialize num_register vectors with num_blocks bits each */
@@ -363,8 +363,7 @@ void MIRGraph::ComputeDominators() {
/* Initialize & Clear i_dom_list */
if (max_num_reachable_blocks_ < num_reachable_blocks_) {
- i_dom_list_ = static_cast<int*>(arena_->Alloc(sizeof(int) * num_reachable_blocks,
- kArenaAllocDFInfo));
+ i_dom_list_ = arena_->AllocArray<int>(num_reachable_blocks, kArenaAllocDFInfo);
}
for (int i = 0; i < num_reachable_blocks; i++) {
i_dom_list_[i] = NOTVISITED;
@@ -463,24 +462,28 @@ bool MIRGraph::ComputeBlockLiveIns(BasicBlock* bb) {
return false;
}
-/* Insert phi nodes to for each variable to the dominance frontiers */
-void MIRGraph::InsertPhiNodes() {
- int dalvik_reg;
- ArenaBitVector* phi_blocks = new (temp_scoped_alloc_.get()) ArenaBitVector(
- temp_scoped_alloc_.get(), GetNumBlocks(), false, kBitMapPhi);
- ArenaBitVector* input_blocks = new (temp_scoped_alloc_.get()) ArenaBitVector(
- temp_scoped_alloc_.get(), GetNumBlocks(), false, kBitMapInputBlocks);
-
+/* For each dalvik reg, find blocks that need phi nodes according to the dominance frontiers. */
+void MIRGraph::FindPhiNodeBlocks() {
RepeatingPostOrderDfsIterator iter(this);
bool change = false;
for (BasicBlock* bb = iter.Next(false); bb != NULL; bb = iter.Next(change)) {
change = ComputeBlockLiveIns(bb);
}
+ ArenaBitVector* phi_blocks = new (temp_scoped_alloc_.get()) ArenaBitVector(
+ temp_scoped_alloc_.get(), GetNumBlocks(), false, kBitMapBMatrix);
+
+ // Reuse the def_block_matrix storage for phi_node_blocks.
+ ArenaBitVector** def_block_matrix = temp_.ssa.def_block_matrix;
+ ArenaBitVector** phi_node_blocks = def_block_matrix;
+ DCHECK(temp_.ssa.phi_node_blocks == nullptr);
+ temp_.ssa.phi_node_blocks = phi_node_blocks;
+ temp_.ssa.def_block_matrix = nullptr;
+
/* Iterate through each Dalvik register */
- for (dalvik_reg = GetNumOfCodeAndTempVRs() - 1; dalvik_reg >= 0; dalvik_reg--) {
- input_blocks->Copy(temp_.ssa.def_block_matrix[dalvik_reg]);
+ for (int dalvik_reg = GetNumOfCodeAndTempVRs() - 1; dalvik_reg >= 0; dalvik_reg--) {
phi_blocks->ClearAllBits();
+ ArenaBitVector* input_blocks = def_block_matrix[dalvik_reg];
do {
// TUNING: When we repeat this, we could skip indexes from the previous pass.
for (uint32_t idx : input_blocks->Indexes()) {
@@ -491,23 +494,8 @@ void MIRGraph::InsertPhiNodes() {
}
} while (input_blocks->Union(phi_blocks));
- /*
- * Insert a phi node for dalvik_reg in the phi_blocks if the Dalvik
- * register is in the live-in set.
- */
- for (uint32_t idx : phi_blocks->Indexes()) {
- BasicBlock* phi_bb = GetBasicBlock(idx);
- /* Variable will be clobbered before being used - no need for phi */
- if (!phi_bb->data_flow_info->live_in_v->IsBitSet(dalvik_reg)) {
- continue;
- }
- MIR *phi = NewMIR();
- phi->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpPhi);
- phi->dalvikInsn.vA = dalvik_reg;
- phi->offset = phi_bb->start_offset;
- phi->m_unit_index = 0; // Arbitrarily assign all Phi nodes to outermost method.
- phi_bb->PrependMIR(phi);
- }
+ def_block_matrix[dalvik_reg] = phi_blocks;
+ phi_blocks = input_blocks; // Reuse the bit vector in next iteration.
}
}
@@ -528,9 +516,7 @@ bool MIRGraph::InsertPhiNodeOperands(BasicBlock* bb) {
size_t num_uses = bb->predecessors.size();
AllocateSSAUseData(mir, num_uses);
int* uses = mir->ssa_rep->uses;
- BasicBlockId* incoming =
- static_cast<BasicBlockId*>(arena_->Alloc(sizeof(BasicBlockId) * num_uses,
- kArenaAllocDFInfo));
+ BasicBlockId* incoming = arena_->AllocArray<BasicBlockId>(num_uses, kArenaAllocDFInfo);
mir->meta.phi_incoming = incoming;
int idx = 0;
for (BasicBlockId pred_id : bb->predecessors) {
@@ -553,12 +539,12 @@ void MIRGraph::DoDFSPreOrderSSARename(BasicBlock* block) {
/* Process this block */
DoSSAConversion(block);
- int map_size = sizeof(int) * GetNumOfCodeAndTempVRs();
/* Save SSA map snapshot */
ScopedArenaAllocator allocator(&cu_->arena_stack);
- int* saved_ssa_map =
- static_cast<int*>(allocator.Alloc(map_size, kArenaAllocDalvikToSSAMap));
+ uint32_t num_vregs = GetNumOfCodeAndTempVRs();
+ int32_t* saved_ssa_map = allocator.AllocArray<int32_t>(num_vregs, kArenaAllocDalvikToSSAMap);
+ size_t map_size = sizeof(saved_ssa_map[0]) * num_vregs;
memcpy(saved_ssa_map, vreg_to_ssa_map_, map_size);
if (block->fall_through != NullBasicBlockId) {
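
FindPhiNodeBlocks() computes, per Dalvik register, the iterated dominance frontier of its definition blocks by unioning frontiers until a fixed point, which is exactly the do/while loop above. A standalone sketch of that fixed point using std::set in place of ArenaBitVector, with the dominance frontiers taken as input:

#include <map>
#include <set>

// For one register: blocks needing a phi = iterated dominance frontier of its defs.
static std::set<int> ComputePhiBlocks(
    const std::set<int>& def_blocks,
    const std::map<int, std::set<int>>& dom_frontier) {
  std::set<int> phi_blocks;
  std::set<int> work = def_blocks;
  bool changed = true;
  while (changed) {
    changed = false;
    std::set<int> frontier_union;
    for (int block : work) {
      auto it = dom_frontier.find(block);
      if (it != dom_frontier.end()) {
        frontier_union.insert(it->second.begin(), it->second.end());
      }
    }
    phi_blocks.insert(frontier_union.begin(), frontier_union.end());
    // Feed the frontier back in until nothing new is added (fixed point).
    for (int block : frontier_union) {
      if (work.insert(block).second) {
        changed = true;
      }
    }
  }
  return phi_blocks;
}
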
diff --git a/compiler/dex/vreg_analysis.cc b/compiler/dex/vreg_analysis.cc
index f70850a332..b620969ae2 100644
--- a/compiler/dex/vreg_analysis.cc
+++ b/compiler/dex/vreg_analysis.cc
@@ -440,8 +440,7 @@ void MIRGraph::InitRegLocations() {
// the temp allocation initializes reg location as well (in order to deal with
// case when it will be called after this pass).
int max_regs = GetNumSSARegs() + GetMaxPossibleCompilerTemps();
- RegLocation* loc = static_cast<RegLocation*>(arena_->Alloc(max_regs * sizeof(*loc),
- kArenaAllocRegAlloc));
+ RegLocation* loc = arena_->AllocArray<RegLocation>(max_regs, kArenaAllocRegAlloc);
for (int i = 0; i < GetNumSSARegs(); i++) {
loc[i] = fresh_loc;
loc[i].s_reg_low = i;
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 2d8c9d4a9e..b8a893649b 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -1285,7 +1285,15 @@ void CompilerDriver::GetCodeAndMethodForDirectCall(InvokeType* type, InvokeType
*stats_flags |= kFlagDirectCallToBoot | kFlagDirectMethodToBoot;
}
if (!use_dex_cache && force_relocations) {
- if (!IsImage() || !IsImageClass(method->GetDeclaringClassDescriptor())) {
+ bool is_in_image;
+ if (IsImage()) {
+ is_in_image = IsImageClass(method->GetDeclaringClassDescriptor());
+ } else {
+ is_in_image = instruction_set_ != kX86 && instruction_set_ != kX86_64 &&
+ Runtime::Current()->GetHeap()->FindSpaceFromObject(method->GetDeclaringClass(),
+ false)->IsImageSpace();
+ }
+ if (!is_in_image) {
// We can only branch directly to Methods that are resolved in the DexCache.
// Otherwise we won't invoke the resolution trampoline.
use_dex_cache = true;
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 2fca2e52f4..b7562442d7 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -22,6 +22,7 @@
#include <vector>
#include "arch/instruction_set.h"
+#include "base/arena_allocator.h"
#include "base/mutex.h"
#include "base/timing_logger.h"
#include "class_reference.h"
@@ -38,7 +39,6 @@
#include "runtime.h"
#include "safe_map.h"
#include "thread_pool.h"
-#include "utils/arena_allocator.h"
#include "utils/dedupe_set.h"
#include "utils/swap_space.h"
#include "utils.h"
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 9ec4f281cb..401d5a951d 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -90,19 +90,19 @@ std::vector<uint8_t>* ConstructCIEFrameX86(bool is_x86_64) {
// Length (will be filled in later in this routine).
if (is_x86_64) {
- PushWord(cfi_info, 0xffffffff); // Indicates 64bit
- PushWord(cfi_info, 0);
- PushWord(cfi_info, 0);
+ Push32(cfi_info, 0xffffffff); // Indicates 64bit
+ Push32(cfi_info, 0);
+ Push32(cfi_info, 0);
} else {
- PushWord(cfi_info, 0);
+ Push32(cfi_info, 0);
}
// CIE id: always 0.
if (is_x86_64) {
- PushWord(cfi_info, 0);
- PushWord(cfi_info, 0);
+ Push32(cfi_info, 0);
+ Push32(cfi_info, 0);
} else {
- PushWord(cfi_info, 0);
+ Push32(cfi_info, 0);
}
// Version: always 1.
@@ -318,7 +318,7 @@ class LineTableGenerator FINAL : public Leb128Encoder {
PushByte(data_, 0); // extended opcode:
PushByte(data_, 1 + 4); // length: opcode_size + address_size
PushByte(data_, DW_LNE_set_address);
- PushWord(data_, addr);
+ Push32(data_, addr);
}
void SetLine(unsigned line) {
@@ -507,13 +507,13 @@ static void FillInCFIInformation(OatWriter* oat_writer,
// Start the debug_info section with the header information
// 'unit_length' will be filled in later.
int cunit_length = dbg_info->size();
- PushWord(dbg_info, 0);
+ Push32(dbg_info, 0);
// 'version' - 3.
PushHalf(dbg_info, 3);
// Offset into .debug_abbrev section (always 0).
- PushWord(dbg_info, 0);
+ Push32(dbg_info, 0);
// Address size: 4.
PushByte(dbg_info, 4);
@@ -523,7 +523,7 @@ static void FillInCFIInformation(OatWriter* oat_writer,
PushByte(dbg_info, 1);
// The producer is Android dex2oat.
- PushWord(dbg_info, producer_str_offset);
+ Push32(dbg_info, producer_str_offset);
// The language is Java.
PushByte(dbg_info, DW_LANG_Java);
@@ -532,8 +532,8 @@ static void FillInCFIInformation(OatWriter* oat_writer,
uint32_t cunit_low_pc = 0 - 1;
uint32_t cunit_high_pc = 0;
int cunit_low_pc_pos = dbg_info->size();
- PushWord(dbg_info, 0);
- PushWord(dbg_info, 0);
+ Push32(dbg_info, 0);
+ Push32(dbg_info, 0);
if (dbg_line == nullptr) {
for (size_t i = 0; i < method_info.size(); ++i) {
@@ -546,9 +546,9 @@ static void FillInCFIInformation(OatWriter* oat_writer,
PushByte(dbg_info, 2);
// Enter name, low_pc, high_pc.
- PushWord(dbg_info, PushStr(dbg_str, dbg.method_name_));
- PushWord(dbg_info, dbg.low_pc_ + text_section_offset);
- PushWord(dbg_info, dbg.high_pc_ + text_section_offset);
+ Push32(dbg_info, PushStr(dbg_str, dbg.method_name_));
+ Push32(dbg_info, dbg.low_pc_ + text_section_offset);
+ Push32(dbg_info, dbg.high_pc_ + text_section_offset);
}
} else {
// TODO: in gdb info functions <regexp> - reports Java functions, but
@@ -559,15 +559,15 @@ static void FillInCFIInformation(OatWriter* oat_writer,
// method ranges.
// Line number table offset
- PushWord(dbg_info, dbg_line->size());
+ Push32(dbg_info, dbg_line->size());
size_t lnt_length = dbg_line->size();
- PushWord(dbg_line, 0);
+ Push32(dbg_line, 0);
PushHalf(dbg_line, 4); // LNT Version DWARF v4 => 4
size_t lnt_hdr_length = dbg_line->size();
- PushWord(dbg_line, 0); // TODO: 64-bit uses 8-byte here
+ Push32(dbg_line, 0); // TODO: 64-bit uses 8-byte here
PushByte(dbg_line, 1); // minimum_instruction_length (ubyte)
PushByte(dbg_line, 1); // maximum_operations_per_instruction (ubyte) = always 1
@@ -629,9 +629,9 @@ static void FillInCFIInformation(OatWriter* oat_writer,
PushByte(dbg_info, 2);
// Enter name, low_pc, high_pc.
- PushWord(dbg_info, PushStr(dbg_str, dbg.method_name_));
- PushWord(dbg_info, dbg.low_pc_ + text_section_offset);
- PushWord(dbg_info, dbg.high_pc_ + text_section_offset);
+ Push32(dbg_info, PushStr(dbg_str, dbg.method_name_));
+ Push32(dbg_info, dbg.low_pc_ + text_section_offset);
+ Push32(dbg_info, dbg.high_pc_ + text_section_offset);
GetLineInfoForJava(dbg.dbgstream_, dbg.compiled_method_->GetSrcMappingTable(),
&pc2java_map, dbg.low_pc_);
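
The PushWord to Push32 rename spells out the width, since "word" means different sizes on different targets. A self-contained sketch of byte emitters of this shape, assuming the little-endian layout the DWARF sections here use; the real helpers live elsewhere in the tree:

#include <cstdint>
#include <vector>

// Illustrative sketch, not part of the patch: emitters with the width in the
// name, appending little-endian values to a section buffer.
static void PushByte(std::vector<uint8_t>* buf, uint8_t value) {
  buf->push_back(value);
}

static void PushHalf(std::vector<uint8_t>* buf, uint16_t value) {
  buf->push_back(value & 0xff);
  buf->push_back((value >> 8) & 0xff);
}

static void Push32(std::vector<uint8_t>* buf, uint32_t value) {
  for (int i = 0; i < 4; ++i) {
    buf->push_back((value >> (8 * i)) & 0xff);
  }
}
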
diff --git a/compiler/gc_map_builder.h b/compiler/gc_map_builder.h
index bc8ad41608..4c36ef733c 100644
--- a/compiler/gc_map_builder.h
+++ b/compiler/gc_map_builder.h
@@ -26,15 +26,17 @@ namespace art {
class GcMapBuilder {
public:
- GcMapBuilder(std::vector<uint8_t>* table, size_t entries, uint32_t max_native_offset,
+ template <typename Alloc>
+ GcMapBuilder(std::vector<uint8_t, Alloc>* table, size_t entries, uint32_t max_native_offset,
size_t references_width)
: entries_(entries), references_width_(entries != 0u ? references_width : 0u),
native_offset_width_(entries != 0 && max_native_offset != 0
? sizeof(max_native_offset) - CLZ(max_native_offset) / 8u
: 0u),
- in_use_(entries), table_(table) {
+ in_use_(entries) {
// Resize table and set up header.
table->resize((EntryWidth() * entries) + sizeof(uint32_t));
+ table_ = table->data();
CHECK_LT(native_offset_width_, 1U << 3);
(*table)[0] = native_offset_width_ & 7;
CHECK_LT(references_width_, 1U << 13);
@@ -65,7 +67,7 @@ class GcMapBuilder {
uint32_t native_offset = 0;
size_t table_offset = (table_index * EntryWidth()) + sizeof(uint32_t);
for (size_t i = 0; i < native_offset_width_; i++) {
- native_offset |= (*table_)[table_offset + i] << (i * 8);
+ native_offset |= table_[table_offset + i] << (i * 8);
}
return native_offset;
}
@@ -73,13 +75,13 @@ class GcMapBuilder {
void SetCodeOffset(size_t table_index, uint32_t native_offset) {
size_t table_offset = (table_index * EntryWidth()) + sizeof(uint32_t);
for (size_t i = 0; i < native_offset_width_; i++) {
- (*table_)[table_offset + i] = (native_offset >> (i * 8)) & 0xFF;
+ table_[table_offset + i] = (native_offset >> (i * 8)) & 0xFF;
}
}
void SetReferences(size_t table_index, const uint8_t* references) {
size_t table_offset = (table_index * EntryWidth()) + sizeof(uint32_t);
- memcpy(&(*table_)[table_offset + native_offset_width_], references, references_width_);
+ memcpy(&table_[table_offset + native_offset_width_], references, references_width_);
}
size_t EntryWidth() const {
@@ -95,7 +97,7 @@ class GcMapBuilder {
// Entries that are in use.
std::vector<bool> in_use_;
// The table we're building.
- std::vector<uint8_t>* const table_;
+ uint8_t* table_;
};
} // namespace art
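
Templating the constructor over the vector's allocator lets GcMapBuilder fill both plain and swap-space/arena backed byte vectors, and caching table->data() is safe because the single resize() happens before the pointer is taken and the builder never grows the table again. A hedged stand-alone sketch of the pattern (hypothetical TableFiller class, not the real builder):

#include <cstddef>
#include <cstdint>
#include <vector>

// Illustrative sketch, not part of the patch: a filler templated over the byte
// vector's allocator. The cached data() pointer stays valid only because the
// one resize() happens up front and the vector is never resized afterwards.
class TableFiller {
 public:
  template <typename Alloc>
  TableFiller(std::vector<uint8_t, Alloc>* table, std::size_t size) {
    table->resize(size);
    table_ = table->data();
  }
  void Set(std::size_t offset, uint8_t value) { table_[offset] = value; }

 private:
  uint8_t* table_;
};

Both std::vector<uint8_t> and a vector with a custom allocator satisfy the template parameter, which is the point of the change.
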
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index b2342491fa..c588e1a53d 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -273,13 +273,7 @@ void ImageWriter::SetImageBinSlot(mirror::Object* object, BinSlot bin_slot) {
void ImageWriter::AssignImageBinSlot(mirror::Object* object) {
DCHECK(object != nullptr);
- size_t object_size;
- if (object->IsArtMethod()) {
- // Methods are sized based on the target pointer size.
- object_size = mirror::ArtMethod::InstanceSize(target_ptr_size_);
- } else {
- object_size = object->SizeOf();
- }
+ size_t object_size = object->SizeOf();
// The magic happens here. We segregate objects into different bins based
// on how likely they are to get dirty at runtime.
@@ -569,6 +563,7 @@ void ImageWriter::ComputeEagerResolvedStringsCallback(Object* obj, void* arg ATT
}
mirror::String* string = obj->AsString();
const uint16_t* utf16_string = string->GetCharArray()->GetData() + string->GetOffset();
+ size_t utf16_length = static_cast<size_t>(string->GetLength());
ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
ReaderMutexLock mu(Thread::Current(), *class_linker->DexLock());
size_t dex_cache_count = class_linker->GetDexCacheCount();
@@ -576,10 +571,10 @@ void ImageWriter::ComputeEagerResolvedStringsCallback(Object* obj, void* arg ATT
DexCache* dex_cache = class_linker->GetDexCache(i);
const DexFile& dex_file = *dex_cache->GetDexFile();
const DexFile::StringId* string_id;
- if (UNLIKELY(string->GetLength() == 0)) {
+ if (UNLIKELY(utf16_length == 0)) {
string_id = dex_file.FindStringId("");
} else {
- string_id = dex_file.FindStringId(utf16_string);
+ string_id = dex_file.FindStringId(utf16_string, utf16_length);
}
if (string_id != nullptr) {
// This string occurs in this dex file, assign the dex cache entry.
@@ -931,7 +926,7 @@ void ImageWriter::CopyAndFixupObjectsCallback(Object* obj, void* arg) {
if (obj->IsArtMethod()) {
// Size without pointer fields since we don't want to overrun the buffer if target art method
// is 32 bits but source is 64 bits.
- n = mirror::ArtMethod::SizeWithoutPointerFields(sizeof(void*));
+ n = mirror::ArtMethod::SizeWithoutPointerFields(image_writer->target_ptr_size_);
} else {
n = obj->SizeOf();
}
@@ -1016,10 +1011,6 @@ void ImageWriter::FixupObject(Object* orig, Object* copy) {
}
if (orig->IsArtMethod<kVerifyNone>()) {
FixupMethod(orig->AsArtMethod<kVerifyNone>(), down_cast<ArtMethod*>(copy));
- } else if (orig->IsClass() && orig->AsClass()->IsArtMethodClass()) {
- // Set the right size for the target.
- size_t size = mirror::ArtMethod::InstanceSize(target_ptr_size_);
- down_cast<mirror::Class*>(copy)->SetObjectSizeWithoutChecks(size);
}
}
@@ -1031,7 +1022,9 @@ const uint8_t* ImageWriter::GetQuickCode(mirror::ArtMethod* method, bool* quick_
// trampoline.
// Quick entrypoint:
- const uint8_t* quick_code = GetOatAddress(method->GetQuickOatCodeOffset());
+ uint32_t quick_oat_code_offset = PointerToLowMemUInt32(
+ method->GetEntryPointFromQuickCompiledCodePtrSize(target_ptr_size_));
+ const uint8_t* quick_code = GetOatAddress(quick_oat_code_offset);
*quick_is_interpreted = false;
if (quick_code != nullptr &&
(!method->IsStatic() || method->IsConstructor() || method->GetDeclaringClass()->IsInitialized())) {
@@ -1082,11 +1075,12 @@ void ImageWriter::FixupMethod(ArtMethod* orig, ArtMethod* copy) {
// locations.
// Copy all of the fields from the runtime methods to the target methods first since we did a
// bytewise copy earlier.
- copy->SetEntryPointFromInterpreterPtrSize<kVerifyNone>(orig->GetEntryPointFromInterpreter(),
- target_ptr_size_);
- copy->SetEntryPointFromJniPtrSize<kVerifyNone>(orig->GetEntryPointFromJni(), target_ptr_size_);
+ copy->SetEntryPointFromInterpreterPtrSize<kVerifyNone>(
+ orig->GetEntryPointFromInterpreterPtrSize(target_ptr_size_), target_ptr_size_);
+ copy->SetEntryPointFromJniPtrSize<kVerifyNone>(
+ orig->GetEntryPointFromJniPtrSize(target_ptr_size_), target_ptr_size_);
copy->SetEntryPointFromQuickCompiledCodePtrSize<kVerifyNone>(
- orig->GetEntryPointFromQuickCompiledCode(), target_ptr_size_);
+ orig->GetEntryPointFromQuickCompiledCodePtrSize(target_ptr_size_), target_ptr_size_);
// The resolution method has a special trampoline to call.
Runtime* runtime = Runtime::Current();
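
FixupMethod now reads every entry point with the image's target pointer size instead of the host's, so a 64-bit dex2oat writing a 32-bit image copies 4-byte slots rather than 8-byte ones. A rough, stand-alone illustration of a pointer-size-explicit field copy (assumes little-endian targets; these are not the real ArtMethod accessors):

#include <cstdint>
#include <cstring>

// Illustrative sketch, not part of the patch: read and write a field whose
// width is the target's pointer size, which may differ from the host's.
static uint64_t ReadPtrSized(const uint8_t* field, size_t ptr_size) {
  uint64_t value = 0;
  std::memcpy(&value, field, ptr_size);  // Little-endian assumption.
  return value;
}

static void WritePtrSized(uint8_t* field, uint64_t value, size_t ptr_size) {
  std::memcpy(field, &value, ptr_size);
}

static void CopyEntryPoint(const uint8_t* src_field, uint8_t* dst_field, size_t target_ptr_size) {
  WritePtrSized(dst_field, ReadPtrSized(src_field, target_ptr_size), target_ptr_size);
}
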
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 3c36ffa4e9..9c0157e885 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -899,7 +899,8 @@ class OatWriter::InitMapMethodVisitor : public OatDexMethodVisitor {
class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor {
public:
InitImageMethodVisitor(OatWriter* writer, size_t offset)
- : OatDexMethodVisitor(writer, offset) {
+ : OatDexMethodVisitor(writer, offset),
+ pointer_size_(GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet())) {
}
bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it)
@@ -932,10 +933,14 @@ class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor {
std::string dump = exc->Dump();
LOG(FATAL) << dump;
}
- method->SetQuickOatCodeOffset(offsets.code_offset_);
+ method->SetEntryPointFromQuickCompiledCodePtrSize(reinterpret_cast<void*>(offsets.code_offset_),
+ pointer_size_);
return true;
}
+
+ protected:
+ const size_t pointer_size_;
};
class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
@@ -1103,10 +1108,18 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
if (UNLIKELY(target_offset == 0)) {
mirror::ArtMethod* target = GetTargetMethod(patch);
DCHECK(target != nullptr);
- DCHECK_EQ(target->GetQuickOatCodeOffset(), 0u);
- target_offset = target->IsNative()
- ? writer_->oat_header_->GetQuickGenericJniTrampolineOffset()
- : writer_->oat_header_->GetQuickToInterpreterBridgeOffset();
+ size_t size = GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet());
+ const void* oat_code_offset = target->GetEntryPointFromQuickCompiledCodePtrSize(size);
+ if (oat_code_offset != 0) {
+ DCHECK(!Runtime::Current()->GetClassLinker()->IsQuickResolutionStub(oat_code_offset));
+ DCHECK(!Runtime::Current()->GetClassLinker()->IsQuickToInterpreterBridge(oat_code_offset));
+ DCHECK(!Runtime::Current()->GetClassLinker()->IsQuickGenericJniStub(oat_code_offset));
+ target_offset = PointerToLowMemUInt32(oat_code_offset);
+ } else {
+ target_offset = target->IsNative()
+ ? writer_->oat_header_->GetQuickGenericJniTrampolineOffset()
+ : writer_->oat_header_->GetQuickToInterpreterBridgeOffset();
+ }
}
return target_offset;
}
@@ -1138,10 +1151,9 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
void PatchCodeAddress(std::vector<uint8_t>* code, uint32_t offset, uint32_t target_offset)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
- // NOTE: Direct calls across oat files don't use linker patches.
- DCHECK(writer_->image_writer_ != nullptr);
- uint32_t address = PointerToLowMemUInt32(writer_->image_writer_->GetOatFileBegin() +
- writer_->oat_data_offset_ + target_offset);
+ uint32_t address = writer_->image_writer_ == nullptr ? target_offset :
+ PointerToLowMemUInt32(writer_->image_writer_->GetOatFileBegin() +
+ writer_->oat_data_offset_ + target_offset);
DCHECK_LE(offset + 4, code->size());
uint8_t* data = &(*code)[offset];
data[0] = address & 0xffu;
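
PatchCodeAddress now also handles the no-boot-image case by patching the oat-relative offset directly instead of requiring an image writer. A small self-contained version of the 32-bit little-endian patch step it performs:

#include <cassert>
#include <cstdint>
#include <vector>

// Illustrative sketch, not part of the patch: write a 32-bit little-endian
// address into already-emitted code at `offset`.
static void Patch32(std::vector<uint8_t>* code, uint32_t offset, uint32_t address) {
  assert(offset + 4 <= code->size());  // Mirrors the DCHECK_LE above.
  uint8_t* data = &(*code)[offset];
  data[0] = address & 0xffu;
  data[1] = (address >> 8) & 0xffu;
  data[2] = (address >> 16) & 0xffu;
  data[3] = (address >> 24) & 0xffu;
}
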
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index d6c3515726..811a3bdf0c 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -14,9 +14,9 @@
* limitations under the License.
*/
+#include "base/arena_containers.h"
#include "bounds_check_elimination.h"
#include "nodes.h"
-#include "utils/arena_containers.h"
namespace art {
@@ -28,18 +28,11 @@ class MonotonicValueRange;
*/
class ValueBound : public ValueObject {
public:
- ValueBound(HInstruction* instruction, int constant) {
+ ValueBound(HInstruction* instruction, int32_t constant) {
if (instruction != nullptr && instruction->IsIntConstant()) {
- // Normalizing ValueBound with constant instruction.
- int instr_const = instruction->AsIntConstant()->GetValue();
- if (constant >= 0 && (instr_const <= INT_MAX - constant)) {
- // No overflow.
- instruction_ = nullptr;
- constant_ = instr_const + constant;
- return;
- }
- if (constant < 0 && (instr_const >= INT_MIN - constant)) {
- // No underflow.
+ // Normalize ValueBound with constant instruction.
+ int32_t instr_const = instruction->AsIntConstant()->GetValue();
+ if (!WouldAddOverflowOrUnderflow(instr_const, constant)) {
instruction_ = nullptr;
constant_ = instr_const + constant;
return;
@@ -49,6 +42,41 @@ class ValueBound : public ValueObject {
constant_ = constant;
}
+ // Return whether (left + right) overflows or underflows.
+ static bool WouldAddOverflowOrUnderflow(int32_t left, int32_t right) {
+ if (right == 0) {
+ return false;
+ }
+ if ((right > 0) && (left <= INT_MAX - right)) {
+ // No overflow.
+ return false;
+ }
+ if ((right < 0) && (left >= INT_MIN - right)) {
+ // No underflow.
+ return false;
+ }
+ return true;
+ }
+
+ static bool IsAddOrSubAConstant(HInstruction* instruction,
+ HInstruction** left_instruction,
+ int* right_constant) {
+ if (instruction->IsAdd() || instruction->IsSub()) {
+ HBinaryOperation* bin_op = instruction->AsBinaryOperation();
+ HInstruction* left = bin_op->GetLeft();
+ HInstruction* right = bin_op->GetRight();
+ if (right->IsIntConstant()) {
+ *left_instruction = left;
+ int32_t c = right->AsIntConstant()->GetValue();
+ *right_constant = instruction->IsAdd() ? c : -c;
+ return true;
+ }
+ }
+ *left_instruction = nullptr;
+ *right_constant = 0;
+ return false;
+ }
+
// Try to detect useful value bound format from an instruction, e.g.
// a constant or array length related value.
static ValueBound DetectValueBoundFromValue(HInstruction* instruction, bool* found) {
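
WouldAddOverflowOrUnderflow pre-checks the operands so constant folding in ValueBound never relies on signed wrap-around. The same check as a stand-alone program with a few spot checks:

#include <cassert>
#include <climits>
#include <cstdint>

// Illustrative sketch, not part of the patch: the pre-check used before folding
// `left + right` into a single int32_t constant.
static bool WouldAddOverflowOrUnderflow(int32_t left, int32_t right) {
  if (right == 0) return false;
  if (right > 0 && left <= INT_MAX - right) return false;  // No overflow.
  if (right < 0 && left >= INT_MIN - right) return false;  // No underflow.
  return true;
}

int main() {
  assert(!WouldAddOverflowOrUnderflow(INT_MAX - 1, 1));  // Landing exactly on INT_MAX is fine.
  assert(WouldAddOverflowOrUnderflow(INT_MAX, 1));       // Would wrap to a negative value.
  assert(WouldAddOverflowOrUnderflow(INT_MIN, -1));      // Would underflow.
  return 0;
}
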
@@ -63,13 +91,12 @@ class ValueBound : public ValueObject {
return ValueBound(instruction, 0);
}
// Try to detect (array.length + c) format.
- if (instruction->IsAdd()) {
- HAdd* add = instruction->AsAdd();
- HInstruction* left = add->GetLeft();
- HInstruction* right = add->GetRight();
- if (left->IsArrayLength() && right->IsIntConstant()) {
+ HInstruction *left;
+ int32_t right;
+ if (IsAddOrSubAConstant(instruction, &left, &right)) {
+ if (left->IsArrayLength()) {
*found = true;
- return ValueBound(left, right->AsIntConstant()->GetValue());
+ return ValueBound(left, right);
}
}
@@ -79,10 +106,13 @@ class ValueBound : public ValueObject {
}
HInstruction* GetInstruction() const { return instruction_; }
- int GetConstant() const { return constant_; }
+ int32_t GetConstant() const { return constant_; }
- bool IsRelativeToArrayLength() const {
- return instruction_ != nullptr && instruction_->IsArrayLength();
+ bool IsRelatedToArrayLength() const {
+ // Some bounds are created with HNewArray* as the instruction instead
+ // of HArrayLength*. They are treated the same.
+ return (instruction_ != nullptr) &&
+ (instruction_->IsArrayLength() || instruction_->IsNewArray());
}
bool IsConstant() const {
@@ -96,54 +126,45 @@ class ValueBound : public ValueObject {
return instruction_ == bound.instruction_ && constant_ == bound.constant_;
}
- // Returns if it's certain bound1 >= bound2.
- bool GreaterThanOrEqual(ValueBound bound) const {
- if (instruction_ == bound.instruction_) {
- if (instruction_ == nullptr) {
- // Pure constant.
- return constant_ >= bound.constant_;
- }
- // There might be overflow/underflow. Be conservative for now.
- return false;
+ static HInstruction* FromArrayLengthToNewArrayIfPossible(HInstruction* instruction) {
+ // Null check on the NewArray should have been eliminated by instruction
+ // simplifier already.
+ if (instruction->IsArrayLength() && instruction->InputAt(0)->IsNewArray()) {
+ return instruction->InputAt(0)->AsNewArray();
}
- // Not comparable. Just return false.
- return false;
+ return instruction;
}
- // Returns if it's certain bound1 <= bound2.
- bool LessThanOrEqual(ValueBound bound) const {
- if (instruction_ == bound.instruction_) {
- if (instruction_ == nullptr) {
- // Pure constant.
- return constant_ <= bound.constant_;
- }
- if (IsRelativeToArrayLength()) {
- // Array length is guaranteed to be no less than 0.
- // No overflow/underflow can happen if both constants are negative.
- if (constant_ <= 0 && bound.constant_ <= 0) {
- return constant_ <= bound.constant_;
- }
- // There might be overflow/underflow. Be conservative for now.
- return false;
- }
+ static bool Equal(HInstruction* instruction1, HInstruction* instruction2) {
+ if (instruction1 == instruction2) {
+ return true;
}
- // In case the array length is some constant, we can
- // still compare.
- if (IsConstant() && bound.IsRelativeToArrayLength()) {
- HInstruction* array = bound.GetInstruction()->AsArrayLength()->InputAt(0);
- if (array->IsNullCheck()) {
- array = array->AsNullCheck()->InputAt(0);
- }
- if (array->IsNewArray()) {
- HInstruction* len = array->InputAt(0);
- if (len->IsIntConstant()) {
- int len_const = len->AsIntConstant()->GetValue();
- return constant_ <= len_const + bound.GetConstant();
- }
- }
+ if (instruction1 == nullptr || instruction2 == nullptr) {
+ return false;
}
+ // Some bounds are created with HNewArray* as the instruction instead
+ // of HArrayLength*. They are treated the same.
+ instruction1 = FromArrayLengthToNewArrayIfPossible(instruction1);
+ instruction2 = FromArrayLengthToNewArrayIfPossible(instruction2);
+ return instruction1 == instruction2;
+ }
+
+ // Returns if it's certain this->bound >= `bound`.
+ bool GreaterThanOrEqualTo(ValueBound bound) const {
+ if (Equal(instruction_, bound.instruction_)) {
+ return constant_ >= bound.constant_;
+ }
+ // Not comparable. Just return false.
+ return false;
+ }
+
+ // Returns if it's certain this->bound <= `bound`.
+ bool LessThanOrEqualTo(ValueBound bound) const {
+ if (Equal(instruction_, bound.instruction_)) {
+ return constant_ <= bound.constant_;
+ }
// Not comparable. Just return false.
return false;
}
@@ -151,10 +172,11 @@ class ValueBound : public ValueObject {
// Try to narrow lower bound. Returns the greatest of the two if possible.
// Pick one if they are not comparable.
static ValueBound NarrowLowerBound(ValueBound bound1, ValueBound bound2) {
- if (bound1.instruction_ == bound2.instruction_) {
- // Same instruction, compare the constant part.
- return ValueBound(bound1.instruction_,
- std::max(bound1.constant_, bound2.constant_));
+ if (bound1.GreaterThanOrEqualTo(bound2)) {
+ return bound1;
+ }
+ if (bound2.GreaterThanOrEqualTo(bound1)) {
+ return bound2;
}
// Not comparable. Just pick one. We may lose some info, but that's ok.
@@ -165,58 +187,71 @@ class ValueBound : public ValueObject {
// Try to narrow upper bound. Returns the lowest of the two if possible.
// Pick one if they are not comparable.
static ValueBound NarrowUpperBound(ValueBound bound1, ValueBound bound2) {
- if (bound1.instruction_ == bound2.instruction_) {
- // Same instruction, compare the constant part.
- return ValueBound(bound1.instruction_,
- std::min(bound1.constant_, bound2.constant_));
+ if (bound1.LessThanOrEqualTo(bound2)) {
+ return bound1;
+ }
+ if (bound2.LessThanOrEqualTo(bound1)) {
+ return bound2;
}
// Not comparable. Just pick one. We may lose some info, but that's ok.
// Favor array length as upper bound.
- return bound1.IsRelativeToArrayLength() ? bound1 : bound2;
+ return bound1.IsRelatedToArrayLength() ? bound1 : bound2;
}
- // Add a constant to a ValueBound. If the constant part of the ValueBound
- // overflows/underflows, then we can't accurately represent it. For correctness,
- // just return Max/Min() depending on whether the returned ValueBound is used for
- // lower/upper bound.
- ValueBound Add(int c, bool* overflow_or_underflow) const {
- *overflow_or_underflow = false;
+ // Add a constant to a ValueBound.
+ // `overflow` or `underflow` will return whether the resulting bound may
+ // overflow or underflow an int.
+ ValueBound Add(int32_t c, bool* overflow, bool* underflow) const {
+ *overflow = *underflow = false;
if (c == 0) {
return *this;
}
- int new_constant;
+ int32_t new_constant;
if (c > 0) {
if (constant_ > INT_MAX - c) {
- // Constant part overflows.
- *overflow_or_underflow = true;
+ *overflow = true;
return Max();
- } else {
- new_constant = constant_ + c;
}
+
+ new_constant = constant_ + c;
+ // (array.length + non-positive-constant) won't overflow an int.
+ if (IsConstant() || (IsRelatedToArrayLength() && new_constant <= 0)) {
+ return ValueBound(instruction_, new_constant);
+ }
+ // Be conservative.
+ *overflow = true;
+ return Max();
} else {
if (constant_ < INT_MIN - c) {
- // Constant part underflows.
- *overflow_or_underflow = true;
- return Max();
- } else {
- new_constant = constant_ + c;
+ *underflow = true;
+ return Min();
}
+
+ new_constant = constant_ + c;
+ // Regardless of the value new_constant, (array.length+new_constant) will
+ // never underflow since array.length is no less than 0.
+ if (IsConstant() || IsRelatedToArrayLength()) {
+ return ValueBound(instruction_, new_constant);
+ }
+ // Be conservative.
+ *underflow = true;
+ return Min();
}
return ValueBound(instruction_, new_constant);
}
private:
HInstruction* instruction_;
- int constant_;
+ int32_t constant_;
};
/**
* Represent a range of lower bound and upper bound, both being inclusive.
* Currently a ValueRange may be generated as a result of the following:
* comparisons related to array bounds, array bounds check, add/sub on top
- * of an existing value range, or a loop phi corresponding to an
+ * of an existing value range, NewArray or a loop phi corresponding to an
* incrementing/decrementing array index (MonotonicValueRange).
*/
class ValueRange : public ArenaObject<kArenaAllocMisc> {
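
ValueBound::Add keeps a bound of the form (array.length + k) only when the sum provably stays in int32_t: adding a non-positive k can never overflow, and because array.length >= 0 no k can make it underflow. A small stand-alone check of those two invariants, using 64-bit arithmetic only to verify them:

#include <cassert>
#include <climits>
#include <cstdint>
#include <initializer_list>

// Illustrative sketch, not part of the patch: the invariants Add() relies on for
// length-relative bounds. For positive k the sum can overflow (e.g. INT_MAX + 1),
// which is why Add() reports overflow and falls back to Max() in that case.
static bool FitsInInt32(int64_t v) { return v >= INT_MIN && v <= INT_MAX; }

int main() {
  for (int64_t length : {int64_t{0}, int64_t{1}, int64_t{INT_MAX}}) {
    assert(FitsInInt32(length - 3));        // k <= 0: no overflow, safe as an upper bound.
    assert(FitsInInt32(length + INT_MIN));  // Any k: no underflow, safe as a lower bound.
  }
  return 0;
}
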
@@ -241,8 +276,8 @@ class ValueRange : public ArenaObject<kArenaAllocMisc> {
return true;
}
DCHECK(!other_range->IsMonotonicValueRange());
- return lower_.GreaterThanOrEqual(other_range->lower_) &&
- upper_.LessThanOrEqual(other_range->upper_);
+ return lower_.GreaterThanOrEqualTo(other_range->lower_) &&
+ upper_.LessThanOrEqualTo(other_range->upper_);
}
// Returns the intersection of this and range.
@@ -263,29 +298,24 @@ class ValueRange : public ArenaObject<kArenaAllocMisc> {
ValueBound::NarrowUpperBound(upper_, range->upper_));
}
- // Shift a range by a constant. If either bound can't be represented
- // as (instruction+c) format due to possible overflow/underflow,
- // return the full integer range.
- ValueRange* Add(int constant) const {
- bool overflow_or_underflow;
- ValueBound lower = lower_.Add(constant, &overflow_or_underflow);
- if (overflow_or_underflow) {
- // We can't accurately represent the bounds anymore.
- return FullIntRange();
+ // Shift a range by a constant.
+ ValueRange* Add(int32_t constant) const {
+ bool overflow, underflow;
+ ValueBound lower = lower_.Add(constant, &overflow, &underflow);
+ if (underflow) {
+ // Lower bound underflow will wrap around to positive values
+ // and invalidate the upper bound.
+ return nullptr;
}
- ValueBound upper = upper_.Add(constant, &overflow_or_underflow);
- if (overflow_or_underflow) {
- // We can't accurately represent the bounds anymore.
- return FullIntRange();
+ ValueBound upper = upper_.Add(constant, &overflow, &underflow);
+ if (overflow) {
+ // Upper bound overflow will wrap around to negative values
+ // and invalidate the lower bound.
+ return nullptr;
}
return new (allocator_) ValueRange(allocator_, lower, upper);
}
- // Return [INT_MIN, INT_MAX].
- ValueRange* FullIntRange() const {
- return new (allocator_) ValueRange(allocator_, ValueBound::Min(), ValueBound::Max());
- }
-
private:
ArenaAllocator* const allocator_;
const ValueBound lower_; // inclusive
@@ -304,7 +334,7 @@ class MonotonicValueRange : public ValueRange {
public:
MonotonicValueRange(ArenaAllocator* allocator,
HInstruction* initial,
- int increment,
+ int32_t increment,
ValueBound bound)
// To be conservative, give it full range [INT_MIN, INT_MAX] in case it's
// used as a regular value range, due to possible overflow/underflow.
@@ -343,23 +373,17 @@ class MonotonicValueRange : public ValueRange {
// make assumptions about the max array length, e.g. due to the max heap size,
// divided by the element size (such as 4 bytes for each integer array), we can
// lower this number and rule out some possible overflows.
- int max_array_len = INT_MAX;
-
- int upper = INT_MAX;
- if (range->GetUpper().IsConstant()) {
- upper = range->GetUpper().GetConstant();
- } else if (range->GetUpper().IsRelativeToArrayLength()) {
- int constant = range->GetUpper().GetConstant();
- if (constant <= 0) {
- // Normal case. e.g. <= array.length - 1, <= array.length - 2, etc.
- upper = max_array_len + constant;
- } else {
- // There might be overflow. Give up narrowing.
- return this;
- }
- } else {
- // There might be overflow. Give up narrowing.
- return this;
+ int32_t max_array_len = INT_MAX;
+
+ // Maximum possible integer value of the range's upper bound.
+ int32_t upper = INT_MAX;
+ // Try to lower upper.
+ ValueBound upper_bound = range->GetUpper();
+ if (upper_bound.IsConstant()) {
+ upper = upper_bound.GetConstant();
+ } else if (upper_bound.IsRelatedToArrayLength() && upper_bound.GetConstant() <= 0) {
+ // Normal case. e.g. <= array.length - 1.
+ upper = max_array_len + upper_bound.GetConstant();
}
// If we can prove for the last number in sequence of initial_,
@@ -368,13 +392,13 @@ class MonotonicValueRange : public ValueRange {
// then this MonoticValueRange is narrowed to a normal value range.
// Be conservative first, assume last number in the sequence hits upper.
- int last_num_in_sequence = upper;
+ int32_t last_num_in_sequence = upper;
if (initial_->IsIntConstant()) {
- int initial_constant = initial_->AsIntConstant()->GetValue();
+ int32_t initial_constant = initial_->AsIntConstant()->GetValue();
if (upper <= initial_constant) {
last_num_in_sequence = upper;
} else {
- // Cast to int64_t for the substraction part to avoid int overflow.
+ // Cast to int64_t for the subtraction part to avoid int32_t overflow.
last_num_in_sequence = initial_constant +
((int64_t)upper - (int64_t)initial_constant) / increment_ * increment_;
}
@@ -392,23 +416,22 @@ class MonotonicValueRange : public ValueRange {
ValueBound upper = ValueBound::NarrowUpperBound(bound_, range->GetUpper());
// Need to take care of underflow. Try to prove underflow won't happen
- // for common cases. Basically need to be able to prove for any value
- // that's >= range->GetLower(), it won't be positive with value+increment.
+ // for common cases.
if (range->GetLower().IsConstant()) {
- int constant = range->GetLower().GetConstant();
+ int32_t constant = range->GetLower().GetConstant();
if (constant >= INT_MIN - increment_) {
return new (GetAllocator()) ValueRange(GetAllocator(), range->GetLower(), upper);
}
}
- // There might be underflow. Give up narrowing.
+ // For non-constant lower bound, just assume might be underflow. Give up narrowing.
return this;
}
}
private:
HInstruction* const initial_;
- const int increment_;
+ const int32_t increment_;
ValueBound bound_; // Additional value bound info for initial_;
DISALLOW_COPY_AND_ASSIGN(MonotonicValueRange);
@@ -446,13 +469,26 @@ class BCEVisitor : public HGraphVisitor {
return nullptr;
}
- // Narrow the value range of 'instruction' at the end of 'basic_block' with 'range',
- // and push the narrowed value range to 'successor'.
+ // Narrow the value range of `instruction` at the end of `basic_block` with `range`,
+ // and push the narrowed value range to `successor`.
void ApplyRangeFromComparison(HInstruction* instruction, HBasicBlock* basic_block,
- HBasicBlock* successor, ValueRange* range) {
+ HBasicBlock* successor, ValueRange* range) {
ValueRange* existing_range = LookupValueRange(instruction, basic_block);
- ValueRange* narrowed_range = (existing_range == nullptr) ?
- range : existing_range->Narrow(range);
+ if (existing_range == nullptr) {
+ if (range != nullptr) {
+ GetValueRangeMap(successor)->Overwrite(instruction->GetId(), range);
+ }
+ return;
+ }
+ if (existing_range->IsMonotonicValueRange()) {
+ DCHECK(instruction->IsLoopHeaderPhi());
+ // Make sure the comparison is in the loop header so each increment is
+ // checked with a comparison.
+ if (instruction->GetBlock() != basic_block) {
+ return;
+ }
+ }
+ ValueRange* narrowed_range = existing_range->Narrow(range);
if (narrowed_range != nullptr) {
GetValueRangeMap(successor)->Overwrite(instruction->GetId(), narrowed_range);
}
@@ -472,10 +508,12 @@ class BCEVisitor : public HGraphVisitor {
bool found;
ValueBound bound = ValueBound::DetectValueBoundFromValue(right, &found);
+ // Each comparison can establish a lower bound and an upper bound
+ // for the left hand side.
ValueBound lower = bound;
ValueBound upper = bound;
if (!found) {
- // No constant or array.length+c bound found.
+ // No constant or array.length+c format bound found.
// For i<j, we can still use j's upper bound as i's upper bound. Same for lower.
ValueRange* range = LookupValueRange(right, block);
if (range != nullptr) {
@@ -487,13 +525,13 @@ class BCEVisitor : public HGraphVisitor {
}
}
- bool overflow_or_underflow;
+ bool overflow, underflow;
if (cond == kCondLT || cond == kCondLE) {
if (!upper.Equals(ValueBound::Max())) {
- int compensation = (cond == kCondLT) ? -1 : 0; // upper bound is inclusive
- ValueBound new_upper = upper.Add(compensation, &overflow_or_underflow);
- if (overflow_or_underflow) {
- new_upper = ValueBound::Max();
+ int32_t compensation = (cond == kCondLT) ? -1 : 0; // upper bound is inclusive
+ ValueBound new_upper = upper.Add(compensation, &overflow, &underflow);
+ if (overflow || underflow) {
+ return;
}
ValueRange* new_range = new (GetGraph()->GetArena())
ValueRange(GetGraph()->GetArena(), ValueBound::Min(), new_upper);
@@ -501,11 +539,11 @@ class BCEVisitor : public HGraphVisitor {
}
// array.length as a lower bound isn't considered useful.
- if (!lower.Equals(ValueBound::Min()) && !lower.IsRelativeToArrayLength()) {
- int compensation = (cond == kCondLE) ? 1 : 0; // lower bound is inclusive
- ValueBound new_lower = lower.Add(compensation, &overflow_or_underflow);
- if (overflow_or_underflow) {
- new_lower = ValueBound::Min();
+ if (!lower.Equals(ValueBound::Min()) && !lower.IsRelatedToArrayLength()) {
+ int32_t compensation = (cond == kCondLE) ? 1 : 0; // lower bound is inclusive
+ ValueBound new_lower = lower.Add(compensation, &overflow, &underflow);
+ if (overflow || underflow) {
+ return;
}
ValueRange* new_range = new (GetGraph()->GetArena())
ValueRange(GetGraph()->GetArena(), new_lower, ValueBound::Max());
@@ -513,11 +551,11 @@ class BCEVisitor : public HGraphVisitor {
}
} else if (cond == kCondGT || cond == kCondGE) {
// array.length as a lower bound isn't considered useful.
- if (!lower.Equals(ValueBound::Min()) && !lower.IsRelativeToArrayLength()) {
- int compensation = (cond == kCondGT) ? 1 : 0; // lower bound is inclusive
- ValueBound new_lower = lower.Add(compensation, &overflow_or_underflow);
- if (overflow_or_underflow) {
- new_lower = ValueBound::Min();
+ if (!lower.Equals(ValueBound::Min()) && !lower.IsRelatedToArrayLength()) {
+ int32_t compensation = (cond == kCondGT) ? 1 : 0; // lower bound is inclusive
+ ValueBound new_lower = lower.Add(compensation, &overflow, &underflow);
+ if (overflow || underflow) {
+ return;
}
ValueRange* new_range = new (GetGraph()->GetArena())
ValueRange(GetGraph()->GetArena(), new_lower, ValueBound::Max());
@@ -525,10 +563,10 @@ class BCEVisitor : public HGraphVisitor {
}
if (!upper.Equals(ValueBound::Max())) {
- int compensation = (cond == kCondGE) ? -1 : 0; // upper bound is inclusive
- ValueBound new_upper = upper.Add(compensation, &overflow_or_underflow);
- if (overflow_or_underflow) {
- new_upper = ValueBound::Max();
+ int32_t compensation = (cond == kCondGE) ? -1 : 0; // upper bound is inclusive
+ ValueBound new_upper = upper.Add(compensation, &overflow, &underflow);
+ if (overflow || underflow) {
+ return;
}
ValueRange* new_range = new (GetGraph()->GetArena())
ValueRange(GetGraph()->GetArena(), ValueBound::Min(), new_upper);
@@ -541,41 +579,56 @@ class BCEVisitor : public HGraphVisitor {
HBasicBlock* block = bounds_check->GetBlock();
HInstruction* index = bounds_check->InputAt(0);
HInstruction* array_length = bounds_check->InputAt(1);
- ValueRange* index_range = LookupValueRange(index, block);
-
- if (index_range != nullptr) {
- ValueBound lower = ValueBound(nullptr, 0); // constant 0
- ValueBound upper = ValueBound(array_length, -1); // array_length - 1
- ValueRange* array_range = new (GetGraph()->GetArena())
- ValueRange(GetGraph()->GetArena(), lower, upper);
- if (index_range->FitsIn(array_range)) {
- ReplaceBoundsCheck(bounds_check, index);
+ DCHECK(array_length->IsIntConstant() || array_length->IsArrayLength());
+
+ if (!index->IsIntConstant()) {
+ ValueRange* index_range = LookupValueRange(index, block);
+ if (index_range != nullptr) {
+ ValueBound lower = ValueBound(nullptr, 0); // constant 0
+ ValueBound upper = ValueBound(array_length, -1); // array_length - 1
+ ValueRange* array_range = new (GetGraph()->GetArena())
+ ValueRange(GetGraph()->GetArena(), lower, upper);
+ if (index_range->FitsIn(array_range)) {
+ ReplaceBoundsCheck(bounds_check, index);
+ return;
+ }
+ }
+ } else {
+ int32_t constant = index->AsIntConstant()->GetValue();
+ if (constant < 0) {
+ // Will always throw exception.
+ return;
+ }
+ if (array_length->IsIntConstant()) {
+ if (constant < array_length->AsIntConstant()->GetValue()) {
+ ReplaceBoundsCheck(bounds_check, index);
+ }
return;
}
- }
- if (index->IsIntConstant()) {
- ValueRange* array_length_range = LookupValueRange(array_length, block);
- int constant = index->AsIntConstant()->GetValue();
- if (array_length_range != nullptr &&
- array_length_range->GetLower().IsConstant()) {
- if (constant < array_length_range->GetLower().GetConstant()) {
+ DCHECK(array_length->IsArrayLength());
+ ValueRange* existing_range = LookupValueRange(array_length, block);
+ if (existing_range != nullptr) {
+ ValueBound lower = existing_range->GetLower();
+ DCHECK(lower.IsConstant());
+ if (constant < lower.GetConstant()) {
ReplaceBoundsCheck(bounds_check, index);
return;
+ } else {
+ // Existing range isn't strong enough to eliminate the bounds check.
+ // Fall through to update the array_length range with info from this
+ // bounds check.
}
}
// Once we have an array access like 'array[5] = 1', we record array.length >= 6.
+ // We currently don't do it for non-constant index since a valid array[i] can't prove
+ // a valid array[i-1] yet due to the lower bound side.
ValueBound lower = ValueBound(nullptr, constant + 1);
ValueBound upper = ValueBound::Max();
ValueRange* range = new (GetGraph()->GetArena())
ValueRange(GetGraph()->GetArena(), lower, upper);
- ValueRange* existing_range = LookupValueRange(array_length, block);
- ValueRange* new_range = range;
- if (existing_range != nullptr) {
- new_range = range->Narrow(existing_range);
- }
- GetValueRangeMap(block)->Overwrite(array_length->GetId(), new_range);
+ GetValueRangeMap(block)->Overwrite(array_length->GetId(), range);
}
}
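
For a constant index, VisitBoundsCheck now distinguishes three cases: a negative constant always throws, a constant index against a constant length can be decided on the spot, and otherwise a surviving access array[c] lets the pass record array.length >= c + 1 for later checks. A simplified sketch with the range map reduced to one known minimum length per array (hypothetical names, not the real ValueRange machinery):

#include <cstdint>

// Illustrative sketch, not part of the patch.
enum class BceDecision { kKeepCheck, kRemoveCheck, kAlwaysThrows };

static BceDecision DecideConstantIndex(int32_t index,
                                       int32_t known_min_length,
                                       int32_t* new_min_length) {
  *new_min_length = known_min_length;
  if (index < 0) {
    return BceDecision::kAlwaysThrows;  // array[-1]: the check stays and will throw.
  }
  if (index < known_min_length) {
    return BceDecision::kRemoveCheck;   // Already proven in range.
  }
  // Keep the check; once it passes we know array.length >= index + 1.
  *new_min_length = index + 1;
  return BceDecision::kKeepCheck;
}
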
@@ -588,14 +641,12 @@ class BCEVisitor : public HGraphVisitor {
if (phi->IsLoopHeaderPhi() && phi->GetType() == Primitive::kPrimInt) {
DCHECK_EQ(phi->InputCount(), 2U);
HInstruction* instruction = phi->InputAt(1);
- if (instruction->IsAdd()) {
- HAdd* add = instruction->AsAdd();
- HInstruction* left = add->GetLeft();
- HInstruction* right = add->GetRight();
- if (left == phi && right->IsIntConstant()) {
+ HInstruction *left;
+ int32_t increment;
+ if (ValueBound::IsAddOrSubAConstant(instruction, &left, &increment)) {
+ if (left == phi) {
HInstruction* initial_value = phi->InputAt(0);
ValueRange* range = nullptr;
- int increment = right->AsIntConstant()->GetValue();
if (increment == 0) {
// Add constant 0. It's really a fixed value.
range = new (GetGraph()->GetArena()) ValueRange(
@@ -676,29 +727,122 @@ class BCEVisitor : public HGraphVisitor {
// Here we are interested in the typical triangular case of nested loops,
// such as the inner loop 'for (int j=0; j<array.length-i; j++)' where i
// is the index for outer loop. In this case, we know j is bounded by array.length-1.
+
+ // Try to handle (array.length - i) or (array.length + c - i) format.
+ HInstruction* left_of_left; // left input of left.
+ int32_t right_const = 0;
+ if (ValueBound::IsAddOrSubAConstant(left, &left_of_left, &right_const)) {
+ left = left_of_left;
+ }
+ // The value of left input of the sub equals (left + right_const).
+
if (left->IsArrayLength()) {
HInstruction* array_length = left->AsArrayLength();
ValueRange* right_range = LookupValueRange(right, sub->GetBlock());
if (right_range != nullptr) {
ValueBound lower = right_range->GetLower();
ValueBound upper = right_range->GetUpper();
- if (lower.IsConstant() && upper.IsRelativeToArrayLength()) {
+ if (lower.IsConstant() && upper.IsRelatedToArrayLength()) {
HInstruction* upper_inst = upper.GetInstruction();
- if (upper_inst->IsArrayLength() &&
- upper_inst->AsArrayLength() == array_length) {
- // (array.length - v) where v is in [c1, array.length + c2]
- // gets [-c2, array.length - c1] as its value range.
- ValueRange* range = new (GetGraph()->GetArena()) ValueRange(
- GetGraph()->GetArena(),
- ValueBound(nullptr, - upper.GetConstant()),
- ValueBound(array_length, - lower.GetConstant()));
- GetValueRangeMap(sub->GetBlock())->Overwrite(sub->GetId(), range);
+ // Make sure it's the same array.
+ if (ValueBound::Equal(array_length, upper_inst)) {
+ int32_t c0 = right_const;
+ int32_t c1 = lower.GetConstant();
+ int32_t c2 = upper.GetConstant();
+ // (array.length + c0 - v) where v is in [c1, array.length + c2]
+ // gets [c0 - c2, array.length + c0 - c1] as its value range.
+ if (!ValueBound::WouldAddOverflowOrUnderflow(c0, -c2) &&
+ !ValueBound::WouldAddOverflowOrUnderflow(c0, -c1)) {
+ if ((c0 - c1) <= 0) {
+ // array.length + (c0 - c1) won't overflow/underflow.
+ ValueRange* range = new (GetGraph()->GetArena()) ValueRange(
+ GetGraph()->GetArena(),
+ ValueBound(nullptr, right_const - upper.GetConstant()),
+ ValueBound(array_length, right_const - lower.GetConstant()));
+ GetValueRangeMap(sub->GetBlock())->Overwrite(sub->GetId(), range);
+ }
+ }
}
}
}
}
}
+ void FindAndHandlePartialArrayLength(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsShr() || instruction->IsUShr());
+ HInstruction* right = instruction->GetRight();
+ int32_t right_const;
+ if (right->IsIntConstant()) {
+ right_const = right->AsIntConstant()->GetValue();
+ // Detect division by two or more.
+ if ((instruction->IsDiv() && right_const <= 1) ||
+ (instruction->IsShr() && right_const < 1) ||
+ (instruction->IsUShr() && right_const < 1)) {
+ return;
+ }
+ } else {
+ return;
+ }
+
+ // Try to handle array.length/2 or (array.length-1)/2 format.
+ HInstruction* left = instruction->GetLeft();
+ HInstruction* left_of_left; // left input of left.
+ int32_t c = 0;
+ if (ValueBound::IsAddOrSubAConstant(left, &left_of_left, &c)) {
+ left = left_of_left;
+ }
+ // The value of left input of instruction equals (left + c).
+
+ // (array_length + 1) or smaller divided by two or more
+ // always generate a value in [INT_MIN, array_length].
+ // This is true even if array_length is INT_MAX.
+ if (left->IsArrayLength() && c <= 1) {
+ if (instruction->IsUShr() && c < 0) {
+ // Make sure for unsigned shift, left side is not negative.
+ // e.g. if array_length is 2, ((array_length - 3) >>> 2) is way bigger
+ // than array_length.
+ return;
+ }
+ ValueRange* range = new (GetGraph()->GetArena()) ValueRange(
+ GetGraph()->GetArena(),
+ ValueBound(nullptr, INT_MIN),
+ ValueBound(left, 0));
+ GetValueRangeMap(instruction->GetBlock())->Overwrite(instruction->GetId(), range);
+ }
+ }
+
+ void VisitDiv(HDiv* div) {
+ FindAndHandlePartialArrayLength(div);
+ }
+
+ void VisitShr(HShr* shr) {
+ FindAndHandlePartialArrayLength(shr);
+ }
+
+ void VisitUShr(HUShr* ushr) {
+ FindAndHandlePartialArrayLength(ushr);
+ }
+
+ void VisitNewArray(HNewArray* new_array) {
+ HInstruction* len = new_array->InputAt(0);
+ if (!len->IsIntConstant()) {
+ HInstruction *left;
+ int32_t right_const;
+ if (ValueBound::IsAddOrSubAConstant(len, &left, &right_const)) {
+ // (left + right_const) is used as the size when allocating the array.
+ // We record "-right_const <= left <= new_array - right_const".
+ ValueBound lower = ValueBound(nullptr, -right_const);
+ // We use new_array for the bound instead of new_array.length,
+ // which isn't available as an instruction yet. new_array will
+ // be treated the same as new_array.length when it's used in a ValueBound.
+ ValueBound upper = ValueBound(new_array, -right_const);
+ ValueRange* range = new (GetGraph()->GetArena())
+ ValueRange(GetGraph()->GetArena(), lower, upper);
+ GetValueRangeMap(new_array->GetBlock())->Overwrite(left->GetId(), range);
+ }
+ }
+ }
+
std::vector<std::unique_ptr<ArenaSafeMap<int, ValueRange*>>> maps_;
DISALLOW_COPY_AND_ASSIGN(BCEVisitor);
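
FindAndHandlePartialArrayLength exploits that (array.length + c) divided or shifted right by two or more is never larger than array.length as long as c <= 1, even when the addition wraps; the UShr case additionally rejects c < 0 because an unsigned shift of a negative value produces a huge result. A tiny check of that bound under Java's wrapping int semantics:

#include <cassert>
#include <cstdint>
#include <initializer_list>

// Illustrative sketch, not part of the patch. Java ints wrap on overflow, so the
// addition is modelled through uint32_t to keep this host demo well defined.
static int32_t JavaAdd(int32_t a, int32_t b) {
  return static_cast<int32_t>(static_cast<uint32_t>(a) + static_cast<uint32_t>(b));
}

int main() {
  for (int32_t length : {0, 1, 7, INT32_MAX}) {
    int32_t half = JavaAdd(length, 1) / 2;  // (length + 1) / 2, Java semantics.
    assert(half <= length);                 // Even the INT32_MAX wrap case goes negative.
  }
  return 0;
}
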
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index 3dcb08d195..a298413d14 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -14,19 +14,22 @@
* limitations under the License.
*/
+#include "base/arena_allocator.h"
#include "bounds_check_elimination.h"
#include "builder.h"
#include "gvn.h"
+#include "instruction_simplifier.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
#include "side_effects_analysis.h"
-#include "utils/arena_allocator.h"
#include "gtest/gtest.h"
namespace art {
-static void RunGvn(HGraph* graph) {
+static void RunSimplifierAndGvn(HGraph* graph) {
+ InstructionSimplifier simplify(graph);
+ simplify.Run();
SideEffectsAnalysis side_effects(graph);
side_effects.Run();
GVNOptimization(graph, side_effects).Run();
@@ -127,7 +130,7 @@ TEST(BoundsCheckEliminationTest, NarrowingRangeArrayBoundsElimination) {
block3->AddSuccessor(block4); // False successor
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination(graph);
bounds_check_elimination.Run();
ASSERT_FALSE(IsRemoved(bounds_check2));
@@ -202,7 +205,7 @@ TEST(BoundsCheckEliminationTest, OverflowArrayBoundsElimination) {
block3->AddSuccessor(exit);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination(graph);
bounds_check_elimination.Run();
ASSERT_FALSE(IsRemoved(bounds_check));
@@ -277,7 +280,7 @@ TEST(BoundsCheckEliminationTest, UnderflowArrayBoundsElimination) {
block3->AddSuccessor(exit);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination(graph);
bounds_check_elimination.Run();
ASSERT_FALSE(IsRemoved(bounds_check));
@@ -351,7 +354,7 @@ TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) {
exit->AddInstruction(new (&allocator) HExit());
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination(graph);
bounds_check_elimination.Run();
ASSERT_FALSE(IsRemoved(bounds_check5));
@@ -397,7 +400,6 @@ static HGraph* BuildSSAGraph1(ArenaAllocator* allocator,
loop_body->AddSuccessor(loop_header);
HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt);
- phi->AddInput(constant_initial);
HInstruction* null_check = new (allocator) HNullCheck(parameter, 0);
HInstruction* array_length = new (allocator) HArrayLength(null_check);
HInstruction* cmp = nullptr;
@@ -413,6 +415,7 @@ static HGraph* BuildSSAGraph1(ArenaAllocator* allocator,
loop_header->AddInstruction(array_length);
loop_header->AddInstruction(cmp);
loop_header->AddInstruction(if_inst);
+ phi->AddInput(constant_initial);
null_check = new (allocator) HNullCheck(parameter, 0);
array_length = new (allocator) HArrayLength(null_check);
@@ -450,7 +453,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) {
// HArrayLength which uses the null check as its input.
graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_after_gvn(graph);
bounds_check_elimination_after_gvn.Run();
ASSERT_TRUE(IsRemoved(bounds_check));
@@ -458,7 +461,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) {
// for (int i=1; i<array.length; i++) { array[i] = 10; // Can eliminate. }
graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 1);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_with_initial_1(graph);
bounds_check_elimination_with_initial_1.Run();
ASSERT_TRUE(IsRemoved(bounds_check));
@@ -466,7 +469,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) {
// for (int i=-1; i<array.length; i++) { array[i] = 10; // Can't eliminate. }
graph = BuildSSAGraph1(&allocator, &bounds_check, -1, 1);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph);
bounds_check_elimination_with_initial_minus_1.Run();
ASSERT_FALSE(IsRemoved(bounds_check));
@@ -474,7 +477,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) {
// for (int i=0; i<=array.length; i++) { array[i] = 10; // Can't eliminate. }
graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1, kCondGT);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_with_greater_than(graph);
bounds_check_elimination_with_greater_than.Run();
ASSERT_FALSE(IsRemoved(bounds_check));
@@ -483,7 +486,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) {
// array[i] = 10; // Can't eliminate due to overflow concern. }
graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 2);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_with_increment_2(graph);
bounds_check_elimination_with_increment_2.Run();
ASSERT_FALSE(IsRemoved(bounds_check));
@@ -491,7 +494,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) {
// for (int i=1; i<array.length; i += 2) { array[i] = 10; // Can eliminate. }
graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 2);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_with_increment_2_from_1(graph);
bounds_check_elimination_with_increment_2_from_1.Run();
ASSERT_TRUE(IsRemoved(bounds_check));
@@ -541,7 +544,6 @@ static HGraph* BuildSSAGraph2(ArenaAllocator* allocator,
loop_body->AddSuccessor(loop_header);
HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt);
- phi->AddInput(array_length);
HInstruction* cmp = nullptr;
if (cond == kCondLE) {
cmp = new (allocator) HLessThanOrEqual(phi, constant_initial);
@@ -553,6 +555,7 @@ static HGraph* BuildSSAGraph2(ArenaAllocator* allocator,
loop_header->AddPhi(phi);
loop_header->AddInstruction(cmp);
loop_header->AddInstruction(if_inst);
+ phi->AddInput(array_length);
HInstruction* add = new (allocator) HAdd(Primitive::kPrimInt, phi, constant_minus_1);
null_check = new (allocator) HNullCheck(parameter, 0);
@@ -591,7 +594,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) {
// HArrayLength which uses the null check as its input.
graph = BuildSSAGraph2(&allocator, &bounds_check, 0);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_after_gvn(graph);
bounds_check_elimination_after_gvn.Run();
ASSERT_TRUE(IsRemoved(bounds_check));
@@ -599,7 +602,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) {
// for (int i=array.length; i>1; i--) { array[i-1] = 10; // Can eliminate. }
graph = BuildSSAGraph2(&allocator, &bounds_check, 1);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_with_initial_1(graph);
bounds_check_elimination_with_initial_1.Run();
ASSERT_TRUE(IsRemoved(bounds_check));
@@ -607,7 +610,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) {
// for (int i=array.length; i>-1; i--) { array[i-1] = 10; // Can't eliminate. }
graph = BuildSSAGraph2(&allocator, &bounds_check, -1);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph);
bounds_check_elimination_with_initial_minus_1.Run();
ASSERT_FALSE(IsRemoved(bounds_check));
@@ -615,7 +618,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) {
// for (int i=array.length; i>=0; i--) { array[i-1] = 10; // Can't eliminate. }
graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -1, kCondLT);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_with_less_than(graph);
bounds_check_elimination_with_less_than.Run();
ASSERT_FALSE(IsRemoved(bounds_check));
@@ -623,13 +626,13 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) {
// for (int i=array.length; i>0; i-=2) { array[i-1] = 10; // Can eliminate. }
graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -2);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_increment_minus_2(graph);
bounds_check_elimination_increment_minus_2.Run();
ASSERT_TRUE(IsRemoved(bounds_check));
}
-// int[] array = new array[10];
+// int[] array = new int[10];
// for (int i=0; i<10; i+=increment) { array[i] = 10; }
static HGraph* BuildSSAGraph3(ArenaAllocator* allocator,
HInstruction** bounds_check,
@@ -669,7 +672,6 @@ static HGraph* BuildSSAGraph3(ArenaAllocator* allocator,
loop_body->AddSuccessor(loop_header);
HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt);
- phi->AddInput(constant_initial);
HInstruction* cmp = nullptr;
if (cond == kCondGE) {
cmp = new (allocator) HGreaterThanOrEqual(phi, constant_10);
@@ -681,6 +683,7 @@ static HGraph* BuildSSAGraph3(ArenaAllocator* allocator,
loop_header->AddPhi(phi);
loop_header->AddInstruction(cmp);
loop_header->AddInstruction(if_inst);
+ phi->AddInput(constant_initial);
HNullCheck* null_check = new (allocator) HNullCheck(new_array, 0);
HArrayLength* array_length = new (allocator) HArrayLength(null_check);
@@ -705,39 +708,39 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination3) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- // int[] array = new array[10];
+ // int[] array = new int[10];
// for (int i=0; i<10; i++) { array[i] = 10; // Can eliminate. }
HInstruction* bounds_check = nullptr;
HGraph* graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGE);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_after_gvn(graph);
bounds_check_elimination_after_gvn.Run();
ASSERT_TRUE(IsRemoved(bounds_check));
- // int[] array = new array[10];
+ // int[] array = new int[10];
// for (int i=1; i<10; i++) { array[i] = 10; // Can eliminate. }
graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 1, kCondGE);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_with_initial_1(graph);
bounds_check_elimination_with_initial_1.Run();
ASSERT_TRUE(IsRemoved(bounds_check));
- // int[] array = new array[10];
+ // int[] array = new int[10];
// for (int i=0; i<=10; i++) { array[i] = 10; // Can't eliminate. }
graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGT);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_with_greater_than(graph);
bounds_check_elimination_with_greater_than.Run();
ASSERT_FALSE(IsRemoved(bounds_check));
- // int[] array = new array[10];
+ // int[] array = new int[10];
// for (int i=1; i<10; i+=8) { array[i] = 10; // Can eliminate. }
graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 8, kCondGE);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_increment_8(graph);
bounds_check_elimination_increment_8.Run();
ASSERT_TRUE(IsRemoved(bounds_check));
@@ -782,7 +785,6 @@ static HGraph* BuildSSAGraph4(ArenaAllocator* allocator,
loop_body->AddSuccessor(loop_header);
HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt);
- phi->AddInput(constant_initial);
HInstruction* null_check = new (allocator) HNullCheck(parameter, 0);
HInstruction* array_length = new (allocator) HArrayLength(null_check);
HInstruction* cmp = nullptr;
@@ -797,6 +799,7 @@ static HGraph* BuildSSAGraph4(ArenaAllocator* allocator,
loop_header->AddInstruction(array_length);
loop_header->AddInstruction(cmp);
loop_header->AddInstruction(if_inst);
+ phi->AddInput(constant_initial);
null_check = new (allocator) HNullCheck(parameter, 0);
array_length = new (allocator) HArrayLength(null_check);
@@ -838,7 +841,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) {
// HArrayLength which uses the null check as its input.
graph = BuildSSAGraph4(&allocator, &bounds_check, 0);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_after_gvn(graph);
bounds_check_elimination_after_gvn.Run();
ASSERT_TRUE(IsRemoved(bounds_check));
@@ -846,7 +849,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) {
// for (int i=1; i<array.length; i++) { array[array.length-i-1] = 10; // Can eliminate. }
graph = BuildSSAGraph4(&allocator, &bounds_check, 1);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_with_initial_1(graph);
bounds_check_elimination_with_initial_1.Run();
ASSERT_TRUE(IsRemoved(bounds_check));
@@ -854,7 +857,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) {
// for (int i=0; i<=array.length; i++) { array[array.length-i] = 10; // Can't eliminate. }
graph = BuildSSAGraph4(&allocator, &bounds_check, 0, kCondGT);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_with_greater_than(graph);
bounds_check_elimination_with_greater_than.Run();
ASSERT_FALSE(IsRemoved(bounds_check));
@@ -901,7 +904,6 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) {
HBasicBlock* outer_header = new (&allocator) HBasicBlock(graph);
graph->AddBlock(outer_header);
HPhi* phi_i = new (&allocator) HPhi(&allocator, 0, 0, Primitive::kPrimInt);
- phi_i->AddInput(constant_0);
HNullCheck* null_check = new (&allocator) HNullCheck(parameter, 0);
HArrayLength* array_length = new (&allocator) HArrayLength(null_check);
HAdd* add = new (&allocator) HAdd(Primitive::kPrimInt, array_length, constant_minus_1);
@@ -913,11 +915,11 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) {
outer_header->AddInstruction(add);
outer_header->AddInstruction(cmp);
outer_header->AddInstruction(if_inst);
+ phi_i->AddInput(constant_0);
HBasicBlock* inner_header = new (&allocator) HBasicBlock(graph);
graph->AddBlock(inner_header);
HPhi* phi_j = new (&allocator) HPhi(&allocator, 0, 0, Primitive::kPrimInt);
- phi_j->AddInput(constant_0);
null_check = new (&allocator) HNullCheck(parameter, 0);
array_length = new (&allocator) HArrayLength(null_check);
HSub* sub = new (&allocator) HSub(Primitive::kPrimInt, array_length, phi_i);
@@ -931,6 +933,7 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) {
inner_header->AddInstruction(add);
inner_header->AddInstruction(cmp);
inner_header->AddInstruction(if_inst);
+ phi_j->AddInput(constant_0);
HBasicBlock* inner_body_compare = new (&allocator) HBasicBlock(graph);
graph->AddBlock(inner_body_compare);
@@ -1030,7 +1033,7 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) {
outer_body_add->AddSuccessor(outer_header);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
// gvn should remove the same bounds check.
ASSERT_FALSE(IsRemoved(bounds_check1));
ASSERT_FALSE(IsRemoved(bounds_check2));

diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index c5101363ee..3e4a6169d9 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -17,13 +17,13 @@
#ifndef ART_COMPILER_OPTIMIZING_BUILDER_H_
#define ART_COMPILER_OPTIMIZING_BUILDER_H_
+#include "base/arena_object.h"
#include "dex_file.h"
#include "dex_file-inl.h"
#include "driver/compiler_driver.h"
#include "driver/dex_compilation_unit.h"
#include "optimizing_compiler_stats.h"
#include "primitive.h"
-#include "utils/arena_object.h"
#include "utils/growable_array.h"
#include "nodes.h"
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index fd4e391470..2a57fdc929 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -40,9 +40,17 @@ size_t CodeGenerator::GetCacheOffset(uint32_t index) {
return mirror::ObjectArray<mirror::Object>::OffsetOfElement(index).SizeValue();
}
-void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) {
- DCHECK_EQ(frame_size_, kUninitializedFrameSize);
+static bool IsSingleGoto(HBasicBlock* block) {
+ HLoopInformation* loop_info = block->GetLoopInformation();
+ // TODO: Remove the null check b/19084197.
+ return (block->GetFirstInstruction() != nullptr)
+ && (block->GetFirstInstruction() == block->GetLastInstruction())
+ && block->GetLastInstruction()->IsGoto()
+ // Back edges generate the suspend check.
+ && (loop_info == nullptr || !loop_info->IsBackEdge(block));
+}
+void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) {
Initialize();
if (!is_leaf) {
MarkNotLeaf();
@@ -58,19 +66,43 @@ void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) {
CompileInternal(allocator, /* is_baseline */ true);
}
+bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const {
+ DCHECK_EQ(block_order_->Get(current_block_index_), current);
+ return GetNextBlockToEmit() == FirstNonEmptyBlock(next);
+}
+
+HBasicBlock* CodeGenerator::GetNextBlockToEmit() const {
+ for (size_t i = current_block_index_ + 1; i < block_order_->Size(); ++i) {
+ HBasicBlock* block = block_order_->Get(i);
+ if (!IsSingleGoto(block)) {
+ return block;
+ }
+ }
+ return nullptr;
+}
+
+HBasicBlock* CodeGenerator::FirstNonEmptyBlock(HBasicBlock* block) const {
+ while (IsSingleGoto(block)) {
+ block = block->GetSuccessors().Get(0);
+ }
+ return block;
+}
+
void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) {
- HGraphVisitor* location_builder = GetLocationBuilder();
HGraphVisitor* instruction_visitor = GetInstructionVisitor();
DCHECK_EQ(current_block_index_, 0u);
GenerateFrameEntry();
for (size_t e = block_order_->Size(); current_block_index_ < e; ++current_block_index_) {
HBasicBlock* block = block_order_->Get(current_block_index_);
+ // Don't generate code for an empty block. Its predecessors will branch to its successor
+ // directly. Also, the label of that block will not be emitted, so this helps catch
+ // errors where we reference that label.
+ if (IsSingleGoto(block)) continue;
Bind(block);
for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
HInstruction* current = it.Current();
if (is_baseline) {
- current->Accept(location_builder);
- InitLocations(current);
+ InitLocationsBaseline(current);
}
current->Accept(instruction_visitor);
}
@@ -88,7 +120,6 @@ void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline)
void CodeGenerator::CompileOptimized(CodeAllocator* allocator) {
// The register allocator already called `InitializeCodeGeneration`,
// where the frame size has been computed.
- DCHECK_NE(frame_size_, kUninitializedFrameSize);
DCHECK(block_order_ != nullptr);
Initialize();
CompileInternal(allocator, /* is_baseline */ false);
@@ -138,13 +169,22 @@ void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots,
ComputeSpillMask();
first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize;
- SetFrameSize(RoundUp(
- number_of_spill_slots * kVRegSize
- + number_of_out_slots * kVRegSize
- + maximum_number_of_live_core_registers * GetWordSize()
- + maximum_number_of_live_fp_registers * GetFloatingPointSpillSlotSize()
- + FrameEntrySpillSize(),
- kStackAlignment));
+ if (number_of_spill_slots == 0
+ && !HasAllocatedCalleeSaveRegisters()
+ && IsLeafMethod()
+ && !RequiresCurrentMethod()) {
+ DCHECK_EQ(maximum_number_of_live_core_registers, 0u);
+ DCHECK_EQ(maximum_number_of_live_fp_registers, 0u);
+ SetFrameSize(CallPushesPC() ? GetWordSize() : 0);
+ } else {
+ SetFrameSize(RoundUp(
+ number_of_spill_slots * kVRegSize
+ + number_of_out_slots * kVRegSize
+ + maximum_number_of_live_core_registers * GetWordSize()
+ + maximum_number_of_live_fp_registers * GetFloatingPointSpillSlotSize()
+ + FrameEntrySpillSize(),
+ kStackAlignment));
+ }
}
Location CodeGenerator::GetTemporaryLocation(HTemporary* temp) const {
@@ -294,7 +334,8 @@ void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const {
}
}
-void CodeGenerator::InitLocations(HInstruction* instruction) {
+void CodeGenerator::InitLocationsBaseline(HInstruction* instruction) {
+ AllocateLocations(instruction);
if (instruction->GetLocations() == nullptr) {
if (instruction->IsTemporary()) {
HInstruction* previous = instruction->GetPrevious();
@@ -320,10 +361,17 @@ void CodeGenerator::InitLocations(HInstruction* instruction) {
}
}
-bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const {
- DCHECK_EQ(block_order_->Get(current_block_index_), current);
- return (current_block_index_ < block_order_->Size() - 1)
- && (block_order_->Get(current_block_index_ + 1) == next);
+void CodeGenerator::AllocateLocations(HInstruction* instruction) {
+ instruction->Accept(GetLocationBuilder());
+ LocationSummary* locations = instruction->GetLocations();
+ if (!instruction->IsSuspendCheckEntry()) {
+ if (locations != nullptr && locations->CanCall()) {
+ MarkNotLeaf();
+ }
+ if (instruction->NeedsCurrentMethod()) {
+ SetRequiresCurrentMethod();
+ }
+ }
}
CodeGenerator* CodeGenerator::Create(HGraph* graph,
@@ -572,7 +620,7 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) {
Location location = locations->GetEnvironmentAt(i);
switch (location.GetKind()) {
case Location::kConstant: {
- DCHECK(current == location.GetConstant());
+ DCHECK_EQ(current, location.GetConstant());
if (current->IsLongConstant()) {
int64_t value = current->AsLongConstant()->GetValue();
stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, Low32Bits(value));
@@ -588,6 +636,8 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) {
} else if (current->IsIntConstant()) {
int32_t value = current->AsIntConstant()->GetValue();
stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, value);
+ } else if (current->IsNullConstant()) {
+ stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, 0);
} else {
DCHECK(current->IsFloatConstant());
int32_t value = bit_cast<float, int32_t>(current->AsFloatConstant()->GetValue());
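
The reworked InitializeCodeGeneration() above only allocates a frame when something actually needs stack space; otherwise the frame collapses to the return-PC slot (or to nothing on architectures where a call does not push a PC). A minimal standalone sketch of that decision, with kVRegSize, kStackAlignment and the word size as illustrative stand-ins and the fp spill slot size assumed equal to the word size:

#include <cstddef>
#include <cstdint>

// Sketch of the frame-size decision added to InitializeCodeGeneration().
uint32_t ComputeFrameSize(size_t spill_slots, size_t out_slots,
                          size_t live_core_regs, size_t live_fp_regs,
                          size_t entry_spill_bytes, size_t word_size,
                          bool has_callee_saves, bool is_leaf,
                          bool requires_current_method, bool call_pushes_pc) {
  constexpr size_t kVRegSize = 4;
  constexpr size_t kStackAlignment = 16;
  if (spill_slots == 0 && !has_callee_saves && is_leaf && !requires_current_method) {
    // Nothing needs stack space: keep only the return-PC slot pushed by the call, if any.
    return static_cast<uint32_t>(call_pushes_pc ? word_size : 0u);
  }
  size_t raw = spill_slots * kVRegSize + out_slots * kVRegSize +
               live_core_regs * word_size + live_fp_regs * word_size + entry_spill_bytes;
  return static_cast<uint32_t>((raw + kStackAlignment - 1) & ~(kStackAlignment - 1));
}

For a leaf method with no spills, no callee saves and no current-method access this yields the word size on x86/x86-64 (the pushed return address) and zero elsewhere, which is exactly what HasEmptyFrame() later tests against.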
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index ab63b911b2..f46a36d02f 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -30,7 +30,6 @@
namespace art {
static size_t constexpr kVRegSize = 4;
-static size_t constexpr kUninitializedFrameSize = 0;
// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
@@ -92,6 +91,8 @@ class CodeGenerator {
HGraph* GetGraph() const { return graph_; }
+ HBasicBlock* GetNextBlockToEmit() const;
+ HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;
size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
@@ -107,8 +108,6 @@ class CodeGenerator {
virtual void GenerateFrameExit() = 0;
virtual void Bind(HBasicBlock* block) = 0;
virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) = 0;
- virtual HGraphVisitor* GetLocationBuilder() = 0;
- virtual HGraphVisitor* GetInstructionVisitor() = 0;
virtual Assembler* GetAssembler() = 0;
virtual size_t GetWordSize() const = 0;
virtual size_t GetFloatingPointSpillSlotSize() const = 0;
@@ -196,6 +195,15 @@ class CodeGenerator {
void MarkNotLeaf() {
is_leaf_ = false;
+ requires_current_method_ = true;
+ }
+
+ void SetRequiresCurrentMethod() {
+ requires_current_method_ = true;
+ }
+
+ bool RequiresCurrentMethod() const {
+ return requires_current_method_;
}
// Clears the spill slots taken by loop phis in the `LocationSummary` of the
@@ -228,6 +236,41 @@ class CodeGenerator {
allocated_registers_.Add(location);
}
+ void AllocateLocations(HInstruction* instruction);
+
+  // Tells whether the stack frame of the compiled method is considered
+  // "empty", that is, it either has a size of zero or contains only the
+  // saved return address register.
+ bool HasEmptyFrame() const {
+ return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
+ }
+
+ static int32_t GetInt32ValueOf(HConstant* constant) {
+ if (constant->IsIntConstant()) {
+ return constant->AsIntConstant()->GetValue();
+ } else if (constant->IsNullConstant()) {
+ return 0;
+ } else {
+ DCHECK(constant->IsFloatConstant());
+ return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
+ }
+ }
+
+ static int64_t GetInt64ValueOf(HConstant* constant) {
+ if (constant->IsIntConstant()) {
+ return constant->AsIntConstant()->GetValue();
+ } else if (constant->IsNullConstant()) {
+ return 0;
+ } else if (constant->IsFloatConstant()) {
+ return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
+ } else if (constant->IsLongConstant()) {
+ return constant->AsLongConstant()->GetValue();
+ } else {
+ DCHECK(constant->IsDoubleConstant());
+ return bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue());
+ }
+ }
+
protected:
CodeGenerator(HGraph* graph,
size_t number_of_core_registers,
@@ -236,7 +279,7 @@ class CodeGenerator {
uint32_t core_callee_save_mask,
uint32_t fpu_callee_save_mask,
const CompilerOptions& compiler_options)
- : frame_size_(kUninitializedFrameSize),
+ : frame_size_(0),
core_spill_mask_(0),
fpu_spill_mask_(0),
first_register_slot_in_slow_path_(0),
@@ -255,6 +298,7 @@ class CodeGenerator {
block_order_(nullptr),
current_block_index_(0),
is_leaf_(true),
+ requires_current_method_(false),
stack_map_stream_(graph->GetArena()) {}
// Register allocation logic.
@@ -269,11 +313,12 @@ class CodeGenerator {
virtual Location GetStackLocation(HLoadLocal* load) const = 0;
virtual ParallelMoveResolver* GetMoveResolver() = 0;
+ virtual HGraphVisitor* GetLocationBuilder() = 0;
+ virtual HGraphVisitor* GetInstructionVisitor() = 0;
// Returns the location of the first spilled entry for floating point registers,
// relative to the stack pointer.
uint32_t GetFpuSpillStart() const {
- DCHECK_NE(frame_size_, kUninitializedFrameSize);
return GetFrameSize() - FrameEntrySpillSize();
}
@@ -289,6 +334,25 @@ class CodeGenerator {
return GetFpuSpillSize() + GetCoreSpillSize();
}
+ bool HasAllocatedCalleeSaveRegisters() const {
+    // We check the core registers against 1 because the allocated core registers
+    // always include the return PC.
+ return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
+ || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
+ }
+
+ bool CallPushesPC() const {
+ InstructionSet instruction_set = GetInstructionSet();
+ return instruction_set == kX86 || instruction_set == kX86_64;
+ }
+
+ // Arm64 has its own type for a label, so we need to templatize this method
+ // to share the logic.
+ template <typename T>
+ T* CommonGetLabelOf(T* raw_pointer_to_labels_array, HBasicBlock* block) const {
+ block = FirstNonEmptyBlock(block);
+ return raw_pointer_to_labels_array + block->GetBlockId();
+ }
+
// Frame size required for this method.
uint32_t frame_size_;
uint32_t core_spill_mask_;
@@ -311,7 +375,7 @@ class CodeGenerator {
const uint32_t fpu_callee_save_mask_;
private:
- void InitLocations(HInstruction* instruction);
+ void InitLocationsBaseline(HInstruction* instruction);
size_t GetStackOffsetOfSavedRegister(size_t index);
void CompileInternal(CodeAllocator* allocator, bool is_baseline);
@@ -328,8 +392,12 @@ class CodeGenerator {
// we are generating code for.
size_t current_block_index_;
+ // Whether the method is a leaf method.
bool is_leaf_;
+ // Whether an instruction in the graph accesses the current method.
+ bool requires_current_method_;
+
StackMapStream stack_map_stream_;
DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
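
CommonGetLabelOf() above is what keeps branches correct once single-goto blocks stop being emitted: such a block has neither code nor a label, so any branch to it has to target the first non-trivial block down the goto chain. A plain-data sketch of that forwarding, where Block and the label type stand in for HBasicBlock and the per-backend label class:

#include <cstddef>
#include <vector>

struct Block {
  int id;
  Block* goto_successor;  // non-null iff the block is a single unconditional goto
};

template <typename Label>
Label* LabelOf(std::vector<Label>& labels, const Block* block) {
  while (block->goto_successor != nullptr) {
    block = block->goto_successor;  // same skipping as FirstNonEmptyBlock()
  }
  return &labels[static_cast<size_t>(block->id)];
}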
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 78fd181dcf..e864ae1cec 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -19,6 +19,8 @@
#include "arch/arm/instruction_set_features_arm.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
+#include "intrinsics.h"
+#include "intrinsics_arm.h"
#include "mirror/array-inl.h"
#include "mirror/art_method.h"
#include "mirror/class.h"
@@ -32,11 +34,6 @@ namespace art {
namespace arm {
-static DRegister FromLowSToD(SRegister reg) {
- DCHECK_EQ(reg % 2, 0);
- return static_cast<DRegister>(reg / 2);
-}
-
static bool ExpectedPairLayout(Location location) {
// We expect this for both core and fpu register pairs.
return ((location.low() & 1) == 0) && (location.low() + 1 == location.high());
@@ -58,6 +55,10 @@ static constexpr Register kCoreCalleeSaves[] =
static constexpr SRegister kFpuCalleeSaves[] =
{ S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31 };
+// D31 cannot be split into two S registers, and the register allocator only works on
+// S registers. Therefore there is no need to block it.
+static constexpr DRegister DTMP = D31;
+
class InvokeRuntimeCallingConvention : public CallingConvention<Register, SRegister> {
public:
InvokeRuntimeCallingConvention()
@@ -73,20 +74,6 @@ class InvokeRuntimeCallingConvention : public CallingConvention<Register, SRegis
#define __ reinterpret_cast<ArmAssembler*>(codegen->GetAssembler())->
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmWordSize, x).Int32Value()
-class SlowPathCodeARM : public SlowPathCode {
- public:
- SlowPathCodeARM() : entry_label_(), exit_label_() {}
-
- Label* GetEntryLabel() { return &entry_label_; }
- Label* GetExitLabel() { return &exit_label_; }
-
- private:
- Label entry_label_;
- Label exit_label_;
-
- DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARM);
-};
-
class NullCheckSlowPathARM : public SlowPathCodeARM {
public:
explicit NullCheckSlowPathARM(HNullCheck* instruction) : instruction_(instruction) {}
@@ -396,10 +383,6 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph,
move_resolver_(graph->GetArena(), this),
assembler_(true),
isa_features_(isa_features) {
- // Save one extra register for baseline. Note that on thumb2, there is no easy
- // instruction to restore just the PC, so this actually helps both baseline
- // and non-baseline to save and restore at least two registers at entry and exit.
- AddAllocatedRegister(Location::RegisterLocation(kCoreSavedRegisterForBaseline));
// Save the PC register to mimic Quick.
AddAllocatedRegister(Location::RegisterLocation(PC));
}
@@ -508,6 +491,10 @@ static uint32_t LeastSignificantBit(uint32_t mask) {
void CodeGeneratorARM::ComputeSpillMask() {
core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
+ // Save one extra register for baseline. Note that on thumb2, there is no easy
+ // instruction to restore just the PC, so this actually helps both baseline
+ // and non-baseline to save and restore at least two registers at entry and exit.
+ core_spill_mask_ |= (1 << kCoreSavedRegisterForBaseline);
DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
// We use vpush and vpop for saving and restoring floating point registers, which take
@@ -529,6 +516,10 @@ void CodeGeneratorARM::GenerateFrameEntry() {
DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
__ Bind(&frame_entry_label_);
+ if (HasEmptyFrame()) {
+ return;
+ }
+
if (!skip_overflow_check) {
__ AddConstant(IP, SP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm)));
__ LoadFromOffset(kLoadWord, IP, IP, 0);
@@ -547,6 +538,10 @@ void CodeGeneratorARM::GenerateFrameEntry() {
}
void CodeGeneratorARM::GenerateFrameExit() {
+ if (HasEmptyFrame()) {
+ __ bx(LR);
+ return;
+ }
__ AddConstant(SP, GetFrameSize() - FrameEntrySpillSize());
if (fpu_spill_mask_ != 0) {
SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_));
@@ -784,8 +779,8 @@ void CodeGeneratorARM::Move(HInstruction* instruction, Location location, HInstr
if (locations != nullptr && locations->Out().IsConstant()) {
HConstant* const_to_move = locations->Out().GetConstant();
- if (const_to_move->IsIntConstant()) {
- int32_t value = const_to_move->AsIntConstant()->GetValue();
+ if (const_to_move->IsIntConstant() || const_to_move->IsNullConstant()) {
+ int32_t value = GetInt32ValueOf(const_to_move);
if (location.IsRegister()) {
__ LoadImmediate(location.AsRegister<Register>(), value);
} else {
@@ -952,8 +947,8 @@ void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) {
__ cmp(left, ShifterOperand(locations->InAt(1).AsRegister<Register>()));
} else {
DCHECK(locations->InAt(1).IsConstant());
- int32_t value =
- locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ HConstant* constant = locations->InAt(1).GetConstant();
+ int32_t value = CodeGenerator::GetInt32ValueOf(constant);
ShifterOperand operand;
if (GetAssembler()->ShifterOperandCanHold(R0, left, CMP, value, &operand)) {
__ cmp(left, operand);
@@ -1114,6 +1109,17 @@ void InstructionCodeGeneratorARM::VisitIntConstant(HIntConstant* constant) {
UNUSED(constant);
}
+void LocationsBuilderARM::VisitNullConstant(HNullConstant* constant) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+ locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorARM::VisitNullConstant(HNullConstant* constant) {
+ // Will be generated at use site.
+ UNUSED(constant);
+}
+
void LocationsBuilderARM::VisitLongConstant(HLongConstant* constant) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
@@ -1168,44 +1174,37 @@ void InstructionCodeGeneratorARM::VisitReturn(HReturn* ret) {
}
void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+ IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(),
+ codegen_->GetInstructionSetFeatures());
+ if (intrinsic.TryDispatch(invoke)) {
+ return;
+ }
+
HandleInvoke(invoke);
}
void CodeGeneratorARM::LoadCurrentMethod(Register reg) {
+ DCHECK(RequiresCurrentMethod());
__ LoadFromOffset(kLoadWord, reg, SP, kCurrentMethodStackOffset);
}
-void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
-
- // TODO: Implement all kinds of calls:
- // 1) boot -> boot
- // 2) app -> boot
- // 3) app -> app
- //
- // Currently we implement the app -> app logic, which looks up in the resolve cache.
+static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM* codegen) {
+ if (invoke->GetLocations()->Intrinsified()) {
+ IntrinsicCodeGeneratorARM intrinsic(codegen);
+ intrinsic.Dispatch(invoke);
+ return true;
+ }
+ return false;
+}
- // temp = method;
- codegen_->LoadCurrentMethod(temp);
- if (!invoke->IsRecursive()) {
- // temp = temp->dex_cache_resolved_methods_;
- __ LoadFromOffset(
- kLoadWord, temp, temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value());
- // temp = temp[index_in_cache]
- __ LoadFromOffset(
- kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()));
- // LR = temp[offset_of_quick_compiled_code]
- __ LoadFromOffset(kLoadWord, LR, temp,
- mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
- kArmWordSize).Int32Value());
- // LR()
- __ blx(LR);
- } else {
- __ bl(codegen_->GetFrameEntryLabel());
+void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+ if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+ return;
}
- codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
- DCHECK(!codegen_->IsLeafMethod());
+ Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
+
+ codegen_->GenerateStaticOrDirectCall(invoke, temp);
}
void LocationsBuilderARM::HandleInvoke(HInvoke* invoke) {
@@ -1223,10 +1222,20 @@ void LocationsBuilderARM::HandleInvoke(HInvoke* invoke) {
}
void LocationsBuilderARM::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+ IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(),
+ codegen_->GetInstructionSetFeatures());
+ if (intrinsic.TryDispatch(invoke)) {
+ return;
+ }
+
HandleInvoke(invoke);
}
void InstructionCodeGeneratorARM::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+ if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+ return;
+ }
+
Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
uint32_t method_offset = mirror::Class::EmbeddedVTableOffset().Uint32Value() +
invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry);
@@ -3366,16 +3375,44 @@ void ParallelMoveResolverARM::EmitMove(size_t index) {
__ StoreSToOffset(source.AsFpuRegister<SRegister>(), SP, destination.GetStackIndex());
}
} else if (source.IsDoubleStackSlot()) {
- DCHECK(destination.IsDoubleStackSlot()) << destination;
- __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex());
- __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
- __ LoadFromOffset(kLoadWord, IP, SP, source.GetHighStackIndex(kArmWordSize));
- __ StoreToOffset(kStoreWord, IP, SP, destination.GetHighStackIndex(kArmWordSize));
+ if (destination.IsDoubleStackSlot()) {
+ __ LoadDFromOffset(DTMP, SP, source.GetStackIndex());
+ __ StoreDToOffset(DTMP, SP, destination.GetStackIndex());
+ } else if (destination.IsRegisterPair()) {
+ DCHECK(ExpectedPairLayout(destination));
+ __ LoadFromOffset(
+ kLoadWordPair, destination.AsRegisterPairLow<Register>(), SP, source.GetStackIndex());
+ } else {
+ DCHECK(destination.IsFpuRegisterPair()) << destination;
+ __ LoadDFromOffset(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()),
+ SP,
+ source.GetStackIndex());
+ }
+ } else if (source.IsRegisterPair()) {
+ if (destination.IsRegisterPair()) {
+ __ Mov(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairLow<Register>());
+ __ Mov(destination.AsRegisterPairHigh<Register>(), source.AsRegisterPairHigh<Register>());
+ } else {
+ DCHECK(destination.IsDoubleStackSlot()) << destination;
+ DCHECK(ExpectedPairLayout(source));
+ __ StoreToOffset(
+ kStoreWordPair, source.AsRegisterPairLow<Register>(), SP, destination.GetStackIndex());
+ }
+ } else if (source.IsFpuRegisterPair()) {
+ if (destination.IsFpuRegisterPair()) {
+ __ vmovd(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()),
+ FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()));
+ } else {
+ DCHECK(destination.IsDoubleStackSlot()) << destination;
+ __ StoreDToOffset(FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()),
+ SP,
+ destination.GetStackIndex());
+ }
} else {
DCHECK(source.IsConstant()) << source;
- HInstruction* constant = source.GetConstant();
- if (constant->IsIntConstant()) {
- int32_t value = constant->AsIntConstant()->GetValue();
+ HConstant* constant = source.GetConstant();
+ if (constant->IsIntConstant() || constant->IsNullConstant()) {
+ int32_t value = CodeGenerator::GetInt32ValueOf(constant);
if (destination.IsRegister()) {
__ LoadImmediate(destination.AsRegister<Register>(), value);
} else {
@@ -3385,17 +3422,11 @@ void ParallelMoveResolverARM::EmitMove(size_t index) {
}
} else if (constant->IsLongConstant()) {
int64_t value = constant->AsLongConstant()->GetValue();
- if (destination.IsRegister()) {
- // In the presence of long or double constants, the parallel move resolver will
- // split the move into two, but keeps the same constant for both moves. Here,
- // we use the low or high part depending on which register this move goes to.
- if (destination.reg() % 2 == 0) {
- __ LoadImmediate(destination.AsRegister<Register>(), Low32Bits(value));
- } else {
- __ LoadImmediate(destination.AsRegister<Register>(), High32Bits(value));
- }
+ if (destination.IsRegisterPair()) {
+ __ LoadImmediate(destination.AsRegisterPairLow<Register>(), Low32Bits(value));
+ __ LoadImmediate(destination.AsRegisterPairHigh<Register>(), High32Bits(value));
} else {
- DCHECK(destination.IsDoubleStackSlot());
+ DCHECK(destination.IsDoubleStackSlot()) << destination;
__ LoadImmediate(IP, Low32Bits(value));
__ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
__ LoadImmediate(IP, High32Bits(value));
@@ -3403,20 +3434,11 @@ void ParallelMoveResolverARM::EmitMove(size_t index) {
}
} else if (constant->IsDoubleConstant()) {
double value = constant->AsDoubleConstant()->GetValue();
- uint64_t int_value = bit_cast<uint64_t, double>(value);
- if (destination.IsFpuRegister()) {
- // In the presence of long or double constants, the parallel move resolver will
- // split the move into two, but keeps the same constant for both moves. Here,
- // we use the low or high part depending on which register this move goes to.
- if (destination.reg() % 2 == 0) {
- __ LoadSImmediate(destination.AsFpuRegister<SRegister>(),
- bit_cast<float, uint32_t>(Low32Bits(int_value)));
- } else {
- __ LoadSImmediate(destination.AsFpuRegister<SRegister>(),
- bit_cast<float, uint32_t>(High32Bits(int_value)));
- }
+ if (destination.IsFpuRegisterPair()) {
+ __ LoadDImmediate(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()), value);
} else {
- DCHECK(destination.IsDoubleStackSlot());
+ DCHECK(destination.IsDoubleStackSlot()) << destination;
+ uint64_t int_value = bit_cast<uint64_t, double>(value);
__ LoadImmediate(IP, Low32Bits(int_value));
__ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
__ LoadImmediate(IP, High32Bits(int_value));
@@ -3474,6 +3496,40 @@ void ParallelMoveResolverARM::EmitSwap(size_t index) {
__ vmovrs(IP, source.AsFpuRegister<SRegister>());
__ vmovs(source.AsFpuRegister<SRegister>(), destination.AsFpuRegister<SRegister>());
__ vmovsr(destination.AsFpuRegister<SRegister>(), IP);
+ } else if (source.IsRegisterPair() && destination.IsRegisterPair()) {
+ __ vmovdrr(DTMP, source.AsRegisterPairLow<Register>(), source.AsRegisterPairHigh<Register>());
+ __ Mov(source.AsRegisterPairLow<Register>(), destination.AsRegisterPairLow<Register>());
+ __ Mov(source.AsRegisterPairHigh<Register>(), destination.AsRegisterPairHigh<Register>());
+ __ vmovrrd(destination.AsRegisterPairLow<Register>(),
+ destination.AsRegisterPairHigh<Register>(),
+ DTMP);
+ } else if (source.IsRegisterPair() || destination.IsRegisterPair()) {
+ Register low_reg = source.IsRegisterPair()
+ ? source.AsRegisterPairLow<Register>()
+ : destination.AsRegisterPairLow<Register>();
+ int mem = source.IsRegisterPair()
+ ? destination.GetStackIndex()
+ : source.GetStackIndex();
+ DCHECK(ExpectedPairLayout(source.IsRegisterPair() ? source : destination));
+ __ vmovdrr(DTMP, low_reg, static_cast<Register>(low_reg + 1));
+ __ LoadFromOffset(kLoadWordPair, low_reg, SP, mem);
+ __ StoreDToOffset(DTMP, SP, mem);
+ } else if (source.IsFpuRegisterPair() && destination.IsFpuRegisterPair()) {
+ DRegister first = FromLowSToD(source.AsFpuRegisterPairLow<SRegister>());
+ DRegister second = FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>());
+ __ vmovd(DTMP, first);
+ __ vmovd(first, second);
+ __ vmovd(second, DTMP);
+ } else if (source.IsFpuRegisterPair() || destination.IsFpuRegisterPair()) {
+ DRegister reg = source.IsFpuRegisterPair()
+ ? FromLowSToD(source.AsFpuRegisterPairLow<SRegister>())
+ : FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>());
+ int mem = source.IsFpuRegisterPair()
+ ? destination.GetStackIndex()
+ : source.GetStackIndex();
+ __ vmovd(DTMP, reg);
+ __ LoadDFromOffset(reg, SP, mem);
+ __ StoreDToOffset(DTMP, SP, mem);
} else if (source.IsFpuRegister() || destination.IsFpuRegister()) {
SRegister reg = source.IsFpuRegister() ? source.AsFpuRegister<SRegister>()
: destination.AsFpuRegister<SRegister>();
@@ -3482,7 +3538,7 @@ void ParallelMoveResolverARM::EmitSwap(size_t index) {
: source.GetStackIndex();
__ vmovrs(IP, reg);
- __ LoadFromOffset(kLoadWord, IP, SP, mem);
+ __ LoadSFromOffset(reg, SP, mem);
__ StoreToOffset(kStoreWord, IP, SP, mem);
} else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
Exchange(source.GetStackIndex(), destination.GetStackIndex());
@@ -3776,5 +3832,50 @@ void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instr
}
}
+void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Register temp) {
+ DCHECK_EQ(temp, kArtMethodRegister);
+
+ // TODO: Implement all kinds of calls:
+ // 1) boot -> boot
+ // 2) app -> boot
+ // 3) app -> app
+ //
+ // Currently we implement the app -> app logic, which looks up in the resolve cache.
+
+ // temp = method;
+ LoadCurrentMethod(temp);
+ if (!invoke->IsRecursive()) {
+ // temp = temp->dex_cache_resolved_methods_;
+ __ LoadFromOffset(
+ kLoadWord, temp, temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value());
+ // temp = temp[index_in_cache]
+ __ LoadFromOffset(
+ kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()));
+ // LR = temp[offset_of_quick_compiled_code]
+ __ LoadFromOffset(kLoadWord, LR, temp,
+ mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ kArmWordSize).Int32Value());
+ // LR()
+ __ blx(LR);
+ } else {
+ __ bl(GetFrameEntryLabel());
+ }
+
+ RecordPcInfo(invoke, invoke->GetDexPc());
+ DCHECK(!IsLeafMethod());
+}
+
+void LocationsBuilderARM::VisitBoundType(HBoundType* instruction) {
+ // Nothing to do, this should be removed during prepare for register allocator.
+ UNUSED(instruction);
+ LOG(FATAL) << "Unreachable";
+}
+
+void InstructionCodeGeneratorARM::VisitBoundType(HBoundType* instruction) {
+ // Nothing to do, this should be removed during prepare for register allocator.
+ UNUSED(instruction);
+ LOG(FATAL) << "Unreachable";
+}
+
} // namespace arm
} // namespace art
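
The long- and double-constant paths in EmitMove() above materialize the two 32-bit halves separately into a register pair or a double stack slot; the halves come from ART's Low32Bits/High32Bits helpers, whose behavior amounts to this (a minimal sketch):

#include <cstdint>

// Splitting a 64-bit immediate for a LoadImmediate pair, as EmitMove() does for
// long and double constants.
constexpr uint32_t Low32Bits(uint64_t value) { return static_cast<uint32_t>(value); }
constexpr uint32_t High32Bits(uint64_t value) { return static_cast<uint32_t>(value >> 32); }

static_assert(Low32Bits(UINT64_C(0x123456789ABCDEF0)) == 0x9ABCDEF0u, "low half");
static_assert(High32Bits(UINT64_C(0x123456789ABCDEF0)) == 0x12345678u, "high half");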
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 4b03dffd38..f1a3729c13 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -39,6 +39,14 @@ static constexpr SRegister kParameterFpuRegisters[] =
{ S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15 };
static constexpr size_t kParameterFpuRegistersLength = arraysize(kParameterFpuRegisters);
+static constexpr Register kArtMethodRegister = R0;
+
+static constexpr DRegister FromLowSToD(SRegister reg) {
+ return DCHECK_CONSTEXPR(reg % 2 == 0, , D0)
+ static_cast<DRegister>(reg / 2);
+}
+
+
class InvokeDexCallingConvention : public CallingConvention<Register, SRegister> {
public:
InvokeDexCallingConvention()
@@ -90,6 +98,20 @@ class ParallelMoveResolverARM : public ParallelMoveResolver {
DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARM);
};
+class SlowPathCodeARM : public SlowPathCode {
+ public:
+ SlowPathCodeARM() : entry_label_(), exit_label_() {}
+
+ Label* GetEntryLabel() { return &entry_label_; }
+ Label* GetExitLabel() { return &exit_label_; }
+
+ private:
+ Label entry_label_;
+ Label exit_label_;
+
+ DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARM);
+};
+
class LocationsBuilderARM : public HGraphVisitor {
public:
LocationsBuilderARM(HGraph* graph, CodeGeneratorARM* codegen)
@@ -230,7 +252,7 @@ class CodeGeneratorARM : public CodeGenerator {
void MarkGCCard(Register temp, Register card, Register object, Register value);
Label* GetLabelOf(HBasicBlock* block) const {
- return block_labels_.GetRawStorage() + block->GetBlockId();
+ return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block);
}
void Initialize() OVERRIDE {
@@ -249,6 +271,8 @@ class CodeGeneratorARM : public CodeGenerator {
Label* GetFrameEntryLabel() { return &frame_entry_label_; }
+ void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Register temp);
+
private:
// Labels for each block that will be compiled.
GrowableArray<Label> block_labels_;
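
FromLowSToD above relies on the AArch32 VFP register file overlaying S(2k) and S(2k+1) on D(k); a plain-integer sketch of the same mapping (the real helper returns a DRegister and DCHECKs that the index is even):

// S0/S1 overlay D0, S2/S3 overlay D1, and so on; only the even (low) S register
// of a pair is passed in, exactly as the DCHECK_CONSTEXPR above enforces.
constexpr int LowSToDIndex(int s_reg_index) { return s_reg_index / 2; }

static_assert(LowSToDIndex(0) == 0, "S0/S1 -> D0");
static_assert(LowSToDIndex(30) == 15, "S30/S31 -> D15");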
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 3bc23fe4f3..0d7864fa35 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -402,15 +402,15 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, const CompilerOptions& com
kNumberOfAllocatableRegisters,
kNumberOfAllocatableFPRegisters,
kNumberOfAllocatableRegisterPairs,
- (1 << LR),
- 0,
+ callee_saved_core_registers.list(),
+ callee_saved_fp_registers.list(),
compiler_options),
block_labels_(nullptr),
location_builder_(graph, this),
instruction_visitor_(graph, this),
move_resolver_(graph->GetArena(), this) {
// Save the link register (containing the return address) to mimic Quick.
- AddAllocatedRegister(Location::RegisterLocation(LR));
+ AddAllocatedRegister(LocationFrom(lr));
}
#undef __
@@ -448,27 +448,32 @@ void CodeGeneratorARM64::GenerateFrameEntry() {
UseScratchRegisterScope temps(GetVIXLAssembler());
Register temp = temps.AcquireX();
DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
- __ Add(temp, sp, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64)));
+ __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64)));
__ Ldr(wzr, MemOperand(temp, 0));
RecordPcInfo(nullptr, 0);
}
- int frame_size = GetFrameSize();
- __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
- __ PokeCPURegList(GetFramePreservedRegisters(), frame_size - FrameEntrySpillSize());
-
- // Stack layout:
- // sp[frame_size - 8] : lr.
- // ... : other preserved registers.
- // sp[frame_size - regs_size]: first preserved register.
- // ... : reserved frame space.
- // sp[0] : current method.
+ if (!HasEmptyFrame()) {
+ int frame_size = GetFrameSize();
+ // Stack layout:
+ // sp[frame_size - 8] : lr.
+ // ... : other preserved core registers.
+ // ... : other preserved fp registers.
+ // ... : reserved frame space.
+ // sp[0] : current method.
+ __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
+ __ PokeCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize());
+ __ PokeCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize());
+ }
}
void CodeGeneratorARM64::GenerateFrameExit() {
- int frame_size = GetFrameSize();
- __ PeekCPURegList(GetFramePreservedRegisters(), frame_size - FrameEntrySpillSize());
- __ Drop(frame_size);
+ if (!HasEmptyFrame()) {
+ int frame_size = GetFrameSize();
+ __ PeekCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize());
+ __ PeekCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize());
+ __ Drop(frame_size);
+ }
}
void CodeGeneratorARM64::Bind(HBasicBlock* block) {
@@ -486,18 +491,21 @@ void CodeGeneratorARM64::Move(HInstruction* instruction,
Primitive::Type type = instruction->GetType();
DCHECK_NE(type, Primitive::kPrimVoid);
- if (instruction->IsIntConstant() || instruction->IsLongConstant()) {
- int64_t value = instruction->IsIntConstant() ? instruction->AsIntConstant()->GetValue()
- : instruction->AsLongConstant()->GetValue();
+ if (instruction->IsIntConstant()
+ || instruction->IsLongConstant()
+ || instruction->IsNullConstant()) {
+ int64_t value = GetInt64ValueOf(instruction->AsConstant());
if (location.IsRegister()) {
Register dst = RegisterFrom(location, type);
- DCHECK((instruction->IsIntConstant() && dst.Is32Bits()) ||
+ DCHECK(((instruction->IsIntConstant() || instruction->IsNullConstant()) && dst.Is32Bits()) ||
(instruction->IsLongConstant() && dst.Is64Bits()));
__ Mov(dst, value);
} else {
DCHECK(location.IsStackSlot() || location.IsDoubleStackSlot());
UseScratchRegisterScope temps(GetVIXLAssembler());
- Register temp = instruction->IsIntConstant() ? temps.AcquireW() : temps.AcquireX();
+ Register temp = (instruction->IsIntConstant() || instruction->IsNullConstant())
+ ? temps.AcquireW()
+ : temps.AcquireX();
__ Mov(temp, value);
__ Str(temp, StackOperandFrom(location));
}
@@ -555,26 +563,38 @@ void CodeGeneratorARM64::MarkGCCard(Register object, Register value) {
__ Bind(&done);
}
-void CodeGeneratorARM64::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSED) const {
- // Block reserved registers:
- // ip0 (VIXL temporary)
- // ip1 (VIXL temporary)
- // tr
- // lr
- // sp is not part of the allocatable registers, so we don't need to block it.
- // TODO: Avoid blocking callee-saved registers, and instead preserve them
- // where necessary.
+void CodeGeneratorARM64::SetupBlockedRegisters(bool is_baseline) const {
+ // Blocked core registers:
+ // lr : Runtime reserved.
+ // tr : Runtime reserved.
+ // xSuspend : Runtime reserved. TODO: Unblock this when the runtime stops using it.
+ // ip1 : VIXL core temp.
+ // ip0 : VIXL core temp.
+ //
+ // Blocked fp registers:
+ // d31 : VIXL fp temp.
CPURegList reserved_core_registers = vixl_reserved_core_registers;
reserved_core_registers.Combine(runtime_reserved_core_registers);
- reserved_core_registers.Combine(quick_callee_saved_registers);
while (!reserved_core_registers.IsEmpty()) {
blocked_core_registers_[reserved_core_registers.PopLowestIndex().code()] = true;
}
+
CPURegList reserved_fp_registers = vixl_reserved_fp_registers;
- reserved_fp_registers.Combine(CPURegList::GetCalleeSavedFP());
- while (!reserved_core_registers.IsEmpty()) {
+ while (!reserved_fp_registers.IsEmpty()) {
blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().code()] = true;
}
+
+ if (is_baseline) {
+ CPURegList reserved_core_baseline_registers = callee_saved_core_registers;
+ while (!reserved_core_baseline_registers.IsEmpty()) {
+ blocked_core_registers_[reserved_core_baseline_registers.PopLowestIndex().code()] = true;
+ }
+
+ CPURegList reserved_fp_baseline_registers = callee_saved_fp_registers;
+ while (!reserved_fp_baseline_registers.IsEmpty()) {
+ blocked_fpu_registers_[reserved_fp_baseline_registers.PopLowestIndex().code()] = true;
+ }
+ }
}
Location CodeGeneratorARM64::AllocateFreeRegister(Primitive::Type type) const {
@@ -626,10 +646,12 @@ void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg
}
void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) {
- if (constant->IsIntConstant() || constant->IsLongConstant()) {
- __ Mov(Register(destination),
- constant->IsIntConstant() ? constant->AsIntConstant()->GetValue()
- : constant->AsLongConstant()->GetValue());
+ if (constant->IsIntConstant()) {
+ __ Mov(Register(destination), constant->AsIntConstant()->GetValue());
+ } else if (constant->IsLongConstant()) {
+ __ Mov(Register(destination), constant->AsLongConstant()->GetValue());
+ } else if (constant->IsNullConstant()) {
+ __ Mov(Register(destination), 0);
} else if (constant->IsFloatConstant()) {
__ Fmov(FPRegister(destination), constant->AsFloatConstant()->GetValue());
} else {
@@ -643,6 +665,8 @@ static bool CoherentConstantAndType(Location constant, Primitive::Type type) {
DCHECK(constant.IsConstant());
HConstant* cst = constant.GetConstant();
return (cst->IsIntConstant() && type == Primitive::kPrimInt) ||
+ // Null is mapped to a core W register, which we associate with kPrimInt.
+ (cst->IsNullConstant() && type == Primitive::kPrimInt) ||
(cst->IsLongConstant() && type == Primitive::kPrimLong) ||
(cst->IsFloatConstant() && type == Primitive::kPrimFloat) ||
(cst->IsDoubleConstant() && type == Primitive::kPrimDouble);
@@ -663,7 +687,9 @@ void CodeGeneratorARM64::MoveLocation(Location destination, Location source, Pri
if (unspecified_type) {
HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr;
if (source.IsStackSlot() ||
- (src_cst != nullptr && (src_cst->IsIntConstant() || src_cst->IsFloatConstant()))) {
+ (src_cst != nullptr && (src_cst->IsIntConstant()
+ || src_cst->IsFloatConstant()
+ || src_cst->IsNullConstant()))) {
// For stack slots and 32bit constants, a 64bit type is appropriate.
type = destination.IsRegister() ? Primitive::kPrimInt : Primitive::kPrimFloat;
} else {
@@ -709,7 +735,7 @@ void CodeGeneratorARM64::MoveLocation(Location destination, Location source, Pri
UseScratchRegisterScope temps(GetVIXLAssembler());
HConstant* src_cst = source.GetConstant();
CPURegister temp;
- if (src_cst->IsIntConstant()) {
+ if (src_cst->IsIntConstant() || src_cst->IsNullConstant()) {
temp = temps.AcquireW();
} else if (src_cst->IsLongConstant()) {
temp = temps.AcquireX();
@@ -947,6 +973,7 @@ void CodeGeneratorARM64::StoreRelease(Primitive::Type type,
}
void CodeGeneratorARM64::LoadCurrentMethod(vixl::Register current_method) {
+ DCHECK(RequiresCurrentMethod());
DCHECK(current_method.IsW());
__ Ldr(current_method, MemOperand(sp, kCurrentMethodStackOffset));
}
@@ -1370,7 +1397,13 @@ void LocationsBuilderARM64::VisitCompare(HCompare* compare) {
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ HInstruction* right = compare->InputAt(1);
+ if ((right->IsFloatConstant() && (right->AsFloatConstant()->GetValue() == 0.0f)) ||
+ (right->IsDoubleConstant() && (right->AsDoubleConstant()->GetValue() == 0.0))) {
+ locations->SetInAt(1, Location::ConstantLocation(right->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ }
locations->SetOut(Location::RequiresRegister());
break;
}
@@ -1400,9 +1433,17 @@ void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) {
case Primitive::kPrimDouble: {
Register result = OutputRegister(compare);
FPRegister left = InputFPRegisterAt(compare, 0);
- FPRegister right = InputFPRegisterAt(compare, 1);
-
- __ Fcmp(left, right);
+ if (compare->GetLocations()->InAt(1).IsConstant()) {
+ if (kIsDebugBuild) {
+ HInstruction* right = compare->GetLocations()->InAt(1).GetConstant();
+ DCHECK((right->IsFloatConstant() && (right->AsFloatConstant()->GetValue() == 0.0f)) ||
+ (right->IsDoubleConstant() && (right->AsDoubleConstant()->GetValue() == 0.0)));
+ }
+          // 0.0 is the only immediate that can be encoded directly in an FCMP instruction.
+ __ Fcmp(left, 0.0);
+ } else {
+ __ Fcmp(left, InputFPRegisterAt(compare, 1));
+ }
if (compare->IsGtBias()) {
__ Cset(result, ne);
} else {
@@ -1752,6 +1793,16 @@ void InstructionCodeGeneratorARM64::VisitIntConstant(HIntConstant* constant) {
UNUSED(constant);
}
+void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) {
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
+ locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorARM64::VisitNullConstant(HNullConstant* constant) {
+ // Will be generated at use site.
+ UNUSED(constant);
+}
+
void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall);
@@ -2545,6 +2596,18 @@ void InstructionCodeGeneratorARM64::VisitXor(HXor* instruction) {
HandleBinaryOp(instruction);
}
+void LocationsBuilderARM64::VisitBoundType(HBoundType* instruction) {
+ // Nothing to do, this should be removed during prepare for register allocator.
+ UNUSED(instruction);
+ LOG(FATAL) << "Unreachable";
+}
+
+void InstructionCodeGeneratorARM64::VisitBoundType(HBoundType* instruction) {
+ // Nothing to do, this should be removed during prepare for register allocator.
+ UNUSED(instruction);
+ LOG(FATAL) << "Unreachable";
+}
+
#undef __
#undef QUICK_ENTRY_POINT
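
The VisitCompare() change above keeps a floating-point right-hand side in a constant location only when its value is zero, because the immediate form of FCMP encodes 0.0 and nothing else; the selection rule boils down to the following check (illustrative types, not the ART classes):

// Decide whether a floating-point compare operand may stay a constant, i.e.
// whether FCMP's #0.0 immediate form can be used.
struct FpConstant {
  bool is_double;
  float float_value;
  double double_value;
};

bool CanUseFcmpZeroImmediate(const FpConstant& rhs) {
  // Note: this also accepts -0.0, which compares equal to +0.0 under IEEE 754.
  return rhs.is_double ? (rhs.double_value == 0.0) : (rhs.float_value == 0.0f);
}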
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 9a99dcccea..afb7fc3718 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -50,14 +50,24 @@ static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegi
const vixl::Register tr = vixl::x18; // Thread Register
static const vixl::Register kArtMethodRegister = vixl::w0; // Method register on invoke.
+const vixl::Register kQuickSuspendRegister = vixl::x19;
const vixl::CPURegList vixl_reserved_core_registers(vixl::ip0, vixl::ip1);
const vixl::CPURegList vixl_reserved_fp_registers(vixl::d31);
-const vixl::CPURegList runtime_reserved_core_registers(tr, vixl::lr);
-const vixl::CPURegList quick_callee_saved_registers(vixl::CPURegister::kRegister,
- vixl::kXRegSize,
- kArm64CalleeSaveRefSpills);
+// TODO: When the runtime no longer uses kQuickSuspendRegister as a suspend
+// counter, remove it from the reserved registers list.
+const vixl::CPURegList runtime_reserved_core_registers(tr, kQuickSuspendRegister, vixl::lr);
+
+// Callee-saved registers defined by AAPCS64.
+const vixl::CPURegList callee_saved_core_registers(vixl::CPURegister::kRegister,
+ vixl::kXRegSize,
+ vixl::x19.code(),
+ vixl::x30.code());
+const vixl::CPURegList callee_saved_fp_registers(vixl::CPURegister::kFPRegister,
+ vixl::kDRegSize,
+ vixl::d8.code(),
+ vixl::d15.code());
Location ARM64ReturnLocation(Primitive::Type return_type);
class SlowPathCodeARM64 : public SlowPathCode {
@@ -191,16 +201,20 @@ class CodeGeneratorARM64 : public CodeGenerator {
void GenerateFrameEntry() OVERRIDE;
void GenerateFrameExit() OVERRIDE;
- static const vixl::CPURegList& GetFramePreservedRegisters() {
- static const vixl::CPURegList frame_preserved_regs =
- vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize, vixl::lr.Bit());
- return frame_preserved_regs;
+ vixl::CPURegList GetFramePreservedCoreRegisters() const {
+ return vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize,
+ core_spill_mask_);
+ }
+
+ vixl::CPURegList GetFramePreservedFPRegisters() const {
+ return vixl::CPURegList(vixl::CPURegister::kFPRegister, vixl::kDRegSize,
+ fpu_spill_mask_);
}
void Bind(HBasicBlock* block) OVERRIDE;
vixl::Label* GetLabelOf(HBasicBlock* block) const {
- return block_labels_ + block->GetBlockId();
+ return CommonGetLabelOf<vixl::Label>(block_labels_, block);
}
void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
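
With the preserved-register lists above now derived from the spill masks, the space they occupy is simply the population count of the mask times the register width; a small standalone sketch under the assumption of 8-byte X/D registers:

#include <cstddef>
#include <cstdint>

// Spill size derived from a register mask: one 8-byte slot per set bit.
constexpr int PopCount(uint32_t mask) {
  return mask == 0u ? 0 : static_cast<int>(mask & 1u) + PopCount(mask >> 1);
}

constexpr size_t SpillSize(uint32_t spill_mask) {
  return static_cast<size_t>(PopCount(spill_mask)) * 8u;  // assumes 8-byte registers
}

static_assert(SpillSize(1u << 30) == 8u, "only lr (x30) spilled");
static_assert(SpillSize((1u << 30) | (1u << 19) | (1u << 20)) == 24u, "lr, x19 and x20 spilled");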
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 98f93a418a..1101569174 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -470,12 +470,16 @@ void CodeGeneratorX86::GenerateFrameEntry() {
RecordPcInfo(nullptr, 0);
}
- __ subl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize()));
- __ movl(Address(ESP, kCurrentMethodStackOffset), EAX);
+ if (!HasEmptyFrame()) {
+ __ subl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize()));
+ __ movl(Address(ESP, kCurrentMethodStackOffset), EAX);
+ }
}
void CodeGeneratorX86::GenerateFrameExit() {
- __ addl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize()));
+ if (!HasEmptyFrame()) {
+ __ addl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize()));
+ }
}
void CodeGeneratorX86::Bind(HBasicBlock* block) {
@@ -483,6 +487,7 @@ void CodeGeneratorX86::Bind(HBasicBlock* block) {
}
void CodeGeneratorX86::LoadCurrentMethod(Register reg) {
+ DCHECK(RequiresCurrentMethod());
__ movl(reg, Address(ESP, kCurrentMethodStackOffset));
}
@@ -597,13 +602,7 @@ void CodeGeneratorX86::Move32(Location destination, Location source) {
__ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
} else if (source.IsConstant()) {
HConstant* constant = source.GetConstant();
- int32_t value;
- if (constant->IsIntConstant()) {
- value = constant->AsIntConstant()->GetValue();
- } else {
- DCHECK(constant->IsFloatConstant());
- value = bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
- }
+ int32_t value = GetInt32ValueOf(constant);
__ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
} else {
DCHECK(source.IsStackSlot());
@@ -669,8 +668,8 @@ void CodeGeneratorX86::Move(HInstruction* instruction, Location location, HInstr
if (locations != nullptr && locations->Out().IsConstant()) {
HConstant* const_to_move = locations->Out().GetConstant();
- if (const_to_move->IsIntConstant()) {
- Immediate imm(const_to_move->AsIntConstant()->GetValue());
+ if (const_to_move->IsIntConstant() || const_to_move->IsNullConstant()) {
+ Immediate imm(GetInt32ValueOf(const_to_move));
if (location.IsRegister()) {
__ movl(location.AsRegister<Register>(), imm);
} else if (location.IsStackSlot()) {
@@ -920,7 +919,7 @@ void InstructionCodeGeneratorX86::VisitCondition(HCondition* comp) {
locations->InAt(1).AsRegister<Register>());
} else if (locations->InAt(1).IsConstant()) {
HConstant* instruction = locations->InAt(1).GetConstant();
- Immediate imm(instruction->AsIntConstant()->GetValue());
+ Immediate imm(CodeGenerator::GetInt32ValueOf(instruction));
__ cmpl(locations->InAt(0).AsRegister<Register>(), imm);
} else {
__ cmpl(locations->InAt(0).AsRegister<Register>(),
@@ -989,6 +988,17 @@ void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant) {
UNUSED(constant);
}
+void LocationsBuilderX86::VisitNullConstant(HNullConstant* constant) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+ locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorX86::VisitNullConstant(HNullConstant* constant) {
+ // Will be generated at use site.
+ UNUSED(constant);
+}
+
void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
@@ -1799,7 +1809,7 @@ void LocationsBuilderX86::VisitAdd(HAdd* add) {
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::Any());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
locations->SetOut(Location::SameAsFirstInput());
break;
}
@@ -1843,8 +1853,6 @@ void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) {
case Primitive::kPrimFloat: {
if (second.IsFpuRegister()) {
__ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
- } else {
- __ addss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
}
break;
}
@@ -1852,8 +1860,6 @@ void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) {
case Primitive::kPrimDouble: {
if (second.IsFpuRegister()) {
__ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
- } else {
- __ addsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
}
break;
}
@@ -3495,8 +3501,8 @@ void ParallelMoveResolverX86::EmitMove(size_t index) {
}
} else if (source.IsConstant()) {
HConstant* constant = source.GetConstant();
- if (constant->IsIntConstant()) {
- Immediate imm(constant->AsIntConstant()->GetValue());
+ if (constant->IsIntConstant() || constant->IsNullConstant()) {
+ Immediate imm(CodeGenerator::GetInt32ValueOf(constant));
if (destination.IsRegister()) {
__ movl(destination.AsRegister<Register>(), imm);
} else {
@@ -3904,5 +3910,17 @@ void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instr
}
}
+void LocationsBuilderX86::VisitBoundType(HBoundType* instruction) {
+ // Nothing to do, this should be removed during prepare for register allocator.
+ UNUSED(instruction);
+ LOG(FATAL) << "Unreachable";
+}
+
+void InstructionCodeGeneratorX86::VisitBoundType(HBoundType* instruction) {
+ // Nothing to do, this should be removed during prepare for register allocator.
+ UNUSED(instruction);
+ LOG(FATAL) << "Unreachable";
+}
+
} // namespace x86
} // namespace art
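
GetInt32ValueOf lets the x86 move helpers above treat int, null and float constants uniformly as a 32-bit immediate; for the float case that is a pure bit-level reinterpretation, sketched here with memcpy (ART has its own bit_cast helper for this):

#include <cstdint>
#include <cstring>

// Reinterpret a 32-bit float as the int32 immediate that ends up in a movl.
int32_t Int32ImmediateOf(float value) {
  static_assert(sizeof(int32_t) == sizeof(float), "float must be 32 bits wide");
  int32_t bits;
  std::memcpy(&bits, &value, sizeof(bits));
  return bits;  // e.g. Int32ImmediateOf(1.0f) == 0x3F800000
}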
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 107ddafea4..f5a9b7d1f7 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -234,7 +234,7 @@ class CodeGeneratorX86 : public CodeGenerator {
void LoadCurrentMethod(Register reg);
Label* GetLabelOf(HBasicBlock* block) const {
- return block_labels_.GetRawStorage() + block->GetBlockId();
+ return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block);
}
void Initialize() OVERRIDE {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 2ff53a0603..41a19e11f0 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -487,6 +487,10 @@ void CodeGeneratorX86_64::GenerateFrameEntry() {
RecordPcInfo(nullptr, 0);
}
+ if (HasEmptyFrame()) {
+ return;
+ }
+
for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
Register reg = kCoreCalleeSaves[i];
if (allocated_registers_.ContainsCoreRegister(reg)) {
@@ -509,6 +513,9 @@ void CodeGeneratorX86_64::GenerateFrameEntry() {
}
void CodeGeneratorX86_64::GenerateFrameExit() {
+ if (HasEmptyFrame()) {
+ return;
+ }
uint32_t xmm_spill_location = GetFpuSpillStart();
size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
@@ -533,6 +540,7 @@ void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
}
void CodeGeneratorX86_64::LoadCurrentMethod(CpuRegister reg) {
+ DCHECK(RequiresCurrentMethod());
__ movl(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset));
}
@@ -599,13 +607,7 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) {
source.AsFpuRegister<XmmRegister>());
} else if (source.IsConstant()) {
HConstant* constant = source.GetConstant();
- int32_t value;
- if (constant->IsFloatConstant()) {
- value = bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
- } else {
- DCHECK(constant->IsIntConstant());
- value = constant->AsIntConstant()->GetValue();
- }
+ int32_t value = GetInt32ValueOf(constant);
__ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
} else {
DCHECK(source.IsStackSlot()) << source;
@@ -649,8 +651,8 @@ void CodeGeneratorX86_64::Move(HInstruction* instruction,
if (locations != nullptr && locations->Out().IsConstant()) {
HConstant* const_to_move = locations->Out().GetConstant();
- if (const_to_move->IsIntConstant()) {
- Immediate imm(const_to_move->AsIntConstant()->GetValue());
+ if (const_to_move->IsIntConstant() || const_to_move->IsNullConstant()) {
+ Immediate imm(GetInt32ValueOf(const_to_move));
if (location.IsRegister()) {
__ movl(location.AsRegister<CpuRegister>(), imm);
} else if (location.IsStackSlot()) {
@@ -790,7 +792,7 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
// Materialized condition, compare against 0.
Location lhs = if_instr->GetLocations()->InAt(0);
if (lhs.IsRegister()) {
- __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(0));
+ __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
} else {
__ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()),
Immediate(0));
@@ -806,8 +808,12 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
if (rhs.IsRegister()) {
__ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
} else if (rhs.IsConstant()) {
- __ cmpl(lhs.AsRegister<CpuRegister>(),
- Immediate(rhs.GetConstant()->AsIntConstant()->GetValue()));
+ int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
+ if (constant == 0) {
+ __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
+ } else {
+ __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant));
+ }
} else {
__ cmpl(lhs.AsRegister<CpuRegister>(),
Address(CpuRegister(RSP), rhs.GetStackIndex()));
@@ -883,15 +889,19 @@ void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* comp) {
CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
// Clear register: setcc only sets the low byte.
__ xorq(reg, reg);
- if (locations->InAt(1).IsRegister()) {
- __ cmpl(locations->InAt(0).AsRegister<CpuRegister>(),
- locations->InAt(1).AsRegister<CpuRegister>());
- } else if (locations->InAt(1).IsConstant()) {
- __ cmpl(locations->InAt(0).AsRegister<CpuRegister>(),
- Immediate(locations->InAt(1).GetConstant()->AsIntConstant()->GetValue()));
+ Location lhs = locations->InAt(0);
+ Location rhs = locations->InAt(1);
+ if (rhs.IsRegister()) {
+ __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
+ } else if (rhs.IsConstant()) {
+ int32_t constant = rhs.GetConstant()->AsIntConstant()->GetValue();
+ if (constant == 0) {
+ __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
+ } else {
+ __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant));
+ }
} else {
- __ cmpl(locations->InAt(0).AsRegister<CpuRegister>(),
- Address(CpuRegister(RSP), locations->InAt(1).GetStackIndex()));
+ __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
}
__ setcc(X86_64Condition(comp->GetCondition()), reg);
}
@@ -1018,6 +1028,17 @@ void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant) {
UNUSED(constant);
}
+void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+ locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant) {
+ // Will be generated at use site.
+ UNUSED(constant);
+}
+
void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
@@ -1840,8 +1861,8 @@ void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
switch (add->GetResultType()) {
case Primitive::kPrimInt: {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::Any());
- locations->SetOut(Location::SameAsFirstInput());
+ locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
}
@@ -1869,16 +1890,27 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
LocationSummary* locations = add->GetLocations();
Location first = locations->InAt(0);
Location second = locations->InAt(1);
- DCHECK(first.Equals(locations->Out()));
+ Location out = locations->Out();
switch (add->GetResultType()) {
case Primitive::kPrimInt: {
if (second.IsRegister()) {
- __ addl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+ if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
+ __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+ } else {
+ __ leal(out.AsRegister<CpuRegister>(), Address(
+ first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
+ }
} else if (second.IsConstant()) {
- Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
- __ addl(first.AsRegister<CpuRegister>(), imm);
+ if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
+ __ addl(out.AsRegister<CpuRegister>(),
+ Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
+ } else {
+ __ leal(out.AsRegister<CpuRegister>(), Address(
+ first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
+ }
} else {
+ DCHECK(first.Equals(locations->Out()));
__ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
}
break;
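Because the int add now takes Location::RegisterOrConstant and a non-overlapping output, the generator keeps the short two-operand addl when the output register happens to equal the first input and otherwise falls back to leal, which computes first + second (or first + imm) into an arbitrary destination without clobbering either input. Condensed, the register/register case looks like this sketch (the wrapper function is illustrative; the assembler calls mirror the hunk):

void GenIntAdd(X86_64Assembler* assembler, CpuRegister out, CpuRegister first, CpuRegister second) {
  if (out.AsRegister() == first.AsRegister()) {
    assembler->addl(out, second);                              // destructive two-operand add
  } else {
    assembler->leal(out, Address(first, second, TIMES_1, 0));  // out = first + second, inputs preserved
  }
}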
@@ -2754,7 +2786,7 @@ void InstructionCodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instr
Location obj = locations->InAt(0);
if (obj.IsRegister()) {
- __ cmpl(obj.AsRegister<CpuRegister>(), Immediate(0));
+ __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
} else if (obj.IsStackSlot()) {
__ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
} else {
@@ -3236,13 +3268,17 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) {
}
} else if (source.IsConstant()) {
HConstant* constant = source.GetConstant();
- if (constant->IsIntConstant()) {
- Immediate imm(constant->AsIntConstant()->GetValue());
+ if (constant->IsIntConstant() || constant->IsNullConstant()) {
+ int32_t value = CodeGenerator::GetInt32ValueOf(constant);
if (destination.IsRegister()) {
- __ movl(destination.AsRegister<CpuRegister>(), imm);
+ if (value == 0) {
+ __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
+ } else {
+ __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
+ }
} else {
DCHECK(destination.IsStackSlot()) << destination;
- __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
+ __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
}
} else if (constant->IsLongConstant()) {
int64_t value = constant->AsLongConstant()->GetValue();
@@ -3675,5 +3711,17 @@ void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* in
}
}
+void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction) {
+ // Nothing to do, this should be removed during prepare for register allocator.
+ UNUSED(instruction);
+ LOG(FATAL) << "Unreachable";
+}
+
+void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction) {
+ // Nothing to do, this should be removed during prepare for register allocator.
+ UNUSED(instruction);
+ LOG(FATAL) << "Unreachable";
+}
+
} // namespace x86_64
} // namespace art
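Two idioms recur in the x86-64 hunks above: GetInt32ValueOf lets int and null constants share the 32-bit immediate path, and a zero headed for a register is materialized with xorl reg, reg, the shortest standard zeroing idiom. GetInt32ValueOf presumably reduces to something like the following (an assumption inferred from how the diff uses it, not the actual helper):

int32_t GetInt32ValueOfSketch(HConstant* constant) {
  if (constant->IsNullConstant()) {
    return 0;                                    // a null reference moves as the immediate 0
  }
  return constant->AsIntConstant()->GetValue();  // ordinary 32-bit int constant
}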
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index dbdbf869db..707c9992c0 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -232,7 +232,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
void LoadCurrentMethod(CpuRegister reg);
Label* GetLabelOf(HBasicBlock* block) const {
- return block_labels_.GetRawStorage() + block->GetBlockId();
+ return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block);
}
void Initialize() OVERRIDE {
diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc
index b246c6f98d..7623e421fd 100644
--- a/compiler/optimizing/dominator_test.cc
+++ b/compiler/optimizing/dominator_test.cc
@@ -14,11 +14,11 @@
* limitations under the License.
*/
+#include "base/arena_allocator.h"
#include "builder.h"
#include "dex_instruction.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
-#include "utils/arena_allocator.h"
#include "gtest/gtest.h"
diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc
index e05d9b3b0f..2bfecc696a 100644
--- a/compiler/optimizing/find_loops_test.cc
+++ b/compiler/optimizing/find_loops_test.cc
@@ -14,13 +14,13 @@
* limitations under the License.
*/
+#include "base/arena_allocator.h"
#include "builder.h"
#include "dex_file.h"
#include "dex_instruction.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
#include "ssa_liveness_analysis.h"
-#include "utils/arena_allocator.h"
#include "pretty_printer.h"
#include "gtest/gtest.h"
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 4ebb1363cc..a7f1f74e27 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -160,6 +160,22 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) {
instruction->GetId()));
}
}
+
+ // Ensure 'instruction' has pointers to its inputs' use entries.
+ for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
+ HUserRecord<HInstruction*> input_record = instruction->InputRecordAt(i);
+ HInstruction* input = input_record.GetInstruction();
+ HUseListNode<HInstruction*>* use_node = input_record.GetUseNode();
+ if (use_node == nullptr || !input->GetUses().Contains(use_node)) {
+ AddError(StringPrintf("Instruction %s:%d has an invalid pointer to use entry "
+ "at input %u (%s:%d).",
+ instruction->DebugName(),
+ instruction->GetId(),
+ static_cast<unsigned>(i),
+ input->DebugName(),
+ input->GetId()));
+ }
+ }
}
void SSAChecker::VisitBasicBlock(HBasicBlock* block) {
@@ -285,6 +301,19 @@ void SSAChecker::VisitInstruction(HInstruction* instruction) {
}
}
+static Primitive::Type PrimitiveKind(Primitive::Type type) {
+ switch (type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimInt:
+ return Primitive::kPrimInt;
+ default:
+ return type;
+ }
+}
+
void SSAChecker::VisitPhi(HPhi* phi) {
VisitInstruction(phi);
@@ -321,18 +350,17 @@ void SSAChecker::VisitPhi(HPhi* phi) {
}
}
}
-}
-
-static Primitive::Type PrimitiveKind(Primitive::Type type) {
- switch (type) {
- case Primitive::kPrimBoolean:
- case Primitive::kPrimByte:
- case Primitive::kPrimShort:
- case Primitive::kPrimChar:
- case Primitive::kPrimInt:
- return Primitive::kPrimInt;
- default:
- return type;
+ // Ensure that the inputs have the same primitive kind as the phi.
+ for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
+ HInstruction* input = phi->InputAt(i);
+ if (PrimitiveKind(input->GetType()) != PrimitiveKind(phi->GetType())) {
+ AddError(StringPrintf(
+ "Input %d at index %zu of phi %d from block %d does not have the "
+ "same type as the phi: %s versus %s",
+ input->GetId(), i, phi->GetId(), phi->GetBlock()->GetBlockId(),
+ Primitive::PrettyDescriptor(input->GetType()),
+ Primitive::PrettyDescriptor(phi->GetType())));
+ }
}
}
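PrimitiveKind collapses the sub-word integral types to kPrimInt, so the new phi check compares kinds rather than exact types: a boolean phi fed by an int input passes, while an int phi fed by a float input is reported. The predicate the checker effectively applies, as a small illustration (the wrapper is hypothetical):

bool HaveSamePrimitiveKind(Primitive::Type a, Primitive::Type b) {
  return PrimitiveKind(a) == PrimitiveKind(b);
}
// HaveSamePrimitiveKind(Primitive::kPrimBoolean, Primitive::kPrimInt) -> true  (no error)
// HaveSamePrimitiveKind(Primitive::kPrimInt, Primitive::kPrimFloat)   -> false (error reported)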
diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc
index c59f8366fa..4742e4d073 100644
--- a/compiler/optimizing/graph_test.cc
+++ b/compiler/optimizing/graph_test.cc
@@ -14,12 +14,12 @@
* limitations under the License.
*/
+#include "base/arena_allocator.h"
#include "base/stringprintf.h"
#include "builder.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
#include "pretty_printer.h"
-#include "utils/arena_allocator.h"
#include "gtest/gtest.h"
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 835bca688f..c59273753e 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -184,6 +184,10 @@ class HGraphVisualizerPrinter : public HGraphVisitor {
output_ << " " << instruction->GetValue();
}
+ void VisitPhi(HPhi* phi) OVERRIDE {
+ output_ << " " << phi->GetRegNumber();
+ }
+
void PrintInstruction(HInstruction* instruction) {
output_ << instruction->DebugName();
instruction->Accept(this);
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index 89bba2d9f6..cb448c883f 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -270,7 +270,7 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
set = new (allocator_) ValueSet(allocator_);
} else {
HBasicBlock* dominator = block->GetDominator();
- set = sets_.Get(dominator->GetBlockId())->Copy();
+ set = sets_.Get(dominator->GetBlockId());
if (dominator->GetSuccessors().Size() != 1 || dominator->GetSuccessors().Get(0) != block) {
// We have to copy if the dominator has other successors, or `block` is not a successor
// of the dominator.
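The gvn.cc change defers the Copy(): the dominator's ValueSet is taken as-is and only copied when the dominator has other successors or this block is not its direct successor, because in the single-successor case nothing else will read that set again and it can be mutated in place. Condensed into one helper (same accessors as the hunk; the helper itself is illustrative):

ValueSet* SetForBlock(HBasicBlock* block, GrowableArray<ValueSet*>* sets) {
  HBasicBlock* dominator = block->GetDominator();
  ValueSet* set = sets->Get(dominator->GetBlockId());
  bool sole_successor = dominator->GetSuccessors().Size() == 1 &&
                        dominator->GetSuccessors().Get(0) == block;
  return sole_successor ? set : set->Copy();  // copy only while others may still read the dominator's set
}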
diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc
index 4a48fee2fb..a81d49aa0c 100644
--- a/compiler/optimizing/gvn_test.cc
+++ b/compiler/optimizing/gvn_test.cc
@@ -14,12 +14,12 @@
* limitations under the License.
*/
+#include "base/arena_allocator.h"
#include "builder.h"
#include "gvn.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
#include "side_effects_analysis.h"
-#include "utils/arena_allocator.h"
#include "gtest/gtest.h"
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 32f6972c84..d55a3ca00b 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -159,7 +159,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction,
SsaDeadPhiElimination dead_phi(callee_graph);
HDeadCodeElimination dce(callee_graph);
HConstantFolding fold(callee_graph);
- InstructionSimplifier simplify(callee_graph);
+ InstructionSimplifier simplify(callee_graph, stats_);
HOptimization* optimizations[] = {
&redundant_phi,
@@ -176,7 +176,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction,
if (depth_ + 1 < kDepthLimit) {
HInliner inliner(
- callee_graph, outer_compilation_unit_, compiler_driver_, outer_stats_, depth_ + 1);
+ callee_graph, outer_compilation_unit_, compiler_driver_, stats_, depth_ + 1);
inliner.Run();
}
@@ -221,7 +221,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction,
// after optimizations get a unique id.
graph_->SetCurrentInstructionId(callee_graph->GetNextInstructionId());
VLOG(compiler) << "Successfully inlined " << PrettyMethod(method_index, outer_dex_file);
- outer_stats_->RecordStat(kInlinedInvoke);
+ MaybeRecordStat(kInlinedInvoke);
return true;
}
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 07d893e7c9..8e9cf837df 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -35,10 +35,9 @@ class HInliner : public HOptimization {
CompilerDriver* compiler_driver,
OptimizingCompilerStats* stats,
size_t depth = 0)
- : HOptimization(outer_graph, true, "inliner"),
+ : HOptimization(outer_graph, true, "inliner", stats),
outer_compilation_unit_(outer_compilation_unit),
compiler_driver_(compiler_driver),
- outer_stats_(stats),
depth_(depth) {}
void Run() OVERRIDE;
@@ -48,7 +47,6 @@ class HInliner : public HOptimization {
const DexCompilationUnit& outer_compilation_unit_;
CompilerDriver* const compiler_driver_;
- OptimizingCompilerStats* const outer_stats_;
const size_t depth_;
DISALLOW_COPY_AND_ASSIGN(HInliner);
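The inliner now routes its counter through the HOptimization base class instead of keeping its own outer_stats_ pointer, and records via MaybeRecordStat, which tolerates a null stats object. The wrapper is presumably along these lines (an assumption about its shape, consistent with how stats_ is null-checked elsewhere in this diff):

void HOptimization::MaybeRecordStat(MethodCompilationStat compilation_stat) const {
  if (stats_ != nullptr) {
    stats_->RecordStat(compilation_stat);
  }
}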
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 17c8f337ca..fd99070780 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -16,11 +16,15 @@
#include "instruction_simplifier.h"
+#include "mirror/class-inl.h"
+#include "scoped_thread_state_change.h"
+
namespace art {
class InstructionSimplifierVisitor : public HGraphVisitor {
public:
- explicit InstructionSimplifierVisitor(HGraph* graph) : HGraphVisitor(graph) {}
+ InstructionSimplifierVisitor(HGraph* graph, OptimizingCompilerStats* stats)
+ : HGraphVisitor(graph), stats_(stats) {}
private:
void VisitSuspendCheck(HSuspendCheck* check) OVERRIDE;
@@ -28,10 +32,14 @@ class InstructionSimplifierVisitor : public HGraphVisitor {
void VisitArraySet(HArraySet* equal) OVERRIDE;
void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;
void VisitNullCheck(HNullCheck* instruction) OVERRIDE;
+ void VisitArrayLength(HArrayLength* instruction) OVERRIDE;
+ void VisitCheckCast(HCheckCast* instruction) OVERRIDE;
+
+ OptimizingCompilerStats* stats_;
};
void InstructionSimplifier::Run() {
- InstructionSimplifierVisitor visitor(graph_);
+ InstructionSimplifierVisitor visitor(graph_, stats_);
visitor.VisitInsertionOrder();
}
@@ -40,6 +48,28 @@ void InstructionSimplifierVisitor::VisitNullCheck(HNullCheck* null_check) {
if (!obj->CanBeNull()) {
null_check->ReplaceWith(obj);
null_check->GetBlock()->RemoveInstruction(null_check);
+ if (stats_ != nullptr) {
+ stats_->RecordStat(MethodCompilationStat::kRemovedNullCheck);
+ }
+ }
+}
+
+void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) {
+ HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass();
+ if (!load_class->IsResolved()) {
+ // If the class couldn't be resolved it's not safe to compare against it. Its
+ // default type would be Top, which might be wider than the actual class type
+ // and thus produce wrong results.
+ return;
+ }
+ ReferenceTypeInfo obj_rti = check_cast->InputAt(0)->GetReferenceTypeInfo();
+ ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI();
+ ScopedObjectAccess soa(Thread::Current());
+ if (class_rti.IsSupertypeOf(obj_rti)) {
+ check_cast->GetBlock()->RemoveInstruction(check_cast);
+ if (stats_ != nullptr) {
+ stats_->RecordStat(MethodCompilationStat::kRemovedCheckedCast);
+ }
}
}
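The new VisitCheckCast removes the instruction only when the loaded class is resolved and is a supertype of the object's inferred reference type, i.e. when the cast is statically guaranteed to succeed. The condition boils down to this sketch (names follow the hunk; the wrapper is illustrative):

bool CheckCastIsRedundant(const ReferenceTypeInfo& obj_rti, const ReferenceTypeInfo& class_rti) {
  // e.g. a value known to be a String being cast to Object: the check carries no information.
  return class_rti.IsSupertypeOf(obj_rti);
}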
@@ -75,6 +105,18 @@ void InstructionSimplifierVisitor::VisitEqual(HEqual* equal) {
}
}
+void InstructionSimplifierVisitor::VisitArrayLength(HArrayLength* instruction) {
+ HInstruction* input = instruction->InputAt(0);
+ // If the array is a NewArray with constant size, replace the array length
+ // with the constant instruction. This helps the bounds check elimination phase.
+ if (input->IsNewArray()) {
+ input = input->InputAt(0);
+ if (input->IsIntConstant()) {
+ instruction->ReplaceWith(input);
+ }
+ }
+}
+
void InstructionSimplifierVisitor::VisitArraySet(HArraySet* instruction) {
HInstruction* value = instruction->GetValue();
if (value->GetType() != Primitive::kPrimNot) return;
diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h
index bca6697d05..a7ff755aed 100644
--- a/compiler/optimizing/instruction_simplifier.h
+++ b/compiler/optimizing/instruction_simplifier.h
@@ -19,6 +19,7 @@
#include "nodes.h"
#include "optimization.h"
+#include "optimizing_compiler_stats.h"
namespace art {
@@ -27,8 +28,10 @@ namespace art {
*/
class InstructionSimplifier : public HOptimization {
public:
- explicit InstructionSimplifier(HGraph* graph, const char* name = "instruction_simplifier")
- : HOptimization(graph, true, name) {}
+ InstructionSimplifier(HGraph* graph,
+ OptimizingCompilerStats* stats = nullptr,
+ const char* name = "instruction_simplifier")
+ : HOptimization(graph, true, name, stats) {}
void Run() OVERRIDE;
};
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
new file mode 100644
index 0000000000..a82d80af13
--- /dev/null
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -0,0 +1,883 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "intrinsics_arm.h"
+
+#include "arch/arm/instruction_set_features_arm.h"
+#include "code_generator_arm.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "intrinsics.h"
+#include "mirror/array-inl.h"
+#include "mirror/art_method.h"
+#include "mirror/string.h"
+#include "thread.h"
+#include "utils/arm/assembler_arm.h"
+
+namespace art {
+
+namespace arm {
+
+ArmAssembler* IntrinsicCodeGeneratorARM::GetAssembler() {
+ return codegen_->GetAssembler();
+}
+
+ArenaAllocator* IntrinsicCodeGeneratorARM::GetAllocator() {
+ return codegen_->GetGraph()->GetArena();
+}
+
+#define __ codegen->GetAssembler()->
+
+static void MoveFromReturnRegister(Location trg, Primitive::Type type, CodeGeneratorARM* codegen) {
+ if (!trg.IsValid()) {
+ DCHECK(type == Primitive::kPrimVoid);
+ return;
+ }
+
+ DCHECK_NE(type, Primitive::kPrimVoid);
+
+ if (Primitive::IsIntegralType(type)) {
+ if (type == Primitive::kPrimLong) {
+ Register trg_reg_lo = trg.AsRegisterPairLow<Register>();
+ Register trg_reg_hi = trg.AsRegisterPairHigh<Register>();
+ Register res_reg_lo = R0;
+ Register res_reg_hi = R1;
+ if (trg_reg_lo != res_reg_hi) {
+ if (trg_reg_lo != res_reg_lo) {
+ __ mov(trg_reg_lo, ShifterOperand(res_reg_lo));
+ __ mov(trg_reg_hi, ShifterOperand(res_reg_hi));
+ } else {
+ DCHECK_EQ(trg_reg_lo + 1, trg_reg_hi);
+ }
+ } else {
+ __ mov(trg_reg_hi, ShifterOperand(res_reg_hi));
+ __ mov(trg_reg_lo, ShifterOperand(res_reg_lo));
+ }
+ } else {
+ Register trg_reg = trg.AsRegister<Register>();
+ Register res_reg = R0;
+ if (trg_reg != res_reg) {
+ __ mov(trg_reg, ShifterOperand(res_reg));
+ }
+ }
+ } else {
+ UNIMPLEMENTED(FATAL) << "Floating-point return.";
+ }
+}
+
+static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorARM* codegen) {
+ if (invoke->InputCount() == 0) {
+ return;
+ }
+
+ LocationSummary* locations = invoke->GetLocations();
+ InvokeDexCallingConventionVisitor calling_convention_visitor;
+
+ // We're moving potentially two or more locations to locations that could overlap, so we need
+ // a parallel move resolver.
+ HParallelMove parallel_move(arena);
+
+ for (size_t i = 0; i < invoke->InputCount(); i++) {
+ HInstruction* input = invoke->InputAt(i);
+ Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
+ Location actual_loc = locations->InAt(i);
+
+ parallel_move.AddMove(actual_loc, cc_loc, nullptr);
+ }
+
+ codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+}
+
+// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
+// call. This will copy the arguments into the positions for a regular call.
+//
+// Note: The actual parameters are required to be in the locations given by the invoke's location
+// summary. If an intrinsic modifies those locations before a slowpath call, they must be
+// restored!
+class IntrinsicSlowPathARM : public SlowPathCodeARM {
+ public:
+ explicit IntrinsicSlowPathARM(HInvoke* invoke) : invoke_(invoke) { }
+
+ void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
+ CodeGeneratorARM* codegen = down_cast<CodeGeneratorARM*>(codegen_in);
+ __ Bind(GetEntryLabel());
+
+ codegen->SaveLiveRegisters(invoke_->GetLocations());
+
+ MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+
+ if (invoke_->IsInvokeStaticOrDirect()) {
+ codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister);
+ } else {
+ UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
+ UNREACHABLE();
+ }
+
+ // Copy the result back to the expected output.
+ Location out = invoke_->GetLocations()->Out();
+ if (out.IsValid()) {
+ DCHECK(out.IsRegister()); // TODO: Replace this when we support output in memory.
+ DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
+ MoveFromReturnRegister(out, invoke_->GetType(), codegen);
+ }
+
+ codegen->RestoreLiveRegisters(invoke_->GetLocations());
+ __ b(GetExitLabel());
+ }
+
+ private:
+ // The instruction where this slow path is happening.
+ HInvoke* const invoke_;
+
+ DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARM);
+};
+
+#undef __
+
+bool IntrinsicLocationsBuilderARM::TryDispatch(HInvoke* invoke) {
+ Dispatch(invoke);
+ LocationSummary* res = invoke->GetLocations();
+ return res != nullptr && res->Intrinsified();
+}
+
+#define __ assembler->
+
+static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
+}
+
+static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+}
+
+static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) {
+ Location input = locations->InAt(0);
+ Location output = locations->Out();
+ if (is64bit) {
+ __ vmovrrd(output.AsRegisterPairLow<Register>(),
+ output.AsRegisterPairHigh<Register>(),
+ FromLowSToD(input.AsFpuRegisterPairLow<SRegister>()));
+ } else {
+ __ vmovrs(output.AsRegister<Register>(), input.AsFpuRegister<SRegister>());
+ }
+}
+
+static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) {
+ Location input = locations->InAt(0);
+ Location output = locations->Out();
+ if (is64bit) {
+ __ vmovdrr(FromLowSToD(output.AsFpuRegisterPairLow<SRegister>()),
+ input.AsRegisterPairLow<Register>(),
+ input.AsRegisterPairHigh<Register>());
+ } else {
+ __ vmovsr(output.AsFpuRegister<SRegister>(), input.AsRegister<Register>());
+ }
+}
+
+void IntrinsicLocationsBuilderARM::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+ CreateFPToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+ CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+ MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
+}
+void IntrinsicCodeGeneratorARM::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+ MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARM::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+ CreateFPToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+ CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+ MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
+}
+void IntrinsicCodeGeneratorARM::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+ MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
+}
+
+static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+}
+
+static void MathAbsFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) {
+ Location in = locations->InAt(0);
+ Location out = locations->Out();
+
+ if (is64bit) {
+ __ vabsd(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()),
+ FromLowSToD(in.AsFpuRegisterPairLow<SRegister>()));
+ } else {
+ __ vabss(out.AsFpuRegister<SRegister>(), in.AsFpuRegister<SRegister>());
+ }
+}
+
+void IntrinsicLocationsBuilderARM::VisitMathAbsDouble(HInvoke* invoke) {
+ CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMathAbsDouble(HInvoke* invoke) {
+ MathAbsFP(invoke->GetLocations(), true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARM::VisitMathAbsFloat(HInvoke* invoke) {
+ CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMathAbsFloat(HInvoke* invoke) {
+ MathAbsFP(invoke->GetLocations(), false, GetAssembler());
+}
+
+static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+static void GenAbsInteger(LocationSummary* locations,
+ bool is64bit,
+ ArmAssembler* assembler) {
+ Location in = locations->InAt(0);
+ Location output = locations->Out();
+
+ Register mask = locations->GetTemp(0).AsRegister<Register>();
+
+ if (is64bit) {
+ Register in_reg_lo = in.AsRegisterPairLow<Register>();
+ Register in_reg_hi = in.AsRegisterPairHigh<Register>();
+ Register out_reg_lo = output.AsRegisterPairLow<Register>();
+ Register out_reg_hi = output.AsRegisterPairHigh<Register>();
+
+ DCHECK_NE(out_reg_lo, in_reg_hi) << "Diagonal overlap unexpected.";
+
+ __ Asr(mask, in_reg_hi, 31);
+ __ adds(out_reg_lo, in_reg_lo, ShifterOperand(mask));
+ __ adc(out_reg_hi, in_reg_hi, ShifterOperand(mask));
+ __ eor(out_reg_lo, mask, ShifterOperand(out_reg_lo));
+ __ eor(out_reg_hi, mask, ShifterOperand(out_reg_hi));
+ } else {
+ Register in_reg = in.AsRegister<Register>();
+ Register out_reg = output.AsRegister<Register>();
+
+ __ Asr(mask, in_reg, 31);
+ __ add(out_reg, in_reg, ShifterOperand(mask));
+ __ eor(out_reg, mask, ShifterOperand(out_reg));
+ }
+}
+
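GenAbsInteger uses the classic branch-free absolute value: the arithmetic shift turns the sign bit into a mask of 0 or -1, and add-then-xor with that mask negates negative inputs while leaving non-negative ones alone (the ADDS/ADC pair is just the carry-propagating 64-bit version). The same computation in portable form, as a standalone sketch that wraps on the most negative value exactly as the generated code does:

#include <cstdint>

int32_t AbsInt32(int32_t x) {
  uint32_t mask = static_cast<uint32_t>(x >> 31);  // 0 when x >= 0, 0xFFFFFFFF when x < 0 (ASR)
  return static_cast<int32_t>((static_cast<uint32_t>(x) + mask) ^ mask);  // add, then flip the bits
}

int64_t AbsInt64(int64_t x) {
  uint64_t mask = static_cast<uint64_t>(x >> 63);  // the ADDS/ADC pair carries this across both halves
  return static_cast<int64_t>((static_cast<uint64_t>(x) + mask) ^ mask);
}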
+void IntrinsicLocationsBuilderARM::VisitMathAbsInt(HInvoke* invoke) {
+ CreateIntToIntPlusTemp(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMathAbsInt(HInvoke* invoke) {
+ GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
+}
+
+
+void IntrinsicLocationsBuilderARM::VisitMathAbsLong(HInvoke* invoke) {
+ CreateIntToIntPlusTemp(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMathAbsLong(HInvoke* invoke) {
+ GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
+}
+
+static void GenMinMax(LocationSummary* locations,
+ bool is_min,
+ ArmAssembler* assembler) {
+ Register op1 = locations->InAt(0).AsRegister<Register>();
+ Register op2 = locations->InAt(1).AsRegister<Register>();
+ Register out = locations->Out().AsRegister<Register>();
+
+ __ cmp(op1, ShifterOperand(op2));
+
+ __ it((is_min) ? Condition::LT : Condition::GT, kItElse);
+ __ mov(out, ShifterOperand(op1), is_min ? Condition::LT : Condition::GT);
+ __ mov(out, ShifterOperand(op2), is_min ? Condition::GE : Condition::LE);
+}
+
+static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void IntrinsicLocationsBuilderARM::VisitMathMinIntInt(HInvoke* invoke) {
+ CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMathMinIntInt(HInvoke* invoke) {
+ GenMinMax(invoke->GetLocations(), true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARM::VisitMathMaxIntInt(HInvoke* invoke) {
+ CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMathMaxIntInt(HInvoke* invoke) {
+ GenMinMax(invoke->GetLocations(), false, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARM::VisitMathSqrt(HInvoke* invoke) {
+ CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMathSqrt(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ ArmAssembler* assembler = GetAssembler();
+ __ vsqrtd(FromLowSToD(locations->Out().AsFpuRegisterPairLow<SRegister>()),
+ FromLowSToD(locations->InAt(0).AsFpuRegisterPairLow<SRegister>()));
+}
+
+void IntrinsicLocationsBuilderARM::VisitMemoryPeekByte(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMemoryPeekByte(HInvoke* invoke) {
+ ArmAssembler* assembler = GetAssembler();
+ // Ignore upper 4B of long address.
+ __ ldrsb(invoke->GetLocations()->Out().AsRegister<Register>(),
+ Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>()));
+}
+
+void IntrinsicLocationsBuilderARM::VisitMemoryPeekIntNative(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMemoryPeekIntNative(HInvoke* invoke) {
+ ArmAssembler* assembler = GetAssembler();
+ // Ignore upper 4B of long address.
+ __ ldr(invoke->GetLocations()->Out().AsRegister<Register>(),
+ Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>()));
+}
+
+void IntrinsicLocationsBuilderARM::VisitMemoryPeekLongNative(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMemoryPeekLongNative(HInvoke* invoke) {
+ ArmAssembler* assembler = GetAssembler();
+ // Ignore upper 4B of long address.
+ Register addr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
+ // Worst case: Control register bit SCTLR.A = 1. Then unaligned accesses throw a processor
+ // exception. So we can't use ldrd as addr may be unaligned.
+ Register lo = invoke->GetLocations()->Out().AsRegisterPairLow<Register>();
+ Register hi = invoke->GetLocations()->Out().AsRegisterPairHigh<Register>();
+ if (addr == lo) {
+ __ ldr(hi, Address(addr, 4));
+ __ ldr(lo, Address(addr, 0));
+ } else {
+ __ ldr(lo, Address(addr, 0));
+ __ ldr(hi, Address(addr, 4));
+ }
+}
+
+void IntrinsicLocationsBuilderARM::VisitMemoryPeekShortNative(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMemoryPeekShortNative(HInvoke* invoke) {
+ ArmAssembler* assembler = GetAssembler();
+ // Ignore upper 4B of long address.
+ __ ldrsh(invoke->GetLocations()->Out().AsRegister<Register>(),
+ Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>()));
+}
+
+static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+}
+
+void IntrinsicLocationsBuilderARM::VisitMemoryPokeByte(HInvoke* invoke) {
+ CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMemoryPokeByte(HInvoke* invoke) {
+ ArmAssembler* assembler = GetAssembler();
+ __ strb(invoke->GetLocations()->InAt(1).AsRegister<Register>(),
+ Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>()));
+}
+
+void IntrinsicLocationsBuilderARM::VisitMemoryPokeIntNative(HInvoke* invoke) {
+ CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMemoryPokeIntNative(HInvoke* invoke) {
+ ArmAssembler* assembler = GetAssembler();
+ __ str(invoke->GetLocations()->InAt(1).AsRegister<Register>(),
+ Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>()));
+}
+
+void IntrinsicLocationsBuilderARM::VisitMemoryPokeLongNative(HInvoke* invoke) {
+ CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMemoryPokeLongNative(HInvoke* invoke) {
+ ArmAssembler* assembler = GetAssembler();
+ // Ignore upper 4B of long address.
+ Register addr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
+ // Worst case: Control register bit SCTLR.A = 1. Then unaligned accesses throw a processor
+ // exception. So we can't use ldrd as addr may be unaligned.
+ __ str(invoke->GetLocations()->InAt(1).AsRegisterPairLow<Register>(), Address(addr, 0));
+ __ str(invoke->GetLocations()->InAt(1).AsRegisterPairHigh<Register>(), Address(addr, 4));
+}
+
+void IntrinsicLocationsBuilderARM::VisitMemoryPokeShortNative(HInvoke* invoke) {
+ CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMemoryPokeShortNative(HInvoke* invoke) {
+ ArmAssembler* assembler = GetAssembler();
+ __ strh(invoke->GetLocations()->InAt(1).AsRegister<Register>(),
+ Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>()));
+}
+
+void IntrinsicLocationsBuilderARM::VisitThreadCurrentThread(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARM::VisitThreadCurrentThread(HInvoke* invoke) {
+ ArmAssembler* assembler = GetAssembler();
+ __ LoadFromOffset(kLoadWord,
+ invoke->GetLocations()->Out().AsRegister<Register>(),
+ TR,
+ Thread::PeerOffset<kArmPointerSize>().Int32Value());
+}
+
+static void GenUnsafeGet(HInvoke* invoke,
+ Primitive::Type type,
+ bool is_volatile,
+ CodeGeneratorARM* codegen) {
+ LocationSummary* locations = invoke->GetLocations();
+ DCHECK((type == Primitive::kPrimInt) ||
+ (type == Primitive::kPrimLong) ||
+ (type == Primitive::kPrimNot));
+ ArmAssembler* assembler = codegen->GetAssembler();
+ Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer.
+ Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); // Long offset, lo part only.
+
+ if (type == Primitive::kPrimLong) {
+ Register trg_lo = locations->Out().AsRegisterPairLow<Register>();
+ __ add(IP, base, ShifterOperand(offset));
+ if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
+ Register trg_hi = locations->Out().AsRegisterPairHigh<Register>();
+ __ ldrexd(trg_lo, trg_hi, IP);
+ } else {
+ __ ldrd(trg_lo, Address(IP));
+ }
+ } else {
+ Register trg = locations->Out().AsRegister<Register>();
+ __ ldr(trg, Address(base, offset));
+ }
+
+ if (is_volatile) {
+ __ dmb(ISH);
+ }
+}
+
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(2, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void IntrinsicLocationsBuilderARM::VisitUnsafeGet(HInvoke* invoke) {
+ CreateIntIntIntToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitUnsafeGetVolatile(HInvoke* invoke) {
+ CreateIntIntIntToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitUnsafeGetLong(HInvoke* invoke) {
+ CreateIntIntIntToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
+ CreateIntIntIntToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitUnsafeGetObject(HInvoke* invoke) {
+ CreateIntIntIntToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
+ CreateIntIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitUnsafeGet(HInvoke* invoke) {
+ GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_);
+}
+void IntrinsicCodeGeneratorARM::VisitUnsafeGetVolatile(HInvoke* invoke) {
+ GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_);
+}
+void IntrinsicCodeGeneratorARM::VisitUnsafeGetLong(HInvoke* invoke) {
+ GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_);
+}
+void IntrinsicCodeGeneratorARM::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
+ GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_);
+}
+void IntrinsicCodeGeneratorARM::VisitUnsafeGetObject(HInvoke* invoke) {
+ GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_);
+}
+void IntrinsicCodeGeneratorARM::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
+ GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_);
+}
+
+static void CreateIntIntIntIntToVoid(ArenaAllocator* arena,
+ const ArmInstructionSetFeatures& features,
+ Primitive::Type type,
+ bool is_volatile,
+ HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(2, Location::RequiresRegister());
+ locations->SetInAt(3, Location::RequiresRegister());
+
+ if (type == Primitive::kPrimLong) {
+ // Potentially need temps for ldrexd-strexd loop.
+ if (is_volatile && !features.HasAtomicLdrdAndStrd()) {
+ locations->AddTemp(Location::RequiresRegister()); // Temp_lo.
+ locations->AddTemp(Location::RequiresRegister()); // Temp_hi.
+ }
+ } else if (type == Primitive::kPrimNot) {
+ // Temps for card-marking.
+ locations->AddTemp(Location::RequiresRegister()); // Temp.
+ locations->AddTemp(Location::RequiresRegister()); // Card.
+ }
+}
+
+void IntrinsicLocationsBuilderARM::VisitUnsafePut(HInvoke* invoke) {
+ CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, false, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitUnsafePutOrdered(HInvoke* invoke) {
+ CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, false, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitUnsafePutVolatile(HInvoke* invoke) {
+ CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, true, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitUnsafePutObject(HInvoke* invoke) {
+ CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, false, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
+ CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, false, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
+ CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, true, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitUnsafePutLong(HInvoke* invoke) {
+ CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, false, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitUnsafePutLongOrdered(HInvoke* invoke) {
+ CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, false, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitUnsafePutLongVolatile(HInvoke* invoke) {
+ CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, true, invoke);
+}
+
+static void GenUnsafePut(LocationSummary* locations,
+ Primitive::Type type,
+ bool is_volatile,
+ bool is_ordered,
+ CodeGeneratorARM* codegen) {
+ ArmAssembler* assembler = codegen->GetAssembler();
+
+ Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer.
+ Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); // Long offset, lo part only.
+ Register value;
+
+ if (is_volatile || is_ordered) {
+ __ dmb(ISH);
+ }
+
+ if (type == Primitive::kPrimLong) {
+ Register value_lo = locations->InAt(3).AsRegisterPairLow<Register>();
+ value = value_lo;
+ if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
+ Register temp_lo = locations->GetTemp(0).AsRegister<Register>();
+ Register temp_hi = locations->GetTemp(1).AsRegister<Register>();
+ Register value_hi = locations->InAt(3).AsRegisterPairHigh<Register>();
+
+ __ add(IP, base, ShifterOperand(offset));
+ Label loop_head;
+ __ Bind(&loop_head);
+ __ ldrexd(temp_lo, temp_hi, IP);
+ __ strexd(temp_lo, value_lo, value_hi, IP);
+ __ cmp(temp_lo, ShifterOperand(0));
+ __ b(&loop_head, NE);
+ } else {
+ __ add(IP, base, ShifterOperand(offset));
+ __ strd(value_lo, Address(IP));
+ }
+ } else {
+ value = locations->InAt(3).AsRegister<Register>();
+ __ str(value, Address(base, offset));
+ }
+
+ if (is_volatile) {
+ __ dmb(ISH);
+ }
+
+ if (type == Primitive::kPrimNot) {
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ Register card = locations->GetTemp(1).AsRegister<Register>();
+ codegen->MarkGCCard(temp, card, base, value);
+ }
+}
+
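GenUnsafePut issues a dmb before the store for both ordered and volatile puts and a second dmb after the store for volatile ones, the usual ARM pattern for a release store versus a fully fenced store. Roughly, in C++ memory-model terms (an analogy for intuition, not a statement of the Java memory model mapping):

#include <atomic>
#include <cstdint>

void OrderedPut(std::atomic<int32_t>* field, int32_t value) {
  field->store(value, std::memory_order_release);        // dmb, then the plain store
}

void VolatilePut(std::atomic<int32_t>* field, int32_t value) {
  field->store(value, std::memory_order_release);
  std::atomic_thread_fence(std::memory_order_seq_cst);   // the trailing dmb
}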
+void IntrinsicCodeGeneratorARM::VisitUnsafePut(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, false, codegen_);
+}
+void IntrinsicCodeGeneratorARM::VisitUnsafePutOrdered(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, true, codegen_);
+}
+void IntrinsicCodeGeneratorARM::VisitUnsafePutVolatile(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, false, codegen_);
+}
+void IntrinsicCodeGeneratorARM::VisitUnsafePutObject(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, false, codegen_);
+}
+void IntrinsicCodeGeneratorARM::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, true, codegen_);
+}
+void IntrinsicCodeGeneratorARM::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, false, codegen_);
+}
+void IntrinsicCodeGeneratorARM::VisitUnsafePutLong(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, false, codegen_);
+}
+void IntrinsicCodeGeneratorARM::VisitUnsafePutLongOrdered(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, true, codegen_);
+}
+void IntrinsicCodeGeneratorARM::VisitUnsafePutLongVolatile(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, false, codegen_);
+}
+
+static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena,
+ HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(2, Location::RequiresRegister());
+ locations->SetInAt(3, Location::RequiresRegister());
+ locations->SetInAt(4, Location::RequiresRegister());
+
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+
+ locations->AddTemp(Location::RequiresRegister()); // Pointer.
+ locations->AddTemp(Location::RequiresRegister()); // Temp 1.
+ locations->AddTemp(Location::RequiresRegister()); // Temp 2.
+}
+
+static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM* codegen) {
+ DCHECK_NE(type, Primitive::kPrimLong);
+
+ ArmAssembler* assembler = codegen->GetAssembler();
+
+ Register out = locations->Out().AsRegister<Register>(); // Boolean result.
+
+ Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer.
+ Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); // Offset (discard high 4B).
+ Register expected_lo = locations->InAt(3).AsRegister<Register>(); // Expected.
+ Register value_lo = locations->InAt(4).AsRegister<Register>(); // Value.
+
+ Register tmp_ptr = locations->GetTemp(0).AsRegister<Register>(); // Pointer to actual memory.
+ Register tmp_lo = locations->GetTemp(1).AsRegister<Register>(); // Value in memory.
+
+ if (type == Primitive::kPrimNot) {
+ // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
+ // object and scan the receiver at the next GC for nothing.
+ codegen->MarkGCCard(tmp_ptr, tmp_lo, base, value_lo);
+ }
+
+ // Prevent reordering with prior memory operations.
+ __ dmb(ISH);
+
+ __ add(tmp_ptr, base, ShifterOperand(offset));
+
+ // do {
+ // tmp = [r_ptr] - expected;
+ // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
+ // result = (tmp == 0);
+
+ Label loop_head;
+ __ Bind(&loop_head);
+
+ __ ldrex(tmp_lo, tmp_ptr);
+
+ __ subs(tmp_lo, tmp_lo, ShifterOperand(expected_lo));
+
+ __ it(EQ, ItState::kItT);
+ __ strex(tmp_lo, value_lo, tmp_ptr, EQ);
+ __ cmp(tmp_lo, ShifterOperand(1), EQ);
+
+ __ b(&loop_head, EQ);
+
+ __ dmb(ISH);
+
+ __ rsbs(out, tmp_lo, ShifterOperand(1));
+ __ it(CC);
+ __ mov(out, ShifterOperand(0), CC);
+}
+
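The ldrex/strex loop in GenCas retries only while the loaded value still equals the expected one and the store-exclusive reports failure, and the closing rsbs/it sequence converts "difference was zero" into the 1/0 boolean result, with dmb on both sides for full ordering. Behaviourally that is a sequentially consistent compare-and-swap returning an int flag, roughly (a sketch of the semantics, not of the generated code):

#include <atomic>
#include <cstdint>

int32_t UnsafeCasInt32(std::atomic<int32_t>* addr, int32_t expected, int32_t new_value) {
  // compare_exchange_strong hides the retry-on-spurious-strex-failure loop emitted above.
  return addr->compare_exchange_strong(expected, new_value, std::memory_order_seq_cst) ? 1 : 0;
}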
+void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke ATTRIBUTE_UNUSED) {
+ CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke ATTRIBUTE_UNUSED) {
+ CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
+}
+void IntrinsicCodeGeneratorARM::VisitUnsafeCASInt(HInvoke* invoke) {
+ GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_);
+}
+void IntrinsicCodeGeneratorARM::VisitUnsafeCASObject(HInvoke* invoke) {
+ GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
+}
+
+void IntrinsicLocationsBuilderARM::VisitStringCharAt(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kCallOnSlowPath,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARM::VisitStringCharAt(HInvoke* invoke) {
+ ArmAssembler* assembler = GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+
+ // Location of reference to data array
+ const MemberOffset value_offset = mirror::String::ValueOffset();
+ // Location of count
+ const MemberOffset count_offset = mirror::String::CountOffset();
+ // Starting offset within data array
+ const MemberOffset offset_offset = mirror::String::OffsetOffset();
+ // Start of char data with array_
+ const MemberOffset data_offset = mirror::Array::DataOffset(sizeof(uint16_t));
+
+ Register obj = locations->InAt(0).AsRegister<Register>(); // String object pointer.
+ Register idx = locations->InAt(1).AsRegister<Register>(); // Index of character.
+ Register out = locations->Out().AsRegister<Register>(); // Result character.
+
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ Register array_temp = locations->GetTemp(1).AsRegister<Register>();
+
+ // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
+ // the cost.
+ // TODO: For simplicity, the index parameter is requested in a register; unlike Quick,
+ // we will not optimize the code for constants (which would save a register).
+
+ SlowPathCodeARM* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
+ codegen_->AddSlowPath(slow_path);
+
+ __ ldr(temp, Address(obj, count_offset.Int32Value())); // temp = str.length.
+ codegen_->MaybeRecordImplicitNullCheck(invoke);
+ __ cmp(idx, ShifterOperand(temp));
+ __ b(slow_path->GetEntryLabel(), CS);
+
+ // Index computation.
+ __ ldr(temp, Address(obj, offset_offset.Int32Value())); // temp := str.offset.
+ __ ldr(array_temp, Address(obj, value_offset.Int32Value())); // array_temp := str.value.
+ __ add(temp, temp, ShifterOperand(idx));
+ DCHECK_EQ(data_offset.Int32Value() % 2, 0); // We'll compensate by shifting.
+ __ add(temp, temp, ShifterOperand(data_offset.Int32Value() / 2));
+
+ // Load the value.
+ __ ldrh(out, Address(array_temp, temp, LSL, 1)); // out := array_temp[temp].
+
+ __ Bind(slow_path->GetExitLabel());
+}
+
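VisitStringCharAt bounds-checks the index against count with an unsigned comparison (the CS branch also catches negative indices), then reads the backing char array at offset + index, two bytes per character, via the scaled ldrh. The fast path computes, in C terms (field names follow the offsets loaded above; the helper is illustrative):

#include <cstdint>
#include <stdexcept>

uint16_t StringCharAtSketch(const uint16_t* array_data,  // element data of str.value
                            int32_t str_offset,          // str.offset
                            int32_t str_count,           // str.count (length)
                            int32_t idx) {
  if (static_cast<uint32_t>(idx) >= static_cast<uint32_t>(str_count)) {
    throw std::out_of_range("index");   // the generated code branches to the intrinsic slow path instead
  }
  return array_data[str_offset + idx];  // ldrh at array + data_offset + (offset + idx) * 2
}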
+// Unimplemented intrinsics.
+
+#define UNIMPLEMENTED_INTRINSIC(Name) \
+void IntrinsicLocationsBuilderARM::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
+} \
+void IntrinsicCodeGeneratorARM::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
+}
+
+UNIMPLEMENTED_INTRINSIC(IntegerReverse)
+UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes)
+UNIMPLEMENTED_INTRINSIC(LongReverse)
+UNIMPLEMENTED_INTRINSIC(LongReverseBytes)
+UNIMPLEMENTED_INTRINSIC(ShortReverseBytes)
+UNIMPLEMENTED_INTRINSIC(MathMinDoubleDouble)
+UNIMPLEMENTED_INTRINSIC(MathMinFloatFloat)
+UNIMPLEMENTED_INTRINSIC(MathMaxDoubleDouble)
+UNIMPLEMENTED_INTRINSIC(MathMaxFloatFloat)
+UNIMPLEMENTED_INTRINSIC(MathMinLongLong)
+UNIMPLEMENTED_INTRINSIC(MathMaxLongLong)
+UNIMPLEMENTED_INTRINSIC(MathCeil) // Could be done by changing rounding mode, maybe?
+UNIMPLEMENTED_INTRINSIC(MathFloor) // Could be done by changing rounding mode, maybe?
+UNIMPLEMENTED_INTRINSIC(MathRint)
+UNIMPLEMENTED_INTRINSIC(MathRoundDouble) // Could be done by changing rounding mode, maybe?
+UNIMPLEMENTED_INTRINSIC(MathRoundFloat) // Could be done by changing rounding mode, maybe?
+UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) // High register pressure.
+UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
+UNIMPLEMENTED_INTRINSIC(StringCompareTo)
+UNIMPLEMENTED_INTRINSIC(StringIsEmpty) // Might not want to do these two anyways, inlining should
+UNIMPLEMENTED_INTRINSIC(StringLength) // be good enough here.
+UNIMPLEMENTED_INTRINSIC(StringIndexOf)
+UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
+UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+
+} // namespace arm
+} // namespace art
diff --git a/compiler/optimizing/intrinsics_arm.h b/compiler/optimizing/intrinsics_arm.h
new file mode 100644
index 0000000000..8bfb7d4686
--- /dev/null
+++ b/compiler/optimizing/intrinsics_arm.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_H_
+#define ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_H_
+
+#include "intrinsics.h"
+
+namespace art {
+
+class ArenaAllocator;
+class ArmInstructionSetFeatures;
+class HInvokeStaticOrDirect;
+class HInvokeVirtual;
+
+namespace arm {
+
+class ArmAssembler;
+class CodeGeneratorARM;
+
+class IntrinsicLocationsBuilderARM FINAL : public IntrinsicVisitor {
+ public:
+ explicit IntrinsicLocationsBuilderARM(ArenaAllocator* arena,
+ const ArmInstructionSetFeatures& features)
+ : arena_(arena), features_(features) {}
+
+ // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, IsStatic) \
+ void Visit ## Name(HInvoke* invoke) OVERRIDE;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+ // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether
+ // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to
+ // the invoke.
+ bool TryDispatch(HInvoke* invoke);
+
+ private:
+ ArenaAllocator* arena_;
+
+ const ArmInstructionSetFeatures& features_;
+
+ DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderARM);
+};
+
+class IntrinsicCodeGeneratorARM FINAL : public IntrinsicVisitor {
+ public:
+ explicit IntrinsicCodeGeneratorARM(CodeGeneratorARM* codegen) : codegen_(codegen) {}
+
+ // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, IsStatic) \
+ void Visit ## Name(HInvoke* invoke) OVERRIDE;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+ private:
+ ArmAssembler* GetAssembler();
+
+ ArenaAllocator* GetAllocator();
+
+ CodeGeneratorARM* codegen_;
+
+ DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorARM);
+};
+
+} // namespace arm
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_H_
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 7a3d7d8389..8874edc341 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -300,7 +300,6 @@ void IntrinsicCodeGeneratorARM64::VisitLongReverse(HInvoke* invoke) {
}
static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
- // We only support FP registers here.
LocationSummary* locations = new (arena) LocationSummary(invoke,
LocationSummary::kNoCall,
kIntrinsified);
@@ -924,7 +923,6 @@ void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderARM64::VisitStringCharAt(HInvoke* invoke) {
- // The inputs plus one temp.
LocationSummary* locations = new (arena_) LocationSummary(invoke,
LocationSummary::kCallOnSlowPath,
kIntrinsified);
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index 10f24d8148..bf9b8e59c5 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -66,8 +66,7 @@ static void UpdateLoopPhisIn(HEnvironment* environment, HLoopInformation* info)
for (size_t i = 0, e = environment->Size(); i < e; ++i) {
HInstruction* input = environment->GetInstructionAt(i);
if (input != nullptr && IsPhiOf(input, info->GetHeader())) {
- HUseListNode<HEnvironment*>* env_use = environment->GetInstructionEnvUseAt(i);
- input->RemoveEnvironmentUser(env_use);
+ environment->RemoveAsUserOfInput(i);
HInstruction* incoming = input->InputAt(0);
environment->SetRawEnvAt(i, incoming);
incoming->AddEnvUseAt(environment, i);
diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc
index eb27965c79..f22b7a7e82 100644
--- a/compiler/optimizing/linearize_test.cc
+++ b/compiler/optimizing/linearize_test.cc
@@ -16,6 +16,7 @@
#include <fstream>
+#include "base/arena_allocator.h"
#include "base/stringprintf.h"
#include "builder.h"
#include "code_generator.h"
@@ -29,7 +30,6 @@
#include "pretty_printer.h"
#include "ssa_builder.h"
#include "ssa_liveness_analysis.h"
-#include "utils/arena_allocator.h"
#include "gtest/gtest.h"
diff --git a/compiler/optimizing/live_interval_test.cc b/compiler/optimizing/live_interval_test.cc
index ac8759c805..28000c18f8 100644
--- a/compiler/optimizing/live_interval_test.cc
+++ b/compiler/optimizing/live_interval_test.cc
@@ -14,9 +14,9 @@
* limitations under the License.
*/
+#include "base/arena_allocator.h"
#include "optimizing_unit_test.h"
#include "ssa_liveness_analysis.h"
-#include "utils/arena_allocator.h"
#include "gtest/gtest.h"
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index 0558b85b47..17914e8206 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "base/arena_allocator.h"
#include "builder.h"
#include "code_generator.h"
#include "code_generator_x86.h"
@@ -24,7 +25,6 @@
#include "optimizing_unit_test.h"
#include "prepare_for_register_allocation.h"
#include "ssa_liveness_analysis.h"
-#include "utils/arena_allocator.h"
#include "gtest/gtest.h"
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index c9be570c73..907eff162f 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "base/arena_allocator.h"
#include "builder.h"
#include "code_generator.h"
#include "code_generator_x86.h"
@@ -24,7 +25,6 @@
#include "optimizing_unit_test.h"
#include "prepare_for_register_allocation.h"
#include "ssa_liveness_analysis.h"
-#include "utils/arena_allocator.h"
#include "gtest/gtest.h"
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index 990d662d86..4ac1fe8573 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -64,6 +64,13 @@ Location Location::ByteRegisterOrConstant(int reg, HInstruction* instruction) {
std::ostream& operator<<(std::ostream& os, const Location& location) {
os << location.DebugString();
+ if (location.IsRegister() || location.IsFpuRegister()) {
+ os << location.reg();
+ } else if (location.IsPair()) {
+ os << location.low() << ":" << location.high();
+ } else if (location.IsStackSlot() || location.IsDoubleStackSlot()) {
+ os << location.GetStackIndex();
+ }
return os;
}
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index bf27c5cf7a..198cc15cce 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -17,10 +17,10 @@
#ifndef ART_COMPILER_OPTIMIZING_LOCATIONS_H_
#define ART_COMPILER_OPTIMIZING_LOCATIONS_H_
+#include "base/arena_object.h"
#include "base/bit_field.h"
#include "base/bit_vector.h"
#include "base/value_object.h"
-#include "utils/arena_object.h"
#include "utils/growable_array.h"
namespace art {
@@ -151,6 +151,10 @@ class Location : public ValueObject {
return GetKind() == kFpuRegisterPair;
}
+ bool IsRegisterKind() const {
+ return IsRegister() || IsFpuRegister() || IsRegisterPair() || IsFpuRegisterPair();
+ }
+
int reg() const {
DCHECK(IsRegister() || IsFpuRegister());
return GetPayload();
@@ -268,6 +272,20 @@ class Location : public ValueObject {
return value_ == other.value_;
}
+ bool Contains(Location other) const {
+ if (Equals(other)) {
+ return true;
+ } else if (IsFpuRegisterPair() && other.IsFpuRegister()) {
+ return other.reg() == low() || other.reg() == high();
+ } else if (IsRegisterPair() && other.IsRegister()) {
+ return other.reg() == low() || other.reg() == high();
+ } else if (IsDoubleStackSlot() && other.IsStackSlot()) {
+ return (GetStackIndex() == other.GetStackIndex())
+ || (GetStackIndex() + 4 == other.GetStackIndex());
+ }
+ return false;
+ }
+
const char* DebugString() const {
switch (GetKind()) {
case kInvalid: return "I";
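A minimal sketch of the overlap semantics the new Location::Contains is meant to provide, using only factory methods that already appear in locations.h and in the parallel_move_test.cc hunks below (assumes CHECK from base/logging.h, as used elsewhere in this tree):

#include "base/logging.h"
#include "locations.h"

static void LocationContainsSketch() {
  using art::Location;
  Location pair = Location::RegisterPairLocation(0, 1);
  CHECK(pair.Contains(Location::RegisterLocation(0)));   // low half of the pair
  CHECK(pair.Contains(Location::RegisterLocation(1)));   // high half of the pair
  CHECK(!pair.Contains(Location::RegisterLocation(2)));  // a disjoint register
  CHECK(pair.Contains(pair));                            // Equals() implies Contains()
}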
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index cd36598171..93787b8bfd 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -18,6 +18,7 @@
#include "ssa_builder.h"
#include "utils/growable_array.h"
+#include "scoped_thread_state_change.h"
namespace art {
@@ -33,17 +34,14 @@ void HGraph::FindBackEdges(ArenaBitVector* visited) {
static void RemoveAsUser(HInstruction* instruction) {
for (size_t i = 0; i < instruction->InputCount(); i++) {
- instruction->InputAt(i)->RemoveUser(instruction, i);
+ instruction->RemoveAsUserOfInput(i);
}
HEnvironment* environment = instruction->GetEnvironment();
if (environment != nullptr) {
for (size_t i = 0, e = environment->Size(); i < e; ++i) {
- HUseListNode<HEnvironment*>* vreg_env_use = environment->GetInstructionEnvUseAt(i);
- if (vreg_env_use != nullptr) {
- HInstruction* vreg = environment->GetInstructionAt(i);
- DCHECK(vreg != nullptr);
- vreg->RemoveEnvironmentUser(vreg_env_use);
+ if (environment->GetInstructionAt(i) != nullptr) {
+ environment->RemoveAsUserOfInput(i);
}
}
}
@@ -63,22 +61,19 @@ void HGraph::RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visit
}
}
-void HGraph::RemoveBlock(HBasicBlock* block) const {
- for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) {
- block->GetSuccessors().Get(j)->RemovePredecessor(block);
- }
- for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
- block->RemovePhi(it.Current()->AsPhi());
- }
- for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
- block->RemoveInstruction(it.Current());
- }
-}
-
void HGraph::RemoveDeadBlocks(const ArenaBitVector& visited) const {
for (size_t i = 0; i < blocks_.Size(); ++i) {
if (!visited.IsBitSet(i)) {
- RemoveBlock(blocks_.Get(i));
+ HBasicBlock* block = blocks_.Get(i);
+ for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) {
+ block->GetSuccessors().Get(j)->RemovePredecessor(block);
+ }
+ for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+ block->RemovePhi(it.Current()->AsPhi(), /*ensure_safety=*/ false);
+ }
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ block->RemoveInstruction(it.Current(), /*ensure_safety=*/ false);
+ }
}
}
}
@@ -292,6 +287,15 @@ bool HGraph::AnalyzeNaturalLoops() const {
return true;
}
+HNullConstant* HGraph::GetNullConstant() {
+ if (cached_null_constant_ == nullptr) {
+ cached_null_constant_ = new (arena_) HNullConstant();
+ entry_block_->InsertInstructionBefore(cached_null_constant_,
+ entry_block_->GetLastInstruction());
+ }
+ return cached_null_constant_;
+}
+
void HLoopInformation::Add(HBasicBlock* block) {
blocks_.SetBit(block->GetBlockId());
}
@@ -429,22 +433,24 @@ void HBasicBlock::InsertPhiAfter(HPhi* phi, HPhi* cursor) {
static void Remove(HInstructionList* instruction_list,
HBasicBlock* block,
- HInstruction* instruction) {
+ HInstruction* instruction,
+ bool ensure_safety) {
DCHECK_EQ(block, instruction->GetBlock());
- DCHECK(instruction->GetUses().IsEmpty());
- DCHECK(instruction->GetEnvUses().IsEmpty());
instruction->SetBlock(nullptr);
instruction_list->RemoveInstruction(instruction);
-
- RemoveAsUser(instruction);
+ if (ensure_safety) {
+ DCHECK(instruction->GetUses().IsEmpty());
+ DCHECK(instruction->GetEnvUses().IsEmpty());
+ RemoveAsUser(instruction);
+ }
}
-void HBasicBlock::RemoveInstruction(HInstruction* instruction) {
- Remove(&instructions_, this, instruction);
+void HBasicBlock::RemoveInstruction(HInstruction* instruction, bool ensure_safety) {
+ Remove(&instructions_, this, instruction, ensure_safety);
}
-void HBasicBlock::RemovePhi(HPhi* phi) {
- Remove(&phis_, this, phi);
+void HBasicBlock::RemovePhi(HPhi* phi, bool ensure_safety) {
+ Remove(&phis_, this, phi, ensure_safety);
}
void HEnvironment::CopyFrom(HEnvironment* env) {
@@ -457,15 +463,9 @@ void HEnvironment::CopyFrom(HEnvironment* env) {
}
}
-template <typename T>
-static void RemoveFromUseList(T user, size_t input_index, HUseList<T>* list) {
- HUseListNode<T>* current;
- for (HUseIterator<HInstruction*> use_it(*list); !use_it.Done(); use_it.Advance()) {
- current = use_it.Current();
- if (current->GetUser() == user && current->GetIndex() == input_index) {
- list->Remove(current);
- }
- }
+void HEnvironment::RemoveAsUserOfInput(size_t index) const {
+ const HUserRecord<HEnvironment*> user_record = vregs_.Get(index);
+ user_record.GetInstruction()->RemoveEnvironmentUser(user_record.GetUseNode());
}
HInstruction* HInstruction::GetNextDisregardingMoves() const {
@@ -484,14 +484,6 @@ HInstruction* HInstruction::GetPreviousDisregardingMoves() const {
return previous;
}
-void HInstruction::RemoveUser(HInstruction* user, size_t input_index) {
- RemoveFromUseList(user, input_index, &uses_);
-}
-
-void HInstruction::RemoveEnvironmentUser(HUseListNode<HEnvironment*>* use) {
- env_uses_.Remove(use);
-}
-
void HInstructionList::AddInstruction(HInstruction* instruction) {
if (first_instruction_ == nullptr) {
DCHECK(last_instruction_ == nullptr);
@@ -602,7 +594,7 @@ void HInstruction::ReplaceWith(HInstruction* other) {
}
void HInstruction::ReplaceInput(HInstruction* replacement, size_t index) {
- InputAt(index)->RemoveUser(this, index);
+ RemoveAsUserOfInput(index);
SetRawInputAt(index, replacement);
replacement->AddUseAt(this, index);
}
@@ -613,7 +605,7 @@ size_t HInstruction::EnvironmentSize() const {
void HPhi::AddInput(HInstruction* input) {
DCHECK(input->GetBlock() != nullptr);
- inputs_.Add(input);
+ inputs_.Add(HUserRecord<HInstruction*>(input));
input->AddUseAt(this, inputs_.Size() - 1);
}
@@ -990,4 +982,14 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
invoke->GetBlock()->RemoveInstruction(invoke);
}
+std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs) {
+ ScopedObjectAccess soa(Thread::Current());
+ os << "["
+ << " is_top=" << rhs.IsTop()
+ << " type=" << (rhs.IsTop() ? "?" : PrettyClass(rhs.GetTypeHandle().Get()))
+ << " is_exact=" << rhs.IsExact()
+ << " ]";
+ return os;
+}
+
} // namespace art
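A minimal sketch of the caching contract introduced by HGraph::GetNullConstant above; `graph` is assumed to be an HGraph that already has an entry block, and DCHECK/DCHECK_EQ come from base/logging.h as in nodes.cc:

#include "base/logging.h"
#include "nodes.h"

static void NullConstantIsCachedSketch(art::HGraph* graph) {
  art::HNullConstant* null1 = graph->GetNullConstant();
  art::HNullConstant* null2 = graph->GetNullConstant();
  DCHECK_EQ(null1, null2);                    // Created lazily on first use, then reused.
  DCHECK(null1->GetBlock()->IsEntryBlock());  // Inserted once into the entry block.
}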
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 30d869d026..de448cc483 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -17,23 +17,28 @@
#ifndef ART_COMPILER_OPTIMIZING_NODES_H_
#define ART_COMPILER_OPTIMIZING_NODES_H_
+#include "base/arena_object.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
+#include "handle.h"
+#include "handle_scope.h"
#include "invoke_type.h"
#include "locations.h"
+#include "mirror/class.h"
#include "offsets.h"
#include "primitive.h"
-#include "utils/arena_object.h"
#include "utils/arena_bit_vector.h"
#include "utils/growable_array.h"
namespace art {
+class GraphChecker;
class HBasicBlock;
class HEnvironment;
class HInstruction;
class HIntConstant;
class HInvoke;
class HGraphVisitor;
+class HNullConstant;
class HPhi;
class HSuspendCheck;
class LiveInterval;
@@ -194,6 +199,8 @@ class HGraph : public ArenaObject<kArenaAllocMisc> {
return reverse_post_order_;
}
+ HNullConstant* GetNullConstant();
+
private:
HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const;
void VisitBlockForDominatorTree(HBasicBlock* block,
@@ -205,7 +212,6 @@ class HGraph : public ArenaObject<kArenaAllocMisc> {
ArenaBitVector* visiting);
void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const;
void RemoveDeadBlocks(const ArenaBitVector& visited) const;
- void RemoveBlock(HBasicBlock* block) const;
ArenaAllocator* const arena_;
@@ -233,6 +239,9 @@ class HGraph : public ArenaObject<kArenaAllocMisc> {
// The current id to assign to a newly added instruction. See HInstruction.id_.
int32_t current_instruction_id_;
+ // Cached null constant that might be created when building SSA form.
+ HNullConstant* cached_null_constant_;
+
ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1);
DISALLOW_COPY_AND_ASSIGN(HGraph);
};
@@ -481,14 +490,17 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> {
void ReplaceWith(HBasicBlock* other);
void AddInstruction(HInstruction* instruction);
- void RemoveInstruction(HInstruction* instruction);
void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor);
// Replace instruction `initial` with `replacement` within this block.
void ReplaceAndRemoveInstructionWith(HInstruction* initial,
HInstruction* replacement);
void AddPhi(HPhi* phi);
void InsertPhiAfter(HPhi* instruction, HPhi* cursor);
- void RemovePhi(HPhi* phi);
+ // RemoveInstruction and RemovePhi delete a given instruction from the respective
+ // instruction list. With 'ensure_safety' set to true, they verify that the
+ // instruction is not in use and remove it from the use lists of its inputs.
+ void RemoveInstruction(HInstruction* instruction, bool ensure_safety = true);
+ void RemovePhi(HPhi* phi, bool ensure_safety = true);
bool IsLoopHeader() const {
return (loop_information_ != nullptr) && (loop_information_->GetHeader() == this);
@@ -574,6 +586,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> {
M(ArrayLength, Instruction) \
M(ArraySet, Instruction) \
M(BoundsCheck, Instruction) \
+ M(BoundType, Instruction) \
M(CheckCast, Instruction) \
M(ClinitCheck, Instruction) \
M(Compare, BinaryOperation) \
@@ -610,6 +623,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> {
M(NewInstance, Instruction) \
M(Not, UnaryOperation) \
M(NotEqual, Condition) \
+ M(NullConstant, Instruction) \
M(NullCheck, Instruction) \
M(Or, BinaryOperation) \
M(ParallelMove, Instruction) \
@@ -704,6 +718,9 @@ class HUseList : public ValueObject {
}
void Remove(HUseListNode<T>* node) {
+ DCHECK(node != nullptr);
+ DCHECK(Contains(node));
+
if (node->prev_ != nullptr) {
node->prev_->next_ = node->next_;
}
@@ -715,6 +732,18 @@ class HUseList : public ValueObject {
}
}
+ bool Contains(const HUseListNode<T>* node) const {
+ if (node == nullptr) {
+ return false;
+ }
+ for (HUseListNode<T>* current = first_; current != nullptr; current = current->GetNext()) {
+ if (current == node) {
+ return true;
+ }
+ }
+ return false;
+ }
+
bool IsEmpty() const {
return first_ == nullptr;
}
@@ -750,6 +779,33 @@ class HUseIterator : public ValueObject {
friend class HValue;
};
+// This class is used by HEnvironment and HInstruction classes to record the
+// instructions they use and pointers to the corresponding HUseListNodes kept
+// by the used instructions.
+template <typename T>
+class HUserRecord : public ValueObject {
+ public:
+ HUserRecord() : instruction_(nullptr), use_node_(nullptr) {}
+ explicit HUserRecord(HInstruction* instruction) : instruction_(instruction), use_node_(nullptr) {}
+
+ HUserRecord(const HUserRecord<T>& old_record, HUseListNode<T>* use_node)
+ : instruction_(old_record.instruction_), use_node_(use_node) {
+ DCHECK(instruction_ != nullptr);
+ DCHECK(use_node_ != nullptr);
+ DCHECK(old_record.use_node_ == nullptr);
+ }
+
+ HInstruction* GetInstruction() const { return instruction_; }
+ HUseListNode<T>* GetUseNode() const { return use_node_; }
+
+ private:
+ // Instruction used by the user.
+ HInstruction* instruction_;
+
+ // Corresponding entry in the use list kept by 'instruction_'.
+ HUseListNode<T>* use_node_;
+};
+
// Represents the side effects an instruction may have.
class SideEffects : public ValueObject {
public:
@@ -820,50 +876,118 @@ class HEnvironment : public ArenaObject<kArenaAllocMisc> {
: vregs_(arena, number_of_vregs) {
vregs_.SetSize(number_of_vregs);
for (size_t i = 0; i < number_of_vregs; i++) {
- vregs_.Put(i, VRegInfo(nullptr, nullptr));
+ vregs_.Put(i, HUserRecord<HEnvironment*>());
}
}
void CopyFrom(HEnvironment* env);
void SetRawEnvAt(size_t index, HInstruction* instruction) {
- vregs_.Put(index, VRegInfo(instruction, nullptr));
+ vregs_.Put(index, HUserRecord<HEnvironment*>(instruction));
}
+ HInstruction* GetInstructionAt(size_t index) const {
+ return vregs_.Get(index).GetInstruction();
+ }
+
+ void RemoveAsUserOfInput(size_t index) const;
+
+ size_t Size() const { return vregs_.Size(); }
+
+ private:
// Record instructions' use entries of this environment for constant-time removal.
+ // It should only be called by HInstruction when a new environment use is added.
void RecordEnvUse(HUseListNode<HEnvironment*>* env_use) {
DCHECK(env_use->GetUser() == this);
size_t index = env_use->GetIndex();
- VRegInfo info = vregs_.Get(index);
- DCHECK(info.vreg_ != nullptr);
- DCHECK(info.node_ == nullptr);
- vregs_.Put(index, VRegInfo(info.vreg_, env_use));
+ vregs_.Put(index, HUserRecord<HEnvironment*>(vregs_.Get(index), env_use));
}
- HInstruction* GetInstructionAt(size_t index) const {
- return vregs_.Get(index).vreg_;
+ GrowableArray<HUserRecord<HEnvironment*> > vregs_;
+
+ friend HInstruction;
+
+ DISALLOW_COPY_AND_ASSIGN(HEnvironment);
+};
+
+class ReferenceTypeInfo : ValueObject {
+ public:
+ typedef Handle<mirror::Class> TypeHandle;
+
+ static ReferenceTypeInfo Create(TypeHandle type_handle, bool is_exact)
+ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+ if (type_handle->IsObjectClass()) {
+ // Override the type handle to be consistent with the case when we get to
+ // Top but don't have the Object class available. It avoids having to guess
+ // what value the type_handle has when it's Top.
+ return ReferenceTypeInfo(TypeHandle(), is_exact, true);
+ } else {
+ return ReferenceTypeInfo(type_handle, is_exact, false);
+ }
}
- HUseListNode<HEnvironment*>* GetInstructionEnvUseAt(size_t index) const {
- return vregs_.Get(index).node_;
+ static ReferenceTypeInfo CreateTop(bool is_exact) {
+ return ReferenceTypeInfo(TypeHandle(), is_exact, true);
}
- size_t Size() const { return vregs_.Size(); }
+ bool IsExact() const { return is_exact_; }
+ bool IsTop() const { return is_top_; }
- private:
- struct VRegInfo {
- HInstruction* vreg_;
- HUseListNode<HEnvironment*>* node_;
+ Handle<mirror::Class> GetTypeHandle() const { return type_handle_; }
- VRegInfo(HInstruction* instruction, HUseListNode<HEnvironment*>* env_use)
- : vreg_(instruction), node_(env_use) {}
- };
+ bool IsSupertypeOf(ReferenceTypeInfo rti) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+ if (IsTop()) {
+ // Top (equivalent to java.lang.Object) is a supertype of anything.
+ return true;
+ }
+ if (rti.IsTop()) {
+ // If we get here `this` is not Top() so it can't be a supertype.
+ return false;
+ }
+ return GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get());
+ }
- GrowableArray<VRegInfo> vregs_;
+ // Returns true if the type information provides the same amount of detail.
+ // Note that it does not mean that the instructions have the same actual type
+ // (e.g. tops are equal but they can be the result of a merge).
+ bool IsEqual(ReferenceTypeInfo rti) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+ if (IsExact() != rti.IsExact()) {
+ return false;
+ }
+ if (IsTop() && rti.IsTop()) {
+ // `Top` means java.lang.Object, so the types are equivalent.
+ return true;
+ }
+ if (IsTop() || rti.IsTop()) {
+ // If only one of them is top (i.e. Object), then they are not equivalent.
+ // NB: We need this extra check because the type_handle of `Top` is invalid
+ // and we cannot inspect its reference.
+ return false;
+ }
- DISALLOW_COPY_AND_ASSIGN(HEnvironment);
+ // Finally check the types.
+ return GetTypeHandle().Get() == rti.GetTypeHandle().Get();
+ }
+
+ private:
+ ReferenceTypeInfo() : ReferenceTypeInfo(TypeHandle(), false, true) {}
+ ReferenceTypeInfo(TypeHandle type_handle, bool is_exact, bool is_top)
+ : type_handle_(type_handle), is_exact_(is_exact), is_top_(is_top) {}
+
+ // The class of the object.
+ TypeHandle type_handle_;
+ // Whether the type is exact or only a superclass of the actual type, i.e.
+ // how much information we have about this type.
+ bool is_exact_;
+ // A true value here means that the object type should be java.lang.Object.
+ // We don't have access to the corresponding mirror object every time so this
+ // flag acts as a substitute. When true, the TypeHandle refers to a null
+ // pointer and should not be used.
+ bool is_top_;
};
+std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs);
+
class HInstruction : public ArenaObject<kArenaAllocMisc> {
public:
explicit HInstruction(SideEffects side_effects)
@@ -876,7 +1000,8 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> {
locations_(nullptr),
live_interval_(nullptr),
lifetime_position_(kNoLifetime),
- side_effects_(side_effects) {}
+ side_effects_(side_effects),
+ reference_type_info_(ReferenceTypeInfo::CreateTop(/* is_exact */ false)) {}
virtual ~HInstruction() {}
@@ -899,13 +1024,15 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> {
bool IsLoopHeaderPhi() { return IsPhi() && block_->IsLoopHeader(); }
virtual size_t InputCount() const = 0;
- virtual HInstruction* InputAt(size_t i) const = 0;
+ HInstruction* InputAt(size_t i) const { return InputRecordAt(i).GetInstruction(); }
virtual void Accept(HGraphVisitor* visitor) = 0;
virtual const char* DebugName() const = 0;
virtual Primitive::Type GetType() const { return Primitive::kPrimVoid; }
- virtual void SetRawInputAt(size_t index, HInstruction* input) = 0;
+ void SetRawInputAt(size_t index, HInstruction* input) {
+ SetRawInputRecordAt(index, HUserRecord<HInstruction*>(input));
+ }
virtual bool NeedsEnvironment() const { return false; }
virtual bool IsControlFlow() const { return false; }
@@ -914,12 +1041,24 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> {
// Does not apply for all instructions, but having this at top level greatly
// simplifies the null check elimination.
- virtual bool CanBeNull() const { return true; }
+ virtual bool CanBeNull() const {
+ DCHECK_EQ(GetType(), Primitive::kPrimNot) << "CanBeNull only applies to reference types";
+ return true;
+ }
virtual bool CanDoImplicitNullCheck() const { return false; }
+ void SetReferenceTypeInfo(ReferenceTypeInfo reference_type_info) {
+ reference_type_info_ = reference_type_info;
+ }
+
+ ReferenceTypeInfo GetReferenceTypeInfo() const { return reference_type_info_; }
+
void AddUseAt(HInstruction* user, size_t index) {
- uses_.AddUse(user, index, GetBlock()->GetGraph()->GetArena());
+ DCHECK(user != nullptr);
+ HUseListNode<HInstruction*>* use =
+ uses_.AddUse(user, index, GetBlock()->GetGraph()->GetArena());
+ user->SetRawInputRecordAt(index, HUserRecord<HInstruction*>(user->InputRecordAt(index), use));
}
void AddEnvUseAt(HEnvironment* user, size_t index) {
@@ -929,11 +1068,13 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> {
user->RecordEnvUse(env_use);
}
- void RemoveUser(HInstruction* user, size_t index);
- void RemoveEnvironmentUser(HUseListNode<HEnvironment*>* use);
+ void RemoveAsUserOfInput(size_t input) {
+ HUserRecord<HInstruction*> input_use = InputRecordAt(input);
+ input_use.GetInstruction()->uses_.Remove(input_use.GetUseNode());
+ }
- const HUseList<HInstruction*>& GetUses() { return uses_; }
- const HUseList<HEnvironment*>& GetEnvUses() { return env_uses_; }
+ const HUseList<HInstruction*>& GetUses() const { return uses_; }
+ const HUseList<HEnvironment*>& GetEnvUses() const { return env_uses_; }
bool HasUses() const { return !uses_.IsEmpty() || !env_uses_.IsEmpty(); }
bool HasEnvironmentUses() const { return !env_uses_.IsEmpty(); }
@@ -1015,7 +1156,25 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> {
void SetLiveInterval(LiveInterval* interval) { live_interval_ = interval; }
bool HasLiveInterval() const { return live_interval_ != nullptr; }
+ bool IsSuspendCheckEntry() const { return IsSuspendCheck() && GetBlock()->IsEntryBlock(); }
+
+ // Returns whether the code generation of the instruction will require access
+ // to the current method. Such instructions are:
+ // (1): Instructions that require an environment, as calling the runtime requires
+ //      walking the stack and having the current method stored at a specific stack address.
+ // (2): Object literals like classes and strings, which are loaded from the dex cache
+ //      fields of the current method.
+ bool NeedsCurrentMethod() const {
+ return NeedsEnvironment() || IsLoadClass() || IsLoadString();
+ }
+
+ protected:
+ virtual const HUserRecord<HInstruction*> InputRecordAt(size_t i) const = 0;
+ virtual void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) = 0;
+
private:
+ void RemoveEnvironmentUser(HUseListNode<HEnvironment*>* use_node) { env_uses_.Remove(use_node); }
+
HInstruction* previous_;
HInstruction* next_;
HBasicBlock* block_;
@@ -1050,7 +1209,12 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> {
const SideEffects side_effects_;
+ // TODO: for primitive types this should be marked as invalid.
+ ReferenceTypeInfo reference_type_info_;
+
+ friend class GraphChecker;
friend class HBasicBlock;
+ friend class HEnvironment;
friend class HGraph;
friend class HInstructionList;
@@ -1170,15 +1334,16 @@ class HTemplateInstruction: public HInstruction {
virtual ~HTemplateInstruction() {}
virtual size_t InputCount() const { return N; }
- virtual HInstruction* InputAt(size_t i) const { return inputs_[i]; }
protected:
- virtual void SetRawInputAt(size_t i, HInstruction* instruction) {
- inputs_[i] = instruction;
+ const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE { return inputs_[i]; }
+
+ void SetRawInputRecordAt(size_t i, const HUserRecord<HInstruction*>& input) OVERRIDE {
+ inputs_[i] = input;
}
private:
- EmbeddedArray<HInstruction*, N> inputs_;
+ EmbeddedArray<HUserRecord<HInstruction*>, N> inputs_;
friend class SsaBuilder;
};
@@ -1663,6 +1828,22 @@ class HDoubleConstant : public HConstant {
DISALLOW_COPY_AND_ASSIGN(HDoubleConstant);
};
+class HNullConstant : public HConstant {
+ public:
+ HNullConstant() : HConstant(Primitive::kPrimNot) {}
+
+ bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+ return true;
+ }
+
+ size_t ComputeHashCode() const OVERRIDE { return 0; }
+
+ DECLARE_INSTRUCTION(NullConstant);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HNullConstant);
+};
+
// Constants of the type int. Those can be from Dex instructions, or
// synthesized (for example with the if-eqz instruction).
class HIntConstant : public HConstant {
@@ -1718,7 +1899,6 @@ std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic);
class HInvoke : public HInstruction {
public:
virtual size_t InputCount() const { return inputs_.Size(); }
- virtual HInstruction* InputAt(size_t i) const { return inputs_.Get(i); }
// Runtime needs to walk the stack, so Dex -> Dex calls need to
// know their environment.
@@ -1728,10 +1908,6 @@ class HInvoke : public HInstruction {
SetRawInputAt(index, argument);
}
- virtual void SetRawInputAt(size_t index, HInstruction* input) {
- inputs_.Put(index, input);
- }
-
virtual Primitive::Type GetType() const { return return_type_; }
uint32_t GetDexPc() const { return dex_pc_; }
@@ -1763,7 +1939,12 @@ class HInvoke : public HInstruction {
inputs_.SetSize(number_of_arguments);
}
- GrowableArray<HInstruction*> inputs_;
+ const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE { return inputs_.Get(i); }
+ void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) OVERRIDE {
+ inputs_.Put(index, input);
+ }
+
+ GrowableArray<HUserRecord<HInstruction*> > inputs_;
const Primitive::Type return_type_;
const uint32_t dex_pc_;
const uint32_t dex_method_index_;
@@ -2259,11 +2440,6 @@ class HPhi : public HInstruction {
}
size_t InputCount() const OVERRIDE { return inputs_.Size(); }
- HInstruction* InputAt(size_t i) const OVERRIDE { return inputs_.Get(i); }
-
- void SetRawInputAt(size_t index, HInstruction* input) OVERRIDE {
- inputs_.Put(index, input);
- }
void AddInput(HInstruction* input);
@@ -2282,8 +2458,15 @@ class HPhi : public HInstruction {
DECLARE_INSTRUCTION(Phi);
+ protected:
+ const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE { return inputs_.Get(i); }
+
+ void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) OVERRIDE {
+ inputs_.Put(index, input);
+ }
+
private:
- GrowableArray<HInstruction*> inputs_;
+ GrowableArray<HUserRecord<HInstruction*> > inputs_;
const uint32_t reg_number_;
Primitive::Type type_;
bool is_live_;
@@ -2608,7 +2791,8 @@ class HLoadClass : public HExpression<0> {
type_index_(type_index),
is_referrers_class_(is_referrers_class),
dex_pc_(dex_pc),
- generate_clinit_check_(false) {}
+ generate_clinit_check_(false),
+ loaded_class_rti_(ReferenceTypeInfo::CreateTop(/* is_exact */ false)) {}
bool CanBeMoved() const OVERRIDE { return true; }
@@ -2646,6 +2830,20 @@ class HLoadClass : public HExpression<0> {
return !is_referrers_class_;
}
+ ReferenceTypeInfo GetLoadedClassRTI() {
+ return loaded_class_rti_;
+ }
+
+ void SetLoadedClassRTI(ReferenceTypeInfo rti) {
+ // Make sure we only set exact types (the loaded class should never be merged).
+ DCHECK(rti.IsExact());
+ loaded_class_rti_ = rti;
+ }
+
+ bool IsResolved() {
+ return loaded_class_rti_.IsExact();
+ }
+
DECLARE_INSTRUCTION(LoadClass);
private:
@@ -2656,6 +2854,8 @@ class HLoadClass : public HExpression<0> {
// Used for code generation.
bool generate_clinit_check_;
+ ReferenceTypeInfo loaded_class_rti_;
+
DISALLOW_COPY_AND_ASSIGN(HLoadClass);
};
@@ -2858,6 +3058,32 @@ class HInstanceOf : public HExpression<2> {
DISALLOW_COPY_AND_ASSIGN(HInstanceOf);
};
+class HBoundType : public HExpression<1> {
+ public:
+ HBoundType(HInstruction* input, ReferenceTypeInfo bound_type)
+ : HExpression(Primitive::kPrimNot, SideEffects::None()),
+ bound_type_(bound_type) {
+ SetRawInputAt(0, input);
+ }
+
+ const ReferenceTypeInfo& GetBoundType() const { return bound_type_; }
+
+ bool CanBeNull() const OVERRIDE {
+ // `null instanceof ClassX` always return false so we can't be null.
+ return false;
+ }
+
+ DECLARE_INSTRUCTION(BoundType);
+
+ private:
+ // Encodes the uppermost class that this instruction can have. In other words,
+ // it is always the case that GetBoundType().IsSupertypeOf(GetReferenceType()).
+ // It is used to bound the type in cases like `if (x instanceof ClassX) {}`.
+ const ReferenceTypeInfo bound_type_;
+
+ DISALLOW_COPY_AND_ASSIGN(HBoundType);
+};
+
class HCheckCast : public HTemplateInstruction<2> {
public:
HCheckCast(HInstruction* object,
@@ -2959,7 +3185,7 @@ class MoveOperands : public ArenaObject<kArenaAllocMisc> {
// True if this blocks a move from the given location.
bool Blocks(Location loc) const {
- return !IsEliminated() && source_.Equals(loc);
+ return !IsEliminated() && (source_.Contains(loc) || loc.Contains(source_));
}
// A move is redundant if it's been eliminated, if its source and
@@ -3000,46 +3226,19 @@ class HParallelMove : public HTemplateInstruction<0> {
void AddMove(Location source, Location destination, HInstruction* instruction) {
DCHECK(source.IsValid());
DCHECK(destination.IsValid());
- // The parallel move resolver does not handle pairs. So we decompose the
- // pair locations into two moves.
- if (source.IsPair() && destination.IsPair()) {
- AddMove(source.ToLow(), destination.ToLow(), instruction);
- AddMove(source.ToHigh(), destination.ToHigh(), nullptr);
- } else if (source.IsPair()) {
- DCHECK(destination.IsDoubleStackSlot()) << destination;
- AddMove(source.ToLow(), Location::StackSlot(destination.GetStackIndex()), instruction);
- AddMove(source.ToHigh(), Location::StackSlot(destination.GetHighStackIndex(4)), nullptr);
- } else if (destination.IsPair()) {
- if (source.IsConstant()) {
- // We put the same constant in the move. The code generator will handle which
- // low or high part to use.
- AddMove(source, destination.ToLow(), instruction);
- AddMove(source, destination.ToHigh(), nullptr);
- } else {
- DCHECK(source.IsDoubleStackSlot());
- AddMove(Location::StackSlot(source.GetStackIndex()), destination.ToLow(), instruction);
- // TODO: rewrite GetHighStackIndex to not require a word size. It's supposed to
- // always be 4.
- static constexpr int kHighOffset = 4;
- AddMove(Location::StackSlot(source.GetHighStackIndex(kHighOffset)),
- destination.ToHigh(),
- nullptr);
- }
- } else {
- if (kIsDebugBuild) {
- if (instruction != nullptr) {
- for (size_t i = 0, e = moves_.Size(); i < e; ++i) {
- DCHECK_NE(moves_.Get(i).GetInstruction(), instruction)
- << "Doing parallel moves for the same instruction.";
- }
- }
+ if (kIsDebugBuild) {
+ if (instruction != nullptr) {
for (size_t i = 0, e = moves_.Size(); i < e; ++i) {
- DCHECK(!destination.Equals(moves_.Get(i).GetDestination()))
- << "Same destination for two moves in a parallel move.";
+ DCHECK_NE(moves_.Get(i).GetInstruction(), instruction)
+ << "Doing parallel moves for the same instruction.";
}
}
- moves_.Add(MoveOperands(source, destination, instruction));
+ for (size_t i = 0, e = moves_.Size(); i < e; ++i) {
+ DCHECK(!destination.Equals(moves_.Get(i).GetDestination()))
+ << "Same destination for two moves in a parallel move.";
+ }
}
+ moves_.Add(MoveOperands(source, destination, instruction));
}
MoveOperands* MoveOperandsAt(size_t index) const {
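A minimal sketch of the Top convention established by the ReferenceTypeInfo class added above, limited to calls declared in that hunk:

static void ReferenceTypeInfoTopSketch() {
  using art::ReferenceTypeInfo;
  // Top stands in for java.lang.Object when no class handle is available,
  // so its type handle must never be dereferenced.
  ReferenceTypeInfo top = ReferenceTypeInfo::CreateTop(/* is_exact */ false);
  DCHECK(top.IsTop());
  DCHECK(!top.IsExact());
  // Under the mutator lock (e.g. ScopedObjectAccess), top.IsSupertypeOf(other)
  // is true for any other ReferenceTypeInfo, while other.IsSupertypeOf(top)
  // only holds if `other` is itself Top.
}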
diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc
index 5dbdc74924..4cf22d3b2e 100644
--- a/compiler/optimizing/nodes_test.cc
+++ b/compiler/optimizing/nodes_test.cc
@@ -14,8 +14,8 @@
* limitations under the License.
*/
+#include "base/arena_allocator.h"
#include "nodes.h"
-#include "utils/arena_allocator.h"
#include "gtest/gtest.h"
diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc
index b99f6784f7..b13e07eb22 100644
--- a/compiler/optimizing/optimization.cc
+++ b/compiler/optimizing/optimization.cc
@@ -21,6 +21,12 @@
namespace art {
+void HOptimization::MaybeRecordStat(MethodCompilationStat compilation_stat) const {
+ if (stats_ != nullptr) {
+ stats_->RecordStat(compilation_stat);
+ }
+}
+
void HOptimization::Check() {
if (kIsDebugBuild) {
if (is_in_ssa_form_) {
diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h
index d9e082a7f3..af39e092c7 100644
--- a/compiler/optimizing/optimization.h
+++ b/compiler/optimizing/optimization.h
@@ -18,6 +18,7 @@
#define ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_
#include "nodes.h"
+#include "optimizing_compiler_stats.h"
namespace art {
@@ -34,8 +35,10 @@ class HOptimization : public ValueObject {
public:
HOptimization(HGraph* graph,
bool is_in_ssa_form,
- const char* pass_name)
+ const char* pass_name,
+ OptimizingCompilerStats* stats = nullptr)
: graph_(graph),
+ stats_(stats),
is_in_ssa_form_(is_in_ssa_form),
pass_name_(pass_name) {}
@@ -51,7 +54,11 @@ class HOptimization : public ValueObject {
void Check();
protected:
+ void MaybeRecordStat(MethodCompilationStat compilation_stat) const;
+
HGraph* const graph_;
+ // Used to record stats about the optimization.
+ OptimizingCompilerStats* const stats_;
private:
// Does the analyzed graph use the SSA form?
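A minimal sketch of how a pass deriving from HOptimization is expected to use the new stats hook; the pass name is hypothetical, and it assumes HOptimization declares a virtual Run(), as its existing subclasses suggest. MaybeRecordStat and the kRemovedNullCheck counter come from the hunks in this patch:

class MyRedundantCheckElimination : public art::HOptimization {
 public:
  MyRedundantCheckElimination(art::HGraph* graph, art::OptimizingCompilerStats* stats)
      : HOptimization(graph, /* is_in_ssa_form */ true, "my_redundant_check_elimination", stats) {}

  void Run() OVERRIDE {
    // ... once a redundant null check has been proven away and removed ...
    MaybeRecordStat(art::MethodCompilationStat::kRemovedNullCheck);
  }
};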
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index c518f33f53..2fef8c7b3a 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -19,6 +19,7 @@
#include <fstream>
#include <stdint.h>
+#include "base/arena_allocator.h"
#include "base/dumpable.h"
#include "base/timing_logger.h"
#include "bounds_check_elimination.h"
@@ -47,7 +48,6 @@
#include "ssa_phi_elimination.h"
#include "ssa_liveness_analysis.h"
#include "reference_type_propagation.h"
-#include "utils/arena_allocator.h"
namespace art {
@@ -201,6 +201,7 @@ class OptimizingCompiler FINAL : public Compiler {
CompiledMethod* CompileOptimized(HGraph* graph,
CodeGenerator* codegen,
CompilerDriver* driver,
+ const DexFile& dex_file,
const DexCompilationUnit& dex_compilation_unit,
PassInfoPrinter* pass_info) const;
@@ -293,13 +294,15 @@ static void RunOptimizations(HOptimization* optimizations[],
static void RunOptimizations(HGraph* graph,
CompilerDriver* driver,
OptimizingCompilerStats* stats,
+ const DexFile& dex_file,
const DexCompilationUnit& dex_compilation_unit,
- PassInfoPrinter* pass_info_printer) {
+ PassInfoPrinter* pass_info_printer,
+ StackHandleScopeCollection* handles) {
SsaRedundantPhiElimination redundant_phi(graph);
SsaDeadPhiElimination dead_phi(graph);
HDeadCodeElimination dce(graph);
HConstantFolding fold1(graph);
- InstructionSimplifier simplify1(graph);
+ InstructionSimplifier simplify1(graph, stats);
HInliner inliner(graph, dex_compilation_unit, driver, stats);
@@ -308,8 +311,8 @@ static void RunOptimizations(HGraph* graph,
GVNOptimization gvn(graph, side_effects);
LICM licm(graph, side_effects);
BoundsCheckElimination bce(graph);
- ReferenceTypePropagation type_propagation(graph);
- InstructionSimplifier simplify2(graph, "instruction_simplifier_after_types");
+ ReferenceTypePropagation type_propagation(graph, dex_file, dex_compilation_unit, handles);
+ InstructionSimplifier simplify2(graph, stats, "instruction_simplifier_after_types");
IntrinsicsRecognizer intrinsics(graph, dex_compilation_unit.GetDexFile(), driver);
@@ -348,10 +351,12 @@ static ArrayRef<const uint8_t> AlignVectorSize(std::vector<uint8_t>& vector) {
CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph,
CodeGenerator* codegen,
CompilerDriver* compiler_driver,
+ const DexFile& dex_file,
const DexCompilationUnit& dex_compilation_unit,
PassInfoPrinter* pass_info_printer) const {
- RunOptimizations(
- graph, compiler_driver, &compilation_stats_, dex_compilation_unit, pass_info_printer);
+ StackHandleScopeCollection handles(Thread::Current());
+ RunOptimizations(graph, compiler_driver, &compilation_stats_,
+ dex_file, dex_compilation_unit, pass_info_printer, &handles);
PrepareForRegisterAllocation(graph).Run();
SsaLivenessAnalysis liveness(*graph, codegen);
@@ -376,7 +381,10 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph,
compiler_driver,
codegen->GetInstructionSet(),
ArrayRef<const uint8_t>(allocator.GetMemory()),
- codegen->GetFrameSize(),
+ // Follow Quick's behavior and set the frame size to zero if it is
+ // considered "empty" (see the definition of
+ // art::CodeGenerator::HasEmptyFrame).
+ codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
codegen->GetCoreSpillMask(),
codegen->GetFpuSpillMask(),
ArrayRef<const uint8_t>(stack_map));
@@ -400,17 +408,21 @@ CompiledMethod* OptimizingCompiler::CompileBaseline(
codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit);
compilation_stats_.RecordStat(MethodCompilationStat::kCompiledBaseline);
- return CompiledMethod::SwapAllocCompiledMethod(compiler_driver,
- codegen->GetInstructionSet(),
- ArrayRef<const uint8_t>(allocator.GetMemory()),
- codegen->GetFrameSize(),
- codegen->GetCoreSpillMask(),
- codegen->GetFpuSpillMask(),
- &src_mapping_table,
- AlignVectorSize(mapping_table),
- AlignVectorSize(vmap_table),
- AlignVectorSize(gc_map),
- ArrayRef<const uint8_t>());
+ return CompiledMethod::SwapAllocCompiledMethod(
+ compiler_driver,
+ codegen->GetInstructionSet(),
+ ArrayRef<const uint8_t>(allocator.GetMemory()),
+ // Follow Quick's behavior and set the frame size to zero if it is
+ // considered "empty" (see the definition of
+ // art::CodeGenerator::HasEmptyFrame).
+ codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
+ codegen->GetCoreSpillMask(),
+ codegen->GetFpuSpillMask(),
+ &src_mapping_table,
+ AlignVectorSize(mapping_table),
+ AlignVectorSize(vmap_table),
+ AlignVectorSize(gc_map),
+ ArrayRef<const uint8_t>());
}
CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
@@ -508,6 +520,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
return CompileOptimized(graph,
codegen.get(),
compiler_driver,
+ dex_file,
dex_compilation_unit,
&pass_info_printer);
} else if (shouldOptimize && RegisterAllocator::Supports(instruction_set)) {
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index cc2723df99..3ebf0f8cd2 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -43,6 +43,8 @@ enum MethodCompilationStat {
kNotCompiledCantAccesType,
kNotOptimizedRegisterAllocator,
kNotCompiledUnhandledInstruction,
+ kRemovedCheckedCast,
+ kRemovedNullCheck,
kLastStat
};
@@ -96,6 +98,8 @@ class OptimizingCompilerStats {
case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType";
case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator";
case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction";
+ case kRemovedCheckedCast: return "kRemovedCheckedCast";
+ case kRemovedNullCheck: return "kRemovedNullCheck";
default: LOG(FATAL) << "invalid stat";
}
return "";
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index debe466560..7d0641ec13 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -57,17 +57,49 @@ void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) {
// unallocated, or the move was already eliminated).
for (size_t i = 0; i < parallel_move->NumMoves(); ++i) {
MoveOperands* move = parallel_move->MoveOperandsAt(i);
- // The parallel move resolver algorithm does not work with register pairs.
- DCHECK(!move->GetSource().IsPair());
- DCHECK(!move->GetDestination().IsPair());
if (!move->IsRedundant()) {
moves_.Add(move);
}
}
}
+// Update the source of `move`, knowing that `updated_location` has been swapped
+// with `new_source`. Note that `updated_location` can be a pair, therefore if
+// `move` is non-pair, we need to extract which register to use.
+static void UpdateSourceOf(MoveOperands* move, Location updated_location, Location new_source) {
+ Location source = move->GetSource();
+ if (new_source.GetKind() == source.GetKind()) {
+ DCHECK(updated_location.Equals(source));
+ move->SetSource(new_source);
+ } else if (new_source.IsStackSlot()
+ || new_source.IsDoubleStackSlot()
+ || source.IsStackSlot()
+ || source.IsDoubleStackSlot()) {
+ // Stack slots never take part in a pair/non-pair swap.
+ DCHECK(updated_location.Equals(source));
+ move->SetSource(new_source);
+ } else if (source.IsRegister()) {
+ DCHECK(new_source.IsRegisterPair()) << new_source;
+ DCHECK(updated_location.IsRegisterPair()) << updated_location;
+ if (updated_location.low() == source.reg()) {
+ move->SetSource(Location::RegisterLocation(new_source.low()));
+ } else {
+ DCHECK_EQ(updated_location.high(), source.reg());
+ move->SetSource(Location::RegisterLocation(new_source.high()));
+ }
+ } else if (source.IsFpuRegister()) {
+ DCHECK(new_source.IsFpuRegisterPair()) << new_source;
+ DCHECK(updated_location.IsFpuRegisterPair()) << updated_location;
+ if (updated_location.low() == source.reg()) {
+ move->SetSource(Location::FpuRegisterLocation(new_source.low()));
+ } else {
+ DCHECK_EQ(updated_location.high(), source.reg());
+ move->SetSource(Location::FpuRegisterLocation(new_source.high()));
+ }
+ }
+}
-void ParallelMoveResolver::PerformMove(size_t index) {
+MoveOperands* ParallelMoveResolver::PerformMove(size_t index) {
// Each call to this function performs a move and deletes it from the move
// graph. We first recursively perform any move blocking this one. We
// mark a move as "pending" on entry to PerformMove in order to detect
@@ -75,35 +107,59 @@ void ParallelMoveResolver::PerformMove(size_t index) {
// which means that a call to PerformMove could change any source operand
// in the move graph.
- DCHECK(!moves_.Get(index)->IsPending());
- DCHECK(!moves_.Get(index)->IsRedundant());
+ MoveOperands* move = moves_.Get(index);
+ DCHECK(!move->IsPending());
+ if (move->IsRedundant()) {
+ // Because we swap register pairs first, a later move that is not yet
+ // pending may have become redundant.
+ move->Eliminate();
+ return nullptr;
+ }
// Clear this move's destination to indicate a pending move. The actual
// destination is saved in a stack-allocated local. Recursion may allow
// multiple moves to be pending.
- DCHECK(!moves_.Get(index)->GetSource().IsInvalid());
- Location destination = moves_.Get(index)->MarkPending();
+ DCHECK(!move->GetSource().IsInvalid());
+ Location destination = move->MarkPending();
// Perform a depth-first traversal of the move graph to resolve
// dependencies. Any unperformed, unpending move with a source the same
// as this one's destination blocks this one so recursively perform all
// such moves.
+ MoveOperands* required_swap = nullptr;
for (size_t i = 0; i < moves_.Size(); ++i) {
const MoveOperands& other_move = *moves_.Get(i);
if (other_move.Blocks(destination) && !other_move.IsPending()) {
// Though PerformMove can change any source operand in the move graph,
- // this call cannot create a blocking move via a swap (this loop does
- // not miss any). Assume there is a non-blocking move with source A
+ // calling `PerformMove` cannot create a blocking move via a swap
+ // (this loop does not miss any).
+ // For example, assume there is a non-blocking move with source A
// and this move is blocked on source B and there is a swap of A and
// B. Then A and B must be involved in the same cycle (or they would
// not be swapped). Since this move's destination is B and there is
// only a single incoming edge to an operand, this move must also be
// involved in the same cycle. In that case, the blocking move will
// be created but will be "pending" when we return from PerformMove.
- PerformMove(i);
+ required_swap = PerformMove(i);
+
+ if (required_swap == move) {
+ // If this move is required to swap, we do so without looking
+ // at the next moves. Swapping is not blocked by anything, it just
+ // updates other moves' sources.
+ break;
+ } else if (required_swap == moves_.Get(i)) {
+ // If `other_move` was swapped, we iterate again to find a new
+ // potential cycle.
+ required_swap = nullptr;
+ i = 0;
+ } else if (required_swap != nullptr) {
+ // A move is required to swap. We walk back the cycle to find the
+ // move by just returning from this `PerformMove`.
+ moves_.Get(index)->ClearPending(destination);
+ return required_swap;
+ }
}
}
- MoveOperands* move = moves_.Get(index);
// We are about to resolve this move and don't need it marked as
// pending, so restore its destination.
@@ -113,19 +169,30 @@ void ParallelMoveResolver::PerformMove(size_t index) {
// so it may now be the last move in the cycle. If so remove it.
if (move->GetSource().Equals(destination)) {
move->Eliminate();
- return;
+ DCHECK(required_swap == nullptr);
+ return nullptr;
}
// The move may be blocked on a (at most one) pending move, in which case
// we have a cycle. Search for such a blocking move and perform a swap to
// resolve it.
bool do_swap = false;
- for (size_t i = 0; i < moves_.Size(); ++i) {
- const MoveOperands& other_move = *moves_.Get(i);
- if (other_move.Blocks(destination)) {
- DCHECK(other_move.IsPending());
- do_swap = true;
- break;
+ if (required_swap != nullptr) {
+ DCHECK_EQ(required_swap, move);
+ do_swap = true;
+ } else {
+ for (size_t i = 0; i < moves_.Size(); ++i) {
+ const MoveOperands& other_move = *moves_.Get(i);
+ if (other_move.Blocks(destination)) {
+ DCHECK(other_move.IsPending());
+ if (!destination.IsPair() && other_move.GetSource().IsPair()) {
+ // We swap pairs before swapping non-pairs. Go back from the
+ // cycle by returning the pair that must be swapped.
+ return moves_.Get(i);
+ }
+ do_swap = true;
+ break;
+ }
}
}
@@ -140,15 +207,21 @@ void ParallelMoveResolver::PerformMove(size_t index) {
for (size_t i = 0; i < moves_.Size(); ++i) {
const MoveOperands& other_move = *moves_.Get(i);
if (other_move.Blocks(source)) {
- moves_.Get(i)->SetSource(swap_destination);
+ UpdateSourceOf(moves_.Get(i), source, swap_destination);
} else if (other_move.Blocks(swap_destination)) {
- moves_.Get(i)->SetSource(source);
+ UpdateSourceOf(moves_.Get(i), swap_destination, source);
}
}
+ // If the swap was required because of a pair in the middle of a cycle,
+ // we return the swapped move, so that the caller knows it needs to re-iterate
+ // its dependency loop.
+ return required_swap;
} else {
// This move is not blocked.
EmitMove(index);
move->Eliminate();
+ DCHECK(required_swap == nullptr);
+ return nullptr;
}
}
diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h
index 7ec1dd2deb..3fa1b37afd 100644
--- a/compiler/optimizing/parallel_move_resolver.h
+++ b/compiler/optimizing/parallel_move_resolver.h
@@ -83,7 +83,15 @@ class ParallelMoveResolver : public ValueObject {
// Perform the move at the moves_ index in question (possibly requiring
// other moves to satisfy dependencies).
- void PerformMove(size_t index);
+ //
+ // Returns the move in the dependency cycle, if any, that still needs to swap.
+ // This is used to handle pair swaps, where we want the pair to swap first to
+ // avoid building pairs that are unexpected by the code generator. For example,
+ // if we were to swap R1 with R2, we would need to update all locations using
+ // R2 to R1, so a (R2,R3) pair register could become (R1,R3). We could make
+ // the code generator understand such pairs, but it's easier and cleaner to
+ // just not create such pairs and swap pairs first.
+ MoveOperands* PerformMove(size_t index);
DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolver);
};
diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc
index 28b5697bbd..44a3da2817 100644
--- a/compiler/optimizing/parallel_move_test.cc
+++ b/compiler/optimizing/parallel_move_test.cc
@@ -14,9 +14,9 @@
* limitations under the License.
*/
+#include "base/arena_allocator.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
-#include "utils/arena_allocator.h"
#include "gtest/gtest.h"
@@ -165,7 +165,7 @@ TEST(ParallelMoveTest, Pairs) {
Location::RegisterPairLocation(2, 3),
nullptr);
resolver.EmitNativeCode(moves);
- ASSERT_STREQ("(2 -> 4) (0 -> 2) (1 -> 3)", resolver.GetMessage().c_str());
+ ASSERT_STREQ("(2 -> 4) (0,1 -> 2,3)", resolver.GetMessage().c_str());
}
{
@@ -180,7 +180,7 @@ TEST(ParallelMoveTest, Pairs) {
Location::RegisterLocation(4),
nullptr);
resolver.EmitNativeCode(moves);
- ASSERT_STREQ("(2 -> 4) (0 -> 2) (1 -> 3)", resolver.GetMessage().c_str());
+ ASSERT_STREQ("(2 -> 4) (0,1 -> 2,3)", resolver.GetMessage().c_str());
}
{
@@ -195,7 +195,89 @@ TEST(ParallelMoveTest, Pairs) {
Location::RegisterLocation(0),
nullptr);
resolver.EmitNativeCode(moves);
- ASSERT_STREQ("(2 <-> 0) (1 -> 3)", resolver.GetMessage().c_str());
+ ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str());
+ }
+ {
+ TestParallelMoveResolver resolver(&allocator);
+ HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
+ moves->AddMove(
+ Location::RegisterLocation(2),
+ Location::RegisterLocation(7),
+ nullptr);
+ moves->AddMove(
+ Location::RegisterLocation(7),
+ Location::RegisterLocation(1),
+ nullptr);
+ moves->AddMove(
+ Location::RegisterPairLocation(0, 1),
+ Location::RegisterPairLocation(2, 3),
+ nullptr);
+ resolver.EmitNativeCode(moves);
+ ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str());
+ }
+ {
+ TestParallelMoveResolver resolver(&allocator);
+ HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
+ moves->AddMove(
+ Location::RegisterLocation(2),
+ Location::RegisterLocation(7),
+ nullptr);
+ moves->AddMove(
+ Location::RegisterPairLocation(0, 1),
+ Location::RegisterPairLocation(2, 3),
+ nullptr);
+ moves->AddMove(
+ Location::RegisterLocation(7),
+ Location::RegisterLocation(1),
+ nullptr);
+ resolver.EmitNativeCode(moves);
+ ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str());
+ }
+ {
+ TestParallelMoveResolver resolver(&allocator);
+ HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
+ moves->AddMove(
+ Location::RegisterPairLocation(0, 1),
+ Location::RegisterPairLocation(2, 3),
+ nullptr);
+ moves->AddMove(
+ Location::RegisterLocation(2),
+ Location::RegisterLocation(7),
+ nullptr);
+ moves->AddMove(
+ Location::RegisterLocation(7),
+ Location::RegisterLocation(1),
+ nullptr);
+ resolver.EmitNativeCode(moves);
+ ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str());
+ }
+ {
+ TestParallelMoveResolver resolver(&allocator);
+ HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
+ moves->AddMove(
+ Location::RegisterPairLocation(0, 1),
+ Location::RegisterPairLocation(2, 3),
+ nullptr);
+ moves->AddMove(
+ Location::RegisterPairLocation(2, 3),
+ Location::RegisterPairLocation(0, 1),
+ nullptr);
+ resolver.EmitNativeCode(moves);
+ ASSERT_STREQ("(2,3 <-> 0,1)", resolver.GetMessage().c_str());
+ }
+ {
+ TestParallelMoveResolver resolver(&allocator);
+ HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
+ moves->AddMove(
+ Location::RegisterPairLocation(2, 3),
+ Location::RegisterPairLocation(0, 1),
+ nullptr);
+ moves->AddMove(
+ Location::RegisterPairLocation(0, 1),
+ Location::RegisterPairLocation(2, 3),
+ nullptr);
+ resolver.EmitNativeCode(moves);
+ ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str());
}
}
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index 12acd0884a..2d9a2bf330 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -42,6 +42,11 @@ void PrepareForRegisterAllocation::VisitBoundsCheck(HBoundsCheck* check) {
check->ReplaceWith(check->InputAt(0));
}
+void PrepareForRegisterAllocation::VisitBoundType(HBoundType* bound_type) {
+ bound_type->ReplaceWith(bound_type->InputAt(0));
+ bound_type->GetBlock()->RemoveInstruction(bound_type);
+}
+
void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) {
HLoadClass* cls = check->GetLoadClass();
check->ReplaceWith(cls);
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index 0fdb65ffe0..0f697fbc25 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -36,6 +36,7 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor {
virtual void VisitNullCheck(HNullCheck* check) OVERRIDE;
virtual void VisitDivZeroCheck(HDivZeroCheck* check) OVERRIDE;
virtual void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE;
+ virtual void VisitBoundType(HBoundType* bound_type) OVERRIDE;
virtual void VisitClinitCheck(HClinitCheck* check) OVERRIDE;
virtual void VisitCondition(HCondition* condition) OVERRIDE;
diff --git a/compiler/optimizing/pretty_printer_test.cc b/compiler/optimizing/pretty_printer_test.cc
index 9cf8235d85..293fde978e 100644
--- a/compiler/optimizing/pretty_printer_test.cc
+++ b/compiler/optimizing/pretty_printer_test.cc
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "base/arena_allocator.h"
#include "base/stringprintf.h"
#include "builder.h"
#include "dex_file.h"
@@ -21,7 +22,6 @@
#include "nodes.h"
#include "optimizing_unit_test.h"
#include "pretty_printer.h"
-#include "utils/arena_allocator.h"
#include "gtest/gtest.h"
diff --git a/compiler/optimizing/primitive_type_propagation.cc b/compiler/optimizing/primitive_type_propagation.cc
index 7e274f6ebf..fe23fcf326 100644
--- a/compiler/optimizing/primitive_type_propagation.cc
+++ b/compiler/optimizing/primitive_type_propagation.cc
@@ -40,6 +40,7 @@ static Primitive::Type MergeTypes(Primitive::Type existing, Primitive::Type new_
// Re-compute and update the type of the instruction. Returns
// whether or not the type was changed.
bool PrimitiveTypePropagation::UpdateType(HPhi* phi) {
+ DCHECK(phi->IsLive());
Primitive::Type existing = phi->GetType();
Primitive::Type new_type = existing;
@@ -49,15 +50,20 @@ bool PrimitiveTypePropagation::UpdateType(HPhi* phi) {
}
phi->SetType(new_type);
- if (new_type == Primitive::kPrimDouble || new_type == Primitive::kPrimFloat) {
+ if (new_type == Primitive::kPrimDouble
+ || new_type == Primitive::kPrimFloat
+ || new_type == Primitive::kPrimNot) {
// If the phi is of floating point type, we need to update its inputs to that
// type. For inputs that are phis, we need to recompute their types.
for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
HInstruction* input = phi->InputAt(i);
if (input->GetType() != new_type) {
- HInstruction* equivalent = SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type);
+ HInstruction* equivalent = (new_type == Primitive::kPrimNot)
+ ? SsaBuilder::GetReferenceTypeEquivalent(input)
+ : SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type);
phi->ReplaceInput(equivalent, i);
if (equivalent->IsPhi()) {
+ equivalent->AsPhi()->SetLive();
AddToWorklist(equivalent->AsPhi());
}
}
@@ -78,15 +84,9 @@ void PrimitiveTypePropagation::VisitBasicBlock(HBasicBlock* block) {
if (block->IsLoopHeader()) {
for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
HPhi* phi = it.Current()->AsPhi();
- // Set the initial type for the phi. Use the non back edge input for reaching
- // a fixed point faster.
- Primitive::Type phi_type = phi->GetType();
- // We merge with the existing type, that has been set by the SSA builder.
- DCHECK(phi_type == Primitive::kPrimVoid
- || phi_type == Primitive::kPrimFloat
- || phi_type == Primitive::kPrimDouble);
- phi->SetType(MergeTypes(phi->InputAt(0)->GetType(), phi->GetType()));
- AddToWorklist(phi);
+ if (phi->IsLive()) {
+ AddToWorklist(phi);
+ }
}
} else {
for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
@@ -95,7 +95,10 @@ void PrimitiveTypePropagation::VisitBasicBlock(HBasicBlock* block) {
// doing a reverse post-order visit, therefore either the phi users are
// non-loop phi and will be visited later in the visit, or are loop-phis,
// and they are already in the work list.
- UpdateType(it.Current()->AsPhi());
+ HPhi* phi = it.Current()->AsPhi();
+ if (phi->IsLive()) {
+ UpdateType(phi);
+ }
}
}
}
@@ -110,13 +113,14 @@ void PrimitiveTypePropagation::ProcessWorklist() {
}
void PrimitiveTypePropagation::AddToWorklist(HPhi* instruction) {
+ DCHECK(instruction->IsLive());
worklist_.Add(instruction);
}
void PrimitiveTypePropagation::AddDependentInstructionsToWorklist(HPhi* instruction) {
for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
HPhi* phi = it.Current()->GetUser()->AsPhi();
- if (phi != nullptr) {
+ if (phi != nullptr && phi->IsLive()) {
AddToWorklist(phi);
}
}
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 24e6837f45..76b8d7eacf 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -16,16 +16,17 @@
#include "reference_type_propagation.h"
+#include "class_linker.h"
+#include "mirror/class-inl.h"
+#include "mirror/dex_cache.h"
+#include "scoped_thread_state_change.h"
+
namespace art {
-// TODO: Only do the analysis on reference types. We currently have to handle
-// the `null` constant, that is represented as a `HIntConstant` and therefore
-// has the Primitive::kPrimInt type.
+// TODO: handle `a != null` and `a == null`.
void ReferenceTypePropagation::Run() {
- // Compute null status for instructions.
-
- // To properly propagate not-null info we need to visit in the dominator-based order.
+ // To properly propagate type info we need to visit in the dominator-based order.
// Reverse post order guarantees a node's dominators are visited first.
// We take advantage of this order in `VisitBasicBlock`.
for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
@@ -34,9 +35,210 @@ void ReferenceTypePropagation::Run() {
ProcessWorklist();
}
+void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) {
+ // TODO: handle other instructions that give type info
+ // (NewArray/Call/Field accesses/array accesses)
+
+ // Initialize exact types first for faster convergence.
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* instr = it.Current();
+ if (instr->IsNewInstance()) {
+ VisitNewInstance(instr->AsNewInstance());
+ } else if (instr->IsLoadClass()) {
+ VisitLoadClass(instr->AsLoadClass());
+ }
+ }
+
+ // Handle Phis.
+ for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+ VisitPhi(it.Current()->AsPhi());
+ }
+
+ // Add extra nodes to bound types.
+ BoundTypeForIfInstanceOf(block);
+}
+
+// Detects if `block` is the True block for the pattern
+// `if (x instanceof ClassX) { }`
+// If that's the case insert an HBoundType instruction to bound the type of `x`
+// to `ClassX` in the scope of the dominated blocks.
+void ReferenceTypePropagation::BoundTypeForIfInstanceOf(HBasicBlock* block) {
+ HInstruction* lastInstruction = block->GetLastInstruction();
+ if (!lastInstruction->IsIf()) {
+ return;
+ }
+ HInstruction* ifInput = lastInstruction->InputAt(0);
+ // TODO: Handle more patterns here: HIf(bool), HIf(HNotEqual).
+ if (!ifInput->IsEqual()) {
+ return;
+ }
+ HInstruction* instanceOf = ifInput->InputAt(0);
+ HInstruction* comp_value = ifInput->InputAt(1);
+ if (!instanceOf->IsInstanceOf() || !comp_value->IsIntConstant()) {
+ return;
+ }
+
+ HInstruction* obj = instanceOf->InputAt(0);
+ HLoadClass* load_class = instanceOf->InputAt(1)->AsLoadClass();
+
+ ReferenceTypeInfo obj_rti = obj->GetReferenceTypeInfo();
+ ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI();
+ HBoundType* bound_type = new (graph_->GetArena()) HBoundType(obj, class_rti);
+
+ // Narrow the type as much as possible.
+ {
+ ScopedObjectAccess soa(Thread::Current());
+ if (!load_class->IsResolved() || class_rti.IsSupertypeOf(obj_rti)) {
+ bound_type->SetReferenceTypeInfo(obj_rti);
+ } else {
+ bound_type->SetReferenceTypeInfo(
+ ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ false));
+ }
+ }
+
+ block->InsertInstructionBefore(bound_type, lastInstruction);
+ // Pick the right successor based on the value we compare against.
+ HIntConstant* comp_value_int = comp_value->AsIntConstant();
+ HBasicBlock* instanceOfTrueBlock = comp_value_int->GetValue() == 0
+ ? lastInstruction->AsIf()->IfFalseSuccessor()
+ : lastInstruction->AsIf()->IfTrueSuccessor();
+
+ for (HUseIterator<HInstruction*> it(obj->GetUses()); !it.Done(); it.Advance()) {
+ HInstruction* user = it.Current()->GetUser();
+ if (instanceOfTrueBlock->Dominates(user->GetBlock())) {
+ user->ReplaceInput(bound_type, it.Current()->GetIndex());
+ }
+ }
+}
+
+void ReferenceTypePropagation::VisitNewInstance(HNewInstance* instr) {
+ ScopedObjectAccess soa(Thread::Current());
+ mirror::DexCache* dex_cache = dex_compilation_unit_.GetClassLinker()->FindDexCache(dex_file_);
+ // Get type from dex cache assuming it was populated by the verifier.
+ mirror::Class* resolved_class = dex_cache->GetResolvedType(instr->GetTypeIndex());
+ if (resolved_class != nullptr) {
+ MutableHandle<mirror::Class> handle = handles_->NewHandle(resolved_class);
+ instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(handle, true));
+ }
+}
+
+void ReferenceTypePropagation::VisitLoadClass(HLoadClass* instr) {
+ ScopedObjectAccess soa(Thread::Current());
+ mirror::DexCache* dex_cache = dex_compilation_unit_.GetClassLinker()->FindDexCache(dex_file_);
+ // Get type from dex cache assuming it was populated by the verifier.
+ mirror::Class* resolved_class = dex_cache->GetResolvedType(instr->GetTypeIndex());
+ if (resolved_class != nullptr) {
+ Handle<mirror::Class> handle = handles_->NewHandle(resolved_class);
+ instr->SetLoadedClassRTI(ReferenceTypeInfo::Create(handle, /* is_exact */ true));
+ }
+ Handle<mirror::Class> class_handle = handles_->NewHandle(mirror::Class::GetJavaLangClass());
+ instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(class_handle, /* is_exact */ true));
+}
+
+void ReferenceTypePropagation::VisitPhi(HPhi* phi) {
+ if (phi->GetType() != Primitive::kPrimNot) {
+ return;
+ }
+
+ if (phi->GetBlock()->IsLoopHeader()) {
+ // Set the initial type for the phi. Use the non back edge input for reaching
+ // a fixed point faster.
+ AddToWorklist(phi);
+ phi->SetCanBeNull(phi->InputAt(0)->CanBeNull());
+ phi->SetReferenceTypeInfo(phi->InputAt(0)->GetReferenceTypeInfo());
+ } else {
+ // Eagerly compute the type of the phi, for quicker convergence. Note
+ // that we don't need to add users to the worklist because we are
+ // doing a reverse post-order visit: the phi users are either non-loop
+ // phis, which will be visited later in this visit, or loop phis, which
+ // are already in the work list.
+ UpdateNullability(phi);
+ UpdateReferenceTypeInfo(phi);
+ }
+}
+
+ReferenceTypeInfo ReferenceTypePropagation::MergeTypes(const ReferenceTypeInfo& a,
+ const ReferenceTypeInfo& b) {
+ bool is_exact = a.IsExact() && b.IsExact();
+ bool is_top = a.IsTop() || b.IsTop();
+ Handle<mirror::Class> type_handle;
+
+ if (!is_top) {
+ if (a.GetTypeHandle().Get() == b.GetTypeHandle().Get()) {
+ type_handle = a.GetTypeHandle();
+ } else if (a.IsSupertypeOf(b)) {
+ type_handle = a.GetTypeHandle();
+ is_exact = false;
+ } else if (b.IsSupertypeOf(a)) {
+ type_handle = b.GetTypeHandle();
+ is_exact = false;
+ } else {
+ // TODO: Find a common super class.
+ is_top = true;
+ is_exact = false;
+ }
+ }
+
+ return is_top
+ ? ReferenceTypeInfo::CreateTop(is_exact)
+ : ReferenceTypeInfo::Create(type_handle, is_exact);
+}
+
+bool ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr) {
+ ScopedObjectAccess soa(Thread::Current());
+
+ ReferenceTypeInfo previous_rti = instr->GetReferenceTypeInfo();
+ if (instr->IsBoundType()) {
+ UpdateBoundType(instr->AsBoundType());
+ } else if (instr->IsPhi()) {
+ UpdatePhi(instr->AsPhi());
+ } else {
+ LOG(FATAL) << "Invalid instruction (should not get here)";
+ }
+
+ return !previous_rti.IsEqual(instr->GetReferenceTypeInfo());
+}
+
+void ReferenceTypePropagation::UpdateBoundType(HBoundType* instr) {
+ ReferenceTypeInfo new_rti = instr->InputAt(0)->GetReferenceTypeInfo();
+ // Be sure that we don't go over the bounded type.
+ ReferenceTypeInfo bound_rti = instr->GetBoundType();
+ if (!bound_rti.IsSupertypeOf(new_rti)) {
+ new_rti = bound_rti;
+ }
+ instr->SetReferenceTypeInfo(new_rti);
+}
+
+void ReferenceTypePropagation::UpdatePhi(HPhi* instr) {
+ ReferenceTypeInfo new_rti = instr->InputAt(0)->GetReferenceTypeInfo();
+ if (new_rti.IsTop() && !new_rti.IsExact()) {
+ // Early return if we are Top and inexact.
+ instr->SetReferenceTypeInfo(new_rti);
+ return;
+ }
+ for (size_t i = 1; i < instr->InputCount(); i++) {
+ new_rti = MergeTypes(new_rti, instr->InputAt(i)->GetReferenceTypeInfo());
+ if (new_rti.IsTop()) {
+ if (!new_rti.IsExact()) {
+ break;
+ } else {
+ continue;
+ }
+ }
+ }
+ instr->SetReferenceTypeInfo(new_rti);
+}
+
// Re-computes and updates the nullability of the instruction. Returns whether or
// not the nullability was changed.
-bool ReferenceTypePropagation::UpdateNullability(HPhi* phi) {
+bool ReferenceTypePropagation::UpdateNullability(HInstruction* instr) {
+ DCHECK(instr->IsPhi() || instr->IsBoundType());
+
+ if (!instr->IsPhi()) {
+ return false;
+ }
+
+ HPhi* phi = instr->AsPhi();
bool existing_can_be_null = phi->CanBeNull();
bool new_can_be_null = false;
for (size_t i = 0; i < phi->InputCount(); i++) {
@@ -47,48 +249,26 @@ bool ReferenceTypePropagation::UpdateNullability(HPhi* phi) {
return existing_can_be_null != new_can_be_null;
}
-
-void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) {
- if (block->IsLoopHeader()) {
- for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
- // Set the initial type for the phi. Use the non back edge input for reaching
- // a fixed point faster.
- HPhi* phi = it.Current()->AsPhi();
- AddToWorklist(phi);
- phi->SetCanBeNull(phi->InputAt(0)->CanBeNull());
- }
- } else {
- for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
- // Eagerly compute the type of the phi, for quicker convergence. Note
- // that we don't need to add users to the worklist because we are
- // doing a reverse post-order visit, therefore either the phi users are
- // non-loop phi and will be visited later in the visit, or are loop-phis,
- // and they are already in the work list.
- UpdateNullability(it.Current()->AsPhi());
- }
- }
-}
-
void ReferenceTypePropagation::ProcessWorklist() {
while (!worklist_.IsEmpty()) {
- HPhi* instruction = worklist_.Pop();
- if (UpdateNullability(instruction)) {
+ HInstruction* instruction = worklist_.Pop();
+ if (UpdateNullability(instruction) || UpdateReferenceTypeInfo(instruction)) {
AddDependentInstructionsToWorklist(instruction);
}
}
}
-void ReferenceTypePropagation::AddToWorklist(HPhi* instruction) {
+void ReferenceTypePropagation::AddToWorklist(HInstruction* instruction) {
+ DCHECK_EQ(instruction->GetType(), Primitive::kPrimNot) << instruction->GetType();
worklist_.Add(instruction);
}
-void ReferenceTypePropagation::AddDependentInstructionsToWorklist(HPhi* instruction) {
+void ReferenceTypePropagation::AddDependentInstructionsToWorklist(HInstruction* instruction) {
for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
- HPhi* phi = it.Current()->GetUser()->AsPhi();
- if (phi != nullptr) {
- AddToWorklist(phi);
+ HInstruction* user = it.Current()->GetUser();
+ if (user->IsPhi() || user->IsBoundType()) {
+ AddToWorklist(user);
}
}
}
-
} // namespace art
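
The MergeTypes helper added above keeps exactness only when both sides agree, falls back to the common supertype (dropping exactness) when one side subsumes the other, and degrades to Top when no relationship is known. Below is a minimal standalone sketch of that lattice with a toy IsSupertypeOf oracle; the real code asks the runtime class hierarchy under the mutator lock, and Rti/Merge here are invented names.

    #include <cassert>
    #include <string>

    // Toy stand-in for ReferenceTypeInfo: a class name plus exactness, with
    // "Top" meaning the reference type is unknown.
    struct Rti {
      bool is_top;
      bool is_exact;
      std::string name;   // only meaningful when !is_top
    };

    // Hypothetical subtyping oracle (toy hierarchy: Object above everything).
    bool IsSupertypeOf(const std::string& a, const std::string& b) {
      return a == "Object" || a == b;
    }

    Rti Merge(const Rti& a, const Rti& b) {
      bool exact = a.is_exact && b.is_exact;
      if (a.is_top || b.is_top) return {true, exact, ""};
      if (a.name == b.name) return {false, exact, a.name};
      if (IsSupertypeOf(a.name, b.name)) return {false, false, a.name};
      if (IsSupertypeOf(b.name, a.name)) return {false, false, b.name};
      return {true, false, ""};  // no common class known: fall back to Top
    }

    int main() {
      Rti s{false, true, "String"};
      Rti o{false, false, "Object"};
      assert(Merge(s, s).is_exact);                           // same exact type stays exact
      assert(Merge(s, o).name == "Object");                   // supertype wins, exactness dropped
      assert(Merge(s, Rti{false, true, "Thread"}).is_top);    // unrelated types -> Top
      return 0;
    }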
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index a74319d0c5..e346dbfc6c 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -17,31 +17,57 @@
#ifndef ART_COMPILER_OPTIMIZING_REFERENCE_TYPE_PROPAGATION_H_
#define ART_COMPILER_OPTIMIZING_REFERENCE_TYPE_PROPAGATION_H_
+#include "driver/dex_compilation_unit.h"
+#include "handle_scope-inl.h"
#include "nodes.h"
#include "optimization.h"
+#include "optimizing_compiler_stats.h"
namespace art {
/**
* Propagates reference types to instructions.
- * TODO: Currently only nullability is computed.
*/
class ReferenceTypePropagation : public HOptimization {
public:
- explicit ReferenceTypePropagation(HGraph* graph)
+ ReferenceTypePropagation(HGraph* graph,
+ const DexFile& dex_file,
+ const DexCompilationUnit& dex_compilation_unit,
+ StackHandleScopeCollection* handles)
: HOptimization(graph, true, "reference_type_propagation"),
+ dex_file_(dex_file),
+ dex_compilation_unit_(dex_compilation_unit),
+ handles_(handles),
worklist_(graph->GetArena(), kDefaultWorklistSize) {}
void Run() OVERRIDE;
private:
+ void VisitNewInstance(HNewInstance* new_instance);
+ void VisitLoadClass(HLoadClass* load_class);
+ void VisitPhi(HPhi* phi);
void VisitBasicBlock(HBasicBlock* block);
+
+ void UpdateBoundType(HBoundType* bound_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+ void UpdatePhi(HPhi* phi) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+ void BoundTypeForIfInstanceOf(HBasicBlock* block);
+
void ProcessWorklist();
- void AddToWorklist(HPhi* phi);
- void AddDependentInstructionsToWorklist(HPhi* phi);
- bool UpdateNullability(HPhi* phi);
+ void AddToWorklist(HInstruction* instr);
+ void AddDependentInstructionsToWorklist(HInstruction* instr);
+
+ bool UpdateNullability(HInstruction* instr);
+ bool UpdateReferenceTypeInfo(HInstruction* instr);
+
+ ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a, const ReferenceTypeInfo& b)
+ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+ const DexFile& dex_file_;
+ const DexCompilationUnit& dex_compilation_unit_;
+ StackHandleScopeCollection* handles_;
- GrowableArray<HPhi*> worklist_;
+ GrowableArray<HInstruction*> worklist_;
static constexpr size_t kDefaultWorklistSize = 8;
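
The pass sketched by this header is a classic worklist fixed point: pop an instruction, recompute its facts (nullability and reference type info), and push its dependent phis and bound types only when something changed. The toy, self-contained version below does this for nullability alone; Node, Update and the optimistic seeding are invented for the example, and termination follows because can_be_null only ever moves from false to true.

    #include <cstdio>
    #include <vector>

    // Toy phi node: a nullability bit plus input and user indices.
    struct Node {
      bool can_be_null;
      std::vector<int> inputs;
      std::vector<int> users;
    };

    // Recompute nullability of one node; returns true if it changed
    // (the analogue of UpdateNullability).
    bool Update(std::vector<Node>& nodes, int n) {
      bool new_null = false;
      for (int in : nodes[n].inputs) new_null |= nodes[in].can_be_null;
      bool changed = nodes[n].can_be_null != new_null;
      nodes[n].can_be_null = new_null;
      return changed;
    }

    int main() {
      // 0: parameter (maybe null), 1: new-instance (never null),
      // 2: loop phi over {0, 2}, seeded as not-null just for the example.
      std::vector<Node> nodes = {
          {true,  {},     {2}},
          {false, {},     {}},
          {false, {0, 2}, {2}},
      };
      std::vector<int> worklist = {2};
      while (!worklist.empty()) {
        int n = worklist.back();
        worklist.pop_back();
        if (Update(nodes, n)) {
          for (int u : nodes[n].users) worklist.push_back(u);  // dependents
        }
      }
      std::printf("phi can_be_null = %d\n", nodes[2].can_be_null);  // prints 1
      return 0;
    }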
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 3809720cb4..54e62a5b2c 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -48,7 +48,10 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator,
physical_core_register_intervals_(allocator, codegen->GetNumberOfCoreRegisters()),
physical_fp_register_intervals_(allocator, codegen->GetNumberOfFloatingPointRegisters()),
temp_intervals_(allocator, 4),
- spill_slots_(allocator, kDefaultNumberOfSpillSlots),
+ int_spill_slots_(allocator, kDefaultNumberOfSpillSlots),
+ long_spill_slots_(allocator, kDefaultNumberOfSpillSlots),
+ float_spill_slots_(allocator, kDefaultNumberOfSpillSlots),
+ double_spill_slots_(allocator, kDefaultNumberOfSpillSlots),
safepoints_(allocator, 0),
processing_core_registers_(false),
number_of_registers_(-1),
@@ -252,8 +255,13 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) {
&& (instruction->GetType() != Primitive::kPrimFloat);
if (locations->CanCall()) {
- if (!instruction->IsSuspendCheck()) {
- codegen_->MarkNotLeaf();
+ if (codegen_->IsLeafMethod()) {
+ // TODO: We do this here because we do not want the suspend check to artificially
+ // create live registers. We should find another place, but this is currently the
+ // simplest.
+ DCHECK(instruction->IsSuspendCheckEntry());
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ return;
}
safepoints_.Add(instruction);
if (locations->OnlyCallsOnSlowPath()) {
@@ -433,7 +441,7 @@ bool RegisterAllocator::ValidateInternal(bool log_fatal_on_failure) const {
}
}
- return ValidateIntervals(intervals, spill_slots_.Size(), reserved_out_slots_, *codegen_,
+ return ValidateIntervals(intervals, GetNumberOfSpillSlots(), reserved_out_slots_, *codegen_,
allocator_, processing_core_registers_, log_fatal_on_failure);
}
@@ -1128,41 +1136,62 @@ void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) {
}
size_t end = last_sibling->GetEnd();
+ GrowableArray<size_t>* spill_slots = nullptr;
+ switch (interval->GetType()) {
+ case Primitive::kPrimDouble:
+ spill_slots = &double_spill_slots_;
+ break;
+ case Primitive::kPrimLong:
+ spill_slots = &long_spill_slots_;
+ break;
+ case Primitive::kPrimFloat:
+ spill_slots = &float_spill_slots_;
+ break;
+ case Primitive::kPrimNot:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimShort:
+ spill_slots = &int_spill_slots_;
+ break;
+ case Primitive::kPrimVoid:
+ LOG(FATAL) << "Unexpected type for interval " << interval->GetType();
+ }
+
// Find an available spill slot.
size_t slot = 0;
- for (size_t e = spill_slots_.Size(); slot < e; ++slot) {
- // We check if it is less rather than less or equal because the parallel move
- // resolver does not work when a single spill slot needs to be exchanged with
- // a double spill slot. The strict comparison avoids needing to exchange these
- // locations at the same lifetime position.
- if (spill_slots_.Get(slot) < parent->GetStart()
- && (slot == (e - 1) || spill_slots_.Get(slot + 1) < parent->GetStart())) {
+ for (size_t e = spill_slots->Size(); slot < e; ++slot) {
+ if (spill_slots->Get(slot) <= parent->GetStart()
+ && (slot == (e - 1) || spill_slots->Get(slot + 1) <= parent->GetStart())) {
break;
}
}
if (parent->NeedsTwoSpillSlots()) {
- if (slot == spill_slots_.Size()) {
+ if (slot == spill_slots->Size()) {
// We need a new spill slot.
- spill_slots_.Add(end);
- spill_slots_.Add(end);
- } else if (slot == spill_slots_.Size() - 1) {
- spill_slots_.Put(slot, end);
- spill_slots_.Add(end);
+ spill_slots->Add(end);
+ spill_slots->Add(end);
+ } else if (slot == spill_slots->Size() - 1) {
+ spill_slots->Put(slot, end);
+ spill_slots->Add(end);
} else {
- spill_slots_.Put(slot, end);
- spill_slots_.Put(slot + 1, end);
+ spill_slots->Put(slot, end);
+ spill_slots->Put(slot + 1, end);
}
} else {
- if (slot == spill_slots_.Size()) {
+ if (slot == spill_slots->Size()) {
// We need a new spill slot.
- spill_slots_.Add(end);
+ spill_slots->Add(end);
} else {
- spill_slots_.Put(slot, end);
+ spill_slots->Put(slot, end);
}
}
- parent->SetSpillSlot((slot + reserved_out_slots_) * kVRegSize);
+ // Note that the exact spill slot location will be computed when we resolve,
+ // that is when we know the number of spill slots for each type.
+ parent->SetSpillSlot(slot);
}
static bool IsValidDestination(Location destination) {
@@ -1511,7 +1540,7 @@ void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval,
}
void RegisterAllocator::Resolve() {
- codegen_->InitializeCodeGeneration(spill_slots_.Size(),
+ codegen_->InitializeCodeGeneration(GetNumberOfSpillSlots(),
maximum_number_of_live_core_registers_,
maximum_number_of_live_fp_registers_,
reserved_out_slots_,
@@ -1537,6 +1566,39 @@ void RegisterAllocator::Resolve() {
} else if (current->HasSpillSlot()) {
current->SetSpillSlot(current->GetSpillSlot() + codegen_->GetFrameSize());
}
+ } else if (current->HasSpillSlot()) {
+ // Adjust the stack slot, now that we know the number of spill slots for each type.
+ // The way this implementation lays out the stack is the following:
+ // [parameter slots ]
+ // [double spill slots ]
+ // [long spill slots ]
+ // [float spill slots ]
+ // [int/ref values ]
+ // [maximum out values ] (number of arguments for calls)
+ // [art method ].
+ uint32_t slot = current->GetSpillSlot();
+ switch (current->GetType()) {
+ case Primitive::kPrimDouble:
+ slot += long_spill_slots_.Size();
+ FALLTHROUGH_INTENDED;
+ case Primitive::kPrimLong:
+ slot += float_spill_slots_.Size();
+ FALLTHROUGH_INTENDED;
+ case Primitive::kPrimFloat:
+ slot += int_spill_slots_.Size();
+ FALLTHROUGH_INTENDED;
+ case Primitive::kPrimNot:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimShort:
+ slot += reserved_out_slots_;
+ break;
+ case Primitive::kPrimVoid:
+ LOG(FATAL) << "Unexpected type for interval " << current->GetType();
+ }
+ current->SetSpillSlot(slot * kVRegSize);
}
Location source = current->ToLocation();
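
The layout comment above fixes the order double / long / float / int-or-ref above the outgoing-argument area, and Resolve() turns a per-kind slot index into an absolute one by falling through the switch. The sketch below reproduces just that arithmetic with invented names (Frame, AbsoluteSlot); the real code additionally multiplies by kVRegSize and folds the other integral and reference types into the int bucket.

    #include <cassert>
    #include <cstddef>

    enum class SlotKind { Int, Long, Float, Double };

    // Per-kind slot counts plus the reserved outgoing-argument slots, matching
    // the layout: [double][long][float][int/ref][out args][art method].
    struct Frame {
      size_t out_slots, int_slots, float_slots, long_slots;
    };

    // Each kind is offset by every kind laid out below it, exactly like the
    // fall-through additions in Resolve().
    size_t AbsoluteSlot(const Frame& f, SlotKind kind, size_t slot) {
      switch (kind) {
        case SlotKind::Double: slot += f.long_slots;  [[fallthrough]];
        case SlotKind::Long:   slot += f.float_slots; [[fallthrough]];
        case SlotKind::Float:  slot += f.int_slots;   [[fallthrough]];
        case SlotKind::Int:    slot += f.out_slots;   break;
      }
      return slot;
    }

    int main() {
      Frame f{/*out*/ 2, /*int*/ 3, /*float*/ 1, /*long*/ 4};
      assert(AbsoluteSlot(f, SlotKind::Int, 0) == 2);            // right above out args
      assert(AbsoluteSlot(f, SlotKind::Float, 0) == 2 + 3);      // above int/ref slots
      assert(AbsoluteSlot(f, SlotKind::Double, 0) == 2 + 3 + 1 + 4);
      return 0;
    }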
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index b8f70bdc18..ff2f106b74 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -75,7 +75,10 @@ class RegisterAllocator {
}
size_t GetNumberOfSpillSlots() const {
- return spill_slots_.Size();
+ return int_spill_slots_.Size()
+ + long_spill_slots_.Size()
+ + float_spill_slots_.Size()
+ + double_spill_slots_.Size();
}
private:
@@ -171,8 +174,14 @@ class RegisterAllocator {
// where an instruction requires a temporary.
GrowableArray<LiveInterval*> temp_intervals_;
- // The spill slots allocated for live intervals.
- GrowableArray<size_t> spill_slots_;
+ // The spill slots allocated for live intervals. We ensure spill slots
+ // are typed to avoid (1) doing moves and swaps between two different kinds
+ // of registers, and (2) swapping between a single stack slot and a double
+ // stack slot. This simplifies the parallel move resolver.
+ GrowableArray<size_t> int_spill_slots_;
+ GrowableArray<size_t> long_spill_slots_;
+ GrowableArray<size_t> float_spill_slots_;
+ GrowableArray<size_t> double_spill_slots_;
// Instructions that need a safepoint.
GrowableArray<HInstruction*> safepoints_;
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 0cc00c0fde..e5d06a9f8b 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "base/arena_allocator.h"
#include "builder.h"
#include "code_generator.h"
#include "code_generator_x86.h"
@@ -25,7 +26,6 @@
#include "register_allocator.h"
#include "ssa_liveness_analysis.h"
#include "ssa_phi_elimination.h"
-#include "utils/arena_allocator.h"
#include "gtest/gtest.h"
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index c9a21aa681..3dc75059b2 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -42,20 +42,33 @@ void SsaBuilder::BuildSsa() {
}
}
- // 3) Remove dead phis. This will remove phis that are only used by environments:
+ // 3) Mark dead phis. This will mark phis that are only used by environments:
// at the DEX level, the type of these phis does not need to be consistent, but
// our code generator will complain if the inputs of a phi do not have the same
- // type (modulo the special case of `null`).
- SsaDeadPhiElimination dead_phis(GetGraph());
- dead_phis.Run();
+ // type. The marking allows the type propagation to know which phis it needs
+ // to handle. We mark but do not eliminate: the elimination will be done in
+ // step 5).
+ {
+ SsaDeadPhiElimination dead_phis(GetGraph());
+ dead_phis.MarkDeadPhis();
+ }
// 4) Propagate types of phis. At this point, phis are typed void in the general
- // case, or float or double when we created a floating-point equivalent. So we
+ // case, or float/double/reference when we created an equivalent phi. So we
// need to propagate the types across phis to give them a correct type.
PrimitiveTypePropagation type_propagation(GetGraph());
type_propagation.Run();
- // 5) Clear locals.
+ // 5) Step 4) changes inputs of phis, which may lead to dead phis again. We re-run
+ // the algorithm, this time eliminating them.
+ // TODO: Make this work with debug info and reference liveness. We currently
+ // eagerly remove phis used in environments.
+ {
+ SsaDeadPhiElimination dead_phis(GetGraph());
+ dead_phis.Run();
+ }
+
+ // 6) Clear locals.
// TODO: Move this to a dead code eliminator phase.
for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions());
!it.Done();
@@ -185,15 +198,24 @@ static HDoubleConstant* GetDoubleEquivalent(HLongConstant* constant) {
/**
* Because of Dex format, we might end up having the same phi being
- * used for non floating point operations and floating point operations. Because
- * we want the graph to be correctly typed (and thereafter avoid moves between
+ * used for non floating point operations and floating point / reference operations.
+ * Because we want the graph to be correctly typed (and thereafter avoid moves between
* floating point registers and core registers), we need to create a copy of the
- * phi with a floating point type.
+ * phi with a floating point / reference type.
*/
-static HPhi* GetFloatOrDoubleEquivalentOfPhi(HPhi* phi, Primitive::Type type) {
- // We place the floating point phi next to this phi.
+static HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type) {
+ // We place the floating point / reference phi next to this phi.
HInstruction* next = phi->GetNext();
- if (next == nullptr || (next->AsPhi()->GetRegNumber() != phi->GetRegNumber())) {
+ if (next != nullptr
+ && next->AsPhi()->GetRegNumber() == phi->GetRegNumber()
+ && next->GetType() != type) {
+ // Move to the next phi to see if it is the one we are looking for.
+ next = next->GetNext();
+ }
+
+ if (next == nullptr
+ || (next->AsPhi()->GetRegNumber() != phi->GetRegNumber())
+ || (next->GetType() != type)) {
ArenaAllocator* allocator = phi->GetBlock()->GetGraph()->GetArena();
HPhi* new_phi = new (allocator) HPhi(allocator, phi->GetRegNumber(), phi->InputCount(), type);
for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
@@ -223,7 +245,7 @@ HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* user,
} else if (value->IsIntConstant()) {
return GetFloatEquivalent(value->AsIntConstant());
} else if (value->IsPhi()) {
- return GetFloatOrDoubleEquivalentOfPhi(value->AsPhi(), type);
+ return GetFloatDoubleOrReferenceEquivalentOfPhi(value->AsPhi(), type);
} else {
// For other instructions, we assume the verifier has checked that the dex format is correctly
// typed and the value in a dex register will not be used for both floating point and
@@ -234,12 +256,25 @@ HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* user,
}
}
+HInstruction* SsaBuilder::GetReferenceTypeEquivalent(HInstruction* value) {
+ if (value->IsIntConstant()) {
+ DCHECK_EQ(value->AsIntConstant()->GetValue(), 0);
+ return value->GetBlock()->GetGraph()->GetNullConstant();
+ } else {
+ DCHECK(value->IsPhi());
+ return GetFloatDoubleOrReferenceEquivalentOfPhi(value->AsPhi(), Primitive::kPrimNot);
+ }
+}
+
void SsaBuilder::VisitLoadLocal(HLoadLocal* load) {
HInstruction* value = current_locals_->GetInstructionAt(load->GetLocal()->GetRegNumber());
- if (load->GetType() != value->GetType()
- && (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble)) {
- // If the operation requests a specific type, we make sure its input is of that type.
- value = GetFloatOrDoubleEquivalent(load, value, load->GetType());
+ // If the operation requests a specific type, we make sure its input is of that type.
+ if (load->GetType() != value->GetType()) {
+ if (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble) {
+ value = GetFloatOrDoubleEquivalent(load, value, load->GetType());
+ } else if (load->GetType() == Primitive::kPrimNot) {
+ value = GetReferenceTypeEquivalent(value);
+ }
}
load->ReplaceWith(value);
load->GetBlock()->RemoveInstruction(load);
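
For reference-typed loads, GetReferenceTypeEquivalent either maps the `0` integer constant to the graph's null constant or asks for a reference-typed twin of the phi, and the twin lookup now tolerates one already-created equivalent sitting between the original phi and the one we want. The sketch below shows only that find-or-create step; Phi, FindOrCreateEquivalent and the insertion position are simplifications, not the SsaBuilder API.

    #include <cassert>

    enum class Kind { Void, Float, Double, Ref };

    // Toy phi: a vreg number, a type, and a link to the next phi in the block.
    struct Phi {
      int vreg;
      Kind type;
      Phi* next;
    };

    // Find an equivalent of `phi` with the requested type among the phis placed
    // right after it for the same vreg, or create one there.
    Phi* FindOrCreateEquivalent(Phi* phi, Kind type) {
      Phi* next = phi->next;
      if (next != nullptr && next->vreg == phi->vreg && next->type != type) {
        next = next->next;  // maybe the phi after that one is the right twin
      }
      if (next == nullptr || next->vreg != phi->vreg || next->type != type) {
        Phi* twin = new Phi{phi->vreg, type, phi->next};
        phi->next = twin;
        return twin;
      }
      return next;
    }

    int main() {
      Phi* p = new Phi{4, Kind::Void, nullptr};
      Phi* ref = FindOrCreateEquivalent(p, Kind::Ref);      // creates a vreg 4 twin
      assert(ref != p && ref->type == Kind::Ref);
      assert(FindOrCreateEquivalent(p, Kind::Ref) == ref);  // second lookup reuses it
      return 0;
    }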
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 2eec87b618..148e9590c3 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -58,6 +58,8 @@ class SsaBuilder : public HGraphVisitor {
HInstruction* instruction,
Primitive::Type type);
+ static HInstruction* GetReferenceTypeEquivalent(HInstruction* instruction);
+
private:
// Locals for the current block being visited.
HEnvironment* current_locals_;
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 1b06315fce..bebb73ba22 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -115,14 +115,13 @@ void SsaLivenessAnalysis::NumberInstructions() {
// to differentiate between the start and end of an instruction. Adding 2 to
// the lifetime position for each instruction ensures the start of an
// instruction is different than the end of the previous instruction.
- HGraphVisitor* location_builder = codegen_->GetLocationBuilder();
for (HLinearOrderIterator it(*this); !it.Done(); it.Advance()) {
HBasicBlock* block = it.Current();
block->SetLifetimeStart(lifetime_position);
for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
HInstruction* current = inst_it.Current();
- current->Accept(location_builder);
+ codegen_->AllocateLocations(current);
LocationSummary* locations = current->GetLocations();
if (locations != nullptr && locations->Out().IsValid()) {
instructions_from_ssa_index_.Add(current);
@@ -140,7 +139,7 @@ void SsaLivenessAnalysis::NumberInstructions() {
for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done();
inst_it.Advance()) {
HInstruction* current = inst_it.Current();
- current->Accept(codegen_->GetLocationBuilder());
+ codegen_->AllocateLocations(current);
LocationSummary* locations = current->GetLocations();
if (locations != nullptr && locations->Out().IsValid()) {
instructions_from_ssa_index_.Add(current);
@@ -312,7 +311,12 @@ bool SsaLivenessAnalysis::UpdateLiveIn(const HBasicBlock& block) {
return live_in->UnionIfNotIn(live_out, kill);
}
+static int RegisterOrLowRegister(Location location) {
+ return location.IsPair() ? location.low() : location.reg();
+}
+
int LiveInterval::FindFirstRegisterHint(size_t* free_until) const {
+ DCHECK(!IsHighInterval());
if (GetParent() == this && defined_by_ != nullptr) {
// This is the first interval for the instruction. Try to find
// a register based on its definition.
@@ -334,8 +338,12 @@ int LiveInterval::FindFirstRegisterHint(size_t* free_until) const {
if (user->IsPhi()) {
// If the phi has a register, try to use the same.
Location phi_location = user->GetLiveInterval()->ToLocation();
- if (SameRegisterKind(phi_location) && free_until[phi_location.reg()] >= use_position) {
- return phi_location.reg();
+ if (phi_location.IsRegisterKind()) {
+ DCHECK(SameRegisterKind(phi_location));
+ int reg = RegisterOrLowRegister(phi_location);
+ if (free_until[reg] >= use_position) {
+ return reg;
+ }
}
const GrowableArray<HBasicBlock*>& predecessors = user->GetBlock()->GetPredecessors();
// If the instruction dies at the phi assignment, we can try having the
@@ -348,8 +356,11 @@ int LiveInterval::FindFirstRegisterHint(size_t* free_until) const {
HInstruction* input = user->InputAt(i);
Location location = input->GetLiveInterval()->GetLocationAt(
predecessors.Get(i)->GetLifetimeEnd() - 1);
- if (location.IsRegister() && free_until[location.reg()] >= use_position) {
- return location.reg();
+ if (location.IsRegisterKind()) {
+ int reg = RegisterOrLowRegister(location);
+ if (free_until[reg] >= use_position) {
+ return reg;
+ }
}
}
}
@@ -360,8 +371,12 @@ int LiveInterval::FindFirstRegisterHint(size_t* free_until) const {
// We use the user's lifetime position - 1 (and not `use_position`) because the
// register is blocked at the beginning of the user.
size_t position = user->GetLifetimePosition() - 1;
- if (SameRegisterKind(expected) && free_until[expected.reg()] >= position) {
- return expected.reg();
+ if (expected.IsRegisterKind()) {
+ DCHECK(SameRegisterKind(expected));
+ int reg = RegisterOrLowRegister(expected);
+ if (free_until[reg] >= position) {
+ return reg;
+ }
}
}
}
@@ -383,8 +398,9 @@ int LiveInterval::FindHintAtDefinition() const {
// If the input dies at the end of the predecessor, we know its register can
// be reused.
Location input_location = input_interval.ToLocation();
- if (SameRegisterKind(input_location)) {
- return input_location.reg();
+ if (input_location.IsRegisterKind()) {
+ DCHECK(SameRegisterKind(input_location));
+ return RegisterOrLowRegister(input_location);
}
}
}
@@ -399,8 +415,9 @@ int LiveInterval::FindHintAtDefinition() const {
// If the input dies at the start of this instruction, we know its register can
// be reused.
Location location = input_interval.ToLocation();
- if (SameRegisterKind(location)) {
- return location.reg();
+ if (location.IsRegisterKind()) {
+ DCHECK(SameRegisterKind(location));
+ return RegisterOrLowRegister(location);
}
}
}
@@ -409,9 +426,19 @@ int LiveInterval::FindHintAtDefinition() const {
}
bool LiveInterval::SameRegisterKind(Location other) const {
- return IsFloatingPoint()
- ? other.IsFpuRegister()
- : other.IsRegister();
+ if (IsFloatingPoint()) {
+ if (IsLowInterval() || IsHighInterval()) {
+ return other.IsFpuRegisterPair();
+ } else {
+ return other.IsFpuRegister();
+ }
+ } else {
+ if (IsLowInterval() || IsHighInterval()) {
+ return other.IsRegisterPair();
+ } else {
+ return other.IsRegister();
+ }
+ }
}
bool LiveInterval::NeedsTwoSpillSlots() const {
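
With register pairs in the picture, a hint is only usable when the location is some register kind, and a pair is identified by its low half before probing free_until. A compact sketch of that guard follows, with invented Location/FindHint types; the real code additionally DCHECKs SameRegisterKind and distinguishes core from FPU registers.

    #include <cassert>
    #include <cstddef>

    // Toy location: nothing, a single register, or a register pair.
    enum class LocKind { None, Register, RegisterPair };
    struct Location {
      LocKind kind;
      int low;   // the register, or the low half of a pair
    };

    bool IsRegisterKind(const Location& l) { return l.kind != LocKind::None; }
    int RegisterOrLowRegister(const Location& l) { return l.low; }

    // Only take the hint if that register stays free at least until the
    // position where we would use it.
    int FindHint(const Location& l, const size_t* free_until, size_t use_position) {
      if (IsRegisterKind(l)) {
        int reg = RegisterOrLowRegister(l);
        if (free_until[reg] >= use_position) {
          return reg;
        }
      }
      return -1;  // no usable hint
    }

    int main() {
      size_t free_until[4] = {10, 3, 20, 20};
      assert(FindHint({LocKind::Register, 0}, free_until, 5) == 0);      // free long enough
      assert(FindHint({LocKind::Register, 1}, free_until, 5) == -1);     // taken too early
      assert(FindHint({LocKind::RegisterPair, 2}, free_until, 5) == 2);  // use the low half
      return 0;
    }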
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index fd30c1bc76..2f2e2d1fab 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -19,6 +19,11 @@
namespace art {
void SsaDeadPhiElimination::Run() {
+ MarkDeadPhis();
+ EliminateDeadPhis();
+}
+
+void SsaDeadPhiElimination::MarkDeadPhis() {
// Add to the worklist phis referenced by non-phi instructions.
for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
HBasicBlock* block = it.Current();
@@ -49,7 +54,9 @@ void SsaDeadPhiElimination::Run() {
}
}
}
+}
+void SsaDeadPhiElimination::EliminateDeadPhis() {
// Remove phis that are not live. Visit in post order so that phis
// that are not inputs of loop phis can be removed when they have
// no users left (dead phis might use dead phis).
@@ -57,31 +64,33 @@ void SsaDeadPhiElimination::Run() {
HBasicBlock* block = it.Current();
HInstruction* current = block->GetFirstPhi();
HInstruction* next = nullptr;
+ HPhi* phi;
while (current != nullptr) {
+ phi = current->AsPhi();
next = current->GetNext();
- if (current->AsPhi()->IsDead()) {
- if (current->HasUses()) {
- for (HUseIterator<HInstruction*> use_it(current->GetUses()); !use_it.Done();
+ if (phi->IsDead()) {
+ // Make sure the phi is only used by other dead phis.
+ if (kIsDebugBuild) {
+ for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done();
use_it.Advance()) {
- HUseListNode<HInstruction*>* user_node = use_it.Current();
- HInstruction* user = user_node->GetUser();
+ HInstruction* user = use_it.Current()->GetUser();
DCHECK(user->IsLoopHeaderPhi()) << user->GetId();
DCHECK(user->AsPhi()->IsDead()) << user->GetId();
- // Just put itself as an input. The phi will be removed in this loop anyway.
- user->SetRawInputAt(user_node->GetIndex(), user);
- current->RemoveUser(user, user_node->GetIndex());
}
}
- if (current->HasEnvironmentUses()) {
- for (HUseIterator<HEnvironment*> use_it(current->GetEnvUses()); !use_it.Done();
- use_it.Advance()) {
- HUseListNode<HEnvironment*>* user_node = use_it.Current();
- HEnvironment* user = user_node->GetUser();
- user->SetRawEnvAt(user_node->GetIndex(), nullptr);
- current->RemoveEnvironmentUser(user_node);
- }
+ // Remove the phi from use lists of its inputs.
+ for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
+ phi->RemoveAsUserOfInput(i);
+ }
+ // Remove the phi from environments that use it.
+ for (HUseIterator<HEnvironment*> use_it(phi->GetEnvUses()); !use_it.Done();
+ use_it.Advance()) {
+ HUseListNode<HEnvironment*>* user_node = use_it.Current();
+ HEnvironment* user = user_node->GetUser();
+ user->SetRawEnvAt(user_node->GetIndex(), nullptr);
}
- block->RemovePhi(current->AsPhi());
+ // Delete it from the instruction list.
+ block->RemovePhi(phi, /*ensure_safety=*/ false);
}
current = next;
}
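
MarkDeadPhis is now callable on its own so the SSA builder can mark, but not yet remove, phis before type propagation runs. The marking itself is essentially reachability from real (non-phi) users, propagated into phi inputs. Below is a toy version of that reachability pass; ToyPhi and MarkLivePhis are invented, and environment-only uses are modelled as "not used by a real instruction".

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Toy SSA view: each phi lists the phis it reads, plus a flag saying
    // whether some real (non-phi) instruction uses it.
    struct ToyPhi {
      bool used_by_real_instruction;
      std::vector<int> phi_inputs;
      bool live;
    };

    // A phi is live if a real instruction uses it, and liveness propagates
    // into the phis it reads (the worklist part of the marking).
    void MarkLivePhis(std::vector<ToyPhi>& phis) {
      std::vector<int> worklist;
      for (size_t i = 0; i < phis.size(); ++i) {
        if (phis[i].used_by_real_instruction) {
          phis[i].live = true;
          worklist.push_back(static_cast<int>(i));
        }
      }
      while (!worklist.empty()) {
        int p = worklist.back();
        worklist.pop_back();
        for (int in : phis[p].phi_inputs) {
          if (!phis[in].live) {
            phis[in].live = true;
            worklist.push_back(in);
          }
        }
      }
    }

    int main() {
      // phi0 feeds phi1; phi1 is used by a real instruction; phi2 is only
      // used by environments.
      std::vector<ToyPhi> phis = {
          {false, {},  false},   // phi0
          {true,  {0}, false},   // phi1
          {false, {},  false},   // phi2
      };
      MarkLivePhis(phis);
      assert(phis[0].live && phis[1].live && !phis[2].live);
      return 0;
    }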
diff --git a/compiler/optimizing/ssa_phi_elimination.h b/compiler/optimizing/ssa_phi_elimination.h
index b7899712d6..88a5279e14 100644
--- a/compiler/optimizing/ssa_phi_elimination.h
+++ b/compiler/optimizing/ssa_phi_elimination.h
@@ -34,6 +34,9 @@ class SsaDeadPhiElimination : public HOptimization {
void Run() OVERRIDE;
+ void MarkDeadPhis();
+ void EliminateDeadPhis();
+
private:
GrowableArray<HPhi*> worklist_;
diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc
index 7e90b37fe6..7fc1ec6dd1 100644
--- a/compiler/optimizing/ssa_test.cc
+++ b/compiler/optimizing/ssa_test.cc
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "base/arena_allocator.h"
#include "base/stringprintf.h"
#include "builder.h"
#include "dex_file.h"
@@ -22,7 +23,6 @@
#include "optimizing_unit_test.h"
#include "pretty_printer.h"
#include "ssa_builder.h"
-#include "utils/arena_allocator.h"
#include "gtest/gtest.h"
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 3974e53e6f..5283d5dcca 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -166,18 +166,23 @@ class StackMapStream : public ValueObject {
stack_map.SetStackMask(*entry.sp_mask);
}
- // Set the register map.
- MemoryRegion register_region = dex_register_maps_region.Subregion(
- next_dex_register_map_offset,
- DexRegisterMap::kFixedSize + entry.num_dex_registers * DexRegisterMap::SingleEntrySize());
- next_dex_register_map_offset += register_region.size();
- DexRegisterMap dex_register_map(register_region);
- stack_map.SetDexRegisterMapOffset(register_region.start() - memory_start);
-
- for (size_t j = 0; j < entry.num_dex_registers; ++j) {
- DexRegisterEntry register_entry =
- dex_register_maps_.Get(j + entry.dex_register_maps_start_index);
- dex_register_map.SetRegisterInfo(j, register_entry.kind, register_entry.value);
+ if (entry.num_dex_registers != 0) {
+ // Set the register map.
+ MemoryRegion register_region = dex_register_maps_region.Subregion(
+ next_dex_register_map_offset,
+ DexRegisterMap::kFixedSize
+ + entry.num_dex_registers * DexRegisterMap::SingleEntrySize());
+ next_dex_register_map_offset += register_region.size();
+ DexRegisterMap dex_register_map(register_region);
+ stack_map.SetDexRegisterMapOffset(register_region.start() - memory_start);
+
+ for (size_t j = 0; j < entry.num_dex_registers; ++j) {
+ DexRegisterEntry register_entry =
+ dex_register_maps_.Get(j + entry.dex_register_maps_start_index);
+ dex_register_map.SetRegisterInfo(j, register_entry.kind, register_entry.value);
+ }
+ } else {
+ stack_map.SetDexRegisterMapOffset(StackMap::kNoDexRegisterMap);
}
// Set the inlining info.
@@ -196,7 +201,7 @@ class StackMapStream : public ValueObject {
inline_info.SetMethodReferenceIndexAtDepth(j, inline_entry.method_index);
}
} else {
- stack_map.SetInlineDescriptorOffset(InlineInfo::kNoInlineInfo);
+ stack_map.SetInlineDescriptorOffset(StackMap::kNoInlineInfo);
}
}
}
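
With the change above, a stack map may legitimately carry no dex register map, and readers are expected to check HasDexRegisterMap() (as the updated tests do) instead of assuming an offset is always present. The toy encoding below illustrates that "offset or absent" convention; kNoMap and ToyStackMap are invented, and the actual sentinel used by StackMap::kNoDexRegisterMap may differ.

    #include <cassert>
    #include <cstdint>

    // A reserved sentinel plays the role of "no dex register map recorded".
    constexpr uint32_t kNoMap = ~0u;

    struct ToyStackMap {
      uint32_t dex_register_map_offset = kNoMap;
      bool HasDexRegisterMap() const { return dex_register_map_offset != kNoMap; }
    };

    int main() {
      ToyStackMap without;                 // no registers recorded for this map
      ToyStackMap with;
      with.dex_register_map_offset = 64;   // points into the register maps region
      assert(!without.HasDexRegisterMap());
      assert(with.HasDexRegisterMap());
      return 0;
    }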
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index 5ee6ae049c..744fb45fff 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -61,6 +61,7 @@ TEST(StackMapTest, Test1) {
MemoryRegion stack_mask = stack_map.GetStackMask();
ASSERT_TRUE(SameBits(stack_mask, sp_mask));
+ ASSERT_TRUE(stack_map.HasDexRegisterMap());
DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, 2);
ASSERT_EQ(DexRegisterMap::kInStack, dex_registers.GetLocationKind(0));
ASSERT_EQ(DexRegisterMap::kConstant, dex_registers.GetLocationKind(1));
@@ -107,6 +108,7 @@ TEST(StackMapTest, Test2) {
MemoryRegion stack_mask = stack_map.GetStackMask();
ASSERT_TRUE(SameBits(stack_mask, sp_mask1));
+ ASSERT_TRUE(stack_map.HasDexRegisterMap());
DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, 2);
ASSERT_EQ(DexRegisterMap::kInStack, dex_registers.GetLocationKind(0));
ASSERT_EQ(DexRegisterMap::kConstant, dex_registers.GetLocationKind(1));
diff --git a/compiler/utils/arena_allocator.cc b/compiler/utils/arena_allocator.cc
deleted file mode 100644
index a80ad938a6..0000000000
--- a/compiler/utils/arena_allocator.cc
+++ /dev/null
@@ -1,296 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <algorithm>
-#include <iomanip>
-#include <numeric>
-
-#include "arena_allocator.h"
-#include "base/logging.h"
-#include "base/mutex.h"
-#include "thread-inl.h"
-#include <memcheck/memcheck.h>
-
-namespace art {
-
-// Memmap is a bit slower than malloc according to my measurements.
-static constexpr bool kUseMemMap = false;
-static constexpr bool kUseMemSet = true && kUseMemMap;
-static constexpr size_t kValgrindRedZoneBytes = 8;
-constexpr size_t Arena::kDefaultSize;
-
-template <bool kCount>
-const char* const ArenaAllocatorStatsImpl<kCount>::kAllocNames[] = {
- "Misc ",
- "BasicBlock ",
- "BBList "
- "BBPreds ",
- "DfsPreOrd ",
- "DfsPostOrd ",
- "DomPostOrd ",
- "TopoOrd ",
- "Lowering ",
- "LIR ",
- "LIR masks ",
- "SwitchTbl ",
- "FillArray ",
- "SlowPaths ",
- "MIR ",
- "DataFlow ",
- "GrowList ",
- "GrowBitMap ",
- "SSA2Dalvik ",
- "Dalvik2SSA ",
- "DebugInfo ",
- "Successor ",
- "RegAlloc ",
- "Data ",
- "Preds ",
- "STL ",
-};
-
-template <bool kCount>
-ArenaAllocatorStatsImpl<kCount>::ArenaAllocatorStatsImpl()
- : num_allocations_(0u) {
- std::fill_n(alloc_stats_, arraysize(alloc_stats_), 0u);
-}
-
-template <bool kCount>
-void ArenaAllocatorStatsImpl<kCount>::Copy(const ArenaAllocatorStatsImpl& other) {
- num_allocations_ = other.num_allocations_;
- std::copy(other.alloc_stats_, other.alloc_stats_ + arraysize(alloc_stats_), alloc_stats_);
-}
-
-template <bool kCount>
-void ArenaAllocatorStatsImpl<kCount>::RecordAlloc(size_t bytes, ArenaAllocKind kind) {
- alloc_stats_[kind] += bytes;
- ++num_allocations_;
-}
-
-template <bool kCount>
-size_t ArenaAllocatorStatsImpl<kCount>::NumAllocations() const {
- return num_allocations_;
-}
-
-template <bool kCount>
-size_t ArenaAllocatorStatsImpl<kCount>::BytesAllocated() const {
- const size_t init = 0u; // Initial value of the correct type.
- return std::accumulate(alloc_stats_, alloc_stats_ + arraysize(alloc_stats_), init);
-}
-
-template <bool kCount>
-void ArenaAllocatorStatsImpl<kCount>::Dump(std::ostream& os, const Arena* first,
- ssize_t lost_bytes_adjustment) const {
- size_t malloc_bytes = 0u;
- size_t lost_bytes = 0u;
- size_t num_arenas = 0u;
- for (const Arena* arena = first; arena != nullptr; arena = arena->next_) {
- malloc_bytes += arena->Size();
- lost_bytes += arena->RemainingSpace();
- ++num_arenas;
- }
- // The lost_bytes_adjustment is used to make up for the fact that the current arena
- // may not have the bytes_allocated_ updated correctly.
- lost_bytes += lost_bytes_adjustment;
- const size_t bytes_allocated = BytesAllocated();
- os << " MEM: used: " << bytes_allocated << ", allocated: " << malloc_bytes
- << ", lost: " << lost_bytes << "\n";
- size_t num_allocations = NumAllocations();
- if (num_allocations != 0) {
- os << "Number of arenas allocated: " << num_arenas << ", Number of allocations: "
- << num_allocations << ", avg size: " << bytes_allocated / num_allocations << "\n";
- }
- os << "===== Allocation by kind\n";
- static_assert(arraysize(kAllocNames) == kNumArenaAllocKinds, "arraysize of kAllocNames");
- for (int i = 0; i < kNumArenaAllocKinds; i++) {
- os << kAllocNames[i] << std::setw(10) << alloc_stats_[i] << "\n";
- }
-}
-
-// Explicitly instantiate the used implementation.
-template class ArenaAllocatorStatsImpl<kArenaAllocatorCountAllocations>;
-
-Arena::Arena(size_t size)
- : bytes_allocated_(0),
- map_(nullptr),
- next_(nullptr) {
- if (kUseMemMap) {
- std::string error_msg;
- map_ = MemMap::MapAnonymous("dalvik-arena", NULL, size, PROT_READ | PROT_WRITE, false,
- &error_msg);
- CHECK(map_ != nullptr) << error_msg;
- memory_ = map_->Begin();
- size_ = map_->Size();
- } else {
- memory_ = reinterpret_cast<uint8_t*>(calloc(1, size));
- size_ = size;
- }
-}
-
-Arena::~Arena() {
- if (kUseMemMap) {
- delete map_;
- } else {
- free(reinterpret_cast<void*>(memory_));
- }
-}
-
-void Arena::Reset() {
- if (bytes_allocated_) {
- if (kUseMemSet || !kUseMemMap) {
- memset(Begin(), 0, bytes_allocated_);
- } else {
- map_->MadviseDontNeedAndZero();
- }
- bytes_allocated_ = 0;
- }
-}
-
-ArenaPool::ArenaPool()
- : lock_("Arena pool lock"),
- free_arenas_(nullptr) {
-}
-
-ArenaPool::~ArenaPool() {
- while (free_arenas_ != nullptr) {
- auto* arena = free_arenas_;
- free_arenas_ = free_arenas_->next_;
- delete arena;
- }
-}
-
-Arena* ArenaPool::AllocArena(size_t size) {
- Thread* self = Thread::Current();
- Arena* ret = nullptr;
- {
- MutexLock lock(self, lock_);
- if (free_arenas_ != nullptr && LIKELY(free_arenas_->Size() >= size)) {
- ret = free_arenas_;
- free_arenas_ = free_arenas_->next_;
- }
- }
- if (ret == nullptr) {
- ret = new Arena(size);
- }
- ret->Reset();
- return ret;
-}
-
-size_t ArenaPool::GetBytesAllocated() const {
- size_t total = 0;
- MutexLock lock(Thread::Current(), lock_);
- for (Arena* arena = free_arenas_; arena != nullptr; arena = arena->next_) {
- total += arena->GetBytesAllocated();
- }
- return total;
-}
-
-void ArenaPool::FreeArenaChain(Arena* first) {
- if (UNLIKELY(RUNNING_ON_VALGRIND > 0)) {
- for (Arena* arena = first; arena != nullptr; arena = arena->next_) {
- VALGRIND_MAKE_MEM_UNDEFINED(arena->memory_, arena->bytes_allocated_);
- }
- }
- if (first != nullptr) {
- Arena* last = first;
- while (last->next_ != nullptr) {
- last = last->next_;
- }
- Thread* self = Thread::Current();
- MutexLock lock(self, lock_);
- last->next_ = free_arenas_;
- free_arenas_ = first;
- }
-}
-
-size_t ArenaAllocator::BytesAllocated() const {
- return ArenaAllocatorStats::BytesAllocated();
-}
-
-ArenaAllocator::ArenaAllocator(ArenaPool* pool)
- : pool_(pool),
- begin_(nullptr),
- end_(nullptr),
- ptr_(nullptr),
- arena_head_(nullptr),
- running_on_valgrind_(RUNNING_ON_VALGRIND > 0) {
-}
-
-void ArenaAllocator::UpdateBytesAllocated() {
- if (arena_head_ != nullptr) {
- // Update how many bytes we have allocated into the arena so that the arena pool knows how
- // much memory to zero out.
- arena_head_->bytes_allocated_ = ptr_ - begin_;
- }
-}
-
-void* ArenaAllocator::AllocValgrind(size_t bytes, ArenaAllocKind kind) {
- size_t rounded_bytes = RoundUp(bytes + kValgrindRedZoneBytes, 8);
- if (UNLIKELY(ptr_ + rounded_bytes > end_)) {
- // Obtain a new block.
- ObtainNewArenaForAllocation(rounded_bytes);
- if (UNLIKELY(ptr_ == nullptr)) {
- return nullptr;
- }
- }
- ArenaAllocatorStats::RecordAlloc(rounded_bytes, kind);
- uint8_t* ret = ptr_;
- ptr_ += rounded_bytes;
- // Check that the memory is already zeroed out.
- for (uint8_t* ptr = ret; ptr < ptr_; ++ptr) {
- CHECK_EQ(*ptr, 0U);
- }
- VALGRIND_MAKE_MEM_NOACCESS(ret + bytes, rounded_bytes - bytes);
- return ret;
-}
-
-ArenaAllocator::~ArenaAllocator() {
- // Reclaim all the arenas by giving them back to the thread pool.
- UpdateBytesAllocated();
- pool_->FreeArenaChain(arena_head_);
-}
-
-void ArenaAllocator::ObtainNewArenaForAllocation(size_t allocation_size) {
- UpdateBytesAllocated();
- Arena* new_arena = pool_->AllocArena(std::max(Arena::kDefaultSize, allocation_size));
- new_arena->next_ = arena_head_;
- arena_head_ = new_arena;
- // Update our internal data structures.
- ptr_ = begin_ = new_arena->Begin();
- end_ = new_arena->End();
-}
-
-MemStats::MemStats(const char* name, const ArenaAllocatorStats* stats, const Arena* first_arena,
- ssize_t lost_bytes_adjustment)
- : name_(name),
- stats_(stats),
- first_arena_(first_arena),
- lost_bytes_adjustment_(lost_bytes_adjustment) {
-}
-
-void MemStats::Dump(std::ostream& os) const {
- os << name_ << " stats:\n";
- stats_->Dump(os, first_arena_, lost_bytes_adjustment_);
-}
-
-// Dump memory usage stats.
-MemStats ArenaAllocator::GetMemStats() const {
- ssize_t lost_bytes_adjustment =
- (arena_head_ == nullptr) ? 0 : (end_ - ptr_) - arena_head_->RemainingSpace();
- return MemStats("ArenaAllocator", this, arena_head_, lost_bytes_adjustment);
-}
-
-} // namespace art
diff --git a/compiler/utils/arena_allocator.h b/compiler/utils/arena_allocator.h
deleted file mode 100644
index 7f5bc9ac4c..0000000000
--- a/compiler/utils/arena_allocator.h
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * Copyright (C) 2013 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_UTILS_ARENA_ALLOCATOR_H_
-#define ART_COMPILER_UTILS_ARENA_ALLOCATOR_H_
-
-#include <stdint.h>
-#include <stddef.h>
-
-#include "base/macros.h"
-#include "base/mutex.h"
-#include "mem_map.h"
-#include "utils.h"
-#include "utils/debug_stack.h"
-
-namespace art {
-
-class Arena;
-class ArenaPool;
-class ArenaAllocator;
-class ArenaStack;
-class ScopedArenaAllocator;
-class MemStats;
-
-template <typename T>
-class ArenaAllocatorAdapter;
-
-static constexpr bool kArenaAllocatorCountAllocations = false;
-
-// Type of allocation for memory tuning.
-enum ArenaAllocKind {
- kArenaAllocMisc,
- kArenaAllocBB,
- kArenaAllocBBList,
- kArenaAllocBBPredecessors,
- kArenaAllocDfsPreOrder,
- kArenaAllocDfsPostOrder,
- kArenaAllocDomPostOrder,
- kArenaAllocTopologicalSortOrder,
- kArenaAllocLoweringInfo,
- kArenaAllocLIR,
- kArenaAllocLIRResourceMask,
- kArenaAllocSwitchTable,
- kArenaAllocFillArrayData,
- kArenaAllocSlowPaths,
- kArenaAllocMIR,
- kArenaAllocDFInfo,
- kArenaAllocGrowableArray,
- kArenaAllocGrowableBitMap,
- kArenaAllocSSAToDalvikMap,
- kArenaAllocDalvikToSSAMap,
- kArenaAllocDebugInfo,
- kArenaAllocSuccessor,
- kArenaAllocRegAlloc,
- kArenaAllocData,
- kArenaAllocPredecessors,
- kArenaAllocSTL,
- kNumArenaAllocKinds
-};
-
-template <bool kCount>
-class ArenaAllocatorStatsImpl;
-
-template <>
-class ArenaAllocatorStatsImpl<false> {
- public:
- ArenaAllocatorStatsImpl() = default;
- ArenaAllocatorStatsImpl(const ArenaAllocatorStatsImpl& other) = default;
- ArenaAllocatorStatsImpl& operator = (const ArenaAllocatorStatsImpl& other) = delete;
-
- void Copy(const ArenaAllocatorStatsImpl& other) { UNUSED(other); }
- void RecordAlloc(size_t bytes, ArenaAllocKind kind) { UNUSED(bytes, kind); }
- size_t NumAllocations() const { return 0u; }
- size_t BytesAllocated() const { return 0u; }
- void Dump(std::ostream& os, const Arena* first, ssize_t lost_bytes_adjustment) const {
- UNUSED(os); UNUSED(first); UNUSED(lost_bytes_adjustment);
- }
-};
-
-template <bool kCount>
-class ArenaAllocatorStatsImpl {
- public:
- ArenaAllocatorStatsImpl();
- ArenaAllocatorStatsImpl(const ArenaAllocatorStatsImpl& other) = default;
- ArenaAllocatorStatsImpl& operator = (const ArenaAllocatorStatsImpl& other) = delete;
-
- void Copy(const ArenaAllocatorStatsImpl& other);
- void RecordAlloc(size_t bytes, ArenaAllocKind kind);
- size_t NumAllocations() const;
- size_t BytesAllocated() const;
- void Dump(std::ostream& os, const Arena* first, ssize_t lost_bytes_adjustment) const;
-
- private:
- size_t num_allocations_;
- // TODO: Use std::array<size_t, kNumArenaAllocKinds> from C++11 when we upgrade the STL.
- size_t alloc_stats_[kNumArenaAllocKinds]; // Bytes used by various allocation kinds.
-
- static const char* const kAllocNames[];
-};
-
-typedef ArenaAllocatorStatsImpl<kArenaAllocatorCountAllocations> ArenaAllocatorStats;
-
-class Arena {
- public:
- static constexpr size_t kDefaultSize = 128 * KB;
- explicit Arena(size_t size = kDefaultSize);
- ~Arena();
- void Reset();
- uint8_t* Begin() {
- return memory_;
- }
-
- uint8_t* End() {
- return memory_ + size_;
- }
-
- size_t Size() const {
- return size_;
- }
-
- size_t RemainingSpace() const {
- return Size() - bytes_allocated_;
- }
-
- size_t GetBytesAllocated() const {
- return bytes_allocated_;
- }
-
- private:
- size_t bytes_allocated_;
- uint8_t* memory_;
- size_t size_;
- MemMap* map_;
- Arena* next_;
- friend class ArenaPool;
- friend class ArenaAllocator;
- friend class ArenaStack;
- friend class ScopedArenaAllocator;
- template <bool kCount> friend class ArenaAllocatorStatsImpl;
- DISALLOW_COPY_AND_ASSIGN(Arena);
-};
-
-class ArenaPool {
- public:
- ArenaPool();
- ~ArenaPool();
- Arena* AllocArena(size_t size) LOCKS_EXCLUDED(lock_);
- void FreeArenaChain(Arena* first) LOCKS_EXCLUDED(lock_);
- size_t GetBytesAllocated() const LOCKS_EXCLUDED(lock_);
-
- private:
- mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
- Arena* free_arenas_ GUARDED_BY(lock_);
- DISALLOW_COPY_AND_ASSIGN(ArenaPool);
-};
-
-class ArenaAllocator : private DebugStackRefCounter, private ArenaAllocatorStats {
- public:
- explicit ArenaAllocator(ArenaPool* pool);
- ~ArenaAllocator();
-
- // Get adapter for use in STL containers. See arena_containers.h .
- ArenaAllocatorAdapter<void> Adapter(ArenaAllocKind kind = kArenaAllocSTL);
-
- // Returns zeroed memory.
- void* Alloc(size_t bytes, ArenaAllocKind kind) ALWAYS_INLINE {
- if (UNLIKELY(running_on_valgrind_)) {
- return AllocValgrind(bytes, kind);
- }
- bytes = RoundUp(bytes, 8);
- if (UNLIKELY(ptr_ + bytes > end_)) {
- // Obtain a new block.
- ObtainNewArenaForAllocation(bytes);
- if (UNLIKELY(ptr_ == nullptr)) {
- return nullptr;
- }
- }
- ArenaAllocatorStats::RecordAlloc(bytes, kind);
- uint8_t* ret = ptr_;
- ptr_ += bytes;
- return ret;
- }
-
- template <typename T> T* AllocArray(size_t length) {
- return static_cast<T*>(Alloc(length * sizeof(T), kArenaAllocMisc));
- }
-
- void* AllocValgrind(size_t bytes, ArenaAllocKind kind);
- void ObtainNewArenaForAllocation(size_t allocation_size);
- size_t BytesAllocated() const;
- MemStats GetMemStats() const;
-
- private:
- void UpdateBytesAllocated();
-
- ArenaPool* pool_;
- uint8_t* begin_;
- uint8_t* end_;
- uint8_t* ptr_;
- Arena* arena_head_;
- bool running_on_valgrind_;
-
- template <typename U>
- friend class ArenaAllocatorAdapter;
-
- DISALLOW_COPY_AND_ASSIGN(ArenaAllocator);
-}; // ArenaAllocator
-
-class MemStats {
- public:
- MemStats(const char* name, const ArenaAllocatorStats* stats, const Arena* first_arena,
- ssize_t lost_bytes_adjustment = 0);
- void Dump(std::ostream& os) const;
-
- private:
- const char* const name_;
- const ArenaAllocatorStats* const stats_;
- const Arena* const first_arena_;
- const ssize_t lost_bytes_adjustment_;
-}; // MemStats
-
-} // namespace art
-
-#endif // ART_COMPILER_UTILS_ARENA_ALLOCATOR_H_
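
The arena files deleted above are being relocated rather than rewritten: the includes elsewhere in this diff now point at base/arena_allocator.h. The allocator itself stays a bump-pointer design: round the request up to an 8-byte granularity, hand out ptr_, advance it, and obtain a new Arena from the pool when the current one runs out. The minimal sketch below covers only that fast path; ToyArena is not the real class, it backs the arena with a std::vector and returns nullptr instead of chaining a new block.

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    class ToyArena {
     public:
      explicit ToyArena(size_t size) : storage_(size, 0), ptr_(0) {}

      // Returns zeroed memory, bumped out of the backing buffer.
      void* Alloc(size_t bytes) {
        bytes = (bytes + 7) & ~size_t{7};   // round up to 8-byte granularity
        if (ptr_ + bytes > storage_.size()) {
          return nullptr;                   // a real arena would chain a new block here
        }
        void* ret = storage_.data() + ptr_;
        ptr_ += bytes;
        return ret;
      }

     private:
      std::vector<uint8_t> storage_;
      size_t ptr_;
    };

    int main() {
      ToyArena arena(128);
      auto* a = static_cast<uint8_t*>(arena.Alloc(5));   // rounded up to 8 bytes
      auto* b = static_cast<uint8_t*>(arena.Alloc(16));
      assert(a != nullptr && b == a + 8);
      assert(arena.Alloc(1024) == nullptr);              // out of space in this toy
      return 0;
    }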
diff --git a/compiler/utils/arena_allocator_test.cc b/compiler/utils/arena_allocator_test.cc
index 71565407a2..706552739f 100644
--- a/compiler/utils/arena_allocator_test.cc
+++ b/compiler/utils/arena_allocator_test.cc
@@ -14,8 +14,8 @@
* limitations under the License.
*/
+#include "base/arena_allocator.h"
#include "gtest/gtest.h"
-#include "utils/arena_allocator.h"
#include "utils/arena_bit_vector.h"
namespace art {
diff --git a/compiler/utils/arena_bit_vector.cc b/compiler/utils/arena_bit_vector.cc
index f17e5a92a4..ddc0c818c2 100644
--- a/compiler/utils/arena_bit_vector.cc
+++ b/compiler/utils/arena_bit_vector.cc
@@ -14,9 +14,10 @@
* limitations under the License.
*/
-#include "arena_allocator.h"
#include "arena_bit_vector.h"
+#include "base/arena_allocator.h"
+
namespace art {
template <typename ArenaAlloc>
diff --git a/compiler/utils/arena_bit_vector.h b/compiler/utils/arena_bit_vector.h
index 34f1ca9129..f2a74527da 100644
--- a/compiler/utils/arena_bit_vector.h
+++ b/compiler/utils/arena_bit_vector.h
@@ -17,7 +17,7 @@
#ifndef ART_COMPILER_UTILS_ARENA_BIT_VECTOR_H_
#define ART_COMPILER_UTILS_ARENA_BIT_VECTOR_H_
-#include "arena_object.h"
+#include "base/arena_object.h"
#include "base/bit_vector.h"
namespace art {
@@ -35,14 +35,10 @@ enum OatBitMapKind {
kBitMapDominators,
kBitMapIDominated,
kBitMapDomFrontier,
- kBitMapPhi,
- kBitMapTmpBlocks,
- kBitMapInputBlocks,
kBitMapRegisterV,
kBitMapTempSSARegisterV,
kBitMapNullCheck,
kBitMapClInitCheck,
- kBitMapTmpBlockV,
kBitMapPredecessors,
kNumBitMapKinds
};
diff --git a/compiler/utils/arena_containers.h b/compiler/utils/arena_containers.h
deleted file mode 100644
index 825259157a..0000000000
--- a/compiler/utils/arena_containers.h
+++ /dev/null
@@ -1,207 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_UTILS_ARENA_CONTAINERS_H_
-#define ART_COMPILER_UTILS_ARENA_CONTAINERS_H_
-
-#include <deque>
-#include <queue>
-#include <set>
-#include <vector>
-
-#include "utils/arena_allocator.h"
-#include "safe_map.h"
-
-namespace art {
-
-// Adapter for use of ArenaAllocator in STL containers.
-// Use ArenaAllocator::Adapter() to create an adapter to pass to container constructors.
-// For example,
-// struct Foo {
-// explicit Foo(ArenaAllocator* allocator)
-// : foo_vector(allocator->Adapter(kArenaAllocMisc)),
-// foo_map(std::less<int>(), allocator->Adapter()) {
-// }
-// ArenaVector<int> foo_vector;
-// ArenaSafeMap<int, int> foo_map;
-// };
-template <typename T>
-class ArenaAllocatorAdapter;
-
-template <typename T>
-using ArenaDeque = std::deque<T, ArenaAllocatorAdapter<T>>;
-
-template <typename T>
-using ArenaQueue = std::queue<T, ArenaDeque<T>>;
-
-template <typename T>
-using ArenaVector = std::vector<T, ArenaAllocatorAdapter<T>>;
-
-template <typename T, typename Comparator = std::less<T>>
-using ArenaSet = std::set<T, Comparator, ArenaAllocatorAdapter<T>>;
-
-template <typename K, typename V, typename Comparator = std::less<K>>
-using ArenaSafeMap =
- SafeMap<K, V, Comparator, ArenaAllocatorAdapter<std::pair<const K, V>>>;
-
-// Implementation details below.
-
-template <bool kCount>
-class ArenaAllocatorAdapterKindImpl;
-
-template <>
-class ArenaAllocatorAdapterKindImpl<false> {
- public:
- // Not tracking allocations, ignore the supplied kind and arbitrarily provide kArenaAllocSTL.
- explicit ArenaAllocatorAdapterKindImpl(ArenaAllocKind kind) { UNUSED(kind); }
- ArenaAllocatorAdapterKindImpl& operator=(const ArenaAllocatorAdapterKindImpl& other) = default;
- ArenaAllocKind Kind() { return kArenaAllocSTL; }
-};
-
-template <bool kCount>
-class ArenaAllocatorAdapterKindImpl {
- public:
- explicit ArenaAllocatorAdapterKindImpl(ArenaAllocKind kind) : kind_(kind) { }
- ArenaAllocatorAdapterKindImpl& operator=(const ArenaAllocatorAdapterKindImpl& other) = default;
- ArenaAllocKind Kind() { return kind_; }
-
- private:
- ArenaAllocKind kind_;
-};
-
-typedef ArenaAllocatorAdapterKindImpl<kArenaAllocatorCountAllocations> ArenaAllocatorAdapterKind;
-
-template <>
-class ArenaAllocatorAdapter<void>
- : private DebugStackReference, private ArenaAllocatorAdapterKind {
- public:
- typedef void value_type;
- typedef void* pointer;
- typedef const void* const_pointer;
-
- template <typename U>
- struct rebind {
- typedef ArenaAllocatorAdapter<U> other;
- };
-
- explicit ArenaAllocatorAdapter(ArenaAllocator* arena_allocator,
- ArenaAllocKind kind = kArenaAllocSTL)
- : DebugStackReference(arena_allocator),
- ArenaAllocatorAdapterKind(kind),
- arena_allocator_(arena_allocator) {
- }
- template <typename U>
- ArenaAllocatorAdapter(const ArenaAllocatorAdapter<U>& other)
- : DebugStackReference(other),
- ArenaAllocatorAdapterKind(other),
- arena_allocator_(other.arena_allocator_) {
- }
- ArenaAllocatorAdapter(const ArenaAllocatorAdapter& other) = default;
- ArenaAllocatorAdapter& operator=(const ArenaAllocatorAdapter& other) = default;
- ~ArenaAllocatorAdapter() = default;
-
- private:
- ArenaAllocator* arena_allocator_;
-
- template <typename U>
- friend class ArenaAllocatorAdapter;
-};
-
-template <typename T>
-class ArenaAllocatorAdapter : private DebugStackReference, private ArenaAllocatorAdapterKind {
- public:
- typedef T value_type;
- typedef T* pointer;
- typedef T& reference;
- typedef const T* const_pointer;
- typedef const T& const_reference;
- typedef size_t size_type;
- typedef ptrdiff_t difference_type;
-
- template <typename U>
- struct rebind {
- typedef ArenaAllocatorAdapter<U> other;
- };
-
- explicit ArenaAllocatorAdapter(ArenaAllocator* arena_allocator, ArenaAllocKind kind)
- : DebugStackReference(arena_allocator),
- ArenaAllocatorAdapterKind(kind),
- arena_allocator_(arena_allocator) {
- }
- template <typename U>
- ArenaAllocatorAdapter(const ArenaAllocatorAdapter<U>& other)
- : DebugStackReference(other),
- ArenaAllocatorAdapterKind(other),
- arena_allocator_(other.arena_allocator_) {
- }
- ArenaAllocatorAdapter(const ArenaAllocatorAdapter& other) = default;
- ArenaAllocatorAdapter& operator=(const ArenaAllocatorAdapter& other) = default;
- ~ArenaAllocatorAdapter() = default;
-
- size_type max_size() const {
- return static_cast<size_type>(-1) / sizeof(T);
- }
-
- pointer address(reference x) const { return &x; }
- const_pointer address(const_reference x) const { return &x; }
-
- pointer allocate(size_type n, ArenaAllocatorAdapter<void>::pointer hint = nullptr) {
- UNUSED(hint);
- DCHECK_LE(n, max_size());
- return reinterpret_cast<T*>(arena_allocator_->Alloc(n * sizeof(T),
- ArenaAllocatorAdapterKind::Kind()));
- }
- void deallocate(pointer p, size_type n) {
- UNUSED(p, n);
- }
-
- void construct(pointer p, const_reference val) {
- new (static_cast<void*>(p)) value_type(val);
- }
- void destroy(pointer p) {
- p->~value_type();
- }
-
- private:
- ArenaAllocator* arena_allocator_;
-
- template <typename U>
- friend class ArenaAllocatorAdapter;
-
- template <typename U>
- friend bool operator==(const ArenaAllocatorAdapter<U>& lhs,
- const ArenaAllocatorAdapter<U>& rhs);
-};
-
-template <typename T>
-inline bool operator==(const ArenaAllocatorAdapter<T>& lhs,
- const ArenaAllocatorAdapter<T>& rhs) {
- return lhs.arena_allocator_ == rhs.arena_allocator_;
-}
-
-template <typename T>
-inline bool operator!=(const ArenaAllocatorAdapter<T>& lhs,
- const ArenaAllocatorAdapter<T>& rhs) {
- return !(lhs == rhs);
-}
-
-inline ArenaAllocatorAdapter<void> ArenaAllocator::Adapter(ArenaAllocKind kind) {
- return ArenaAllocatorAdapter<void>(this, kind);
-}
-
-} // namespace art
-
-#endif // ART_COMPILER_UTILS_ARENA_CONTAINERS_H_
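
For orientation, the deleted header implemented the standard STL allocator-adapter idiom over an arena (the include updates elsewhere in this change indicate it is relocated under base/): allocate() forwards to the arena, deallocate() is a no-op, and everything is reclaimed at once when the arena is destroyed. A minimal standalone sketch of that idiom, with simplified stand-in names rather than the ART classes:

// Minimal standalone sketch of the arena/STL-adapter idiom (illustrative
// names, not the ART classes): allocate() bumps a pointer in the arena,
// deallocate() is a no-op, and all memory is reclaimed when the arena dies.
#include <cstddef>
#include <cstdint>
#include <memory>
#include <new>
#include <vector>

class BumpArena {
 public:
  explicit BumpArena(size_t capacity)
      : storage_(new uint8_t[capacity]), pos_(0), capacity_(capacity) {}
  void* Alloc(size_t bytes) {
    size_t rounded = (bytes + 7u) & ~size_t{7};  // 8-byte rounding, as in the arenas above.
    if (pos_ + rounded > capacity_) throw std::bad_alloc();
    void* result = storage_.get() + pos_;
    pos_ += rounded;
    return result;
  }

 private:
  std::unique_ptr<uint8_t[]> storage_;
  size_t pos_;
  size_t capacity_;
};

template <typename T>
class BumpArenaAdapter {
 public:
  using value_type = T;
  explicit BumpArenaAdapter(BumpArena* arena) : arena_(arena) {}
  template <typename U>
  BumpArenaAdapter(const BumpArenaAdapter<U>& other) : arena_(other.arena_) {}
  T* allocate(size_t n) { return static_cast<T*>(arena_->Alloc(n * sizeof(T))); }
  void deallocate(T*, size_t) {}  // Reclaimed wholesale by the arena instead.
  BumpArena* arena_;
};

template <typename T, typename U>
bool operator==(const BumpArenaAdapter<T>& lhs, const BumpArenaAdapter<U>& rhs) {
  return lhs.arena_ == rhs.arena_;
}
template <typename T, typename U>
bool operator!=(const BumpArenaAdapter<T>& lhs, const BumpArenaAdapter<U>& rhs) {
  return !(lhs == rhs);
}

int main() {
  BumpArena arena(1 << 16);
  BumpArenaAdapter<int> adapter(&arena);
  std::vector<int, BumpArenaAdapter<int>> worklist(adapter);
  for (int i = 0; i < 100; ++i) worklist.push_back(i);
  return worklist.back() == 99 ? 0 : 1;
}
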
diff --git a/compiler/utils/arena_object.h b/compiler/utils/arena_object.h
deleted file mode 100644
index d64c419954..0000000000
--- a/compiler/utils/arena_object.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_UTILS_ARENA_OBJECT_H_
-#define ART_COMPILER_UTILS_ARENA_OBJECT_H_
-
-#include "arena_allocator.h"
-#include "base/logging.h"
-#include "scoped_arena_allocator.h"
-
-namespace art {
-
-// Parent for arena allocated objects giving appropriate new and delete operators.
-template<enum ArenaAllocKind kAllocKind>
-class ArenaObject {
- public:
- // Allocate a new ArenaObject of 'size' bytes in the Arena.
- void* operator new(size_t size, ArenaAllocator* allocator) {
- return allocator->Alloc(size, kAllocKind);
- }
-
- static void* operator new(size_t size, ScopedArenaAllocator* arena) {
- return arena->Alloc(size, kAllocKind);
- }
-
- void operator delete(void*, size_t) {
- LOG(FATAL) << "UNREACHABLE";
- UNREACHABLE();
- }
-};
-
-
-// Parent for arena allocated objects that get deleted, gives appropriate new and delete operators.
-// Currently this is used by the quick compiler for debug reference counting arena allocations.
-template<enum ArenaAllocKind kAllocKind>
-class DeletableArenaObject {
- public:
- // Allocate a new ArenaObject of 'size' bytes in the Arena.
- void* operator new(size_t size, ArenaAllocator* allocator) {
- return allocator->Alloc(size, kAllocKind);
- }
-
- static void* operator new(size_t size, ScopedArenaAllocator* arena) {
- return arena->Alloc(size, kAllocKind);
- }
-
- void operator delete(void*, size_t) {
- // Nop.
- }
-};
-
-} // namespace art
-
-#endif // ART_COMPILER_UTILS_ARENA_OBJECT_H_
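
The deleted arena_object.h is the class-level counterpart: a base class whose operator new routes allocation into an arena, so objects are created with `new (allocator) T(...)` and never individually deleted. A standalone sketch of the idiom, with illustrative names only:

// Standalone sketch of the ArenaObject idiom (not the ART class): a base class
// whose operator new allocates from an arena, so nodes are placement-constructed
// and reclaimed only when the arena itself is destroyed.
#include <cstddef>
#include <cstdlib>
#include <vector>

class SimpleArena {
 public:
  void* Alloc(size_t bytes) {
    void* p = std::malloc(bytes);
    allocations_.push_back(p);
    return p;
  }
  ~SimpleArena() {
    for (void* p : allocations_) std::free(p);
  }

 private:
  std::vector<void*> allocations_;
};

class ArenaManaged {
 public:
  void* operator new(size_t size, SimpleArena* arena) { return arena->Alloc(size); }
  // Matching placement delete, called only if a constructor throws.
  void operator delete(void*, SimpleArena*) {}
  // Ordinary delete is not meaningful for arena-owned objects.
  void operator delete(void*) = delete;
};

struct Node : ArenaManaged {
  explicit Node(int v) : value(v), next(nullptr) {}
  int value;
  Node* next;
};

int main() {
  SimpleArena arena;
  Node* head = new (&arena) Node(1);
  head->next = new (&arena) Node(2);
  // No deletes here: the arena frees everything when it goes out of scope.
  return head->next->value == 2 ? 0 : 1;
}
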
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index 05287732c5..a52e6eb30f 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -166,7 +166,7 @@ uint32_t ShifterOperand::encodingThumb() const {
}
uint32_t Address::encodingArm() const {
- CHECK(IsAbsoluteUint(12, offset_));
+ CHECK(IsAbsoluteUint<12>(offset_));
uint32_t encoding;
if (is_immed_offset_) {
if (offset_ < 0) {
@@ -245,6 +245,7 @@ uint32_t Address::encodingThumb(bool is_32bit) const {
// This is very like the ARM encoding except the offset is 10 bits.
uint32_t Address::encodingThumbLdrdStrd() const {
+ DCHECK(IsImmediate());
uint32_t encoding;
uint32_t am = am_;
// If P is 0 then W must be 1 (Different from ARM).
@@ -277,11 +278,12 @@ uint32_t Address::encoding3() const {
// Encoding for vfp load/store addressing.
uint32_t Address::vencoding() const {
+ CHECK(IsAbsoluteUint<10>(offset_)); // In the range -1020 to +1020.
+ CHECK_ALIGNED(offset_, 4); // Multiple of 4.
+
const uint32_t offset_mask = (1 << 12) - 1;
uint32_t encoding = encodingArm();
uint32_t offset = encoding & offset_mask;
- CHECK(IsAbsoluteUint(10, offset)); // In the range -1020 to +1020.
- CHECK_ALIGNED(offset, 2); // Multiple of 4.
CHECK((am_ == Offset) || (am_ == NegOffset));
uint32_t vencoding_value = (encoding & (0xf << kRnShift)) | (offset >> 2);
if (am_ == Offset) {
@@ -297,13 +299,13 @@ bool Address::CanHoldLoadOffsetArm(LoadOperandType type, int offset) {
case kLoadSignedHalfword:
case kLoadUnsignedHalfword:
case kLoadWordPair:
- return IsAbsoluteUint(8, offset); // Addressing mode 3.
+ return IsAbsoluteUint<8>(offset); // Addressing mode 3.
case kLoadUnsignedByte:
case kLoadWord:
- return IsAbsoluteUint(12, offset); // Addressing mode 2.
+ return IsAbsoluteUint<12>(offset); // Addressing mode 2.
case kLoadSWord:
case kLoadDWord:
- return IsAbsoluteUint(10, offset); // VFP addressing mode.
+ return IsAbsoluteUint<10>(offset); // VFP addressing mode.
default:
LOG(FATAL) << "UNREACHABLE";
UNREACHABLE();
@@ -315,13 +317,13 @@ bool Address::CanHoldStoreOffsetArm(StoreOperandType type, int offset) {
switch (type) {
case kStoreHalfword:
case kStoreWordPair:
- return IsAbsoluteUint(8, offset); // Addressing mode 3.
+ return IsAbsoluteUint<8>(offset); // Addressing mode 3.
case kStoreByte:
case kStoreWord:
- return IsAbsoluteUint(12, offset); // Addressing mode 2.
+ return IsAbsoluteUint<12>(offset); // Addressing mode 2.
case kStoreSWord:
case kStoreDWord:
- return IsAbsoluteUint(10, offset); // VFP addressing mode.
+ return IsAbsoluteUint<10>(offset); // VFP addressing mode.
default:
LOG(FATAL) << "UNREACHABLE";
UNREACHABLE();
@@ -335,12 +337,12 @@ bool Address::CanHoldLoadOffsetThumb(LoadOperandType type, int offset) {
case kLoadUnsignedHalfword:
case kLoadUnsignedByte:
case kLoadWord:
- return IsAbsoluteUint(12, offset);
+ return IsAbsoluteUint<12>(offset);
case kLoadSWord:
case kLoadDWord:
- return IsAbsoluteUint(10, offset); // VFP addressing mode.
+ return IsAbsoluteUint<10>(offset); // VFP addressing mode.
case kLoadWordPair:
- return IsAbsoluteUint(10, offset);
+ return IsAbsoluteUint<10>(offset);
default:
LOG(FATAL) << "UNREACHABLE";
UNREACHABLE();
@@ -353,12 +355,12 @@ bool Address::CanHoldStoreOffsetThumb(StoreOperandType type, int offset) {
case kStoreHalfword:
case kStoreByte:
case kStoreWord:
- return IsAbsoluteUint(12, offset);
+ return IsAbsoluteUint<12>(offset);
case kStoreSWord:
case kStoreDWord:
- return IsAbsoluteUint(10, offset); // VFP addressing mode.
+ return IsAbsoluteUint<10>(offset); // VFP addressing mode.
case kStoreWordPair:
- return IsAbsoluteUint(10, offset);
+ return IsAbsoluteUint<10>(offset);
default:
LOG(FATAL) << "UNREACHABLE";
UNREACHABLE();
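
The repeated substitutions in this file (and the IsUint<>/IsInt<> changes in the Thumb2 and x86 hunks below) replace runtime bit-count parameters with template parameters, so the range limits fold to compile-time constants. A simplified sketch of what such helpers look like; this is an assumed shape, not the exact definitions used by the assemblers:

// Simplified sketches of width checks with the bit count as a template
// parameter (illustrative; not the exact helpers used by the assemblers).
#include <cstdint>
#include <cstdio>

template <int kBits>
constexpr bool IsInt(int64_t value) {
  static_assert(kBits > 0 && kBits < 64, "unsupported width");
  return value >= -(INT64_C(1) << (kBits - 1)) && value < (INT64_C(1) << (kBits - 1));
}

template <int kBits>
constexpr bool IsUint(uint64_t value) {
  static_assert(kBits > 0 && kBits < 64, "unsupported width");
  return value < (UINT64_C(1) << kBits);
}

template <int kBits>
constexpr bool IsAbsoluteUint(int64_t value) {
  // True if |value| fits in an unsigned kBits-bit field, as used for load/store
  // offsets that carry a separate sign (U) bit in the encoding.
  return IsUint<kBits>(static_cast<uint64_t>(value < 0 ? -value : value));
}

int main() {
  static_assert(IsInt<8>(127) && !IsInt<8>(128), "signed 8-bit boundary");
  static_assert(IsAbsoluteUint<12>(-4095) && !IsAbsoluteUint<12>(4096), "addressing mode 2 range");
  std::printf("IsUint<8>(255)=%d IsUint<8>(256)=%d\n", IsUint<8>(255), IsUint<8>(256));
  return 0;
}
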
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index d9122764d0..8730f52eca 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -536,8 +536,44 @@ class ArmAssembler : public Assembler {
virtual void LoadImmediate(Register rd, int32_t value, Condition cond = AL) = 0;
void LoadSImmediate(SRegister sd, float value, Condition cond = AL) {
if (!vmovs(sd, value, cond)) {
- LoadImmediate(IP, bit_cast<int32_t, float>(value), cond);
- vmovsr(sd, IP, cond);
+ int32_t int_value = bit_cast<int32_t, float>(value);
+ if (int_value == bit_cast<int32_t, float>(0.0f)) {
+ // 0.0 is quite common, so we special case it by loading
+ // 2.0 in `sd` and then subtracting it from itself.
+ bool success = vmovs(sd, 2.0, cond);
+ CHECK(success);
+ vsubs(sd, sd, sd, cond);
+ } else {
+ LoadImmediate(IP, int_value, cond);
+ vmovsr(sd, IP, cond);
+ }
+ }
+ }
+
+ void LoadDImmediate(DRegister sd, double value, Condition cond = AL) {
+ if (!vmovd(sd, value, cond)) {
+ uint64_t int_value = bit_cast<uint64_t, double>(value);
+ if (int_value == bit_cast<uint64_t, double>(0.0)) {
+ // 0.0 is quite common, so we special case it by loading
+ // 2.0 in `sd` and then subtracting it from itself.
+ bool success = vmovd(sd, 2.0, cond);
+ CHECK(success);
+ vsubd(sd, sd, sd, cond);
+ } else {
+ if (sd < 16) {
+ SRegister low = static_cast<SRegister>(sd << 1);
+ SRegister high = static_cast<SRegister>(low + 1);
+ LoadSImmediate(low, bit_cast<float, uint32_t>(Low32Bits(int_value)), cond);
+ if (High32Bits(int_value) == Low32Bits(int_value)) {
+ vmovs(high, low);
+ } else {
+ LoadSImmediate(high, bit_cast<float, uint32_t>(High32Bits(int_value)), cond);
+ }
+ } else {
+ LOG(FATAL) << "Unimplemented loading of double into a D register "
+ << "that cannot be split into two S registers";
+ }
+ }
}
}
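
The new LoadSImmediate/LoadDImmediate paths work on bit images: +0.0 is detected by comparing bit patterns (and synthesized by loading 2.0 into the register and subtracting it from itself), while a general double is split into two 32-bit halves, one per S register of the D pair. A standalone illustration of that bit-level manipulation, with bit_cast/Low32Bits/High32Bits written out for the sketch:

// Standalone illustration of the bit-level handling above: a double's image is
// split into two 32-bit halves, and 0.0 is recognized by bit pattern, not value.
#include <cstdint>
#include <cstdio>
#include <cstring>

template <typename Dst, typename Src>
Dst bit_cast(const Src& src) {
  static_assert(sizeof(Dst) == sizeof(Src), "size mismatch");
  Dst dst;
  std::memcpy(&dst, &src, sizeof(dst));
  return dst;
}

uint32_t Low32Bits(uint64_t v) { return static_cast<uint32_t>(v); }
uint32_t High32Bits(uint64_t v) { return static_cast<uint32_t>(v >> 32); }

int main() {
  double value = 1234.5678;
  uint64_t image = bit_cast<uint64_t>(value);

  // The two halves that would go into the low/high S registers of the D pair.
  float low = bit_cast<float>(Low32Bits(image));
  float high = bit_cast<float>(High32Bits(image));

  // Reassemble to confirm the round trip is lossless.
  uint64_t rebuilt = (static_cast<uint64_t>(bit_cast<uint32_t>(high)) << 32) |
                     bit_cast<uint32_t>(low);
  std::printf("round trip ok: %d\n", rebuilt == image);

  // Bit-pattern equality is how the zero case is detected; note it matches +0.0
  // only, since -0.0 has a different image (sign bit set).
  std::printf("value is +0.0: %d\n", image == bit_cast<uint64_t>(0.0));
  return 0;
}
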
diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc
index 8d1fb60725..95796916b4 100644
--- a/compiler/utils/arm/assembler_arm32.cc
+++ b/compiler/utils/arm/assembler_arm32.cc
@@ -1254,7 +1254,7 @@ void Arm32Assembler::vmstat(Condition cond) { // VMRS APSR_nzcv, FPSCR
void Arm32Assembler::svc(uint32_t imm24) {
- CHECK(IsUint(24, imm24)) << imm24;
+ CHECK(IsUint<24>(imm24)) << imm24;
int32_t encoding = (AL << kConditionShift) | B27 | B26 | B25 | B24 | imm24;
Emit(encoding);
}
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 5383c28f82..6d0571e263 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -2080,7 +2080,7 @@ void Thumb2Assembler::vmstat(Condition cond) { // VMRS APSR_nzcv, FPSCR.
void Thumb2Assembler::svc(uint32_t imm8) {
- CHECK(IsUint(8, imm8)) << imm8;
+ CHECK(IsUint<8>(imm8)) << imm8;
int16_t encoding = B15 | B14 | B12 |
B11 | B10 | B9 | B8 |
imm8;
@@ -2089,7 +2089,7 @@ void Thumb2Assembler::svc(uint32_t imm8) {
void Thumb2Assembler::bkpt(uint16_t imm8) {
- CHECK(IsUint(8, imm8)) << imm8;
+ CHECK(IsUint<8>(imm8)) << imm8;
int16_t encoding = B15 | B13 | B12 |
B11 | B10 | B9 |
imm8;
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index e571e72402..ebea9d4262 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -198,6 +198,18 @@ TEST_F(AssemblerThumb2Test, strexd) {
DriverStr(expected, "strexd");
}
+TEST_F(AssemblerThumb2Test, LdrdStrd) {
+ GetAssembler()->ldrd(arm::R0, arm::Address(arm::R2, 8));
+ GetAssembler()->ldrd(arm::R0, arm::Address(arm::R12));
+ GetAssembler()->strd(arm::R0, arm::Address(arm::R2, 8));
+
+ const char* expected =
+ "ldrd r0, r1, [r2, #8]\n"
+ "ldrd r0, r1, [r12]\n"
+ "strd r0, r1, [r2, #8]\n";
+ DriverStr(expected, "ldrdstrd");
+}
+
TEST_F(AssemblerThumb2Test, eor) {
#define __ GetAssembler()->
__ eor(arm::R1, arm::R1, arm::ShifterOperand(arm::R0));
diff --git a/compiler/utils/debug_stack.h b/compiler/utils/debug_stack.h
deleted file mode 100644
index 1bb0624187..0000000000
--- a/compiler/utils/debug_stack.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_UTILS_DEBUG_STACK_H_
-#define ART_COMPILER_UTILS_DEBUG_STACK_H_
-
-#include "base/logging.h"
-#include "base/macros.h"
-#include "globals.h"
-
-namespace art {
-
-// Helper classes for reference counting to enforce construction/destruction order and
-// usage of the top element of a stack in debug mode with no overhead in release mode.
-
-// Reference counter. No references allowed in destructor or in explicitly called CheckNoRefs().
-template <bool kIsDebug>
-class DebugStackRefCounterImpl;
-// Reference. Allows an explicit check that it's the top reference.
-template <bool kIsDebug>
-class DebugStackReferenceImpl;
-// Indirect top reference. Checks that the reference is the top reference when used.
-template <bool kIsDebug>
-class DebugStackIndirectTopRefImpl;
-
-typedef DebugStackRefCounterImpl<kIsDebugBuild> DebugStackRefCounter;
-typedef DebugStackReferenceImpl<kIsDebugBuild> DebugStackReference;
-typedef DebugStackIndirectTopRefImpl<kIsDebugBuild> DebugStackIndirectTopRef;
-
-// Non-debug mode specializations. This should be optimized away.
-
-template <>
-class DebugStackRefCounterImpl<false> {
- public:
- size_t IncrementRefCount() { return 0u; }
- void DecrementRefCount() { }
- size_t GetRefCount() const { return 0u; }
- void CheckNoRefs() const { }
-};
-
-template <>
-class DebugStackReferenceImpl<false> {
- public:
- explicit DebugStackReferenceImpl(DebugStackRefCounterImpl<false>* counter) { UNUSED(counter); }
- DebugStackReferenceImpl(const DebugStackReferenceImpl& other) = default;
- DebugStackReferenceImpl& operator=(const DebugStackReferenceImpl& other) = default;
- void CheckTop() { }
-};
-
-template <>
-class DebugStackIndirectTopRefImpl<false> {
- public:
- explicit DebugStackIndirectTopRefImpl(DebugStackReferenceImpl<false>* ref) { UNUSED(ref); }
- DebugStackIndirectTopRefImpl(const DebugStackIndirectTopRefImpl& other) = default;
- DebugStackIndirectTopRefImpl& operator=(const DebugStackIndirectTopRefImpl& other) = default;
- void CheckTop() { }
-};
-
-// Debug mode versions.
-
-template <bool kIsDebug>
-class DebugStackRefCounterImpl {
- public:
- DebugStackRefCounterImpl() : ref_count_(0u) { }
- ~DebugStackRefCounterImpl() { CheckNoRefs(); }
- size_t IncrementRefCount() { return ++ref_count_; }
- void DecrementRefCount() { --ref_count_; }
- size_t GetRefCount() const { return ref_count_; }
- void CheckNoRefs() const { CHECK_EQ(ref_count_, 0u); }
-
- private:
- size_t ref_count_;
-};
-
-template <bool kIsDebug>
-class DebugStackReferenceImpl {
- public:
- explicit DebugStackReferenceImpl(DebugStackRefCounterImpl<kIsDebug>* counter)
- : counter_(counter), ref_count_(counter->IncrementRefCount()) {
- }
- DebugStackReferenceImpl(const DebugStackReferenceImpl& other)
- : counter_(other.counter_), ref_count_(counter_->IncrementRefCount()) {
- }
- DebugStackReferenceImpl& operator=(const DebugStackReferenceImpl& other) {
- CHECK(counter_ == other.counter_);
- return *this;
- }
- ~DebugStackReferenceImpl() { counter_->DecrementRefCount(); }
- void CheckTop() { CHECK_EQ(counter_->GetRefCount(), ref_count_); }
-
- private:
- DebugStackRefCounterImpl<true>* counter_;
- size_t ref_count_;
-};
-
-template <bool kIsDebug>
-class DebugStackIndirectTopRefImpl {
- public:
- explicit DebugStackIndirectTopRefImpl(DebugStackReferenceImpl<kIsDebug>* ref)
- : ref_(ref) {
- CheckTop();
- }
- DebugStackIndirectTopRefImpl(const DebugStackIndirectTopRefImpl& other)
- : ref_(other.ref_) {
- CheckTop();
- }
- DebugStackIndirectTopRefImpl& operator=(const DebugStackIndirectTopRefImpl& other) {
- CHECK(ref_ == other.ref_);
- CheckTop();
- return *this;
- }
- ~DebugStackIndirectTopRefImpl() {
- CheckTop();
- }
- void CheckTop() {
- ref_->CheckTop();
- }
-
- private:
- DebugStackReferenceImpl<kIsDebug>* ref_;
-};
-
-} // namespace art
-
-#endif // ART_COMPILER_UTILS_DEBUG_STACK_H_
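
debug_stack.h implements compile-time-selected checking: a template<bool> with a real, counting implementation for debug builds and an empty specialization that optimizes away in release builds. A minimal sketch of the pattern, with illustrative names:

// Minimal sketch of the debug-stack idiom removed here: a checking
// implementation selected in debug builds and a zero-cost no-op
// specialization for release builds.
#include <cassert>
#include <cstddef>

template <bool kIsDebug>
class RefCounter;

template <>
class RefCounter<false> {
 public:
  size_t Increment() { return 0u; }
  void Decrement() {}
  void CheckNoRefs() const {}
};

template <>
class RefCounter<true> {
 public:
  ~RefCounter() { CheckNoRefs(); }
  size_t Increment() { return ++count_; }
  void Decrement() { --count_; }
  void CheckNoRefs() const { assert(count_ == 0u); }

 private:
  size_t count_ = 0u;
};

#ifndef NDEBUG
constexpr bool kDebugBuild = true;
#else
constexpr bool kDebugBuild = false;
#endif

int main() {
  RefCounter<kDebugBuild> counter;
  counter.Increment();    // e.g. a scoped allocator coming into scope.
  counter.Decrement();    // ...and leaving it in LIFO order.
  counter.CheckNoRefs();  // Verified for real only in debug builds.
  return 0;
}
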
diff --git a/compiler/utils/dex_instruction_utils.h b/compiler/utils/dex_instruction_utils.h
index 2c6e525e1d..bb2c592f13 100644
--- a/compiler/utils/dex_instruction_utils.h
+++ b/compiler/utils/dex_instruction_utils.h
@@ -110,6 +110,10 @@ constexpr bool IsInstructionAGetOrAPut(Instruction::Code code) {
return Instruction::AGET <= code && code <= Instruction::APUT_SHORT;
}
+constexpr bool IsInstructionBinOp2Addr(Instruction::Code code) {
+ return Instruction::ADD_INT_2ADDR <= code && code <= Instruction::REM_DOUBLE_2ADDR;
+}
+
// TODO: Remove the #if guards below when we fully migrate to C++14.
constexpr bool IsInvokeInstructionRange(Instruction::Code opcode) {
diff --git a/compiler/utils/dwarf_cfi.cc b/compiler/utils/dwarf_cfi.cc
index 83e5f5ad39..a7e09c6517 100644
--- a/compiler/utils/dwarf_cfi.cc
+++ b/compiler/utils/dwarf_cfi.cc
@@ -37,7 +37,7 @@ void DW_CFA_advance_loc(std::vector<uint8_t>* buf, uint32_t increment) {
} else {
// Four byte delta.
buf->push_back(0x04);
- PushWord(buf, increment);
+ Push32(buf, increment);
}
}
@@ -68,35 +68,35 @@ void DW_CFA_restore_state(std::vector<uint8_t>* buf) {
void WriteFDEHeader(std::vector<uint8_t>* buf, bool is_64bit) {
// 'length' (filled in by other functions).
if (is_64bit) {
- PushWord(buf, 0xffffffff); // Indicates 64bit
- PushWord(buf, 0);
- PushWord(buf, 0);
+ Push32(buf, 0xffffffff); // Indicates 64bit
+ Push32(buf, 0);
+ Push32(buf, 0);
} else {
- PushWord(buf, 0);
+ Push32(buf, 0);
}
// 'CIE_pointer' (filled in by linker).
if (is_64bit) {
- PushWord(buf, 0);
- PushWord(buf, 0);
+ Push32(buf, 0);
+ Push32(buf, 0);
} else {
- PushWord(buf, 0);
+ Push32(buf, 0);
}
// 'initial_location' (filled in by linker).
if (is_64bit) {
- PushWord(buf, 0);
- PushWord(buf, 0);
+ Push32(buf, 0);
+ Push32(buf, 0);
} else {
- PushWord(buf, 0);
+ Push32(buf, 0);
}
// 'address_range' (filled in by other functions).
if (is_64bit) {
- PushWord(buf, 0);
- PushWord(buf, 0);
+ Push32(buf, 0);
+ Push32(buf, 0);
} else {
- PushWord(buf, 0);
+ Push32(buf, 0);
}
// Augmentation length: 0
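
The PushWord to Push32 rename makes the width explicit at the call sites. Inferred from its use here, the helper appends a 32-bit value to the byte buffer little-endian; the sketch below shows that assumed shape and is not code copied from the tree:

// Sketch of what the renamed helper presumably does, inferred from its use in
// this file: append a 32-bit value to a byte buffer, little-endian.
#include <cstdint>
#include <vector>

void Push32(std::vector<uint8_t>* buf, uint32_t data) {
  buf->push_back(data & 0xff);
  buf->push_back((data >> 8) & 0xff);
  buf->push_back((data >> 16) & 0xff);
  buf->push_back((data >> 24) & 0xff);
}

int main() {
  std::vector<uint8_t> buf;
  Push32(&buf, 0xffffffff);  // 64-bit DWARF initial-length escape, as above.
  Push32(&buf, 0);
  return buf.size() == 8 ? 0 : 1;
}
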
diff --git a/compiler/utils/growable_array.h b/compiler/utils/growable_array.h
index 6af4853e09..821e28b4a0 100644
--- a/compiler/utils/growable_array.h
+++ b/compiler/utils/growable_array.h
@@ -20,7 +20,7 @@
#include <stdint.h>
#include <stddef.h>
-#include "arena_object.h"
+#include "base/arena_object.h"
namespace art {
@@ -33,16 +33,14 @@ class GrowableArray : public ArenaObject<kArenaAllocGrowableArray> {
: arena_(arena),
num_allocated_(init_length),
num_used_(0) {
- elem_list_ = static_cast<T*>(arena_->Alloc(sizeof(T) * init_length,
- kArenaAllocGrowableArray));
+ elem_list_ = arena_->AllocArray<T>(init_length, kArenaAllocGrowableArray);
}
GrowableArray(ArenaAllocator* arena, size_t init_length, T initial_data)
: arena_(arena),
num_allocated_(init_length),
num_used_(init_length) {
- elem_list_ = static_cast<T*>(arena_->Alloc(sizeof(T) * init_length,
- kArenaAllocGrowableArray));
+ elem_list_ = arena_->AllocArray<T>(init_length, kArenaAllocGrowableArray);
for (size_t i = 0; i < init_length; ++i) {
elem_list_[i] = initial_data;
}
@@ -58,8 +56,7 @@ class GrowableArray : public ArenaObject<kArenaAllocGrowableArray> {
if (new_length > target_length) {
target_length = new_length;
}
- T* new_array = static_cast<T*>(arena_->Alloc(sizeof(T) * target_length,
- kArenaAllocGrowableArray));
+ T* new_array = arena_->AllocArray<T>(target_length, kArenaAllocGrowableArray);
memcpy(new_array, elem_list_, sizeof(T) * num_allocated_);
num_allocated_ = target_length;
elem_list_ = new_array;
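
These hunks replace the manual `static_cast<T*>(Alloc(sizeof(T) * n, kind))` pattern with a typed AllocArray<T>(n, kind) helper. A simplified stand-in, not the ART ArenaAllocator, showing how such a member template wraps the untyped Alloc():

// Sketch of the typed-array helper pattern the hunks switch to: a member
// template that does the sizeof multiplication and cast in one place.
#include <cstddef>
#include <cstdlib>
#include <vector>

enum AllocKind { kAllocGrowableArray, kAllocMisc };

class Allocator {
 public:
  void* Alloc(size_t bytes, AllocKind kind) {
    (void)kind;  // A real arena would tag per-kind statistics here.
    void* p = std::calloc(1, bytes);  // Arena allocations are zero-initialized.
    blocks_.push_back(p);
    return p;
  }
  template <typename T>
  T* AllocArray(size_t length, AllocKind kind) {
    return static_cast<T*>(Alloc(length * sizeof(T), kind));
  }
  ~Allocator() {
    for (void* p : blocks_) std::free(p);
  }

 private:
  std::vector<void*> blocks_;
};

int main() {
  Allocator arena;
  // Before: static_cast<int*>(arena.Alloc(sizeof(int) * 16, kAllocGrowableArray));
  int* elems = arena.AllocArray<int>(16, kAllocGrowableArray);
  elems[15] = 42;
  return elems[15] == 42 ? 0 : 1;
}
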
diff --git a/compiler/utils/scoped_arena_allocator.cc b/compiler/utils/scoped_arena_allocator.cc
deleted file mode 100644
index d9e0619de6..0000000000
--- a/compiler/utils/scoped_arena_allocator.cc
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "scoped_arena_allocator.h"
-
-#include "utils/arena_allocator.h"
-#include <memcheck/memcheck.h>
-
-namespace art {
-
-static constexpr size_t kValgrindRedZoneBytes = 8;
-
-ArenaStack::ArenaStack(ArenaPool* arena_pool)
- : DebugStackRefCounter(),
- stats_and_pool_(arena_pool),
- bottom_arena_(nullptr),
- top_arena_(nullptr),
- top_ptr_(nullptr),
- top_end_(nullptr),
- running_on_valgrind_(RUNNING_ON_VALGRIND > 0) {
-}
-
-ArenaStack::~ArenaStack() {
- DebugStackRefCounter::CheckNoRefs();
- stats_and_pool_.pool->FreeArenaChain(bottom_arena_);
-}
-
-void ArenaStack::Reset() {
- DebugStackRefCounter::CheckNoRefs();
- stats_and_pool_.pool->FreeArenaChain(bottom_arena_);
- bottom_arena_ = nullptr;
- top_arena_ = nullptr;
- top_ptr_ = nullptr;
- top_end_ = nullptr;
-}
-
-MemStats ArenaStack::GetPeakStats() const {
- DebugStackRefCounter::CheckNoRefs();
- return MemStats("ArenaStack peak", static_cast<const TaggedStats<Peak>*>(&stats_and_pool_),
- bottom_arena_);
-}
-
-uint8_t* ArenaStack::AllocateFromNextArena(size_t rounded_bytes) {
- UpdateBytesAllocated();
- size_t allocation_size = std::max(Arena::kDefaultSize, rounded_bytes);
- if (UNLIKELY(top_arena_ == nullptr)) {
- top_arena_ = bottom_arena_ = stats_and_pool_.pool->AllocArena(allocation_size);
- top_arena_->next_ = nullptr;
- } else if (top_arena_->next_ != nullptr && top_arena_->next_->Size() >= allocation_size) {
- top_arena_ = top_arena_->next_;
- } else {
- Arena* tail = top_arena_->next_;
- top_arena_->next_ = stats_and_pool_.pool->AllocArena(allocation_size);
- top_arena_ = top_arena_->next_;
- top_arena_->next_ = tail;
- }
- top_end_ = top_arena_->End();
- // top_ptr_ shall be updated by ScopedArenaAllocator.
- return top_arena_->Begin();
-}
-
-void ArenaStack::UpdatePeakStatsAndRestore(const ArenaAllocatorStats& restore_stats) {
- if (PeakStats()->BytesAllocated() < CurrentStats()->BytesAllocated()) {
- PeakStats()->Copy(*CurrentStats());
- }
- CurrentStats()->Copy(restore_stats);
-}
-
-void ArenaStack::UpdateBytesAllocated() {
- if (top_arena_ != nullptr) {
- // Update how many bytes we have allocated into the arena so that the arena pool knows how
- // much memory to zero out. Though ScopedArenaAllocator doesn't guarantee the memory is
- // zero-initialized, the Arena may be reused by ArenaAllocator which does guarantee this.
- size_t allocated = static_cast<size_t>(top_ptr_ - top_arena_->Begin());
- if (top_arena_->bytes_allocated_ < allocated) {
- top_arena_->bytes_allocated_ = allocated;
- }
- }
-}
-
-void* ArenaStack::AllocValgrind(size_t bytes, ArenaAllocKind kind) {
- size_t rounded_bytes = RoundUp(bytes + kValgrindRedZoneBytes, 8);
- uint8_t* ptr = top_ptr_;
- if (UNLIKELY(static_cast<size_t>(top_end_ - ptr) < rounded_bytes)) {
- ptr = AllocateFromNextArena(rounded_bytes);
- CHECK(ptr != nullptr) << "Failed to allocate memory";
- }
- CurrentStats()->RecordAlloc(bytes, kind);
- top_ptr_ = ptr + rounded_bytes;
- VALGRIND_MAKE_MEM_UNDEFINED(ptr, bytes);
- VALGRIND_MAKE_MEM_NOACCESS(ptr + bytes, rounded_bytes - bytes);
- return ptr;
-}
-
-ScopedArenaAllocator::ScopedArenaAllocator(ArenaStack* arena_stack)
- : DebugStackReference(arena_stack),
- DebugStackRefCounter(),
- ArenaAllocatorStats(*arena_stack->CurrentStats()),
- arena_stack_(arena_stack),
- mark_arena_(arena_stack->top_arena_),
- mark_ptr_(arena_stack->top_ptr_),
- mark_end_(arena_stack->top_end_) {
-}
-
-ScopedArenaAllocator::~ScopedArenaAllocator() {
- DoReset();
-}
-
-void ScopedArenaAllocator::Reset() {
- DoReset();
- // If this allocator was Create()d, we need to move the arena_stack_->top_ptr_ past *this.
- if (mark_ptr_ == reinterpret_cast<uint8_t*>(this)) {
- arena_stack_->top_ptr_ = mark_ptr_ + RoundUp(sizeof(ScopedArenaAllocator), 8);
- }
-}
-
-void ScopedArenaAllocator::DoReset() {
- DebugStackReference::CheckTop();
- DebugStackRefCounter::CheckNoRefs();
- arena_stack_->UpdatePeakStatsAndRestore(*this);
- arena_stack_->UpdateBytesAllocated();
- if (LIKELY(mark_arena_ != nullptr)) {
- arena_stack_->top_arena_ = mark_arena_;
- arena_stack_->top_ptr_ = mark_ptr_;
- arena_stack_->top_end_ = mark_end_;
- } else if (arena_stack_->bottom_arena_ != nullptr) {
- mark_arena_ = arena_stack_->top_arena_ = arena_stack_->bottom_arena_;
- mark_ptr_ = arena_stack_->top_ptr_ = mark_arena_->Begin();
- mark_end_ = arena_stack_->top_end_ = mark_arena_->End();
- }
-}
-
-} // namespace art
diff --git a/compiler/utils/scoped_arena_allocator.h b/compiler/utils/scoped_arena_allocator.h
deleted file mode 100644
index 523f158969..0000000000
--- a/compiler/utils/scoped_arena_allocator.h
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_UTILS_SCOPED_ARENA_ALLOCATOR_H_
-#define ART_COMPILER_UTILS_SCOPED_ARENA_ALLOCATOR_H_
-
-#include "base/logging.h"
-#include "base/macros.h"
-#include "utils/arena_allocator.h"
-#include "utils/debug_stack.h"
-#include "globals.h"
-
-namespace art {
-
-class ArenaStack;
-class ScopedArenaAllocator;
-
-template <typename T>
-class ScopedArenaAllocatorAdapter;
-
-// Holds a list of Arenas for use by ScopedArenaAllocator stack.
-class ArenaStack : private DebugStackRefCounter {
- public:
- explicit ArenaStack(ArenaPool* arena_pool);
- ~ArenaStack();
-
- void Reset();
-
- size_t PeakBytesAllocated() {
- return PeakStats()->BytesAllocated();
- }
-
- MemStats GetPeakStats() const;
-
- private:
- struct Peak;
- struct Current;
- template <typename Tag> struct TaggedStats : ArenaAllocatorStats { };
- struct StatsAndPool : TaggedStats<Peak>, TaggedStats<Current> {
- explicit StatsAndPool(ArenaPool* arena_pool) : pool(arena_pool) { }
- ArenaPool* const pool;
- };
-
- ArenaAllocatorStats* PeakStats() {
- return static_cast<TaggedStats<Peak>*>(&stats_and_pool_);
- }
-
- ArenaAllocatorStats* CurrentStats() {
- return static_cast<TaggedStats<Current>*>(&stats_and_pool_);
- }
-
- // Private - access via ScopedArenaAllocator or ScopedArenaAllocatorAdapter.
- void* Alloc(size_t bytes, ArenaAllocKind kind) ALWAYS_INLINE {
- if (UNLIKELY(running_on_valgrind_)) {
- return AllocValgrind(bytes, kind);
- }
- size_t rounded_bytes = RoundUp(bytes, 8);
- uint8_t* ptr = top_ptr_;
- if (UNLIKELY(static_cast<size_t>(top_end_ - ptr) < rounded_bytes)) {
- ptr = AllocateFromNextArena(rounded_bytes);
- }
- CurrentStats()->RecordAlloc(bytes, kind);
- top_ptr_ = ptr + rounded_bytes;
- return ptr;
- }
-
- uint8_t* AllocateFromNextArena(size_t rounded_bytes);
- void UpdatePeakStatsAndRestore(const ArenaAllocatorStats& restore_stats);
- void UpdateBytesAllocated();
- void* AllocValgrind(size_t bytes, ArenaAllocKind kind);
-
- StatsAndPool stats_and_pool_;
- Arena* bottom_arena_;
- Arena* top_arena_;
- uint8_t* top_ptr_;
- uint8_t* top_end_;
-
- const bool running_on_valgrind_;
-
- friend class ScopedArenaAllocator;
- template <typename T>
- friend class ScopedArenaAllocatorAdapter;
-
- DISALLOW_COPY_AND_ASSIGN(ArenaStack);
-};
-
-class ScopedArenaAllocator
- : private DebugStackReference, private DebugStackRefCounter, private ArenaAllocatorStats {
- public:
- // Create a ScopedArenaAllocator directly on the ArenaStack when the scope of
- // the allocator is not exactly a C++ block scope. For example, an optimization
- // pass can create the scoped allocator in Start() and destroy it in End().
- static ScopedArenaAllocator* Create(ArenaStack* arena_stack) {
- void* addr = arena_stack->Alloc(sizeof(ScopedArenaAllocator), kArenaAllocMisc);
- ScopedArenaAllocator* allocator = new(addr) ScopedArenaAllocator(arena_stack);
- allocator->mark_ptr_ = reinterpret_cast<uint8_t*>(addr);
- return allocator;
- }
-
- explicit ScopedArenaAllocator(ArenaStack* arena_stack);
- ~ScopedArenaAllocator();
-
- void Reset();
-
- void* Alloc(size_t bytes, ArenaAllocKind kind) ALWAYS_INLINE {
- DebugStackReference::CheckTop();
- return arena_stack_->Alloc(bytes, kind);
- }
-
- // Get adapter for use in STL containers. See scoped_arena_containers.h .
- ScopedArenaAllocatorAdapter<void> Adapter(ArenaAllocKind kind = kArenaAllocSTL);
-
- // Allow a delete-expression to destroy but not deallocate allocators created by Create().
- static void operator delete(void* ptr) { UNUSED(ptr); }
-
- private:
- ArenaStack* const arena_stack_;
- Arena* mark_arena_;
- uint8_t* mark_ptr_;
- uint8_t* mark_end_;
-
- void DoReset();
-
- template <typename T>
- friend class ScopedArenaAllocatorAdapter;
-
- DISALLOW_COPY_AND_ASSIGN(ScopedArenaAllocator);
-};
-
-} // namespace art
-
-#endif // ART_COMPILER_UTILS_SCOPED_ARENA_ALLOCATOR_H_
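
The scoped arena allocator removed in the two files above implements LIFO allocation on an ArenaStack: each scope records the stack top on construction and rewinds to it on destruction, so nested passes reuse the same backing memory. A much-simplified standalone sketch of that mechanism, with illustrative classes:

// Standalone sketch of the LIFO scoped-arena idea: a scope records the stack's
// top on entry and rewinds it on exit, so later scopes reuse the same bytes.
#include <cstddef>
#include <cstdint>
#include <memory>

class Stack {
 public:
  explicit Stack(size_t capacity)
      : storage_(new uint8_t[capacity]), top_(0), capacity_(capacity) {}
  void* Alloc(size_t bytes) {
    size_t rounded = (bytes + 7u) & ~size_t{7};
    if (top_ + rounded > capacity_) return nullptr;
    void* result = storage_.get() + top_;
    top_ += rounded;
    return result;
  }
  size_t Top() const { return top_; }
  void Rewind(size_t mark) { top_ = mark; }

 private:
  std::unique_ptr<uint8_t[]> storage_;
  size_t top_;
  size_t capacity_;
};

class ScopedAllocator {
 public:
  explicit ScopedAllocator(Stack* stack) : stack_(stack), mark_(stack->Top()) {}
  ~ScopedAllocator() { stack_->Rewind(mark_); }  // LIFO: releases this scope's allocations.
  void* Alloc(size_t bytes) { return stack_->Alloc(bytes); }

 private:
  Stack* const stack_;
  const size_t mark_;
};

int main() {
  Stack stack(1 << 16);
  {
    ScopedAllocator pass1(&stack);
    pass1.Alloc(1024);          // Temporary data for one optimization pass.
  }                             // Rewound here.
  ScopedAllocator pass2(&stack);
  pass2.Alloc(2048);            // Reuses the same bytes pass1 used.
  return stack.Top() == 2048 ? 0 : 1;
}
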
diff --git a/compiler/utils/scoped_arena_containers.h b/compiler/utils/scoped_arena_containers.h
deleted file mode 100644
index df93b273d1..0000000000
--- a/compiler/utils/scoped_arena_containers.h
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Copyright (C) 2014 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_UTILS_SCOPED_ARENA_CONTAINERS_H_
-#define ART_COMPILER_UTILS_SCOPED_ARENA_CONTAINERS_H_
-
-#include <deque>
-#include <queue>
-#include <set>
-#include <vector>
-
-#include "utils/arena_containers.h" // For ArenaAllocatorAdapterKind.
-#include "utils/scoped_arena_allocator.h"
-#include "safe_map.h"
-
-namespace art {
-
-// Adapter for use of ScopedArenaAllocator in STL containers.
-// Use ScopedArenaAllocator::Adapter() to create an adapter to pass to container constructors.
-// For example,
-// void foo(ScopedArenaAllocator* allocator) {
-// ScopedArenaVector<int> foo_vector(allocator->Adapter(kArenaAllocMisc));
-// ScopedArenaSafeMap<int, int> foo_map(std::less<int>(), allocator->Adapter());
-// // Use foo_vector and foo_map...
-// }
-template <typename T>
-class ScopedArenaAllocatorAdapter;
-
-template <typename T>
-using ScopedArenaDeque = std::deque<T, ScopedArenaAllocatorAdapter<T>>;
-
-template <typename T>
-using ScopedArenaQueue = std::queue<T, ScopedArenaDeque<T>>;
-
-template <typename T>
-using ScopedArenaVector = std::vector<T, ScopedArenaAllocatorAdapter<T>>;
-
-template <typename T, typename Comparator = std::less<T>>
-using ScopedArenaSet = std::set<T, Comparator, ScopedArenaAllocatorAdapter<T>>;
-
-template <typename K, typename V, typename Comparator = std::less<K>>
-using ScopedArenaSafeMap =
- SafeMap<K, V, Comparator, ScopedArenaAllocatorAdapter<std::pair<const K, V>>>;
-
-// Implementation details below.
-
-template <>
-class ScopedArenaAllocatorAdapter<void>
- : private DebugStackReference, private DebugStackIndirectTopRef,
- private ArenaAllocatorAdapterKind {
- public:
- typedef void value_type;
- typedef void* pointer;
- typedef const void* const_pointer;
-
- template <typename U>
- struct rebind {
- typedef ScopedArenaAllocatorAdapter<U> other;
- };
-
- explicit ScopedArenaAllocatorAdapter(ScopedArenaAllocator* arena_allocator,
- ArenaAllocKind kind = kArenaAllocSTL)
- : DebugStackReference(arena_allocator),
- DebugStackIndirectTopRef(arena_allocator),
- ArenaAllocatorAdapterKind(kind),
- arena_stack_(arena_allocator->arena_stack_) {
- }
- template <typename U>
- ScopedArenaAllocatorAdapter(const ScopedArenaAllocatorAdapter<U>& other)
- : DebugStackReference(other),
- DebugStackIndirectTopRef(other),
- ArenaAllocatorAdapterKind(other),
- arena_stack_(other.arena_stack_) {
- }
- ScopedArenaAllocatorAdapter(const ScopedArenaAllocatorAdapter& other) = default;
- ScopedArenaAllocatorAdapter& operator=(const ScopedArenaAllocatorAdapter& other) = default;
- ~ScopedArenaAllocatorAdapter() = default;
-
- private:
- ArenaStack* arena_stack_;
-
- template <typename U>
- friend class ScopedArenaAllocatorAdapter;
-};
-
-template <typename T>
-class ScopedArenaAllocatorAdapter
- : private DebugStackReference, private DebugStackIndirectTopRef,
- private ArenaAllocatorAdapterKind {
- public:
- typedef T value_type;
- typedef T* pointer;
- typedef T& reference;
- typedef const T* const_pointer;
- typedef const T& const_reference;
- typedef size_t size_type;
- typedef ptrdiff_t difference_type;
-
- template <typename U>
- struct rebind {
- typedef ScopedArenaAllocatorAdapter<U> other;
- };
-
- explicit ScopedArenaAllocatorAdapter(ScopedArenaAllocator* arena_allocator,
- ArenaAllocKind kind = kArenaAllocSTL)
- : DebugStackReference(arena_allocator),
- DebugStackIndirectTopRef(arena_allocator),
- ArenaAllocatorAdapterKind(kind),
- arena_stack_(arena_allocator->arena_stack_) {
- }
- template <typename U>
- ScopedArenaAllocatorAdapter(const ScopedArenaAllocatorAdapter<U>& other)
- : DebugStackReference(other),
- DebugStackIndirectTopRef(other),
- ArenaAllocatorAdapterKind(other),
- arena_stack_(other.arena_stack_) {
- }
- ScopedArenaAllocatorAdapter(const ScopedArenaAllocatorAdapter& other) = default;
- ScopedArenaAllocatorAdapter& operator=(const ScopedArenaAllocatorAdapter& other) = default;
- ~ScopedArenaAllocatorAdapter() = default;
-
- size_type max_size() const {
- return static_cast<size_type>(-1) / sizeof(T);
- }
-
- pointer address(reference x) const { return &x; }
- const_pointer address(const_reference x) const { return &x; }
-
- pointer allocate(size_type n, ScopedArenaAllocatorAdapter<void>::pointer hint = nullptr) {
- UNUSED(hint);
- DCHECK_LE(n, max_size());
- DebugStackIndirectTopRef::CheckTop();
- return reinterpret_cast<T*>(arena_stack_->Alloc(n * sizeof(T),
- ArenaAllocatorAdapterKind::Kind()));
- }
- void deallocate(pointer p, size_type n) {
- UNUSED(p);
- UNUSED(n);
- DebugStackIndirectTopRef::CheckTop();
- }
-
- void construct(pointer p, const_reference val) {
- // Don't CheckTop(), allow reusing existing capacity of a vector/deque below the top.
- new (static_cast<void*>(p)) value_type(val);
- }
- void destroy(pointer p) {
- // Don't CheckTop(), allow reusing existing capacity of a vector/deque below the top.
- p->~value_type();
- }
-
- private:
- ArenaStack* arena_stack_;
-
- template <typename U>
- friend class ScopedArenaAllocatorAdapter;
-
- template <typename U>
- friend bool operator==(const ScopedArenaAllocatorAdapter<U>& lhs,
- const ScopedArenaAllocatorAdapter<U>& rhs);
-};
-
-template <typename T>
-inline bool operator==(const ScopedArenaAllocatorAdapter<T>& lhs,
- const ScopedArenaAllocatorAdapter<T>& rhs) {
- return lhs.arena_stack_ == rhs.arena_stack_;
-}
-
-template <typename T>
-inline bool operator!=(const ScopedArenaAllocatorAdapter<T>& lhs,
- const ScopedArenaAllocatorAdapter<T>& rhs) {
- return !(lhs == rhs);
-}
-
-inline ScopedArenaAllocatorAdapter<void> ScopedArenaAllocator::Adapter(ArenaAllocKind kind) {
- return ScopedArenaAllocatorAdapter<void>(this, kind);
-}
-
-} // namespace art
-
-#endif // ART_COMPILER_UTILS_SCOPED_ARENA_CONTAINERS_H_
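
The container aliases in this deleted header tie the previous two ideas together: STL containers whose allocator adapter draws from the scoped arena, with the rule that a container must not outlive the scope whose memory backs it. A compact, self-contained sketch with illustrative stand-ins rather than the ART classes:

// Compact sketch of the usage pattern and its lifetime rule: the container's
// storage is rewound together with its scope, so the container must die first.
#include <cstddef>
#include <cstdint>
#include <vector>

struct ScopeArena {
  alignas(16) uint8_t buffer[1 << 16];
  size_t top = 0;
  void* Alloc(size_t bytes) {  // No exhaustion check in this sketch.
    void* result = buffer + top;
    top += (bytes + 7u) & ~size_t{7};
    return result;
  }
};

template <typename T>
struct ScopeAdapter {
  using value_type = T;
  explicit ScopeAdapter(ScopeArena* a) : arena(a) {}
  template <typename U>
  ScopeAdapter(const ScopeAdapter<U>& other) : arena(other.arena) {}
  T* allocate(size_t n) { return static_cast<T*>(arena->Alloc(n * sizeof(T))); }
  void deallocate(T*, size_t) {}
  ScopeArena* arena;
};
template <typename T, typename U>
bool operator==(const ScopeAdapter<T>& a, const ScopeAdapter<U>& b) { return a.arena == b.arena; }
template <typename T, typename U>
bool operator!=(const ScopeAdapter<T>& a, const ScopeAdapter<U>& b) { return !(a == b); }

template <typename T>
using ScopeVector = std::vector<T, ScopeAdapter<T>>;

int main() {
  ScopeArena arena;
  size_t mark = arena.top;              // What a scoped allocator records on entry.
  {
    ScopeAdapter<int> adapter(&arena);
    ScopeVector<int> worklist(adapter);
    for (int i = 0; i < 32; ++i) worklist.push_back(i);
  }                                     // The container dies before the rewind...
  arena.top = mark;                     // ...because rewinding invalidates its storage.
  return arena.top == 0 ? 0 : 1;
}
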
diff --git a/compiler/utils/swap_space.h b/compiler/utils/swap_space.h
index 2d0d77af78..1f8f5da6cd 100644
--- a/compiler/utils/swap_space.h
+++ b/compiler/utils/swap_space.h
@@ -23,12 +23,12 @@
#include <stdint.h>
#include <stddef.h>
+#include "base/debug_stack.h"
#include "base/logging.h"
#include "base/macros.h"
#include "base/mutex.h"
#include "mem_map.h"
#include "utils.h"
-#include "utils/debug_stack.h"
namespace art {
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 03744e4149..8f4208b417 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1290,7 +1290,7 @@ void X86Assembler::j(Condition condition, Label* label) {
static const int kLongSize = 6;
int offset = label->Position() - buffer_.Size();
CHECK_LE(offset, 0);
- if (IsInt(8, offset - kShortSize)) {
+ if (IsInt<8>(offset - kShortSize)) {
EmitUint8(0x70 + condition);
EmitUint8((offset - kShortSize) & 0xFF);
} else {
@@ -1325,7 +1325,7 @@ void X86Assembler::jmp(Label* label) {
static const int kLongSize = 5;
int offset = label->Position() - buffer_.Size();
CHECK_LE(offset, 0);
- if (IsInt(8, offset - kShortSize)) {
+ if (IsInt<8>(offset - kShortSize)) {
EmitUint8(0xEB);
EmitUint8((offset - kShortSize) & 0xFF);
} else {
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 3a44ace649..2dde90744e 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -35,10 +35,10 @@ class Immediate : public ValueObject {
int32_t value() const { return value_; }
- bool is_int8() const { return IsInt(8, value_); }
- bool is_uint8() const { return IsUint(8, value_); }
- bool is_int16() const { return IsInt(16, value_); }
- bool is_uint16() const { return IsUint(16, value_); }
+ bool is_int8() const { return IsInt<8>(value_); }
+ bool is_uint8() const { return IsUint<8>(value_); }
+ bool is_int16() const { return IsInt<16>(value_); }
+ bool is_uint16() const { return IsUint<16>(value_); }
private:
const int32_t value_;
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 7e8e769249..f2704b72a4 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -328,6 +328,14 @@ void X86_64Assembler::leaq(CpuRegister dst, const Address& src) {
}
+void X86_64Assembler::leal(CpuRegister dst, const Address& src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x8D);
+ EmitOperand(dst.LowBits(), src);
+}
+
+
void X86_64Assembler::movaps(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitOptionalRex32(dst, src);
@@ -1507,7 +1515,7 @@ void X86_64Assembler::imull(CpuRegister reg, const Immediate& imm) {
// See whether imm can be represented as a sign-extended 8bit value.
int32_t v32 = static_cast<int32_t>(imm.value());
- if (IsInt32(8, v32)) {
+ if (IsInt<8>(v32)) {
// Sign-extension works.
EmitUint8(0x6B);
EmitOperand(reg.LowBits(), Operand(reg));
@@ -1547,7 +1555,7 @@ void X86_64Assembler::imulq(CpuRegister reg, const Immediate& imm) {
// See whether imm can be represented as a sign-extended 8bit value.
int64_t v64 = imm.value();
- if (IsInt64(8, v64)) {
+ if (IsInt<8>(v64)) {
// Sign-extension works.
EmitUint8(0x6B);
EmitOperand(reg.LowBits(), Operand(reg));
@@ -1697,7 +1705,7 @@ void X86_64Assembler::notq(CpuRegister reg) {
void X86_64Assembler::enter(const Immediate& imm) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xC8);
- CHECK(imm.is_uint16());
+ CHECK(imm.is_uint16()) << imm.value();
EmitUint8(imm.value() & 0xFF);
EmitUint8((imm.value() >> 8) & 0xFF);
EmitUint8(0x00);
@@ -1751,7 +1759,7 @@ void X86_64Assembler::j(Condition condition, Label* label) {
static const int kLongSize = 6;
int offset = label->Position() - buffer_.Size();
CHECK_LE(offset, 0);
- if (IsInt(8, offset - kShortSize)) {
+ if (IsInt<8>(offset - kShortSize)) {
EmitUint8(0x70 + condition);
EmitUint8((offset - kShortSize) & 0xFF);
} else {
@@ -1788,7 +1796,7 @@ void X86_64Assembler::jmp(Label* label) {
static const int kLongSize = 5;
int offset = label->Position() - buffer_.Size();
CHECK_LE(offset, 0);
- if (IsInt(8, offset - kShortSize)) {
+ if (IsInt<8>(offset - kShortSize)) {
EmitUint8(0xEB);
EmitUint8((offset - kShortSize) & 0xFF);
} else {
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 2fc251b07a..5dfcf4541b 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -42,15 +42,11 @@ class Immediate : public ValueObject {
int64_t value() const { return value_; }
- bool is_int8() const { return IsInt(8, value_); }
- bool is_uint8() const { return IsUint(8, value_); }
- bool is_int16() const { return IsInt(16, value_); }
- bool is_uint16() const { return IsUint(16, value_); }
- bool is_int32() const {
- // This does not work on 32b machines: return IsInt(32, value_);
- int64_t limit = static_cast<int64_t>(1) << 31;
- return (-limit <= value_) && (value_ < limit);
- }
+ bool is_int8() const { return IsInt<8>(value_); }
+ bool is_uint8() const { return IsUint<8>(value_); }
+ bool is_int16() const { return IsInt<16>(value_); }
+ bool is_uint16() const { return IsUint<16>(value_); }
+ bool is_int32() const { return IsInt<32>(value_); }
private:
const int64_t value_;
@@ -296,6 +292,7 @@ class X86_64Assembler FINAL : public Assembler {
void movw(const Address& dst, const Immediate& imm);
void leaq(CpuRegister dst, const Address& src);
+ void leal(CpuRegister dst, const Address& src);
void movaps(XmmRegister dst, XmmRegister src);
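
is_int32() previously needed a hand-written limit because, as the removed comment notes, the old IsInt(32, value_) did not work on 32-bit hosts; the templated IsInt<32> computes its bounds on int64_t, so 64-bit immediates such as 0x1200000010 are now rejected correctly, which the new test further down exercises. A small self-contained check of those boundaries, restating the simplified helper from the earlier sketch (not the ART definition):

// Boundary check for the new is_int32() behavior, using a simplified helper.
#include <cstdint>

template <int kBits>
constexpr bool IsInt(int64_t value) {
  static_assert(kBits > 0 && kBits < 64, "unsupported width");
  return value >= -(INT64_C(1) << (kBits - 1)) && value < (INT64_C(1) << (kBits - 1));
}

int main() {
  static_assert(IsInt<32>(INT64_C(0x7fffffff)), "max int32 fits");
  static_assert(!IsInt<32>(INT64_C(0x80000000)), "one past max does not");
  static_assert(!IsInt<32>(INT64_C(0x1200000010)), "the value from the new test below");
  static_assert(IsInt<32>(-INT64_C(0x80000000)), "min int32 fits");
  return 0;
}
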
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 6df4144004..00f508b23f 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -44,10 +44,10 @@ static constexpr size_t kRandomIterations = 100000; // Hosts are pretty powerfu
TEST(AssemblerX86_64, SignExtension) {
// 32bit.
for (int32_t i = 0; i < 128; i++) {
- EXPECT_TRUE(IsInt32(8, i)) << i;
+ EXPECT_TRUE(IsInt<8>(i)) << i;
}
for (int32_t i = 128; i < 255; i++) {
- EXPECT_FALSE(IsInt32(8, i)) << i;
+ EXPECT_FALSE(IsInt<8>(i)) << i;
}
// Do some higher ones randomly.
std::random_device rd;
@@ -55,54 +55,65 @@ TEST(AssemblerX86_64, SignExtension) {
std::uniform_int_distribution<int32_t> uniform_dist(256, INT32_MAX);
for (size_t i = 0; i < kRandomIterations; i++) {
int32_t value = uniform_dist(e1);
- EXPECT_FALSE(IsInt32(8, value)) << value;
+ EXPECT_FALSE(IsInt<8>(value)) << value;
}
// Negative ones.
for (int32_t i = -1; i >= -128; i--) {
- EXPECT_TRUE(IsInt32(8, i)) << i;
+ EXPECT_TRUE(IsInt<8>(i)) << i;
}
for (int32_t i = -129; i > -256; i--) {
- EXPECT_FALSE(IsInt32(8, i)) << i;
+ EXPECT_FALSE(IsInt<8>(i)) << i;
}
// Do some lower ones randomly.
std::uniform_int_distribution<int32_t> uniform_dist2(INT32_MIN, -256);
for (size_t i = 0; i < 100; i++) {
int32_t value = uniform_dist2(e1);
- EXPECT_FALSE(IsInt32(8, value)) << value;
+ EXPECT_FALSE(IsInt<8>(value)) << value;
}
// 64bit.
for (int64_t i = 0; i < 128; i++) {
- EXPECT_TRUE(IsInt64(8, i)) << i;
+ EXPECT_TRUE(IsInt<8>(i)) << i;
}
for (int32_t i = 128; i < 255; i++) {
- EXPECT_FALSE(IsInt64(8, i)) << i;
+ EXPECT_FALSE(IsInt<8>(i)) << i;
}
// Do some higher ones randomly.
std::uniform_int_distribution<int64_t> uniform_dist3(256, INT64_MAX);
for (size_t i = 0; i < 100; i++) {
int64_t value = uniform_dist3(e1);
- EXPECT_FALSE(IsInt64(8, value)) << value;
+ EXPECT_FALSE(IsInt<8>(value)) << value;
}
// Negative ones.
for (int64_t i = -1; i >= -128; i--) {
- EXPECT_TRUE(IsInt64(8, i)) << i;
+ EXPECT_TRUE(IsInt<8>(i)) << i;
}
for (int64_t i = -129; i > -256; i--) {
- EXPECT_FALSE(IsInt64(8, i)) << i;
+ EXPECT_FALSE(IsInt<8>(i)) << i;
}
// Do some lower ones randomly.
std::uniform_int_distribution<int64_t> uniform_dist4(INT64_MIN, -256);
for (size_t i = 0; i < kRandomIterations; i++) {
int64_t value = uniform_dist4(e1);
- EXPECT_FALSE(IsInt64(8, value)) << value;
+ EXPECT_FALSE(IsInt<8>(value)) << value;
}
+
+ int64_t value = INT64_C(0x1200000010);
+ x86_64::Immediate imm(value);
+ EXPECT_FALSE(imm.is_int8());
+ EXPECT_FALSE(imm.is_int16());
+ EXPECT_FALSE(imm.is_int32());
+ value = INT64_C(0x8000000000000001);
+ x86_64::Immediate imm2(value);
+ EXPECT_FALSE(imm2.is_int8());
+ EXPECT_FALSE(imm2.is_int16());
+ EXPECT_FALSE(imm2.is_int32());
}
struct X86_64CpuRegisterCompare {