Diffstat (limited to 'compiler')
-rw-r--r--  compiler/dex/compiler_enums.h  1
-rw-r--r--  compiler/dex/gvn_dead_code_elimination.cc  66
-rw-r--r--  compiler/dex/gvn_dead_code_elimination.h  1
-rw-r--r--  compiler/dex/gvn_dead_code_elimination_test.cc  81
-rw-r--r--  compiler/dex/mir_dataflow.cc  3
-rw-r--r--  compiler/dex/mir_graph.cc  7
-rw-r--r--  compiler/dex/mir_method_info.cc  3
-rw-r--r--  compiler/dex/quick/codegen_util.cc  16
-rw-r--r--  compiler/dex/quick/mir_to_lir.cc  15
-rw-r--r--  compiler/dex/quick/quick_compiler.cc  1
-rw-r--r--  compiler/jni/quick/jni_compiler.cc  46
-rw-r--r--  compiler/oat_test.cc  2
-rw-r--r--  compiler/optimizing/bounds_check_elimination.cc  36
-rw-r--r--  compiler/optimizing/bounds_check_elimination_test.cc  18
-rw-r--r--  compiler/optimizing/builder.cc  24
-rw-r--r--  compiler/optimizing/builder.h  5
-rw-r--r--  compiler/optimizing/code_generator.cc  15
-rw-r--r--  compiler/optimizing/code_generator_arm.cc  18
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc  33
-rw-r--r--  compiler/optimizing/code_generator_x86.cc  26
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc  84
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h  3
-rw-r--r--  compiler/optimizing/codegen_test.cc  10
-rw-r--r--  compiler/optimizing/constant_folding.cc  21
-rw-r--r--  compiler/optimizing/dead_code_elimination.cc  24
-rw-r--r--  compiler/optimizing/dead_code_elimination.h  6
-rw-r--r--  compiler/optimizing/dominator_test.cc  2
-rw-r--r--  compiler/optimizing/find_loops_test.cc  11
-rw-r--r--  compiler/optimizing/graph_checker.cc  55
-rw-r--r--  compiler/optimizing/graph_checker_test.cc  2
-rw-r--r--  compiler/optimizing/graph_test.cc  14
-rw-r--r--  compiler/optimizing/graph_visualizer.cc  27
-rw-r--r--  compiler/optimizing/gvn_test.cc  8
-rw-r--r--  compiler/optimizing/inliner.cc  6
-rw-r--r--  compiler/optimizing/instruction_simplifier.cc  26
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc  90
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc  87
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc  183
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc  157
-rw-r--r--  compiler/optimizing/licm.cc  21
-rw-r--r--  compiler/optimizing/linearize_test.cc  2
-rw-r--r--  compiler/optimizing/live_ranges_test.cc  2
-rw-r--r--  compiler/optimizing/liveness_test.cc  60
-rw-r--r--  compiler/optimizing/locations.cc  6
-rw-r--r--  compiler/optimizing/locations.h  9
-rw-r--r--  compiler/optimizing/nodes.cc  153
-rw-r--r--  compiler/optimizing/nodes.h  217
-rw-r--r--  compiler/optimizing/nodes_test.cc  57
-rw-r--r--  compiler/optimizing/optimizing_cfi_test.cc  5
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc  8
-rw-r--r--  compiler/optimizing/optimizing_compiler_stats.h  44
-rw-r--r--  compiler/optimizing/optimizing_unit_test.h  7
-rw-r--r--  compiler/optimizing/pretty_printer_test.cc  2
-rw-r--r--  compiler/optimizing/register_allocator.cc  7
-rw-r--r--  compiler/optimizing/register_allocator_test.cc  16
-rw-r--r--  compiler/optimizing/ssa_builder.cc  18
-rw-r--r--  compiler/optimizing/ssa_builder.h  17
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.cc  18
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.h  36
-rw-r--r--  compiler/optimizing/ssa_test.cc  20
-rw-r--r--  compiler/optimizing/suspend_check_test.cc  2
-rw-r--r--  compiler/utils/mips64/assembler_mips64.cc  23
-rw-r--r--  compiler/utils/mips64/assembler_mips64.h  2
-rw-r--r--  compiler/utils/x86/assembler_x86.cc  8
-rw-r--r--  compiler/utils/x86/assembler_x86.h  2
-rw-r--r--  compiler/utils/x86/assembler_x86_test.cc  6
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64.cc  8
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64.h  2
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64_test.cc  6
69 files changed, 1503 insertions(+), 514 deletions(-)
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 0acdd422df..b78b3d7d75 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -172,7 +172,6 @@ enum ExtendedMIROpcode {
kMirOpRangeCheck,
kMirOpDivZeroCheck,
kMirOpCheck,
- kMirOpCheckPart2,
kMirOpSelect,
// Vector opcodes:
diff --git a/compiler/dex/gvn_dead_code_elimination.cc b/compiler/dex/gvn_dead_code_elimination.cc
index 4f0e9d1b67..915fbcda04 100644
--- a/compiler/dex/gvn_dead_code_elimination.cc
+++ b/compiler/dex/gvn_dead_code_elimination.cc
@@ -20,6 +20,7 @@
#include "base/bit_vector-inl.h"
#include "base/macros.h"
+#include "base/allocator.h"
#include "compiler_enums.h"
#include "dataflow_iterator-inl.h"
#include "dex_instruction.h"
@@ -75,6 +76,9 @@ inline void GvnDeadCodeElimination::MIRData::RemovePrevChange(int v_reg, MIRData
GvnDeadCodeElimination::VRegChains::VRegChains(uint32_t num_vregs, ScopedArenaAllocator* alloc)
: num_vregs_(num_vregs),
vreg_data_(alloc->AllocArray<VRegValue>(num_vregs, kArenaAllocMisc)),
+ vreg_high_words_(num_vregs, false, Allocator::GetNoopAllocator(),
+ BitVector::BitsToWords(num_vregs),
+ alloc->AllocArray<uint32_t>(BitVector::BitsToWords(num_vregs))),
mir_data_(alloc->Adapter()) {
mir_data_.reserve(100);
}
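The constructor change above wires in a new vreg_high_words_ bit vector that marks which virtual registers currently hold the high word of a wide (64-bit) value. Its storage is carved out of the scoped arena up front and paired with a no-op allocator, so the vector can never grow or try to free memory it does not own. A minimal sketch of the same pattern, assuming the ART BitVector(start_bits, expandable, allocator, storage_size, storage) constructor used in the diff:

    // Sketch: a fixed-size bit vector backed by arena storage.
    uint32_t* storage = alloc->AllocArray<uint32_t>(BitVector::BitsToWords(num_vregs));
    BitVector high_words(num_vregs,
                         false,                          // not expandable: size is fixed
                         Allocator::GetNoopAllocator(),  // never allocates or frees
                         BitVector::BitsToWords(num_vregs),
                         storage);
    high_words.SetBit(1);    // v1 now holds the high word of a wide value
    high_words.ClearBit(1);  // v1 overwritten, no longer a high word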
@@ -82,6 +86,7 @@ GvnDeadCodeElimination::VRegChains::VRegChains(uint32_t num_vregs, ScopedArenaAl
inline void GvnDeadCodeElimination::VRegChains::Reset() {
DCHECK(mir_data_.empty());
std::fill_n(vreg_data_, num_vregs_, VRegValue());
+ vreg_high_words_.ClearAllBits();
}
void GvnDeadCodeElimination::VRegChains::AddMIRWithDef(MIR* mir, int v_reg, bool wide,
@@ -93,24 +98,26 @@ void GvnDeadCodeElimination::VRegChains::AddMIRWithDef(MIR* mir, int v_reg, bool
data->wide_def = wide;
data->vreg_def = v_reg;
- if (vreg_data_[v_reg].change != kNPos &&
- mir_data_[vreg_data_[v_reg].change].vreg_def + 1 == v_reg) {
- data->low_def_over_high_word = true;
- }
- data->prev_value = vreg_data_[v_reg];
DCHECK_LT(static_cast<size_t>(v_reg), num_vregs_);
+ data->prev_value = vreg_data_[v_reg];
+ data->low_def_over_high_word =
+ (vreg_data_[v_reg].change != kNPos)
+ ? GetMIRData(vreg_data_[v_reg].change)->vreg_def + 1 == v_reg
+ : vreg_high_words_.IsBitSet(v_reg);
vreg_data_[v_reg].value = new_value;
vreg_data_[v_reg].change = pos;
+ vreg_high_words_.ClearBit(v_reg);
if (wide) {
- if (vreg_data_[v_reg + 1].change != kNPos &&
- mir_data_[vreg_data_[v_reg + 1].change].vreg_def == v_reg + 1) {
- data->high_def_over_low_word = true;
- }
- data->prev_value_high = vreg_data_[v_reg + 1];
DCHECK_LT(static_cast<size_t>(v_reg + 1), num_vregs_);
+ data->prev_value_high = vreg_data_[v_reg + 1];
+ data->high_def_over_low_word =
+ (vreg_data_[v_reg + 1].change != kNPos)
+ ? GetMIRData(vreg_data_[v_reg + 1].change)->vreg_def == v_reg + 1
+ : !vreg_high_words_.IsBitSet(v_reg + 1);
vreg_data_[v_reg + 1].value = new_value;
vreg_data_[v_reg + 1].change = pos;
+ vreg_high_words_.SetBit(v_reg + 1);
}
}
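The rewritten logic falls back to vreg_high_words_ when a register has no recorded change in the tracked range, i.e. when its current content is still the block's initial value. A hypothetical worked example of the low_def_over_high_word case:

    // Suppose v0/v1 hold a wide value on entry to the block (recorded via
    // InsertInitialValueHigh(), which sets vreg_high_words_ bit 1), and then:
    //   const v1, #7   // narrow def over the high word of the incoming wide value
    // For this def, vreg_data_[1].change == kNPos (no tracked change yet), so the
    // old mir_data_ lookup could not tell that a high word was overwritten; the
    // new code reads vreg_high_words_.IsBitSet(1) and correctly sets
    // low_def_over_high_word.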
@@ -123,9 +130,17 @@ void GvnDeadCodeElimination::VRegChains::RemoveLastMIRData() {
if (data->has_def) {
DCHECK_EQ(vreg_data_[data->vreg_def].change, NumMIRs() - 1u);
vreg_data_[data->vreg_def] = data->prev_value;
+ DCHECK(!vreg_high_words_.IsBitSet(data->vreg_def));
+ if (data->low_def_over_high_word) {
+ vreg_high_words_.SetBit(data->vreg_def);
+ }
if (data->wide_def) {
DCHECK_EQ(vreg_data_[data->vreg_def + 1].change, NumMIRs() - 1u);
vreg_data_[data->vreg_def + 1] = data->prev_value_high;
+ DCHECK(vreg_high_words_.IsBitSet(data->vreg_def + 1));
+ if (data->high_def_over_low_word) {
+ vreg_high_words_.ClearBit(data->vreg_def + 1);
+ }
}
}
mir_data_.pop_back();
@@ -169,6 +184,7 @@ void GvnDeadCodeElimination::VRegChains::InsertInitialValueHigh(int v_reg, uint1
uint16_t change = vreg_data_[v_reg].change;
if (change == kNPos) {
vreg_data_[v_reg].value = value;
+ vreg_high_words_.SetBit(v_reg);
} else {
while (true) {
MIRData* data = &mir_data_[change];
@@ -208,6 +224,7 @@ void GvnDeadCodeElimination::VRegChains::UpdateInitialVRegValue(int v_reg, bool
}
}
vreg_data_[v_reg].value = old_value;
+ DCHECK(!vreg_high_words_.IsBitSet(v_reg)); // Keep marked as low word.
}
} else {
DCHECK_LT(static_cast<size_t>(v_reg + 1), num_vregs_);
@@ -223,6 +240,7 @@ void GvnDeadCodeElimination::VRegChains::UpdateInitialVRegValue(int v_reg, bool
old_value = lvn->GetStartingVregValueNumber(v_reg);
}
vreg_data_[v_reg].value = old_value;
+ DCHECK(!vreg_high_words_.IsBitSet(v_reg)); // Keep marked as low word.
}
if (check_high && vreg_data_[v_reg + 1].value == kNoValue) {
uint16_t old_value = lvn->GetStartingVregValueNumber(v_reg + 1);
@@ -234,6 +252,7 @@ void GvnDeadCodeElimination::VRegChains::UpdateInitialVRegValue(int v_reg, bool
}
}
vreg_data_[v_reg + 1].value = old_value;
+ DCHECK(!vreg_high_words_.IsBitSet(v_reg + 1)); // Keep marked as low word.
}
}
}
@@ -300,6 +319,8 @@ void GvnDeadCodeElimination::VRegChains::ReplaceChange(uint16_t old_change, uint
if (next_change == kNPos) {
DCHECK_EQ(vreg_data_[v_reg].change, old_change);
vreg_data_[v_reg].change = new_change;
+ DCHECK_EQ(vreg_high_words_.IsBitSet(v_reg), v_reg == old_data->vreg_def + 1);
+ // No change in vreg_high_words_.
} else {
DCHECK_EQ(mir_data_[next_change].PrevChange(v_reg), old_change);
mir_data_[next_change].SetPrevChange(v_reg, new_change);
@@ -316,6 +337,13 @@ void GvnDeadCodeElimination::VRegChains::RemoveChange(uint16_t change) {
if (next_change == kNPos) {
DCHECK_EQ(vreg_data_[v_reg].change, change);
vreg_data_[v_reg] = (data->vreg_def == v_reg) ? data->prev_value : data->prev_value_high;
+ DCHECK_EQ(vreg_high_words_.IsBitSet(v_reg), v_reg == data->vreg_def + 1);
+ if (data->vreg_def == v_reg && data->low_def_over_high_word) {
+ vreg_high_words_.SetBit(v_reg);
+ } else if (data->vreg_def != v_reg &&
+ (data->high_def_over_low_word || data->prev_value_high.value == kNoValue)) {
+ vreg_high_words_.ClearBit(v_reg);
+ }
} else {
DCHECK_EQ(mir_data_[next_change].PrevChange(v_reg), change);
mir_data_[next_change].RemovePrevChange(v_reg, data);
@@ -533,7 +561,7 @@ MIR* GvnDeadCodeElimination::RenameSRegDefOrCreatePhi(uint16_t def_change, uint1
// Just before we kill mir_to_kill, we need to replace the previous SSA reg assigned to the
// same dalvik reg to keep consistency with subsequent instructions. However, if there's no
- // defining MIR for that dalvik reg, the preserved valus must come from its predecessors
+ // defining MIR for that dalvik reg, the preserved values must come from its predecessors
// and we need to create a new Phi (a degenerate Phi if there's only a single predecessor).
if (def_change == kNPos) {
if (wide) {
@@ -541,7 +569,21 @@ MIR* GvnDeadCodeElimination::RenameSRegDefOrCreatePhi(uint16_t def_change, uint1
DCHECK_EQ(mir_graph_->SRegToVReg(new_s_reg) + 1, mir_graph_->SRegToVReg(new_s_reg + 1));
CreatePhi(new_s_reg + 1); // High word Phi.
}
- return CreatePhi(new_s_reg);
+ MIR* phi = CreatePhi(new_s_reg);
+ // If this is a degenerate Phi with all inputs being the same SSA reg, we need to rename its uses.
+ DCHECK_NE(phi->ssa_rep->num_uses, 0u);
+ int old_s_reg = phi->ssa_rep->uses[0];
+ bool all_same = true;
+ for (size_t i = 1u, num = phi->ssa_rep->num_uses; i != num; ++i) {
+ if (phi->ssa_rep->uses[i] != old_s_reg) {
+ all_same = false;
+ break;
+ }
+ }
+ if (all_same) {
+ vreg_chains_.RenameSRegUses(0u, last_change, old_s_reg, new_s_reg, wide);
+ }
+ return phi;
} else {
DCHECK_LT(def_change, last_change);
DCHECK_LE(last_change, vreg_chains_.NumMIRs());
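CreatePhi() can yield a degenerate Phi whose inputs are all the same SSA register, which merely introduces a new name for an unchanged value. The added loop detects that case and renames later uses of the old SSA register to the Phi's def so the SSA graph stays consistent. A condensed sketch of the check, with a hypothetical helper name:

    // Hypothetical helper equivalent to the inline all_same scan above.
    static bool AllPhiInputsSame(const MIR* phi) {
      int32_t first = phi->ssa_rep->uses[0];
      for (size_t i = 1u, num = phi->ssa_rep->num_uses; i != num; ++i) {
        if (phi->ssa_rep->uses[i] != first) {
          return false;
        }
      }
      return true;
    }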
diff --git a/compiler/dex/gvn_dead_code_elimination.h b/compiler/dex/gvn_dead_code_elimination.h
index bc75a01778..06022db501 100644
--- a/compiler/dex/gvn_dead_code_elimination.h
+++ b/compiler/dex/gvn_dead_code_elimination.h
@@ -121,6 +121,7 @@ class GvnDeadCodeElimination : public DeletableArenaObject<kArenaAllocMisc> {
private:
const uint32_t num_vregs_;
VRegValue* const vreg_data_;
+ BitVector vreg_high_words_;
ScopedArenaVector<MIRData> mir_data_;
};
diff --git a/compiler/dex/gvn_dead_code_elimination_test.cc b/compiler/dex/gvn_dead_code_elimination_test.cc
index f9f0882f08..4f8127338c 100644
--- a/compiler/dex/gvn_dead_code_elimination_test.cc
+++ b/compiler/dex/gvn_dead_code_elimination_test.cc
@@ -1629,6 +1629,52 @@ TEST_F(GvnDeadCodeEliminationTestDiamond, CreatePhi1) {
}
TEST_F(GvnDeadCodeEliminationTestDiamond, CreatePhi2) {
+ static const MIRDef mirs[] = {
+ DEF_CONST(3, Instruction::CONST, 0u, 1000),
+ DEF_MOVE(4, Instruction::MOVE, 1u, 0u),
+ DEF_CONST(4, Instruction::CONST, 2u, 1000),
+ };
+
+ static const int32_t sreg_to_vreg_map[] = { 0, 1, 0 };
+ PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+ PrepareMIRs(mirs);
+ PerformGVN_DCE();
+
+ ASSERT_EQ(arraysize(mirs), value_names_.size());
+ EXPECT_EQ(value_names_[0], value_names_[1]);
+ EXPECT_EQ(value_names_[0], value_names_[2]);
+
+ static const bool eliminated[] = {
+ false, false, true,
+ };
+ static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+ for (size_t i = 0; i != arraysize(eliminated); ++i) {
+ bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+ EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+ }
+ // Check that we've created a single-input Phi to replace the eliminated CONST (mirs_[2]).
+ BasicBlock* bb4 = cu_.mir_graph->GetBasicBlock(4);
+ MIR* phi = bb4->first_mir_insn;
+ ASSERT_TRUE(phi != nullptr);
+ ASSERT_EQ(kMirOpPhi, static_cast<int>(phi->dalvikInsn.opcode));
+ ASSERT_EQ(1, phi->ssa_rep->num_uses);
+ EXPECT_EQ(0, phi->ssa_rep->uses[0]);
+ ASSERT_EQ(1, phi->ssa_rep->num_defs);
+ EXPECT_EQ(2, phi->ssa_rep->defs[0]);
+ EXPECT_EQ(0u, phi->dalvikInsn.vA);
+ MIR* move = phi->next;
+ ASSERT_TRUE(move != nullptr);
+ ASSERT_EQ(Instruction::MOVE, move->dalvikInsn.opcode);
+ ASSERT_EQ(1, move->ssa_rep->num_uses);
+ EXPECT_EQ(2, move->ssa_rep->uses[0]);
+ ASSERT_EQ(1, move->ssa_rep->num_defs);
+ EXPECT_EQ(1, move->ssa_rep->defs[0]);
+ EXPECT_EQ(1u, move->dalvikInsn.vA);
+ EXPECT_EQ(0u, move->dalvikInsn.vB);
+}
+
+TEST_F(GvnDeadCodeEliminationTestDiamond, CreatePhi3) {
static const IFieldDef ifields[] = {
{ 0u, 1u, 0u, false, kDexMemAccessWord },
};
@@ -1850,4 +1896,39 @@ TEST_F(GvnDeadCodeEliminationTestLoop, IFieldLoopVariable) {
EXPECT_EQ(2u, phi->dalvikInsn.vA);
}
+TEST_F(GvnDeadCodeEliminationTestDiamond, LongOverlaps1) {
+ static const MIRDef mirs[] = {
+ DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 0u, 1000u),
+ DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 2u, 1000u),
+ DEF_MOVE_WIDE(4, Instruction::MOVE_WIDE, 4u, 0u),
+ DEF_MOVE_WIDE(4, Instruction::MOVE_WIDE, 6u, 2u),
+ DEF_MOVE_WIDE(4, Instruction::MOVE_WIDE, 8u, 4u),
+ DEF_MOVE_WIDE(4, Instruction::MOVE_WIDE, 10u, 6u),
+ };
+
+ // The last insn should overlap the first and second.
+ static const int32_t sreg_to_vreg_map[] = { 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3 };
+ PrepareSRegToVRegMap(sreg_to_vreg_map);
+
+ PrepareMIRs(mirs);
+ static const int32_t wide_sregs[] = { 0, 2, 4, 6, 8, 10 };
+ MarkAsWideSRegs(wide_sregs);
+ PerformGVN_DCE();
+
+ ASSERT_EQ(arraysize(mirs), value_names_.size());
+ EXPECT_EQ(value_names_[0], value_names_[1]);
+ EXPECT_EQ(value_names_[0], value_names_[2]);
+ EXPECT_EQ(value_names_[0], value_names_[3]);
+ EXPECT_EQ(value_names_[0], value_names_[4]);
+
+ static const bool eliminated[] = {
+ false, false, false, false, false, false,
+ };
+ static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch");
+ for (size_t i = 0; i != arraysize(eliminated); ++i) {
+ bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop);
+ EXPECT_EQ(eliminated[i], actually_eliminated) << i;
+ }
+}
+
} // namespace art
diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc
index b4aec98e01..a7ba061984 100644
--- a/compiler/dex/mir_dataflow.cc
+++ b/compiler/dex/mir_dataflow.cc
@@ -834,9 +834,6 @@ const uint64_t MIRGraph::oat_data_flow_attributes_[kMirOpLast] = {
// 10B MIR_CHECK
0,
- // 10C MIR_CHECKPART2
- 0,
-
// 10D MIR_SELECT
DF_DA | DF_UB,
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 9e3fbbc967..1871f07106 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -52,8 +52,7 @@ const char* MIRGraph::extended_mir_op_names_[kMirOpLast - kMirOpFirst] = {
"OpNullCheck",
"OpRangeCheck",
"OpDivZeroCheck",
- "Check1",
- "Check2",
+ "Check",
"Select",
"ConstVector",
"MoveVector",
@@ -1508,7 +1507,7 @@ char* MIRGraph::GetDalvikDisassembly(const MIR* mir) {
Instruction::Format dalvik_format = Instruction::k10x; // Default to no-operand format.
// Handle special cases that recover the original dalvik instruction.
- if ((opcode == kMirOpCheck) || (opcode == kMirOpCheckPart2)) {
+ if (opcode == kMirOpCheck) {
str.append(extended_mir_op_names_[opcode - kMirOpFirst]);
str.append(": ");
// Recover the original Dex instruction.
@@ -2517,8 +2516,6 @@ int MIR::DecodedInstruction::FlagsOf() const {
return Instruction::kContinue | Instruction::kThrow;
case kMirOpCheck:
return Instruction::kContinue | Instruction::kThrow;
- case kMirOpCheckPart2:
- return Instruction::kContinue;
case kMirOpSelect:
return Instruction::kContinue;
case kMirOpConstVector:
diff --git a/compiler/dex/mir_method_info.cc b/compiler/dex/mir_method_info.cc
index 5654604797..94be1fd4a5 100644
--- a/compiler/dex/mir_method_info.cc
+++ b/compiler/dex/mir_method_info.cc
@@ -169,7 +169,8 @@ void MirMethodLoweringInfo::Resolve(CompilerDriver* compiler_driver,
~(kFlagFastPath | kFlagIsIntrinsic | kFlagIsSpecial | kFlagClassIsInitialized |
(kInvokeTypeMask << kBitSharpTypeBegin));
it->flags_ = other_flags |
- (fast_path_flags != 0 ? kFlagFastPath : 0u) |
+ // String init path is a special always-fast path.
+ (fast_path_flags != 0 || string_init ? kFlagFastPath : 0u) |
((is_intrinsic_or_special & kInlineIntrinsic) != 0 ? kFlagIsIntrinsic : 0u) |
((is_intrinsic_or_special & kInlineSpecial) != 0 ? kFlagIsSpecial : 0u) |
(static_cast<uint16_t>(invoke_type) << kBitSharpTypeBegin) |
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index fb68335e6e..86bb69d01e 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1391,22 +1391,6 @@ void Mir2Lir::InitReferenceVRegs(BasicBlock* bb, BitVector* references) {
}
}
}
- if (bb->block_type != kEntryBlock && bb->first_mir_insn != nullptr &&
- static_cast<int>(bb->first_mir_insn->dalvikInsn.opcode) == kMirOpCheckPart2) {
- // In Mir2Lir::MethodBlockCodeGen() we have artificially moved the throwing
- // instruction to the previous block. However, the MIRGraph data used above
- // doesn't reflect that, so we still need to process that MIR insn here.
- MIR* mir = nullptr;
- BasicBlock* pred_bb = bb;
- // Traverse empty blocks.
- while (mir == nullptr && pred_bb->predecessors.size() == 1u) {
- pred_bb = mir_graph_->GetBasicBlock(bb->predecessors[0]);
- DCHECK(pred_bb != nullptr);
- mir = pred_bb->last_mir_insn;
- }
- DCHECK(mir != nullptr);
- UpdateReferenceVRegsLocal(nullptr, mir, references);
- }
}
bool Mir2Lir::UpdateReferenceVRegsLocal(MIR* mir, MIR* prev_mir, BitVector* references) {
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index e9e9161a1c..e3e87ecb13 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -1187,7 +1187,6 @@ void Mir2Lir::HandleExtendedMethodMIR(BasicBlock* bb, MIR* mir) {
case kMirOpRangeCheck:
case kMirOpDivZeroCheck:
case kMirOpCheck:
- case kMirOpCheckPart2:
// Ignore these known opcodes
break;
default:
@@ -1276,20 +1275,6 @@ bool Mir2Lir::MethodBlockCodeGen(BasicBlock* bb) {
head_lir->u.m.def_mask = &kEncodeAll;
}
- if (opcode == kMirOpCheck) {
- // Combine check and work halves of throwing instruction.
- MIR* work_half = mir->meta.throw_insn;
- mir->dalvikInsn = work_half->dalvikInsn;
- mir->optimization_flags = work_half->optimization_flags;
- mir->meta = work_half->meta; // Whatever the work_half had, we need to copy it.
- opcode = work_half->dalvikInsn.opcode;
- SSARepresentation* ssa_rep = work_half->ssa_rep;
- work_half->ssa_rep = mir->ssa_rep;
- mir->ssa_rep = ssa_rep;
- work_half->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpCheckPart2);
- work_half->meta.throw_insn = mir;
- }
-
if (MIR::DecodedInstruction::IsPseudoMirOp(opcode)) {
HandleExtendedMethodMIR(bb, mir);
continue;
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 73cfe92c45..7ca438225f 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -403,7 +403,6 @@ static int kAllOpcodes[] = {
kMirOpRangeCheck,
kMirOpDivZeroCheck,
kMirOpCheck,
- kMirOpCheckPart2,
kMirOpSelect,
};
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 6f2cb25911..a06303d23e 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -138,7 +138,8 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver,
FrameOffset handle_scope_offset = main_jni_conv->CurrentParamHandleScopeEntryOffset();
// Check handle scope offset is within frame
CHECK_LT(handle_scope_offset.Uint32Value(), frame_size);
- // TODO: Insert the read barrier for this load.
+ // Note that this LoadRef() already includes the heap poisoning negation.
+ // Note that this LoadRef() does not include a read barrier; it is handled below.
__ LoadRef(main_jni_conv->InterproceduralScratchRegister(),
mr_conv->MethodRegister(), mirror::ArtMethod::DeclaringClassOffset());
__ VerifyObject(main_jni_conv->InterproceduralScratchRegister(), false);
@@ -189,6 +190,49 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver,
size_t current_out_arg_size = main_out_arg_size;
__ IncreaseFrameSize(main_out_arg_size);
+ // Call the read barrier for the declaring class loaded from the method for a static call.
+ // Note that we always have outgoing param space available for at least two params.
+ if (kUseReadBarrier && is_static) {
+ ThreadOffset<4> read_barrier32 = QUICK_ENTRYPOINT_OFFSET(4, pReadBarrierJni);
+ ThreadOffset<8> read_barrier64 = QUICK_ENTRYPOINT_OFFSET(8, pReadBarrierJni);
+ main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
+ main_jni_conv->Next(); // Skip JNIEnv.
+ FrameOffset class_handle_scope_offset = main_jni_conv->CurrentParamHandleScopeEntryOffset();
+ main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
+ // Pass the handle for the class as the first argument.
+ if (main_jni_conv->IsCurrentParamOnStack()) {
+ FrameOffset out_off = main_jni_conv->CurrentParamStackOffset();
+ __ CreateHandleScopeEntry(out_off, class_handle_scope_offset,
+ mr_conv->InterproceduralScratchRegister(),
+ false);
+ } else {
+ ManagedRegister out_reg = main_jni_conv->CurrentParamRegister();
+ __ CreateHandleScopeEntry(out_reg, class_handle_scope_offset,
+ ManagedRegister::NoRegister(), false);
+ }
+ main_jni_conv->Next();
+ // Pass the current thread as the second argument and call.
+ if (main_jni_conv->IsCurrentParamInRegister()) {
+ __ GetCurrentThread(main_jni_conv->CurrentParamRegister());
+ if (is_64_bit_target) {
+ __ Call(main_jni_conv->CurrentParamRegister(), Offset(read_barrier64),
+ main_jni_conv->InterproceduralScratchRegister());
+ } else {
+ __ Call(main_jni_conv->CurrentParamRegister(), Offset(read_barrier32),
+ main_jni_conv->InterproceduralScratchRegister());
+ }
+ } else {
+ __ GetCurrentThread(main_jni_conv->CurrentParamStackOffset(),
+ main_jni_conv->InterproceduralScratchRegister());
+ if (is_64_bit_target) {
+ __ CallFromThread64(read_barrier64, main_jni_conv->InterproceduralScratchRegister());
+ } else {
+ __ CallFromThread32(read_barrier32, main_jni_conv->InterproceduralScratchRegister());
+ }
+ }
+ main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size)); // Reset.
+ }
+
// 6. Call into appropriate JniMethodStart passing Thread* so that transition out of Runnable
// can occur. The result is the saved JNI local state that is restored by the exit call. We
// abuse the JNI calling convention here, that is guaranteed to support passing 2 pointer
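For static native methods the declaring class is kept alive through a handle scope entry, and with read barriers enabled (kUseReadBarrier) that reference must be processed before the native call can observe it. The block above therefore calls the new pReadBarrierJni entrypoint with the class handle as the first argument and the current thread as the second, using the two outgoing parameter slots that are always available. A rough sketch of the assumed runtime-side contract (name and signature are an assumption, not verbatim from this change):

    // Assumed shape of the runtime side of pReadBarrierJni: it receives the
    // handle scope entry holding the declaring class plus the calling thread,
    // and runs the read barrier on the reference in place.
    // extern "C" void artReadBarrierJni(
    //     mirror::CompressedReference<mirror::Object>* handle_on_stack, Thread* self);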
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index dbdcc96fc1..a871a82d95 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -176,7 +176,7 @@ TEST_F(OatTest, OatHeaderSizeCheck) {
EXPECT_EQ(72U, sizeof(OatHeader));
EXPECT_EQ(4U, sizeof(OatMethodOffsets));
EXPECT_EQ(28U, sizeof(OatQuickMethodHeader));
- EXPECT_EQ(111 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
+ EXPECT_EQ(112 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
}
TEST_F(OatTest, OatHeaderIsValid) {
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 92fa6db507..b2b54965b5 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -281,15 +281,22 @@ class ArrayAccessInsideLoopFinder : public ValueObject {
return false;
}
+ static bool DominatesAllBackEdges(HBasicBlock* block, HLoopInformation* loop_info) {
+ for (size_t i = 0, e = loop_info->GetBackEdges().Size(); i < e; ++i) {
+ HBasicBlock* back_edge = loop_info->GetBackEdges().Get(i);
+ if (!block->Dominates(back_edge)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
void Run() {
HLoopInformation* loop_info = induction_variable_->GetBlock()->GetLoopInformation();
- // Must be simplified loop.
- DCHECK_EQ(loop_info->GetBackEdges().Size(), 1U);
for (HBlocksInLoopIterator it_loop(*loop_info); !it_loop.Done(); it_loop.Advance()) {
HBasicBlock* block = it_loop.Current();
DCHECK(block->IsInLoop());
- HBasicBlock* back_edge = loop_info->GetBackEdges().Get(0);
- if (!block->Dominates(back_edge)) {
+ if (!DominatesAllBackEdges(block, loop_info)) {
// In order not to trigger deoptimization unnecessarily, make sure
// that all array accesses collected are really executed in the loop.
// For array accesses in a branch inside the loop, don't collect the
@@ -1151,9 +1158,26 @@ class BCEVisitor : public HGraphVisitor {
bounds_check->GetBlock()->RemoveInstruction(bounds_check);
}
+ static bool HasSameInputAtBackEdges(HPhi* phi) {
+ DCHECK(phi->IsLoopHeaderPhi());
+ // Start with input 1. Input 0 is from the incoming block.
+ HInstruction* input1 = phi->InputAt(1);
+ DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge(
+ *phi->GetBlock()->GetPredecessors().Get(1)));
+ for (size_t i = 2, e = phi->InputCount(); i < e; ++i) {
+ DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge(
+ *phi->GetBlock()->GetPredecessors().Get(i)));
+ if (input1 != phi->InputAt(i)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
void VisitPhi(HPhi* phi) {
- if (phi->IsLoopHeaderPhi() && phi->GetType() == Primitive::kPrimInt) {
- DCHECK_EQ(phi->InputCount(), 2U);
+ if (phi->IsLoopHeaderPhi()
+ && (phi->GetType() == Primitive::kPrimInt)
+ && HasSameInputAtBackEdges(phi)) {
HInstruction* instruction = phi->InputAt(1);
HInstruction *left;
int32_t increment;
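Previously the pass could assume a simplified loop with exactly one back edge, so a header Phi always had exactly two inputs. With multiple back edges the Phi grows one input per back edge, and the induction pattern is only safe to match when every back edge feeds the same value, which is what HasSameInputAtBackEdges() checks. For example (hypothetical CFG):

    // A loop whose header is reached from two back edges (say, a 'continue'
    // edge and the normal loop end) produces a header phi with three inputs:
    //   i = Phi(i0, i1, i2)    // i0 from the pre-header, i1/i2 from back edges
    // The pattern i = Phi(i0, i + increment) is only recognized when i1 == i2.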
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index 97be778dbd..163458f75c 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -42,7 +42,7 @@ TEST(BoundsCheckEliminationTest, NarrowingRangeArrayBoundsElimination) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
graph->SetHasBoundsChecks(true);
HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
@@ -147,7 +147,7 @@ TEST(BoundsCheckEliminationTest, OverflowArrayBoundsElimination) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
graph->SetHasBoundsChecks(true);
HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
@@ -219,7 +219,7 @@ TEST(BoundsCheckEliminationTest, UnderflowArrayBoundsElimination) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
graph->SetHasBoundsChecks(true);
HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
@@ -291,7 +291,7 @@ TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
graph->SetHasBoundsChecks(true);
HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
@@ -364,7 +364,7 @@ static HGraph* BuildSSAGraph1(ArenaAllocator* allocator,
int initial,
int increment,
IfCondition cond = kCondGE) {
- HGraph* graph = new (allocator) HGraph(allocator);
+ HGraph* graph = CreateGraph(allocator);
graph->SetHasBoundsChecks(true);
HBasicBlock* entry = new (allocator) HBasicBlock(graph);
@@ -501,7 +501,7 @@ static HGraph* BuildSSAGraph2(ArenaAllocator* allocator,
int initial,
int increment = -1,
IfCondition cond = kCondLE) {
- HGraph* graph = new (allocator) HGraph(allocator);
+ HGraph* graph = CreateGraph(allocator);
graph->SetHasBoundsChecks(true);
HBasicBlock* entry = new (allocator) HBasicBlock(graph);
@@ -632,7 +632,7 @@ static HGraph* BuildSSAGraph3(ArenaAllocator* allocator,
int initial,
int increment,
IfCondition cond) {
- HGraph* graph = new (allocator) HGraph(allocator);
+ HGraph* graph = CreateGraph(allocator);
graph->SetHasBoundsChecks(true);
HBasicBlock* entry = new (allocator) HBasicBlock(graph);
@@ -743,7 +743,7 @@ static HGraph* BuildSSAGraph4(ArenaAllocator* allocator,
HInstruction** bounds_check,
int initial,
IfCondition cond = kCondGE) {
- HGraph* graph = new (allocator) HGraph(allocator);
+ HGraph* graph = CreateGraph(allocator);
graph->SetHasBoundsChecks(true);
HBasicBlock* entry = new (allocator) HBasicBlock(graph);
@@ -868,7 +868,7 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
graph->SetHasBoundsChecks(true);
HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 0f44af07b8..a5c6f23343 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -282,7 +282,10 @@ bool HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item) {
// To avoid splitting blocks, we compute ahead of time the instructions that
// start a new block, and create these blocks.
- ComputeBranchTargets(code_ptr, code_end, &number_of_branches);
+ if (!ComputeBranchTargets(code_ptr, code_end, &number_of_branches)) {
+ MaybeRecordStat(MethodCompilationStat::kNotCompiledBranchOutsideMethodCode);
+ return false;
+ }
// Note that the compiler driver is null when unit testing.
if ((compiler_driver_ != nullptr) && SkipCompilation(code_item, number_of_branches)) {
@@ -349,7 +352,7 @@ void HGraphBuilder::MaybeUpdateCurrentBlock(size_t index) {
current_block_ = block;
}
-void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
+bool HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
const uint16_t* code_end,
size_t* number_of_branches) {
branch_targets_.SetSize(code_end - code_ptr);
@@ -374,7 +377,14 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
}
dex_pc += instruction.SizeInCodeUnits();
code_ptr += instruction.SizeInCodeUnits();
- if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) {
+
+ if (code_ptr >= code_end) {
+ if (instruction.CanFlowThrough()) {
+ // In the normal case we should never hit this but someone can artificially forge a dex
+ // file to fall-through out the method code. In this case we bail out compilation.
+ return false;
+ }
+ } else if (FindBlockStartingAt(dex_pc) == nullptr) {
block = new (arena_) HBasicBlock(graph_, dex_pc);
branch_targets_.Put(dex_pc, block);
}
@@ -406,7 +416,12 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
// Fall-through. Add a block if there is more code afterwards.
dex_pc += instruction.SizeInCodeUnits();
code_ptr += instruction.SizeInCodeUnits();
- if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) {
+ if (code_ptr >= code_end) {
+ // In the normal case we should never hit this, but someone can artificially forge a dex
+ // file so that it falls through out of the method code. In this case we bail out of compilation.
+ // (A switch can fall through, so we don't need to check CanFlowThrough().)
+ return false;
+ } else if (FindBlockStartingAt(dex_pc) == nullptr) {
block = new (arena_) HBasicBlock(graph_, dex_pc);
branch_targets_.Put(dex_pc, block);
}
@@ -415,6 +430,7 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
dex_pc += instruction.SizeInCodeUnits();
}
}
+ return true;
}
HBasicBlock* HGraphBuilder::FindBlockStartingAt(int32_t index) const {
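Both bail-out paths guard against a code item whose last instruction can transfer control past the end of the method. A hypothetical forged body that trips the first check:

    // 0000: const/4 v0, #0      // CanFlowThrough() == true, but this is the
    //                           // last code unit: execution would fall off the
    //                           // end of the method, so the builder now returns
    //                           // false instead of building a malformed graph.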
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index dc6d97eb0c..36503ce43a 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -88,7 +88,10 @@ class HGraphBuilder : public ValueObject {
// the newly created blocks.
// As a side effect, also compute the number of dex instructions, blocks, and
// branches.
- void ComputeBranchTargets(const uint16_t* start,
+ // Returns true if all the branches fall inside the method code, false otherwise.
+ // (In normal cases this should always return true, but someone can artificially
+ // create a code unit in which control falls through out of it.)
+ bool ComputeBranchTargets(const uint16_t* start,
const uint16_t* end,
size_t* number_of_branches);
void MaybeUpdateCurrentBlock(size_t index);
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index cfe121e0ec..0e776b31f7 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -100,11 +100,11 @@ static bool CheckTypeConsistency(HInstruction* instruction) {
for (size_t i = 0; i < instruction->EnvironmentSize(); ++i) {
if (environment->GetInstructionAt(i) != nullptr) {
Primitive::Type type = environment->GetInstructionAt(i)->GetType();
- DCHECK(CheckType(type, locations->GetEnvironmentAt(i)))
- << type << " " << locations->GetEnvironmentAt(i);
+ DCHECK(CheckType(type, environment->GetLocationAt(i)))
+ << type << " " << environment->GetLocationAt(i);
} else {
- DCHECK(locations->GetEnvironmentAt(i).IsInvalid())
- << locations->GetEnvironmentAt(i);
+ DCHECK(environment->GetLocationAt(i).IsInvalid())
+ << environment->GetLocationAt(i);
}
}
return true;
@@ -680,6 +680,11 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction,
locations->GetStackMask(),
environment_size,
inlining_depth);
+ if (environment != nullptr) {
+ // TODO: Handle parent environment.
+ DCHECK(environment->GetParent() == nullptr);
+ DCHECK_EQ(environment->GetDexPc(), dex_pc);
+ }
// Walk over the environment, and record the location of dex registers.
for (size_t i = 0; i < environment_size; ++i) {
@@ -689,7 +694,7 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction,
continue;
}
- Location location = locations->GetEnvironmentAt(i);
+ Location location = environment->GetLocationAt(i);
switch (location.GetKind()) {
case Location::kConstant: {
DCHECK_EQ(current, location.GetConstant());
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index e4c37deb8b..f56e446605 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -112,6 +112,10 @@ class SuspendCheckSlowPathARM : public SlowPathCodeARM {
return &return_label_;
}
+ HBasicBlock* GetSuccessor() const {
+ return successor_;
+ }
+
private:
HSuspendCheck* const instruction_;
// If not null, the block to branch to after the suspend check.
@@ -3539,8 +3543,18 @@ void InstructionCodeGeneratorARM::VisitSuspendCheck(HSuspendCheck* instruction)
void InstructionCodeGeneratorARM::GenerateSuspendCheck(HSuspendCheck* instruction,
HBasicBlock* successor) {
SuspendCheckSlowPathARM* slow_path =
- new (GetGraph()->GetArena()) SuspendCheckSlowPathARM(instruction, successor);
- codegen_->AddSlowPath(slow_path);
+ down_cast<SuspendCheckSlowPathARM*>(instruction->GetSlowPath());
+ if (slow_path == nullptr) {
+ slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARM(instruction, successor);
+ instruction->SetSlowPath(slow_path);
+ codegen_->AddSlowPath(slow_path);
+ if (successor != nullptr) {
+ DCHECK(successor->IsLoopHeader());
+ codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
+ }
+ } else {
+ DCHECK_EQ(slow_path->GetSuccessor(), successor);
+ }
__ LoadFromOffset(
kLoadUnsignedHalfword, IP, TR, Thread::ThreadFlagsOffset<kArmWordSize>().Int32Value());
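This change (mirrored below for ARM64, x86 and x86-64) makes each HSuspendCheck own a single slow path that is created lazily on first use and reused afterwards, instead of allocating a fresh slow path at every code generation site. Clearing loop-phi spill slots from the stack map also moves here from VisitGoto(). A condensed sketch of the shared pattern, with simplified type names:

    // Simplified create-or-reuse pattern now used by all back ends.
    SuspendCheckSlowPath* GetOrCreate(HSuspendCheck* check, HBasicBlock* successor) {
      SuspendCheckSlowPath* path = down_cast<SuspendCheckSlowPath*>(check->GetSlowPath());
      if (path == nullptr) {
        path = new (arena) SuspendCheckSlowPath(check, successor);
        check->SetSlowPath(path);
        codegen->AddSlowPath(path);
        if (successor != nullptr) {  // back edge case: successor is the loop header
          codegen->ClearSpillSlotsFromLoopPhisInStackMap(check);
        }
      } else {
        DCHECK_EQ(path->GetSuccessor(), successor);  // must agree across all uses
      }
      return path;
    }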
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 9e02a1d850..b1cb8802b3 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -285,6 +285,10 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
return &return_label_;
}
+ HBasicBlock* GetSuccessor() const {
+ return successor_;
+ }
+
private:
HSuspendCheck* const instruction_;
// If not null, the block to branch to after the suspend check.
@@ -975,14 +979,12 @@ void CodeGeneratorARM64::InvokeRuntime(int32_t entry_point_offset,
BlockPoolsScope block_pools(GetVIXLAssembler());
__ Ldr(lr, MemOperand(tr, entry_point_offset));
__ Blr(lr);
- if (instruction != nullptr) {
- RecordPcInfo(instruction, dex_pc, slow_path);
- DCHECK(instruction->IsSuspendCheck()
- || instruction->IsBoundsCheck()
- || instruction->IsNullCheck()
- || instruction->IsDivZeroCheck()
- || !IsLeafMethod());
- }
+ RecordPcInfo(instruction, dex_pc, slow_path);
+ DCHECK(instruction->IsSuspendCheck()
+ || instruction->IsBoundsCheck()
+ || instruction->IsNullCheck()
+ || instruction->IsDivZeroCheck()
+ || !IsLeafMethod());
}
void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
@@ -1034,8 +1036,19 @@ void InstructionCodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction,
HBasicBlock* successor) {
SuspendCheckSlowPathARM64* slow_path =
- new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, successor);
- codegen_->AddSlowPath(slow_path);
+ down_cast<SuspendCheckSlowPathARM64*>(instruction->GetSlowPath());
+ if (slow_path == nullptr) {
+ slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, successor);
+ instruction->SetSlowPath(slow_path);
+ codegen_->AddSlowPath(slow_path);
+ if (successor != nullptr) {
+ DCHECK(successor->IsLoopHeader());
+ codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
+ }
+ } else {
+ DCHECK_EQ(slow_path->GetSuccessor(), successor);
+ }
+
UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
Register temp = temps.AcquireW();
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 8aa77969fc..2848a48a64 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -153,6 +153,10 @@ class SuspendCheckSlowPathX86 : public SlowPathCodeX86 {
return &return_label_;
}
+ HBasicBlock* GetSuccessor() const {
+ return successor_;
+ }
+
private:
HSuspendCheck* const instruction_;
HBasicBlock* const successor_;
@@ -809,7 +813,6 @@ void InstructionCodeGeneratorX86::VisitGoto(HGoto* got) {
HLoopInformation* info = block->GetLoopInformation();
if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
- codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
GenerateSuspendCheck(info->GetSuspendCheck(), successor);
return;
}
@@ -2827,7 +2830,11 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) {
Register low = loc.AsRegisterPairLow<Register>();
Register high = loc.AsRegisterPairHigh<Register>();
- if (shift == 32) {
+ if (shift == 1) {
+ // This is just an addition.
+ __ addl(low, low);
+ __ adcl(high, high);
+ } else if (shift == 32) {
// Shift by 32 is easy. High gets low, and low gets 0.
codegen_->EmitParallelMoves(
loc.ToLow(),
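The new shift == 1 case exploits the identity x << 1 == x + x: addl doubles the low word and sets the carry flag on overflow, and adcl then doubles the high word while folding that carry in, which is exactly a 64-bit left shift by one across a register pair. A worked example:

    // (high:low) = 0x00000001:0x80000000, i.e. the 64-bit value 0x1_8000_0000.
    //   addl low, low    // low  = 0x00000000, carry flag CF = 1
    //   adcl high, high  // high = 0x00000001 + 0x00000001 + CF = 0x00000003
    // Result 0x00000003:0x00000000 == 0x3_0000_0000 == 0x1_8000_0000 << 1.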
@@ -3993,8 +4000,19 @@ void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction)
void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instruction,
HBasicBlock* successor) {
SuspendCheckSlowPathX86* slow_path =
- new (GetGraph()->GetArena()) SuspendCheckSlowPathX86(instruction, successor);
- codegen_->AddSlowPath(slow_path);
+ down_cast<SuspendCheckSlowPathX86*>(instruction->GetSlowPath());
+ if (slow_path == nullptr) {
+ slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86(instruction, successor);
+ instruction->SetSlowPath(slow_path);
+ codegen_->AddSlowPath(slow_path);
+ if (successor != nullptr) {
+ DCHECK(successor->IsLoopHeader());
+ codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
+ }
+ } else {
+ DCHECK_EQ(slow_path->GetSuccessor(), successor);
+ }
+
__ fs()->cmpw(Address::Absolute(
Thread::ThreadFlagsOffset<kX86WordSize>().Int32Value()), Immediate(0));
if (successor == nullptr) {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 5ac68668ba..e633970279 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -99,7 +99,7 @@ class DivRemMinusOneSlowPathX86_64 : public SlowPathCodeX86_64 {
if (is_div_) {
__ negq(cpu_reg_);
} else {
- __ movq(cpu_reg_, Immediate(0));
+ __ xorl(cpu_reg_, cpu_reg_);
}
}
__ jmp(GetExitLabel());
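Several movq/xorq-to-xorl rewrites in this file rely on an x86-64 rule: writing a 32-bit register implicitly zero-extends the result into the full 64-bit register, so a 32-bit xor clears all 64 bits while skipping the REX.W prefix byte. Standard encodings for comparison:

    //   xorl %eax, %eax    // 2 bytes (31 C0); zero-extends into RAX
    //   xorq %rax, %rax    // 3 bytes (48 31 C0); REX.W adds a byte, same effect
    //   movq $0, %rax      // 7 bytes (48 C7 C0 00 00 00 00); longer still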
@@ -136,6 +136,10 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
return &return_label_;
}
+ HBasicBlock* GetSuccessor() const {
+ return successor_;
+ }
+
private:
HSuspendCheck* const instruction_;
HBasicBlock* const successor_;
@@ -671,7 +675,7 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) {
DCHECK(constant->IsLongConstant());
value = constant->AsLongConstant()->GetValue();
}
- __ movq(CpuRegister(TMP), Immediate(value));
+ Load64BitValue(CpuRegister(TMP), value);
__ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
} else {
DCHECK(source.IsDoubleStackSlot());
@@ -704,9 +708,9 @@ void CodeGeneratorX86_64::Move(HInstruction* instruction,
} else if (const_to_move->IsLongConstant()) {
int64_t value = const_to_move->AsLongConstant()->GetValue();
if (location.IsRegister()) {
- __ movq(location.AsRegister<CpuRegister>(), Immediate(value));
+ Load64BitValue(location.AsRegister<CpuRegister>(), value);
} else if (location.IsDoubleStackSlot()) {
- __ movq(CpuRegister(TMP), Immediate(value));
+ Load64BitValue(CpuRegister(TMP), value);
__ movq(Address(CpuRegister(RSP), location.GetStackIndex()), CpuRegister(TMP));
} else {
DCHECK(location.IsConstant());
@@ -771,7 +775,6 @@ void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
HLoopInformation* info = block->GetLoopInformation();
if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
- codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
GenerateSuspendCheck(info->GetSuspendCheck(), successor);
return;
}
@@ -956,7 +959,7 @@ void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* comp) {
LocationSummary* locations = comp->GetLocations();
CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
// Clear register: setcc only sets the low byte.
- __ xorq(reg, reg);
+ __ xorl(reg, reg);
Location lhs = locations->InAt(0);
Location rhs = locations->InAt(1);
if (rhs.IsRegister()) {
@@ -1419,8 +1422,8 @@ void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invo
size_t class_offset = mirror::Object::ClassOffset().SizeValue();
// Set the hidden argument.
- __ movq(invoke->GetLocations()->GetTemp(1).AsRegister<CpuRegister>(),
- Immediate(invoke->GetDexMethodIndex()));
+ CpuRegister hidden_reg = invoke->GetLocations()->GetTemp(1).AsRegister<CpuRegister>();
+ codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex());
// temp = object->GetClass();
if (receiver.IsStackSlot()) {
@@ -1856,7 +1859,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver
XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
Label done, nan;
- __ movq(output, Immediate(kPrimLongMax));
+ codegen_->Load64BitValue(output, kPrimLongMax);
// temp = long-to-float(output)
__ cvtsi2ss(temp, output, true);
// if input >= temp goto done
@@ -1869,7 +1872,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver
__ jmp(&done);
__ Bind(&nan);
// output = 0
- __ xorq(output, output);
+ __ xorl(output, output);
__ Bind(&done);
break;
}
@@ -1881,7 +1884,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver
XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
Label done, nan;
- __ movq(output, Immediate(kPrimLongMax));
+ codegen_->Load64BitValue(output, kPrimLongMax);
// temp = long-to-double(output)
__ cvtsi2sd(temp, output, true);
// if input >= temp goto done
@@ -1894,7 +1897,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver
__ jmp(&done);
__ Bind(&nan);
// output = 0
- __ xorq(output, output);
+ __ xorl(output, output);
__ Bind(&done);
break;
}
@@ -2483,7 +2486,7 @@ void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instr
case Primitive::kPrimLong: {
if (instruction->IsRem()) {
- __ xorq(output_register, output_register);
+ __ xorl(output_register, output_register);
} else {
__ movq(output_register, input_register);
if (imm == -1) {
@@ -2527,7 +2530,7 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
- __ movq(rdx, Immediate(std::abs(imm) - 1));
+ codegen_->Load64BitValue(rdx, std::abs(imm) - 1);
__ addq(rdx, numerator);
__ testq(numerator, numerator);
__ cmov(kGreaterEqual, rdx, numerator);
@@ -2624,7 +2627,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperat
__ movq(numerator, rax);
// RAX = magic
- __ movq(rax, Immediate(magic));
+ codegen_->Load64BitValue(rax, magic);
// RDX:RAX = magic * numerator
__ imulq(numerator);
@@ -2653,8 +2656,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperat
if (IsInt<32>(imm)) {
__ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
} else {
- __ movq(numerator, Immediate(imm));
- __ imulq(rdx, numerator);
+ __ imulq(rdx, codegen_->LiteralInt64Address(imm));
}
__ subq(rax, rdx);
@@ -3020,8 +3022,8 @@ void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
InvokeRuntimeCallingConvention calling_convention;
codegen_->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(1)));
- __ movq(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(instruction->GetTypeIndex()));
-
+ codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)),
+ instruction->GetTypeIndex());
__ gs()->call(
Address::Absolute(GetThreadOffset<kX86_64WordSize>(instruction->GetEntrypoint()), true));
@@ -3042,7 +3044,8 @@ void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
InvokeRuntimeCallingConvention calling_convention;
codegen_->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(2)));
- __ movq(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(instruction->GetTypeIndex()));
+ codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)),
+ instruction->GetTypeIndex());
__ gs()->call(
Address::Absolute(GetThreadOffset<kX86_64WordSize>(instruction->GetEntrypoint()), true));
@@ -3864,8 +3867,19 @@ void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instructio
void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
HBasicBlock* successor) {
SuspendCheckSlowPathX86_64* slow_path =
- new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor);
- codegen_->AddSlowPath(slow_path);
+ down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
+ if (slow_path == nullptr) {
+ slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor);
+ instruction->SetSlowPath(slow_path);
+ codegen_->AddSlowPath(slow_path);
+ if (successor != nullptr) {
+ DCHECK(successor->IsLoopHeader());
+ codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
+ }
+ } else {
+ DCHECK_EQ(slow_path->GetSuccessor(), successor);
+ }
+
__ gs()->cmpw(Address::Absolute(
Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(), true), Immediate(0));
if (successor == nullptr) {
@@ -3938,45 +3952,42 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) {
} else if (constant->IsLongConstant()) {
int64_t value = constant->AsLongConstant()->GetValue();
if (destination.IsRegister()) {
- __ movq(destination.AsRegister<CpuRegister>(), Immediate(value));
+ codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
} else {
DCHECK(destination.IsDoubleStackSlot()) << destination;
- __ movq(CpuRegister(TMP), Immediate(value));
+ codegen_->Load64BitValue(CpuRegister(TMP), value);
__ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
}
} else if (constant->IsFloatConstant()) {
float fp_value = constant->AsFloatConstant()->GetValue();
int32_t value = bit_cast<int32_t, float>(fp_value);
- Immediate imm(value);
if (destination.IsFpuRegister()) {
XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
if (value == 0) {
// easy FP 0.0.
__ xorps(dest, dest);
} else {
- __ movl(CpuRegister(TMP), imm);
- __ movd(dest, CpuRegister(TMP));
+ __ movss(dest, codegen_->LiteralFloatAddress(fp_value));
}
} else {
DCHECK(destination.IsStackSlot()) << destination;
+ Immediate imm(value);
__ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
}
} else {
DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
double fp_value = constant->AsDoubleConstant()->GetValue();
int64_t value = bit_cast<int64_t, double>(fp_value);
- Immediate imm(value);
if (destination.IsFpuRegister()) {
XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
if (value == 0) {
__ xorpd(dest, dest);
} else {
- __ movq(CpuRegister(TMP), imm);
- __ movd(dest, CpuRegister(TMP));
+ __ movsd(dest, codegen_->LiteralDoubleAddress(fp_value));
}
} else {
DCHECK(destination.IsDoubleStackSlot()) << destination;
- __ movq(CpuRegister(TMP), imm);
+ codegen_->Load64BitValue(CpuRegister(TMP), value);
__ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
}
}
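The float and double cases now load constants straight from the constant area instead of routing the bit pattern through a general-purpose register. Side by side, for a hypothetical constant 1.0f:

    // Before: materialize the bits in a GPR, then transfer them to the XMM register.
    //   movl TMP, 0x3F800000     // IEEE-754 bits of 1.0f
    //   movd xmm0, TMP
    // After: one RIP-relative load from the literal pool emitted at Finalize() time.
    //   movss xmm0, [rip + <offset of the 1.0f literal>]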
@@ -4435,6 +4446,17 @@ void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction) {
LOG(FATAL) << "Unreachable";
}
+void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
+ if (value == 0) {
+ __ xorl(dest, dest);
+ } else if (value > 0 && IsInt<32>(value)) {
+ // We can use a 32 bit move, as it will zero-extend and is one byte shorter.
+ __ movl(dest, Immediate(static_cast<int32_t>(value)));
+ } else {
+ __ movq(dest, Immediate(value));
+ }
+}
+
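Given the helper above, representative values select the following forms (a sketch of the dispatch, not an exhaustive list):

    //   Load64BitValue(reg, 0)          -> xorl reg, reg     // shortest zero idiom
    //   Load64BitValue(reg, 1000)       -> movl reg, 1000    // zero-extends, one byte shorter
    //   Load64BitValue(reg, -1)         -> movq reg, -1      // negative: needs the sign bits
    //   Load64BitValue(reg, 1LL << 40)  -> movq reg, ...     // does not fit in 32 bits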
void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
// Generate the constant area if needed.
X86_64Assembler* assembler = GetAssembler();
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 13f9c46b5e..480ea6b9c9 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -282,6 +282,9 @@ class CodeGeneratorX86_64 : public CodeGenerator {
Address LiteralInt32Address(int32_t v);
Address LiteralInt64Address(int64_t v);
+ // Load a 64 bit value into a register in the most efficient manner.
+ void Load64BitValue(CpuRegister dest, int64_t value);
+
private:
// Labels for each block that will be compiled.
GrowableArray<Label> block_labels_;
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 94f56e5d3e..bfed1a89de 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -225,7 +225,7 @@ static void RunCodeOptimized(HGraph* graph,
static void TestCode(const uint16_t* data, bool has_result = false, int32_t expected = 0) {
ArenaPool pool;
ArenaAllocator arena(&pool);
- HGraph* graph = new (&arena) HGraph(&arena);
+ HGraph* graph = CreateGraph(&arena);
HGraphBuilder builder(graph);
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
bool graph_built = builder.BuildGraph(*item);
@@ -238,7 +238,7 @@ static void TestCode(const uint16_t* data, bool has_result = false, int32_t expe
static void TestCodeLong(const uint16_t* data, bool has_result, int64_t expected) {
ArenaPool pool;
ArenaAllocator arena(&pool);
- HGraph* graph = new (&arena) HGraph(&arena);
+ HGraph* graph = CreateGraph(&arena);
HGraphBuilder builder(graph, Primitive::kPrimLong);
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
bool graph_built = builder.BuildGraph(*item);
@@ -504,7 +504,7 @@ TEST(CodegenTest, NonMaterializedCondition) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
graph->AddBlock(entry);
graph->SetEntryBlock(entry);
@@ -623,7 +623,7 @@ TEST(CodegenTest, MaterializedCondition1) {
for (size_t i = 0; i < arraysize(lhs); i++) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
graph->AddBlock(entry_block);
@@ -669,7 +669,7 @@ TEST(CodegenTest, MaterializedCondition2) {
for (size_t i = 0; i < arraysize(lhs); i++) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
graph->AddBlock(entry_block);
diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc
index b7a92b5ae5..5a1d9b488f 100644
--- a/compiler/optimizing/constant_folding.cc
+++ b/compiler/optimizing/constant_folding.cc
@@ -28,6 +28,7 @@ class InstructionWithAbsorbingInputSimplifier : public HGraphVisitor {
void VisitShift(HBinaryOperation* shift);
void VisitAnd(HAnd* instruction) OVERRIDE;
+ void VisitCompare(HCompare* instruction) OVERRIDE;
void VisitMul(HMul* instruction) OVERRIDE;
void VisitOr(HOr* instruction) OVERRIDE;
void VisitRem(HRem* instruction) OVERRIDE;
@@ -108,6 +109,26 @@ void InstructionWithAbsorbingInputSimplifier::VisitAnd(HAnd* instruction) {
}
}
+void InstructionWithAbsorbingInputSimplifier::VisitCompare(HCompare* instruction) {
+ HConstant* input_cst = instruction->GetConstantRight();
+ if (input_cst != nullptr) {
+ HInstruction* input_value = instruction->GetLeastConstantLeft();
+ if (Primitive::IsFloatingPointType(input_value->GetType()) &&
+ ((input_cst->IsFloatConstant() && input_cst->AsFloatConstant()->IsNaN()) ||
+ (input_cst->IsDoubleConstant() && input_cst->AsDoubleConstant()->IsNaN()))) {
+ // Replace code looking like
+ // CMP{G,L} dst, src, NaN
+ // with
+ // CONSTANT +1 (gt bias)
+ // or
+ // CONSTANT -1 (lt bias)
+ instruction->ReplaceWith(GetGraph()->GetConstant(Primitive::kPrimInt,
+ (instruction->IsGtBias() ? 1 : -1)));
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ }
+ }
+}
+
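This folding leans on the dalvik compare semantics: cmpg (gt bias) returns +1 whenever either operand is NaN, and cmpl (lt bias) returns -1, so comparing anything against a constant NaN has a statically known result. For example (hypothetical bytecode):

    //   cmpg-float v0, v1, vNaN   =>   const v0, #+1   // gt bias
    //   cmpl-float v0, v1, vNaN   =>   const v0, #-1   // lt bias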
void InstructionWithAbsorbingInputSimplifier::VisitMul(HMul* instruction) {
HConstant* input_cst = instruction->GetConstantRight();
Primitive::Type type = instruction->GetType();
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index cd427c5ed8..6fbe75e802 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -47,6 +47,12 @@ static void MarkReachableBlocks(HBasicBlock* block, ArenaBitVector* visited) {
}
}
+static void MarkLoopHeadersContaining(const HBasicBlock& block, ArenaBitVector* set) {
+ for (HLoopInformationOutwardIterator it(block); !it.Done(); it.Advance()) {
+ set->SetBit(it.Current()->GetHeader()->GetBlockId());
+ }
+}
+
void HDeadCodeElimination::MaybeRecordDeadBlock(HBasicBlock* block) {
if (stats_ != nullptr) {
stats_->RecordStat(MethodCompilationStat::kRemovedDeadInstruction,
@@ -58,18 +64,24 @@ void HDeadCodeElimination::RemoveDeadBlocks() {
// Classify blocks as reachable/unreachable.
ArenaAllocator* allocator = graph_->GetArena();
ArenaBitVector live_blocks(allocator, graph_->GetBlocks().Size(), false);
+ ArenaBitVector affected_loops(allocator, graph_->GetBlocks().Size(), false);
+
MarkReachableBlocks(graph_->GetEntryBlock(), &live_blocks);
- // Remove all dead blocks. Process blocks in post-order, because removal needs
- // the block's chain of dominators.
+ // Remove all dead blocks. Iterate in post order because removal needs the
+ // block's chain of dominators and nested loops need to be updated from the
+ // inside out.
for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
HBasicBlock* block = it.Current();
- if (live_blocks.IsBitSet(block->GetBlockId())) {
- // If this block is part of a loop that is being dismantled, we need to
- // update its loop information.
- block->UpdateLoopInformation();
+ int id = block->GetBlockId();
+ if (live_blocks.IsBitSet(id)) {
+ if (affected_loops.IsBitSet(id)) {
+ DCHECK(block->IsLoopHeader());
+ block->GetLoopInformation()->Update();
+ }
} else {
MaybeRecordDeadBlock(block);
+ MarkLoopHeadersContaining(*block, &affected_loops);
block->DisconnectAndDelete();
}
}
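Note: the new bookkeeping splits the work in two. While a dead block is being deleted, every loop that contained it is flagged via its header; afterwards, surviving headers that were flagged recompute their membership from the remaining back edges. Because blocks are visited in post order, inner headers come up before the headers enclosing them, giving the inside-out update order the comment asks for. A sketch of the marking step with simplified types (`Loop` stands in for HLoopInformation, `outer` for the outward iterator):

    #include <bitset>
    #include <cstddef>

    constexpr std::size_t kMaxBlocks = 1024;  // Assumed bound, sketch only.

    struct Loop {
      int header_id;
      Loop* outer;  // Enclosing loop, or nullptr at the outermost level.
    };

    // Flag the header of every loop containing the deleted block.
    void MarkEnclosingLoopHeaders(const Loop* innermost,
                                  std::bitset<kMaxBlocks>* affected) {
      for (const Loop* loop = innermost; loop != nullptr; loop = loop->outer) {
        affected->set(loop->header_id);
      }
    }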
diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h
index 0bea0fc1c2..59a57c4345 100644
--- a/compiler/optimizing/dead_code_elimination.h
+++ b/compiler/optimizing/dead_code_elimination.h
@@ -31,13 +31,13 @@ class HDeadCodeElimination : public HOptimization {
public:
HDeadCodeElimination(HGraph* graph,
OptimizingCompilerStats* stats = nullptr,
- const char* name = kDeadCodeEliminationPassName)
+ const char* name = kInitialDeadCodeEliminationPassName)
: HOptimization(graph, true, name, stats) {}
void Run() OVERRIDE;
- static constexpr const char* kDeadCodeEliminationPassName =
- "dead_code_elimination";
+ static constexpr const char* kInitialDeadCodeEliminationPassName = "dead_code_elimination";
+ static constexpr const char* kFinalDeadCodeEliminationPassName = "dead_code_elimination_final";
private:
void MaybeRecordDeadBlock(HBasicBlock* block);
diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc
index 61a7697301..78ae1dd960 100644
--- a/compiler/optimizing/dominator_test.cc
+++ b/compiler/optimizing/dominator_test.cc
@@ -27,7 +27,7 @@ namespace art {
static void TestCode(const uint16_t* data, const int* blocks, size_t blocks_length) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HGraphBuilder builder(graph);
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
bool graph_built = builder.BuildGraph(*item);
diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc
index 2bfecc696a..29aa97a83a 100644
--- a/compiler/optimizing/find_loops_test.cc
+++ b/compiler/optimizing/find_loops_test.cc
@@ -28,7 +28,7 @@
namespace art {
static HGraph* TestCode(const uint16_t* data, ArenaAllocator* allocator) {
- HGraph* graph = new (allocator) HGraph(allocator);
+ HGraph* graph = CreateGraph(allocator);
HGraphBuilder builder(graph);
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
builder.BuildGraph(*item);
@@ -235,14 +235,13 @@ TEST(FindLoopsTest, Loop4) {
TestBlock(graph, 0, false, -1); // entry block
TestBlock(graph, 1, false, -1); // pre header
- const int blocks2[] = {2, 3, 4, 5, 8};
- TestBlock(graph, 2, true, 2, blocks2, 5); // loop header
+ const int blocks2[] = {2, 3, 4, 5};
+ TestBlock(graph, 2, true, 2, blocks2, arraysize(blocks2)); // loop header
TestBlock(graph, 3, false, 2); // block in loop
- TestBlock(graph, 4, false, 2); // original back edge
- TestBlock(graph, 5, false, 2); // original back edge
+ TestBlock(graph, 4, false, 2); // back edge
+ TestBlock(graph, 5, false, 2); // back edge
TestBlock(graph, 6, false, -1); // return block
TestBlock(graph, 7, false, -1); // exit block
- TestBlock(graph, 8, false, 2); // synthesized back edge
}
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 8ea8f3cd79..fd28f0b83f 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -288,6 +288,7 @@ void SSAChecker::VisitBasicBlock(HBasicBlock* block) {
void SSAChecker::CheckLoop(HBasicBlock* loop_header) {
int id = loop_header->GetBlockId();
+ HLoopInformation* loop_information = loop_header->GetLoopInformation();
// Ensure the pre-header block is first in the list of
// predecessors of a loop header.
@@ -297,57 +298,48 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) {
id));
}
- // Ensure the loop header has only two predecessors and that only the
- // second one is a back edge.
+ // Ensure the loop header has only one incoming branch and the remaining
+ // predecessors are back edges.
size_t num_preds = loop_header->GetPredecessors().Size();
if (num_preds < 2) {
AddError(StringPrintf(
"Loop header %d has less than two predecessors: %zu.",
id,
num_preds));
- } else if (num_preds > 2) {
- AddError(StringPrintf(
- "Loop header %d has more than two predecessors: %zu.",
- id,
- num_preds));
} else {
- HLoopInformation* loop_information = loop_header->GetLoopInformation();
HBasicBlock* first_predecessor = loop_header->GetPredecessors().Get(0);
if (loop_information->IsBackEdge(*first_predecessor)) {
AddError(StringPrintf(
"First predecessor of loop header %d is a back edge.",
id));
}
- HBasicBlock* second_predecessor = loop_header->GetPredecessors().Get(1);
- if (!loop_information->IsBackEdge(*second_predecessor)) {
- AddError(StringPrintf(
- "Second predecessor of loop header %d is not a back edge.",
- id));
+ for (size_t i = 1, e = loop_header->GetPredecessors().Size(); i < e; ++i) {
+ HBasicBlock* predecessor = loop_header->GetPredecessors().Get(i);
+ if (!loop_information->IsBackEdge(*predecessor)) {
+ AddError(StringPrintf(
+ "Loop header %d has multiple incoming (non back edge) blocks.",
+ id));
+ }
}
}
- const ArenaBitVector& loop_blocks = loop_header->GetLoopInformation()->GetBlocks();
+ const ArenaBitVector& loop_blocks = loop_information->GetBlocks();
- // Ensure there is only one back edge per loop.
- size_t num_back_edges =
- loop_header->GetLoopInformation()->GetBackEdges().Size();
+ // Ensure back edges belong to the loop.
+ size_t num_back_edges = loop_information->GetBackEdges().Size();
if (num_back_edges == 0) {
AddError(StringPrintf(
"Loop defined by header %d has no back edge.",
id));
- } else if (num_back_edges > 1) {
- AddError(StringPrintf(
- "Loop defined by header %d has several back edges: %zu.",
- id,
- num_back_edges));
} else {
- DCHECK_EQ(num_back_edges, 1u);
- int back_edge_id = loop_header->GetLoopInformation()->GetBackEdges().Get(0)->GetBlockId();
- if (!loop_blocks.IsBitSet(back_edge_id)) {
- AddError(StringPrintf(
- "Loop defined by header %d has an invalid back edge %d.",
- id,
- back_edge_id));
+ for (size_t i = 0; i < num_back_edges; ++i) {
+ int back_edge_id = loop_information->GetBackEdges().Get(i)->GetBlockId();
+ if (!loop_blocks.IsBitSet(back_edge_id)) {
+ AddError(StringPrintf(
+ "Loop defined by header %d has an invalid back edge %d.",
+ id,
+ back_edge_id));
+ }
}
}
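Note: together with the rewritten HGraph::SimplifyLoop (see the nodes.cc hunk further down), these checks pin down the relaxed loop-header shape: predecessor 0 is the single incoming block, and every other predecessor is a back edge dominated by the header. As a predicate over stand-in types (a sketch, not checker code):

    #include <cstddef>
    #include <vector>

    struct Block { bool is_back_edge; };  // Stand-in for IsBackEdge(*pred).

    bool LoopHeaderShapeOk(const std::vector<Block>& preds) {
      if (preds.size() < 2 || preds[0].is_back_edge) return false;
      for (std::size_t i = 1; i < preds.size(); ++i) {
        if (!preds[i].is_back_edge) return false;  // One incoming block only.
      }
      return true;
    }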
@@ -394,8 +386,9 @@ void SSAChecker::VisitInstruction(HInstruction* instruction) {
// Ensure an instruction having an environment is dominated by the
// instructions contained in the environment.
- HEnvironment* environment = instruction->GetEnvironment();
- if (environment != nullptr) {
+ for (HEnvironment* environment = instruction->GetEnvironment();
+ environment != nullptr;
+ environment = environment->GetParent()) {
for (size_t i = 0, e = environment->Size(); i < e; ++i) {
HInstruction* env_instruction = environment->GetInstructionAt(i);
if (env_instruction != nullptr
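Note: environments now chain through GetParent(), one frame per inlined caller, so every consumer that used to inspect a single HEnvironment walks the whole chain; the same pattern reappears in graph_visualizer.cc, licm.cc and nodes.cc below. The shape of that walk, over simplified stand-in types:

    #include <vector>

    struct Value;  // Stand-in for HInstruction.
    struct Env {
      std::vector<Value*> values;  // Stand-in for Size()/GetInstructionAt().
      Env* parent = nullptr;       // Stand-in for GetParent().
    };

    // Visit every non-null value in the chain, innermost frame first.
    template <typename Fn>
    void ForEachEnvValue(const Env* env, Fn fn) {
      for (; env != nullptr; env = env->parent) {
        for (Value* value : env->values) {
          if (value != nullptr) fn(value);
        }
      }
    }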
diff --git a/compiler/optimizing/graph_checker_test.cc b/compiler/optimizing/graph_checker_test.cc
index 923468ff16..eca0d9344f 100644
--- a/compiler/optimizing/graph_checker_test.cc
+++ b/compiler/optimizing/graph_checker_test.cc
@@ -30,7 +30,7 @@ namespace art {
* 1: Exit
*/
HGraph* CreateSimpleCFG(ArenaAllocator* allocator) {
- HGraph* graph = new (allocator) HGraph(allocator);
+ HGraph* graph = CreateGraph(allocator);
HBasicBlock* entry_block = new (allocator) HBasicBlock(graph);
entry_block->AddInstruction(new (allocator) HGoto());
graph->AddBlock(entry_block);
diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc
index 50398b4790..59d50926ad 100644
--- a/compiler/optimizing/graph_test.cc
+++ b/compiler/optimizing/graph_test.cc
@@ -73,7 +73,7 @@ TEST(GraphTest, IfSuccessorSimpleJoinBlock1) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry_block = createEntryBlock(graph, &allocator);
HBasicBlock* if_block = createIfBlock(graph, &allocator);
HBasicBlock* if_true = createGotoBlock(graph, &allocator);
@@ -108,7 +108,7 @@ TEST(GraphTest, IfSuccessorSimpleJoinBlock2) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry_block = createEntryBlock(graph, &allocator);
HBasicBlock* if_block = createIfBlock(graph, &allocator);
HBasicBlock* if_false = createGotoBlock(graph, &allocator);
@@ -143,7 +143,7 @@ TEST(GraphTest, IfSuccessorMultipleBackEdges1) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry_block = createEntryBlock(graph, &allocator);
HBasicBlock* if_block = createIfBlock(graph, &allocator);
HBasicBlock* return_block = createReturnBlock(graph, &allocator);
@@ -178,7 +178,7 @@ TEST(GraphTest, IfSuccessorMultipleBackEdges2) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry_block = createEntryBlock(graph, &allocator);
HBasicBlock* if_block = createIfBlock(graph, &allocator);
HBasicBlock* return_block = createReturnBlock(graph, &allocator);
@@ -213,7 +213,7 @@ TEST(GraphTest, IfSuccessorMultiplePreHeaders1) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry_block = createEntryBlock(graph, &allocator);
HBasicBlock* first_if_block = createIfBlock(graph, &allocator);
HBasicBlock* if_block = createIfBlock(graph, &allocator);
@@ -252,7 +252,7 @@ TEST(GraphTest, IfSuccessorMultiplePreHeaders2) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry_block = createEntryBlock(graph, &allocator);
HBasicBlock* first_if_block = createIfBlock(graph, &allocator);
HBasicBlock* if_block = createIfBlock(graph, &allocator);
@@ -288,7 +288,7 @@ TEST(GraphTest, InsertInstructionBefore) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* block = createGotoBlock(graph, &allocator);
HInstruction* got = block->GetLastInstruction();
ASSERT_TRUE(got->IsControlFlow());
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index ca9cbc3d01..f5c630bf97 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -17,6 +17,7 @@
#include "graph_visualizer.h"
#include "code_generator.h"
+#include "dead_code_elimination.h"
#include "licm.h"
#include "nodes.h"
#include "optimization.h"
@@ -211,17 +212,22 @@ class HGraphVisualizerPrinter : public HGraphVisitor {
output_ << "]";
}
if (instruction->HasEnvironment()) {
- HEnvironment* env = instruction->GetEnvironment();
- output_ << " (env: [ ";
- for (size_t i = 0, e = env->Size(); i < e; ++i) {
- HInstruction* insn = env->GetInstructionAt(i);
- if (insn != nullptr) {
- output_ << GetTypeId(insn->GetType()) << insn->GetId() << " ";
- } else {
- output_ << " _ ";
+ output_ << " (env:";
+ for (HEnvironment* environment = instruction->GetEnvironment();
+ environment != nullptr;
+ environment = environment->GetParent()) {
+ output_ << " [ ";
+ for (size_t i = 0, e = environment->Size(); i < e; ++i) {
+ HInstruction* insn = environment->GetInstructionAt(i);
+ if (insn != nullptr) {
+ output_ << GetTypeId(insn->GetType()) << insn->GetId() << " ";
+ } else {
+ output_ << " _ ";
+ }
}
+ output_ << "]";
}
- output_ << "])";
+ output_ << ")";
}
if (IsPass(SsaLivenessAnalysis::kLivenessPassName)
&& is_after_pass_
@@ -248,7 +254,8 @@ class HGraphVisualizerPrinter : public HGraphVisitor {
}
}
output_ << " (liveness: " << instruction->GetLifetimePosition() << ")";
- } else if (IsPass(LICM::kLoopInvariantCodeMotionPassName)) {
+ } else if (IsPass(LICM::kLoopInvariantCodeMotionPassName)
+ || IsPass(HDeadCodeElimination::kFinalDeadCodeEliminationPassName)) {
output_ << " ( loop_header:";
HLoopInformation* info = instruction->GetBlock()->GetLoopInformation();
if (info == nullptr) {
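Note: with chained environments the printer emits one bracket group per frame instead of a single flat list. A hypothetical instruction with two int values in its own frame (one slot empty) and one value in an inlined caller's frame would now render along the lines of:

    ... (env: [ i5 _ i3 ] [ i2 ])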
diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc
index a81d49aa0c..c3ce7e142a 100644
--- a/compiler/optimizing/gvn_test.cc
+++ b/compiler/optimizing/gvn_test.cc
@@ -29,7 +29,7 @@ TEST(GVNTest, LocalFieldElimination) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
graph->AddBlock(entry);
graph->SetEntryBlock(entry);
@@ -78,7 +78,7 @@ TEST(GVNTest, GlobalFieldElimination) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
graph->AddBlock(entry);
graph->SetEntryBlock(entry);
@@ -133,7 +133,7 @@ TEST(GVNTest, LoopFieldElimination) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
graph->AddBlock(entry);
graph->SetEntryBlock(entry);
@@ -220,7 +220,7 @@ TEST(GVNTest, LoopSideEffects) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
graph->AddBlock(entry);
graph->SetEntryBlock(entry);
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index ada32db047..afffc7ab4f 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -170,7 +170,11 @@ bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method,
nullptr);
HGraph* callee_graph = new (graph_->GetArena()) HGraph(
- graph_->GetArena(), graph_->IsDebuggable(), graph_->GetCurrentInstructionId());
+ graph_->GetArena(),
+ caller_dex_file,
+ method_index,
+ graph_->IsDebuggable(),
+ graph_->GetCurrentInstructionId());
OptimizingCompilerStats inline_stats;
HGraphBuilder builder(callee_graph,
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index e79d4f4bdc..46fad17b8f 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -137,13 +137,25 @@ void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) {
HConstant* input_cst = instruction->GetConstantRight();
HInstruction* input_other = instruction->GetLeastConstantLeft();
- if ((input_cst != nullptr) && input_cst->IsZero()) {
- // Replace code looking like
- // SHL dst, src, 0
- // with
- // src
- instruction->ReplaceWith(input_other);
- instruction->GetBlock()->RemoveInstruction(instruction);
+ if (input_cst != nullptr) {
+ if (input_cst->IsZero()) {
+ // Replace code looking like
+ // SHL dst, src, 0
+ // with
+ // src
+ instruction->ReplaceWith(input_other);
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ } else if (instruction->IsShl() && input_cst->IsOne()) {
+ // Replace Shl looking like
+ // SHL dst, src, 1
+ // with
+ // ADD dst, src, src
+ HAdd *add = new(GetGraph()->GetArena()) HAdd(instruction->GetType(),
+ input_other,
+ input_other);
+ instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, add);
+ RecordSimplification();
+ }
}
}
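Note: the new branch is plain strength reduction: a left shift by one equals adding the value to itself, and an add is never slower than a shift on ART's targets while being easier for later passes to fold. In scalar terms (unsigned arithmetic here models Java's wrapping int semantics):

    #include <cstdint>

    uint32_t ShlOne(uint32_t x) { return x << 1; }   // SHL dst, src, 1
    uint32_t AddSelf(uint32_t x) { return x + x; }   // ADD dst, src, src
    // The two agree for every 32-bit value, including overflowing ones:
    // ShlOne(0x80000001u) == AddSelf(0x80000001u) == 0x00000002u.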
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 7f7b450003..dccfe9a0ca 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -850,6 +850,94 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) {
__ Bind(slow_path->GetExitLabel());
}
+static void GenerateVisitStringIndexOf(HInvoke* invoke,
+ ArmAssembler* assembler,
+ CodeGeneratorARM* codegen,
+ ArenaAllocator* allocator,
+ bool start_at_zero) {
+ LocationSummary* locations = invoke->GetLocations();
+ Register tmp_reg = locations->GetTemp(0).AsRegister<Register>();
+
+ // Note that the null check must have been done earlier.
+ DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+ // Check for code points > 0xFFFF. Either emit a slow-path check when we don't know statically,
+ // or dispatch directly if we have a constant.
+ SlowPathCodeARM* slow_path = nullptr;
+ if (invoke->InputAt(1)->IsIntConstant()) {
+ if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
+ std::numeric_limits<uint16_t>::max()) {
+ // Always needs the slow-path. We could directly dispatch to it, but this case should be
+ // rare, so for simplicity just put the full slow-path down and branch unconditionally.
+ slow_path = new (allocator) IntrinsicSlowPathARM(invoke);
+ codegen->AddSlowPath(slow_path);
+ __ b(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ return;
+ }
+ } else {
+ Register char_reg = locations->InAt(1).AsRegister<Register>();
+ __ LoadImmediate(tmp_reg, std::numeric_limits<uint16_t>::max());
+ __ cmp(char_reg, ShifterOperand(tmp_reg));
+ slow_path = new (allocator) IntrinsicSlowPathARM(invoke);
+ codegen->AddSlowPath(slow_path);
+ __ b(slow_path->GetEntryLabel(), HI);
+ }
+
+ if (start_at_zero) {
+ DCHECK_EQ(tmp_reg, R2);
+ // Start-index = 0.
+ __ LoadImmediate(tmp_reg, 0);
+ }
+
+ __ LoadFromOffset(kLoadWord, LR, TR,
+ QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pIndexOf).Int32Value());
+ __ blx(LR);
+
+ if (slow_path != nullptr) {
+ __ Bind(slow_path->GetExitLabel());
+ }
+}
+
+void IntrinsicLocationsBuilderARM::VisitStringIndexOf(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kCall,
+ kIntrinsified);
+ // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
+ // best to align the inputs accordingly.
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ locations->SetOut(Location::RegisterLocation(R0));
+
+ // Need a temp for slow-path codepoint compare, and need to send start-index=0.
+ locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+}
+
+void IntrinsicCodeGeneratorARM::VisitStringIndexOf(HInvoke* invoke) {
+ GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
+}
+
+void IntrinsicLocationsBuilderARM::VisitStringIndexOfAfter(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kCall,
+ kIntrinsified);
+ // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
+ // best to align the inputs accordingly.
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+ locations->SetOut(Location::RegisterLocation(R0));
+
+ // Need a temp for slow-path codepoint compare.
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARM::VisitStringIndexOfAfter(HInvoke* invoke) {
+ GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
+}
+
void IntrinsicLocationsBuilderARM::VisitStringNewStringFromBytes(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
LocationSummary::kCall,
@@ -951,8 +1039,6 @@ UNIMPLEMENTED_INTRINSIC(MathRoundDouble) // Could be done by changing rounding
UNIMPLEMENTED_INTRINSIC(MathRoundFloat) // Could be done by changing rounding mode, maybe?
UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) // High register pressure.
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
-UNIMPLEMENTED_INTRINSIC(StringIndexOf)
-UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
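Note: all four backends in this change share the same dispatch shape around the character argument. String.indexOf(int) accepts full code points, but the intrinsic only handles values that fit in one UTF-16 unit, so a constant above 0xFFFF branches straight to the slow path, a constant at or below it needs no guard at all, and a runtime value gets a compare-and-branch guard. A backend-independent sketch of that decision:

    #include <cstdint>
    #include <limits>

    enum class Dispatch { kAlwaysSlowPath, kFastPathOnly, kGuardedFastPath };

    // `is_constant`/`value` stand in for the HIntConstant check on input 1.
    Dispatch ClassifyIndexOfChar(bool is_constant, uint32_t value) {
      if (is_constant) {
        return value > std::numeric_limits<uint16_t>::max()
            ? Dispatch::kAlwaysSlowPath   // Supplementary code point.
            : Dispatch::kFastPathOnly;    // Fits in one UTF-16 unit.
      }
      return Dispatch::kGuardedFastPath;  // Runtime compare, branch if above.
    }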
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index ca3de99092..2c4fab0465 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -993,6 +993,91 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
__ Bind(slow_path->GetExitLabel());
}
+static void GenerateVisitStringIndexOf(HInvoke* invoke,
+ vixl::MacroAssembler* masm,
+ CodeGeneratorARM64* codegen,
+ ArenaAllocator* allocator,
+ bool start_at_zero) {
+ LocationSummary* locations = invoke->GetLocations();
+ Register tmp_reg = WRegisterFrom(locations->GetTemp(0));
+
+ // Note that the null check must have been done earlier.
+ DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+ // Check for code points > 0xFFFF. Either emit a slow-path check when we don't know statically,
+ // or dispatch directly if we have a constant.
+ SlowPathCodeARM64* slow_path = nullptr;
+ if (invoke->InputAt(1)->IsIntConstant()) {
+ if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) > 0xFFFFU) {
+ // Always needs the slow-path. We could directly dispatch to it, but this case should be
+ // rare, so for simplicity just put the full slow-path down and branch unconditionally.
+ slow_path = new (allocator) IntrinsicSlowPathARM64(invoke);
+ codegen->AddSlowPath(slow_path);
+ __ B(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ return;
+ }
+ } else {
+ Register char_reg = WRegisterFrom(locations->InAt(1));
+ __ Mov(tmp_reg, 0xFFFF);
+ __ Cmp(char_reg, Operand(tmp_reg));
+ slow_path = new (allocator) IntrinsicSlowPathARM64(invoke);
+ codegen->AddSlowPath(slow_path);
+ __ B(hi, slow_path->GetEntryLabel());
+ }
+
+ if (start_at_zero) {
+ // Start-index = 0.
+ __ Mov(tmp_reg, 0);
+ }
+
+ __ Ldr(lr, MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pIndexOf).Int32Value()));
+ __ Blr(lr);
+
+ if (slow_path != nullptr) {
+ __ Bind(slow_path->GetExitLabel());
+ }
+}
+
+void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kCall,
+ kIntrinsified);
+ // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
+ // best to align the inputs accordingly.
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+ locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+ locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
+
+ // Need a temp for slow-path codepoint compare, and need to send start_index=0.
+ locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
+}
+
+void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) {
+ GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, GetAllocator(), true);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kCall,
+ kIntrinsified);
+ // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
+ // best to align the inputs accordingly.
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+ locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+ locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
+ locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
+
+ // Need a temp for slow-path codepoint compare.
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
+ GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, GetAllocator(), false);
+}
+
void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
LocationSummary::kCall,
@@ -1080,8 +1165,6 @@ void IntrinsicCodeGeneratorARM64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED
}
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
-UNIMPLEMENTED_INTRINSIC(StringIndexOf)
-UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 1eef1eff0b..28b7a07cf9 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -16,6 +16,8 @@
#include "intrinsics_x86.h"
+#include <limits>
+
#include "arch/x86/instruction_set_features_x86.h"
#include "code_generator_x86.h"
#include "entrypoints/quick/quick_entrypoints.h"
@@ -124,11 +126,8 @@ static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) {
// restored!
class IntrinsicSlowPathX86 : public SlowPathCodeX86 {
public:
- explicit IntrinsicSlowPathX86(HInvoke* invoke, Register temp)
- : invoke_(invoke) {
- // The temporary register has to be EAX for x86 invokes.
- DCHECK_EQ(temp, EAX);
- }
+ explicit IntrinsicSlowPathX86(HInvoke* invoke)
+ : invoke_(invoke) { }
void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
CodeGeneratorX86* codegen = down_cast<CodeGeneratorX86*>(codegen_in);
@@ -880,8 +879,6 @@ void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
locations->SetOut(Location::SameAsFirstInput());
- // Needs to be EAX for the invoke.
- locations->AddTemp(Location::RegisterLocation(EAX));
}
void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) {
@@ -901,8 +898,7 @@ void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) {
// TODO: For simplicity, the index parameter is requested in a register, so different from Quick
// we will not optimize the code for constants (which would save a register).
- SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(
- invoke, locations->GetTemp(0).AsRegister<Register>());
+ SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
codegen_->AddSlowPath(slow_path);
X86Assembler* assembler = GetAssembler();
@@ -926,8 +922,6 @@ void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
locations->SetOut(Location::RegisterLocation(EAX));
- // Needs to be EAX for the invoke.
- locations->AddTemp(Location::RegisterLocation(EAX));
}
void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
@@ -939,8 +933,7 @@ void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
Register argument = locations->InAt(1).AsRegister<Register>();
__ testl(argument, argument);
- SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(
- invoke, locations->GetTemp(0).AsRegister<Register>());
+ SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
codegen_->AddSlowPath(slow_path);
__ j(kEqual, slow_path->GetEntryLabel());
@@ -948,6 +941,158 @@ void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
__ Bind(slow_path->GetExitLabel());
}
+static void CreateStringIndexOfLocations(HInvoke* invoke,
+ ArenaAllocator* allocator,
+ bool start_at_zero) {
+ LocationSummary* locations = new (allocator) LocationSummary(invoke,
+ LocationSummary::kCallOnSlowPath,
+ kIntrinsified);
+ // The data needs to be in EDI for scasw. So request that the string is there anyway.
+ locations->SetInAt(0, Location::RegisterLocation(EDI));
+ // If we look for a constant char, we'll still have to copy it into EAX. So just request the
+ // allocator to do that anyway. We can still do the constant check by checking the parameter
+ // of the instruction explicitly.
+ // Note: This works as we don't clobber EAX anywhere.
+ locations->SetInAt(1, Location::RegisterLocation(EAX));
+ if (!start_at_zero) {
+ locations->SetInAt(2, Location::RequiresRegister()); // The starting index.
+ }
+ // As we clobber EDI during execution anyway, also use it as the output.
+ locations->SetOut(Location::SameAsFirstInput());
+
+ // repne scasw uses ECX as the counter.
+ locations->AddTemp(Location::RegisterLocation(ECX));
+ // Need another temporary to be able to compute the result.
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+static void GenerateStringIndexOf(HInvoke* invoke,
+ X86Assembler* assembler,
+ CodeGeneratorX86* codegen,
+ ArenaAllocator* allocator,
+ bool start_at_zero) {
+ LocationSummary* locations = invoke->GetLocations();
+
+ // Note that the null check must have been done earlier.
+ DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+ Register string_obj = locations->InAt(0).AsRegister<Register>();
+ Register search_value = locations->InAt(1).AsRegister<Register>();
+ Register counter = locations->GetTemp(0).AsRegister<Register>();
+ Register string_length = locations->GetTemp(1).AsRegister<Register>();
+ Register out = locations->Out().AsRegister<Register>();
+
+ // Check our assumptions for registers.
+ DCHECK_EQ(string_obj, EDI);
+ DCHECK_EQ(search_value, EAX);
+ DCHECK_EQ(counter, ECX);
+ DCHECK_EQ(out, EDI);
+
+ // Check for code points > 0xFFFF. Either emit a slow-path check when we don't know statically,
+ // or dispatch directly if we have a constant.
+ SlowPathCodeX86* slow_path = nullptr;
+ if (invoke->InputAt(1)->IsIntConstant()) {
+ if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
+ std::numeric_limits<uint16_t>::max()) {
+ // Always needs the slow-path. We could directly dispatch to it, but this case should be
+ // rare, so for simplicity just put the full slow-path down and branch unconditionally.
+ slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
+ codegen->AddSlowPath(slow_path);
+ __ jmp(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ return;
+ }
+ } else {
+ __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
+ slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
+ codegen->AddSlowPath(slow_path);
+ __ j(kAbove, slow_path->GetEntryLabel());
+ }
+
+ // From here down, we know that we are looking for a char that fits in 16 bits.
+ // Location of reference to data array within the String object.
+ int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+ // Location of count within the String object.
+ int32_t count_offset = mirror::String::CountOffset().Int32Value();
+
+ // Load string length, i.e., the count field of the string.
+ __ movl(string_length, Address(string_obj, count_offset));
+
+ // Do a zero-length check.
+ // TODO: Support jecxz.
+ Label not_found_label;
+ __ testl(string_length, string_length);
+ __ j(kEqual, &not_found_label);
+
+ if (start_at_zero) {
+ // Number of chars to scan is the same as the string length.
+ __ movl(counter, string_length);
+
+ // Move to the start of the string.
+ __ addl(string_obj, Immediate(value_offset));
+ } else {
+ Register start_index = locations->InAt(2).AsRegister<Register>();
+
+ // Do a start_index check.
+ __ cmpl(start_index, string_length);
+ __ j(kGreaterEqual, &not_found_label);
+
+ // Ensure we have a start index >= 0.
+ __ xorl(counter, counter);
+ __ cmpl(start_index, Immediate(0));
+ __ cmovl(kGreater, counter, start_index);
+
+ // Move to the start of the string: string_obj + value_offset + 2 * start_index.
+ __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
+
+ // Now update ECX (the repne scasw work counter). We have string.length - start_index chars
+ // left to compare.
+ __ negl(counter);
+ __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
+ }
+
+ // Everything is set up for repne scasw:
+ // * Comparison address in EDI.
+ // * Counter in ECX.
+ __ repne_scasw();
+
+ // Did we find a match?
+ __ j(kNotEqual, &not_found_label);
+
+ // Yes, we matched. Compute the index of the result.
+ __ subl(string_length, counter);
+ __ leal(out, Address(string_length, -1));
+
+ Label done;
+ __ jmp(&done);
+
+ // Failed to match; return -1.
+ __ Bind(&not_found_label);
+ __ movl(out, Immediate(-1));
+
+ // And join up at the end.
+ __ Bind(&done);
+ if (slow_path != nullptr) {
+ __ Bind(slow_path->GetExitLabel());
+ }
+}
+
+void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) {
+ CreateStringIndexOfLocations(invoke, arena_, true);
+}
+
+void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) {
+ GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
+}
+
+void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) {
+ CreateStringIndexOfLocations(invoke, arena_, false);
+}
+
+void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
+ GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
+}
+
void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
LocationSummary::kCall,
@@ -958,8 +1103,6 @@ void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke
locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
locations->SetOut(Location::RegisterLocation(EAX));
- // Needs to be EAX for the invoke.
- locations->AddTemp(Location::RegisterLocation(EAX));
}
void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
@@ -968,8 +1111,7 @@ void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
Register byte_array = locations->InAt(0).AsRegister<Register>();
__ testl(byte_array, byte_array);
- SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(
- invoke, locations->GetTemp(0).AsRegister<Register>());
+ SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
codegen_->AddSlowPath(slow_path);
__ j(kEqual, slow_path->GetEntryLabel());
@@ -1003,8 +1145,6 @@ void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invok
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
locations->SetOut(Location::RegisterLocation(EAX));
- // Needs to be EAX for the invoke.
- locations->AddTemp(Location::RegisterLocation(EAX));
}
void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) {
@@ -1013,8 +1153,7 @@ void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke)
Register string_to_copy = locations->InAt(0).AsRegister<Register>();
__ testl(string_to_copy, string_to_copy);
- SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(
- invoke, locations->GetTemp(0).AsRegister<Register>());
+ SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
codegen_->AddSlowPath(slow_path);
__ j(kEqual, slow_path->GetEntryLabel());
@@ -1584,8 +1723,6 @@ void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED)
UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
-UNIMPLEMENTED_INTRINSIC(StringIndexOf)
-UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
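Note: the x86/x86-64 fast path relies on how repne scasw updates its counter: it is decremented for every unit compared, including the matching one, so after a hit at index k the counter always holds length - k - 1, regardless of the start offset, and the found index falls out as length - counter - 1 (the subl/leal pair above). Worked through: scanning 10 units from index 2 with a match at index 5 seeds the counter with 8, performs 4 comparisons, leaves 4, and 10 - 4 - 1 = 5. The whole inline sequence is equivalent to this sketch of the semantics (not the emitted code):

    #include <cstdint>

    // Chars are UTF-16 units; mirrors the generated checks and scan.
    int32_t IndexOfSketch(const uint16_t* chars, int32_t length,
                          uint16_t ch, int32_t start_index) {
      if (start_index >= length) return -1;           // Covers length == 0 too.
      int32_t i = start_index > 0 ? start_index : 0;  // Clamp, as the cmov does.
      for (; i < length; ++i) {                       // repne scasw.
        if (chars[i] == ch) return i;                 // length - counter - 1.
      }
      return -1;                                      // not_found_label.
    }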
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 1fc5432a89..0efa714a23 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -16,6 +16,8 @@
#include "intrinsics_x86_64.h"
+#include <limits>
+
#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
@@ -783,7 +785,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
__ Bind(&nan);
// output = 0
- __ xorq(out, out);
+ __ xorl(out, out);  // A 32-bit write zeroes the upper half; shorter encoding than xorq.
__ Bind(&done);
}
@@ -858,6 +860,157 @@ void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
__ Bind(slow_path->GetExitLabel());
}
+static void CreateStringIndexOfLocations(HInvoke* invoke,
+ ArenaAllocator* allocator,
+ bool start_at_zero) {
+ LocationSummary* locations = new (allocator) LocationSummary(invoke,
+ LocationSummary::kCallOnSlowPath,
+ kIntrinsified);
+ // The data needs to be in RDI for scasw. So request that the string is there anyway.
+ locations->SetInAt(0, Location::RegisterLocation(RDI));
+ // If we look for a constant char, we'll still have to copy it into RAX. So just request the
+ // allocator to do that anyway. We can still do the constant check by checking the parameter
+ // of the instruction explicitly.
+ // Note: This works as we don't clobber RAX anywhere.
+ locations->SetInAt(1, Location::RegisterLocation(RAX));
+ if (!start_at_zero) {
+ locations->SetInAt(2, Location::RequiresRegister()); // The starting index.
+ }
+ // As we clobber RDI during execution anyway, also use it as the output.
+ locations->SetOut(Location::SameAsFirstInput());
+
+ // repne scasw uses RCX as the counter.
+ locations->AddTemp(Location::RegisterLocation(RCX));
+ // Need another temporary to be able to compute the result.
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+static void GenerateStringIndexOf(HInvoke* invoke,
+ X86_64Assembler* assembler,
+ CodeGeneratorX86_64* codegen,
+ ArenaAllocator* allocator,
+ bool start_at_zero) {
+ LocationSummary* locations = invoke->GetLocations();
+
+ // Note that the null check must have been done earlier.
+ DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+ CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>();
+ CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>();
+ CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>();
+ CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>();
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+ // Check our assumptions for registers.
+ DCHECK_EQ(string_obj.AsRegister(), RDI);
+ DCHECK_EQ(search_value.AsRegister(), RAX);
+ DCHECK_EQ(counter.AsRegister(), RCX);
+ DCHECK_EQ(out.AsRegister(), RDI);
+
+ // Check for code points > 0xFFFF. Either emit a slow-path check when we don't know statically,
+ // or dispatch directly if we have a constant.
+ SlowPathCodeX86_64* slow_path = nullptr;
+ if (invoke->InputAt(1)->IsIntConstant()) {
+ if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
+ std::numeric_limits<uint16_t>::max()) {
+ // Always needs the slow-path. We could directly dispatch to it, but this case should be
+ // rare, so for simplicity just put the full slow-path down and branch unconditionally.
+ slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
+ codegen->AddSlowPath(slow_path);
+ __ jmp(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ return;
+ }
+ } else {
+ __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
+ slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
+ codegen->AddSlowPath(slow_path);
+ __ j(kAbove, slow_path->GetEntryLabel());
+ }
+
+ // From here down, we know that we are looking for a char that fits in 16 bits.
+ // Location of reference to data array within the String object.
+ int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+ // Location of count within the String object.
+ int32_t count_offset = mirror::String::CountOffset().Int32Value();
+
+ // Load string length, i.e., the count field of the string.
+ __ movl(string_length, Address(string_obj, count_offset));
+
+ // Do a zero-length check.
+ // TODO: Support jecxz.
+ Label not_found_label;
+ __ testl(string_length, string_length);
+ __ j(kEqual, &not_found_label);
+
+ if (start_at_zero) {
+ // Number of chars to scan is the same as the string length.
+ __ movl(counter, string_length);
+
+ // Move to the start of the string.
+ __ addq(string_obj, Immediate(value_offset));
+ } else {
+ CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();
+
+ // Do a start_index check.
+ __ cmpl(start_index, string_length);
+ __ j(kGreaterEqual, &not_found_label);
+
+ // Ensure we have a start index >= 0.
+ __ xorl(counter, counter);
+ __ cmpl(start_index, Immediate(0));
+ __ cmov(kGreater, counter, start_index, false); // 32-bit copy is enough.
+
+ // Move to the start of the string: string_obj + value_offset + 2 * start_index.
+ __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
+
+ // Now update RCX, the repne scasw work counter: it holds string.length - start_index.
+ __ negq(counter); // Needs to be 64-bit negation, as the address computation is 64-bit.
+ __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
+ }
+
+ // Everything is set up for repne scasw:
+ // * Comparison address in RDI.
+ // * Counter in RCX.
+ __ repne_scasw();
+
+ // Did we find a match?
+ __ j(kNotEqual, &not_found_label);
+
+ // Yes, we matched. Compute the index of the result.
+ __ subl(string_length, counter);
+ __ leal(out, Address(string_length, -1));
+
+ Label done;
+ __ jmp(&done);
+
+ // Failed to match; return -1.
+ __ Bind(&not_found_label);
+ __ movl(out, Immediate(-1));
+
+ // And join up at the end.
+ __ Bind(&done);
+ if (slow_path != nullptr) {
+ __ Bind(slow_path->GetExitLabel());
+ }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
+ CreateStringIndexOfLocations(invoke, arena_, true);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
+ GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
+ CreateStringIndexOfLocations(invoke, arena_, false);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
+ GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
+}
+
void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
LocationSummary::kCall,
@@ -1434,8 +1587,6 @@ void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSE
}
UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
-UNIMPLEMENTED_INTRINSIC(StringIndexOf)
-UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index bf9b8e59c5..2535ea274a 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -39,8 +39,9 @@ static bool InputsAreDefinedBeforeLoop(HInstruction* instruction) {
}
}
- if (instruction->HasEnvironment()) {
- HEnvironment* environment = instruction->GetEnvironment();
+ for (HEnvironment* environment = instruction->GetEnvironment();
+ environment != nullptr;
+ environment = environment->GetParent()) {
for (size_t i = 0, e = environment->Size(); i < e; ++i) {
HInstruction* input = environment->GetInstructionAt(i);
if (input != nullptr) {
@@ -63,13 +64,15 @@ static bool InputsAreDefinedBeforeLoop(HInstruction* instruction) {
* If `environment` has a loop header phi, we replace it with its first input.
*/
static void UpdateLoopPhisIn(HEnvironment* environment, HLoopInformation* info) {
- for (size_t i = 0, e = environment->Size(); i < e; ++i) {
- HInstruction* input = environment->GetInstructionAt(i);
- if (input != nullptr && IsPhiOf(input, info->GetHeader())) {
- environment->RemoveAsUserOfInput(i);
- HInstruction* incoming = input->InputAt(0);
- environment->SetRawEnvAt(i, incoming);
- incoming->AddEnvUseAt(environment, i);
+ for (; environment != nullptr; environment = environment->GetParent()) {
+ for (size_t i = 0, e = environment->Size(); i < e; ++i) {
+ HInstruction* input = environment->GetInstructionAt(i);
+ if (input != nullptr && IsPhiOf(input, info->GetHeader())) {
+ environment->RemoveAsUserOfInput(i);
+ HInstruction* incoming = input->InputAt(0);
+ environment->SetRawEnvAt(i, incoming);
+ incoming->AddEnvUseAt(environment, i);
+ }
}
}
}
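Note: the environment rewrite in UpdateLoopPhisIn is what keeps hoisting sound for deoptimization: once an instruction sits in the pre-header it executes before any back edge is taken, so a loop-header phi in its environment can only ever be observed holding its first input, the value flowing in from the pre-header. A hypothetical before/after:

    // Before hoisting (hypothetical graph):
    //   pre:    k0 = IntConstant(0)
    //   header: k  = Phi(k0, k2)
    //           nc = NullCheck(obj)        // loop-invariant, env: [ k ]
    //   body:   k2 = Add(k, 1)
    //
    // After LICM moves `nc` to `pre`, its environment entry is rewritten
    // from `k` to `k0`: at that program point no back edge has run yet,
    // so the phi could not hold any other value.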
diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc
index 7818c606db..4f259b5095 100644
--- a/compiler/optimizing/linearize_test.cc
+++ b/compiler/optimizing/linearize_test.cc
@@ -39,7 +39,7 @@ namespace art {
static void TestCode(const uint16_t* data, const int* expected_order, size_t number_of_blocks) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HGraphBuilder builder(graph);
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
bool graph_built = builder.BuildGraph(*item);
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index 52367730ed..7cb00a1923 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -32,7 +32,7 @@
namespace art {
static HGraph* BuildGraph(const uint16_t* data, ArenaAllocator* allocator) {
- HGraph* graph = new (allocator) HGraph(allocator);
+ HGraph* graph = CreateGraph(allocator);
HGraphBuilder builder(graph);
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
builder.BuildGraph(*item);
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index 8a96ee9ace..9d7d0b6c67 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -46,7 +46,7 @@ static void DumpBitVector(BitVector* vector,
static void TestCode(const uint16_t* data, const char* expected) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HGraphBuilder builder(graph);
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
bool graph_built = builder.BuildGraph(*item);
@@ -445,44 +445,40 @@ TEST(LivenessTest, Loop5) {
TEST(LivenessTest, Loop6) {
// Bitsets are made of:
- // (constant0, constant4, constant5, phi in block 2, phi in block 8)
+ // (constant0, constant4, constant5, phi in block 2)
const char* expected =
"Block 0\n"
- " live in: (00000)\n"
- " live out: (11100)\n"
- " kill: (11100)\n"
+ " live in: (0000)\n"
+ " live out: (1110)\n"
+ " kill: (1110)\n"
"Block 1\n"
- " live in: (11100)\n"
- " live out: (01100)\n"
- " kill: (00000)\n"
+ " live in: (1110)\n"
+ " live out: (0110)\n"
+ " kill: (0000)\n"
"Block 2\n" // loop header
- " live in: (01100)\n"
- " live out: (01110)\n"
- " kill: (00010)\n"
+ " live in: (0110)\n"
+ " live out: (0111)\n"
+ " kill: (0001)\n"
"Block 3\n"
- " live in: (01100)\n"
- " live out: (01100)\n"
- " kill: (00000)\n"
- "Block 4\n" // original back edge
- " live in: (01100)\n"
- " live out: (01100)\n"
- " kill: (00000)\n"
- "Block 5\n" // original back edge
- " live in: (01100)\n"
- " live out: (01100)\n"
- " kill: (00000)\n"
+ " live in: (0110)\n"
+ " live out: (0110)\n"
+ " kill: (0000)\n"
+ "Block 4\n" // back edge
+ " live in: (0110)\n"
+ " live out: (0110)\n"
+ " kill: (0000)\n"
+ "Block 5\n" // back edge
+ " live in: (0110)\n"
+ " live out: (0110)\n"
+ " kill: (0000)\n"
"Block 6\n" // return block
- " live in: (00010)\n"
- " live out: (00000)\n"
- " kill: (00000)\n"
+ " live in: (0001)\n"
+ " live out: (0000)\n"
+ " kill: (0000)\n"
"Block 7\n" // exit block
- " live in: (00000)\n"
- " live out: (00000)\n"
- " kill: (00000)\n"
- "Block 8\n" // synthesized back edge
- " live in: (01100)\n"
- " live out: (01100)\n"
- " kill: (00001)\n";
+ " live in: (0000)\n"
+ " live out: (0000)\n"
+ " kill: (0000)\n";
const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index a1ae67009e..42aba04828 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -25,8 +25,6 @@ LocationSummary::LocationSummary(HInstruction* instruction,
bool intrinsified)
: inputs_(instruction->GetBlock()->GetGraph()->GetArena(), instruction->InputCount()),
temps_(instruction->GetBlock()->GetGraph()->GetArena(), 0),
- environment_(instruction->GetBlock()->GetGraph()->GetArena(),
- instruction->EnvironmentSize()),
output_overlaps_(Location::kOutputOverlap),
call_kind_(call_kind),
stack_mask_(nullptr),
@@ -37,10 +35,6 @@ LocationSummary::LocationSummary(HInstruction* instruction,
for (size_t i = 0; i < instruction->InputCount(); ++i) {
inputs_.Put(i, Location());
}
- environment_.SetSize(instruction->EnvironmentSize());
- for (size_t i = 0; i < instruction->EnvironmentSize(); ++i) {
- environment_.Put(i, Location());
- }
instruction->SetLocations(this);
if (NeedsSafepoint()) {
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index c3a99150c4..09bbb33042 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -525,14 +525,6 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> {
return temps_.Size();
}
- void SetEnvironmentAt(uint32_t at, Location location) {
- environment_.Put(at, location);
- }
-
- Location GetEnvironmentAt(uint32_t at) const {
- return environment_.Get(at);
- }
-
Location Out() const { return output_; }
bool CanCall() const { return call_kind_ != kNoCall; }
@@ -602,7 +594,6 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> {
private:
GrowableArray<Location> inputs_;
GrowableArray<Location> temps_;
- GrowableArray<Location> environment_;
// Whether the output overlaps with any of the inputs. If it overlaps, then it cannot
// share the same register as the inputs.
Location::OutputOverlap output_overlaps_;
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index d3ee770941..41adc7223e 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -17,6 +17,7 @@
#include "nodes.h"
#include "ssa_builder.h"
+#include "base/bit_vector-inl.h"
#include "utils/growable_array.h"
#include "scoped_thread_state_change.h"
@@ -37,8 +38,9 @@ static void RemoveAsUser(HInstruction* instruction) {
instruction->RemoveAsUserOfInput(i);
}
- HEnvironment* environment = instruction->GetEnvironment();
- if (environment != nullptr) {
+ for (HEnvironment* environment = instruction->GetEnvironment();
+ environment != nullptr;
+ environment = environment->GetParent()) {
for (size_t i = 0, e = environment->Size(); i < e; ++i) {
if (environment->GetInstructionAt(i) != nullptr) {
environment->RemoveAsUserOfInput(i);
@@ -191,24 +193,6 @@ void HGraph::SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor) {
void HGraph::SimplifyLoop(HBasicBlock* header) {
HLoopInformation* info = header->GetLoopInformation();
- // If there are more than one back edge, make them branch to the same block that
- // will become the only back edge. This simplifies finding natural loops in the
- // graph.
- // Also, if the loop is a do/while (that is the back edge is an if), change the
- // back edge to be a goto. This simplifies code generation of suspend checks.
- if (info->NumberOfBackEdges() > 1 || info->GetBackEdges().Get(0)->GetLastInstruction()->IsIf()) {
- HBasicBlock* new_back_edge = new (arena_) HBasicBlock(this, header->GetDexPc());
- AddBlock(new_back_edge);
- new_back_edge->AddInstruction(new (arena_) HGoto());
- for (size_t pred = 0, e = info->GetBackEdges().Size(); pred < e; ++pred) {
- HBasicBlock* back_edge = info->GetBackEdges().Get(pred);
- back_edge->ReplaceSuccessor(header, new_back_edge);
- }
- info->ClearBackEdges();
- info->AddBackEdge(new_back_edge);
- new_back_edge->AddSuccessor(header);
- }
-
// Make sure the loop has only one pre header. This simplifies SSA building by having
// to just look at the pre header to know which locals are initialized at entry of the
// loop.
@@ -218,11 +202,9 @@ void HGraph::SimplifyLoop(HBasicBlock* header) {
AddBlock(pre_header);
pre_header->AddInstruction(new (arena_) HGoto());
- ArenaBitVector back_edges(arena_, GetBlocks().Size(), false);
- HBasicBlock* back_edge = info->GetBackEdges().Get(0);
for (size_t pred = 0; pred < header->GetPredecessors().Size(); ++pred) {
HBasicBlock* predecessor = header->GetPredecessors().Get(pred);
- if (predecessor != back_edge) {
+ if (!info->IsBackEdge(*predecessor)) {
predecessor->ReplaceSuccessor(header, pre_header);
pred--;
}
@@ -230,9 +212,17 @@ void HGraph::SimplifyLoop(HBasicBlock* header) {
pre_header->AddSuccessor(header);
}
- // Make sure the second predecessor of a loop header is the back edge.
- if (header->GetPredecessors().Get(1) != info->GetBackEdges().Get(0)) {
- header->SwapPredecessors();
+ // Make sure the first predecessor of a loop header is the incoming block.
+ if (info->IsBackEdge(*header->GetPredecessors().Get(0))) {
+ HBasicBlock* to_swap = header->GetPredecessors().Get(0);
+ for (size_t pred = 1, e = header->GetPredecessors().Size(); pred < e; ++pred) {
+ HBasicBlock* predecessor = header->GetPredecessors().Get(pred);
+ if (!info->IsBackEdge(*predecessor)) {
+ header->predecessors_.Put(pred, to_swap);
+ header->predecessors_.Put(0, predecessor);
+ break;
+ }
+ }
}
// Place the suspend check at the beginning of the header, so that live registers
@@ -357,24 +347,59 @@ void HLoopInformation::PopulateRecursive(HBasicBlock* block) {
}
bool HLoopInformation::Populate() {
- DCHECK_EQ(GetBackEdges().Size(), 1u);
- HBasicBlock* back_edge = GetBackEdges().Get(0);
- DCHECK(back_edge->GetDominator() != nullptr);
- if (!header_->Dominates(back_edge)) {
- // This loop is not natural. Do not bother going further.
- return false;
- }
+ DCHECK_EQ(blocks_.NumSetBits(), 0u) << "Loop information has already been populated";
+ for (size_t i = 0, e = GetBackEdges().Size(); i < e; ++i) {
+ HBasicBlock* back_edge = GetBackEdges().Get(i);
+ DCHECK(back_edge->GetDominator() != nullptr);
+ if (!header_->Dominates(back_edge)) {
+ // This loop is not natural. Do not bother going further.
+ return false;
+ }
- // Populate this loop: starting with the back edge, recursively add predecessors
- // that are not already part of that loop. Set the header as part of the loop
- // to end the recursion.
- // This is a recursive implementation of the algorithm described in
- // "Advanced Compiler Design & Implementation" (Muchnick) p192.
- blocks_.SetBit(header_->GetBlockId());
- PopulateRecursive(back_edge);
+ // Populate this loop: starting with the back edge, recursively add predecessors
+ // that are not already part of that loop. Set the header as part of the loop
+ // to end the recursion.
+ // This is a recursive implementation of the algorithm described in
+ // "Advanced Compiler Design & Implementation" (Muchnick) p192.
+ blocks_.SetBit(header_->GetBlockId());
+ PopulateRecursive(back_edge);
+ }
return true;
}
+void HLoopInformation::Update() {
+ HGraph* graph = header_->GetGraph();
+ for (uint32_t id : blocks_.Indexes()) {
+ HBasicBlock* block = graph->GetBlocks().Get(id);
+ // Reset loop information of non-header blocks inside the loop, except
+ // members of inner nested loops because those should already have been
+ // updated by their own LoopInformation.
+ if (block->GetLoopInformation() == this && block != header_) {
+ block->SetLoopInformation(nullptr);
+ }
+ }
+ blocks_.ClearAllBits();
+
+ if (back_edges_.IsEmpty()) {
+ // The loop has been dismantled, delete its suspend check and remove info
+ // from the header.
+ DCHECK(HasSuspendCheck());
+ header_->RemoveInstruction(suspend_check_);
+ header_->SetLoopInformation(nullptr);
+ header_ = nullptr;
+ suspend_check_ = nullptr;
+ } else {
+ if (kIsDebugBuild) {
+ for (size_t i = 0, e = back_edges_.Size(); i < e; ++i) {
+ DCHECK(header_->Dominates(back_edges_.Get(i)));
+ }
+ }
+ // This loop still has reachable back edges. Repopulate the list of blocks.
+ bool populate_successful = Populate();
+ DCHECK(populate_successful);
+ }
+}
+
HBasicBlock* HLoopInformation::GetPreHeader() const {
return header_->GetDominator();
}
@@ -387,6 +412,14 @@ bool HLoopInformation::IsIn(const HLoopInformation& other) const {
return other.blocks_.IsBitSet(header_->GetBlockId());
}
+size_t HLoopInformation::GetLifetimeEnd() const {
+ size_t last_position = 0;
+ for (size_t i = 0, e = back_edges_.Size(); i < e; ++i) {
+ last_position = std::max(back_edges_.Get(i)->GetLifetimeEnd(), last_position);
+ }
+ return last_position;
+}
+
bool HBasicBlock::Dominates(HBasicBlock* other) const {
// Walk up the dominator tree from `other`, to find out if `this`
// is an ancestor.
@@ -503,6 +536,16 @@ void HBasicBlock::RemoveInstructionOrPhi(HInstruction* instruction, bool ensure_
}
}
+void HEnvironment::CopyFrom(const GrowableArray<HInstruction*>& locals) {
+ for (size_t i = 0; i < locals.Size(); i++) {
+ HInstruction* instruction = locals.Get(i);
+ SetRawEnvAt(i, instruction);
+ if (instruction != nullptr) {
+ instruction->AddEnvUseAt(this, i);
+ }
+ }
+}
+
void HEnvironment::CopyFrom(HEnvironment* env) {
for (size_t i = 0; i < env->Size(); i++) {
HInstruction* instruction = env->GetInstructionAt(i);
@@ -963,8 +1006,9 @@ void HBasicBlock::DisconnectAndDelete() {
HLoopInformation* loop_info = it.Current();
loop_info->Remove(this);
if (loop_info->IsBackEdge(*this)) {
- // This deliberately leaves the loop in an inconsistent state and will
- // fail SSAChecker unless the entire loop is removed during the pass.
+ // If this was the last back edge of the loop, we deliberately leave the
+ // loop in an inconsistent state and will fail SSAChecker unless the
+ // entire loop is removed during the pass.
loop_info->RemoveBackEdge(this);
}
}
@@ -1040,20 +1084,6 @@ void HBasicBlock::DisconnectAndDelete() {
SetGraph(nullptr);
}
-void HBasicBlock::UpdateLoopInformation() {
- // Check if loop information points to a dismantled loop. If so, replace with
- // the loop information of a larger loop which contains this block, or nullptr
- // otherwise. We iterate in case the larger loop has been destroyed too.
- while (IsInLoop() && loop_information_->GetBackEdges().IsEmpty()) {
- if (IsLoopHeader()) {
- HSuspendCheck* suspend_check = loop_information_->GetSuspendCheck();
- DCHECK_EQ(suspend_check->GetBlock(), this);
- RemoveInstruction(suspend_check);
- }
- loop_information_ = loop_information_->GetPreHeader()->GetLoopInformation();
- }
-}
-
void HBasicBlock::MergeWith(HBasicBlock* other) {
DCHECK_EQ(GetGraph(), other->GetGraph());
DCHECK(GetDominatedBlocks().Contains(other));
@@ -1075,8 +1105,7 @@ void HBasicBlock::MergeWith(HBasicBlock* other) {
HLoopInformation* loop_info = it.Current();
loop_info->Remove(other);
if (loop_info->IsBackEdge(*other)) {
- loop_info->ClearBackEdges();
- loop_info->AddBackEdge(this);
+ loop_info->ReplaceBackEdge(other, this);
}
}
@@ -1307,11 +1336,9 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
loop_it.Current()->Add(to);
}
if (info->IsBackEdge(*at)) {
- // Only `at` can become a back edge, as the inlined blocks
- // are predecessors of `at`.
- DCHECK_EQ(1u, info->NumberOfBackEdges());
- info->ClearBackEdges();
- info->AddBackEdge(to);
+ // Only `to` can become a back edge, as the inlined blocks
+ // are predecessors of `to`.
+ info->ReplaceBackEdge(at, to);
}
}
}
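
The nodes.cc changes above generalize natural-loop population from a single back edge to any number of them. Below is a minimal standalone sketch of the same walk, using plain STL types instead of ART's HBasicBlock and ArenaBitVector; all names in it are illustrative, not ART API.

#include <cstdio>
#include <set>
#include <vector>

struct Block {
  int id;
  std::vector<Block*> predecessors;
};

// Adds `block` and, transitively, its predecessors to `loop`, stopping at
// blocks already recorded; the header is pre-seeded to end the recursion.
void PopulateRecursive(Block* block, std::set<int>* loop) {
  if (!loop->insert(block->id).second) {
    return;  // Already part of the loop.
  }
  for (Block* pred : block->predecessors) {
    PopulateRecursive(pred, loop);
  }
}

std::set<int> Populate(Block* header, const std::vector<Block*>& back_edges) {
  std::set<int> loop;
  loop.insert(header->id);  // Seed the header so the recursion terminates.
  for (Block* back_edge : back_edges) {
    PopulateRecursive(back_edge, &loop);
  }
  return loop;
}

int main() {
  // Two back edges, both reached from `body`, both branching back to `header`.
  Block header{0, {}};
  Block body{1, {&header}};
  Block back_edge_a{2, {&body}};
  Block back_edge_b{3, {&body}};
  for (int id : Populate(&header, {&back_edge_a, &back_edge_b})) {
    std::printf("block %d is in the loop\n", id);  // Prints blocks 0..3.
  }
}
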
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 63f3c95c7d..0089f22169 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -48,6 +48,7 @@ class HPhi;
class HSuspendCheck;
class LiveInterval;
class LocationSummary;
+class SlowPathCode;
class SsaBuilder;
static const int kDefaultNumberOfBlocks = 8;
@@ -116,7 +117,11 @@ class HInstructionList {
// Control-flow graph of a method. Contains a list of basic blocks.
class HGraph : public ArenaObject<kArenaAllocMisc> {
public:
- HGraph(ArenaAllocator* arena, bool debuggable = false, int start_instruction_id = 0)
+ HGraph(ArenaAllocator* arena,
+ const DexFile& dex_file,
+ uint32_t method_idx,
+ bool debuggable = false,
+ int start_instruction_id = 0)
: arena_(arena),
blocks_(arena, kDefaultNumberOfBlocks),
reverse_post_order_(arena, kDefaultNumberOfBlocks),
@@ -130,6 +135,8 @@ class HGraph : public ArenaObject<kArenaAllocMisc> {
has_bounds_checks_(false),
debuggable_(debuggable),
current_instruction_id_(start_instruction_id),
+ dex_file_(dex_file),
+ method_idx_(method_idx),
cached_null_constant_(nullptr),
cached_int_constants_(std::less<int32_t>(), arena->Adapter()),
cached_float_constants_(std::less<int32_t>(), arena->Adapter()),
@@ -262,6 +269,14 @@ class HGraph : public ArenaObject<kArenaAllocMisc> {
HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const;
+ const DexFile& GetDexFile() const {
+ return dex_file_;
+ }
+
+ uint32_t GetMethodIdx() const {
+ return method_idx_;
+ }
+
private:
void VisitBlockForDominatorTree(HBasicBlock* block,
HBasicBlock* predecessor,
@@ -338,6 +353,12 @@ class HGraph : public ArenaObject<kArenaAllocMisc> {
// The current id to assign to a newly added instruction. See HInstruction.id_.
int32_t current_instruction_id_;
+ // The dex file this method comes from.
+ const DexFile& dex_file_;
+
+ // The method index in the dex file.
+ const uint32_t method_idx_;
+
// Cached constants.
HNullConstant* cached_null_constant_;
ArenaSafeMap<int32_t, HIntConstant*> cached_int_constants_;
@@ -397,19 +418,30 @@ class HLoopInformation : public ArenaObject<kArenaAllocMisc> {
return back_edges_;
}
- HBasicBlock* GetSingleBackEdge() const {
- DCHECK_EQ(back_edges_.Size(), 1u);
- return back_edges_.Get(0);
- }
+ // Returns the lifetime position of the back edge that has the
+ // greatest lifetime position.
+ size_t GetLifetimeEnd() const;
- void ClearBackEdges() {
- back_edges_.Reset();
+ void ReplaceBackEdge(HBasicBlock* existing, HBasicBlock* new_back_edge) {
+ for (size_t i = 0, e = back_edges_.Size(); i < e; ++i) {
+ if (back_edges_.Get(i) == existing) {
+ back_edges_.Put(i, new_back_edge);
+ return;
+ }
+ }
+ UNREACHABLE();
}
- // Find blocks that are part of this loop. Returns whether the loop is a natural loop,
+ // Finds blocks that are part of this loop. Returns whether the loop is a natural loop,
// that is the header dominates the back edge.
bool Populate();
+ // Reanalyzes the loop by removing loop info from its blocks and re-running
+ // Populate(). If there are no back edges left, the loop info is completely
+ // removed, together with its SuspendCheck instruction. Inner (nested) loops
+ // must be updated first.
+ void Update();
+
// Returns whether this loop information contains `block`.
// Note that this loop information *must* be populated before entering this function.
bool Contains(const HBasicBlock& block) const;
@@ -679,14 +711,9 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> {
loop_information_ = info;
}
- // Checks if the loop information points to a valid loop. If the loop has been
- // dismantled (does not have a back edge any more), loop information is
- // removed or replaced with the information of the first valid outer loop.
- void UpdateLoopInformation();
-
bool IsInLoop() const { return loop_information_ != nullptr; }
- // Returns wheter this block dominates the blocked passed as parameter.
+ // Returns whether this block dominates the block passed as parameter.
bool Dominates(HBasicBlock* block) const;
size_t GetLifetimeStart() const { return lifetime_start_; }
@@ -928,6 +955,14 @@ class HUseList : public ValueObject {
return first_ != nullptr && first_->next_ == nullptr;
}
+ size_t SizeSlow() const {
+ size_t count = 0;
+ for (HUseListNode<T>* current = first_; current != nullptr; current = current->GetNext()) {
+ ++count;
+ }
+ return count;
+ }
+
private:
HUseListNode<T>* first_;
};
@@ -1054,15 +1089,43 @@ class SideEffects : public ValueObject {
// A HEnvironment object contains the values of virtual registers at a given location.
class HEnvironment : public ArenaObject<kArenaAllocMisc> {
public:
- HEnvironment(ArenaAllocator* arena, size_t number_of_vregs)
- : vregs_(arena, number_of_vregs) {
+ HEnvironment(ArenaAllocator* arena,
+ size_t number_of_vregs,
+ const DexFile& dex_file,
+ uint32_t method_idx,
+ uint32_t dex_pc)
+ : vregs_(arena, number_of_vregs),
+ locations_(arena, number_of_vregs),
+ parent_(nullptr),
+ dex_file_(dex_file),
+ method_idx_(method_idx),
+ dex_pc_(dex_pc) {
vregs_.SetSize(number_of_vregs);
for (size_t i = 0; i < number_of_vregs; i++) {
vregs_.Put(i, HUserRecord<HEnvironment*>());
}
+
+ locations_.SetSize(number_of_vregs);
+ for (size_t i = 0; i < number_of_vregs; ++i) {
+ locations_.Put(i, Location());
+ }
}
- void CopyFrom(HEnvironment* env);
+ void SetAndCopyParentChain(ArenaAllocator* allocator, HEnvironment* parent) {
+ parent_ = new (allocator) HEnvironment(allocator,
+ parent->Size(),
+ parent->GetDexFile(),
+ parent->GetMethodIdx(),
+ parent->GetDexPc());
+ if (parent->GetParent() != nullptr) {
+ parent_->SetAndCopyParentChain(allocator, parent->GetParent());
+ }
+ parent_->CopyFrom(parent);
+ }
+
+ void CopyFrom(const GrowableArray<HInstruction*>& locals);
+ void CopyFrom(HEnvironment* environment);
+
// Copy from `env`. If it's a loop phi for `loop_header`, copy the first
// input to the loop phi instead. This is for inserting instructions that
// require an environment (like HDeoptimization) in the loop pre-header.
@@ -1080,6 +1143,28 @@ class HEnvironment : public ArenaObject<kArenaAllocMisc> {
size_t Size() const { return vregs_.Size(); }
+ HEnvironment* GetParent() const { return parent_; }
+
+ void SetLocationAt(size_t index, Location location) {
+ locations_.Put(index, location);
+ }
+
+ Location GetLocationAt(size_t index) const {
+ return locations_.Get(index);
+ }
+
+ uint32_t GetDexPc() const {
+ return dex_pc_;
+ }
+
+ uint32_t GetMethodIdx() const {
+ return method_idx_;
+ }
+
+ const DexFile& GetDexFile() const {
+ return dex_file_;
+ }
+
private:
// Record instructions' use entries of this environment for constant-time removal.
// It should only be called by HInstruction when a new environment use is added.
@@ -1090,6 +1175,11 @@ class HEnvironment : public ArenaObject<kArenaAllocMisc> {
}
GrowableArray<HUserRecord<HEnvironment*> > vregs_;
+ GrowableArray<Location> locations_;
+ HEnvironment* parent_;
+ const DexFile& dex_file_;
+ const uint32_t method_idx_;
+ const uint32_t dex_pc_;
friend class HInstruction;
@@ -1221,6 +1311,11 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> {
}
virtual bool NeedsEnvironment() const { return false; }
+ virtual uint32_t GetDexPc() const {
+ LOG(FATAL) << "GetDexPc() cannot be called on an instruction that"
+ " does not need an environment";
+ UNREACHABLE();
+ }
virtual bool IsControlFlow() const { return false; }
virtual bool CanThrow() const { return false; }
bool HasSideEffects() const { return side_effects_.HasSideEffects(); }
@@ -1298,14 +1393,30 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> {
// copying, the uses lists are being updated.
void CopyEnvironmentFrom(HEnvironment* environment) {
ArenaAllocator* allocator = GetBlock()->GetGraph()->GetArena();
- environment_ = new (allocator) HEnvironment(allocator, environment->Size());
+ environment_ = new (allocator) HEnvironment(
+ allocator,
+ environment->Size(),
+ environment->GetDexFile(),
+ environment->GetMethodIdx(),
+ environment->GetDexPc());
environment_->CopyFrom(environment);
+ if (environment->GetParent() != nullptr) {
+ environment_->SetAndCopyParentChain(allocator, environment->GetParent());
+ }
}
void CopyEnvironmentFromWithLoopPhiAdjustment(HEnvironment* environment,
HBasicBlock* block) {
ArenaAllocator* allocator = GetBlock()->GetGraph()->GetArena();
- environment_ = new (allocator) HEnvironment(allocator, environment->Size());
+ environment_ = new (allocator) HEnvironment(
+ allocator,
+ environment->Size(),
+ environment->GetDexFile(),
+ environment->GetMethodIdx(),
+ environment->GetDexPc());
+ if (environment->GetParent() != nullptr) {
+ environment_->SetAndCopyParentChain(allocator, environment->GetParent());
+ }
environment_->CopyFromWithLoopPhiAdjustment(environment, block);
}
@@ -1682,7 +1793,7 @@ class HDeoptimize : public HTemplateInstruction<1> {
bool NeedsEnvironment() const OVERRIDE { return true; }
bool CanThrow() const OVERRIDE { return true; }
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
DECLARE_INSTRUCTION(Deoptimize);
@@ -2086,15 +2197,16 @@ class HFloatConstant : public HConstant {
size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
bool IsMinusOne() const OVERRIDE {
- return bit_cast<uint32_t, float>(AsFloatConstant()->GetValue()) ==
- bit_cast<uint32_t, float>((-1.0f));
+ return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>((-1.0f));
}
bool IsZero() const OVERRIDE {
- return AsFloatConstant()->GetValue() == 0.0f;
+ return value_ == 0.0f;
}
bool IsOne() const OVERRIDE {
- return bit_cast<uint32_t, float>(AsFloatConstant()->GetValue()) ==
- bit_cast<uint32_t, float>(1.0f);
+ return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>(1.0f);
+ }
+ bool IsNaN() const {
+ return std::isnan(value_);
}
DECLARE_INSTRUCTION(FloatConstant);
@@ -2124,15 +2236,16 @@ class HDoubleConstant : public HConstant {
size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
bool IsMinusOne() const OVERRIDE {
- return bit_cast<uint64_t, double>(AsDoubleConstant()->GetValue()) ==
- bit_cast<uint64_t, double>((-1.0));
+ return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>((-1.0));
}
bool IsZero() const OVERRIDE {
- return AsDoubleConstant()->GetValue() == 0.0;
+ return value_ == 0.0;
}
bool IsOne() const OVERRIDE {
- return bit_cast<uint64_t, double>(AsDoubleConstant()->GetValue()) ==
- bit_cast<uint64_t, double>(1.0);
+ return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>(1.0);
+ }
+ bool IsNaN() const {
+ return std::isnan(value_);
}
DECLARE_INSTRUCTION(DoubleConstant);
@@ -2251,7 +2364,7 @@ class HInvoke : public HInstruction {
Primitive::Type GetType() const OVERRIDE { return return_type_; }
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
uint32_t GetDexMethodIndex() const { return dex_method_index_; }
@@ -2468,7 +2581,7 @@ class HNewInstance : public HExpression<0> {
type_index_(type_index),
entrypoint_(entrypoint) {}
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
uint16_t GetTypeIndex() const { return type_index_; }
// Calls runtime so needs an environment.
@@ -2520,7 +2633,7 @@ class HNewArray : public HExpression<1> {
SetRawInputAt(0, length);
}
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
uint16_t GetTypeIndex() const { return type_index_; }
// Calls runtime so needs an environment.
@@ -2615,7 +2728,7 @@ class HDiv : public HBinaryOperation {
return (y == -1) ? -x : x / y;
}
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
DECLARE_INSTRUCTION(Div);
@@ -2642,7 +2755,7 @@ class HRem : public HBinaryOperation {
return (y == -1) ? 0 : x % y;
}
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
DECLARE_INSTRUCTION(Rem);
@@ -2669,7 +2782,7 @@ class HDivZeroCheck : public HExpression<1> {
bool NeedsEnvironment() const OVERRIDE { return true; }
bool CanThrow() const OVERRIDE { return true; }
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
DECLARE_INSTRUCTION(DivZeroCheck);
@@ -2864,7 +2977,7 @@ class HTypeConversion : public HExpression<1> {
// Required by the x86 and ARM code generators when producing calls
// to the runtime.
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
bool CanBeMoved() const OVERRIDE { return true; }
bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; }
@@ -2974,7 +3087,7 @@ class HNullCheck : public HExpression<1> {
bool CanBeNull() const OVERRIDE { return false; }
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
DECLARE_INSTRUCTION(NullCheck);
@@ -3137,7 +3250,7 @@ class HArraySet : public HTemplateInstruction<3> {
bool NeedsTypeCheck() const { return needs_type_check_; }
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
HInstruction* GetArray() const { return InputAt(0); }
HInstruction* GetIndex() const { return InputAt(1); }
@@ -3207,7 +3320,7 @@ class HBoundsCheck : public HExpression<2> {
bool CanThrow() const OVERRIDE { return true; }
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
DECLARE_INSTRUCTION(BoundsCheck);
@@ -3247,19 +3360,25 @@ class HTemporary : public HTemplateInstruction<0> {
class HSuspendCheck : public HTemplateInstruction<0> {
public:
explicit HSuspendCheck(uint32_t dex_pc)
- : HTemplateInstruction(SideEffects::None()), dex_pc_(dex_pc) {}
+ : HTemplateInstruction(SideEffects::None()), dex_pc_(dex_pc), slow_path_(nullptr) {}
bool NeedsEnvironment() const OVERRIDE {
return true;
}
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
+ void SetSlowPath(SlowPathCode* slow_path) { slow_path_ = slow_path; }
+ SlowPathCode* GetSlowPath() const { return slow_path_; }
DECLARE_INSTRUCTION(SuspendCheck);
private:
const uint32_t dex_pc_;
+ // Only used for code generation, in order to share the same slow path between back edges
+ // of the same loop.
+ SlowPathCode* slow_path_;
+
DISALLOW_COPY_AND_ASSIGN(HSuspendCheck);
};
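
The new slow_path_ field exists so that every back edge of one loop can jump to a single suspend-check slow path. A hedged sketch of that caching pattern follows; SlowPath and GetOrCreateSlowPath are stand-ins, not ART's SlowPathCode API.

#include <cstdio>

struct SlowPath {};  // Stand-in for SlowPathCode.

struct SuspendCheck {
  SlowPath* slow_path = nullptr;  // Shared by all back edges of the loop.
};

// The first back edge allocates the slow path; later back edges reuse it.
SlowPath* GetOrCreateSlowPath(SuspendCheck* check) {
  if (check->slow_path == nullptr) {
    check->slow_path = new SlowPath();
  }
  return check->slow_path;
}

int main() {
  SuspendCheck check;
  bool shared = GetOrCreateSlowPath(&check) == GetOrCreateSlowPath(&check);
  std::printf("slow path shared: %s\n", shared ? "yes" : "no");  // yes
  delete check.slow_path;
}
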
@@ -3286,7 +3405,7 @@ class HLoadClass : public HExpression<0> {
size_t ComputeHashCode() const OVERRIDE { return type_index_; }
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
uint16_t GetTypeIndex() const { return type_index_; }
bool IsReferrersClass() const { return is_referrers_class_; }
@@ -3360,7 +3479,7 @@ class HLoadString : public HExpression<0> {
size_t ComputeHashCode() const OVERRIDE { return string_index_; }
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
uint32_t GetStringIndex() const { return string_index_; }
// TODO: Can we deopt or debug when we resolve a string?
@@ -3398,7 +3517,7 @@ class HClinitCheck : public HExpression<1> {
return true;
}
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
HLoadClass* GetLoadClass() const { return InputAt(0)->AsLoadClass(); }
@@ -3498,7 +3617,7 @@ class HThrow : public HTemplateInstruction<1> {
bool CanThrow() const OVERRIDE { return true; }
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
DECLARE_INSTRUCTION(Throw);
@@ -3532,7 +3651,7 @@ class HInstanceOf : public HExpression<2> {
return false;
}
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
bool IsClassFinal() const { return class_is_final_; }
@@ -3607,7 +3726,7 @@ class HCheckCast : public HTemplateInstruction<2> {
bool MustDoNullCheck() const { return must_do_null_check_; }
void ClearMustDoNullCheck() { must_do_null_check_ = false; }
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
bool IsClassFinal() const { return class_is_final_; }
@@ -3653,7 +3772,7 @@ class HMonitorOperation : public HTemplateInstruction<1> {
bool NeedsEnvironment() const OVERRIDE { return true; }
bool CanThrow() const OVERRIDE { return true; }
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
bool IsEnter() const { return kind_ == kEnter; }
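
The HEnvironment changes in this header chain environments through a parent pointer, so each inlined frame carries its own dex pc, method index, and vreg snapshot while still reaching its caller's. A simplified standalone model of that chain, with Environment and Dump as illustrative names only:

#include <cstdint>
#include <cstdio>
#include <vector>

struct Environment {
  uint32_t dex_pc;
  std::vector<int> vregs;  // Stand-in for the per-frame vreg records.
  Environment* parent;     // Caller frame; the outermost one has nullptr.
};

// Walks from the innermost (inlined) frame out to the outermost caller,
// mirroring how the liveness pass now visits every environment in the chain.
void Dump(const Environment* env) {
  for (; env != nullptr; env = env->parent) {
    std::printf("frame at dex_pc=%u holds %zu vregs\n",
                env->dex_pc, env->vregs.size());
  }
}

int main() {
  Environment outer{/*dex_pc=*/10, {1, 2}, nullptr};
  Environment inner{/*dex_pc=*/3, {7}, &outer};  // Inlined callee frame.
  Dump(&inner);
}
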
diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc
index 4e83ce576c..2736453ccc 100644
--- a/compiler/optimizing/nodes_test.cc
+++ b/compiler/optimizing/nodes_test.cc
@@ -16,6 +16,7 @@
#include "base/arena_allocator.h"
#include "nodes.h"
+#include "optimizing_unit_test.h"
#include "gtest/gtest.h"
@@ -29,7 +30,7 @@ TEST(Node, RemoveInstruction) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
graph->AddBlock(entry);
graph->SetEntryBlock(entry);
@@ -49,7 +50,8 @@ TEST(Node, RemoveInstruction) {
first_block->AddSuccessor(exit_block);
exit_block->AddInstruction(new (&allocator) HExit());
- HEnvironment* environment = new (&allocator) HEnvironment(&allocator, 1);
+ HEnvironment* environment = new (&allocator) HEnvironment(
+ &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0);
null_check->SetRawEnvironment(environment);
environment->SetRawEnvAt(0, parameter);
parameter->AddEnvUseAt(null_check->GetEnvironment(), 0);
@@ -70,7 +72,7 @@ TEST(Node, InsertInstruction) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
graph->AddBlock(entry);
graph->SetEntryBlock(entry);
@@ -96,7 +98,7 @@ TEST(Node, AddInstruction) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
graph->AddBlock(entry);
graph->SetEntryBlock(entry);
@@ -112,4 +114,51 @@ TEST(Node, AddInstruction) {
ASSERT_TRUE(parameter->GetUses().HasOnlyOneUse());
}
+TEST(Node, ParentEnvironment) {
+ ArenaPool pool;
+ ArenaAllocator allocator(&pool);
+
+ HGraph* graph = CreateGraph(&allocator);
+ HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(entry);
+ graph->SetEntryBlock(entry);
+ HInstruction* parameter1 = new (&allocator) HParameterValue(0, Primitive::kPrimNot);
+ HInstruction* with_environment = new (&allocator) HNullCheck(parameter1, 0);
+ entry->AddInstruction(parameter1);
+ entry->AddInstruction(with_environment);
+ entry->AddInstruction(new (&allocator) HExit());
+
+ ASSERT_TRUE(parameter1->HasUses());
+ ASSERT_TRUE(parameter1->GetUses().HasOnlyOneUse());
+
+ HEnvironment* environment = new (&allocator) HEnvironment(
+ &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0);
+ GrowableArray<HInstruction*> array(&allocator, 1);
+ array.Add(parameter1);
+
+ environment->CopyFrom(array);
+ with_environment->SetRawEnvironment(environment);
+
+ ASSERT_TRUE(parameter1->HasEnvironmentUses());
+ ASSERT_TRUE(parameter1->GetEnvUses().HasOnlyOneUse());
+
+ HEnvironment* parent1 = new (&allocator) HEnvironment(
+ &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0);
+ parent1->CopyFrom(array);
+
+ ASSERT_EQ(parameter1->GetEnvUses().SizeSlow(), 2u);
+
+ HEnvironment* parent2 = new (&allocator) HEnvironment(
+ &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0);
+ parent2->CopyFrom(array);
+ parent1->SetAndCopyParentChain(&allocator, parent2);
+
+ // One use for parent2, and one other use for the new parent of parent1.
+ ASSERT_EQ(parameter1->GetEnvUses().SizeSlow(), 4u);
+
+ // We have copied the parent chain. So we now have two more uses.
+ environment->SetAndCopyParentChain(&allocator, parent1);
+ ASSERT_EQ(parameter1->GetEnvUses().SizeSlow(), 6u);
+}
+
} // namespace art
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
index b2c13adf35..7aea249c42 100644
--- a/compiler/optimizing/optimizing_cfi_test.cc
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -21,6 +21,7 @@
#include "cfi_test.h"
#include "gtest/gtest.h"
#include "optimizing/code_generator.h"
+#include "optimizing/optimizing_unit_test.h"
#include "utils/assembler.h"
#include "optimizing/optimizing_cfi_test_expected.inc"
@@ -45,10 +46,10 @@ class OptimizingCFITest : public CFITest {
std::unique_ptr<const InstructionSetFeatures> isa_features;
std::string error;
isa_features.reset(InstructionSetFeatures::FromVariant(isa, "default", &error));
- HGraph graph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
// Generate simple frame with some spills.
std::unique_ptr<CodeGenerator> code_gen(
- CodeGenerator::Create(&graph, isa, *isa_features.get(), opts));
+ CodeGenerator::Create(graph, isa, *isa_features.get(), opts));
const int frame_size = 64;
int core_reg = 0;
int fp_reg = 0;
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 05451bcaa6..8bb5d8ebae 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -320,8 +320,10 @@ static void RunOptimizations(HGraph* graph,
const DexCompilationUnit& dex_compilation_unit,
PassInfoPrinter* pass_info_printer,
StackHandleScopeCollection* handles) {
- HDeadCodeElimination dce1(graph, stats);
- HDeadCodeElimination dce2(graph, stats, "dead_code_elimination_final");
+ HDeadCodeElimination dce1(graph, stats,
+ HDeadCodeElimination::kInitialDeadCodeEliminationPassName);
+ HDeadCodeElimination dce2(graph, stats,
+ HDeadCodeElimination::kFinalDeadCodeEliminationPassName);
HConstantFolding fold1(graph);
InstructionSimplifier simplify1(graph, stats);
HBooleanSimplifier boolean_simplify(graph);
@@ -512,7 +514,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite
ArenaAllocator arena(Runtime::Current()->GetArenaPool());
HGraph* graph = new (&arena) HGraph(
- &arena, compiler_driver->GetCompilerOptions().GetDebuggable());
+ &arena, dex_file, method_idx, compiler_driver->GetCompilerOptions().GetDebuggable());
// For testing purposes, we put a special marker on method names that should be compiled
// with this compiler. This makes sure we're not regressing.
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 65c84e6942..b6b1bb1cad 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -29,25 +29,26 @@ enum MethodCompilationStat {
kCompiledBaseline,
kCompiledOptimized,
kCompiledQuick,
- kInstructionSimplifications,
kInlinedInvoke,
- kNotCompiledUnsupportedIsa,
- kNotCompiledPathological,
+ kInstructionSimplifications,
+ kNotCompiledBranchOutsideMethodCode,
+ kNotCompiledCannotBuildSSA,
+ kNotCompiledCantAccesType,
+ kNotCompiledClassNotVerified,
kNotCompiledHugeMethod,
kNotCompiledLargeMethodNoBranches,
- kNotCompiledCannotBuildSSA,
kNotCompiledNoCodegen,
- kNotCompiledUnresolvedMethod,
- kNotCompiledUnresolvedField,
kNotCompiledNonSequentialRegPair,
+ kNotCompiledPathological,
kNotCompiledSpaceFilter,
- kNotOptimizedTryCatch,
- kNotOptimizedDisabled,
- kNotCompiledCantAccesType,
- kNotOptimizedRegisterAllocator,
kNotCompiledUnhandledInstruction,
+ kNotCompiledUnresolvedField,
+ kNotCompiledUnresolvedMethod,
+ kNotCompiledUnsupportedIsa,
kNotCompiledVerifyAtRuntime,
- kNotCompiledClassNotVerified,
+ kNotOptimizedDisabled,
+ kNotOptimizedRegisterAllocator,
+ kNotOptimizedTryCatch,
kRemovedCheckedCast,
kRemovedDeadInstruction,
kRemovedNullCheck,
@@ -98,23 +99,24 @@ class OptimizingCompilerStats {
case kCompiledQuick : return "kCompiledQuick";
case kInlinedInvoke : return "kInlinedInvoke";
case kInstructionSimplifications: return "kInstructionSimplifications";
- case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa";
- case kNotCompiledPathological : return "kNotCompiledPathological";
+ case kNotCompiledBranchOutsideMethodCode: return "kNotCompiledBranchOutsideMethodCode";
+ case kNotCompiledCannotBuildSSA : return "kNotCompiledCannotBuildSSA";
+ case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType";
+ case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified";
case kNotCompiledHugeMethod : return "kNotCompiledHugeMethod";
case kNotCompiledLargeMethodNoBranches : return "kNotCompiledLargeMethodNoBranches";
- case kNotCompiledCannotBuildSSA : return "kNotCompiledCannotBuildSSA";
case kNotCompiledNoCodegen : return "kNotCompiledNoCodegen";
- case kNotCompiledUnresolvedMethod : return "kNotCompiledUnresolvedMethod";
- case kNotCompiledUnresolvedField : return "kNotCompiledUnresolvedField";
case kNotCompiledNonSequentialRegPair : return "kNotCompiledNonSequentialRegPair";
- case kNotOptimizedDisabled : return "kNotOptimizedDisabled";
- case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch";
- case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType";
+ case kNotCompiledPathological : return "kNotCompiledPathological";
case kNotCompiledSpaceFilter : return "kNotCompiledSpaceFilter";
- case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator";
case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction";
+ case kNotCompiledUnresolvedField : return "kNotCompiledUnresolvedField";
+ case kNotCompiledUnresolvedMethod : return "kNotCompiledUnresolvedMethod";
+ case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa";
case kNotCompiledVerifyAtRuntime : return "kNotCompiledVerifyAtRuntime";
- case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified";
+ case kNotOptimizedDisabled : return "kNotOptimizedDisabled";
+ case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator";
+ case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch";
case kRemovedCheckedCast: return "kRemovedCheckedCast";
case kRemovedDeadInstruction: return "kRemovedDeadInstruction";
case kRemovedNullCheck: return "kRemovedNullCheck";
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index 6b236927da..4f8ec65e43 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -72,11 +72,16 @@ void RemoveSuspendChecks(HGraph* graph) {
}
}
+inline HGraph* CreateGraph(ArenaAllocator* allocator) {
+ return new (allocator) HGraph(
+ allocator, *reinterpret_cast<DexFile*>(allocator->Alloc(sizeof(DexFile))), -1);
+}
+
// Create a control-flow graph from Dex instructions.
inline HGraph* CreateCFG(ArenaAllocator* allocator,
const uint16_t* data,
Primitive::Type return_type = Primitive::kPrimInt) {
- HGraph* graph = new (allocator) HGraph(allocator);
+ HGraph* graph = CreateGraph(allocator);
HGraphBuilder builder(graph, return_type);
const DexFile::CodeItem* item =
reinterpret_cast<const DexFile::CodeItem*>(data);
diff --git a/compiler/optimizing/pretty_printer_test.cc b/compiler/optimizing/pretty_printer_test.cc
index 293fde978e..c56100dfa1 100644
--- a/compiler/optimizing/pretty_printer_test.cc
+++ b/compiler/optimizing/pretty_printer_test.cc
@@ -30,7 +30,7 @@ namespace art {
static void TestCode(const uint16_t* data, const char* expected) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HGraphBuilder builder(graph);
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
bool graph_built = builder.BuildGraph(*item);
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 2375595978..f53f846326 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -1534,9 +1534,10 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
}
while (env_use != nullptr && env_use->GetPosition() <= range->GetEnd()) {
- DCHECK(current->CoversSlow(env_use->GetPosition()) || (env_use->GetPosition() == range->GetEnd()));
- LocationSummary* locations = env_use->GetUser()->GetLocations();
- locations->SetEnvironmentAt(env_use->GetInputIndex(), source);
+ DCHECK(current->CoversSlow(env_use->GetPosition())
+ || (env_use->GetPosition() == range->GetEnd()));
+ HEnvironment* environment = env_use->GetUser()->GetEnvironment();
+ environment->SetLocationAt(env_use->GetInputIndex(), source);
env_use = env_use->GetNext();
}
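
The hunk above moves environment locations off the user's LocationSummary and onto the environment itself, which is what lets each inlined frame record where its vregs live. A minimal sketch of that ownership, assuming vector-backed storage (Location here is a bare struct, not ART's Location class):

#include <cstddef>
#include <cstdio>
#include <vector>

struct Location {
  int stack_slot;  // -1 means no stack slot has been assigned yet.
};

struct Environment {
  std::vector<Location> locations;  // One entry per vreg, owned here.
  void SetLocationAt(size_t i, Location loc) { locations[i] = loc; }
  Location GetLocationAt(size_t i) const { return locations[i]; }
};

int main() {
  Environment env{std::vector<Location>(2, Location{-1})};
  env.SetLocationAt(1, Location{8});  // E.g. a sibling spilled to slot 8.
  std::printf("vreg 1 lives in stack slot %d\n",
              env.GetLocationAt(1).stack_slot);
}
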
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 8c6d904a4c..b72ffb8bf7 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -38,7 +38,7 @@ namespace art {
static bool Check(const uint16_t* data) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HGraphBuilder builder(graph);
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
builder.BuildGraph(*item);
@@ -60,7 +60,7 @@ static bool Check(const uint16_t* data) {
TEST(RegisterAllocatorTest, ValidateIntervals) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
@@ -255,7 +255,7 @@ TEST(RegisterAllocatorTest, Loop2) {
}
static HGraph* BuildSSAGraph(const uint16_t* data, ArenaAllocator* allocator) {
- HGraph* graph = new (allocator) HGraph(allocator);
+ HGraph* graph = CreateGraph(allocator);
HGraphBuilder builder(graph);
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
builder.BuildGraph(*item);
@@ -463,7 +463,7 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator,
HPhi** phi,
HInstruction** input1,
HInstruction** input2) {
- HGraph* graph = new (allocator) HGraph(allocator);
+ HGraph* graph = CreateGraph(allocator);
HBasicBlock* entry = new (allocator) HBasicBlock(graph);
graph->AddBlock(entry);
graph->SetEntryBlock(entry);
@@ -593,7 +593,7 @@ TEST(RegisterAllocatorTest, PhiHint) {
static HGraph* BuildFieldReturn(ArenaAllocator* allocator,
HInstruction** field,
HInstruction** ret) {
- HGraph* graph = new (allocator) HGraph(allocator);
+ HGraph* graph = CreateGraph(allocator);
HBasicBlock* entry = new (allocator) HBasicBlock(graph);
graph->AddBlock(entry);
graph->SetEntryBlock(entry);
@@ -661,7 +661,7 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) {
static HGraph* BuildTwoSubs(ArenaAllocator* allocator,
HInstruction** first_sub,
HInstruction** second_sub) {
- HGraph* graph = new (allocator) HGraph(allocator);
+ HGraph* graph = CreateGraph(allocator);
HBasicBlock* entry = new (allocator) HBasicBlock(graph);
graph->AddBlock(entry);
graph->SetEntryBlock(entry);
@@ -731,7 +731,7 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) {
static HGraph* BuildDiv(ArenaAllocator* allocator,
HInstruction** div) {
- HGraph* graph = new (allocator) HGraph(allocator);
+ HGraph* graph = CreateGraph(allocator);
HBasicBlock* entry = new (allocator) HBasicBlock(graph);
graph->AddBlock(entry);
graph->SetEntryBlock(entry);
@@ -783,7 +783,7 @@ TEST(RegisterAllocatorTest, SpillInactive) {
// Create a synthesized graph to please the register_allocator and
// ssa_liveness_analysis code.
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
graph->AddBlock(entry);
graph->SetEntryBlock(entry);
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index b66e655d2b..59a2852735 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -332,7 +332,7 @@ void SsaBuilder::BuildSsa() {
}
HInstruction* SsaBuilder::ValueOfLocal(HBasicBlock* block, size_t local) {
- return GetLocalsFor(block)->GetInstructionAt(local);
+ return GetLocalsFor(block)->Get(local);
}
void SsaBuilder::VisitBasicBlock(HBasicBlock* block) {
@@ -349,7 +349,7 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) {
HPhi* phi = new (GetGraph()->GetArena()) HPhi(
GetGraph()->GetArena(), local, 0, Primitive::kPrimVoid);
block->AddPhi(phi);
- current_locals_->SetRawEnvAt(local, phi);
+ current_locals_->Put(local, phi);
}
}
// Save the loop header so that the last phase of the analysis knows which
@@ -389,7 +389,7 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) {
block->AddPhi(phi);
value = phi;
}
- current_locals_->SetRawEnvAt(local, value);
+ current_locals_->Put(local, value);
}
}
@@ -520,7 +520,7 @@ HInstruction* SsaBuilder::GetReferenceTypeEquivalent(HInstruction* value) {
}
void SsaBuilder::VisitLoadLocal(HLoadLocal* load) {
- HInstruction* value = current_locals_->GetInstructionAt(load->GetLocal()->GetRegNumber());
+ HInstruction* value = current_locals_->Get(load->GetLocal()->GetRegNumber());
// If the operation requests a specific type, we make sure its input is of that type.
if (load->GetType() != value->GetType()) {
if (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble) {
@@ -534,7 +534,7 @@ void SsaBuilder::VisitLoadLocal(HLoadLocal* load) {
}
void SsaBuilder::VisitStoreLocal(HStoreLocal* store) {
- current_locals_->SetRawEnvAt(store->GetLocal()->GetRegNumber(), store->InputAt(1));
+ current_locals_->Put(store->GetLocal()->GetRegNumber(), store->InputAt(1));
store->GetBlock()->RemoveInstruction(store);
}
@@ -543,8 +543,12 @@ void SsaBuilder::VisitInstruction(HInstruction* instruction) {
return;
}
HEnvironment* environment = new (GetGraph()->GetArena()) HEnvironment(
- GetGraph()->GetArena(), current_locals_->Size());
- environment->CopyFrom(current_locals_);
+ GetGraph()->GetArena(),
+ current_locals_->Size(),
+ GetGraph()->GetDexFile(),
+ GetGraph()->GetMethodIdx(),
+ instruction->GetDexPc());
+ environment->CopyFrom(*current_locals_);
instruction->SetRawEnvironment(environment);
}
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 265e95b4ac..1c83c4ba48 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -58,14 +58,15 @@ class SsaBuilder : public HGraphVisitor {
void BuildSsa();
- HEnvironment* GetLocalsFor(HBasicBlock* block) {
- HEnvironment* env = locals_for_.Get(block->GetBlockId());
- if (env == nullptr) {
- env = new (GetGraph()->GetArena()) HEnvironment(
+ GrowableArray<HInstruction*>* GetLocalsFor(HBasicBlock* block) {
+ GrowableArray<HInstruction*>* locals = locals_for_.Get(block->GetBlockId());
+ if (locals == nullptr) {
+ locals = new (GetGraph()->GetArena()) GrowableArray<HInstruction*>(
GetGraph()->GetArena(), GetGraph()->GetNumberOfVRegs());
- locals_for_.Put(block->GetBlockId(), env);
+ locals->SetSize(GetGraph()->GetNumberOfVRegs());
+ locals_for_.Put(block->GetBlockId(), locals);
}
- return env;
+ return locals;
}
HInstruction* ValueOfLocal(HBasicBlock* block, size_t local);
@@ -93,14 +94,14 @@ class SsaBuilder : public HGraphVisitor {
static HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type);
// Locals for the current block being visited.
- HEnvironment* current_locals_;
+ GrowableArray<HInstruction*>* current_locals_;
// Keep track of loop headers found. The last phase of the analysis iterates
// over these blocks to set the inputs of their phis.
GrowableArray<HBasicBlock*> loop_headers_;
// HEnvironment for each block.
- GrowableArray<HEnvironment*> locals_for_;
+ GrowableArray<GrowableArray<HInstruction*>*> locals_for_;
DISALLOW_COPY_AND_ASSIGN(SsaBuilder);
};
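
GetLocalsFor() now returns a lazily allocated growable array of instructions per block rather than a full HEnvironment. A rough standalone equivalent, with std::vector and std::unique_ptr standing in for GrowableArray and arena allocation:

#include <cstddef>
#include <memory>
#include <vector>

struct Instruction;  // Opaque stand-in for HInstruction.

class SsaLocals {
 public:
  SsaLocals(size_t num_blocks, size_t num_vregs)
      : num_vregs_(num_vregs), locals_for_(num_blocks) {}

  // Allocates a block's locals table on first request, sized to the method's
  // vreg count and filled with nullptr (meaning "no value recorded yet").
  std::vector<Instruction*>* GetLocalsFor(size_t block_id) {
    if (locals_for_[block_id] == nullptr) {
      locals_for_[block_id] =
          std::make_unique<std::vector<Instruction*>>(num_vregs_, nullptr);
    }
    return locals_for_[block_id].get();
  }

 private:
  const size_t num_vregs_;
  std::vector<std::unique_ptr<std::vector<Instruction*>>> locals_for_;
};

int main() {
  SsaLocals locals(/*num_blocks=*/4, /*num_vregs=*/3);
  // Repeated lookups return the same table for the same block.
  return locals.GetLocalsFor(2) == locals.GetLocalsFor(2) ? 0 : 1;
}
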
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 17841685b1..250eb04a1c 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -75,9 +75,7 @@ void SsaLivenessAnalysis::LinearizeGraph() {
HBasicBlock* block = it.Current();
size_t number_of_forward_predecessors = block->GetPredecessors().Size();
if (block->IsLoopHeader()) {
- // We rely on having simplified the CFG.
- DCHECK_EQ(1u, block->GetLoopInformation()->NumberOfBackEdges());
- number_of_forward_predecessors--;
+ number_of_forward_predecessors -= block->GetLoopInformation()->NumberOfBackEdges();
}
forward_predecessors.Put(block->GetBlockId(), number_of_forward_predecessors);
}
@@ -220,10 +218,11 @@ void SsaLivenessAnalysis::ComputeLiveRanges() {
// Process the environment first, because we know their uses come after
// or at the same liveness position of inputs.
- if (current->HasEnvironment()) {
+ for (HEnvironment* environment = current->GetEnvironment();
+ environment != nullptr;
+ environment = environment->GetParent()) {
// Handle environment uses. See statements (b) and (c) of the
// SsaLivenessAnalysis.
- HEnvironment* environment = current->GetEnvironment();
for (size_t i = 0, e = environment->Size(); i < e; ++i) {
HInstruction* instruction = environment->GetInstructionAt(i);
bool should_be_live = ShouldBeLiveForEnvironment(instruction);
@@ -233,7 +232,7 @@ void SsaLivenessAnalysis::ComputeLiveRanges() {
}
if (instruction != nullptr) {
instruction->GetLiveInterval()->AddUse(
- current, i, /* is_environment */ true, should_be_live);
+ current, environment, i, should_be_live);
}
}
}
@@ -245,7 +244,7 @@ void SsaLivenessAnalysis::ComputeLiveRanges() {
// to be materialized.
if (input->HasSsaIndex()) {
live_in->SetBit(input->GetSsaIndex());
- input->GetLiveInterval()->AddUse(current, i, /* is_environment */ false);
+ input->GetLiveInterval()->AddUse(current, /* environment */ nullptr, i);
}
}
}
@@ -264,13 +263,12 @@ void SsaLivenessAnalysis::ComputeLiveRanges() {
}
if (block->IsLoopHeader()) {
- HBasicBlock* back_edge = block->GetLoopInformation()->GetBackEdges().Get(0);
+ size_t last_position = block->GetLoopInformation()->GetLifetimeEnd();
// For all live_in instructions at the loop header, we need to create a range
// that covers the full loop.
for (uint32_t idx : live_in->Indexes()) {
HInstruction* current = instructions_from_ssa_index_.Get(idx);
- current->GetLiveInterval()->AddLoopRange(block->GetLifetimeStart(),
- back_edge->GetLifetimeEnd());
+ current->GetLiveInterval()->AddLoopRange(block->GetLifetimeStart(), last_position);
}
}
}
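
Since a loop can now have several back edges, the loop range for live-in values must reach whichever back edge ends last; the new GetLifetimeEnd() is exactly a maximum over them. A tiny illustrative computation:

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <vector>

// Mirrors HLoopInformation::GetLifetimeEnd(): the loop's lifetime end is the
// greatest lifetime end among its back edges.
size_t LoopLifetimeEnd(const std::vector<size_t>& back_edge_ends) {
  size_t last_position = 0;
  for (size_t end : back_edge_ends) {
    last_position = std::max(last_position, end);
  }
  return last_position;
}

int main() {
  std::printf("%zu\n", LoopLifetimeEnd({42, 90, 66}));  // Prints 90.
}
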
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 7b98c4eab5..82c5454bb0 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -104,13 +104,13 @@ class LiveRange FINAL : public ArenaObject<kArenaAllocMisc> {
class UsePosition : public ArenaObject<kArenaAllocMisc> {
public:
UsePosition(HInstruction* user,
+ HEnvironment* environment,
size_t input_index,
- bool is_environment,
size_t position,
UsePosition* next)
: user_(user),
+ environment_(environment),
input_index_(input_index),
- is_environment_(is_environment),
position_(position),
next_(next) {
DCHECK((user == nullptr)
@@ -129,7 +129,7 @@ class UsePosition : public ArenaObject<kArenaAllocMisc> {
HInstruction* GetUser() const { return user_; }
- bool GetIsEnvironment() const { return is_environment_; }
+ bool GetIsEnvironment() const { return environment_ != nullptr; }
bool IsSynthesized() const { return user_ == nullptr; }
size_t GetInputIndex() const { return input_index_; }
@@ -144,7 +144,7 @@ class UsePosition : public ArenaObject<kArenaAllocMisc> {
UsePosition* Dup(ArenaAllocator* allocator) const {
return new (allocator) UsePosition(
- user_, input_index_, is_environment_, position_,
+ user_, environment_, input_index_, position_,
next_ == nullptr ? nullptr : next_->Dup(allocator));
}
@@ -159,8 +159,8 @@ class UsePosition : public ArenaObject<kArenaAllocMisc> {
private:
HInstruction* const user_;
+ HEnvironment* const environment_;
const size_t input_index_;
- const bool is_environment_;
const size_t position_;
UsePosition* next_;
@@ -237,15 +237,16 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
DCHECK(first_env_use_ == nullptr) << "A temporary cannot have environment user";
size_t position = instruction->GetLifetimePosition();
first_use_ = new (allocator_) UsePosition(
- instruction, temp_index, /* is_environment */ false, position, first_use_);
+ instruction, /* environment */ nullptr, temp_index, position, first_use_);
AddRange(position, position + 1);
}
void AddUse(HInstruction* instruction,
+ HEnvironment* environment,
size_t input_index,
- bool is_environment,
bool keep_alive = false) {
// Set the use within the instruction.
+ bool is_environment = (environment != nullptr);
size_t position = instruction->GetLifetimePosition() + 1;
LocationSummary* locations = instruction->GetLocations();
if (!is_environment) {
@@ -279,7 +280,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
}
DCHECK(first_use_->GetPosition() + 1 == position);
UsePosition* new_use = new (allocator_) UsePosition(
- instruction, input_index, is_environment, position, cursor->GetNext());
+ instruction, environment, input_index, position, cursor->GetNext());
cursor->SetNext(new_use);
if (first_range_->GetEnd() == first_use_->GetPosition()) {
first_range_->end_ = position;
@@ -289,10 +290,10 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
if (is_environment) {
first_env_use_ = new (allocator_) UsePosition(
- instruction, input_index, is_environment, position, first_env_use_);
+ instruction, environment, input_index, position, first_env_use_);
} else {
first_use_ = new (allocator_) UsePosition(
- instruction, input_index, is_environment, position, first_use_);
+ instruction, environment, input_index, position, first_use_);
}
if (is_environment && !keep_alive) {
@@ -331,7 +332,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
AddBackEdgeUses(*block);
}
first_use_ = new (allocator_) UsePosition(
- instruction, input_index, false, block->GetLifetimeEnd(), first_use_);
+ instruction, /* environment */ nullptr, input_index, block->GetLifetimeEnd(), first_use_);
}
void AddRange(size_t start, size_t end) {
@@ -973,7 +974,11 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
break;
}
- size_t back_edge_use_position = current->GetSingleBackEdge()->GetLifetimeEnd();
+ // We're only adding a synthesized use at the last back edge. Adding synthesized uses on
+ // all back edges is not necessary: anything used in the loop will have its use at the
+ // last back edge. If we want one branch in a loop to get better register allocation
+ // than another, it is the linear order we should change.
+ size_t back_edge_use_position = current->GetLifetimeEnd();
if ((first_use_ != nullptr) && (first_use_->GetPosition() <= back_edge_use_position)) {
// There was a use already seen in this loop. Therefore the previous call to `AddUse`
// already inserted the backedge use. We can stop going outward.
@@ -985,8 +990,11 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
|| back_edge_use_position > last_in_new_list->GetPosition());
UsePosition* new_use = new (allocator_) UsePosition(
- nullptr, UsePosition::kNoInput, /* is_environment */ false,
- back_edge_use_position, nullptr);
+ /* user */ nullptr,
+ /* environment */ nullptr,
+ UsePosition::kNoInput,
+ back_edge_use_position,
+ /* next */ nullptr);
if (last_in_new_list != nullptr) {
// Going outward. The latest created use needs to point to the new use.
diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc
index 00c241b85a..fb3e7d798c 100644
--- a/compiler/optimizing/ssa_test.cc
+++ b/compiler/optimizing/ssa_test.cc
@@ -78,7 +78,7 @@ static void ReNumberInstructions(HGraph* graph) {
static void TestCode(const uint16_t* data, const char* expected) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HGraphBuilder builder(graph);
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
bool graph_built = builder.BuildGraph(*item);
@@ -373,30 +373,26 @@ TEST(SsaTest, Loop6) {
const char* expected =
"BasicBlock 0, succ: 1\n"
" 0: IntConstant 0 [5]\n"
- " 1: IntConstant 4 [14, 8, 8]\n"
- " 2: IntConstant 5 [14]\n"
+ " 1: IntConstant 4 [5, 8, 8]\n"
+ " 2: IntConstant 5 [5]\n"
" 3: Goto\n"
"BasicBlock 1, pred: 0, succ: 2\n"
" 4: Goto\n"
- "BasicBlock 2, pred: 1, 8, succ: 6, 3\n"
- " 5: Phi(0, 14) [12, 6, 6]\n"
+ "BasicBlock 2, pred: 1, 4, 5, succ: 6, 3\n"
+ " 5: Phi(0, 2, 1) [12, 6, 6]\n"
" 6: Equal(5, 5) [7]\n"
" 7: If(6)\n"
"BasicBlock 3, pred: 2, succ: 5, 4\n"
" 8: Equal(1, 1) [9]\n"
" 9: If(8)\n"
- "BasicBlock 4, pred: 3, succ: 8\n"
+ "BasicBlock 4, pred: 3, succ: 2\n"
" 10: Goto\n"
- "BasicBlock 5, pred: 3, succ: 8\n"
+ "BasicBlock 5, pred: 3, succ: 2\n"
" 11: Goto\n"
"BasicBlock 6, pred: 2, succ: 7\n"
" 12: Return(5)\n"
"BasicBlock 7, pred: 6\n"
- " 13: Exit\n"
- // Synthesized single back edge of loop.
- "BasicBlock 8, pred: 5, 4, succ: 2\n"
- " 14: Phi(1, 2) [5]\n"
- " 15: Goto\n";
+ " 13: Exit\n";
const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
diff --git a/compiler/optimizing/suspend_check_test.cc b/compiler/optimizing/suspend_check_test.cc
index a5a0eb2114..5ca66a1de6 100644
--- a/compiler/optimizing/suspend_check_test.cc
+++ b/compiler/optimizing/suspend_check_test.cc
@@ -30,7 +30,7 @@ namespace art {
static void TestCode(const uint16_t* data) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HGraphBuilder builder(graph);
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
bool graph_built = builder.BuildGraph(*item);
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 282ab96ce4..5e9653df33 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -272,6 +272,10 @@ void Mips64Assembler::Lhu(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
EmitI(0x25, rs, rt, imm16);
}
+void Mips64Assembler::Lwu(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
+ EmitI(0x27, rs, rt, imm16);
+}
+
void Mips64Assembler::Lui(GpuRegister rt, uint16_t imm16) {
EmitI(0xf, static_cast<GpuRegister>(0), rt, imm16);
}
@@ -480,6 +484,9 @@ void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuR
case kLoadWord:
Lw(reg, base, offset);
break;
+ case kLoadUnsignedWord:
+ Lwu(reg, base, offset);
+ break;
case kLoadDoubleword:
// TODO: alignment issues ???
Ld(reg, base, offset);
@@ -512,7 +519,6 @@ void Mips64Assembler::EmitLoad(ManagedRegister m_dst, GpuRegister src_register,
CHECK_EQ(0u, size) << dst;
} else if (dst.IsGpuRegister()) {
if (size == 4) {
- CHECK_EQ(4u, size) << dst;
LoadFromOffset(kLoadWord, dst.AsGpuRegister(), src_register, src_offset);
} else if (size == 8) {
CHECK_EQ(8u, size) << dst;
@@ -740,14 +746,13 @@ void Mips64Assembler::LoadFromThread64(ManagedRegister mdest, ThreadOffset<8> sr
void Mips64Assembler::LoadRef(ManagedRegister mdest, FrameOffset src) {
Mips64ManagedRegister dest = mdest.AsMips64();
CHECK(dest.IsGpuRegister());
- LoadFromOffset(kLoadWord, dest.AsGpuRegister(), SP, src.Int32Value());
+ LoadFromOffset(kLoadUnsignedWord, dest.AsGpuRegister(), SP, src.Int32Value());
}
-void Mips64Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base,
- MemberOffset offs) {
+void Mips64Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs) {
Mips64ManagedRegister dest = mdest.AsMips64();
- CHECK(dest.IsGpuRegister() && dest.IsGpuRegister());
- LoadFromOffset(kLoadWord, dest.AsGpuRegister(),
+ CHECK(dest.IsGpuRegister() && base.AsMips64().IsGpuRegister());
+ LoadFromOffset(kLoadUnsignedWord, dest.AsGpuRegister(),
base.AsMips64().AsGpuRegister(), offs.Int32Value());
if (kPoisonHeapReferences) {
Subu(dest.AsGpuRegister(), ZERO, dest.AsGpuRegister());
@@ -921,7 +926,7 @@ void Mips64Assembler::CreateHandleScopeEntry(ManagedRegister mout_reg,
// the address in the handle scope holding the reference.
// e.g. out_reg = (handle == 0) ? 0 : (SP+handle_offset)
if (in_reg.IsNoRegister()) {
- LoadFromOffset(kLoadWord, out_reg.AsGpuRegister(),
+ LoadFromOffset(kLoadUnsignedWord, out_reg.AsGpuRegister(),
SP, handle_scope_offset.Int32Value());
in_reg = out_reg;
}
@@ -944,7 +949,7 @@ void Mips64Assembler::CreateHandleScopeEntry(FrameOffset out_off,
CHECK(scratch.IsGpuRegister()) << scratch;
if (null_allowed) {
Label null_arg;
- LoadFromOffset(kLoadWord, scratch.AsGpuRegister(), SP,
+ LoadFromOffset(kLoadUnsignedWord, scratch.AsGpuRegister(), SP,
handle_scope_offset.Int32Value());
// Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is
// the address in the handle scope holding the reference.
@@ -998,7 +1003,7 @@ void Mips64Assembler::Call(FrameOffset base, Offset offset, ManagedRegister mscr
Mips64ManagedRegister scratch = mscratch.AsMips64();
CHECK(scratch.IsGpuRegister()) << scratch;
// Call *(*(SP + base) + offset)
- LoadFromOffset(kLoadWord, scratch.AsGpuRegister(),
+ LoadFromOffset(kLoadUnsignedWord, scratch.AsGpuRegister(),
SP, base.Int32Value());
LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(),
scratch.AsGpuRegister(), offset.Int32Value());
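
The switch from kLoadWord to the new kLoadUnsignedWord matters because MIPS64 lw sign-extends a 32-bit load into a 64-bit register while lwu zero-extends it, and a 32-bit heap reference with its top bit set must not have that bit smeared across the upper half. A standalone illustration of the difference in plain C++ (no assembler involved):

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t ref = 0x80001000u;  // A 32-bit reference with the top bit set.
  int64_t lw_result =
      static_cast<int32_t>(ref);  // lw: sign-extends into 64 bits.
  uint64_t lwu_result = ref;      // lwu: zero-extends into 64 bits.
  std::printf("lw  -> 0x%016llx\n",
              static_cast<unsigned long long>(lw_result));   // ffffffff80001000
  std::printf("lwu -> 0x%016llx\n",
              static_cast<unsigned long long>(lwu_result));  // 0000000080001000
}
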
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index b7f6a9e83a..2d7c661eac 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -36,6 +36,7 @@ enum LoadOperandType {
kLoadSignedHalfword,
kLoadUnsignedHalfword,
kLoadWord,
+ kLoadUnsignedWord,
kLoadDoubleword
};
@@ -85,6 +86,7 @@ class Mips64Assembler FINAL : public Assembler {
void Ld(GpuRegister rt, GpuRegister rs, uint16_t imm16);
void Lbu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
void Lhu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+ void Lwu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
void Lui(GpuRegister rt, uint16_t imm16);
void Mfhi(GpuRegister rd);
void Mflo(GpuRegister rd);
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index f2541a2113..7e7520066d 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1507,6 +1507,14 @@ void X86Assembler::jmp(Label* label) {
}
+void X86Assembler::repne_scasw() {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0xF2);
+ EmitUint8(0xAF);
+}
+
+
X86Assembler* X86Assembler::lock() {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xF0);
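
The three bytes emitted by repne_scasw() decode as 0x66 (operand-size override, selecting the 16-bit scasw form), 0xF2 (the repne prefix), and 0xAF (the scas opcode); the x86_64 version further below emits the identical sequence. A small sketch of the same emission into a plain byte buffer, with EmitRepneScasw as an illustrative name:

#include <cstdint>
#include <cstdio>
#include <vector>

void EmitRepneScasw(std::vector<uint8_t>* buffer) {
  buffer->push_back(0x66);  // Operand-size prefix: 16-bit scasw, not scasd.
  buffer->push_back(0xF2);  // repne: repeat while ZF == 0 and (E/R)CX != 0.
  buffer->push_back(0xAF);  // scas: compare AX against the word at [(E/R)DI].
}

int main() {
  std::vector<uint8_t> buffer;
  EmitRepneScasw(&buffer);
  for (uint8_t b : buffer) {
    std::printf("%02x ", b);  // Prints: 66 f2 af
  }
  std::printf("\n");
}
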
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 946c96de71..136b0cbfdb 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -464,6 +464,8 @@ class X86Assembler FINAL : public Assembler {
void jmp(const Address& address);
void jmp(Label* label);
+ void repne_scasw();
+
X86Assembler* lock();
void cmpxchgl(const Address& address, Register reg);
void cmpxchg8b(const Address& address);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index f326e496d4..aacc57bb0c 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -190,4 +190,10 @@ TEST_F(AssemblerX86Test, FPUIntegerStore) {
DriverStr(expected, "FPUIntegerStore");
}
+TEST_F(AssemblerX86Test, Repnescasw) {
+ GetAssembler()->repne_scasw();
+ const char* expected = "repne scasw\n";
+ DriverStr(expected, "Repnescasw");
+}
+
} // namespace art
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index c0ca7ef437..feceecac68 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -2065,6 +2065,14 @@ void X86_64Assembler::bswapq(CpuRegister dst) {
}
+void X86_64Assembler::repne_scasw() {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0xF2);
+ EmitUint8(0xAF);
+}
+
+
void X86_64Assembler::LoadDoubleConstant(XmmRegister dst, double value) {
// TODO: Need to have a code constants table.
int64_t constant = bit_cast<int64_t, double>(value);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index f5327a8d02..162714af68 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -601,6 +601,8 @@ class X86_64Assembler FINAL : public Assembler {
void bswapl(CpuRegister dst);
void bswapq(CpuRegister dst);
+ void repne_scasw();
+
//
// Macros for High-level operations.
//
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 9e4144ac26..0be4d632fb 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1215,4 +1215,10 @@ TEST_F(AssemblerX86_64Test, MovsxbRegs) {
DriverStr(Repeatrb(&x86_64::X86_64Assembler::movsxb, "movsbl %{reg2}, %{reg1}"), "movsxb");
}
+TEST_F(AssemblerX86_64Test, Repnescasw) {
+ GetAssembler()->repne_scasw();
+ const char* expected = "repne scasw\n";
+ DriverStr(expected, "Repnescasw");
+}
+
} // namespace art