summaryrefslogtreecommitdiff
path: root/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'compiler')
-rw-r--r--compiler/Android.mk1
-rw-r--r--compiler/dex/compiler_enums.h3
-rw-r--r--compiler/dex/mir_optimization.cc7
-rw-r--r--compiler/dex/quick/arm/arm_lir.h1
-rw-r--r--compiler/dex/quick/arm/assemble_arm.cc4
-rw-r--r--compiler/dex/quick/arm/codegen_arm.h9
-rw-r--r--compiler/dex/quick/arm/int_arm.cc1
-rw-r--r--compiler/dex/quick/arm/utility_arm.cc272
-rw-r--r--compiler/dex/quick/codegen_util.cc9
-rw-r--r--compiler/dex/quick/x86/assemble_x86.cc63
-rw-r--r--compiler/dex/quick/x86/call_x86.cc64
-rwxr-xr-xcompiler/dex/quick/x86/int_x86.cc13
-rwxr-xr-xcompiler/dex/quick/x86/target_x86.cc41
-rw-r--r--compiler/dex/quick/x86/utility_x86.cc71
-rw-r--r--compiler/dex/quick/x86/x86_lir.h3
-rw-r--r--compiler/dex/verification_results.cc9
-rw-r--r--compiler/dex/verification_results.h1
-rw-r--r--compiler/driver/compiler_driver-inl.h6
-rw-r--r--compiler/driver/compiler_driver.cc221
-rw-r--r--compiler/driver/compiler_driver.h7
-rw-r--r--compiler/driver/compiler_options.h17
-rw-r--r--compiler/driver/dex_compilation_unit.h6
-rw-r--r--compiler/elf_writer_test.cc4
-rw-r--r--compiler/image_writer.cc142
-rw-r--r--compiler/image_writer.h16
-rw-r--r--compiler/optimizing/bounds_check_elimination.cc691
-rw-r--r--compiler/optimizing/bounds_check_elimination.h36
-rw-r--r--compiler/optimizing/bounds_check_elimination_test.cc1045
-rw-r--r--compiler/optimizing/builder.cc388
-rw-r--r--compiler/optimizing/builder.h23
-rw-r--r--compiler/optimizing/code_generator.cc23
-rw-r--r--compiler/optimizing/code_generator.h5
-rw-r--r--compiler/optimizing/code_generator_arm.cc153
-rw-r--r--compiler/optimizing/code_generator_x86.cc172
-rw-r--r--compiler/optimizing/code_generator_x86_64.cc189
-rw-r--r--compiler/optimizing/find_loops_test.cc2
-rw-r--r--compiler/optimizing/gvn_test.cc4
-rw-r--r--compiler/optimizing/linearize_test.cc2
-rw-r--r--compiler/optimizing/live_ranges_test.cc2
-rw-r--r--compiler/optimizing/liveness_test.cc2
-rw-r--r--compiler/optimizing/nodes.cc59
-rw-r--r--compiler/optimizing/nodes.h26
-rw-r--r--compiler/optimizing/optimizing_compiler.cc46
-rw-r--r--compiler/optimizing/optimizing_unit_test.h5
-rw-r--r--compiler/optimizing/register_allocator_test.cc6
-rw-r--r--compiler/optimizing/ssa_builder.h14
-rw-r--r--compiler/utils/arena_allocator.cc9
-rw-r--r--compiler/utils/arena_allocator.h11
-rw-r--r--compiler/utils/x86_64/assembler_x86_64.cc28
-rw-r--r--compiler/utils/x86_64/assembler_x86_64.h3
50 files changed, 3397 insertions, 538 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 70c7e521a6..a75417bcbc 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -86,6 +86,7 @@ LIBART_COMPILER_SRC_FILES := \
jni/quick/jni_compiler.cc \
llvm/llvm_compiler.cc \
optimizing/builder.cc \
+ optimizing/bounds_check_elimination.cc \
optimizing/code_generator.cc \
optimizing/code_generator_arm.cc \
optimizing/code_generator_arm64.cc \
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 3b3170e511..a3fe8ad038 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -606,7 +606,7 @@ enum SelectInstructionKind {
};
std::ostream& operator<<(std::ostream& os, const SelectInstructionKind& kind);
-// LIR fixup kinds for Arm
+// LIR fixup kinds for Arm and X86.
enum FixupKind {
kFixupNone,
kFixupLabel, // For labels we just adjust the offset.
@@ -624,6 +624,7 @@ enum FixupKind {
kFixupMovImmHST, // kThumb2MovImm16HST.
kFixupAlign4, // Align to 4-byte boundary.
kFixupA53Erratum835769, // Cortex A53 Erratum 835769.
+ kFixupSwitchTable, // X86_64 packed switch table.
};
std::ostream& operator<<(std::ostream& os, const FixupKind& kind);
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index 55f2abcd3f..fd4c3d7d77 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -526,6 +526,9 @@ bool MIRGraph::BasicBlockOpt(BasicBlock* bb) {
default: LOG(ERROR) << "Unexpected opcode: " << opcode;
}
mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop);
+ // Clear use count of temp VR.
+ use_counts_[mir->ssa_rep->defs[0]] = 0;
+ raw_use_counts_[mir->ssa_rep->defs[0]] = 0;
// Copy the SSA information that is relevant.
mir_next->ssa_rep->num_uses = mir->ssa_rep->num_uses;
mir_next->ssa_rep->uses = mir->ssa_rep->uses;
@@ -795,10 +798,6 @@ bool MIRGraph::LayoutBlocks(BasicBlock* bb) {
break;
}
walker = prev;
-
- if (walker->visited) {
- break;
- }
}
return false;
}
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h
index 4c7f87433d..b9d9a1111d 100644
--- a/compiler/dex/quick/arm/arm_lir.h
+++ b/compiler/dex/quick/arm/arm_lir.h
@@ -488,6 +488,7 @@ enum ArmOpcode {
kThumb2BicRRI8M, // bic rd, rn, #<const> [11110] i [000010] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
kThumb2AndRRI8M, // and rd, rn, #<const> [11110] i [000000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
kThumb2OrrRRI8M, // orr rd, rn, #<const> [11110] i [000100] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
+ kThumb2OrnRRI8M, // orn rd, rn, #<const> [11110] i [000110] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
kThumb2EorRRI8M, // eor rd, rn, #<const> [11110] i [001000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
kThumb2AddRRI8M, // add rd, rn, #<const> [11110] i [010001] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
kThumb2AdcRRI8M, // adc rd, rn, #<const> [11110] i [010101] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index 76ec9dfbc5..de93e2602b 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -788,6 +788,10 @@ const ArmEncodingMap ArmMir2Lir::EncodingMap[kArmLast] = {
kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
"orr", "!0C, !1C, #!2m", 4, kFixupNone),
+ ENCODING_MAP(kThumb2OrnRRI8M, 0xf0600000,
+ kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+ "orn", "!0C, !1C, #!2m", 4, kFixupNone),
ENCODING_MAP(kThumb2EorRRI8M, 0xf0800000,
kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index c3b19a36f0..0ae7ee3560 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -219,10 +219,11 @@ class ArmMir2Lir FINAL : public Mir2Lir {
int EncodeShift(int code, int amount);
int ModifiedImmediate(uint32_t value);
ArmConditionCode ArmConditionEncoding(ConditionCode code);
- bool InexpensiveConstantInt(int32_t value);
- bool InexpensiveConstantFloat(int32_t value);
- bool InexpensiveConstantLong(int64_t value);
- bool InexpensiveConstantDouble(int64_t value);
+ bool InexpensiveConstantInt(int32_t value) OVERRIDE;
+ bool InexpensiveConstantInt(int32_t value, Instruction::Code opcode) OVERRIDE;
+ bool InexpensiveConstantFloat(int32_t value) OVERRIDE;
+ bool InexpensiveConstantLong(int64_t value) OVERRIDE;
+ bool InexpensiveConstantDouble(int64_t value) OVERRIDE;
RegStorage AllocPreservedDouble(int s_reg);
RegStorage AllocPreservedSingle(int s_reg);
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 4aedbafcfb..1a7b4395d8 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -592,7 +592,6 @@ bool ArmMir2Lir::GetEasyMultiplyOp(int lit, ArmMir2Lir::EasyMultiplyOp* op) {
// Try to convert *lit to 1~2 RegRegRegShift/RegRegShift forms.
bool ArmMir2Lir::GetEasyMultiplyTwoOps(int lit, EasyMultiplyOp* ops) {
- GetEasyMultiplyOp(lit, &ops[0]);
if (GetEasyMultiplyOp(lit, &ops[0])) {
ops[1].op = kOpInvalid;
ops[1].shift = 0;
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 7168b9f30b..36d065f0db 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -97,31 +97,11 @@ LIR* ArmMir2Lir::LoadFPConstantValue(int r_dest, int value) {
return load_pc_rel;
}
-static int LeadingZeros(uint32_t val) {
- uint32_t alt;
- int32_t n;
- int32_t count;
-
- count = 16;
- n = 32;
- do {
- alt = val >> count;
- if (alt != 0) {
- n = n - count;
- val = alt;
- }
- count >>= 1;
- } while (count);
- return n - val;
-}
-
/*
* Determine whether value can be encoded as a Thumb2 modified
* immediate. If not, return -1. If so, return i:imm3:a:bcdefgh form.
*/
int ArmMir2Lir::ModifiedImmediate(uint32_t value) {
- int32_t z_leading;
- int32_t z_trailing;
uint32_t b0 = value & 0xff;
/* Note: case of value==0 must use 0:000:0:0000000 encoding */
@@ -135,8 +115,8 @@ int ArmMir2Lir::ModifiedImmediate(uint32_t value) {
if (value == ((b0 << 24) | (b0 << 8)))
return (0x2 << 8) | b0; /* 0:010:a:bcdefgh */
/* Can we do it with rotation? */
- z_leading = LeadingZeros(value);
- z_trailing = 32 - LeadingZeros(~value & (value - 1));
+ int z_leading = CLZ(value);
+ int z_trailing = CTZ(value);
/* A run of eight or fewer active bits? */
if ((z_leading + z_trailing) < 24)
return -1; /* No - bail */
@@ -152,6 +132,71 @@ bool ArmMir2Lir::InexpensiveConstantInt(int32_t value) {
return (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0);
}
+bool ArmMir2Lir::InexpensiveConstantInt(int32_t value, Instruction::Code opcode) {
+ switch (opcode) {
+ case Instruction::ADD_INT:
+ case Instruction::ADD_INT_2ADDR:
+ case Instruction::SUB_INT:
+ case Instruction::SUB_INT_2ADDR:
+ if ((value >> 12) == (value >> 31)) { // Signed 12-bit, RRI12 versions of ADD/SUB.
+ return true;
+ }
+ FALLTHROUGH_INTENDED;
+ case Instruction::IF_EQ:
+ case Instruction::IF_NE:
+ case Instruction::IF_LT:
+ case Instruction::IF_GE:
+ case Instruction::IF_GT:
+ case Instruction::IF_LE:
+ return (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(-value) >= 0);
+ case Instruction::SHL_INT:
+ case Instruction::SHL_INT_2ADDR:
+ case Instruction::SHR_INT:
+ case Instruction::SHR_INT_2ADDR:
+ case Instruction::USHR_INT:
+ case Instruction::USHR_INT_2ADDR:
+ return true;
+ case Instruction::CONST:
+ case Instruction::CONST_4:
+ case Instruction::CONST_16:
+ if ((value >> 16) == 0) {
+ return true; // movw, 16-bit unsigned.
+ }
+ FALLTHROUGH_INTENDED;
+ case Instruction::AND_INT:
+ case Instruction::AND_INT_2ADDR:
+ case Instruction::AND_INT_LIT16:
+ case Instruction::AND_INT_LIT8:
+ case Instruction::OR_INT:
+ case Instruction::OR_INT_2ADDR:
+ case Instruction::OR_INT_LIT16:
+ case Instruction::OR_INT_LIT8:
+ return (ModifiedImmediate(value) >= 0) || (ModifiedImmediate(~value) >= 0);
+ case Instruction::XOR_INT:
+ case Instruction::XOR_INT_2ADDR:
+ case Instruction::XOR_INT_LIT16:
+ case Instruction::XOR_INT_LIT8:
+ return (ModifiedImmediate(value) >= 0);
+ case Instruction::MUL_INT:
+ case Instruction::MUL_INT_2ADDR:
+ case Instruction::MUL_INT_LIT8:
+ case Instruction::MUL_INT_LIT16:
+ case Instruction::DIV_INT:
+ case Instruction::DIV_INT_2ADDR:
+ case Instruction::DIV_INT_LIT8:
+ case Instruction::DIV_INT_LIT16:
+ case Instruction::REM_INT:
+ case Instruction::REM_INT_2ADDR:
+ case Instruction::REM_INT_LIT8:
+ case Instruction::REM_INT_LIT16: {
+ EasyMultiplyOp ops[2];
+ return GetEasyMultiplyTwoOps(value, ops);
+ }
+ default:
+ return false;
+ }
+}
+
bool ArmMir2Lir::InexpensiveConstantFloat(int32_t value) {
return EncodeImmSingle(value) >= 0;
}
@@ -510,7 +555,7 @@ LIR* ArmMir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, in
op = (op == kOpAdd) ? kOpSub : kOpAdd;
}
}
- if (mod_imm < 0 && (abs_value & 0x3ff) == abs_value) {
+ if (mod_imm < 0 && (abs_value >> 12) == 0) {
// This is deliberately used only if modified immediate encoding is inadequate since
// we sometimes actually use the flags for small values but not necessarily low regs.
if (op == kOpAdd)
@@ -542,6 +587,12 @@ LIR* ArmMir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, in
case kOpOr:
opcode = kThumb2OrrRRI8M;
alt_opcode = kThumb2OrrRRR;
+ if (mod_imm < 0) {
+ mod_imm = ModifiedImmediate(~value);
+ if (mod_imm >= 0) {
+ opcode = kThumb2OrnRRI8M;
+ }
+ }
break;
case kOpAnd:
if (mod_imm < 0) {
@@ -855,12 +906,12 @@ LIR* ArmMir2Lir::LoadStoreUsingInsnWithOffsetImm8Shl2(ArmOpcode opcode, RegStora
*/
LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
OpSize size) {
- LIR* load = NULL;
- ArmOpcode opcode = kThumbBkpt;
+ LIR* load = nullptr;
+ ArmOpcode opcode16 = kThumbBkpt; // 16-bit Thumb opcode.
+ ArmOpcode opcode32 = kThumbBkpt; // 32-bit Thumb2 opcode.
bool short_form = false;
- bool thumb2Form = (displacement < 4092 && displacement >= 0);
bool all_low = r_dest.Is32Bit() && r_base.Low8() && r_dest.Low8();
- int encoded_disp = displacement;
+ int scale = 0; // Used for opcode16 and some indexed loads.
bool already_generated = false;
switch (size) {
case kDouble:
@@ -888,57 +939,45 @@ LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorag
already_generated = true;
break;
}
+ DCHECK_EQ((displacement & 0x3), 0);
+ scale = 2;
if (r_dest.Low8() && (r_base == rs_rARM_PC) && (displacement <= 1020) &&
(displacement >= 0)) {
short_form = true;
- encoded_disp >>= 2;
- opcode = kThumbLdrPcRel;
+ opcode16 = kThumbLdrPcRel;
} else if (r_dest.Low8() && (r_base == rs_rARM_SP) && (displacement <= 1020) &&
(displacement >= 0)) {
short_form = true;
- encoded_disp >>= 2;
- opcode = kThumbLdrSpRel;
- } else if (all_low && displacement < 128 && displacement >= 0) {
- DCHECK_EQ((displacement & 0x3), 0);
- short_form = true;
- encoded_disp >>= 2;
- opcode = kThumbLdrRRI5;
- } else if (thumb2Form) {
- short_form = true;
- opcode = kThumb2LdrRRI12;
+ opcode16 = kThumbLdrSpRel;
+ } else {
+ short_form = all_low && (displacement >> (5 + scale)) == 0;
+ opcode16 = kThumbLdrRRI5;
+ opcode32 = kThumb2LdrRRI12;
}
break;
case kUnsignedHalf:
- if (all_low && displacement < 64 && displacement >= 0) {
- DCHECK_EQ((displacement & 0x1), 0);
- short_form = true;
- encoded_disp >>= 1;
- opcode = kThumbLdrhRRI5;
- } else if (displacement < 4092 && displacement >= 0) {
- short_form = true;
- opcode = kThumb2LdrhRRI12;
- }
+ DCHECK_EQ((displacement & 0x1), 0);
+ scale = 1;
+ short_form = all_low && (displacement >> (5 + scale)) == 0;
+ opcode16 = kThumbLdrhRRI5;
+ opcode32 = kThumb2LdrhRRI12;
break;
case kSignedHalf:
- if (thumb2Form) {
- short_form = true;
- opcode = kThumb2LdrshRRI12;
- }
+ DCHECK_EQ((displacement & 0x1), 0);
+ scale = 1;
+ DCHECK_EQ(opcode16, kThumbBkpt); // Not available.
+ opcode32 = kThumb2LdrshRRI12;
break;
case kUnsignedByte:
- if (all_low && displacement < 32 && displacement >= 0) {
- short_form = true;
- opcode = kThumbLdrbRRI5;
- } else if (thumb2Form) {
- short_form = true;
- opcode = kThumb2LdrbRRI12;
- }
+ DCHECK_EQ(scale, 0); // Keep scale = 0.
+ short_form = all_low && (displacement >> (5 + scale)) == 0;
+ opcode16 = kThumbLdrbRRI5;
+ opcode32 = kThumb2LdrbRRI12;
break;
case kSignedByte:
- if (thumb2Form) {
- short_form = true;
- opcode = kThumb2LdrsbRRI12;
- }
+ DCHECK_EQ(scale, 0); // Keep scale = 0.
+ DCHECK_EQ(opcode16, kThumbBkpt); // Not available.
+ opcode32 = kThumb2LdrsbRRI12;
break;
default:
LOG(FATAL) << "Bad size: " << size;
@@ -946,12 +985,33 @@ LIR* ArmMir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorag
if (!already_generated) {
if (short_form) {
- load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), encoded_disp);
+ load = NewLIR3(opcode16, r_dest.GetReg(), r_base.GetReg(), displacement >> scale);
+ } else if ((displacement >> 12) == 0) { // Thumb2 form.
+ load = NewLIR3(opcode32, r_dest.GetReg(), r_base.GetReg(), displacement);
+ } else if (!InexpensiveConstantInt(displacement >> scale, Instruction::CONST) &&
+ InexpensiveConstantInt(displacement & ~0x00000fff, Instruction::ADD_INT)) {
+ // In this case, using LoadIndexed would emit 3 insns (movw+movt+ldr) but we can
+ // actually do it in two because we know that the kOpAdd is a single insn. On the
+ // other hand, we introduce an extra dependency, so this is not necessarily faster.
+ if (opcode16 != kThumbBkpt && r_dest.Low8() &&
+ InexpensiveConstantInt(displacement & ~(0x1f << scale), Instruction::ADD_INT)) {
+ // We can use the 16-bit Thumb opcode for the load.
+ OpRegRegImm(kOpAdd, r_dest, r_base, displacement & ~(0x1f << scale));
+ load = NewLIR3(opcode16, r_dest.GetReg(), r_dest.GetReg(), (displacement >> scale) & 0x1f);
+ } else {
+ DCHECK_NE(opcode32, kThumbBkpt);
+ OpRegRegImm(kOpAdd, r_dest, r_base, displacement & ~0x00000fff);
+ load = NewLIR3(opcode32, r_dest.GetReg(), r_dest.GetReg(), displacement & 0x00000fff);
+ }
} else {
+ if (!InexpensiveConstantInt(displacement >> scale, Instruction::CONST) ||
+ (scale != 0 && InexpensiveConstantInt(displacement, Instruction::CONST))) {
+ scale = 0; // Prefer unscaled indexing if the same number of insns.
+ }
RegStorage reg_offset = AllocTemp();
- LoadConstant(reg_offset, encoded_disp);
+ LoadConstant(reg_offset, displacement >> scale);
DCHECK(!r_dest.IsFloat());
- load = LoadBaseIndexed(r_base, reg_offset, r_dest, 0, size);
+ load = LoadBaseIndexed(r_base, reg_offset, r_dest, scale, size);
FreeTemp(reg_offset);
}
}
@@ -997,12 +1057,12 @@ LIR* ArmMir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_
LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src,
OpSize size) {
- LIR* store = NULL;
- ArmOpcode opcode = kThumbBkpt;
+ LIR* store = nullptr;
+ ArmOpcode opcode16 = kThumbBkpt; // 16-bit Thumb opcode.
+ ArmOpcode opcode32 = kThumbBkpt; // 32-bit Thumb2 opcode.
bool short_form = false;
- bool thumb2Form = (displacement < 4092 && displacement >= 0);
bool all_low = r_src.Is32Bit() && r_base.Low8() && r_src.Low8();
- int encoded_disp = displacement;
+ int scale = 0; // Used for opcode16 and some indexed stores.
bool already_generated = false;
switch (size) {
case kDouble:
@@ -1034,53 +1094,67 @@ LIR* ArmMir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStora
already_generated = true;
break;
}
+ DCHECK_EQ((displacement & 0x3), 0);
+ scale = 2;
if (r_src.Low8() && (r_base == rs_r13sp) && (displacement <= 1020) && (displacement >= 0)) {
short_form = true;
- encoded_disp >>= 2;
- opcode = kThumbStrSpRel;
- } else if (all_low && displacement < 128 && displacement >= 0) {
- DCHECK_EQ((displacement & 0x3), 0);
- short_form = true;
- encoded_disp >>= 2;
- opcode = kThumbStrRRI5;
- } else if (thumb2Form) {
- short_form = true;
- opcode = kThumb2StrRRI12;
+ opcode16 = kThumbStrSpRel;
+ } else {
+ short_form = all_low && (displacement >> (5 + scale)) == 0;
+ opcode16 = kThumbStrRRI5;
+ opcode32 = kThumb2StrRRI12;
}
break;
case kUnsignedHalf:
case kSignedHalf:
- if (all_low && displacement < 64 && displacement >= 0) {
- DCHECK_EQ((displacement & 0x1), 0);
- short_form = true;
- encoded_disp >>= 1;
- opcode = kThumbStrhRRI5;
- } else if (thumb2Form) {
- short_form = true;
- opcode = kThumb2StrhRRI12;
- }
+ DCHECK_EQ((displacement & 0x1), 0);
+ scale = 1;
+ short_form = all_low && (displacement >> (5 + scale)) == 0;
+ opcode16 = kThumbStrhRRI5;
+ opcode32 = kThumb2StrhRRI12;
break;
case kUnsignedByte:
case kSignedByte:
- if (all_low && displacement < 32 && displacement >= 0) {
- short_form = true;
- opcode = kThumbStrbRRI5;
- } else if (thumb2Form) {
- short_form = true;
- opcode = kThumb2StrbRRI12;
- }
+ DCHECK_EQ(scale, 0); // Keep scale = 0.
+ short_form = all_low && (displacement >> (5 + scale)) == 0;
+ opcode16 = kThumbStrbRRI5;
+ opcode32 = kThumb2StrbRRI12;
break;
default:
LOG(FATAL) << "Bad size: " << size;
}
if (!already_generated) {
if (short_form) {
- store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), encoded_disp);
+ store = NewLIR3(opcode16, r_src.GetReg(), r_base.GetReg(), displacement >> scale);
+ } else if ((displacement >> 12) == 0) {
+ store = NewLIR3(opcode32, r_src.GetReg(), r_base.GetReg(), displacement);
+ } else if (!InexpensiveConstantInt(displacement >> scale, Instruction::CONST) &&
+ InexpensiveConstantInt(displacement & ~0x00000fff, Instruction::ADD_INT)) {
+ // In this case, using StoreIndexed would emit 3 insns (movw+movt+str) but we can
+ // actually do it in two because we know that the kOpAdd is a single insn. On the
+ // other hand, we introduce an extra dependency, so this is not necessarily faster.
+ RegStorage r_scratch = AllocTemp();
+ if (opcode16 != kThumbBkpt && r_src.Low8() && r_scratch.Low8() &&
+ InexpensiveConstantInt(displacement & ~(0x1f << scale), Instruction::ADD_INT)) {
+ // We can use the 16-bit Thumb opcode for the store.
+ OpRegRegImm(kOpAdd, r_scratch, r_base, displacement & ~(0x1f << scale));
+ store = NewLIR3(opcode16, r_src.GetReg(), r_scratch.GetReg(),
+ (displacement >> scale) & 0x1f);
+ } else {
+ DCHECK_NE(opcode32, kThumbBkpt);
+ OpRegRegImm(kOpAdd, r_scratch, r_base, displacement & ~0x00000fff);
+ store = NewLIR3(opcode32, r_src.GetReg(), r_scratch.GetReg(), displacement & 0x00000fff);
+ }
+ FreeTemp(r_scratch);
} else {
+ if (!InexpensiveConstantInt(displacement >> scale, Instruction::CONST) ||
+ (scale != 0 && InexpensiveConstantInt(displacement, Instruction::CONST))) {
+ scale = 0; // Prefer unscaled indexing if the same number of insns.
+ }
RegStorage r_scratch = AllocTemp();
- LoadConstant(r_scratch, encoded_disp);
+ LoadConstant(r_scratch, displacement >> scale);
DCHECK(!r_src.IsFloat());
- store = StoreBaseIndexed(r_base, r_scratch, r_src, 0, size);
+ store = StoreBaseIndexed(r_base, r_scratch, r_src, scale, size);
FreeTemp(r_scratch);
}
}
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index e1b4546e6c..cc61e93d82 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -538,9 +538,12 @@ void Mir2Lir::InstallSwitchTables() {
bx_offset = tab_rec->anchor->offset + 4;
break;
case kX86:
- case kX86_64:
bx_offset = 0;
break;
+ case kX86_64:
+ // RIP relative to switch table.
+ bx_offset = tab_rec->offset;
+ break;
case kArm64:
case kMips:
bx_offset = tab_rec->anchor->offset;
@@ -775,6 +778,10 @@ void Mir2Lir::CreateNativeGcMap() {
": " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
native_gc_map_builder.AddEntry(native_offset, references);
}
+
+ // Maybe not necessary, but this could help prevent errors where we access the verified method
+ // after it has been deleted.
+ mir_graph_->GetCurrentDexCompilationUnit()->ClearVerifiedMethod();
}
/* Determine the offset of each literal field */
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 84d68d2b7b..ad2ed01ad5 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -553,7 +553,7 @@ std::ostream& operator<<(std::ostream& os, const X86OpCode& rhs) {
}
static bool NeedsRex(int32_t raw_reg) {
- return RegStorage::RegNum(raw_reg) > 7;
+ return raw_reg != kRIPReg && RegStorage::RegNum(raw_reg) > 7;
}
static uint8_t LowRegisterBits(int32_t raw_reg) {
@@ -689,7 +689,13 @@ size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int32_t raw_reg, int
entry->opcode != kX86Lea32RM && entry->opcode != kX86Lea64RM) {
DCHECK_NE(entry->flags & (IS_LOAD | IS_STORE), UINT64_C(0)) << entry->name;
}
- size += IS_SIMM8(displacement) ? 1 : 4;
+ if (raw_base == kRIPReg) {
+ DCHECK(cu_->target64) <<
"Attempt to use 64-bit RIP addressing with instruction " << entry->name;
+ size += 4;
+ } else {
+ size += IS_SIMM8(displacement) ? 1 : 4;
+ }
}
}
size += entry->skeleton.immediate_bytes;
@@ -1022,14 +1028,24 @@ void X86Mir2Lir::EmitModrmThread(uint8_t reg_or_opcode) {
void X86Mir2Lir::EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int32_t disp) {
DCHECK_LT(reg_or_opcode, 8);
- DCHECK_LT(base, 8);
- uint8_t modrm = (ModrmForDisp(base, disp) << 6) | (reg_or_opcode << 3) | base;
- code_buffer_.push_back(modrm);
- if (base == rs_rX86_SP_32.GetRegNum()) {
- // Special SIB for SP base
- code_buffer_.push_back(0 << 6 | rs_rX86_SP_32.GetRegNum() << 3 | rs_rX86_SP_32.GetRegNum());
+ if (base == kRIPReg) {
+ // x86_64 RIP handling: always 32 bit displacement.
+ uint8_t modrm = (0x0 << 6) | (reg_or_opcode << 3) | 0x5;
+ code_buffer_.push_back(modrm);
+ code_buffer_.push_back(disp & 0xFF);
+ code_buffer_.push_back((disp >> 8) & 0xFF);
+ code_buffer_.push_back((disp >> 16) & 0xFF);
+ code_buffer_.push_back((disp >> 24) & 0xFF);
+ } else {
+ DCHECK_LT(base, 8);
+ uint8_t modrm = (ModrmForDisp(base, disp) << 6) | (reg_or_opcode << 3) | base;
+ code_buffer_.push_back(modrm);
+ if (base == rs_rX86_SP_32.GetRegNum()) {
+ // Special SIB for SP base
+ code_buffer_.push_back(0 << 6 | rs_rX86_SP_32.GetRegNum() << 3 | rs_rX86_SP_32.GetRegNum());
+ }
+ EmitDisp(base, disp);
}
- EmitDisp(base, disp);
}
void X86Mir2Lir::EmitModrmSibDisp(uint8_t reg_or_opcode, uint8_t base, uint8_t index,
@@ -1141,7 +1157,7 @@ void X86Mir2Lir::EmitMemReg(const X86EncodingMap* entry, int32_t raw_base, int32
CheckValidByteRegister(entry, raw_reg);
EmitPrefixAndOpcode(entry, raw_reg, NO_REG, raw_base);
uint8_t low_reg = LowRegisterBits(raw_reg);
- uint8_t low_base = LowRegisterBits(raw_base);
+ uint8_t low_base = (raw_base == kRIPReg) ? raw_base : LowRegisterBits(raw_base);
EmitModrmDisp(low_reg, low_base, disp);
DCHECK_EQ(0, entry->skeleton.modrm_opcode);
DCHECK_EQ(0, entry->skeleton.ax_opcode);
@@ -1758,12 +1774,29 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) {
LIR *target_lir = lir->target;
DCHECK(target_lir != NULL);
CodeOffset target = target_lir->offset;
- lir->operands[2] = target;
- int newSize = GetInsnSize(lir);
- if (newSize != lir->flags.size) {
- lir->flags.size = newSize;
- res = kRetryAll;
+ // Handle 64 bit RIP addressing.
+ if (lir->operands[1] == kRIPReg) {
+ // Offset is relative to next instruction.
+ lir->operands[2] = target - (lir->offset + lir->flags.size);
+ } else {
+ lir->operands[2] = target;
+ int newSize = GetInsnSize(lir);
+ if (newSize != lir->flags.size) {
+ lir->flags.size = newSize;
+ res = kRetryAll;
+ }
}
+ } else if (lir->flags.fixup == kFixupSwitchTable) {
+ DCHECK(cu_->target64);
+ DCHECK_EQ(lir->opcode, kX86Lea64RM) << "Unknown instruction: " << X86Mir2Lir::EncodingMap[lir->opcode].name;
+ DCHECK_EQ(lir->operands[1], static_cast<int>(kRIPReg));
+ // Grab the target offset from the saved data.
+ Mir2Lir::EmbeddedData* tab_rec =
+ reinterpret_cast<Mir2Lir::EmbeddedData*>(UnwrapPointer(lir->operands[4]));
+ CodeOffset target = tab_rec->offset;
+ // Handle 64 bit RIP addressing.
+ // Offset is relative to next instruction.
+ lir->operands[2] = target - (lir->offset + lir->flags.size);
}
break;
}
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index be10d93a97..544ac3b815 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -142,25 +142,7 @@ void X86Mir2Lir::GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocat
// Get the switch value
rl_src = LoadValue(rl_src, kCoreReg);
- // NewLIR0(kX86Bkpt);
- // Materialize a pointer to the switch table
- RegStorage start_of_method_reg;
- if (base_of_code_ != nullptr) {
- // We can use the saved value.
- RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
- if (rl_method.wide) {
- rl_method = LoadValueWide(rl_method, kCoreReg);
- } else {
- rl_method = LoadValue(rl_method, kCoreReg);
- }
- start_of_method_reg = rl_method.reg;
- store_method_addr_used_ = true;
- } else {
- start_of_method_reg = AllocTempRef();
- NewLIR1(kX86StartOfMethod, start_of_method_reg.GetReg());
- }
- DCHECK_EQ(start_of_method_reg.Is64Bit(), cu_->target64);
int low_key = s4FromSwitchData(&table[2]);
RegStorage keyReg;
// Remove the bias, if necessary
@@ -170,19 +152,49 @@ void X86Mir2Lir::GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocat
keyReg = AllocTemp();
OpRegRegImm(kOpSub, keyReg, rl_src.reg, low_key);
}
+
// Bounds check - if < 0 or >= size continue following switch
OpRegImm(kOpCmp, keyReg, size - 1);
LIR* branch_over = OpCondBranch(kCondHi, NULL);
- // Load the displacement from the switch table
- RegStorage disp_reg = AllocTemp();
- NewLIR5(kX86PcRelLoadRA, disp_reg.GetReg(), start_of_method_reg.GetReg(), keyReg.GetReg(),
- 2, WrapPointer(tab_rec));
- // Add displacement to start of method
- OpRegReg(kOpAdd, start_of_method_reg, cu_->target64 ? As64BitReg(disp_reg) : disp_reg);
+ RegStorage addr_for_jump;
+ if (cu_->target64) {
+ RegStorage table_base = AllocTempWide();
+ // Load the address of the table into table_base.
+ LIR* lea = RawLIR(current_dalvik_offset_, kX86Lea64RM, table_base.GetReg(), kRIPReg,
+ 256, 0, WrapPointer(tab_rec));
+ lea->flags.fixup = kFixupSwitchTable;
+ AppendLIR(lea);
+
+ // Load the offset from the table out of the table.
+ addr_for_jump = AllocTempWide();
+ NewLIR5(kX86MovsxdRA, addr_for_jump.GetReg(), table_base.GetReg(), keyReg.GetReg(), 2, 0);
+
+ // Add the offset from the table to the table base.
+ OpRegReg(kOpAdd, addr_for_jump, table_base);
+ } else {
+ // Materialize a pointer to the switch table.
+ RegStorage start_of_method_reg;
+ if (base_of_code_ != nullptr) {
+ // We can use the saved value.
+ RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
+ rl_method = LoadValue(rl_method, kCoreReg);
+ start_of_method_reg = rl_method.reg;
+ store_method_addr_used_ = true;
+ } else {
+ start_of_method_reg = AllocTempRef();
+ NewLIR1(kX86StartOfMethod, start_of_method_reg.GetReg());
+ }
+ // Load the displacement from the switch table.
+ addr_for_jump = AllocTemp();
+ NewLIR5(kX86PcRelLoadRA, addr_for_jump.GetReg(), start_of_method_reg.GetReg(), keyReg.GetReg(),
+ 2, WrapPointer(tab_rec));
+ // Add displacement to start of method.
+ OpRegReg(kOpAdd, addr_for_jump, start_of_method_reg);
+ }
+
// ..and go!
- LIR* switch_branch = NewLIR1(kX86JmpR, start_of_method_reg.GetReg());
- tab_rec->anchor = switch_branch;
+ tab_rec->anchor = NewLIR1(kX86JmpR, addr_for_jump.GetReg());
/* branch_over target here */
LIR* target = NewLIR0(kPseudoTargetLabel);
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 80cdc83497..85ab92bc08 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -1289,6 +1289,18 @@ bool X86Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) {
}
LIR* X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
+ if (cu_->target64) {
+ // We can do this directly using RIP addressing.
+ // We don't know the proper offset for the value, so pick one that will force
+ // 4 byte offset. We will fix this up in the assembler later to have the right
+ // value.
+ ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
+ LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), kRIPReg, 256);
+ res->target = target;
+ res->flags.fixup = kFixupLoad;
+ return res;
+ }
+
CHECK(base_of_code_ != nullptr);
// Address the start of the method
@@ -1309,7 +1321,6 @@ LIR* X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
0, 0, target);
res->target = target;
res->flags.fixup = kFixupLoad;
- store_method_addr_used_ = true;
return res;
}
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index e16a70b555..5f6cdda0d3 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -206,7 +206,7 @@ static const RegStorage RegStorage32FromSpecialTargetRegister_Target64[] {
RegStorage::InvalidReg(), // kSelf - Thread pointer.
RegStorage::InvalidReg(), // kSuspend - Used to reduce suspend checks for some targets.
RegStorage::InvalidReg(), // kLr - no register as the return address is pushed on entry.
- RegStorage::InvalidReg(), // kPc - TODO: RIP based addressing.
+ RegStorage(kRIPReg), // kPc
rs_rX86_SP_32, // kSp
rs_rDI, // kArg0
rs_rSI, // kArg1
@@ -662,6 +662,12 @@ void X86Mir2Lir::CompilerInitializeRegAlloc() {
xp_reg_info->SetIsTemp(true);
}
+ // Special Handling for x86_64 RIP addressing.
+ if (cu_->target64) {
+ RegisterInfo* info = new (arena_) RegisterInfo(RegStorage(kRIPReg), kEncodeNone);
+ reginfo_map_[kRIPReg] = info;
+ }
+
// Alias single precision xmm to double xmms.
// TODO: as needed, add larger vector sizes - alias all to the largest.
for (RegisterInfo* info : reg_pool_->sp_regs_) {
@@ -1609,9 +1615,6 @@ void X86Mir2Lir::GenConst128(MIR* mir) {
}
void X86Mir2Lir::AppendOpcodeWithConst(X86OpCode opcode, int reg, MIR* mir) {
- // The literal pool needs position independent logic.
- store_method_addr_used_ = true;
-
// To deal with correct memory ordering, reverse order of constants.
int32_t constants[4];
constants[3] = mir->dalvikInsn.arg[0];
@@ -1625,20 +1628,28 @@ void X86Mir2Lir::AppendOpcodeWithConst(X86OpCode opcode, int reg, MIR* mir) {
data_target = AddVectorLiteral(constants);
}
- // Address the start of the method.
- RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
- if (rl_method.wide) {
- rl_method = LoadValueWide(rl_method, kCoreReg);
- } else {
- rl_method = LoadValue(rl_method, kCoreReg);
- }
-
// Load the proper value from the literal area.
// We don't know the proper offset for the value, so pick one that will force
- // 4 byte offset. We will fix this up in the assembler later to have the right
- // value.
+ // 4 byte offset. We will fix this up in the assembler later to have the
+ // right value.
+ LIR* load;
ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
- LIR *load = NewLIR3(opcode, reg, rl_method.reg.GetReg(), 256 /* bogus */);
+ if (cu_->target64) {
+ load = NewLIR3(opcode, reg, kRIPReg, 256 /* bogus */);
+ } else {
+ // Address the start of the method.
+ RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
+ if (rl_method.wide) {
+ rl_method = LoadValueWide(rl_method, kCoreReg);
+ } else {
+ rl_method = LoadValue(rl_method, kCoreReg);
+ }
+
+ load = NewLIR3(opcode, reg, rl_method.reg.GetReg(), 256 /* bogus */);
+
+ // The literal pool needs position independent logic.
+ store_method_addr_used_ = true;
+ }
load->flags.fixup = kFixupLoad;
load->target = data_target;
}
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index ad3222cd84..3b58698619 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -570,32 +570,36 @@ LIR* X86Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) {
if (is_fp) {
DCHECK(r_dest.IsDouble());
if (value == 0) {
- return NewLIR2(kX86XorpsRR, low_reg_val, low_reg_val);
- } else if (base_of_code_ != nullptr) {
+ return NewLIR2(kX86XorpdRR, low_reg_val, low_reg_val);
+ } else if (base_of_code_ != nullptr || cu_->target64) {
// We will load the value from the literal area.
LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
if (data_target == NULL) {
data_target = AddWideData(&literal_list_, val_lo, val_hi);
}
- // Address the start of the method
- RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
- if (rl_method.wide) {
- rl_method = LoadValueWide(rl_method, kCoreReg);
- } else {
- rl_method = LoadValue(rl_method, kCoreReg);
- }
-
// Load the proper value from the literal area.
- // We don't know the proper offset for the value, so pick one that will force
- // 4 byte offset. We will fix this up in the assembler later to have the right
- // value.
+ // We don't know the proper offset for the value, so pick one that
+ // will force 4 byte offset. We will fix this up in the assembler
+ // later to have the right value.
ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
- res = LoadBaseDisp(rl_method.reg, 256 /* bogus */, RegStorage::FloatSolo64(low_reg_val),
- kDouble, kNotVolatile);
+ if (cu_->target64) {
+ res = NewLIR3(kX86MovsdRM, low_reg_val, kRIPReg, 256 /* bogus */);
+ } else {
+ // Address the start of the method.
+ RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
+ if (rl_method.wide) {
+ rl_method = LoadValueWide(rl_method, kCoreReg);
+ } else {
+ rl_method = LoadValue(rl_method, kCoreReg);
+ }
+
+ res = LoadBaseDisp(rl_method.reg, 256 /* bogus */, RegStorage::FloatSolo64(low_reg_val),
+ kDouble, kNotVolatile);
+ store_method_addr_used_ = true;
+ }
res->target = data_target;
res->flags.fixup = kFixupLoad;
- store_method_addr_used_ = true;
} else {
if (r_dest.IsPair()) {
if (val_lo == 0) {
@@ -960,12 +964,14 @@ void X86Mir2Lir::AnalyzeMIR() {
curr_bb = iter.Next();
}
- // Did we need a pointer to the method code?
+ // Did we need a pointer to the method code? Not in 64 bit mode.
+ base_of_code_ = nullptr;
+
+ // store_method_addr_ must be false for x86_64, since RIP addressing is used.
+ CHECK(!(cu_->target64 && store_method_addr_));
if (store_method_addr_) {
- base_of_code_ = mir_graph_->GetNewCompilerTemp(kCompilerTempBackend, cu_->target64 == true);
+ base_of_code_ = mir_graph_->GetNewCompilerTemp(kCompilerTempBackend, false);
DCHECK(base_of_code_ != nullptr);
- } else {
- base_of_code_ = nullptr;
}
}
@@ -994,19 +1000,22 @@ void X86Mir2Lir::AnalyzeExtendedMIR(int opcode, BasicBlock* bb, MIR* mir) {
AnalyzeFPInstruction(opcode, bb, mir);
break;
case kMirOpConstVector:
- store_method_addr_ = true;
+ if (!cu_->target64) {
+ store_method_addr_ = true;
+ }
break;
case kMirOpPackedMultiply:
case kMirOpPackedShiftLeft:
case kMirOpPackedSignedShiftRight:
- case kMirOpPackedUnsignedShiftRight: {
- // Byte emulation requires constants from the literal pool.
- OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
- if (opsize == kSignedByte || opsize == kUnsignedByte) {
- store_method_addr_ = true;
+ case kMirOpPackedUnsignedShiftRight:
+ if (!cu_->target64) {
+ // Byte emulation requires constants from the literal pool.
+ OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
+ if (opsize == kSignedByte || opsize == kUnsignedByte) {
+ store_method_addr_ = true;
+ }
}
break;
- }
default:
// Ignore the rest.
break;
@@ -1016,6 +1025,7 @@ void X86Mir2Lir::AnalyzeExtendedMIR(int opcode, BasicBlock* bb, MIR* mir) {
void X86Mir2Lir::AnalyzeMIR(int opcode, BasicBlock* bb, MIR* mir) {
// Looking for
// - Do we need a pointer to the code (used for packed switches and double lits)?
+ // 64 bit uses RIP addressing instead.
switch (opcode) {
// Instructions referencing doubles.
@@ -1038,7 +1048,9 @@ void X86Mir2Lir::AnalyzeMIR(int opcode, BasicBlock* bb, MIR* mir) {
// Packed switches and array fills need a pointer to the base of the method.
case Instruction::FILL_ARRAY_DATA:
case Instruction::PACKED_SWITCH:
- store_method_addr_ = true;
+ if (!cu_->target64) {
+ store_method_addr_ = true;
+ }
break;
case Instruction::INVOKE_STATIC:
case Instruction::INVOKE_STATIC_RANGE:
@@ -1115,7 +1127,8 @@ RegLocation X86Mir2Lir::UpdateLocWideTyped(RegLocation loc) {
void X86Mir2Lir::AnalyzeInvokeStatic(int opcode, BasicBlock* bb, MIR* mir) {
UNUSED(opcode, bb);
- // For now this is only actual for x86-32.
+
+ // 64 bit RIP addressing doesn't need store_method_addr_ set.
if (cu_->target64) {
return;
}
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 76a67c4d6c..3e0a8527a3 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -217,6 +217,9 @@ enum X86NativeRegisterPool {
xr14 = RegStorage::k128BitSolo | 14,
xr15 = RegStorage::k128BitSolo | 15,
+ // Special value for RIP 64 bit addressing.
+ kRIPReg = 255,
+
// TODO: as needed, add 256, 512 and 1024-bit xmm views.
};
diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc
index 4929b5b2b0..932a532e56 100644
--- a/compiler/dex/verification_results.cc
+++ b/compiler/dex/verification_results.cc
@@ -84,6 +84,15 @@ const VerifiedMethod* VerificationResults::GetVerifiedMethod(MethodReference ref
return (it != verified_methods_.end()) ? it->second : nullptr;
}
+void VerificationResults::RemoveVerifiedMethod(MethodReference ref) {
+ WriterMutexLock mu(Thread::Current(), verified_methods_lock_);
+ auto it = verified_methods_.find(ref);
+ if (it != verified_methods_.end()) {
+ delete it->second;
+ verified_methods_.erase(it);
+ }
+}
+
void VerificationResults::AddRejectedClass(ClassReference ref) {
{
WriterMutexLock mu(Thread::Current(), rejected_classes_lock_);
diff --git a/compiler/dex/verification_results.h b/compiler/dex/verification_results.h
index 0e7923fbc3..7fc2a2363d 100644
--- a/compiler/dex/verification_results.h
+++ b/compiler/dex/verification_results.h
@@ -48,6 +48,7 @@ class VerificationResults {
const VerifiedMethod* GetVerifiedMethod(MethodReference ref)
LOCKS_EXCLUDED(verified_methods_lock_);
+ void RemoveVerifiedMethod(MethodReference ref) LOCKS_EXCLUDED(verified_methods_lock_);
void AddRejectedClass(ClassReference ref) LOCKS_EXCLUDED(rejected_classes_lock_);
bool IsClassRejected(ClassReference ref) LOCKS_EXCLUDED(rejected_classes_lock_);
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index ebf7874dcf..3a91b08d5e 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -21,7 +21,6 @@
#include "dex/compiler_ir.h"
#include "dex_compilation_unit.h"
-#include "field_helper.h"
#include "mirror/art_field-inl.h"
#include "mirror/art_method-inl.h"
#include "mirror/class_loader.h"
@@ -134,10 +133,9 @@ inline std::pair<bool, bool> CompilerDriver::IsFastStaticField(
} else {
// Search dex file for localized ssb index, may fail if field's class is a parent
// of the class mentioned in the dex file and there is no dex cache entry.
- StackHandleScope<1> hs(Thread::Current());
+ std::string temp;
const DexFile::StringId* string_id =
- dex_file->FindStringId(
- FieldHelper(hs.NewHandle(resolved_field)).GetDeclaringClassDescriptor());
+ dex_file->FindStringId(resolved_field->GetDeclaringClass()->GetDescriptor(&temp));
if (string_id != nullptr) {
const DexFile::TypeId* type_id =
dex_file->FindTypeId(dex_file->GetIndexForStringId(*string_id));
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 2e9f8355f2..e4274712d4 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -19,9 +19,14 @@
#define ATRACE_TAG ATRACE_TAG_DALVIK
#include <utils/Trace.h>
+#include <unordered_set>
#include <vector>
#include <unistd.h>
+#ifndef __APPLE__
+#include <malloc.h> // For mallinfo
+#endif
+
#include "base/stl_util.h"
#include "base/timing_logger.h"
#include "class_linker.h"
@@ -53,6 +58,7 @@
#include "ScopedLocalRef.h"
#include "handle_scope-inl.h"
#include "thread.h"
+#include "thread_list.h"
#include "thread_pool.h"
#include "trampolines/trampoline_compiler.h"
#include "transaction.h"
@@ -495,6 +501,7 @@ void CompilerDriver::CompileAll(jobject class_loader,
TimingLogger* timings) {
DCHECK(!Runtime::Current()->IsStarted());
std::unique_ptr<ThreadPool> thread_pool(new ThreadPool("Compiler driver thread pool", thread_count_ - 1));
+ VLOG(compiler) << "Before precompile " << GetMemoryUsageString();
PreCompile(class_loader, dex_files, thread_pool.get(), timings);
Compile(class_loader, dex_files, thread_pool.get(), timings);
if (dump_stats_) {
@@ -591,20 +598,25 @@ void CompilerDriver::Resolve(jobject class_loader, const std::vector<const DexFi
void CompilerDriver::PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
ThreadPool* thread_pool, TimingLogger* timings) {
LoadImageClasses(timings);
+ VLOG(compiler) << "LoadImageClasses: " << GetMemoryUsageString();
Resolve(class_loader, dex_files, thread_pool, timings);
+ VLOG(compiler) << "Resolve: " << GetMemoryUsageString();
if (!compiler_options_->IsVerificationEnabled()) {
- LOG(INFO) << "Verify none mode specified, skipping verification.";
+ VLOG(compiler) << "Verify none mode specified, skipping verification.";
SetVerified(class_loader, dex_files, thread_pool, timings);
return;
}
Verify(class_loader, dex_files, thread_pool, timings);
+ VLOG(compiler) << "Verify: " << GetMemoryUsageString();
InitializeClasses(class_loader, dex_files, thread_pool, timings);
+ VLOG(compiler) << "InitializeClasses: " << GetMemoryUsageString();
UpdateImageClasses(timings);
+ VLOG(compiler) << "UpdateImageClasses: " << GetMemoryUsageString();
}
bool CompilerDriver::IsImageClass(const char* descriptor) const {
@@ -626,10 +638,10 @@ bool CompilerDriver::IsClassToCompile(const char* descriptor) const {
}
}
-static void ResolveExceptionsForMethod(MutableMethodHelper* mh,
+static void ResolveExceptionsForMethod(MutableHandle<mirror::ArtMethod> method_handle,
std::set<std::pair<uint16_t, const DexFile*>>& exceptions_to_resolve)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
- const DexFile::CodeItem* code_item = mh->GetMethod()->GetCodeItem();
+ const DexFile::CodeItem* code_item = method_handle->GetCodeItem();
if (code_item == nullptr) {
return; // native or abstract method
}
@@ -649,10 +661,10 @@ static void ResolveExceptionsForMethod(MutableMethodHelper* mh,
uint16_t encoded_catch_handler_handlers_type_idx =
DecodeUnsignedLeb128(&encoded_catch_handler_list);
// Add to set of types to resolve if not already in the dex cache resolved types
- if (!mh->GetMethod()->IsResolvedTypeIdx(encoded_catch_handler_handlers_type_idx)) {
+ if (!method_handle->IsResolvedTypeIdx(encoded_catch_handler_handlers_type_idx)) {
exceptions_to_resolve.insert(
std::pair<uint16_t, const DexFile*>(encoded_catch_handler_handlers_type_idx,
- mh->GetMethod()->GetDexFile()));
+ method_handle->GetDexFile()));
}
// ignore address associated with catch handler
DecodeUnsignedLeb128(&encoded_catch_handler_list);
@@ -669,14 +681,14 @@ static bool ResolveCatchBlockExceptionsClassVisitor(mirror::Class* c, void* arg)
std::set<std::pair<uint16_t, const DexFile*>>* exceptions_to_resolve =
reinterpret_cast<std::set<std::pair<uint16_t, const DexFile*>>*>(arg);
StackHandleScope<1> hs(Thread::Current());
- MutableMethodHelper mh(hs.NewHandle<mirror::ArtMethod>(nullptr));
+ MutableHandle<mirror::ArtMethod> method_handle(hs.NewHandle<mirror::ArtMethod>(nullptr));
for (size_t i = 0; i < c->NumVirtualMethods(); ++i) {
- mh.ChangeMethod(c->GetVirtualMethod(i));
- ResolveExceptionsForMethod(&mh, *exceptions_to_resolve);
+ method_handle.Assign(c->GetVirtualMethod(i));
+ ResolveExceptionsForMethod(method_handle, *exceptions_to_resolve);
}
for (size_t i = 0; i < c->NumDirectMethods(); ++i) {
- mh.ChangeMethod(c->GetDirectMethod(i));
- ResolveExceptionsForMethod(&mh, *exceptions_to_resolve);
+ method_handle.Assign(c->GetDirectMethod(i));
+ ResolveExceptionsForMethod(method_handle, *exceptions_to_resolve);
}
return true;
}
@@ -782,23 +794,143 @@ static void MaybeAddToImageClasses(Handle<mirror::Class> c, std::set<std::string
}
}
-void CompilerDriver::FindClinitImageClassesCallback(mirror::Object* object, void* arg) {
- DCHECK(object != nullptr);
- DCHECK(arg != nullptr);
- CompilerDriver* compiler_driver = reinterpret_cast<CompilerDriver*>(arg);
- StackHandleScope<1> hs(Thread::Current());
- MaybeAddToImageClasses(hs.NewHandle(object->GetClass()), compiler_driver->image_classes_.get());
-}
+// Keeps all the data for the update together. Also doubles as the reference visitor.
+// Note: we can use object pointers because we suspend all threads.
+class ClinitImageUpdate {
+ public:
+ static ClinitImageUpdate* Create(std::set<std::string>* image_class_descriptors, Thread* self,
+ ClassLinker* linker, std::string* error_msg) {
+ std::unique_ptr<ClinitImageUpdate> res(new ClinitImageUpdate(image_class_descriptors, self,
+ linker));
+ if (res->art_method_class_ == nullptr) {
+ *error_msg = "Could not find ArtMethod class.";
+ return nullptr;
+ } else if (res->dex_cache_class_ == nullptr) {
+ *error_msg = "Could not find DexCache class.";
+ return nullptr;
+ }
+
+ return res.release();
+ }
+
+ ~ClinitImageUpdate() {
+ // Allow others to suspend again.
+ self_->EndAssertNoThreadSuspension(old_cause_);
+ }
+
+ // Visitor for VisitReferences.
+ void operator()(mirror::Object* object, MemberOffset field_offset, bool /* is_static */) const
+ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+ mirror::Object* ref = object->GetFieldObject<mirror::Object>(field_offset);
+ if (ref != nullptr) {
+ VisitClinitClassesObject(ref);
+ }
+ }
+
+ // java.lang.Reference visitor for VisitReferences.
+ void operator()(mirror::Class* /* klass */, mirror::Reference* /* ref */) const {
+ }
+
+ void Walk() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+ // Use the initial classes as roots for a search.
+ for (mirror::Class* klass_root : image_classes_) {
+ VisitClinitClassesObject(klass_root);
+ }
+ }
+
+ private:
+ ClinitImageUpdate(std::set<std::string>* image_class_descriptors, Thread* self,
+ ClassLinker* linker)
+ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) :
+ image_class_descriptors_(image_class_descriptors), self_(self) {
+ CHECK(linker != nullptr);
+ CHECK(image_class_descriptors != nullptr);
+
+ // Make sure nobody interferes with us.
+ old_cause_ = self->StartAssertNoThreadSuspension("Boot image closure");
+
+ // Find the interesting classes.
+ art_method_class_ = linker->LookupClass(self, "Ljava/lang/reflect/ArtMethod;",
+ ComputeModifiedUtf8Hash("Ljava/lang/reflect/ArtMethod;"), nullptr);
+ dex_cache_class_ = linker->LookupClass(self, "Ljava/lang/DexCache;",
+ ComputeModifiedUtf8Hash("Ljava/lang/DexCache;"), nullptr);
+
+ // Find all the already-marked classes.
+ WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
+ linker->VisitClasses(FindImageClasses, this);
+ }
+
+ static bool FindImageClasses(mirror::Class* klass, void* arg)
+ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+ ClinitImageUpdate* data = reinterpret_cast<ClinitImageUpdate*>(arg);
+ std::string temp;
+ const char* name = klass->GetDescriptor(&temp);
+ if (data->image_class_descriptors_->find(name) != data->image_class_descriptors_->end()) {
+ data->image_classes_.push_back(klass);
+ }
+
+ return true;
+ }
+
+ void VisitClinitClassesObject(mirror::Object* object) const
+ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+ DCHECK(object != nullptr);
+ if (marked_objects_.find(object) != marked_objects_.end()) {
+ // Already processed.
+ return;
+ }
+
+ // Mark it.
+ marked_objects_.insert(object);
+
+ if (object->IsClass()) {
+ // If it is a class, add it.
+ StackHandleScope<1> hs(self_);
+ MaybeAddToImageClasses(hs.NewHandle(object->AsClass()), image_class_descriptors_);
+ } else {
+ // Else visit the object's class.
+ VisitClinitClassesObject(object->GetClass());
+ }
+
+ // If it is not a dex cache or an ArtMethod, visit all references.
+ mirror::Class* klass = object->GetClass();
+ if (klass != art_method_class_ && klass != dex_cache_class_) {
+ object->VisitReferences<false /* visit class */>(*this, *this);
+ }
+ }
+
+ mutable std::unordered_set<mirror::Object*> marked_objects_;
+ std::set<std::string>* const image_class_descriptors_;
+ std::vector<mirror::Class*> image_classes_;
+ const mirror::Class* art_method_class_;
+ const mirror::Class* dex_cache_class_;
+ Thread* const self_;
+ const char* old_cause_;
+
+ DISALLOW_COPY_AND_ASSIGN(ClinitImageUpdate);
+};
void CompilerDriver::UpdateImageClasses(TimingLogger* timings) {
if (IsImage()) {
TimingLogger::ScopedTiming t("UpdateImageClasses", timings);
- // Update image_classes_ with classes for objects created by <clinit> methods.
- gc::Heap* heap = Runtime::Current()->GetHeap();
- // TODO: Image spaces only?
- ScopedObjectAccess soa(Thread::Current());
- WriterMutexLock mu(soa.Self(), *Locks::heap_bitmap_lock_);
- heap->VisitObjects(FindClinitImageClassesCallback, this);
+
+ Runtime* current = Runtime::Current();
+
+ // Suspend all threads.
+ current->GetThreadList()->SuspendAll();
+
+ std::string error_msg;
+ std::unique_ptr<ClinitImageUpdate> update(ClinitImageUpdate::Create(image_classes_.get(),
+ Thread::Current(),
+ current->GetClassLinker(),
+ &error_msg));
+ CHECK(update.get() != nullptr) << error_msg; // TODO: Soft failure?
+
+ // Do the marking.
+ update->Walk();
+
+ // Resume threads.
+ current->GetThreadList()->ResumeAll();
}
}
@@ -1194,11 +1326,10 @@ void CompilerDriver::GetCodeAndMethodForDirectCall(InvokeType* type, InvokeType
target_method->dex_method_index = method->GetDexMethodIndex();
} else {
if (no_guarantee_of_dex_cache_entry) {
- StackHandleScope<1> hs(Thread::Current());
- MethodHelper mh(hs.NewHandle(method));
// See if the method is also declared in this dex cache.
- uint32_t dex_method_idx = mh.FindDexMethodIndexInOtherDexFile(
- *target_method->dex_file, target_method->dex_method_index);
+ uint32_t dex_method_idx =
+ method->FindDexMethodIndexInOtherDexFile(*target_method->dex_file,
+ target_method->dex_method_index);
if (dex_method_idx != DexFile::kDexNoIndex) {
target_method->dex_method_index = dex_method_idx;
} else {
@@ -1820,6 +1951,12 @@ static void InitializeClass(const ParallelCompilationManager* manager, size_t cl
mirror::Throwable* exception = soa.Self()->GetException(&throw_location);
VLOG(compiler) << "Initialization of " << descriptor << " aborted because of "
<< exception->Dump();
+ std::ostream* file_log = manager->GetCompiler()->
+ GetCompilerOptions().GetInitFailureOutput();
+ if (file_log != nullptr) {
+ *file_log << descriptor << "\n";
+ *file_log << exception->Dump() << "\n";
+ }
soa.Self()->ClearException();
transaction.Abort();
CHECK_EQ(old_status, klass->GetStatus()) << "Previous class status not restored";
@@ -1875,6 +2012,7 @@ void CompilerDriver::Compile(jobject class_loader, const std::vector<const DexFi
CHECK(dex_file != nullptr);
CompileDexFile(class_loader, *dex_file, dex_files, thread_pool, timings);
}
+ VLOG(compiler) << "Compile: " << GetMemoryUsageString();
}
void CompilerDriver::CompileClass(const ParallelCompilationManager* manager, size_t class_def_index) {
@@ -2001,6 +2139,7 @@ void CompilerDriver::CompileMethod(const DexFile::CodeItem* code_item, uint32_t
bool compilation_enabled) {
CompiledMethod* compiled_method = nullptr;
uint64_t start_ns = kTimeCompileMethod ? NanoTime() : 0;
+ MethodReference method_ref(&dex_file, method_idx);
if ((access_flags & kAccNative) != 0) {
// Are we interpreting only and have support for generic JNI down calls?
@@ -2014,7 +2153,6 @@ void CompilerDriver::CompileMethod(const DexFile::CodeItem* code_item, uint32_t
} else if ((access_flags & kAccAbstract) != 0) {
// Abstract methods don't have code.
} else {
- MethodReference method_ref(&dex_file, method_idx);
bool compile = compilation_enabled &&
verification_results_->IsCandidateForCompilation(method_ref, access_flags);
if (compile) {
@@ -2051,16 +2189,18 @@ void CompilerDriver::CompileMethod(const DexFile::CodeItem* code_item, uint32_t
// When compiling with PIC, there should be zero non-relative linker patches
CHECK(!compile_pic || non_relative_linker_patch_count == 0u);
- MethodReference ref(&dex_file, method_idx);
- DCHECK(GetCompiledMethod(ref) == nullptr) << PrettyMethod(method_idx, dex_file);
+ DCHECK(GetCompiledMethod(method_ref) == nullptr) << PrettyMethod(method_idx, dex_file);
{
MutexLock mu(self, compiled_methods_lock_);
- compiled_methods_.Put(ref, compiled_method);
+ compiled_methods_.Put(method_ref, compiled_method);
non_relative_linker_patch_count_ += non_relative_linker_patch_count;
}
- DCHECK(GetCompiledMethod(ref) != nullptr) << PrettyMethod(method_idx, dex_file);
+ DCHECK(GetCompiledMethod(method_ref) != nullptr) << PrettyMethod(method_idx, dex_file);
}
+ // Done compiling, delete the verified method to reduce native memory usage.
+ verification_results_->RemoveVerifiedMethod(method_ref);
+
if (self->IsExceptionPending()) {
ScopedObjectAccess soa(self);
LOG(FATAL) << "Unexpected exception compiling: " << PrettyMethod(method_idx, dex_file) << "\n"
@@ -2210,4 +2350,21 @@ bool CompilerDriver::SkipCompilation(const std::string& method_name) {
}
return !compile;
}
+
+std::string CompilerDriver::GetMemoryUsageString() const {
+ std::ostringstream oss;
+ const ArenaPool* arena_pool = GetArenaPool();
+ gc::Heap* heap = Runtime::Current()->GetHeap();
+ oss << "arena alloc=" << PrettySize(arena_pool->GetBytesAllocated());
+ oss << " java alloc=" << PrettySize(heap->GetBytesAllocated());
+#ifdef HAVE_MALLOC_H
+ struct mallinfo info = mallinfo();
+ const size_t allocated_space = static_cast<size_t>(info.uordblks);
+ const size_t free_space = static_cast<size_t>(info.fordblks);
+ oss << " native alloc=" << PrettySize(allocated_space) << " free="
+ << PrettySize(free_space);
+#endif
+ return oss.str();
+}
+
} // namespace art
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 437a1a9c0e..615e0d0db4 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -39,6 +39,7 @@
#include "thread_pool.h"
#include "utils/arena_allocator.h"
#include "utils/dedupe_set.h"
+#include "dex/verified_method.h"
namespace art {
@@ -340,6 +341,9 @@ class CompilerDriver {
ArenaPool* GetArenaPool() {
return &arena_pool_;
}
+ const ArenaPool* GetArenaPool() const {
+ return &arena_pool_;
+ }
bool WriteElf(const std::string& android_root,
bool is_host,
@@ -395,6 +399,9 @@ class CompilerDriver {
// Should the compiler run on this method given profile information?
bool SkipCompilation(const std::string& method_name);
+ // Get memory usage during compilation.
+ std::string GetMemoryUsageString() const;
+
private:
// These flags are internal to CompilerDriver for collecting INVOKE resolution statistics.
// The only external contract is that unresolved method has flags 0 and resolved non-0.
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 0592f0cf1e..aec7d241f5 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -17,6 +17,7 @@
#ifndef ART_COMPILER_DRIVER_COMPILER_OPTIONS_H_
#define ART_COMPILER_DRIVER_COMPILER_OPTIONS_H_
+#include <ostream>
#include <string>
#include <vector>
@@ -70,7 +71,8 @@ class CompilerOptions FINAL {
#ifdef ART_SEA_IR_MODE
sea_ir_mode_(false),
#endif
- verbose_methods_(nullptr) {
+ verbose_methods_(nullptr),
+ init_failure_output_(nullptr) {
}
CompilerOptions(CompilerFilter compiler_filter,
@@ -90,7 +92,8 @@ class CompilerOptions FINAL {
#ifdef ART_SEA_IR_MODE
bool sea_ir_mode,
#endif
- const std::vector<std::string>* verbose_methods
+ const std::vector<std::string>* verbose_methods,
+ std::ostream* init_failure_output
) : // NOLINT(whitespace/parens)
compiler_filter_(compiler_filter),
huge_method_threshold_(huge_method_threshold),
@@ -109,7 +112,8 @@ class CompilerOptions FINAL {
#ifdef ART_SEA_IR_MODE
sea_ir_mode_(sea_ir_mode),
#endif
- verbose_methods_(verbose_methods) {
+ verbose_methods_(verbose_methods),
+ init_failure_output_(init_failure_output) {
}
CompilerFilter GetCompilerFilter() const {
@@ -217,6 +221,10 @@ class CompilerOptions FINAL {
return false;
}
+ std::ostream* GetInitFailureOutput() const {
+ return init_failure_output_;
+ }
+
private:
CompilerFilter compiler_filter_;
const size_t huge_method_threshold_;
@@ -241,6 +249,9 @@ class CompilerOptions FINAL {
// Vector of methods to have verbose output enabled for.
const std::vector<std::string>* const verbose_methods_;
+ // Log initialization of initialization failures to this stream if not null.
+ std::ostream* const init_failure_output_;
+
DISALLOW_COPY_AND_ASSIGN(CompilerOptions);
};
std::ostream& operator<<(std::ostream& os, const CompilerOptions::CompilerFilter& rhs);
diff --git a/compiler/driver/dex_compilation_unit.h b/compiler/driver/dex_compilation_unit.h
index 84f57991c3..03ae489da1 100644
--- a/compiler/driver/dex_compilation_unit.h
+++ b/compiler/driver/dex_compilation_unit.h
@@ -102,6 +102,10 @@ class DexCompilationUnit {
return verified_method_;
}
+ void ClearVerifiedMethod() {
+ verified_method_ = nullptr;
+ }
+
const std::string& GetSymbol();
private:
@@ -117,7 +121,7 @@ class DexCompilationUnit {
const uint16_t class_def_idx_;
const uint32_t dex_method_idx_;
const uint32_t access_flags_;
- const VerifiedMethod* const verified_method_;
+ const VerifiedMethod* verified_method_;
std::string symbol_;
};
diff --git a/compiler/elf_writer_test.cc b/compiler/elf_writer_test.cc
index 2ffbd10eab..5488e2f6d0 100644
--- a/compiler/elf_writer_test.cc
+++ b/compiler/elf_writer_test.cc
@@ -46,7 +46,11 @@ class ElfWriterTest : public CommonCompilerTest {
EXPECT_EQ(expected_value, ef->FindDynamicSymbolAddress(symbol_name)); \
} while (false)
+#if defined(ART_USE_OPTIMIZING_COMPILER)
+TEST_F(ElfWriterTest, DISABLED_dlsym) {
+#else
TEST_F(ElfWriterTest, dlsym) {
+#endif
std::string elf_location;
if (IsHost()) {
const char* host_dir = getenv("ANDROID_HOST_OUT");
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 3b1d914f6e..eb1b5db958 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -77,6 +77,7 @@ bool ImageWriter::PrepareImageAddressSpace() {
Thread::Current()->TransitionFromSuspendedToRunnable();
PruneNonImageClasses(); // Remove junk
ComputeLazyFieldsForImageClasses(); // Add useful information
+ ProcessStrings();
Thread::Current()->TransitionFromRunnableToSuspended(kNative);
}
gc::Heap* heap = Runtime::Current()->GetHeap();
@@ -479,34 +480,29 @@ class StringCollector {
};
// Compare strings based on length, used for sorting strings by length / reverse length.
-class StringLengthComparator {
+class LexicographicalStringComparator {
public:
- explicit StringLengthComparator(Handle<mirror::ObjectArray<mirror::String>> strings)
- : strings_(strings) {
+ bool operator()(const mirror::HeapReference<mirror::String>& lhs,
+ const mirror::HeapReference<mirror::String>& rhs) const
+ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+ mirror::String* lhs_s = lhs.AsMirrorPtr();
+ mirror::String* rhs_s = rhs.AsMirrorPtr();
+ uint16_t* lhs_begin = lhs_s->GetCharArray()->GetData() + lhs_s->GetOffset();
+ uint16_t* rhs_begin = rhs_s->GetCharArray()->GetData() + rhs_s->GetOffset();
+ return std::lexicographical_compare(lhs_begin, lhs_begin + lhs_s->GetLength(),
+ rhs_begin, rhs_begin + rhs_s->GetLength());
}
- bool operator()(size_t a, size_t b) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
- return strings_->GetWithoutChecks(a)->GetLength() < strings_->GetWithoutChecks(b)->GetLength();
- }
-
- private:
- Handle<mirror::ObjectArray<mirror::String>> strings_;
};
-// Normal string < comparison through the chars_ array.
-class SubstringComparator {
- public:
- explicit SubstringComparator(const std::vector<uint16_t>* const chars) : chars_(chars) {
- }
- bool operator()(const std::pair<size_t, size_t>& a, const std::pair<size_t, size_t>& b) {
- return std::lexicographical_compare(chars_->begin() + a.first,
- chars_->begin() + a.first + a.second,
- chars_->begin() + b.first,
- chars_->begin() + b.first + b.second);
+static bool IsPrefix(mirror::String* pref, mirror::String* full)
+ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+ if (pref->GetLength() > full->GetLength()) {
+ return false;
}
-
- private:
- const std::vector<uint16_t>* const chars_;
-};
+ uint16_t* pref_begin = pref->GetCharArray()->GetData() + pref->GetOffset();
+ uint16_t* full_begin = full->GetCharArray()->GetData() + full->GetOffset();
+ return std::equal(pref_begin, pref_begin + pref->GetLength(), full_begin);
+}
void ImageWriter::ProcessStrings() {
size_t total_strings = 0;
@@ -528,67 +524,52 @@ void ImageWriter::ProcessStrings() {
// Some strings could have gotten freed if AllocStringArray caused a GC.
CHECK_LE(string_collector.GetIndex(), total_strings);
total_strings = string_collector.GetIndex();
- size_t total_length = 0;
- std::vector<size_t> reverse_sorted_strings;
- for (size_t i = 0; i < total_strings; ++i) {
- mirror::String* s = strings->GetWithoutChecks(i);
- // Look up the string in the array.
- total_length += s->GetLength();
- reverse_sorted_strings.push_back(i);
- }
- // Sort by reverse length.
- StringLengthComparator comparator(strings);
- std::sort(reverse_sorted_strings.rbegin(), reverse_sorted_strings.rend(), comparator);
- // Deduplicate prefixes and add strings to the char array.
- std::vector<uint16_t> combined_chars(total_length, 0U);
- size_t num_chars = 0;
+ auto* strings_begin = reinterpret_cast<mirror::HeapReference<mirror::String>*>(
+ strings->GetRawData(sizeof(mirror::HeapReference<mirror::String>), 0));
+ std::sort(strings_begin, strings_begin + total_strings, LexicographicalStringComparator());
// Characters of strings which are non equal prefix of another string (not the same string).
// We don't count the savings from equal strings since these would get interned later anyways.
size_t prefix_saved_chars = 0;
- std::set<std::pair<size_t, size_t>, SubstringComparator> existing_strings((
- SubstringComparator(&combined_chars)));
- for (size_t i = 0; i < total_strings; ++i) {
- mirror::String* s = strings->GetWithoutChecks(reverse_sorted_strings[i]);
- // Add the string to the end of the char array.
+ // Count characters needed for the strings.
+ size_t num_chars = 0u;
+ mirror::String* prev_s = nullptr;
+ for (size_t idx = 0; idx != total_strings; ++idx) {
+ mirror::String* s = strings->GetWithoutChecks(idx);
size_t length = s->GetLength();
- for (size_t j = 0; j < length; ++j) {
- combined_chars[num_chars++] = s->CharAt(j);
- }
- // Try to see if the string exists as a prefix of an existing string.
- size_t new_offset = 0;
- std::pair<size_t, size_t> new_string(num_chars - length, length);
- auto it = existing_strings.lower_bound(new_string);
- bool is_prefix = false;
- if (it != existing_strings.end()) {
- CHECK_LE(length, it->second);
- is_prefix = std::equal(combined_chars.begin() + it->first,
- combined_chars.begin() + it->first + it->second,
- combined_chars.begin() + new_string.first);
- }
- if (is_prefix) {
- // Shares a prefix, set the offset to where the new offset will be.
- new_offset = it->first;
- // Remove the added chars.
- num_chars -= length;
- if (it->second != length) {
- prefix_saved_chars += length;
+ num_chars += length;
+ if (prev_s != nullptr && IsPrefix(prev_s, s)) {
+ size_t prev_length = prev_s->GetLength();
+ num_chars -= prev_length;
+ if (prev_length != length) {
+ prefix_saved_chars += prev_length;
}
- } else {
- new_offset = new_string.first;
- existing_strings.insert(new_string);
}
- s->SetOffset(new_offset);
- }
- // Allocate and update the char arrays.
- auto* array = mirror::CharArray::Alloc(self, num_chars);
- for (size_t i = 0; i < num_chars; ++i) {
- array->SetWithoutChecks<false>(i, combined_chars[i]);
- }
- for (size_t i = 0; i < total_strings; ++i) {
- strings->GetWithoutChecks(i)->SetArray(array);
+ prev_s = s;
+ }
+ // Create character array, copy characters and point the strings there.
+ mirror::CharArray* array = mirror::CharArray::Alloc(self, num_chars);
+ string_data_array_ = array;
+ uint16_t* array_data = array->GetData();
+ size_t pos = 0u;
+ prev_s = nullptr;
+ for (size_t idx = 0; idx != total_strings; ++idx) {
+ mirror::String* s = strings->GetWithoutChecks(idx);
+ uint16_t* s_data = s->GetCharArray()->GetData() + s->GetOffset();
+ int32_t s_length = s->GetLength();
+ int32_t prefix_length = 0u;
+ if (idx != 0u && IsPrefix(prev_s, s)) {
+ prefix_length = prev_s->GetLength();
+ }
+ memcpy(array_data + pos, s_data + prefix_length, (s_length - prefix_length) * sizeof(*s_data));
+ s->SetOffset(pos - prefix_length);
+ s->SetArray(array);
+ pos += s_length - prefix_length;
+ prev_s = s;
}
+ CHECK_EQ(pos, num_chars);
+
LOG(INFO) << "Total # image strings=" << total_strings << " combined length="
- << total_length << " prefix saved chars=" << prefix_saved_chars;
+ << num_chars << " prefix saved chars=" << prefix_saved_chars;
ComputeEagerResolvedStrings();
}
@@ -1254,4 +1235,13 @@ uint32_t ImageWriter::BinSlot::GetIndex() const {
return lockword_ & ~kBinMask;
}
+void ImageWriter::FreeStringDataArray() {
+ if (string_data_array_ != nullptr) {
+ gc::space::LargeObjectSpace* los = Runtime::Current()->GetHeap()->GetLargeObjectsSpace();
+ if (los != nullptr) {
+ los->Free(Thread::Current(), reinterpret_cast<mirror::Object*>(string_data_array_));
+ }
+ }
+}
+
} // namespace art
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 8c84b686fa..4418879e73 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -18,6 +18,7 @@
#define ART_COMPILER_IMAGE_WRITER_H_
#include <stdint.h>
+#include <valgrind.h>
#include <cstddef>
#include <memory>
@@ -56,7 +57,15 @@ class ImageWriter FINAL {
CHECK_NE(image_begin, 0U);
}
- ~ImageWriter() {}
+ ~ImageWriter() {
+ // For interned strings a large array is allocated to hold all the character data and avoid
+ // overhead. However, no GC is run anymore at this point. As the array is likely large, it
+ // will be allocated in the large object space, where valgrind can track every single
+ // allocation. Not explicitly freeing that array will be recognized as a leak.
+ if (RUNNING_ON_VALGRIND != 0) {
+ FreeStringDataArray();
+ }
+ }
bool PrepareImageAddressSpace();
@@ -254,6 +263,9 @@ class ImageWriter FINAL {
// Calculate the sum total of the bin slot sizes in [0, up_to). Defaults to all bins.
size_t GetBinSizeSum(Bin up_to = kBinSize) const;
+ // Release the string_data_array_.
+ void FreeStringDataArray();
+
const CompilerDriver& compiler_driver_;
// Beginning target image address for the output image.
@@ -306,6 +318,8 @@ class ImageWriter FINAL {
size_t bin_slot_sizes_[kBinSize]; // Number of bytes in a bin
size_t bin_slot_count_[kBinSize]; // Number of objects in a bin
+ void* string_data_array_; // The backing for the interned strings.
+
friend class FixupVisitor;
friend class FixupClassVisitor;
DISALLOW_COPY_AND_ASSIGN(ImageWriter);
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
new file mode 100644
index 0000000000..c353d66f1e
--- /dev/null
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -0,0 +1,691 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "bounds_check_elimination.h"
+#include "nodes.h"
+#include "utils/arena_containers.h"
+
+namespace art {
+
+class MonotonicValueRange;
+
+/**
+ * A value bound is represented as a pair of value and constant,
+ * e.g. array.length - 1.
+ */
+class ValueBound : public ValueObject {
+ public:
+ static ValueBound Create(HInstruction* instruction, int constant) {
+ if (instruction == nullptr) {
+ return ValueBound(nullptr, constant);
+ }
+ if (instruction->IsIntConstant()) {
+ return ValueBound(nullptr, instruction->AsIntConstant()->GetValue() + constant);
+ }
+ return ValueBound(instruction, constant);
+ }
+
+ HInstruction* GetInstruction() const { return instruction_; }
+ int GetConstant() const { return constant_; }
+
+ bool IsRelativeToArrayLength() const {
+ return instruction_ != nullptr && instruction_->IsArrayLength();
+ }
+
+ bool IsConstant() const {
+ return instruction_ == nullptr;
+ }
+
+ static ValueBound Min() { return ValueBound(nullptr, INT_MIN); }
+ static ValueBound Max() { return ValueBound(nullptr, INT_MAX); }
+
+ bool Equals(ValueBound bound) const {
+ return instruction_ == bound.instruction_ && constant_ == bound.constant_;
+ }
+
+  // Returns whether it's certain that bound1 >= bound2.
+ bool GreaterThanOrEqual(ValueBound bound) const {
+ if (instruction_ == bound.instruction_) {
+ if (instruction_ == nullptr) {
+ // Pure constant.
+ return constant_ >= bound.constant_;
+ }
+ // There might be overflow/underflow. Be conservative for now.
+ return false;
+ }
+ // Not comparable. Just return false.
+ return false;
+ }
+
+  // Returns whether it's certain that bound1 <= bound2.
+ bool LessThanOrEqual(ValueBound bound) const {
+ if (instruction_ == bound.instruction_) {
+ if (instruction_ == nullptr) {
+ // Pure constant.
+ return constant_ <= bound.constant_;
+ }
+ if (IsRelativeToArrayLength()) {
+ // Array length is guaranteed to be no less than 0.
+ // No overflow/underflow can happen if both constants are negative.
+ if (constant_ <= 0 && bound.constant_ <= 0) {
+ return constant_ <= bound.constant_;
+ }
+ // There might be overflow/underflow. Be conservative for now.
+ return false;
+ }
+ }
+
+ // In case the array length is some constant, we can
+ // still compare.
+ if (IsConstant() && bound.IsRelativeToArrayLength()) {
+ HInstruction* array = bound.GetInstruction()->AsArrayLength()->InputAt(0);
+ if (array->IsNullCheck()) {
+ array = array->AsNullCheck()->InputAt(0);
+ }
+ if (array->IsNewArray()) {
+ HInstruction* len = array->InputAt(0);
+ if (len->IsIntConstant()) {
+ int len_const = len->AsIntConstant()->GetValue();
+ return constant_ <= len_const + bound.GetConstant();
+ }
+ }
+ }
+
+ // Not comparable. Just return false.
+ return false;
+ }
+
+ // Try to narrow lower bound. Returns the greatest of the two if possible.
+ // Pick one if they are not comparable.
+ static ValueBound NarrowLowerBound(ValueBound bound1, ValueBound bound2) {
+ if (bound1.instruction_ == bound2.instruction_) {
+ // Same instruction, compare the constant part.
+ return ValueBound(bound1.instruction_,
+ std::max(bound1.constant_, bound2.constant_));
+ }
+
+ // Not comparable. Just pick one. We may lose some info, but that's ok.
+ // Favor constant as lower bound.
+ return bound1.IsConstant() ? bound1 : bound2;
+ }
+
+ // Try to narrow upper bound. Returns the lowest of the two if possible.
+ // Pick one if they are not comparable.
+ static ValueBound NarrowUpperBound(ValueBound bound1, ValueBound bound2) {
+ if (bound1.instruction_ == bound2.instruction_) {
+ // Same instruction, compare the constant part.
+ return ValueBound(bound1.instruction_,
+ std::min(bound1.constant_, bound2.constant_));
+ }
+
+ // Not comparable. Just pick one. We may lose some info, but that's ok.
+ // Favor array length as upper bound.
+ return bound1.IsRelativeToArrayLength() ? bound1 : bound2;
+ }
+
+ // Add a constant to a ValueBound. If the constant part of the ValueBound
+ // overflows/underflows, then we can't accurately represent it. For correctness,
+ // just return Max/Min() depending on whether the returned ValueBound is used for
+ // lower/upper bound.
+ ValueBound Add(int c, bool for_lower_bound, bool* overflow_or_underflow) const {
+ *overflow_or_underflow = false;
+ if (c == 0) {
+ return *this;
+ }
+
+ int new_constant;
+ if (c > 0) {
+ if (constant_ > INT_MAX - c) {
+ // Constant part overflows.
+ *overflow_or_underflow = true;
+ return for_lower_bound ? Min() : Max();
+ } else {
+ new_constant = constant_ + c;
+ }
+ } else {
+ if (constant_ < INT_MIN - c) {
+ // Constant part underflows.
+ *overflow_or_underflow = true;
+ return for_lower_bound ? Min() : Max();
+ } else {
+ new_constant = constant_ + c;
+ }
+ }
+ return ValueBound(instruction_, new_constant);
+ }
+
+ private:
+ ValueBound(HInstruction* instruction, int constant)
+ : instruction_(instruction), constant_(constant) {}
+
+ HInstruction* instruction_;
+ int constant_;
+};
+
+/**
+ * Represent a range of lower bound and upper bound, both being inclusive.
+ * Currently a ValueRange may be generated as a result of the following:
+ * comparisons related to array bounds, array bounds check, add/sub on top
+ * of an existing value range, or a loop phi corresponding to an
+ * incrementing/decrementing array index (MonotonicValueRange).
+ */
+class ValueRange : public ArenaObject<kArenaAllocMisc> {
+ public:
+ ValueRange(ArenaAllocator* allocator, ValueBound lower, ValueBound upper)
+ : allocator_(allocator), lower_(lower), upper_(upper) {}
+
+ virtual ~ValueRange() {}
+
+ virtual const MonotonicValueRange* AsMonotonicValueRange() const { return nullptr; }
+ bool IsMonotonicValueRange() const {
+ return AsMonotonicValueRange() != nullptr;
+ }
+
+ ArenaAllocator* GetAllocator() const { return allocator_; }
+ ValueBound GetLower() const { return lower_; }
+ ValueBound GetUpper() const { return upper_; }
+
+ // If it's certain that this value range fits in other_range.
+ virtual bool FitsIn(ValueRange* other_range) const {
+ if (other_range == nullptr) {
+ return true;
+ }
+ DCHECK(!other_range->IsMonotonicValueRange());
+ return lower_.GreaterThanOrEqual(other_range->lower_) &&
+ upper_.LessThanOrEqual(other_range->upper_);
+ }
+
+ // Returns the intersection of this and range.
+ // If it's not possible to do intersection because some
+ // bounds are not comparable, it's ok to pick either bound.
+ virtual ValueRange* Narrow(ValueRange* range) {
+ if (range == nullptr) {
+ return this;
+ }
+
+ if (range->IsMonotonicValueRange()) {
+ return this;
+ }
+
+ return new (allocator_) ValueRange(
+ allocator_,
+ ValueBound::NarrowLowerBound(lower_, range->lower_),
+ ValueBound::NarrowUpperBound(upper_, range->upper_));
+ }
+
+ // Shift a range by a constant. If either bound can't be represented
+ // as (instruction+c) format due to possible overflow/underflow,
+ // return the full integer range.
+ ValueRange* Add(int constant) const {
+ bool overflow_or_underflow;
+ ValueBound lower = lower_.Add(constant, true, &overflow_or_underflow);
+ if (overflow_or_underflow) {
+ // We can't accurately represent the bounds anymore.
+ return FullIntRange();
+ }
+ ValueBound upper = upper_.Add(constant, false, &overflow_or_underflow);
+ if (overflow_or_underflow) {
+ // We can't accurately represent the bounds anymore.
+ return FullIntRange();
+ }
+ return new (allocator_) ValueRange(allocator_, lower, upper);
+ }
+
+ // Return [INT_MIN, INT_MAX].
+ ValueRange* FullIntRange() const {
+ return new (allocator_) ValueRange(allocator_, ValueBound::Min(), ValueBound::Max());
+ }
+
+ private:
+ ArenaAllocator* const allocator_;
+ const ValueBound lower_; // inclusive
+ const ValueBound upper_; // inclusive
+
+ DISALLOW_COPY_AND_ASSIGN(ValueRange);
+};
+
+/**
+ * A monotonically incrementing/decrementing value range, e.g.
+ * the variable i in "for (int i=0; i<array.length; i++)".
+ * Special care needs to be taken to account for overflow/underflow
+ * of such value ranges.
+ */
+class MonotonicValueRange : public ValueRange {
+ public:
+ static MonotonicValueRange* Create(ArenaAllocator* allocator,
+ HInstruction* initial, int increment) {
+ DCHECK_NE(increment, 0);
+ // To be conservative, give it full range [INT_MIN, INT_MAX] in case it's
+ // used as a regular value range, due to possible overflow/underflow.
+ return new (allocator) MonotonicValueRange(
+ allocator, ValueBound::Min(), ValueBound::Max(), initial, increment);
+ }
+
+ virtual ~MonotonicValueRange() {}
+
+ const MonotonicValueRange* AsMonotonicValueRange() const OVERRIDE { return this; }
+
+ // If it's certain that this value range fits in other_range.
+ bool FitsIn(ValueRange* other_range) const OVERRIDE {
+ if (other_range == nullptr) {
+ return true;
+ }
+ DCHECK(!other_range->IsMonotonicValueRange());
+ return false;
+ }
+
+ // Try to narrow this MonotonicValueRange given another range.
+ // Ideally it will return a normal ValueRange. But due to
+ // possible overflow/underflow, that may not be possible.
+ ValueRange* Narrow(ValueRange* range) OVERRIDE {
+ if (range == nullptr) {
+ return this;
+ }
+ DCHECK(!range->IsMonotonicValueRange());
+
+ if (increment_ > 0) {
+ // Monotonically increasing.
+ ValueBound lower = ValueBound::NarrowLowerBound(
+ ValueBound::Create(initial_, 0), range->GetLower());
+
+ // We currently conservatively assume max array length is INT_MAX. If we can
+ // make assumptions about the max array length, e.g. due to the max heap size,
+ // divided by the element size (such as 4 bytes for each integer array), we can
+ // lower this number and rule out some possible overflows.
+ int max_array_len = INT_MAX;
+
+ int upper = INT_MAX;
+ if (range->GetUpper().IsConstant()) {
+ upper = range->GetUpper().GetConstant();
+ } else if (range->GetUpper().IsRelativeToArrayLength()) {
+ int constant = range->GetUpper().GetConstant();
+ if (constant <= 0) {
+ // Normal case. e.g. <= array.length - 1, <= array.length - 2, etc.
+ upper = max_array_len + constant;
+ } else {
+ // There might be overflow. Give up narrowing.
+ return this;
+ }
+ } else {
+ // There might be overflow. Give up narrowing.
+ return this;
+ }
+
+ // If we can prove for the last number in sequence of initial_,
+ // initial_ + increment_, initial_ + 2 x increment_, ...
+ // that's <= upper, (last_num_in_sequence + increment_) doesn't trigger overflow,
+      // then this MonotonicValueRange is narrowed to a normal value range.
+
+ // Be conservative first, assume last number in the sequence hits upper.
+ int last_num_in_sequence = upper;
+ if (initial_->IsIntConstant()) {
+ int initial_constant = initial_->AsIntConstant()->GetValue();
+ if (upper <= initial_constant) {
+ last_num_in_sequence = upper;
+ } else {
+        // Cast to int64_t for the subtraction part to avoid int overflow.
+ last_num_in_sequence = initial_constant +
+ ((int64_t)upper - (int64_t)initial_constant) / increment_ * increment_;
+ }
+ }
+ if (last_num_in_sequence <= INT_MAX - increment_) {
+ // No overflow. The sequence will be stopped by the upper bound test as expected.
+ return new (GetAllocator()) ValueRange(GetAllocator(), lower, range->GetUpper());
+ }
+
+ // There might be overflow. Give up narrowing.
+ return this;
+ } else {
+ DCHECK_NE(increment_, 0);
+ // Monotonically decreasing.
+ ValueBound upper = ValueBound::NarrowUpperBound(
+ ValueBound::Create(initial_, 0), range->GetUpper());
+
+ // Need to take care of underflow. Try to prove underflow won't happen
+ // for common cases. Basically need to be able to prove for any value
+ // that's >= range->GetLower(), it won't be positive with value+increment.
+ if (range->GetLower().IsConstant()) {
+ int constant = range->GetLower().GetConstant();
+ if (constant >= INT_MIN - increment_) {
+ return new (GetAllocator()) ValueRange(GetAllocator(), range->GetLower(), upper);
+ }
+ }
+
+ // There might be underflow. Give up narrowing.
+ return this;
+ }
+ }
+
+ private:
+ MonotonicValueRange(ArenaAllocator* allocator, ValueBound lower,
+ ValueBound upper, HInstruction* initial, int increment)
+ : ValueRange(allocator, lower, upper),
+ initial_(initial),
+ increment_(increment) {}
+
+ HInstruction* const initial_;
+ const int increment_;
+
+ DISALLOW_COPY_AND_ASSIGN(MonotonicValueRange);
+};
+
+class BCEVisitor : public HGraphVisitor {
+ public:
+ explicit BCEVisitor(HGraph* graph)
+ : HGraphVisitor(graph),
+ maps_(graph->GetBlocks().Size()) {}
+
+ private:
+ // Return the map of proven value ranges at the beginning of a basic block.
+ ArenaSafeMap<int, ValueRange*>* GetValueRangeMap(HBasicBlock* basic_block) {
+ int block_id = basic_block->GetBlockId();
+ if (maps_.at(block_id) == nullptr) {
+ std::unique_ptr<ArenaSafeMap<int, ValueRange*>> map(
+ new ArenaSafeMap<int, ValueRange*>(
+ std::less<int>(), GetGraph()->GetArena()->Adapter()));
+ maps_.at(block_id) = std::move(map);
+ }
+ return maps_.at(block_id).get();
+ }
+
+ // Traverse up the dominator tree to look for value range info.
+ ValueRange* LookupValueRange(HInstruction* instruction, HBasicBlock* basic_block) {
+ while (basic_block != nullptr) {
+ ArenaSafeMap<int, ValueRange*>* map = GetValueRangeMap(basic_block);
+ if (map->find(instruction->GetId()) != map->end()) {
+ return map->Get(instruction->GetId());
+ }
+ basic_block = basic_block->GetDominator();
+ }
+ // Didn't find any.
+ return nullptr;
+ }
+
+ // Try to detect useful value bound format from an instruction, e.g.
+ // a constant or array length related value.
+ ValueBound DetectValueBoundFromValue(HInstruction* instruction) {
+ if (instruction->IsIntConstant()) {
+ return ValueBound::Create(nullptr, instruction->AsIntConstant()->GetValue());
+ }
+
+ if (instruction->IsArrayLength()) {
+ return ValueBound::Create(instruction, 0);
+ }
+ // Try to detect (array.length + c) format.
+ if (instruction->IsAdd()) {
+ HAdd* add = instruction->AsAdd();
+ HInstruction* left = add->GetLeft();
+ HInstruction* right = add->GetRight();
+ if (left->IsArrayLength() && right->IsIntConstant()) {
+ return ValueBound::Create(left, right->AsIntConstant()->GetValue());
+ }
+ }
+
+ // No useful bound detected.
+ return ValueBound::Max();
+ }
+
+ // Narrow the value range of 'instruction' at the end of 'basic_block' with 'range',
+ // and push the narrowed value range to 'successor'.
+ void ApplyRangeFromComparison(HInstruction* instruction, HBasicBlock* basic_block,
+ HBasicBlock* successor, ValueRange* range) {
+ ValueRange* existing_range = LookupValueRange(instruction, basic_block);
+ ValueRange* narrowed_range = (existing_range == nullptr) ?
+ range : existing_range->Narrow(range);
+ if (narrowed_range != nullptr) {
+ GetValueRangeMap(successor)->Overwrite(instruction->GetId(), narrowed_range);
+ }
+ }
+
+ // Handle "if (left cmp_cond right)".
+ void HandleIf(HIf* instruction, HInstruction* left, HInstruction* right, IfCondition cond) {
+ HBasicBlock* block = instruction->GetBlock();
+
+ HBasicBlock* true_successor = instruction->IfTrueSuccessor();
+ // There should be no critical edge at this point.
+ DCHECK_EQ(true_successor->GetPredecessors().Size(), 1u);
+
+ HBasicBlock* false_successor = instruction->IfFalseSuccessor();
+ // There should be no critical edge at this point.
+ DCHECK_EQ(false_successor->GetPredecessors().Size(), 1u);
+
+ ValueBound bound = DetectValueBoundFromValue(right);
+ bool found = !bound.Equals(ValueBound::Max());
+
+ ValueBound lower = bound;
+ ValueBound upper = bound;
+ if (!found) {
+ // No constant or array.length+c bound found.
+ // For i<j, we can still use j's upper bound as i's upper bound. Same for lower.
+ ValueRange* range = LookupValueRange(right, block);
+ if (range != nullptr) {
+ lower = range->GetLower();
+ upper = range->GetUpper();
+ } else {
+ lower = ValueBound::Min();
+ upper = ValueBound::Max();
+ }
+ }
+
+ bool overflow_or_underflow;
+ if (cond == kCondLT || cond == kCondLE) {
+ if (!upper.Equals(ValueBound::Max())) {
+ int compensation = (cond == kCondLT) ? -1 : 0; // upper bound is inclusive
+ ValueBound new_upper = upper.Add(compensation, false, &overflow_or_underflow);
+ // overflow_or_underflow is ignored here since we already use ValueBound::Min()
+ // for lower bound.
+ ValueRange* new_range = new (GetGraph()->GetArena())
+ ValueRange(GetGraph()->GetArena(), ValueBound::Min(), new_upper);
+ ApplyRangeFromComparison(left, block, true_successor, new_range);
+ }
+
+ // array.length as a lower bound isn't considered useful.
+ if (!lower.Equals(ValueBound::Min()) && !lower.IsRelativeToArrayLength()) {
+ int compensation = (cond == kCondLE) ? 1 : 0; // lower bound is inclusive
+ ValueBound new_lower = lower.Add(compensation, true, &overflow_or_underflow);
+ // overflow_or_underflow is ignored here since we already use ValueBound::Max()
+ // for upper bound.
+ ValueRange* new_range = new (GetGraph()->GetArena())
+ ValueRange(GetGraph()->GetArena(), new_lower, ValueBound::Max());
+ ApplyRangeFromComparison(left, block, false_successor, new_range);
+ }
+ } else if (cond == kCondGT || cond == kCondGE) {
+ // array.length as a lower bound isn't considered useful.
+ if (!lower.Equals(ValueBound::Min()) && !lower.IsRelativeToArrayLength()) {
+ int compensation = (cond == kCondGT) ? 1 : 0; // lower bound is inclusive
+ ValueBound new_lower = lower.Add(compensation, true, &overflow_or_underflow);
+ // overflow_or_underflow is ignored here since we already use ValueBound::Max()
+ // for upper bound.
+ ValueRange* new_range = new (GetGraph()->GetArena())
+ ValueRange(GetGraph()->GetArena(), new_lower, ValueBound::Max());
+ ApplyRangeFromComparison(left, block, true_successor, new_range);
+ }
+
+ if (!upper.Equals(ValueBound::Max())) {
+ int compensation = (cond == kCondGE) ? -1 : 0; // upper bound is inclusive
+ ValueBound new_upper = upper.Add(compensation, false, &overflow_or_underflow);
+ // overflow_or_underflow is ignored here since we already use ValueBound::Min()
+ // for lower bound.
+ ValueRange* new_range = new (GetGraph()->GetArena())
+ ValueRange(GetGraph()->GetArena(), ValueBound::Min(), new_upper);
+ ApplyRangeFromComparison(left, block, false_successor, new_range);
+ }
+ }
+ }
+
+ void VisitBoundsCheck(HBoundsCheck* bounds_check) {
+ HBasicBlock* block = bounds_check->GetBlock();
+ HInstruction* index = bounds_check->InputAt(0);
+ HInstruction* array_length = bounds_check->InputAt(1);
+ ValueRange* index_range = LookupValueRange(index, block);
+
+ if (index_range != nullptr) {
+ ValueBound lower = ValueBound::Create(nullptr, 0); // constant 0
+ ValueBound upper = ValueBound::Create(array_length, -1); // array_length - 1
+ ValueRange* array_range = new (GetGraph()->GetArena())
+ ValueRange(GetGraph()->GetArena(), lower, upper);
+ if (index_range->FitsIn(array_range)) {
+ ReplaceBoundsCheck(bounds_check, index);
+ return;
+ }
+ }
+
+ if (index->IsIntConstant()) {
+ ValueRange* array_length_range = LookupValueRange(array_length, block);
+ int constant = index->AsIntConstant()->GetValue();
+ if (array_length_range != nullptr &&
+ array_length_range->GetLower().IsConstant()) {
+ if (constant < array_length_range->GetLower().GetConstant()) {
+ ReplaceBoundsCheck(bounds_check, index);
+ return;
+ }
+ }
+
+ // Once we have an array access like 'array[5] = 1', we record array.length >= 6.
+ ValueBound lower = ValueBound::Create(nullptr, constant + 1);
+ ValueBound upper = ValueBound::Max();
+ ValueRange* range = new (GetGraph()->GetArena())
+ ValueRange(GetGraph()->GetArena(), lower, upper);
+ ValueRange* existing_range = LookupValueRange(array_length, block);
+ ValueRange* new_range = range;
+ if (existing_range != nullptr) {
+ new_range = range->Narrow(existing_range);
+ }
+ GetValueRangeMap(block)->Overwrite(array_length->GetId(), new_range);
+ }
+ }
+
+ void ReplaceBoundsCheck(HInstruction* bounds_check, HInstruction* index) {
+ bounds_check->ReplaceWith(index);
+ bounds_check->GetBlock()->RemoveInstruction(bounds_check);
+ }
+
+ void VisitPhi(HPhi* phi) {
+ if (phi->IsLoopHeaderPhi() && phi->GetType() == Primitive::kPrimInt) {
+ DCHECK_EQ(phi->InputCount(), 2U);
+ HInstruction* instruction = phi->InputAt(1);
+ if (instruction->IsAdd()) {
+ HAdd* add = instruction->AsAdd();
+ HInstruction* left = add->GetLeft();
+ HInstruction* right = add->GetRight();
+ if (left == phi && right->IsIntConstant()) {
+ HInstruction* initial_value = phi->InputAt(0);
+ ValueRange* range = nullptr;
+ if (right->AsIntConstant()->GetValue() == 0) {
+ // Add constant 0. It's really a fixed value.
+ range = new (GetGraph()->GetArena()) ValueRange(
+ GetGraph()->GetArena(),
+ ValueBound::Create(initial_value, 0),
+ ValueBound::Create(initial_value, 0));
+ } else {
+ // Monotonically increasing/decreasing.
+ range = MonotonicValueRange::Create(
+ GetGraph()->GetArena(),
+ initial_value,
+ right->AsIntConstant()->GetValue());
+ }
+ GetValueRangeMap(phi->GetBlock())->Overwrite(phi->GetId(), range);
+ }
+ }
+ }
+ }
+
+ void VisitIf(HIf* instruction) {
+ if (instruction->InputAt(0)->IsCondition()) {
+ HCondition* cond = instruction->InputAt(0)->AsCondition();
+ IfCondition cmp = cond->GetCondition();
+ if (cmp == kCondGT || cmp == kCondGE ||
+ cmp == kCondLT || cmp == kCondLE) {
+ HInstruction* left = cond->GetLeft();
+ HInstruction* right = cond->GetRight();
+ HandleIf(instruction, left, right, cmp);
+ }
+ }
+ }
+
+ void VisitAdd(HAdd* add) {
+ HInstruction* right = add->GetRight();
+ if (right->IsIntConstant()) {
+ ValueRange* left_range = LookupValueRange(add->GetLeft(), add->GetBlock());
+ if (left_range == nullptr) {
+ return;
+ }
+ ValueRange* range = left_range->Add(right->AsIntConstant()->GetValue());
+ if (range != nullptr) {
+ GetValueRangeMap(add->GetBlock())->Overwrite(add->GetId(), range);
+ }
+ }
+ }
+
+ void VisitSub(HSub* sub) {
+ HInstruction* left = sub->GetLeft();
+ HInstruction* right = sub->GetRight();
+ if (right->IsIntConstant()) {
+ ValueRange* left_range = LookupValueRange(left, sub->GetBlock());
+ if (left_range == nullptr) {
+ return;
+ }
+ ValueRange* range = left_range->Add(-right->AsIntConstant()->GetValue());
+ if (range != nullptr) {
+ GetValueRangeMap(sub->GetBlock())->Overwrite(sub->GetId(), range);
+ return;
+ }
+ }
+
+ // Here we are interested in the typical triangular case of nested loops,
+ // such as the inner loop 'for (int j=0; j<array.length-i; j++)' where i
+ // is the index for outer loop. In this case, we know j is bounded by array.length-1.
+ if (left->IsArrayLength()) {
+ HInstruction* array_length = left->AsArrayLength();
+ ValueRange* right_range = LookupValueRange(right, sub->GetBlock());
+ if (right_range != nullptr) {
+ ValueBound lower = right_range->GetLower();
+ ValueBound upper = right_range->GetUpper();
+ if (lower.IsConstant() && upper.IsRelativeToArrayLength()) {
+ HInstruction* upper_inst = upper.GetInstruction();
+ if (upper_inst->IsArrayLength() &&
+ upper_inst->AsArrayLength() == array_length) {
+ // (array.length - v) where v is in [c1, array.length + c2]
+ // gets [-c2, array.length - c1] as its value range.
+ ValueRange* range = new (GetGraph()->GetArena()) ValueRange(
+ GetGraph()->GetArena(),
+ ValueBound::Create(nullptr, - upper.GetConstant()),
+ ValueBound::Create(array_length, - lower.GetConstant()));
+ GetValueRangeMap(sub->GetBlock())->Overwrite(sub->GetId(), range);
+ }
+ }
+ }
+ }
+ }
+
+ std::vector<std::unique_ptr<ArenaSafeMap<int, ValueRange*>>> maps_;
+
+ DISALLOW_COPY_AND_ASSIGN(BCEVisitor);
+};
+
+void BoundsCheckElimination::Run() {
+ BCEVisitor visitor(graph_);
+ // Reverse post order guarantees a node's dominators are visited first.
+ // We want to visit in the dominator-based order since if a value is known to
+ // be bounded by a range at one instruction, it must be true that all uses of
+ // that value dominated by that instruction fits in that range. Range of that
+ // value can be narrowed further down in the dominator tree.
+ //
+ // TODO: only visit blocks that dominate some array accesses.
+ visitor.VisitReversePostOrder();
+}
+
+} // namespace art
diff --git a/compiler/optimizing/bounds_check_elimination.h b/compiler/optimizing/bounds_check_elimination.h
new file mode 100644
index 0000000000..05cb185ed7
--- /dev/null
+++ b/compiler/optimizing/bounds_check_elimination.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_BOUNDS_CHECK_ELIMINATION_H_
+#define ART_COMPILER_OPTIMIZING_BOUNDS_CHECK_ELIMINATION_H_
+
+#include "optimization.h"
+
+namespace art {
+
+class BoundsCheckElimination : public HOptimization {
+ public:
+ explicit BoundsCheckElimination(HGraph* graph) : HOptimization(graph, true, "BCE") {}
+
+ void Run() OVERRIDE;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(BoundsCheckElimination);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_BOUNDS_CHECK_ELIMINATION_H_
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
new file mode 100644
index 0000000000..f650ff21df
--- /dev/null
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -0,0 +1,1045 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "bounds_check_elimination.h"
+#include "builder.h"
+#include "gvn.h"
+#include "nodes.h"
+#include "optimizing_unit_test.h"
+#include "utils/arena_allocator.h"
+
+#include "gtest/gtest.h"
+
+namespace art {
+
+// if (i < 0) { array[i] = 1; // Can't eliminate. }
+// else if (i >= array.length) { array[i] = 1; // Can't eliminate. }
+// else { array[i] = 1; // Can eliminate. }
+TEST(BoundsCheckEliminationTest, NarrowingRangeArrayBoundsElimination) {
+ ArenaPool pool;
+ ArenaAllocator allocator(&pool);
+
+ HGraph* graph = new (&allocator) HGraph(&allocator);
+
+ HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(entry);
+ graph->SetEntryBlock(entry);
+ HInstruction* parameter1 = new (&allocator)
+ HParameterValue(0, Primitive::kPrimNot); // array
+ HInstruction* parameter2 = new (&allocator)
+ HParameterValue(0, Primitive::kPrimInt); // i
+ HInstruction* constant_1 = new (&allocator) HIntConstant(1);
+ HInstruction* constant_0 = new (&allocator) HIntConstant(0);
+ entry->AddInstruction(parameter1);
+ entry->AddInstruction(parameter2);
+ entry->AddInstruction(constant_1);
+ entry->AddInstruction(constant_0);
+
+ HBasicBlock* block1 = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(block1);
+ HInstruction* cmp = new (&allocator) HGreaterThanOrEqual(parameter2, constant_0);
+ HIf* if_inst = new (&allocator) HIf(cmp);
+ block1->AddInstruction(cmp);
+ block1->AddInstruction(if_inst);
+ entry->AddSuccessor(block1);
+
+ HBasicBlock* block2 = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(block2);
+ HNullCheck* null_check = new (&allocator) HNullCheck(parameter1, 0);
+ HArrayLength* array_length = new (&allocator) HArrayLength(null_check);
+ HBoundsCheck* bounds_check2 = new (&allocator)
+ HBoundsCheck(parameter2, array_length, 0);
+ HArraySet* array_set = new (&allocator) HArraySet(
+ null_check, bounds_check2, constant_1, Primitive::kPrimInt, 0);
+ block2->AddInstruction(null_check);
+ block2->AddInstruction(array_length);
+ block2->AddInstruction(bounds_check2);
+ block2->AddInstruction(array_set);
+
+ HBasicBlock* block3 = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(block3);
+ null_check = new (&allocator) HNullCheck(parameter1, 0);
+ array_length = new (&allocator) HArrayLength(null_check);
+ cmp = new (&allocator) HLessThan(parameter2, array_length);
+ if_inst = new (&allocator) HIf(cmp);
+ block3->AddInstruction(null_check);
+ block3->AddInstruction(array_length);
+ block3->AddInstruction(cmp);
+ block3->AddInstruction(if_inst);
+
+ HBasicBlock* block4 = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(block4);
+ null_check = new (&allocator) HNullCheck(parameter1, 0);
+ array_length = new (&allocator) HArrayLength(null_check);
+ HBoundsCheck* bounds_check4 = new (&allocator)
+ HBoundsCheck(parameter2, array_length, 0);
+ array_set = new (&allocator) HArraySet(
+ null_check, bounds_check4, constant_1, Primitive::kPrimInt, 0);
+ block4->AddInstruction(null_check);
+ block4->AddInstruction(array_length);
+ block4->AddInstruction(bounds_check4);
+ block4->AddInstruction(array_set);
+
+ HBasicBlock* block5 = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(block5);
+ null_check = new (&allocator) HNullCheck(parameter1, 0);
+ array_length = new (&allocator) HArrayLength(null_check);
+ HBoundsCheck* bounds_check5 = new (&allocator)
+ HBoundsCheck(parameter2, array_length, 0);
+ array_set = new (&allocator) HArraySet(
+ null_check, bounds_check5, constant_1, Primitive::kPrimInt, 0);
+ block5->AddInstruction(null_check);
+ block5->AddInstruction(array_length);
+ block5->AddInstruction(bounds_check5);
+ block5->AddInstruction(array_set);
+
+ HBasicBlock* exit = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(exit);
+ block2->AddSuccessor(exit);
+ block4->AddSuccessor(exit);
+ block5->AddSuccessor(exit);
+ exit->AddInstruction(new (&allocator) HExit());
+
+ block1->AddSuccessor(block3); // True successor
+ block1->AddSuccessor(block2); // False successor
+
+ block3->AddSuccessor(block5); // True successor
+ block3->AddSuccessor(block4); // False successor
+
+ graph->BuildDominatorTree();
+ GlobalValueNumberer(&allocator, graph).Run();
+ BoundsCheckElimination bounds_check_elimination(graph);
+ bounds_check_elimination.Run();
+ ASSERT_FALSE(IsRemoved(bounds_check2));
+ ASSERT_FALSE(IsRemoved(bounds_check4));
+ ASSERT_TRUE(IsRemoved(bounds_check5));
+}
+
+// if (i > 0) {
+// // Positive number plus MAX_INT will overflow and be negative.
+// int j = i + Integer.MAX_VALUE;
+// if (j < array.length) array[j] = 1; // Can't eliminate.
+// }
+TEST(BoundsCheckEliminationTest, OverflowArrayBoundsElimination) {
+ ArenaPool pool;
+ ArenaAllocator allocator(&pool);
+
+ HGraph* graph = new (&allocator) HGraph(&allocator);
+
+ HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(entry);
+ graph->SetEntryBlock(entry);
+ HInstruction* parameter1 = new (&allocator)
+ HParameterValue(0, Primitive::kPrimNot); // array
+ HInstruction* parameter2 = new (&allocator)
+ HParameterValue(0, Primitive::kPrimInt); // i
+ HInstruction* constant_1 = new (&allocator) HIntConstant(1);
+ HInstruction* constant_0 = new (&allocator) HIntConstant(0);
+ HInstruction* constant_max_int = new (&allocator) HIntConstant(INT_MAX);
+ entry->AddInstruction(parameter1);
+ entry->AddInstruction(parameter2);
+ entry->AddInstruction(constant_1);
+ entry->AddInstruction(constant_0);
+ entry->AddInstruction(constant_max_int);
+
+ HBasicBlock* block1 = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(block1);
+ HInstruction* cmp = new (&allocator) HLessThanOrEqual(parameter2, constant_0);
+ HIf* if_inst = new (&allocator) HIf(cmp);
+ block1->AddInstruction(cmp);
+ block1->AddInstruction(if_inst);
+ entry->AddSuccessor(block1);
+
+ HBasicBlock* block2 = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(block2);
+ HInstruction* add = new (&allocator) HAdd(Primitive::kPrimInt, parameter2, constant_max_int);
+ HNullCheck* null_check = new (&allocator) HNullCheck(parameter1, 0);
+ HArrayLength* array_length = new (&allocator) HArrayLength(null_check);
+ HInstruction* cmp2 = new (&allocator) HGreaterThanOrEqual(add, array_length);
+ if_inst = new (&allocator) HIf(cmp2);
+ block2->AddInstruction(add);
+ block2->AddInstruction(null_check);
+ block2->AddInstruction(array_length);
+ block2->AddInstruction(cmp2);
+ block2->AddInstruction(if_inst);
+
+ HBasicBlock* block3 = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(block3);
+ HBoundsCheck* bounds_check = new (&allocator)
+ HBoundsCheck(add, array_length, 0);
+ HArraySet* array_set = new (&allocator) HArraySet(
+ null_check, bounds_check, constant_1, Primitive::kPrimInt, 0);
+ block3->AddInstruction(bounds_check);
+ block3->AddInstruction(array_set);
+
+ HBasicBlock* exit = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(exit);
+ exit->AddInstruction(new (&allocator) HExit());
+ block1->AddSuccessor(exit); // true successor
+ block1->AddSuccessor(block2); // false successor
+ block2->AddSuccessor(exit); // true successor
+ block2->AddSuccessor(block3); // false successor
+ block3->AddSuccessor(exit);
+
+ graph->BuildDominatorTree();
+ GlobalValueNumberer(&allocator, graph).Run();
+ BoundsCheckElimination bounds_check_elimination(graph);
+ bounds_check_elimination.Run();
+ ASSERT_FALSE(IsRemoved(bounds_check));
+}
+
+// if (i < array.length) {
+// int j = i - Integer.MAX_VALUE;
+// j = j - Integer.MAX_VALUE; // j is (i+2) after subtracting MAX_INT twice
+// if (j > 0) array[j] = 1; // Can't eliminate.
+// }
+TEST(BoundsCheckEliminationTest, UnderflowArrayBoundsElimination) {
+ ArenaPool pool;
+ ArenaAllocator allocator(&pool);
+
+ HGraph* graph = new (&allocator) HGraph(&allocator);
+
+ HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(entry);
+ graph->SetEntryBlock(entry);
+ HInstruction* parameter1 = new (&allocator)
+ HParameterValue(0, Primitive::kPrimNot); // array
+ HInstruction* parameter2 = new (&allocator)
+ HParameterValue(0, Primitive::kPrimInt); // i
+ HInstruction* constant_1 = new (&allocator) HIntConstant(1);
+ HInstruction* constant_0 = new (&allocator) HIntConstant(0);
+ HInstruction* constant_max_int = new (&allocator) HIntConstant(INT_MAX);
+ entry->AddInstruction(parameter1);
+ entry->AddInstruction(parameter2);
+ entry->AddInstruction(constant_1);
+ entry->AddInstruction(constant_0);
+ entry->AddInstruction(constant_max_int);
+
+ HBasicBlock* block1 = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(block1);
+ HNullCheck* null_check = new (&allocator) HNullCheck(parameter1, 0);
+ HArrayLength* array_length = new (&allocator) HArrayLength(null_check);
+ HInstruction* cmp = new (&allocator) HGreaterThanOrEqual(parameter2, array_length);
+ HIf* if_inst = new (&allocator) HIf(cmp);
+ block1->AddInstruction(null_check);
+ block1->AddInstruction(array_length);
+ block1->AddInstruction(cmp);
+ block1->AddInstruction(if_inst);
+ entry->AddSuccessor(block1);
+
+ HBasicBlock* block2 = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(block2);
+ HInstruction* sub1 = new (&allocator) HSub(Primitive::kPrimInt, parameter2, constant_max_int);
+ HInstruction* sub2 = new (&allocator) HSub(Primitive::kPrimInt, sub1, constant_max_int);
+ HInstruction* cmp2 = new (&allocator) HLessThanOrEqual(sub2, constant_0);
+ if_inst = new (&allocator) HIf(cmp2);
+ block2->AddInstruction(sub1);
+ block2->AddInstruction(sub2);
+ block2->AddInstruction(cmp2);
+ block2->AddInstruction(if_inst);
+
+ HBasicBlock* block3 = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(block3);
+ HBoundsCheck* bounds_check = new (&allocator)
+ HBoundsCheck(sub2, array_length, 0);
+ HArraySet* array_set = new (&allocator) HArraySet(
+ null_check, bounds_check, constant_1, Primitive::kPrimInt, 0);
+ block3->AddInstruction(bounds_check);
+ block3->AddInstruction(array_set);
+
+ HBasicBlock* exit = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(exit);
+ exit->AddInstruction(new (&allocator) HExit());
+ block1->AddSuccessor(exit); // true successor
+ block1->AddSuccessor(block2); // false successor
+ block2->AddSuccessor(exit); // true successor
+ block2->AddSuccessor(block3); // false successor
+ block3->AddSuccessor(exit);
+
+ graph->BuildDominatorTree();
+ GlobalValueNumberer(&allocator, graph).Run();
+ BoundsCheckElimination bounds_check_elimination(graph);
+ bounds_check_elimination.Run();
+ ASSERT_FALSE(IsRemoved(bounds_check));
+}
+
+// array[5] = 1; // Can't eliminate.
+// array[4] = 1; // Can eliminate.
+// array[6] = 1; // Can't eliminate.
+TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) {
+ ArenaPool pool;
+ ArenaAllocator allocator(&pool);
+
+ HGraph* graph = new (&allocator) HGraph(&allocator);
+
+ HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(entry);
+ graph->SetEntryBlock(entry);
+ HInstruction* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimNot);
+ HInstruction* constant_5 = new (&allocator) HIntConstant(5);
+ HInstruction* constant_4 = new (&allocator) HIntConstant(4);
+ HInstruction* constant_6 = new (&allocator) HIntConstant(6);
+ HInstruction* constant_1 = new (&allocator) HIntConstant(1);
+ entry->AddInstruction(parameter);
+ entry->AddInstruction(constant_5);
+ entry->AddInstruction(constant_4);
+ entry->AddInstruction(constant_6);
+ entry->AddInstruction(constant_1);
+
+ HBasicBlock* block = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(block);
+ entry->AddSuccessor(block);
+
+ HNullCheck* null_check = new (&allocator) HNullCheck(parameter, 0);
+ HArrayLength* array_length = new (&allocator) HArrayLength(null_check);
+ HBoundsCheck* bounds_check5 = new (&allocator)
+ HBoundsCheck(constant_5, array_length, 0);
+ HInstruction* array_set = new (&allocator) HArraySet(
+ null_check, bounds_check5, constant_1, Primitive::kPrimInt, 0);
+ block->AddInstruction(null_check);
+ block->AddInstruction(array_length);
+ block->AddInstruction(bounds_check5);
+ block->AddInstruction(array_set);
+
+ null_check = new (&allocator) HNullCheck(parameter, 0);
+ array_length = new (&allocator) HArrayLength(null_check);
+ HBoundsCheck* bounds_check4 = new (&allocator)
+ HBoundsCheck(constant_4, array_length, 0);
+ array_set = new (&allocator) HArraySet(
+ null_check, bounds_check4, constant_1, Primitive::kPrimInt, 0);
+ block->AddInstruction(null_check);
+ block->AddInstruction(array_length);
+ block->AddInstruction(bounds_check4);
+ block->AddInstruction(array_set);
+
+ null_check = new (&allocator) HNullCheck(parameter, 0);
+ array_length = new (&allocator) HArrayLength(null_check);
+ HBoundsCheck* bounds_check6 = new (&allocator)
+ HBoundsCheck(constant_6, array_length, 0);
+ array_set = new (&allocator) HArraySet(
+ null_check, bounds_check6, constant_1, Primitive::kPrimInt, 0);
+ block->AddInstruction(null_check);
+ block->AddInstruction(array_length);
+ block->AddInstruction(bounds_check6);
+ block->AddInstruction(array_set);
+
+ block->AddInstruction(new (&allocator) HGoto());
+
+ HBasicBlock* exit = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(exit);
+ block->AddSuccessor(exit);
+ exit->AddInstruction(new (&allocator) HExit());
+
+ graph->BuildDominatorTree();
+ GlobalValueNumberer(&allocator, graph).Run();
+ BoundsCheckElimination bounds_check_elimination(graph);
+ bounds_check_elimination.Run();
+ ASSERT_FALSE(IsRemoved(bounds_check5));
+ ASSERT_TRUE(IsRemoved(bounds_check4));
+ ASSERT_FALSE(IsRemoved(bounds_check6));
+}
+
+// for (int i=initial; i<array.length; i+=increment) { array[i] = 10; }
+static HGraph* BuildSSAGraph1(ArenaAllocator* allocator,
+ HInstruction** bounds_check,
+ int initial,
+ int increment,
+ IfCondition cond = kCondGE) {
+ HGraph* graph = new (allocator) HGraph(allocator);
+
+ HBasicBlock* entry = new (allocator) HBasicBlock(graph);
+ graph->AddBlock(entry);
+ graph->SetEntryBlock(entry);
+ HInstruction* parameter = new (allocator) HParameterValue(0, Primitive::kPrimNot);
+ HInstruction* constant_initial = new (allocator) HIntConstant(initial);
+ HInstruction* constant_increment = new (allocator) HIntConstant(increment);
+ HInstruction* constant_10 = new (allocator) HIntConstant(10);
+ entry->AddInstruction(parameter);
+ entry->AddInstruction(constant_initial);
+ entry->AddInstruction(constant_increment);
+ entry->AddInstruction(constant_10);
+
+ HBasicBlock* block = new (allocator) HBasicBlock(graph);
+ graph->AddBlock(block);
+ entry->AddSuccessor(block);
+ block->AddInstruction(new (allocator) HGoto());
+
+ HBasicBlock* loop_header = new (allocator) HBasicBlock(graph);
+ HBasicBlock* loop_body = new (allocator) HBasicBlock(graph);
+ HBasicBlock* exit = new (allocator) HBasicBlock(graph);
+
+ graph->AddBlock(loop_header);
+ graph->AddBlock(loop_body);
+ graph->AddBlock(exit);
+ block->AddSuccessor(loop_header);
+ loop_header->AddSuccessor(exit); // true successor
+ loop_header->AddSuccessor(loop_body); // false successor
+ loop_body->AddSuccessor(loop_header);
+
+ HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt);
+ phi->AddInput(constant_initial);
+ HInstruction* null_check = new (allocator) HNullCheck(parameter, 0);
+ HInstruction* array_length = new (allocator) HArrayLength(null_check);
+ HInstruction* cmp = nullptr;
+ if (cond == kCondGE) {
+ cmp = new (allocator) HGreaterThanOrEqual(phi, array_length);
+ } else {
+ DCHECK(cond == kCondGT);
+ cmp = new (allocator) HGreaterThan(phi, array_length);
+ }
+ HInstruction* if_inst = new (allocator) HIf(cmp);
+ loop_header->AddPhi(phi);
+ loop_header->AddInstruction(null_check);
+ loop_header->AddInstruction(array_length);
+ loop_header->AddInstruction(cmp);
+ loop_header->AddInstruction(if_inst);
+
+ null_check = new (allocator) HNullCheck(parameter, 0);
+ array_length = new (allocator) HArrayLength(null_check);
+ *bounds_check = new (allocator) HBoundsCheck(phi, array_length, 0);
+ HInstruction* array_set = new (allocator) HArraySet(
+ null_check, *bounds_check, constant_10, Primitive::kPrimInt, 0);
+
+ HInstruction* add = new (allocator) HAdd(Primitive::kPrimInt, phi, constant_increment);
+ loop_body->AddInstruction(null_check);
+ loop_body->AddInstruction(array_length);
+ loop_body->AddInstruction(*bounds_check);
+ loop_body->AddInstruction(array_set);
+ loop_body->AddInstruction(add);
+ loop_body->AddInstruction(new (allocator) HGoto());
+ phi->AddInput(add);
+
+ exit->AddInstruction(new (allocator) HExit());
+
+ return graph;
+}
+
+TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) {
+ ArenaPool pool;
+ ArenaAllocator allocator(&pool);
+
+ // for (int i=0; i<array.length; i++) { array[i] = 10; // Can eliminate with gvn. }
+ HInstruction* bounds_check = nullptr;
+ HGraph* graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1);
+ graph->BuildDominatorTree();
+ BoundsCheckElimination bounds_check_elimination(graph);
+ bounds_check_elimination.Run();
+ ASSERT_FALSE(IsRemoved(bounds_check));
+
+ // This time add gvn. Need gvn to eliminate the second
+ // HArrayLength which uses the null check as its input.
+ graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1);
+ graph->BuildDominatorTree();
+ GlobalValueNumberer(&allocator, graph).Run();
+ BoundsCheckElimination bounds_check_elimination_after_gvn(graph);
+ bounds_check_elimination_after_gvn.Run();
+ ASSERT_TRUE(IsRemoved(bounds_check));
+
+ // for (int i=1; i<array.length; i++) { array[i] = 10; // Can eliminate. }
+ graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 1);
+ graph->BuildDominatorTree();
+ GlobalValueNumberer(&allocator, graph).Run();
+ BoundsCheckElimination bounds_check_elimination_with_initial_1(graph);
+ bounds_check_elimination_with_initial_1.Run();
+ ASSERT_TRUE(IsRemoved(bounds_check));
+
+ // for (int i=-1; i<array.length; i++) { array[i] = 10; // Can't eliminate. }
+ graph = BuildSSAGraph1(&allocator, &bounds_check, -1, 1);
+ graph->BuildDominatorTree();
+ GlobalValueNumberer(&allocator, graph).Run();
+ BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph);
+ bounds_check_elimination_with_initial_minus_1.Run();
+ ASSERT_FALSE(IsRemoved(bounds_check));
+
+ // for (int i=0; i<=array.length; i++) { array[i] = 10; // Can't eliminate. }
+ graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1, kCondGT);
+ graph->BuildDominatorTree();
+ GlobalValueNumberer(&allocator, graph).Run();
+ BoundsCheckElimination bounds_check_elimination_with_greater_than(graph);
+ bounds_check_elimination_with_greater_than.Run();
+ ASSERT_FALSE(IsRemoved(bounds_check));
+
+ // for (int i=0; i<array.length; i += 2) {
+ // array[i] = 10; // Can't eliminate due to overflow concern. }
+ graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 2);
+ graph->BuildDominatorTree();
+ GlobalValueNumberer(&allocator, graph).Run();
+ BoundsCheckElimination bounds_check_elimination_with_increment_2(graph);
+ bounds_check_elimination_with_increment_2.Run();
+ ASSERT_FALSE(IsRemoved(bounds_check));
+
+ // for (int i=1; i<array.length; i += 2) { array[i] = 10; // Can eliminate. }
+ graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 2);
+ graph->BuildDominatorTree();
+ GlobalValueNumberer(&allocator, graph).Run();
+ BoundsCheckElimination bounds_check_elimination_with_increment_2_from_1(graph);
+ bounds_check_elimination_with_increment_2_from_1.Run();
+ ASSERT_TRUE(IsRemoved(bounds_check));
+}
+
+// for (int i=array.length; i>0; i+=increment) { array[i-1] = 10; }
+static HGraph* BuildSSAGraph2(ArenaAllocator* allocator,
+ HInstruction** bounds_check,
+ int initial,
+ int increment = -1,
+ IfCondition cond = kCondLE) {
+ HGraph* graph = new (allocator) HGraph(allocator);
+
+ HBasicBlock* entry = new (allocator) HBasicBlock(graph);
+ graph->AddBlock(entry);
+ graph->SetEntryBlock(entry);
+ HInstruction* parameter = new (allocator) HParameterValue(0, Primitive::kPrimNot);
+ HInstruction* constant_initial = new (allocator) HIntConstant(initial);
+ HInstruction* constant_increment = new (allocator) HIntConstant(increment);
+ HInstruction* constant_minus_1 = new (allocator) HIntConstant(-1);
+ HInstruction* constant_10 = new (allocator) HIntConstant(10);
+ entry->AddInstruction(parameter);
+ entry->AddInstruction(constant_initial);
+ entry->AddInstruction(constant_increment);
+ entry->AddInstruction(constant_minus_1);
+ entry->AddInstruction(constant_10);
+
+ HBasicBlock* block = new (allocator) HBasicBlock(graph);
+ graph->AddBlock(block);
+ entry->AddSuccessor(block);
+ HInstruction* null_check = new (allocator) HNullCheck(parameter, 0);
+ HInstruction* array_length = new (allocator) HArrayLength(null_check);
+ block->AddInstruction(null_check);
+ block->AddInstruction(array_length);
+ block->AddInstruction(new (allocator) HGoto());
+
+ HBasicBlock* loop_header = new (allocator) HBasicBlock(graph);
+ HBasicBlock* loop_body = new (allocator) HBasicBlock(graph);
+ HBasicBlock* exit = new (allocator) HBasicBlock(graph);
+
+ graph->AddBlock(loop_header);
+ graph->AddBlock(loop_body);
+ graph->AddBlock(exit);
+ block->AddSuccessor(loop_header);
+ loop_header->AddSuccessor(exit); // true successor
+ loop_header->AddSuccessor(loop_body); // false successor
+ loop_body->AddSuccessor(loop_header);
+
+ HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt);
+ phi->AddInput(array_length);
+ HInstruction* cmp = nullptr;
+ if (cond == kCondLE) {
+ cmp = new (allocator) HLessThanOrEqual(phi, constant_initial);
+ } else {
+ DCHECK(cond == kCondLT);
+ cmp = new (allocator) HLessThan(phi, constant_initial);
+ }
+ HInstruction* if_inst = new (allocator) HIf(cmp);
+ loop_header->AddPhi(phi);
+ loop_header->AddInstruction(cmp);
+ loop_header->AddInstruction(if_inst);
+
+ HInstruction* add = new (allocator) HAdd(Primitive::kPrimInt, phi, constant_minus_1);
+ null_check = new (allocator) HNullCheck(parameter, 0);
+ array_length = new (allocator) HArrayLength(null_check);
+ *bounds_check = new (allocator) HBoundsCheck(add, array_length, 0);
+ HInstruction* array_set = new (allocator) HArraySet(
+ null_check, *bounds_check, constant_10, Primitive::kPrimInt, 0);
+ HInstruction* add_phi = new (allocator) HAdd(Primitive::kPrimInt, phi, constant_increment);
+ loop_body->AddInstruction(add);
+ loop_body->AddInstruction(null_check);
+ loop_body->AddInstruction(array_length);
+ loop_body->AddInstruction(*bounds_check);
+ loop_body->AddInstruction(array_set);
+ loop_body->AddInstruction(add_phi);
+ loop_body->AddInstruction(new (allocator) HGoto());
+ phi->AddInput(add);
+
+ exit->AddInstruction(new (allocator) HExit());
+
+ return graph;
+}
+
+TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) {
+ ArenaPool pool;
+ ArenaAllocator allocator(&pool);
+
+ // for (int i=array.length; i>0; i--) { array[i-1] = 10; // Can eliminate with gvn. }
+ HInstruction* bounds_check = nullptr;
+ HGraph* graph = BuildSSAGraph2(&allocator, &bounds_check, 0);
+ graph->BuildDominatorTree();
+ BoundsCheckElimination bounds_check_elimination(graph);
+ bounds_check_elimination.Run();
+ ASSERT_FALSE(IsRemoved(bounds_check));
+
+ // This time add gvn. Need gvn to eliminate the second
+ // HArrayLength which uses the null check as its input.
+ graph = BuildSSAGraph2(&allocator, &bounds_check, 0);
+ graph->BuildDominatorTree();
+ GlobalValueNumberer(&allocator, graph).Run();
+ BoundsCheckElimination bounds_check_elimination_after_gvn(graph);
+ bounds_check_elimination_after_gvn.Run();
+ ASSERT_TRUE(IsRemoved(bounds_check));
+
+ // for (int i=array.length; i>1; i--) { array[i-1] = 10; // Can eliminate. }
+ graph = BuildSSAGraph2(&allocator, &bounds_check, 1);
+ graph->BuildDominatorTree();
+ GlobalValueNumberer(&allocator, graph).Run();
+ BoundsCheckElimination bounds_check_elimination_with_initial_1(graph);
+ bounds_check_elimination_with_initial_1.Run();
+ ASSERT_TRUE(IsRemoved(bounds_check));
+
+ // for (int i=array.length; i>-1; i--) { array[i-1] = 10; // Can't eliminate. }
+ graph = BuildSSAGraph2(&allocator, &bounds_check, -1);
+ graph->BuildDominatorTree();
+ GlobalValueNumberer(&allocator, graph).Run();
+ BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph);
+ bounds_check_elimination_with_initial_minus_1.Run();
+ ASSERT_FALSE(IsRemoved(bounds_check));
+
+ // for (int i=array.length; i>=0; i--) { array[i-1] = 10; // Can't eliminate. }
+ graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -1, kCondLT);
+ graph->BuildDominatorTree();
+ GlobalValueNumberer(&allocator, graph).Run();
+ BoundsCheckElimination bounds_check_elimination_with_less_than(graph);
+ bounds_check_elimination_with_less_than.Run();
+ ASSERT_FALSE(IsRemoved(bounds_check));
+
+ // for (int i=array.length; i>0; i-=2) { array[i-1] = 10; // Can eliminate. }
+ graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -2);
+ graph->BuildDominatorTree();
+ GlobalValueNumberer(&allocator, graph).Run();
+ BoundsCheckElimination bounds_check_elimination_increment_minus_2(graph);
+ bounds_check_elimination_increment_minus_2.Run();
+ ASSERT_TRUE(IsRemoved(bounds_check));
+}
+
+// int[] array = new array[10];
+// for (int i=0; i<10; i+=increment) { array[i] = 10; }
+static HGraph* BuildSSAGraph3(ArenaAllocator* allocator,
+ HInstruction** bounds_check,
+ int initial,
+ int increment,
+ IfCondition cond) {
+ HGraph* graph = new (allocator) HGraph(allocator);
+
+ HBasicBlock* entry = new (allocator) HBasicBlock(graph);
+ graph->AddBlock(entry);
+ graph->SetEntryBlock(entry);
+ HInstruction* constant_10 = new (allocator) HIntConstant(10);
+ HInstruction* constant_initial = new (allocator) HIntConstant(initial);
+ HInstruction* constant_increment = new (allocator) HIntConstant(increment);
+ entry->AddInstruction(constant_10);
+ entry->AddInstruction(constant_initial);
+ entry->AddInstruction(constant_increment);
+
+ HBasicBlock* block = new (allocator) HBasicBlock(graph);
+ graph->AddBlock(block);
+ entry->AddSuccessor(block);
+ HInstruction* new_array = new (allocator)
+ HNewArray(constant_10, 0, Primitive::kPrimInt);
+ block->AddInstruction(new_array);
+ block->AddInstruction(new (allocator) HGoto());
+
+ HBasicBlock* loop_header = new (allocator) HBasicBlock(graph);
+ HBasicBlock* loop_body = new (allocator) HBasicBlock(graph);
+ HBasicBlock* exit = new (allocator) HBasicBlock(graph);
+
+ graph->AddBlock(loop_header);
+ graph->AddBlock(loop_body);
+ graph->AddBlock(exit);
+ block->AddSuccessor(loop_header);
+ loop_header->AddSuccessor(exit); // true successor
+ loop_header->AddSuccessor(loop_body); // false successor
+ loop_body->AddSuccessor(loop_header);
+
+ HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt);
+ phi->AddInput(constant_initial);
+ HInstruction* cmp = nullptr;
+ if (cond == kCondGE) {
+ cmp = new (allocator) HGreaterThanOrEqual(phi, constant_10);
+ } else {
+ DCHECK(cond == kCondGT);
+ cmp = new (allocator) HGreaterThan(phi, constant_10);
+ }
+ HInstruction* if_inst = new (allocator) HIf(cmp);
+ loop_header->AddPhi(phi);
+ loop_header->AddInstruction(cmp);
+ loop_header->AddInstruction(if_inst);
+
+ HNullCheck* null_check = new (allocator) HNullCheck(new_array, 0);
+ HArrayLength* array_length = new (allocator) HArrayLength(null_check);
+ *bounds_check = new (allocator) HBoundsCheck(phi, array_length, 0);
+ HInstruction* array_set = new (allocator) HArraySet(
+ null_check, *bounds_check, constant_10, Primitive::kPrimInt, 0);
+ HInstruction* add = new (allocator) HAdd(Primitive::kPrimInt, phi, constant_increment);
+ loop_body->AddInstruction(null_check);
+ loop_body->AddInstruction(array_length);
+ loop_body->AddInstruction(*bounds_check);
+ loop_body->AddInstruction(array_set);
+ loop_body->AddInstruction(add);
+ loop_body->AddInstruction(new (allocator) HGoto());
+ phi->AddInput(add);
+
+ exit->AddInstruction(new (allocator) HExit());
+
+ return graph;
+}
+
+TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination3) {
+ ArenaPool pool;
+ ArenaAllocator allocator(&pool);
+
+ // int[] array = new array[10];
+ // for (int i=0; i<10; i++) { array[i] = 10; // Can eliminate. }
+ HInstruction* bounds_check = nullptr;
+ HGraph* graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGE);
+ graph->BuildDominatorTree();
+ GlobalValueNumberer(&allocator, graph).Run();
+ BoundsCheckElimination bounds_check_elimination_after_gvn(graph);
+ bounds_check_elimination_after_gvn.Run();
+ ASSERT_TRUE(IsRemoved(bounds_check));
+
+ // int[] array = new array[10];
+ // for (int i=1; i<10; i++) { array[i] = 10; // Can eliminate. }
+ graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 1, kCondGE);
+ graph->BuildDominatorTree();
+ GlobalValueNumberer(&allocator, graph).Run();
+ BoundsCheckElimination bounds_check_elimination_with_initial_1(graph);
+ bounds_check_elimination_with_initial_1.Run();
+ ASSERT_TRUE(IsRemoved(bounds_check));
+
+ // int[] array = new array[10];
+ // for (int i=0; i<=10; i++) { array[i] = 10; // Can't eliminate. }
+ graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGT);
+ graph->BuildDominatorTree();
+ GlobalValueNumberer(&allocator, graph).Run();
+ BoundsCheckElimination bounds_check_elimination_with_greater_than(graph);
+ bounds_check_elimination_with_greater_than.Run();
+ ASSERT_FALSE(IsRemoved(bounds_check));
+
+ // int[] array = new array[10];
+ // for (int i=1; i<10; i+=8) { array[i] = 10; // Can eliminate. }
+ graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 8, kCondGE);
+ graph->BuildDominatorTree();
+ GlobalValueNumberer(&allocator, graph).Run();
+ BoundsCheckElimination bounds_check_elimination_increment_8(graph);
+ bounds_check_elimination_increment_8.Run();
+ ASSERT_TRUE(IsRemoved(bounds_check));
+}
+
+// for (int i=initial; i<array.length; i++) { array[array.length-i-1] = 10; }
+static HGraph* BuildSSAGraph4(ArenaAllocator* allocator,
+ HInstruction** bounds_check,
+ int initial,
+ IfCondition cond = kCondGE) {
+ HGraph* graph = new (allocator) HGraph(allocator);
+
+ HBasicBlock* entry = new (allocator) HBasicBlock(graph);
+ graph->AddBlock(entry);
+ graph->SetEntryBlock(entry);
+ HInstruction* parameter = new (allocator) HParameterValue(0, Primitive::kPrimNot);
+ HInstruction* constant_initial = new (allocator) HIntConstant(initial);
+ HInstruction* constant_1 = new (allocator) HIntConstant(1);
+ HInstruction* constant_10 = new (allocator) HIntConstant(10);
+ HInstruction* constant_minus_1 = new (allocator) HIntConstant(-1);
+ entry->AddInstruction(parameter);
+ entry->AddInstruction(constant_initial);
+ entry->AddInstruction(constant_1);
+ entry->AddInstruction(constant_10);
+ entry->AddInstruction(constant_minus_1);
+
+ HBasicBlock* block = new (allocator) HBasicBlock(graph);
+ graph->AddBlock(block);
+ entry->AddSuccessor(block);
+ block->AddInstruction(new (allocator) HGoto());
+
+ HBasicBlock* loop_header = new (allocator) HBasicBlock(graph);
+ HBasicBlock* loop_body = new (allocator) HBasicBlock(graph);
+ HBasicBlock* exit = new (allocator) HBasicBlock(graph);
+
+ graph->AddBlock(loop_header);
+ graph->AddBlock(loop_body);
+ graph->AddBlock(exit);
+ block->AddSuccessor(loop_header);
+ loop_header->AddSuccessor(exit); // true successor
+ loop_header->AddSuccessor(loop_body); // false successor
+ loop_body->AddSuccessor(loop_header);
+
+ HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt);
+ phi->AddInput(constant_initial);
+ HInstruction* null_check = new (allocator) HNullCheck(parameter, 0);
+ HInstruction* array_length = new (allocator) HArrayLength(null_check);
+ HInstruction* cmp = nullptr;
+ if (cond == kCondGE) {
+ cmp = new (allocator) HGreaterThanOrEqual(phi, array_length);
+ } else if (cond == kCondGT) {
+ cmp = new (allocator) HGreaterThan(phi, array_length);
+ }
+ HInstruction* if_inst = new (allocator) HIf(cmp);
+ loop_header->AddPhi(phi);
+ loop_header->AddInstruction(null_check);
+ loop_header->AddInstruction(array_length);
+ loop_header->AddInstruction(cmp);
+ loop_header->AddInstruction(if_inst);
+
+ null_check = new (allocator) HNullCheck(parameter, 0);
+ array_length = new (allocator) HArrayLength(null_check);
+ HInstruction* sub = new (allocator) HSub(Primitive::kPrimInt, array_length, phi);
+ HInstruction* add_minus_1 = new (allocator)
+ HAdd(Primitive::kPrimInt, sub, constant_minus_1);
+ *bounds_check = new (allocator) HBoundsCheck(add_minus_1, array_length, 0);
+ HInstruction* array_set = new (allocator) HArraySet(
+ null_check, *bounds_check, constant_10, Primitive::kPrimInt, 0);
+ HInstruction* add = new (allocator) HAdd(Primitive::kPrimInt, phi, constant_1);
+ loop_body->AddInstruction(null_check);
+ loop_body->AddInstruction(array_length);
+ loop_body->AddInstruction(sub);
+ loop_body->AddInstruction(add_minus_1);
+ loop_body->AddInstruction(*bounds_check);
+ loop_body->AddInstruction(array_set);
+ loop_body->AddInstruction(add);
+ loop_body->AddInstruction(new (allocator) HGoto());
+ phi->AddInput(add);
+
+ exit->AddInstruction(new (allocator) HExit());
+
+ return graph;
+}
+
+TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) {
+ ArenaPool pool;
+ ArenaAllocator allocator(&pool);
+
+ // for (int i=0; i<array.length; i++) { array[array.length-i-1] = 10; // Can eliminate with gvn. }
+ HInstruction* bounds_check = nullptr;
+ HGraph* graph = BuildSSAGraph4(&allocator, &bounds_check, 0);
+ graph->BuildDominatorTree();
+ BoundsCheckElimination bounds_check_elimination(graph);
+ bounds_check_elimination.Run();
+ ASSERT_FALSE(IsRemoved(bounds_check));
+
+ // This time add gvn. Need gvn to eliminate the second
+ // HArrayLength which uses the null check as its input.
+ graph = BuildSSAGraph4(&allocator, &bounds_check, 0);
+ graph->BuildDominatorTree();
+ GlobalValueNumberer(&allocator, graph).Run();
+ BoundsCheckElimination bounds_check_elimination_after_gvn(graph);
+ bounds_check_elimination_after_gvn.Run();
+ ASSERT_TRUE(IsRemoved(bounds_check));
+
+ // for (int i=1; i<array.length; i++) { array[array.length-i-1] = 10; // Can eliminate. }
+ graph = BuildSSAGraph4(&allocator, &bounds_check, 1);
+ graph->BuildDominatorTree();
+ GlobalValueNumberer(&allocator, graph).Run();
+ BoundsCheckElimination bounds_check_elimination_with_initial_1(graph);
+ bounds_check_elimination_with_initial_1.Run();
+ ASSERT_TRUE(IsRemoved(bounds_check));
+
+ // for (int i=0; i<=array.length; i++) { array[array.length-i] = 10; // Can't eliminate. }
+ graph = BuildSSAGraph4(&allocator, &bounds_check, 0, kCondGT);
+ graph->BuildDominatorTree();
+ GlobalValueNumberer(&allocator, graph).Run();
+ BoundsCheckElimination bounds_check_elimination_with_greater_than(graph);
+ bounds_check_elimination_with_greater_than.Run();
+ ASSERT_FALSE(IsRemoved(bounds_check));
+}
+
+// Bubble sort:
+// (Every array access bounds-check can be eliminated.)
+// for (int i=0; i<array.length-1; i++) {
+// for (int j=0; j<array.length-i-1; j++) {
+// if (array[j] > array[j+1]) {
+// int temp = array[j+1];
+// array[j+1] = array[j];
+// array[j] = temp;
+// }
+// }
+// }
+TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) {
+ ArenaPool pool;
+ ArenaAllocator allocator(&pool);
+
+ HGraph* graph = new (&allocator) HGraph(&allocator);
+
+ HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(entry);
+ graph->SetEntryBlock(entry);
+ HInstruction* parameter = new (&allocator) HParameterValue(0, Primitive::kPrimNot);
+ HInstruction* constant_0 = new (&allocator) HIntConstant(0);
+ HInstruction* constant_minus_1 = new (&allocator) HIntConstant(-1);
+ HInstruction* constant_1 = new (&allocator) HIntConstant(1);
+ entry->AddInstruction(parameter);
+ entry->AddInstruction(constant_0);
+ entry->AddInstruction(constant_minus_1);
+ entry->AddInstruction(constant_1);
+
+ HBasicBlock* block = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(block);
+ entry->AddSuccessor(block);
+ block->AddInstruction(new (&allocator) HGoto());
+
+ HBasicBlock* exit = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(exit);
+ exit->AddInstruction(new (&allocator) HExit());
+
+ HBasicBlock* outer_header = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(outer_header);
+ HPhi* phi_i = new (&allocator) HPhi(&allocator, 0, 0, Primitive::kPrimInt);
+ phi_i->AddInput(constant_0);
+ HNullCheck* null_check = new (&allocator) HNullCheck(parameter, 0);
+ HArrayLength* array_length = new (&allocator) HArrayLength(null_check);
+ HAdd* add = new (&allocator) HAdd(Primitive::kPrimInt, array_length, constant_minus_1);
+ HInstruction* cmp = new (&allocator) HGreaterThanOrEqual(phi_i, add);
+ HIf* if_inst = new (&allocator) HIf(cmp);
+ outer_header->AddPhi(phi_i);
+ outer_header->AddInstruction(null_check);
+ outer_header->AddInstruction(array_length);
+ outer_header->AddInstruction(add);
+ outer_header->AddInstruction(cmp);
+ outer_header->AddInstruction(if_inst);
+
+ HBasicBlock* inner_header = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(inner_header);
+ HPhi* phi_j = new (&allocator) HPhi(&allocator, 0, 0, Primitive::kPrimInt);
+ phi_j->AddInput(constant_0);
+ null_check = new (&allocator) HNullCheck(parameter, 0);
+ array_length = new (&allocator) HArrayLength(null_check);
+ HSub* sub = new (&allocator) HSub(Primitive::kPrimInt, array_length, phi_i);
+ add = new (&allocator) HAdd(Primitive::kPrimInt, sub, constant_minus_1);
+ cmp = new (&allocator) HGreaterThanOrEqual(phi_j, add);
+ if_inst = new (&allocator) HIf(cmp);
+ inner_header->AddPhi(phi_j);
+ inner_header->AddInstruction(null_check);
+ inner_header->AddInstruction(array_length);
+ inner_header->AddInstruction(sub);
+ inner_header->AddInstruction(add);
+ inner_header->AddInstruction(cmp);
+ inner_header->AddInstruction(if_inst);
+
+ HBasicBlock* inner_body_compare = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(inner_body_compare);
+ null_check = new (&allocator) HNullCheck(parameter, 0);
+ array_length = new (&allocator) HArrayLength(null_check);
+ HBoundsCheck* bounds_check1 = new (&allocator) HBoundsCheck(phi_j, array_length, 0);
+ HArrayGet* array_get_j = new (&allocator)
+ HArrayGet(null_check, bounds_check1, Primitive::kPrimInt);
+ inner_body_compare->AddInstruction(null_check);
+ inner_body_compare->AddInstruction(array_length);
+ inner_body_compare->AddInstruction(bounds_check1);
+ inner_body_compare->AddInstruction(array_get_j);
+ HInstruction* j_plus_1 = new (&allocator) HAdd(Primitive::kPrimInt, phi_j, constant_1);
+ null_check = new (&allocator) HNullCheck(parameter, 0);
+ array_length = new (&allocator) HArrayLength(null_check);
+ HBoundsCheck* bounds_check2 = new (&allocator) HBoundsCheck(j_plus_1, array_length, 0);
+ HArrayGet* array_get_j_plus_1 = new (&allocator)
+ HArrayGet(null_check, bounds_check2, Primitive::kPrimInt);
+ cmp = new (&allocator) HGreaterThanOrEqual(array_get_j, array_get_j_plus_1);
+ if_inst = new (&allocator) HIf(cmp);
+ inner_body_compare->AddInstruction(j_plus_1);
+ inner_body_compare->AddInstruction(null_check);
+ inner_body_compare->AddInstruction(array_length);
+ inner_body_compare->AddInstruction(bounds_check2);
+ inner_body_compare->AddInstruction(array_get_j_plus_1);
+ inner_body_compare->AddInstruction(cmp);
+ inner_body_compare->AddInstruction(if_inst);
+
+ HBasicBlock* inner_body_swap = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(inner_body_swap);
+ j_plus_1 = new (&allocator) HAdd(Primitive::kPrimInt, phi_j, constant_1);
+ // temp = array[j+1]
+ null_check = new (&allocator) HNullCheck(parameter, 0);
+ array_length = new (&allocator) HArrayLength(null_check);
+ HInstruction* bounds_check3 = new (&allocator) HBoundsCheck(j_plus_1, array_length, 0);
+ array_get_j_plus_1 = new (&allocator)
+ HArrayGet(null_check, bounds_check3, Primitive::kPrimInt);
+ inner_body_swap->AddInstruction(j_plus_1);
+ inner_body_swap->AddInstruction(null_check);
+ inner_body_swap->AddInstruction(array_length);
+ inner_body_swap->AddInstruction(bounds_check3);
+ inner_body_swap->AddInstruction(array_get_j_plus_1);
+ // array[j+1] = array[j]
+ null_check = new (&allocator) HNullCheck(parameter, 0);
+ array_length = new (&allocator) HArrayLength(null_check);
+ HInstruction* bounds_check4 = new (&allocator) HBoundsCheck(phi_j, array_length, 0);
+ array_get_j = new (&allocator)
+ HArrayGet(null_check, bounds_check4, Primitive::kPrimInt);
+ inner_body_swap->AddInstruction(null_check);
+ inner_body_swap->AddInstruction(array_length);
+ inner_body_swap->AddInstruction(bounds_check4);
+ inner_body_swap->AddInstruction(array_get_j);
+ null_check = new (&allocator) HNullCheck(parameter, 0);
+ array_length = new (&allocator) HArrayLength(null_check);
+ HInstruction* bounds_check5 = new (&allocator) HBoundsCheck(j_plus_1, array_length, 0);
+ HArraySet* array_set_j_plus_1 = new (&allocator)
+ HArraySet(null_check, bounds_check5, array_get_j, Primitive::kPrimInt, 0);
+ inner_body_swap->AddInstruction(null_check);
+ inner_body_swap->AddInstruction(array_length);
+ inner_body_swap->AddInstruction(bounds_check5);
+ inner_body_swap->AddInstruction(array_set_j_plus_1);
+ // array[j] = temp
+ null_check = new (&allocator) HNullCheck(parameter, 0);
+ array_length = new (&allocator) HArrayLength(null_check);
+ HInstruction* bounds_check6 = new (&allocator) HBoundsCheck(phi_j, array_length, 0);
+ HArraySet* array_set_j = new (&allocator)
+ HArraySet(null_check, bounds_check6, array_get_j_plus_1, Primitive::kPrimInt, 0);
+ inner_body_swap->AddInstruction(null_check);
+ inner_body_swap->AddInstruction(array_length);
+ inner_body_swap->AddInstruction(bounds_check6);
+ inner_body_swap->AddInstruction(array_set_j);
+ inner_body_swap->AddInstruction(new (&allocator) HGoto());
+
+ HBasicBlock* inner_body_add = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(inner_body_add);
+ add = new (&allocator) HAdd(Primitive::kPrimInt, phi_j, constant_1);
+ inner_body_add->AddInstruction(add);
+ inner_body_add->AddInstruction(new (&allocator) HGoto());
+ phi_j->AddInput(add);
+
+ HBasicBlock* outer_body_add = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(outer_body_add);
+ add = new (&allocator) HAdd(Primitive::kPrimInt, phi_i, constant_1);
+ outer_body_add->AddInstruction(add);
+ outer_body_add->AddInstruction(new (&allocator) HGoto());
+ phi_i->AddInput(add);
+
+ block->AddSuccessor(outer_header);
+ outer_header->AddSuccessor(exit);
+ outer_header->AddSuccessor(inner_header);
+ inner_header->AddSuccessor(outer_body_add);
+ inner_header->AddSuccessor(inner_body_compare);
+ inner_body_compare->AddSuccessor(inner_body_add);
+ inner_body_compare->AddSuccessor(inner_body_swap);
+ inner_body_swap->AddSuccessor(inner_body_add);
+ inner_body_add->AddSuccessor(inner_header);
+ outer_body_add->AddSuccessor(outer_header);
+
+ graph->BuildDominatorTree();
+ GlobalValueNumberer(&allocator, graph).Run();
+  // gvn should remove the duplicate bounds checks (3-6), which have the same inputs as earlier ones.
+ ASSERT_FALSE(IsRemoved(bounds_check1));
+ ASSERT_FALSE(IsRemoved(bounds_check2));
+ ASSERT_TRUE(IsRemoved(bounds_check3));
+ ASSERT_TRUE(IsRemoved(bounds_check4));
+ ASSERT_TRUE(IsRemoved(bounds_check5));
+ ASSERT_TRUE(IsRemoved(bounds_check6));
+
+ BoundsCheckElimination bounds_check_elimination(graph);
+ bounds_check_elimination.Run();
+ ASSERT_TRUE(IsRemoved(bounds_check1));
+ ASSERT_TRUE(IsRemoved(bounds_check2));
+ ASSERT_TRUE(IsRemoved(bounds_check3));
+ ASSERT_TRUE(IsRemoved(bounds_check4));
+ ASSERT_TRUE(IsRemoved(bounds_check5));
+ ASSERT_TRUE(IsRemoved(bounds_check6));
+}
+
+} // namespace art
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 777a117a3b..0a3f830247 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -16,6 +16,7 @@
#include "builder.h"
+#include "base/logging.h"
#include "class_linker.h"
#include "dex_file.h"
#include "dex_file-inl.h"
@@ -68,6 +69,74 @@ class Temporaries : public ValueObject {
size_t index_;
};
+class SwitchTable : public ValueObject {
+ public:
+ SwitchTable(const Instruction& instruction, uint32_t dex_pc, bool sparse)
+ : instruction_(instruction), dex_pc_(dex_pc), sparse_(sparse) {
+ int32_t table_offset = instruction.VRegB_31t();
+ const uint16_t* table = reinterpret_cast<const uint16_t*>(&instruction) + table_offset;
+ if (sparse) {
+ CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kSparseSwitchSignature));
+ } else {
+ CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kPackedSwitchSignature));
+ }
+ num_entries_ = table[1];
+ values_ = reinterpret_cast<const int32_t*>(&table[2]);
+ }
+
+ uint16_t GetNumEntries() const {
+ return num_entries_;
+ }
+
+ void CheckIndex(size_t index) const {
+ if (sparse_) {
+ // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
+ DCHECK_LT(index, 2 * static_cast<size_t>(num_entries_));
+ } else {
+ // In a packed table, we have the starting key and num_entries_ values.
+ DCHECK_LT(index, 1 + static_cast<size_t>(num_entries_));
+ }
+ }
+
+ int32_t GetEntryAt(size_t index) const {
+ CheckIndex(index);
+ return values_[index];
+ }
+
+ uint32_t GetDexPcForIndex(size_t index) const {
+ CheckIndex(index);
+ return dex_pc_ +
+ (reinterpret_cast<const int16_t*>(values_ + index) -
+ reinterpret_cast<const int16_t*>(&instruction_));
+ }
+
+ // Index of the first value in the table.
+ size_t GetFirstValueIndex() const {
+ if (sparse_) {
+ // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
+ return num_entries_;
+ } else {
+ // In a packed table, we have the starting key and num_entries_ values.
+ return 1;
+ }
+ }
+
+ private:
+ const Instruction& instruction_;
+ const uint32_t dex_pc_;
+
+ // Whether this is a sparse-switch table (or a packed-switch one).
+ const bool sparse_;
+
+ // This can't be const as it needs to be computed off of the given instruction, and complicated
+ // expressions in the initializer list seemed very ugly.
+ uint16_t num_entries_;
+
+ const int32_t* values_;
+
+ DISALLOW_COPY_AND_ASSIGN(SwitchTable);
+};
+
void HGraphBuilder::InitializeLocals(uint16_t count) {
graph_->SetNumberOfVRegs(count);
locals_.SetSize(count);
@@ -155,6 +224,37 @@ void HGraphBuilder::If_21t(const Instruction& instruction, uint32_t dex_pc) {
current_block_ = nullptr;
}
+static bool ShouldSkipCompilation(const CompilerDriver& compiler_driver,
+ const DexCompilationUnit& dex_compilation_unit,
+ size_t number_of_dex_instructions,
+ size_t number_of_blocks ATTRIBUTE_UNUSED,
+ size_t number_of_branches) {
+ const CompilerOptions& compiler_options = compiler_driver.GetCompilerOptions();
+ CompilerOptions::CompilerFilter compiler_filter = compiler_options.GetCompilerFilter();
+ if (compiler_filter == CompilerOptions::kEverything) {
+ return false;
+ }
+
+ if (compiler_options.IsHugeMethod(number_of_dex_instructions)) {
+ LOG(INFO) << "Skip compilation of huge method "
+ << PrettyMethod(dex_compilation_unit.GetDexMethodIndex(),
+ *dex_compilation_unit.GetDexFile())
+ << ": " << number_of_dex_instructions << " dex instructions";
+ return true;
+ }
+
+  // If it's large and contains no branches, it's likely to be machine-generated initialization.
+ if (compiler_options.IsLargeMethod(number_of_dex_instructions) && (number_of_branches == 0)) {
+ LOG(INFO) << "Skip compilation of large method with no branch "
+ << PrettyMethod(dex_compilation_unit.GetDexMethodIndex(),
+ *dex_compilation_unit.GetDexFile())
+ << ": " << number_of_dex_instructions << " dex instructions";
+ return true;
+ }
+
+ return false;
+}
+
HGraph* HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item) {
const uint16_t* code_ptr = code_item.insns_;
const uint16_t* code_end = code_item.insns_ + code_item.insns_size_in_code_units_;
@@ -171,9 +271,27 @@ HGraph* HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item) {
InitializeLocals(code_item.registers_size_);
graph_->UpdateMaximumNumberOfOutVRegs(code_item.outs_size_);
+ // Compute the number of dex instructions, blocks, and branches. We will
+ // check these values against limits given to the compiler.
+ size_t number_of_dex_instructions = 0;
+ size_t number_of_blocks = 0;
+ size_t number_of_branches = 0;
+
// To avoid splitting blocks, we compute ahead of time the instructions that
// start a new block, and create these blocks.
- ComputeBranchTargets(code_ptr, code_end);
+ ComputeBranchTargets(
+ code_ptr, code_end, &number_of_dex_instructions, &number_of_blocks, &number_of_branches);
+
+ // Note that the compiler driver is null when unit testing.
+ if (compiler_driver_ != nullptr) {
+ if (ShouldSkipCompilation(*compiler_driver_,
+ *dex_compilation_unit_,
+ number_of_dex_instructions,
+ number_of_blocks,
+ number_of_branches)) {
+ return nullptr;
+ }
+ }
// Also create blocks for catch handlers.
if (code_item.tries_size_ != 0) {
@@ -232,8 +350,11 @@ void HGraphBuilder::MaybeUpdateCurrentBlock(size_t index) {
current_block_ = block;
}
-void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, const uint16_t* code_end) {
- // TODO: Support switch instructions.
+void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
+ const uint16_t* code_end,
+ size_t* number_of_dex_instructions,
+ size_t* number_of_blocks,
+ size_t* number_of_branches) {
branch_targets_.SetSize(code_end - code_ptr);
// Create the first block for the dex instructions, single successor of the entry block.
@@ -243,21 +364,60 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, const uint16_
// Iterate over all instructions and find branching instructions. Create blocks for
// the locations these instructions branch to.
- size_t dex_pc = 0;
+ uint32_t dex_pc = 0;
while (code_ptr < code_end) {
+ (*number_of_dex_instructions)++;
const Instruction& instruction = *Instruction::At(code_ptr);
if (instruction.IsBranch()) {
+ (*number_of_branches)++;
int32_t target = instruction.GetTargetOffset() + dex_pc;
// Create a block for the target instruction.
if (FindBlockStartingAt(target) == nullptr) {
block = new (arena_) HBasicBlock(graph_, target);
branch_targets_.Put(target, block);
+ (*number_of_blocks)++;
}
dex_pc += instruction.SizeInCodeUnits();
code_ptr += instruction.SizeInCodeUnits();
if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) {
block = new (arena_) HBasicBlock(graph_, dex_pc);
branch_targets_.Put(dex_pc, block);
+ (*number_of_blocks)++;
+ }
+ } else if (instruction.IsSwitch()) {
+ SwitchTable table(instruction, dex_pc, instruction.Opcode() == Instruction::SPARSE_SWITCH);
+
+ uint16_t num_entries = table.GetNumEntries();
+
+ // In a packed-switch, the entry at index 0 is the starting key. In a sparse-switch, the
+ // entry at index 0 is the first key, and values are after *all* keys.
+ size_t offset = table.GetFirstValueIndex();
+
+ // Use a larger loop counter type to avoid overflow issues.
+ for (size_t i = 0; i < num_entries; ++i) {
+ // The target of the case.
+ uint32_t target = dex_pc + table.GetEntryAt(i + offset);
+ if (FindBlockStartingAt(target) == nullptr) {
+ block = new (arena_) HBasicBlock(graph_, target);
+ branch_targets_.Put(target, block);
+ (*number_of_blocks)++;
+ }
+
+        // The next case gets its own block. NOTE(review): the guard below (i < num_entries) is always true inside this loop, and the block is constructed with dex_pc 'target' yet registered at GetDexPcForIndex(i) — confirm this is intended.
+ if (i < num_entries) {
+ block = new (arena_) HBasicBlock(graph_, target);
+ branch_targets_.Put(table.GetDexPcForIndex(i), block);
+ (*number_of_blocks)++;
+ }
+ }
+
+ // Fall-through. Add a block if there is more code afterwards.
+ dex_pc += instruction.SizeInCodeUnits();
+ code_ptr += instruction.SizeInCodeUnits();
+ if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) {
+ block = new (arena_) HBasicBlock(graph_, dex_pc);
+ branch_targets_.Put(dex_pc, block);
+ (*number_of_blocks)++;
}
} else {
code_ptr += instruction.SizeInCodeUnits();
@@ -280,9 +440,10 @@ void HGraphBuilder::Unop_12x(const Instruction& instruction, Primitive::Type typ
void HGraphBuilder::Conversion_12x(const Instruction& instruction,
Primitive::Type input_type,
- Primitive::Type result_type) {
+ Primitive::Type result_type,
+ uint32_t dex_pc) {
HInstruction* first = LoadLocal(instruction.VRegB(), input_type);
- current_block_->AddInstruction(new (arena_) HTypeConversion(result_type, first));
+ current_block_->AddInstruction(new (arena_) HTypeConversion(result_type, first, dex_pc));
UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
}
@@ -423,38 +584,36 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
bool is_instance_call = invoke_type != kStatic;
const size_t number_of_arguments = strlen(descriptor) - (is_instance_call ? 0 : 1);
- HInvoke* invoke = nullptr;
- if (invoke_type == kVirtual || invoke_type == kInterface || invoke_type == kSuper) {
- MethodReference target_method(dex_file_, method_idx);
- uintptr_t direct_code;
- uintptr_t direct_method;
- int table_index;
- InvokeType optimized_invoke_type = invoke_type;
- compiler_driver_->ComputeInvokeInfo(dex_compilation_unit_, dex_pc, true, true,
- &optimized_invoke_type, &target_method, &table_index,
- &direct_code, &direct_method);
- if (table_index == -1) {
- return false;
- }
+ MethodReference target_method(dex_file_, method_idx);
+ uintptr_t direct_code;
+ uintptr_t direct_method;
+ int table_index;
+ InvokeType optimized_invoke_type = invoke_type;
+
+ if (!compiler_driver_->ComputeInvokeInfo(dex_compilation_unit_, dex_pc, true, true,
+ &optimized_invoke_type, &target_method, &table_index,
+ &direct_code, &direct_method)) {
+ LOG(INFO) << "Did not compile " << PrettyMethod(method_idx, *dex_file_)
+ << " because a method call could not be resolved";
+ return false;
+ }
+ DCHECK(optimized_invoke_type != kSuper);
- if (optimized_invoke_type == kVirtual) {
- invoke = new (arena_) HInvokeVirtual(
- arena_, number_of_arguments, return_type, dex_pc, table_index);
- } else if (optimized_invoke_type == kInterface) {
- invoke = new (arena_) HInvokeInterface(
- arena_, number_of_arguments, return_type, dex_pc, method_idx, table_index);
- } else if (optimized_invoke_type == kDirect) {
- // For this compiler, sharpening only works if we compile PIC.
- DCHECK(compiler_driver_->GetCompilerOptions().GetCompilePic());
- // Treat invoke-direct like static calls for now.
- invoke = new (arena_) HInvokeStatic(
- arena_, number_of_arguments, return_type, dex_pc, target_method.dex_method_index);
- }
+ HInvoke* invoke = nullptr;
+ if (optimized_invoke_type == kVirtual) {
+ invoke = new (arena_) HInvokeVirtual(
+ arena_, number_of_arguments, return_type, dex_pc, table_index);
+ } else if (optimized_invoke_type == kInterface) {
+ invoke = new (arena_) HInvokeInterface(
+ arena_, number_of_arguments, return_type, dex_pc, method_idx, table_index);
} else {
- DCHECK(invoke_type == kDirect || invoke_type == kStatic);
+ DCHECK(optimized_invoke_type == kDirect || optimized_invoke_type == kStatic);
+ // Sharpening to kDirect only works if we compile PIC.
+ DCHECK((optimized_invoke_type == invoke_type) || (optimized_invoke_type != kDirect)
+ || compiler_driver_->GetCompilerOptions().GetCompilePic());
// Treat invoke-direct like static calls for now.
invoke = new (arena_) HInvokeStatic(
- arena_, number_of_arguments, return_type, dex_pc, method_idx);
+ arena_, number_of_arguments, return_type, dex_pc, target_method.dex_method_index);
}
size_t start_index = 0;
@@ -808,6 +967,85 @@ bool HGraphBuilder::BuildTypeCheck(const Instruction& instruction,
return true;
}
+bool HGraphBuilder::BuildPackedSwitch(const Instruction& instruction, uint32_t dex_pc) {
+ SwitchTable table(instruction, dex_pc, false);
+
+ // Value to test against.
+ HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt);
+
+ uint16_t num_entries = table.GetNumEntries();
+ // There should be at least one entry here.
+ DCHECK_GT(num_entries, 0U);
+
+ // Chained cmp-and-branch, starting from starting_key.
+ int32_t starting_key = table.GetEntryAt(0);
+
+ for (size_t i = 1; i <= num_entries; i++) {
+ BuildSwitchCaseHelper(instruction, i, i == num_entries, table, value, starting_key + i - 1,
+ table.GetEntryAt(i), dex_pc);
+ }
+ return true;
+}
+
+bool HGraphBuilder::BuildSparseSwitch(const Instruction& instruction, uint32_t dex_pc) {
+ SwitchTable table(instruction, dex_pc, true);
+
+ // Value to test against.
+ HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt);
+
+ uint16_t num_entries = table.GetNumEntries();
+ // There should be at least one entry here.
+ DCHECK_GT(num_entries, 0U);
+
+ for (size_t i = 0; i < num_entries; i++) {
+ BuildSwitchCaseHelper(instruction, i, i == static_cast<size_t>(num_entries) - 1, table, value,
+ table.GetEntryAt(i), table.GetEntryAt(i + num_entries), dex_pc);
+ }
+ return true;
+}
+
+void HGraphBuilder::BuildSwitchCaseHelper(const Instruction& instruction, size_t index,
+ bool is_last_case, const SwitchTable& table,
+ HInstruction* value, int32_t case_value_int,
+ int32_t target_offset, uint32_t dex_pc) {
+ PotentiallyAddSuspendCheck(target_offset, dex_pc);
+
+ // The current case's value.
+ HInstruction* this_case_value = GetIntConstant(case_value_int);
+
+ // Compare value and this_case_value.
+ HEqual* comparison = new (arena_) HEqual(value, this_case_value);
+ current_block_->AddInstruction(comparison);
+ HInstruction* ifinst = new (arena_) HIf(comparison);
+ current_block_->AddInstruction(ifinst);
+
+ // Case hit: use the target offset to determine where to go.
+ HBasicBlock* case_target = FindBlockStartingAt(dex_pc + target_offset);
+ DCHECK(case_target != nullptr);
+ current_block_->AddSuccessor(case_target);
+
+ // Case miss: go to the next case (or default fall-through).
+ // When there is a next case, we use the block stored with the table offset representing this
+ // case (that is where we registered them in ComputeBranchTargets).
+ // When there is no next case, we use the following instruction.
+ // TODO: Find a good way to peel the last iteration to avoid conditional, but still have re-use.
+ if (!is_last_case) {
+ HBasicBlock* next_case_target = FindBlockStartingAt(table.GetDexPcForIndex(index));
+ DCHECK(next_case_target != nullptr);
+ current_block_->AddSuccessor(next_case_target);
+
+ // Need to manually add the block, as there is no dex-pc transition for the cases.
+ graph_->AddBlock(next_case_target);
+
+ current_block_ = next_case_target;
+ } else {
+ HBasicBlock* default_target = FindBlockStartingAt(dex_pc + instruction.SizeInCodeUnits());
+ DCHECK(default_target != nullptr);
+ current_block_->AddSuccessor(default_target);
+ current_block_ = nullptr;
+ }
+}
+
void HGraphBuilder::PotentiallyAddSuspendCheck(int32_t target_offset, uint32_t dex_pc) {
if (target_offset <= 0) {
    // Unconditionally add a suspend check to backward branches. We can remove
@@ -1024,47 +1262,77 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32
}
case Instruction::INT_TO_LONG: {
- Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimLong);
+ Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimLong, dex_pc);
break;
}
case Instruction::INT_TO_FLOAT: {
- Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimFloat);
+ Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimFloat, dex_pc);
break;
}
case Instruction::INT_TO_DOUBLE: {
- Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimDouble);
+ Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimDouble, dex_pc);
break;
}
case Instruction::LONG_TO_INT: {
- Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimInt);
+ Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimInt, dex_pc);
break;
}
case Instruction::LONG_TO_FLOAT: {
- Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimFloat);
+ Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimFloat, dex_pc);
break;
}
case Instruction::LONG_TO_DOUBLE: {
- Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimDouble);
+ Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimDouble, dex_pc);
+ break;
+ }
+
+ case Instruction::FLOAT_TO_INT: {
+ Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimInt, dex_pc);
+ break;
+ }
+
+ case Instruction::FLOAT_TO_LONG: {
+ Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimLong, dex_pc);
+ break;
+ }
+
+ case Instruction::FLOAT_TO_DOUBLE: {
+ Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimDouble, dex_pc);
+ break;
+ }
+
+ case Instruction::DOUBLE_TO_INT: {
+ Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimInt, dex_pc);
+ break;
+ }
+
+ case Instruction::DOUBLE_TO_LONG: {
+ Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimLong, dex_pc);
+ break;
+ }
+
+ case Instruction::DOUBLE_TO_FLOAT: {
+ Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimFloat, dex_pc);
break;
}
case Instruction::INT_TO_BYTE: {
- Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimByte);
+ Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimByte, dex_pc);
break;
}
case Instruction::INT_TO_SHORT: {
- Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimShort);
+ Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimShort, dex_pc);
break;
}
case Instruction::INT_TO_CHAR: {
- Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimChar);
+ Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimChar, dex_pc);
break;
}
@@ -1167,6 +1435,16 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32
break;
}
+ case Instruction::REM_FLOAT: {
+ Binop_23x<HRem>(instruction, Primitive::kPrimFloat, dex_pc);
+ break;
+ }
+
+ case Instruction::REM_DOUBLE: {
+ Binop_23x<HRem>(instruction, Primitive::kPrimDouble, dex_pc);
+ break;
+ }
+
case Instruction::AND_INT: {
Binop_23x<HAnd>(instruction, Primitive::kPrimInt);
break;
@@ -1306,6 +1584,16 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32
break;
}
+ case Instruction::REM_FLOAT_2ADDR: {
+ Binop_12x<HRem>(instruction, Primitive::kPrimFloat, dex_pc);
+ break;
+ }
+
+ case Instruction::REM_DOUBLE_2ADDR: {
+ Binop_12x<HRem>(instruction, Primitive::kPrimDouble, dex_pc);
+ break;
+ }
+
case Instruction::SHL_INT_2ADDR: {
Binop_12x_shift<HShl>(instruction, Primitive::kPrimInt);
break;
@@ -1700,6 +1988,20 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32
break;
}
+ case Instruction::PACKED_SWITCH: {
+ if (!BuildPackedSwitch(instruction, dex_pc)) {
+ return false;
+ }
+ break;
+ }
+
+ case Instruction::SPARSE_SWITCH: {
+ if (!BuildSparseSwitch(instruction, dex_pc)) {
+ return false;
+ }
+ break;
+ }
+
default:
return false;
}
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 25781b08f4..73c2f50958 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -29,6 +29,7 @@
namespace art {
class Instruction;
+class SwitchTable;
class HGraphBuilder : public ValueObject {
public:
@@ -80,7 +81,13 @@ class HGraphBuilder : public ValueObject {
// Finds all instructions that start a new block, and populates branch_targets_ with
// the newly created blocks.
- void ComputeBranchTargets(const uint16_t* start, const uint16_t* end);
+ // As a side effect, also compute the number of dex instructions, blocks, and
+ // branches.
+ void ComputeBranchTargets(const uint16_t* start,
+ const uint16_t* end,
+ size_t* number_of_dex_instructions,
+ size_t* number_of_block,
+ size_t* number_of_branches);
void MaybeUpdateCurrentBlock(size_t index);
HBasicBlock* FindBlockStartingAt(int32_t index) const;
@@ -129,7 +136,8 @@ class HGraphBuilder : public ValueObject {
void Conversion_12x(const Instruction& instruction,
Primitive::Type input_type,
- Primitive::Type result_type);
+ Primitive::Type result_type,
+ uint32_t dex_pc);
void BuildCheckedDivRem(uint16_t out_reg,
uint16_t first_reg,
@@ -196,6 +204,17 @@ class HGraphBuilder : public ValueObject {
uint16_t type_index,
uint32_t dex_pc);
+ // Builds an instruction sequence for a packed switch statement.
+ bool BuildPackedSwitch(const Instruction& instruction, uint32_t dex_pc);
+
+ // Builds an instruction sequence for a sparse switch statement.
+ bool BuildSparseSwitch(const Instruction& instruction, uint32_t dex_pc);
+
+ void BuildSwitchCaseHelper(const Instruction& instruction, size_t index,
+ bool is_last_case, const SwitchTable& table,
+ HInstruction* value, int32_t case_value_int,
+ int32_t target_offset, uint32_t dex_pc);
+
ArenaAllocator* const arena_;
// A list of the size of the dex code holding block information for
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index e581af22aa..461409ddca 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -499,6 +499,29 @@ void CodeGenerator::BuildStackMaps(std::vector<uint8_t>* data) {
}
void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) {
+ if (instruction != nullptr) {
+ // The code generated for some type conversions may call the
+ // runtime, thus normally requiring a subsequent call to this
+ // method. However, the method verifier does not produce PC
+ // information for certain instructions, which are considered "atomic"
+ // (they cannot join a GC).
+ // Therefore we do not currently record PC information for such
+ // instructions. As this may change later, we added this special
+ // case so that code generators may nevertheless call
+ // CodeGenerator::RecordPcInfo without triggering an error in
+ // CodeGenerator::BuildNativeGCMap ("Missing ref for dex pc 0x")
+ // thereafter.
+ if (instruction->IsTypeConversion()) {
+ return;
+ }
+ if (instruction->IsRem()) {
+ Primitive::Type type = instruction->AsRem()->GetResultType();
+ if ((type == Primitive::kPrimFloat) || (type == Primitive::kPrimDouble)) {
+ return;
+ }
+ }
+ }
+
// Collect PC infos for the mapping table.
struct PcInfo pc_info;
pc_info.dex_pc = dex_pc;
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 4c0d3ea960..1d42c47d56 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -35,6 +35,11 @@ static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
// Binary encoding of 2^31 for type double.
static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000);
+// Maximum value for a primitive integer.
+static int32_t constexpr kPrimIntMax = 0x7fffffff;
+// Maximum value for a primitive long.
+static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);
+
class Assembler;
class CodeGenerator;
class DexCompilationUnit;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 5b2be2e9a1..cbe5f0cc6e 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -44,8 +44,9 @@ static constexpr int kCurrentMethodStackOffset = 0;
static constexpr Register kRuntimeParameterCoreRegisters[] = { R0, R1, R2, R3 };
static constexpr size_t kRuntimeParameterCoreRegistersLength =
arraysize(kRuntimeParameterCoreRegisters);
-static constexpr SRegister kRuntimeParameterFpuRegisters[] = { };
-static constexpr size_t kRuntimeParameterFpuRegistersLength = 0;
+static constexpr SRegister kRuntimeParameterFpuRegisters[] = { S0, S1, S2, S3 };
+static constexpr size_t kRuntimeParameterFpuRegistersLength =
+ arraysize(kRuntimeParameterFpuRegisters);
class InvokeRuntimeCallingConvention : public CallingConvention<Register, SRegister> {
public:
@@ -874,6 +875,7 @@ void CodeGeneratorARM::InvokeRuntime(int32_t entry_point_offset,
|| instruction->IsBoundsCheck()
|| instruction->IsNullCheck()
|| instruction->IsDivZeroCheck()
+ || instruction->GetLocations()->CanCall()
|| !IsLeafMethod());
}
@@ -1359,11 +1361,20 @@ void InstructionCodeGeneratorARM::VisitNeg(HNeg* neg) {
}
void LocationsBuilderARM::VisitTypeConversion(HTypeConversion* conversion) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall);
Primitive::Type result_type = conversion->GetResultType();
Primitive::Type input_type = conversion->GetInputType();
DCHECK_NE(result_type, input_type);
+
+ // The float-to-long and double-to-long type conversions rely on a
+ // call to the runtime.
+ LocationSummary::CallKind call_kind =
+ ((input_type == Primitive::kPrimFloat || input_type == Primitive::kPrimDouble)
+ && result_type == Primitive::kPrimLong)
+ ? LocationSummary::kCall
+ : LocationSummary::kNoCall;
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
+
switch (result_type) {
case Primitive::kPrimByte:
switch (input_type) {
@@ -1406,9 +1417,17 @@ void LocationsBuilderARM::VisitTypeConversion(HTypeConversion* conversion) {
break;
case Primitive::kPrimFloat:
+ // Processing a Dex `float-to-int' instruction.
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ break;
+
case Primitive::kPrimDouble:
- LOG(FATAL) << "Type conversion from " << input_type
- << " to " << result_type << " not yet implemented";
+ // Processing a Dex `double-to-int' instruction.
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
break;
default:
@@ -1428,11 +1447,24 @@ void LocationsBuilderARM::VisitTypeConversion(HTypeConversion* conversion) {
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- LOG(FATAL) << "Type conversion from " << input_type << " to "
- << result_type << " not yet implemented";
+ case Primitive::kPrimFloat: {
+ // Processing a Dex `float-to-long' instruction.
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::FpuRegisterLocation(
+ calling_convention.GetFpuRegisterAt(0)));
+ locations->SetOut(Location::RegisterPairLocation(R0, R1));
break;
+ }
+
+ case Primitive::kPrimDouble: {
+ // Processing a Dex `double-to-long' instruction.
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::FpuRegisterPairLocation(
+ calling_convention.GetFpuRegisterAt(0),
+ calling_convention.GetFpuRegisterAt(1)));
+ locations->SetOut(Location::RegisterPairLocation(R0, R1));
+ break;
+ }
default:
LOG(FATAL) << "Unexpected type conversion from " << input_type
@@ -1478,8 +1510,9 @@ void LocationsBuilderARM::VisitTypeConversion(HTypeConversion* conversion) {
break;
case Primitive::kPrimDouble:
- LOG(FATAL) << "Type conversion from " << input_type
- << " to " << result_type << " not yet implemented";
+ // Processing a Dex `double-to-float' instruction.
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
@@ -1509,8 +1542,9 @@ void LocationsBuilderARM::VisitTypeConversion(HTypeConversion* conversion) {
break;
case Primitive::kPrimFloat:
- LOG(FATAL) << "Type conversion from " << input_type
- << " to " << result_type << " not yet implemented";
+ // Processing a Dex `float-to-double' instruction.
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
@@ -1580,11 +1614,24 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio
}
break;
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- LOG(FATAL) << "Type conversion from " << input_type
- << " to " << result_type << " not yet implemented";
+ case Primitive::kPrimFloat: {
+ // Processing a Dex `float-to-int' instruction.
+ SRegister temp = locations->GetTemp(0).AsFpuRegisterPairLow<SRegister>();
+ __ vmovs(temp, in.AsFpuRegister<SRegister>());
+ __ vcvtis(temp, temp);
+ __ vmovrs(out.AsRegister<Register>(), temp);
break;
+ }
+
+ case Primitive::kPrimDouble: {
+ // Processing a Dex `double-to-int' instruction.
+ SRegister temp_s = locations->GetTemp(0).AsFpuRegisterPairLow<SRegister>();
+ DRegister temp_d = FromLowSToD(temp_s);
+ __ vmovd(temp_d, FromLowSToD(in.AsFpuRegisterPairLow<SRegister>()));
+ __ vcvtid(temp_s, temp_d);
+ __ vmovrs(out.AsRegister<Register>(), temp_s);
+ break;
+ }
default:
LOG(FATAL) << "Unexpected type conversion from " << input_type
@@ -1609,9 +1656,17 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio
break;
case Primitive::kPrimFloat:
+ // Processing a Dex `float-to-long' instruction.
+ codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pF2l),
+ conversion,
+ conversion->GetDexPc());
+ break;
+
case Primitive::kPrimDouble:
- LOG(FATAL) << "Type conversion from " << input_type << " to "
- << result_type << " not yet implemented";
+ // Processing a Dex `double-to-long' instruction.
+ codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pD2l),
+ conversion,
+ conversion->GetDexPc());
break;
default:
@@ -1690,8 +1745,9 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio
}
case Primitive::kPrimDouble:
- LOG(FATAL) << "Type conversion from " << input_type
- << " to " << result_type << " not yet implemented";
+ // Processing a Dex `double-to-float' instruction.
+ __ vcvtsd(out.AsFpuRegister<SRegister>(),
+ FromLowSToD(in.AsFpuRegisterPairLow<SRegister>()));
break;
default:
@@ -1746,8 +1802,9 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio
}
case Primitive::kPrimFloat:
- LOG(FATAL) << "Type conversion from " << input_type
- << " to " << result_type << " not yet implemented";
+ // Processing a Dex `float-to-double' instruction.
+ __ vcvtds(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()),
+ in.AsFpuRegister<SRegister>());
break;
default:
@@ -2075,12 +2132,13 @@ void InstructionCodeGeneratorARM::VisitDiv(HDiv* div) {
}
void LocationsBuilderARM::VisitRem(HRem* rem) {
- LocationSummary::CallKind call_kind = rem->GetResultType() == Primitive::kPrimLong
- ? LocationSummary::kCall
- : LocationSummary::kNoCall;
+ Primitive::Type type = rem->GetResultType();
+ LocationSummary::CallKind call_kind = type == Primitive::kPrimInt
+ ? LocationSummary::kNoCall
+ : LocationSummary::kCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
- switch (rem->GetResultType()) {
+ switch (type) {
case Primitive::kPrimInt: {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
@@ -2098,14 +2156,26 @@ void LocationsBuilderARM::VisitRem(HRem* rem) {
locations->SetOut(Location::RegisterPairLocation(R2, R3));
break;
}
- case Primitive::kPrimFloat:
+ case Primitive::kPrimFloat: {
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+ locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
+ locations->SetOut(Location::FpuRegisterLocation(S0));
+ break;
+ }
+
case Primitive::kPrimDouble: {
- LOG(FATAL) << "Unimplemented rem type " << rem->GetResultType();
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::FpuRegisterPairLocation(
+ calling_convention.GetFpuRegisterAt(0), calling_convention.GetFpuRegisterAt(1)));
+ locations->SetInAt(1, Location::FpuRegisterPairLocation(
+ calling_convention.GetFpuRegisterAt(2), calling_convention.GetFpuRegisterAt(3)));
+ locations->SetOut(Location::FpuRegisterPairLocation(S0, S1));
break;
}
default:
- LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
+ LOG(FATAL) << "Unexpected rem type " << type;
}
}
@@ -2115,7 +2185,8 @@ void InstructionCodeGeneratorARM::VisitRem(HRem* rem) {
Location first = locations->InAt(0);
Location second = locations->InAt(1);
- switch (rem->GetResultType()) {
+ Primitive::Type type = rem->GetResultType();
+ switch (type) {
case Primitive::kPrimInt: {
Register reg1 = first.AsRegister<Register>();
Register reg2 = second.AsRegister<Register>();
@@ -2131,26 +2202,22 @@ void InstructionCodeGeneratorARM::VisitRem(HRem* rem) {
}
case Primitive::kPrimLong: {
- InvokeRuntimeCallingConvention calling_convention;
- DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>());
- DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>());
- DCHECK_EQ(calling_convention.GetRegisterAt(2), second.AsRegisterPairLow<Register>());
- DCHECK_EQ(calling_convention.GetRegisterAt(3), second.AsRegisterPairHigh<Register>());
- DCHECK_EQ(R2, out.AsRegisterPairLow<Register>());
- DCHECK_EQ(R3, out.AsRegisterPairHigh<Register>());
-
codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod), rem, rem->GetDexPc());
break;
}
- case Primitive::kPrimFloat:
+ case Primitive::kPrimFloat: {
+ codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmodf), rem, rem->GetDexPc());
+ break;
+ }
+
case Primitive::kPrimDouble: {
- LOG(FATAL) << "Unimplemented rem type " << rem->GetResultType();
+ codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmod), rem, rem->GetDexPc());
break;
}
default:
- LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
+ LOG(FATAL) << "Unexpected rem type " << type;
}
}
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index fd794f95d1..8a0c2deab9 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1326,11 +1326,20 @@ void InstructionCodeGeneratorX86::VisitNeg(HNeg* neg) {
}
void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall);
Primitive::Type result_type = conversion->GetResultType();
Primitive::Type input_type = conversion->GetInputType();
DCHECK_NE(result_type, input_type);
+
+ // The float-to-long and double-to-long type conversions rely on a
+ // call to the runtime.
+ LocationSummary::CallKind call_kind =
+ ((input_type == Primitive::kPrimFloat || input_type == Primitive::kPrimDouble)
+ && result_type == Primitive::kPrimLong)
+ ? LocationSummary::kCall
+ : LocationSummary::kNoCall;
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind);
+
switch (result_type) {
case Primitive::kPrimByte:
switch (input_type) {
@@ -1373,9 +1382,17 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
break;
case Primitive::kPrimFloat:
+ // Processing a Dex `float-to-int' instruction.
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ break;
+
case Primitive::kPrimDouble:
- LOG(FATAL) << "Type conversion from " << input_type
- << " to " << result_type << " not yet implemented";
+ // Processing a Dex `double-to-int' instruction.
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
break;
default:
@@ -1395,10 +1412,30 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
break;
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- LOG(FATAL) << "Type conversion from " << input_type << " to "
- << result_type << " not yet implemented";
+ case Primitive::kPrimFloat: {
+ // Processing a Dex `float-to-long' instruction.
+ InvokeRuntimeCallingConvention calling_convention;
+ // Note that on x86 floating-point parameters are passed
+ // through core registers (here, EAX).
+ locations->SetInAt(0, Location::RegisterLocation(
+ calling_convention.GetRegisterAt(0)));
+ // The runtime helper puts the result in EAX, EDX.
+ locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
+ break;
+ }
+
+ case Primitive::kPrimDouble: {
+ // Processing a Dex `double-to-long' instruction.
+ InvokeRuntimeCallingConvention calling_convention;
+ // Note that on x86 floating-point parameters are passed
+ // through core registers (here, EAX and ECX).
+ locations->SetInAt(0, Location::RegisterPairLocation(
+ calling_convention.GetRegisterAt(0),
+ calling_convention.GetRegisterAt(1)));
+ // The runtime helper puts the result in EAX, EDX.
+ locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
+ break;
+ }
break;
default:
@@ -1443,8 +1480,9 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
break;
case Primitive::kPrimDouble:
- LOG(FATAL) << "Type conversion from " << input_type
- << " to " << result_type << " not yet implemented";
+ // Processing a Dex `double-to-float' instruction.
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
@@ -1473,8 +1511,9 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) {
break;
case Primitive::kPrimFloat:
- LOG(FATAL) << "Type conversion from " << input_type
- << " to " << result_type << " not yet implemented";
+ // Processing a Dex `float-to-double' instruction.
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
@@ -1559,11 +1598,55 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio
}
break;
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- LOG(FATAL) << "Type conversion from " << input_type
- << " to " << result_type << " not yet implemented";
+ case Primitive::kPrimFloat: {
+ // Processing a Dex `float-to-int' instruction.
+ XmmRegister input = in.AsFpuRegister<XmmRegister>();
+ Register output = out.AsRegister<Register>();
+ XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ Label done, nan;
+
+ __ movl(output, Immediate(kPrimIntMax));
+ // temp = int-to-float(output)
+ __ cvtsi2ss(temp, output);
+ // if input >= temp goto done
+ __ comiss(input, temp);
+ __ j(kAboveEqual, &done);
+ // if input == NaN goto nan
+ __ j(kUnordered, &nan);
+ // output = float-to-int-truncate(input)
+ __ cvttss2si(output, input);
+ __ jmp(&done);
+ __ Bind(&nan);
+ // output = 0
+ __ xorl(output, output);
+ __ Bind(&done);
break;
+ }
+
+ case Primitive::kPrimDouble: {
+ // Processing a Dex `double-to-int' instruction.
+ XmmRegister input = in.AsFpuRegister<XmmRegister>();
+ Register output = out.AsRegister<Register>();
+ XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ Label done, nan;
+
+ __ movl(output, Immediate(kPrimIntMax));
+ // temp = int-to-double(output)
+ __ cvtsi2sd(temp, output);
+ // if input >= temp goto done
+ __ comisd(input, temp);
+ __ j(kAboveEqual, &done);
+ // if input == NaN goto nan
+ __ j(kUnordered, &nan);
+ // output = double-to-int-truncate(input)
+ __ cvttsd2si(output, input);
+ __ jmp(&done);
+ __ Bind(&nan);
+ // output = 0
+ __ xorl(output, output);
+ __ Bind(&done);
+ break;
+ }
default:
LOG(FATAL) << "Unexpected type conversion from " << input_type
@@ -1585,9 +1668,15 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio
break;
case Primitive::kPrimFloat:
+ // Processing a Dex `float-to-long' instruction.
+ __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pF2l)));
+ codegen_->RecordPcInfo(conversion, conversion->GetDexPc());
+ break;
+
case Primitive::kPrimDouble:
- LOG(FATAL) << "Type conversion from " << input_type << " to "
- << result_type << " not yet implemented";
+ // Processing a Dex `double-to-long' instruction.
+ __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pD2l)));
+ codegen_->RecordPcInfo(conversion, conversion->GetDexPc());
break;
default:
@@ -1664,8 +1753,8 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio
}
case Primitive::kPrimDouble:
- LOG(FATAL) << "Type conversion from " << input_type
- << " to " << result_type << " not yet implemented";
+ // Processing a Dex `double-to-float' instruction.
+ __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
break;
default:
@@ -1711,8 +1800,8 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio
}
case Primitive::kPrimFloat:
- LOG(FATAL) << "Type conversion from " << input_type
- << " to " << result_type << " not yet implemented";
+ // Processing a Dex `float-to-double' instruction.
+ __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
break;
default:
@@ -2117,12 +2206,13 @@ void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) {
}
void LocationsBuilderX86::VisitRem(HRem* rem) {
- LocationSummary::CallKind call_kind = rem->GetResultType() == Primitive::kPrimLong
- ? LocationSummary::kCall
- : LocationSummary::kNoCall;
+ Primitive::Type type = rem->GetResultType();
+ LocationSummary::CallKind call_kind = type == Primitive::kPrimInt
+ ? LocationSummary::kNoCall
+ : LocationSummary::kCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
- switch (rem->GetResultType()) {
+ switch (type) {
case Primitive::kPrimInt: {
locations->SetInAt(0, Location::RegisterLocation(EAX));
locations->SetInAt(1, Location::RequiresRegister());
@@ -2139,14 +2229,29 @@ void LocationsBuilderX86::VisitRem(HRem* rem) {
locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
break;
}
- case Primitive::kPrimFloat:
+ case Primitive::kPrimFloat: {
+ InvokeRuntimeCallingConvention calling_convention;
+ // x86 floating-point parameters are passed through core registers (EAX, ECX).
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ // The runtime helper puts the result in XMM0.
+ locations->SetOut(Location::FpuRegisterLocation(XMM0));
+ break;
+ }
case Primitive::kPrimDouble: {
- LOG(FATAL) << "Unimplemented rem type " << rem->GetResultType();
+ InvokeRuntimeCallingConvention calling_convention;
+ // x86 floating-point parameters are passed through core registers (EAX_ECX, EDX_EBX).
+ locations->SetInAt(0, Location::RegisterPairLocation(
+ calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
+ locations->SetInAt(1, Location::RegisterPairLocation(
+ calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
+ // The runtime helper puts the result in XMM0.
+ locations->SetOut(Location::FpuRegisterLocation(XMM0));
break;
}
default:
- LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
+ LOG(FATAL) << "Unexpected rem type " << type;
}
}
@@ -2158,9 +2263,14 @@ void InstructionCodeGeneratorX86::VisitRem(HRem* rem) {
GenerateDivRemIntegral(rem);
break;
}
- case Primitive::kPrimFloat:
+ case Primitive::kPrimFloat: {
+ __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pFmodf)));
+ codegen_->RecordPcInfo(rem, rem->GetDexPc());
+ break;
+ }
case Primitive::kPrimDouble: {
- LOG(FATAL) << "Unimplemented rem type " << type;
+ __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pFmod)));
+ codegen_->RecordPcInfo(rem, rem->GetDexPc());
break;
}
default:
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 4d70efcf38..233f4a4e4b 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -43,8 +43,9 @@ static constexpr int kCurrentMethodStackOffset = 0;
static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX };
static constexpr size_t kRuntimeParameterCoreRegistersLength =
arraysize(kRuntimeParameterCoreRegisters);
-static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { };
-static constexpr size_t kRuntimeParameterFpuRegistersLength = 0;
+static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 };
+static constexpr size_t kRuntimeParameterFpuRegistersLength =
+ arraysize(kRuntimeParameterFpuRegisters);
class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatRegister> {
public:
@@ -1363,9 +1364,17 @@ void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
break;
case Primitive::kPrimFloat:
+ // Processing a Dex `float-to-int' instruction.
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ break;
+
case Primitive::kPrimDouble:
- LOG(FATAL) << "Type conversion from " << input_type
- << " to " << result_type << " not yet implemented";
+ // Processing a Dex `double-to-int' instruction.
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
break;
default:
@@ -1388,9 +1397,17 @@ void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
break;
case Primitive::kPrimFloat:
+ // Processing a Dex `float-to-long' instruction.
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ break;
+
case Primitive::kPrimDouble:
- LOG(FATAL) << "Type conversion from " << input_type << " to "
- << result_type << " not yet implemented";
+ // Processing a Dex `double-to-long' instruction.
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
break;
default:
@@ -1433,8 +1450,9 @@ void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
break;
case Primitive::kPrimDouble:
- LOG(FATAL) << "Type conversion from " << input_type
- << " to " << result_type << " not yet implemented";
+ // Processing a Dex `double-to-float' instruction.
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
@@ -1461,8 +1479,9 @@ void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
break;
case Primitive::kPrimFloat:
- LOG(FATAL) << "Type conversion from " << input_type
- << " to " << result_type << " not yet implemented";
+ // Processing a Dex `float-to-double' instruction.
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
@@ -1550,11 +1569,55 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver
}
break;
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- LOG(FATAL) << "Type conversion from " << input_type
- << " to " << result_type << " not yet implemented";
+ case Primitive::kPrimFloat: {
+ // Processing a Dex `float-to-int' instruction.
+ XmmRegister input = in.AsFpuRegister<XmmRegister>();
+ CpuRegister output = out.AsRegister<CpuRegister>();
+ XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ Label done, nan;
+
+ __ movl(output, Immediate(kPrimIntMax));
+ // temp = int-to-float(output)
+ __ cvtsi2ss(temp, output, false);
+ // if input >= temp goto done
+ __ comiss(input, temp);
+ __ j(kAboveEqual, &done);
+ // if input == NaN goto nan
+ __ j(kUnordered, &nan);
+ // output = float-to-int-truncate(input)
+ __ cvttss2si(output, input, false);
+ __ jmp(&done);
+ __ Bind(&nan);
+ // output = 0
+ __ xorl(output, output);
+ __ Bind(&done);
break;
+ }
+
+ case Primitive::kPrimDouble: {
+ // Processing a Dex `double-to-int' instruction.
+ XmmRegister input = in.AsFpuRegister<XmmRegister>();
+ CpuRegister output = out.AsRegister<CpuRegister>();
+ XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ Label done, nan;
+
+ __ movl(output, Immediate(kPrimIntMax));
+ // temp = int-to-double(output)
+ __ cvtsi2sd(temp, output);
+ // if input >= temp goto done
+ __ comisd(input, temp);
+ __ j(kAboveEqual, &done);
+ // if input == NaN goto nan
+ __ j(kUnordered, &nan);
+ // output = double-to-int-truncate(input)
+ __ cvttsd2si(output, input);
+ __ jmp(&done);
+ __ Bind(&nan);
+ // output = 0
+ __ xorl(output, output);
+ __ Bind(&done);
+ break;
+ }
default:
LOG(FATAL) << "Unexpected type conversion from " << input_type
@@ -1574,11 +1637,55 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver
__ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
break;
- case Primitive::kPrimFloat:
- case Primitive::kPrimDouble:
- LOG(FATAL) << "Type conversion from " << input_type << " to "
- << result_type << " not yet implemented";
+ case Primitive::kPrimFloat: {
+ // Processing a Dex `float-to-long' instruction.
+ XmmRegister input = in.AsFpuRegister<XmmRegister>();
+ CpuRegister output = out.AsRegister<CpuRegister>();
+ XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ Label done, nan;
+
+ __ movq(output, Immediate(kPrimLongMax));
+ // temp = long-to-float(output)
+ __ cvtsi2ss(temp, output, true);
+ // if input >= temp goto done
+ __ comiss(input, temp);
+ __ j(kAboveEqual, &done);
+ // if input == NaN goto nan
+ __ j(kUnordered, &nan);
+ // output = float-to-long-truncate(input)
+ __ cvttss2si(output, input, true);
+ __ jmp(&done);
+ __ Bind(&nan);
+ // output = 0
+ __ xorq(output, output);
+ __ Bind(&done);
+ break;
+ }
+
+ case Primitive::kPrimDouble: {
+ // Processing a Dex `double-to-long' instruction.
+ XmmRegister input = in.AsFpuRegister<XmmRegister>();
+ CpuRegister output = out.AsRegister<CpuRegister>();
+ XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ Label done, nan;
+
+ __ movq(output, Immediate(kPrimLongMax));
+ // temp = long-to-double(output)
+ __ cvtsi2sd(temp, output, true);
+ // if input >= temp goto done
+ __ comisd(input, temp);
+ __ j(kAboveEqual, &done);
+ // if input == NaN goto nan
+ __ j(kUnordered, &nan);
+ // output = double-to-long-truncate(input)
+ __ cvttsd2si(output, input, true);
+ __ jmp(&done);
+ __ Bind(&nan);
+ // output = 0
+ __ xorq(output, output);
+ __ Bind(&done);
break;
+ }
default:
LOG(FATAL) << "Unexpected type conversion from " << input_type
@@ -1626,8 +1733,8 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver
break;
case Primitive::kPrimDouble:
- LOG(FATAL) << "Type conversion from " << input_type
- << " to " << result_type << " not yet implemented";
+ // Processing a Dex `double-to-float' instruction.
+ __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
break;
default:
@@ -1652,8 +1759,8 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver
break;
case Primitive::kPrimFloat:
- LOG(FATAL) << "Type conversion from " << input_type
- << " to " << result_type << " not yet implemented";
+ // Processing a Dex `float-to-double' instruction.
+ __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
break;
default:
@@ -1893,16 +2000,16 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* in
// 0x80000000(00000000)/-1 triggers an arithmetic exception!
// Dividing by -1 is actually negation and -0x800000000(00000000) = 0x80000000(00000000)
// so it's safe to just use negl instead of more complex comparisons.
-
- __ cmpl(second_reg, Immediate(-1));
- __ j(kEqual, slow_path->GetEntryLabel());
-
if (type == Primitive::kPrimInt) {
+ __ cmpl(second_reg, Immediate(-1));
+ __ j(kEqual, slow_path->GetEntryLabel());
// edx:eax <- sign-extended of eax
__ cdq();
// eax = quotient, edx = remainder
__ idivl(second_reg);
} else {
+ __ cmpq(second_reg, Immediate(-1));
+ __ j(kEqual, slow_path->GetEntryLabel());
// rdx:rax <- sign-extended of rax
__ cqo();
// rax = quotient, rdx = remainder
@@ -1969,9 +2076,14 @@ void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
}
void LocationsBuilderX86_64::VisitRem(HRem* rem) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall);
- switch (rem->GetResultType()) {
+ Primitive::Type type = rem->GetResultType();
+ LocationSummary::CallKind call_kind =
+ (type == Primitive::kPrimInt) || (type == Primitive::kPrimLong)
+ ? LocationSummary::kNoCall
+ : LocationSummary::kCall;
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
+
+ switch (type) {
case Primitive::kPrimInt:
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RegisterLocation(RAX));
@@ -1983,12 +2095,16 @@ void LocationsBuilderX86_64::VisitRem(HRem* rem) {
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
- LOG(FATAL) << "Unimplemented rem type " << rem->GetResultType();
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+ locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
+ // The runtime helper puts the result in XMM0.
+ locations->SetOut(Location::FpuRegisterLocation(XMM0));
break;
}
default:
- LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
+ LOG(FATAL) << "Unexpected rem type " << type;
}
}
@@ -2000,13 +2116,16 @@ void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
GenerateDivRemIntegral(rem);
break;
}
-
- case Primitive::kPrimFloat:
+ case Primitive::kPrimFloat: {
+ __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pFmodf), true));
+ codegen_->RecordPcInfo(rem, rem->GetDexPc());
+ break;
+ }
case Primitive::kPrimDouble: {
- LOG(FATAL) << "Unimplemented rem type " << rem->GetResultType();
+ __ gs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pFmod), true));
+ codegen_->RecordPcInfo(rem, rem->GetDexPc());
break;
}
-
default:
LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
}
diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc
index c36b1436d3..82fe03caa2 100644
--- a/compiler/optimizing/find_loops_test.cc
+++ b/compiler/optimizing/find_loops_test.cc
@@ -32,7 +32,7 @@ static HGraph* TestCode(const uint16_t* data, ArenaAllocator* allocator) {
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
HGraph* graph = builder.BuildGraph(*item);
graph->BuildDominatorTree();
- graph->FindNaturalLoops();
+ graph->AnalyzeNaturalLoops();
return graph;
}
diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc
index ad6e3382bc..a6a68ca59d 100644
--- a/compiler/optimizing/gvn_test.cc
+++ b/compiler/optimizing/gvn_test.cc
@@ -175,7 +175,7 @@ TEST(GVNTest, LoopFieldElimination) {
graph->BuildDominatorTree();
graph->TransformToSSA();
- graph->FindNaturalLoops();
+ graph->AnalyzeNaturalLoops();
GlobalValueNumberer(&allocator, graph).Run();
// Check that all field get instructions are still there.
@@ -239,7 +239,7 @@ TEST(GVNTest, LoopSideEffects) {
graph->BuildDominatorTree();
graph->TransformToSSA();
- graph->FindNaturalLoops();
+ graph->AnalyzeNaturalLoops();
ASSERT_TRUE(inner_loop_header->GetLoopInformation()->IsIn(
*outer_loop_header->GetLoopInformation()));
diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc
index c49cf7e03f..28ca5e81e6 100644
--- a/compiler/optimizing/linearize_test.cc
+++ b/compiler/optimizing/linearize_test.cc
@@ -44,7 +44,7 @@ static void TestCode(const uint16_t* data, const int* expected_order, size_t num
graph->BuildDominatorTree();
graph->TransformToSSA();
- graph->FindNaturalLoops();
+ graph->AnalyzeNaturalLoops();
x86::CodeGeneratorX86 codegen(graph);
SsaLivenessAnalysis liveness(*graph, &codegen);
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index e3c6fec23b..5c7e6f0325 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -38,7 +38,7 @@ static HGraph* BuildGraph(const uint16_t* data, ArenaAllocator* allocator) {
RemoveSuspendChecks(graph);
graph->BuildDominatorTree();
graph->TransformToSSA();
- graph->FindNaturalLoops();
+ graph->AnalyzeNaturalLoops();
// `Inline` conditions into ifs.
PrepareForRegisterAllocation(graph).Run();
return graph;
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index 246e7ef309..4b69e57960 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -50,7 +50,7 @@ static void TestCode(const uint16_t* data, const char* expected) {
ASSERT_NE(graph, nullptr);
graph->BuildDominatorTree();
graph->TransformToSSA();
- graph->FindNaturalLoops();
+ graph->AnalyzeNaturalLoops();
// `Inline` conditions into ifs.
PrepareForRegisterAllocation(graph).Run();
x86::CodeGeneratorX86 codegen(graph);
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 8cb2ef6de8..ba4dccf598 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -30,6 +30,36 @@ void HGraph::FindBackEdges(ArenaBitVector* visited) {
VisitBlockForBackEdges(entry_block_, visited, &visiting);
}
+static void RemoveAsUser(HInstruction* instruction) {
+ for (size_t i = 0; i < instruction->InputCount(); i++) {
+ instruction->InputAt(i)->RemoveUser(instruction, i);
+ }
+
+ HEnvironment* environment = instruction->GetEnvironment();
+ if (environment != nullptr) {
+ for (size_t i = 0, e = environment->Size(); i < e; ++i) {
+ HInstruction* vreg = environment->GetInstructionAt(i);
+ if (vreg != nullptr) {
+ vreg->RemoveEnvironmentUser(environment, i);
+ }
+ }
+ }
+}
+
+void HGraph::RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const {
+ for (size_t i = 0; i < blocks_.Size(); ++i) {
+ if (!visited.IsBitSet(i)) {
+ HBasicBlock* block = blocks_.Get(i);
+ for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+ RemoveAsUser(it.Current());
+ }
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ RemoveAsUser(it.Current());
+ }
+ }
+ }
+}
+
void HGraph::RemoveDeadBlocks(const ArenaBitVector& visited) const {
for (size_t i = 0; i < blocks_.Size(); ++i) {
if (!visited.IsBitSet(i)) {
@@ -72,16 +102,21 @@ void HGraph::BuildDominatorTree() {
// (1) Find the back edges in the graph doing a DFS traversal.
FindBackEdges(&visited);
- // (2) Remove blocks not visited during the initial DFS.
- // Step (3) requires dead blocks to be removed from the
+ // (2) Remove instructions and phis from blocks not visited during
+ // the initial DFS as users from other instructions, so that
+ // users can be safely removed before uses later.
+ RemoveInstructionsAsUsersFromDeadBlocks(visited);
+
+ // (3) Remove blocks not visited during the initial DFS.
+ // Step (4) requires dead blocks to be removed from the
// predecessors list of live blocks.
RemoveDeadBlocks(visited);
- // (3) Simplify the CFG now, so that we don't need to recompute
+ // (4) Simplify the CFG now, so that we don't need to recompute
// dominators and the reverse post order.
SimplifyCFG();
- // (4) Compute the immediate dominator of each block. We visit
+ // (5) Compute the immediate dominator of each block. We visit
// the successors of a block only when all its forward branches
// have been processed.
GrowableArray<size_t> visits(arena_, blocks_.Size());
@@ -237,7 +272,7 @@ void HGraph::SimplifyCFG() {
}
}
-bool HGraph::FindNaturalLoops() const {
+bool HGraph::AnalyzeNaturalLoops() const {
for (size_t i = 0; i < blocks_.Size(); ++i) {
HBasicBlock* block = blocks_.Get(i);
if (block->IsLoopHeader()) {
@@ -391,19 +426,7 @@ static void Remove(HInstructionList* instruction_list,
instruction->SetBlock(nullptr);
instruction_list->RemoveInstruction(instruction);
- for (size_t i = 0; i < instruction->InputCount(); i++) {
- instruction->InputAt(i)->RemoveUser(instruction, i);
- }
-
- HEnvironment* environment = instruction->GetEnvironment();
- if (environment != nullptr) {
- for (size_t i = 0, e = environment->Size(); i < e; ++i) {
- HInstruction* vreg = environment->GetInstructionAt(i);
- if (vreg != nullptr) {
- vreg->RemoveEnvironmentUser(environment, i);
- }
- }
- }
+ RemoveAsUser(instruction);
}
void HBasicBlock::RemoveInstruction(HInstruction* instruction) {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 28496e4ad2..8a25de19d9 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -112,10 +112,10 @@ class HGraph : public ArenaObject<kArenaAllocMisc> {
void TransformToSSA();
void SimplifyCFG();
- // Find all natural loops in this graph. Aborts computation and returns false
- // if one loop is not natural, that is the header does not dominate the back
- // edge.
- bool FindNaturalLoops() const;
+ // Analyze all natural loops in this graph. Returns false if one
+ // loop is not natural, that is the header does not dominate the
+ // back edge.
+ bool AnalyzeNaturalLoops() const;
void SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor);
void SimplifyLoop(HBasicBlock* header);
@@ -173,6 +173,7 @@ class HGraph : public ArenaObject<kArenaAllocMisc> {
void VisitBlockForBackEdges(HBasicBlock* block,
ArenaBitVector* visited,
ArenaBitVector* visiting);
+ void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const;
void RemoveDeadBlocks(const ArenaBitVector& visited) const;
ArenaAllocator* const arena_;
@@ -1669,7 +1670,12 @@ class HNewInstance : public HExpression<0> {
uint16_t GetTypeIndex() const { return type_index_; }
// Calls runtime so needs an environment.
- virtual bool NeedsEnvironment() const { return true; }
+ bool NeedsEnvironment() const OVERRIDE { return true; }
+ // It may throw when called on:
+ // - interfaces
+  // - abstract/inaccessible/unknown classes
+ // TODO: optimize when possible.
+ bool CanThrow() const OVERRIDE { return true; }
DECLARE_INSTRUCTION(NewInstance);
@@ -1995,8 +2001,8 @@ class HNot : public HUnaryOperation {
class HTypeConversion : public HExpression<1> {
public:
// Instantiate a type conversion of `input` to `result_type`.
- HTypeConversion(Primitive::Type result_type, HInstruction* input)
- : HExpression(result_type, SideEffects::None()) {
+ HTypeConversion(Primitive::Type result_type, HInstruction* input, uint32_t dex_pc)
+ : HExpression(result_type, SideEffects::None()), dex_pc_(dex_pc) {
SetRawInputAt(0, input);
DCHECK_NE(input->GetType(), result_type);
}
@@ -2005,12 +2011,18 @@ class HTypeConversion : public HExpression<1> {
Primitive::Type GetInputType() const { return GetInput()->GetType(); }
Primitive::Type GetResultType() const { return GetType(); }
+ // Required by the x86 and ARM code generators when producing calls
+ // to the runtime.
+ uint32_t GetDexPc() const { return dex_pc_; }
+
bool CanBeMoved() const OVERRIDE { return true; }
bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; }
DECLARE_INSTRUCTION(TypeConversion);
private:
+ const uint32_t dex_pc_;
+
DISALLOW_COPY_AND_ASSIGN(HTypeConversion);
};
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index d8533eb8bf..11fc9bf9b9 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -19,6 +19,7 @@
#include <fstream>
#include <stdint.h>
+#include "bounds_check_elimination.h"
#include "builder.h"
#include "code_generator.h"
#include "compiler.h"
@@ -192,24 +193,24 @@ static bool CanOptimize(const DexFile::CodeItem& code_item) {
}
static void RunOptimizations(HGraph* graph, const HGraphVisualizer& visualizer) {
- TransformToSsa ssa(graph);
HDeadCodeElimination opt1(graph);
HConstantFolding opt2(graph);
SsaRedundantPhiElimination opt3(graph);
SsaDeadPhiElimination opt4(graph);
InstructionSimplifier opt5(graph);
GVNOptimization opt6(graph);
- InstructionSimplifier opt7(graph);
+ BoundsCheckElimination bce(graph);
+ InstructionSimplifier opt8(graph);
HOptimization* optimizations[] = {
- &ssa,
&opt1,
&opt2,
&opt3,
&opt4,
&opt5,
&opt6,
- &opt7
+ &bce,
+ &opt8
};
for (size_t i = 0; i < arraysize(optimizations); ++i) {
@@ -220,6 +221,23 @@ static void RunOptimizations(HGraph* graph, const HGraphVisualizer& visualizer)
}
}
+static bool TryBuildingSsa(HGraph* graph,
+ const DexCompilationUnit& dex_compilation_unit,
+ const HGraphVisualizer& visualizer) {
+ graph->BuildDominatorTree();
+ graph->TransformToSSA();
+
+ if (!graph->AnalyzeNaturalLoops()) {
+ LOG(INFO) << "Skipping compilation of "
+ << PrettyMethod(dex_compilation_unit.GetDexMethodIndex(),
+ *dex_compilation_unit.GetDexFile())
+ << ": it contains a non natural loop";
+ return false;
+ }
+ visualizer.DumpGraph("ssa transform");
+ return true;
+}
+
CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
uint32_t access_flags,
InvokeType invoke_type,
@@ -281,7 +299,12 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
if (run_optimizations_
&& CanOptimize(*code_item)
&& RegisterAllocator::CanAllocateRegistersFor(*graph, instruction_set)) {
+ VLOG(compiler) << "Optimizing " << PrettyMethod(method_idx, dex_file);
optimized_compiled_methods_++;
+ if (!TryBuildingSsa(graph, dex_compilation_unit, visualizer)) {
+ // We could not transform the graph to SSA, bailout.
+ return nullptr;
+ }
RunOptimizations(graph, visualizer);
PrepareForRegisterAllocation(graph).Run();
@@ -316,23 +339,10 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
LOG(FATAL) << "Could not allocate registers in optimizing compiler";
UNREACHABLE();
} else {
+ VLOG(compiler) << "Compile baseline " << PrettyMethod(method_idx, dex_file);
unoptimized_compiled_methods_++;
codegen->CompileBaseline(&allocator);
- if (CanOptimize(*code_item)) {
- // Run these phases to get some test coverage.
- graph->BuildDominatorTree();
- graph->TransformToSSA();
- visualizer.DumpGraph("ssa");
- graph->FindNaturalLoops();
- SsaRedundantPhiElimination(graph).Run();
- SsaDeadPhiElimination(graph).Run();
- GVNOptimization(graph).Run();
- SsaLivenessAnalysis liveness(*graph, codegen);
- liveness.Analyze();
- visualizer.DumpGraph(kLivenessPassName);
- }
-
std::vector<uint8_t> mapping_table;
SrcMap src_mapping_table;
codegen->BuildMappingTable(&mapping_table,
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index c4106b72b5..04b56345c4 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -96,6 +96,11 @@ inline std::string Patch(const std::string& original, const diff_t& diff) {
return result;
}
+// Returns whether the instruction has been removed from the graph.
+inline bool IsRemoved(HInstruction* instruction) {
+ return instruction->GetBlock() == nullptr;
+}
+
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_OPTIMIZING_UNIT_TEST_H_
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index ba4be34ca3..8d75db91d2 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -41,7 +41,7 @@ static bool Check(const uint16_t* data) {
HGraph* graph = builder.BuildGraph(*item);
graph->BuildDominatorTree();
graph->TransformToSSA();
- graph->FindNaturalLoops();
+ graph->AnalyzeNaturalLoops();
x86::CodeGeneratorX86 codegen(graph);
SsaLivenessAnalysis liveness(*graph, &codegen);
liveness.Analyze();
@@ -255,7 +255,7 @@ static HGraph* BuildSSAGraph(const uint16_t* data, ArenaAllocator* allocator) {
HGraph* graph = builder.BuildGraph(*item);
graph->BuildDominatorTree();
graph->TransformToSSA();
- graph->FindNaturalLoops();
+ graph->AnalyzeNaturalLoops();
return graph;
}
@@ -494,7 +494,7 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator,
(*phi)->AddInput(*input2);
graph->BuildDominatorTree();
- graph->FindNaturalLoops();
+ graph->AnalyzeNaturalLoops();
return graph;
}
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 5ab328fe23..2cbd51aa10 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -22,20 +22,6 @@
namespace art {
-class TransformToSsa : public HOptimization {
- public:
- explicit TransformToSsa(HGraph* graph) : HOptimization(graph, true, "ssa transform") {}
-
- void Run() OVERRIDE {
- graph_->BuildDominatorTree();
- graph_->TransformToSSA();
- graph_->FindNaturalLoops();
- }
-
- private:
- DISALLOW_COPY_AND_ASSIGN(TransformToSsa);
-};
-
static constexpr int kDefaultNumberOfLoops = 2;
class SsaBuilder : public HGraphVisitor {
diff --git a/compiler/utils/arena_allocator.cc b/compiler/utils/arena_allocator.cc
index 004af98852..a80ad938a6 100644
--- a/compiler/utils/arena_allocator.cc
+++ b/compiler/utils/arena_allocator.cc
@@ -189,6 +189,15 @@ Arena* ArenaPool::AllocArena(size_t size) {
return ret;
}
+size_t ArenaPool::GetBytesAllocated() const {
+ size_t total = 0;
+ MutexLock lock(Thread::Current(), lock_);
+ for (Arena* arena = free_arenas_; arena != nullptr; arena = arena->next_) {
+ total += arena->GetBytesAllocated();
+ }
+ return total;
+}
+
void ArenaPool::FreeArenaChain(Arena* first) {
if (UNLIKELY(RUNNING_ON_VALGRIND > 0)) {
for (Arena* arena = first; arena != nullptr; arena = arena->next_) {
diff --git a/compiler/utils/arena_allocator.h b/compiler/utils/arena_allocator.h
index 6d213991d3..7f5bc9ac4c 100644
--- a/compiler/utils/arena_allocator.h
+++ b/compiler/utils/arena_allocator.h
@@ -135,6 +135,10 @@ class Arena {
return Size() - bytes_allocated_;
}
+ size_t GetBytesAllocated() const {
+ return bytes_allocated_;
+ }
+
private:
size_t bytes_allocated_;
uint8_t* memory_;
@@ -153,11 +157,12 @@ class ArenaPool {
public:
ArenaPool();
~ArenaPool();
- Arena* AllocArena(size_t size);
- void FreeArenaChain(Arena* first);
+ Arena* AllocArena(size_t size) LOCKS_EXCLUDED(lock_);
+ void FreeArenaChain(Arena* first) LOCKS_EXCLUDED(lock_);
+ size_t GetBytesAllocated() const LOCKS_EXCLUDED(lock_);
private:
- Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+ mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
Arena* free_arenas_ GUARDED_BY(lock_);
DISALLOW_COPY_AND_ASSIGN(ArenaPool);
};
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 474d8a909e..2a6c58e128 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -663,9 +663,19 @@ void X86_64Assembler::cvtsd2si(CpuRegister dst, XmmRegister src) {
void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src) {
+ cvttss2si(dst, src, false);
+}
+
+
+void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xF3);
- EmitOptionalRex32(dst, src);
+ if (is64bit) {
+ // Emit a REX.W prefix if the operand size is 64 bits.
+ EmitRex64(dst, src);
+ } else {
+ EmitOptionalRex32(dst, src);
+ }
EmitUint8(0x0F);
EmitUint8(0x2C);
EmitXmmRegisterOperand(dst.LowBits(), src);
@@ -673,9 +683,19 @@ void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src) {
void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src) {
+ cvttsd2si(dst, src, false);
+}
+
+
+void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xF2);
- EmitOptionalRex32(dst, src);
+ if (is64bit) {
+ // Emit a REX.W prefix if the operand size is 64 bits.
+ EmitRex64(dst, src);
+ } else {
+ EmitOptionalRex32(dst, src);
+ }
EmitUint8(0x0F);
EmitUint8(0x2C);
EmitXmmRegisterOperand(dst.LowBits(), src);
@@ -1997,6 +2017,10 @@ void X86_64Assembler::EmitRex64(XmmRegister dst, CpuRegister src) {
EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}
+void X86_64Assembler::EmitRex64(CpuRegister dst, XmmRegister src) {
+ EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
+}
+
void X86_64Assembler::EmitRex64(CpuRegister dst, const Operand& operand) {
uint8_t rex = 0x48 | operand.rex(); // REX.W000
if (dst.NeedsRex()) {
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 6e71e4a5bb..51d1de2c0f 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -340,7 +340,9 @@ class X86_64Assembler FINAL : public Assembler {
void cvtsd2ss(XmmRegister dst, XmmRegister src);
void cvttss2si(CpuRegister dst, XmmRegister src); // Note: this is the r32 version.
+ void cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit);
void cvttsd2si(CpuRegister dst, XmmRegister src); // Note: this is the r32 version.
+ void cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit);
void cvtdq2pd(XmmRegister dst, XmmRegister src);
@@ -688,6 +690,7 @@ class X86_64Assembler FINAL : public Assembler {
void EmitRex64(CpuRegister dst, CpuRegister src);
void EmitRex64(CpuRegister dst, const Operand& operand);
void EmitRex64(XmmRegister dst, CpuRegister src);
+ void EmitRex64(CpuRegister dst, XmmRegister src);
// Emit a REX prefix to normalize byte registers plus necessary register bit encodings.
void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src);