Diffstat (limited to 'compiler')
-rw-r--r--  compiler/dex/backend.h                  |  19
-rw-r--r--  compiler/dex/compiler_enums.h           |  92
-rw-r--r--  compiler/dex/mir_dataflow.cc            |  48
-rw-r--r--  compiler/dex/mir_graph.cc               |   5
-rwxr-xr-x  compiler/dex/quick/gen_invoke.cc        |   6
-rw-r--r--  compiler/dex/quick/ralloc_util.cc       |   2
-rw-r--r--  compiler/dex/quick/x86/assemble_x86.cc  |  36
-rw-r--r--  compiler/dex/quick/x86/call_x86.cc      |  32
-rw-r--r--  compiler/dex/quick/x86/codegen_x86.h    |  37
-rwxr-xr-x  compiler/dex/quick/x86/target_x86.cc    | 470
-rw-r--r--  compiler/dex/quick/x86/x86_lir.h        |   5
-rw-r--r--  compiler/driver/compiler_driver.cc      |   8
-rw-r--r--  compiler/elf_writer_quick.cc            |   2
-rw-r--r--  compiler/image_writer.cc                |  13
14 files changed, 599 insertions, 176 deletions
diff --git a/compiler/dex/backend.h b/compiler/dex/backend.h
index 596b3c9802..1f24849257 100644
--- a/compiler/dex/backend.h
+++ b/compiler/dex/backend.h
@@ -28,6 +28,25 @@ class Backend {
virtual void Materialize() = 0;
virtual CompiledMethod* GetCompiledMethod() = 0;
+ // Queries for backend support for vectors
+ /*
+ * Return the number of bits in a vector register.
+ * @return 0 if vector registers are not supported, or the
+ * number of bits in the vector register if supported.
+ */
+ virtual int VectorRegisterSize() { return 0; }
+
+ /*
+ * Return the number of reservable vector registers supported
+   * @param fp_used 'true' if floating point computations will be
+ * executed while vector registers are reserved.
+ * @return the number of vector registers that are available
+ * @note The backend should ensure that sufficient vector registers
+ * are held back to generate scalar code without exhausting vector
+ * registers, if scalar code also uses the vector registers.
+ */
+ virtual int NumReservableVectorRegisters(bool fp_used) { return 0; }
+
protected:
explicit Backend(ArenaAllocator* arena) : arena_(arena) {}
ArenaAllocator* const arena_;
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index caecb7a48e..799a742032 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -133,91 +133,101 @@ enum ExtendedMIROpcode {
// could be supported by using a bit in TypeSize and arg[0] where needed.
// @brief MIR to move constant data to a vector register
- // vA: number of bits in register
- // vB: destination
+ // vA: destination
+ // vB: number of bits in register
// args[0]~args[3]: up to 128 bits of data for initialization
kMirOpConstVector,
// @brief MIR to move a vectorized register to another
- // vA: TypeSize
- // vB: destination
- // vC: source
+ // vA: destination
+ // vB: source
+ // vC: TypeSize
kMirOpMoveVector,
// @brief Packed multiply of units in two vector registers: vB = vB .* vC using vA to know the type of the vector.
- // vA: TypeSize
- // vB: destination and source
- // vC: source
+ // vA: destination and source
+ // vB: source
+ // vC: TypeSize
kMirOpPackedMultiply,
// @brief Packed addition of units in two vector registers: vB = vB .+ vC using vA to know the type of the vector.
- // vA: TypeSize
- // vB: destination and source
- // vC: source
+ // vA: destination and source
+ // vB: source
+ // vC: TypeSize
kMirOpPackedAddition,
// @brief Packed subtraction of units in two vector registers: vB = vB .- vC using vA to know the type of the vector.
- // vA: TypeSize
- // vB: destination and source
- // vC: source
+ // vA: destination and source
+ // vB: source
+ // vC: TypeSize
kMirOpPackedSubtract,
// @brief Packed shift left of units in two vector registers: vB = vB .<< vC using vA to know the type of the vector.
- // vA: TypeSize
- // vB: destination and source
- // vC: immediate
+ // vA: destination and source
+ // vB: amount to shift
+ // vC: TypeSize
kMirOpPackedShiftLeft,
// @brief Packed signed shift right of units in two vector registers: vB = vB .>> vC using vA to know the type of the vector.
- // vA: TypeSize
- // vB: destination and source
- // vC: immediate
+ // vA: destination and source
+ // vB: amount to shift
+ // vC: TypeSize
kMirOpPackedSignedShiftRight,
// @brief Packed unsigned shift right of units in two vector registers: vB = vB .>>> vC using vA to know the type of the vector.
- // vA: TypeSize
- // vB: destination and source
- // vC: immediate
+ // vA: destination and source
+ // vB: amount to shift
+ // vC: TypeSize
kMirOpPackedUnsignedShiftRight,
// @brief Packed bitwise and of units in two vector registers: vB = vB .& vC using vA to know the type of the vector.
- // vA: TypeSize
- // vB: destination and source
- // vC: source
+ // vA: destination and source
+ // vB: source
+ // vC: TypeSize
kMirOpPackedAnd,
// @brief Packed bitwise or of units in two vector registers: vB = vB .| vC using vA to know the type of the vector.
- // vA: TypeSize
- // vB: destination and source
- // vC: source
+ // vA: destination and source
+ // vB: source
+ // vC: TypeSize
kMirOpPackedOr,
// @brief Packed bitwise xor of units in two vector registers: vB = vB .^ vC using vA to know the type of the vector.
- // vA: TypeSize
- // vB: destination and source
- // vC: source
+ // vA: destination and source
+ // vB: source
+ // vC: TypeSize
kMirOpPackedXor,
// @brief Reduce a 128-bit packed element into a single VR by taking lower bits
// @details Instruction does a horizontal addition of the packed elements and then adds it to VR
- // vA: TypeSize
- // vB: destination and source VR (not vector register)
- // vC: source (vector register)
+ // vA: destination and source VR (not vector register)
+ // vB: source (vector register)
+ // vC: TypeSize
kMirOpPackedAddReduce,
// @brief Extract a packed element into a single VR.
- // vA: TypeSize
- // vB: destination VR (not vector register)
- // vC: source (vector register)
+ // vA: destination VR (not vector register)
+ // vB: source (vector register)
+ // vC: TypeSize
// arg[0]: The index to use for extraction from vector register (which packed element)
kMirOpPackedReduce,
// @brief Create a vector value, with all TypeSize values equal to vC
- // vA: TypeSize
- // vB: destination vector register
- // vC: source VR (not vector register)
+ // vA: destination vector register
+ // vB: source VR (not vector register)
+ // vC: TypeSize
kMirOpPackedSet,
+ // @brief Reserve N vector registers (named 0..N-1)
+ // vA: Number of registers
+  // @note: The backend may choose how to map the vector register numbers used in vector opcodes.
+  //        Reserved registers are removed from the backend's temporary register pool.
+ kMirOpReserveVectorRegisters,
+
+  // @brief Free reserved vector registers
+ // @note: All currently reserved vector registers are returned to the temporary pool.
+ kMirOpReturnVectorRegisters,
+
kMirOpLast,
};
diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc
index 9fea709568..bc99a272a6 100644
--- a/compiler/dex/mir_dataflow.cc
+++ b/compiler/dex/mir_dataflow.cc
@@ -840,6 +840,54 @@ const uint64_t MIRGraph::oat_data_flow_attributes_[kMirOpLast] = {
// 113 MIR_SELECT
DF_DA | DF_UB,
+
+ // 114 MirOpConstVector
+ DF_DA,
+
+ // 115 MirOpMoveVector
+ 0,
+
+ // 116 MirOpPackedMultiply
+ 0,
+
+ // 117 MirOpPackedAddition
+ 0,
+
+ // 118 MirOpPackedSubtract
+ 0,
+
+ // 119 MirOpPackedShiftLeft
+ 0,
+
+ // 120 MirOpPackedSignedShiftRight
+ 0,
+
+ // 121 MirOpPackedUnsignedShiftRight
+ 0,
+
+ // 122 MirOpPackedAnd
+ 0,
+
+ // 123 MirOpPackedOr
+ 0,
+
+ // 124 MirOpPackedXor
+ 0,
+
+ // 125 MirOpPackedAddReduce
+ DF_DA | DF_UA,
+
+ // 126 MirOpPackedReduce
+ DF_DA,
+
+ // 127 MirOpPackedSet
+ DF_UB,
+
+ // 128 MirOpReserveVectorRegisters
+ 0,
+
+ // 129 MirOpReturnVectorRegisters
+ 0,
};
/* Return the base virtual register for a SSA name */
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 71bb2c6166..5741b0b8ff 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -62,6 +62,8 @@ const char* MIRGraph::extended_mir_op_names_[kMirOpLast - kMirOpFirst] = {
"PackedAddReduce",
"PackedReduce",
"PackedSet",
+ "ReserveVectorRegisters",
+ "ReturnVectorRegisters",
};
MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena)
@@ -910,12 +912,13 @@ void MIRGraph::DumpCFG(const char* dir_prefix, bool all_blocks, const char *suff
mir->next ? " | " : " ");
}
} else {
- fprintf(file, " {%04x %s %s %s\\l}%s\\\n", mir->offset,
+ fprintf(file, " {%04x %s %s %s %s\\l}%s\\\n", mir->offset,
mir->ssa_rep ? GetDalvikDisassembly(mir) :
!IsPseudoMirOp(opcode) ? Instruction::Name(mir->dalvikInsn.opcode) :
extended_mir_op_names_[opcode - kMirOpFirst],
(mir->optimization_flags & MIR_IGNORE_RANGE_CHECK) != 0 ? " no_rangecheck" : " ",
(mir->optimization_flags & MIR_IGNORE_NULL_CHECK) != 0 ? " no_nullcheck" : " ",
+ (mir->optimization_flags & MIR_IGNORE_SUSPEND_CHECK) != 0 ? " no_suspendcheck" : " ",
mir->next ? " | " : " ");
}
}
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 02f39ac180..6c0dfe80a6 100755
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -1638,6 +1638,12 @@ bool Mir2Lir::GenInlinedStringCompareTo(CallInfo* info) {
bool Mir2Lir::GenInlinedCurrentThread(CallInfo* info) {
RegLocation rl_dest = InlineTarget(info);
+
+ // Early exit if the result is unused.
+ if (rl_dest.orig_sreg < 0) {
+ return true;
+ }
+
RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
switch (cu_->instruction_set) {
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index 13bd4432d7..e8fc919d5f 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -1276,7 +1276,7 @@ void Mir2Lir::DoPromotion() {
if (cu_->instruction_set == kThumb2) {
bool wide = fp_regs[i].s_reg & STARTING_WIDE_SREG;
if (wide) {
- if (promotion_map_[p_map_idx + 1].fp_location == kLocPhysReg) {
+ if (promotion_map_[p_map_idx + 1].fp_location != kLocPhysReg) {
// Ignore result - if can't alloc double may still be able to alloc singles.
AllocPreservedDouble(low_sreg);
}
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 7baf2d9663..4e973d8b48 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -327,21 +327,11 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0,
{ kX86 ## opname ## RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \
{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE12, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
-#define EXT_0F_REX_NO_PREFIX_ENCODING_MAP(opname, opcode, reg_def) \
-{ kX86 ## opname ## RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE1, { REX, 0x00, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RR", "!0r,!1r" }, \
-{ kX86 ## opname ## RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1, { REX, 0x00, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \
-{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE12, { REX, 0x00, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
-
#define EXT_0F_REX_W_ENCODING_MAP(opname, prefix, opcode, reg_def) \
{ kX86 ## opname ## RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE1, { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RR", "!0r,!1r" }, \
{ kX86 ## opname ## RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1, { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \
{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE12, { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
-#define EXT_0F_REX_W_NO_PREFIX_ENCODING_MAP(opname, opcode, reg_def) \
-{ kX86 ## opname ## RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE1, { REX_W, 0x00, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RR", "!0r,!1r" }, \
-{ kX86 ## opname ## RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1, { REX_W, 0x00, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \
-{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE12, { REX_W, 0x00, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
-
#define EXT_0F_ENCODING2_MAP(opname, prefix, opcode, opcode2, reg_def) \
{ kX86 ## opname ## RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE1, { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0, false }, #opname "RR", "!0r,!1r" }, \
{ kX86 ## opname ## RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1, { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \
@@ -405,9 +395,12 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0,
EXT_0F_ENCODING_MAP(Haddpd, 0x66, 0x7C, REG_DEF0_USE0),
EXT_0F_ENCODING_MAP(Haddps, 0xF2, 0x7C, REG_DEF0_USE0),
- { kX86PextrbRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x14, 0, 0, 1, false }, "PextbRRI", "!0r,!1r,!2d" },
- { kX86PextrwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0xC5, 0x00, 0, 0, 1, false }, "PextwRRI", "!0r,!1r,!2d" },
- { kX86PextrdRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1, false }, "PextdRRI", "!0r,!1r,!2d" },
+ { kX86PextrbRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x14, 0, 0, 1, false }, "PextbRRI", "!0r,!1r,!2d" },
+ { kX86PextrwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0xC5, 0x00, 0, 0, 1, false }, "PextwRRI", "!0r,!1r,!2d" },
+ { kX86PextrdRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1, false }, "PextdRRI", "!0r,!1r,!2d" },
+  { kX86PextrbMRI, kMemRegImm, IS_QUAD_OP | REG_USE02 | IS_STORE, { 0x66, 0, 0x0F, 0x3A, 0x14, 0, 0, 1, false }, "PextrbMRI", "[!0r+!1d],!2r,!3d" },
+  { kX86PextrwMRI, kMemRegImm, IS_QUAD_OP | REG_USE02 | IS_STORE, { 0x66, 0, 0x0F, 0x3A, 0x15, 0, 0, 1, false }, "PextrwMRI", "[!0r+!1d],!2r,!3d" },
+  { kX86PextrdMRI, kMemRegImm, IS_QUAD_OP | REG_USE02 | IS_STORE, { 0x66, 0, 0x0F, 0x3A, 0x16, 0, 0, 1, false }, "PextrdMRI", "[!0r+!1d],!2r,!3d" },
{ kX86PshuflwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0xF2, 0, 0x0F, 0x70, 0, 0, 0, 1, false }, "PshuflwRRI", "!0r,!1r,!2d" },
{ kX86PshufdRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x70, 0, 0, 0, 1, false }, "PshuffRRI", "!0r,!1r,!2d" },
@@ -499,10 +492,10 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0,
EXT_0F_ENCODING_MAP(Movzx16, 0x00, 0xB7, REG_DEF0),
EXT_0F_ENCODING_MAP(Movsx8, 0x00, 0xBE, REG_DEF0),
EXT_0F_ENCODING_MAP(Movsx16, 0x00, 0xBF, REG_DEF0),
- EXT_0F_REX_NO_PREFIX_ENCODING_MAP(Movzx8q, 0xB6, REG_DEF0),
- EXT_0F_REX_W_NO_PREFIX_ENCODING_MAP(Movzx16q, 0xB7, REG_DEF0),
- EXT_0F_REX_NO_PREFIX_ENCODING_MAP(Movsx8q, 0xBE, REG_DEF0),
- EXT_0F_REX_W_NO_PREFIX_ENCODING_MAP(Movsx16q, 0xBF, REG_DEF0),
+ EXT_0F_ENCODING_MAP(Movzx8q, REX_W, 0xB6, REG_DEF0),
+ EXT_0F_ENCODING_MAP(Movzx16q, REX_W, 0xB7, REG_DEF0),
+ EXT_0F_ENCODING_MAP(Movsx8q, REX, 0xBE, REG_DEF0),
+ EXT_0F_ENCODING_MAP(Movsx16q, REX_W, 0xBF, REG_DEF0),
#undef EXT_0F_ENCODING_MAP
{ kX86Jcc8, kJcc, IS_BINARY_OP | IS_BRANCH | NEEDS_FIXUP | USES_CCODES, { 0, 0, 0x70, 0, 0, 0, 0, 0, false }, "Jcc8", "!1c !0t" },
@@ -627,7 +620,8 @@ size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int32_t raw_reg, int
if (registers_need_rex_prefix) {
DCHECK(cu_->target64) << "Attempt to use a 64-bit only addressable register "
<< RegStorage::RegNum(raw_reg) << " with instruction " << entry->name;
- if (entry->skeleton.prefix1 != REX_W && entry->skeleton.prefix2 != REX_W) {
+ if (entry->skeleton.prefix1 != REX_W && entry->skeleton.prefix2 != REX_W
+ && entry->skeleton.prefix1 != REX && entry->skeleton.prefix2 != REX) {
++size; // rex
}
}
@@ -906,7 +900,8 @@ void X86Mir2Lir::EmitPrefix(const X86EncodingMap* entry,
// 64 bit addresses by GS, not FS.
code_buffer_.push_back(THREAD_PREFIX_GS);
} else {
- if (entry->skeleton.prefix1 == REX_W) {
+ if (entry->skeleton.prefix1 == REX_W || entry->skeleton.prefix1 == REX) {
+ DCHECK(cu_->target64);
rex |= entry->skeleton.prefix1;
code_buffer_.push_back(rex);
rex = 0;
@@ -915,7 +910,8 @@ void X86Mir2Lir::EmitPrefix(const X86EncodingMap* entry,
}
}
if (entry->skeleton.prefix2 != 0) {
- if (entry->skeleton.prefix2 == REX_W) {
+    if (entry->skeleton.prefix2 == REX_W || entry->skeleton.prefix2 == REX) {
+ DCHECK(cu_->target64);
rex |= entry->skeleton.prefix2;
code_buffer_.push_back(rex);
rex = 0;
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 6ca220cb2e..9000514856 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -94,13 +94,10 @@ void X86Mir2Lir::GenPackedSwitch(MIR* mir, DexOffset table_offset,
start_of_method_reg = rl_method.reg;
store_method_addr_used_ = true;
} else {
- if (cu_->target64) {
- start_of_method_reg = AllocTempWide();
- } else {
- start_of_method_reg = AllocTemp();
- }
+ start_of_method_reg = AllocTempRef();
NewLIR1(kX86StartOfMethod, start_of_method_reg.GetReg());
}
+ DCHECK_EQ(start_of_method_reg.Is64Bit(), cu_->target64);
int low_key = s4FromSwitchData(&table[2]);
RegStorage keyReg;
// Remove the bias, if necessary
@@ -111,7 +108,7 @@ void X86Mir2Lir::GenPackedSwitch(MIR* mir, DexOffset table_offset,
OpRegRegImm(kOpSub, keyReg, rl_src.reg, low_key);
}
// Bounds check - if < 0 or >= size continue following switch
- OpRegImm(kOpCmp, keyReg, size-1);
+ OpRegImm(kOpCmp, keyReg, size - 1);
LIR* branch_over = OpCondBranch(kCondHi, NULL);
// Load the displacement from the switch table
@@ -119,11 +116,7 @@ void X86Mir2Lir::GenPackedSwitch(MIR* mir, DexOffset table_offset,
NewLIR5(kX86PcRelLoadRA, disp_reg.GetReg(), start_of_method_reg.GetReg(), keyReg.GetReg(),
2, WrapPointer(tab_rec));
// Add displacement to start of method
- if (cu_->target64) {
- NewLIR2(kX86Add64RR, start_of_method_reg.GetReg(), disp_reg.GetReg());
- } else {
- OpRegReg(kOpAdd, start_of_method_reg, disp_reg);
- }
+ OpRegReg(kOpAdd, start_of_method_reg, cu_->target64 ? As64BitReg(disp_reg) : disp_reg);
// ..and go!
LIR* switch_branch = NewLIR1(kX86JmpR, start_of_method_reg.GetReg());
tab_rec->anchor = switch_branch;
@@ -174,7 +167,6 @@ void X86Mir2Lir::GenFillArrayData(DexOffset table_offset, RegLocation rl_src) {
}
store_method_addr_used_ = true;
} else {
- // TODO(64) force to be 64-bit
NewLIR1(kX86StartOfMethod, method_start.GetReg());
}
NewLIR2(kX86PcRelAdr, payload.GetReg(), WrapPointer(tab_rec));
@@ -193,8 +185,8 @@ void X86Mir2Lir::GenMoveException(RegLocation rl_dest) {
Thread::ExceptionOffset<8>().Int32Value() :
Thread::ExceptionOffset<4>().Int32Value();
RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true);
- NewLIR2(kX86Mov32RT, rl_result.reg.GetReg(), ex_offset);
- NewLIR2(kX86Mov32TI, ex_offset, 0);
+ NewLIR2(cu_->target64 ? kX86Mov64RT : kX86Mov32RT, rl_result.reg.GetReg(), ex_offset);
+ NewLIR2(cu_->target64 ? kX86Mov64TI : kX86Mov32TI, ex_offset, 0);
StoreValue(rl_dest, rl_result);
}
@@ -202,17 +194,15 @@ void X86Mir2Lir::GenMoveException(RegLocation rl_dest) {
* Mark garbage collection card. Skip if the value we're storing is null.
*/
void X86Mir2Lir::MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) {
- RegStorage reg_card_base = AllocTemp();
- RegStorage reg_card_no = AllocTemp();
+ DCHECK_EQ(tgt_addr_reg.Is64Bit(), cu_->target64);
+ DCHECK_EQ(val_reg.Is64Bit(), cu_->target64);
+ RegStorage reg_card_base = AllocTempRef();
+ RegStorage reg_card_no = AllocTempRef();
LIR* branch_over = OpCmpImmBranch(kCondEq, val_reg, 0, NULL);
int ct_offset = cu_->target64 ?
Thread::CardTableOffset<8>().Int32Value() :
Thread::CardTableOffset<4>().Int32Value();
- if (cu_->target64) {
- NewLIR2(kX86Mov64RT, reg_card_base.GetReg(), ct_offset);
- } else {
- NewLIR2(kX86Mov32RT, reg_card_base.GetReg(), ct_offset);
- }
+ NewLIR2(cu_->target64 ? kX86Mov64RT : kX86Mov32RT, reg_card_base.GetReg(), ct_offset);
OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0, kUnsignedByte);
LIR* target = NewLIR0(kPseudoTargetLabel);
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 55e5993dce..ff7b30eeec 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -28,7 +28,7 @@ class X86Mir2Lir : public Mir2Lir {
protected:
class InToRegStorageMapper {
public:
- virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide) = 0;
+ virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide, bool is_ref) = 0;
virtual ~InToRegStorageMapper() {}
};
@@ -36,7 +36,7 @@ class X86Mir2Lir : public Mir2Lir {
public:
explicit InToRegStorageX86_64Mapper(Mir2Lir* ml) : ml_(ml), cur_core_reg_(0), cur_fp_reg_(0) {}
virtual ~InToRegStorageX86_64Mapper() {}
- virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide);
+ virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide, bool is_ref);
protected:
Mir2Lir* ml_;
private:
@@ -118,6 +118,8 @@ class X86Mir2Lir : public Mir2Lir {
void FreeCallTemps();
void LockCallTemps();
void CompilerInitializeRegAlloc();
+ int VectorRegisterSize();
+ int NumReservableVectorRegisters(bool fp_used);
// Required for target - miscellaneous.
void AssembleLIR();
@@ -503,6 +505,11 @@ class X86Mir2Lir : public Mir2Lir {
void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
int64_t val, ConditionCode ccode);
void GenConstWide(RegLocation rl_dest, int64_t value);
+ void GenMultiplyVectorSignedByte(BasicBlock *bb, MIR *mir);
+ void GenShiftByteVector(BasicBlock *bb, MIR *mir);
+ void AndMaskVectorRegister(RegStorage rs_src1, uint32_t m1, uint32_t m2, uint32_t m3, uint32_t m4);
+ void MaskVectorRegister(X86OpCode opcode, RegStorage rs_src1, uint32_t m1, uint32_t m2, uint32_t m3, uint32_t m4);
+ void AppendOpcodeWithConst(X86OpCode opcode, int reg, MIR* mir);
static bool ProvidesFullMemoryBarrier(X86OpCode opcode);
@@ -513,6 +520,12 @@ class X86Mir2Lir : public Mir2Lir {
virtual RegStorage AllocateByteRegister();
/*
+   * @brief Obtain a 128-bit view of a wide temporary, for use as a vector register.
+   * @param reg the wide temporary register to view.
+   * @returns a 128-bit temporary register aliasing reg.
+ */
+ virtual RegStorage Get128BitRegister(RegStorage reg);
+
+ /*
* @brief Check if a register is byte addressable.
* @returns true if a register is byte addressable.
*/
@@ -528,6 +541,22 @@ class X86Mir2Lir : public Mir2Lir {
*/
bool GenInlinedIndexOf(CallInfo* info, bool zero_based);
+ /**
+ * @brief Reserve a fixed number of vector registers from the register pool
+ * @details The mir->dalvikInsn.vA specifies an N such that vector registers
+ * [0..N-1] are removed from the temporary pool. The caller must call
+ * ReturnVectorRegisters before calling ReserveVectorRegisters again.
+   * Also sets num_reserved_vector_regs_ to the specified value.
+   * @param mir MIR whose vA specifies the number of registers to reserve
+ */
+ void ReserveVectorRegisters(MIR* mir);
+
+ /**
+ * @brief Return all the reserved vector registers to the temp pool
+   * @details Returns vector registers [0..num_reserved_vector_regs_-1] to the temp pool
+ */
+ void ReturnVectorRegisters();
+
/*
* @brief Load 128 bit constant into vector register.
* @param bb The basic block in which the MIR is from.
@@ -901,6 +930,10 @@ class X86Mir2Lir : public Mir2Lir {
LIR *AddVectorLiteral(MIR *mir);
InToRegStorageMapping in_to_reg_storage_mapping_;
+
+ private:
+  // The number of vector registers [0..N-1] reserved by a call to ReserveVectorRegisters
+ int num_reserved_vector_regs_;
};
} // namespace art
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 43882c2e02..e81f505f2f 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -427,6 +427,10 @@ RegStorage X86Mir2Lir::AllocateByteRegister() {
return reg;
}
+RegStorage X86Mir2Lir::Get128BitRegister(RegStorage reg) {
+ return GetRegInfo(reg)->FindMatchingView(RegisterInfo::k128SoloStorageMask)->GetReg();
+}
+
bool X86Mir2Lir::IsByteRegister(RegStorage reg) {
return cu_->target64 || reg.GetRegNum() < rs_rX86_SP.GetRegNum();
}
@@ -646,6 +650,14 @@ void X86Mir2Lir::CompilerInitializeRegAlloc() {
reg_pool_->next_dp_reg_ = 1;
}
+int X86Mir2Lir::VectorRegisterSize() {
+ return 128;
+}
+
+int X86Mir2Lir::NumReservableVectorRegisters(bool fp_used) {
+ return fp_used ? 5 : 7;
+}
+
void X86Mir2Lir::SpillCoreRegs() {
if (num_core_spills_ == 0) {
return;
@@ -790,6 +802,9 @@ X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator*
rX86_RET1 = rDX;
rX86_INVOKE_TGT = rAX;
rX86_COUNT = rCX;
+
+ // Initialize the number of reserved vector registers
+ num_reserved_vector_regs_ = -1;
}
Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
@@ -1475,6 +1490,12 @@ std::vector<uint8_t>* X86Mir2Lir::ReturnCallFrameInformation() {
void X86Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) {
switch (static_cast<ExtendedMIROpcode>(mir->dalvikInsn.opcode)) {
+ case kMirOpReserveVectorRegisters:
+ ReserveVectorRegisters(mir);
+ break;
+ case kMirOpReturnVectorRegisters:
+ ReturnVectorRegisters();
+ break;
case kMirOpConstVector:
GenConst128(bb, mir);
break;
@@ -1522,11 +1543,57 @@ void X86Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) {
}
}
+void X86Mir2Lir::ReserveVectorRegisters(MIR* mir) {
+ // We should not try to reserve twice without returning the registers
+  DCHECK_EQ(num_reserved_vector_regs_, -1);
+
+ int num_vector_reg = mir->dalvikInsn.vA;
+ for (int i = 0; i < num_vector_reg; i++) {
+ RegStorage xp_reg = RegStorage::Solo128(i);
+ RegisterInfo *xp_reg_info = GetRegInfo(xp_reg);
+ Clobber(xp_reg);
+
+ for (RegisterInfo *info = xp_reg_info->GetAliasChain();
+ info != nullptr;
+ info = info->GetAliasChain()) {
+ if (info->GetReg().IsSingle()) {
+ reg_pool_->sp_regs_.Delete(info);
+ } else {
+ reg_pool_->dp_regs_.Delete(info);
+ }
+ }
+ }
+
+ num_reserved_vector_regs_ = num_vector_reg;
+}
+
+void X86Mir2Lir::ReturnVectorRegisters() {
+ // Return all the reserved registers
+ for (int i = 0; i < num_reserved_vector_regs_; i++) {
+ RegStorage xp_reg = RegStorage::Solo128(i);
+ RegisterInfo *xp_reg_info = GetRegInfo(xp_reg);
+
+ for (RegisterInfo *info = xp_reg_info->GetAliasChain();
+ info != nullptr;
+ info = info->GetAliasChain()) {
+ if (info->GetReg().IsSingle()) {
+ reg_pool_->sp_regs_.Insert(info);
+ } else {
+ reg_pool_->dp_regs_.Insert(info);
+ }
+ }
+ }
+
+  // We don't have any more reserved vector registers.
+ num_reserved_vector_regs_ = -1;
+}
+
void X86Mir2Lir::GenConst128(BasicBlock* bb, MIR* mir) {
- int type_size = mir->dalvikInsn.vA;
+ store_method_addr_used_ = true;
+ int type_size = mir->dalvikInsn.vB;
// We support 128 bit vectors.
DCHECK_EQ(type_size & 0xFFFF, 128);
- RegStorage rs_dest = RegStorage::Solo128(mir->dalvikInsn.vB);
+ RegStorage rs_dest = RegStorage::Solo128(mir->dalvikInsn.vA);
uint32_t *args = mir->dalvikInsn.arg;
int reg = rs_dest.GetReg();
// Check for all 0 case.
@@ -1534,6 +1601,12 @@ void X86Mir2Lir::GenConst128(BasicBlock* bb, MIR* mir) {
NewLIR2(kX86XorpsRR, reg, reg);
return;
}
+
+ // Append the mov const vector to reg opcode.
+ AppendOpcodeWithConst(kX86MovupsRM, reg, mir);
+}
+
+void X86Mir2Lir::AppendOpcodeWithConst(X86OpCode opcode, int reg, MIR* mir) {
// Okay, load it from the constant vector area.
LIR *data_target = ScanVectorLiteral(mir);
if (data_target == nullptr) {
@@ -1553,24 +1626,66 @@ void X86Mir2Lir::GenConst128(BasicBlock* bb, MIR* mir) {
// 4 byte offset. We will fix this up in the assembler later to have the right
// value.
ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
- LIR *load = NewLIR3(kX86Mova128RM, reg, rl_method.reg.GetReg(), 256 /* bogus */);
+ LIR *load = NewLIR2(opcode, reg, rl_method.reg.GetReg());
load->flags.fixup = kFixupLoad;
load->target = data_target;
}
void X86Mir2Lir::GenMoveVector(BasicBlock *bb, MIR *mir) {
// We only support 128 bit registers.
- DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
- RegStorage rs_dest = RegStorage::Solo128(mir->dalvikInsn.vB);
- RegStorage rs_src = RegStorage::Solo128(mir->dalvikInsn.vC);
+ DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
+ RegStorage rs_dest = RegStorage::Solo128(mir->dalvikInsn.vA);
+ RegStorage rs_src = RegStorage::Solo128(mir->dalvikInsn.vB);
NewLIR2(kX86Mova128RR, rs_dest.GetReg(), rs_src.GetReg());
}
+void X86Mir2Lir::GenMultiplyVectorSignedByte(BasicBlock *bb, MIR *mir) {
+ const int BYTE_SIZE = 8;
+ RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
+ RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vB);
+ RegStorage rs_src1_high_tmp = Get128BitRegister(AllocTempWide());
+
+ /*
+ * Emulate the behavior of a kSignedByte by separating out the 16 values in the two XMM
+ * and multiplying 8 at a time before recombining back into one XMM register.
+ *
+ * let xmm1, xmm2 be real srcs (keep low bits of 16bit lanes)
+ * xmm3 is tmp (operate on high bits of 16bit lanes)
+ *
+ * xmm3 = xmm1
+ * xmm1 = xmm1 .* xmm2
+ * xmm1 = xmm1 & 0x00ff00ff00ff00ff00ff00ff00ff00ff // xmm1 now has low bits
+ * xmm3 = xmm3 .>> 8
+ * xmm2 = xmm2 & 0xff00ff00ff00ff00ff00ff00ff00ff00
+ * xmm2 = xmm2 .* xmm3 // xmm2 now has high bits
+ * xmm1 = xmm1 | xmm2 // combine results
+ */
+
+ // Copy xmm1.
+ NewLIR2(kX86Mova128RR, rs_src1_high_tmp.GetReg(), rs_dest_src1.GetReg());
+
+ // Multiply low bits.
+ NewLIR2(kX86PmullwRR, rs_dest_src1.GetReg(), rs_src2.GetReg());
+
+ // xmm1 now has low bits.
+ AndMaskVectorRegister(rs_dest_src1, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF);
+
+ // Prepare high bits for multiplication.
+ NewLIR2(kX86PsrlwRI, rs_src1_high_tmp.GetReg(), BYTE_SIZE);
+ AndMaskVectorRegister(rs_src2, 0xFF00FF00, 0xFF00FF00, 0xFF00FF00, 0xFF00FF00);
+
+ // Multiply high bits and xmm2 now has high bits.
+ NewLIR2(kX86PmullwRR, rs_src2.GetReg(), rs_src1_high_tmp.GetReg());
+
+ // Combine back into dest XMM register.
+ NewLIR2(kX86PorRR, rs_dest_src1.GetReg(), rs_src2.GetReg());
+}
+
void X86Mir2Lir::GenMultiplyVector(BasicBlock *bb, MIR *mir) {
- DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
- OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
- RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
- RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
+ DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
+ OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
+ RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
+ RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vB);
int opcode = 0;
switch (opsize) {
case k32:
@@ -1585,6 +1700,10 @@ void X86Mir2Lir::GenMultiplyVector(BasicBlock *bb, MIR *mir) {
case kDouble:
opcode = kX86MulpdRR;
break;
+ case kSignedByte:
+ // HW doesn't support 16x16 byte multiplication so emulate it.
+ GenMultiplyVectorSignedByte(bb, mir);
+ return;
default:
LOG(FATAL) << "Unsupported vector multiply " << opsize;
break;
@@ -1593,10 +1712,10 @@ void X86Mir2Lir::GenMultiplyVector(BasicBlock *bb, MIR *mir) {
}
void X86Mir2Lir::GenAddVector(BasicBlock *bb, MIR *mir) {
- DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
- OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
- RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
- RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
+ DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
+ OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
+ RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
+ RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vB);
int opcode = 0;
switch (opsize) {
case k32:
@@ -1624,10 +1743,10 @@ void X86Mir2Lir::GenAddVector(BasicBlock *bb, MIR *mir) {
}
void X86Mir2Lir::GenSubtractVector(BasicBlock *bb, MIR *mir) {
- DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
- OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
- RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
- RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
+ DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
+ OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
+ RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
+ RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vB);
int opcode = 0;
switch (opsize) {
case k32:
@@ -1654,11 +1773,60 @@ void X86Mir2Lir::GenSubtractVector(BasicBlock *bb, MIR *mir) {
NewLIR2(opcode, rs_dest_src1.GetReg(), rs_src2.GetReg());
}
+void X86Mir2Lir::GenShiftByteVector(BasicBlock *bb, MIR *mir) {
+ RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
+ RegStorage rs_tmp = Get128BitRegister(AllocTempWide());
+
+ int opcode = 0;
+ int imm = mir->dalvikInsn.vB;
+
+ switch (static_cast<ExtendedMIROpcode>(mir->dalvikInsn.opcode)) {
+ case kMirOpPackedShiftLeft:
+ opcode = kX86PsllwRI;
+ break;
+ case kMirOpPackedSignedShiftRight:
+ opcode = kX86PsrawRI;
+ break;
+ case kMirOpPackedUnsignedShiftRight:
+ opcode = kX86PsrlwRI;
+ break;
+ default:
+ LOG(FATAL) << "Unsupported shift operation on byte vector " << opcode;
+ break;
+ }
+
+ /*
+ * xmm1 will have low bits
+ * xmm2 will have high bits
+ *
+ * xmm2 = xmm1
+ * xmm1 = xmm1 .<< N
+   * xmm2 = xmm2 & 0xFF00FF00FF00FF00FF00FF00FF00FF00
+ * xmm2 = xmm2 .<< N
+ * xmm1 = xmm1 | xmm2
+ */
+
+ // Copy xmm1.
+ NewLIR2(kX86Mova128RR, rs_tmp.GetReg(), rs_dest_src1.GetReg());
+
+ // Shift lower values.
+ NewLIR2(opcode, rs_dest_src1.GetReg(), imm);
+
+ // Mask bottom bits.
+ AndMaskVectorRegister(rs_tmp, 0xFF00FF00, 0xFF00FF00, 0xFF00FF00, 0xFF00FF00);
+
+ // Shift higher values.
+ NewLIR2(opcode, rs_tmp.GetReg(), imm);
+
+ // Combine back into dest XMM register.
+ NewLIR2(kX86PorRR, rs_dest_src1.GetReg(), rs_tmp.GetReg());
+}
+
void X86Mir2Lir::GenShiftLeftVector(BasicBlock *bb, MIR *mir) {
- DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
- OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
- RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
- int imm = mir->dalvikInsn.vC;
+ DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
+ OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
+ RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
+ int imm = mir->dalvikInsn.vB;
int opcode = 0;
switch (opsize) {
case k32:
@@ -1671,6 +1839,10 @@ void X86Mir2Lir::GenShiftLeftVector(BasicBlock *bb, MIR *mir) {
case kUnsignedHalf:
opcode = kX86PsllwRI;
break;
+ case kSignedByte:
+ case kUnsignedByte:
+ GenShiftByteVector(bb, mir);
+ return;
default:
LOG(FATAL) << "Unsupported vector shift left " << opsize;
break;
@@ -1679,10 +1851,10 @@ void X86Mir2Lir::GenShiftLeftVector(BasicBlock *bb, MIR *mir) {
}
void X86Mir2Lir::GenSignedShiftRightVector(BasicBlock *bb, MIR *mir) {
- DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
- OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
- RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
- int imm = mir->dalvikInsn.vC;
+ DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
+ OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
+ RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
+ int imm = mir->dalvikInsn.vB;
int opcode = 0;
switch (opsize) {
case k32:
@@ -1692,6 +1864,10 @@ void X86Mir2Lir::GenSignedShiftRightVector(BasicBlock *bb, MIR *mir) {
case kUnsignedHalf:
opcode = kX86PsrawRI;
break;
+ case kSignedByte:
+ case kUnsignedByte:
+ GenShiftByteVector(bb, mir);
+ return;
default:
LOG(FATAL) << "Unsupported vector signed shift right " << opsize;
break;
@@ -1700,10 +1876,10 @@ void X86Mir2Lir::GenSignedShiftRightVector(BasicBlock *bb, MIR *mir) {
}
void X86Mir2Lir::GenUnsignedShiftRightVector(BasicBlock *bb, MIR *mir) {
- DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
- OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
- RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
- int imm = mir->dalvikInsn.vC;
+ DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
+ OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
+ RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
+ int imm = mir->dalvikInsn.vB;
int opcode = 0;
switch (opsize) {
case k32:
@@ -1716,6 +1892,10 @@ void X86Mir2Lir::GenUnsignedShiftRightVector(BasicBlock *bb, MIR *mir) {
case kUnsignedHalf:
opcode = kX86PsrlwRI;
break;
+ case kSignedByte:
+ case kUnsignedByte:
+ GenShiftByteVector(bb, mir);
+ return;
default:
LOG(FATAL) << "Unsupported vector unsigned shift right " << opsize;
break;
@@ -1725,91 +1905,209 @@ void X86Mir2Lir::GenUnsignedShiftRightVector(BasicBlock *bb, MIR *mir) {
void X86Mir2Lir::GenAndVector(BasicBlock *bb, MIR *mir) {
// We only support 128 bit registers.
- DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
- RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
- RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
+ DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
+ RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
+ RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vB);
NewLIR2(kX86PandRR, rs_dest_src1.GetReg(), rs_src2.GetReg());
}
void X86Mir2Lir::GenOrVector(BasicBlock *bb, MIR *mir) {
// We only support 128 bit registers.
- DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
- RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
- RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
+ DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
+ RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
+ RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vB);
NewLIR2(kX86PorRR, rs_dest_src1.GetReg(), rs_src2.GetReg());
}
void X86Mir2Lir::GenXorVector(BasicBlock *bb, MIR *mir) {
// We only support 128 bit registers.
- DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
- RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
- RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vC);
+ DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
+ RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vA);
+ RegStorage rs_src2 = RegStorage::Solo128(mir->dalvikInsn.vB);
NewLIR2(kX86PxorRR, rs_dest_src1.GetReg(), rs_src2.GetReg());
}
+void X86Mir2Lir::AndMaskVectorRegister(RegStorage rs_src1, uint32_t m1, uint32_t m2, uint32_t m3, uint32_t m4) {
+ MaskVectorRegister(kX86PandRM, rs_src1, m1, m2, m3, m4);
+}
+
+void X86Mir2Lir::MaskVectorRegister(X86OpCode opcode, RegStorage rs_src1, uint32_t m0, uint32_t m1, uint32_t m2, uint32_t m3) {
+ // Create temporary MIR as container for 128-bit binary mask.
+ MIR const_mir;
+ MIR* const_mirp = &const_mir;
+ const_mirp->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpConstVector);
+ const_mirp->dalvikInsn.arg[0] = m0;
+ const_mirp->dalvikInsn.arg[1] = m1;
+ const_mirp->dalvikInsn.arg[2] = m2;
+ const_mirp->dalvikInsn.arg[3] = m3;
+
+ // Mask vector with const from literal pool.
+ AppendOpcodeWithConst(opcode, rs_src1.GetReg(), const_mirp);
+}
+
void X86Mir2Lir::GenAddReduceVector(BasicBlock *bb, MIR *mir) {
- DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
- OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
- RegStorage rs_dest_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
- int imm = mir->dalvikInsn.vC;
+ OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
+ RegStorage rs_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
+ RegLocation rl_dest = mir_graph_->GetDest(mir);
+ RegStorage rs_tmp;
+
+ int vec_bytes = (mir->dalvikInsn.vC & 0xFFFF) / 8;
+ int vec_unit_size = 0;
int opcode = 0;
+ int extr_opcode = 0;
+ RegLocation rl_result;
+
switch (opsize) {
case k32:
+ extr_opcode = kX86PextrdRRI;
opcode = kX86PhadddRR;
+ vec_unit_size = 4;
+ break;
+ case kSignedByte:
+ case kUnsignedByte:
+ extr_opcode = kX86PextrbRRI;
+ opcode = kX86PhaddwRR;
+ vec_unit_size = 2;
break;
case kSignedHalf:
case kUnsignedHalf:
+ extr_opcode = kX86PextrwRRI;
opcode = kX86PhaddwRR;
+ vec_unit_size = 2;
break;
+ case kSingle:
+ rl_result = EvalLoc(rl_dest, kFPReg, true);
+ vec_unit_size = 4;
+ for (int i = 0; i < 3; i++) {
+ NewLIR2(kX86AddssRR, rl_result.reg.GetReg(), rs_src1.GetReg());
+ NewLIR3(kX86ShufpsRRI, rs_src1.GetReg(), rs_src1.GetReg(), 0x39);
+ }
+ NewLIR2(kX86AddssRR, rl_result.reg.GetReg(), rs_src1.GetReg());
+ StoreValue(rl_dest, rl_result);
+
+ // For single-precision floats, we are done here
+ return;
default:
LOG(FATAL) << "Unsupported vector add reduce " << opsize;
break;
}
- NewLIR2(opcode, rs_dest_src1.GetReg(), imm);
+
+ int elems = vec_bytes / vec_unit_size;
+
+  // For byte-sized elements, emulate the horizontal add by splitting the vector into
+  // two vectors of 16-bit lanes, reducing each, and then adding the halves back together.
+ // TODO is overflow handled correctly?
+ if (opsize == kSignedByte || opsize == kUnsignedByte) {
+ rs_tmp = Get128BitRegister(AllocTempWide());
+
+ // tmp = xmm1 .>> 8.
+ NewLIR2(kX86Mova128RR, rs_tmp.GetReg(), rs_src1.GetReg());
+ NewLIR2(kX86PsrlwRI, rs_tmp.GetReg(), 8);
+
+ // Zero extend low bits in xmm1.
+ AndMaskVectorRegister(rs_src1, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF);
+ }
+
+ while (elems > 1) {
+ if (opsize == kSignedByte || opsize == kUnsignedByte) {
+ NewLIR2(opcode, rs_tmp.GetReg(), rs_tmp.GetReg());
+ }
+ NewLIR2(opcode, rs_src1.GetReg(), rs_src1.GetReg());
+ elems >>= 1;
+ }
+
+ // Combine the results if we separated them.
+ if (opsize == kSignedByte || opsize == kUnsignedByte) {
+ NewLIR2(kX86PaddbRR, rs_src1.GetReg(), rs_tmp.GetReg());
+ }
+
+ // We need to extract to a GPR.
+ RegStorage temp = AllocTemp();
+ NewLIR3(extr_opcode, temp.GetReg(), rs_src1.GetReg(), 0);
+
+ // Can we do this directly into memory?
+ rl_result = UpdateLocTyped(rl_dest, kCoreReg);
+ if (rl_result.location == kLocPhysReg) {
+ // Ensure res is in a core reg
+ rl_result = EvalLoc(rl_dest, kCoreReg, true);
+ OpRegReg(kOpAdd, rl_result.reg, temp);
+ StoreFinalValue(rl_dest, rl_result);
+ } else {
+ OpMemReg(kOpAdd, rl_result, temp.GetReg());
+ }
+
+ FreeTemp(temp);
}
void X86Mir2Lir::GenReduceVector(BasicBlock *bb, MIR *mir) {
- DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
- OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
- RegStorage rs_src = RegStorage::Solo128(mir->dalvikInsn.vB);
- int index = mir->dalvikInsn.arg[0];
- int opcode = 0;
+ OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
+ RegLocation rl_dest = mir_graph_->GetDest(mir);
+ RegStorage rs_src1 = RegStorage::Solo128(mir->dalvikInsn.vB);
+ int extract_index = mir->dalvikInsn.arg[0];
+ int extr_opcode = 0;
+ RegLocation rl_result;
+ bool is_wide = false;
+
switch (opsize) {
case k32:
- opcode = kX86PextrdRRI;
+ rl_result = UpdateLocTyped(rl_dest, kCoreReg);
+ extr_opcode = (rl_result.location == kLocPhysReg) ? kX86PextrdMRI : kX86PextrdRRI;
break;
case kSignedHalf:
case kUnsignedHalf:
- opcode = kX86PextrwRRI;
- break;
- case kUnsignedByte:
- case kSignedByte:
- opcode = kX86PextrbRRI;
+      rl_result = UpdateLocTyped(rl_dest, kCoreReg);
+ extr_opcode = (rl_result.location == kLocPhysReg) ? kX86PextrwMRI : kX86PextrwRRI;
break;
default:
- LOG(FATAL) << "Unsupported vector reduce " << opsize;
+      LOG(FATAL) << "Unsupported vector reduce " << opsize;
+ return;
break;
}
- // We need to extract to a GPR.
- RegStorage temp = AllocTemp();
- NewLIR3(opcode, temp.GetReg(), rs_src.GetReg(), index);
- // Assume that the destination VR is in the def for the mir.
- RegLocation rl_dest = mir_graph_->GetDest(mir);
- RegLocation rl_temp =
- {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, temp, INVALID_SREG, INVALID_SREG};
- StoreValue(rl_dest, rl_temp);
+ if (rl_result.location == kLocPhysReg) {
+ NewLIR3(extr_opcode, rl_result.reg.GetReg(), rs_src1.GetReg(), extract_index);
+    if (is_wide) {
+      StoreFinalValueWide(rl_dest, rl_result);
+    } else {
+      StoreFinalValue(rl_dest, rl_result);
+ }
+ } else {
+ int displacement = SRegOffset(rl_result.s_reg_low);
+ LIR *l = NewLIR3(extr_opcode, rs_rX86_SP.GetReg(), displacement, rs_src1.GetReg());
+ AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, is_wide /* is_64bit */);
+ AnnotateDalvikRegAccess(l, displacement >> 2, false /* is_load */, is_wide /* is_64bit */);
+ }
}
void X86Mir2Lir::GenSetVector(BasicBlock *bb, MIR *mir) {
- DCHECK_EQ(mir->dalvikInsn.vA & 0xFFFF, 128U);
- OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vA >> 16);
- RegStorage rs_dest = RegStorage::Solo128(mir->dalvikInsn.vB);
- int op_low = 0, op_high = 0;
+ DCHECK_EQ(mir->dalvikInsn.vC & 0xFFFF, 128U);
+ OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
+ RegStorage rs_dest = RegStorage::Solo128(mir->dalvikInsn.vA);
+ int op_low = 0, op_high = 0, imm = 0, op_mov = kX86MovdxrRR;
+ RegisterClass reg_type = kCoreReg;
+
switch (opsize) {
case k32:
op_low = kX86PshufdRRI;
break;
+ case kSingle:
+ op_low = kX86PshufdRRI;
+ op_mov = kX86Mova128RR;
+ reg_type = kFPReg;
+ break;
+ case k64:
+ op_low = kX86PshufdRRI;
+ imm = 0x44;
+ break;
+ case kDouble:
+ op_low = kX86PshufdRRI;
+ op_mov = kX86Mova128RR;
+ reg_type = kFPReg;
+ imm = 0x44;
+ break;
+ case kSignedByte:
+ case kUnsignedByte:
+ // Shuffle 8 bit value into 16 bit word.
+ // We set val = val + (val << 8) below and use 16 bit shuffle.
case kSignedHalf:
case kUnsignedHalf:
// Handles low quadword.
@@ -1822,23 +2120,37 @@ void X86Mir2Lir::GenSetVector(BasicBlock *bb, MIR *mir) {
break;
}
- // Load the value from the VR into a GPR.
RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
- rl_src = LoadValue(rl_src, kCoreReg);
+
+ // Load the value from the VR into the reg.
+ if (rl_src.wide == 0) {
+ rl_src = LoadValue(rl_src, reg_type);
+ } else {
+ rl_src = LoadValueWide(rl_src, reg_type);
+ }
+
+ // If opsize is 8 bits wide then double value and use 16 bit shuffle instead.
+ if (opsize == kSignedByte || opsize == kUnsignedByte) {
+ RegStorage temp = AllocTemp();
+ // val = val + (val << 8).
+ NewLIR2(kX86Mov32RR, temp.GetReg(), rl_src.reg.GetReg());
+ NewLIR2(kX86Sal32RI, temp.GetReg(), 8);
+ NewLIR2(kX86Or32RR, rl_src.reg.GetReg(), temp.GetReg());
+ FreeTemp(temp);
+ }
// Load the value into the XMM register.
- NewLIR2(kX86MovdxrRR, rs_dest.GetReg(), rl_src.reg.GetReg());
+ NewLIR2(op_mov, rs_dest.GetReg(), rl_src.reg.GetReg());
// Now shuffle the value across the destination.
- NewLIR3(op_low, rs_dest.GetReg(), rs_dest.GetReg(), 0);
+ NewLIR3(op_low, rs_dest.GetReg(), rs_dest.GetReg(), imm);
// And then repeat as needed.
if (op_high != 0) {
- NewLIR3(op_high, rs_dest.GetReg(), rs_dest.GetReg(), 0);
+ NewLIR3(op_high, rs_dest.GetReg(), rs_dest.GetReg(), imm);
}
}
-
LIR *X86Mir2Lir::ScanVectorLiteral(MIR *mir) {
int *args = reinterpret_cast<int*>(mir->dalvikInsn.arg);
for (LIR *p = const_vectors_; p != nullptr; p = p->next) {
@@ -1867,7 +2179,7 @@ LIR *X86Mir2Lir::AddVectorLiteral(MIR *mir) {
}
// ------------ ABI support: mapping of args to physical registers -------------
-RegStorage X86Mir2Lir::InToRegStorageX86_64Mapper::GetNextReg(bool is_double_or_float, bool is_wide) {
+RegStorage X86Mir2Lir::InToRegStorageX86_64Mapper::GetNextReg(bool is_double_or_float, bool is_wide, bool is_ref) {
const SpecialTargetRegister coreArgMappingToPhysicalReg[] = {kArg1, kArg2, kArg3, kArg4, kArg5};
const int coreArgMappingToPhysicalRegSize = sizeof(coreArgMappingToPhysicalReg) / sizeof(SpecialTargetRegister);
const SpecialTargetRegister fpArgMappingToPhysicalReg[] = {kFArg0, kFArg1, kFArg2, kFArg3,
@@ -1880,7 +2192,8 @@ RegStorage X86Mir2Lir::InToRegStorageX86_64Mapper::GetNextReg(bool is_double_or_
}
} else {
if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
- return ml_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], is_wide);
+ return is_ref ? ml_->TargetRefReg(coreArgMappingToPhysicalReg[cur_core_reg_++]) :
+ ml_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], is_wide);
}
}
return RegStorage::InvalidReg();
@@ -1897,11 +2210,12 @@ void X86Mir2Lir::InToRegStorageMapping::Initialize(RegLocation* arg_locs, int co
max_mapped_in_ = -1;
is_there_stack_mapped_ = false;
for (int in_position = 0; in_position < count; in_position++) {
- RegStorage reg = mapper->GetNextReg(arg_locs[in_position].fp, arg_locs[in_position].wide);
+ RegStorage reg = mapper->GetNextReg(arg_locs[in_position].fp,
+ arg_locs[in_position].wide, arg_locs[in_position].ref);
if (reg.Valid()) {
mapping_[in_position] = reg;
max_mapped_in_ = std::max(max_mapped_in_, in_position);
- if (reg.Is64BitSolo()) {
+ if (arg_locs[in_position].wide) {
// We covered 2 args, so skip the next one
in_position++;
}
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index e271e9d100..2789923bb9 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -569,6 +569,9 @@ enum X86OpCode {
kX86PextrbRRI, // Extract 8 bits from XMM into GPR
kX86PextrwRRI, // Extract 16 bits from XMM into GPR
kX86PextrdRRI, // Extract 32 bits from XMM into GPR
+ kX86PextrbMRI, // Extract 8 bits from XMM into memory
+ kX86PextrwMRI, // Extract 16 bits from XMM into memory
+ kX86PextrdMRI, // Extract 32 bits from XMM into memory
kX86PshuflwRRI, // Shuffle 16 bits in lower 64 bits of XMM.
kX86PshufdRRI, // Shuffle 32 bits in XMM.
kX86ShufpsRRI, // FP Shuffle 32 bits in XMM.
@@ -723,7 +726,7 @@ struct X86EncodingMap {
#define REX_X 0x42
// Extension of the ModR/M r/m field, SIB base field, or Opcode reg field
#define REX_B 0x41
-// Extended register set
+// An empty REX prefix used to normalize byte operations: with any REX present, byte encodings 4-7
+// select SPL/BPL/SIL/DIL instead of AH/CH/DH/BH, and registers 8-15 always require a REX prefix
#define REX 0x40
// Mask extracting the least 3 bits of r0..r15
#define kRegNumMask32 0x07
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 770ae89ca2..9bf51359cf 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -1466,8 +1466,12 @@ static void CheckAndClearResolveException(Thread* self)
CHECK(self->IsExceptionPending());
mirror::Throwable* exception = self->GetException(nullptr);
std::string descriptor = exception->GetClass()->GetDescriptor();
- if (descriptor != "Ljava/lang/IncompatibleClassChangeError;" &&
- descriptor != "Ljava/lang/NoClassDefFoundError;") {
+ if (descriptor != "Ljava/lang/IllegalAccessError;" &&
+ descriptor != "Ljava/lang/IncompatibleClassChangeError;" &&
+ descriptor != "Ljava/lang/InstantiationError;" &&
+ descriptor != "Ljava/lang/NoClassDefFoundError;" &&
+ descriptor != "Ljava/lang/NoSuchFieldError;" &&
+ descriptor != "Ljava/lang/NoSuchMethodError;") {
    LOG(FATAL) << "Unexpected exception " << exception->Dump();
}
self->ClearException();
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 06f6e89c7b..42743862fe 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -14,8 +14,6 @@
* limitations under the License.
*/
-#include <unordered_set>
-
#include "elf_writer_quick.h"
#include "base/logging.h"
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 2d25b7a2ed..acfa607f39 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -796,9 +796,9 @@ void ImageWriter::PatchOatCodeAndMethods(File* elf_file) {
};
const bool add_patches = compiler_driver_.GetCompilerOptions().GetIncludePatchInformation();
if (add_patches) {
- // TODO if we are adding patches the resulting ELF file might have a
- // potentially rather large amount of free space where patches might have been
- // placed. We should adjust the ELF file to get rid of this excess space.
+ // TODO if we are adding patches the resulting ELF file might have a potentially rather large
+ // amount of free space where patches might have been placed. We should adjust the ELF file to
+ // get rid of this excess space.
patches.reserve(compiler_driver_.GetCodeToPatch().size() +
compiler_driver_.GetMethodsToPatch().size() +
compiler_driver_.GetClassesToPatch().size());
@@ -892,7 +892,7 @@ void ImageWriter::PatchOatCodeAndMethods(File* elf_file) {
}
Elf32_Shdr* shdr = file->FindSectionByName(".oat_patches");
if (shdr != nullptr) {
- DCHECK_EQ(shdr, file->FindSectionByType(SHT_OAT_PATCH))
+ CHECK_EQ(shdr, file->FindSectionByType(SHT_OAT_PATCH))
<< "Incorrect type for .oat_patches section";
CHECK_LE(patches.size() * sizeof(uintptr_t), shdr->sh_size)
<< "We got more patches than anticipated";
@@ -903,9 +903,8 @@ void ImageWriter::PatchOatCodeAndMethods(File* elf_file) {
<< "Section overlaps onto next section";
// It's mmap'd so we can just memcpy.
memcpy(file->Begin() + shdr->sh_offset, patches.data(), patches.size()*sizeof(uintptr_t));
- // TODO We should fill in the newly empty space between the last patch and
- // the start of the next section by moving the following sections down if
- // possible.
+ // TODO We should fill in the newly empty space between the last patch and the start of the
+ // next section by moving the following sections down if possible.
shdr->sh_size = patches.size() * sizeof(uintptr_t);
} else {
LOG(ERROR) << "Unable to find section header for SHT_OAT_PATCH";