Revert "Revert "Better support for x86 XMM registers""
This reverts commit 8ff67e3338952c70ccf3b609559bf8cc0f379cfd.
Fix applied to loc.fp usage.
Change-Id: I1eb3005392544fcf30c595923ed25bcee2dc4859
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 4650f25..18122b3 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -56,6 +56,17 @@
kLocInvalid
};
+/**
+ * Support for vector registers. Initially used for x86 floats. This will be used
+ * to replace the assumption that a double takes up 2 single FP registers
+ */
+enum VectorLengthType {
+ kVectorNotUsed = 0, // This value is NOT in a vector register.
+ kVectorLength4, // The value occupies 4 bytes in a vector register.
+ kVectorLength8, // The value occupies 8 bytes in a vector register.
+ kVectorLength16 // The value occupies 16 bytes in a vector register (unused now).
+};
+
enum BBType {
kNullBlock,
kEntryBlock,
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index 010de20..d080e39 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -325,11 +325,14 @@
unsigned ref:1; // Something GC cares about.
unsigned high_word:1; // High word of pair?
unsigned home:1; // Does this represent the home location?
+ VectorLengthType vec_len:3; // Is this value in a vector register, and how big is it?
uint8_t low_reg; // First physical register.
uint8_t high_reg; // 2nd physical register (if wide).
int16_t s_reg_low; // SSA name for low Dalvik word.
int16_t orig_sreg; // TODO: remove after Bitcode gen complete
// and consolidate usage w/ s_reg_low.
+
+ bool IsVectorScalar() const { return vec_len == kVectorLength4 || vec_len == kVectorLength8;}
};
/*
@@ -354,7 +357,7 @@
};
-const RegLocation bad_loc = {kLocDalvikFrame, 0, 0, 0, 0, 0, 0, 0, 0,
+const RegLocation bad_loc = {kLocDalvikFrame, 0, 0, 0, 0, 0, 0, 0, 0, kVectorNotUsed,
INVALID_REG, INVALID_REG, INVALID_SREG, INVALID_SREG};
class MIRGraph {
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h
index b06ebcf..37b4ec6 100644
--- a/compiler/dex/quick/arm/arm_lir.h
+++ b/compiler/dex/quick/arm/arm_lir.h
@@ -118,9 +118,9 @@
#define ARM_FP_REG_MASK (ARM_FP_REG_OFFSET-1)
// RegisterLocation templates return values (r0, or r0/r1).
-#define ARM_LOC_C_RETURN {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, r0, INVALID_REG, \
+#define ARM_LOC_C_RETURN {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, INVALID_REG, \
INVALID_SREG, INVALID_SREG}
-#define ARM_LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1, \
+#define ARM_LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1, \
INVALID_SREG, INVALID_SREG}
#define ARM_LOC_C_RETURN_FLOAT ARM_LOC_C_RETURN
#define ARM_LOC_C_RETURN_DOUBLE ARM_LOC_C_RETURN_WIDE
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 6b4cbd4..3bd0298 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -1840,4 +1840,11 @@
CallRuntimeHelperRegLocation(QUICK_ENTRYPOINT_OFFSET(pUnlockObject), rl_src, true);
}
+/* Generic code for generating a wide constant into a VR. */
+void Mir2Lir::GenConstWide(RegLocation rl_dest, int64_t value) {
+ RegLocation rl_result = EvalLoc(rl_dest, kAnyReg, true);
+ LoadConstantWide(rl_result.low_reg, rl_result.high_reg, value);
+ StoreValueWide(rl_dest, rl_result);
+}
+
} // namespace art
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
index 3539106..8f2f6ad 100644
--- a/compiler/dex/quick/gen_loadstore.cc
+++ b/compiler/dex/quick/gen_loadstore.cc
@@ -265,9 +265,17 @@
// Dest is now live and dirty (until/if we flush it to home location)
MarkLive(rl_dest.low_reg, rl_dest.s_reg_low);
- MarkLive(rl_dest.high_reg, GetSRegHi(rl_dest.s_reg_low));
- MarkDirty(rl_dest);
- MarkPair(rl_dest.low_reg, rl_dest.high_reg);
+
+ // Does this wide value live in two registers (or one vector one)?
+ if (rl_dest.low_reg != rl_dest.high_reg) {
+ MarkLive(rl_dest.high_reg, GetSRegHi(rl_dest.s_reg_low));
+ MarkDirty(rl_dest);
+ MarkPair(rl_dest.low_reg, rl_dest.high_reg);
+ } else {
+ // This must be an x86 vector register value,
+ DCHECK(IsFpReg(rl_dest.low_reg) && (cu_->instruction_set == kX86));
+ MarkDirty(rl_dest);
+ }
ResetDefLocWide(rl_dest);
diff --git a/compiler/dex/quick/mips/mips_lir.h b/compiler/dex/quick/mips/mips_lir.h
index 278fcef..00eef96 100644
--- a/compiler/dex/quick/mips/mips_lir.h
+++ b/compiler/dex/quick/mips/mips_lir.h
@@ -142,13 +142,13 @@
#define rMIPS_PC INVALID_REG
// RegisterLocation templates return values (r_V0, or r_V0/r_V1).
-#define MIPS_LOC_C_RETURN {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, r_V0, INVALID_REG, \
+#define MIPS_LOC_C_RETURN {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r_V0, INVALID_REG, \
INVALID_SREG, INVALID_SREG}
-#define MIPS_LOC_C_RETURN_FLOAT {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, r_FRESULT0, \
+#define MIPS_LOC_C_RETURN_FLOAT {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r_FRESULT0, \
INVALID_REG, INVALID_SREG, INVALID_SREG}
-#define MIPS_LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r_RESULT0, \
+#define MIPS_LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r_RESULT0, \
r_RESULT1, INVALID_SREG, INVALID_SREG}
-#define MIPS_LOC_C_RETURN_DOUBLE {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r_FRESULT0, \
+#define MIPS_LOC_C_RETURN_DOUBLE {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r_FRESULT0, \
r_FRESULT1, INVALID_SREG, INVALID_SREG}
enum MipsResourceEncodingPos {
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index c5bbae1..6281eff 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -157,16 +157,11 @@
case Instruction::CONST_WIDE_16:
case Instruction::CONST_WIDE_32:
- rl_result = EvalLoc(rl_dest, kAnyReg, true);
- LoadConstantWide(rl_result.low_reg, rl_result.high_reg,
- static_cast<int64_t>(static_cast<int32_t>(vB)));
- StoreValueWide(rl_dest, rl_result);
+ GenConstWide(rl_dest, static_cast<int64_t>(static_cast<int32_t>(vB)));
break;
case Instruction::CONST_WIDE:
- rl_result = EvalLoc(rl_dest, kAnyReg, true);
- LoadConstantWide(rl_result.low_reg, rl_result.high_reg, mir->dalvikInsn.vB_wide);
- StoreValueWide(rl_dest, rl_result);
+ GenConstWide(rl_dest, mir->dalvikInsn.vB_wide);
break;
case Instruction::CONST_WIDE_HIGH16:
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 3f7ec1e..c157327 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -385,7 +385,7 @@
int AllocPreservedSingle(int s_reg);
int AllocPreservedDouble(int s_reg);
int AllocTempBody(RegisterInfo* p, int num_regs, int* next_temp, bool required);
- int AllocTempDouble();
+ virtual int AllocTempDouble();
int AllocFreeTemp();
int AllocTemp();
int AllocTempFloat();
@@ -403,7 +403,7 @@
void MarkDefWide(RegLocation rl, LIR *start, LIR *finish);
RegLocation WideToNarrow(RegLocation rl);
void ResetDefLoc(RegLocation rl);
- void ResetDefLocWide(RegLocation rl);
+ virtual void ResetDefLocWide(RegLocation rl);
void ResetDefTracking();
void ClobberAllRegs();
void FlushAllRegsBody(RegisterInfo* info, int num_regs);
@@ -419,7 +419,7 @@
void CopyRegInfo(int new_reg, int old_reg);
bool CheckCorePoolSanity();
RegLocation UpdateLoc(RegLocation loc);
- RegLocation UpdateLocWide(RegLocation loc);
+ virtual RegLocation UpdateLocWide(RegLocation loc);
RegLocation UpdateRawLoc(RegLocation loc);
/**
@@ -430,7 +430,7 @@
* @param update Whether the liveness information should be updated.
* @return Returns the properly typed temporary in physical register pairs.
*/
- RegLocation EvalLocWide(RegLocation loc, int reg_class, bool update);
+ virtual RegLocation EvalLocWide(RegLocation loc, int reg_class, bool update);
/**
* @brief Used to load register location into a typed temporary.
@@ -439,7 +439,7 @@
* @param update Whether the liveness information should be updated.
* @return Returns the properly typed temporary in physical register.
*/
- RegLocation EvalLoc(RegLocation loc, int reg_class, bool update);
+ virtual RegLocation EvalLoc(RegLocation loc, int reg_class, bool update);
void CountRefs(RefCounts* core_counts, RefCounts* fp_counts, size_t num_regs);
void DumpCounts(const RefCounts* arr, int size, const char* msg);
@@ -507,6 +507,8 @@
RegLocation rl_src);
void GenSuspendTest(int opt_flags);
void GenSuspendTestAndBranch(int opt_flags, LIR* target);
+ // This will be overridden by x86 implementation.
+ virtual void GenConstWide(RegLocation rl_dest, int64_t value);
// Shared by all targets - implemented in gen_invoke.cc.
int CallHelperSetup(ThreadOffset helper_offset);
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index cef013e..32c22f2 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -602,6 +602,7 @@
}
void Mir2Lir::MarkPair(int low_reg, int high_reg) {
+ DCHECK_NE(low_reg, high_reg);
RegisterInfo* info_lo = GetRegInfo(low_reg);
RegisterInfo* info_hi = GetRegInfo(high_reg);
info_lo->pair = info_hi->pair = true;
@@ -807,7 +808,10 @@
if (update) {
loc.location = kLocPhysReg;
MarkLive(loc.low_reg, loc.s_reg_low);
- MarkLive(loc.high_reg, GetSRegHi(loc.s_reg_low));
+ // Does this wide value live in two registers or one vector register?
+ if (loc.low_reg != loc.high_reg) {
+ MarkLive(loc.high_reg, GetSRegHi(loc.s_reg_low));
+ }
}
DCHECK(!IsFpReg(loc.low_reg) || ((loc.low_reg & 0x1) == 0));
return loc;
@@ -1059,7 +1063,10 @@
Clobber(res.high_reg);
LockTemp(res.low_reg);
LockTemp(res.high_reg);
- MarkPair(res.low_reg, res.high_reg);
+ // Does this wide value live in two registers or one vector register?
+ if (res.low_reg != res.high_reg) {
+ MarkPair(res.low_reg, res.high_reg);
+ }
return res;
}
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index e6621f3..816f2d0 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -173,6 +173,12 @@
bool InexpensiveConstantLong(int64_t value);
bool InexpensiveConstantDouble(int64_t value);
+ RegLocation UpdateLocWide(RegLocation loc);
+ RegLocation EvalLocWide(RegLocation loc, int reg_class, bool update);
+ RegLocation EvalLoc(RegLocation loc, int reg_class, bool update);
+ int AllocTempDouble();
+ void ResetDefLocWide(RegLocation rl);
+
private:
void EmitPrefix(const X86EncodingMap* entry);
void EmitOpcode(const X86EncodingMap* entry);
@@ -222,6 +228,8 @@
void EmitUnimplemented(const X86EncodingMap* entry, LIR* lir);
void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1,
int64_t val, ConditionCode ccode);
+ void OpVectorRegCopyWide(uint8_t fp_reg, uint8_t low_reg, uint8_t high_reg);
+ void GenConstWide(RegLocation rl_dest, int64_t value);
};
} // namespace art
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 11ccd4b..01479a9 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -155,9 +155,11 @@
// TODO: Prevent this from happening in the code. The result is often
// unused or could have been loaded more easily from memory.
NewLIR2(kX86MovdxrRR, dest_lo, src_lo);
+ dest_hi = AllocTempDouble();
NewLIR2(kX86MovdxrRR, dest_hi, src_hi);
NewLIR2(kX86PsllqRI, dest_hi, 32);
NewLIR2(kX86OrpsRR, dest_lo, dest_hi);
+ FreeTemp(dest_hi);
}
} else {
if (src_fp) {
@@ -525,7 +527,7 @@
// Compute (r1:r0) = (r1:r0) + (r2:r3)
OpRegReg(kOpAdd, r0, r2); // r0 = r0 + r2
OpRegReg(kOpAdc, r1, r3); // r1 = r1 + r3 + CF
- RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1,
+ RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
INVALID_SREG, INVALID_SREG};
StoreValueWide(rl_dest, rl_result);
}
@@ -541,7 +543,7 @@
// Compute (r1:r0) = (r1:r0) + (r2:r3)
OpRegReg(kOpSub, r0, r2); // r0 = r0 - r2
OpRegReg(kOpSbc, r1, r3); // r1 = r1 - r3 - CF
- RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1,
+ RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
INVALID_SREG, INVALID_SREG};
StoreValueWide(rl_dest, rl_result);
}
@@ -557,7 +559,7 @@
// Compute (r1:r0) = (r1:r0) & (r2:r3)
OpRegReg(kOpAnd, r0, r2); // r0 = r0 & r2
OpRegReg(kOpAnd, r1, r3); // r1 = r1 & r3
- RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1,
+ RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
INVALID_SREG, INVALID_SREG};
StoreValueWide(rl_dest, rl_result);
}
@@ -573,7 +575,7 @@
// Compute (r1:r0) = (r1:r0) | (r2:r3)
OpRegReg(kOpOr, r0, r2); // r0 = r0 | r2
OpRegReg(kOpOr, r1, r3); // r1 = r1 | r3
- RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1,
+ RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
INVALID_SREG, INVALID_SREG};
StoreValueWide(rl_dest, rl_result);
}
@@ -589,7 +591,7 @@
// Compute (r1:r0) = (r1:r0) ^ (r2:r3)
OpRegReg(kOpXor, r0, r2); // r0 = r0 ^ r2
OpRegReg(kOpXor, r1, r3); // r1 = r1 ^ r3
- RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1,
+ RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
INVALID_SREG, INVALID_SREG};
StoreValueWide(rl_dest, rl_result);
}
@@ -602,7 +604,7 @@
OpRegReg(kOpNeg, r0, r0); // r0 = -r0
OpRegImm(kOpAdc, r1, 0); // r1 = r1 + CF
OpRegReg(kOpNeg, r1, r1); // r1 = -r1
- RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, r0, r1,
+ RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
INVALID_SREG, INVALID_SREG};
StoreValueWide(rl_dest, rl_result);
}
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index b281063..5c993c5 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -416,7 +416,7 @@
if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) {
low_reg = AllocTempDouble();
- high_reg = low_reg + 1;
+ high_reg = low_reg; // only one allocated!
res = (low_reg & 0xff) | ((high_reg & 0xff) << 8);
return res;
}
@@ -546,4 +546,254 @@
return X86Mir2Lir::EncodingMap[opcode].fmt;
}
+/*
+ * Return an updated location record with current in-register status.
+ * If the value lives in live temps, reflect that fact. No code
+ * is generated. If the live value is part of an older pair,
+ * clobber both low and high.
+ */
+// TODO: Reunify with common code after 'pair mess' has been fixed
+RegLocation X86Mir2Lir::UpdateLocWide(RegLocation loc) {
+ DCHECK(loc.wide);
+ DCHECK(CheckCorePoolSanity());
+ if (loc.location != kLocPhysReg) {
+ DCHECK((loc.location == kLocDalvikFrame) ||
+ (loc.location == kLocCompilerTemp));
+ // Are the dalvik regs already live in physical registers?
+ RegisterInfo* info_lo = AllocLive(loc.s_reg_low, kAnyReg);
+
+ // Handle FP registers specially on x86.
+ if (info_lo && IsFpReg(info_lo->reg)) {
+ bool match = true;
+
+ // We can't match a FP register with a pair of Core registers.
+ match = match && (info_lo->pair == 0);
+
+ if (match) {
+ // We can reuse;update the register usage info.
+ loc.low_reg = info_lo->reg;
+ loc.high_reg = info_lo->reg; // Play nice with existing code.
+ loc.location = kLocPhysReg;
+ loc.vec_len = kVectorLength8;
+ DCHECK(IsFpReg(loc.low_reg));
+ return loc;
+ }
+ // We can't easily reuse; clobber and free any overlaps.
+ if (info_lo) {
+ Clobber(info_lo->reg);
+ FreeTemp(info_lo->reg);
+ if (info_lo->pair)
+ Clobber(info_lo->partner);
+ }
+ } else {
+ RegisterInfo* info_hi = AllocLive(GetSRegHi(loc.s_reg_low), kAnyReg);
+ bool match = true;
+ match = match && (info_lo != NULL);
+ match = match && (info_hi != NULL);
+ // Are they both core or both FP?
+ match = match && (IsFpReg(info_lo->reg) == IsFpReg(info_hi->reg));
+ // If a pair of floating point singles, are they properly aligned?
+ if (match && IsFpReg(info_lo->reg)) {
+ match &= ((info_lo->reg & 0x1) == 0);
+ match &= ((info_hi->reg - info_lo->reg) == 1);
+ }
+ // If previously used as a pair, it is the same pair?
+ if (match && (info_lo->pair || info_hi->pair)) {
+ match = (info_lo->pair == info_hi->pair);
+ match &= ((info_lo->reg == info_hi->partner) &&
+ (info_hi->reg == info_lo->partner));
+ }
+ if (match) {
+ // Can reuse - update the register usage info
+ loc.low_reg = info_lo->reg;
+ loc.high_reg = info_hi->reg;
+ loc.location = kLocPhysReg;
+ MarkPair(loc.low_reg, loc.high_reg);
+ DCHECK(!IsFpReg(loc.low_reg) || ((loc.low_reg & 0x1) == 0));
+ return loc;
+ }
+ // Can't easily reuse - clobber and free any overlaps
+ if (info_lo) {
+ Clobber(info_lo->reg);
+ FreeTemp(info_lo->reg);
+ if (info_lo->pair)
+ Clobber(info_lo->partner);
+ }
+ if (info_hi) {
+ Clobber(info_hi->reg);
+ FreeTemp(info_hi->reg);
+ if (info_hi->pair)
+ Clobber(info_hi->partner);
+ }
+ }
+ }
+ return loc;
+}
+
+// TODO: Reunify with common code after 'pair mess' has been fixed
+RegLocation X86Mir2Lir::EvalLocWide(RegLocation loc, int reg_class, bool update) {
+ DCHECK(loc.wide);
+ int32_t new_regs;
+ int32_t low_reg;
+ int32_t high_reg;
+
+ loc = UpdateLocWide(loc);
+
+ /* If it is already in a register, we can assume proper form. Is it the right reg class? */
+ if (loc.location == kLocPhysReg) {
+ DCHECK_EQ(IsFpReg(loc.low_reg), loc.IsVectorScalar());
+ if (!RegClassMatches(reg_class, loc.low_reg)) {
+ /* It is the wrong register class. Reallocate and copy. */
+ if (!IsFpReg(loc.low_reg)) {
+ // We want this in a FP reg, and it is in core registers.
+ DCHECK(reg_class != kCoreReg);
+ // Allocate this into any FP reg, and mark it with the right size.
+ low_reg = AllocTypedTemp(true, reg_class);
+ OpVectorRegCopyWide(low_reg, loc.low_reg, loc.high_reg);
+ CopyRegInfo(low_reg, loc.low_reg);
+ Clobber(loc.low_reg);
+ Clobber(loc.high_reg);
+ loc.low_reg = low_reg;
+ loc.high_reg = low_reg; // Play nice with existing code.
+ loc.vec_len = kVectorLength8;
+ } else {
+ // The value is in a FP register, and we want it in a pair of core registers.
+ DCHECK_EQ(reg_class, kCoreReg);
+ DCHECK_EQ(loc.low_reg, loc.high_reg);
+ new_regs = AllocTypedTempPair(false, kCoreReg); // Force to core registers.
+ low_reg = new_regs & 0xff;
+ high_reg = (new_regs >> 8) & 0xff;
+ DCHECK_NE(low_reg, high_reg);
+ OpRegCopyWide(low_reg, high_reg, loc.low_reg, loc.high_reg);
+ CopyRegInfo(low_reg, loc.low_reg);
+ CopyRegInfo(high_reg, loc.high_reg);
+ Clobber(loc.low_reg);
+ Clobber(loc.high_reg);
+ loc.low_reg = low_reg;
+ loc.high_reg = high_reg;
+ MarkPair(loc.low_reg, loc.high_reg);
+ DCHECK(!IsFpReg(loc.low_reg) || ((loc.low_reg & 0x1) == 0));
+ }
+ }
+ return loc;
+ }
+
+ DCHECK_NE(loc.s_reg_low, INVALID_SREG);
+ if (IsFpReg(loc.low_reg) && reg_class != kCoreReg) {
+ // Need a wide vector register.
+ low_reg = AllocTypedTemp(true, reg_class);
+ loc.low_reg = low_reg;
+ loc.high_reg = low_reg; // Play nice with existing code.
+ loc.vec_len = kVectorLength8;
+ if (update) {
+ loc.location = kLocPhysReg;
+ MarkLive(loc.low_reg, loc.s_reg_low);
+ }
+ DCHECK(IsFpReg(loc.low_reg));
+ } else {
+ DCHECK_NE(GetSRegHi(loc.s_reg_low), INVALID_SREG);
+
+ new_regs = AllocTypedTempPair(loc.fp, reg_class);
+ loc.low_reg = new_regs & 0xff;
+ loc.high_reg = (new_regs >> 8) & 0xff;
+
+ MarkPair(loc.low_reg, loc.high_reg);
+ if (update) {
+ loc.location = kLocPhysReg;
+ MarkLive(loc.low_reg, loc.s_reg_low);
+ MarkLive(loc.high_reg, GetSRegHi(loc.s_reg_low));
+ }
+ DCHECK(!IsFpReg(loc.low_reg) || ((loc.low_reg & 0x1) == 0));
+ }
+ return loc;
+}
+
+// TODO: Reunify with common code after 'pair mess' has been fixed
+RegLocation X86Mir2Lir::EvalLoc(RegLocation loc, int reg_class, bool update) {
+ int new_reg;
+
+ if (loc.wide)
+ return EvalLocWide(loc, reg_class, update);
+
+ loc = UpdateLoc(loc);
+
+ if (loc.location == kLocPhysReg) {
+ if (!RegClassMatches(reg_class, loc.low_reg)) {
+ /* Wrong register class. Realloc, copy and transfer ownership. */
+ new_reg = AllocTypedTemp(loc.fp, reg_class);
+ OpRegCopy(new_reg, loc.low_reg);
+ CopyRegInfo(new_reg, loc.low_reg);
+ Clobber(loc.low_reg);
+ loc.low_reg = new_reg;
+ if (IsFpReg(loc.low_reg) && reg_class != kCoreReg)
+ loc.vec_len = kVectorLength4;
+ }
+ return loc;
+ }
+
+ DCHECK_NE(loc.s_reg_low, INVALID_SREG);
+
+ new_reg = AllocTypedTemp(loc.fp, reg_class);
+ loc.low_reg = new_reg;
+ if (IsFpReg(loc.low_reg) && reg_class != kCoreReg)
+ loc.vec_len = kVectorLength4;
+
+ if (update) {
+ loc.location = kLocPhysReg;
+ MarkLive(loc.low_reg, loc.s_reg_low);
+ }
+ return loc;
+}
+
+int X86Mir2Lir::AllocTempDouble() {
+ // We really don't need a pair of registers.
+ return AllocTempFloat();
+}
+
+// TODO: Reunify with common code after 'pair mess' has been fixed
+void X86Mir2Lir::ResetDefLocWide(RegLocation rl) {
+ DCHECK(rl.wide);
+ RegisterInfo* p_low = IsTemp(rl.low_reg);
+ if (IsFpReg(rl.low_reg)) {
+ // We are using only the low register.
+ if (p_low && !(cu_->disable_opt & (1 << kSuppressLoads))) {
+ NullifyRange(p_low->def_start, p_low->def_end, p_low->s_reg, rl.s_reg_low);
+ }
+ ResetDef(rl.low_reg);
+ } else {
+ RegisterInfo* p_high = IsTemp(rl.high_reg);
+ if (p_low && !(cu_->disable_opt & (1 << kSuppressLoads))) {
+ DCHECK(p_low->pair);
+ NullifyRange(p_low->def_start, p_low->def_end, p_low->s_reg, rl.s_reg_low);
+ }
+ if (p_high && !(cu_->disable_opt & (1 << kSuppressLoads))) {
+ DCHECK(p_high->pair);
+ }
+ ResetDef(rl.low_reg);
+ ResetDef(rl.high_reg);
+ }
+}
+
+void X86Mir2Lir::GenConstWide(RegLocation rl_dest, int64_t value) {
+ // Can we do this directly to memory?
+ rl_dest = UpdateLocWide(rl_dest);
+ if ((rl_dest.location == kLocDalvikFrame) ||
+ (rl_dest.location == kLocCompilerTemp)) {
+ int32_t val_lo = Low32Bits(value);
+ int32_t val_hi = High32Bits(value);
+ int rBase = TargetReg(kSp);
+ int displacement = SRegOffset(rl_dest.s_reg_low);
+
+ LIR * store = NewLIR3(kX86Mov32MI, rBase, displacement + LOWORD_OFFSET, val_lo);
+ AnnotateDalvikRegAccess(store, (displacement + LOWORD_OFFSET) >> 2,
+ false /* is_load */, true /* is64bit */);
+ store = NewLIR3(kX86Mov32MI, rBase, displacement + HIWORD_OFFSET, val_hi);
+ AnnotateDalvikRegAccess(store, (displacement + HIWORD_OFFSET) >> 2,
+ false /* is_load */, true /* is64bit */);
+ return;
+ }
+
+ // Just use the standard code to do the generation.
+ Mir2Lir::GenConstWide(rl_dest, value);
+}
} // namespace art
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index f683aff..91c39fa 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -334,6 +334,7 @@
LIR *res;
if (X86_FPREG(r_dest_lo)) {
DCHECK(X86_FPREG(r_dest_hi)); // ignore r_dest_hi
+ DCHECK_EQ(r_dest_lo, r_dest_hi);
if (value == 0) {
return NewLIR2(kX86XorpsRR, r_dest_lo, r_dest_lo);
} else {
@@ -343,9 +344,11 @@
res = LoadConstantNoClobber(r_dest_lo, val_lo);
}
if (val_hi != 0) {
+ r_dest_hi = AllocTempDouble();
LoadConstantNoClobber(r_dest_hi, val_hi);
NewLIR2(kX86PsllqRI, r_dest_hi, 32);
NewLIR2(kX86OrpsRR, r_dest_lo, r_dest_hi);
+ FreeTemp(r_dest_hi);
}
}
} else {
@@ -370,12 +373,6 @@
is64bit = true;
if (X86_FPREG(r_dest)) {
opcode = is_array ? kX86MovsdRA : kX86MovsdRM;
- if (X86_SINGLEREG(r_dest)) {
- DCHECK(X86_FPREG(r_dest_hi));
- DCHECK_EQ(r_dest, (r_dest_hi - 1));
- r_dest = S2d(r_dest, r_dest_hi);
- }
- r_dest_hi = r_dest + 1;
} else {
pair = true;
opcode = is_array ? kX86Mov32RA : kX86Mov32RM;
@@ -488,12 +485,6 @@
is64bit = true;
if (X86_FPREG(r_src)) {
opcode = is_array ? kX86MovsdAR : kX86MovsdMR;
- if (X86_SINGLEREG(r_src)) {
- DCHECK(X86_FPREG(r_src_hi));
- DCHECK_EQ(r_src, (r_src_hi - 1));
- r_src = S2d(r_src, r_src_hi);
- }
- r_src_hi = r_src + 1;
} else {
pair = true;
opcode = is_array ? kX86Mov32AR : kX86Mov32MR;
@@ -573,4 +564,17 @@
r_src_lo, r_src_hi, kLong, INVALID_SREG);
}
+/*
+ * Copy a long value in Core registers to an XMM register
+ *
+ */
+void X86Mir2Lir::OpVectorRegCopyWide(uint8_t fp_reg, uint8_t low_reg, uint8_t high_reg) {
+ NewLIR2(kX86MovdxrRR, fp_reg, low_reg);
+ int tmp_reg = AllocTempDouble();
+ NewLIR2(kX86MovdxrRR, tmp_reg, high_reg);
+ NewLIR2(kX86PsllqRI, tmp_reg, 32);
+ NewLIR2(kX86OrpsRR, fp_reg, tmp_reg);
+ FreeTemp(tmp_reg);
+}
+
} // namespace art
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index f38a16d..1488f5d 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -128,11 +128,11 @@
#define X86_FP_REG_MASK 0xF
// RegisterLocation templates return values (rAX, rAX/rDX or XMM0).
-// location, wide, defined, const, fp, core, ref, high_word, home, low_reg, high_reg, s_reg_low
-#define X86_LOC_C_RETURN {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rAX, INVALID_REG, INVALID_SREG, INVALID_SREG}
-#define X86_LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rAX, rDX, INVALID_SREG, INVALID_SREG}
-#define X86_LOC_C_RETURN_FLOAT {kLocPhysReg, 0, 0, 0, 1, 0, 0, 0, 1, fr0, INVALID_REG, INVALID_SREG, INVALID_SREG}
-#define X86_LOC_C_RETURN_DOUBLE {kLocPhysReg, 1, 0, 0, 1, 0, 0, 0, 1, fr0, fr1, INVALID_SREG, INVALID_SREG}
+// location, wide, defined, const, fp, core, ref, high_word, home, vec_len, low_reg, high_reg, s_reg_low
+#define X86_LOC_C_RETURN {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, rAX, INVALID_REG, INVALID_SREG, INVALID_SREG}
+#define X86_LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, rAX, rDX, INVALID_SREG, INVALID_SREG}
+#define X86_LOC_C_RETURN_FLOAT {kLocPhysReg, 0, 0, 0, 1, 0, 0, 0, 1, kVectorLength4, fr0, INVALID_REG, INVALID_SREG, INVALID_SREG}
+#define X86_LOC_C_RETURN_DOUBLE {kLocPhysReg, 1, 0, 0, 1, 0, 0, 0, 1, kVectorLength8, fr0, fr0, INVALID_SREG, INVALID_SREG}
enum X86ResourceEncodingPos {
kX86GPReg0 = 0,
diff --git a/compiler/dex/vreg_analysis.cc b/compiler/dex/vreg_analysis.cc
index bef966c..f211e3f 100644
--- a/compiler/dex/vreg_analysis.cc
+++ b/compiler/dex/vreg_analysis.cc
@@ -405,7 +405,7 @@
}
static const RegLocation fresh_loc = {kLocDalvikFrame, 0, 0, 0, 0, 0, 0, 0, 0,
- INVALID_REG, INVALID_REG, INVALID_SREG,
+ kVectorNotUsed, INVALID_REG, INVALID_REG, INVALID_SREG,
INVALID_SREG};
void MIRGraph::InitRegLocations() {