Rework type & size inference, literal usage
Fixes a bug in the old type inference mechanism (it wasn't properly
propagating type info across Phi & move nodes). Combined the type and
size inference passes.
Fixed long-standing bug in the code to load a special double-precision
immediate (would have been extremely difficult to hit this in the field).
Improved loading floating point immediates.
Change-Id: I1ec72edc3b25525f14d965089f8952d4f0294942
diff --git a/src/compiler/codegen/arm/ArchUtility.cc b/src/compiler/codegen/arm/ArchUtility.cc
index 6f435e7..1d6bb41 100644
--- a/src/compiler/codegen/arm/ArchUtility.cc
+++ b/src/compiler/codegen/arm/ArchUtility.cc
@@ -371,7 +371,7 @@
buildInsnString(EncodingMap[lir->opcode].fmt, lir, opOperands,
baseAddr, 256);
char tBuf[256];
- snprintf(tBuf, 256, "%p (%04x): %-8s%s%s", baseAddr + offset, offset,
+ snprintf(tBuf, 256, "%p (%04x): %-9s%s%s", baseAddr + offset, offset,
opName, opOperands, lir->flags.isNop ? "(nop)" : "");
LOG(INFO) << tBuf;
}
diff --git a/src/compiler/codegen/arm/ArmLIR.h b/src/compiler/codegen/arm/ArmLIR.h
index 53e8dc8..07e2e97 100644
--- a/src/compiler/codegen/arm/ArmLIR.h
+++ b/src/compiler/codegen/arm/ArmLIR.h
@@ -682,7 +682,7 @@
kThumb2BUncond, /* b <label> */
kThumb2MovImm16H, /* similar to kThumb2MovImm16, but target high hw */
kThumb2AddPCR, /* Thumb2 2-operand add with hard-coded PC target */
- kThumb2AdrST, /* Special purpose encoding of ADR for switch tables */
+ kThumb2Adr, /* Special purpose encoding of ADR for switch tables */
kThumb2MovImm16LST, /* Special purpose version for switch table use */
kThumb2MovImm16HST, /* Special purpose version for switch table use */
kThumb2LdmiaWB, /* ldmia [111010011001[ rn[19..16] mask[15..0] */
diff --git a/src/compiler/codegen/arm/Assemble.cc b/src/compiler/codegen/arm/Assemble.cc
index 62706a7..d22c267 100644
--- a/src/compiler/codegen/arm/Assemble.cc
+++ b/src/compiler/codegen/arm/Assemble.cc
@@ -932,7 +932,7 @@
kFmtUnused, -1, -1,
IS_UNARY_OP | REG_USE0 | IS_BRANCH,
"add", "rPC, !0C", 1),
- ENCODING_MAP(kThumb2AdrST, 0xf20f0000,
+ ENCODING_MAP(kThumb2Adr, 0xf20f0000,
kFmtBitBlt, 11, 8, kFmtImm12, -1, -1, kFmtUnused, -1, -1,
kFmtUnused, -1, -1,
IS_TERTIARY_OP | REG_DEF0,/* Note: doesn't affect flags */
@@ -1085,6 +1085,7 @@
if (lir->opcode == kThumbLdrPcRel ||
lir->opcode == kThumb2LdrPcRel12 ||
lir->opcode == kThumbAddPcRel ||
+ ((lir->opcode == kThumb2Vldrd) && (lir->operands[1] == r15pc)) ||
((lir->opcode == kThumb2Vldrs) && (lir->operands[1] == r15pc))) {
/*
* PC-relative loads are mostly used to load immediates
@@ -1106,57 +1107,34 @@
LOG(FATAL) << "Unexpected pc-rel offset " << delta;
}
// Now, check for the two difficult cases
- if (1 || ((lir->opcode == kThumb2LdrPcRel12) && (delta > 4091)) ||
- ((lir->opcode == kThumb2Vldrs) && (delta > 1020))) {
- /*
- * OK - the load doesn't work. We'll just materialize
- * the immediate directly using mov16l and mov16h.
- * It's a little ugly for float immediates as we don't have
- * float ops like the core mov imm16H/L. In this case
- * we'll materialize in a core register (rLR) and then copy.
- * NOTE/WARNING: This is a *very* fragile workaround that will
- * be addressed in a later release when we have a late spill
- * capability. We can get away with it for now because rLR
- * is currently only used during call setups, and our convention
- * requires all arguments to be passed in core register & the
- * frame (and thus, we won't see any vlrds in the sequence).
- * The normal resource mask mechanism will prevent any damaging
- * code motion.
- */
- int tgtReg = (lir->opcode == kThumb2Vldrs) ? rLR :
- lir->operands[0];
- int immVal = lirTarget->operands[0];
- // The standard utilities won't work here - build manually
- ArmLIR *newMov16L =
+ if (((lir->opcode == kThumb2LdrPcRel12) && (delta > 4091)) ||
+ ((lir->opcode == kThumb2Vldrs) && (delta > 1020)) ||
+ ((lir->opcode == kThumb2Vldrd) && (delta > 1020))) {
+ int baseReg = (lir->opcode == kThumb2LdrPcRel12) ?
+ lir->operands[0] : rLR;
+
+ // Add new Adr to generate the address
+ ArmLIR *newAdr =
(ArmLIR *)oatNew(sizeof(ArmLIR), true);
- newMov16L->generic.dalvikOffset = lir->generic.dalvikOffset;
- newMov16L->opcode = kThumb2MovImm16;
- newMov16L->operands[0] = tgtReg;
- newMov16L->operands[1] = immVal & 0xffff;
- oatSetupResourceMasks(newMov16L);
- oatInsertLIRBefore((LIR*)lir, (LIR*)newMov16L);
- ArmLIR *newMov16H =
- (ArmLIR *)oatNew(sizeof(ArmLIR), true);
- newMov16H->generic.dalvikOffset = lir->generic.dalvikOffset;
- newMov16H->opcode = kThumb2MovImm16H;
- newMov16H->operands[0] = tgtReg;
- newMov16H->operands[1] = (immVal >> 16) & 0xffff;
- oatSetupResourceMasks(newMov16H);
- oatInsertLIRBefore((LIR*)lir, (LIR*)newMov16H);
- if (lir->opcode == kThumb2Vldrs) {
- // Convert the vldrs to a kThumb2Fmsr
- lir->opcode = kThumb2Fmsr;
- lir->operands[1] = rLR;
- lir->generic.target = NULL;
- lir->operands[2] = 0;
- oatSetupResourceMasks(lir);
- } else {
- // Nullify the original load
- lir->flags.isNop = true;
+ newAdr->generic.dalvikOffset = lir->generic.dalvikOffset;
+ newAdr->generic.target = lir->generic.target;
+ newAdr->opcode = kThumb2Adr;
+ newAdr->operands[0] = baseReg;
+ oatSetupResourceMasks(newAdr);
+ oatInsertLIRBefore((LIR*)lir, (LIR*)newAdr);
+
+ // Convert to normal load
+ if (lir->opcode == kThumb2LdrPcRel12) {
+ lir->opcode = kThumb2LdrRRI12;
}
+ // Change the load to be relative to the new Adr base
+ lir->operands[1] = baseReg;
+ lir->operands[2] = 0;
+ oatSetupResourceMasks(lir);
res = kRetryAll;
} else {
- if (lir->opcode == kThumb2Vldrs) {
+ if ((lir->opcode == kThumb2Vldrs) ||
+ (lir->opcode == kThumb2Vldrd)) {
lir->operands[2] = delta >> 2;
} else {
lir->operands[1] = (lir->opcode == kThumb2LdrPcRel12) ?
@@ -1259,16 +1237,19 @@
lir->operands[0] = (delta >> 12) & 0x7ff;
NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff;
- } else if (lir->opcode == kThumb2AdrST) {
+ } else if (lir->opcode == kThumb2Adr) {
SwitchTable *tabRec = (SwitchTable*)lir->operands[2];
- int disp = tabRec->offset - ((lir->generic.offset + 4) & ~3);
+ ArmLIR* target = (ArmLIR*)lir->generic.target;
+ int targetDisp = tabRec ? tabRec->offset : target->generic.offset;
+ int disp = targetDisp - ((lir->generic.offset + 4) & ~3);
if (disp < 4096) {
lir->operands[1] = disp;
} else {
- // convert to ldimm16l, ldimm16h, add tgt, pc, r12
+ // convert to ldimm16l, ldimm16h, add tgt, pc, operands[0]
ArmLIR *newMov16L =
(ArmLIR *)oatNew(sizeof(ArmLIR), true);
newMov16L->generic.dalvikOffset = lir->generic.dalvikOffset;
+ newMov16L->generic.target = lir->generic.target;
newMov16L->opcode = kThumb2MovImm16LST;
newMov16L->operands[0] = lir->operands[0];
newMov16L->operands[2] = (intptr_t)lir;
@@ -1278,6 +1259,7 @@
ArmLIR *newMov16H =
(ArmLIR *)oatNew(sizeof(ArmLIR), true);
newMov16H->generic.dalvikOffset = lir->generic.dalvikOffset;
+ newMov16H->generic.target = lir->generic.target;
newMov16H->opcode = kThumb2MovImm16HST;
newMov16H->operands[0] = lir->operands[0];
newMov16H->operands[2] = (intptr_t)lir;
@@ -1294,13 +1276,19 @@
// operands[1] should hold disp, [2] has add, [3] has tabRec
ArmLIR *addPCInst = (ArmLIR*)lir->operands[2];
SwitchTable *tabRec = (SwitchTable*)lir->operands[3];
- lir->operands[1] = (tabRec->offset -
+ // If tabRec is null, this is a literal load - use generic.target
+ ArmLIR* target = (ArmLIR*)lir->generic.target;
+ int targetDisp = tabRec ? tabRec->offset : target->generic.offset;
+ lir->operands[1] = (targetDisp -
(addPCInst->generic.offset + 4)) & 0xffff;
} else if (lir->opcode == kThumb2MovImm16HST) {
// operands[1] should hold disp, [2] has add, [3] has tabRec
ArmLIR *addPCInst = (ArmLIR*)lir->operands[2];
SwitchTable *tabRec = (SwitchTable*)lir->operands[3];
- lir->operands[1] = ((tabRec->offset -
+ // If tabRec is null, this is a literal load - use generic.target
+ ArmLIR* target = (ArmLIR*)lir->generic.target;
+ int targetDisp = tabRec ? tabRec->offset : target->generic.offset;
+ lir->operands[1] = ((targetDisp -
(addPCInst->generic.offset + 4)) >> 16) & 0xffff;
}
ArmEncodingMap *encoder = &EncodingMap[lir->opcode];
diff --git a/src/compiler/codegen/arm/CodegenCommon.cc b/src/compiler/codegen/arm/CodegenCommon.cc
index 4b9b592..40494f3 100644
--- a/src/compiler/codegen/arm/CodegenCommon.cc
+++ b/src/compiler/codegen/arm/CodegenCommon.cc
@@ -326,6 +326,25 @@
return NULL;
}
+/* Search the existing constants in the literal pool for an exact wide match: two consecutive entries whose operands[0] are valLo then valHi. Returns the valLo entry, or NULL if no such pair is found. */
+static ArmLIR* scanLiteralPoolWide(LIR* dataTarget, int valLo, int valHi)
+{
+ bool loMatch = false;  // true when the previous entry's operands[0] equalled valLo
+ LIR* loTarget = NULL;  // the entry that produced the pending low-word match
+ while (dataTarget) {
+ if (loMatch && (((ArmLIR*)dataTarget)->operands[0] == valHi)) {  // previous entry was valLo, this one is valHi
+ return (ArmLIR*)loTarget;
+ }
+ loMatch = false;  // a low-word match only counts for the very next entry
+ if (((ArmLIR*)dataTarget)->operands[0] == valLo) {  // this entry may start a lo/hi pair
+ loMatch = true;
+ loTarget = dataTarget;
+ }
+ dataTarget = dataTarget->next;
+ }
+ return NULL;  // reached the end of the list without finding a consecutive lo/hi pair
+}
+
/*
* The following are building blocks to insert constants into the pool or
* instruction streams.
@@ -350,6 +369,23 @@
return NULL;
}
+/* Add a 64-bit constant to the constant pool or mixed with code. Emits valLo and valHi as two separate word entries via addWordData and returns the entry created for valLo. */
+static ArmLIR* addWideData(CompilationUnit* cUnit, LIR* *constantListP,
+ int valLo, int valHi)
+{
+ ArmLIR* res;
+ //NOTE: hard-coded little endian (the entry returned is always the low word)
+ if (constantListP == NULL) {  // no list supplied: low word is added first, then high -- NOTE(review): presumably placed inline with code; confirm in addWordData
+ res = addWordData(cUnit, NULL, valLo);
+ addWordData(cUnit, NULL, valHi);
+ } else {
+ // Insert high word into list first
+ addWordData(cUnit, constantListP, valHi);
+ res = addWordData(cUnit, constantListP, valLo);  // NOTE(review): presumably addWordData prepends, leaving lo directly ahead of hi in the list -- confirm
+ }
+ return res;
+}
+
/*
* Generate an kArmPseudoBarrier marker to indicate the boundary of special
* blocks.
diff --git a/src/compiler/codegen/arm/MethodCodegenDriver.cc b/src/compiler/codegen/arm/MethodCodegenDriver.cc
index 0af213f..8dc388c 100644
--- a/src/compiler/codegen/arm/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/arm/MethodCodegenDriver.cc
@@ -1182,10 +1182,10 @@
case OP_CONST_WIDE_16:
case OP_CONST_WIDE_32:
- rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
- loadConstantNoClobber(cUnit, rlResult.lowReg, mir->dalvikInsn.vB);
- //TUNING: do high separately to avoid load dependency
- opRegRegImm(cUnit, kOpAsr, rlResult.highReg, rlResult.lowReg, 31);
+ rlResult = oatEvalLoc(cUnit, rlDest, kAnyReg, true);
+ loadConstantValueWide(cUnit, rlResult.lowReg, rlResult.highReg,
+ mir->dalvikInsn.vB,
+ (mir->dalvikInsn.vB & 0x80000000) ? -1 : 0);
storeValueWide(cUnit, rlDest, rlResult);
break;
diff --git a/src/compiler/codegen/arm/Thumb2/Factory.cc b/src/compiler/codegen/arm/Thumb2/Factory.cc
index 78b9e61..254802d 100644
--- a/src/compiler/codegen/arm/Thumb2/Factory.cc
+++ b/src/compiler/codegen/arm/Thumb2/Factory.cc
@@ -619,7 +619,7 @@
if (zeroes != 0)
return -1;
if (bitB) {
- if ((notBitB != 0) || (bSmear != 0x1f))
+ if ((notBitB != 0) || (bSmear != 0xff))
return -1;
} else {
if ((notBitB != 1) || (bSmear != 0x0))
@@ -642,9 +642,29 @@
{
int encodedImm = encodeImmDouble(valLo, valHi);
ArmLIR* res;
- if (FPREG(rDestLo) && (encodedImm >= 0)) {
- res = newLIR2(cUnit, kThumb2Vmovd_IMM8, S2D(rDestLo, rDestHi),
- encodedImm);
+ if (FPREG(rDestLo)) {
+ if (encodedImm >= 0) {
+ res = newLIR2(cUnit, kThumb2Vmovd_IMM8, S2D(rDestLo, rDestHi),
+ encodedImm);
+ } else {
+ ArmLIR* dataTarget = scanLiteralPoolWide(cUnit->literalList, valLo,
+ valHi);
+ if (dataTarget == NULL) {
+ dataTarget = addWideData(cUnit, &cUnit->literalList, valLo,
+ valHi);
+ }
+ ArmLIR* loadPcRel = (ArmLIR* ) oatNew(sizeof(ArmLIR), true);
+ loadPcRel->generic.dalvikOffset = cUnit->currentDalvikOffset;
+ loadPcRel->opcode = kThumb2Vldrd;
+ loadPcRel->generic.target = (LIR* ) dataTarget;
+ loadPcRel->operands[0] = S2D(rDestLo, rDestHi);
+ loadPcRel->operands[1] = r15pc;
+ setupResourceMasks(loadPcRel);
+ setMemRefType(loadPcRel, true, kLiteral);
+ loadPcRel->aliasInfo = dataTarget->operands[0];
+ oatAppendLIR(cUnit, (LIR* ) loadPcRel);
+ res = loadPcRel;
+ }
} else {
res = loadConstantNoClobber(cUnit, rDestLo, valLo);
loadConstantNoClobber(cUnit, rDestHi, valHi);
diff --git a/src/compiler/codegen/arm/Thumb2/Gen.cc b/src/compiler/codegen/arm/Thumb2/Gen.cc
index 52d67de..2404ca7 100644
--- a/src/compiler/codegen/arm/Thumb2/Gen.cc
+++ b/src/compiler/codegen/arm/Thumb2/Gen.cc
@@ -283,7 +283,7 @@
rKey = tmp;
}
// Materialize a pointer to the switch table
- newLIR3(cUnit, kThumb2AdrST, rBase, 0, (intptr_t)tabRec);
+ newLIR3(cUnit, kThumb2Adr, rBase, 0, (intptr_t)tabRec);
// Set up rIdx
int rIdx = oatAllocTemp(cUnit);
loadConstant(cUnit, rIdx, size);
@@ -324,7 +324,7 @@
rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
int tableBase = oatAllocTemp(cUnit);
// Materialize a pointer to the switch table
- newLIR3(cUnit, kThumb2AdrST, tableBase, 0, (intptr_t)tabRec);
+ newLIR3(cUnit, kThumb2Adr, tableBase, 0, (intptr_t)tabRec);
int lowKey = s4FromSwitchData(&table[2]);
int keyReg;
// Remove the bias, if necessary
@@ -383,7 +383,7 @@
loadWordDisp(cUnit, rSELF,
OFFSETOF_MEMBER(Thread, pHandleFillArrayDataFromCode), rLR);
// Materialize a pointer to the fill data image
- newLIR3(cUnit, kThumb2AdrST, r1, 0, (intptr_t)tabRec);
+ newLIR3(cUnit, kThumb2Adr, r1, 0, (intptr_t)tabRec);
callUnwindableHelper(cUnit, rLR);
oatClobberCallRegs(cUnit);
}