diff options
Diffstat (limited to 'src/compiler/codegen')
| -rw-r--r-- | src/compiler/codegen/arm/ArchUtility.cc | 2 | ||||
| -rw-r--r-- | src/compiler/codegen/arm/ArmLIR.h | 2 | ||||
| -rw-r--r-- | src/compiler/codegen/arm/Assemble.cc | 94 | ||||
| -rw-r--r-- | src/compiler/codegen/arm/CodegenCommon.cc | 36 | ||||
| -rw-r--r-- | src/compiler/codegen/arm/MethodCodegenDriver.cc | 8 | ||||
| -rw-r--r-- | src/compiler/codegen/arm/Thumb2/Factory.cc | 28 | ||||
| -rw-r--r-- | src/compiler/codegen/arm/Thumb2/Gen.cc | 6 |
7 files changed, 110 insertions, 66 deletions
diff --git a/src/compiler/codegen/arm/ArchUtility.cc b/src/compiler/codegen/arm/ArchUtility.cc index 6f435e77fd..1d6bb41b8a 100644 --- a/src/compiler/codegen/arm/ArchUtility.cc +++ b/src/compiler/codegen/arm/ArchUtility.cc @@ -371,7 +371,7 @@ void oatDumpLIRInsn(CompilationUnit* cUnit, LIR* arg, unsigned char* baseAddr) buildInsnString(EncodingMap[lir->opcode].fmt, lir, opOperands, baseAddr, 256); char tBuf[256]; - snprintf(tBuf, 256, "%p (%04x): %-8s%s%s", baseAddr + offset, offset, + snprintf(tBuf, 256, "%p (%04x): %-9s%s%s", baseAddr + offset, offset, opName, opOperands, lir->flags.isNop ? "(nop)" : ""); LOG(INFO) << tBuf; } diff --git a/src/compiler/codegen/arm/ArmLIR.h b/src/compiler/codegen/arm/ArmLIR.h index 53e8dc84d4..07e2e97661 100644 --- a/src/compiler/codegen/arm/ArmLIR.h +++ b/src/compiler/codegen/arm/ArmLIR.h @@ -682,7 +682,7 @@ typedef enum ArmOpcode { kThumb2BUncond, /* b <label> */ kThumb2MovImm16H, /* similar to kThumb2MovImm16, but target high hw */ kThumb2AddPCR, /* Thumb2 2-operand add with hard-coded PC target */ - kThumb2AdrST, /* Special purpose encoding of ADR for switch tables */ + kThumb2Adr, /* Special purpose encoding of ADR for switch tables */ kThumb2MovImm16LST, /* Special purpose version for switch table use */ kThumb2MovImm16HST, /* Special purpose version for switch table use */ kThumb2LdmiaWB, /* ldmia [111010011001[ rn[19..16] mask[15..0] */ diff --git a/src/compiler/codegen/arm/Assemble.cc b/src/compiler/codegen/arm/Assemble.cc index 62706a7030..d22c26760d 100644 --- a/src/compiler/codegen/arm/Assemble.cc +++ b/src/compiler/codegen/arm/Assemble.cc @@ -932,7 +932,7 @@ ArmEncodingMap EncodingMap[kArmLast] = { kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE0 | IS_BRANCH, "add", "rPC, !0C", 1), - ENCODING_MAP(kThumb2AdrST, 0xf20f0000, + ENCODING_MAP(kThumb2Adr, 0xf20f0000, kFmtBitBlt, 11, 8, kFmtImm12, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0,/* Note: doesn't affect flags */ @@ -1085,6 +1085,7 @@ static AssemblerStatus assembleInstructions(CompilationUnit* cUnit, if (lir->opcode == kThumbLdrPcRel || lir->opcode == kThumb2LdrPcRel12 || lir->opcode == kThumbAddPcRel || + ((lir->opcode == kThumb2Vldrd) && (lir->operands[1] == r15pc)) || ((lir->opcode == kThumb2Vldrs) && (lir->operands[1] == r15pc))) { /* * PC-relative loads are mostly used to load immediates @@ -1106,57 +1107,34 @@ static AssemblerStatus assembleInstructions(CompilationUnit* cUnit, LOG(FATAL) << "Unexpected pc-rel offset " << delta; } // Now, check for the two difficult cases - if (1 || ((lir->opcode == kThumb2LdrPcRel12) && (delta > 4091)) || - ((lir->opcode == kThumb2Vldrs) && (delta > 1020))) { - /* - * OK - the load doesn't work. We'll just materialize - * the immediate directly using mov16l and mov16h. - * It's a little ugly for float immediates as we don't have - * float ops like the core mov imm16H/L. In this case - * we'll materialize in a core register (rLR) and then copy. - * NOTE/WARNING: This is a *very* fragile workaround that will - * be addressed in a later release when we have a late spill - * capability. We can get away with it for now because rLR - * is currently only used during call setups, and our convention - * requires all arguments to be passed in core register & the - * frame (and thus, we won't see any vlrds in the sequence). - * The normal resource mask mechanism will prevent any damaging - * code motion. - */ - int tgtReg = (lir->opcode == kThumb2Vldrs) ? rLR : - lir->operands[0]; - int immVal = lirTarget->operands[0]; - // The standard utilities won't work here - build manually - ArmLIR *newMov16L = - (ArmLIR *)oatNew(sizeof(ArmLIR), true); - newMov16L->generic.dalvikOffset = lir->generic.dalvikOffset; - newMov16L->opcode = kThumb2MovImm16; - newMov16L->operands[0] = tgtReg; - newMov16L->operands[1] = immVal & 0xffff; - oatSetupResourceMasks(newMov16L); - oatInsertLIRBefore((LIR*)lir, (LIR*)newMov16L); - ArmLIR *newMov16H = + if (((lir->opcode == kThumb2LdrPcRel12) && (delta > 4091)) || + ((lir->opcode == kThumb2Vldrs) && (delta > 1020)) || + ((lir->opcode == kThumb2Vldrd) && (delta > 1020))) { + int baseReg = (lir->opcode == kThumb2LdrPcRel12) ? + lir->operands[0] : rLR; + + // Add new Adr to generate the address + ArmLIR *newAdr = (ArmLIR *)oatNew(sizeof(ArmLIR), true); - newMov16H->generic.dalvikOffset = lir->generic.dalvikOffset; - newMov16H->opcode = kThumb2MovImm16H; - newMov16H->operands[0] = tgtReg; - newMov16H->operands[1] = (immVal >> 16) & 0xffff; - oatSetupResourceMasks(newMov16H); - oatInsertLIRBefore((LIR*)lir, (LIR*)newMov16H); - if (lir->opcode == kThumb2Vldrs) { - // Convert the vldrs to a kThumb2Fmsr - lir->opcode = kThumb2Fmsr; - lir->operands[1] = rLR; - lir->generic.target = NULL; - lir->operands[2] = 0; - oatSetupResourceMasks(lir); - } else { - // Nullify the original load - lir->flags.isNop = true; + newAdr->generic.dalvikOffset = lir->generic.dalvikOffset; + newAdr->generic.target = lir->generic.target; + newAdr->opcode = kThumb2Adr; + newAdr->operands[0] = baseReg; + oatSetupResourceMasks(newAdr); + oatInsertLIRBefore((LIR*)lir, (LIR*)newAdr); + + // Convert to normal load + if (lir->opcode == kThumb2LdrPcRel12) { + lir->opcode = kThumb2LdrRRI12; } + // Change the load to be relative to the new Adr base + lir->operands[1] = baseReg; + lir->operands[2] = 0; + oatSetupResourceMasks(lir); res = kRetryAll; } else { - if (lir->opcode == kThumb2Vldrs) { + if ((lir->opcode == kThumb2Vldrs) || + (lir->opcode == kThumb2Vldrd)) { lir->operands[2] = delta >> 2; } else { lir->operands[1] = (lir->opcode == kThumb2LdrPcRel12) ? @@ -1259,16 +1237,19 @@ static AssemblerStatus assembleInstructions(CompilationUnit* cUnit, lir->operands[0] = (delta >> 12) & 0x7ff; NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff; - } else if (lir->opcode == kThumb2AdrST) { + } else if (lir->opcode == kThumb2Adr) { SwitchTable *tabRec = (SwitchTable*)lir->operands[2]; - int disp = tabRec->offset - ((lir->generic.offset + 4) & ~3); + ArmLIR* target = (ArmLIR*)lir->generic.target; + int targetDisp = tabRec ? tabRec->offset : target->generic.offset; + int disp = targetDisp - ((lir->generic.offset + 4) & ~3); if (disp < 4096) { lir->operands[1] = disp; } else { - // convert to ldimm16l, ldimm16h, add tgt, pc, r12 + // convert to ldimm16l, ldimm16h, add tgt, pc, operands[0] ArmLIR *newMov16L = (ArmLIR *)oatNew(sizeof(ArmLIR), true); newMov16L->generic.dalvikOffset = lir->generic.dalvikOffset; + newMov16L->generic.target = lir->generic.target; newMov16L->opcode = kThumb2MovImm16LST; newMov16L->operands[0] = lir->operands[0]; newMov16L->operands[2] = (intptr_t)lir; @@ -1278,6 +1259,7 @@ static AssemblerStatus assembleInstructions(CompilationUnit* cUnit, ArmLIR *newMov16H = (ArmLIR *)oatNew(sizeof(ArmLIR), true); newMov16H->generic.dalvikOffset = lir->generic.dalvikOffset; + newMov16H->generic.target = lir->generic.target; newMov16H->opcode = kThumb2MovImm16HST; newMov16H->operands[0] = lir->operands[0]; newMov16H->operands[2] = (intptr_t)lir; @@ -1294,13 +1276,19 @@ static AssemblerStatus assembleInstructions(CompilationUnit* cUnit, // operands[1] should hold disp, [2] has add, [3] has tabRec ArmLIR *addPCInst = (ArmLIR*)lir->operands[2]; SwitchTable *tabRec = (SwitchTable*)lir->operands[3]; - lir->operands[1] = (tabRec->offset - + // If tabRec is null, this is a literal load - use generic.target + ArmLIR* target = (ArmLIR*)lir->generic.target; + int targetDisp = tabRec ? tabRec->offset : target->generic.offset; + lir->operands[1] = (targetDisp - (addPCInst->generic.offset + 4)) & 0xffff; } else if (lir->opcode == kThumb2MovImm16HST) { // operands[1] should hold disp, [2] has add, [3] has tabRec ArmLIR *addPCInst = (ArmLIR*)lir->operands[2]; SwitchTable *tabRec = (SwitchTable*)lir->operands[3]; - lir->operands[1] = ((tabRec->offset - + // If tabRec is null, this is a literal load - use generic.target + ArmLIR* target = (ArmLIR*)lir->generic.target; + int targetDisp = tabRec ? tabRec->offset : target->generic.offset; + lir->operands[1] = ((targetDisp - (addPCInst->generic.offset + 4)) >> 16) & 0xffff; } ArmEncodingMap *encoder = &EncodingMap[lir->opcode]; diff --git a/src/compiler/codegen/arm/CodegenCommon.cc b/src/compiler/codegen/arm/CodegenCommon.cc index 4b9b5920cf..40494f38b9 100644 --- a/src/compiler/codegen/arm/CodegenCommon.cc +++ b/src/compiler/codegen/arm/CodegenCommon.cc @@ -326,6 +326,25 @@ static ArmLIR* scanLiteralPool(LIR* dataTarget, int value, unsigned int delta) return NULL; } +/* Search the existing constants in the literal pool for an exact wide match */ +static ArmLIR* scanLiteralPoolWide(LIR* dataTarget, int valLo, int valHi) +{ + bool loMatch = false; + LIR* loTarget = NULL; + while (dataTarget) { + if (loMatch && (((ArmLIR*)dataTarget)->operands[0] == valHi)) { + return (ArmLIR*)loTarget; + } + loMatch = false; + if (((ArmLIR*)dataTarget)->operands[0] == valLo) { + loMatch = true; + loTarget = dataTarget; + } + dataTarget = dataTarget->next; + } + return NULL; +} + /* * The following are building blocks to insert constants into the pool or * instruction streams. @@ -350,6 +369,23 @@ static ArmLIR* addWordData(CompilationUnit* cUnit, LIR* *constantListP, return NULL; } +/* Add a 64-bit constant to the constant pool or mixed with code */ +static ArmLIR* addWideData(CompilationUnit* cUnit, LIR* *constantListP, + int valLo, int valHi) +{ + ArmLIR* res; + //NOTE: hard-coded little endian + if (constantListP == NULL) { + res = addWordData(cUnit, NULL, valLo); + addWordData(cUnit, NULL, valHi); + } else { + // Insert high word into list first + addWordData(cUnit, constantListP, valHi); + res = addWordData(cUnit, constantListP, valLo); + } + return res; +} + /* * Generate an kArmPseudoBarrier marker to indicate the boundary of special * blocks. diff --git a/src/compiler/codegen/arm/MethodCodegenDriver.cc b/src/compiler/codegen/arm/MethodCodegenDriver.cc index 0af213f6fc..8dc388c7e3 100644 --- a/src/compiler/codegen/arm/MethodCodegenDriver.cc +++ b/src/compiler/codegen/arm/MethodCodegenDriver.cc @@ -1182,10 +1182,10 @@ static bool compileDalvikInstruction(CompilationUnit* cUnit, MIR* mir, case OP_CONST_WIDE_16: case OP_CONST_WIDE_32: - rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true); - loadConstantNoClobber(cUnit, rlResult.lowReg, mir->dalvikInsn.vB); - //TUNING: do high separately to avoid load dependency - opRegRegImm(cUnit, kOpAsr, rlResult.highReg, rlResult.lowReg, 31); + rlResult = oatEvalLoc(cUnit, rlDest, kAnyReg, true); + loadConstantValueWide(cUnit, rlResult.lowReg, rlResult.highReg, + mir->dalvikInsn.vB, + (mir->dalvikInsn.vB & 0x80000000) ? -1 : 0); storeValueWide(cUnit, rlDest, rlResult); break; diff --git a/src/compiler/codegen/arm/Thumb2/Factory.cc b/src/compiler/codegen/arm/Thumb2/Factory.cc index 78b9e61b2b..254802d032 100644 --- a/src/compiler/codegen/arm/Thumb2/Factory.cc +++ b/src/compiler/codegen/arm/Thumb2/Factory.cc @@ -619,7 +619,7 @@ static int encodeImmDoubleHigh(int value) if (zeroes != 0) return -1; if (bitB) { - if ((notBitB != 0) || (bSmear != 0x1f)) + if ((notBitB != 0) || (bSmear != 0xff)) return -1; } else { if ((notBitB != 1) || (bSmear != 0x0)) @@ -642,9 +642,29 @@ static ArmLIR* loadConstantValueWide(CompilationUnit* cUnit, int rDestLo, { int encodedImm = encodeImmDouble(valLo, valHi); ArmLIR* res; - if (FPREG(rDestLo) && (encodedImm >= 0)) { - res = newLIR2(cUnit, kThumb2Vmovd_IMM8, S2D(rDestLo, rDestHi), - encodedImm); + if (FPREG(rDestLo)) { + if (encodedImm >= 0) { + res = newLIR2(cUnit, kThumb2Vmovd_IMM8, S2D(rDestLo, rDestHi), + encodedImm); + } else { + ArmLIR* dataTarget = scanLiteralPoolWide(cUnit->literalList, valLo, + valHi); + if (dataTarget == NULL) { + dataTarget = addWideData(cUnit, &cUnit->literalList, valLo, + valHi); + } + ArmLIR* loadPcRel = (ArmLIR* ) oatNew(sizeof(ArmLIR), true); + loadPcRel->generic.dalvikOffset = cUnit->currentDalvikOffset; + loadPcRel->opcode = kThumb2Vldrd; + loadPcRel->generic.target = (LIR* ) dataTarget; + loadPcRel->operands[0] = S2D(rDestLo, rDestHi); + loadPcRel->operands[1] = r15pc; + setupResourceMasks(loadPcRel); + setMemRefType(loadPcRel, true, kLiteral); + loadPcRel->aliasInfo = dataTarget->operands[0]; + oatAppendLIR(cUnit, (LIR* ) loadPcRel); + res = loadPcRel; + } } else { res = loadConstantNoClobber(cUnit, rDestLo, valLo); loadConstantNoClobber(cUnit, rDestHi, valHi); diff --git a/src/compiler/codegen/arm/Thumb2/Gen.cc b/src/compiler/codegen/arm/Thumb2/Gen.cc index 52d67deece..2404ca7524 100644 --- a/src/compiler/codegen/arm/Thumb2/Gen.cc +++ b/src/compiler/codegen/arm/Thumb2/Gen.cc @@ -283,7 +283,7 @@ static void genSparseSwitch(CompilationUnit* cUnit, MIR* mir, rKey = tmp; } // Materialize a pointer to the switch table - newLIR3(cUnit, kThumb2AdrST, rBase, 0, (intptr_t)tabRec); + newLIR3(cUnit, kThumb2Adr, rBase, 0, (intptr_t)tabRec); // Set up rIdx int rIdx = oatAllocTemp(cUnit); loadConstant(cUnit, rIdx, size); @@ -324,7 +324,7 @@ static void genPackedSwitch(CompilationUnit* cUnit, MIR* mir, rlSrc = loadValue(cUnit, rlSrc, kCoreReg); int tableBase = oatAllocTemp(cUnit); // Materialize a pointer to the switch table - newLIR3(cUnit, kThumb2AdrST, tableBase, 0, (intptr_t)tabRec); + newLIR3(cUnit, kThumb2Adr, tableBase, 0, (intptr_t)tabRec); int lowKey = s4FromSwitchData(&table[2]); int keyReg; // Remove the bias, if necessary @@ -383,7 +383,7 @@ static void genFillArrayData(CompilationUnit* cUnit, MIR* mir, loadWordDisp(cUnit, rSELF, OFFSETOF_MEMBER(Thread, pHandleFillArrayDataFromCode), rLR); // Materialize a pointer to the fill data image - newLIR3(cUnit, kThumb2AdrST, r1, 0, (intptr_t)tabRec); + newLIR3(cUnit, kThumb2Adr, r1, 0, (intptr_t)tabRec); callUnwindableHelper(cUnit, rLR); oatClobberCallRegs(cUnit); } |