Custom codegen for small frameless methods.
Added a general mechanism that will allow pattern matching of
small methods and (generally) frameless code generation. Previously,
all frames were at least 16 bytes; now you can have zero-length
frames (and thus some old asserts had to go).
Change-Id: Ic786940a602e25b48cbc317ac601ac84cc307762
diff --git a/src/compiled_method.cc b/src/compiled_method.cc
index 3ac32d4..3328ab0 100644
--- a/src/compiled_method.cc
+++ b/src/compiled_method.cc
@@ -35,7 +35,6 @@
: instruction_set_(instruction_set), frame_size_in_bytes_(frame_size_in_bytes),
core_spill_mask_(core_spill_mask), fp_spill_mask_(fp_spill_mask) {
CHECK_NE(code.size(), 0U);
- CHECK_GE(vmap_table.size(), 1U); // should always contain an entry for LR
DCHECK_EQ(vmap_table.size(),
static_cast<uint32_t>(__builtin_popcount(core_spill_mask)
+ __builtin_popcount(fp_spill_mask)));
@@ -63,7 +62,6 @@
code_ = byte_code;
mapping_table_ = length_prefixed_mapping_table;
vmap_table_ = length_prefixed_vmap_table;
-
DCHECK_EQ(vmap_table_[0], static_cast<uint32_t>(__builtin_popcount(core_spill_mask) + __builtin_popcount(fp_spill_mask)));
}
@@ -73,7 +71,6 @@
#if !defined(ART_USE_LLVM_COMPILER)
// Should only be used with CompiledMethods created with the non-LLVM compilers.
CHECK_NE(mapping_table_.size(), 0U);
- CHECK_NE(vmap_table_.size(), 0U);
#endif
gc_map_ = gc_map;
diff --git a/src/compiler/Compiler.h b/src/compiler/Compiler.h
index 238e3ff..4b4e83d 100644
--- a/src/compiler/Compiler.h
+++ b/src/compiler/Compiler.h
@@ -51,6 +51,7 @@
kSkipLargeMethodOptimization,
kSafeOptimizations,
kBBOpt,
+ kMatch,
kPromoteCompilerTemps,
};
diff --git a/src/compiler/CompilerIR.h b/src/compiler/CompilerIR.h
index 184d4db..acc7c94 100644
--- a/src/compiler/CompilerIR.h
+++ b/src/compiler/CompilerIR.h
@@ -533,6 +533,53 @@
int vaddr; // Dalvik offset of FILL_ARRAY_DATA opcode
};
+#define MAX_PATTERN_LEN 5
+
+enum SpecialCaseHandler {
+ kNoHandler,
+ kNullMethod,
+ kConstFunction,
+ kIGet,
+ kIGetBoolean,
+ kIGetObject,
+ kIGetByte,
+ kIGetChar,
+ kIGetShort,
+ kIGetWide,
+ kIPut,
+ kIPutBoolean,
+ kIPutObject,
+ kIPutByte,
+ kIPutChar,
+ kIPutShort,
+ kIPutWide,
+};
+
+struct CodePattern {
+ const Instruction::Code opcodes[MAX_PATTERN_LEN];
+ const SpecialCaseHandler handlerCode;
+};
+
+static const CodePattern specialPatterns[] = {
+ {{Instruction::RETURN_VOID}, kNullMethod},
+ {{Instruction::CONST, Instruction::RETURN}, kConstFunction},
+ {{Instruction::CONST_4, Instruction::RETURN}, kConstFunction},
+ {{Instruction::CONST_16, Instruction::RETURN}, kConstFunction},
+ {{Instruction::IGET, Instruction:: RETURN}, kIGet},
+ {{Instruction::IGET_BOOLEAN, Instruction:: RETURN}, kIGetBoolean},
+ {{Instruction::IGET_OBJECT, Instruction:: RETURN}, kIGetObject},
+ {{Instruction::IGET_BYTE, Instruction:: RETURN}, kIGetByte},
+ {{Instruction::IGET_CHAR, Instruction:: RETURN}, kIGetChar},
+ {{Instruction::IGET_SHORT, Instruction:: RETURN}, kIGetShort},
+ {{Instruction::IGET_WIDE, Instruction:: RETURN_WIDE}, kIGetWide},
+ {{Instruction::IPUT, Instruction:: RETURN}, kIPut},
+ {{Instruction::IPUT_BOOLEAN, Instruction:: RETURN}, kIPutBoolean},
+ {{Instruction::IPUT_OBJECT, Instruction:: RETURN}, kIPutObject},
+ {{Instruction::IPUT_BYTE, Instruction:: RETURN}, kIPutByte},
+ {{Instruction::IPUT_CHAR, Instruction:: RETURN}, kIPutChar},
+ {{Instruction::IPUT_SHORT, Instruction:: RETURN}, kIPutShort},
+ {{Instruction::IPUT_WIDE, Instruction:: RETURN_WIDE}, kIPutWide},
+};
BasicBlock* oatNewBB(CompilationUnit* cUnit, BBType blockType, int blockId);
diff --git a/src/compiler/Frontend.cc b/src/compiler/Frontend.cc
index 3e0fd3a..b2676a6 100644
--- a/src/compiler/Frontend.cc
+++ b/src/compiler/Frontend.cc
@@ -35,6 +35,7 @@
//(1 << kSkipLargeMethodOptimization) |
//(1 << kSafeOptimizations) |
//(1 << kBBOpt) |
+ //(1 << kMatch) |
//(1 << kPromoteCompilerTemps) |
0;
@@ -801,6 +802,7 @@
(1 << kSuppressLoads) |
(1 << kPromoteRegs) |
(1 << kBBOpt) |
+ (1 << kMatch) |
(1 << kTrackLiveTemps));
}
@@ -868,20 +870,46 @@
/* Identify code range in try blocks and set up the empty catch blocks */
processTryCatchBlocks(cUnit.get());
+ /* Set up for simple method detection */
+ int numPatterns = sizeof(specialPatterns)/sizeof(specialPatterns[0]);
+ bool livePattern = (numPatterns > 0) && !(cUnit->disableOpt & (1 << kMatch));
+ bool* deadPattern = (bool*)oatNew(cUnit.get(), sizeof(bool) * numPatterns,
+ kAllocMisc);
+ SpecialCaseHandler specialCase = kNoHandler;
+ int patternPos = 0;
+
/* Parse all instructions and put them into containing basic blocks */
while (codePtr < codeEnd) {
MIR *insn = (MIR *) oatNew(cUnit.get(), sizeof(MIR), true, kAllocMIR);
insn->offset = curOffset;
int width = parseInsn(cUnit.get(), codePtr, &insn->dalvikInsn, false);
insn->width = width;
+ Instruction::Code opcode = insn->dalvikInsn.opcode;
if (cUnit->opcodeCount != NULL) {
- cUnit->opcodeCount[static_cast<int>(insn->dalvikInsn.opcode)]++;
+ cUnit->opcodeCount[static_cast<int>(opcode)]++;
}
/* Terminate when the data section is seen */
if (width == 0)
break;
+ /* Possible simple method? */
+ if (livePattern) {
+ livePattern = false;
+ specialCase = kNoHandler;
+ for (int i = 0; i < numPatterns; i++) {
+ if (!deadPattern[i]) {
+ if (specialPatterns[i].opcodes[patternPos] == opcode) {
+ livePattern = true;
+ specialCase = specialPatterns[i].handlerCode;
+ } else {
+ deadPattern[i] = true;
+ }
+ }
+ }
+ patternPos++;
+ }
+
oatAppendMIR(curBlock, insn);
codePtr += width;
@@ -1006,8 +1034,19 @@
/* Allocate Registers using simple local allocation scheme */
oatSimpleRegAlloc(cUnit.get());
+ if (specialCase != kNoHandler) {
+ /*
+ * Custom codegen for special cases. If for any reason the
+ * special codegen doesn't succeed, cUnit->firstLIRInsn will
+ * be NULL;
+ */
+ oatSpecialMIR2LIR(cUnit.get(), specialCase);
+ }
+
/* Convert MIR to LIR, etc. */
- oatMethodMIR2LIR(cUnit.get());
+ if (cUnit->firstLIRInsn == NULL) {
+ oatMethodMIR2LIR(cUnit.get());
+ }
// Debugging only
if (cUnit->enableDebug & (1 << kDebugDumpCFG)) {
@@ -1044,8 +1083,13 @@
for (size_t i = 0 ; i < cUnit->coreVmapTable.size(); i++) {
vmapTable.push_back(cUnit->coreVmapTable[i]);
}
- // Add a marker to take place of lr
- vmapTable.push_back(INVALID_VREG);
+ // If we have a frame, push a marker to take place of lr
+ if (cUnit->frameSize > 0) {
+ vmapTable.push_back(INVALID_VREG);
+ } else {
+ DCHECK_EQ(__builtin_popcount(cUnit->coreSpillMask), 0);
+ DCHECK_EQ(__builtin_popcount(cUnit->fpSpillMask), 0);
+ }
// Combine vmap tables - core regs, then fp regs
for (uint32_t i = 0; i < cUnit->fpVmapTable.size(); i++) {
vmapTable.push_back(cUnit->fpVmapTable[i]);
diff --git a/src/compiler/codegen/CompilerCodegen.h b/src/compiler/codegen/CompilerCodegen.h
index 1137db3..8f854da 100644
--- a/src/compiler/codegen/CompilerCodegen.h
+++ b/src/compiler/codegen/CompilerCodegen.h
@@ -29,6 +29,9 @@
/* Lower middle-level IR to low-level IR for the whole method */
void oatMethodMIR2LIR(CompilationUnit* cUnit);
+/* Lower middle-level IR to low-level IR for the simple methods */
+void oatSpecialMIR2LIR(CompilationUnit* cUnit, SpecialCaseHandler specialCase );
+
/* Assemble LIR into machine code */
void oatAssembleLIR(CompilationUnit* cUnit);
AssemblerStatus oatAssembleInstructions(CompilationUnit* cUnit,
diff --git a/src/compiler/codegen/GenCommon.cc b/src/compiler/codegen/GenCommon.cc
index c5b28b3..f63ad4c 100644
--- a/src/compiler/codegen/GenCommon.cc
+++ b/src/compiler/codegen/GenCommon.cc
@@ -931,6 +931,17 @@
setupResourceMasks(lir);
}
+bool fastInstance(CompilationUnit* cUnit, uint32_t fieldIdx,
+ int& fieldOffset, bool& isVolatile, bool isPut)
+{
+ OatCompilationUnit mUnit(cUnit->class_loader, cUnit->class_linker,
+ *cUnit->dex_file, *cUnit->dex_cache,
+ cUnit->code_item, cUnit->method_idx,
+ cUnit->access_flags);
+ return cUnit->compiler->ComputeInstanceFieldInfo(fieldIdx, &mUnit,
+ fieldOffset, isVolatile, isPut);
+}
+
void genIGet(CompilationUnit* cUnit, MIR* mir, OpSize size,
RegLocation rlDest, RegLocation rlObj,
bool isLongOrDouble, bool isObject)
@@ -939,13 +950,8 @@
bool isVolatile;
uint32_t fieldIdx = mir->dalvikInsn.vC;
- OatCompilationUnit mUnit(cUnit->class_loader, cUnit->class_linker,
- *cUnit->dex_file, *cUnit->dex_cache,
- cUnit->code_item, cUnit->method_idx,
- cUnit->access_flags);
-
- bool fastPath = cUnit->compiler->ComputeInstanceFieldInfo(fieldIdx, &mUnit,
- fieldOffset, isVolatile, false);
+ bool fastPath = fastInstance(cUnit, fieldIdx, fieldOffset, isVolatile,
+ false);
if (fastPath && !SLOW_FIELD_PATH) {
RegLocation rlResult;
@@ -1006,13 +1012,8 @@
bool isVolatile;
uint32_t fieldIdx = mir->dalvikInsn.vC;
- OatCompilationUnit mUnit(cUnit->class_loader, cUnit->class_linker,
- *cUnit->dex_file, *cUnit->dex_cache,
- cUnit->code_item, cUnit->method_idx,
- cUnit->access_flags);
-
- bool fastPath = cUnit->compiler->ComputeInstanceFieldInfo(fieldIdx, &mUnit,
- fieldOffset, isVolatile, true);
+ bool fastPath = fastInstance(cUnit, fieldIdx, fieldOffset, isVolatile,
+ true);
if (fastPath && !SLOW_FIELD_PATH) {
RegisterClass regClass = oatRegClassBySize(size);
DCHECK_GE(fieldOffset, 0);
diff --git a/src/compiler/codegen/MethodCodegenDriver.cc b/src/compiler/codegen/MethodCodegenDriver.cc
index 8f4df47..671eb73 100644
--- a/src/compiler/codegen/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/MethodCodegenDriver.cc
@@ -887,6 +887,50 @@
return false;
}
+/* Set basic block labels */
+bool labelBlocks(CompilationUnit* cUnit, BasicBlock* bb)
+{
+ LIR* labelList = (LIR*) cUnit->blockLabelList;
+ int blockId = bb->id;
+
+ cUnit->curBlock = bb;
+ labelList[blockId].operands[0] = bb->startOffset;
+
+ /* Insert the block label */
+ labelList[blockId].opcode = kPseudoNormalBlockLabel;
+ return false;
+}
+
+void oatSpecialMIR2LIR(CompilationUnit* cUnit, SpecialCaseHandler specialCase)
+{
+ /* Find the first DalvikByteCode block */
+ int numReachableBlocks = cUnit->numReachableBlocks;
+ const GrowableList *blockList = &cUnit->blockList;
+ BasicBlock*bb = NULL;
+ for (int idx = 0; idx < numReachableBlocks; idx++) {
+ int dfsIndex = cUnit->dfsOrder.elemList[idx];
+ bb = (BasicBlock*)oatGrowableListGetElement(blockList, dfsIndex);
+ if (bb->blockType == kDalvikByteCode) {
+ break;
+ }
+ }
+ if (bb == NULL) {
+ return;
+ }
+ DCHECK_EQ(bb->startOffset, 0);
+ DCHECK(bb->firstMIRInsn != 0);
+
+ /* Get the first instruction */
+ MIR* mir = bb->firstMIRInsn;
+
+ /* Free temp registers and reset redundant store tracking */
+ oatResetRegPool(cUnit);
+ oatResetDefTracking(cUnit);
+ oatClobberAllRegs(cUnit);
+
+ genSpecialCase(cUnit, bb, mir, specialCase);
+}
+
void oatMethodMIR2LIR(CompilationUnit* cUnit)
{
/* Used to hold the labels of each block */
diff --git a/src/compiler/codegen/arm/Thumb2/Gen.cc b/src/compiler/codegen/arm/Thumb2/Gen.cc
index 74412ab..8e72227 100644
--- a/src/compiler/codegen/arm/Thumb2/Gen.cc
+++ b/src/compiler/codegen/arm/Thumb2/Gen.cc
@@ -26,6 +26,196 @@
namespace art {
+/* Return a RegLocation that describes an in-register argument */
+RegLocation argLoc(CompilationUnit* cUnit, RegLocation loc, int sReg)
+{
+ loc.location = kLocPhysReg;
+ int base = SRegToVReg(cUnit, sReg) - cUnit->numRegs;
+ loc.sRegLow = sReg;
+ loc.lowReg = rARG1 + base;
+ loc.home = true;
+ if (loc.wide) {
+ loc.highReg = loc.lowReg + 1;
+ oatLockTemp(cUnit, loc.lowReg);
+ oatLockTemp(cUnit, loc.highReg);
+ } else {
+ oatLockTemp(cUnit, loc.lowReg);
+ }
+ return loc;
+}
+
+/* Find the next MIR, which may be in a following basic block */
+MIR* getNextMir(CompilationUnit* cUnit, BasicBlock** pBb, MIR* mir)
+{
+ BasicBlock* bb = *pBb;
+ MIR* origMir = mir;
+ while (bb != NULL) {
+ if (mir != NULL) {
+ mir = mir->next;
+ }
+ if (mir != NULL) {
+ return mir;
+ } else {
+ bb = bb->fallThrough;
+ *pBb = bb;
+ if (bb) {
+ mir = bb->firstMIRInsn;
+ if (mir != NULL) {
+ return mir;
+ }
+ }
+ }
+ }
+ return origMir;
+}
+
+/* Used for the "printMe" listing */
+void genPrintLabel(CompilationUnit *cUnit, MIR* mir)
+{
+ LIR* boundaryLIR;
+ /* Mark the beginning of a Dalvik instruction for line tracking */
+ char* instStr = cUnit->printMe ?
+ oatGetDalvikDisassembly(cUnit, mir->dalvikInsn, "") : NULL;
+ boundaryLIR = newLIR1(cUnit, kPseudoDalvikByteCodeBoundary,
+ (intptr_t) instStr);
+ cUnit->boundaryMap.insert(std::make_pair(mir->offset,
+ (LIR*)boundaryLIR));
+ /* Don't generate the SSA annotation unless verbose mode is on */
+ if (cUnit->printMe && mir->ssaRep) {
+ char* ssaString = oatGetSSAString(cUnit, mir->ssaRep);
+ newLIR1(cUnit, kPseudoSSARep, (int) ssaString);
+ }
+}
+
+MIR* specialIGet(CompilationUnit* cUnit, BasicBlock** bb, MIR* mir,
+ OpSize size, bool longOrDouble, bool isObject)
+{
+ int fieldOffset;
+ bool isVolatile;
+ uint32_t fieldIdx = mir->dalvikInsn.vC;
+ bool fastPath = fastInstance(cUnit, fieldIdx, fieldOffset, isVolatile,
+ false);
+ if (!fastPath) {
+ return NULL;
+ }
+ mir->optimizationFlags |= MIR_IGNORE_NULL_CHECK;
+ genPrintLabel(cUnit, mir);
+ RegLocation rlObj = oatGetSrc(cUnit, mir, 0);
+ rlObj = argLoc(cUnit, rlObj, mir->ssaRep->uses[0]);
+ RegLocation rlDest;
+ if (longOrDouble) {
+ rlDest = oatGetReturnWide(cUnit, false);
+ } else {
+ rlDest = oatGetReturn(cUnit, false);
+ }
+ genIGet(cUnit, mir, size, rlDest, rlObj, longOrDouble, isObject);
+ return getNextMir(cUnit, bb, mir);
+}
+
+MIR* specialIPut(CompilationUnit* cUnit, BasicBlock** bb, MIR* mir,
+ OpSize size, bool longOrDouble, bool isObject)
+{
+ int fieldOffset;
+ bool isVolatile;
+ uint32_t fieldIdx = mir->dalvikInsn.vC;
+ bool fastPath = fastInstance(cUnit, fieldIdx, fieldOffset, isVolatile,
+ false);
+ if (!fastPath) {
+ return NULL;
+ }
+ mir->optimizationFlags |= MIR_IGNORE_NULL_CHECK;
+ genPrintLabel(cUnit, mir);
+ RegLocation rlSrc;
+ RegLocation rlObj;
+ int sSreg = mir->ssaRep->uses[0];
+ int oSreg;
+ if (longOrDouble) {
+ rlSrc = oatGetSrcWide(cUnit, mir, 0, 1);
+ rlObj = oatGetSrc(cUnit, mir, 2);
+ oSreg = mir->ssaRep->uses[2];
+ } else {
+ rlSrc = oatGetSrc(cUnit, mir, 0);
+ rlObj = oatGetSrc(cUnit, mir, 1);
+ oSreg = mir->ssaRep->uses[1];
+ }
+ rlSrc = argLoc(cUnit, rlSrc, sSreg);
+ rlObj = argLoc(cUnit, rlObj, oSreg);
+ genIPut(cUnit, mir, size, rlSrc, rlObj, longOrDouble, isObject);
+ return getNextMir(cUnit, bb, mir);
+}
+
+/*
+ * Special-case code generation for simple non-throwing leaf methods.
+ */
+void genSpecialCase(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir,
+ SpecialCaseHandler specialCase)
+{
+ cUnit->currentDalvikOffset = mir->offset;
+ MIR* nextMir = NULL;
+ switch(specialCase) {
+ case kNullMethod:
+ DCHECK(mir->dalvikInsn.opcode == Instruction::RETURN_VOID);
+ nextMir = mir;
+ break;
+ case kConstFunction:
+ genPrintLabel(cUnit, mir);
+ loadConstant(cUnit, rRET0, mir->dalvikInsn.vB);
+ nextMir = getNextMir(cUnit, &bb, mir);
+ break;
+ case kIGet:
+ nextMir = specialIGet(cUnit, &bb, mir, kWord, false, false);
+ break;;
+ case kIGetBoolean:
+ case kIGetByte:
+ nextMir = specialIGet(cUnit, &bb, mir, kUnsignedByte, false, false);
+ break;;
+ case kIGetObject:
+ nextMir = specialIGet(cUnit, &bb, mir, kWord, false, true);
+ break;;
+ case kIGetChar:
+ nextMir = specialIGet(cUnit, &bb, mir, kUnsignedHalf, false, false);
+ break;;
+ case kIGetShort:
+ nextMir = specialIGet(cUnit, &bb, mir, kSignedHalf, false, false);
+ break;;
+ case kIGetWide:
+ nextMir = specialIGet(cUnit, &bb, mir, kLong, true, false);
+ break;;
+ case kIPut:
+ nextMir = specialIPut(cUnit, &bb, mir, kWord, false, false);
+ break;;
+ case kIPutBoolean:
+ case kIPutByte:
+ nextMir = specialIPut(cUnit, &bb, mir, kUnsignedByte, false, false);
+ break;;
+ case kIPutObject:
+ nextMir = specialIPut(cUnit, &bb, mir, kWord, false, true);
+ break;;
+ case kIPutChar:
+ nextMir = specialIPut(cUnit, &bb, mir, kUnsignedHalf, false, false);
+ break;;
+ case kIPutShort:
+ nextMir = specialIPut(cUnit, &bb, mir, kSignedHalf, false, false);
+ break;;
+ case kIPutWide:
+ nextMir = specialIPut(cUnit, &bb, mir, kLong, true, false);
+ break;;
+ default:
+ return;
+ }
+ if (nextMir != NULL) {
+ cUnit->currentDalvikOffset = nextMir->offset;
+ genPrintLabel(cUnit, nextMir);
+ newLIR1(cUnit, kThumbBx, rLR);
+ cUnit->coreSpillMask = 0;
+ cUnit->numCoreSpills = 0;
+ cUnit->fpSpillMask = 0;
+ cUnit->numFPSpills = 0;
+ cUnit->frameSize = 0;
+ cUnit->coreVmapTable.clear();
+ cUnit->fpVmapTable.clear();
+ }
+}
/*
* Generate a Thumb2 IT instruction, which can nullify up to
diff --git a/src/compiler/codegen/mips/Mips32/Gen.cc b/src/compiler/codegen/mips/Mips32/Gen.cc
index 5b89e6a..4530517 100644
--- a/src/compiler/codegen/mips/Mips32/Gen.cc
+++ b/src/compiler/codegen/mips/Mips32/Gen.cc
@@ -24,6 +24,12 @@
namespace art {
+void genSpecialCase(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir,
+ SpecialCaseHandler specialCase)
+{
+ // TODO
+}
+
/*
* The lack of pc-relative loads on Mips presents somewhat of a challenge
* for our PIC switch table strategy. To materialize the current location
diff --git a/src/compiler/codegen/x86/X86/Gen.cc b/src/compiler/codegen/x86/X86/Gen.cc
index 6f33b56..c6cd55c 100644
--- a/src/compiler/codegen/x86/X86/Gen.cc
+++ b/src/compiler/codegen/x86/X86/Gen.cc
@@ -24,6 +24,12 @@
namespace art {
+void genSpecialCase(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir,
+ SpecialCaseHandler specialCase)
+{
+ // TODO
+}
+
/*
* Perform register memory operation.
*/
diff --git a/src/object.h b/src/object.h
index 62d5fed..93493fd 100644
--- a/src/object.h
+++ b/src/object.h
@@ -780,9 +780,6 @@
void SetFrameSizeInBytes(size_t new_frame_size_in_bytes) {
DCHECK_EQ(sizeof(size_t), sizeof(uint32_t));
-#if !defined(ART_USE_LLVM_COMPILER) // LLVM uses shadow stack instead.
- DCHECK_LE(static_cast<size_t>(kStackAlignment), new_frame_size_in_bytes);
-#endif
SetField32(OFFSET_OF_OBJECT_MEMBER(Method, frame_size_in_bytes_),
new_frame_size_in_bytes, false);
}