diff options
| author | 2012-03-30 01:07:54 -0700 | |
|---|---|---|
| committer | 2012-04-04 09:56:48 -0700 | |
| commit | 7caad77632ae121c9f64c488e3f8f710e2c4813d (patch) | |
| tree | 6b12ff6e0c27529f5434c5655b3306a1f79bd379 /src/compiler/codegen | |
| parent | 4855cd516d97c9728fa58312acdf6c4b8b81397a (diff) | |
Implement various missing parts of the X86 compiler
Change-Id: I76f08580600befe268328f8cf7102c6146460c5e
Diffstat (limited to 'src/compiler/codegen')
| -rw-r--r-- | src/compiler/codegen/CodegenUtil.cc | 2 | ||||
| -rw-r--r-- | src/compiler/codegen/GenCommon.cc | 71 | ||||
| -rw-r--r-- | src/compiler/codegen/GenInvoke.cc | 8 | ||||
| -rw-r--r-- | src/compiler/codegen/MethodCodegenDriver.cc | 26 | ||||
| -rw-r--r-- | src/compiler/codegen/x86/ArchFactory.cc | 106 | ||||
| -rw-r--r-- | src/compiler/codegen/x86/ArchUtility.cc | 7 | ||||
| -rw-r--r-- | src/compiler/codegen/x86/Assemble.cc | 222 | ||||
| -rw-r--r-- | src/compiler/codegen/x86/Codegen.h | 6 | ||||
| -rw-r--r-- | src/compiler/codegen/x86/X86/Factory.cc | 11 | ||||
| -rw-r--r-- | src/compiler/codegen/x86/X86/Gen.cc | 236 | ||||
| -rw-r--r-- | src/compiler/codegen/x86/X86LIR.h | 12 | ||||
| -rw-r--r-- | src/compiler/codegen/x86/X86RallocUtil.cc | 6 |
12 files changed, 477 insertions, 236 deletions
diff --git a/src/compiler/codegen/CodegenUtil.cc b/src/compiler/codegen/CodegenUtil.cc index 20eb47fc45..00e78ecf4b 100644 --- a/src/compiler/codegen/CodegenUtil.cc +++ b/src/compiler/codegen/CodegenUtil.cc @@ -657,6 +657,8 @@ void installSwitchTables(CompilationUnit* cUnit) */ #if defined(TARGET_ARM) int bxOffset = tabRec->anchor->offset + 4; +#elif defined(TARGET_X86) + int bxOffset = 0; #else int bxOffset = tabRec->anchor->offset; #endif diff --git a/src/compiler/codegen/GenCommon.cc b/src/compiler/codegen/GenCommon.cc index 3cc594cc25..444f5f2dc7 100644 --- a/src/compiler/codegen/GenCommon.cc +++ b/src/compiler/codegen/GenCommon.cc @@ -43,6 +43,20 @@ void callRuntimeHelperImm(CompilationUnit* cUnit, int helperOffset, int arg0) { #endif } +void callRuntimeHelperReg(CompilationUnit* cUnit, int helperOffset, int arg0) { +#if !defined(TARGET_X86) + int rTgt = loadHelper(cUnit, helperOffset); +#endif + opRegCopy(cUnit, rARG0, arg0); + oatClobberCalleeSave(cUnit); +#if !defined(TARGET_X86) + opReg(cUnit, kOpBlx, rTgt); + oatFreeTemp(cUnit, rTgt); +#else + opThreadMem(cUnit, kOpBlx, helperOffset); +#endif +} + void callRuntimeHelperRegLocation(CompilationUnit* cUnit, int helperOffset, RegLocation arg0) { #if !defined(TARGET_X86) @@ -431,7 +445,7 @@ void genCompareZeroAndBranch(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir, cond = (ConditionCode)0; LOG(FATAL) << "Unexpected opcode " << (int)opcode; } -#if defined(TARGET_MIPS) +#if defined(TARGET_MIPS) || defined(TARGET_X86) opCmpImmBranch(cUnit, cond, rlSrc.lowReg, 0, &labelList[bb->taken->id]); #else opRegImm(cUnit, kOpCmp, rlSrc.lowReg, 0); @@ -1811,31 +1825,34 @@ bool genArithOpInt(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, (int)mir->dalvikInsn.opcode; } if (!callOut) { - rlSrc1 = loadValue(cUnit, rlSrc1, kCoreReg); if (unary) { + rlSrc1 = loadValue(cUnit, rlSrc1, kCoreReg); rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true); opRegReg(cUnit, op, rlResult.lowReg, rlSrc1.lowReg); } else { - rlSrc2 = loadValue(cUnit, rlSrc2, kCoreReg); -#if defined(TARGET_X86) - rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true); - opRegRegReg(cUnit, op, rlResult.lowReg, - rlSrc1.lowReg, rlSrc2.lowReg); -#else if (shiftOp) { +#if !defined(TARGET_X86) + rlSrc2 = loadValue(cUnit, rlSrc2, kCoreReg); int tReg = oatAllocTemp(cUnit); opRegRegImm(cUnit, kOpAnd, tReg, rlSrc2.lowReg, 31); +#else + // X86 doesn't require masking and must use ECX + loadValueDirectFixed(cUnit, rlSrc2, rCX); + int tReg = rCX; +#endif + rlSrc1 = loadValue(cUnit, rlSrc1, kCoreReg); rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true); opRegRegReg(cUnit, op, rlResult.lowReg, rlSrc1.lowReg, tReg); oatFreeTemp(cUnit, tReg); } else { + rlSrc1 = loadValue(cUnit, rlSrc1, kCoreReg); + rlSrc2 = loadValue(cUnit, rlSrc2, kCoreReg); rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true); opRegRegReg(cUnit, op, rlResult.lowReg, rlSrc1.lowReg, rlSrc2.lowReg); } -#endif } storeValue(cUnit, rlDest, rlResult); } else { @@ -2151,12 +2168,8 @@ bool genArithOpLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, break; case Instruction::ADD_LONG: case Instruction::ADD_LONG_2ADDR: -#if defined(TARGET_MIPS) +#if defined(TARGET_MIPS) || defined(TARGET_X86) return genAddLong(cUnit, mir, rlDest, rlSrc1, rlSrc2); -#elif defined(TARGET_X86) - callOut = true; - retReg = rRET0; - funcOffset = ENTRYPOINT_OFFSET(pLadd); #else firstOp = kOpAdd; secondOp = kOpAdc; @@ -2164,16 +2177,13 @@ bool genArithOpLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, #endif case Instruction::SUB_LONG: case Instruction::SUB_LONG_2ADDR: -#if defined(TARGET_MIPS) +#if defined(TARGET_MIPS) || defined(TARGET_X86) return genSubLong(cUnit, mir, rlDest, rlSrc1, rlSrc2); -#elif defined(TARGET_X86) - callOut = true; - retReg = rRET0; - funcOffset = ENTRYPOINT_OFFSET(pLsub); -#endif +#else firstOp = kOpSub; secondOp = kOpSbc; break; +#endif case Instruction::MUL_LONG: case Instruction::MUL_LONG_2ADDR: callOut = true; @@ -2199,33 +2209,30 @@ bool genArithOpLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, case Instruction::AND_LONG_2ADDR: case Instruction::AND_LONG: #if defined(TARGET_X86) - callOut = true; - retReg = rRET0; - funcOffset = ENTRYPOINT_OFFSET(pLand); -#endif + return genAndLong(cUnit, mir, rlDest, rlSrc1, rlSrc2); +#else firstOp = kOpAnd; secondOp = kOpAnd; break; +#endif case Instruction::OR_LONG: case Instruction::OR_LONG_2ADDR: #if defined(TARGET_X86) - callOut = true; - retReg = rRET0; - funcOffset = ENTRYPOINT_OFFSET(pLor); -#endif + return genOrLong(cUnit, mir, rlDest, rlSrc1, rlSrc2); +#else firstOp = kOpOr; secondOp = kOpOr; break; +#endif case Instruction::XOR_LONG: case Instruction::XOR_LONG_2ADDR: #if defined(TARGET_X86) - callOut = true; - retReg = rRET0; - funcOffset = ENTRYPOINT_OFFSET(pLxor); -#endif + return genXorLong(cUnit, mir, rlDest, rlSrc1, rlSrc2); +#else firstOp = kOpXor; secondOp = kOpXor; break; +#endif case Instruction::NEG_LONG: { return genNegLong(cUnit, mir, rlDest, rlSrc2); } diff --git a/src/compiler/codegen/GenInvoke.cc b/src/compiler/codegen/GenInvoke.cc index 8a9d1f5765..a904419138 100644 --- a/src/compiler/codegen/GenInvoke.cc +++ b/src/compiler/codegen/GenInvoke.cc @@ -286,15 +286,15 @@ int nextInvokeInsnSP(CompilationUnit* cUnit, MIR* mir, int trampoline, * This handles the case in which the base method is not fully * resolved at compile time, we bail to a runtime helper. */ -#if !defined(TARGET_X86) if (state == 0) { +#if !defined(TARGET_X86) // Load trampoline target loadWordDisp(cUnit, rSELF, trampoline, rINVOKE_TGT); +#endif // Load rARG0 with method index loadConstant(cUnit, rARG0, dexIdx); return 1; } -#endif return -1; } @@ -357,11 +357,7 @@ int loadArgRegs(CompilationUnit* cUnit, MIR* mir, DecodedInstruction* dInsn, uint32_t methodIdx, uintptr_t directCode, uintptr_t directMethod, InvokeType type, bool skipThis) { -#if !defined(TARGET_X86) int lastArgReg = rARG3; -#else - int lastArgReg = rARG2; -#endif int nextReg = rARG1; int nextArg = 0; if (skipThis) { diff --git a/src/compiler/codegen/MethodCodegenDriver.cc b/src/compiler/codegen/MethodCodegenDriver.cc index 5ffe3e44c2..b28df0183e 100644 --- a/src/compiler/codegen/MethodCodegenDriver.cc +++ b/src/compiler/codegen/MethodCodegenDriver.cc @@ -128,11 +128,31 @@ void genInvoke(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir, #if !defined(TARGET_X86) opReg(cUnit, kOpBlx, rINVOKE_TGT); #else - if (fastPath) { + if (fastPath && type != kInterface) { opMem(cUnit, kOpBlx, rARG0, Method::GetCodeOffset().Int32Value()); } else { - UNIMPLEMENTED(FATAL) << "compute trampoline"; - opThreadMem(cUnit, kOpBlx, 0); + int trampoline = 0; + switch (type) { + case kInterface: + trampoline = fastPath ? ENTRYPOINT_OFFSET(pInvokeInterfaceTrampoline) + : ENTRYPOINT_OFFSET(pInvokeInterfaceTrampolineWithAccessCheck); + break; + case kDirect: + trampoline = ENTRYPOINT_OFFSET(pInvokeDirectTrampolineWithAccessCheck); + break; + case kStatic: + trampoline = ENTRYPOINT_OFFSET(pInvokeStaticTrampolineWithAccessCheck); + break; + case kSuper: + trampoline = ENTRYPOINT_OFFSET(pInvokeSuperTrampolineWithAccessCheck); + break; + case kVirtual: + trampoline = ENTRYPOINT_OFFSET(pInvokeVirtualTrampolineWithAccessCheck); + break; + default: + LOG(FATAL) << "Unexpected invoke type"; + } + opThreadMem(cUnit, kOpBlx, trampoline); } #endif diff --git a/src/compiler/codegen/x86/ArchFactory.cc b/src/compiler/codegen/x86/ArchFactory.cc index bd95afbf76..e6682505ef 100644 --- a/src/compiler/codegen/x86/ArchFactory.cc +++ b/src/compiler/codegen/x86/ArchFactory.cc @@ -24,30 +24,94 @@ namespace art { +bool genAddLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, + RegLocation rlSrc1, RegLocation rlSrc2) +{ + oatFlushAllRegs(cUnit); + oatLockCallTemps(cUnit); // Prepare for explicit register usage + loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1); + loadValueDirectWideFixed(cUnit, rlSrc1, r2, r3); + // Compute (r1:r0) = (r1:r0) + (r2:r3) + opRegReg(cUnit, kOpAdd, r0, r2); // r0 = r0 + r2 + opRegReg(cUnit, kOpAdc, r1, r3); // r1 = r1 + r3 + CF + RegLocation rlResult = {kLocPhysReg, 1, 0, 0, 0, 0, 1, r0, r1, INVALID_SREG}; + storeValueWide(cUnit, rlDest, rlResult); + return false; +} + +bool genSubLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, + RegLocation rlSrc1, RegLocation rlSrc2) +{ + oatFlushAllRegs(cUnit); + oatLockCallTemps(cUnit); // Prepare for explicit register usage + loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1); + loadValueDirectWideFixed(cUnit, rlSrc1, r2, r3); + // Compute (r1:r0) = (r1:r0) + (r2:r3) + opRegReg(cUnit, kOpSub, r0, r2); // r0 = r0 - r2 + opRegReg(cUnit, kOpSbc, r1, r3); // r1 = r1 - r3 - CF + RegLocation rlResult = {kLocPhysReg, 1, 0, 0, 0, 0, 1, r0, r1, INVALID_SREG}; + storeValueWide(cUnit, rlDest, rlResult); + return false; +} + +bool genAndLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, + RegLocation rlSrc1, RegLocation rlSrc2) +{ + oatFlushAllRegs(cUnit); + oatLockCallTemps(cUnit); // Prepare for explicit register usage + loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1); + loadValueDirectWideFixed(cUnit, rlSrc1, r2, r3); + // Compute (r1:r0) = (r1:r0) + (r2:r3) + opRegReg(cUnit, kOpAnd, r0, r2); // r0 = r0 - r2 + opRegReg(cUnit, kOpAnd, r1, r3); // r1 = r1 - r3 - CF + RegLocation rlResult = {kLocPhysReg, 1, 0, 0, 0, 0, 1, r0, r1, INVALID_SREG}; + storeValueWide(cUnit, rlDest, rlResult); + return false; +} + +bool genOrLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, + RegLocation rlSrc1, RegLocation rlSrc2) +{ + oatFlushAllRegs(cUnit); + oatLockCallTemps(cUnit); // Prepare for explicit register usage + loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1); + loadValueDirectWideFixed(cUnit, rlSrc1, r2, r3); + // Compute (r1:r0) = (r1:r0) + (r2:r3) + opRegReg(cUnit, kOpOr, r0, r2); // r0 = r0 - r2 + opRegReg(cUnit, kOpOr, r1, r3); // r1 = r1 - r3 - CF + RegLocation rlResult = {kLocPhysReg, 1, 0, 0, 0, 0, 1, r0, r1, INVALID_SREG}; + storeValueWide(cUnit, rlDest, rlResult); + return false; +} + +bool genXorLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, + RegLocation rlSrc1, RegLocation rlSrc2) +{ + oatFlushAllRegs(cUnit); + oatLockCallTemps(cUnit); // Prepare for explicit register usage + loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1); + loadValueDirectWideFixed(cUnit, rlSrc1, r2, r3); + // Compute (r1:r0) = (r1:r0) + (r2:r3) + opRegReg(cUnit, kOpXor, r0, r2); // r0 = r0 - r2 + opRegReg(cUnit, kOpXor, r1, r3); // r1 = r1 - r3 - CF + RegLocation rlResult = {kLocPhysReg, 1, 0, 0, 0, 0, 1, r0, r1, INVALID_SREG}; + storeValueWide(cUnit, rlDest, rlResult); + return false; +} + bool genNegLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, RegLocation rlSrc) { - UNIMPLEMENTED(WARNING) << "genNegLong"; -#if 0 - rlSrc = loadValueWide(cUnit, rlSrc, kCoreReg); - RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true); - /* - * [v1 v0] = -[a1 a0] - * negu v0,a0 - * negu v1,a1 - * sltu t1,r_zero - * subu v1,v1,t1 - */ - - opRegReg(cUnit, kOpNeg, rlResult.lowReg, rlSrc.lowReg); - opRegReg(cUnit, kOpNeg, rlResult.highReg, rlSrc.highReg); - int tReg = oatAllocTemp(cUnit); - newLIR3(cUnit, kX86Sltu, tReg, r_ZERO, rlResult.lowReg); - opRegRegReg(cUnit, kOpSub, rlResult.highReg, rlResult.highReg, tReg); - oatFreeTemp(cUnit, tReg); - storeValueWide(cUnit, rlDest, rlResult); -#endif - return false; + oatFlushAllRegs(cUnit); + oatLockCallTemps(cUnit); // Prepare for explicit register usage + loadValueDirectWideFixed(cUnit, rlSrc, r0, r1); + // Compute (r1:r0) = -(r1:r0) + opRegReg(cUnit, kOpNeg, r0, r0); // r0 = -r0 + opRegImm(cUnit, kOpAdc, r1, 0); // r1 = r1 + CF + opRegReg(cUnit, kOpNeg, r1, r1); // r1 = -r1 + RegLocation rlResult = {kLocPhysReg, 1, 0, 0, 0, 0, 1, r0, r1, INVALID_SREG}; + storeValueWide(cUnit, rlDest, rlResult); + return false; } void genDebuggerUpdate(CompilationUnit* cUnit, int32_t offset); diff --git a/src/compiler/codegen/x86/ArchUtility.cc b/src/compiler/codegen/x86/ArchUtility.cc index 6c54e34469..d325f5c035 100644 --- a/src/compiler/codegen/x86/ArchUtility.cc +++ b/src/compiler/codegen/x86/ArchUtility.cc @@ -49,7 +49,7 @@ static const char* x86CondName[] = { /* * Interpret a format string and build a string no longer than size - * See format key in Assemble.c. + * See format key in Assemble.cc. */ std::string buildInsnString(const char *fmt, LIR *lir, unsigned char* baseAddr) { std::string buf; @@ -79,6 +79,11 @@ std::string buildInsnString(const char *fmt, LIR *lir, unsigned char* baseAddr) case 'd': buf += StringPrintf("%d", operand); break; + case 'p': { + SwitchTable *tabRec = reinterpret_cast<SwitchTable*>(operand); + buf += StringPrintf("0x%08x", tabRec->offset); + break; + } case 'r': if (FPREG(operand) || DOUBLEREG(operand)) { int fp_reg = operand & FP_REG_MASK; diff --git a/src/compiler/codegen/x86/Assemble.cc b/src/compiler/codegen/x86/Assemble.cc index d1a8d64abe..2639057aa5 100644 --- a/src/compiler/codegen/x86/Assemble.cc +++ b/src/compiler/codegen/x86/Assemble.cc @@ -26,7 +26,7 @@ namespace art { X86EncodingMap EncodingMap[kX86Last] = { { kX8632BitData, kData, IS_UNARY_OP, { 0, 0, 0x00, 0, 0, 0, 0, 4 }, "data", "0x!0d" }, - { kX86Bkpt, kNullary, NO_OPERAND | IS_BRANCH, { 0, 0, 0xCC, 0, 0, 0, 0, 4 }, "int 3", "" }, + { kX86Bkpt, kNullary, NO_OPERAND | IS_BRANCH, { 0, 0, 0xCC, 0, 0, 0, 0, 0 }, "int 3", "" }, { kX86Nop, kNop, IS_UNARY_OP, { 0, 0, 0x90, 0, 0, 0, 0, 0 }, "nop", "" }, #define ENCODING_MAP(opname, is_store, \ @@ -197,17 +197,16 @@ ENCODING_MAP(Cmp, IS_LOAD, { kX86 ## opname ## 32RI, kShiftRegImm, IS_BINARY_OP | SETS_CCODES, { 0, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "32RI", "!0r,!1d" }, \ { kX86 ## opname ## 32MI, kShiftMemImm, IS_TERTIARY_OP | SETS_CCODES, { 0, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "32MI", "[!0r+!1d],!2r" }, \ { kX86 ## opname ## 32AI, kShiftArrayImm, IS_QUIN_OP | SETS_CCODES, { 0, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "32AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ -{ kX86 ## opname ## 32RC, kShiftRegCl, IS_BINARY_OP | SETS_CCODES, { 0, 0, 0xD3, 0, 0, modrm_opcode, 0, 1 }, #opname "32RC", "" }, \ -{ kX86 ## opname ## 32MC, kShiftMemCl, IS_TERTIARY_OP | SETS_CCODES, { 0, 0, 0xD3, 0, 0, modrm_opcode, 0, 1 }, #opname "32MC", "" }, \ -{ kX86 ## opname ## 32AC, kShiftArrayCl, IS_QUIN_OP | SETS_CCODES, { 0, 0, 0xD3, 0, 0, modrm_opcode, 0, 1 }, #opname "32AC", "" } +{ kX86 ## opname ## 32RC, kShiftRegCl, IS_BINARY_OP | SETS_CCODES, { 0, 0, 0xD3, 0, 0, modrm_opcode, 0, 0 }, #opname "32RC", "" }, \ +{ kX86 ## opname ## 32MC, kShiftMemCl, IS_TERTIARY_OP | SETS_CCODES, { 0, 0, 0xD3, 0, 0, modrm_opcode, 0, 0 }, #opname "32MC", "" }, \ +{ kX86 ## opname ## 32AC, kShiftArrayCl, IS_QUIN_OP | SETS_CCODES, { 0, 0, 0xD3, 0, 0, modrm_opcode, 0, 0 }, #opname "32AC", "" } SHIFT_ENCODING_MAP(Rol, 0x0), SHIFT_ENCODING_MAP(Ror, 0x1), SHIFT_ENCODING_MAP(Rcl, 0x2), SHIFT_ENCODING_MAP(Rcr, 0x3), SHIFT_ENCODING_MAP(Sal, 0x4), - SHIFT_ENCODING_MAP(Shl, 0x5), - SHIFT_ENCODING_MAP(Shr, 0x6), + SHIFT_ENCODING_MAP(Shr, 0x5), SHIFT_ENCODING_MAP(Sar, 0x7), #undef SHIFT_ENCODING_MAP @@ -295,11 +294,16 @@ ENCODING_MAP(Cmp, IS_LOAD, { kX86Jcc32, kJcc, IS_BINARY_OP | IS_BRANCH | NEEDS_FIXUP, { 0, 0, 0x0F, 0x80, 0, 0, 0, 0 }, "Jcc32", "!1c !0t" }, { kX86Jmp8, kJmp, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP, { 0, 0, 0xEB, 0, 0, 0, 0, 0 }, "Jmp8", "!0t" }, { kX86Jmp32, kJmp, IS_UNARY_OP | IS_BRANCH | NEEDS_FIXUP, { 0, 0, 0xE9, 0, 0, 0, 0, 0 }, "Jmp32", "!0t" }, - { kX86CallR, kCall, IS_UNARY_OP | IS_BRANCH, { 0, 0, 0xE8, 0, 0, 0, 0, 0 }, "CallR", "!0r" }, - { kX86CallM, kCall, IS_BINARY_OP | IS_BRANCH | IS_LOAD, { 0, 0, 0xFF, 0, 0, 2, 0, 0 }, "CallM", "[!0r+!1d]" }, - { kX86CallA, kCall, IS_QUAD_OP | IS_BRANCH | IS_LOAD, { 0, 0, 0xFF, 0, 0, 2, 0, 0 }, "CallA", "[!0r+!1r<<!2d+!3d]" }, - { kX86CallT, kCall, IS_UNARY_OP | IS_BRANCH | IS_LOAD, { THREAD_PREFIX, 0, 0xFF, 0, 0, 2, 0, 0 }, "CallT", "fs:[!0d]" }, - { kX86Ret, kNullary,NO_OPERAND | IS_BRANCH, { 0, 0, 0xC3, 0, 0, 0, 0, 0 }, "Ret", "" }, + { kX86JmpR, kJmp, IS_UNARY_OP | IS_BRANCH, { 0, 0, 0xFF, 0, 0, 4, 0, 0 }, "JmpR", "!0r" }, + { kX86CallR, kCall, IS_UNARY_OP | IS_BRANCH, { 0, 0, 0xE8, 0, 0, 0, 0, 0 }, "CallR", "!0r" }, + { kX86CallM, kCall, IS_BINARY_OP | IS_BRANCH | IS_LOAD, { 0, 0, 0xFF, 0, 0, 2, 0, 0 }, "CallM", "[!0r+!1d]" }, + { kX86CallA, kCall, IS_QUAD_OP | IS_BRANCH | IS_LOAD, { 0, 0, 0xFF, 0, 0, 2, 0, 0 }, "CallA", "[!0r+!1r<<!2d+!3d]" }, + { kX86CallT, kCall, IS_UNARY_OP | IS_BRANCH | IS_LOAD, { THREAD_PREFIX, 0, 0xFF, 0, 0, 2, 0, 0 }, "CallT", "fs:[!0d]" }, + { kX86Ret, kNullary,NO_OPERAND | IS_BRANCH, { 0, 0, 0xC3, 0, 0, 0, 0, 0 }, "Ret", "" }, + + { kX86StartOfMethod, kMacro, IS_UNARY_OP | SETS_CCODES, { 0,0,0,0,0,0,0,0 }, "StartOfMethod", "!0r" }, + { kX86PcRelLoadRA, kPcRel, IS_LOAD | IS_QUIN_OP, { 0, 0, 0x8B, 0, 0, 0, 0, 0 }, "PcRelLoadRA", "!0r,[!1r+!2r<<!3d+!4p]" }, + { kX86PcRelAdr, kPcRel, IS_LOAD | IS_BINARY_OP, { 0, 0, 0xB8, 0, 0, 0, 0, 4 }, "PcRelAdr", "!0r,!1d" }, }; static size_t computeSize(X86EncodingMap* entry, int displacement, bool has_sib) { @@ -323,7 +327,7 @@ static size_t computeSize(X86EncodingMap* entry, int displacement, bool has_sib) } if (displacement != 0) { if (entry->opcode != kX86Lea32RA) { - DCHECK_NE(entry->flags & (IS_LOAD | IS_STORE), 0); + DCHECK_NE(entry->flags & (IS_LOAD | IS_STORE), 0) << entry->name; } size += IS_SIMM8(displacement) ? 1 : 4; } @@ -428,9 +432,11 @@ int oatGetInsnSize(LIR* lir) { case kJmp: if (lir->opcode == kX86Jmp8) { return 2; // opcode + rel8 - } else { - DCHECK(lir->opcode == kX86Jmp32); + } else if (lir->opcode == kX86Jmp32) { return 5; // opcode + rel32 + } else { + DCHECK(lir->opcode == kX86JmpR); + return 2; // opcode + modrm } case kCall: switch (lir->opcode) { @@ -445,6 +451,19 @@ int oatGetInsnSize(LIR* lir) { break; } break; + case kPcRel: + if (entry->opcode == kX86PcRelLoadRA) { + // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: table + return computeSize(entry, 0x12345678, true); + } else { + DCHECK(entry->opcode == kX86PcRelAdr); + return 5; // opcode with reg + 4 byte immediate + } + case kMacro: + DCHECK_EQ(lir->opcode, static_cast<int>(kX86StartOfMethod)); + return 5 /* call opcode + 4 byte displacement */ + 1 /* pop reg */ + + computeSize(&EncodingMap[kX86Sub32RI], 0, false) - + (lir->operands[0] == rAX ? 1 : 0); // shorter ax encoding default: break; } @@ -802,7 +821,7 @@ static void emitMovRegImm(CompilationUnit* cUnit, const X86EncodingMap* entry, } static void emitShiftRegImm(CompilationUnit* cUnit, const X86EncodingMap* entry, - uint8_t reg, int imm) { + uint8_t reg, int imm) { if (entry->skeleton.prefix1 != 0) { cUnit->codeBuffer.push_back(entry->skeleton.prefix1); if (entry->skeleton.prefix2 != 0) { @@ -829,7 +848,7 @@ static void emitShiftRegImm(CompilationUnit* cUnit, const X86EncodingMap* entry, DCHECK_EQ(0, entry->skeleton.extra_opcode2); } DCHECK_LT(reg, 8); - uint8_t modrm = (0 << 6) | (entry->skeleton.modrm_opcode << 3) | reg; + uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | reg; cUnit->codeBuffer.push_back(modrm); if (imm != 1) { DCHECK_EQ(entry->skeleton.immediate_bytes, 1); @@ -838,18 +857,67 @@ static void emitShiftRegImm(CompilationUnit* cUnit, const X86EncodingMap* entry, } } +static void emitShiftRegCl(CompilationUnit* cUnit, const X86EncodingMap* entry, + uint8_t reg, uint8_t cl) { + DCHECK_EQ(cl, static_cast<uint8_t>(rCX)); + if (entry->skeleton.prefix1 != 0) { + cUnit->codeBuffer.push_back(entry->skeleton.prefix1); + if (entry->skeleton.prefix2 != 0) { + cUnit->codeBuffer.push_back(entry->skeleton.prefix2); + } + } else { + DCHECK_EQ(0, entry->skeleton.prefix2); + } + cUnit->codeBuffer.push_back(entry->skeleton.opcode); + DCHECK_EQ(0, entry->skeleton.extra_opcode1); + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + DCHECK_LT(reg, 8); + uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | reg; + cUnit->codeBuffer.push_back(modrm); + DCHECK_EQ(0, entry->skeleton.ax_opcode); + DCHECK_EQ(0, entry->skeleton.immediate_bytes); +} + +static void emitRegCond(CompilationUnit* cUnit, const X86EncodingMap* entry, + uint8_t reg, uint8_t condition) { + if (entry->skeleton.prefix1 != 0) { + cUnit->codeBuffer.push_back(entry->skeleton.prefix1); + if (entry->skeleton.prefix2 != 0) { + cUnit->codeBuffer.push_back(entry->skeleton.prefix2); + } + } else { + DCHECK_EQ(0, entry->skeleton.prefix2); + } + DCHECK_EQ(0, entry->skeleton.ax_opcode); + DCHECK_EQ(0x0F, entry->skeleton.opcode); + cUnit->codeBuffer.push_back(0x0F); + DCHECK_EQ(0x90, entry->skeleton.extra_opcode1); + cUnit->codeBuffer.push_back(0x90 | condition); + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + DCHECK_LT(reg, 8); + uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | reg; + cUnit->codeBuffer.push_back(modrm); + DCHECK_EQ(entry->skeleton.immediate_bytes, 0); +} + static void emitJmp(CompilationUnit* cUnit, const X86EncodingMap* entry, int rel) { if (entry->opcode == kX86Jmp8) { DCHECK(IS_SIMM8(rel)); cUnit->codeBuffer.push_back(0xEB); cUnit->codeBuffer.push_back(rel & 0xFF); - } else { - DCHECK(entry->opcode == kX86Jmp32); + } else if (entry->opcode == kX86Jmp32) { cUnit->codeBuffer.push_back(0xE9); cUnit->codeBuffer.push_back(rel & 0xFF); cUnit->codeBuffer.push_back((rel >> 8) & 0xFF); cUnit->codeBuffer.push_back((rel >> 16) & 0xFF); cUnit->codeBuffer.push_back((rel >> 24) & 0xFF); + } else { + DCHECK(entry->opcode == kX86JmpR); + cUnit->codeBuffer.push_back(entry->skeleton.opcode); + uint8_t reg = static_cast<uint8_t>(rel); + DCHECK_LT(reg, 8); + uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | reg; + cUnit->codeBuffer.push_back(modrm); } } @@ -932,6 +1000,68 @@ static void emitCallThread(CompilationUnit* cUnit, const X86EncodingMap* entry, DCHECK_EQ(0, entry->skeleton.immediate_bytes); } +static void emitPcRel(CompilationUnit* cUnit, const X86EncodingMap* entry, uint8_t reg, + int base_or_table, uint8_t index, int scale, int table_or_disp) { + int disp; + if (entry->opcode == kX86PcRelLoadRA) { + SwitchTable *tabRec = (SwitchTable*)table_or_disp; + disp = tabRec->offset; + } else { + DCHECK(entry->opcode == kX86PcRelAdr); + FillArrayData *tabRec = (FillArrayData *)base_or_table; + disp = tabRec->offset; + } + if (entry->skeleton.prefix1 != 0) { + cUnit->codeBuffer.push_back(entry->skeleton.prefix1); + if (entry->skeleton.prefix2 != 0) { + cUnit->codeBuffer.push_back(entry->skeleton.prefix2); + } + } else { + DCHECK_EQ(0, entry->skeleton.prefix2); + } + if (FPREG(reg)) { + reg = reg & FP_REG_MASK; + } + DCHECK_LT(reg, 8); + if (entry->opcode == kX86PcRelLoadRA) { + cUnit->codeBuffer.push_back(entry->skeleton.opcode); + DCHECK_EQ(0, entry->skeleton.extra_opcode1); + DCHECK_EQ(0, entry->skeleton.extra_opcode2); + uint8_t modrm = (2 << 6) | (reg << 3) | rSP; + cUnit->codeBuffer.push_back(modrm); + DCHECK_LT(scale, 4); + DCHECK_LT(index, 8); + DCHECK_LT(base_or_table, 8); + uint8_t base = static_cast<uint8_t>(base_or_table); + uint8_t sib = (scale << 6) | (index << 3) | base; + cUnit->codeBuffer.push_back(sib); + DCHECK_EQ(0, entry->skeleton.immediate_bytes); + } else { + cUnit->codeBuffer.push_back(entry->skeleton.opcode + reg); + } + cUnit->codeBuffer.push_back(disp & 0xFF); + cUnit->codeBuffer.push_back((disp >> 8) & 0xFF); + cUnit->codeBuffer.push_back((disp >> 16) & 0xFF); + cUnit->codeBuffer.push_back((disp >> 24) & 0xFF); + DCHECK_EQ(0, entry->skeleton.modrm_opcode); + DCHECK_EQ(0, entry->skeleton.ax_opcode); +} + +static void emitMacro(CompilationUnit* cUnit, const X86EncodingMap* entry, + uint8_t reg, int offset) { + DCHECK(entry->opcode == kX86StartOfMethod) << entry->name; + cUnit->codeBuffer.push_back(0xE8); // call +0 + cUnit->codeBuffer.push_back(0); + cUnit->codeBuffer.push_back(0); + cUnit->codeBuffer.push_back(0); + cUnit->codeBuffer.push_back(0); + + DCHECK_LT(reg, 8); + cUnit->codeBuffer.push_back(0x58 + reg); // pop reg + + emitRegImm(cUnit, &EncodingMap[kX86Sub32RI], reg, offset + 5 /* size of call +0 */); +} + void emitUnimplemented(CompilationUnit* cUnit, const X86EncodingMap* entry, LIR* lir) { UNIMPLEMENTED(WARNING) << "encoding for: " << entry->name; for (int i = 0; i < oatGetInsnSize(lir); ++i) { @@ -949,7 +1079,7 @@ AssemblerStatus oatAssembleInstructions(CompilationUnit *cUnit, intptr_t startAd LIR *lir; AssemblerStatus res = kSuccess; // Assume success - const bool kVerbosePcFixup = false; + const bool kVerbosePcFixup = cUnit->method_idx == 9703; for (lir = (LIR *) cUnit->firstLIRInsn; lir; lir = NEXT_LIR(lir)) { if (lir->opcode < 0) { continue; @@ -982,6 +1112,29 @@ AssemblerStatus oatAssembleInstructions(CompilationUnit *cUnit, intptr_t startAd oatSetupResourceMasks(lir); res = kRetryAll; } + if (kVerbosePcFixup) { + LOG(INFO) << "Source:"; + oatDumpLIRInsn(cUnit, lir, 0); + LOG(INFO) << "Target:"; + oatDumpLIRInsn(cUnit, targetLIR, 0); + LOG(INFO) << "Delta " << delta; + } + lir->operands[0] = delta; + break; + } + case kX86Jcc32: { + LIR *targetLIR = lir->target; + DCHECK(targetLIR != NULL); + intptr_t pc = lir->offset + 6 /* 2 byte opcode + rel32 */; + intptr_t target = targetLIR->offset; + int delta = target - pc; + if (kVerbosePcFixup) { + LOG(INFO) << "Source:"; + oatDumpLIRInsn(cUnit, lir, 0); + LOG(INFO) << "Target:"; + oatDumpLIRInsn(cUnit, targetLIR, 0); + LOG(INFO) << "Delta " << delta; + } lir->operands[0] = delta; break; } @@ -1015,6 +1168,15 @@ AssemblerStatus oatAssembleInstructions(CompilationUnit *cUnit, intptr_t startAd lir->operands[0] = delta; break; } + case kX86Jmp32: { + LIR *targetLIR = lir->target; + DCHECK(targetLIR != NULL); + intptr_t pc = lir->offset + 5 /* opcode + rel32 */; + intptr_t target = targetLIR->offset; + int delta = target - pc; + lir->operands[0] = delta; + break; + } default: break; } @@ -1028,6 +1190,7 @@ AssemblerStatus oatAssembleInstructions(CompilationUnit *cUnit, intptr_t startAd if (res != kSuccess) { continue; } + CHECK_EQ(static_cast<size_t>(lir->offset), cUnit->codeBuffer.size()); const X86EncodingMap *entry = &EncodingMap[lir->opcode]; size_t starting_cbuf_size = cUnit->codeBuffer.size(); switch (entry->kind) { @@ -1088,6 +1251,12 @@ AssemblerStatus oatAssembleInstructions(CompilationUnit *cUnit, intptr_t startAd case kShiftRegImm: // lir operands - 0: reg, 1: immediate emitShiftRegImm(cUnit, entry, lir->operands[0], lir->operands[1]); break; + case kShiftRegCl: // lir operands - 0: reg, 1: cl + emitShiftRegCl(cUnit, entry, lir->operands[0], lir->operands[1]); + break; + case kRegCond: // lir operands - 0: reg, 1: condition + emitRegCond(cUnit, entry, lir->operands[0], lir->operands[1]); + break; case kJmp: // lir operands - 0: rel emitJmp(cUnit, entry, lir->operands[0]); break; @@ -1107,15 +1276,20 @@ AssemblerStatus oatAssembleInstructions(CompilationUnit *cUnit, intptr_t startAd break; } break; + case kPcRel: // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: table + emitPcRel(cUnit, entry, lir->operands[0], lir->operands[1], lir->operands[2], + lir->operands[3], lir->operands[4]); + break; + case kMacro: + emitMacro(cUnit, entry, lir->operands[0], lir->offset); + break; default: emitUnimplemented(cUnit, entry, lir); break; } - if (entry->kind != kJcc && entry->kind != kJmp) { - CHECK_EQ(static_cast<size_t>(oatGetInsnSize(lir)), - cUnit->codeBuffer.size() - starting_cbuf_size) - << "Instruction size mismatch for entry: " << EncodingMap[lir->opcode].name; - } + CHECK_EQ(static_cast<size_t>(oatGetInsnSize(lir)), + cUnit->codeBuffer.size() - starting_cbuf_size) + << "Instruction size mismatch for entry: " << EncodingMap[lir->opcode].name; } return res; } diff --git a/src/compiler/codegen/x86/Codegen.h b/src/compiler/codegen/x86/Codegen.h index 178b98677a..52ba7c1fdb 100644 --- a/src/compiler/codegen/x86/Codegen.h +++ b/src/compiler/codegen/x86/Codegen.h @@ -31,6 +31,12 @@ bool genAddLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, RegLocation rlSrc1, RegLocation rlSrc2); bool genSubLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, RegLocation rlSrc1, RegLocation rlSrc2); +bool genAndLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, + RegLocation rlSrc1, RegLocation rlSrc2); +bool genOrLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, + RegLocation rlSrc1, RegLocation rlSrc2); +bool genXorLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, + RegLocation rlSrc1, RegLocation rlSrc2); bool genNegLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, RegLocation rlSrc); LIR *opRegImm(CompilationUnit* cUnit, OpKind op, int rDestSrc1, int value); diff --git a/src/compiler/codegen/x86/X86/Factory.cc b/src/compiler/codegen/x86/X86/Factory.cc index 4987c2856c..9421744f69 100644 --- a/src/compiler/codegen/x86/X86/Factory.cc +++ b/src/compiler/codegen/x86/X86/Factory.cc @@ -231,6 +231,7 @@ LIR* opRegRegReg(CompilationUnit *cUnit, OpKind op, int rDest, int rSrc1, int rS if (rDest != rSrc1 && rDest != rSrc2) { if (op == kOpAdd) { // lea special case, except can't encode rbp as base if (rSrc1 == rSrc2) { + opRegCopy(cUnit, rDest, rSrc1); return opRegImm(cUnit, kOpLsl, rDest, 1); } else if (rSrc1 != rBP) { return newLIR5(cUnit, kX86Lea32RA, rDest, rSrc1 /* base */, @@ -285,9 +286,10 @@ LIR* opRegRegImm(CompilationUnit *cUnit, OpKind op, int rDest, int rSrc, int val } } if (rDest != rSrc) { - if (op == kOpLsl && value >= 0 && value <= 3) { // lea shift special case - return newLIR5(cUnit, kX86Lea32RA, rDest, rSrc /* base */, - r4sib_no_index /* index */, value /* scale */, value /* disp */); + if (false && op == kOpLsl && value >= 0 && value <= 3) { // lea shift special case + // TODO: fix bug in LEA encoding when disp == 0 + return newLIR5(cUnit, kX86Lea32RA, rDest, r5sib_no_base /* base */, + rSrc /* index */, value /* scale */, 0 /* disp */); } else if (op == kOpAdd) { // lea add special case return newLIR5(cUnit, kX86Lea32RA, rDest, rSrc /* base */, r4sib_no_index /* index */, 0 /* scale */, value /* disp */); @@ -351,6 +353,7 @@ LIR *loadBaseIndexed(CompilationUnit *cUnit, int rBase, int rIndex, int rDest, int scale, OpSize size) { UNIMPLEMENTED(WARNING) << "loadBaseIndexed"; + newLIR0(cUnit, kX86Bkpt); return NULL; #if 0 LIR *first = NULL; @@ -406,6 +409,7 @@ LIR *loadBaseIndexed(CompilationUnit *cUnit, int rBase, LIR *loadMultiple(CompilationUnit *cUnit, int rBase, int rMask) { UNIMPLEMENTED(WARNING) << "loadMultiple"; + newLIR0(cUnit, kX86Bkpt); return NULL; #if 0 int i; @@ -432,6 +436,7 @@ LIR *loadMultiple(CompilationUnit *cUnit, int rBase, int rMask) LIR *storeMultiple(CompilationUnit *cUnit, int rBase, int rMask) { UNIMPLEMENTED(WARNING) << "storeMultiple"; + newLIR0(cUnit, kX86Bkpt); return NULL; #if 0 int i; diff --git a/src/compiler/codegen/x86/X86/Gen.cc b/src/compiler/codegen/x86/X86/Gen.cc index f957cbc1a2..31939f2e78 100644 --- a/src/compiler/codegen/x86/X86/Gen.cc +++ b/src/compiler/codegen/x86/X86/Gen.cc @@ -79,6 +79,7 @@ LIR* genRegMemCheck(CompilationUnit* cUnit, ConditionCode cCode, void genSparseSwitch(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc) { UNIMPLEMENTED(WARNING) << "genSparseSwitch"; + newLIR0(cUnit, kX86Bkpt); return; #if 0 const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB; @@ -158,82 +159,55 @@ void genSparseSwitch(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc) * jr r_RA * done: */ -void genPackedSwitch(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc) -{ - UNIMPLEMENTED(WARNING) << "genPackedSwitch"; -#if 0 - const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB; - if (cUnit->printMe) { - dumpPackedSwitchTable(table); - } - // Add the table to the list - we'll process it later - SwitchTable *tabRec = (SwitchTable *)oatNew(cUnit, sizeof(SwitchTable), - true, kAllocData); - tabRec->table = table; - tabRec->vaddr = mir->offset; - int size = table[1]; - tabRec->targets = (LIR* *)oatNew(cUnit, size * sizeof(LIR*), true, - kAllocLIR); - oatInsertGrowableList(cUnit, &cUnit->switchTables, (intptr_t)tabRec); - - // Get the switch value - rlSrc = loadValue(cUnit, rlSrc, kCoreReg); - - // Prepare the bias. If too big, handle 1st stage here - int lowKey = s4FromSwitchData(&table[2]); - bool largeBias = false; - int rKey; - if (lowKey == 0) { - rKey = rlSrc.lowReg; - } else if ((lowKey & 0xffff) != lowKey) { - rKey = oatAllocTemp(cUnit); - loadConstant(cUnit, rKey, lowKey); - largeBias = true; - } else { - rKey = oatAllocTemp(cUnit); - } - - // Must prevent code motion for the curr pc pair - genBarrier(cUnit); - newLIR0(cUnit, kX86CurrPC); // Really a jal to .+8 - // Now, fill the branch delay slot with bias strip - if (lowKey == 0) { - newLIR0(cUnit, kX86Nop); - } else { - if (largeBias) { - opRegRegReg(cUnit, kOpSub, rKey, rlSrc.lowReg, rKey); - } else { - opRegRegImm(cUnit, kOpSub, rKey, rlSrc.lowReg, lowKey); - } - } - genBarrier(cUnit); // Scheduling barrier - - // Construct BaseLabel and set up table base register - LIR* baseLabel = newLIR0(cUnit, kPseudoTargetLabel); - // Remember base label so offsets can be computed later - tabRec->anchor = baseLabel; - - // Bounds check - if < 0 or >= size continue following switch - LIR* branchOver = opCmpImmBranch(cUnit, kCondHi, rKey, size-1, NULL); - - // Materialize the table base pointer - int rBase = oatAllocTemp(cUnit); - newLIR4(cUnit, kX86Delta, rBase, 0, (intptr_t)baseLabel, (intptr_t)tabRec); - - // Load the displacement from the switch table - int rDisp = oatAllocTemp(cUnit); - loadBaseIndexed(cUnit, rBase, rKey, rDisp, 2, kWord); - - // Add to r_AP and go - opRegRegReg(cUnit, kOpAdd, r_RA, r_RA, rDisp); - opReg(cUnit, kOpBx, r_RA); - - /* branchOver target here */ - LIR* target = newLIR0(cUnit, kPseudoTargetLabel); - branchOver->target = (LIR*)target; -#endif +void genPackedSwitch(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc) { + const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB; + if (cUnit->printMe) { + dumpPackedSwitchTable(table); + } + // Add the table to the list - we'll process it later + SwitchTable *tabRec = (SwitchTable *)oatNew(cUnit, sizeof(SwitchTable), + true, kAllocData); + tabRec->table = table; + tabRec->vaddr = mir->offset; + int size = table[1]; + tabRec->targets = (LIR* *)oatNew(cUnit, size * sizeof(LIR*), true, + kAllocLIR); + oatInsertGrowableList(cUnit, &cUnit->switchTables, (intptr_t)tabRec); + + // Get the switch value + rlSrc = loadValue(cUnit, rlSrc, kCoreReg); + int startOfMethodReg = oatAllocTemp(cUnit); + // Materialize a pointer to the switch table + //newLIR0(cUnit, kX86Bkpt); + newLIR1(cUnit, kX86StartOfMethod, startOfMethodReg); + int lowKey = s4FromSwitchData(&table[2]); + int keyReg; + // Remove the bias, if necessary + if (lowKey == 0) { + keyReg = rlSrc.lowReg; + } else { + keyReg = oatAllocTemp(cUnit); + opRegRegImm(cUnit, kOpSub, keyReg, rlSrc.lowReg, lowKey); + } + // Bounds check - if < 0 or >= size continue following switch + opRegImm(cUnit, kOpCmp, keyReg, size-1); + LIR* branchOver = opCondBranch(cUnit, kCondHi, NULL); + + // Load the displacement from the switch table + int dispReg = oatAllocTemp(cUnit); + newLIR5(cUnit, kX86PcRelLoadRA, dispReg, startOfMethodReg, keyReg, 2, (intptr_t)tabRec); + // Add displacement to start of method + opRegReg(cUnit, kOpAdd, startOfMethodReg, dispReg); + // ..and go! + LIR* switchBranch = newLIR1(cUnit, kX86JmpR, startOfMethodReg); + tabRec->anchor = switchBranch; + + /* branchOver target here */ + LIR* target = newLIR0(cUnit, kPseudoTargetLabel); + branchOver->target = (LIR*)target; } +void callRuntimeHelperRegReg(CompilationUnit* cUnit, int helperOffset, int arg0, int arg1); /* * Array data table format: * ushort ident = 0x0300 magic value @@ -246,47 +220,31 @@ void genPackedSwitch(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc) */ void genFillArrayData(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc) { - UNIMPLEMENTED(WARNING) << "genFillArrayData"; -#if 0 - const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB; - // Add the table to the list - we'll process it later - FillArrayData *tabRec = (FillArrayData *) - oatNew(cUnit, sizeof(FillArrayData), true, kAllocData); - tabRec->table = table; - tabRec->vaddr = mir->offset; - u2 width = tabRec->table[1]; - u4 size = tabRec->table[2] | (((u4)tabRec->table[3]) << 16); - tabRec->size = (size * width) + 8; - - oatInsertGrowableList(cUnit, &cUnit->fillArrayData, (intptr_t)tabRec); - - // Making a call - use explicit registers - oatFlushAllRegs(cUnit); /* Everything to home location */ - oatLockCallTemps(cUnit); - loadValueDirectFixed(cUnit, rlSrc, rARG0); - - // Must prevent code motion for the curr pc pair - genBarrier(cUnit); - newLIR0(cUnit, kX86CurrPC); // Really a jal to .+8 - // Now, fill the branch delay slot with the helper load - int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread, - pHandleFillArrayDataFromCode)); - genBarrier(cUnit); // Scheduling barrier - - // Construct BaseLabel and set up table base register - LIR* baseLabel = newLIR0(cUnit, kPseudoTargetLabel); - - // Materialize a pointer to the fill data image - newLIR4(cUnit, kX86Delta, rARG1, 0, (intptr_t)baseLabel, (intptr_t)tabRec); - - // And go... - callRuntimeHelper(cUnit, rTgt); // ( array*, fill_data* ) -#endif + const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB; + // Add the table to the list - we'll process it later + FillArrayData *tabRec = (FillArrayData *)oatNew(cUnit, sizeof(FillArrayData), true, kAllocData); + tabRec->table = table; + tabRec->vaddr = mir->offset; + u2 width = tabRec->table[1]; + u4 size = tabRec->table[2] | (((u4)tabRec->table[3]) << 16); + tabRec->size = (size * width) + 8; + + oatInsertGrowableList(cUnit, &cUnit->fillArrayData, (intptr_t)tabRec); + + // Making a call - use explicit registers + oatFlushAllRegs(cUnit); /* Everything to home location */ + loadValueDirectFixed(cUnit, rlSrc, rARG0); + // Materialize a pointer to the fill data image + newLIR1(cUnit, kX86StartOfMethod, rARG2); + newLIR2(cUnit, kX86PcRelAdr, rARG1, (intptr_t)tabRec); + newLIR2(cUnit, kX86Add32RR, rARG1, rARG2); + callRuntimeHelperRegReg(cUnit, ENTRYPOINT_OFFSET(pHandleFillArrayDataFromCode), rARG0, rARG1); } void genNegFloat(CompilationUnit *cUnit, RegLocation rlDest, RegLocation rlSrc) { UNIMPLEMENTED(WARNING) << "genNegFloat"; + newLIR0(cUnit, kX86Bkpt); #if 0 RegLocation rlResult; rlSrc = loadValue(cUnit, rlSrc, kCoreReg); @@ -300,6 +258,7 @@ void genNegFloat(CompilationUnit *cUnit, RegLocation rlDest, RegLocation rlSrc) void genNegDouble(CompilationUnit *cUnit, RegLocation rlDest, RegLocation rlSrc) { UNIMPLEMENTED(WARNING) << "genNegDouble"; + newLIR0(cUnit, kX86Bkpt); #if 0 RegLocation rlResult; rlSrc = loadValueWide(cUnit, rlSrc, kCoreReg); @@ -311,21 +270,20 @@ void genNegDouble(CompilationUnit *cUnit, RegLocation rlDest, RegLocation rlSrc) #endif } +LIR* genNullCheck(CompilationUnit* cUnit, int sReg, int mReg, MIR* mir); +void callRuntimeHelperReg(CompilationUnit* cUnit, int helperOffset, int arg0); + /* * TODO: implement fast path to short-circuit thin-lock case */ void genMonitorEnter(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc) { - UNIMPLEMENTED(WARNING) << "genMonitorEnter"; -#if 0 oatFlushAllRegs(cUnit); loadValueDirectFixed(cUnit, rlSrc, rARG0); // Get obj oatLockCallTemps(cUnit); // Prepare for explicit register usage genNullCheck(cUnit, rlSrc.sRegLow, rARG0, mir); // Go expensive route - artLockObjectFromCode(self, obj); - int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread, pLockObjectFromCode)); - callRuntimeHelper(cUnit, rTgt); -#endif + callRuntimeHelperReg(cUnit, ENTRYPOINT_OFFSET(pLockObjectFromCode), rARG0); } /* @@ -333,16 +291,12 @@ void genMonitorEnter(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc) */ void genMonitorExit(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc) { - UNIMPLEMENTED(WARNING) << "genMonitor"; -#if 0 oatFlushAllRegs(cUnit); loadValueDirectFixed(cUnit, rlSrc, rARG0); // Get obj oatLockCallTemps(cUnit); // Prepare for explicit register usage genNullCheck(cUnit, rlSrc.sRegLow, rARG0, mir); // Go expensive route - UnlockObjectFromCode(obj); - int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread, pUnlockObjectFromCode)); - callRuntimeHelper(cUnit, rTgt); -#endif + callRuntimeHelperReg(cUnit, ENTRYPOINT_OFFSET(pUnlockObjectFromCode), rARG0); } /* @@ -364,26 +318,20 @@ void genMonitorExit(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc) void genCmpLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, RegLocation rlSrc1, RegLocation rlSrc2) { - UNIMPLEMENTED(WARNING) << "genCmpLong"; -#if 0 - rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg); - rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg); - int t0 = oatAllocTemp(cUnit); - int t1 = oatAllocTemp(cUnit); - RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true); - newLIR3(cUnit, kX86Slt, t0, rlSrc1.highReg, rlSrc2.highReg); - newLIR3(cUnit, kX86Slt, t1, rlSrc2.highReg, rlSrc1.highReg); - newLIR3(cUnit, kX86Subu, rlResult.lowReg, t1, t0); - LIR* branch = opCmpImmBranch(cUnit, kCondNe, rlResult.lowReg, 0, NULL); - newLIR3(cUnit, kX86Sltu, t0, rlSrc1.lowReg, rlSrc2.lowReg); - newLIR3(cUnit, kX86Sltu, t1, rlSrc2.lowReg, rlSrc1.lowReg); - newLIR3(cUnit, kX86Subu, rlResult.lowReg, t1, t0); - oatFreeTemp(cUnit, t0); - oatFreeTemp(cUnit, t1); - LIR* target = newLIR0(cUnit, kPseudoTargetLabel); - branch->target = (LIR*)target; + oatFlushAllRegs(cUnit); + oatLockCallTemps(cUnit); // Prepare for explicit register usage + loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1); + loadValueDirectWideFixed(cUnit, rlSrc1, r2, r3); + // Compute (r1:r0) = (r1:r0) - (r2:r3) + opRegReg(cUnit, kOpSub, r0, r2); // r0 = r0 - r2 + opRegReg(cUnit, kOpSbc, r1, r3); // r1 = r1 - r3 - CF + opRegReg(cUnit, kOpOr, r0, r1); // r0 = high | low - sets ZF + newLIR2(cUnit, kX86Set8R, r0, kX86CondNz); // r0 = (r1:r0) != (r2:r3) ? 1 : 0 + newLIR2(cUnit, kX86Movzx8RR, r0, r0); + opRegImm(cUnit, kOpAsr, r1, 31); // r1 = high >> 31 + opRegReg(cUnit, kOpOr, r0, r1); // r0 holds result + RegLocation rlResult = LOC_C_RETURN; storeValue(cUnit, rlDest, rlResult); -#endif } X86ConditionCode oatX86ConditionEncoding(ConditionCode cond) { @@ -420,8 +368,12 @@ LIR* opCmpBranch(CompilationUnit* cUnit, ConditionCode cond, int src1, int src2, LIR* opCmpImmBranch(CompilationUnit* cUnit, ConditionCode cond, int reg, int checkValue, LIR* target) { - // TODO: when checkValue == 0 and reg is rCX, use the jcxz/nz opcode - newLIR2(cUnit, kX86Cmp32RI, reg, checkValue); + if (false && (checkValue == 0) && (cond == kCondEq || cond == kCondNe)) { + // TODO: when checkValue == 0 and reg is rCX, use the jcxz/nz opcode + // newLIR2(cUnit, kX86Test32RR, reg, reg); + } else { + newLIR2(cUnit, kX86Cmp32RI, reg, checkValue); + } X86ConditionCode cc = oatX86ConditionEncoding(cond); LIR* branch = newLIR2(cUnit, kX86Jcc8, 0 /* lir operand for Jcc offset */ , cc); branch->target = target; @@ -458,10 +410,12 @@ void opRegCopyWide(CompilationUnit *cUnit, int destLo, int destHi, opRegCopy(cUnit, S2D(destLo, destHi), S2D(srcLo, srcHi)); } else { UNIMPLEMENTED(WARNING); + newLIR0(cUnit, kX86Bkpt); } } else { if (srcFP) { UNIMPLEMENTED(WARNING); + newLIR0(cUnit, kX86Bkpt); } else { // Handle overlap if (srcHi == destLo) { diff --git a/src/compiler/codegen/x86/X86LIR.h b/src/compiler/codegen/x86/X86LIR.h index 1fc44b3add..85d2565397 100644 --- a/src/compiler/codegen/x86/X86LIR.h +++ b/src/compiler/codegen/x86/X86LIR.h @@ -194,6 +194,7 @@ enum NativeRegisterPool { r4sib_no_index = r4sp, r5 = 5, rBP = r5, + r5sib_no_base = r5, r6 = 6, rSI = r6, r7 = 7, @@ -277,7 +278,7 @@ enum X86ConditionCode { kX86CondNge = kX86CondL, // not-greater-equal kX86CondNl = 0xD, // not-less-than - kX86CondGe = kX86CondL, // not-greater-equal + kX86CondGe = kX86CondNl, // not-greater-equal kX86CondLe = 0xE, // less-than-equal kX86CondNg = kX86CondLe, // not-greater @@ -387,7 +388,6 @@ enum X86OpCode { BinaryShiftOpCode(kX86Rcl), BinaryShiftOpCode(kX86Rcr), BinaryShiftOpCode(kX86Sal), - BinaryShiftOpCode(kX86Shl), BinaryShiftOpCode(kX86Shr), BinaryShiftOpCode(kX86Sar), #undef BinaryShiftOpcode @@ -447,12 +447,18 @@ enum X86OpCode { #undef Binary0fOpCode kX86Jcc8, kX86Jcc32, // jCC rel8/32; lir operands - 0: rel, 1: CC, target assigned kX86Jmp8, kX86Jmp32, // jmp rel8/32; lir operands - 0: rel, target assigned + kX86JmpR, // jmp reg; lir operands - 0: reg kX86CallR, // call reg; lir operands - 0: reg kX86CallM, // call [base + disp]; lir operands - 0: base, 1: disp kX86CallA, // call [base + index * scale + disp] // lir operands - 0: base, 1: index, 2: scale, 3: disp kX86CallT, // call fs:[disp]; fs: is equal to Thread::Current(); lir operands - 0: disp kX86Ret, // ret; no lir operands + kX86StartOfMethod, // call 0; pop reg; sub reg, # - generate start of method into reg + // lir operands - 0: reg + kX86PcRelLoadRA, // mov reg, [base + index * scale + PC relative displacement] + // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: table + kX86PcRelAdr, // mov reg, PC relative displacement; lir operands - 0: reg, 1: table kX86Last }; @@ -472,6 +478,8 @@ enum X86EncodingKind { kRegRegReg, kRegRegMem, kRegRegArray, // RRR, RRM, RRA instruction kinds. kRegCond, kMemCond, kArrayCond, // R, M, A instruction kinds following by a condition. kJmp, kJcc, kCall, // Branch instruction kinds. + kPcRel, // Operation with displacement that is PC relative + kMacro, // An instruction composing multiple others kUnimplemented // Encoding used when an instruction isn't yet implemented. }; diff --git a/src/compiler/codegen/x86/X86RallocUtil.cc b/src/compiler/codegen/x86/X86RallocUtil.cc index ba5c063c88..297163250b 100644 --- a/src/compiler/codegen/x86/X86RallocUtil.cc +++ b/src/compiler/codegen/x86/X86RallocUtil.cc @@ -96,9 +96,9 @@ uint32_t oatFpRegMask() { /* Clobber all regs that might be used by an external C call */ extern void oatClobberCalleeSave(CompilationUnit *cUnit) { - oatClobber(cUnit, rBP); - oatClobber(cUnit, rSI); - oatClobber(cUnit, rDI); + oatClobber(cUnit, rAX); + oatClobber(cUnit, rCX); + oatClobber(cUnit, rDX); } extern RegLocation oatGetReturnWideAlt(CompilationUnit* cUnit) { |