diff options
| -rw-r--r-- | src/compiler/Dataflow.cc | 27 | ||||
| -rw-r--r-- | src/compiler/Frontend.cc | 6 | ||||
| -rw-r--r-- | src/compiler/Ralloc.cc | 92 | ||||
| -rw-r--r-- | src/compiler/SSATransformation.cc | 2 | ||||
| -rw-r--r-- | src/compiler/codegen/GenCommon.cc | 3 | ||||
| -rw-r--r-- | src/compiler/codegen/MethodBitcode.cc | 196 | ||||
| -rw-r--r-- | src/compiler/codegen/RallocUtil.cc | 2 | ||||
| -rw-r--r-- | src/greenland/intrinsic_func_list.def | 46 |
8 files changed, 231 insertions, 143 deletions
diff --git a/src/compiler/Dataflow.cc b/src/compiler/Dataflow.cc index e7998d106f..33ef0addad 100644 --- a/src/compiler/Dataflow.cc +++ b/src/compiler/Dataflow.cc @@ -69,7 +69,7 @@ const int oatDataFlowAttributes[kMirOpLast] = { DF_DA | DF_REF_A, // 0D MOVE_EXCEPTION vAA - DF_DA | DF_CORE_A, + DF_DA | DF_REF_A, // 0E RETURN_VOID DF_NOP, @@ -180,41 +180,40 @@ const int oatDataFlowAttributes[kMirOpLast] = { DF_DA | DF_UB | DF_B_WIDE | DF_UC | DF_C_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C, // 32 IF_EQ vA, vB, +CCCC - DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, + DF_UA | DF_UB, // 33 IF_NE vA, vB, +CCCC - DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, + DF_UA | DF_UB, // 34 IF_LT vA, vB, +CCCC - DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, + DF_UA | DF_UB, // 35 IF_GE vA, vB, +CCCC - DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, + DF_UA | DF_UB, // 36 IF_GT vA, vB, +CCCC - DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, + DF_UA | DF_UB, // 37 IF_LE vA, vB, +CCCC - DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, - + DF_UA | DF_UB, // 38 IF_EQZ vAA, +BBBB - DF_UA | DF_CORE_A, + DF_UA, // 39 IF_NEZ vAA, +BBBB - DF_UA | DF_CORE_A, + DF_UA, // 3A IF_LTZ vAA, +BBBB - DF_UA | DF_CORE_A, + DF_UA, // 3B IF_GEZ vAA, +BBBB - DF_UA | DF_CORE_A, + DF_UA, // 3C IF_GTZ vAA, +BBBB - DF_UA | DF_CORE_A, + DF_UA, // 3D IF_LEZ vAA, +BBBB - DF_UA | DF_CORE_A, + DF_UA, // 3E UNUSED_3E DF_NOP, diff --git a/src/compiler/Frontend.cc b/src/compiler/Frontend.cc index bcaba10ea8..3924f45ec8 100644 --- a/src/compiler/Frontend.cc +++ b/src/compiler/Frontend.cc @@ -763,8 +763,10 @@ CompiledMethod* oatCompileMethod(Compiler& compiler, cUnit->numRegs = code_item->registers_size_ - cUnit->numIns; cUnit->numOuts = code_item->outs_size_; #if defined(ART_USE_QUICK_COMPILER) +#if defined(TARGET_ARM) cUnit->genBitcode = true; #endif +#endif /* Adjust this value accordingly once inlining is performed */ cUnit->numDalvikRegisters = code_item->registers_size_; // TODO: set this from command line @@ -781,8 +783,8 @@ CompiledMethod* 
oatCompileMethod(Compiler& compiler, } #if defined(ART_USE_QUICK_COMPILER) if (cUnit->genBitcode) { - cUnit->printMe = true; - cUnit->enableDebug |= (1 << kDebugDumpBitcodeFile); + //cUnit->printMe = true; + //cUnit->enableDebug |= (1 << kDebugDumpBitcodeFile); // Disable non-safe optimizations for now cUnit->disableOpt |= ~(1 << kSafeOptimizations); } diff --git a/src/compiler/Ralloc.cc b/src/compiler/Ralloc.cc index ea4d6c109d..500b1b2d1b 100644 --- a/src/compiler/Ralloc.cc +++ b/src/compiler/Ralloc.cc @@ -23,9 +23,6 @@ namespace art { bool setFp(CompilationUnit* cUnit, int index, bool isFP) { bool change = false; - if (cUnit->regLocation[index].highWord) { - return change; - } if (isFP && !cUnit->regLocation[index].fp) { cUnit->regLocation[index].fp = true; cUnit->regLocation[index].defined = true; @@ -36,9 +33,6 @@ bool setFp(CompilationUnit* cUnit, int index, bool isFP) { bool setCore(CompilationUnit* cUnit, int index, bool isCore) { bool change = false; - if (cUnit->regLocation[index].highWord) { - return change; - } if (isCore && !cUnit->regLocation[index].defined) { cUnit->regLocation[index].core = true; cUnit->regLocation[index].defined = true; @@ -49,9 +43,6 @@ bool setCore(CompilationUnit* cUnit, int index, bool isCore) { bool setRef(CompilationUnit* cUnit, int index, bool isRef) { bool change = false; - if (cUnit->regLocation[index].highWord) { - return change; - } if (isRef && !cUnit->regLocation[index].defined) { cUnit->regLocation[index].ref = true; cUnit->regLocation[index].defined = true; @@ -60,6 +51,24 @@ bool setRef(CompilationUnit* cUnit, int index, bool isRef) { return change; } +bool setWide(CompilationUnit* cUnit, int index, bool isWide) { + bool change = false; + if (isWide && !cUnit->regLocation[index].wide) { + cUnit->regLocation[index].wide = true; + change = true; + } + return change; +} + +bool setHigh(CompilationUnit* cUnit, int index, bool isHigh) { + bool change = false; + if (isHigh && !cUnit->regLocation[index].highWord) { + 
cUnit->regLocation[index].highWord = true; + change = true; + } + return change; +} + bool remapNames(CompilationUnit* cUnit, BasicBlock* bb) { if (bb->blockType != kDalvikByteCode && bb->blockType != kEntryBlock && @@ -123,6 +132,7 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb) } if (attrs & DF_A_WIDE) { cUnit->regLocation[ssaRep->defs[0]].wide = true; + cUnit->regLocation[ssaRep->defs[1]].wide = true; cUnit->regLocation[ssaRep->defs[1]].highWord = true; DCHECK_EQ(SRegToVReg(cUnit, ssaRep->defs[0])+1, SRegToVReg(cUnit, ssaRep->defs[1])); @@ -140,6 +150,7 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb) } if (attrs & DF_A_WIDE) { cUnit->regLocation[ssaRep->uses[next]].wide = true; + cUnit->regLocation[ssaRep->uses[next + 1]].wide = true; cUnit->regLocation[ssaRep->uses[next + 1]].highWord = true; DCHECK_EQ(SRegToVReg(cUnit, ssaRep->uses[next])+1, SRegToVReg(cUnit, ssaRep->uses[next + 1])); @@ -157,6 +168,7 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb) } if (attrs & DF_B_WIDE) { cUnit->regLocation[ssaRep->uses[next]].wide = true; + cUnit->regLocation[ssaRep->uses[next + 1]].wide = true; cUnit->regLocation[ssaRep->uses[next + 1]].highWord = true; DCHECK_EQ(SRegToVReg(cUnit, ssaRep->uses[next])+1, SRegToVReg(cUnit, ssaRep->uses[next + 1])); @@ -174,6 +186,7 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb) } if (attrs & DF_C_WIDE) { cUnit->regLocation[ssaRep->uses[next]].wide = true; + cUnit->regLocation[ssaRep->uses[next + 1]].wide = true; cUnit->regLocation[ssaRep->uses[next + 1]].highWord = true; DCHECK_EQ(SRegToVReg(cUnit, ssaRep->uses[next])+1, SRegToVReg(cUnit, ssaRep->uses[next + 1])); @@ -192,6 +205,7 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb) changed |= setCore(cUnit, ssaRep->uses[0], true); changed |= setCore(cUnit, ssaRep->uses[1], true); cUnit->regLocation[ssaRep->uses[0]].wide = true; + cUnit->regLocation[ssaRep->uses[1]].wide = true; 
cUnit->regLocation[ssaRep->uses[1]].highWord = true; break; case 'F': @@ -201,6 +215,7 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb) changed |= setFp(cUnit, ssaRep->uses[0], true); changed |= setFp(cUnit, ssaRep->uses[1], true); cUnit->regLocation[ssaRep->uses[0]].wide = true; + cUnit->regLocation[ssaRep->uses[1]].wide = true; cUnit->regLocation[ssaRep->uses[1]].highWord = true; break; case 'L': @@ -254,6 +269,7 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb) ssaRep->fpUse[i] = true; ssaRep->fpUse[i+1] = true; cUnit->regLocation[ssaRep->uses[i]].wide = true; + cUnit->regLocation[ssaRep->uses[i+1]].wide = true; cUnit->regLocation[ssaRep->uses[i+1]].highWord = true; DCHECK_EQ(SRegToVReg(cUnit, ssaRep->uses[i])+1, SRegToVReg(cUnit, ssaRep->uses[i+1])); @@ -261,6 +277,7 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb) break; case 'J': cUnit->regLocation[ssaRep->uses[i]].wide = true; + cUnit->regLocation[ssaRep->uses[i+1]].wide = true; cUnit->regLocation[ssaRep->uses[i+1]].highWord = true; DCHECK_EQ(SRegToVReg(cUnit, ssaRep->uses[i])+1, SRegToVReg(cUnit, ssaRep->uses[i+1])); @@ -292,23 +309,27 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb) } // Special-case handling for moves & Phi if (attrs & (DF_IS_MOVE | DF_NULL_TRANSFER_N)) { - // If any of our inputs or outputs is defined, set all - bool definedFP = false; - bool definedCore = false; - bool definedRef = false; - definedFP |= (cUnit->regLocation[ssaRep->defs[0]].defined && - cUnit->regLocation[ssaRep->defs[0]].fp); - definedCore |= (cUnit->regLocation[ssaRep->defs[0]].defined && - cUnit->regLocation[ssaRep->defs[0]].core); - definedRef |= (cUnit->regLocation[ssaRep->defs[0]].defined && - cUnit->regLocation[ssaRep->defs[0]].ref); - for (int i = 0; i < ssaRep->numUses; i++) { - definedFP |= (cUnit->regLocation[ssaRep->uses[i]].defined && - cUnit->regLocation[ssaRep->uses[i]].fp); - definedCore |= (cUnit->regLocation[ssaRep->uses[i]].defined - && 
cUnit->regLocation[ssaRep->uses[i]].core); - definedRef |= (cUnit->regLocation[ssaRep->uses[i]].defined - && cUnit->regLocation[ssaRep->uses[i]].ref); + /* + * If any of our inputs or outputs is defined, set all. + * Some ugliness related to Phi nodes and wide values. + * The Phi set will include all low words or all high + * words, so we have to treat them specially. + */ + bool isPhi = (static_cast<int>(mir->dalvikInsn.opcode) == + kMirOpPhi); + RegLocation rlTemp = cUnit->regLocation[ssaRep->defs[0]]; + bool definedFP = rlTemp.defined && rlTemp.fp; + bool definedCore = rlTemp.defined && rlTemp.core; + bool definedRef = rlTemp.defined && rlTemp.ref; + bool isWide = rlTemp.wide || ((attrs & DF_A_WIDE) != 0); + bool isHigh = isPhi && rlTemp.wide && rlTemp.highWord; + for (int i = 0; i < ssaRep->numUses;i++) { + rlTemp = cUnit->regLocation[ssaRep->uses[i]]; + definedFP |= rlTemp.defined && rlTemp.fp; + definedCore |= rlTemp.defined && rlTemp.core; + definedRef |= rlTemp.defined && rlTemp.ref; + isWide |= rlTemp.wide; + isHigh |= isPhi && rlTemp.wide && rlTemp.highWord; } /* * TODO: cleaner fix @@ -334,10 +355,23 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb) changed |= setFp(cUnit, ssaRep->defs[0], definedFP); changed |= setCore(cUnit, ssaRep->defs[0], definedCore); changed |= setRef(cUnit, ssaRep->defs[0], definedRef); + changed |= setWide(cUnit, ssaRep->defs[0], isWide); + changed |= setHigh(cUnit, ssaRep->defs[0], isHigh); + if (attrs & DF_A_WIDE) { + changed |= setWide(cUnit, ssaRep->defs[1], true); + changed |= setHigh(cUnit, ssaRep->defs[1], true); + } for (int i = 0; i < ssaRep->numUses; i++) { - changed |= setFp(cUnit, ssaRep->uses[i], definedFP); - changed |= setCore(cUnit, ssaRep->uses[i], definedCore); - changed |= setRef(cUnit, ssaRep->uses[i], definedRef); + changed |= setFp(cUnit, ssaRep->uses[i], definedFP); + changed |= setCore(cUnit, ssaRep->uses[i], definedCore); + changed |= setRef(cUnit, ssaRep->uses[i], definedRef); + changed 
|= setWide(cUnit, ssaRep->uses[i], isWide); + changed |= setHigh(cUnit, ssaRep->uses[i], isHigh); + } + if (attrs & DF_A_WIDE) { + DCHECK_EQ(ssaRep->numUses, 2); + changed |= setWide(cUnit, ssaRep->uses[1], true); + changed |= setHigh(cUnit, ssaRep->uses[1], true); } } } diff --git a/src/compiler/SSATransformation.cc b/src/compiler/SSATransformation.cc index 7d6a733277..10957b2517 100644 --- a/src/compiler/SSATransformation.cc +++ b/src/compiler/SSATransformation.cc @@ -747,7 +747,7 @@ void insertPhiNodes(CompilationUnit* cUnit) kPostOrderDFSTraversal, true /* isIterative */); /* Iterate through each Dalvik register */ - for (dalvikReg = 0; dalvikReg < cUnit->numDalvikRegisters; dalvikReg++) { + for (dalvikReg = cUnit->numDalvikRegisters - 1; dalvikReg >= 0; dalvikReg--) { bool change; ArenaBitVectorIterator iterator; diff --git a/src/compiler/codegen/GenCommon.cc b/src/compiler/codegen/GenCommon.cc index 9082a49ad3..b4b0f6a9e0 100644 --- a/src/compiler/codegen/GenCommon.cc +++ b/src/compiler/codegen/GenCommon.cc @@ -2062,16 +2062,19 @@ bool genArithOpIntLit(CompilationUnit* cUnit, Instruction::Code opcode, op = kOpXor; break; case Instruction::SHL_INT_LIT8: + case Instruction::SHL_INT: lit &= 31; shiftOp = true; op = kOpLsl; break; case Instruction::SHR_INT_LIT8: + case Instruction::SHR_INT: lit &= 31; shiftOp = true; op = kOpAsr; break; case Instruction::USHR_INT_LIT8: + case Instruction::USHR_INT: lit &= 31; shiftOp = true; op = kOpLsr; diff --git a/src/compiler/codegen/MethodBitcode.cc b/src/compiler/codegen/MethodBitcode.cc index 83ebf9bfce..b7c4331d7d 100644 --- a/src/compiler/codegen/MethodBitcode.cc +++ b/src/compiler/codegen/MethodBitcode.cc @@ -464,24 +464,27 @@ void convertFPArithOp(CompilationUnit* cUnit, OpKind op, RegLocation rlDest, defineValue(cUnit, res, rlDest.origSReg); } -void convertShift(CompilationUnit* cUnit, OpKind op, RegLocation rlDest, - RegLocation rlSrc1, RegLocation rlSrc2) +void convertShift(CompilationUnit* cUnit, + 
greenland::IntrinsicHelper::IntrinsicId id, + RegLocation rlDest, RegLocation rlSrc1, RegLocation rlSrc2) { - llvm::Value* src1 = getLLVMValue(cUnit, rlSrc1.origSReg); - llvm::Value* src2 = getLLVMValue(cUnit, rlSrc2.origSReg); - /* - * TODO: Figure out how best to handle constraining the shift - * amount to 31 for int and 63 for long. We take care of this - * inline for int and in the out-of-line handler for longs, so - * it's a bit of a waste to generate llvm bitcode for this. - * Yet more intrinsics? - */ - UNIMPLEMENTED(WARNING) << "llvm shift mismatch"; - if (rlDest.wide) { - // llvm thinks the shift could should be in 64 bits. - src2 = cUnit->irb->CreateZExt(src2, cUnit->irb->getInt64Ty()); - } - llvm::Value* res = genArithOp(cUnit, op, rlDest.wide, src1, src2); + llvm::Function* intr = cUnit->intrinsic_helper->GetIntrinsicFunction(id); + llvm::SmallVector<llvm::Value*, 2>args; + args.push_back(getLLVMValue(cUnit, rlSrc1.origSReg)); + args.push_back(getLLVMValue(cUnit, rlSrc2.origSReg)); + llvm::Value* res = cUnit->irb->CreateCall(intr, args); + defineValue(cUnit, res, rlDest.origSReg); +} + +void convertShiftLit(CompilationUnit* cUnit, + greenland::IntrinsicHelper::IntrinsicId id, + RegLocation rlDest, RegLocation rlSrc, int shiftAmount) +{ + llvm::Function* intr = cUnit->intrinsic_helper->GetIntrinsicFunction(id); + llvm::SmallVector<llvm::Value*, 2>args; + args.push_back(getLLVMValue(cUnit, rlSrc.origSReg)); + args.push_back(cUnit->irb->getInt32(shiftAmount)); + llvm::Value* res = cUnit->irb->CreateCall(intr, args); defineValue(cUnit, res, rlDest.origSReg); } @@ -1099,27 +1102,33 @@ bool convertMIRNode(CompilationUnit* cUnit, MIR* mir, BasicBlock* bb, break; case Instruction::SHL_LONG: case Instruction::SHL_LONG_2ADDR: - convertShift(cUnit, kOpLsl, rlDest, rlSrc[0], rlSrc[1]); + convertShift(cUnit, greenland::IntrinsicHelper::SHLLong, + rlDest, rlSrc[0], rlSrc[1]); break; case Instruction::SHL_INT: case Instruction::SHL_INT_2ADDR: - convertShift(cUnit, 
kOpLsl, rlDest, rlSrc[0], rlSrc[1]); + convertShift(cUnit, greenland::IntrinsicHelper::SHLInt, + rlDest, rlSrc[0], rlSrc[1]); break; case Instruction::SHR_LONG: case Instruction::SHR_LONG_2ADDR: - convertShift(cUnit, kOpAsr, rlDest, rlSrc[0], rlSrc[1]); + convertShift(cUnit, greenland::IntrinsicHelper::SHRLong, + rlDest, rlSrc[0], rlSrc[1]); break; case Instruction::SHR_INT: case Instruction::SHR_INT_2ADDR: - convertShift(cUnit, kOpAsr, rlDest, rlSrc[0], rlSrc[1]); + convertShift(cUnit, greenland::IntrinsicHelper::SHRInt, + rlDest, rlSrc[0], rlSrc[1]); break; case Instruction::USHR_LONG: case Instruction::USHR_LONG_2ADDR: - convertShift(cUnit, kOpLsr, rlDest, rlSrc[0], rlSrc[1]); + convertShift(cUnit, greenland::IntrinsicHelper::USHRLong, + rlDest, rlSrc[0], rlSrc[1]); break; case Instruction::USHR_INT: case Instruction::USHR_INT_2ADDR: - convertShift(cUnit, kOpLsr, rlDest, rlSrc[0], rlSrc[1]); + convertShift(cUnit, greenland::IntrinsicHelper::USHRInt, + rlDest, rlSrc[0], rlSrc[1]); break; case Instruction::ADD_INT_LIT16: @@ -1155,13 +1164,16 @@ bool convertMIRNode(CompilationUnit* cUnit, MIR* mir, BasicBlock* bb, convertArithOpLit(cUnit, kOpXor, rlDest, rlSrc[0], vC); break; case Instruction::SHL_INT_LIT8: - convertArithOpLit(cUnit, kOpLsl, rlDest, rlSrc[0], vC & 0x1f); + convertShiftLit(cUnit, greenland::IntrinsicHelper::SHLInt, + rlDest, rlSrc[0], vC & 0x1f); break; case Instruction::SHR_INT_LIT8: - convertArithOpLit(cUnit, kOpAsr, rlDest, rlSrc[0], vC & 0x1f); + convertShiftLit(cUnit, greenland::IntrinsicHelper::SHRInt, + rlDest, rlSrc[0], vC & 0x1f); break; case Instruction::USHR_INT_LIT8: - convertArithOpLit(cUnit, kOpLsr, rlDest, rlSrc[0], vC & 0x1f); + convertShiftLit(cUnit, greenland::IntrinsicHelper::USHRInt, + rlDest, rlSrc[0], vC & 0x1f); break; case Instruction::ADD_FLOAT: @@ -1589,19 +1601,30 @@ void convertExtendedMIR(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir, switch ((ExtendedMIROpcode)mir->dalvikInsn.opcode) { case kMirOpPhi: { - int* 
incoming = (int*)mir->dalvikInsn.vB; RegLocation rlDest = cUnit->regLocation[mir->ssaRep->defs[0]]; + /* + * The Art compiler's Phi nodes only handle 32-bit operands, + * representing wide values using a matched set of Phi nodes + * for the lower and upper halves. In the llvm world, we only + * want a single Phi for wides. Here we will simply discard + * the Phi node representing the high word. + */ + if (rlDest.highWord) { + return; // No Phi node - handled via low word + } + int* incoming = (int*)mir->dalvikInsn.vB; llvm::Type* phiType = llvmTypeFromLocRec(cUnit, rlDest); llvm::PHINode* phi = cUnit->irb->CreatePHI(phiType, mir->ssaRep->numUses); for (int i = 0; i < mir->ssaRep->numUses; i++) { RegLocation loc; - if (rlDest.wide) { - loc = oatGetSrcWide(cUnit, mir, i); - i++; - } else { - loc = oatGetSrc(cUnit, mir, i); - } + // Don't check width here. + loc = oatGetRawSrc(cUnit, mir, i); + DCHECK_EQ(rlDest.wide, loc.wide); + DCHECK_EQ(rlDest.wide & rlDest.highWord, loc.wide & loc.highWord); + DCHECK_EQ(rlDest.fp, loc.fp); + DCHECK_EQ(rlDest.core, loc.core); + DCHECK_EQ(rlDest.ref, loc.ref); phi->addIncoming(getLLVMValue(cUnit, loc.origSReg), getLLVMBlock(cUnit, incoming[i])); } @@ -1895,30 +1918,18 @@ void oatMethodMIR2Bitcode(CompilationUnit* cUnit) arg_iter++; /* Skip path method */ for (int i = 0; i < cUnit->numSSARegs; i++) { llvm::Value* val; - if ((i < cUnit->numRegs) || (i >= (cUnit->numRegs + cUnit->numIns))) { - // Handle SSA defs, skipping Method* and compiler temps - if (SRegToVReg(cUnit, i) < 0) { - val = NULL; - } else { - llvm::Constant* immValue = cUnit->irb->GetJInt(0); - val = emitConst(cUnit, immValue, cUnit->regLocation[i]); - val->setName(llvmSSAName(cUnit, i)); - } + if ((SRegToVReg(cUnit, i) < 0) || cUnit->regLocation[i].highWord) { + oatInsertGrowableList(cUnit, &cUnit->llvmValues, 0); + } else if ((i < cUnit->numRegs) || + (i >= (cUnit->numRegs + cUnit->numIns))) { + llvm::Constant* immValue = cUnit->irb->GetJInt(0); + val = 
emitConst(cUnit, immValue, cUnit->regLocation[i]); + val->setName(llvmSSAName(cUnit, i)); oatInsertGrowableList(cUnit, &cUnit->llvmValues, (intptr_t)val); - if (cUnit->regLocation[i].wide) { - // Skip high half of wide values - oatInsertGrowableList(cUnit, &cUnit->llvmValues, 0); - i++; - } } else { // Recover previously-created argument values llvm::Value* argVal = arg_iter++; oatInsertGrowableList(cUnit, &cUnit->llvmValues, (intptr_t)argVal); - if (cUnit->regLocation[i].wide) { - // Skip high half of wide values. - oatInsertGrowableList(cUnit, &cUnit->llvmValues, 0); - i++; - } } } @@ -1959,7 +1970,7 @@ void oatMethodMIR2Bitcode(CompilationUnit* cUnit) cUnit->irb->SetInsertPoint(cUnit->entryBB); cUnit->irb->CreateBr(cUnit->entryTargetBB); - llvm::verifyFunction(*cUnit->func, llvm::PrintMessageAction); + //llvm::verifyFunction(*cUnit->func, llvm::PrintMessageAction); if (cUnit->enableDebug & (1 << kDebugDumpBitcodeFile)) { // Write bitcode to file @@ -2258,43 +2269,23 @@ void cvtBinOp(CompilationUnit* cUnit, OpKind op, llvm::Instruction* inst) } } -void cvtShiftOp(CompilationUnit* cUnit, OpKind op, llvm::Instruction* inst) +void cvtShiftOp(CompilationUnit* cUnit, Instruction::Code opcode, + llvm::CallInst* callInst) { - if (inst->getType() == cUnit->irb->getInt64Ty()) { - /* - * llvm wants the shift amount to be 64 bits, whereas we've constained - * it to be in 6 bits. It should always be held as an unnamed temp - * at this point that was the result of a previous UExt. We'll backtrack - * to find the pre-extension value and use that. - * TODO: probably better to handle this in cvtIntExt() or just intrinsify - */ - RegLocation rlDest = getLoc(cUnit, inst); - RegLocation rlSrc = getLoc(cUnit, inst->getOperand(0)); - RegLocation rlShift = getLoc(cUnit, inst->getOperand(1)); - DCHECK(rlShift.wide); - DCHECK_EQ(rlShift.sRegLow, INVALID_SREG); - // Now, free the temp registers - we won't need them. 
- // TODO: kill the dead extend ops - oatFreeTemp(cUnit, rlShift.lowReg); - oatFreeTemp(cUnit, rlShift.highReg); - // Get the pre-extend operand - llvm::Instruction* extInst = - llvm::dyn_cast<llvm::Instruction>(inst->getOperand(1)); - DCHECK(extInst != NULL); - rlShift = getLoc(cUnit, extInst->getOperand(0)); - DCHECK(!rlShift.wide); - Instruction::Code opcode; - if (op == kOpLsl) - opcode = Instruction::SHL_LONG; - else if (op == kOpAsr) - opcode = Instruction::SHR_LONG; - else { - DCHECK_EQ(op, kOpLsr); - opcode = Instruction::USHR_LONG; - } - genShiftOpLong(cUnit, opcode, rlDest, rlSrc, rlShift); + DCHECK_EQ(callInst->getNumArgOperands(), 2U); + RegLocation rlDest = getLoc(cUnit, callInst); + RegLocation rlSrc = getLoc(cUnit, callInst->getArgOperand(0)); + llvm::Value* rhs = callInst->getArgOperand(1); + if (llvm::ConstantInt* src2 = llvm::dyn_cast<llvm::ConstantInt>(rhs)) { + DCHECK(!rlDest.wide); + genArithOpIntLit(cUnit, opcode, rlDest, rlSrc, src2->getSExtValue()); } else { - cvtBinOp(cUnit, op, inst); + RegLocation rlShift = getLoc(cUnit, rhs); + if (callInst->getType() == cUnit->irb->getInt64Ty()) { + genShiftOpLong(cUnit, opcode, rlDest, rlSrc, rlShift); + } else { + genArithOpInt(cUnit, opcode, rlDest, rlSrc, rlShift); + } } } @@ -3098,9 +3089,25 @@ bool methodBitcodeBlockCodeGen(CompilationUnit* cUnit, llvm::BasicBlock* bb) cvtLongCompare(cUnit, callInst); break; - case greenland::IntrinsicHelper::UnknownId: - cvtCall(cUnit, callInst, callee); + case greenland::IntrinsicHelper::SHLLong: + cvtShiftOp(cUnit, Instruction::SHL_LONG, callInst); + break; + case greenland::IntrinsicHelper::SHRLong: + cvtShiftOp(cUnit, Instruction::SHR_LONG, callInst); + break; + case greenland::IntrinsicHelper::USHRLong: + cvtShiftOp(cUnit, Instruction::USHR_LONG, callInst); break; + case greenland::IntrinsicHelper::SHLInt: + cvtShiftOp(cUnit, Instruction::SHL_INT, callInst); + break; + case greenland::IntrinsicHelper::SHRInt: + cvtShiftOp(cUnit, Instruction::SHR_INT, 
callInst); + break; + case greenland::IntrinsicHelper::USHRInt: + cvtShiftOp(cUnit, Instruction::USHR_INT, callInst); + break; + default: LOG(FATAL) << "Unexpected intrinsic " << (int)id << ", " << cUnit->intrinsic_helper->GetName(id); @@ -3117,9 +3124,6 @@ bool methodBitcodeBlockCodeGen(CompilationUnit* cUnit, llvm::BasicBlock* bb) case llvm::Instruction::And: cvtBinOp(cUnit, kOpAnd, inst); break; case llvm::Instruction::Or: cvtBinOp(cUnit, kOpOr, inst); break; case llvm::Instruction::Xor: cvtBinOp(cUnit, kOpXor, inst); break; - case llvm::Instruction::Shl: cvtShiftOp(cUnit, kOpLsl, inst); break; - case llvm::Instruction::LShr: cvtShiftOp(cUnit, kOpLsr, inst); break; - case llvm::Instruction::AShr: cvtShiftOp(cUnit, kOpAsr, inst); break; case llvm::Instruction::PHI: cvtPhi(cUnit, inst); break; case llvm::Instruction::Ret: cvtRet(cUnit, inst); break; case llvm::Instruction::FAdd: cvtBinFPOp(cUnit, kOpAdd, inst); break; @@ -3143,6 +3147,9 @@ bool methodBitcodeBlockCodeGen(CompilationUnit* cUnit, llvm::BasicBlock* bb) case llvm::Instruction::Unreachable: break; // FIXME: can we really ignore these? 
+ case llvm::Instruction::Shl: + case llvm::Instruction::LShr: + case llvm::Instruction::AShr: case llvm::Instruction::Invoke: case llvm::Instruction::FPToUI: case llvm::Instruction::UIToFP: @@ -3174,7 +3181,8 @@ bool methodBitcodeBlockCodeGen(CompilationUnit* cUnit, llvm::BasicBlock* bb) LOG(FATAL) << "Unexpected llvm opcode: " << opcode; break; default: - LOG(FATAL) << "Unknown llvm opcode: " << opcode; break; + LOG(FATAL) << "Unknown llvm opcode: " << inst->getOpcodeName(); + break; } } diff --git a/src/compiler/codegen/RallocUtil.cc b/src/compiler/codegen/RallocUtil.cc index 2088cdc360..9d1878a02b 100644 --- a/src/compiler/codegen/RallocUtil.cc +++ b/src/compiler/codegen/RallocUtil.cc @@ -998,14 +998,12 @@ extern RegLocation oatGetRawSrc(CompilationUnit* cUnit, MIR* mir, int num) { DCHECK(num < mir->ssaRep->numUses); RegLocation res = cUnit->regLocation[mir->ssaRep->uses[num]]; - DCHECK(!res.wide || num < (mir->ssaRep->numUses - 1)); return res; } extern RegLocation oatGetRawDest(CompilationUnit* cUnit, MIR* mir) { DCHECK_GT(mir->ssaRep->numDefs, 0); RegLocation res = cUnit->regLocation[mir->ssaRep->defs[0]]; - DCHECK(!res.wide || mir->ssaRep->numDefs == 2); return res; } extern RegLocation oatGetDest(CompilationUnit* cUnit, MIR* mir) diff --git a/src/greenland/intrinsic_func_list.def b/src/greenland/intrinsic_func_list.def index 608e760c75..0ebebb25a3 100644 --- a/src/greenland/intrinsic_func_list.def +++ b/src/greenland/intrinsic_func_list.def @@ -1228,7 +1228,7 @@ _EVAL_DEF_INTRINSICS_FUNC(CopyObj, kJavaObjectTy, _EXPAND_ARG1(kJavaObjectTy)) -// int copy_long(long) +// long copy_long(long) _EVAL_DEF_INTRINSICS_FUNC(CopyLong, dex_lang_copy_long, kAttrReadOnly | kAttrNoThrow, @@ -1250,6 +1250,50 @@ _EVAL_DEF_INTRINSICS_FUNC(CopyDouble, _EXPAND_ARG1(kDoubleTy)) //---------------------------------------------------------------------------- +// Shift intrinsics. Shift semantics for Dalvik are a bit different than +// the llvm shift operators. 
For 32-bit shifts, the shift count is constrained +// to the range of 0..31, while for 64-bit shifts we limit to 0..63. +// Further, the shift count for Long shifts in Dalvik is 32 bits, while +// llvm requires a 64-bit shift count. For GBC, we represent shifts as an +// intrinsic to allow most efficient target-dependent lowering. +//---------------------------------------------------------------------------- +// long shl_long(long,int) +_EVAL_DEF_INTRINSICS_FUNC(SHLLong, + dex_lang_shl_long, + kAttrReadOnly | kAttrNoThrow, + kInt64Ty, + _EXPAND_ARG2(kInt64Ty,kInt32Ty)) +// long shr_long(long,int) +_EVAL_DEF_INTRINSICS_FUNC(SHRLong, + dex_lang_shr_long, + kAttrReadOnly | kAttrNoThrow, + kInt64Ty, + _EXPAND_ARG2(kInt64Ty,kInt32Ty)) +// long ushr_long(long,int) +_EVAL_DEF_INTRINSICS_FUNC(USHRLong, + dex_lang_ushl_long, + kAttrReadOnly | kAttrNoThrow, + kInt64Ty, + _EXPAND_ARG2(kInt64Ty,kInt32Ty)) +// int shl_int(int,int) +_EVAL_DEF_INTRINSICS_FUNC(SHLInt, + dex_lang_shl_int, + kAttrReadOnly | kAttrNoThrow, + kInt32Ty, + _EXPAND_ARG2(kInt32Ty,kInt32Ty)) +// int shr_int(int,int) +_EVAL_DEF_INTRINSICS_FUNC(SHRInt, + dex_lang_shr_int, + kAttrReadOnly | kAttrNoThrow, + kInt32Ty, + _EXPAND_ARG2(kInt32Ty,kInt32Ty)) +// int ushr_int(int,int) +_EVAL_DEF_INTRINSICS_FUNC(USHRInt, + dex_lang_ushl_int, + kAttrReadOnly | kAttrNoThrow, + kInt32Ty, + _EXPAND_ARG2(kInt32Ty,kInt32Ty)) +//---------------------------------------------------------------------------- // Conversion instrinsics. Note: these should eventually be removed. We // can express these directly in bitcode, but by using intrinsics the // Quick compiler can be more efficient. Some extra optimization infrastructure |