diff options
| author | 2012-07-16 14:47:19 -0700 | |
|---|---|---|
| committer | 2012-07-16 14:47:19 -0700 | |
| commit | ea905deb5e2b84ad7acbccfd03bbbf2e759bbf45 (patch) | |
| tree | 30043b68c6e6741dd42d938e1990ee16315a24d5 /src | |
| parent | 6a607ad0902f3b8478e95d0b6b3e63a538571a3f (diff) | |
| parent | 2a83e8f06031948741ae3dda3633433ddd669693 (diff) | |
Merge "Quick compiler, fix wide bug" into ics-mr1-plus-art
Diffstat (limited to 'src')
| -rw-r--r-- | src/compiler/Dataflow.cc | 27 | ||||
| -rw-r--r-- | src/compiler/Frontend.cc | 6 | ||||
| -rw-r--r-- | src/compiler/Ralloc.cc | 92 | ||||
| -rw-r--r-- | src/compiler/SSATransformation.cc | 2 | ||||
| -rw-r--r-- | src/compiler/codegen/GenCommon.cc | 3 | ||||
| -rw-r--r-- | src/compiler/codegen/MethodBitcode.cc | 196 | ||||
| -rw-r--r-- | src/compiler/codegen/RallocUtil.cc | 2 | ||||
| -rw-r--r-- | src/greenland/intrinsic_func_list.def | 46 | 
8 files changed, 231 insertions, 143 deletions
diff --git a/src/compiler/Dataflow.cc b/src/compiler/Dataflow.cc index e7998d106f..33ef0addad 100644 --- a/src/compiler/Dataflow.cc +++ b/src/compiler/Dataflow.cc @@ -69,7 +69,7 @@ const int oatDataFlowAttributes[kMirOpLast] = {    DF_DA | DF_REF_A,    // 0D MOVE_EXCEPTION vAA -  DF_DA | DF_CORE_A, +  DF_DA | DF_REF_A,    // 0E RETURN_VOID    DF_NOP, @@ -180,41 +180,40 @@ const int oatDataFlowAttributes[kMirOpLast] = {    DF_DA | DF_UB | DF_B_WIDE | DF_UC | DF_C_WIDE | DF_CORE_A | DF_CORE_B | DF_CORE_C,    // 32 IF_EQ vA, vB, +CCCC -  DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, +  DF_UA | DF_UB,    // 33 IF_NE vA, vB, +CCCC -  DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, +  DF_UA | DF_UB,    // 34 IF_LT vA, vB, +CCCC -  DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, +  DF_UA | DF_UB,    // 35 IF_GE vA, vB, +CCCC -  DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, +  DF_UA | DF_UB,    // 36 IF_GT vA, vB, +CCCC -  DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, +  DF_UA | DF_UB,    // 37 IF_LE vA, vB, +CCCC -  DF_UA | DF_UB | DF_CORE_A | DF_CORE_B, - +  DF_UA | DF_UB,    // 38 IF_EQZ vAA, +BBBB -  DF_UA | DF_CORE_A, +  DF_UA,    // 39 IF_NEZ vAA, +BBBB -  DF_UA | DF_CORE_A, +  DF_UA,    // 3A IF_LTZ vAA, +BBBB -  DF_UA | DF_CORE_A, +  DF_UA,    // 3B IF_GEZ vAA, +BBBB -  DF_UA | DF_CORE_A, +  DF_UA,    // 3C IF_GTZ vAA, +BBBB -  DF_UA | DF_CORE_A, +  DF_UA,    // 3D IF_LEZ vAA, +BBBB -  DF_UA | DF_CORE_A, +  DF_UA,    // 3E UNUSED_3E    DF_NOP, diff --git a/src/compiler/Frontend.cc b/src/compiler/Frontend.cc index bcaba10ea8..3924f45ec8 100644 --- a/src/compiler/Frontend.cc +++ b/src/compiler/Frontend.cc @@ -763,8 +763,10 @@ CompiledMethod* oatCompileMethod(Compiler& compiler,    cUnit->numRegs = code_item->registers_size_ - cUnit->numIns;    cUnit->numOuts = code_item->outs_size_;  #if defined(ART_USE_QUICK_COMPILER) +#if defined(TARGET_ARM)    cUnit->genBitcode = true;  #endif +#endif    /* Adjust this value accordingly once inlining is performed */    cUnit->numDalvikRegisters = 
code_item->registers_size_;    // TODO: set this from command line @@ -781,8 +783,8 @@ CompiledMethod* oatCompileMethod(Compiler& compiler,    }  #if defined(ART_USE_QUICK_COMPILER)    if (cUnit->genBitcode) { -    cUnit->printMe = true; -    cUnit->enableDebug |= (1 << kDebugDumpBitcodeFile); +    //cUnit->printMe = true; +    //cUnit->enableDebug |= (1 << kDebugDumpBitcodeFile);      // Disable non-safe optimizations for now      cUnit->disableOpt |= ~(1 << kSafeOptimizations);    } diff --git a/src/compiler/Ralloc.cc b/src/compiler/Ralloc.cc index ea4d6c109d..500b1b2d1b 100644 --- a/src/compiler/Ralloc.cc +++ b/src/compiler/Ralloc.cc @@ -23,9 +23,6 @@ namespace art {  bool setFp(CompilationUnit* cUnit, int index, bool isFP) {    bool change = false; -  if (cUnit->regLocation[index].highWord) { -    return change; -  }    if (isFP && !cUnit->regLocation[index].fp) {      cUnit->regLocation[index].fp = true;      cUnit->regLocation[index].defined = true; @@ -36,9 +33,6 @@ bool setFp(CompilationUnit* cUnit, int index, bool isFP) {  bool setCore(CompilationUnit* cUnit, int index, bool isCore) {    bool change = false; -  if (cUnit->regLocation[index].highWord) { -    return change; -  }    if (isCore && !cUnit->regLocation[index].defined) {      cUnit->regLocation[index].core = true;      cUnit->regLocation[index].defined = true; @@ -49,9 +43,6 @@ bool setCore(CompilationUnit* cUnit, int index, bool isCore) {  bool setRef(CompilationUnit* cUnit, int index, bool isRef) {    bool change = false; -  if (cUnit->regLocation[index].highWord) { -    return change; -  }    if (isRef && !cUnit->regLocation[index].defined) {      cUnit->regLocation[index].ref = true;      cUnit->regLocation[index].defined = true; @@ -60,6 +51,24 @@ bool setRef(CompilationUnit* cUnit, int index, bool isRef) {    return change;  } +bool setWide(CompilationUnit* cUnit, int index, bool isWide) { +  bool change = false; +  if (isWide && !cUnit->regLocation[index].wide) { +    
cUnit->regLocation[index].wide = true; +    change = true; +  } +  return change; +} + +bool setHigh(CompilationUnit* cUnit, int index, bool isHigh) { +  bool change = false; +  if (isHigh && !cUnit->regLocation[index].highWord) { +    cUnit->regLocation[index].highWord = true; +    change = true; +  } +  return change; +} +  bool remapNames(CompilationUnit* cUnit, BasicBlock* bb)  {    if (bb->blockType != kDalvikByteCode && bb->blockType != kEntryBlock && @@ -123,6 +132,7 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb)          }          if (attrs & DF_A_WIDE) {            cUnit->regLocation[ssaRep->defs[0]].wide = true; +          cUnit->regLocation[ssaRep->defs[1]].wide = true;            cUnit->regLocation[ssaRep->defs[1]].highWord = true;            DCHECK_EQ(SRegToVReg(cUnit, ssaRep->defs[0])+1,            SRegToVReg(cUnit, ssaRep->defs[1])); @@ -140,6 +150,7 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb)          }          if (attrs & DF_A_WIDE) {            cUnit->regLocation[ssaRep->uses[next]].wide = true; +          cUnit->regLocation[ssaRep->uses[next + 1]].wide = true;            cUnit->regLocation[ssaRep->uses[next + 1]].highWord = true;            DCHECK_EQ(SRegToVReg(cUnit, ssaRep->uses[next])+1,            SRegToVReg(cUnit, ssaRep->uses[next + 1])); @@ -157,6 +168,7 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb)          }          if (attrs & DF_B_WIDE) {            cUnit->regLocation[ssaRep->uses[next]].wide = true; +          cUnit->regLocation[ssaRep->uses[next + 1]].wide = true;            cUnit->regLocation[ssaRep->uses[next + 1]].highWord = true;            DCHECK_EQ(SRegToVReg(cUnit, ssaRep->uses[next])+1,                                 SRegToVReg(cUnit, ssaRep->uses[next + 1])); @@ -174,6 +186,7 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb)          }          if (attrs & DF_C_WIDE) {            cUnit->regLocation[ssaRep->uses[next]].wide = true; +          
cUnit->regLocation[ssaRep->uses[next + 1]].wide = true;            cUnit->regLocation[ssaRep->uses[next + 1]].highWord = true;            DCHECK_EQ(SRegToVReg(cUnit, ssaRep->uses[next])+1,            SRegToVReg(cUnit, ssaRep->uses[next + 1])); @@ -192,6 +205,7 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb)                changed |= setCore(cUnit, ssaRep->uses[0], true);                changed |= setCore(cUnit, ssaRep->uses[1], true);                cUnit->regLocation[ssaRep->uses[0]].wide = true; +              cUnit->regLocation[ssaRep->uses[1]].wide = true;                cUnit->regLocation[ssaRep->uses[1]].highWord = true;                break;              case 'F': @@ -201,6 +215,7 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb)                changed |= setFp(cUnit, ssaRep->uses[0], true);                changed |= setFp(cUnit, ssaRep->uses[1], true);                cUnit->regLocation[ssaRep->uses[0]].wide = true; +              cUnit->regLocation[ssaRep->uses[1]].wide = true;                cUnit->regLocation[ssaRep->uses[1]].highWord = true;                break;              case 'L': @@ -254,6 +269,7 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb)                  ssaRep->fpUse[i] = true;                  ssaRep->fpUse[i+1] = true;                  cUnit->regLocation[ssaRep->uses[i]].wide = true; +                cUnit->regLocation[ssaRep->uses[i+1]].wide = true;                  cUnit->regLocation[ssaRep->uses[i+1]].highWord = true;                  DCHECK_EQ(SRegToVReg(cUnit, ssaRep->uses[i])+1,                                       SRegToVReg(cUnit, ssaRep->uses[i+1])); @@ -261,6 +277,7 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb)                  break;                case 'J':                  cUnit->regLocation[ssaRep->uses[i]].wide = true; +                cUnit->regLocation[ssaRep->uses[i+1]].wide = true;                  cUnit->regLocation[ssaRep->uses[i+1]].highWord = true;  
                DCHECK_EQ(SRegToVReg(cUnit, ssaRep->uses[i])+1,                                       SRegToVReg(cUnit, ssaRep->uses[i+1])); @@ -292,23 +309,27 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb)          }        // Special-case handling for moves & Phi        if (attrs & (DF_IS_MOVE | DF_NULL_TRANSFER_N)) { -        // If any of our inputs or outputs is defined, set all -        bool definedFP = false; -        bool definedCore = false; -        bool definedRef = false; -        definedFP |= (cUnit->regLocation[ssaRep->defs[0]].defined && -                      cUnit->regLocation[ssaRep->defs[0]].fp); -        definedCore |= (cUnit->regLocation[ssaRep->defs[0]].defined && -                        cUnit->regLocation[ssaRep->defs[0]].core); -        definedRef |= (cUnit->regLocation[ssaRep->defs[0]].defined && -                       cUnit->regLocation[ssaRep->defs[0]].ref); -        for (int i = 0; i < ssaRep->numUses; i++) { -          definedFP |= (cUnit->regLocation[ssaRep->uses[i]].defined && -                        cUnit->regLocation[ssaRep->uses[i]].fp); -          definedCore |= (cUnit->regLocation[ssaRep->uses[i]].defined -                          && cUnit->regLocation[ssaRep->uses[i]].core); -          definedRef |= (cUnit->regLocation[ssaRep->uses[i]].defined -                         && cUnit->regLocation[ssaRep->uses[i]].ref); +        /* +         * If any of our inputs or outputs is defined, set all. +         * Some ugliness related to Phi nodes and wide values. +         * The Phi set will include all low words or all high +         * words, so we have to treat them specially. 
+         */ +        bool isPhi = (static_cast<int>(mir->dalvikInsn.opcode) == +                      kMirOpPhi); +        RegLocation rlTemp = cUnit->regLocation[ssaRep->defs[0]]; +        bool definedFP = rlTemp.defined && rlTemp.fp; +        bool definedCore = rlTemp.defined && rlTemp.core; +        bool definedRef = rlTemp.defined && rlTemp.ref; +        bool isWide = rlTemp.wide || ((attrs & DF_A_WIDE) != 0); +        bool isHigh = isPhi && rlTemp.wide && rlTemp.highWord; +        for (int i = 0; i < ssaRep->numUses;i++) { +          rlTemp = cUnit->regLocation[ssaRep->uses[i]]; +          definedFP |= rlTemp.defined && rlTemp.fp; +          definedCore |= rlTemp.defined && rlTemp.core; +          definedRef |= rlTemp.defined && rlTemp.ref; +          isWide |= rlTemp.wide; +          isHigh |= isPhi && rlTemp.wide && rlTemp.highWord;          }          /*           * TODO: cleaner fix @@ -334,10 +355,23 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb)          changed |= setFp(cUnit, ssaRep->defs[0], definedFP);          changed |= setCore(cUnit, ssaRep->defs[0], definedCore);          changed |= setRef(cUnit, ssaRep->defs[0], definedRef); +        changed |= setWide(cUnit, ssaRep->defs[0], isWide); +        changed |= setHigh(cUnit, ssaRep->defs[0], isHigh); +        if (attrs & DF_A_WIDE) { +          changed |= setWide(cUnit, ssaRep->defs[1], true); +          changed |= setHigh(cUnit, ssaRep->defs[1], true); +        }          for (int i = 0; i < ssaRep->numUses; i++) { -         changed |= setFp(cUnit, ssaRep->uses[i], definedFP); -         changed |= setCore(cUnit, ssaRep->uses[i], definedCore); -         changed |= setRef(cUnit, ssaRep->uses[i], definedRef); +          changed |= setFp(cUnit, ssaRep->uses[i], definedFP); +          changed |= setCore(cUnit, ssaRep->uses[i], definedCore); +          changed |= setRef(cUnit, ssaRep->uses[i], definedRef); +          changed |= setWide(cUnit, ssaRep->uses[i], isWide); +          changed 
|= setHigh(cUnit, ssaRep->uses[i], isHigh); +        } +        if (attrs & DF_A_WIDE) { +          DCHECK_EQ(ssaRep->numUses, 2); +          changed |= setWide(cUnit, ssaRep->uses[1], true); +          changed |= setHigh(cUnit, ssaRep->uses[1], true);          }        }      } diff --git a/src/compiler/SSATransformation.cc b/src/compiler/SSATransformation.cc index 7d6a733277..10957b2517 100644 --- a/src/compiler/SSATransformation.cc +++ b/src/compiler/SSATransformation.cc @@ -747,7 +747,7 @@ void insertPhiNodes(CompilationUnit* cUnit)                                  kPostOrderDFSTraversal, true /* isIterative */);    /* Iterate through each Dalvik register */ -  for (dalvikReg = 0; dalvikReg < cUnit->numDalvikRegisters; dalvikReg++) { +  for (dalvikReg = cUnit->numDalvikRegisters - 1; dalvikReg >= 0; dalvikReg--) {      bool change;      ArenaBitVectorIterator iterator; diff --git a/src/compiler/codegen/GenCommon.cc b/src/compiler/codegen/GenCommon.cc index 9082a49ad3..b4b0f6a9e0 100644 --- a/src/compiler/codegen/GenCommon.cc +++ b/src/compiler/codegen/GenCommon.cc @@ -2062,16 +2062,19 @@ bool genArithOpIntLit(CompilationUnit* cUnit, Instruction::Code opcode,        op = kOpXor;        break;      case Instruction::SHL_INT_LIT8: +    case Instruction::SHL_INT:        lit &= 31;        shiftOp = true;        op = kOpLsl;        break;      case Instruction::SHR_INT_LIT8: +    case Instruction::SHR_INT:        lit &= 31;        shiftOp = true;        op = kOpAsr;        break;      case Instruction::USHR_INT_LIT8: +    case Instruction::USHR_INT:        lit &= 31;        shiftOp = true;        op = kOpLsr; diff --git a/src/compiler/codegen/MethodBitcode.cc b/src/compiler/codegen/MethodBitcode.cc index 83ebf9bfce..b7c4331d7d 100644 --- a/src/compiler/codegen/MethodBitcode.cc +++ b/src/compiler/codegen/MethodBitcode.cc @@ -464,24 +464,27 @@ void convertFPArithOp(CompilationUnit* cUnit, OpKind op, RegLocation rlDest,    defineValue(cUnit, res, rlDest.origSReg);  } 
-void convertShift(CompilationUnit* cUnit, OpKind op, RegLocation rlDest, -                  RegLocation rlSrc1, RegLocation rlSrc2) +void convertShift(CompilationUnit* cUnit, +                  greenland::IntrinsicHelper::IntrinsicId id, +                  RegLocation rlDest, RegLocation rlSrc1, RegLocation rlSrc2)  { -  llvm::Value* src1 = getLLVMValue(cUnit, rlSrc1.origSReg); -  llvm::Value* src2 = getLLVMValue(cUnit, rlSrc2.origSReg); -  /* -   * TODO: Figure out how best to handle constraining the shift -   * amount to 31 for int and 63 for long.  We take care of this -   * inline for int and in the out-of-line handler for longs, so -   * it's a bit of a waste to generate llvm bitcode for this. -   * Yet more intrinsics? -   */ -  UNIMPLEMENTED(WARNING) << "llvm shift mismatch"; -  if (rlDest.wide) { -    // llvm thinks the shift could should be in 64 bits. -    src2 = cUnit->irb->CreateZExt(src2, cUnit->irb->getInt64Ty()); -  } -  llvm::Value* res = genArithOp(cUnit, op, rlDest.wide, src1, src2); +  llvm::Function* intr = cUnit->intrinsic_helper->GetIntrinsicFunction(id); +  llvm::SmallVector<llvm::Value*, 2>args; +  args.push_back(getLLVMValue(cUnit, rlSrc1.origSReg)); +  args.push_back(getLLVMValue(cUnit, rlSrc2.origSReg)); +  llvm::Value* res = cUnit->irb->CreateCall(intr, args); +  defineValue(cUnit, res, rlDest.origSReg); +} + +void convertShiftLit(CompilationUnit* cUnit, +                     greenland::IntrinsicHelper::IntrinsicId id, +                     RegLocation rlDest, RegLocation rlSrc, int shiftAmount) +{ +  llvm::Function* intr = cUnit->intrinsic_helper->GetIntrinsicFunction(id); +  llvm::SmallVector<llvm::Value*, 2>args; +  args.push_back(getLLVMValue(cUnit, rlSrc.origSReg)); +  args.push_back(cUnit->irb->getInt32(shiftAmount)); +  llvm::Value* res = cUnit->irb->CreateCall(intr, args);    defineValue(cUnit, res, rlDest.origSReg);  } @@ -1099,27 +1102,33 @@ bool convertMIRNode(CompilationUnit* cUnit, MIR* mir, BasicBlock* bb,        break;    
  case Instruction::SHL_LONG:      case Instruction::SHL_LONG_2ADDR: -      convertShift(cUnit, kOpLsl, rlDest, rlSrc[0], rlSrc[1]); +      convertShift(cUnit, greenland::IntrinsicHelper::SHLLong, +                    rlDest, rlSrc[0], rlSrc[1]);        break;      case Instruction::SHL_INT:      case Instruction::SHL_INT_2ADDR: -      convertShift(cUnit, kOpLsl, rlDest, rlSrc[0], rlSrc[1]); +      convertShift(cUnit, greenland::IntrinsicHelper::SHLInt, +                   rlDest, rlSrc[0], rlSrc[1]);        break;      case Instruction::SHR_LONG:      case Instruction::SHR_LONG_2ADDR: -      convertShift(cUnit, kOpAsr, rlDest, rlSrc[0], rlSrc[1]); +      convertShift(cUnit, greenland::IntrinsicHelper::SHRLong, +                   rlDest, rlSrc[0], rlSrc[1]);        break;      case Instruction::SHR_INT:      case Instruction::SHR_INT_2ADDR: -      convertShift(cUnit, kOpAsr, rlDest, rlSrc[0], rlSrc[1]); +      convertShift(cUnit, greenland::IntrinsicHelper::SHRInt, +                   rlDest, rlSrc[0], rlSrc[1]);        break;      case Instruction::USHR_LONG:      case Instruction::USHR_LONG_2ADDR: -      convertShift(cUnit, kOpLsr, rlDest, rlSrc[0], rlSrc[1]); +      convertShift(cUnit, greenland::IntrinsicHelper::USHRLong, +                   rlDest, rlSrc[0], rlSrc[1]);        break;      case Instruction::USHR_INT:      case Instruction::USHR_INT_2ADDR: -      convertShift(cUnit, kOpLsr, rlDest, rlSrc[0], rlSrc[1]); +      convertShift(cUnit, greenland::IntrinsicHelper::USHRInt, +                   rlDest, rlSrc[0], rlSrc[1]);        break;      case Instruction::ADD_INT_LIT16: @@ -1155,13 +1164,16 @@ bool convertMIRNode(CompilationUnit* cUnit, MIR* mir, BasicBlock* bb,        convertArithOpLit(cUnit, kOpXor, rlDest, rlSrc[0], vC);        break;      case Instruction::SHL_INT_LIT8: -      convertArithOpLit(cUnit, kOpLsl, rlDest, rlSrc[0], vC & 0x1f); +      convertShiftLit(cUnit, greenland::IntrinsicHelper::SHLInt, +                      rlDest, rlSrc[0], vC 
& 0x1f);        break;      case Instruction::SHR_INT_LIT8: -      convertArithOpLit(cUnit, kOpAsr, rlDest, rlSrc[0], vC & 0x1f); +      convertShiftLit(cUnit, greenland::IntrinsicHelper::SHRInt, +                      rlDest, rlSrc[0], vC & 0x1f);        break;      case Instruction::USHR_INT_LIT8: -      convertArithOpLit(cUnit, kOpLsr, rlDest, rlSrc[0], vC & 0x1f); +      convertShiftLit(cUnit, greenland::IntrinsicHelper::USHRInt, +                      rlDest, rlSrc[0], vC & 0x1f);        break;      case Instruction::ADD_FLOAT: @@ -1589,19 +1601,30 @@ void convertExtendedMIR(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir,    switch ((ExtendedMIROpcode)mir->dalvikInsn.opcode) {      case kMirOpPhi: { -      int* incoming = (int*)mir->dalvikInsn.vB;        RegLocation rlDest = cUnit->regLocation[mir->ssaRep->defs[0]]; +      /* +       * The Art compiler's Phi nodes only handle 32-bit operands, +       * representing wide values using a matched set of Phi nodes +       * for the lower and upper halves.  In the llvm world, we only +       * want a single Phi for wides.  Here we will simply discard +       * the Phi node representing the high word. +       */ +      if (rlDest.highWord) { +        return;  // No Phi node - handled via low word +      } +      int* incoming = (int*)mir->dalvikInsn.vB;        llvm::Type* phiType =            llvmTypeFromLocRec(cUnit, rlDest);        llvm::PHINode* phi = cUnit->irb->CreatePHI(phiType, mir->ssaRep->numUses);        for (int i = 0; i < mir->ssaRep->numUses; i++) {          RegLocation loc; -        if (rlDest.wide) { -           loc = oatGetSrcWide(cUnit, mir, i); -           i++; -        } else { -           loc = oatGetSrc(cUnit, mir, i); -        } +        // Don't check width here. 
+        loc = oatGetRawSrc(cUnit, mir, i); +        DCHECK_EQ(rlDest.wide, loc.wide); +        DCHECK_EQ(rlDest.wide & rlDest.highWord, loc.wide & loc.highWord); +        DCHECK_EQ(rlDest.fp, loc.fp); +        DCHECK_EQ(rlDest.core, loc.core); +        DCHECK_EQ(rlDest.ref, loc.ref);          phi->addIncoming(getLLVMValue(cUnit, loc.origSReg),                           getLLVMBlock(cUnit, incoming[i]));        } @@ -1895,30 +1918,18 @@ void oatMethodMIR2Bitcode(CompilationUnit* cUnit)    arg_iter++;  /* Skip path method */    for (int i = 0; i < cUnit->numSSARegs; i++) {      llvm::Value* val; -    if ((i < cUnit->numRegs) || (i >= (cUnit->numRegs + cUnit->numIns))) { -      // Handle SSA defs, skipping Method* and compiler temps -      if (SRegToVReg(cUnit, i) < 0) { -        val = NULL; -      } else { -        llvm::Constant* immValue = cUnit->irb->GetJInt(0); -        val = emitConst(cUnit, immValue, cUnit->regLocation[i]); -        val->setName(llvmSSAName(cUnit, i)); -      } +    if ((SRegToVReg(cUnit, i) < 0) || cUnit->regLocation[i].highWord) { +      oatInsertGrowableList(cUnit, &cUnit->llvmValues, 0); +    } else if ((i < cUnit->numRegs) || +               (i >= (cUnit->numRegs + cUnit->numIns))) { +      llvm::Constant* immValue = cUnit->irb->GetJInt(0); +      val = emitConst(cUnit, immValue, cUnit->regLocation[i]); +      val->setName(llvmSSAName(cUnit, i));        oatInsertGrowableList(cUnit, &cUnit->llvmValues, (intptr_t)val); -      if (cUnit->regLocation[i].wide) { -        // Skip high half of wide values -        oatInsertGrowableList(cUnit, &cUnit->llvmValues, 0); -        i++; -      }      } else {        // Recover previously-created argument values        llvm::Value* argVal = arg_iter++;        oatInsertGrowableList(cUnit, &cUnit->llvmValues, (intptr_t)argVal); -      if (cUnit->regLocation[i].wide) { -        // Skip high half of wide values. 
-        oatInsertGrowableList(cUnit, &cUnit->llvmValues, 0); -        i++; -      }      }    } @@ -1959,7 +1970,7 @@ void oatMethodMIR2Bitcode(CompilationUnit* cUnit)    cUnit->irb->SetInsertPoint(cUnit->entryBB);    cUnit->irb->CreateBr(cUnit->entryTargetBB); -  llvm::verifyFunction(*cUnit->func, llvm::PrintMessageAction); +  //llvm::verifyFunction(*cUnit->func, llvm::PrintMessageAction);    if (cUnit->enableDebug & (1 << kDebugDumpBitcodeFile)) {      // Write bitcode to file @@ -2258,43 +2269,23 @@ void cvtBinOp(CompilationUnit* cUnit, OpKind op, llvm::Instruction* inst)    }  } -void cvtShiftOp(CompilationUnit* cUnit, OpKind op, llvm::Instruction* inst) +void cvtShiftOp(CompilationUnit* cUnit, Instruction::Code opcode, +                llvm::CallInst* callInst)  { -  if (inst->getType() == cUnit->irb->getInt64Ty()) { -    /* -     * llvm wants the shift amount to be 64 bits, whereas we've constained -     * it to be in 6 bits.  It should always be held as an unnamed temp -     * at this point that was the result of a previous UExt.  We'll backtrack -     * to find the pre-extension value and use that. -     * TODO: probably better to handle this in cvtIntExt() or just intrinsify -     */ -    RegLocation rlDest = getLoc(cUnit, inst); -    RegLocation rlSrc = getLoc(cUnit, inst->getOperand(0)); -    RegLocation rlShift = getLoc(cUnit, inst->getOperand(1)); -    DCHECK(rlShift.wide); -    DCHECK_EQ(rlShift.sRegLow, INVALID_SREG); -    // Now, free the temp registers - we won't need them. 
-    // TODO: kill the dead extend ops -    oatFreeTemp(cUnit, rlShift.lowReg); -    oatFreeTemp(cUnit, rlShift.highReg); -    // Get the pre-extend operand -    llvm::Instruction* extInst = -        llvm::dyn_cast<llvm::Instruction>(inst->getOperand(1)); -    DCHECK(extInst != NULL); -    rlShift = getLoc(cUnit, extInst->getOperand(0)); -    DCHECK(!rlShift.wide); -    Instruction::Code opcode; -    if (op == kOpLsl) -      opcode = Instruction::SHL_LONG; -    else if (op == kOpAsr) -      opcode = Instruction::SHR_LONG; -    else { -      DCHECK_EQ(op, kOpLsr); -      opcode = Instruction::USHR_LONG; -    } -    genShiftOpLong(cUnit, opcode, rlDest, rlSrc, rlShift); +  DCHECK_EQ(callInst->getNumArgOperands(), 2U); +  RegLocation rlDest = getLoc(cUnit, callInst); +  RegLocation rlSrc = getLoc(cUnit, callInst->getArgOperand(0)); +  llvm::Value* rhs = callInst->getArgOperand(1); +  if (llvm::ConstantInt* src2 = llvm::dyn_cast<llvm::ConstantInt>(rhs)) { +    DCHECK(!rlDest.wide); +    genArithOpIntLit(cUnit, opcode, rlDest, rlSrc, src2->getSExtValue());    } else { -    cvtBinOp(cUnit, op, inst); +    RegLocation rlShift = getLoc(cUnit, rhs); +    if (callInst->getType() == cUnit->irb->getInt64Ty()) { +      genShiftOpLong(cUnit, opcode, rlDest, rlSrc, rlShift); +    } else { +      genArithOpInt(cUnit, opcode, rlDest, rlSrc, rlShift); +    }    }  } @@ -3098,9 +3089,25 @@ bool methodBitcodeBlockCodeGen(CompilationUnit* cUnit, llvm::BasicBlock* bb)                cvtLongCompare(cUnit, callInst);                break; -            case greenland::IntrinsicHelper::UnknownId: -              cvtCall(cUnit, callInst, callee); +            case greenland::IntrinsicHelper::SHLLong: +              cvtShiftOp(cUnit, Instruction::SHL_LONG, callInst); +              break; +            case greenland::IntrinsicHelper::SHRLong: +              cvtShiftOp(cUnit, Instruction::SHR_LONG, callInst); +              break; +            case greenland::IntrinsicHelper::USHRLong: +        
      cvtShiftOp(cUnit, Instruction::USHR_LONG, callInst);                break; +            case greenland::IntrinsicHelper::SHLInt: +              cvtShiftOp(cUnit, Instruction::SHL_INT, callInst); +              break; +            case greenland::IntrinsicHelper::SHRInt: +              cvtShiftOp(cUnit, Instruction::SHR_INT, callInst); +              break; +            case greenland::IntrinsicHelper::USHRInt: +              cvtShiftOp(cUnit, Instruction::USHR_INT, callInst); +              break; +              default:                LOG(FATAL) << "Unexpected intrinsic " << (int)id << ", "                           << cUnit->intrinsic_helper->GetName(id); @@ -3117,9 +3124,6 @@ bool methodBitcodeBlockCodeGen(CompilationUnit* cUnit, llvm::BasicBlock* bb)        case llvm::Instruction::And: cvtBinOp(cUnit, kOpAnd, inst); break;        case llvm::Instruction::Or: cvtBinOp(cUnit, kOpOr, inst); break;        case llvm::Instruction::Xor: cvtBinOp(cUnit, kOpXor, inst); break; -      case llvm::Instruction::Shl: cvtShiftOp(cUnit, kOpLsl, inst); break; -      case llvm::Instruction::LShr: cvtShiftOp(cUnit, kOpLsr, inst); break; -      case llvm::Instruction::AShr: cvtShiftOp(cUnit, kOpAsr, inst); break;        case llvm::Instruction::PHI: cvtPhi(cUnit, inst); break;        case llvm::Instruction::Ret: cvtRet(cUnit, inst); break;        case llvm::Instruction::FAdd: cvtBinFPOp(cUnit, kOpAdd, inst); break; @@ -3143,6 +3147,9 @@ bool methodBitcodeBlockCodeGen(CompilationUnit* cUnit, llvm::BasicBlock* bb)        case llvm::Instruction::Unreachable:          break;  // FIXME: can we really ignore these? 
+      case llvm::Instruction::Shl: +      case llvm::Instruction::LShr: +      case llvm::Instruction::AShr:        case llvm::Instruction::Invoke:        case llvm::Instruction::FPToUI:        case llvm::Instruction::UIToFP: @@ -3174,7 +3181,8 @@ bool methodBitcodeBlockCodeGen(CompilationUnit* cUnit, llvm::BasicBlock* bb)          LOG(FATAL) << "Unexpected llvm opcode: " << opcode; break;        default: -        LOG(FATAL) << "Unknown llvm opcode: " << opcode; break; +        LOG(FATAL) << "Unknown llvm opcode: " << inst->getOpcodeName(); +        break;      }    } diff --git a/src/compiler/codegen/RallocUtil.cc b/src/compiler/codegen/RallocUtil.cc index 2088cdc360..9d1878a02b 100644 --- a/src/compiler/codegen/RallocUtil.cc +++ b/src/compiler/codegen/RallocUtil.cc @@ -998,14 +998,12 @@ extern RegLocation oatGetRawSrc(CompilationUnit* cUnit, MIR* mir, int num)  {    DCHECK(num < mir->ssaRep->numUses);    RegLocation res = cUnit->regLocation[mir->ssaRep->uses[num]]; -  DCHECK(!res.wide || num < (mir->ssaRep->numUses - 1));    return res;  }  extern RegLocation oatGetRawDest(CompilationUnit* cUnit, MIR* mir)  {    DCHECK_GT(mir->ssaRep->numDefs, 0);    RegLocation res = cUnit->regLocation[mir->ssaRep->defs[0]]; -  DCHECK(!res.wide || mir->ssaRep->numDefs == 2);    return res;  }  extern RegLocation oatGetDest(CompilationUnit* cUnit, MIR* mir) diff --git a/src/greenland/intrinsic_func_list.def b/src/greenland/intrinsic_func_list.def index 608e760c75..0ebebb25a3 100644 --- a/src/greenland/intrinsic_func_list.def +++ b/src/greenland/intrinsic_func_list.def @@ -1228,7 +1228,7 @@ _EVAL_DEF_INTRINSICS_FUNC(CopyObj,                            kJavaObjectTy,                            _EXPAND_ARG1(kJavaObjectTy)) -// int copy_long(long) +// long copy_long(long)  _EVAL_DEF_INTRINSICS_FUNC(CopyLong,                            dex_lang_copy_long,                            kAttrReadOnly | kAttrNoThrow, @@ -1250,6 +1250,50 @@ _EVAL_DEF_INTRINSICS_FUNC(CopyDouble,              
              _EXPAND_ARG1(kDoubleTy))  //---------------------------------------------------------------------------- +// Shift intrinsics.  Shift semantics for Dalvik are a bit different than +// the llvm shift operators.  For 32-bit shifts, the shift count is constrained +// to the range of 0..31, while for 64-bit shifts we limit to 0..63. +// Further, the shift count for Long shifts in Dalvik is 32 bits, while +// llvm requires a 64-bit shift count. For GBC, we represent shifts as an +//  intrinsic to allow most efficient target-dependent lowering. +//---------------------------------------------------------------------------- +// long shl_long(long,int) +_EVAL_DEF_INTRINSICS_FUNC(SHLLong, +                          dex_lang_shl_long, +                          kAttrReadOnly | kAttrNoThrow, +                          kInt64Ty, +                          _EXPAND_ARG2(kInt64Ty,kInt32Ty)) +// long shr_long(long,int) +_EVAL_DEF_INTRINSICS_FUNC(SHRLong, +                          dex_lang_shr_long, +                          kAttrReadOnly | kAttrNoThrow, +                          kInt64Ty, +                          _EXPAND_ARG2(kInt64Ty,kInt32Ty)) +// long ushr_long(long,int) +_EVAL_DEF_INTRINSICS_FUNC(USHRLong, +                          dex_lang_ushl_long, +                          kAttrReadOnly | kAttrNoThrow, +                          kInt64Ty, +                          _EXPAND_ARG2(kInt64Ty,kInt32Ty)) +// int shl_int(int,int) +_EVAL_DEF_INTRINSICS_FUNC(SHLInt, +                          dex_lang_shl_int, +                          kAttrReadOnly | kAttrNoThrow, +                          kInt32Ty, +                          _EXPAND_ARG2(kInt32Ty,kInt32Ty)) +// int shr_int(int,int) +_EVAL_DEF_INTRINSICS_FUNC(SHRInt, +                          dex_lang_shr_int, +                          kAttrReadOnly | kAttrNoThrow, +                          kInt32Ty, +                          _EXPAND_ARG2(kInt32Ty,kInt32Ty)) +// int ushr_int(int,int) 
+_EVAL_DEF_INTRINSICS_FUNC(USHRInt, +                          dex_lang_ushl_int, +                          kAttrReadOnly | kAttrNoThrow, +                          kInt32Ty, +                          _EXPAND_ARG2(kInt32Ty,kInt32Ty)) +//----------------------------------------------------------------------------  // Conversion instrinsics.  Note: these should eventually be removed.  We  // can express these directly in bitcode, but by using intrinsics the  // Quick compiler can be more efficient.  Some extra optimization infrastructure  |