/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

namespace art {

/*
 * This source file contains "gen" codegen routines that should
 * be applicable to most targets.  Only mid-level support utilities
 * and "op" calls may be used here.
 */

/*
 * x86 targets will likely be different enough to need their own
 * invoke gen routines.
 */
#if defined(TARGET_ARM) || defined(TARGET_MIPS)
typedef int (*NextCallInsn)(CompilationUnit*, MIR*, int, uint32_t dexIdx,
                            uint32_t methodIdx);

/*
 * If there are any ins passed in registers that have not been promoted
 * to a callee-save register, flush them to the frame.  Perform initial
 * assignment of promoted arguments.
 */
void flushIns(CompilationUnit* cUnit)
{
  if (cUnit->numIns == 0)
    return;
  int firstArgReg = rARG1;
  int lastArgReg = rARG3;
  int startVReg = cUnit->numDalvikRegisters - cUnit->numIns;
  /*
   * Arguments passed in registers should be flushed
   * to their backing locations in the frame for now.
   * Also, we need to do initial assignment for promoted
   * arguments.  NOTE: an older version of dx had an issue
   * in which it would reuse static method argument registers.
   * This could result in the same Dalvik virtual register
   * being promoted to both core and fp regs.  In those
   * cases, copy the argument to both.  This will be uncommon
   * enough that it isn't worth attempting to optimize.
   */
  for (int i = 0; i < cUnit->numIns; i++) {
    PromotionMap vMap = cUnit->promotionMap[startVReg + i];
    if (i <= (lastArgReg - firstArgReg)) {
      // If arriving in register
      if (vMap.coreLocation == kLocPhysReg) {
        genRegCopy(cUnit, vMap.coreReg, firstArgReg + i);
      }
      if (vMap.fpLocation == kLocPhysReg) {
        genRegCopy(cUnit, vMap.fpReg, firstArgReg + i);
      }
      // Also put a copy in memory in case we're partially promoted
      storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, startVReg + i),
                    firstArgReg + i, kWord);
    } else {
      // If arriving in frame & promoted
      if (vMap.coreLocation == kLocPhysReg) {
        loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, startVReg + i),
                     vMap.coreReg);
      }
      if (vMap.fpLocation == kLocPhysReg) {
        loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, startVReg + i),
                     vMap.fpReg);
      }
    }
  }
}
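
/*
 * Worked example (numbers illustrative only): with numDalvikRegisters == 5
 * and numIns == 3, startVReg is 2 and ins v2..v4 arrive in rARG1..rARG3.
 * For each of those, the loop above copies the incoming register to the
 * promoted core and/or fp register (when promoted) and also flushes it to
 * its frame slot at oatSRegOffset(cUnit, startVReg + i).  A fourth in
 * would arrive on the frame instead and be loaded into its promoted
 * register(s) rather than flushed.
 */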

/*
 * Bit of a hack here - in lieu of a real scheduling pass,
 * emit the next instruction in static & direct invoke sequences.
 */
int nextSDCallInsn(CompilationUnit* cUnit, MIR* mir,
                   int state, uint32_t dexIdx, uint32_t unused)
{
  switch (state) {
    case 0:  // Get the current Method* [sets rARG0]
      loadCurrMethodDirect(cUnit, rARG0);
      break;
    case 1:  // Get method->dex_cache_resolved_methods_
      loadWordDisp(cUnit, rARG0,
                   Method::DexCacheResolvedMethodsOffset().Int32Value(),
                   rARG0);
      break;
    case 2:  // Grab target method*
      loadWordDisp(cUnit, rARG0,
                   Array::DataOffset(sizeof(Object*)).Int32Value() +
                   dexIdx * 4, rARG0);
      break;
    case 3:  // Grab the code from the method*
      loadWordDisp(cUnit, rARG0, Method::GetCodeOffset().Int32Value(),
                   rLINK);
      break;
    default:
      return -1;
  }
  return state + 1;
}

/*
 * Bit of a hack here - in lieu of a real scheduling pass,
 * emit the next instruction in a virtual invoke sequence.
 * We can use rLINK as a temp prior to target address loading.
 * Note also that we'll load the first argument ("this") into
 * rARG1 here rather than in the standard loadArgRegs.
 */
int nextVCallInsn(CompilationUnit* cUnit, MIR* mir,
                  int state, uint32_t dexIdx, uint32_t methodIdx)
{
  RegLocation rlArg;
  /*
   * This is the fast path in which the target virtual method is
   * fully resolved at compile time.
   */
  switch (state) {
    case 0:  // Get "this" [set rARG1]
      rlArg = oatGetSrc(cUnit, mir, 0);
      loadValueDirectFixed(cUnit, rlArg, rARG1);
      break;
    case 1:  // Is "this" null? [use rARG1]
      genNullCheck(cUnit, oatSSASrc(mir, 0), rARG1, mir);
      // Get this->klass_ [use rARG1, set rLINK]
      loadWordDisp(cUnit, rARG1, Object::ClassOffset().Int32Value(), rLINK);
      break;
    case 2:  // Get this->klass_->vtable [use rLINK, set rLINK]
      loadWordDisp(cUnit, rLINK, Class::VTableOffset().Int32Value(), rLINK);
      break;
    case 3:  // Get target method [use rLINK, set rARG0]
      loadWordDisp(cUnit, rLINK, (methodIdx * 4) +
                   Array::DataOffset(sizeof(Object*)).Int32Value(), rARG0);
      break;
    case 4:  // Get the target compiled code address [uses rARG0, sets rLINK]
      loadWordDisp(cUnit, rARG0, Method::GetCodeOffset().Int32Value(), rLINK);
      break;
    default:
      return -1;
  }
  return state + 1;
}
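
/*
 * Taken together, states 0..4 above walk the dispatch chain
 *     this->klass_->vtable_[methodIdx]->code_
 * one load per state, so argument setup can be interleaved between them.
 * On an ARM-flavored target the emitted sequence is roughly (offsets
 * symbolic, shown for illustration only):
 *     load  rARG1 <- "this"                     ; state 0
 *     null-check rARG1                          ; state 1
 *     load  rLINK <- [rARG1 + klass_]           ; state 1
 *     load  rLINK <- [rLINK + vtable_]          ; state 2
 *     load  rARG0 <- [rLINK + data + idx * 4]   ; state 3
 *     load  rLINK <- [rARG0 + code_]            ; state 4
 */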

/*
 * Interleave launch code for INVOKE_SUPER.  See comments
 * for nextVCallInsn.
 */
int nextSuperCallInsn(CompilationUnit* cUnit, MIR* mir,
                      int state, uint32_t dexIdx, uint32_t methodIdx)
{
  /*
   * This is the fast path in which the target virtual method is
   * fully resolved at compile time.  Note also that this path assumes
   * that the check to verify that the target method index falls
   * within the size of the super's vtable has been done at compile-time.
   */
  RegLocation rlArg;
  switch (state) {
    case 0:
      // Get current Method* [set rARG0]
      loadCurrMethodDirect(cUnit, rARG0);
      // Load "this" [set rARG1]
      rlArg = oatGetSrc(cUnit, mir, 0);
      loadValueDirectFixed(cUnit, rlArg, rARG1);
      // Get method->declaring_class_ [use rARG0, set rLINK]
      loadWordDisp(cUnit, rARG0, Method::DeclaringClassOffset().Int32Value(),
                   rLINK);
      // Is "this" null? [use rARG1]
      genNullCheck(cUnit, oatSSASrc(mir, 0), rARG1, mir);
      break;
    case 1:  // Get method->declaring_class_->super_class [use/set rLINK]
      loadWordDisp(cUnit, rLINK, Class::SuperClassOffset().Int32Value(),
                   rLINK);
      break;
    case 2:  // Get ...->super_class_->vtable [use/set rLINK]
      loadWordDisp(cUnit, rLINK, Class::VTableOffset().Int32Value(), rLINK);
      break;
    case 3:  // Get target method [use rLINK, set rARG0]
      loadWordDisp(cUnit, rLINK, (methodIdx * 4) +
                   Array::DataOffset(sizeof(Object*)).Int32Value(), rARG0);
      break;
    case 4:  // Get the target compiled code address [uses rARG0, sets rLINK]
      loadWordDisp(cUnit, rARG0, Method::GetCodeOffset().Int32Value(), rLINK);
      break;
    default:
      return -1;
  }
  return state + 1;
}

int nextInvokeInsnSP(CompilationUnit* cUnit, MIR* mir, int trampoline,
                     int state, uint32_t dexIdx, uint32_t methodIdx)
{
  /*
   * This handles the case in which the base method is not fully
   * resolved at compile time; bail to a runtime helper.
   */
  if (state == 0) {
    // Load trampoline target
    loadWordDisp(cUnit, rSELF, trampoline, rLINK);
    // Load rARG0 with method index
    loadConstant(cUnit, rARG0, dexIdx);
    return 1;
  }
  return -1;
}

int nextStaticCallInsnSP(CompilationUnit* cUnit, MIR* mir,
                         int state, uint32_t dexIdx, uint32_t methodIdx)
{
  int trampoline = OFFSETOF_MEMBER(Thread,
                                   pInvokeStaticTrampolineWithAccessCheck);
  return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0);
}

int nextDirectCallInsnSP(CompilationUnit* cUnit, MIR* mir,
                         int state, uint32_t dexIdx, uint32_t methodIdx)
{
  int trampoline = OFFSETOF_MEMBER(Thread,
                                   pInvokeDirectTrampolineWithAccessCheck);
  return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0);
}

int nextSuperCallInsnSP(CompilationUnit* cUnit, MIR* mir,
                        int state, uint32_t dexIdx, uint32_t methodIdx)
{
  int trampoline = OFFSETOF_MEMBER(Thread,
                                   pInvokeSuperTrampolineWithAccessCheck);
  return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0);
}

int nextVCallInsnSP(CompilationUnit* cUnit, MIR* mir,
                    int state, uint32_t dexIdx, uint32_t methodIdx)
{
  int trampoline = OFFSETOF_MEMBER(Thread,
                                   pInvokeVirtualTrampolineWithAccessCheck);
  return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0);
}
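
/*
 * Note the contrast with the fast paths above: each slow-path variant
 * collapses the state machine to a single state that emits just two loads
 * (the Thread-local trampoline address into rLINK and the dex method
 * index into rARG0).  Resolution, access checks and the actual dispatch
 * all happen inside the trampoline at runtime.
 */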

/*
 * All invoke-interface calls bounce off of art_invoke_interface_trampoline,
 * which will locate the target and continue on via a tail call.
 */
int nextInterfaceCallInsn(CompilationUnit* cUnit, MIR* mir,
                          int state, uint32_t dexIdx, uint32_t unused)
{
  int trampoline = OFFSETOF_MEMBER(Thread, pInvokeInterfaceTrampoline);
  return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0);
}

int nextInterfaceCallInsnWithAccessCheck(CompilationUnit* cUnit, MIR* mir,
                                         int state, uint32_t dexIdx,
                                         uint32_t unused)
{
  int trampoline = OFFSETOF_MEMBER(Thread,
                                   pInvokeInterfaceTrampolineWithAccessCheck);
  return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0);
}

int loadArgRegs(CompilationUnit* cUnit, MIR* mir, DecodedInstruction* dInsn,
                int callState, NextCallInsn nextCallInsn, uint32_t dexIdx,
                uint32_t methodIdx, bool skipThis)
{
  int nextReg = rARG1;
  int nextArg = 0;
  if (skipThis) {
    nextReg++;
    nextArg++;
  }
  for (; (nextReg <= rARG3) && (nextArg < mir->ssaRep->numUses); nextReg++) {
    RegLocation rlArg = oatGetRawSrc(cUnit, mir, nextArg++);
    rlArg = oatUpdateRawLoc(cUnit, rlArg);
    if (rlArg.wide && (nextReg <= rARG2)) {
      loadValueDirectWideFixed(cUnit, rlArg, nextReg, nextReg + 1);
      nextReg++;
      nextArg++;
    } else {
      rlArg.wide = false;
      loadValueDirectFixed(cUnit, rlArg, nextReg);
    }
    callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
  }
  return callState;
}
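
/*
 * Illustrative trace of the assignment loop above (example signature
 * assumed): for a virtual invoke of foo(J, I) with skipThis == true,
 * "this" already occupies rARG1, the wide long is loaded into the
 * rARG2/rARG3 pair, and the trailing int no longer fits in a register,
 * so it reaches the callee through the outs area instead.  When a wide
 * argument would land with only rARG3 left, wide is cleared and just the
 * low word is loaded; the high word is flushed to the frame by the
 * wide-span handling in genDalvikArgsNoRange below.
 */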

/*
 * Load up to 5 arguments, the first three of which will be in
 * rARG1 .. rARG3.  On entry rARG0 contains the current method pointer,
 * and as part of the load sequence, it must be replaced with
 * the target method pointer.  Note, this may also be called
 * for "range" variants if the number of arguments is 5 or fewer.
 */
int genDalvikArgsNoRange(CompilationUnit* cUnit, MIR* mir,
                         DecodedInstruction* dInsn, int callState,
                         LIR** pcrLabel, NextCallInsn nextCallInsn,
                         uint32_t dexIdx, uint32_t methodIdx, bool skipThis)
{
  RegLocation rlArg;

  /* If no arguments, just return */
  if (dInsn->vA == 0)
    return callState;

  callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);

  DCHECK_LE(dInsn->vA, 5U);
  if (dInsn->vA > 3) {
    uint32_t nextUse = 3;
    // Detect special case of wide arg spanning arg3/arg4
    RegLocation rlUse0 = oatGetRawSrc(cUnit, mir, 0);
    RegLocation rlUse1 = oatGetRawSrc(cUnit, mir, 1);
    RegLocation rlUse2 = oatGetRawSrc(cUnit, mir, 2);
    if (((!rlUse0.wide && !rlUse1.wide) || rlUse0.wide) && rlUse2.wide) {
      int reg;
      // Wide spans, we need the 2nd half of uses[2]
      rlArg = oatUpdateLocWide(cUnit, rlUse2);
      if (rlArg.location == kLocPhysReg) {
        reg = rlArg.highReg;
      } else {
        // rARG2 & rARG3 can safely be used here
        reg = rARG3;
        loadWordDisp(cUnit, rSP,
                     oatSRegOffset(cUnit, rlArg.sRegLow) + 4, reg);
        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
      }
      storeBaseDisp(cUnit, rSP, (nextUse + 1) * 4 /* == 16 */, reg, kWord);
      callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
      nextUse++;
    }
    // Loop through the rest
    while (nextUse < dInsn->vA) {
      int lowReg;
      int highReg;
      rlArg = oatGetRawSrc(cUnit, mir, nextUse);
      rlArg = oatUpdateRawLoc(cUnit, rlArg);
      if (rlArg.location == kLocPhysReg) {
        lowReg = rlArg.lowReg;
        highReg = rlArg.highReg;
      } else {
        lowReg = rARG2;
        highReg = rARG3;
        if (rlArg.wide) {
          loadValueDirectWideFixed(cUnit, rlArg, lowReg, highReg);
        } else {
          loadValueDirectFixed(cUnit, rlArg, lowReg);
        }
        callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
      }
      int outsOffset = (nextUse + 1) * 4;
      if (rlArg.wide) {
        storeBaseDispWide(cUnit, rSP, outsOffset, lowReg, highReg);
        nextUse += 2;
      } else {
        storeWordDisp(cUnit, rSP, outsOffset, lowReg);
        nextUse++;
      }
      callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
    }
  }

  callState = loadArgRegs(cUnit, mir, dInsn, callState, nextCallInsn,
                          dexIdx, methodIdx, skipThis);

  if (pcrLabel) {
    *pcrLabel = genNullCheck(cUnit, oatSSASrc(mir, 0), rARG1, mir);
  }
  return callState;
}
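
/*
 * Layout of the outgoing-argument ("outs") area assumed by the offsets
 * above (illustrative): the callee's Method* occupies [rSP, #0] and out
 * arg n lives at [rSP, #(n + 1) * 4], which is why the wide-span store
 * uses (nextUse + 1) * 4.  genDalvikArgsRange below starts its block copy
 * at outsOffset == 4 + 3 * 4, just past the slots for the three args
 * passed in rARG1..rARG3.
 */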

/*
 * May have 0+ arguments (also used for jumbo).  Note that
 * source virtual registers may be in physical registers, so may
 * need to be flushed to home location before copying.  This
 * applies to arg3 and above (see below).
 *
 * Two general strategies:
 *    If < 20 arguments
 *       Pass args 3-18 using vldm/vstm block copy
 *       Pass arg0, arg1 & arg2 in rARG1-rARG3
 *    If 20+ arguments
 *       Pass args arg19+ using memcpy block copy
 *       Pass arg0, arg1 & arg2 in rARG1-rARG3
 */
int genDalvikArgsRange(CompilationUnit* cUnit, MIR* mir,
                       DecodedInstruction* dInsn, int callState,
                       LIR** pcrLabel, NextCallInsn nextCallInsn,
                       uint32_t dexIdx, uint32_t methodIdx, bool skipThis)
{
  int firstArg = dInsn->vC;
  int numArgs = dInsn->vA;

  // If we can treat it as non-range (Jumbo ops will use range form)
  if (numArgs <= 5)
    return genDalvikArgsNoRange(cUnit, mir, dInsn, callState, pcrLabel,
                                nextCallInsn, dexIdx, methodIdx, skipThis);

  /*
   * Make sure range list doesn't span the break between the normal
   * Dalvik vRegs and the ins.
   */
  int highestArg = oatGetSrc(cUnit, mir, numArgs - 1).sRegLow;
  int boundaryReg = cUnit->numDalvikRegisters - cUnit->numIns;
  if ((firstArg < boundaryReg) && (highestArg >= boundaryReg)) {
    LOG(FATAL) << "Argument list spanned locals & args";
  }

  /*
   * First load the non-register arguments.  Both forms expect all
   * of the source arguments to be in their home frame location, so
   * scan the sReg names and flush any that have been promoted to
   * frame backing storage.
   */
  // Scan the rest of the args - if in physReg flush to memory
  for (int nextArg = 0; nextArg < numArgs;) {
    RegLocation loc = oatGetRawSrc(cUnit, mir, nextArg);
    if (loc.wide) {
      loc = oatUpdateLocWide(cUnit, loc);
      if ((nextArg >= 2) && (loc.location == kLocPhysReg)) {
        storeBaseDispWide(cUnit, rSP, oatSRegOffset(cUnit, loc.sRegLow),
                          loc.lowReg, loc.highReg);
      }
      nextArg += 2;
    } else {
      loc = oatUpdateLoc(cUnit, loc);
      if ((nextArg >= 3) && (loc.location == kLocPhysReg)) {
        storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, loc.sRegLow),
                      loc.lowReg, kWord);
      }
      nextArg++;
    }
  }

  int startOffset =
      oatSRegOffset(cUnit, cUnit->regLocation[mir->ssaRep->uses[3]].sRegLow);
  int outsOffset = 4 /* Method* */ + (3 * 4);
#if defined(TARGET_MIPS)
  // Generate memcpy
  opRegRegImm(cUnit, kOpAdd, rARG0, rSP, outsOffset);
  opRegRegImm(cUnit, kOpAdd, rARG1, rSP, startOffset);
  int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread, pMemcpy));
  loadConstant(cUnit, rARG2, (numArgs - 3) * 4);
  callRuntimeHelper(cUnit, rTgt);
  // Restore Method*
  loadCurrMethodDirect(cUnit, rARG0);
#else
  if (numArgs >= 20) {
    // Generate memcpy
    opRegRegImm(cUnit, kOpAdd, rARG0, rSP, outsOffset);
    opRegRegImm(cUnit, kOpAdd, rARG1, rSP, startOffset);
    int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread, pMemcpy));
    loadConstant(cUnit, rARG2, (numArgs - 3) * 4);
    callRuntimeHelper(cUnit, rTgt);
    // Restore Method*
    loadCurrMethodDirect(cUnit, rARG0);
  } else {
    // Use vldm/vstm pair using rARG3 as a temp
    int regsLeft = std::min(numArgs - 3, 16);
    callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
    opRegRegImm(cUnit, kOpAdd, rARG3, rSP, startOffset);
    LIR* ld = newLIR3(cUnit, kThumb2Vldms, rARG3, fr0, regsLeft);
    // TUNING: loosen barrier
    ld->defMask = ENCODE_ALL;
    setMemRefType(ld, true /* isLoad */, kDalvikReg);
    callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
    opRegRegImm(cUnit, kOpAdd, rARG3, rSP, 4 /* Method* */ + (3 * 4));
    callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
    LIR* st = newLIR3(cUnit, kThumb2Vstms, rARG3, fr0, regsLeft);
    setMemRefType(st, false /* isLoad */, kDalvikReg);
    st->defMask = ENCODE_ALL;
    callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
  }
#endif

  callState = loadArgRegs(cUnit, mir, dInsn, callState, nextCallInsn,
                          dexIdx, methodIdx, skipThis);
  callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx);
  if (pcrLabel) {
    *pcrLabel = genNullCheck(cUnit, oatSSASrc(mir, 0), rARG1, mir);
  }
  return callState;
}

#endif  // TARGET_ARM || TARGET_MIPS

}  // namespace art