diff options
author | 2011-03-17 16:12:47 -0700 | |
---|---|---|
committer | 2011-03-17 16:14:27 -0700 | |
commit | 55d2a25402319380c62a97f3f84b57e2977448d1 (patch) | |
tree | 92a149a6f2b5cac9dbb85962773a033802aa3835 | |
parent | e4a06c5fc738bf219f2a495e12a637b2d0871651 (diff) |
Migrate thread launch to driver.
Change-Id: If182c524cceb327547640f22f956856d291d1787
-rw-r--r-- | libs/rs/driver/rsdBcc.cpp | 221 | ||||
-rw-r--r-- | libs/rs/driver/rsdBcc.h | 15 | ||||
-rw-r--r-- | libs/rs/driver/rsdCore.cpp | 139 | ||||
-rw-r--r-- | libs/rs/driver/rsdCore.h | 34 | ||||
-rw-r--r-- | libs/rs/rsContext.cpp | 90 | ||||
-rw-r--r-- | libs/rs/rsContext.h | 29 | ||||
-rw-r--r-- | libs/rs/rsScriptC.cpp | 204 | ||||
-rw-r--r-- | libs/rs/rsScriptC.h | 2 | ||||
-rw-r--r-- | libs/rs/rsScriptC_Lib.cpp | 7 | ||||
-rw-r--r-- | libs/rs/rsScriptC_LibGL.cpp | 4 | ||||
-rw-r--r-- | libs/rs/rs_hal.h | 22 |
11 files changed, 413 insertions, 354 deletions
diff --git a/libs/rs/driver/rsdBcc.cpp b/libs/rs/driver/rsdBcc.cpp index 36a4b0173097..2038a4c0c767 100644 --- a/libs/rs/driver/rsdBcc.cpp +++ b/libs/rs/driver/rsdBcc.cpp @@ -55,6 +55,15 @@ struct DrvScript { }; +static Script * setTLS(Script *sc) { + ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(Context::gThreadTLSKey); + rsAssert(tls); + Script *old = tls->mScript; + tls->mScript = sc; + return old; +} + + // Input: cacheDir // Input: resName // Input: extName @@ -234,13 +243,215 @@ error: } +typedef struct { + Context *rsc; + Script *script; + const Allocation * ain; + Allocation * aout; + const void * usr; + + uint32_t mSliceSize; + volatile int mSliceNum; + + const uint8_t *ptrIn; + uint32_t eStrideIn; + uint8_t *ptrOut; + uint32_t eStrideOut; + + uint32_t xStart; + uint32_t xEnd; + uint32_t yStart; + uint32_t yEnd; + uint32_t zStart; + uint32_t zEnd; + uint32_t arrayStart; + uint32_t arrayEnd; + + uint32_t dimX; + uint32_t dimY; + uint32_t dimZ; + uint32_t dimArray; +} MTLaunchStruct; +typedef int (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); + +static void wc_xy(void *usr, uint32_t idx) { + MTLaunchStruct *mtls = (MTLaunchStruct *)usr; + + while (1) { + uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); + uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize; + uint32_t yEnd = yStart + mtls->mSliceSize; + yEnd = rsMin(yEnd, mtls->yEnd); + if (yEnd <= yStart) { + return; + } + + //LOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd); + //LOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); + for (uint32_t y = yStart; y < yEnd; y++) { + uint32_t offset = mtls->dimX * y; + uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * offset); + const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * offset); + + for (uint32_t x = mtls->xStart; x < mtls->xEnd; x++) { + ((rs_t)mtls->script->mHal.info.root) (xPtrIn, xPtrOut, mtls->usr, x, y, 0, 0); + xPtrIn += mtls->eStrideIn; + xPtrOut += mtls->eStrideOut; + } + } + } +} + +static void wc_x(void *usr, uint32_t idx) { + MTLaunchStruct *mtls = (MTLaunchStruct *)usr; + + while (1) { + uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); + uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize; + uint32_t xEnd = xStart + mtls->mSliceSize; + xEnd = rsMin(xEnd, mtls->xEnd); + if (xEnd <= xStart) { + return; + } + + //LOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd); + //LOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); + uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * xStart); + const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * xStart); + for (uint32_t x = xStart; x < xEnd; x++) { + ((rs_t)mtls->script->mHal.info.root) (xPtrIn, xPtrOut, mtls->usr, x, 0, 0, 0); + xPtrIn += mtls->eStrideIn; + xPtrOut += mtls->eStrideOut; + } + } +} + +void rsdScriptInvokeForEach(const Context *rsc, + Script *s, + const Allocation * ain, + Allocation * aout, + const void * usr, + uint32_t usrLen, + const RsScriptCall *sc) { + + RsHal * dc = (RsHal *)rsc->mHal.drv; + + MTLaunchStruct mtls; + memset(&mtls, 0, sizeof(mtls)); + + if (ain) { + mtls.dimX = ain->getType()->getDimX(); + mtls.dimY = ain->getType()->getDimY(); + mtls.dimZ = ain->getType()->getDimZ(); + //mtls.dimArray = ain->getType()->getDimArray(); + } else if (aout) { + mtls.dimX = aout->getType()->getDimX(); + mtls.dimY = aout->getType()->getDimY(); + mtls.dimZ = aout->getType()->getDimZ(); + //mtls.dimArray = aout->getType()->getDimArray(); + } else { + rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations"); + return; + } + + if (!sc || (sc->xEnd == 0)) { + mtls.xEnd = mtls.dimX; + } else { + rsAssert(sc->xStart < mtls.dimX); + rsAssert(sc->xEnd <= mtls.dimX); + rsAssert(sc->xStart < sc->xEnd); + mtls.xStart = rsMin(mtls.dimX, sc->xStart); + mtls.xEnd = rsMin(mtls.dimX, sc->xEnd); + if (mtls.xStart >= mtls.xEnd) return; + } + + if (!sc || (sc->yEnd == 0)) { + mtls.yEnd = mtls.dimY; + } else { + rsAssert(sc->yStart < mtls.dimY); + rsAssert(sc->yEnd <= mtls.dimY); + rsAssert(sc->yStart < sc->yEnd); + mtls.yStart = rsMin(mtls.dimY, sc->yStart); + mtls.yEnd = rsMin(mtls.dimY, sc->yEnd); + if (mtls.yStart >= mtls.yEnd) return; + } + + mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd); + mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd); + mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd); + mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd); + + rsAssert(ain->getType()->getDimZ() == 0); + + Context *mrsc = (Context *)rsc; + Script * oldTLS = setTLS(s); + + mtls.rsc = mrsc; + mtls.ain = ain; + mtls.aout = aout; + mtls.script = s; + mtls.usr = usr; + mtls.mSliceSize = 10; + mtls.mSliceNum = 0; + + mtls.ptrIn = NULL; + mtls.eStrideIn = 0; + if (ain) { + mtls.ptrIn = (const uint8_t *)ain->getPtr(); + mtls.eStrideIn = ain->getType()->getElementSizeBytes(); + } + + mtls.ptrOut = NULL; + mtls.eStrideOut = 0; + if (aout) { + mtls.ptrOut = (uint8_t *)aout->getPtr(); + mtls.eStrideOut = aout->getType()->getElementSizeBytes(); + } + + if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable) { + if (mtls.dimY > 1) { + rsdLaunchThreads(mrsc, wc_xy, &mtls); + } else { + rsdLaunchThreads(mrsc, wc_x, &mtls); + } + + //LOGE("launch 1"); + } else { + //LOGE("launch 3"); + for (uint32_t ar = mtls.arrayStart; ar < mtls.arrayEnd; ar++) { + for (uint32_t z = mtls.zStart; z < mtls.zEnd; z++) { + for (uint32_t y = mtls.yStart; y < mtls.yEnd; y++) { + uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * ar + + mtls.dimX * mtls.dimY * z + + mtls.dimX * y; + uint8_t *xPtrOut = mtls.ptrOut + (mtls.eStrideOut * offset); + const uint8_t *xPtrIn = mtls.ptrIn + (mtls.eStrideIn * offset); + + for (uint32_t x = mtls.xStart; x < mtls.xEnd; x++) { + ((rs_t)s->mHal.info.root) (xPtrIn, xPtrOut, usr, x, y, z, ar); + xPtrIn += mtls.eStrideIn; + xPtrOut += mtls.eStrideOut; + } + } + } + } + } + + setTLS(oldTLS); +} + -int rsdScriptInvokeRoot(const Context *dc, const Script *script) { +int rsdScriptInvokeRoot(const Context *dc, Script *script) { DrvScript *drv = (DrvScript *)script->mHal.drv; - return drv->mRoot(); + + Script * oldTLS = setTLS(script); + int ret = drv->mRoot(); + setTLS(oldTLS); + + return ret; } -void rsdScriptInvokeInit(const Context *dc, const Script *script) { +void rsdScriptInvokeInit(const Context *dc, Script *script) { DrvScript *drv = (DrvScript *)script->mHal.drv; if (drv->mInit) { @@ -249,15 +460,17 @@ void rsdScriptInvokeInit(const Context *dc, const Script *script) { } -void rsdScriptInvokeFunction(const Context *dc, const Script *script, +void rsdScriptInvokeFunction(const Context *dc, Script *script, uint32_t slot, const void *params, size_t paramLength) { DrvScript *drv = (DrvScript *)script->mHal.drv; //LOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength); + Script * oldTLS = setTLS(script); ((void (*)(const void *, uint32_t)) drv->mInvokeFunctions[slot])(params, paramLength); + setTLS(oldTLS); } void rsdScriptSetGlobalVar(const Context *dc, const Script *script, diff --git a/libs/rs/driver/rsdBcc.h b/libs/rs/driver/rsdBcc.h index f697f293d32e..6723a3640fd0 100644 --- a/libs/rs/driver/rsdBcc.h +++ b/libs/rs/driver/rsdBcc.h @@ -25,14 +25,23 @@ bool rsdScriptInit(const android::renderscript::Context *, android::renderscript uint8_t const *bitcode, size_t bitcodeSize, uint32_t flags, android::renderscript::RsHalSymbolLookupFunc lookupFunc); void rsdScriptInvokeFunction(const android::renderscript::Context *dc, - const android::renderscript::Script *script, + android::renderscript::Script *script, uint32_t slot, const void *params, size_t paramLength); + +void rsdScriptInvokeForEach(const android::renderscript::Context *rsc, + android::renderscript::Script *s, + const android::renderscript::Allocation * ain, + android::renderscript::Allocation * aout, + const void * usr, + uint32_t usrLen, + const RsScriptCall *sc); + int rsdScriptInvokeRoot(const android::renderscript::Context *dc, - const android::renderscript::Script *script); + android::renderscript::Script *script); void rsdScriptInvokeInit(const android::renderscript::Context *dc, - const android::renderscript::Script *script); + android::renderscript::Script *script); void rsdScriptSetGlobalVar(const android::renderscript::Context *, const android::renderscript::Script *, diff --git a/libs/rs/driver/rsdCore.cpp b/libs/rs/driver/rsdCore.cpp index 79fcab5d6d2b..bb6cce9419da 100644 --- a/libs/rs/driver/rsdCore.cpp +++ b/libs/rs/driver/rsdCore.cpp @@ -20,16 +20,29 @@ #include <malloc.h> #include "rsContext.h" +#include <sys/types.h> +#include <sys/resource.h> +#include <sched.h> +#include <cutils/properties.h> +#include <cutils/sched_policy.h> +#include <sys/syscall.h> +#include <string.h> + using namespace android; using namespace android::renderscript; +static void Shutdown(Context *rsc); +static void SetPriority(const Context *rsc, int32_t priority); + static RsdHalFunctions FunctionTable = { + Shutdown, NULL, - NULL, + SetPriority, { rsdScriptInit, rsdScriptInvokeFunction, rsdScriptInvokeRoot, + rsdScriptInvokeForEach, rsdScriptInvokeInit, rsdScriptSetGlobalVar, rsdScriptSetGlobalBind, @@ -39,16 +52,134 @@ static RsdHalFunctions FunctionTable = { }; + +static void * HelperThreadProc(void *vrsc) { + Context *rsc = static_cast<Context *>(vrsc); + RsHal *dc = (RsHal *)rsc->mHal.drv; + + + uint32_t idx = (uint32_t)android_atomic_inc(&dc->mWorkers.mLaunchCount); + + //LOGV("RS helperThread starting %p idx=%i", rsc, idx); + + dc->mWorkers.mLaunchSignals[idx].init(); + dc->mWorkers.mNativeThreadId[idx] = gettid(); + +#if 0 + typedef struct {uint64_t bits[1024 / 64]; } cpu_set_t; + cpu_set_t cpuset; + memset(&cpuset, 0, sizeof(cpuset)); + cpuset.bits[idx / 64] |= 1ULL << (idx % 64); + int ret = syscall(241, rsc->mWorkers.mNativeThreadId[idx], + sizeof(cpuset), &cpuset); + LOGE("SETAFFINITY ret = %i %s", ret, EGLUtils::strerror(ret)); +#endif + + int status = pthread_setspecific(rsc->gThreadTLSKey, rsc->mTlsStruct); + if (status) { + LOGE("pthread_setspecific %i", status); + } + + while (!dc->mExit) { + dc->mWorkers.mLaunchSignals[idx].wait(); + if (dc->mWorkers.mLaunchCallback) { + dc->mWorkers.mLaunchCallback(dc->mWorkers.mLaunchData, idx); + } + android_atomic_dec(&dc->mWorkers.mRunningCount); + dc->mWorkers.mCompleteSignal.set(); + } + + //LOGV("RS helperThread exited %p idx=%i", rsc, idx); + return NULL; +} + +void rsdLaunchThreads(Context *rsc, WorkerCallback_t cbk, void *data) { + RsHal *dc = (RsHal *)rsc->mHal.drv; + + dc->mWorkers.mLaunchData = data; + dc->mWorkers.mLaunchCallback = cbk; + android_atomic_release_store(dc->mWorkers.mCount, &dc->mWorkers.mRunningCount); + for (uint32_t ct = 0; ct < dc->mWorkers.mCount; ct++) { + dc->mWorkers.mLaunchSignals[ct].set(); + } + while (android_atomic_acquire_load(&dc->mWorkers.mRunningCount) != 0) { + dc->mWorkers.mCompleteSignal.wait(); + } +} + bool rsdHalInit(Context *rsc, uint32_t version_major, uint32_t version_minor) { rsc->mHal.funcs = FunctionTable; - /* - rsc->mHal.drv = (RsHal *)calloc(1, sizeof(RsHal)); + RsHal *dc = (RsHal *)calloc(1, sizeof(RsHal)); if (!rsc->mHal.drv) { return false; } - */ + rsc->mHal.drv = dc; + + int cpu = sysconf(_SC_NPROCESSORS_ONLN); + LOGV("RS Launching thread(s), reported CPU count %i", cpu); + if (cpu < 2) cpu = 0; + + dc->mWorkers.mCount = (uint32_t)cpu; + dc->mWorkers.mThreadId = (pthread_t *) calloc(dc->mWorkers.mCount, sizeof(pthread_t)); + dc->mWorkers.mNativeThreadId = (pid_t *) calloc(dc->mWorkers.mCount, sizeof(pid_t)); + dc->mWorkers.mLaunchSignals = new Signal[dc->mWorkers.mCount]; + dc->mWorkers.mLaunchCallback = NULL; + + dc->mWorkers.mCompleteSignal.init(); + + android_atomic_release_store(dc->mWorkers.mCount, &dc->mWorkers.mRunningCount); + android_atomic_release_store(0, &dc->mWorkers.mLaunchCount); + + int status; + pthread_attr_t threadAttr; + status = pthread_attr_init(&threadAttr); + if (status) { + LOGE("Failed to init thread attribute."); + return false; + } + + for (uint32_t ct=0; ct < dc->mWorkers.mCount; ct++) { + status = pthread_create(&dc->mWorkers.mThreadId[ct], &threadAttr, HelperThreadProc, rsc); + if (status) { + dc->mWorkers.mCount = ct; + LOGE("Created fewer than expected number of RS threads."); + break; + } + } + while (android_atomic_acquire_load(&dc->mWorkers.mRunningCount) != 0) { + usleep(100); + } + + pthread_attr_destroy(&threadAttr); return true; } + +void SetPriority(const Context *rsc, int32_t priority) { + RsHal *dc = (RsHal *)rsc->mHal.drv; + for (uint32_t ct=0; ct < dc->mWorkers.mCount; ct++) { + setpriority(PRIO_PROCESS, dc->mWorkers.mNativeThreadId[ct], priority); + } +} + +void Shutdown(Context *rsc) { + RsHal *dc = (RsHal *)rsc->mHal.drv; + + dc->mExit = true; + dc->mWorkers.mLaunchData = NULL; + dc->mWorkers.mLaunchCallback = NULL; + android_atomic_release_store(dc->mWorkers.mCount, &dc->mWorkers.mRunningCount); + for (uint32_t ct = 0; ct < dc->mWorkers.mCount; ct++) { + dc->mWorkers.mLaunchSignals[ct].set(); + } + int status; + void *res; + for (uint32_t ct = 0; ct < dc->mWorkers.mCount; ct++) { + status = pthread_join(dc->mWorkers.mThreadId[ct], &res); + } + rsAssert(android_atomic_acquire_load(&dc->mWorkers.mRunningCount) == 0); +} + + diff --git a/libs/rs/driver/rsdCore.h b/libs/rs/driver/rsdCore.h index 78596a1689f4..02b2fbc5441c 100644 --- a/libs/rs/driver/rsdCore.h +++ b/libs/rs/driver/rsdCore.h @@ -20,28 +20,36 @@ #include <rs_hal.h> #include <bcc/bcc.h> +#include "rsMutex.h" +#include "rsSignal.h" + + typedef void (* InvokeFunc_t)(void); +typedef void (*WorkerCallback_t)(void *usr, uint32_t idx); -struct RsHalRec { +typedef struct RsHalRec { uint32_t version_major; uint32_t version_minor; -}; - -struct RsHalProgramStoreRec { -}; - -struct RsHalProgramRasterRec { -}; - -struct RsHalProgramVertexRec { -}; -struct RsHalProgramFragmentRec { + struct Workers { + volatile int mRunningCount; + volatile int mLaunchCount; + uint32_t mCount; + pthread_t *mThreadId; + pid_t *mNativeThreadId; + android::renderscript::Signal mCompleteSignal; -}; + android::renderscript::Signal *mLaunchSignals; + WorkerCallback_t mLaunchCallback; + void *mLaunchData; + }; + Workers mWorkers; + bool mExit; +} RsHal; +void rsdLaunchThreads(android::renderscript::Context *rsc, WorkerCallback_t cbk, void *data); #endif diff --git a/libs/rs/rsContext.cpp b/libs/rs/rsContext.cpp index 7dc26d23a6a3..339a7737d6ca 100644 --- a/libs/rs/rsContext.cpp +++ b/libs/rs/rsContext.cpp @@ -554,56 +554,6 @@ void Context::destroyWorkerThreadResources() { mExit = true; } -void * Context::helperThreadProc(void *vrsc) { - Context *rsc = static_cast<Context *>(vrsc); - uint32_t idx = (uint32_t)android_atomic_inc(&rsc->mWorkers.mLaunchCount); - - //LOGV("RS helperThread starting %p idx=%i", rsc, idx); - - rsc->mWorkers.mLaunchSignals[idx].init(); - rsc->mWorkers.mNativeThreadId[idx] = gettid(); - -#if 0 - typedef struct {uint64_t bits[1024 / 64]; } cpu_set_t; - cpu_set_t cpuset; - memset(&cpuset, 0, sizeof(cpuset)); - cpuset.bits[idx / 64] |= 1ULL << (idx % 64); - int ret = syscall(241, rsc->mWorkers.mNativeThreadId[idx], - sizeof(cpuset), &cpuset); - LOGE("SETAFFINITY ret = %i %s", ret, EGLUtils::strerror(ret)); -#endif - - setpriority(PRIO_PROCESS, rsc->mWorkers.mNativeThreadId[idx], rsc->mThreadPriority); - int status = pthread_setspecific(rsc->gThreadTLSKey, rsc->mTlsStruct); - if (status) { - LOGE("pthread_setspecific %i", status); - } - - while (!rsc->mExit) { - rsc->mWorkers.mLaunchSignals[idx].wait(); - if (rsc->mWorkers.mLaunchCallback) { - rsc->mWorkers.mLaunchCallback(rsc->mWorkers.mLaunchData, idx); - } - android_atomic_dec(&rsc->mWorkers.mRunningCount); - rsc->mWorkers.mCompleteSignal.set(); - } - - //LOGV("RS helperThread exited %p idx=%i", rsc, idx); - return NULL; -} - -void Context::launchThreads(WorkerCallback_t cbk, void *data) { - mWorkers.mLaunchData = data; - mWorkers.mLaunchCallback = cbk; - android_atomic_release_store(mWorkers.mCount, &mWorkers.mRunningCount); - for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) { - mWorkers.mLaunchSignals[ct].set(); - } - while (android_atomic_acquire_load(&mWorkers.mRunningCount) != 0) { - mWorkers.mCompleteSignal.wait(); - } -} - void Context::setPriority(int32_t p) { // Note: If we put this in the proper "background" policy // the wallpapers can become completly unresponsive at times. @@ -620,9 +570,6 @@ void Context::setPriority(int32_t p) { } #else setpriority(PRIO_PROCESS, mNativeThreadId, p); - for (uint32_t ct=0; ct < mWorkers.mCount; ct++) { - setpriority(PRIO_PROCESS, mWorkers.mNativeThreadId[ct], p); - } #endif } @@ -691,16 +638,8 @@ bool Context::initContext(Device *dev, const RsSurfaceConfig *sc) { if (!rsdHalInit(this, 0, 0)) { return false; } + mHal.funcs.setPriority(this, mThreadPriority); - int cpu = sysconf(_SC_NPROCESSORS_ONLN); - LOGV("RS Launching thread(s), reported CPU count %i", cpu); - if (cpu < 2) cpu = 0; - - mWorkers.mCount = (uint32_t)cpu; - mWorkers.mThreadId = (pthread_t *) calloc(mWorkers.mCount, sizeof(pthread_t)); - mWorkers.mNativeThreadId = (pid_t *) calloc(mWorkers.mCount, sizeof(pid_t)); - mWorkers.mLaunchSignals = new Signal[mWorkers.mCount]; - mWorkers.mLaunchCallback = NULL; status = pthread_create(&mThreadId, &threadAttr, threadProc, this); if (status) { LOGE("Failed to start rs context thread."); @@ -714,20 +653,6 @@ bool Context::initContext(Device *dev, const RsSurfaceConfig *sc) { return false; } - mWorkers.mCompleteSignal.init(); - android_atomic_release_store(mWorkers.mCount, &mWorkers.mRunningCount); - android_atomic_release_store(0, &mWorkers.mLaunchCount); - for (uint32_t ct=0; ct < mWorkers.mCount; ct++) { - status = pthread_create(&mWorkers.mThreadId[ct], &threadAttr, helperThreadProc, this); - if (status) { - mWorkers.mCount = ct; - LOGE("Created fewer than expected number of RS threads."); - break; - } - } - while (android_atomic_acquire_load(&mWorkers.mRunningCount) != 0) { - usleep(100); - } pthread_attr_destroy(&threadAttr); return true; } @@ -744,17 +669,10 @@ Context::~Context() { mIO.shutdown(); int status = pthread_join(mThreadId, &res); - // Cleanup compute threads. - mWorkers.mLaunchData = NULL; - mWorkers.mLaunchCallback = NULL; - android_atomic_release_store(mWorkers.mCount, &mWorkers.mRunningCount); - for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) { - mWorkers.mLaunchSignals[ct].set(); - } - for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) { - status = pthread_join(mWorkers.mThreadId[ct], &res); + + if (mHal.funcs.shutdownDriver) { + mHal.funcs.shutdownDriver(this); } - rsAssert(android_atomic_acquire_load(&mWorkers.mRunningCount) == 0); // Global structure cleanup. pthread_mutex_lock(&gInitMutex); diff --git a/libs/rs/rsContext.h b/libs/rs/rsContext.h index dee16d6ebd25..72574a60a37d 100644 --- a/libs/rs/rsContext.h +++ b/libs/rs/rsContext.h @@ -43,7 +43,6 @@ #include "rsgApiStructs.h" #include "rsLocklessFifo.h" - #include <ui/egl/android_natives.h> #endif // ANDROID_RS_SERIALIZE @@ -91,15 +90,6 @@ public: // Library mutex (for providing thread-safe calls from the runtime) static pthread_mutex_t gLibMutex; - struct ScriptTLSStruct { - Context * mContext; - Script * mScript; - }; - - //const RsHalComputeFunctions *mHalComputeFuncs; - //const RsHalGraphicsFunctions *mHalGraphicsFuncs; - //RsHal *mHal; - class PushState { public: PushState(Context *); @@ -117,9 +107,6 @@ public: ScriptTLSStruct *mTlsStruct; RsSurfaceConfig mUserSurfaceConfig; - typedef void (*WorkerCallback_t)(void *usr, uint32_t idx); - - //StructuredAllocationContext mStateAllocation; ElementState mStateElement; TypeState mStateType; SamplerState mStateSampler; @@ -230,8 +217,6 @@ public: uint32_t getMaxVertexUniformVectors() const {return mGL.mMaxVertexUniformVectors;} uint32_t getMaxVertexAttributes() const {return mGL.mMaxVertexAttribs;} - void launchThreads(WorkerCallback_t cbk, void *data); - uint32_t getWorkerPoolSize() const {return (uint32_t)mWorkers.mCount;} uint32_t getDPI() const {return mDPI;} void setDPI(uint32_t dpi) {mDPI = dpi;} @@ -288,20 +273,6 @@ protected: pthread_t mThreadId; pid_t mNativeThreadId; - struct Workers { - volatile int mRunningCount; - volatile int mLaunchCount; - uint32_t mCount; - pthread_t *mThreadId; - pid_t *mNativeThreadId; - Signal mCompleteSignal; - - Signal *mLaunchSignals; - WorkerCallback_t mLaunchCallback; - void *mLaunchData; - }; - Workers mWorkers; - ObjectBaseRef<Script> mRootScript; ObjectBaseRef<ProgramFragment> mFragment; ObjectBaseRef<ProgramVertex> mVertex; diff --git a/libs/rs/rsScriptC.cpp b/libs/rs/rsScriptC.cpp index f99534fa8e08..d5c486b14d3d 100644 --- a/libs/rs/rsScriptC.cpp +++ b/libs/rs/rsScriptC.cpp @@ -38,9 +38,6 @@ ScriptC::ScriptC(Context *rsc) : Script(rsc) { ScriptC::~ScriptC() { mRSC->mHal.funcs.script.destroy(mRSC, this); - - //free(mEnviroment.mScriptText); - //mEnviroment.mScriptText = NULL; } void ScriptC::setupScript(Context *rsc) { @@ -79,15 +76,6 @@ const Allocation *ScriptC::ptrToAllocation(const void *ptr) const { return NULL; } -Script * ScriptC::setTLS(Script *sc) { - Context::ScriptTLSStruct * tls = (Context::ScriptTLSStruct *) - pthread_getspecific(Context::gThreadTLSKey); - rsAssert(tls); - Script *old = tls->mScript; - tls->mScript = sc; - return old; -} - void ScriptC::setupGLState(Context *rsc) { if (mEnviroment.mFragmentStore.get()) { rsc->setProgramStore(mEnviroment.mFragmentStore.get()); @@ -113,215 +101,32 @@ uint32_t ScriptC::run(Context *rsc) { setupScript(rsc); uint32_t ret = 0; - Script * oldTLS = setTLS(this); if (rsc->props.mLogScripts) { LOGV("%p ScriptC::run invoking root, ptr %p", rsc, mHal.info.root); } - ret = mHal.info.root(); + ret = rsc->mHal.funcs.script.invokeRoot(rsc, this); if (rsc->props.mLogScripts) { LOGV("%p ScriptC::run invoking complete, ret=%i", rsc, ret); } - setTLS(oldTLS); return ret; } -typedef struct { - Context *rsc; - ScriptC *script; - const Allocation * ain; - Allocation * aout; - const void * usr; - - uint32_t mSliceSize; - volatile int mSliceNum; - - const uint8_t *ptrIn; - uint32_t eStrideIn; - uint8_t *ptrOut; - uint32_t eStrideOut; - - uint32_t xStart; - uint32_t xEnd; - uint32_t yStart; - uint32_t yEnd; - uint32_t zStart; - uint32_t zEnd; - uint32_t arrayStart; - uint32_t arrayEnd; - - uint32_t dimX; - uint32_t dimY; - uint32_t dimZ; - uint32_t dimArray; -} MTLaunchStruct; -typedef int (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); - -static void wc_xy(void *usr, uint32_t idx) { - MTLaunchStruct *mtls = (MTLaunchStruct *)usr; - - while (1) { - uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); - uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize; - uint32_t yEnd = yStart + mtls->mSliceSize; - yEnd = rsMin(yEnd, mtls->yEnd); - if (yEnd <= yStart) { - return; - } - - //LOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd); - //LOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); - for (uint32_t y = yStart; y < yEnd; y++) { - uint32_t offset = mtls->dimX * y; - uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * offset); - const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * offset); - - for (uint32_t x = mtls->xStart; x < mtls->xEnd; x++) { - ((rs_t)mtls->script->mHal.info.root) (xPtrIn, xPtrOut, mtls->usr, x, y, 0, 0); - xPtrIn += mtls->eStrideIn; - xPtrOut += mtls->eStrideOut; - } - } - } -} - -static void wc_x(void *usr, uint32_t idx) { - MTLaunchStruct *mtls = (MTLaunchStruct *)usr; - - while (1) { - uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); - uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize; - uint32_t xEnd = xStart + mtls->mSliceSize; - xEnd = rsMin(xEnd, mtls->xEnd); - if (xEnd <= xStart) { - return; - } - - //LOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd); - //LOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut); - uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * xStart); - const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * xStart); - for (uint32_t x = xStart; x < xEnd; x++) { - ((rs_t)mtls->script->mHal.info.root) (xPtrIn, xPtrOut, mtls->usr, x, 0, 0, 0); - xPtrIn += mtls->eStrideIn; - xPtrOut += mtls->eStrideOut; - } - } -} void ScriptC::runForEach(Context *rsc, const Allocation * ain, Allocation * aout, const void * usr, const RsScriptCall *sc) { - MTLaunchStruct mtls; - memset(&mtls, 0, sizeof(mtls)); - Context::PushState ps(rsc); - - - if (ain) { - mtls.dimX = ain->getType()->getDimX(); - mtls.dimY = ain->getType()->getDimY(); - mtls.dimZ = ain->getType()->getDimZ(); - //mtls.dimArray = ain->getType()->getDimArray(); - } else if (aout) { - mtls.dimX = aout->getType()->getDimX(); - mtls.dimY = aout->getType()->getDimY(); - mtls.dimZ = aout->getType()->getDimZ(); - //mtls.dimArray = aout->getType()->getDimArray(); - } else { - rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations"); - return; - } - - if (!sc || (sc->xEnd == 0)) { - mtls.xEnd = mtls.dimX; - } else { - rsAssert(sc->xStart < mtls.dimX); - rsAssert(sc->xEnd <= mtls.dimX); - rsAssert(sc->xStart < sc->xEnd); - mtls.xStart = rsMin(mtls.dimX, sc->xStart); - mtls.xEnd = rsMin(mtls.dimX, sc->xEnd); - if (mtls.xStart >= mtls.xEnd) return; - } - - if (!sc || (sc->yEnd == 0)) { - mtls.yEnd = mtls.dimY; - } else { - rsAssert(sc->yStart < mtls.dimY); - rsAssert(sc->yEnd <= mtls.dimY); - rsAssert(sc->yStart < sc->yEnd); - mtls.yStart = rsMin(mtls.dimY, sc->yStart); - mtls.yEnd = rsMin(mtls.dimY, sc->yEnd); - if (mtls.yStart >= mtls.yEnd) return; - } - - mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd); - mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd); - mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd); - mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd); - rsAssert(ain->getType()->getDimZ() == 0); + Context::PushState ps(rsc); setupGLState(rsc); setupScript(rsc); - Script * oldTLS = setTLS(this); - - mtls.rsc = rsc; - mtls.ain = ain; - mtls.aout = aout; - mtls.script = this; - mtls.usr = usr; - mtls.mSliceSize = 10; - mtls.mSliceNum = 0; - - mtls.ptrIn = NULL; - mtls.eStrideIn = 0; - if (ain) { - mtls.ptrIn = (const uint8_t *)ain->getPtr(); - mtls.eStrideIn = ain->getType()->getElementSizeBytes(); - } - - mtls.ptrOut = NULL; - mtls.eStrideOut = 0; - if (aout) { - mtls.ptrOut = (uint8_t *)aout->getPtr(); - mtls.eStrideOut = aout->getType()->getElementSizeBytes(); - } - - if ((rsc->getWorkerPoolSize() > 1) && mHal.info.isThreadable) { - if (mtls.dimY > 1) { - rsc->launchThreads(wc_xy, &mtls); - } else { - rsc->launchThreads(wc_x, &mtls); - } - - //LOGE("launch 1"); - } else { - //LOGE("launch 3"); - for (uint32_t ar = mtls.arrayStart; ar < mtls.arrayEnd; ar++) { - for (uint32_t z = mtls.zStart; z < mtls.zEnd; z++) { - for (uint32_t y = mtls.yStart; y < mtls.yEnd; y++) { - uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * ar + - mtls.dimX * mtls.dimY * z + - mtls.dimX * y; - uint8_t *xPtrOut = mtls.ptrOut + (mtls.eStrideOut * offset); - const uint8_t *xPtrIn = mtls.ptrIn + (mtls.eStrideIn * offset); - - for (uint32_t x = mtls.xStart; x < mtls.xEnd; x++) { - ((rs_t)mHal.info.root) (xPtrIn, xPtrOut, usr, x, y, z, ar); - xPtrIn += mtls.eStrideIn; - xPtrOut += mtls.eStrideOut; - } - } - } - } - } - - setTLS(oldTLS); + rsc->mHal.funcs.script.invokeForEach(rsc, this, ain, aout, usr, 0, sc); } void ScriptC::Invoke(Context *rsc, uint32_t slot, const void *data, uint32_t len) { @@ -330,14 +135,11 @@ void ScriptC::Invoke(Context *rsc, uint32_t slot, const void *data, uint32_t len return; } setupScript(rsc); - Script * oldTLS = setTLS(this); if (rsc->props.mLogScripts) { LOGV("%p ScriptC::Invoke invoking slot %i, ptr %p", rsc, slot, this); } rsc->mHal.funcs.script.invokeFunction(rsc, this, slot, data, len); - - setTLS(oldTLS); } ScriptCState::ScriptCState() { diff --git a/libs/rs/rsScriptC.h b/libs/rs/rsScriptC.h index da5cb2bc00f5..2edeb9b42c35 100644 --- a/libs/rs/rsScriptC.h +++ b/libs/rs/rsScriptC.h @@ -56,7 +56,7 @@ public: bool runCompiler(Context *rsc, const char *resName, const char *cacheDir, const uint8_t *bitcode, size_t bitcodeLen); -protected: +//protected: void setupScript(Context *); void setupGLState(Context *); Script * setTLS(Script *); diff --git a/libs/rs/rsScriptC_Lib.cpp b/libs/rs/rsScriptC_Lib.cpp index 8095f5a82665..4e8cbdcc60be 100644 --- a/libs/rs/rsScriptC_Lib.cpp +++ b/libs/rs/rsScriptC_Lib.cpp @@ -25,8 +25,8 @@ using namespace android; using namespace android::renderscript; -#define GET_TLS() Context::ScriptTLSStruct * tls = \ - (Context::ScriptTLSStruct *)pthread_getspecific(Context::gThreadTLSKey); \ +#define GET_TLS() ScriptTLSStruct * tls = \ + (ScriptTLSStruct *)pthread_getspecific(Context::gThreadTLSKey); \ Context * rsc = tls->mContext; \ ScriptC * sc = (ScriptC *) tls->mScript @@ -1006,6 +1006,3 @@ const ScriptCState::SymbolTable_t * ScriptCState::lookupSymbol(const char *sym) return NULL; } - - - diff --git a/libs/rs/rsScriptC_LibGL.cpp b/libs/rs/rsScriptC_LibGL.cpp index 15426bc33f9f..4047049a8ae5 100644 --- a/libs/rs/rsScriptC_LibGL.cpp +++ b/libs/rs/rsScriptC_LibGL.cpp @@ -32,8 +32,8 @@ using namespace android; using namespace android::renderscript; -#define GET_TLS() Context::ScriptTLSStruct * tls = \ - (Context::ScriptTLSStruct *)pthread_getspecific(Context::gThreadTLSKey); \ +#define GET_TLS() ScriptTLSStruct * tls = \ + (ScriptTLSStruct *)pthread_getspecific(Context::gThreadTLSKey); \ Context * rsc = tls->mContext; \ ScriptC * sc = (ScriptC *) tls->mScript diff --git a/libs/rs/rs_hal.h b/libs/rs/rs_hal.h index 48e3f369f3c1..17983ce76484 100644 --- a/libs/rs/rs_hal.h +++ b/libs/rs/rs_hal.h @@ -31,18 +31,21 @@ class Script; class ScriptC; -typedef struct RsHalRec RsHal; - typedef void *(*RsHalSymbolLookupFunc)(void *usrptr, char const *symbolName); +typedef struct ScriptTLSStructRec { + Context * mContext; + Script * mScript; +} ScriptTLSStruct; /** * Script management functions */ typedef struct { - void (*shutdownDriver)(RsHal dc); + void (*shutdownDriver)(Context *); void (*getVersion)(unsigned int *major, unsigned int *minor); + void (*setPriority)(const Context *, int32_t priority); @@ -55,12 +58,19 @@ typedef struct { uint32_t flags, RsHalSymbolLookupFunc lookupFunc); - void (*invokeFunction)(const Context *rsc, const Script *s, + void (*invokeFunction)(const Context *rsc, Script *s, uint32_t slot, const void *params, size_t paramLength); - int (*invokeRoot)(const Context *rsc, const Script *s); - void (*invokeInit)(const Context *rsc, const Script *s); + int (*invokeRoot)(const Context *rsc, Script *s); + void (*invokeForEach)(const Context *rsc, + Script *s, + const Allocation * ain, + Allocation * aout, + const void * usr, + uint32_t usrLen, + const RsScriptCall *sc); + void (*invokeInit)(const Context *rsc, Script *s); void (*setGlobalVar)(const Context *rsc, const Script *s, uint32_t slot, |