diff options
author | 2010-07-19 15:38:19 -0700 | |
---|---|---|
committer | 2010-07-19 15:38:19 -0700 | |
commit | 8e6c17fcc40bfb2391dcadecb397431239ab8a18 (patch) | |
tree | 78253130ab783fb7bf0885a8ef121edd0bba2f04 | |
parent | 6ef3593c3666dda3718e7e06547498ec33451690 (diff) |
Threading RS ForEach.
Change-Id: I5d6fe4db2b6ac0613394bc5a066ff90ec146d60e
-rw-r--r-- | libs/rs/java/ImageProcessing/res/raw/horizontal_blur.rs | 7 | ||||
-rw-r--r-- | libs/rs/java/ImageProcessing/res/raw/horizontal_blur_bc.bc | bin | 1324 -> 1348 bytes | |||
-rw-r--r-- | libs/rs/java/ImageProcessing/res/raw/ip.rsh | 2 | ||||
-rw-r--r-- | libs/rs/java/ImageProcessing/res/raw/threshold.rs | 123 | ||||
-rw-r--r-- | libs/rs/java/ImageProcessing/res/raw/threshold_bc.bc | bin | 7208 -> 3304 bytes | |||
-rw-r--r-- | libs/rs/java/ImageProcessing/res/raw/vertical_blur.rs | 20 | ||||
-rw-r--r-- | libs/rs/java/ImageProcessing/res/raw/vertical_blur_bc.bc | bin | 1464 -> 1468 bytes | |||
-rw-r--r-- | libs/rs/java/ImageProcessing/src/com/android/rs/image/ImageProcessingActivity.java | 5 | ||||
-rw-r--r-- | libs/rs/java/ImageProcessing/src/com/android/rs/image/ScriptC_Horizontal_blur.java | 12 | ||||
-rw-r--r-- | libs/rs/java/ImageProcessing/src/com/android/rs/image/ScriptC_Vertical_blur.java | 12 | ||||
-rw-r--r-- | libs/rs/rsContext.cpp | 67 | ||||
-rw-r--r-- | libs/rs/rsContext.h | 18 | ||||
-rw-r--r-- | libs/rs/rsScriptC.cpp | 161 | ||||
-rw-r--r-- | libs/rs/rsScriptC_Lib.cpp | 33 | ||||
-rw-r--r-- | libs/rs/rsUtils.h | 1 | ||||
-rw-r--r-- | libs/rs/scriptc/rs_math.rsh | 6 |
16 files changed, 286 insertions, 181 deletions
diff --git a/libs/rs/java/ImageProcessing/res/raw/horizontal_blur.rs b/libs/rs/java/ImageProcessing/res/raw/horizontal_blur.rs index 7b0e6bcc68fb..10815fb33075 100644 --- a/libs/rs/java/ImageProcessing/res/raw/horizontal_blur.rs +++ b/libs/rs/java/ImageProcessing/res/raw/horizontal_blur.rs @@ -5,17 +5,14 @@ #include "ip.rsh" -uchar4 * ScratchPixel; - -#pragma rs export_var(ScratchPixel) - void root(const void *v_in, void *v_out, const void *usrData, uint32_t x, uint32_t y) { uchar4 *output = (uchar4 *)v_out; - const uchar4 *input = (uchar4 *)v_in; const FilterStruct *fs = (const FilterStruct *)usrData; + const uchar4 *input = (const uchar4 *)rsGetElementAt(fs->ain, 0, y); float4 blurredPixel = 0; float4 currentPixel = 0; + for(int r = -fs->radius; r <= fs->radius; r ++) { // Stepping left and right away from the pixel int validW = x + r; diff --git a/libs/rs/java/ImageProcessing/res/raw/horizontal_blur_bc.bc b/libs/rs/java/ImageProcessing/res/raw/horizontal_blur_bc.bc Binary files differindex c9ba5d9c5cec..5920f3ab0d47 100644 --- a/libs/rs/java/ImageProcessing/res/raw/horizontal_blur_bc.bc +++ b/libs/rs/java/ImageProcessing/res/raw/horizontal_blur_bc.bc diff --git a/libs/rs/java/ImageProcessing/res/raw/ip.rsh b/libs/rs/java/ImageProcessing/res/raw/ip.rsh index 4073304f6490..dea92c3568d2 100644 --- a/libs/rs/java/ImageProcessing/res/raw/ip.rsh +++ b/libs/rs/java/ImageProcessing/res/raw/ip.rsh @@ -3,6 +3,8 @@ #define MAX_RADIUS 25 typedef struct { + rs_allocation ain; + float *gaussian; //[MAX_RADIUS * 2 + 1]; rs_matrix3x3 colorMat; diff --git a/libs/rs/java/ImageProcessing/res/raw/threshold.rs b/libs/rs/java/ImageProcessing/res/raw/threshold.rs index ecbfac4631e6..aa6b6fa8a179 100644 --- a/libs/rs/java/ImageProcessing/res/raw/threshold.rs +++ b/libs/rs/java/ImageProcessing/res/raw/threshold.rs @@ -24,7 +24,6 @@ float saturation; static float inWMinInB; static float outWMinOutB; static float overInWMinInB; -static FilterStruct filterStruct; #pragma rs export_var(height, width, radius, InPixel, OutPixel, ScratchPixel, inBlack, outBlack, inWhite, outWhite, gamma, saturation, InPixel, OutPixel, ScratchPixel, vBlurScript, hBlurScript) #pragma rs export_func(filter, filterBenchmark); @@ -106,138 +105,70 @@ static void computeGaussianWeights() { } } -// This needs to be inline -static float4 levelsSaturation(float4 currentPixel) { - float3 temp = rsMatrixMultiply(&colorMat, currentPixel.xyz); - temp = (clamp(temp, 0.1f, 255.f) - inBlack) * overInWMinInB; - temp = pow(temp, (float3)gamma); - currentPixel.xyz = clamp(temp * outWMinOutB + outBlack, 0.1f, 255.f); - return currentPixel; -} - static void processNoBlur() { - int w, h, r; - int count = 0; - float inWMinInB = inWhite - inBlack; float outWMinOutB = outWhite - outBlack; float4 currentPixel = 0; - for(h = 0; h < height; h ++) { - for(w = 0; w < width; w ++) { - uchar4 *input = InPixel + h*width + w; + for(int h = 0; h < height; h ++) { + uchar4 *input = InPixel + h*width; + uchar4 *output = OutPixel + h*width; + for(int w = 0; w < width; w ++) { //currentPixel.xyz = convert_float3(input.xyz); currentPixel.x = (float)(input->x); currentPixel.y = (float)(input->y); currentPixel.z = (float)(input->z); - currentPixel = levelsSaturation(currentPixel); + float3 temp = rsMatrixMultiply(&colorMat, currentPixel.xyz); + temp = (clamp(temp, 0.f, 255.f) - inBlack) * overInWMinInB; + temp = pow(temp, (float3)gamma); + currentPixel.xyz = clamp(temp * outWMinOutB + outBlack, 0.f, 255.f); - uchar4 *output = OutPixel + h*width + w; //output.xyz = convert_uchar3(currentPixel.xyz); output->x = (uint8_t)currentPixel.x; output->y = (uint8_t)currentPixel.y; output->z = (uint8_t)currentPixel.z; output->w = input->w; - } - } - rsSendToClient(&count, 1, 4, 0); -} - -static void horizontalBlurLevels() { - float4 blurredPixel = 0; - float4 currentPixel = 0; - // Horizontal blur - int w, h, r; - for(h = 0; h < height; h ++) { - uchar4 *output = OutPixel + h*width; - for(w = 0; w < width; w ++) { - blurredPixel = 0; - - for(r = -radius; r <= radius; r ++) { - // Stepping left and right away from the pixel - int validW = w + r; - // Clamp to zero and width max() isn't exposed for ints yet - if(validW < 0) { - validW = 0; - } - if(validW > width - 1) { - validW = width - 1; - } - //int validW = rsClamp(w + r, 0, width - 1); - - uchar4 *input = InPixel + h*width + validW; - - float weight = gaussian[r + radius]; - currentPixel.x = (float)(input->x); - currentPixel.y = (float)(input->y); - currentPixel.z = (float)(input->z); - //currentPixel.w = (float)(input->a); - - blurredPixel.xyz += currentPixel.xyz * weight; - } - - blurredPixel = levelsSaturation(blurredPixel); - - output->x = (uint8_t)blurredPixel.x; - output->y = (uint8_t)blurredPixel.y; - output->z = (uint8_t)blurredPixel.z; - //output->a = (uint8_t)blurredPixel.w; + input++; output++; } } } -static void initStructs() { - filterStruct.gaussian = gaussian; - filterStruct.width = width; - filterStruct.height = height; - filterStruct.radius = radius; +static void blur() { + computeGaussianWeights(); + + FilterStruct fs; + fs.gaussian = gaussian; + fs.width = width; + fs.height = height; + fs.radius = radius; + + fs.ain = rsGetAllocation(InPixel); + rsForEach(hBlurScript, fs.ain, rsGetAllocation(ScratchPixel), &fs); + + fs.ain = rsGetAllocation(ScratchPixel); + rsForEach(vBlurScript, fs.ain, rsGetAllocation(OutPixel), &fs); } void filter() { - RS_DEBUG(height); - RS_DEBUG(width); RS_DEBUG(radius); - initStructs(); - computeColorMatrix(); - if(radius == 0) { - processNoBlur(); - return; + if(radius > 0) { + blur(); } - - computeGaussianWeights(); - - horizontalBlurLevels(); - - rsForEach(vBlurScript, - rsGetAllocation(InPixel), - rsGetAllocation(OutPixel), - &filterStruct); + processNoBlur(); int count = 0; rsSendToClient(&count, 1, 4, 0); } void filterBenchmark() { - initStructs(); - - computeGaussianWeights(); - - rsForEach(hBlurScript, - rsGetAllocation(InPixel), - rsGetAllocation(OutPixel), - &filterStruct); - - rsForEach(vBlurScript, - rsGetAllocation(InPixel), - rsGetAllocation(OutPixel), - &filterStruct); + blur(); int count = 0; rsSendToClient(&count, 1, 4, 0); diff --git a/libs/rs/java/ImageProcessing/res/raw/threshold_bc.bc b/libs/rs/java/ImageProcessing/res/raw/threshold_bc.bc Binary files differindex 8f37fdc349cc..2b5d2543c38d 100644 --- a/libs/rs/java/ImageProcessing/res/raw/threshold_bc.bc +++ b/libs/rs/java/ImageProcessing/res/raw/threshold_bc.bc diff --git a/libs/rs/java/ImageProcessing/res/raw/vertical_blur.rs b/libs/rs/java/ImageProcessing/res/raw/vertical_blur.rs index 846f515a4eb0..f5f2d69b12a4 100644 --- a/libs/rs/java/ImageProcessing/res/raw/vertical_blur.rs +++ b/libs/rs/java/ImageProcessing/res/raw/vertical_blur.rs @@ -5,14 +5,10 @@ #include "ip.rsh" -uchar4 * ScratchPixel; - -#pragma rs export_var(ScratchPixel) - void root(const void *v_in, void *v_out, const void *usrData, uint32_t x, uint32_t y) { uchar4 *output = (uchar4 *)v_out; - const uchar4 *input = (uchar4 *)v_in; const FilterStruct *fs = (const FilterStruct *)usrData; + const uchar4 *input = (const uchar4 *)rsGetElementAt(fs->ain, x, 0); float4 blurredPixel = 0; float4 currentPixel = 0; @@ -27,19 +23,21 @@ void root(const void *v_in, void *v_out, const void *usrData, uint32_t x, uint32 validH = fs->height - 1; } - uchar4 *input = ScratchPixel + validH * fs->width + x; + const uchar4 *i = input + validH * fs->width; + //const uchar4 *i = (const uchar4 *)rsGetElementAt(fs->ain, x, validH); float weight = fs->gaussian[r + fs->radius]; - currentPixel.x = (float)(input->x); - currentPixel.y = (float)(input->y); - currentPixel.z = (float)(input->z); + currentPixel.x = (float)(i->x); + currentPixel.y = (float)(i->y); + currentPixel.z = (float)(i->z); blurredPixel.xyz += currentPixel.xyz * weight; #else int validH = rsClamp(y + r, 0, height - 1); - uchar4 *input = ScratchPixel + validH * width + x; - blurredPixel.xyz += convert_float3(input->xyz) * gaussian[r + fs->radius]; + validH -= y; + uchar4 *i = input + validH * width + x; + blurredPixel.xyz += convert_float3(i->xyz) * gaussian[r + fs->radius]; #endif } diff --git a/libs/rs/java/ImageProcessing/res/raw/vertical_blur_bc.bc b/libs/rs/java/ImageProcessing/res/raw/vertical_blur_bc.bc Binary files differindex af1cd8e09777..be5d0e4ee69b 100644 --- a/libs/rs/java/ImageProcessing/res/raw/vertical_blur_bc.bc +++ b/libs/rs/java/ImageProcessing/res/raw/vertical_blur_bc.bc diff --git a/libs/rs/java/ImageProcessing/src/com/android/rs/image/ImageProcessingActivity.java b/libs/rs/java/ImageProcessing/src/com/android/rs/image/ImageProcessingActivity.java index 21c3d7450096..0ed1185916e4 100644 --- a/libs/rs/java/ImageProcessing/src/com/android/rs/image/ImageProcessingActivity.java +++ b/libs/rs/java/ImageProcessing/src/com/android/rs/image/ImageProcessingActivity.java @@ -376,10 +376,7 @@ public class ImageProcessingActivity extends Activity mScratchPixelsAllocation = Allocation.createBitmapRef(mRS, mBitmapScratch); mScriptVBlur = new ScriptC_Vertical_blur(mRS, getResources(), R.raw.vertical_blur_bc, false); - mScriptVBlur.bind_ScratchPixel(mScratchPixelsAllocation); - mScriptHBlur = new ScriptC_Horizontal_blur(mRS, getResources(), R.raw.horizontal_blur_bc, false); - mScriptHBlur.bind_ScratchPixel(mScratchPixelsAllocation); mScript = new ScriptC_Threshold(mRS, getResources(), R.raw.threshold_bc, false); mScript.set_width(mBitmapIn.getWidth()); @@ -431,8 +428,8 @@ public class ImageProcessingActivity extends Activity android.util.Log.v("Img", "Renderscript frame time core ms " + t); long javaTime = javaFilter(); - mBenchmarkResult.setText("RS: " + t + " ms Java: " + javaTime + " ms"); + //mBenchmarkResult.setText("RS: " + t + " ms"); mRadius = oldRadius; mScript.set_radius(mRadius); diff --git a/libs/rs/java/ImageProcessing/src/com/android/rs/image/ScriptC_Horizontal_blur.java b/libs/rs/java/ImageProcessing/src/com/android/rs/image/ScriptC_Horizontal_blur.java index 8ee50a833aeb..c447b9b6b97b 100644 --- a/libs/rs/java/ImageProcessing/src/com/android/rs/image/ScriptC_Horizontal_blur.java +++ b/libs/rs/java/ImageProcessing/src/com/android/rs/image/ScriptC_Horizontal_blur.java @@ -26,17 +26,5 @@ public class ScriptC_Horizontal_blur extends ScriptC { super(rs, resources, id, isRoot); } - private final static int mExportVarIdx_ScratchPixel = 0; - private Allocation mExportVar_ScratchPixel; - public void bind_ScratchPixel(Allocation v) { - mExportVar_ScratchPixel = v; - if(v == null) bindAllocation(null, mExportVarIdx_ScratchPixel); - else bindAllocation(v, mExportVarIdx_ScratchPixel); - } - - public Allocation get_ScratchPixel() { - return mExportVar_ScratchPixel; - } - } diff --git a/libs/rs/java/ImageProcessing/src/com/android/rs/image/ScriptC_Vertical_blur.java b/libs/rs/java/ImageProcessing/src/com/android/rs/image/ScriptC_Vertical_blur.java index 0215f6067c96..cee74d965b77 100644 --- a/libs/rs/java/ImageProcessing/src/com/android/rs/image/ScriptC_Vertical_blur.java +++ b/libs/rs/java/ImageProcessing/src/com/android/rs/image/ScriptC_Vertical_blur.java @@ -26,17 +26,5 @@ public class ScriptC_Vertical_blur extends ScriptC { super(rs, resources, id, isRoot); } - private final static int mExportVarIdx_ScratchPixel = 0; - private Allocation mExportVar_ScratchPixel; - public void bind_ScratchPixel(Allocation v) { - mExportVar_ScratchPixel = v; - if(v == null) bindAllocation(null, mExportVarIdx_ScratchPixel); - else bindAllocation(v, mExportVarIdx_ScratchPixel); - } - - public Allocation get_ScratchPixel() { - return mExportVar_ScratchPixel; - } - } diff --git a/libs/rs/rsContext.cpp b/libs/rs/rsContext.cpp index 68eca4495acd..629b48114289 100644 --- a/libs/rs/rsContext.cpp +++ b/libs/rs/rsContext.cpp @@ -23,6 +23,7 @@ #include <sys/types.h> #include <sys/resource.h> +#include <sched.h> #include <cutils/properties.h> @@ -355,6 +356,49 @@ void * Context::threadProc(void *vrsc) return NULL; } +void * Context::helperThreadProc(void *vrsc) +{ + Context *rsc = static_cast<Context *>(vrsc); + uint32_t idx = (uint32_t)android_atomic_inc(&rsc->mWorkers.mLaunchCount); + + LOGE("helperThreadProc 1 %p idx=%i", rsc, idx); + + rsc->mWorkers.mLaunchSignals[idx].init(); + rsc->mWorkers.mNativeThreadId[idx] = gettid(); + + //cpu_set_t cpset[16]; + //int ret = sched_getaffinity(rsc->mWorkers.mNativeThreadId[idx], sizeof(cpset), &cpset); + //LOGE("ret = %i", ret); + +//sched_setaffinity + + setpriority(PRIO_PROCESS, rsc->mWorkers.mNativeThreadId[idx], rsc->mThreadPriority); + while(rsc->mRunning) { + rsc->mWorkers.mLaunchSignals[idx].wait(); + if (rsc->mWorkers.mLaunchCallback) { + LOGE("helperThreadProc 4"); + rsc->mWorkers.mLaunchCallback(rsc->mWorkers.mLaunchData, idx); + } + LOGE("helperThreadProc 5"); + android_atomic_dec(&rsc->mWorkers.mRunningCount); + rsc->mWorkers.mCompleteSignal.set(); + } + return NULL; +} + +void Context::launchThreads(WorkerCallback_t cbk, void *data) +{ + mWorkers.mLaunchData = data; + mWorkers.mLaunchCallback = cbk; + mWorkers.mRunningCount = (int)mWorkers.mCount; + for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) { + mWorkers.mLaunchSignals[ct].set(); + } + while(mWorkers.mRunningCount) { + mWorkers.mCompleteSignal.wait(); + } +} + void Context::setPriority(int32_t p) { // Note: If we put this in the proper "background" policy @@ -371,7 +415,10 @@ void Context::setPriority(int32_t p) // success; reset the priority as well } #else - setpriority(PRIO_PROCESS, mNativeThreadId, p); + setpriority(PRIO_PROCESS, mNativeThreadId, p); + for (uint32_t ct=0; ct < mWorkers.mCount; ct++) { + setpriority(PRIO_PROCESS, mWorkers.mNativeThreadId[ct], p); + } #endif } @@ -421,10 +468,26 @@ Context::Context(Device *dev, bool isGraphics, bool useDepth) timerInit(); timerSet(RS_TIMER_INTERNAL); - LOGV("RS Launching thread"); + LOGV("RS Launching thread(s)"); + mWorkers.mCount = 2; + mWorkers.mThreadId = (pthread_t *) calloc(mWorkers.mCount, sizeof(pthread_t)); + mWorkers.mNativeThreadId = (pid_t *) calloc(mWorkers.mCount, sizeof(pid_t)); + mWorkers.mLaunchSignals = new Signal[mWorkers.mCount]; + mWorkers.mLaunchCallback = NULL; status = pthread_create(&mThreadId, &threadAttr, threadProc, this); if (status) { LOGE("Failed to start rs context thread."); + return; + } + mWorkers.mRunningCount = 0; + mWorkers.mLaunchCount = 0; + for (uint32_t ct=0; ct < mWorkers.mCount; ct++) { + status = pthread_create(&mWorkers.mThreadId[ct], &threadAttr, helperThreadProc, this); + if (status) { + mWorkers.mCount = ct; + LOGE("Created fewer than expected number of RS threads."); + break; + } } while(!mRunning) { diff --git a/libs/rs/rsContext.h b/libs/rs/rsContext.h index 06433a17f149..98ad3a4e9042 100644 --- a/libs/rs/rsContext.h +++ b/libs/rs/rsContext.h @@ -65,6 +65,7 @@ public: Script * mScript; }; + typedef void (*WorkerCallback_t)(void *usr, uint32_t idx); //StructuredAllocationContext mStateAllocation; ElementState mStateElement; @@ -172,6 +173,8 @@ public: bool ext_OES_texture_npot() const {return mGL.OES_texture_npot;} + void launchThreads(WorkerCallback_t cbk, void *data); + protected: Device *mDev; @@ -222,6 +225,20 @@ protected: pthread_t mThreadId; pid_t mNativeThreadId; + struct Workers { + volatile int mRunningCount; + volatile int mLaunchCount; + uint32_t mCount; + pthread_t *mThreadId; + pid_t *mNativeThreadId; + Signal mCompleteSignal; + + Signal *mLaunchSignals; + WorkerCallback_t mLaunchCallback; + void *mLaunchData; + }; + Workers mWorkers; + ObjectBaseRef<Script> mRootScript; ObjectBaseRef<ProgramFragment> mFragment; ObjectBaseRef<ProgramVertex> mVertex; @@ -248,6 +265,7 @@ private: uint32_t runRootScript(); static void * threadProc(void *); + static void * helperThreadProc(void *); ANativeWindow *mWndSurface; diff --git a/libs/rs/rsScriptC.cpp b/libs/rs/rsScriptC.cpp index b87ac281dd51..9693b16e4ac0 100644 --- a/libs/rs/rsScriptC.cpp +++ b/libs/rs/rsScriptC.cpp @@ -137,72 +137,155 @@ uint32_t ScriptC::run(Context *rsc) } +typedef struct { + Context *rsc; + ScriptC *script; + const Allocation * ain; + Allocation * aout; + const void * usr; + + uint32_t mSliceSize; + volatile int mSliceNum; + + const uint8_t *ptrIn; + uint32_t eStrideIn; + uint8_t *ptrOut; + uint32_t eStrideOut; + + uint32_t xStart; + uint32_t xEnd; + uint32_t yStart; + uint32_t yEnd; + uint32_t zStart; + uint32_t zEnd; + uint32_t arrayStart; + uint32_t arrayEnd; + + uint32_t dimX; + uint32_t dimY; + uint32_t dimZ; + uint32_t dimArray; +} MTLaunchStruct; +typedef int (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); + +static void wc_xy(void *usr, uint32_t idx) +{ + MTLaunchStruct *mtls = (MTLaunchStruct *)usr; + LOGE("usr %p, idx %i", usr, idx); + + while (1) { + uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum); + uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize; + uint32_t yEnd = yStart + mtls->mSliceSize; + yEnd = rsMin(yEnd, mtls->yEnd); + if (yEnd <= yStart) { + return; + } + + //LOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd); + + for (uint32_t y = yStart; y < yEnd; y++) { + uint32_t offset = mtls->dimX * y; + uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * offset); + const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * offset); + + for (uint32_t x = mtls->xStart; x < mtls->xEnd; x++) { + ((rs_t)mtls->script->mProgram.mRoot) (xPtrIn, xPtrOut, mtls->usr, x, y, 0, 0); + xPtrIn += mtls->eStrideIn; + xPtrOut += mtls->eStrideOut; + } + } + } + +} + void ScriptC::runForEach(Context *rsc, const Allocation * ain, Allocation * aout, const void * usr, const RsScriptCall *sc) { - uint32_t dimX = ain->getType()->getDimX(); - uint32_t dimY = ain->getType()->getDimY(); - uint32_t dimZ = ain->getType()->getDimZ(); - uint32_t dimA = 0;//ain->getType()->getDimArray(); - - uint32_t xStart = 0; - uint32_t xEnd = 0; - uint32_t yStart = 0; - uint32_t yEnd = 0; - uint32_t zStart = 0; - uint32_t zEnd = 0; - uint32_t arrayStart = 0; - uint32_t arrayEnd = 0; + MTLaunchStruct mtls; + memset(&mtls, 0, sizeof(mtls)); + + if (ain) { + mtls.dimX = ain->getType()->getDimX(); + mtls.dimY = ain->getType()->getDimY(); + mtls.dimZ = ain->getType()->getDimZ(); + //mtls.dimArray = ain->getType()->getDimArray(); + } else if (aout) { + mtls.dimX = aout->getType()->getDimX(); + mtls.dimY = aout->getType()->getDimY(); + mtls.dimZ = aout->getType()->getDimZ(); + //mtls.dimArray = aout->getType()->getDimArray(); + } else { + rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations"); + return; + } if (!sc || (sc->xEnd == 0)) { - xStart = 0; - xEnd = ain->getType()->getDimX(); + mtls.xEnd = mtls.dimX; } else { - rsAssert(xStart < dimX); - rsAssert(xEnd <= dimX); + rsAssert(sc->xStart < mtls.dimX); + rsAssert(sc->xEnd <= mtls.dimX); rsAssert(sc->xStart < sc->xEnd); - xStart = rsMin(dimX, sc->xStart); - xEnd = rsMin(dimX, sc->xEnd); - if (xStart >= xEnd) return; + mtls.xStart = rsMin(mtls.dimX, sc->xStart); + mtls.xEnd = rsMin(mtls.dimX, sc->xEnd); + if (mtls.xStart >= mtls.xEnd) return; } if (!sc || (sc->yEnd == 0)) { - yStart = 0; - yEnd = ain->getType()->getDimY(); + mtls.yEnd = mtls.dimY; } else { - rsAssert(yStart < dimY); - rsAssert(yEnd <= dimY); + rsAssert(sc->yStart < mtls.dimY); + rsAssert(sc->yEnd <= mtls.dimY); rsAssert(sc->yStart < sc->yEnd); - yStart = rsMin(dimY, sc->yStart); - yEnd = rsMin(dimY, sc->yEnd); - if (yStart >= yEnd) return; + mtls.yStart = rsMin(mtls.dimY, sc->yStart); + mtls.yEnd = rsMin(mtls.dimY, sc->yEnd); + if (mtls.yStart >= mtls.yEnd) return; } - xEnd = rsMax((uint32_t)1, xEnd); - yEnd = rsMax((uint32_t)1, yEnd); - zEnd = rsMax((uint32_t)1, zEnd); - arrayEnd = rsMax((uint32_t)1, arrayEnd); + mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd); + mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd); + mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd); + mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd); rsAssert(ain->getType()->getDimZ() == 0); setupScript(rsc); Script * oldTLS = setTLS(this); - typedef int (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t); - const uint8_t *ptrIn = (const uint8_t *)ain->getPtr(); - uint32_t eStrideIn = ain->getType()->getElementSizeBytes(); + mtls.rsc = rsc; + mtls.ain = ain; + mtls.aout = aout; + mtls.script = this; + mtls.usr = usr; + mtls.mSliceSize = 10; + mtls.mSliceNum = 0; + + mtls.ptrIn = NULL; + mtls.eStrideIn = 0; + if (ain) { + mtls.ptrIn = (const uint8_t *)ain->getPtr(); + mtls.eStrideIn = ain->getType()->getElementSizeBytes(); + } - uint8_t *ptrOut = NULL; - uint32_t eStrideOut = 0; + mtls.ptrOut = NULL; + mtls.eStrideOut = 0; if (aout) { - ptrOut = (uint8_t *)aout->getPtr(); - eStrideOut = aout->getType()->getElementSizeBytes(); + mtls.ptrOut = (uint8_t *)aout->getPtr(); + mtls.eStrideOut = aout->getType()->getElementSizeBytes(); } + + { + LOGE("launch 1"); + rsc->launchThreads(wc_xy, &mtls); + LOGE("launch 2"); + } + +/* for (uint32_t ar = arrayStart; ar < arrayEnd; ar++) { for (uint32_t z = zStart; z < zEnd; z++) { for (uint32_t y = yStart; y < yEnd; y++) { @@ -221,7 +304,7 @@ void ScriptC::runForEach(Context *rsc, } } - +*/ setTLS(oldTLS); } diff --git a/libs/rs/rsScriptC_Lib.cpp b/libs/rs/rsScriptC_Lib.cpp index 8d9ca9f0b580..9c29ca6f640e 100644 --- a/libs/rs/rsScriptC_Lib.cpp +++ b/libs/rs/rsScriptC_Lib.cpp @@ -329,6 +329,29 @@ static uint32_t SC_allocGetDimFaces(RsAllocation va) return a->getType()->getDimFaces(); } +const void * SC_getElementAtX(RsAllocation va, uint32_t x) +{ + const Allocation *a = static_cast<const Allocation *>(va); + const Type *t = a->getType(); + const uint8_t *p = (const uint8_t *)a->getPtr(); + return &p[t->getElementSizeBytes() * x]; +} + +const void * SC_getElementAtXY(RsAllocation va, uint32_t x, uint32_t y) +{ + const Allocation *a = static_cast<const Allocation *>(va); + const Type *t = a->getType(); + const uint8_t *p = (const uint8_t *)a->getPtr(); + return &p[t->getElementSizeBytes() * (x + y*t->getDimX())]; +} + +const void * SC_getElementAtXYZ(RsAllocation va, uint32_t x, uint32_t y, uint32_t z) +{ + const Allocation *a = static_cast<const Allocation *>(va); + const Type *t = a->getType(); + const uint8_t *p = (const uint8_t *)a->getPtr(); + return &p[t->getElementSizeBytes() * (x + y*t->getDimX())]; +} static void SC_debugF(const char *s, float f) { @@ -350,6 +373,10 @@ static void SC_debugI32(const char *s, int32_t i) { LOGE("%s %i 0x%x", s, i, i); } +static void SC_debugP(const char *s, const void *p) { + LOGE("%s %p", s, p); +} + static uint32_t SC_toClient(void *data, int cmdID, int len, int waitForSpace) { GET_TLS(); @@ -433,12 +460,18 @@ static ScriptCState::SymbolTable_t gSyms[] = { { "rsAllocationGetDimFaces", (void *)&SC_allocGetDimFaces }, { "rsGetAllocation", (void *)&SC_getAllocation }, + { "_Z14rsGetElementAt13rs_allocationj", (void *)&SC_getElementAtX }, + { "_Z14rsGetElementAt13rs_allocationjj", (void *)&SC_getElementAtXY }, + { "_Z14rsGetElementAt13rs_allocationjjj", (void *)&SC_getElementAtXYZ }, + + // Debug { "_Z7rsDebugPKcf", (void *)&SC_debugF }, { "_Z7rsDebugPKcDv2_f", (void *)&SC_debugFv2 }, { "_Z7rsDebugPKcDv3_f", (void *)&SC_debugFv3 }, { "_Z7rsDebugPKcDv4_f", (void *)&SC_debugFv4 }, { "_Z7rsDebugPKci", (void *)&SC_debugI32 }, + { "_Z7rsDebugPKcPKv", (void *)&SC_debugP }, //extern void __attribute__((overloadable))rsDebug(const char *, const void *); diff --git a/libs/rs/rsUtils.h b/libs/rs/rsUtils.h index 0a37a5bfb77c..17feb22fda70 100644 --- a/libs/rs/rsUtils.h +++ b/libs/rs/rsUtils.h @@ -30,6 +30,7 @@ #include <stdlib.h> #include <pthread.h> #include <time.h> +#include <cutils/atomic.h> #ifndef ANDROID_RS_BUILD_FOR_HOST #include <EGL/egl.h> diff --git a/libs/rs/scriptc/rs_math.rsh b/libs/rs/scriptc/rs_math.rsh index e11c832df0ab..bd6e5a9e9e52 100644 --- a/libs/rs/scriptc/rs_math.rsh +++ b/libs/rs/scriptc/rs_math.rsh @@ -14,6 +14,12 @@ extern uint32_t rsAllocationGetDimZ(rs_allocation); extern uint32_t rsAllocationGetDimLOD(rs_allocation); extern uint32_t rsAllocationGetDimFaces(rs_allocation); +extern const void * __attribute__((overloadable)) + rsGetElementAt(rs_allocation, uint32_t x); +extern const void * __attribute__((overloadable)) + rsGetElementAt(rs_allocation, uint32_t x, uint32_t y); +extern const void * __attribute__((overloadable)) + rsGetElementAt(rs_allocation, uint32_t x, uint32_t y, uint32_t z); // Debugging |