| /* |
| * Copyright (C) 2007 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #define LOG_TAG "AudioResamplerSinc" |
| //#define LOG_NDEBUG 0 |
| |
| #include <malloc.h> |
| #include <pthread.h> |
| #include <string.h> |
| #include <stdlib.h> |
| #include <dlfcn.h> |
| |
| #include <cutils/compiler.h> |
| #include <cutils/properties.h> |
| |
| #include <utils/Log.h> |
| #include <audio_utils/primitives.h> |
| |
| #include "AudioResamplerSinc.h" |
| |
// Fallback for clang versions that do not provide __builtin_assume_aligned:
// the expression yields (p) unchanged and marks the misaligned case as
// unreachable so the optimizer can still rely on the alignment.
//
// The __has_builtin() test is deliberately nested inside the __clang__ test.
// The preprocessor parses a whole #if expression even when the left operand
// of && is false, so "defined(__clang__) && !__has_builtin(...)" on a single
// line is a syntax error on compilers that do not know __has_builtin.
#if defined(__clang__)
#if !__has_builtin(__builtin_assume_aligned)
#define __builtin_assume_aligned(p, a) \
    (((uintptr_t(p) % (a)) == 0) ? (p) : (__builtin_unreachable(), (p)))
#endif
#endif

// Inline assembly is only enabled for 32-bit ARM builds that are not compiled
// in Thumb mode; every other target uses the portable C++ fallbacks.
#if defined(__arm__) && !defined(__thumb__)
#define USE_INLINE_ASSEMBLY (true)
#else
#define USE_INLINE_ASSEMBLY (false)
#endif

// NEON is selected on AArch64 and on ARM builds with NEON enabled. On those
// targets a USE_NEON value supplied by the build is left untouched; on all
// other targets USE_NEON is defined as (false).
#if defined(__aarch64__) || defined(__ARM_NEON__)
#ifndef USE_NEON
#define USE_NEON (true)
#endif
#else
#define USE_NEON (false)
#endif
#if USE_NEON
#include <arm_neon.h>
#endif

// Marks a variable or parameter as intentionally unused.
#define UNUSED(x) ((void)(x))
| |
| namespace android { |
| // ---------------------------------------------------------------------------- |
| |
| |
| /* |
| * These coefficients are computed with the "fir" utility found in |
| * tools/resampler_tools |
| * cmd-line: fir -l 7 -s 48000 -c 20478 |
| */ |
// Selected by resample() when mInSampleRate <= mSampleRate.
| const uint32_t AudioResamplerSinc::mFirCoefsUp[] __attribute__ ((aligned (32))) = { |
| #include "AudioResamplerSincUp.h" |
| }; |
| |
| /* |
| * These coefficients are optimized for 48KHz -> 44.1KHz |
| * cmd-line: fir -l 7 -s 48000 -c 17189 |
| */ |
// Selected by resample() when mInSampleRate > mSampleRate.
| const uint32_t AudioResamplerSinc::mFirCoefsDown[] __attribute__ ((aligned (32))) = { |
| #include "AudioResamplerSincDown.h" |
| }; |
| |
| // we use 15 bits to interpolate between these samples |
| // this cannot change because the multiplications below rely on it. |
| static const int pLerpBits = 15; |
| |
// Ensures init_routine() runs only once; passed to pthread_once() by the
// constructor.
| static pthread_once_t once_control = PTHREAD_ONCE_INIT; |
// Coefficient reader resolved from the optional coefficient library by
// init_routine(); stays NULL when the library or one of its symbols is missing.
| static readCoefficientsFn readResampleCoefficients = NULL; |
| |
// Filter parameters for the two quality levels; both are filled in by
// init_routine().
| /*static*/ AudioResamplerSinc::Constants AudioResamplerSinc::highQualityConstants; |
| /*static*/ AudioResamplerSinc::Constants AudioResamplerSinc::veryHighQualityConstants; |
| |
| void AudioResamplerSinc::init_routine() |
| { |
| // for high quality resampler, the parameters for coefficients are compile-time constants |
| Constants *c = &highQualityConstants; |
| c->coefsBits = RESAMPLE_FIR_LERP_INT_BITS; |
| c->cShift = kNumPhaseBits - c->coefsBits; |
| c->cMask = ((1<< c->coefsBits)-1) << c->cShift; |
| c->pShift = kNumPhaseBits - c->coefsBits - pLerpBits; |
| c->pMask = ((1<< pLerpBits)-1) << c->pShift; |
| c->halfNumCoefs = RESAMPLE_FIR_NUM_COEF; |
| |
| // for very high quality resampler, the parameters are load-time constants |
| veryHighQualityConstants = highQualityConstants; |
| |
| // Open the dll to get the coefficients for VERY_HIGH_QUALITY |
| void *resampleCoeffLib = dlopen("libaudio-resampler.so", RTLD_NOW); |
| ALOGV("Open libaudio-resampler library = %p", resampleCoeffLib); |
| if (resampleCoeffLib == NULL) { |
| ALOGE("Could not open audio-resampler library: %s", dlerror()); |
| return; |
| } |
| |
| readResampleFirNumCoeffFn readResampleFirNumCoeff; |
| readResampleFirLerpIntBitsFn readResampleFirLerpIntBits; |
| |
| readResampleCoefficients = (readCoefficientsFn) |
| dlsym(resampleCoeffLib, "readResamplerCoefficients"); |
| readResampleFirNumCoeff = (readResampleFirNumCoeffFn) |
| dlsym(resampleCoeffLib, "readResampleFirNumCoeff"); |
| readResampleFirLerpIntBits = (readResampleFirLerpIntBitsFn) |
| dlsym(resampleCoeffLib, "readResampleFirLerpIntBits"); |
| |
| if (!readResampleCoefficients || !readResampleFirNumCoeff || !readResampleFirLerpIntBits) { |
| readResampleCoefficients = NULL; |
| dlclose(resampleCoeffLib); |
| resampleCoeffLib = NULL; |
| ALOGE("Could not find symbol: %s", dlerror()); |
| return; |
| } |
| |
| c = &veryHighQualityConstants; |
| c->coefsBits = readResampleFirLerpIntBits(); |
| c->cShift = kNumPhaseBits - c->coefsBits; |
| c->cMask = ((1<<c->coefsBits)-1) << c->cShift; |
| c->pShift = kNumPhaseBits - c->coefsBits - pLerpBits; |
| c->pMask = ((1<<pLerpBits)-1) << c->pShift; |
| // number of zero-crossing on each side |
| c->halfNumCoefs = readResampleFirNumCoeff(); |
| ALOGV("coefsBits = %d", c->coefsBits); |
| ALOGV("halfNumCoefs = %d", c->halfNumCoefs); |
| // note that we "leak" resampleCoeffLib until the process exits |
| } |
| |
| // ---------------------------------------------------------------------------- |
| |
| #if !USE_NEON |
| |
/*
 * mulRL()
 *
 * Multiplies a 32-bit value by one half of a packed pair of signed 16-bit
 * factors and returns the product scaled down by 16 bits.
 *
 * left - non-zero selects the low 16 bits of vRL, zero selects the high 16 bits
 * in   - 32-bit multiplicand
 * vRL  - two signed 16-bit factors packed into one word
 *
 * The portable path returns (in * factor) >> 16 using a 64-bit product.
 * NOTE(review): the ARM path uses SMULTB/SMULTT, which multiply the top
 * halfword of `in` only, so the two paths may differ in the low-order bits.
 */
static inline
int32_t mulRL(int left, int32_t in, uint32_t vRL)
{
#if USE_INLINE_ASSEMBLY
    int32_t out;
    if (left) {
        // SMULTB is not commutative (top half of the first operand times the
        // bottom half of the second), so the inputs must not be declared
        // interchangeable with the '%' constraint modifier.
        asm( "smultb %[out], %[in], %[vRL] \n"
             : [out]"=r"(out)
             : [in]"r"(in), [vRL]"r"(vRL)
             : );
    } else {
        // SMULTT uses the top half of both operands, so '%' is valid here.
        asm( "smultt %[out], %[in], %[vRL] \n"
             : [out]"=r"(out)
             : [in]"%r"(in), [vRL]"r"(vRL)
             : );
    }
    return out;
#else
    const int16_t v = left ? int16_t(vRL) : int16_t(vRL>>16);
    return int32_t((int64_t(in) * v) >> 16);
#endif
}
| |
/*
 * mulAdd()
 *
 * Returns a + ((v * in) >> 16): a 32-bit by 16-bit multiply whose product is
 * scaled down by 16 bits and added to the accumulator.
 *
 * in - signed 16-bit factor
 * v  - 32-bit factor
 * a  - accumulator
 */
static inline
int32_t mulAdd(int16_t in, int32_t v, int32_t a)
{
#if USE_INLINE_ASSEMBLY
    int32_t out;
    // SMLAWB multiplies the full first operand by the bottom halfword of the
    // second one; the operands are not interchangeable, hence no '%' modifier.
    asm( "smlawb %[out], %[v], %[in], %[a] \n"
         : [out]"=r"(out)
         : [in]"r"(in), [v]"r"(v), [a]"r"(a)
         : );
    return out;
#else
    return a + int32_t((int64_t(v) * in) >> 16);
#endif
}
| |
/*
 * mulAddRL()
 *
 * Same as mulAdd(), but the signed 16-bit factor is taken from one half of a
 * packed pair: returns a + ((v * half) >> 16).
 *
 * left - non-zero selects the low 16 bits of inRL, zero selects the high 16 bits
 * inRL - two signed 16-bit values packed into one word
 * v    - 32-bit factor
 * a    - accumulator
 */
static inline
int32_t mulAddRL(int left, uint32_t inRL, int32_t v, int32_t a)
{
#if USE_INLINE_ASSEMBLY
    int32_t out;
    // SMLAWB/SMLAWT multiply the full first operand by one halfword of the
    // second one; the operands are not interchangeable, hence no '%' modifier.
    if (left) {
        asm( "smlawb %[out], %[v], %[inRL], %[a] \n"
             : [out]"=r"(out)
             : [inRL]"r"(inRL), [v]"r"(v), [a]"r"(a)
             : );
    } else {
        asm( "smlawt %[out], %[v], %[inRL], %[a] \n"
             : [out]"=r"(out)
             : [inRL]"r"(inRL), [v]"r"(v), [a]"r"(a)
             : );
    }
    return out;
#else
    const int16_t s = left ? int16_t(inRL) : int16_t(inRL>>16);
    return a + int32_t((int64_t(v) * s) >> 16);
#endif
}
| |
| #endif // !USE_NEON |
| |
| // ---------------------------------------------------------------------------- |
| |
| AudioResamplerSinc::AudioResamplerSinc( |
| int inChannelCount, int32_t sampleRate, src_quality quality) |
| : AudioResampler(inChannelCount, sampleRate, quality), |
| mState(0), mImpulse(0), mRingFull(0), mFirCoefs(0) |
| { |
| /* |
| * Layout of the state buffer for 32 tap: |
| * |
| * "present" sample beginning of 2nd buffer |
| * v v |
| * 0 01 2 23 3 |
| * 0 F0 0 F0 F |
| * [pppppppppppppppInnnnnnnnnnnnnnnnpppppppppppppppInnnnnnnnnnnnnnnn] |
| * ^ ^ head |
| * |
| * p = past samples, convoluted with the (p)ositive side of sinc() |
| * n = future samples, convoluted with the (n)egative side of sinc() |
| * r = extra space for implementing the ring buffer |
| * |
| */ |
| |
| mVolumeSIMD[0] = 0; |
| mVolumeSIMD[1] = 0; |
| |
| // Load the constants for coefficients |
| int ok = pthread_once(&once_control, init_routine); |
| if (ok != 0) { |
| ALOGE("%s pthread_once failed: %d", __func__, ok); |
| } |
| mConstants = (quality == VERY_HIGH_QUALITY) ? |
| &veryHighQualityConstants : &highQualityConstants; |
| } |
| |
| |
| AudioResamplerSinc::~AudioResamplerSinc() { |
| free(mState); |
| } |
| |
| void AudioResamplerSinc::init() { |
| const Constants& c(*mConstants); |
| const size_t numCoefs = 2 * c.halfNumCoefs; |
| const size_t stateSize = numCoefs * mChannelCount * 2; |
| mState = (int16_t*)memalign(32, stateSize*sizeof(int16_t)); |
| memset(mState, 0, sizeof(int16_t)*stateSize); |
| mImpulse = mState + (c.halfNumCoefs-1)*mChannelCount; |
| mRingFull = mImpulse + (numCoefs+1)*mChannelCount; |
| } |
| |
| void AudioResamplerSinc::setVolume(float left, float right) { |
| AudioResampler::setVolume(left, right); |
| // convert to U4_28 (rounding down). |
| // integer volume values are clamped to 0 to UNITY_GAIN. |
| mVolumeSIMD[0] = u4_28_from_float(clampFloatVol(left)); |
| mVolumeSIMD[1] = u4_28_from_float(clampFloatVol(right)); |
| } |
| |
| size_t AudioResamplerSinc::resample(int32_t* out, size_t outFrameCount, |
| AudioBufferProvider* provider) |
| { |
| // FIXME store current state (up or down sample) and only load the coefs when the state |
| // changes. Or load two pointers one for up and one for down in the init function. |
| // Not critical now since the read functions are fast, but would be important if read was slow. |
| if (mConstants == &veryHighQualityConstants && readResampleCoefficients) { |
| mFirCoefs = readResampleCoefficients( mInSampleRate <= mSampleRate ); |
| } else { |
| mFirCoefs = (const int32_t *) |
| ((mInSampleRate <= mSampleRate) ? mFirCoefsUp : mFirCoefsDown); |
| } |
| |
| // select the appropriate resampler |
| switch (mChannelCount) { |
| case 1: |
| return resample<1>(out, outFrameCount, provider); |
| case 2: |
| return resample<2>(out, outFrameCount, provider); |
| default: |
| LOG_ALWAYS_FATAL("invalid channel count: %d", mChannelCount); |
| return 0; |
| } |
| } |
| |
| |
| template<int CHANNELS> |
| size_t AudioResamplerSinc::resample(int32_t* out, size_t outFrameCount, |
| AudioBufferProvider* provider) |
| { |
| const Constants& c(*mConstants); |
| const size_t headOffset = c.halfNumCoefs*CHANNELS; |
| int16_t* impulse = mImpulse; |
| uint32_t vRL = mVolumeRL; |
| size_t inputIndex = mInputIndex; |
| uint32_t phaseFraction = mPhaseFraction; |
| uint32_t phaseIncrement = mPhaseIncrement; |
| size_t outputIndex = 0; |
| size_t outputSampleCount = outFrameCount * 2; |
| size_t inFrameCount = getInFrameCountRequired(outFrameCount); |
| |
| while (outputIndex < outputSampleCount) { |
| // buffer is empty, fetch a new one |
| while (mBuffer.frameCount == 0) { |
| mBuffer.frameCount = inFrameCount; |
| provider->getNextBuffer(&mBuffer); |
| if (mBuffer.raw == NULL) { |
| goto resample_exit; |
| } |
| const uint32_t phaseIndex = phaseFraction >> kNumPhaseBits; |
| if (phaseIndex == 1) { |
| // read one frame |
| read<CHANNELS>(impulse, phaseFraction, mBuffer.i16, inputIndex); |
| } else if (phaseIndex == 2) { |
| // read 2 frames |
| read<CHANNELS>(impulse, phaseFraction, mBuffer.i16, inputIndex); |
| inputIndex++; |
| if (inputIndex >= mBuffer.frameCount) { |
| inputIndex -= mBuffer.frameCount; |
| provider->releaseBuffer(&mBuffer); |
| } else { |
| read<CHANNELS>(impulse, phaseFraction, mBuffer.i16, inputIndex); |
| } |
| } |
| } |
| int16_t const * const in = mBuffer.i16; |
| const size_t frameCount = mBuffer.frameCount; |
| |
| // Always read-in the first samples from the input buffer |
| int16_t* head = impulse + headOffset; |
| for (size_t i=0 ; i<CHANNELS ; i++) { |
| head[i] = in[inputIndex*CHANNELS + i]; |
| } |
| |
| // handle boundary case |
| while (CC_LIKELY(outputIndex < outputSampleCount)) { |
| filterCoefficient<CHANNELS>(&out[outputIndex], phaseFraction, impulse, vRL); |
| outputIndex += 2; |
| |
| phaseFraction += phaseIncrement; |
| const size_t phaseIndex = phaseFraction >> kNumPhaseBits; |
| for (size_t i=0 ; i<phaseIndex ; i++) { |
| inputIndex++; |
| if (inputIndex >= frameCount) { |
| goto done; // need a new buffer |
| } |
| read<CHANNELS>(impulse, phaseFraction, in, inputIndex); |
| } |
| } |
| done: |
| // if done with buffer, save samples |
| if (inputIndex >= frameCount) { |
| inputIndex -= frameCount; |
| provider->releaseBuffer(&mBuffer); |
| } |
| } |
| |
| resample_exit: |
| mImpulse = impulse; |
| mInputIndex = inputIndex; |
| mPhaseFraction = phaseFraction; |
| return outputIndex / CHANNELS; |
| } |
| |
| template<int CHANNELS> |
| /*** |
| * read() |
| * |
| * This function reads only one frame from input buffer and writes it in |
| * state buffer |
| * |
| **/ |
| void AudioResamplerSinc::read( |
| int16_t*& impulse, uint32_t& phaseFraction, |
| const int16_t* in, size_t inputIndex) |
| { |
| impulse += CHANNELS; |
| phaseFraction -= 1LU<<kNumPhaseBits; |
| |
| const Constants& c(*mConstants); |
| if (CC_UNLIKELY(impulse >= mRingFull)) { |
| const size_t stateSize = (c.halfNumCoefs*2)*CHANNELS; |
| memcpy(mState, mState+stateSize, sizeof(int16_t)*stateSize); |
| impulse -= stateSize; |
| } |
| |
| int16_t* head = impulse + c.halfNumCoefs*CHANNELS; |
| for (size_t i=0 ; i<CHANNELS ; i++) { |
| head[i] = in[inputIndex*CHANNELS + i]; |
| } |
| } |
| |
// filterCoefficient()
//
// Computes one output frame: the samples around `samples` are multiplied by
// FIR coefficients that are selected and linearly interpolated according to
// `phase`, the products are summed, the volume is applied and the result is
// added to out[0] and out[1].
//
// out     - two accumulators; they are added to, not overwritten
// phase   - split below into a coefficient index (cMask/cShift) and an
//           interpolation amount (pMask/pShift)
// samples - position in the state buffer the taps are centered on: the frame
//           at `samples` and the ones before it use the "P" coefficients, the
//           frames after it use the "N" coefficients
// vRL     - packed volume pair; only used by the non-NEON path (the NEON
//           path reads mVolumeSIMD instead)
| template<int CHANNELS> |
| void AudioResamplerSinc::filterCoefficient(int32_t* out, uint32_t phase, |
| const int16_t *samples, uint32_t vRL) |
| { |
| // NOTE: be very careful when modifying the code here. register |
| // pressure is very high and a small change might cause the compiler |
| // to generate far less efficient code. |
| // Always validate the result with objdump or test-resample. |
| |
| // compute the index of the coefficient on the positive side and |
| // negative side |
| const Constants& c(*mConstants); |
// ONE has every index and interpolation bit set; (ONE - phase) is the
// mirrored phase used for the negative side.
| const int32_t ONE = c.cMask | c.pMask; |
| uint32_t indexP = ( phase & c.cMask) >> c.cShift; |
| uint32_t lerpP = ( phase & c.pMask) >> c.pShift; |
| uint32_t indexN = ((ONE-phase) & c.cMask) >> c.cShift; |
| uint32_t lerpN = ((ONE-phase) & c.pMask) >> c.pShift; |
| |
// Each coefficient index owns a run of `offset` (= halfNumCoefs) consecutive
// coefficients; the run that follows it is the interpolation partner.
| const size_t offset = c.halfNumCoefs; |
| indexP *= offset; |
| indexN *= offset; |
| |
| int32_t const* coefsP = mFirCoefs + indexP; |
| int32_t const* coefsN = mFirCoefs + indexN; |
// sP starts on the frame at `samples`, sN on the frame right after it.
| int16_t const* sP = samples; |
| int16_t const* sN = samples + CHANNELS; |
| |
| size_t count = offset; |
| |
| #if !USE_NEON |
// Scalar path: one tap per side and iteration; sP walks towards lower
// addresses, sN towards higher ones.
| int32_t l = 0; |
| int32_t r = 0; |
| for (size_t i=0 ; i<count ; i++) { |
| interpolate<CHANNELS>(l, r, coefsP++, offset, lerpP, sP); |
| sP -= CHANNELS; |
| interpolate<CHANNELS>(l, r, coefsN++, offset, lerpN, sN); |
| sN += CHANNELS; |
| } |
// Apply the volume (low half of vRL for out[0], high half for out[1]),
// double the result and accumulate.
| out[0] += 2 * mulRL(1, l, vRL); |
| out[1] += 2 * mulRL(0, r, vRL); |
| #else |
| UNUSED(vRL); |
| if (CHANNELS == 1) { |
// Mono NEON path. coefsP1/coefsN1 point at the next coefficient run, i.e. the
// interpolation partners of coefsP/coefsN.
| int32_t const* coefsP1 = coefsP + offset; |
| int32_t const* coefsN1 = coefsN + offset; |
// The vector load below reads 4 frames at once; rewind by 3 frames so that
// the load ends on the frame at `samples`.
| sP -= CHANNELS*3; |
| |
| int32x4_t sum; |
| int32x2_t lerpPN; |
// lerpPN lane 0 = lerpP, lane 1 = lerpN, both shifted left by 16 so that the
// doubling high-half multiply (vqrdmulh) below scales by lerp / 2^15.
| lerpPN = vdup_n_s32(0); |
| lerpPN = vld1_lane_s32((int32_t *)&lerpP, lerpPN, 0); |
| lerpPN = vld1_lane_s32((int32_t *)&lerpN, lerpPN, 1); |
| lerpPN = vshl_n_s32(lerpPN, 16); |
| sum = vdupq_n_s32(0); |
| |
| int16x4_t sampleP, sampleN; |
| int32x4_t samplePExt, sampleNExt; |
| int32x4_t coefsPV0, coefsPV1, coefsNV0, coefsNV1; |
| |
// Tell the compiler that the coefficient pointers are 16-byte aligned.
// NOTE(review): this relies on the alignment of the coefficient table and on
// halfNumCoefs being a multiple of 4 - confirm for library-provided tables.
| coefsP = (const int32_t*)__builtin_assume_aligned(coefsP, 16); |
| coefsN = (const int32_t*)__builtin_assume_aligned(coefsN, 16); |
| coefsP1 = (const int32_t*)__builtin_assume_aligned(coefsP1, 16); |
| coefsN1 = (const int32_t*)__builtin_assume_aligned(coefsN1, 16); |
// Four taps per side and iteration.
// NOTE(review): count is unsigned and decremented by 4, so the loop assumes
// halfNumCoefs is a multiple of 4 (otherwise the counter would wrap around).
| for (; count > 0; count -= 4) { |
| sampleP = vld1_s16(sP); |
| sampleN = vld1_s16(sN); |
| coefsPV0 = vld1q_s32(coefsP); |
| coefsNV0 = vld1q_s32(coefsN); |
| coefsPV1 = vld1q_s32(coefsP1); |
| coefsNV1 = vld1q_s32(coefsN1); |
| sP -= 4; |
| sN += 4; |
| coefsP += 4; |
| coefsN += 4; |
| coefsP1 += 4; |
| coefsN1 += 4; |
| |
// The P samples were loaded in ascending address order; reverse the lanes so
// that lane 0 holds the frame at the highest address, matching coefsP[0].
| sampleP = vrev64_s16(sampleP); |
| |
// Steps 1-3 compute coefs = c0 + (c1 - c0) * lerp for both sides; the sample
// widening (16 -> 32 bits, shifted left by 15) is interleaved with them.
| // interpolate (step1) |
| coefsPV1 = vsubq_s32(coefsPV1, coefsPV0); |
| coefsNV1 = vsubq_s32(coefsNV1, coefsNV0); |
| samplePExt = vshll_n_s16(sampleP, 15); |
| // interpolate (step2) |
| coefsPV1 = vqrdmulhq_lane_s32(coefsPV1, lerpPN, 0); |
| coefsNV1 = vqrdmulhq_lane_s32(coefsNV1, lerpPN, 1); |
| sampleNExt = vshll_n_s16(sampleN, 15); |
| // interpolate (step3) |
| coefsPV0 = vaddq_s32(coefsPV0, coefsPV1); |
| coefsNV0 = vaddq_s32(coefsNV0, coefsNV1); |
| |
// sample * interpolated coefficient, accumulated per lane
| samplePExt = vqrdmulhq_s32(samplePExt, coefsPV0); |
| sampleNExt = vqrdmulhq_s32(sampleNExt, coefsNV0); |
| sum = vaddq_s32(sum, samplePExt); |
| sum = vaddq_s32(sum, sampleNExt); |
| } |
| int32x2_t volumesV, outV; |
| volumesV = vld1_s32(mVolumeSIMD); |
| outV = vld1_s32(out); |
| |
| //add all 4 partial sums |
| int32x2_t sumLow, sumHigh; |
| sumLow = vget_low_s32(sum); |
| sumHigh = vget_high_s32(sum); |
| sumLow = vpadd_s32(sumLow, sumHigh); |
| sumLow = vpadd_s32(sumLow, sumLow); |
| |
// Both lanes now hold the same total; scale lane 0 and lane 1 by
// mVolumeSIMD[0] and mVolumeSIMD[1] and accumulate into out[0] / out[1].
| sumLow = vqrdmulh_s32(sumLow, volumesV); |
| outV = vadd_s32(outV, sumLow); |
| vst1_s32(out, outV); |
| } else if (CHANNELS == 2) { |
// Stereo NEON path: same structure as the mono path, with one accumulator
// per channel (sum0 for the first sample of each frame, sum1 for the second).
| int32_t const* coefsP1 = coefsP + offset; |
| int32_t const* coefsN1 = coefsN + offset; |
// rewind by 3 frames so that the 4-frame load ends on the frame at `samples`
| sP -= CHANNELS*3; |
| |
| int32x4_t sum0, sum1; |
| int32x2_t lerpPN; |
| |
// lerpPN lane 0 = lerpP, lane 1 = lerpN, both shifted left by 16
| lerpPN = vdup_n_s32(0); |
| lerpPN = vld1_lane_s32((int32_t *)&lerpP, lerpPN, 0); |
| lerpPN = vld1_lane_s32((int32_t *)&lerpN, lerpPN, 1); |
| lerpPN = vshl_n_s32(lerpPN, 16); |
| sum0 = vdupq_n_s32(0); |
| sum1 = vdupq_n_s32(0); |
| |
| int16x4x2_t sampleP, sampleN; |
| int32x4x2_t samplePExt, sampleNExt; |
| int32x4_t coefsPV0, coefsPV1, coefsNV0, coefsNV1; |
| |
// NOTE(review): same 16-byte alignment assumption as in the mono path.
| coefsP = (const int32_t*)__builtin_assume_aligned(coefsP, 16); |
| coefsN = (const int32_t*)__builtin_assume_aligned(coefsN, 16); |
| coefsP1 = (const int32_t*)__builtin_assume_aligned(coefsP1, 16); |
| coefsN1 = (const int32_t*)__builtin_assume_aligned(coefsN1, 16); |
// Four frames per side and iteration.
// NOTE(review): assumes halfNumCoefs is a multiple of 4, see the mono path.
| for (; count > 0; count -= 4) { |
// vld2 de-interleaves: val[0] receives the first sample of each of the 4
// frames, val[1] the second one.
| sampleP = vld2_s16(sP); |
| sampleN = vld2_s16(sN); |
| coefsPV0 = vld1q_s32(coefsP); |
| coefsNV0 = vld1q_s32(coefsN); |
| coefsPV1 = vld1q_s32(coefsP1); |
| coefsNV1 = vld1q_s32(coefsN1); |
| sP -= 8; |
| sN += 8; |
| coefsP += 4; |
| coefsN += 4; |
| coefsP1 += 4; |
| coefsN1 += 4; |
| |
// reverse the P frames so that lane 0 matches coefsP[0]
| sampleP.val[0] = vrev64_s16(sampleP.val[0]); |
| sampleP.val[1] = vrev64_s16(sampleP.val[1]); |
| |
| // interpolate (step1) |
| coefsPV1 = vsubq_s32(coefsPV1, coefsPV0); |
| coefsNV1 = vsubq_s32(coefsNV1, coefsNV0); |
| samplePExt.val[0] = vshll_n_s16(sampleP.val[0], 15); |
| samplePExt.val[1] = vshll_n_s16(sampleP.val[1], 15); |
| // interpolate (step2) |
| coefsPV1 = vqrdmulhq_lane_s32(coefsPV1, lerpPN, 0); |
| coefsNV1 = vqrdmulhq_lane_s32(coefsNV1, lerpPN, 1); |
| sampleNExt.val[0] = vshll_n_s16(sampleN.val[0], 15); |
| sampleNExt.val[1] = vshll_n_s16(sampleN.val[1], 15); |
| // interpolate (step3) |
| coefsPV0 = vaddq_s32(coefsPV0, coefsPV1); |
| coefsNV0 = vaddq_s32(coefsNV0, coefsNV1); |
| |
// the same interpolated coefficients are applied to both channels
| samplePExt.val[0] = vqrdmulhq_s32(samplePExt.val[0], coefsPV0); |
| samplePExt.val[1] = vqrdmulhq_s32(samplePExt.val[1], coefsPV0); |
| sampleNExt.val[0] = vqrdmulhq_s32(sampleNExt.val[0], coefsNV0); |
| sampleNExt.val[1] = vqrdmulhq_s32(sampleNExt.val[1], coefsNV0); |
| sum0 = vaddq_s32(sum0, samplePExt.val[0]); |
| sum1 = vaddq_s32(sum1, samplePExt.val[1]); |
| sum0 = vaddq_s32(sum0, sampleNExt.val[0]); |
| sum1 = vaddq_s32(sum1, sampleNExt.val[1]); |
| } |
| int32x2_t volumesV, outV; |
| volumesV = vld1_s32(mVolumeSIMD); |
| outV = vld1_s32(out); |
| |
| //add all 4 partial sums |
| int32x2_t sumLow0, sumHigh0, sumLow1, sumHigh1; |
| sumLow0 = vget_low_s32(sum0); |
| sumHigh0 = vget_high_s32(sum0); |
| sumLow1 = vget_low_s32(sum1); |
| sumHigh1 = vget_high_s32(sum1); |
| sumLow0 = vpadd_s32(sumLow0, sumHigh0); |
| sumLow0 = vpadd_s32(sumLow0, sumLow0); |
| sumLow1 = vpadd_s32(sumLow1, sumHigh1); |
| sumLow1 = vpadd_s32(sumLow1, sumLow1); |
| |
// Combine lane 0 of both totals into {sum0, sum1}, apply the per-channel
// volume from mVolumeSIMD and accumulate into out[0] / out[1].
| sumLow0 = vtrn_s32(sumLow0, sumLow1).val[0]; |
| sumLow0 = vqrdmulh_s32(sumLow0, volumesV); |
| outV = vadd_s32(outV, sumLow0); |
| vst1_s32(out, outV); |
| } |
| #endif |
| } |
| |
| template<int CHANNELS> |
| void AudioResamplerSinc::interpolate( |
| int32_t& l, int32_t& r, |
| const int32_t* coefs, size_t offset, |
| int32_t lerp, const int16_t* samples) |
| { |
| int32_t c0 = coefs[0]; |
| int32_t c1 = coefs[offset]; |
| int32_t sinc = mulAdd(lerp, (c1-c0)<<1, c0); |
| if (CHANNELS == 2) { |
| uint32_t rl = *reinterpret_cast<const uint32_t*>(samples); |
| l = mulAddRL(1, rl, sinc, l); |
| r = mulAddRL(0, rl, sinc, r); |
| } else { |
| r = l = mulAdd(samples[0], sinc, l); |
| } |
| } |
| // ---------------------------------------------------------------------------- |
| } // namespace android |