diff options
3 files changed, 160 insertions, 33 deletions
diff --git a/core/java/android/speech/tts/FileSynthesisRequest.java b/core/java/android/speech/tts/FileSynthesisRequest.java index 370ad5338c2a..6a9b2dc14de3 100644 --- a/core/java/android/speech/tts/FileSynthesisRequest.java +++ b/core/java/android/speech/tts/FileSynthesisRequest.java @@ -19,6 +19,7 @@ import android.media.AudioFormat; import android.util.Log; import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; import java.io.RandomAccessFile; import java.nio.ByteBuffer; @@ -32,6 +33,8 @@ class FileSynthesisRequest extends SynthesisRequest { private static final String TAG = "FileSynthesisRequest"; private static final boolean DBG = false; + private static final int MAX_AUDIO_BUFFER_SIZE = 8192; + private static final int WAV_HEADER_LENGTH = 44; private static final short WAV_FORMAT_PCM = 0x0001; @@ -81,6 +84,11 @@ class FileSynthesisRequest extends SynthesisRequest { } @Override + public int getMaxBufferSize() { + return MAX_AUDIO_BUFFER_SIZE; + } + + @Override public int start(int sampleRateInHz, int audioFormat, int channelCount) { if (DBG) { Log.d(TAG, "FileSynthesisRequest.start(" + sampleRateInHz + "," + audioFormat @@ -152,8 +160,9 @@ class FileSynthesisRequest extends SynthesisRequest { try { // Write WAV header at start of file mFile.seek(0); - int fileLen = (int) mFile.length(); - mFile.write(makeWavHeader(mSampleRateInHz, mAudioFormat, mChannelCount, fileLen)); + int dataLength = (int) (mFile.length() - WAV_HEADER_LENGTH); + mFile.write( + makeWavHeader(mSampleRateInHz, mAudioFormat, mChannelCount, dataLength)); closeFile(); return TextToSpeech.SUCCESS; } catch (IOException ex) { @@ -164,8 +173,37 @@ class FileSynthesisRequest extends SynthesisRequest { } } + @Override + public int completeAudioAvailable(int sampleRateInHz, int audioFormat, int channelCount, + byte[] buffer, int offset, int length) { + synchronized (mStateLock) { + if (mStopped) { + if (DBG) Log.d(TAG, "Request has been aborted."); + return 
TextToSpeech.ERROR; + } + } + FileOutputStream out = null; + try { + out = new FileOutputStream(mFileName); + out.write(makeWavHeader(sampleRateInHz, audioFormat, channelCount, length)); + out.write(buffer, offset, length); + return TextToSpeech.SUCCESS; + } catch (IOException ex) { + Log.e(TAG, "Failed to write to " + mFileName + ": " + ex); + return TextToSpeech.ERROR; + } finally { + try { + if (out != null) { + out.close(); + } + } catch (IOException ex) { + Log.e(TAG, "Failed to close " + mFileName + ": " + ex); + } + } + } + private byte[] makeWavHeader(int sampleRateInHz, int audioFormat, int channelCount, - int fileLength) { + int dataLength) { // TODO: is AudioFormat.ENCODING_DEFAULT always the same as ENCODING_PCM_16BIT? int sampleSizeInBytes = (audioFormat == AudioFormat.ENCODING_PCM_8BIT ? 1 : 2); int byteRate = sampleRateInHz * sampleSizeInBytes * channelCount; @@ -177,7 +215,7 @@ class FileSynthesisRequest extends SynthesisRequest { header.order(ByteOrder.LITTLE_ENDIAN); header.put(new byte[]{ 'R', 'I', 'F', 'F' }); - header.putInt(fileLength - 8); // RIFF chunk size + header.putInt(dataLength + WAV_HEADER_LENGTH - 8); // RIFF chunk size header.put(new byte[]{ 'W', 'A', 'V', 'E' }); header.put(new byte[]{ 'f', 'm', 't', ' ' }); header.putInt(16); // size of fmt chunk @@ -188,7 +226,6 @@ class FileSynthesisRequest extends SynthesisRequest { header.putShort(blockAlign); header.putShort(bitsPerSample); header.put(new byte[]{ 'd', 'a', 't', 'a' }); - int dataLength = fileLength - WAV_HEADER_LENGTH; header.putInt(dataLength); return headerBuf; diff --git a/core/java/android/speech/tts/PlaybackSynthesisRequest.java b/core/java/android/speech/tts/PlaybackSynthesisRequest.java index 15a4ee96ecc8..226701518805 100644 --- a/core/java/android/speech/tts/PlaybackSynthesisRequest.java +++ b/core/java/android/speech/tts/PlaybackSynthesisRequest.java @@ -78,6 +78,13 @@ class PlaybackSynthesisRequest extends SynthesisRequest { } } + @Override + public int 
getMaxBufferSize() { + // The AudioTrack buffer will be at least MIN_AUDIO_BUFFER_SIZE, so that should always be + // a safe buffer size to pass in. + return MIN_AUDIO_BUFFER_SIZE; + } + // TODO: add a thread that writes to the AudioTrack? @Override public int start(int sampleRateInHz, int audioFormat, int channelCount) { @@ -86,20 +93,6 @@ class PlaybackSynthesisRequest extends SynthesisRequest { + "," + channelCount + ")"); } - int channelConfig; - if (channelCount == 1) { - channelConfig = AudioFormat.CHANNEL_OUT_MONO; - } else if (channelCount == 2){ - channelConfig = AudioFormat.CHANNEL_OUT_STEREO; - } else { - Log.e(TAG, "Unsupported number of channels: " + channelCount); - return TextToSpeech.ERROR; - } - - int minBufferSizeInBytes - = AudioTrack.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat); - int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes); - synchronized (mStateLock) { if (mStopped) { if (DBG) Log.d(TAG, "Request has been aborted."); @@ -111,22 +104,19 @@ class PlaybackSynthesisRequest extends SynthesisRequest { return TextToSpeech.ERROR; } - mAudioTrack = new AudioTrack(mStreamType, sampleRateInHz, channelConfig, audioFormat, - bufferSizeInBytes, AudioTrack.MODE_STREAM); - if (mAudioTrack.getState() != AudioTrack.STATE_INITIALIZED) { - cleanUp(); + mAudioTrack = createAudioTrack(sampleRateInHz, audioFormat, channelCount, + AudioTrack.MODE_STREAM); + if (mAudioTrack == null) { return TextToSpeech.ERROR; } - - setupVolume(); } return TextToSpeech.SUCCESS; } - private void setupVolume() { - float vol = clip(mVolume, 0.0f, 1.0f); - float panning = clip(mPan, -1.0f, 1.0f); + private void setupVolume(AudioTrack audioTrack, float volume, float pan) { + float vol = clip(volume, 0.0f, 1.0f); + float panning = clip(pan, -1.0f, 1.0f); float volLeft = vol; float volRight = vol; if (panning > 0.0f) { @@ -135,7 +125,7 @@ class PlaybackSynthesisRequest extends SynthesisRequest { volRight *= (1.0f + panning); } if (DBG) 
Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight); - if (mAudioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) { + if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) { Log.e(TAG, "Failed to set volume"); } } @@ -148,7 +138,10 @@ class PlaybackSynthesisRequest extends SynthesisRequest { public int audioAvailable(byte[] buffer, int offset, int length) { if (DBG) { Log.d(TAG, "audioAvailable(byte[" + buffer.length + "]," - + offset + "," + length + "), thread ID=" + android.os.Process.myTid()); + + offset + "," + length + ")"); + } + if (length > getMaxBufferSize()) { + throw new IllegalArgumentException("buffer is too large (" + length + " bytes)"); } synchronized (mStateLock) { if (mStopped) { @@ -194,4 +187,72 @@ class PlaybackSynthesisRequest extends SynthesisRequest { } return TextToSpeech.SUCCESS; } + + @Override + public int completeAudioAvailable(int sampleRateInHz, int audioFormat, int channelCount, + byte[] buffer, int offset, int length) { + if (DBG) { + Log.d(TAG, "completeAudioAvailable(" + sampleRateInHz + "," + audioFormat + + "," + channelCount + "byte[" + buffer.length + "]," + + offset + "," + length + ")"); + } + + synchronized (mStateLock) { + if (mStopped) { + if (DBG) Log.d(TAG, "Request has been aborted."); + return TextToSpeech.ERROR; + } + if (mAudioTrack != null) { + Log.e(TAG, "start() called before completeAudioAvailable()"); + cleanUp(); + return TextToSpeech.ERROR; + } + + mAudioTrack = createAudioTrack(sampleRateInHz, audioFormat, channelCount, + AudioTrack.MODE_STATIC); + if (mAudioTrack == null) { + return TextToSpeech.ERROR; + } + + try { + mAudioTrack.write(buffer, offset, length); + mAudioTrack.play(); + } catch (IllegalStateException ex) { + Log.e(TAG, "Playback error", ex); + return TextToSpeech.ERROR; + } finally { + cleanUp(); + } + } + + return TextToSpeech.SUCCESS; + } + + private AudioTrack createAudioTrack(int sampleRateInHz, int audioFormat, int channelCount, + int mode) { + 
int channelConfig; + if (channelCount == 1) { + channelConfig = AudioFormat.CHANNEL_OUT_MONO; + } else if (channelCount == 2){ + channelConfig = AudioFormat.CHANNEL_OUT_STEREO; + } else { + Log.e(TAG, "Unsupported number of channels: " + channelCount); + return null; + } + + int minBufferSizeInBytes + = AudioTrack.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat); + int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes); + AudioTrack audioTrack = new AudioTrack(mStreamType, sampleRateInHz, channelConfig, + audioFormat, bufferSizeInBytes, mode); + if (audioTrack == null) { + return null; + } + if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) { + audioTrack.release(); + return null; + } + setupVolume(audioTrack, mVolume, mPan); + return audioTrack; + } }
\ No newline at end of file diff --git a/core/java/android/speech/tts/SynthesisRequest.java b/core/java/android/speech/tts/SynthesisRequest.java index 3f2ec5d746b7..f4bb85228901 100644 --- a/core/java/android/speech/tts/SynthesisRequest.java +++ b/core/java/android/speech/tts/SynthesisRequest.java @@ -18,6 +18,13 @@ package android.speech.tts; /** * A request for speech synthesis given to a TTS engine for processing. * + * The engine can provide streaming audio by calling + * {@link #start}, then {@link #audioAvailable} until all audio has been provided, then finally + * {@link #done}. + * + * Alternatively, the engine can provide all the audio at once, by using + * {@link #completeAudioAvailable}. + * * @hide Pending approval */ public abstract class SynthesisRequest { @@ -101,6 +108,12 @@ public abstract class SynthesisRequest { } /** + * Gets the maximum number of bytes that the TTS engine can pass in a single call of + * {@link #audioAvailable}. This does not apply to {@link #completeAudioAvailable}. + */ + public abstract int getMaxBufferSize(); + + /** * Aborts the speech request. * * Can be called from multiple threads. @@ -117,7 +130,7 @@ public abstract class SynthesisRequest { * @param sampleRateInHz Sample rate in HZ of the generated audio. * @param audioFormat Audio format of the generated audio. Must be one of * the ENCODING_ constants defined in {@link android.media.AudioFormat}. - * @param channelCount The number of channels + * @param channelCount The number of channels. Must be {@code 1} or {@code 2}. * @return {@link TextToSpeech#SUCCESS} or {@link TextToSpeech#ERROR}. */ public abstract int start(int sampleRateInHz, int audioFormat, int channelCount); @@ -131,8 +144,8 @@ public abstract class SynthesisRequest { * @param buffer The generated audio data. This method will not hold on to {@code buffer}, * so the caller is free to modify it after this method returns. * @param offset The offset into {@code buffer} where the audio data starts. 
- * @param length The number of bytes of audio data in {@code buffer}. - * Must be less than or equal to {@code buffer.length - offset}. + * @param length The number of bytes of audio data in {@code buffer}. This must be + * less than or equal to the return value of {@link #getMaxBufferSize}. * @return {@link TextToSpeech#SUCCESS} or {@link TextToSpeech#ERROR}. */ public abstract int audioAvailable(byte[] buffer, int offset, int length); @@ -148,4 +161,20 @@ public abstract class SynthesisRequest { */ public abstract int done(); + /** + * The service can call this method instead of using {@link #start}, {@link #audioAvailable} + * and {@link #done} if all the audio data is available in a single buffer. + * + * @param sampleRateInHz Sample rate in HZ of the generated audio. + * @param audioFormat Audio format of the generated audio. Must be one of + * the ENCODING_ constants defined in {@link android.media.AudioFormat}. + * @param channelCount The number of channels. Must be {@code 1} or {@code 2}. + * @param buffer The generated audio data. This method will not hold on to {@code buffer}, + * so the caller is free to modify it after this method returns. + * @param offset The offset into {@code buffer} where the audio data starts. + * @param length The number of bytes of audio data in {@code buffer}. + * @return {@link TextToSpeech#SUCCESS} or {@link TextToSpeech#ERROR}. + */ + public abstract int completeAudioAvailable(int sampleRateInHz, int audioFormat, + int channelCount, byte[] buffer, int offset, int length); }
\ No newline at end of file