author Bjorn Bringert <bringert@android.com> 2011-04-15 07:28:40 -0700
committer Android (Google) Code Review <android-gerrit@google.com> 2011-04-15 07:28:40 -0700
commit 3ad604b3d8a3ae87ee3f7545677bacc8f11159c0 (patch)
tree 7d22b8cbbb0b2fae1433449cf132b939bc1b8b0b
parent 702acacfdadcb496130e6e1c8b427a47a1f35930 (diff)
parent 71e0b4807797c602e7fc787d00d27c4f9c92a507 (diff)
Merge "Improve TTS engine audio buffer API"
-rw-r--r--  core/java/android/speech/tts/FileSynthesisRequest.java      |  47
-rw-r--r--  core/java/android/speech/tts/PlaybackSynthesisRequest.java  | 111
-rw-r--r--  core/java/android/speech/tts/SynthesisRequest.java          |  35
3 files changed, 160 insertions, 33 deletions
diff --git a/core/java/android/speech/tts/FileSynthesisRequest.java b/core/java/android/speech/tts/FileSynthesisRequest.java
index 370ad5338c2a..6a9b2dc14de3 100644
--- a/core/java/android/speech/tts/FileSynthesisRequest.java
+++ b/core/java/android/speech/tts/FileSynthesisRequest.java
@@ -19,6 +19,7 @@ import android.media.AudioFormat;
import android.util.Log;
import java.io.File;
+import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
@@ -32,6 +33,8 @@ class FileSynthesisRequest extends SynthesisRequest {
private static final String TAG = "FileSynthesisRequest";
private static final boolean DBG = false;
+ private static final int MAX_AUDIO_BUFFER_SIZE = 8192;
+
private static final int WAV_HEADER_LENGTH = 44;
private static final short WAV_FORMAT_PCM = 0x0001;
@@ -81,6 +84,11 @@ class FileSynthesisRequest extends SynthesisRequest {
}
@Override
+ public int getMaxBufferSize() {
+ return MAX_AUDIO_BUFFER_SIZE;
+ }
+
+ @Override
public int start(int sampleRateInHz, int audioFormat, int channelCount) {
if (DBG) {
Log.d(TAG, "FileSynthesisRequest.start(" + sampleRateInHz + "," + audioFormat
@@ -152,8 +160,9 @@ class FileSynthesisRequest extends SynthesisRequest {
try {
// Write WAV header at start of file
mFile.seek(0);
- int fileLen = (int) mFile.length();
- mFile.write(makeWavHeader(mSampleRateInHz, mAudioFormat, mChannelCount, fileLen));
+ int dataLength = (int) (mFile.length() - WAV_HEADER_LENGTH);
+ mFile.write(
+ makeWavHeader(mSampleRateInHz, mAudioFormat, mChannelCount, dataLength));
closeFile();
return TextToSpeech.SUCCESS;
} catch (IOException ex) {
@@ -164,8 +173,37 @@ class FileSynthesisRequest extends SynthesisRequest {
}
}
+ @Override
+ public int completeAudioAvailable(int sampleRateInHz, int audioFormat, int channelCount,
+ byte[] buffer, int offset, int length) {
+ synchronized (mStateLock) {
+ if (mStopped) {
+ if (DBG) Log.d(TAG, "Request has been aborted.");
+ return TextToSpeech.ERROR;
+ }
+ }
+ FileOutputStream out = null;
+ try {
+ out = new FileOutputStream(mFileName);
+ out.write(makeWavHeader(sampleRateInHz, audioFormat, channelCount, length));
+ out.write(buffer, offset, length);
+ return TextToSpeech.SUCCESS;
+ } catch (IOException ex) {
+ Log.e(TAG, "Failed to write to " + mFileName + ": " + ex);
+ return TextToSpeech.ERROR;
+ } finally {
+ try {
+ if (out != null) {
+ out.close();
+ }
+ } catch (IOException ex) {
+ Log.e(TAG, "Failed to close " + mFileName + ": " + ex);
+ }
+ }
+ }
+
private byte[] makeWavHeader(int sampleRateInHz, int audioFormat, int channelCount,
- int fileLength) {
+ int dataLength) {
// TODO: is AudioFormat.ENCODING_DEFAULT always the same as ENCODING_PCM_16BIT?
int sampleSizeInBytes = (audioFormat == AudioFormat.ENCODING_PCM_8BIT ? 1 : 2);
int byteRate = sampleRateInHz * sampleSizeInBytes * channelCount;
@@ -177,7 +215,7 @@ class FileSynthesisRequest extends SynthesisRequest {
header.order(ByteOrder.LITTLE_ENDIAN);
header.put(new byte[]{ 'R', 'I', 'F', 'F' });
- header.putInt(fileLength - 8); // RIFF chunk size
+ header.putInt(dataLength + WAV_HEADER_LENGTH - 8); // RIFF chunk size
header.put(new byte[]{ 'W', 'A', 'V', 'E' });
header.put(new byte[]{ 'f', 'm', 't', ' ' });
header.putInt(16); // size of fmt chunk
@@ -188,7 +226,6 @@ class FileSynthesisRequest extends SynthesisRequest {
header.putShort(blockAlign);
header.putShort(bitsPerSample);
header.put(new byte[]{ 'd', 'a', 't', 'a' });
- int dataLength = fileLength - WAV_HEADER_LENGTH;
header.putInt(dataLength);
return headerBuf;
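
Note: the hunks above change makeWavHeader() to take the PCM payload length
(dataLength) rather than the total file length, so the RIFF chunk size becomes
dataLength + WAV_HEADER_LENGTH - 8 = dataLength + 36. A minimal standalone
sketch of the same 44-byte header layout; the WavHeaderSketch class is
hypothetical and takes the sample size in bytes directly instead of an
AudioFormat constant so it runs as plain Java:

    import java.nio.ByteBuffer;
    import java.nio.ByteOrder;

    class WavHeaderSketch {
        static final int WAV_HEADER_LENGTH = 44;
        static final short WAV_FORMAT_PCM = 0x0001;

        // dataLength is the PCM payload size in bytes, excluding the header.
        static byte[] makeWavHeader(int sampleRateInHz, int sampleSizeInBytes,
                int channelCount, int dataLength) {
            int byteRate = sampleRateInHz * sampleSizeInBytes * channelCount;
            short blockAlign = (short) (sampleSizeInBytes * channelCount);
            short bitsPerSample = (short) (sampleSizeInBytes * 8);

            byte[] headerBuf = new byte[WAV_HEADER_LENGTH];
            ByteBuffer header = ByteBuffer.wrap(headerBuf);
            header.order(ByteOrder.LITTLE_ENDIAN);
            header.put(new byte[]{ 'R', 'I', 'F', 'F' });
            // RIFF chunk size counts everything after these first 8 bytes.
            header.putInt(dataLength + WAV_HEADER_LENGTH - 8);
            header.put(new byte[]{ 'W', 'A', 'V', 'E' });
            header.put(new byte[]{ 'f', 'm', 't', ' ' });
            header.putInt(16); // fmt chunk payload size for plain PCM
            header.putShort(WAV_FORMAT_PCM);
            header.putShort((short) channelCount);
            header.putInt(sampleRateInHz);
            header.putInt(byteRate);
            header.putShort(blockAlign);
            header.putShort(bitsPerSample);
            header.put(new byte[]{ 'd', 'a', 't', 'a' });
            header.putInt(dataLength); // data chunk size: payload only
            return headerBuf;
        }
    }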
diff --git a/core/java/android/speech/tts/PlaybackSynthesisRequest.java b/core/java/android/speech/tts/PlaybackSynthesisRequest.java
index 15a4ee96ecc8..226701518805 100644
--- a/core/java/android/speech/tts/PlaybackSynthesisRequest.java
+++ b/core/java/android/speech/tts/PlaybackSynthesisRequest.java
@@ -78,6 +78,13 @@ class PlaybackSynthesisRequest extends SynthesisRequest {
}
}
+ @Override
+ public int getMaxBufferSize() {
+ // The AudioTrack buffer will be at least MIN_AUDIO_BUFFER_SIZE, so that should always be
+ // a safe buffer size to pass in.
+ return MIN_AUDIO_BUFFER_SIZE;
+ }
+
// TODO: add a thread that writes to the AudioTrack?
@Override
public int start(int sampleRateInHz, int audioFormat, int channelCount) {
@@ -86,20 +93,6 @@ class PlaybackSynthesisRequest extends SynthesisRequest {
+ "," + channelCount + ")");
}
- int channelConfig;
- if (channelCount == 1) {
- channelConfig = AudioFormat.CHANNEL_OUT_MONO;
- } else if (channelCount == 2){
- channelConfig = AudioFormat.CHANNEL_OUT_STEREO;
- } else {
- Log.e(TAG, "Unsupported number of channels: " + channelCount);
- return TextToSpeech.ERROR;
- }
-
- int minBufferSizeInBytes
- = AudioTrack.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat);
- int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);
-
synchronized (mStateLock) {
if (mStopped) {
if (DBG) Log.d(TAG, "Request has been aborted.");
@@ -111,22 +104,19 @@ class PlaybackSynthesisRequest extends SynthesisRequest {
return TextToSpeech.ERROR;
}
- mAudioTrack = new AudioTrack(mStreamType, sampleRateInHz, channelConfig, audioFormat,
- bufferSizeInBytes, AudioTrack.MODE_STREAM);
- if (mAudioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
- cleanUp();
+ mAudioTrack = createAudioTrack(sampleRateInHz, audioFormat, channelCount,
+ AudioTrack.MODE_STREAM);
+ if (mAudioTrack == null) {
return TextToSpeech.ERROR;
}
-
- setupVolume();
}
return TextToSpeech.SUCCESS;
}
- private void setupVolume() {
- float vol = clip(mVolume, 0.0f, 1.0f);
- float panning = clip(mPan, -1.0f, 1.0f);
+ private void setupVolume(AudioTrack audioTrack, float volume, float pan) {
+ float vol = clip(volume, 0.0f, 1.0f);
+ float panning = clip(pan, -1.0f, 1.0f);
float volLeft = vol;
float volRight = vol;
if (panning > 0.0f) {
@@ -135,7 +125,7 @@ class PlaybackSynthesisRequest extends SynthesisRequest {
volRight *= (1.0f + panning);
}
if (DBG) Log.d(TAG, "volLeft=" + volLeft + ",volRight=" + volRight);
- if (mAudioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
+ if (audioTrack.setStereoVolume(volLeft, volRight) != AudioTrack.SUCCESS) {
Log.e(TAG, "Failed to set volume");
}
}
@@ -148,7 +138,10 @@ class PlaybackSynthesisRequest extends SynthesisRequest {
public int audioAvailable(byte[] buffer, int offset, int length) {
if (DBG) {
Log.d(TAG, "audioAvailable(byte[" + buffer.length + "],"
- + offset + "," + length + "), thread ID=" + android.os.Process.myTid());
+ + offset + "," + length + ")");
+ }
+ if (length > getMaxBufferSize()) {
+ throw new IllegalArgumentException("buffer is too large (" + length + " bytes)");
}
synchronized (mStateLock) {
if (mStopped) {
@@ -194,4 +187,72 @@ class PlaybackSynthesisRequest extends SynthesisRequest {
}
return TextToSpeech.SUCCESS;
}
+
+ @Override
+ public int completeAudioAvailable(int sampleRateInHz, int audioFormat, int channelCount,
+ byte[] buffer, int offset, int length) {
+ if (DBG) {
+ Log.d(TAG, "completeAudioAvailable(" + sampleRateInHz + "," + audioFormat
+ + "," + channelCount + "byte[" + buffer.length + "],"
+ + offset + "," + length + ")");
+ }
+
+ synchronized (mStateLock) {
+ if (mStopped) {
+ if (DBG) Log.d(TAG, "Request has been aborted.");
+ return TextToSpeech.ERROR;
+ }
+ if (mAudioTrack != null) {
+ Log.e(TAG, "start() called before completeAudioAvailable()");
+ cleanUp();
+ return TextToSpeech.ERROR;
+ }
+
+ mAudioTrack = createAudioTrack(sampleRateInHz, audioFormat, channelCount,
+ AudioTrack.MODE_STATIC);
+ if (mAudioTrack == null) {
+ return TextToSpeech.ERROR;
+ }
+
+ try {
+ mAudioTrack.write(buffer, offset, length);
+ mAudioTrack.play();
+ } catch (IllegalStateException ex) {
+ Log.e(TAG, "Playback error", ex);
+ return TextToSpeech.ERROR;
+ } finally {
+ cleanUp();
+ }
+ }
+
+ return TextToSpeech.SUCCESS;
+ }
+
+ private AudioTrack createAudioTrack(int sampleRateInHz, int audioFormat, int channelCount,
+ int mode) {
+ int channelConfig;
+ if (channelCount == 1) {
+ channelConfig = AudioFormat.CHANNEL_OUT_MONO;
+ } else if (channelCount == 2){
+ channelConfig = AudioFormat.CHANNEL_OUT_STEREO;
+ } else {
+ Log.e(TAG, "Unsupported number of channels: " + channelCount);
+ return null;
+ }
+
+ int minBufferSizeInBytes
+ = AudioTrack.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat);
+ int bufferSizeInBytes = Math.max(MIN_AUDIO_BUFFER_SIZE, minBufferSizeInBytes);
+ AudioTrack audioTrack = new AudioTrack(mStreamType, sampleRateInHz, channelConfig,
+ audioFormat, bufferSizeInBytes, mode);
+ if (audioTrack == null) {
+ return null;
+ }
+ if (audioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
+ audioTrack.release();
+ return null;
+ }
+ setupVolume(audioTrack, mVolume, mPan);
+ return audioTrack;
+ }
}
\ No newline at end of file
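
Note: the new createAudioTrack() above is shared by the streaming path
(MODE_STREAM, used by start()) and the one-shot path (MODE_STATIC, used by
completeAudioAvailable()). A hedged sketch of the static-mode flow on its own;
StaticPlaybackSketch and the mono/16-bit defaults are assumptions for
illustration, not part of the patch:

    import android.media.AudioFormat;
    import android.media.AudioManager;
    import android.media.AudioTrack;

    class StaticPlaybackSketch {
        // Plays a complete PCM clip in one shot; returns null on failure.
        static AudioTrack playAll(int sampleRateInHz, byte[] pcm) {
            // MODE_STATIC requires the track buffer to hold the whole clip
            // up front, unlike MODE_STREAM, which sizes the buffer from
            // AudioTrack.getMinBufferSize().
            AudioTrack track = new AudioTrack(AudioManager.STREAM_MUSIC,
                    sampleRateInHz, AudioFormat.CHANNEL_OUT_MONO,
                    AudioFormat.ENCODING_PCM_16BIT, pcm.length,
                    AudioTrack.MODE_STATIC);
            if (track.getState() == AudioTrack.STATE_UNINITIALIZED) {
                track.release(); // construction failed
                return null;
            }
            track.write(pcm, 0, pcm.length); // load the entire clip
            track.play();                    // caller releases when done
            return track;
        }
    }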
diff --git a/core/java/android/speech/tts/SynthesisRequest.java b/core/java/android/speech/tts/SynthesisRequest.java
index 3f2ec5d746b7..f4bb85228901 100644
--- a/core/java/android/speech/tts/SynthesisRequest.java
+++ b/core/java/android/speech/tts/SynthesisRequest.java
@@ -18,6 +18,13 @@ package android.speech.tts;
/**
* A request for speech synthesis given to a TTS engine for processing.
*
+ * The engine can provide streaming audio by calling
+ * {@link #start}, then {@link #audioAvailable} until all audio has been provided, then finally
+ * {@link #done}.
+ *
+ * Alternatively, the engine can provide all the audio at once, by using
+ * {@link #completeAudioAvailable}.
+ *
* @hide Pending approval
*/
public abstract class SynthesisRequest {
@@ -101,6 +108,12 @@ public abstract class SynthesisRequest {
}
/**
+ * Gets the maximum number of bytes that the TTS engine can pass in a single call of
+ * {@link #audioAvailable}. This does not apply to {@link #completeAudioAvailable}.
+ */
+ public abstract int getMaxBufferSize();
+
+ /**
* Aborts the speech request.
*
* Can be called from multiple threads.
@@ -117,7 +130,7 @@ public abstract class SynthesisRequest {
* @param sampleRateInHz Sample rate in HZ of the generated audio.
* @param audioFormat Audio format of the generated audio. Must be one of
* the ENCODING_ constants defined in {@link android.media.AudioFormat}.
- * @param channelCount The number of channels
+ * @param channelCount The number of channels. Must be {@code 1} or {@code 2}.
* @return {@link TextToSpeech#SUCCESS} or {@link TextToSpeech#ERROR}.
*/
public abstract int start(int sampleRateInHz, int audioFormat, int channelCount);
@@ -131,8 +144,8 @@ public abstract class SynthesisRequest {
* @param buffer The generated audio data. This method will not hold on to {@code buffer},
* so the caller is free to modify it after this method returns.
* @param offset The offset into {@code buffer} where the audio data starts.
- * @param length The number of bytes of audio data in {@code buffer}.
- * Must be less than or equal to {@code buffer.length - offset}.
+ * @param length The number of bytes of audio data in {@code buffer}. This must be
+ * less than or equal to the return value of {@link #getMaxBufferSize}.
* @return {@link TextToSpeech#SUCCESS} or {@link TextToSpeech#ERROR}.
*/
public abstract int audioAvailable(byte[] buffer, int offset, int length);
@@ -148,4 +161,20 @@ public abstract class SynthesisRequest {
*/
public abstract int done();
+ /**
+ * The service can call this method instead of using {@link #start}, {@link #audioAvailable}
+ * and {@link #done} if all the audio data is available in a single buffer.
+ *
+ * @param sampleRateInHz Sample rate in HZ of the generated audio.
+ * @param audioFormat Audio format of the generated audio. Must be one of
+ * the ENCODING_ constants defined in {@link android.media.AudioFormat}.
+ * @param channelCount The number of channels. Must be {@code 1} or {@code 2}.
+ * @param buffer The generated audio data. This method will not hold on to {@code buffer},
+ * so the caller is free to modify it after this method returns.
+ * @param offset The offset into {@code buffer} where the audio data starts.
+ * @param length The number of bytes of audio data in {@code buffer}.
+ * @return {@link TextToSpeech#SUCCESS} or {@link TextToSpeech#ERROR}.
+ */
+ public abstract int completeAudioAvailable(int sampleRateInHz, int audioFormat,
+ int channelCount, byte[] buffer, int offset, int length);
}
\ No newline at end of file
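
Note: taken together, the new SynthesisRequest surface gives an engine two
ways to deliver audio. A hedged sketch of a caller driving the streaming path;
EngineSketch and streamPcm() are hypothetical, and only the SynthesisRequest
methods added in this change are assumed real:

    import android.media.AudioFormat;
    import android.speech.tts.SynthesisRequest;
    import android.speech.tts.TextToSpeech;

    class EngineSketch {
        int streamPcm(SynthesisRequest request, byte[] pcm, int sampleRateInHz) {
            if (request.start(sampleRateInHz, AudioFormat.ENCODING_PCM_16BIT, 1)
                    != TextToSpeech.SUCCESS) {
                return TextToSpeech.ERROR;
            }
            // audioAvailable() now rejects buffers larger than
            // getMaxBufferSize(), so feed the audio in bounded chunks.
            int maxChunk = request.getMaxBufferSize();
            for (int offset = 0; offset < pcm.length; offset += maxChunk) {
                int length = Math.min(maxChunk, pcm.length - offset);
                if (request.audioAvailable(pcm, offset, length)
                        != TextToSpeech.SUCCESS) {
                    return TextToSpeech.ERROR;
                }
            }
            return request.done();
        }
    }

The one-shot alternative collapses the same work into a single call:
request.completeAudioAvailable(sampleRateInHz, AudioFormat.ENCODING_PCM_16BIT,
1, pcm, 0, pcm.length).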