Skip to content

Commit

Permalink
Development of WebSocket is over.
Browse files Browse the repository at this point in the history
  • Loading branch information
UtopiaXC committed Dec 2, 2022
1 parent ca833db commit af55b7e
Show file tree
Hide file tree
Showing 11 changed files with 258 additions and 44 deletions.
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
package com.utopiaxc.utopiatts.enums;

import com.utopiaxc.utopiatts.tts.enums.Driver;
import com.utopiaxc.utopiatts.tts.enums.OutputFormat;

public enum SettingsEnum {

THEME("THEME", "theme", "auto"),
FIRST_BOOT("FIRST_BOOT", "first_boot", true),
AZURE_REGION("AZURE_REGION", "azure_region", ""),
AZURE_TOKEN("AZURE_TOKEN", "azure_token", "NULL"),
OUTPUT_FORMAT("OUTPUT_FORMAT", "output_format", ""),
OUTPUT_FORMAT("OUTPUT_FORMAT", "output_format", OutputFormat.OGG_48K_HZ_16_BIT_MONO_OPUS.getName()),
ACTOR("ACTOR", "actor", ""),
ROLE("ROLE", "role", ""),
STYLE("STYLE", "style", ""),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import android.content.SharedPreferences;
import android.os.Bundle;
import android.text.InputType;
import android.util.Log;

import androidx.preference.EditTextPreference;
import androidx.preference.ListPreference;
Expand Down Expand Up @@ -98,5 +99,29 @@ public void onCreatePreferences(Bundle savedInstanceState, String rootKey) {
}
return true;
});

ListPreference listFormat = findPreference(SettingsEnum.OUTPUT_FORMAT.getKey());
assert listFormat != null;
listFormat.setOnPreferenceChangeListener((preference, newValue) -> {
new AlertDialog.Builder(requireActivity()).setTitle(R.string.warning)
.setMessage(R.string.warning_of_output_format)
.setPositiveButton(R.string.confirm, null)
.create()
.show();
return true;
});

EditTextPreference editTextPreferenceToken = findPreference(SettingsEnum.AZURE_TOKEN.getKey());
assert editTextPreferenceToken != null;
editTextPreferenceToken.setEnabled(Driver.AZURE_SDK.getId().equals(
sharedPreferences.getString(SettingsEnum.TTS_DRIVER.getKey(),
String.valueOf(SettingsEnum.TTS_DRIVER.getDefaultValue()))));

ListPreference listDriver = findPreference(SettingsEnum.TTS_DRIVER.getKey());
assert listDriver != null;
listDriver.setOnPreferenceChangeListener((preference, newValue) -> {
editTextPreferenceToken.setEnabled(Driver.AZURE_SDK.getId().equals(newValue));
return true;
});
}
}
67 changes: 38 additions & 29 deletions app/src/main/java/com/utopiaxc/utopiatts/tts/WsTts.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,11 @@

import android.content.Context;
import android.content.SharedPreferences;
import android.media.AudioFormat;
import android.media.MediaCodec;
import android.media.MediaExtractor;
import android.media.MediaFormat;
import android.os.Build;
import android.os.SystemClock;
import android.speech.tts.SynthesisCallback;
import android.speech.tts.SynthesisRequest;
import android.speech.tts.TextToSpeech;
import android.text.TextUtils;
import android.util.Log;

Expand All @@ -37,7 +33,6 @@

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;
import java.util.Date;
import java.util.Objects;
Expand All @@ -63,6 +58,7 @@ public class WsTts implements Tts {
private final Buffer mData = new Buffer();
private MediaCodec mMediaCodec;
private String mOldMime;
private Ssml mSsml;
private SynthesisCallback mCallback;
private final WebSocketListener mWebSocketListener = new WebSocketListener() {
@Override
Expand All @@ -87,16 +83,13 @@ public void onFailure(@NotNull WebSocket webSocket, @NotNull Throwable t, @Nulla
super.onFailure(webSocket, t, response);
mWebSocket = null;
mWebSocketState = WebSocketState.OFFLINE;
Log.e(TAG, "onFailure" + t.getMessage(), t);
if (mIsSynthesizing) {
mWebSocket = getOrCreateWs();
}
Log.e(TAG, "onFailure, throwable = \n",t);
getOrCreateWs().send(mSsml.toStringForWs());
}

@Override
public void onMessage(@NotNull WebSocket webSocket, @NotNull String text) {
super.onMessage(webSocket, text);
//Log.v(TAG, "onMessage" + text);
final String endTag = "turn.end";
final String startTag = "turn.start";
int endIndex = text.lastIndexOf(endTag);
Expand All @@ -121,15 +114,12 @@ public void onMessage(@NotNull WebSocket webSocket, @NotNull ByteString bytes) {
final String audioTag = "Path:audio\r\n";
final String startTag = "Content-Type:";
final String endTag = "\r\nX-StreamId";

int audioIndex = bytes.lastIndexOf(audioTag.getBytes(StandardCharsets.UTF_8)) + audioTag.length();
int startIndex = bytes.lastIndexOf(startTag.getBytes(StandardCharsets.UTF_8)) + startTag.length();
int endIndex = bytes.lastIndexOf(endTag.getBytes(StandardCharsets.UTF_8));
if (audioIndex != -1) {

try {
String temp = bytes.substring(startIndex, endIndex).utf8();
Log.v(TAG, "当前Mime:" + temp);
String mCurrentMime;
if (temp.startsWith("audio")) {
mCurrentMime = temp;
Expand All @@ -139,12 +129,10 @@ public void onMessage(@NotNull WebSocket webSocket, @NotNull ByteString bytes) {
if (!mOutputFormat.needDecode()) {
if ("audio/x-wav".equals(mCurrentMime) && bytes.lastIndexOf("RIFF".getBytes(StandardCharsets.UTF_8)) != -1) {
audioIndex += 44;
Log.v(TAG, "移除WAV文件头");
}
}
mData.write(bytes.substring(audioIndex));
} catch (Exception e) {
Log.e(TAG, "onMessage Error:", e);
mIsSynthesizing = false;
}
}
Expand Down Expand Up @@ -199,12 +187,17 @@ public boolean doSpeak(String text, int pitch, int rate,SynthesisCallback synthe
}
}
mIsSynthesizing = false;
return false;
return true;
}

/**
 * Stops the current synthesis.
 *
 * <p>Closes the active WebSocket (if any) with the normal-closure code 1000,
 * drops the reference to it, clears the synthesizing flag and discards any
 * audio bytes still held in {@code mData}.
 */
@Override
public void stopSpeak() {
    // The explicit null check already guards the call, so no extra
    // Objects.requireNonNull wrapper is needed around the field.
    if (mWebSocket != null) {
        mWebSocket.close(1000, "closed by call onStop");
        mWebSocket = null;
    }
    // Clear the flag before dropping the buffered data; loops in this class
    // that are conditioned on mIsSynthesizing will see it as false.
    mIsSynthesizing = false;
    mData.clear();
}

@Override
Expand Down Expand Up @@ -237,24 +230,41 @@ public synchronized void sendText(String text, int pitch, int rate) {
int styleDegree = mSharedPreferences.getInt(SettingsEnum.STYLE_DEGREE.getKey(),
(Integer) SettingsEnum.STYLE_DEGREE.getDefaultValue());

Ssml ssml = new Ssml(text, actor.getId(), pitch,
mSsml = new Ssml(text, actor.getId(), pitch,
rate, role.getId(), style.getId(), styleDegree);

try {
boolean success = getOrCreateWs().send(ssml.toStringForWs());
if (!success && mIsSynthesizing) {
getOrCreateWs().send(ssml.toStringForWs());
}
} catch (Exception e) {
getOrCreateWs();
while (mWebSocket == null) {
while (mIsSynthesizing){
Log.w(TAG,"try sendText");
try {
if (getOrCreateWs().send(mSsml.toStringForWs())){
break;
}
}catch (Exception e){
try {
this.wait(500);
} catch (Exception ignored) {
}
Log.w(TAG,"Retry sendText");
}
getOrCreateWs().send(ssml.toStringForWs());
}
// try {
// while(!(getOrCreateWs().send(ssml.toStringForWs()))&&mIsSynthesizing){
// Log.w(TAG,"Retry sendText");
// }
// boolean success = getOrCreateWs().send(ssml.toStringForWs());
// if (!success && mIsSynthesizing) {
// getOrCreateWs().send(ssml.toStringForWs());
// }
// } catch (Exception e) {
// getOrCreateWs();
// while (mWebSocket == null) {
// try {
// this.wait(500);
// } catch (Exception ignored) {
// }
// }
// getOrCreateWs().send(ssml.toStringForWs());
// }
}

private synchronized void sendConfig(@NonNull WebSocket ws) {
Expand All @@ -265,7 +275,7 @@ private synchronized void sendConfig(@NonNull WebSocket ws) {
"{\"synthesis\":" +
"{\"audio\":" +
"{\"metadataoptions\":" +
"{\"sentenceBoundaryEnabled\":\"false\",\"wordBoundaryEnabled\":\"true\"" +
"{\"sentenceBoundaryEnabled\":\"true\",\"wordBoundaryEnabled\":\"true\"" +
"},\"outputFormat\":\"" + mOutputFormat.getValue() + "\"}}}}";
ws.send(msg);
}
Expand Down Expand Up @@ -387,7 +397,6 @@ private synchronized void doDecode(@NonNull ByteString data) {
private synchronized void doUnDecode(@NonNull ByteString data) {
mIsSynthesizing = true;
int length = data.toByteArray().length;
//最大BufferSize
final int maxBufferSize = mCallback.getMaxBufferSize();
int offset = 0;
while (offset < length && mIsSynthesizing) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,6 @@ public static OutputFormat getOutputFormat(String name) {
return outputFormat;
}
}
return RAW_48K_HZ_16_BIT_MONO_PCM;
return OGG_48K_HZ_16_BIT_MONO_OPUS;
}
}
10 changes: 10 additions & 0 deletions app/src/main/java/com/utopiaxc/utopiatts/tts/utils/CommonTool.java
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,16 @@ public static String getTime() {
return sdf.format(date);
}

/**
 * Formats the given timestamp as a time string.
 *
 * @param timestamp epoch milliseconds of the instant to format
 * @return the text produced by the class's {@code sdf} formatter for that instant
 */
public static String getTime(long timestamp) {
    return sdf.format(new Date(timestamp));
}


public static String localeToEmoji(Locale locale) {
String countryCode = locale.getCountry();
Expand Down
36 changes: 25 additions & 11 deletions app/src/main/java/com/utopiaxc/utopiatts/tts/utils/Ssml.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,27 @@

import androidx.annotation.NonNull;

import java.util.List;

public class Ssml {
private static final String TAG = "Ssml";
private final String mActor;
private final int mPitch;
private final int mRate;
private final String mStyle;
private final int mStyleDegree;
private final String mText;
private final StringBuilder mText;
private final String mRole;

public Ssml(String text, String actor, int pitch, int rate,String role, String style, int styleDegree) {
this.mText = text;
this.mActor = actor;
this.mPitch = (pitch/4)-25;
this.mRate = (rate/4)-25;
this.mRole =role;
this.mStyle = style;
this.mStyleDegree = styleDegree;
mText = new StringBuilder(text);
mActor = actor;
mPitch = (pitch/4)-25;
mRate = (rate/4)-25;
mRole =role;
mStyle = style;
mStyleDegree = styleDegree;
handleContent();
}


Expand Down Expand Up @@ -53,14 +56,15 @@ public String toString() {
}

public String toStringForWs(){
long timestamp=System.currentTimeMillis();
StringBuilder sb = new StringBuilder()
.append("Path:ssml\r\n")
.append("X-RequestId:").append(CommonTool.getMD5String(mText + "" + System.currentTimeMillis()))
.append("X-RequestId:").append(CommonTool.getMD5String(mText + "" + timestamp))
.append("\r\n")
.append("X-Timestamp:")
.append(CommonTool.getTime()).append("Z\r\n")
.append(CommonTool.getTime(timestamp)).append("Z\r\n")
.append("Content-Type:application/ssml+xml\r\n\r\n");
sb.append("<speak xmlns=\"http://www.w3.org/2001/10/synthesis\" xmlns:mstts=\"http://www.w3.org/2001/mstts\" xmlns:emo=\"http://www.w3.org/2009/10/emotionml\" version=\"1.0\" xml:lang=\"en-US\">");
sb.append("<speak xmlns=\"http://www.w3.org/2001/10/synthesis\" xmlns:mstts=\"http://www.w3.org/2001/mstts\" xmlns:emo=\"http://www.w3.org/2009/10/emotionml\" version=\"1.0\" xml:lang=\"zh-CN\">");
sb.append("<voice name=\"").append(mActor).append("\">");
if (!"".equals(mStyle)||!"".equals(mRole)) {
sb.append("<mstts:express-as ");
Expand All @@ -82,4 +86,14 @@ public String toStringForWs(){
Log.d(TAG, "toString = " + sb);
return sb.toString();
}

/**
 * Prepares the raw text in {@code mText} for embedding in the SSML document:
 * line breaks become spaces, the text is trimmed, and the five XML special
 * characters are replaced by their predefined entities.
 *
 * <p>NOTE(review): assumes {@code CommonTool.replace} and {@code CommonTool.Trim}
 * modify the passed {@code StringBuilder} in place (their return values are not
 * used here) - confirm against CommonTool.
 */
private void handleContent() {
    CommonTool.replace(mText, "\n", " ");
    CommonTool.Trim(mText);
    // '&' must be escaped first; otherwise the ampersands introduced by the
    // entities below would themselves be escaped a second time.
    CommonTool.replace(mText, "&", "&amp;");
    CommonTool.replace(mText, "\"", "&quot;");
    CommonTool.replace(mText, "'", "&apos;");
    // '<' maps to &lt; and '>' maps to &gt;. The two were previously swapped,
    // which inverted every angle bracket in the text sent for synthesis.
    CommonTool.replace(mText, "<", "&lt;");
    CommonTool.replace(mText, ">", "&gt;");
}
}
45 changes: 45 additions & 0 deletions app/src/main/res/values-zh/arrays.xml
Original file line number Diff line number Diff line change
Expand Up @@ -90,4 +90,49 @@
<item>轻松叙事</item>
<item>纪录片解说</item>
</string-array>

<string-array name="tts_output_format_entries">
<item>AUDIO_16K_HZ_128K_BIT_RATE_MONO_MP3 *</item>
<item>AUDIO_16K_HZ_16_BIT_32KBPS_MONO_OPUS *</item>
<item>AUDIO_16K_HZ_16KBPS_MONO_SIREN</item>
<item>AUDIO_16K_HZ_32K_BIT_RATE_MONO_MP3 *</item>
<item>AUDIO_16K_HZ_64K_BIT_RATE_MONO_MP3 *</item>
<item>AUDIO_24K_HZ_160K_BIT_RATE_MONO_MP3 *</item>
<item>AUDIO_24K_HZ_16_BIT_24KBPS_MONO_OPUS *</item>
<item>AUDIO_24K_HZ_16_BIT_48KBPS_MONO_OPUS *</item>
<item>AUDIO_24K_HZ_48K_BIT_RATE_MONO_MP3 *</item>
<item>AUDIO_24K_HZ_96K_BIT_RATE_MONO_MP3 *</item>
<item>AUDIO_48K_HZ_192K_BIT_RATE_MONO_MP3 *</item>
<item>AUDIO_48K_HZ_96K_BIT_RATE_MONO_MP3 *</item>
<item>OGG_16K_HZ_16_BIT_MONO_OPUS *</item>
<item>OGG_24K_HZ_16_BIT_MONO_OPUS *</item>
<item>OGG_48K_HZ_16_BIT_MONO_OPUS * 默认格式</item>
<item>RAW_16K_HZ_16_BIT_MONO_PCM</item>
<item>RAW_16K_HZ_16_BIT_MONO_TRUE_SILK</item>
<item>RAW_22050_HZ_16_BIT_MONO_PCM</item>
<item>RAW_24K_HZ_16_BIT_MONO_PCM</item>
<item>RAW_24K_HZ_16_BIT_MONO_TRUE_SILK</item>
<item>RAW_44100_HZ_16_BIT_MONO_PCM</item>
<item>RAW_48K_HZ_16_BIT_MONO_PCM</item>
<item>RAW_8K_HZ_16_BIT_MONO_PCM</item>
<item>RAW_8K_HZ_8_BIT_MONO_ALAW</item>
<item>RAW_8K_HZ_8_BIT_MONO_MULAW</item>
<item>RIFF_16K_HZ_16_BIT_MONO_PCM</item>
<item>RIFF_16K_HZ_16KBPS_MONO_SIREN</item>
<item>RIFF_22050_HZ_16_BIT_MONO_PCM</item>
<item>RIFF_24K_HZ_16_BIT_MONO_PCM</item>
<item>RIFF_44100_HZ_16_BIT_MONO_PCM</item>
<item>RIFF_48K_HZ_16_BIT_MONO_PCM</item>
<item>RIFF_8K_HZ_16_BIT_MONO_PCM</item>
<item>RIFF_8K_HZ_8_BIT_MONO_ALAW</item>
<item>RIFF_8K_HZ_8_BIT_MONO_MULAW</item>
<item>WEBM_16K_HZ_16_BIT_MONO_OPUS *</item>
<item>WEBM_24K_HZ_16_BIT_24KBPS_MONO_OPUS *</item>
<item>WEBM_24K_HZ_16_BIT_MONO_OPUS *</item>
</string-array>

<string-array name="tts_driver_entries">
<item>Azure SDK个人密钥</item>
<item>官网Demo WebSocket</item>
</string-array>
</resources>
4 changes: 3 additions & 1 deletion app/src/main/res/values-zh/strings.xml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
<string name="confirm">确认</string>
<string name="tts_driver_title">TTS引擎驱动方式</string>
<string name="tts_driver_title_sdk">Azure SDK</string>
<string name="tts_driver_title_ws">WebSocket</string>
<string name="tts_driver_title_ws">Demo WebSocket</string>
<string name="warning">警告</string>
<string name="warning_blank_token">请输入Azure TTS服务密钥。</string>
<string name="warning_azure_token_not_checked">您的Azure信息未经检查,请先点击确认进行检查。</string>
Expand Down Expand Up @@ -52,4 +52,6 @@
<string name="sorry">抱歉</string>
<string name="tutorials_is_not_ready">教程还未编写完成。</string>
<string name="tips_azure_token">此处输入从微软Azure中获取的TTS服务密钥,如果不理解,可以点击教程前往了解。</string>
<string name="output_format">音频流格式</string>
<string name="warning_of_output_format">"如果您的语音不卡顿,请不要修改本项,因为部分语音在WebSocket模式下存在部分无法解决的问题以加剧卡顿。\n如果需要修改,请首选带'*'的格式,因为它们是需要进行解码的格式,因此可能更加稳定。"</string>
</resources>
Loading

0 comments on commit af55b7e

Please sign in to comment.