From 203f60f1e175f4d171a329ad1449df65a6b6bebd Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 28 May 2024 21:30:55 +0200 Subject: [PATCH 1/3] refactor(espeak_wrapper): remove sync argument _espeak_exe is always called with sync=True, so remove code for sync==False --- TTS/tts/utils/text/phonemizers/espeak_wrapper.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/TTS/tts/utils/text/phonemizers/espeak_wrapper.py b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py index dd74db6fae..13da605c2b 100644 --- a/TTS/tts/utils/text/phonemizers/espeak_wrapper.py +++ b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py @@ -50,7 +50,7 @@ def get_espeakng_version() -> str: _DEF_ESPEAK_VER = None -def _espeak_exe(espeak_lib: str, args: list, *, sync: bool = False) -> list[bytes]: +def _espeak_exe(espeak_lib: str, args: list) -> list[bytes]: """Run espeak with the given arguments.""" cmd = [ espeak_lib, @@ -70,13 +70,6 @@ def _espeak_exe(espeak_lib: str, args: list, *, sync: bool = False) -> list[byte err = iter(p.stderr.readline, b"") for line in err: logger.warning("espeakng: %s", line.decode("utf-8").strip()) - if not sync: - p.stdout.close() - if p.stderr: - p.stderr.close() - if p.stdin: - p.stdin.close() - return res res2 = list(res) p.stdout.close() if p.stderr: @@ -201,7 +194,7 @@ def phonemize_espeak(self, text: str, separator: str = "|", *, tie: bool = False args.append(text) # compute phonemes phonemes = "" - for line in _espeak_exe(self.backend, args, sync=True): + for line in _espeak_exe(self.backend, args): logger.debug("line: %s", repr(line)) ph_decoded = line.decode("utf8").strip() # espeak: @@ -232,7 +225,7 @@ def supported_languages() -> dict[str, str]: return {} args = ["--voices"] langs = {} - for count, line in enumerate(_espeak_exe(_DEF_ESPEAK_LIB, args, sync=True)): + for count, line in enumerate(_espeak_exe(_DEF_ESPEAK_LIB, args)): line = line.decode("utf8").strip() if count > 0: cols = line.split() From 49fcbd908b818d8ec6daa3123818906f03fe9868 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 28 May 2024 21:43:35 +0200 Subject: [PATCH 2/3] fix(espeak_wrapper): avoid stuck process on windows Fixes #24 --- .../utils/text/phonemizers/espeak_wrapper.py | 39 +++++++------------ 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/TTS/tts/utils/text/phonemizers/espeak_wrapper.py b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py index 13da605c2b..91fb93c70e 100644 --- a/TTS/tts/utils/text/phonemizers/espeak_wrapper.py +++ b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py @@ -50,7 +50,7 @@ def get_espeakng_version() -> str: _DEF_ESPEAK_VER = None -def _espeak_exe(espeak_lib: str, args: list) -> list[bytes]: +def _espeak_exe(espeak_lib: str, args: list) -> list[str]: """Run espeak with the given arguments.""" cmd = [ espeak_lib, @@ -59,25 +59,18 @@ def _espeak_exe(espeak_lib: str, args: list) -> list[bytes]: "1", # UTF8 text encoding ] cmd.extend(args) - logger.debug("espeakng: executing %s", repr(cmd)) - - with subprocess.Popen( - cmd, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) as p: - res = iter(p.stdout.readline, b"") - err = iter(p.stderr.readline, b"") - for line in err: - logger.warning("espeakng: %s", line.decode("utf-8").strip()) - res2 = list(res) - p.stdout.close() - if p.stderr: - p.stderr.close() - if p.stdin: - p.stdin.close() - p.wait() - return res2 + logger.debug("Executing: %s", repr(cmd)) + + p = subprocess.run(cmd, capture_output=True, encoding="utf8", check=True) + for line in p.stderr.strip().split("\n"): + if line.strip() != "": + logger.warning("%s: %s", espeak_lib, line.strip()) + res = [] + for line in p.stdout.strip().split("\n"): + if line.strip() != "": + logger.debug("%s: %s", espeak_lib, line.strip()) + res.append(line.strip()) + return res class ESpeak(BasePhonemizer): @@ -195,8 +188,6 @@ def phonemize_espeak(self, text: str, separator: str = "|", *, tie: bool = False # compute phonemes phonemes = "" for line in _espeak_exe(self.backend, args): - logger.debug("line: %s", repr(line)) - ph_decoded = line.decode("utf8").strip() # espeak: # version 1.48.15: " p_ɹ_ˈaɪ_ɚ t_ə n_oʊ_v_ˈɛ_m_b_ɚ t_w_ˈɛ_n_t_i t_ˈuː\n" # espeak-ng: @@ -206,7 +197,7 @@ def phonemize_espeak(self, text: str, separator: str = "|", *, tie: bool = False # "sɛʁtˈɛ̃ mˈo kɔm (en)fˈʊtbɔːl(fr) ʒenˈɛʁ de- flˈaɡ də- lˈɑ̃ɡ." # phonemize needs to remove the language flags of the returned text: # "sɛʁtˈɛ̃ mˈo kɔm fˈʊtbɔːl ʒenˈɛʁ de- flˈaɡ də- lˈɑ̃ɡ." - ph_decoded = re.sub(r"\(.+?\)", "", ph_decoded) + ph_decoded = re.sub(r"\(.+?\)", "", line) phonemes += ph_decoded.strip() return phonemes.replace("_", separator) @@ -226,13 +217,11 @@ def supported_languages() -> dict[str, str]: args = ["--voices"] langs = {} for count, line in enumerate(_espeak_exe(_DEF_ESPEAK_LIB, args)): - line = line.decode("utf8").strip() if count > 0: cols = line.split() lang_code = cols[1] lang_name = cols[3] langs[lang_code] = lang_name - logger.debug("line: %s", repr(line)) return langs def version(self) -> str: From 07cbcf825c8837838229bb1b6952bd35191133eb Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 29 May 2024 09:52:18 +0200 Subject: [PATCH 3/3] fix(espeak_wrapper): read phonemize() input from file Avoids utf8 encoding issues on Windows when passing the text directly. Fixes https://github.com/coqui-ai/TTS/discussions/3761 --- TTS/tts/utils/text/phonemizers/espeak_wrapper.py | 10 +++++++++- tests/text_tests/test_phonemizer.py | 6 ++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/TTS/tts/utils/text/phonemizers/espeak_wrapper.py b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py index 91fb93c70e..a15df716e7 100644 --- a/TTS/tts/utils/text/phonemizers/espeak_wrapper.py +++ b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py @@ -3,6 +3,8 @@ import logging import re import subprocess +import tempfile +from pathlib import Path from typing import Optional from packaging.version import Version @@ -184,7 +186,12 @@ def phonemize_espeak(self, text: str, separator: str = "|", *, tie: bool = False if tie: args.append("--tie=%s" % tie) - args.append(text) + tmp = tempfile.NamedTemporaryFile(mode="w+t", delete=False, encoding="utf8") + tmp.write(text) + tmp.close() + args.append("-f") + args.append(tmp.name) + # compute phonemes phonemes = "" for line in _espeak_exe(self.backend, args): @@ -200,6 +207,7 @@ def phonemize_espeak(self, text: str, separator: str = "|", *, tie: bool = False ph_decoded = re.sub(r"\(.+?\)", "", line) phonemes += ph_decoded.strip() + Path(tmp.name).unlink() return phonemes.replace("_", separator) def _phonemize(self, text: str, separator: str = "") -> str: diff --git a/tests/text_tests/test_phonemizer.py b/tests/text_tests/test_phonemizer.py index ca25b302c5..f9067530e6 100644 --- a/tests/text_tests/test_phonemizer.py +++ b/tests/text_tests/test_phonemizer.py @@ -116,6 +116,12 @@ def setUp(self): output = self.phonemizer.phonemize(text, separator="") self.assertEqual(output, gt) + # UTF8 characters + text = "źrebię" + gt = "ʑrˈɛbjɛ" + output = ESpeak("pl").phonemize(text, separator="") + self.assertEqual(output, gt) + def test_name(self): self.assertEqual(self.phonemizer.name(), "espeak")