Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Fix Espeak issues on Windows #34

Merged
Merged 3 commits on May 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 25 additions & 35 deletions TTS/tts/utils/text/phonemizers/espeak_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import logging
import re
import subprocess
import tempfile
from pathlib import Path
from typing import Optional

from packaging.version import Version
Expand Down Expand Up @@ -50,7 +52,7 @@ def get_espeakng_version() -> str:
_DEF_ESPEAK_VER = None


def _espeak_exe(espeak_lib: str, args: list, *, sync: bool = False) -> list[bytes]:
def _espeak_exe(espeak_lib: str, args: list) -> list[str]:
"""Run espeak with the given arguments."""
cmd = [
espeak_lib,
Expand All @@ -59,32 +61,18 @@ def _espeak_exe(espeak_lib: str, args: list, *, sync: bool = False) -> list[byte
"1", # UTF8 text encoding
]
cmd.extend(args)
logger.debug("espeakng: executing %s", repr(cmd))

with subprocess.Popen(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
) as p:
res = iter(p.stdout.readline, b"")
err = iter(p.stderr.readline, b"")
for line in err:
logger.warning("espeakng: %s", line.decode("utf-8").strip())
if not sync:
p.stdout.close()
if p.stderr:
p.stderr.close()
if p.stdin:
p.stdin.close()
return res
res2 = list(res)
p.stdout.close()
if p.stderr:
p.stderr.close()
if p.stdin:
p.stdin.close()
p.wait()
return res2
logger.debug("Executing: %s", repr(cmd))

p = subprocess.run(cmd, capture_output=True, encoding="utf8", check=True)
for line in p.stderr.strip().split("\n"):
if line.strip() != "":
logger.warning("%s: %s", espeak_lib, line.strip())
res = []
for line in p.stdout.strip().split("\n"):
if line.strip() != "":
logger.debug("%s: %s", espeak_lib, line.strip())
res.append(line.strip())
return res


class ESpeak(BasePhonemizer):
Expand Down Expand Up @@ -198,12 +186,15 @@ def phonemize_espeak(self, text: str, separator: str = "|", *, tie: bool = False
if tie:
args.append("--tie=%s" % tie)

args.append(text)
tmp = tempfile.NamedTemporaryFile(mode="w+t", delete=False, encoding="utf8")
tmp.write(text)
tmp.close()
args.append("-f")
args.append(tmp.name)

# compute phonemes
phonemes = ""
for line in _espeak_exe(self.backend, args, sync=True):
logger.debug("line: %s", repr(line))
ph_decoded = line.decode("utf8").strip()
for line in _espeak_exe(self.backend, args):
# espeak:
# version 1.48.15: " p_ɹ_ˈaɪ_ɚ t_ə n_oʊ_v_ˈɛ_m_b_ɚ t_w_ˈɛ_n_t_i t_ˈuː\n"
# espeak-ng:
Expand All @@ -213,9 +204,10 @@ def phonemize_espeak(self, text: str, separator: str = "|", *, tie: bool = False
# "sɛʁtˈɛ̃ mˈo kɔm (en)fˈʊtbɔːl(fr) ʒenˈɛʁ de- flˈaɡ də- lˈɑ̃ɡ."
# phonemize needs to remove the language flags of the returned text:
# "sɛʁtˈɛ̃ mˈo kɔm fˈʊtbɔːl ʒenˈɛʁ de- flˈaɡ də- lˈɑ̃ɡ."
ph_decoded = re.sub(r"\(.+?\)", "", ph_decoded)
ph_decoded = re.sub(r"\(.+?\)", "", line)

phonemes += ph_decoded.strip()
Path(tmp.name).unlink()
return phonemes.replace("_", separator)

def _phonemize(self, text: str, separator: str = "") -> str:
Expand All @@ -232,14 +224,12 @@ def supported_languages() -> dict[str, str]:
return {}
args = ["--voices"]
langs = {}
for count, line in enumerate(_espeak_exe(_DEF_ESPEAK_LIB, args, sync=True)):
line = line.decode("utf8").strip()
for count, line in enumerate(_espeak_exe(_DEF_ESPEAK_LIB, args)):
if count > 0:
cols = line.split()
lang_code = cols[1]
lang_name = cols[3]
langs[lang_code] = lang_name
logger.debug("line: %s", repr(line))
return langs

def version(self) -> str:
Expand Down
6 changes: 6 additions & 0 deletions tests/text_tests/test_phonemizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,12 @@ def setUp(self):
output = self.phonemizer.phonemize(text, separator="")
self.assertEqual(output, gt)

# UTF8 characters
text = "źrebię"
gt = "ʑrˈɛbjɛ"
output = ESpeak("pl").phonemize(text, separator="")
self.assertEqual(output, gt)

def test_name(self):
self.assertEqual(self.phonemizer.name(), "espeak")

Expand Down
Loading