diff --git a/fish_speech/text/chinese.py b/fish_speech/text/chinese.py index 3cdec82f..2133a9a1 100644 --- a/fish_speech/text/chinese.py +++ b/fish_speech/text/chinese.py @@ -1,9 +1,9 @@ import os import re -import cn2an import jieba.posseg as psg from pypinyin import Style, lazy_pinyin +from tn.chinese.normalizer import Normalizer from fish_speech.text.symbols import punctuation from fish_speech.text.tone_sandhi import ToneSandhi @@ -16,7 +16,7 @@ for line in open(OPENCPOP_DICT_PATH).readlines() } - +normalizer = Normalizer() tone_modifier = ToneSandhi() @@ -123,10 +123,7 @@ def _g2p(segments): def text_normalize(text): - numbers = re.findall(r"\d+(?:\.?\d+)?", text) - for number in numbers: - text = text.replace(number, cn2an.an2cn(number), 1) - return text + return normalizer.normalize(text) if __name__ == "__main__": diff --git a/pyproject.toml b/pyproject.toml index 18c40735..eca2885b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,6 @@ dependencies = [ "vector-quantize-pytorch>=1.10.0", "rich>=13.5.3", "gradio>=4.0.0", - "cn2an>=0.5.22", "pypinyin>=0.49.0", "jieba>=0.42.1", "g2p-en>=2.1.0", @@ -34,6 +33,7 @@ dependencies = [ "kui>=1.6.0", "zibai-server>=0.9.0", "loguru>=0.6.0", + "WeTextProcessing>=0.1.10", ] [project.optional-dependencies]