-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Fix win deps
* Fix win docs
* Fix indent = 4
* Fix zh docs indent
* Update whisper asr: no split
* Add sensevoice labeling pipeline
* [pre-commit.ci] auto fixes from pre-commit.com hooks (for more information, see https://pre-commit.ci)
* allow api accept any ref audio
* [pre-commit.ci] auto fixes from pre-commit.com hooks (for more information, see https://pre-commit.ci)
* update mel-band-roformer
* revert to bs-roformer
* Add option to save emo
* fsmnvad -> silerovad
* [pre-commit.ci] auto fixes from pre-commit.com hooks (for more information, see https://pre-commit.ci)
* Update req
* max single seg time
* [pre-commit.ci] auto fixes from pre-commit.com hooks (for more information, see https://pre-commit.ci)

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
- Loading branch information
1 parent
5a842d1
commit 8702c61
Showing
16 changed files
with
1,194 additions
and
165 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
from pathlib import Path | ||
from typing import Union | ||
|
||
from loguru import logger | ||
from natsort import natsorted | ||
|
||
# File suffixes (lower-case, including the leading dot) treated as audio.
AUDIO_EXTENSIONS = {
    ".mp3",
    ".wav",
    ".flac",
    ".ogg",
    ".m4a",
    ".wma",
    ".aac",
    ".aiff",
    ".aif",
    ".aifc",
}

# File suffixes (lower-case, including the leading dot) treated as video.
VIDEO_EXTENSIONS = {
    ".mp4",
    ".avi",
}
|
||
|
||
def list_files(
    path: Union[Path, str],
    extensions: Union[set[str], None] = None,
    recursive: bool = False,
    sort: bool = True,
) -> list[Path]:
    """List files in a directory.

    Args:
        path: Path to the directory.
        extensions: Suffixes to keep, each including the leading dot
            (e.g. ``{".wav", ".mp3"}``). ``None`` disables filtering and
            returns every file.
        recursive: Whether to descend into subdirectories. When False only
            the direct children of ``path`` are considered.
        sort: Whether to return the files in natural sort order.

    Returns:
        The matching regular files, without duplicates.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    path = Path(path)

    if not path.exists():
        raise FileNotFoundError(f"Directory {path} does not exist.")

    # Honour ``recursive``: rglob walks the whole tree, glob only one level.
    glob = path.rglob if recursive else path.glob

    # ``None`` means "no filter"; an empty set still means "match nothing".
    patterns = ["*"] if extensions is None else [f"*{ext}" for ext in extensions]

    # dict.fromkeys drops paths matched by more than one pattern while
    # keeping discovery order; is_file() skips directories whose name
    # happens to end with one of the suffixes.
    files = list(
        dict.fromkeys(
            file
            for pattern in patterns
            for file in glob(pattern)
            if file.is_file()
        )
    )

    if sort:
        files = natsorted(files)

    return files
|
||
|
||
def load_filelist(path: Path | str) -> list[tuple[Path, str, str, str]]: | ||
""" | ||
Load a Bert-VITS2 style filelist. | ||
""" | ||
|
||
files = set() | ||
results = [] | ||
count_duplicated, count_not_found = 0, 0 | ||
|
||
LANGUAGE_TO_LANGUAGES = { | ||
"zh": ["zh", "en"], | ||
"jp": ["jp", "en"], | ||
"en": ["en"], | ||
} | ||
|
||
with open(path, "r", encoding="utf-8") as f: | ||
for line in f.readlines(): | ||
splits = line.strip().split("|", maxsplit=3) | ||
if len(splits) != 4: | ||
logger.warning(f"Invalid line: {line}") | ||
continue | ||
|
||
filename, speaker, language, text = splits | ||
file = Path(filename) | ||
language = language.strip().lower() | ||
|
||
if language == "ja": | ||
language = "jp" | ||
|
||
assert language in ["zh", "jp", "en"], f"Invalid language {language}" | ||
languages = LANGUAGE_TO_LANGUAGES[language] | ||
|
||
if file in files: | ||
logger.warning(f"Duplicated file: {file}") | ||
count_duplicated += 1 | ||
continue | ||
|
||
if not file.exists(): | ||
logger.warning(f"File not found: {file}") | ||
count_not_found += 1 | ||
continue | ||
|
||
results.append((file, speaker, languages, text)) | ||
|
||
if count_duplicated > 0: | ||
logger.warning(f"Total duplicated files: {count_duplicated}") | ||
|
||
if count_not_found > 0: | ||
logger.warning(f"Total files not found: {count_not_found}") | ||
|
||
return results |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.