The idea is that, as you make changes to UltraSinger, you can run this tool to see how those changes affect its accuracy. The tool reuses any cached files from previous runs, as long as the configuration used to generate the cache is unchanged.
Whichever half-step value scores highest is used. This accounts for octave mismatches and wrongly transposed inputs.
+ +## Directory structure + +``` +evaluation +├───input +│ ├───config # programmatic configuration of UltraSingerEvaluation +│ │ │ example.local.py # example configuration file, copy this and name it local.py +│ │ │ local.py # your configuration file, UltraSingerEvaluation will look for this file +│ │ │ +│ └───songs # this is the directory that contains the known-good songs to run through UltraSinger and then compare against +│ ├───Jonathan Coulton - A Talk with George +│ │ │ audio.mp3 +│ │ │ background.jpg +│ │ │ cover.jpg +│ │ │ license.txt +│ │ │ song.txt # known good input UltraStar txt file. UltraSingerEvaluation compares this to the output of UltraSinger +│ │ │ +│ │ └───cache # this cache will be reused for subsequent evaluation runs +│ │ │ crepe_False_full_10_cuda.json # the cached file's name contains the configuration used to generate it +│ │ │ Jonathan Coulton - A Talk with George.wav +│ │ │ Jonathan Coulton - A Talk with George_denoised.wav +│ │ │ Jonathan Coulton - A Talk with George_mono.wav +│ │ │ Jonathan Coulton - A Talk with George_mute.wav +│ │ │ whisper_large-v2_cuda_None_None_16_None_en.json # the cached file's name contains the configuration used to generate it +│ │ │ +│ │ └───separated +│ │ └───htdemucs +│ │ └───audio +│ │ no_vocals.wav +│ │ vocals.wav +│ │ +│ ├───... +│ │ │ ... +│ │ +│ └───Many - Songs +│ │ ... +│ +└───output + └───2024-07-27_16-58-27 + │ run.json + │ + └───songs + ├───Jonathan Coulton - A Talk with George + │ Jonathan Coulton - A Talk with George.txt # UltraStar txt file generated by UltraSinger + │ + ├───... 
def init_settings() -> Settings:
    """Build the Settings object used for evaluation runs.

    Edit this function in your own ``local.py`` copy to customise a run.
    The commented-out assignments below are optional knobs that can be
    enabled as needed.
    """
    config = Settings()
    config.language = "en"
    # config.pitch_loudness_threshold = 10000
    config.create_midi = False
    config.create_plot = False
    config.calculate_score = True
    config.create_karaoke = False
    config.keep_cache = True
    config.ignore_audio = False
    # config.whisper_batch_size = 12
    # config.whisper_compute_type = "int8"
    # config.test_songs_input_folder = "C:/Users/Benedikt/git/songs/Creative Commons"
    # config.skip_cache_vocal_separation = True
    # config.skip_cache_denoise_vocal_audio = True
    # config.skip_cache_transcription = True
    # config.skip_cache_pitch_detection = True

    dedicated_test_folder = ""
    # dedicated_test_folder = "C:/My/Dedicated/Test/songs"

    # Only switch to the dedicated folder when it exists and holds at least
    # one sub-directory (i.e. at least one candidate song folder).
    has_song_folders = os.path.isdir(dedicated_test_folder) and any(
        os.path.isdir(os.path.join(dedicated_test_folder, entry))
        for entry in os.listdir(dedicated_test_folder)
    )
    if has_song_folders:
        config.test_songs_input_folder = dedicated_test_folder

    return config
@dataclass_json
@dataclass
class Settings:
    """Run-time configuration for UltraSinger and the evaluation tooling.

    Every attribute is an annotated dataclass field so that the whole
    configuration can be serialised with ``dataclasses_json`` (it is stored
    inside each evaluation run's ``run.json``) and read back again.
    """

    APP_VERSION: str = "0.0.12-dev2"

    create_midi: bool = True
    create_plot: bool = False
    create_audio_chunks: bool = False
    hyphenation: bool = True
    use_separated_vocal: bool = True
    create_karaoke: bool = True
    # None means "not explicitly configured"; UltraSinger then picks a default
    # depending on the kind of input file.
    ignore_audio: Optional[bool] = None
    input_file_is_ultrastar_txt: bool = False  # todo: to process_data
    keep_cache: bool = False

    # Process data Paths
    input_file_path: str = ""
    output_folder_path: str = ""

    language: Optional[str] = None
    # The three model/version fields below are annotated with their own types
    # (not ``str``) so that dataclasses_json restores the original objects
    # instead of bare strings when a stored run is loaded again.
    format_version: FormatVersion = FormatVersion.V1_0_0

    # Demucs
    demucs_model: DemucsModel = DemucsModel.HTDEMUCS  # htdemucs|htdemucs_ft|htdemucs_6s|hdemucs_mmi|mdx|mdx_extra|mdx_q|mdx_extra_q|SIG

    # Whisper
    transcriber: str = "whisper"  # whisper
    whisper_model: WhisperModel = WhisperModel.LARGE_V2  # Multilingual model tiny|base|small|medium|large-v1|large-v2|large-v3
    # English-only model tiny.en|base.en|small.en|medium.en
    whisper_align_model: Optional[str] = None  # Model for other languages from huggingface.co e.g -> "gigant/romanian-wav2vec2"
    whisper_batch_size: int = 16  # reduce if low on GPU mem
    whisper_compute_type: Optional[str] = None  # change to "int8" if low on GPU mem (may reduce accuracy)

    # Pitch
    crepe_model_capacity: str = "full"  # tiny|small|medium|large|full
    crepe_step_size: int = 10  # in milliseconds
    pitch_loudness_threshold: int = -60

    # Device
    pytorch_device: str = "cpu"  # cpu|cuda
    tensorflow_device: str = "cpu"  # cpu|cuda
    force_cpu: bool = False
    force_whisper_cpu: bool = False
    force_crepe_cpu: bool = False

    # MuseScore
    musescore_path: Optional[str] = None

    # UltraSinger Evaluation Configuration
    test_songs_input_folder: Optional[str] = None
    cache_override_path: Optional[str] = None
    skip_cache_vocal_separation: bool = False
    skip_cache_denoise_vocal_audio: bool = False
    skip_cache_transcription: bool = False
    skip_cache_pitch_detection: bool = False
    calculate_score: bool = True
from modules.Ultrastar.ultrastar_score_calculator import Score, calculate_score_points from modules.Ultrastar.ultrastar_txt import FILE_ENCODING, FormatVersion -from modules.Ultrastar.coverter.ultrastar_txt_converter import from_ultrastar_txt, \ +from modules.Ultrastar.converter.ultrastar_txt_converter import from_ultrastar_txt, \ create_ultrastar_txt_from_midi_segments, create_ultrastar_txt_from_automation from modules.Ultrastar.ultrastar_parser import parse_ultrastar_txt from modules.common_print import print_support, print_help, print_version @@ -212,7 +212,8 @@ def InitProcessData(): process_data.basename = basename process_data.process_data_paths.audio_output_file_path = audio_file_path # todo: ignore transcribe - settings.ignore_audio = True + if settings.ignore_audio is None: + settings.ignore_audio = True elif settings.input_file_path.startswith("https:"): # Youtube @@ -295,9 +296,9 @@ def CreateUltraStarTxt(process_data: ProcessData): if settings.calculate_score: simple_score, accurate_score = calculate_score_points(process_data, ultrastar_file_output) - # Add calculated score to Ultrastar txt + # Add calculated score to Ultrastar txt #Todo: Missing Karaoke - ultrastar_writer.add_score_to_ultrastar_txt(ultrastar_file_output, simple_score) + ultrastar_writer.add_score_to_ultrastar_txt(ultrastar_file_output, simple_score) return accurate_score, simple_score, ultrastar_file_output diff --git a/src/UltraSingerEvaluation.py b/src/UltraSingerEvaluation.py new file mode 100644 index 0000000..ba1eaf0 --- /dev/null +++ b/src/UltraSingerEvaluation.py @@ -0,0 +1,187 @@ +import copy +import os +import traceback +from datetime import datetime +from pathlib import Path +from typing import List +import importlib.util + +import pandas + +import UltraSinger +from modules import timer +from modules.DeviceDetection.device_detection import check_gpu_support +from Settings import Settings +from modules.Evaluation.TestRun import TestRun, TestedSong +from 
def main() -> None:
    """Run every test song through UltraSinger and record its pitch accuracy.

    Creates a fresh run folder below the evaluation output folder, processes
    each song found in the configured input folder with a per-song cache
    directory, compares the generated UltraStar txt with the known-good input
    and finally writes all results to ``run.json`` in the run folder.

    Raises:
        SystemExit: with code 1 when the input folder is missing or contains
            no usable test songs.
    """
    Path(test_output_folder).mkdir(parents=True, exist_ok=True)
    Path(test_run_folder).mkdir(parents=True)
    Path(test_run_songs_folder).mkdir(parents=True)

    base_settings = initialize_settings()
    base_settings.output_folder_path = test_run_songs_folder

    if base_settings.test_songs_input_folder is None:
        base_settings.test_songs_input_folder = os.path.join(default_test_input_folder, "songs")

    base_settings.test_songs_input_folder = os.path.normpath(
        base_settings.test_songs_input_folder
    )
    if not os.path.isdir(base_settings.test_songs_input_folder):
        print(
            f"{ULTRASINGER_HEAD} {red_highlighted('Error!')} No test songs input folder configured (refer to "
            f"evaluation section in readme)."
        )
        # ``exit`` is injected by the ``site`` module and is not guaranteed to
        # exist in every interpreter set-up; raising SystemExit always works.
        raise SystemExit(1)

    test_songs: List[TestSong] = []
    for dir_entry in os.listdir(base_settings.test_songs_input_folder):
        song_folder = os.path.join(base_settings.test_songs_input_folder, dir_entry)
        found_song = find_ultrastar_song(song_folder)
        if found_song is None:
            continue

        test_songs.append(TestSong(found_song[0], song_folder, found_song[1]))

    if not test_songs:
        print(
            f"{ULTRASINGER_HEAD} {red_highlighted('Error!')} No test songs found in {base_settings.test_songs_input_folder}."
        )
        raise SystemExit(1)

    print(f"{ULTRASINGER_HEAD} Running evaluation for {len(test_songs)} songs")

    test_run = TestRun(test_run_name, base_settings, test_start_time)
    for index, test_song in enumerate(test_songs):
        print(f"\n{ULTRASINGER_HEAD} ========================")
        print(
            f"{ULTRASINGER_HEAD} {index + 1}/{len(test_songs)}: {os.path.basename(test_song.input_txt)}"
        )

        timer.log(f"{index + 1}/{len(test_songs)}: {os.path.basename(test_song.input_txt)}")

        # prepare cache directory
        song_cache_path = os.path.join(test_song.input_folder, "cache")
        Path(song_cache_path).mkdir(parents=True, exist_ok=True)

        # Each song gets its own copy of the settings so per-song paths never
        # leak into the base settings stored with the test run.
        test_song_settings = copy.deepcopy(base_settings)
        test_song_settings.input_file_path = test_song.input_txt
        test_song_settings.cache_override_path = song_cache_path
        UltraSinger.settings = test_song_settings

        # Register the song exactly once, before processing, so that failed
        # songs still show up in the results (with success == False).
        tested_song = TestedSong(test_song.input_txt)
        test_run.tested_songs.append(tested_song)
        try:
            output_txt, _, _ = UltraSinger.run()
        except Exception as error:
            # Boundary handler: one broken song must not abort the whole run.
            print(
                f"{ULTRASINGER_HEAD} {red_highlighted('Error!')} Failed to process {test_song.input_txt}\n{error}."
            )
            traceback.print_exc()
            continue

        if not os.path.isfile(output_txt):
            output_folder_name = f"{test_song.input_ultrastar_class.artist} - {test_song.input_ultrastar_class.title}"
            output_folder = os.path.join(test_run_songs_folder, output_folder_name)
            print(
                f"{ULTRASINGER_HEAD} {red_highlighted('Error!')} Could not find song txt in '{output_folder}'."
            )
            # tested_song is already part of test_run.tested_songs; appending
            # it again here would duplicate the entry in run.json.
            continue

        ultrastar_class = ultrastar_parser.parse(output_txt)
        (
            input_match_ratio,
            output_match_ratio,
            input_pitch_shift_match_ratios,
            output_pitch_shift_match_ratios,
            pitch_where_should_be_no_pitch_ratio,
            no_pitch_where_should_be_pitch_ratio,
        ) = compare_pitches(test_song.input_ultrastar_class, ultrastar_class)

        tested_song.output_path = output_txt
        tested_song.success = True
        tested_song.input_match_ratio = input_match_ratio
        tested_song.output_match_ratio = output_match_ratio
        tested_song.input_pitch_shift_match_ratios = input_pitch_shift_match_ratios
        tested_song.output_pitch_shift_match_ratios = output_pitch_shift_match_ratios
        tested_song.pitch_where_should_be_no_pitch_ratio = pitch_where_should_be_no_pitch_ratio
        tested_song.no_pitch_where_should_be_pitch_ratio = no_pitch_where_should_be_pitch_ratio

    test_run.end_time = datetime.now()
    test_run_result_file = os.path.join(test_run_folder, "run.json")
    test_run_json = test_run.to_json()
    with open(test_run_result_file, "w", encoding=FILE_ENCODING) as file:
        file.write(test_run_json)
def main() -> None:
    """Print summary statistics for every stored evaluation run.

    Loads each ``run.json`` found below the evaluation output folder, derives
    the best pitch-shift ratios for every successfully evaluated song and
    prints a ``pandas`` ``describe()`` table per run.

    Raises:
        SystemExit: with code 1 when no test run could be found.
    """
    Path(test_output_folder).mkdir(parents=True, exist_ok=True)

    test_runs: List[TestRun] = []
    for dir_entry in os.listdir(test_output_folder):
        test_run_folder = os.path.join(test_output_folder, dir_entry)
        test_run = find_test_run_result(test_run_folder)
        if test_run is None:
            continue

        test_runs.append(test_run)

    if not test_runs:
        print(
            f"{ULTRASINGER_HEAD} {red_highlighted('Error!')} No test runs found in {test_output_folder}."
        )
        # ``exit`` comes from the ``site`` module and may be unavailable;
        # raising SystemExit has the same effect everywhere.
        raise SystemExit(1)

    print(f"{ULTRASINGER_HEAD} Running meta evaluation for {len(test_runs)} test runs")

    for test_run in test_runs:
        tested_songs_dicts = []
        for tested_song in test_run.tested_songs:
            if not tested_song.success:
                continue

            input_ratios = tested_song.input_pitch_shift_match_ratios
            output_ratios = tested_song.output_pitch_shift_match_ratios
            if not input_ratios or not output_ratios:
                # Nothing to rank for this song; max() would raise on empty data.
                continue

            # Pick the pitch shift (dict key) with the highest ratio and use
            # that same key for the other dict. Looking the value up by key,
            # rather than by its position in the values list, stays correct
            # regardless of key type or insertion order.
            best_input_shift = max(input_ratios, key=input_ratios.get)
            best_output_shift = max(output_ratios, key=output_ratios.get)

            tested_song_dict = tested_song.to_dict()
            tested_song_dict[
                "best_input_pitch_shift_match_ratio"
            ] = input_ratios[best_input_shift]
            tested_song_dict[
                "matching_input_best_output_pitch_shift_match_ratio"
            ] = output_ratios[best_input_shift]
            tested_song_dict[
                "best_output_pitch_shift_match_ratio"
            ] = output_ratios[best_output_shift]
            tested_song_dict[
                "matching_output_best_input_pitch_shift_match_ratio"
            ] = input_ratios[best_output_shift]

            tested_songs_dicts.append(tested_song_dict)

        print("Test run:", test_run.name)
        if not tested_songs_dicts:
            # describe() raises on a frame without columns, so report and move on.
            print(f"{ULTRASINGER_HEAD} No successfully evaluated songs in this test run")
            continue

        records = pandas.DataFrame.from_records(tested_songs_dicts)
        pandas.options.display.max_columns = records.shape[1]
        pandas.set_option('display.expand_frame_repr', False)
        describe_result = records.describe(percentiles=[0.25, 0.5, 0.75, 0.95, 0.99])

        print(describe_result)

    print("Done")
@dataclass_json
@dataclass
class TestRun:
    """Result record of one evaluation run.

    Holds the run's name, the settings it was started with, its start and end
    timestamps and one entry per song that was attempted.
    """

    name: str
    settings: Settings = None
    start_time: datetime.datetime = None
    end_time: datetime.datetime = None
    # A fresh list per instance; filled while the run processes its songs.
    tested_songs: list[TestedSong] = field(default_factory=list)
def beat_to_second(beat: float, real_bpm: float) -> float:
    """Return how many seconds ``beat`` beats take at ``real_bpm`` beats per minute."""
    # Inverse of second_to_beat: beat = T * BPM / 60  =>  T = beat * 60 / BPM
    return beat * 60 / real_bpm
def get_end_time(gap: str, ultrastar_bpm: str, startBeat: float, duration: float) -> float:
    """Return the end time in seconds of a note described by UltraStar values.

    ``gap`` and ``ultrastar_bpm`` are the raw header strings; ``startBeat`` and
    ``duration`` are truncated to whole beats before conversion.
    """
    gap_seconds = __convert_gap(gap)
    real_bpm = __convert_bpm(ultrastar_bpm)

    # The note ends ``duration`` beats after it started.
    end_beat = int(startBeat) + int(duration)
    return beat_to_second(end_beat, real_bpm) + gap_seconds
+ if note_line.noteType == UltrastarTxtNoteTypeTag.FREESTYLE: + continue + + start_time = int(get_start_time_from_ultrastar(ultrastar_class, pos) * 1000 + gap) + end_time = int(get_end_time_from_ultrastar(ultrastar_class, pos) * 1000 + gap) + + start_nearest_step = (start_time + step_size - 1) // step_size * step_size + end_nearest_step = (end_time + step_size - 1) // step_size * step_size + + if previous_step == start_nearest_step: + start_nearest_step += step_size + + duration = end_nearest_step - start_nearest_step + + if duration < 10: + continue + + # pad gaps between pitches with empty datapoints + gap_steps_count = (start_nearest_step - previous_step - step_size) // step_size + data += [NO_PITCH] * gap_steps_count + + pitch_steps_count = duration // step_size + + if note_line.noteType == UltrastarTxtNoteTypeTag.FREESTYLE: + data += [FREESTYLE] * pitch_steps_count + else: + data += [int(note_line.pitch)] * pitch_steps_count + + previous_step = end_nearest_step + + return data + + +def compare_pitches(input_ultrastar_class, output_ultrastar_class) -> tuple[float, float, dict[int, float], dict[int, float], float, float]: + step_size = 10 + + input_datapoints = map_to_datapoints(input_ultrastar_class, step_size) + output_datapoints = map_to_datapoints(output_ultrastar_class, step_size) + + longest = max(len(input_datapoints), len(output_datapoints)) + for datapoints in [input_datapoints, output_datapoints]: + length = len(datapoints) + if length < longest: + gap_steps_count = longest - length + # pad gaps between pitches with empty datapoints + datapoints += [NO_PITCH] * gap_steps_count + + input_pitched_datapoints = len([x for x in input_datapoints if x != NO_PITCH]) + output_pitched_datapoints = len([x for x in output_datapoints if x != NO_PITCH]) + + matches = 0 + pitch_shift_matches = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + pitch_where_should_be_no_pitch = 0 + no_pitch_where_should_be_pitch = 0 + for index, _ in enumerate(input_datapoints): + input_pitch = 
input_datapoints[index] + output_pitch = output_datapoints[index] + if input_pitch == NO_PITCH and output_pitch == NO_PITCH: + continue + + if input_pitch == output_pitch or (input_pitch == FREESTYLE and output_pitch != NO_PITCH): + matches += 1 + elif input_pitch == NO_PITCH: + pitch_where_should_be_no_pitch += 1 + elif output_pitch == NO_PITCH: + no_pitch_where_should_be_pitch += 1 + else: + _, input_pitch_remainder = divmod(input_pitch, 12) + _, output_pitch_remainder = divmod(output_pitch, 12) + pitch_difference = abs(input_pitch_remainder - output_pitch_remainder) + pitch_shift_matches[pitch_difference] += 1 + + input_match_ratio = matches / input_pitched_datapoints + output_match_ratio = matches / output_pitched_datapoints + + input_pitch_shift_match_ratios = {} + output_pitch_shift_match_ratios = {} + for index, pitch_shift_matches_item in enumerate(pitch_shift_matches): + pitch_shift_matches_count = pitch_shift_matches_item + if index == 0: + pitch_shift_matches_count += matches + input_pitch_shift_match_ratios[index] = pitch_shift_matches_item / input_pitched_datapoints + output_pitch_shift_match_ratios[index] = pitch_shift_matches_item / output_pitched_datapoints + + output_pitch_where_should_be_no_pitch_ratio = pitch_where_should_be_no_pitch / output_pitched_datapoints + output_no_pitch_where_should_be_pitch_ratio = no_pitch_where_should_be_pitch / input_pitched_datapoints + + return (input_match_ratio, + output_match_ratio, + input_pitch_shift_match_ratios, + output_pitch_shift_match_ratios, + output_pitch_where_should_be_no_pitch_ratio, + output_no_pitch_where_should_be_pitch_ratio + ) + + +def determine_nearest_end_step(input_ultrastar_class, step_size) -> int: + pitches_count = len(input_ultrastar_class.pitches) - 1 + end_time = int( + get_end_time_from_ultrastar(input_ultrastar_class, pitches_count) * 1000 + ) + int(input_ultrastar_class.gap) + return (end_time + step_size - 1) // step_size * step_size diff --git 
a/src/modules/Ultrastar/coverter/ultrastar_midi_converter.py b/src/modules/Ultrastar/converter/ultrastar_midi_converter.py similarity index 95% rename from src/modules/Ultrastar/coverter/ultrastar_midi_converter.py rename to src/modules/Ultrastar/converter/ultrastar_midi_converter.py index 676e8d5..6ffb38d 100644 --- a/src/modules/Ultrastar/coverter/ultrastar_midi_converter.py +++ b/src/modules/Ultrastar/converter/ultrastar_midi_converter.py @@ -2,7 +2,7 @@ import pretty_midi from modules.Midi.MidiSegment import MidiSegment -from modules.Ultrastar.coverter.ultrastar_converter import midi_note_to_ultrastar_note, ultrastar_note_to_midi_note, \ +from modules.Ultrastar.converter.ultrastar_converter import midi_note_to_ultrastar_note, ultrastar_note_to_midi_note, \ get_start_time_from_ultrastar, get_end_time_from_ultrastar from modules.Ultrastar.ultrastar_txt import UltrastarTxtValue from modules.console_colors import ULTRASINGER_HEAD diff --git a/src/modules/Ultrastar/coverter/ultrastar_txt_converter.py b/src/modules/Ultrastar/converter/ultrastar_txt_converter.py similarity index 95% rename from src/modules/Ultrastar/coverter/ultrastar_txt_converter.py rename to src/modules/Ultrastar/converter/ultrastar_txt_converter.py index 0f9cce2..c1c1b4b 100644 --- a/src/modules/Ultrastar/coverter/ultrastar_txt_converter.py +++ b/src/modules/Ultrastar/converter/ultrastar_txt_converter.py @@ -6,8 +6,8 @@ from modules import os_helper from modules.Midi.MidiSegment import MidiSegment from modules.ProcessData import ProcessData, MediaInfo -from modules.Ultrastar.coverter.ultrastar_converter import ultrastar_bpm_to_real_bpm -from modules.Ultrastar.coverter.ultrastar_midi_converter import ultrastar_to_midi_segments, \ +from modules.Ultrastar.converter.ultrastar_converter import ultrastar_bpm_to_real_bpm +from modules.Ultrastar.converter.ultrastar_midi_converter import ultrastar_to_midi_segments, \ convert_midi_notes_to_ultrastar_notes from modules.Ultrastar.ultrastar_txt import 
UltrastarTxtValue, FormatVersion from modules.Ultrastar.ultrastar_writer import create_repitched_txt_from_ultrastar_data, format_separated_string, \ diff --git a/src/modules/Ultrastar/coverter/ultrastar_converter.py b/src/modules/Ultrastar/coverter/ultrastar_converter.py deleted file mode 100644 index 76bedf9..0000000 --- a/src/modules/Ultrastar/coverter/ultrastar_converter.py +++ /dev/null @@ -1,99 +0,0 @@ -"""Ultrastar Converter""" - -from modules.Ultrastar.ultrastar_txt import UltrastarTxtValue - - -def real_bpm_to_ultrastar_bpm(real_bpm: float) -> float: - """Converts real BPM to UltraStar BPM""" - # The UltraStar BPM info is a fourth beat of the real BPM - ultrastar_bpm = real_bpm / 4 - return ultrastar_bpm - - -def ultrastar_bpm_to_real_bpm(ultrastar_bpm: float) -> float: - """Converts UltraStar BPM to real BPM""" - # The UltraStar BPM info is a fourth beat of the real BPM - bpm = ultrastar_bpm * 4 - return bpm - - -def second_to_beat(seconds: float, real_bpm: float) -> float: - """Converts seconds to beat""" - # BPM = 60 * beat / T - # T * BPM = 60 * beat - # beat = T * BPM / 60 - beat = seconds * real_bpm / 60 - return beat - - -def beat_to_second(beat: float, real_bpm: float) -> float: - """Converts beat to seconds""" - - seconds = beat * 60 / real_bpm - return seconds - - -def midi_note_to_ultrastar_note(midi_note: int) -> int: - """Converts Midi note to UltraStar note""" - - # C4 == 48 in Midi - ultrastar_note = midi_note - 48 - return ultrastar_note - - -def ultrastar_note_to_midi_note(ultrastar_note: int) -> int: - """Converts UltraStar note to Midi note""" - - # C4 == 48 in Midi - midi_note = ultrastar_note + 48 - return midi_note - - -def get_start_time_from_ultrastar(ultrastar_class: UltrastarTxtValue, pos: int) -> float: - """Calculates the start time from the Ultrastar txt""" - - start_time = get_start_time(ultrastar_class.gap, ultrastar_class.bpm, - ultrastar_class.UltrastarNoteLines[pos].startBeat) - return start_time - - -def 
get_start_time(gap: str, ultrastar_bpm: str, startBeat: float) -> float: - """Calculates the start time from the Ultrastar txt""" - - gap = __convert_gap(gap) - real_bpm = __convert_bpm(ultrastar_bpm) - start_time = beat_to_second(int(startBeat), real_bpm) + gap - return start_time - - -def get_end_time_from_ultrastar(ultrastar_class: UltrastarTxtValue, pos: int) -> float: - """Calculates the end time from the Ultrastar txt""" - - end_time = get_end_time(ultrastar_class.gap, ultrastar_class.bpm, ultrastar_class.UltrastarNoteLines[pos].startBeat, - ultrastar_class.UltrastarNoteLines[pos].duration) - return end_time - - -def get_end_time(gap: str, ultrastar_bpm: str, startBeat: float, duration: float) -> float: - """Calculates the end time from the Ultrastar txt""" - - gap = __convert_gap(gap) - real_bpm = __convert_bpm(ultrastar_bpm) - end_time = ( - beat_to_second( - int(startBeat) + int(duration), - real_bpm, - ) - + gap - ) - return end_time - - -def __convert_gap(gap: str) -> float: - gap = float(gap.replace(",", ".")) / 1000 - return gap - - -def __convert_bpm(ultrastar_bpm: str) -> float: - real_bpm = ultrastar_bpm_to_real_bpm(float(ultrastar_bpm.replace(",", "."))) - return real_bpm diff --git a/src/modules/Ultrastar/ultrastar_parser.py b/src/modules/Ultrastar/ultrastar_parser.py index f3024dd..b18a205 100644 --- a/src/modules/Ultrastar/ultrastar_parser.py +++ b/src/modules/Ultrastar/ultrastar_parser.py @@ -4,7 +4,7 @@ from modules import os_helper from modules.console_colors import ULTRASINGER_HEAD, red_highlighted -from modules.Ultrastar.coverter.ultrastar_converter import ( +from modules.Ultrastar.converter.ultrastar_converter import ( get_end_time, get_start_time, ) diff --git a/src/modules/Ultrastar/ultrastar_score_calculator.py b/src/modules/Ultrastar/ultrastar_score_calculator.py index bdf69b8..59e295b 100644 --- a/src/modules/Ultrastar/ultrastar_score_calculator.py +++ b/src/modules/Ultrastar/ultrastar_score_calculator.py @@ -17,7 +17,7 @@ underlined, 
) from modules.Midi.midi_creator import create_midi_note_from_pitched_data -from modules.Ultrastar.coverter.ultrastar_converter import ( +from modules.Ultrastar.converter.ultrastar_converter import ( get_end_time_from_ultrastar, get_start_time_from_ultrastar, ultrastar_note_to_midi_note, diff --git a/src/modules/Ultrastar/ultrastar_txt.py b/src/modules/Ultrastar/ultrastar_txt.py index 98a13b6..5845fd4 100644 --- a/src/modules/Ultrastar/ultrastar_txt.py +++ b/src/modules/Ultrastar/ultrastar_txt.py @@ -114,7 +114,7 @@ class UltrastarTxtValue: audio = "" video = None videoGap = None - gap = "" + gap = "0" bpm = "" language = None cover = None diff --git a/src/modules/Ultrastar/ultrastar_writer.py b/src/modules/Ultrastar/ultrastar_writer.py index 6457550..8efc01d 100644 --- a/src/modules/Ultrastar/ultrastar_writer.py +++ b/src/modules/Ultrastar/ultrastar_writer.py @@ -5,10 +5,10 @@ from packaging import version from modules.console_colors import ULTRASINGER_HEAD -from modules.Ultrastar.coverter.ultrastar_converter import ( +from modules.Ultrastar.converter.ultrastar_converter import ( real_bpm_to_ultrastar_bpm, second_to_beat, ) -from modules.Ultrastar.coverter.ultrastar_midi_converter import convert_midi_note_to_ultrastar_note +from modules.Ultrastar.converter.ultrastar_midi_converter import convert_midi_note_to_ultrastar_note from modules.Ultrastar.ultrastar_txt import UltrastarTxtValue, UltrastarTxtTag, UltrastarTxtNoteTypeTag, \ FILE_ENCODING from modules.Ultrastar.ultrastar_score_calculator import Score diff --git a/src/modules/console_colors.py b/src/modules/console_colors.py index 59328ff..e5d9375 100644 --- a/src/modules/console_colors.py +++ b/src/modules/console_colors.py @@ -9,7 +9,7 @@ def blue_highlighted(text: str) -> str: def green_highlighted(text: str) -> str: - """Returns a blue highlighted text""" + """Returns a green highlighted text""" return f"{Bcolors.dark_green}{text}{Bcolors.endc}"