diff --git a/README.md b/README.md index b89840c5..a989b2d6 100644 --- a/README.md +++ b/README.md @@ -1,47 +1,9 @@ -# Autosub - -### Auto-generated subtitles for any video +# Autosub -Autosub is a utility for automatic speech recognition and subtitle generation. It takes a video or an audio file as input, performs voice activity detection to find speech regions, makes parallel requests to Google Web Speech API to generate transcriptions for those regions, (optionally) translates them to a different language, and finally saves the resulting subtitles to disk. It supports a variety of input and output languages (to see which, run the utility with the argument `--list-languages`) and can currently produce subtitles in either the [SRT format](https://en.wikipedia.org/wiki/SubRip) or simple [JSON](https://en.wikipedia.org/wiki/JSON). +This is a modified version of [Autosub](https://github.com/agermanidis/autosub) with added arguments for minimum and maximum region size (length of audio to be transcribed in each "subtitle"). -### Installation +Personally I found this very convenient when using autosub to transcribe interviews (yes, autosub works great for that application), but it might be useful in other cases too. -1. Install [ffmpeg](https://www.ffmpeg.org/). -2. Run `pip install autosub`. 
- -### Usage - -``` -$ autosub -h -usage: autosub [-h] [-C CONCURRENCY] [-o OUTPUT] [-F FORMAT] [-S SRC_LANGUAGE] - [-D DST_LANGUAGE] [-K API_KEY] [--list-formats] - [--list-languages] - [source_path] - -positional arguments: - source_path Path to the video or audio file to subtitle - -optional arguments: - -h, --help show this help message and exit - -C CONCURRENCY, --concurrency CONCURRENCY - Number of concurrent API requests to make - -o OUTPUT, --output OUTPUT - Output path for subtitles (by default, subtitles are - saved in the same directory and name as the source - path) - -F FORMAT, --format FORMAT - Destination subtitle format - -S SRC_LANGUAGE, --src-language SRC_LANGUAGE - Language spoken in source file - -D DST_LANGUAGE, --dst-language DST_LANGUAGE - Desired language for the subtitles - -K API_KEY, --api-key API_KEY - The Google Translate API key to be used. (Required for - subtitle translation) - --list-formats List all available subtitle formats - --list-languages List all available source/destination languages -``` - -### License - -MIT +Added arguments: + -m MIN, --min MIN Minimum region size + -M MAX, --max MAX Maximum region size diff --git a/autosub/__init__.py b/autosub/__init__.py index d85e4db9..06ded2c5 100644 --- a/autosub/__init__.py +++ b/autosub/__init__.py @@ -35,7 +35,6 @@ DEFAULT_SRC_LANGUAGE = 'en' DEFAULT_DST_LANGUAGE = 'en' - def percentile(arr, percent): """ Calculate the given percentile of arr. 
@@ -237,6 +236,8 @@ def generate_subtitles( # pylint: disable=too-many-locals,too-many-arguments src_language=DEFAULT_SRC_LANGUAGE, dst_language=DEFAULT_DST_LANGUAGE, subtitle_file_format=DEFAULT_SUBTITLE_FORMAT, + min_region_size=0.5, + max_region_size=6, api_key=None, ): """ @@ -244,7 +245,7 @@ def generate_subtitles( # pylint: disable=too-many-locals,too-many-arguments """ audio_filename, audio_rate = extract_audio(source_path) - regions = find_speech_regions(audio_filename) + regions = find_speech_regions(audio_filename, min_region_size=min_region_size, max_region_size=max_region_size) pool = multiprocessing.Pool(concurrency) converter = FLACConverter(source_path=audio_filename) @@ -354,6 +355,8 @@ def main(): """ Run autosub as a command-line program. """ + + parser = argparse.ArgumentParser() parser.add_argument('source_path', help="Path to the video or audio file to subtitle", nargs='?') @@ -368,6 +371,10 @@ def main(): default=DEFAULT_SRC_LANGUAGE) parser.add_argument('-D', '--dst-language', help="Desired language for the subtitles", default=DEFAULT_DST_LANGUAGE) + parser.add_argument('-m', '--min', help="Minimum region size", + default=0.5) + parser.add_argument('-M', '--max', help="Maximum region size", + default=6) parser.add_argument('-K', '--api-key', help="The Google Translate API key to be used. \ (Required for subtitle translation)") @@ -401,6 +408,8 @@ def main(): dst_language=args.dst_language, api_key=args.api_key, subtitle_file_format=args.format, + min_region_size=int(args.min), + max_region_size=int(args.max), output=args.output, ) print("Subtitles file created at {}".format(subtitle_file_path))