Skip to content

Commit

Permalink
feat: newline mode options
Browse files Browse the repository at this point in the history
  • Loading branch information
p0n1 committed Sep 20, 2023
1 parent 076311a commit 2acda62
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 9 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ python epub_to_audiobook.py <input_file> <output_folder> [--voice_name <voice_na
- `--language`: (Optional) Language for the Text-to-Speech service. Default is `en-US`.
- `--log`: (Optional) Specifies the logging level. Default is `INFO`. Options include `DEBUG`, `INFO`, `WARNING`, `ERROR`, and `CRITICAL`.
- `--preview`: (Optional) Enable preview mode. In this mode, the script won't convert the text to speech but will instead display the chapter index and titles.
- `--newline_mode`: (Optional) Defines how new paragraphs are detected: `single` or `double`. Default is `double`, suitable for most ebooks. The `single` mode treats any run of one or more newline characters as a paragraph break, while `double` requires two or more consecutive newlines.
- `--break_duration`: (Optional) Determines the break duration in milliseconds between different paragraphs or sections. Default is `1250`. Valid values range from 0 to 5000 milliseconds.
- `--chapter_start`: (Optional) Designates the starting chapter index. Default is `1`.
- `--chapter_end`: (Optional) Specifies the ending chapter index. Default is `-1`, meaning it will process up to the last chapter.
Expand Down Expand Up @@ -121,7 +122,7 @@ The `-v ./:/app` option mounts the current directory (`.`) to the `/app` directo
- [Create a Speech resource](https://portal.azure.com/#create/Microsoft.CognitiveServicesSpeechServices) in the Azure portal.
- Get the Speech resource key and region. After your Speech resource is deployed, select **Go to resource** to view and manage keys. For more information about Cognitive Services resources, see [Get the keys for your resource](https://learn.microsoft.com/en-us/azure/cognitive-services/cognitive-services-apis-create-account#get-the-keys-for-your-resource).

*Source: https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/get-started-text-to-speech#prerequisites*
*Source: <https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/get-started-text-to-speech#prerequisites>*

## Customization of Voice and Language

Expand Down
25 changes: 17 additions & 8 deletions epub_to_audiobook.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,19 +49,26 @@ def sanitize_title(title: str) -> str:
return sanitized_title


def extract_chapters(epub_book: epub.EpubBook) -> List[Tuple[str, str]]:
def extract_chapters(epub_book: epub.EpubBook, newline_mode: str) -> List[Tuple[str, str]]:
chapters = []
for item in epub_book.get_items():
if item.get_type() == ebooklib.ITEM_DOCUMENT:
content = item.get_content()
soup = BeautifulSoup(content, 'lxml')
title = soup.title.string if soup.title else ''
raw = soup.get_text(strip=False)
logger.debug(f"Raw text: <{raw[:100]}>")
logger.debug(f"Raw text: <{raw[:]}>")

# Replace excessive whitespaces and newline characters based on the mode
if newline_mode == 'single':
cleaned_text = re.sub(r'[\n]+', MAGIC_BREAK_STRING, raw.strip())
elif newline_mode == 'double':
cleaned_text = re.sub(
r'[\n]{2,}', MAGIC_BREAK_STRING, raw.strip())
else:
raise ValueError(f"Invalid newline mode: {newline_mode}")

# Replace excessive whitespaces and newline characters
cleaned_text = re.sub(r'[\n]+', MAGIC_BREAK_STRING, raw.strip())
logger.debug(f"Cleaned text step 1: <{cleaned_text[:100]}>")
logger.debug(f"Cleaned text step 1: <{cleaned_text[:]}>")
cleaned_text = re.sub(r'\s+', ' ', cleaned_text)
logger.info(f"Cleaned text step 2: <{cleaned_text[:100]}>")

Expand Down Expand Up @@ -197,9 +204,9 @@ def text_to_speech(session: requests.Session, text: str, output_file: str, voice
return access_token


def epub_to_audiobook(input_file: str, output_folder: str, voice_name: str, language: str, preview: bool, break_duration: int, chapter_start: int, chapter_end: int, output_format: str) -> None:
def epub_to_audiobook(input_file: str, output_folder: str, voice_name: str, language: str, preview: bool, newline_mode: str, break_duration: int, chapter_start: int, chapter_end: int, output_format: str) -> None:
book = epub.read_epub(input_file)
chapters = extract_chapters(book)
chapters = extract_chapters(book, newline_mode)

os.makedirs(output_folder, exist_ok=True)

Expand Down Expand Up @@ -260,6 +267,8 @@ def main():
help="Log level (default: INFO), can be DEBUG, INFO, WARNING, ERROR, CRITICAL")
parser.add_argument("--preview", action="store_true",
help="Enable preview mode. In preview mode, the script will not convert the text to speech. Instead, it will print the chapter index and titles.")
parser.add_argument('--newline_mode', choices=['single', 'double'], default='double',
help="Choose the mode of detecting new paragraphs: 'single' or 'double'. 'single' means a single newline character, while 'double' means two consecutive newline characters. (default: double, works for most ebooks but will detect less paragraphs for some ebooks)")
parser.add_argument("--break_duration", default="1250",
help="Break duration in milliseconds for the different paragraphs or sections (default: 1250). Valid values range from 0 to 5000 milliseconds.")
parser.add_argument("--chapter_start", default=1, type=int,
Expand All @@ -273,7 +282,7 @@ def main():
logger.setLevel(args.log)

epub_to_audiobook(args.input_file, args.output_folder,
args.voice_name, args.language, args.preview, args.break_duration, args.chapter_start, args.chapter_end, args.output_format)
args.voice_name, args.language, args.preview, args.newline_mode, args.break_duration, args.chapter_start, args.chapter_end, args.output_format)
logger.info("Done! 👍")
logger.info(f"args = {args}")

Expand Down

0 comments on commit 2acda62

Please sign in to comment.