feat: newline mode options

p0n1 · Sep 20, 2023 · 2acda62 · 2acda62
1 parent 076311a
commit 2acda62
Show file tree

Hide file tree

Showing 2 changed files with 19 additions and 9 deletions.
diff --git a/README.md b/README.md
@@ -70,6 +70,7 @@ python epub_to_audiobook.py <input_file> <output_folder> [--voice_name <voice_na
 - `--language`: (Optional) Language for the Text-to-Speech service. Default is `en-US`.
 - `--log`: (Optional) Specifies the logging level. Default is `INFO`. Options include `DEBUG`, `INFO`, `WARNING`, `ERROR`, and `CRITICAL`.
 - `--preview`: (Optional) Enable preview mode. In this mode, the script won't convert the text to speech but will instead display the chapter index and titles.
+- `--newline_mode`: (Optional) Defines how new paragraphs are detected: `single` or `double`. Default is `double`, which is suitable for most ebooks. In `single` mode, every newline character starts a new paragraph; in `double` mode, a new paragraph requires two or more consecutive newline characters.
 - `--break_duration`: (Optional) Determines the break duration in milliseconds between different paragraphs or sections. Default is `1250`. Valid values range from 0 to 5000 milliseconds.
 - `--chapter_start`: (Optional) Designates the starting chapter index. Default is `1`.
 - `--chapter_end`: (Optional) Specifies the ending chapter index. Default is `-1`, meaning it will process up to the last chapter.
@@ -121,7 +122,7 @@ The `-v ./:/app` option mounts the current directory (`.`) to the `/app` directo
 - [Create a Speech resource](https://portal.azure.com/#create/Microsoft.CognitiveServicesSpeechServices) in the Azure portal.
 - Get the Speech resource key and region. After your Speech resource is deployed, select **Go to resource** to view and manage keys. For more information about Cognitive Services resources, see [Get the keys for your resource](https://learn.microsoft.com/en-us/azure/cognitive-services/cognitive-services-apis-create-account#get-the-keys-for-your-resource).
 
-*Source: https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/get-started-text-to-speech#prerequisites*
+*Source: <https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/get-started-text-to-speech#prerequisites>*
 
 ## Customization of Voice and Language
 

diff --git a/epub_to_audiobook.py b/epub_to_audiobook.py
@@ -49,19 +49,26 @@ def sanitize_title(title: str) -> str:
     return sanitized_title
 
 
-def extract_chapters(epub_book: epub.EpubBook) -> List[Tuple[str, str]]:
+def extract_chapters(epub_book: epub.EpubBook, newline_mode: str) -> List[Tuple[str, str]]:
     chapters = []
     for item in epub_book.get_items():
         if item.get_type() == ebooklib.ITEM_DOCUMENT:
             content = item.get_content()
             soup = BeautifulSoup(content, 'lxml')
             title = soup.title.string if soup.title else ''
             raw = soup.get_text(strip=False)
-            logger.debug(f"Raw text: <{raw[:100]}>")
+            logger.debug(f"Raw text: <{raw[:]}>")
+
+            # Replace excessive whitespaces and newline characters based on the mode
+            if newline_mode == 'single':
+                cleaned_text = re.sub(r'[\n]+', MAGIC_BREAK_STRING, raw.strip())
+            elif newline_mode == 'double':
+                cleaned_text = re.sub(
+                    r'[\n]{2,}', MAGIC_BREAK_STRING, raw.strip())
+            else:
+                raise ValueError(f"Invalid newline mode: {newline_mode}")
 
-            # Replace excessive whitespaces and newline characters
-            cleaned_text = re.sub(r'[\n]+', MAGIC_BREAK_STRING, raw.strip())
-            logger.debug(f"Cleaned text step 1: <{cleaned_text[:100]}>")
+            logger.debug(f"Cleaned text step 1: <{cleaned_text[:]}>")
             cleaned_text = re.sub(r'\s+', ' ', cleaned_text)
             logger.info(f"Cleaned text step 2: <{cleaned_text[:100]}>")
 
@@ -197,9 +204,9 @@ def text_to_speech(session: requests.Session, text: str, output_file: str, voice
     return access_token
 
 
-def epub_to_audiobook(input_file: str, output_folder: str, voice_name: str, language: str, preview: bool, break_duration: int, chapter_start: int, chapter_end: int, output_format: str) -> None:
+def epub_to_audiobook(input_file: str, output_folder: str, voice_name: str, language: str, preview: bool, newline_mode: str, break_duration: int, chapter_start: int, chapter_end: int, output_format: str) -> None:
     book = epub.read_epub(input_file)
-    chapters = extract_chapters(book)
+    chapters = extract_chapters(book, newline_mode)
 
     os.makedirs(output_folder, exist_ok=True)
 
@@ -260,6 +267,8 @@ def main():
                         help="Log level (default: INFO), can be DEBUG, INFO, WARNING, ERROR, CRITICAL")
     parser.add_argument("--preview", action="store_true",
                         help="Enable preview mode. In preview mode, the script will not convert the text to speech. Instead, it will print the chapter index and titles.")
+    parser.add_argument('--newline_mode', choices=['single', 'double'], default='double',
+                        help="Choose the mode of detecting new paragraphs: 'single' or 'double'. 'single' means a single newline character, while 'double' means two consecutive newline characters. (default: double, works for most ebooks but will detect less paragraphs for some ebooks)")
     parser.add_argument("--break_duration", default="1250",
                         help="Break duration in milliseconds for the different paragraphs or sections (default: 1250). Valid values range from 0 to 5000 milliseconds.")
     parser.add_argument("--chapter_start", default=1, type=int,
@@ -273,7 +282,7 @@ def main():
     logger.setLevel(args.log)
 
     epub_to_audiobook(args.input_file, args.output_folder,
-                      args.voice_name, args.language, args.preview, args.break_duration, args.chapter_start, args.chapter_end, args.output_format)
+                      args.voice_name, args.language, args.preview, args.newline_mode, args.break_duration, args.chapter_start, args.chapter_end, args.output_format)
     logger.info("Done! 👍")
     logger.info(f"args = {args}")