Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make exception application case invariant #41

Merged
merged 1 commit into from
Feb 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
_prepare_string_for_syllabification,
_split_text_sections,
syllabify_text,
flatten_syllabified_text,
)
from volpiano_display_utilities.volpiano_syllabification import (
prepare_volpiano_for_syllabification,
Expand Down Expand Up @@ -71,6 +72,18 @@ class TestCantusTextSyllabification(unittest.TestCase):
Tests functions in cantus_text_syllabification.
"""

def test_cantus_exceptions(self):
    """Checks the syllabification of a Cantus Database exception
    word, given once in lowercase and once capitalized."""
    # Pairs of (input word, expected flattened syllabification).
    # The capitalized input must keep its capital letter in the output.
    expected_by_word = (
        ("euouae", "e-u-o-u-a-e"),
        ("Euouae", "E-u-o-u-a-e"),
    )
    for word, expected in expected_by_word:
        flattened = flatten_syllabified_text(syllabify_text(word))
        self.assertEqual(flattened, expected)

def test_clean_text(self):
"""Tests _clean_text."""
initial_text = "abcdefg @#$&*[^@]#${}|~[]/|\\"
Expand Down
27 changes: 18 additions & 9 deletions volpiano_display_utilities/cantus_text_syllabification.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,17 @@
from .syllabified_section import SyllabifiedTextSection

# Exceptions to usual word syllabification, keyed by the lowercase
# form of the word.
# Values are syllable boundaries (e.g. a syllable boundary should
# exist before each indicated string index in the word).
# Example: "mihi" has a syllable boundary before the "h" (index 2),
# so we would have "mihi": [2].
# Each key appears exactly once: a repeated key in a dict literal is
# silently overridden by its last occurrence.
EXCEPTIONS_DICT = {
    "euouae": [1, 2, 3, 4, 5],  # e-u-o-u-a-e
    "israelitis": [2, 4, 5, 7],  # is-ra-e-li-tis
    "israel": [2, 4],  # is-ra-el
    "michael": [2, 5],  # mi-cha-el
}

# INVALID_CHAR_REGEX matches any character not valid in Cantus DB entries
Expand Down Expand Up @@ -59,9 +66,8 @@ def _detect_invalid_characters(text: str) -> bool:
def _prepare_string_for_syllabification(word_str: str) -> Tuple[str, bool, bool]:
"""
Complete preparation of a string before syllabification.
All letters are converted to lowercase. Hyphens are removed
from the beginning and end of the string, and the presence
of these hyphens is recorded.
Hyphens are removed from the beginning and end of the string,
and the presence of these hyphens is recorded.

word_str [str]: string to prepare

Expand Down Expand Up @@ -151,8 +157,11 @@ def syllabify_text(
syllabified_section.append([word])
logging.debug("Word not syllabified: %s", word)
# If the word is an exception, syllabify as specified
elif word in EXCEPTIONS_DICT:
syllabified_section.append(EXCEPTIONS_DICT[word])
elif word.lower() in EXCEPTIONS_DICT:
exception_syllabification = split_word_by_syl_bounds(
word, EXCEPTIONS_DICT[word.lower()]
)
syllabified_section.append(exception_syllabification)
logging.debug(
"Cantus Database syllabification exception found: %s", word
)
Expand Down
Loading