From 77816fe220303bb19946e71b35d88f581a1b61b8 Mon Sep 17 00:00:00 2001 From: Amit Moryossef Date: Wed, 13 Mar 2024 13:55:25 +0200 Subject: [PATCH] fix(signwriting_tokenizer): fix #4 --- signwriting/tokenizer/signwriting_tokenizer.py | 8 ++++---- signwriting/tokenizer/test_signwriting_normalizer.py | 7 +++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/signwriting/tokenizer/signwriting_tokenizer.py b/signwriting/tokenizer/signwriting_tokenizer.py index 19e7a34..2da8838 100644 --- a/signwriting/tokenizer/signwriting_tokenizer.py +++ b/signwriting/tokenizer/signwriting_tokenizer.py @@ -63,10 +63,10 @@ def text_to_tokens(self, text: str, box_position=False) -> List[str]: def tokens_to_text(self, tokens: List[str]) -> str: tokenized = " ".join(tokens) - tokenized = re.sub(r'p(\d*) p(\d*)', r'\1x\2', tokenized) - tokenized = re.sub(r'c(\d)\d? r(.)', r'\1\2', tokenized) - tokenized = re.sub(r'c(\d)\d?', r'\1 0', tokenized) - tokenized = re.sub(r'r(.)', r'0\1', tokenized) + tokenized = re.sub(r' p(\d*) p(\d*)', r'\1x\2', tokenized) + tokenized = re.sub(r' c(\d)\d? r(.)', r'\1\2', tokenized) + tokenized = re.sub(r' c(\d)\d?', r'\1 0', tokenized) + tokenized = re.sub(r' r(.)', r'0\1', tokenized) tokenized = tokenized.replace(' ', '') tokenized = re.sub(r'(\d)([MBLR])', r'\1 \2', tokenized) diff --git a/signwriting/tokenizer/test_signwriting_normalizer.py b/signwriting/tokenizer/test_signwriting_normalizer.py index b5f98ca..95523f9 100644 --- a/signwriting/tokenizer/test_signwriting_normalizer.py +++ b/signwriting/tokenizer/test_signwriting_normalizer.py @@ -23,6 +23,13 @@ def test_normalizer_creates_space(self): normalized = normalize_signwriting(fsw_1 + fsw_2) self.assertEqual(f"{fsw_1} {fsw_2}", normalized) + def test_normalization_is_identity_regression_4(self): + # https://github.com/sign-language-processing/signwriting/issues/4 + fsw_1 = "M511x510S2c734490x490" + fsw_2 = "M510x518S2c105490x483" + self.assertEqual(fsw_1, normalize_signwriting(fsw_1)) + self.assertEqual(fsw_2, normalize_signwriting(fsw_2)) + if __name__ == '__main__': unittest.main()