diff --git a/README.md b/README.md index cc32b22..7115dd3 100644 --- a/README.md +++ b/README.md @@ -17,4 +17,4 @@ pip install greek-normalisation See `tests.rst` for usage examples. -Also, two command-line utilities `toNFC` and `toNFD` are installed which can be used to do unicode normalisation on files (e.g. `toNFC source.txt > nfc_version.txt`). +Also, three command-line utilities are installed: `to2019`, which converts U+02BC and U+1FBF to U+2019, and `toNFC` and `toNFD`, which do Unicode normalisation on files (e.g. `toNFC source.txt > nfc_version.txt`). diff --git a/greek_normalisation/convert_files.py b/greek_normalisation/convert_files.py index 1b0d525..49544a3 100755 --- a/greek_normalisation/convert_files.py +++ b/greek_normalisation/convert_files.py @@ -3,7 +3,7 @@ import fileinput import sys -from .utils import nfc, nfd +from .utils import nfc, nfd, convert_to_2019 def convert(func): @@ -25,3 +25,7 @@ def to_nfc(): def to_nfd(): convert(nfd) + + +def to_2019(): + convert(convert_to_2019) diff --git a/greek_normalisation/to2019.py b/greek_normalisation/to2019.py deleted file mode 100755 index c12d71c..0000000 --- a/greek_normalisation/to2019.py +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env python3 - -import fileinput -import sys - - -CHARACTERS_TO_CHANGE = [ - "\u02BC", - "\u1FBF", -] - -lines_changed = 0 - -with fileinput.input() as f: - for line in f: - text = line - for ch in CHARACTERS_TO_CHANGE: - text = text.replace(ch, "\u2019") - print(text, end="") - if text != line: - lines_changed += 1 - -print(f"{lines_changed} lines changed", file=sys.stderr) diff --git a/greek_normalisation/utils.py b/greek_normalisation/utils.py index 87b032e..7b90a20 100644 --- a/greek_normalisation/utils.py +++ b/greek_normalisation/utils.py @@ -26,6 +26,10 @@ def nfkc(s): return unicodedata.normalize("NFKC", s) +def convert_to_2019(s): + return s.replace("\u02BC", "\u2019").replace("\u1FBF", "\u2019") + + def strip_accents(s): return nfc("".join( cp for cp in 
nfd(s) if cp not in ACCENTS diff --git a/setup.py b/setup.py index ed7441c..14bb3dd 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ setup( name="greek-normalisation", - version="0.3", + version="0.4", description="Python 3 utilities for validating and normalising Ancient Greek text", url="http://github.com/jtauber/greek-normalisation", author="James Tauber", @@ -18,6 +18,7 @@ packages=["greek_normalisation"], entry_points={ "console_scripts": [ + "to2019 = greek_normalisation.convert_files:to_2019", "toNFC = greek_normalisation.convert_files:to_nfc", "toNFD = greek_normalisation.convert_files:to_nfd", ],