From e18b065812dcb26647c9e037012eba3cc58b455a Mon Sep 17 00:00:00 2001 From: Nick White Date: Fri, 31 Aug 2018 11:37:57 +0100 Subject: [PATCH] Add NFC and NFD normalization options (keeping NFKC as the default) While NFKC is kept as the default, this adds the option to use NFC and NFD normalization. These can't be used directly, but allow a model that has been trained with an alternative normalization to be loaded and used. Without this patch, unpickling such a model throws an error. Such a model can be built using different normalization= parameters, as for example in PR#257. --- ocrolib/lstm.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ocrolib/lstm.py b/ocrolib/lstm.py index f5307590..af7be3c0 100644 --- a/ocrolib/lstm.py +++ b/ocrolib/lstm.py @@ -837,6 +837,12 @@ def ctc_align_targets(outputs,targets,threshold=100.0,verbose=0,debug=0,lo=1e-5) plt.ginput(1,0.01); return aligned +def normalize_nfc(s): + return unicodedata.normalize('NFC',s) + +def normalize_nfd(s): + return unicodedata.normalize('NFD',s) + def normalize_nfkc(s): return unicodedata.normalize('NFKC',s)