Merge pull request #18 from allo-media/master

Version 2.0.0: English support.
allo-media · Nov 4, 2019 · f9c651b · f9c651b
2 parents d85ce63 + 361bc00
commit f9c651b
Show file tree

Hide file tree

Showing 14 changed files with 1,006 additions and 399 deletions.
diff --git a/README.rst b/README.rst
@@ -6,13 +6,13 @@ text2num
 
 ``text2num`` is a python package that provides functions and parser classes for:
 
-- parsing numbers expressed as words in French and convert them to integer values;
-- detect ordinal, cardinal and decimal numbers in a stream of French words and get their decimal digit representations.
+- parsing numbers expressed as words in French or English and converting them to integer values;
+- detecting ordinal, cardinal and decimal numbers in a stream of French or English words and getting their decimal digit representations.
 
 Compatibility
 -------------
 
-Tested on python 3.6, 3.7.
+Tested on python 3.7. Requires Python >= 3.7.
 
 License
 -------
@@ -36,33 +36,51 @@ Usage examples
 Parse and convert
 ~~~~~~~~~~~~~~~~~
 
+
+French examples:
+
 .. code-block:: python
 
     >>> from text_to_num import text2num
-    >>> text2num('quatre-vingt-quinze')
+    >>> text2num('quatre-vingt-quinze', "fr")
     95
 
-    >>> text2num('nonante-cinq')
+    >>> text2num('nonante-cinq', "fr")
     95
 
-    >>> text2num('mille neuf cent quatre-vingt dix-neuf')
+    >>> text2num('mille neuf cent quatre-vingt dix-neuf', "fr")
     1999
 
-    >>> text2num('dix-neuf cent quatre-vingt dix-neuf')
+    >>> text2num('dix-neuf cent quatre-vingt dix-neuf', "fr")
     1999
 
-    >>> text2num("cinquante et un million cinq cent soixante dix-huit mille trois cent deux")
+    >>> text2num("cinquante et un million cinq cent soixante dix-huit mille trois cent deux", "fr")
     51578302
 
-    >>> text2num('mille mille deux cents')
+    >>> text2num('mille mille deux cents', "fr")
     ValueError: invalid literal for text2num: 'mille mille deux cents'
 
 
+English examples:
+
+.. code-block:: python
+
+    >>> from text_to_num import text2num
+
+    >>> text2num("fifty-one million five hundred seventy-eight thousand three hundred two", "en")
+    51578302
+
+    >>> text2num("eighty-one", "en")
+    81
+
+
 Find and transcribe
 ~~~~~~~~~~~~~~~~~~~
 
 Any numbers, even ordinals.
 
+French:
+
 .. code-block:: python
 
     >>> from text_to_num import alpha2digit
@@ -85,6 +103,16 @@ Any numbers, even ordinals.
     Décimaux: 12,99, 120,05 ; mais 60 02.
 
 
+English:
+
+.. code-block:: python
+
+    >>> from text_to_num import alpha2digit
+    >>> text = "On May twenty-third, I bought twenty-five cows, twelve chickens and one hundred twenty five point forty kg of potatoes."
+    >>> alpha2digit(text, "en")
+    'On May 23rd, I bought 25 cows, 12 chickens and 125.40 kg of potatoes.'
+
+
 Read the complete documentation on `ReadTheDocs <http://text2num.readthedocs.io/>`_.
 
 Contribute

diff --git a/doc/conf.py b/doc/conf.py
@@ -19,14 +19,14 @@
 
 # -- Project information -----------------------------------------------------
 
-project = 'text2num'
-copyright = '2018, Allo-Media'
-author = 'Allo-Media'
+project = "text2num"
+copyright = "2018, Allo-Media"
+author = "Allo-Media"
 
 # The short X.Y version
-version = ''
+version = ""
 # The full version, including alpha/beta/rc tags
-release = '1.0'
+release = "1.0"
 
 
 # -- General configuration ---------------------------------------------------
@@ -39,22 +39,22 @@
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = [
-    'sphinx.ext.autodoc',
-    'sphinx.ext.doctest',
-    'sphinx.ext.coverage',
+    "sphinx.ext.autodoc",
+    "sphinx.ext.doctest",
+    "sphinx.ext.coverage",
 ]
 
 # Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
 
 # The suffix(es) of source filenames.
 # You can specify multiple suffix as a list of string:
 #
 # source_suffix = ['.rst', '.md']
-source_suffix = '.rst'
+source_suffix = ".rst"
 
 # The master toctree document.
-master_doc = 'index'
+master_doc = "index"
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
@@ -66,7 +66,7 @@
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 # This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
 
 # The name of the Pygments (syntax highlighting) style to use.
 pygments_style = None
@@ -77,7 +77,7 @@
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
 #
-html_theme = 'alabaster'
+html_theme = "alabaster"
 
 # Theme options are theme-specific and customize the look and feel of a theme
 # further.  For a list of options available for each theme, see the
@@ -88,7 +88,7 @@
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ["_static"]
 
 # Custom sidebar templates, must be a dictionary that maps document names
 # to template names.
@@ -104,7 +104,7 @@
 # -- Options for HTMLHelp output ---------------------------------------------
 
 # Output file base name for HTML help builder.
-htmlhelp_basename = 'text2numdoc'
+htmlhelp_basename = "text2numdoc"
 
 
 # -- Options for LaTeX output ------------------------------------------------
@@ -113,15 +113,12 @@
     # The paper size ('letterpaper' or 'a4paper').
     #
     # 'papersize': 'letterpaper',
-
     # The font size ('10pt', '11pt' or '12pt').
     #
     # 'pointsize': '10pt',
-
     # Additional stuff for the LaTeX preamble.
     #
     # 'preamble': '',
-
     # Latex figure (float) alignment
     #
     # 'figure_align': 'htbp',
@@ -131,19 +128,15 @@
 # (source start file, target name, title,
 #  author, documentclass [howto, manual, or own class]).
 latex_documents = [
-    (master_doc, 'text2num.tex', 'text2num Documentation',
-     'Allo-Media', 'manual'),
+    (master_doc, "text2num.tex", "text2num Documentation", "Allo-Media", "manual"),
 ]
 
 
 # -- Options for manual page output ------------------------------------------
 
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
-man_pages = [
-    (master_doc, 'text2num', 'text2num Documentation',
-     [author], 1)
-]
+man_pages = [(master_doc, "text2num", "text2num Documentation", [author], 1)]
 
 
 # -- Options for Texinfo output ----------------------------------------------
@@ -152,9 +145,15 @@
 # (source start file, target name, title, author,
 #  dir menu entry, description, category)
 texinfo_documents = [
-    (master_doc, 'text2num', 'text2num Documentation',
-     author, 'text2num', 'One line description of project.',
-     'Miscellaneous'),
+    (
+        master_doc,
+        "text2num",
+        "text2num Documentation",
+        author,
+        "text2num",
+        "One line description of project.",
+        "Miscellaneous",
+    ),
 ]
 
 
@@ -173,7 +172,7 @@
 # epub_uid = ''
 
 # A list of files that should not be packed into the epub file.
-epub_exclude_files = ['search.html']
+epub_exclude_files = ["search.html"]
 
 
-# -- Extension configuration -------------------------------------------------
+# -- Extension configuration -------------------------------------------------
diff --git a/doc/index.rst b/doc/index.rst
@@ -8,8 +8,8 @@ Welcome to text2num's documentation!
 
 ``text2num`` is a package that provides functions and parser classes for:
 
-- parsing numbers expressed as words in French and convert them to integer values;
-- detect ordinals, cardinals and decimal numbers in a stream of French words and get their decimal digit representations.
+- parsing numbers expressed as words in French or English and converting them to integer values;
+- detecting ordinals, cardinals and decimal numbers in a stream of French or English words and getting their decimal digit representations.
 
 ``text2num`` is distributed under the MIT license and is known to work on Python version 3.7 and above.
 

diff --git a/doc/quickstart.rst b/doc/quickstart.rst
@@ -23,6 +23,13 @@ Integers only.
 .. code-block:: python
 
     >>> from text_to_num import text2num
+
+    >>> text2num("fifty-one million five hundred seventy-eight thousand three hundred two", "en")
+    51578302
+
+    >>> text2num("eighty-one", "en")
+    81
+
     >>> text2num('quatre-vingt-quinze', "fr")
     95
 
@@ -50,6 +57,12 @@ Any number, even ordinals.
 .. code-block:: python
 
     >>> from text_to_num import alpha2digit
+
+    >>> text = "On May twenty-third, I bought twenty-five cows, twelve chickens and one hundred twenty five point forty kg of potatoes."
+    >>> alpha2digit(text, "en")
+    'On May 23rd, I bought 25 cows, 12 chickens and 125.40 kg of potatoes.'
+
+
     >>> sentence = (
     ...         "Huit cent quarante-deux pommes, vingt-cinq chiens, mille trois chevaux, "
     ...         "douze mille six cent quatre-vingt-dix-huit clous.\n"

diff --git a/setup.py b/setup.py
@@ -1,33 +1,35 @@
-from setuptools import setup
+from setuptools import setup, find_packages
 
 
-VERSION = '1.4.0'
+VERSION = "2.0.0"
 
 
 def readme():
-    with open('README.rst', encoding='utf-8') as f:
+    with open("README.rst", encoding="utf-8") as f:
         return f.read()
 
 
-setup(name='text2num',
-      version=VERSION,
-      description='Parse and convert numbers written in French into their digit representation.',
-      long_description=readme(),
-      classifiers=[
-        'Development Status :: 5 - Production/Stable',
-        'License :: OSI Approved :: MIT License',
-        'Programming Language :: Python :: 3.6',
-        'Topic :: Text Processing :: Linguistic',
-        'Topic :: Text Processing :: Filters',
-        'Natural Language :: French'
-      ],
-      keywords='French NLP words-to-numbers',
-      url='https://github.com/allo-media/text2num',
-      author='Allo-Media',
-      author_email='[email protected]',
-      license='MIT',
-      packages=['text_to_num'],
-      python_requires='>=3',
-      test_suite='tests',
-      include_package_data=True,
-      zip_safe=False)
+setup(
+    name="text2num",
+    version=VERSION,
+    description="Parse and convert numbers written in French or English into their digit representation.",
+    long_description=readme(),
+    classifiers=[
+        "Development Status :: 5 - Production/Stable",
+        "License :: OSI Approved :: MIT License",
+        "Programming Language :: Python :: 3.6",
+        "Topic :: Text Processing :: Linguistic",
+        "Topic :: Text Processing :: Filters",
+        "Natural Language :: French",
+    ],
+    keywords="French and English NLP words-to-numbers",
+    url="https://github.com/allo-media/text2num",
+    author="Allo-Media",
+    author_email="[email protected]",
+    license="MIT",
+    packages=find_packages(),
+    python_requires=">=3.7",
+    test_suite="tests",
+    include_package_data=True,
+    zip_safe=False,
+)