From b8a66945191b763b59aa9224a283cba4b44ea13b Mon Sep 17 00:00:00 2001 From: Adrien Carpentier Date: Sat, 21 Dec 2024 19:45:12 +0900 Subject: [PATCH] chore: add pyproject.toml file, remove requirements-build.txt and update README --- .circleci/config.yml | 4 +++ README.md | 34 ++++++++++++++++++++++++--- pyproject.toml | 62 ++++++++++++++++++++++++++++++++++++++++++ requirements-build.txt | 4 --- requirements.txt | 30 ++++++++++---------- 5 files changed, 110 insertions(+), 24 deletions(-) create mode 100644 pyproject.toml delete mode 100644 requirements-build.txt diff --git a/.circleci/config.yml b/.circleci/config.yml index c8ef6b4..ff4848a 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -34,7 +34,11 @@ jobs: command: | virtualenv venv source venv/bin/activate + # Install build dependencies + pip install build + # Install project dependencies pip install -r requirements.txt + # Now install the package in editable mode pip install -e . - save_cache: key: << pipeline.parameters.cache-prefix >>-{{ arch }}-{{ checksum "requirements.txt" }} diff --git a/README.md b/README.md index b55ce27..46d4ae8 100644 --- a/README.md +++ b/README.md @@ -12,10 +12,25 @@ You can also directly feed the URL of a remote file (from data.gouv.fr for insta -You need to have python >= 3.7 installed. We recommend using a virtual environement. +You need to have python >= 3.9 installed. We recommend using a virtual environment. -``` +```shell pip install csv-detective ``` +### Create a lock file + +If you want to ensure reproducible installations, you can create a lock file from the `pyproject.toml`: + +Using pip: +```shell +pip install pip-tools +pip-compile pyproject.toml --output-file requirements.txt +``` + +Using uv: +```shell +uv pip compile pyproject.toml -o requirements.txt +``` + ### Detect some columns Say you have a tabular file located at `file_path`. 
This is how you could use `csv_detective`: @@ -44,7 +59,7 @@ inspection_results = routine( The program creates a `Python` dictionnary with the following information : -``` +```python { "encoding": "windows-1252", # Encoding detected "separator": ";", # Detected CSV separator @@ -183,9 +198,18 @@ An early version of this analysis of all resources on data.gouv.fr can be found ## Release The release process uses `bumpr`. +`bumpr` and `twine` are release tools, not build requirements, so install them explicitly. + +### Install build dependencies + +Using pip: +```shell +pip install build twine "bumpr>=0.3.8" +``` +Using uv: ```shell -pip install -r requirements-build.txt +uv pip install build twine "bumpr>=0.3.8" ``` ### Process @@ -212,7 +236,7 @@ bumpr -v See bumpr options for minor and major: -``` +```shell $ bumpr -h usage: bumpr [-h] [--version] [-v] [-c CONFIG] [-d] [-st] [-b | -pr] [-M] [-m] [-p] [-s SUFFIX] [-u] [-pM] [-pm] [-pp] [-ps PREPARE_SUFFIX] [-pu] diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..7b7cd6f --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,62 @@ +[project] +name = "csv_detective" +dynamic = ["version"] +requires-python = ">=3.9, <3.13" +authors = [ + {name = "Etalab", email = "opendatateam@data.gouv.fr"}, +] +description = "Detect CSV column content" +readme = "README.md" +license = {text = "GNU Affero General Public License v3"} +classifiers = [ + "Development Status :: 2 - Pre-Alpha", + "License :: OSI Approved :: GNU Affero General Public License v3", + "Operating System :: POSIX", + "Programming Language :: Python :: 3", + "Topic :: Scientific/Engineering :: Information Analysis", +] +keywords = ["CSV", "data processing", "encoding", "guess", "parser", "tabular"] +dependencies = [ + "boto3>=1.34.0", + "dateparser>=1.2.0", + "faust-cchardet>=2.1.19", + "pandas>=2.2.0", + "pytest>=8.3.0", + "python-dateutil>=2.8.2", + "Unidecode>=1.3.6", + "openpyxl>=3.1.5", + "xlrd>=2.0.1", + "odfpy>=1.4.1", + "requests>=2.32.3", + "responses>=0.25.0", + "python-magic>=0.4.27", + "frformat==0.4.0", + "faker>=33.0.0", + 
"rstr<=3.2.2", +] + +[project.urls] +Homepage = "https://github.com/etalab/csv_detective" + +[project.scripts] +csv_detective = "csv_detective.cli:run" + +[tool.setuptools] +packages = ["csv_detective"] + +[tool.setuptools.dynamic] +version = {attr = "csv_detective.__version__"} + +[tool.setuptools.data-files] +"share/csv_detective" = [ + "CHANGELOG.md", + "LICENSE.AGPL.txt", + "README.md", +] + +[build-system] +requires = [ + "setuptools>=61.0", + "wheel", +] +build-backend = "setuptools.build_meta" diff --git a/requirements-build.txt b/requirements-build.txt deleted file mode 100644 index 1960754..0000000 --- a/requirements-build.txt +++ /dev/null @@ -1,4 +0,0 @@ -bumpr==0.3.8 -setuptools -wheel -twine diff --git a/requirements.txt b/requirements.txt index e9dc036..8c27c0a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,16 +1,16 @@ -boto3>=1.34.0 -dateparser>=1.2.0 -faust-cchardet>=2.1.19 -pandas>=2.2.0 -pytest>=8.3.0 -python-dateutil>=2.8.2 -Unidecode>=1.3.6 -openpyxl>=3.1.5 -xlrd>=2.0.1 -odfpy>=1.4.1 -requests>=2.32.3 -responses>=0.25.0 -python-magic>=0.4.27 +boto3==1.34.0 +dateparser==1.2.0 +faust-cchardet==2.1.19 +pandas==2.2.0 +pytest==8.3.0 +python-dateutil==2.8.2 +Unidecode==1.3.6 +openpyxl==3.1.5 +xlrd==2.0.1 +odfpy==1.4.1 +requests==2.32.3 +responses==0.25.0 +python-magic==0.4.27 frformat==0.4.0 -faker>=33.0.0 -rstr<=3.2.2 +faker==33.0.0 +rstr==3.2.2