From 821177863ff35a5c2889019621eea1a04631e4ed Mon Sep 17 00:00:00 2001 From: Joel Natividad <1980690+jqnatividad@users.noreply.github.com> Date: Wed, 4 Nov 2020 09:28:50 -0500 Subject: [PATCH 01/13] Minor typo, rephrasing; add HA comments to datapusher-uwsgi.ini --- README.md | 4 ++-- deployment/datapusher-uwsgi.ini | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 9677409..78d6b5f 100644 --- a/README.md +++ b/README.md @@ -113,7 +113,7 @@ Supervisor to keep the process up. At this point you can run DataPusher with the following command: - /usr/lib/ckan/datapusher/bin/uwsgi -i /usr/lib/ckan/datapusher/src/datapusher/deployment/datapusher-uswgi.ini + /usr/lib/ckan/datapusher/bin/uwsgi -i /usr/lib/ckan/datapusher/src/datapusher/deployment/datapusher-uwsgi.ini *Note*: If you are installing the DataPusher on a different location than the default @@ -189,7 +189,7 @@ Here's a summary of the options available. Most of the configuration options above can be also provided as environment variables prepending the name with `DATAPUSHER_`, eg `DATAPUSHER_SQLALCHEMY_DATABASE_URI`, `DATAPUSHER_PORT`, etc. -By default DataPusher uses SQLite as the database backend for the jobs information. This is fine for local development and sites with low activity, but for sites that need more performance should use Postgres as the backend for the jobs database (eg `SQLALCHEMY_DATABASE_URI=postgresql://datapusher_jobs:YOURPASSWORD@localhost/datapusher_jobs`. See also [High Availability Setup](#high-availability-setup). If SQLite is used, is probably a good idea to store the database in a location other than `/tmp`. This will prevent the database being dropped, causing out of sync errors in the CKAN side. A good place to store it is the CKAN storage folder (if DataPusher is installed in the same server), generally in `/var/lib/ckan/`. +By default, DataPusher uses SQLite as the database backend for jobs information. 
This is fine for local development and sites with low activity, but for sites that need more performance, Postgres should be used as the backend for the jobs database (eg `SQLALCHEMY_DATABASE_URI=postgresql://datapusher_jobs:YOURPASSWORD@localhost/datapusher_jobs`). See also [High Availability Setup](#high-availability-setup). If SQLite is used, it's probably a good idea to store the database in a location other than `/tmp`. This will prevent the database from being dropped, which would cause out-of-sync errors on the CKAN side. A good place to store it is the CKAN storage folder (if DataPusher is installed on the same server), generally in `/var/lib/ckan/`. ## Usage diff --git a/deployment/datapusher-uwsgi.ini b/deployment/datapusher-uwsgi.ini index 44c6b7f..11b5754 100644 --- a/deployment/datapusher-uwsgi.ini +++ b/deployment/datapusher-uwsgi.ini @@ -12,3 +12,8 @@ max-requests = 5000 vacuum = true callable = application buffer-size = 32768 + +## see High Availability Setup +#workers = 3 +#threads = 3 +#lazy-apps = true From 9c8022596e23085e917de004e0cef3e602fc2da1 Mon Sep 17 00:00:00 2001 From: Joel Natividad <1980690+jqnatividad@users.noreply.github.com> Date: Thu, 5 Nov 2020 09:49:25 -0500 Subject: [PATCH 02/13] Ensure psycopg2 is installed in datapusher virtualenv --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 78d6b5f..5bae3d8 100644 --- a/README.md +++ b/README.md @@ -133,7 +133,7 @@ The default DataPusher configuration uses SQLite as the backend for the jobs dat sudo -u postgres createdb -O datapusher_jobs datapusher_jobs -E utf-8 # Run this in the virtualenv where DataPusher is installed - pip install psycopg2 + sudo /usr/lib/ckan/datapusher/bin/pip install psycopg2 # Edit SQLALCHEMY_DATABASE_URI in datapusher_settings.py accordingly # eg SQLALCHEMY_DATABASE_URI=postgresql://datapusher_jobs:YOURPASSWORD@localhost/datapusher_jobs From 105a581d0e435dd6693bf3278ce3a24192273759 Mon Sep 17 00:00:00 2001 From: Marc
Dutoo Date: Fri, 31 Dec 2021 17:31:48 +0100 Subject: [PATCH 03/13] python3 Dockerfile, based on CKAN's --- Dockerfile | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..e22d34f --- /dev/null +++ b/Dockerfile @@ -0,0 +1,46 @@ +FROM debian:buster + +# Install required system packages +RUN apt-get -q -y update \ + && DEBIAN_FRONTEND=noninteractive apt-get -q -y upgrade \ + && apt-get -q -y install \ + python3-dev \ + python3-pip \ + python3-virtualenv \ + zlib1g-dev \ + libxml2-dev \ + libxslt1-dev \ + libffi-dev \ + # else error https://stackoverflow.com/questions/14547631/python-locale-error-unsupported-locale-setting + locales \ + postgresql-client \ + build-essential \ + git \ + vim \ + wget \ + && apt-get -q clean \ + && rm -rf /var/lib/apt/lists/* + + +RUN python3 -m virtualenv --python=python3 /venv +ENV PATH="/venv/bin:$PATH" + +# else error https://stackoverflow.com/questions/59633558/python-based-dockerfile-throws-locale-error-unsupported-locale-setting +ENV LC_ALL=C + +# NO else https://github.com/ckan/datapusher/issues/132 +#datapusher | File "/venv/src/datapusher/jobs.py", line 158, in check_response +#datapusher | request_url=request_url, response=response.text) +#datapusher | datapusher.jobs.HTTPError: +#ENV DATAPUSHER_SSL_VERIFY=true + +# Setup Datapusher +ADD . /venv/src/ +RUN pip install -U pip && \ + cd /venv/src/ && \ + pip install --upgrade --no-cache-dir -r requirements.txt && \ + pip install --upgrade --no-cache-dir -r requirements-dev.txt && \ + #pip install -e . 
+ python setup.py develop + +CMD [ "python", "/venv/src/datapusher/main.py", "/venv/src/deployment/datapusher_settings.py"] From d9939fa93c60eb3c24f757ddf30af89d2213f454 Mon Sep 17 00:00:00 2001 From: Abraham Toriz Date: Wed, 23 Mar 2022 12:45:55 -0700 Subject: [PATCH 04/13] document the new environment variables --- README.md | 40 ++++++++++++++++--------------- deployment/datapusher_settings.py | 3 ++- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 4fbea96..7421cdc 100644 --- a/README.md +++ b/README.md @@ -85,24 +85,24 @@ probably need to set up Nginx as a reverse proxy in front of it and something li Supervisor to keep the process up. - # Install requirements for the DataPusher - sudo apt install python3-venv python3-dev build-essential - sudo apt-get install python-dev python-virtualenv build-essential libxslt1-dev libxml2-dev git libffi-dev + # Install requirements for the DataPusher + sudo apt install python3-venv python3-dev build-essential + sudo apt-get install python-dev python-virtualenv build-essential libxslt1-dev libxml2-dev git libffi-dev - # Create a virtualenv for datapusher + # Create a virtualenv for datapusher sudo python3 -m venv /usr/lib/ckan/datapusher - # Create a source directory and switch to it - sudo mkdir /usr/lib/ckan/datapusher/src - cd /usr/lib/ckan/datapusher/src + # Create a source directory and switch to it + sudo mkdir /usr/lib/ckan/datapusher/src + cd /usr/lib/ckan/datapusher/src - # Clone the source (you should target the latest tagged version) - sudo git clone -b 0.0.17 https://github.com/ckan/datapusher.git + # Clone the source (you should target the latest tagged version) + sudo git clone -b 0.0.17 https://github.com/ckan/datapusher.git - # Install the DataPusher and its requirements - cd datapusher - sudo /usr/lib/ckan/datapusher/bin/pip install -r requirements.txt - sudo /usr/lib/ckan/datapusher/bin/python setup.py develop + # Install the DataPusher and its requirements + cd 
datapusher + sudo /usr/lib/ckan/datapusher/bin/pip install -r requirements.txt + sudo /usr/lib/ckan/datapusher/bin/python setup.py develop # Create a user to run the web service (if necessary) sudo addgroup www-data @@ -132,8 +132,8 @@ The default DataPusher configuration uses SQLite as the backend for the jobs dat sudo -u postgres createuser -S -D -R -P datapusher_jobs sudo -u postgres createdb -O datapusher_jobs datapusher_jobs -E utf-8 - # Run this in the virtualenv where DataPusher is installed - pip install psycopg2 + # Run this in the virtualenv where DataPusher is installed + pip install psycopg2 # Edit SQLALCHEMY_DATABASE_URI in datapusher_settings.py accordingly # eg SQLALCHEMY_DATABASE_URI=postgresql://datapusher_jobs:YOURPASSWORD@localhost/datapusher_jobs @@ -143,9 +143,9 @@ The default DataPusher configuration uses SQLite as the backend for the jobs dat ``` # ... rest of datapusher-uwsgi.ini - workers = 3 - threads = 3 - lazy-apps = true + workers = 3 + threads = 3 + lazy-apps = true ``` ## Configuring @@ -184,9 +184,11 @@ Here's a summary of the options available. | SSL_VERIFY | False | Do not validate SSL certificates when requesting the data file (*Warning*: Do not use this setting in production) | | TYPES | [messytables.StringType, messytables.DecimalType, messytables.IntegerType, messytables.DateUtilType] | [Messytables][] types used internally, can be modified to customize the type guessing | | TYPE_MAPPING | {'String': 'text', 'Integer': 'numeric', 'Decimal': 'numeric', 'DateUtil': 'timestamp'} | Internal Messytables type mapping | +| LOG_FILE | `/tmp/ckan_service.log` | Where to write the logs. Use an empty string to disable | +| STDERR | `True` | Log to stderr? | -Most of the configuration options above can be also provided as environment variables prepending the name with `DATAPUSHER_`, eg `DATAPUSHER_SQLALCHEMY_DATABASE_URI`, `DATAPUSHER_PORT`, etc. 
+Most of the configuration options above can be also provided as environment variables prepending the name with `DATAPUSHER_`, eg `DATAPUSHER_SQLALCHEMY_DATABASE_URI`, `DATAPUSHER_PORT`, etc. In the specific case of `DATAPUSHER_STDERR` the possible values are `1` and `0`. By default DataPusher uses SQLite as the database backend for the jobs information. This is fine for local development and sites with low activity, but for sites that need more performance should use Postgres as the backend for the jobs database (eg `SQLALCHEMY_DATABASE_URI=postgresql://datapusher_jobs:YOURPASSWORD@localhost/datapusher_jobs`. See also [High Availability Setup](#high-availability-setup). If SQLite is used, is probably a good idea to store the database in a location other than `/tmp`. This will prevent the database being dropped, causing out of sync errors in the CKAN side. A good place to store it is the CKAN storage folder (if DataPusher is installed in the same server), generally in `/var/lib/ckan/`. diff --git a/deployment/datapusher_settings.py b/deployment/datapusher_settings.py index 0134a24..d4b198b 100644 --- a/deployment/datapusher_settings.py +++ b/deployment/datapusher_settings.py @@ -29,4 +29,5 @@ SSL_VERIFY = os.environ.get('DATAPUSHER_SSL_VERIFY', True) # logging -#LOG_FILE = '/tmp/ckan_service.log' +LOG_FILE = os.environ.get('DATAPUSHER_LOG_FILE', '/tmp/ckan_service.log') +STDERR = bool(int(os.environ.get('DATAPUSHER_STDERR', '1'))) From 7aa0060f2c98fb0cad476bc5e954f1830f786b2c Mon Sep 17 00:00:00 2001 From: amercader Date: Wed, 13 Apr 2022 17:06:27 +0200 Subject: [PATCH 05/13] Bump dev reqs --- requirements-dev-py2.txt | 3 +++ requirements-dev.txt | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 requirements-dev-py2.txt diff --git a/requirements-dev-py2.txt b/requirements-dev-py2.txt new file mode 100644 index 0000000..60746e9 --- /dev/null +++ b/requirements-dev-py2.txt @@ -0,0 +1,3 @@ +-r requirements.txt +httpretty==0.9.4 +pytest diff 
--git a/requirements-dev.txt b/requirements-dev.txt index 512bb78..de244d4 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,3 +1,3 @@ -r requirements.txt -httpretty==0.9.4 -nose +httpretty==1.1.4 +pytest From 2f2ef9ecf377da263810bfec6f2d7069d624b4b7 Mon Sep 17 00:00:00 2001 From: amercader Date: Wed, 13 Apr 2022 17:06:48 +0200 Subject: [PATCH 06/13] Migrate tests to pytest --- README.md | 2 +- requirements.txt | 4 +- tests/test_mocked.py | 7 +-- tests/test_unit.py | 112 ++++++++++++++++++++++--------------------- tests/test_web.py | 5 +- 5 files changed, 66 insertions(+), 64 deletions(-) diff --git a/README.md b/README.md index 8920386..baf6229 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ If you need to change the host or port, copy `deployment/datapusher_settings.py` To run the tests: - nosetests + pytest ## Production deployment diff --git a/requirements.txt b/requirements.txt index 483fa21..63a4518 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ argparse -ckanserviceprovider==0.0.10 +ckanserviceprovider==1.0.0 html5lib==1.0.1 messytables==0.15.2 certifi -requests[security]==2.24.0 +requests[security]==2.27.1 diff --git a/tests/test_mocked.py b/tests/test_mocked.py index 3bf5a8e..2f6c300 100644 --- a/tests/test_mocked.py +++ b/tests/test_mocked.py @@ -5,8 +5,8 @@ import os import json -import unittest +import pytest import httpretty import datapusher.main as main @@ -27,7 +27,7 @@ def get_static_file(filename): return open(join_static_path(filename)).read() -class TestImport(unittest.TestCase): +class TestImport(): @classmethod def setup_class(cls): cls.host = 'www.ckan.org' @@ -110,4 +110,5 @@ def test_wrong_api_key(self): } } - self.assertRaises(util.JobError, jobs.push_to_datastore, 'fake_id', data) + with pytest.raises(util.JobError): + jobs.push_to_datastore('fake_id', data) diff --git a/tests/test_unit.py b/tests/test_unit.py index 9c1105d..334565c 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ 
-4,11 +4,8 @@ ''' import json -import unittest import requests - -from nose.tools import assert_equal, raises - +import pytest import httpretty import datapusher.jobs as jobs @@ -18,8 +15,8 @@ class TestChunky(): def test_simple(self): chunks = jobs.chunky('abcdefg', 3) - assert_equal( - list(chunks), + assert ( + list(chunks) == [ (['a', 'b', 'c'], False), (['d', 'e', 'f'], False), @@ -28,8 +25,8 @@ def test_simple(self): def test_length_is_the_exact_multiple(self): chunks = jobs.chunky('abcdef', 3) - assert_equal( - list(chunks), + assert ( + list(chunks) == [ (['a', 'b', 'c'], False), (['d', 'e', 'f'], True), @@ -37,33 +34,33 @@ def test_length_is_the_exact_multiple(self): def test_empty(self): chunks = jobs.chunky('', 3) - assert_equal( - list(chunks), []) + assert ( + list(chunks) == []) class TestGetUrl(): def test_get_action_url(self): - assert_equal( - jobs.get_url('datastore_create', 'http://www.ckan.org'), + assert ( + jobs.get_url('datastore_create', 'http://www.ckan.org') == 'http://www.ckan.org/api/3/action/datastore_create') def test_get_action_url_with_stuff(self): - assert_equal( - jobs.get_url('datastore_create', 'http://www.ckan.org/'), + assert ( + jobs.get_url('datastore_create', 'http://www.ckan.org/') == 'http://www.ckan.org/api/3/action/datastore_create') def test_get_action_url_with_https(self): - assert_equal( - jobs.get_url('datastore_create', 'https://www.ckan.org/'), + assert ( + jobs.get_url('datastore_create', 'https://www.ckan.org/') == 'https://www.ckan.org/api/3/action/datastore_create') def test_get_action_url_missing_http(self): - assert_equal( - jobs.get_url('datastore_create', 'www.ckan.org/'), + assert ( + jobs.get_url('datastore_create', 'www.ckan.org/') == 'http://www.ckan.org/api/3/action/datastore_create') -class TestValidation(unittest.TestCase): +class TestValidation(): def test_validate_input(self): jobs.validate_input({ 'metadata': { @@ -73,42 +70,46 @@ def test_validate_input(self): 'api_key': 'köi' }) - 
@raises(util.JobError) def test_validate_input_raises_if_metadata_missing(self): - jobs.validate_input({ - 'foo': {}, - 'api_key': 'my-key' - }) - @raises(util.JobError) + with pytest.raises(util.JobError): + jobs.validate_input({ + 'foo': {}, + 'api_key': 'my-key' + }) + def test_validate_input_raises_if_res_id_missing(self): - jobs.validate_input({ - 'metadata': { - 'ckan_url': 'http://www.ckan.org' - }, - 'api_key': 'my-key' - }) - @raises(util.JobError) + with pytest.raises(util.JobError): + jobs.validate_input({ + 'metadata': { + 'ckan_url': 'http://www.ckan.org' + }, + 'api_key': 'my-key' + }) + def test_validate_input_raises_if_ckan_url_missing(self): - jobs.validate_input({ - 'metadata': { - 'resource_id': 'h32jk4h34k5' - }, - 'api_key': 'my-key' - }) - @raises(util.JobError) + with pytest.raises(util.JobError): + jobs.validate_input({ + 'metadata': { + 'resource_id': 'h32jk4h34k5' + }, + 'api_key': 'my-key' + }) + def test_validate_api_key(self): - jobs.validate_input({ - 'metadata': { - 'resource_id': 'h32jk4h34k5', - 'ckan_url': 'http://www.ckan.org' - } - }) + with pytest.raises(util.JobError): + jobs.validate_input({ + 'metadata': { + 'resource_id': 'h32jk4h34k5', + 'ckan_url': 'http://www.ckan.org' + } + }) -class TestCkanActionCalls(unittest.TestCase): + +class TestCkanActionCalls(): @httpretty.activate def test_get_resource(self): url = 'http://www.ckan.org/api/3/action/resource_show' @@ -120,7 +121,7 @@ def test_get_resource(self): }}), content_type="application/json") resource = jobs.get_resource('an_id', 'http://www.ckan.org/', None) - assert_equal(resource, {'foo': 42}) + assert resource == {'foo': 42} assert json.loads(httpretty.last_request().body)['id'] == 'an_id' @httpretty.activate @@ -164,7 +165,7 @@ def test_send_resource_to_datastore(self): jobs.send_resource_to_datastore({'id': 'an_id'}, [], [], False, 'my_key', 'http://www.ckan.org/') -class TestCheckResponse(unittest.TestCase): +class TestCheckResponse(): """Unit tests for the 
check_response() function.""" @httpretty.activate @@ -210,14 +211,15 @@ def test_text_500_with_false_success(self): assert err.request_url == url @httpretty.activate - @raises(util.JobError) def test_text_404(self): - httpretty.register_uri(httpretty.GET, 'http://www.ckan.org/', - body='{"success": true}', - content_type='html/text', - status=404) - r = requests.get('http://www.ckan.org/') - jobs.check_response(r, 'http://www.ckan.org/', 'Me') + + with pytest.raises(util.JobError): + httpretty.register_uri(httpretty.GET, 'http://www.ckan.org/', + body='{"success": true}', + content_type='html/text', + status=404) + r = requests.get('http://www.ckan.org/') + jobs.check_response(r, 'http://www.ckan.org/', 'Me') @httpretty.activate def test_text_404_ignore(self): diff --git a/tests/test_web.py b/tests/test_web.py index 3eada0a..1940d5a 100644 --- a/tests/test_web.py +++ b/tests/test_web.py @@ -6,7 +6,6 @@ import os import json -from nose.tools import assert_equal import datapusher.main as main @@ -21,5 +20,5 @@ class TestWeb(): def test_status(self): rv = app.get('/status') result_dict = json.loads(rv.data) - assert_equal(result_dict['job_types'], ['push_to_datastore']) - assert_equal(result_dict['name'], 'datapusher') + assert result_dict['job_types'] == ['push_to_datastore'] + assert result_dict['name'] == 'datapusher' From 99b08fb0eaea4fc295a68dd383f4d150b22239fe Mon Sep 17 00:00:00 2001 From: amercader Date: Wed, 13 Apr 2022 17:07:08 +0200 Subject: [PATCH 07/13] Enable github actions --- .github/workflows/test.yml | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 .github/workflows/test.yml diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..0e1a372 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,27 @@ +name: Tests +on: [push, pull_request] +jobs: + test: + strategy: + matrix: + python-version: [2.7, 3.6, 3.7, 3.8, 3.9, "3.10"] + fail-fast: false + name: Python ${{ 
matrix.python-version }} + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install requirements (Python 2) + if: ${{ matrix.python-version == '2.7' }} + run: pip install -r requirements-dev-py2.txt && pip install . + - name: Install requirements (Python 3) + if: ${{ matrix.python-version != '2.7' }} + run: pip install -r requirements-dev.txt && pip install . + - name: Run tests + run: pytest --cov=datapusher --cov-append --cov-report=xml --disable-warnings tests + - name: Upload coverage report to codecov + uses: codecov/codecov-action@v1 + with: + file: ./coverage.xml From 833b4c1df075657c0a59db716f965da1f4d6cb3f Mon Sep 17 00:00:00 2001 From: amercader Date: Wed, 13 Apr 2022 17:10:44 +0200 Subject: [PATCH 08/13] Tests badge --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index baf6229..2129106 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ -[![Build Status](https://travis-ci.org/ckan/datapusher.png?branch=master)](https://travis-ci.org/ckan/datapusher) -[![Coverage Status](https://coveralls.io/repos/ckan/datapusher/badge.png?branch=master)](https://coveralls.io/r/ckan/datapusher?branch=master) +[![Tests](https://github.com/ckan/datapusher/actions/workflows/test.yml/badge.svg)](https://github.com/ckan/datapusher/actions/workflows/test.yml) [![Latest Version](https://img.shields.io/pypi/v/datapusher.svg)](https://pypi.python.org/pypi/datapusher/) [![Downloads](https://img.shields.io/pypi/dm/datapusher.svg)](https://pypi.python.org/pypi/datapusher/) [![Supported Python versions](https://img.shields.io/pypi/pyversions/datapusher.svg)](https://pypi.python.org/pypi/datapusher/) From 2a9ee54dffc9a2cd9e81c7f74a0ec69b81070232 Mon Sep 17 00:00:00 2001 From: amercader Date: Wed, 13 Apr 2022 17:14:15 +0200 Subject: [PATCH 09/13] Add pytest-cov requirement --- requirements-dev-py2.txt | 1 + 
requirements-dev.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/requirements-dev-py2.txt b/requirements-dev-py2.txt index 60746e9..4cc72ac 100644 --- a/requirements-dev-py2.txt +++ b/requirements-dev-py2.txt @@ -1,3 +1,4 @@ -r requirements.txt httpretty==0.9.4 pytest +pytest-cov diff --git a/requirements-dev.txt b/requirements-dev.txt index de244d4..8197ee5 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,3 +1,4 @@ -r requirements.txt httpretty==1.1.4 pytest +pytest-cov From 12f07bf433160da7581ec69552c8506bcaa923d9 Mon Sep 17 00:00:00 2001 From: amercader Date: Wed, 13 Apr 2022 17:32:57 +0200 Subject: [PATCH 10/13] Migrate missing test file --- tests/test_acceptance.py | 74 +++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 39 deletions(-) diff --git a/tests/test_acceptance.py b/tests/test_acceptance.py index dc191de..117d0ee 100644 --- a/tests/test_acceptance.py +++ b/tests/test_acceptance.py @@ -12,12 +12,10 @@ """ import os import json -import unittest import datetime -from nose.tools import assert_equal, raises import httpretty -import requests +import pytest import datapusher.main as main import datapusher.jobs as jobs @@ -37,7 +35,7 @@ def get_static_file(filename): return open(join_static_path(filename), 'rb').read() -class TestImport(unittest.TestCase): +class TestImport(): @classmethod def setup_class(cls): cls.host = 'www.ckan.org' @@ -93,18 +91,15 @@ def register_urls(self, filename='simple.csv', format='CSV', body=json.dumps({'success': True}), content_type='application/json') - # A URL that mocks checking if a datastore table exists datastore_check_url = 'http://www.ckan.org/api/3/action/datastore_search' httpretty.register_uri(httpretty.POST, datastore_check_url, body=json.dumps({'success': True}), content_type='application/json') - return source_url, res_url @httpretty.activate - @raises(util.JobError) def test_too_large_content_length(self): """It should raise JobError if the returned Content-Length 
header is too large. @@ -136,10 +131,10 @@ def test_too_large_content_length(self): content_length=size, content_type='application/json') - jobs.push_to_datastore('fake_id', data, True) + with pytest.raises(util.JobError): + jobs.push_to_datastore('fake_id', data, True) @httpretty.activate - @raises(util.JobError) def test_too_large_file(self): """It should raise JobError if the data file is too large. @@ -172,7 +167,8 @@ def test_too_large_file(self): 'content-length': None }) - jobs.push_to_datastore('fake_id', data, True) + with pytest.raises(util.JobError): + jobs.push_to_datastore('fake_id', data, True) @httpretty.activate def test_content_length_string(self): @@ -246,12 +242,12 @@ def test_simple_csv(self): headers, results = jobs.push_to_datastore('fake_id', data, True) results = list(results) - assert_equal(headers, [{'type': 'timestamp', 'id': 'date'}, + assert (headers == [{'type': 'timestamp', 'id': 'date'}, {'type': 'numeric', 'id': 'temperature'}, {'type': 'text', 'id': 'place'}]) - assert_equal(len(results), 6) - assert_equal( - results[0], + assert len(results) == 6 + assert ( + results[0] == {'date': datetime.datetime(2011, 1, 1, 0, 0), 'place': 'Galway', 'temperature': 1}) @@ -277,11 +273,11 @@ def test_simple_tsv(self): headers, results = jobs.push_to_datastore('fake_id', data, True) results = list(results) - assert_equal(headers, [{'type': 'timestamp', 'id': 'date'}, + assert (headers == [{'type': 'timestamp', 'id': 'date'}, {'type': 'numeric', 'id': 'temperature'}, {'type': 'text', 'id': 'place'}]) - assert_equal(len(results), 6) - assert_equal(results[0], + assert len(results) == 6 + assert (results[0] == {'date': datetime.datetime(2011, 1, 1, 0, 0), 'place': 'Galway', 'temperature': 1}) @@ -307,11 +303,11 @@ def test_simple_ssv(self): headers, results = jobs.push_to_datastore('fake_id', data, True) results = list(results) - assert_equal(headers, [{'type': 'timestamp', 'id': 'date'}, - {'type': 'numeric', 'id': 'temperature'}, + assert (headers 
== [{'type': 'timestamp', 'id': 'date'}, + {'type': 'numeric', 'id': 'temperature'}, {'type': 'text', 'id': 'place'}]) - assert_equal(len(results), 6) - assert_equal(results[0], + assert len(results) == 6 + assert (results[0] == {'date': datetime.datetime(2011, 1, 1, 0, 0), 'place': 'Galway', 'temperature': 1}) @@ -336,11 +332,11 @@ def test_simple_xls(self): headers, results = jobs.push_to_datastore('fake_id', data, True) results = list(results) - assert_equal(headers, [{'type': 'timestamp', 'id': 'date'}, + assert (headers == [{'type': 'timestamp', 'id': 'date'}, {'type': 'numeric', 'id': 'temperature'}, {'type': 'text', 'id': 'place'}]) - assert_equal(len(results), 6) - assert_equal(results[0], + assert len(results) == 6 + assert (results[0] == {'date': datetime.datetime(2011, 1, 1, 0, 0), 'place': 'Galway', 'temperature': 1}) @@ -365,7 +361,7 @@ def test_real_csv(self): headers, results = jobs.push_to_datastore('fake_id', data, True) results = list(results) - assert_equal(headers, [{'type': 'text', 'id': 'Directorate'}, + assert (headers == [{'type': 'text', 'id': 'Directorate'}, {'type': 'text', 'id': 'Service Area'}, {'type': 'text', 'id': 'Expenditure Category'}, {'type': 'timestamp', 'id': 'Payment Date'}, @@ -376,8 +372,8 @@ def test_real_csv(self): {'type': 'text', 'id': 'Cost Centre Description'}, {'type': 'numeric', 'id': 'Grand Total'}]) - assert_equal(len(results), 230) - assert_equal(results[0], + assert len(results) == 230 + assert (results[0] == {'Directorate': 'Adult and Culture', 'Service Area': 'Ad Serv-Welfare Rights- ', 'Expenditure Category': 'Supplies & Services', @@ -411,12 +407,11 @@ def test_weird_header(self): headers, results = jobs.push_to_datastore('fake_id', data, True) results = list(results) - assert_equal(len(headers), 9) - assert_equal(len(results), 82) - assert_equal(headers[0]['id'].strip(), '1985') - assert_equal(results[1]['1993'].strip(), '379') + assert len(headers) == 9 + assert len(results) == 82 + assert 
headers[0]['id'].strip() == '1985' + assert results[1]['1993'].strip() == '379' - @raises(util.JobError) @httpretty.activate def test_bad_url(self): """It should raise HTTPError(JobError) if the resource.url is badly @@ -436,9 +431,9 @@ def test_bad_url(self): } } - jobs.push_to_datastore('fake_id', data, True) + with pytest.raises(util.JobError): + jobs.push_to_datastore('fake_id', data, True) - @raises(util.JobError) @httpretty.activate def test_bad_scheme(self): """It should raise HTTPError(JobError) if the resource.url is an @@ -458,7 +453,8 @@ def test_bad_scheme(self): } } - jobs.push_to_datastore('fake_id', data, True) + with pytest.raises(util.JobError): + jobs.push_to_datastore('fake_id', data, True) @httpretty.activate def test_mostly_numbers(self): @@ -481,8 +477,8 @@ def test_mostly_numbers(self): headers, results = jobs.push_to_datastore('fake_id', data, True) results = list(results) - assert_equal(len(headers), 19) - assert_equal(len(results), 133) + assert len(headers) == 19 + assert len(results) == 133 @httpretty.activate def test_long_file(self): @@ -505,8 +501,8 @@ def test_long_file(self): headers, results = jobs.push_to_datastore('fake_id', data, True) results = list(results) - assert_equal(len(headers), 1) - assert_equal(len(results), 4000) + assert len(headers) == 1 + assert len(results) == 4000 @httpretty.activate def test_do_not_push_when_same_hash(self): From 7991f0f3c7bb7a3ca2c12207bd3915c6524bc833 Mon Sep 17 00:00:00 2001 From: amercader Date: Wed, 13 Apr 2022 17:39:02 +0200 Subject: [PATCH 11/13] Don't test Python 3.10 just yet --- .github/workflows/test.yml | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0e1a372..649e0d0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -4,7 +4,7 @@ jobs: test: strategy: matrix: - python-version: [2.7, 3.6, 3.7, 3.8, 3.9, "3.10"] + python-version: [2.7, 3.6, 3.7, 3.8, 3.9] 
fail-fast: false name: Python ${{ matrix.python-version }} runs-on: ubuntu-latest diff --git a/setup.py b/setup.py index df3eaca..cb5fbdf 100644 --- a/setup.py +++ b/setup.py @@ -46,7 +46,7 @@ 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', - + 'Programming Language :: Python :: 3.9', ], # What does your project relate to? From 792a6e2255a62fb6777106839afdb9a12a681102 Mon Sep 17 00:00:00 2001 From: amercader Date: Wed, 13 Apr 2022 17:42:51 +0200 Subject: [PATCH 12/13] Bump version --- datapusher/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datapusher/__init__.py b/datapusher/__init__.py index b47451b..d9fc5d6 100644 --- a/datapusher/__init__.py +++ b/datapusher/__init__.py @@ -1 +1 @@ -__version__ = '0.0.17' +__version__ = '0.0.18' From 7bbc40b869089331b6bc94c3c865571a391634d4 Mon Sep 17 00:00:00 2001 From: Marc Dutoo Date: Fri, 31 Dec 2021 17:31:48 +0100 Subject: [PATCH 13/13] python3 Dockerfile, based on CKAN's --- Dockerfile | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..e22d34f --- /dev/null +++ b/Dockerfile @@ -0,0 +1,46 @@ +FROM debian:buster + +# Install required system packages +RUN apt-get -q -y update \ + && DEBIAN_FRONTEND=noninteractive apt-get -q -y upgrade \ + && apt-get -q -y install \ + python3-dev \ + python3-pip \ + python3-virtualenv \ + zlib1g-dev \ + libxml2-dev \ + libxslt1-dev \ + libffi-dev \ + # else error https://stackoverflow.com/questions/14547631/python-locale-error-unsupported-locale-setting + locales \ + postgresql-client \ + build-essential \ + git \ + vim \ + wget \ + && apt-get -q clean \ + && rm -rf /var/lib/apt/lists/* + + +RUN python3 -m virtualenv --python=python3 /venv +ENV PATH="/venv/bin:$PATH" + +# else error 
https://stackoverflow.com/questions/59633558/python-based-dockerfile-throws-locale-error-unsupported-locale-setting +ENV LC_ALL=C + +# NO else https://github.com/ckan/datapusher/issues/132 +#datapusher | File "/venv/src/datapusher/jobs.py", line 158, in check_response +#datapusher | request_url=request_url, response=response.text) +#datapusher | datapusher.jobs.HTTPError: +#ENV DATAPUSHER_SSL_VERIFY=true + +# Setup Datapusher +ADD . /venv/src/ +RUN pip install -U pip && \ + cd /venv/src/ && \ + pip install --upgrade --no-cache-dir -r requirements.txt && \ + pip install --upgrade --no-cache-dir -r requirements-dev.txt && \ + #pip install -e . + python setup.py develop + +CMD [ "python", "/venv/src/datapusher/main.py", "/venv/src/deployment/datapusher_settings.py"]