From 168e660514a2ced3f7e902cd50476010f33d2337 Mon Sep 17 00:00:00 2001 From: Adrien Barbaresi Date: Wed, 18 May 2022 17:45:32 +0200 Subject: [PATCH] prepare next release --- HISTORY.md | 8 ++++++++ setup.py | 2 +- trafilatura/__init__.py | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 6043fb12..3153091d 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,6 +1,14 @@ ## History / Changelog +### 1.2.2 +- more efficient rules for extraction +- metadata: further attributes used (with @felipehertzer) +- better baseline extraction +- issues fixed: #202, #204, #205 +- evaluation updated + + ### 1.2.1 - ``--precision`` and ``--recall`` arguments added to the CLI - better text cleaning: paywalls and comments diff --git a/setup.py b/setup.py index ae36eee4..31cb5cc9 100644 --- a/setup.py +++ b/setup.py @@ -93,7 +93,7 @@ def get_long_description(): install_requires=[ 'certifi', 'charset_normalizer >= 2.0.12', - 'courlan >= 0.7.1', + 'courlan >= 0.7.2', 'htmldate >= 1.2.1', 'justext >= 3.0.0', 'lxml >= 4.6.4', diff --git a/trafilatura/__init__.py b/trafilatura/__init__.py index add6347a..8506d2e8 100644 --- a/trafilatura/__init__.py +++ b/trafilatura/__init__.py @@ -8,7 +8,7 @@ __author__ = 'Adrien Barbaresi and contributors' __license__ = 'GNU GPL v3+' __copyright__ = 'Copyright 2019-2022, Adrien Barbaresi' -__version__ = '1.2.1' +__version__ = '1.2.2' import logging