fix(types): 🎨 added stubs for gnews
AndyTheFactory committed Mar 17, 2024
1 parent 1666575 commit 86d7128
Showing 7 changed files with 95 additions and 29 deletions.
2 changes: 1 addition & 1 deletion newspaper/article.py
@@ -195,7 +195,7 @@ def __init__(
         scheme = urls.get_scheme(url)
         if scheme is None:
             scheme = "http"
-        source_url = scheme + "://" + urls.get_domain(url)
+        source_url = scheme + "://" + str(urls.get_domain(url))
 
         if source_url is None or source_url == "":
             raise ArticleException("input url bad format")
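The one-line change above is a typing fix: urls.get_domain is presumably annotated as returning Optional[str], so concatenating its result directly into source_url trips strict type checking. A minimal, self-contained sketch of the idea under that assumption (get_domain below is a hypothetical stand-in, not the library function):

from typing import Optional

def get_domain(url: str) -> Optional[str]:
    # hypothetical stand-in for newspaper.urls.get_domain
    netloc = url.split("://")[-1].split("/")[0]
    return netloc or None

scheme = "https"
url = "https://example.com/story.html"
# before: a strict checker flags the concatenation because get_domain may return None
# after: str() keeps the expression well-typed; a missing domain becomes the literal "None"
source_url = scheme + "://" + str(get_domain(url))
print(source_url)  # https://example.com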
4 changes: 2 additions & 2 deletions newspaper/extractors/categories_extractor.py
@@ -121,15 +121,15 @@ def _filter(candidate):
 
     def is_valid_link(self, url: str, filter_tld: str) -> Tuple[bool, Dict[str, Any]]:
         """Is the url a possible category?"""
-        parsed_url = {
+        parsed_url: Dict[str, Any] = {
             "scheme": urls.get_scheme(url, allow_fragments=False),
             "domain": urls.get_domain(url, allow_fragments=False),
             "path": urls.get_path(url, allow_fragments=False),
             "tld": None,
         }
 
         # No domain or path
-        if not parsed_url["domain"] and not parsed_url["path"]:
+        if not parsed_url["domain"] or not parsed_url["path"]:
             return False, parsed_url
         # remove any url that starts with #
         if parsed_url["path"] and parsed_url["path"].startswith("#"):
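The second change above tightens the guard: with and, a candidate was rejected only when both the domain and the path were missing; with or, a link must have both to be considered a category. A small sketch of the difference on simplified parsed-url dicts (not the extractor itself):

candidates = [
    {"domain": "example.com", "path": "/world"},  # both present
    {"domain": "example.com", "path": ""},        # path missing
    {"domain": "", "path": "/world"},             # domain missing
    {"domain": "", "path": ""},                   # both missing
]

for parsed in candidates:
    rejected_old = not parsed["domain"] and not parsed["path"]  # only rejects the last case
    rejected_new = not parsed["domain"] or not parsed["path"]   # rejects the last three cases
    print(parsed, "old:", rejected_old, "new:", rejected_new)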
28 changes: 2 additions & 26 deletions newspaper/urls.py
@@ -11,8 +11,8 @@
 import logging
 import re
 
-from typing import Optional, Tuple
-from urllib.parse import parse_qs, urljoin, urlparse, urlsplit, urlunsplit
+from typing import Optional
+from urllib.parse import parse_qs, urljoin, urlparse
 
 from tldextract import tldextract
 
@@ -102,28 +102,6 @@
 ]
 
 
-def remove_args(url: str, keep_params: Tuple[str] = (), frags: bool = False) -> str:
-    """
-    Remove all query arguments from a url.
-    Args:
-        url (str): the url to remove query arguments from
-        keep_params (Tuple[str]): a tuple of query parameters to keep
-        frags (bool): whether to keep the fragment part of the url
-    """
-    parsed = urlsplit(url)
-    filtered_query = "&".join(
-        qry_item
-        for qry_item in parsed.query.split("&")
-        if qry_item.startswith(keep_params)
-    )
-    if frags:
-        frag = parsed[4:]
-    else:
-        frag = ("",)
-
-    return urlunsplit(parsed[:3] + (filtered_query,) + frag)
-
-
 def redirect_back(url: str, source_domain: str) -> str:
     """
     Some sites like Pinterest have api's that cause news
@@ -167,9 +145,7 @@ def prepare_url(url: str, source_url: Optional[str] = None) -> str:
             source_domain = urlparse(source_url).netloc
             proper_url = urljoin(source_url, url)
             proper_url = redirect_back(proper_url, source_domain)
-            # proper_url = remove_args(proper_url)
         else:
-            # proper_url = remove_args(url)
             proper_url = url
     except ValueError as e:
         log.error("url %s failed on err %s", url, str(e))
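For reference, the deleted remove_args helper (apparently unused, as the removed commented-out calls in prepare_url show) stripped query arguments from a URL. A standalone sketch of the same logic with example output; note that the default keep_params=() hands str.startswith an empty tuple, which matches nothing and therefore drops every parameter:

from urllib.parse import urlsplit, urlunsplit

def remove_args(url, keep_params=(), frags=False):
    # same filtering idea as the removed helper, kept here only as a worked example
    parsed = urlsplit(url)
    filtered_query = "&".join(
        q for q in parsed.query.split("&") if q.startswith(keep_params)
    )
    frag = parsed[4:] if frags else ("",)
    return urlunsplit(parsed[:3] + (filtered_query,) + frag)

print(remove_args("https://example.com/a?id=1&utm_source=x#top"))
# https://example.com/a  (all parameters and the fragment dropped)
print(remove_args("https://example.com/a?id=1&utm_source=x", keep_params=("id",)))
# https://example.com/a?id=1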
Empty file added stubs/gnews.pyi
69 changes: 69 additions & 0 deletions stubs/gnews/gnews.pyi
@@ -0,0 +1,69 @@
from _typeshed import Incomplete
from gnews.utils.constants import (
    AVAILABLE_COUNTRIES as AVAILABLE_COUNTRIES,
    AVAILABLE_LANGUAGES as AVAILABLE_LANGUAGES,
    BASE_URL as BASE_URL,
    TOPICS as TOPICS,
    USER_AGENT as USER_AGENT,
)
from gnews.utils.utils import (
    connect_database as connect_database,
    post_database as post_database,
    process_url as process_url,
)

logger: Incomplete

class GNews:
    countries: Incomplete
    languages: Incomplete
    def __init__(
        self,
        language: str = "en",
        country: str | None = "US",
        max_results: int = 100,
        period: Incomplete | None = None,
        start_date: Incomplete | None = None,
        end_date: Incomplete | None = None,
        exclude_websites: Incomplete | None = None,
        proxy: Incomplete | None = None,
    ) -> None: ...
    @property
    def language(self): ...
    @language.setter
    def language(self, language) -> None: ...
    @property
    def exclude_websites(self): ...
    @exclude_websites.setter
    def exclude_websites(self, exclude_websites) -> None: ...
    @property
    def max_results(self): ...
    @max_results.setter
    def max_results(self, size) -> None: ...
    @property
    def period(self): ...
    @period.setter
    def period(self, period) -> None: ...
    @property
    def start_date(self): ...
    @start_date.setter
    def start_date(self, start_date) -> None: ...
    @property
    def end_date(self): ...
    @end_date.setter
    def end_date(self, end_date) -> None: ...
    @property
    def country(self): ...
    @country.setter
    def country(self, country) -> None: ...
    def get_full_article(self, url): ...
    def docstring_parameter(*sub): ...
    indent: str
    indent2: Incomplete
    standard_output: Incomplete
    def get_news(self, key): ...
    def get_top_news(self): ...
    def get_news_by_topic(self, topic: str): ...
    def get_news_by_location(self, location: str): ...
    def get_news_by_site(self, site: str): ...
    def store_in_mongodb(self, news) -> None: ...
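A hypothetical usage snippet that this stub would cover when the type checker is pointed at the stubs/ directory (for example via mypy's mypy_path setting); the constructor arguments and get_news come from the stub above, and running it requires the real gnews package:

from gnews.gnews import GNews

google_news = GNews(language="en", country="US", max_results=5)
results = google_news.get_news("artificial intelligence")  # typed as Incomplete in the stub
print(results)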
10 changes: 10 additions & 0 deletions stubs/gnews/utils/constants.pyi
@@ -0,0 +1,10 @@
from _typeshed import Incomplete

USER_AGENTS: Incomplete
USER_AGENT: Incomplete
AVAILABLE_LANGUAGES: Incomplete
AVAILABLE_COUNTRIES: Incomplete
GOOGLE_NEWS_URL: str
BASE_URL: Incomplete
GOOGLE_NEWS_REGEX: str
TOPICS: Incomplete
11 changes: 11 additions & 0 deletions stubs/gnews/utils/utils.pyi
@@ -0,0 +1,11 @@
from gnews.utils.constants import (
    AVAILABLE_COUNTRIES as AVAILABLE_COUNTRIES,
    AVAILABLE_LANGUAGES as AVAILABLE_LANGUAGES,
    GOOGLE_NEWS_REGEX as GOOGLE_NEWS_REGEX,
)

def lang_mapping(lang): ...
def country_mapping(country): ...
def connect_database(db_user, db_pw, db_name, collection_name): ...
def post_database(collection, news) -> None: ...
def process_url(item, exclude_websites): ...
