diff --git a/arxiv/arxiv.py b/arxiv/arxiv.py index 4a7b1b5..e6e974e 100644 --- a/arxiv/arxiv.py +++ b/arxiv/arxiv.py @@ -1,4 +1,6 @@ """.. include:: ../README.md""" +from __future__ import annotations + import logging import time import feedparser @@ -35,7 +37,7 @@ class Result(object): """When the result was originally published.""" title: str """The title of the result.""" - authors: list + authors: List[Author] """The result's authors.""" summary: str """The result abstract.""" @@ -55,7 +57,7 @@ class Result(object): All of the result's categories. See [arXiv: Category Taxonomy](https://arxiv.org/category_taxonomy). """ - links: list + links: List[Link] """Up to three URLs associated with this result.""" pdf_url: str """The URL of a PDF version of this result if present among links.""" @@ -71,14 +73,14 @@ def __init__( updated: datetime = _DEFAULT_TIME, published: datetime = _DEFAULT_TIME, title: str = "", - authors: List['Result.Author'] = [], + authors: List[Author] = [], summary: str = "", comment: str = "", journal_ref: str = "", doi: str = "", primary_category: str = "", categories: List[str] = [], - links: List['Result.Link'] = [], + links: List[Link] = [], _raw: feedparser.FeedParserDict = None, ): """ @@ -104,7 +106,7 @@ def __init__( # Debugging self._raw = _raw - def _from_feed_entry(entry: feedparser.FeedParserDict) -> 'Result': + def _from_feed_entry(entry: feedparser.FeedParserDict) -> Result: """ Converts a feedparser entry for an arXiv search result feed into a Result object. @@ -221,7 +223,7 @@ def download_source(self, dirpath: str = './', filename: str = '') -> str: written_path, _ = urlretrieve(source_url, path) return written_path - def _get_pdf_url(links: list) -> str: + def _get_pdf_url(links: List[Link]) -> str: """ Finds the PDF link among a result's links and returns its URL. @@ -266,7 +268,7 @@ def __init__(self, name: str): def _from_feed_author( feed_author: feedparser.FeedParserDict - ) -> 'Result.Author': + ) -> Result.Author: """ Constructs an `Author` with the name specified in an author object from a feed entry. @@ -320,7 +322,7 @@ def __init__( def _from_feed_link( feed_link: feedparser.FeedParserDict - ) -> 'Result.Link': + ) -> Result.Link: """ Constructs a `Link` with link metadata specified in a link object from a feed entry. @@ -416,7 +418,7 @@ class Search(object): See [the arXiv API User's Manual: Details of Query Construction](https://arxiv.org/help/api/user-manual#query_details). """ - id_list: list + id_list: List[str] """ A list of arXiv article IDs to which to limit the search. diff --git a/docs/index.html b/docs/index.html index 476f7b5..3b25665 100644 --- a/docs/index.html +++ b/docs/index.html @@ -491,801 +491,803 @@

Example: logging

  1""".. include:: ../README.md"""
-  2import logging
-  3import time
-  4import feedparser
-  5import re
-  6import os
-  7import warnings
-  8
-  9from urllib.parse import urlencode
- 10from urllib.request import urlretrieve
- 11from datetime import datetime, timedelta, timezone
- 12from calendar import timegm
- 13
- 14from enum import Enum
- 15from typing import Dict, Generator, List
- 16
- 17logger = logging.getLogger(__name__)
+  2from __future__ import annotations
+  3
+  4import logging
+  5import time
+  6import feedparser
+  7import re
+  8import os
+  9import warnings
+ 10
+ 11from urllib.parse import urlencode
+ 12from urllib.request import urlretrieve
+ 13from datetime import datetime, timedelta, timezone
+ 14from calendar import timegm
+ 15
+ 16from enum import Enum
+ 17from typing import Dict, Generator, List
  18
- 19_DEFAULT_TIME = datetime.min
+ 19logger = logging.getLogger(__name__)
  20
- 21
- 22class Result(object):
- 23    """
- 24    An entry in an arXiv query results feed.
- 25
- 26    See [the arXiv API User's Manual: Details of Atom Results
- 27    Returned](https://arxiv.org/help/api/user-manual#_details_of_atom_results_returned).
- 28    """
- 29
- 30    entry_id: str
- 31    """A url of the form `http://arxiv.org/abs/{id}`."""
- 32    updated: datetime
- 33    """When the result was last updated."""
- 34    published: datetime
- 35    """When the result was originally published."""
- 36    title: str
- 37    """The title of the result."""
- 38    authors: list
- 39    """The result's authors."""
- 40    summary: str
- 41    """The result abstract."""
- 42    comment: str
- 43    """The authors' comment if present."""
- 44    journal_ref: str
- 45    """A journal reference if present."""
- 46    doi: str
- 47    """A URL for the resolved DOI to an external resource if present."""
- 48    primary_category: str
- 49    """
- 50    The result's primary arXiv category. See [arXiv: Category
- 51    Taxonomy](https://arxiv.org/category_taxonomy).
- 52    """
- 53    categories: List[str]
- 54    """
- 55    All of the result's categories. See [arXiv: Category
- 56    Taxonomy](https://arxiv.org/category_taxonomy).
- 57    """
- 58    links: list
- 59    """Up to three URLs associated with this result."""
- 60    pdf_url: str
- 61    """The URL of a PDF version of this result if present among links."""
- 62    _raw: feedparser.FeedParserDict
- 63    """
- 64    The raw feedparser result object if this Result was constructed with
- 65    Result._from_feed_entry.
- 66    """
- 67
- 68    def __init__(
- 69        self,
- 70        entry_id: str,
- 71        updated: datetime = _DEFAULT_TIME,
- 72        published: datetime = _DEFAULT_TIME,
- 73        title: str = "",
- 74        authors: List['Result.Author'] = [],
- 75        summary: str = "",
- 76        comment: str = "",
- 77        journal_ref: str = "",
- 78        doi: str = "",
- 79        primary_category: str = "",
- 80        categories: List[str] = [],
- 81        links: List['Result.Link'] = [],
- 82        _raw: feedparser.FeedParserDict = None,
- 83    ):
- 84        """
- 85        Constructs an arXiv search result item.
- 86
- 87        In most cases, prefer using `Result._from_feed_entry` to parsing and
- 88        constructing `Result`s yourself.
- 89        """
- 90        self.entry_id = entry_id
- 91        self.updated = updated
- 92        self.published = published
- 93        self.title = title
- 94        self.authors = authors
- 95        self.summary = summary
- 96        self.comment = comment
- 97        self.journal_ref = journal_ref
- 98        self.doi = doi
- 99        self.primary_category = primary_category
-100        self.categories = categories
-101        self.links = links
-102        # Calculated members
-103        self.pdf_url = Result._get_pdf_url(links)
-104        # Debugging
-105        self._raw = _raw
-106
-107    def _from_feed_entry(entry: feedparser.FeedParserDict) -> 'Result':
-108        """
-109        Converts a feedparser entry for an arXiv search result feed into a
-110        Result object.
-111        """
-112        if not hasattr(entry, "id"):
-113            raise Result.MissingFieldError("id")
-114        # Title attribute may be absent for certain titles. Defaulting to "0" as
-115        # it's the only title observed to cause this bug.
-116        # https://github.com/lukasschwab/arxiv.py/issues/71
-117        # title = entry.title if hasattr(entry, "title") else "0"
-118        title = "0"
-119        if hasattr(entry, "title"):
-120            title = entry.title
-121        else:
-122            logger.warning(
-123                "Result %s is missing title attribute; defaulting to '0'",
-124                entry.id
-125            )
-126        return Result(
-127            entry_id=entry.id,
-128            updated=Result._to_datetime(entry.updated_parsed),
-129            published=Result._to_datetime(entry.published_parsed),
-130            title=re.sub(r'\s+', ' ', title),
-131            authors=[Result.Author._from_feed_author(a) for a in entry.authors],
-132            summary=entry.summary,
-133            comment=entry.get('arxiv_comment'),
-134            journal_ref=entry.get('arxiv_journal_ref'),
-135            doi=entry.get('arxiv_doi'),
-136            primary_category=entry.arxiv_primary_category.get('term'),
-137            categories=[tag.get('term') for tag in entry.tags],
-138            links=[Result.Link._from_feed_link(link) for link in entry.links],
-139            _raw=entry
-140        )
-141
-142    def __str__(self) -> str:
-143        return self.entry_id
-144
-145    def __repr__(self) -> str:
-146        return (
-147            '{}(entry_id={}, updated={}, published={}, title={}, authors={}, '
-148            'summary={}, comment={}, journal_ref={}, doi={}, '
-149            'primary_category={}, categories={}, links={})'
-150        ).format(
-151            _classname(self),
-152            repr(self.entry_id),
-153            repr(self.updated),
-154            repr(self.published),
-155            repr(self.title),
-156            repr(self.authors),
-157            repr(self.summary),
-158            repr(self.comment),
-159            repr(self.journal_ref),
-160            repr(self.doi),
-161            repr(self.primary_category),
-162            repr(self.categories),
-163            repr(self.links)
-164        )
-165
-166    def __eq__(self, other) -> bool:
-167        if isinstance(other, Result):
-168            return self.entry_id == other.entry_id
-169        return False
-170
-171    def get_short_id(self) -> str:
-172        """
-173        Returns the short ID for this result.
-174
-175        + If the result URL is `"http://arxiv.org/abs/2107.05580v1"`,
-176        `result.get_short_id()` returns `2107.05580v1`.
-177
-178        + If the result URL is `"http://arxiv.org/abs/quant-ph/0201082v1"`,
-179        `result.get_short_id()` returns `"quant-ph/0201082v1"` (the pre-March
-180        2007 arXiv identifier format).
-181
-182        For an explanation of the difference between arXiv's legacy and current
-183        identifiers, see [Understanding the arXiv
-184        identifier](https://arxiv.org/help/arxiv_identifier).
-185        """
-186        return self.entry_id.split('arxiv.org/abs/')[-1]
-187
-188    def _get_default_filename(self, extension: str = "pdf") -> str:
-189        """
-190        A default `to_filename` function for the extension given.
-191        """
-192        nonempty_title = self.title if self.title else "UNTITLED"
-193        # Remove disallowed characters.
-194        clean_title = '_'.join(re.findall(r'\w+', nonempty_title))
-195        return "{}.{}.{}".format(self.get_short_id(), clean_title, extension)
-196
-197    def download_pdf(self, dirpath: str = './', filename: str = '') -> str:
-198        """
-199        Downloads the PDF for this result to the specified directory.
-200
-201        The filename is generated by calling `to_filename(self)`.
-202        """
-203        if not filename:
-204            filename = self._get_default_filename()
-205        path = os.path.join(dirpath, filename)
-206        written_path, _ = urlretrieve(self.pdf_url, path)
-207        return written_path
-208
-209    def download_source(self, dirpath: str = './', filename: str = '') -> str:
-210        """
-211        Downloads the source tarfile for this result to the specified
-212        directory.
-213
-214        The filename is generated by calling `to_filename(self)`.
-215        """
-216        if not filename:
-217            filename = self._get_default_filename('tar.gz')
-218        path = os.path.join(dirpath, filename)
-219        # Bodge: construct the source URL from the PDF URL.
-220        source_url = self.pdf_url.replace('/pdf/', '/src/')
-221        written_path, _ = urlretrieve(source_url, path)
-222        return written_path
-223
-224    def _get_pdf_url(links: list) -> str:
-225        """
-226        Finds the PDF link among a result's links and returns its URL.
-227
-228        Should only be called once for a given `Result`, in its constructor.
-229        After construction, the URL should be available in `Result.pdf_url`.
-230        """
-231        pdf_urls = [link.href for link in links if link.title == 'pdf']
-232        if len(pdf_urls) == 0:
-233            return None
-234        elif len(pdf_urls) > 1:
-235            logger.warning(
-236                "Result has multiple PDF links; using %s",
-237                pdf_urls[0]
-238            )
-239        return pdf_urls[0]
-240
-241    def _to_datetime(ts: time.struct_time) -> datetime:
-242        """
-243        Converts a UTC time.struct_time into a time-zone-aware datetime.
-244
-245        This will be replaced with feedparser functionality [when it becomes
-246        available](https://github.com/kurtmckee/feedparser/issues/212).
-247        """
-248        return datetime.fromtimestamp(timegm(ts), tz=timezone.utc)
-249
-250    class Author(object):
-251        """
-252        A light inner class for representing a result's authors.
-253        """
-254
-255        name: str
-256        """The author's name."""
-257
-258        def __init__(self, name: str):
-259            """
-260            Constructs an `Author` with the specified name.
-261
-262            In most cases, prefer using `Author._from_feed_author` to parsing
-263            and constructing `Author`s yourself.
-264            """
-265            self.name = name
-266
-267        def _from_feed_author(
-268            feed_author: feedparser.FeedParserDict
-269        ) -> 'Result.Author':
-270            """
-271            Constructs an `Author` with the name specified in an author object
-272            from a feed entry.
-273
-274            See usage in `Result._from_feed_entry`.
-275            """
-276            return Result.Author(feed_author.name)
-277
-278        def __str__(self) -> str:
-279            return self.name
-280
-281        def __repr__(self) -> str:
-282            return '{}({})'.format(_classname(self), repr(self.name))
-283
-284        def __eq__(self, other) -> bool:
-285            if isinstance(other, Result.Author):
-286                return self.name == other.name
-287            return False
-288
-289    class Link(object):
-290        """
-291        A light inner class for representing a result's links.
-292        """
-293
-294        href: str
-295        """The link's `href` attribute."""
-296        title: str
-297        """The link's title."""
-298        rel: str
-299        """The link's relationship to the `Result`."""
-300        content_type: str
-301        """The link's HTTP content type."""
-302
-303        def __init__(
-304            self,
-305            href: str,
-306            title: str = None,
-307            rel: str = None,
-308            content_type: str = None
-309        ):
-310            """
-311            Constructs a `Link` with the specified link metadata.
-312
-313            In most cases, prefer using `Link._from_feed_link` to parsing and
-314            constructing `Link`s yourself.
-315            """
-316            self.href = href
-317            self.title = title
-318            self.rel = rel
-319            self.content_type = content_type
-320
-321        def _from_feed_link(
-322            feed_link: feedparser.FeedParserDict
-323        ) -> 'Result.Link':
-324            """
-325            Constructs a `Link` with link metadata specified in a link object
-326            from a feed entry.
-327
-328            See usage in `Result._from_feed_entry`.
-329            """
-330            return Result.Link(
-331                href=feed_link.href,
-332                title=feed_link.get('title'),
-333                rel=feed_link.get('rel'),
-334                content_type=feed_link.get('content_type')
-335            )
-336
-337        def __str__(self) -> str:
-338            return self.href
-339
-340        def __repr__(self) -> str:
-341            return '{}({}, title={}, rel={}, content_type={})'.format(
-342                _classname(self),
-343                repr(self.href),
-344                repr(self.title),
-345                repr(self.rel),
-346                repr(self.content_type)
-347            )
-348
-349        def __eq__(self, other) -> bool:
-350            if isinstance(other, Result.Link):
-351                return self.href == other.href
-352            return False
-353
-354    class MissingFieldError(Exception):
-355        """
-356        An error indicating an entry is unparseable because it lacks required
-357        fields.
-358        """
-359
-360        missing_field: str
-361        """The required field missing from the would-be entry."""
-362        message: str
-363        """Message describing what caused this error."""
-364
-365        def __init__(self, missing_field):
-366            self.missing_field = missing_field
-367            self.message = "Entry from arXiv missing required info"
-368
-369        def __repr__(self) -> str:
-370            return '{}({})'.format(
-371                _classname(self),
-372                repr(self.missing_field)
-373            )
-374
-375
-376class SortCriterion(Enum):
-377    """
-378    A SortCriterion identifies a property by which search results can be
-379    sorted.
-380
-381    See [the arXiv API User's Manual: sort order for return
-382    results](https://arxiv.org/help/api/user-manual#sort).
-383    """
-384    Relevance = "relevance"
-385    LastUpdatedDate = "lastUpdatedDate"
-386    SubmittedDate = "submittedDate"
-387
-388
-389class SortOrder(Enum):
-390    """
-391    A SortOrder indicates order in which search results are sorted according
-392    to the specified arxiv.SortCriterion.
-393
-394    See [the arXiv API User's Manual: sort order for return
-395    results](https://arxiv.org/help/api/user-manual#sort).
-396    """
-397    Ascending = "ascending"
-398    Descending = "descending"
-399
-400
-401class Search(object):
-402    """
-403    A specification for a search of arXiv's database.
-404
-405    To run a search, use `Search.run` to use a default client or `Client.run`
-406    with a specific client.
-407    """
-408
-409    query: str
-410    """
-411    A query string.
-412
-413    This should be unencoded. Use `au:del_maestro AND ti:checkerboard`, not
-414    `au:del_maestro+AND+ti:checkerboard`.
-415
-416    See [the arXiv API User's Manual: Details of Query
-417    Construction](https://arxiv.org/help/api/user-manual#query_details).
-418    """
-419    id_list: list
-420    """
-421    A list of arXiv article IDs to which to limit the search.
-422
-423    See [the arXiv API User's
-424    Manual](https://arxiv.org/help/api/user-manual#search_query_and_id_list)
-425    for documentation of the interaction between `query` and `id_list`.
-426    """
-427    max_results: float
-428    """
-429    The maximum number of results to be returned in an execution of this
-430    search.
-431
-432    To fetch every result available, set `max_results=float('inf')`.
-433    """
-434    sort_by: SortCriterion
-435    """The sort criterion for results."""
-436    sort_order: SortOrder
-437    """The sort order for results."""
-438
-439    def __init__(
-440        self,
-441        query: str = "",
-442        id_list: List[str] = [],
-443        max_results: float = float('inf'),
-444        sort_by: SortCriterion = SortCriterion.Relevance,
-445        sort_order: SortOrder = SortOrder.Descending
-446    ):
-447        """
-448        Constructs an arXiv API search with the specified criteria.
-449        """
-450        self.query = query
-451        self.id_list = id_list
-452        self.max_results = max_results
-453        self.sort_by = sort_by
-454        self.sort_order = sort_order
-455
-456    def __str__(self) -> str:
-457        # TODO: develop a more informative string representation.
-458        return repr(self)
-459
-460    def __repr__(self) -> str:
-461        return (
-462            '{}(query={}, id_list={}, max_results={}, sort_by={}, '
-463            'sort_order={})'
-464        ).format(
-465            _classname(self),
-466            repr(self.query),
-467            repr(self.id_list),
-468            repr(self.max_results),
-469            repr(self.sort_by),
-470            repr(self.sort_order)
-471        )
-472
-473    def _url_args(self) -> Dict[str, str]:
-474        """
-475        Returns a dict of search parameters that should be included in an API
-476        request for this search.
-477        """
-478        return {
-479            "search_query": self.query,
-480            "id_list": ','.join(self.id_list),
-481            "sortBy": self.sort_by.value,
-482            "sortOrder": self.sort_order.value
-483        }
-484
-485    def get(self) -> Generator[Result, None, None]:
-486        """
-487        **Deprecated** after 1.2.0; use `Search.results`.
-488        """
-489        warnings.warn(
-490            "The 'get' method is deprecated, use 'results' instead",
-491            DeprecationWarning,
-492            stacklevel=2
-493        )
-494        return self.results()
-495
-496    def results(self, offset: int = 0) -> Generator[Result, None, None]:
-497        """
-498        Executes the specified search using a default arXiv API client.
-499
-500        For info on default behavior, see `Client.__init__` and `Client.results`.
-501        """
-502        return Client().results(self, offset=offset)
-503
-504
-505class Client(object):
-506    """
-507    Specifies a strategy for fetching results from arXiv's API.
-508
-509    This class obscures pagination and retry logic, and exposes
-510    `Client.results`.
-511    """
-512
-513    query_url_format = 'http://export.arxiv.org/api/query?{}'
-514    """The arXiv query API endpoint format."""
-515    page_size: int
-516    """Maximum number of results fetched in a single API request."""
-517    delay_seconds: int
-518    """Number of seconds to wait between API requests."""
-519    num_retries: int
-520    """Number of times to retry a failing API request."""
-521    _last_request_dt: datetime
-522
-523    def __init__(
-524        self,
-525        page_size: int = 100,
-526        delay_seconds: int = 3,
-527        num_retries: int = 3
-528    ):
-529        """
-530        Constructs an arXiv API client with the specified options.
-531
-532        Note: the default parameters should provide a robust request strategy
-533        for most use cases. Extreme page sizes, delays, or retries risk
-534        violating the arXiv [API Terms of Use](https://arxiv.org/help/api/tou),
-535        brittle behavior, and inconsistent results.
-536        """
-537        self.page_size = page_size
-538        self.delay_seconds = delay_seconds
-539        self.num_retries = num_retries
-540        self._last_request_dt = None
-541
-542    def __str__(self) -> str:
-543        # TODO: develop a more informative string representation.
-544        return repr(self)
-545
-546    def __repr__(self) -> str:
-547        return '{}(page_size={}, delay_seconds={}, num_retries={})'.format(
-548            _classname(self),
-549            repr(self.page_size),
-550            repr(self.delay_seconds),
-551            repr(self.num_retries)
-552        )
-553
-554    def get(self, search: Search) -> Generator[Result, None, None]:
-555        """
-556        **Deprecated** after 1.2.0; use `Client.results`.
-557        """
-558        warnings.warn(
-559            "The 'get' method is deprecated, use 'results' instead",
-560            DeprecationWarning,
-561            stacklevel=2
-562        )
-563        return self.results(search)
-564
-565    def results(self, search: Search, offset: int = 0) -> Generator[Result, None, None]:
-566        """
-567        Uses this client configuration to fetch one page of the search results
-568        at a time, yielding the parsed `Result`s, until `max_results` results
-569        have been yielded or there are no more search results.
-570
-571        If all tries fail, raises an `UnexpectedEmptyPageError` or `HTTPError`.
+ 21_DEFAULT_TIME = datetime.min
+ 22
+ 23
+ 24class Result(object):
+ 25    """
+ 26    An entry in an arXiv query results feed.
+ 27
+ 28    See [the arXiv API User's Manual: Details of Atom Results
+ 29    Returned](https://arxiv.org/help/api/user-manual#_details_of_atom_results_returned).
+ 30    """
+ 31
+ 32    entry_id: str
+ 33    """A url of the form `http://arxiv.org/abs/{id}`."""
+ 34    updated: datetime
+ 35    """When the result was last updated."""
+ 36    published: datetime
+ 37    """When the result was originally published."""
+ 38    title: str
+ 39    """The title of the result."""
+ 40    authors: List[Author]
+ 41    """The result's authors."""
+ 42    summary: str
+ 43    """The result abstract."""
+ 44    comment: str
+ 45    """The authors' comment if present."""
+ 46    journal_ref: str
+ 47    """A journal reference if present."""
+ 48    doi: str
+ 49    """A URL for the resolved DOI to an external resource if present."""
+ 50    primary_category: str
+ 51    """
+ 52    The result's primary arXiv category. See [arXiv: Category
+ 53    Taxonomy](https://arxiv.org/category_taxonomy).
+ 54    """
+ 55    categories: List[str]
+ 56    """
+ 57    All of the result's categories. See [arXiv: Category
+ 58    Taxonomy](https://arxiv.org/category_taxonomy).
+ 59    """
+ 60    links: List[Link]
+ 61    """Up to three URLs associated with this result."""
+ 62    pdf_url: str
+ 63    """The URL of a PDF version of this result if present among links."""
+ 64    _raw: feedparser.FeedParserDict
+ 65    """
+ 66    The raw feedparser result object if this Result was constructed with
+ 67    Result._from_feed_entry.
+ 68    """
+ 69
+ 70    def __init__(
+ 71        self,
+ 72        entry_id: str,
+ 73        updated: datetime = _DEFAULT_TIME,
+ 74        published: datetime = _DEFAULT_TIME,
+ 75        title: str = "",
+ 76        authors: List[Author] = [],
+ 77        summary: str = "",
+ 78        comment: str = "",
+ 79        journal_ref: str = "",
+ 80        doi: str = "",
+ 81        primary_category: str = "",
+ 82        categories: List[str] = [],
+ 83        links: List[Link] = [],
+ 84        _raw: feedparser.FeedParserDict = None,
+ 85    ):
+ 86        """
+ 87        Constructs an arXiv search result item.
+ 88
+ 89        In most cases, prefer using `Result._from_feed_entry` to parsing and
+ 90        constructing `Result`s yourself.
+ 91        """
+ 92        self.entry_id = entry_id
+ 93        self.updated = updated
+ 94        self.published = published
+ 95        self.title = title
+ 96        self.authors = authors
+ 97        self.summary = summary
+ 98        self.comment = comment
+ 99        self.journal_ref = journal_ref
+100        self.doi = doi
+101        self.primary_category = primary_category
+102        self.categories = categories
+103        self.links = links
+104        # Calculated members
+105        self.pdf_url = Result._get_pdf_url(links)
+106        # Debugging
+107        self._raw = _raw
+108
+109    def _from_feed_entry(entry: feedparser.FeedParserDict) -> Result:
+110        """
+111        Converts a feedparser entry for an arXiv search result feed into a
+112        Result object.
+113        """
+114        if not hasattr(entry, "id"):
+115            raise Result.MissingFieldError("id")
+116        # Title attribute may be absent for certain titles. Defaulting to "0" as
+117        # it's the only title observed to cause this bug.
+118        # https://github.com/lukasschwab/arxiv.py/issues/71
+119        # title = entry.title if hasattr(entry, "title") else "0"
+120        title = "0"
+121        if hasattr(entry, "title"):
+122            title = entry.title
+123        else:
+124            logger.warning(
+125                "Result %s is missing title attribute; defaulting to '0'",
+126                entry.id
+127            )
+128        return Result(
+129            entry_id=entry.id,
+130            updated=Result._to_datetime(entry.updated_parsed),
+131            published=Result._to_datetime(entry.published_parsed),
+132            title=re.sub(r'\s+', ' ', title),
+133            authors=[Result.Author._from_feed_author(a) for a in entry.authors],
+134            summary=entry.summary,
+135            comment=entry.get('arxiv_comment'),
+136            journal_ref=entry.get('arxiv_journal_ref'),
+137            doi=entry.get('arxiv_doi'),
+138            primary_category=entry.arxiv_primary_category.get('term'),
+139            categories=[tag.get('term') for tag in entry.tags],
+140            links=[Result.Link._from_feed_link(link) for link in entry.links],
+141            _raw=entry
+142        )
+143
+144    def __str__(self) -> str:
+145        return self.entry_id
+146
+147    def __repr__(self) -> str:
+148        return (
+149            '{}(entry_id={}, updated={}, published={}, title={}, authors={}, '
+150            'summary={}, comment={}, journal_ref={}, doi={}, '
+151            'primary_category={}, categories={}, links={})'
+152        ).format(
+153            _classname(self),
+154            repr(self.entry_id),
+155            repr(self.updated),
+156            repr(self.published),
+157            repr(self.title),
+158            repr(self.authors),
+159            repr(self.summary),
+160            repr(self.comment),
+161            repr(self.journal_ref),
+162            repr(self.doi),
+163            repr(self.primary_category),
+164            repr(self.categories),
+165            repr(self.links)
+166        )
+167
+168    def __eq__(self, other) -> bool:
+169        if isinstance(other, Result):
+170            return self.entry_id == other.entry_id
+171        return False
+172
+173    def get_short_id(self) -> str:
+174        """
+175        Returns the short ID for this result.
+176
+177        + If the result URL is `"http://arxiv.org/abs/2107.05580v1"`,
+178        `result.get_short_id()` returns `2107.05580v1`.
+179
+180        + If the result URL is `"http://arxiv.org/abs/quant-ph/0201082v1"`,
+181        `result.get_short_id()` returns `"quant-ph/0201082v1"` (the pre-March
+182        2007 arXiv identifier format).
+183
+184        For an explanation of the difference between arXiv's legacy and current
+185        identifiers, see [Understanding the arXiv
+186        identifier](https://arxiv.org/help/arxiv_identifier).
+187        """
+188        return self.entry_id.split('arxiv.org/abs/')[-1]
+189
+190    def _get_default_filename(self, extension: str = "pdf") -> str:
+191        """
+192        A default `to_filename` function for the extension given.
+193        """
+194        nonempty_title = self.title if self.title else "UNTITLED"
+195        # Remove disallowed characters.
+196        clean_title = '_'.join(re.findall(r'\w+', nonempty_title))
+197        return "{}.{}.{}".format(self.get_short_id(), clean_title, extension)
+198
+199    def download_pdf(self, dirpath: str = './', filename: str = '') -> str:
+200        """
+201        Downloads the PDF for this result to the specified directory.
+202
+203        The filename is generated by calling `to_filename(self)`.
+204        """
+205        if not filename:
+206            filename = self._get_default_filename()
+207        path = os.path.join(dirpath, filename)
+208        written_path, _ = urlretrieve(self.pdf_url, path)
+209        return written_path
+210
+211    def download_source(self, dirpath: str = './', filename: str = '') -> str:
+212        """
+213        Downloads the source tarfile for this result to the specified
+214        directory.
+215
+216        The filename is generated by calling `to_filename(self)`.
+217        """
+218        if not filename:
+219            filename = self._get_default_filename('tar.gz')
+220        path = os.path.join(dirpath, filename)
+221        # Bodge: construct the source URL from the PDF URL.
+222        source_url = self.pdf_url.replace('/pdf/', '/src/')
+223        written_path, _ = urlretrieve(source_url, path)
+224        return written_path
+225
+226    def _get_pdf_url(links: List[Link]) -> str:
+227        """
+228        Finds the PDF link among a result's links and returns its URL.
+229
+230        Should only be called once for a given `Result`, in its constructor.
+231        After construction, the URL should be available in `Result.pdf_url`.
+232        """
+233        pdf_urls = [link.href for link in links if link.title == 'pdf']
+234        if len(pdf_urls) == 0:
+235            return None
+236        elif len(pdf_urls) > 1:
+237            logger.warning(
+238                "Result has multiple PDF links; using %s",
+239                pdf_urls[0]
+240            )
+241        return pdf_urls[0]
+242
+243    def _to_datetime(ts: time.struct_time) -> datetime:
+244        """
+245        Converts a UTC time.struct_time into a time-zone-aware datetime.
+246
+247        This will be replaced with feedparser functionality [when it becomes
+248        available](https://github.com/kurtmckee/feedparser/issues/212).
+249        """
+250        return datetime.fromtimestamp(timegm(ts), tz=timezone.utc)
+251
+252    class Author(object):
+253        """
+254        A light inner class for representing a result's authors.
+255        """
+256
+257        name: str
+258        """The author's name."""
+259
+260        def __init__(self, name: str):
+261            """
+262            Constructs an `Author` with the specified name.
+263
+264            In most cases, prefer using `Author._from_feed_author` to parsing
+265            and constructing `Author`s yourself.
+266            """
+267            self.name = name
+268
+269        def _from_feed_author(
+270            feed_author: feedparser.FeedParserDict
+271        ) -> Result.Author:
+272            """
+273            Constructs an `Author` with the name specified in an author object
+274            from a feed entry.
+275
+276            See usage in `Result._from_feed_entry`.
+277            """
+278            return Result.Author(feed_author.name)
+279
+280        def __str__(self) -> str:
+281            return self.name
+282
+283        def __repr__(self) -> str:
+284            return '{}({})'.format(_classname(self), repr(self.name))
+285
+286        def __eq__(self, other) -> bool:
+287            if isinstance(other, Result.Author):
+288                return self.name == other.name
+289            return False
+290
+291    class Link(object):
+292        """
+293        A light inner class for representing a result's links.
+294        """
+295
+296        href: str
+297        """The link's `href` attribute."""
+298        title: str
+299        """The link's title."""
+300        rel: str
+301        """The link's relationship to the `Result`."""
+302        content_type: str
+303        """The link's HTTP content type."""
+304
+305        def __init__(
+306            self,
+307            href: str,
+308            title: str = None,
+309            rel: str = None,
+310            content_type: str = None
+311        ):
+312            """
+313            Constructs a `Link` with the specified link metadata.
+314
+315            In most cases, prefer using `Link._from_feed_link` to parsing and
+316            constructing `Link`s yourself.
+317            """
+318            self.href = href
+319            self.title = title
+320            self.rel = rel
+321            self.content_type = content_type
+322
+323        def _from_feed_link(
+324            feed_link: feedparser.FeedParserDict
+325        ) -> Result.Link:
+326            """
+327            Constructs a `Link` with link metadata specified in a link object
+328            from a feed entry.
+329
+330            See usage in `Result._from_feed_entry`.
+331            """
+332            return Result.Link(
+333                href=feed_link.href,
+334                title=feed_link.get('title'),
+335                rel=feed_link.get('rel'),
+336                content_type=feed_link.get('content_type')
+337            )
+338
+339        def __str__(self) -> str:
+340            return self.href
+341
+342        def __repr__(self) -> str:
+343            return '{}({}, title={}, rel={}, content_type={})'.format(
+344                _classname(self),
+345                repr(self.href),
+346                repr(self.title),
+347                repr(self.rel),
+348                repr(self.content_type)
+349            )
+350
+351        def __eq__(self, other) -> bool:
+352            if isinstance(other, Result.Link):
+353                return self.href == other.href
+354            return False
+355
+356    class MissingFieldError(Exception):
+357        """
+358        An error indicating an entry is unparseable because it lacks required
+359        fields.
+360        """
+361
+362        missing_field: str
+363        """The required field missing from the would-be entry."""
+364        message: str
+365        """Message describing what caused this error."""
+366
+367        def __init__(self, missing_field):
+368            self.missing_field = missing_field
+369            self.message = "Entry from arXiv missing required info"
+370
+371        def __repr__(self) -> str:
+372            return '{}({})'.format(
+373                _classname(self),
+374                repr(self.missing_field)
+375            )
+376
+377
+378class SortCriterion(Enum):
+379    """
+380    A SortCriterion identifies a property by which search results can be
+381    sorted.
+382
+383    See [the arXiv API User's Manual: sort order for return
+384    results](https://arxiv.org/help/api/user-manual#sort).
+385    """
+386    Relevance = "relevance"
+387    LastUpdatedDate = "lastUpdatedDate"
+388    SubmittedDate = "submittedDate"
+389
+390
+391class SortOrder(Enum):
+392    """
+393    A SortOrder indicates order in which search results are sorted according
+394    to the specified arxiv.SortCriterion.
+395
+396    See [the arXiv API User's Manual: sort order for return
+397    results](https://arxiv.org/help/api/user-manual#sort).
+398    """
+399    Ascending = "ascending"
+400    Descending = "descending"
+401
+402
+403class Search(object):
+404    """
+405    A specification for a search of arXiv's database.
+406
+407    To run a search, use `Search.run` to use a default client or `Client.run`
+408    with a specific client.
+409    """
+410
+411    query: str
+412    """
+413    A query string.
+414
+415    This should be unencoded. Use `au:del_maestro AND ti:checkerboard`, not
+416    `au:del_maestro+AND+ti:checkerboard`.
+417
+418    See [the arXiv API User's Manual: Details of Query
+419    Construction](https://arxiv.org/help/api/user-manual#query_details).
+420    """
+421    id_list: List[str]
+422    """
+423    A list of arXiv article IDs to which to limit the search.
+424
+425    See [the arXiv API User's
+426    Manual](https://arxiv.org/help/api/user-manual#search_query_and_id_list)
+427    for documentation of the interaction between `query` and `id_list`.
+428    """
+429    max_results: float
+430    """
+431    The maximum number of results to be returned in an execution of this
+432    search.
+433
+434    To fetch every result available, set `max_results=float('inf')`.
+435    """
+436    sort_by: SortCriterion
+437    """The sort criterion for results."""
+438    sort_order: SortOrder
+439    """The sort order for results."""
+440
+441    def __init__(
+442        self,
+443        query: str = "",
+444        id_list: List[str] = [],
+445        max_results: float = float('inf'),
+446        sort_by: SortCriterion = SortCriterion.Relevance,
+447        sort_order: SortOrder = SortOrder.Descending
+448    ):
+449        """
+450        Constructs an arXiv API search with the specified criteria.
+451        """
+452        self.query = query
+453        self.id_list = id_list
+454        self.max_results = max_results
+455        self.sort_by = sort_by
+456        self.sort_order = sort_order
+457
+458    def __str__(self) -> str:
+459        # TODO: develop a more informative string representation.
+460        return repr(self)
+461
+462    def __repr__(self) -> str:
+463        return (
+464            '{}(query={}, id_list={}, max_results={}, sort_by={}, '
+465            'sort_order={})'
+466        ).format(
+467            _classname(self),
+468            repr(self.query),
+469            repr(self.id_list),
+470            repr(self.max_results),
+471            repr(self.sort_by),
+472            repr(self.sort_order)
+473        )
+474
+475    def _url_args(self) -> Dict[str, str]:
+476        """
+477        Returns a dict of search parameters that should be included in an API
+478        request for this search.
+479        """
+480        return {
+481            "search_query": self.query,
+482            "id_list": ','.join(self.id_list),
+483            "sortBy": self.sort_by.value,
+484            "sortOrder": self.sort_order.value
+485        }
+486
+487    def get(self) -> Generator[Result, None, None]:
+488        """
+489        **Deprecated** after 1.2.0; use `Search.results`.
+490        """
+491        warnings.warn(
+492            "The 'get' method is deprecated, use 'results' instead",
+493            DeprecationWarning,
+494            stacklevel=2
+495        )
+496        return self.results()
+497
+498    def results(self, offset: int = 0) -> Generator[Result, None, None]:
+499        """
+500        Executes the specified search using a default arXiv API client.
+501
+502        For info on default behavior, see `Client.__init__` and `Client.results`.
+503        """
+504        return Client().results(self, offset=offset)
+505
+506
+507class Client(object):
+508    """
+509    Specifies a strategy for fetching results from arXiv's API.
+510
+511    This class obscures pagination and retry logic, and exposes
+512    `Client.results`.
+513    """
+514
+515    query_url_format = 'http://export.arxiv.org/api/query?{}'
+516    """The arXiv query API endpoint format."""
+517    page_size: int
+518    """Maximum number of results fetched in a single API request."""
+519    delay_seconds: int
+520    """Number of seconds to wait between API requests."""
+521    num_retries: int
+522    """Number of times to retry a failing API request."""
+523    _last_request_dt: datetime
+524
+525    def __init__(
+526        self,
+527        page_size: int = 100,
+528        delay_seconds: int = 3,
+529        num_retries: int = 3
+530    ):
+531        """
+532        Constructs an arXiv API client with the specified options.
+533
+534        Note: the default parameters should provide a robust request strategy
+535        for most use cases. Extreme page sizes, delays, or retries risk
+536        violating the arXiv [API Terms of Use](https://arxiv.org/help/api/tou),
+537        brittle behavior, and inconsistent results.
+538        """
+539        self.page_size = page_size
+540        self.delay_seconds = delay_seconds
+541        self.num_retries = num_retries
+542        self._last_request_dt = None
+543
+544    def __str__(self) -> str:
+545        # TODO: develop a more informative string representation.
+546        return repr(self)
+547
+548    def __repr__(self) -> str:
+549        return '{}(page_size={}, delay_seconds={}, num_retries={})'.format(
+550            _classname(self),
+551            repr(self.page_size),
+552            repr(self.delay_seconds),
+553            repr(self.num_retries)
+554        )
+555
+556    def get(self, search: Search) -> Generator[Result, None, None]:
+557        """
+558        **Deprecated** after 1.2.0; use `Client.results`.
+559        """
+560        warnings.warn(
+561            "The 'get' method is deprecated, use 'results' instead",
+562            DeprecationWarning,
+563            stacklevel=2
+564        )
+565        return self.results(search)
+566
+567    def results(self, search: Search, offset: int = 0) -> Generator[Result, None, None]:
+568        """
+569        Uses this client configuration to fetch one page of the search results
+570        at a time, yielding the parsed `Result`s, until `max_results` results
+571        have been yielded or there are no more search results.
 572
-573        Setting a nonzero `offset` discards leading records in the result set.
-574        When `offset` is greater than or equal to `search.max_results`, the full
-575        result set is discarded.
-576
-577        For more on using generators, see
-578        [Generators](https://wiki.python.org/moin/Generators).
-579        """
-580
-581        # total_results may be reduced according to the feed's
-582        # opensearch:totalResults value.
-583        total_results = search.max_results
-584        first_page = True
-585        while offset < total_results:
-586            page_size = min(self.page_size, search.max_results - offset)
-587            logger.info("Requesting {} results at offset {}".format(
-588                page_size,
-589                offset,
-590            ))
-591            page_url = self._format_url(search, offset, page_size)
-592            feed = self._parse_feed(page_url, first_page)
-593            if first_page:
-594                # NOTE: this is an ugly fix for a known bug. The totalresults
-595                # value is set to 1 for results with zero entries. If that API
-596                # bug is fixed, we can remove this conditional and always set
-597                # `total_results = min(...)`.
-598                if len(feed.entries) == 0:
-599                    logger.info("Got empty results; stopping generation")
-600                    total_results = 0
-601                else:
-602                    total_results = min(
-603                        total_results,
-604                        int(feed.feed.opensearch_totalresults)
-605                    )
-606                    logger.info("Got first page; {} of {} results available".format(
-607                        total_results,
-608                        search.max_results
-609                    ))
-610                # Subsequent pages are not the first page.
-611                first_page = False
-612            # Update offset for next request: account for received results.
-613            offset += len(feed.entries)
-614            # Yield query results until page is exhausted.
-615            for entry in feed.entries:
-616                try:
-617                    yield Result._from_feed_entry(entry)
-618                except Result.MissingFieldError:
-619                    logger.warning("Skipping partial result")
-620                    continue
-621
-622    def _format_url(self, search: Search, start: int, page_size: int) -> str:
-623        """
-624        Construct a request API for search that returns up to `page_size`
-625        results starting with the result at index `start`.
-626        """
-627        url_args = search._url_args()
-628        url_args.update({
-629            "start": start,
-630            "max_results": page_size,
-631        })
-632        return self.query_url_format.format(urlencode(url_args))
-633
-634    def _parse_feed(
-635        self,
-636        url: str,
-637        first_page: bool = True
-638    ) -> feedparser.FeedParserDict:
-639        """
-640        Fetches the specified URL and parses it with feedparser.
-641
-642        If a request fails or is unexpectedly empty, retries the request up to
-643        `self.num_retries` times.
-644        """
-645        # Invoke the recursive helper with initial available retries.
-646        return self.__try_parse_feed(
-647            url,
-648            first_page=first_page,
-649            retries_left=self.num_retries
-650        )
-651
-652    def __try_parse_feed(
-653        self,
-654        url: str,
-655        first_page: bool,
-656        retries_left: int,
-657        last_err: Exception = None,
-658    ) -> feedparser.FeedParserDict:
-659        """
-660        Recursive helper for _parse_feed. Enforces `self.delay_seconds`: if that
-661        number of seconds has not passed since `_parse_feed` was last called,
-662        sleeps until delay_seconds seconds have passed.
-663        """
-664        retry = self.num_retries - retries_left
-665        # If this call would violate the rate limit, sleep until it doesn't.
-666        if self._last_request_dt is not None:
-667            required = timedelta(seconds=self.delay_seconds)
-668            since_last_request = datetime.now() - self._last_request_dt
-669            if since_last_request < required:
-670                to_sleep = (required - since_last_request).total_seconds()
-671                logger.info("Sleeping for %f seconds", to_sleep)
-672                time.sleep(to_sleep)
-673        logger.info("Requesting page of results", extra={
-674            'url': url,
-675            'first_page': first_page,
-676            'retry': retry,
-677            'last_err': last_err.message if last_err is not None else None,
-678        })
-679        feed = feedparser.parse(url)
-680        self._last_request_dt = datetime.now()
-681        err = None
-682        if feed.status != 200:
-683            err = HTTPError(url, retry, feed)
-684        elif len(feed.entries) == 0 and not first_page:
-685            err = UnexpectedEmptyPageError(url, retry)
-686        if err is not None:
-687            if retries_left > 0:
-688                return self.__try_parse_feed(
-689                    url,
-690                    first_page=first_page,
-691                    retries_left=retries_left-1,
-692                    last_err=err,
-693                )
-694            # Feed was never returned in self.num_retries tries. Raise the last
-695            # exception encountered.
-696            raise err
-697        return feed
-698
-699
-700class ArxivError(Exception):
-701    """This package's base Exception class."""
-702
-703    url: str
-704    """The feed URL that could not be fetched."""
-705    retry: int
-706    """
-707    The request try number which encountered this error; 0 for the initial try,
-708    1 for the first retry, and so on.
-709    """
-710    message: str
-711    """Message describing what caused this error."""
-712
-713    def __init__(self, url: str, retry: int, message: str):
-714        """
-715        Constructs an `ArxivError` encountered while fetching the specified URL.
-716        """
-717        self.url = url
-718        self.retry = retry
-719        self.message = message
-720        super().__init__(self.message)
-721
-722    def __str__(self) -> str:
-723        return '{} ({})'.format(self.message, self.url)
-724
-725
-726class UnexpectedEmptyPageError(ArxivError):
-727    """
-728    An error raised when a page of results that should be non-empty is empty.
-729
-730    This should never happen in theory, but happens sporadically due to
-731    brittleness in the underlying arXiv API; usually resolved by retries.
-732
-733    See `Client.results` for usage.
-734    """
-735    def __init__(self, url: str, retry: int):
-736        """
-737        Constructs an `UnexpectedEmptyPageError` encountered for the specified
-738        API URL after `retry` tries.
-739        """
-740        self.url = url
-741        super().__init__(url, retry, "Page of results was unexpectedly empty")
-742
-743    def __repr__(self) -> str:
-744        return '{}({}, {})'.format(
-745            _classname(self),
-746            repr(self.url),
-747            repr(self.retry)
-748        )
-749
-750
-751class HTTPError(ArxivError):
-752    """
-753    A non-200 status encountered while fetching a page of results.
-754
-755    See `Client.results` for usage.
-756    """
-757
-758    status: int
-759    """The HTTP status reported by feedparser."""
-760    entry: feedparser.FeedParserDict
-761    """The feed entry describing the error, if present."""
-762
-763    def __init__(self, url: str, retry: int, feed: feedparser.FeedParserDict):
-764        """
-765        Constructs an `HTTPError` for the specified status code, encountered for
-766        the specified API URL after `retry` tries.
-767        """
-768        self.url = url
-769        self.status = feed.status
-770        # If the feed is valid and includes a single entry, trust it's an
-771        # explanation.
-772        if not feed.bozo and len(feed.entries) == 1:
-773            self.entry = feed.entries[0]
-774        else:
-775            self.entry = None
-776        super().__init__(
-777            url,
-778            retry,
-779            "Page request resulted in HTTP {}: {}".format(
-780                self.status,
-781                self.entry.summary if self.entry else None,
-782            ),
-783        )
-784
-785    def __repr__(self) -> str:
-786        return '{}({}, {}, {})'.format(
-787            _classname(self),
-788            repr(self.url),
-789            repr(self.retry),
-790            repr(self.status)
-791        )
-792
-793
-794def _classname(o):
-795    """A helper function for use in __repr__ methods: arxiv.Result.Link."""
-796    return 'arxiv.{}'.format(o.__class__.__qualname__)
+573        If all tries fail, raises an `UnexpectedEmptyPageError` or `HTTPError`.
+574
+575        Setting a nonzero `offset` discards leading records in the result set.
+576        When `offset` is greater than or equal to `search.max_results`, the full
+577        result set is discarded.
+578
+579        For more on using generators, see
+580        [Generators](https://wiki.python.org/moin/Generators).
+581        """
+582
+583        # total_results may be reduced according to the feed's
+584        # opensearch:totalResults value.
+585        total_results = search.max_results
+586        first_page = True
+587        while offset < total_results:
+588            page_size = min(self.page_size, search.max_results - offset)
+589            logger.info("Requesting {} results at offset {}".format(
+590                page_size,
+591                offset,
+592            ))
+593            page_url = self._format_url(search, offset, page_size)
+594            feed = self._parse_feed(page_url, first_page)
+595            if first_page:
+596                # NOTE: this is an ugly fix for a known bug. The totalresults
+597                # value is set to 1 for results with zero entries. If that API
+598                # bug is fixed, we can remove this conditional and always set
+599                # `total_results = min(...)`.
+600                if len(feed.entries) == 0:
+601                    logger.info("Got empty results; stopping generation")
+602                    total_results = 0
+603                else:
+604                    total_results = min(
+605                        total_results,
+606                        int(feed.feed.opensearch_totalresults)
+607                    )
+608                    logger.info("Got first page; {} of {} results available".format(
+609                        total_results,
+610                        search.max_results
+611                    ))
+612                # Subsequent pages are not the first page.
+613                first_page = False
+614            # Update offset for next request: account for received results.
+615            offset += len(feed.entries)
+616            # Yield query results until page is exhausted.
+617            for entry in feed.entries:
+618                try:
+619                    yield Result._from_feed_entry(entry)
+620                except Result.MissingFieldError:
+621                    logger.warning("Skipping partial result")
+622                    continue
+623
+624    def _format_url(self, search: Search, start: int, page_size: int) -> str:
+625        """
+626        Construct a request API for search that returns up to `page_size`
+627        results starting with the result at index `start`.
+628        """
+629        url_args = search._url_args()
+630        url_args.update({
+631            "start": start,
+632            "max_results": page_size,
+633        })
+634        return self.query_url_format.format(urlencode(url_args))
+635
+636    def _parse_feed(
+637        self,
+638        url: str,
+639        first_page: bool = True
+640    ) -> feedparser.FeedParserDict:
+641        """
+642        Fetches the specified URL and parses it with feedparser.
+643
+644        If a request fails or is unexpectedly empty, retries the request up to
+645        `self.num_retries` times.
+646        """
+647        # Invoke the recursive helper with initial available retries.
+648        return self.__try_parse_feed(
+649            url,
+650            first_page=first_page,
+651            retries_left=self.num_retries
+652        )
+653
+654    def __try_parse_feed(
+655        self,
+656        url: str,
+657        first_page: bool,
+658        retries_left: int,
+659        last_err: Exception = None,
+660    ) -> feedparser.FeedParserDict:
+661        """
+662        Recursive helper for _parse_feed. Enforces `self.delay_seconds`: if that
+663        number of seconds has not passed since `_parse_feed` was last called,
+664        sleeps until delay_seconds seconds have passed.
+665        """
+666        retry = self.num_retries - retries_left
+667        # If this call would violate the rate limit, sleep until it doesn't.
+668        if self._last_request_dt is not None:
+669            required = timedelta(seconds=self.delay_seconds)
+670            since_last_request = datetime.now() - self._last_request_dt
+671            if since_last_request < required:
+672                to_sleep = (required - since_last_request).total_seconds()
+673                logger.info("Sleeping for %f seconds", to_sleep)
+674                time.sleep(to_sleep)
+675        logger.info("Requesting page of results", extra={
+676            'url': url,
+677            'first_page': first_page,
+678            'retry': retry,
+679            'last_err': last_err.message if last_err is not None else None,
+680        })
+681        feed = feedparser.parse(url)
+682        self._last_request_dt = datetime.now()
+683        err = None
+684        if feed.status != 200:
+685            err = HTTPError(url, retry, feed)
+686        elif len(feed.entries) == 0 and not first_page:
+687            err = UnexpectedEmptyPageError(url, retry)
+688        if err is not None:
+689            if retries_left > 0:
+690                return self.__try_parse_feed(
+691                    url,
+692                    first_page=first_page,
+693                    retries_left=retries_left-1,
+694                    last_err=err,
+695                )
+696            # Feed was never returned in self.num_retries tries. Raise the last
+697            # exception encountered.
+698            raise err
+699        return feed
+700
+701
+702class ArxivError(Exception):
+703    """This package's base Exception class."""
+704
+705    url: str
+706    """The feed URL that could not be fetched."""
+707    retry: int
+708    """
+709    The request try number which encountered this error; 0 for the initial try,
+710    1 for the first retry, and so on.
+711    """
+712    message: str
+713    """Message describing what caused this error."""
+714
+715    def __init__(self, url: str, retry: int, message: str):
+716        """
+717        Constructs an `ArxivError` encountered while fetching the specified URL.
+718        """
+719        self.url = url
+720        self.retry = retry
+721        self.message = message
+722        super().__init__(self.message)
+723
+724    def __str__(self) -> str:
+725        return '{} ({})'.format(self.message, self.url)
+726
+727
+728class UnexpectedEmptyPageError(ArxivError):
+729    """
+730    An error raised when a page of results that should be non-empty is empty.
+731
+732    This should never happen in theory, but happens sporadically due to
+733    brittleness in the underlying arXiv API; usually resolved by retries.
+734
+735    See `Client.results` for usage.
+736    """
+737    def __init__(self, url: str, retry: int):
+738        """
+739        Constructs an `UnexpectedEmptyPageError` encountered for the specified
+740        API URL after `retry` tries.
+741        """
+742        self.url = url
+743        super().__init__(url, retry, "Page of results was unexpectedly empty")
+744
+745    def __repr__(self) -> str:
+746        return '{}({}, {})'.format(
+747            _classname(self),
+748            repr(self.url),
+749            repr(self.retry)
+750        )
+751
+752
+753class HTTPError(ArxivError):
+754    """
+755    A non-200 status encountered while fetching a page of results.
+756
+757    See `Client.results` for usage.
+758    """
+759
+760    status: int
+761    """The HTTP status reported by feedparser."""
+762    entry: feedparser.FeedParserDict
+763    """The feed entry describing the error, if present."""
+764
+765    def __init__(self, url: str, retry: int, feed: feedparser.FeedParserDict):
+766        """
+767        Constructs an `HTTPError` for the specified status code, encountered for
+768        the specified API URL after `retry` tries.
+769        """
+770        self.url = url
+771        self.status = feed.status
+772        # If the feed is valid and includes a single entry, trust it's an
+773        # explanation.
+774        if not feed.bozo and len(feed.entries) == 1:
+775            self.entry = feed.entries[0]
+776        else:
+777            self.entry = None
+778        super().__init__(
+779            url,
+780            retry,
+781            "Page request resulted in HTTP {}: {}".format(
+782                self.status,
+783                self.entry.summary if self.entry else None,
+784            ),
+785        )
+786
+787    def __repr__(self) -> str:
+788        return '{}({}, {}, {})'.format(
+789            _classname(self),
+790            repr(self.url),
+791            repr(self.retry),
+792            repr(self.status)
+793        )
+794
+795
+796def _classname(o):
+797    """A helper function for use in __repr__ methods: arxiv.Result.Link."""
+798    return 'arxiv.{}'.format(o.__class__.__qualname__)
 
@@ -1301,358 +1303,358 @@

Example: logging

-
 23class Result(object):
- 24    """
- 25    An entry in an arXiv query results feed.
- 26
- 27    See [the arXiv API User's Manual: Details of Atom Results
- 28    Returned](https://arxiv.org/help/api/user-manual#_details_of_atom_results_returned).
- 29    """
- 30
- 31    entry_id: str
- 32    """A url of the form `http://arxiv.org/abs/{id}`."""
- 33    updated: datetime
- 34    """When the result was last updated."""
- 35    published: datetime
- 36    """When the result was originally published."""
- 37    title: str
- 38    """The title of the result."""
- 39    authors: list
- 40    """The result's authors."""
- 41    summary: str
- 42    """The result abstract."""
- 43    comment: str
- 44    """The authors' comment if present."""
- 45    journal_ref: str
- 46    """A journal reference if present."""
- 47    doi: str
- 48    """A URL for the resolved DOI to an external resource if present."""
- 49    primary_category: str
- 50    """
- 51    The result's primary arXiv category. See [arXiv: Category
- 52    Taxonomy](https://arxiv.org/category_taxonomy).
- 53    """
- 54    categories: List[str]
- 55    """
- 56    All of the result's categories. See [arXiv: Category
- 57    Taxonomy](https://arxiv.org/category_taxonomy).
- 58    """
- 59    links: list
- 60    """Up to three URLs associated with this result."""
- 61    pdf_url: str
- 62    """The URL of a PDF version of this result if present among links."""
- 63    _raw: feedparser.FeedParserDict
- 64    """
- 65    The raw feedparser result object if this Result was constructed with
- 66    Result._from_feed_entry.
- 67    """
- 68
- 69    def __init__(
- 70        self,
- 71        entry_id: str,
- 72        updated: datetime = _DEFAULT_TIME,
- 73        published: datetime = _DEFAULT_TIME,
- 74        title: str = "",
- 75        authors: List['Result.Author'] = [],
- 76        summary: str = "",
- 77        comment: str = "",
- 78        journal_ref: str = "",
- 79        doi: str = "",
- 80        primary_category: str = "",
- 81        categories: List[str] = [],
- 82        links: List['Result.Link'] = [],
- 83        _raw: feedparser.FeedParserDict = None,
- 84    ):
- 85        """
- 86        Constructs an arXiv search result item.
- 87
- 88        In most cases, prefer using `Result._from_feed_entry` to parsing and
- 89        constructing `Result`s yourself.
- 90        """
- 91        self.entry_id = entry_id
- 92        self.updated = updated
- 93        self.published = published
- 94        self.title = title
- 95        self.authors = authors
- 96        self.summary = summary
- 97        self.comment = comment
- 98        self.journal_ref = journal_ref
- 99        self.doi = doi
-100        self.primary_category = primary_category
-101        self.categories = categories
-102        self.links = links
-103        # Calculated members
-104        self.pdf_url = Result._get_pdf_url(links)
-105        # Debugging
-106        self._raw = _raw
-107
-108    def _from_feed_entry(entry: feedparser.FeedParserDict) -> 'Result':
-109        """
-110        Converts a feedparser entry for an arXiv search result feed into a
-111        Result object.
-112        """
-113        if not hasattr(entry, "id"):
-114            raise Result.MissingFieldError("id")
-115        # Title attribute may be absent for certain titles. Defaulting to "0" as
-116        # it's the only title observed to cause this bug.
-117        # https://github.com/lukasschwab/arxiv.py/issues/71
-118        # title = entry.title if hasattr(entry, "title") else "0"
-119        title = "0"
-120        if hasattr(entry, "title"):
-121            title = entry.title
-122        else:
-123            logger.warning(
-124                "Result %s is missing title attribute; defaulting to '0'",
-125                entry.id
-126            )
-127        return Result(
-128            entry_id=entry.id,
-129            updated=Result._to_datetime(entry.updated_parsed),
-130            published=Result._to_datetime(entry.published_parsed),
-131            title=re.sub(r'\s+', ' ', title),
-132            authors=[Result.Author._from_feed_author(a) for a in entry.authors],
-133            summary=entry.summary,
-134            comment=entry.get('arxiv_comment'),
-135            journal_ref=entry.get('arxiv_journal_ref'),
-136            doi=entry.get('arxiv_doi'),
-137            primary_category=entry.arxiv_primary_category.get('term'),
-138            categories=[tag.get('term') for tag in entry.tags],
-139            links=[Result.Link._from_feed_link(link) for link in entry.links],
-140            _raw=entry
-141        )
-142
-143    def __str__(self) -> str:
-144        return self.entry_id
-145
-146    def __repr__(self) -> str:
-147        return (
-148            '{}(entry_id={}, updated={}, published={}, title={}, authors={}, '
-149            'summary={}, comment={}, journal_ref={}, doi={}, '
-150            'primary_category={}, categories={}, links={})'
-151        ).format(
-152            _classname(self),
-153            repr(self.entry_id),
-154            repr(self.updated),
-155            repr(self.published),
-156            repr(self.title),
-157            repr(self.authors),
-158            repr(self.summary),
-159            repr(self.comment),
-160            repr(self.journal_ref),
-161            repr(self.doi),
-162            repr(self.primary_category),
-163            repr(self.categories),
-164            repr(self.links)
-165        )
-166
-167    def __eq__(self, other) -> bool:
-168        if isinstance(other, Result):
-169            return self.entry_id == other.entry_id
-170        return False
-171
-172    def get_short_id(self) -> str:
-173        """
-174        Returns the short ID for this result.
-175
-176        + If the result URL is `"http://arxiv.org/abs/2107.05580v1"`,
-177        `result.get_short_id()` returns `2107.05580v1`.
-178
-179        + If the result URL is `"http://arxiv.org/abs/quant-ph/0201082v1"`,
-180        `result.get_short_id()` returns `"quant-ph/0201082v1"` (the pre-March
-181        2007 arXiv identifier format).
-182
-183        For an explanation of the difference between arXiv's legacy and current
-184        identifiers, see [Understanding the arXiv
-185        identifier](https://arxiv.org/help/arxiv_identifier).
-186        """
-187        return self.entry_id.split('arxiv.org/abs/')[-1]
-188
-189    def _get_default_filename(self, extension: str = "pdf") -> str:
-190        """
-191        A default `to_filename` function for the extension given.
-192        """
-193        nonempty_title = self.title if self.title else "UNTITLED"
-194        # Remove disallowed characters.
-195        clean_title = '_'.join(re.findall(r'\w+', nonempty_title))
-196        return "{}.{}.{}".format(self.get_short_id(), clean_title, extension)
-197
-198    def download_pdf(self, dirpath: str = './', filename: str = '') -> str:
-199        """
-200        Downloads the PDF for this result to the specified directory.
-201
-202        The filename is generated by calling `to_filename(self)`.
-203        """
-204        if not filename:
-205            filename = self._get_default_filename()
-206        path = os.path.join(dirpath, filename)
-207        written_path, _ = urlretrieve(self.pdf_url, path)
-208        return written_path
-209
-210    def download_source(self, dirpath: str = './', filename: str = '') -> str:
-211        """
-212        Downloads the source tarfile for this result to the specified
-213        directory.
-214
-215        The filename is generated by calling `to_filename(self)`.
-216        """
-217        if not filename:
-218            filename = self._get_default_filename('tar.gz')
-219        path = os.path.join(dirpath, filename)
-220        # Bodge: construct the source URL from the PDF URL.
-221        source_url = self.pdf_url.replace('/pdf/', '/src/')
-222        written_path, _ = urlretrieve(source_url, path)
-223        return written_path
-224
-225    def _get_pdf_url(links: list) -> str:
-226        """
-227        Finds the PDF link among a result's links and returns its URL.
-228
-229        Should only be called once for a given `Result`, in its constructor.
-230        After construction, the URL should be available in `Result.pdf_url`.
-231        """
-232        pdf_urls = [link.href for link in links if link.title == 'pdf']
-233        if len(pdf_urls) == 0:
-234            return None
-235        elif len(pdf_urls) > 1:
-236            logger.warning(
-237                "Result has multiple PDF links; using %s",
-238                pdf_urls[0]
-239            )
-240        return pdf_urls[0]
-241
-242    def _to_datetime(ts: time.struct_time) -> datetime:
-243        """
-244        Converts a UTC time.struct_time into a time-zone-aware datetime.
-245
-246        This will be replaced with feedparser functionality [when it becomes
-247        available](https://github.com/kurtmckee/feedparser/issues/212).
-248        """
-249        return datetime.fromtimestamp(timegm(ts), tz=timezone.utc)
-250
-251    class Author(object):
-252        """
-253        A light inner class for representing a result's authors.
-254        """
-255
-256        name: str
-257        """The author's name."""
-258
-259        def __init__(self, name: str):
-260            """
-261            Constructs an `Author` with the specified name.
-262
-263            In most cases, prefer using `Author._from_feed_author` to parsing
-264            and constructing `Author`s yourself.
-265            """
-266            self.name = name
-267
-268        def _from_feed_author(
-269            feed_author: feedparser.FeedParserDict
-270        ) -> 'Result.Author':
-271            """
-272            Constructs an `Author` with the name specified in an author object
-273            from a feed entry.
-274
-275            See usage in `Result._from_feed_entry`.
-276            """
-277            return Result.Author(feed_author.name)
-278
-279        def __str__(self) -> str:
-280            return self.name
-281
-282        def __repr__(self) -> str:
-283            return '{}({})'.format(_classname(self), repr(self.name))
-284
-285        def __eq__(self, other) -> bool:
-286            if isinstance(other, Result.Author):
-287                return self.name == other.name
-288            return False
-289
-290    class Link(object):
-291        """
-292        A light inner class for representing a result's links.
-293        """
-294
-295        href: str
-296        """The link's `href` attribute."""
-297        title: str
-298        """The link's title."""
-299        rel: str
-300        """The link's relationship to the `Result`."""
-301        content_type: str
-302        """The link's HTTP content type."""
-303
-304        def __init__(
-305            self,
-306            href: str,
-307            title: str = None,
-308            rel: str = None,
-309            content_type: str = None
-310        ):
-311            """
-312            Constructs a `Link` with the specified link metadata.
-313
-314            In most cases, prefer using `Link._from_feed_link` to parsing and
-315            constructing `Link`s yourself.
-316            """
-317            self.href = href
-318            self.title = title
-319            self.rel = rel
-320            self.content_type = content_type
-321
-322        def _from_feed_link(
-323            feed_link: feedparser.FeedParserDict
-324        ) -> 'Result.Link':
-325            """
-326            Constructs a `Link` with link metadata specified in a link object
-327            from a feed entry.
-328
-329            See usage in `Result._from_feed_entry`.
-330            """
-331            return Result.Link(
-332                href=feed_link.href,
-333                title=feed_link.get('title'),
-334                rel=feed_link.get('rel'),
-335                content_type=feed_link.get('content_type')
-336            )
-337
-338        def __str__(self) -> str:
-339            return self.href
-340
-341        def __repr__(self) -> str:
-342            return '{}({}, title={}, rel={}, content_type={})'.format(
-343                _classname(self),
-344                repr(self.href),
-345                repr(self.title),
-346                repr(self.rel),
-347                repr(self.content_type)
-348            )
-349
-350        def __eq__(self, other) -> bool:
-351            if isinstance(other, Result.Link):
-352                return self.href == other.href
-353            return False
-354
-355    class MissingFieldError(Exception):
-356        """
-357        An error indicating an entry is unparseable because it lacks required
-358        fields.
-359        """
-360
-361        missing_field: str
-362        """The required field missing from the would-be entry."""
-363        message: str
-364        """Message describing what caused this error."""
-365
-366        def __init__(self, missing_field):
-367            self.missing_field = missing_field
-368            self.message = "Entry from arXiv missing required info"
-369
-370        def __repr__(self) -> str:
-371            return '{}({})'.format(
-372                _classname(self),
-373                repr(self.missing_field)
-374            )
+            
 25class Result(object):
+ 26    """
+ 27    An entry in an arXiv query results feed.
+ 28
+ 29    See [the arXiv API User's Manual: Details of Atom Results
+ 30    Returned](https://arxiv.org/help/api/user-manual#_details_of_atom_results_returned).
+ 31    """
+ 32
+ 33    entry_id: str
+ 34    """A url of the form `http://arxiv.org/abs/{id}`."""
+ 35    updated: datetime
+ 36    """When the result was last updated."""
+ 37    published: datetime
+ 38    """When the result was originally published."""
+ 39    title: str
+ 40    """The title of the result."""
+ 41    authors: List[Author]
+ 42    """The result's authors."""
+ 43    summary: str
+ 44    """The result abstract."""
+ 45    comment: str
+ 46    """The authors' comment if present."""
+ 47    journal_ref: str
+ 48    """A journal reference if present."""
+ 49    doi: str
+ 50    """A URL for the resolved DOI to an external resource if present."""
+ 51    primary_category: str
+ 52    """
+ 53    The result's primary arXiv category. See [arXiv: Category
+ 54    Taxonomy](https://arxiv.org/category_taxonomy).
+ 55    """
+ 56    categories: List[str]
+ 57    """
+ 58    All of the result's categories. See [arXiv: Category
+ 59    Taxonomy](https://arxiv.org/category_taxonomy).
+ 60    """
+ 61    links: List[Link]
+ 62    """Up to three URLs associated with this result."""
+ 63    pdf_url: str
+ 64    """The URL of a PDF version of this result if present among links."""
+ 65    _raw: feedparser.FeedParserDict
+ 66    """
+ 67    The raw feedparser result object if this Result was constructed with
+ 68    Result._from_feed_entry.
+ 69    """
+ 70
+ 71    def __init__(
+ 72        self,
+ 73        entry_id: str,
+ 74        updated: datetime = _DEFAULT_TIME,
+ 75        published: datetime = _DEFAULT_TIME,
+ 76        title: str = "",
+ 77        authors: List[Author] = [],
+ 78        summary: str = "",
+ 79        comment: str = "",
+ 80        journal_ref: str = "",
+ 81        doi: str = "",
+ 82        primary_category: str = "",
+ 83        categories: List[str] = [],
+ 84        links: List[Link] = [],
+ 85        _raw: feedparser.FeedParserDict = None,
+ 86    ):
+ 87        """
+ 88        Constructs an arXiv search result item.
+ 89
+ 90        In most cases, prefer using `Result._from_feed_entry` to parsing and
+ 91        constructing `Result`s yourself.
+ 92        """
+ 93        self.entry_id = entry_id
+ 94        self.updated = updated
+ 95        self.published = published
+ 96        self.title = title
+ 97        self.authors = authors
+ 98        self.summary = summary
+ 99        self.comment = comment
+100        self.journal_ref = journal_ref
+101        self.doi = doi
+102        self.primary_category = primary_category
+103        self.categories = categories
+104        self.links = links
+105        # Calculated members
+106        self.pdf_url = Result._get_pdf_url(links)
+107        # Debugging
+108        self._raw = _raw
+109
+110    def _from_feed_entry(entry: feedparser.FeedParserDict) -> Result:
+111        """
+112        Converts a feedparser entry for an arXiv search result feed into a
+113        Result object.
+114        """
+115        if not hasattr(entry, "id"):
+116            raise Result.MissingFieldError("id")
+117        # Title attribute may be absent for certain titles. Defaulting to "0" as
+118        # it's the only title observed to cause this bug.
+119        # https://github.com/lukasschwab/arxiv.py/issues/71
+120        # title = entry.title if hasattr(entry, "title") else "0"
+121        title = "0"
+122        if hasattr(entry, "title"):
+123            title = entry.title
+124        else:
+125            logger.warning(
+126                "Result %s is missing title attribute; defaulting to '0'",
+127                entry.id
+128            )
+129        return Result(
+130            entry_id=entry.id,
+131            updated=Result._to_datetime(entry.updated_parsed),
+132            published=Result._to_datetime(entry.published_parsed),
+133            title=re.sub(r'\s+', ' ', title),
+134            authors=[Result.Author._from_feed_author(a) for a in entry.authors],
+135            summary=entry.summary,
+136            comment=entry.get('arxiv_comment'),
+137            journal_ref=entry.get('arxiv_journal_ref'),
+138            doi=entry.get('arxiv_doi'),
+139            primary_category=entry.arxiv_primary_category.get('term'),
+140            categories=[tag.get('term') for tag in entry.tags],
+141            links=[Result.Link._from_feed_link(link) for link in entry.links],
+142            _raw=entry
+143        )
+144
+145    def __str__(self) -> str:
+146        return self.entry_id
+147
+148    def __repr__(self) -> str:
+149        return (
+150            '{}(entry_id={}, updated={}, published={}, title={}, authors={}, '
+151            'summary={}, comment={}, journal_ref={}, doi={}, '
+152            'primary_category={}, categories={}, links={})'
+153        ).format(
+154            _classname(self),
+155            repr(self.entry_id),
+156            repr(self.updated),
+157            repr(self.published),
+158            repr(self.title),
+159            repr(self.authors),
+160            repr(self.summary),
+161            repr(self.comment),
+162            repr(self.journal_ref),
+163            repr(self.doi),
+164            repr(self.primary_category),
+165            repr(self.categories),
+166            repr(self.links)
+167        )
+168
+169    def __eq__(self, other) -> bool:
+170        if isinstance(other, Result):
+171            return self.entry_id == other.entry_id
+172        return False
+173
+174    def get_short_id(self) -> str:
+175        """
+176        Returns the short ID for this result.
+177
+178        + If the result URL is `"http://arxiv.org/abs/2107.05580v1"`,
+179        `result.get_short_id()` returns `2107.05580v1`.
+180
+181        + If the result URL is `"http://arxiv.org/abs/quant-ph/0201082v1"`,
+182        `result.get_short_id()` returns `"quant-ph/0201082v1"` (the pre-March
+183        2007 arXiv identifier format).
+184
+185        For an explanation of the difference between arXiv's legacy and current
+186        identifiers, see [Understanding the arXiv
+187        identifier](https://arxiv.org/help/arxiv_identifier).
+188        """
+189        return self.entry_id.split('arxiv.org/abs/')[-1]
+190
+191    def _get_default_filename(self, extension: str = "pdf") -> str:
+192        """
+193        A default `to_filename` function for the extension given.
+194        """
+195        nonempty_title = self.title if self.title else "UNTITLED"
+196        # Remove disallowed characters.
+197        clean_title = '_'.join(re.findall(r'\w+', nonempty_title))
+198        return "{}.{}.{}".format(self.get_short_id(), clean_title, extension)
+199
+200    def download_pdf(self, dirpath: str = './', filename: str = '') -> str:
+201        """
+202        Downloads the PDF for this result to the specified directory.
+203
+204        The filename is generated by calling `to_filename(self)`.
+205        """
+206        if not filename:
+207            filename = self._get_default_filename()
+208        path = os.path.join(dirpath, filename)
+209        written_path, _ = urlretrieve(self.pdf_url, path)
+210        return written_path
+211
+212    def download_source(self, dirpath: str = './', filename: str = '') -> str:
+213        """
+214        Downloads the source tarfile for this result to the specified
+215        directory.
+216
+217        The filename is generated by calling `to_filename(self)`.
+218        """
+219        if not filename:
+220            filename = self._get_default_filename('tar.gz')
+221        path = os.path.join(dirpath, filename)
+222        # Bodge: construct the source URL from the PDF URL.
+223        source_url = self.pdf_url.replace('/pdf/', '/src/')
+224        written_path, _ = urlretrieve(source_url, path)
+225        return written_path
+226
+227    def _get_pdf_url(links: List[Link]) -> str:
+228        """
+229        Finds the PDF link among a result's links and returns its URL.
+230
+231        Should only be called once for a given `Result`, in its constructor.
+232        After construction, the URL should be available in `Result.pdf_url`.
+233        """
+234        pdf_urls = [link.href for link in links if link.title == 'pdf']
+235        if len(pdf_urls) == 0:
+236            return None
+237        elif len(pdf_urls) > 1:
+238            logger.warning(
+239                "Result has multiple PDF links; using %s",
+240                pdf_urls[0]
+241            )
+242        return pdf_urls[0]
+243
+244    def _to_datetime(ts: time.struct_time) -> datetime:
+245        """
+246        Converts a UTC time.struct_time into a time-zone-aware datetime.
+247
+248        This will be replaced with feedparser functionality [when it becomes
+249        available](https://github.com/kurtmckee/feedparser/issues/212).
+250        """
+251        return datetime.fromtimestamp(timegm(ts), tz=timezone.utc)
+252
+253    class Author(object):
+254        """
+255        A light inner class for representing a result's authors.
+256        """
+257
+258        name: str
+259        """The author's name."""
+260
+261        def __init__(self, name: str):
+262            """
+263            Constructs an `Author` with the specified name.
+264
+265            In most cases, prefer using `Author._from_feed_author` to parsing
+266            and constructing `Author`s yourself.
+267            """
+268            self.name = name
+269
+270        def _from_feed_author(
+271            feed_author: feedparser.FeedParserDict
+272        ) -> Result.Author:
+273            """
+274            Constructs an `Author` with the name specified in an author object
+275            from a feed entry.
+276
+277            See usage in `Result._from_feed_entry`.
+278            """
+279            return Result.Author(feed_author.name)
+280
+281        def __str__(self) -> str:
+282            return self.name
+283
+284        def __repr__(self) -> str:
+285            return '{}({})'.format(_classname(self), repr(self.name))
+286
+287        def __eq__(self, other) -> bool:
+288            if isinstance(other, Result.Author):
+289                return self.name == other.name
+290            return False
+291
+292    class Link(object):
+293        """
+294        A light inner class for representing a result's links.
+295        """
+296
+297        href: str
+298        """The link's `href` attribute."""
+299        title: str
+300        """The link's title."""
+301        rel: str
+302        """The link's relationship to the `Result`."""
+303        content_type: str
+304        """The link's HTTP content type."""
+305
+306        def __init__(
+307            self,
+308            href: str,
+309            title: str = None,
+310            rel: str = None,
+311            content_type: str = None
+312        ):
+313            """
+314            Constructs a `Link` with the specified link metadata.
+315
+316            In most cases, prefer using `Link._from_feed_link` to parsing and
+317            constructing `Link`s yourself.
+318            """
+319            self.href = href
+320            self.title = title
+321            self.rel = rel
+322            self.content_type = content_type
+323
+324        def _from_feed_link(
+325            feed_link: feedparser.FeedParserDict
+326        ) -> Result.Link:
+327            """
+328            Constructs a `Link` with link metadata specified in a link object
+329            from a feed entry.
+330
+331            See usage in `Result._from_feed_entry`.
+332            """
+333            return Result.Link(
+334                href=feed_link.href,
+335                title=feed_link.get('title'),
+336                rel=feed_link.get('rel'),
+337                content_type=feed_link.get('content_type')
+338            )
+339
+340        def __str__(self) -> str:
+341            return self.href
+342
+343        def __repr__(self) -> str:
+344            return '{}({}, title={}, rel={}, content_type={})'.format(
+345                _classname(self),
+346                repr(self.href),
+347                repr(self.title),
+348                repr(self.rel),
+349                repr(self.content_type)
+350            )
+351
+352        def __eq__(self, other) -> bool:
+353            if isinstance(other, Result.Link):
+354                return self.href == other.href
+355            return False
+356
+357    class MissingFieldError(Exception):
+358        """
+359        An error indicating an entry is unparseable because it lacks required
+360        fields.
+361        """
+362
+363        missing_field: str
+364        """The required field missing from the would-be entry."""
+365        message: str
+366        """Message describing what caused this error."""
+367
+368        def __init__(self, missing_field):
+369            self.missing_field = missing_field
+370            self.message = "Entry from arXiv missing required info"
+371
+372        def __repr__(self) -> str:
+373            return '{}({})'.format(
+374                _classname(self),
+375                repr(self.missing_field)
+376            )
 
@@ -1673,44 +1675,44 @@

Example: logging

-
 69    def __init__(
- 70        self,
- 71        entry_id: str,
- 72        updated: datetime = _DEFAULT_TIME,
- 73        published: datetime = _DEFAULT_TIME,
- 74        title: str = "",
- 75        authors: List['Result.Author'] = [],
- 76        summary: str = "",
- 77        comment: str = "",
- 78        journal_ref: str = "",
- 79        doi: str = "",
- 80        primary_category: str = "",
- 81        categories: List[str] = [],
- 82        links: List['Result.Link'] = [],
- 83        _raw: feedparser.FeedParserDict = None,
- 84    ):
- 85        """
- 86        Constructs an arXiv search result item.
- 87
- 88        In most cases, prefer using `Result._from_feed_entry` to parsing and
- 89        constructing `Result`s yourself.
- 90        """
- 91        self.entry_id = entry_id
- 92        self.updated = updated
- 93        self.published = published
- 94        self.title = title
- 95        self.authors = authors
- 96        self.summary = summary
- 97        self.comment = comment
- 98        self.journal_ref = journal_ref
- 99        self.doi = doi
-100        self.primary_category = primary_category
-101        self.categories = categories
-102        self.links = links
-103        # Calculated members
-104        self.pdf_url = Result._get_pdf_url(links)
-105        # Debugging
-106        self._raw = _raw
+            
 71    def __init__(
+ 72        self,
+ 73        entry_id: str,
+ 74        updated: datetime = _DEFAULT_TIME,
+ 75        published: datetime = _DEFAULT_TIME,
+ 76        title: str = "",
+ 77        authors: List[Author] = [],
+ 78        summary: str = "",
+ 79        comment: str = "",
+ 80        journal_ref: str = "",
+ 81        doi: str = "",
+ 82        primary_category: str = "",
+ 83        categories: List[str] = [],
+ 84        links: List[Link] = [],
+ 85        _raw: feedparser.FeedParserDict = None,
+ 86    ):
+ 87        """
+ 88        Constructs an arXiv search result item.
+ 89
+ 90        In most cases, prefer using `Result._from_feed_entry` to parsing and
+ 91        constructing `Result`s yourself.
+ 92        """
+ 93        self.entry_id = entry_id
+ 94        self.updated = updated
+ 95        self.published = published
+ 96        self.title = title
+ 97        self.authors = authors
+ 98        self.summary = summary
+ 99        self.comment = comment
+100        self.journal_ref = journal_ref
+101        self.doi = doi
+102        self.primary_category = primary_category
+103        self.categories = categories
+104        self.links = links
+105        # Calculated members
+106        self.pdf_url = Result._get_pdf_url(links)
+107        # Debugging
+108        self._raw = _raw
 
@@ -1776,7 +1778,7 @@

Example: logging

- authors: list + authors: List[arxiv.arxiv.Result.Author]
@@ -1869,7 +1871,7 @@

Example: logging

-
172    def get_short_id(self) -> str:
-173        """
-174        Returns the short ID for this result.
-175
-176        + If the result URL is `"http://arxiv.org/abs/2107.05580v1"`,
-177        `result.get_short_id()` returns `2107.05580v1`.
-178
-179        + If the result URL is `"http://arxiv.org/abs/quant-ph/0201082v1"`,
-180        `result.get_short_id()` returns `"quant-ph/0201082v1"` (the pre-March
-181        2007 arXiv identifier format).
-182
-183        For an explanation of the difference between arXiv's legacy and current
-184        identifiers, see [Understanding the arXiv
-185        identifier](https://arxiv.org/help/arxiv_identifier).
-186        """
-187        return self.entry_id.split('arxiv.org/abs/')[-1]
+            
174    def get_short_id(self) -> str:
+175        """
+176        Returns the short ID for this result.
+177
+178        + If the result URL is `"http://arxiv.org/abs/2107.05580v1"`,
+179        `result.get_short_id()` returns `2107.05580v1`.
+180
+181        + If the result URL is `"http://arxiv.org/abs/quant-ph/0201082v1"`,
+182        `result.get_short_id()` returns `"quant-ph/0201082v1"` (the pre-March
+183        2007 arXiv identifier format).
+184
+185        For an explanation of the difference between arXiv's legacy and current
+186        identifiers, see [Understanding the arXiv
+187        identifier](https://arxiv.org/help/arxiv_identifier).
+188        """
+189        return self.entry_id.split('arxiv.org/abs/')[-1]
 
@@ -1951,17 +1953,17 @@

Example: logging

-
198    def download_pdf(self, dirpath: str = './', filename: str = '') -> str:
-199        """
-200        Downloads the PDF for this result to the specified directory.
-201
-202        The filename is generated by calling `to_filename(self)`.
-203        """
-204        if not filename:
-205            filename = self._get_default_filename()
-206        path = os.path.join(dirpath, filename)
-207        written_path, _ = urlretrieve(self.pdf_url, path)
-208        return written_path
+            
200    def download_pdf(self, dirpath: str = './', filename: str = '') -> str:
+201        """
+202        Downloads the PDF for this result to the specified directory.
+203
+204        The filename is generated by calling `to_filename(self)`.
+205        """
+206        if not filename:
+207            filename = self._get_default_filename()
+208        path = os.path.join(dirpath, filename)
+209        written_path, _ = urlretrieve(self.pdf_url, path)
+210        return written_path
 
@@ -1983,20 +1985,20 @@

Example: logging

-
210    def download_source(self, dirpath: str = './', filename: str = '') -> str:
-211        """
-212        Downloads the source tarfile for this result to the specified
-213        directory.
-214
-215        The filename is generated by calling `to_filename(self)`.
-216        """
-217        if not filename:
-218            filename = self._get_default_filename('tar.gz')
-219        path = os.path.join(dirpath, filename)
-220        # Bodge: construct the source URL from the PDF URL.
-221        source_url = self.pdf_url.replace('/pdf/', '/src/')
-222        written_path, _ = urlretrieve(source_url, path)
-223        return written_path
+            
212    def download_source(self, dirpath: str = './', filename: str = '') -> str:
+213        """
+214        Downloads the source tarfile for this result to the specified
+215        directory.
+216
+217        The filename is generated by calling `to_filename(self)`.
+218        """
+219        if not filename:
+220            filename = self._get_default_filename('tar.gz')
+221        path = os.path.join(dirpath, filename)
+222        # Bodge: construct the source URL from the PDF URL.
+223        source_url = self.pdf_url.replace('/pdf/', '/src/')
+224        written_path, _ = urlretrieve(source_url, path)
+225        return written_path
 
@@ -2020,44 +2022,44 @@

Example: logging

-
251    class Author(object):
-252        """
-253        A light inner class for representing a result's authors.
-254        """
-255
-256        name: str
-257        """The author's name."""
-258
-259        def __init__(self, name: str):
-260            """
-261            Constructs an `Author` with the specified name.
-262
-263            In most cases, prefer using `Author._from_feed_author` to parsing
-264            and constructing `Author`s yourself.
-265            """
-266            self.name = name
-267
-268        def _from_feed_author(
-269            feed_author: feedparser.FeedParserDict
-270        ) -> 'Result.Author':
-271            """
-272            Constructs an `Author` with the name specified in an author object
-273            from a feed entry.
-274
-275            See usage in `Result._from_feed_entry`.
-276            """
-277            return Result.Author(feed_author.name)
-278
-279        def __str__(self) -> str:
-280            return self.name
-281
-282        def __repr__(self) -> str:
-283            return '{}({})'.format(_classname(self), repr(self.name))
-284
-285        def __eq__(self, other) -> bool:
-286            if isinstance(other, Result.Author):
-287                return self.name == other.name
-288            return False
+            
253    class Author(object):
+254        """
+255        A light inner class for representing a result's authors.
+256        """
+257
+258        name: str
+259        """The author's name."""
+260
+261        def __init__(self, name: str):
+262            """
+263            Constructs an `Author` with the specified name.
+264
+265            In most cases, prefer using `Author._from_feed_author` to parsing
+266            and constructing `Author`s yourself.
+267            """
+268            self.name = name
+269
+270        def _from_feed_author(
+271            feed_author: feedparser.FeedParserDict
+272        ) -> Result.Author:
+273            """
+274            Constructs an `Author` with the name specified in an author object
+275            from a feed entry.
+276
+277            See usage in `Result._from_feed_entry`.
+278            """
+279            return Result.Author(feed_author.name)
+280
+281        def __str__(self) -> str:
+282            return self.name
+283
+284        def __repr__(self) -> str:
+285            return '{}({})'.format(_classname(self), repr(self.name))
+286
+287        def __eq__(self, other) -> bool:
+288            if isinstance(other, Result.Author):
+289                return self.name == other.name
+290            return False
 
@@ -2075,14 +2077,14 @@

Example: logging

-
259        def __init__(self, name: str):
-260            """
-261            Constructs an `Author` with the specified name.
-262
-263            In most cases, prefer using `Author._from_feed_author` to parsing
-264            and constructing `Author`s yourself.
-265            """
-266            self.name = name
+            
261        def __init__(self, name: str):
+262            """
+263            Constructs an `Author` with the specified name.
+264
+265            In most cases, prefer using `Author._from_feed_author` to parsing
+266            and constructing `Author`s yourself.
+267            """
+268            self.name = name
 
@@ -2119,70 +2121,70 @@

Example: logging

-
290    class Link(object):
-291        """
-292        A light inner class for representing a result's links.
-293        """
-294
-295        href: str
-296        """The link's `href` attribute."""
-297        title: str
-298        """The link's title."""
-299        rel: str
-300        """The link's relationship to the `Result`."""
-301        content_type: str
-302        """The link's HTTP content type."""
-303
-304        def __init__(
-305            self,
-306            href: str,
-307            title: str = None,
-308            rel: str = None,
-309            content_type: str = None
-310        ):
-311            """
-312            Constructs a `Link` with the specified link metadata.
-313
-314            In most cases, prefer using `Link._from_feed_link` to parsing and
-315            constructing `Link`s yourself.
-316            """
-317            self.href = href
-318            self.title = title
-319            self.rel = rel
-320            self.content_type = content_type
-321
-322        def _from_feed_link(
-323            feed_link: feedparser.FeedParserDict
-324        ) -> 'Result.Link':
-325            """
-326            Constructs a `Link` with link metadata specified in a link object
-327            from a feed entry.
-328
-329            See usage in `Result._from_feed_entry`.
-330            """
-331            return Result.Link(
-332                href=feed_link.href,
-333                title=feed_link.get('title'),
-334                rel=feed_link.get('rel'),
-335                content_type=feed_link.get('content_type')
-336            )
-337
-338        def __str__(self) -> str:
-339            return self.href
-340
-341        def __repr__(self) -> str:
-342            return '{}({}, title={}, rel={}, content_type={})'.format(
-343                _classname(self),
-344                repr(self.href),
-345                repr(self.title),
-346                repr(self.rel),
-347                repr(self.content_type)
-348            )
-349
-350        def __eq__(self, other) -> bool:
-351            if isinstance(other, Result.Link):
-352                return self.href == other.href
-353            return False
+            
292    class Link(object):
+293        """
+294        A light inner class for representing a result's links.
+295        """
+296
+297        href: str
+298        """The link's `href` attribute."""
+299        title: str
+300        """The link's title."""
+301        rel: str
+302        """The link's relationship to the `Result`."""
+303        content_type: str
+304        """The link's HTTP content type."""
+305
+306        def __init__(
+307            self,
+308            href: str,
+309            title: str = None,
+310            rel: str = None,
+311            content_type: str = None
+312        ):
+313            """
+314            Constructs a `Link` with the specified link metadata.
+315
+316            In most cases, prefer using `Link._from_feed_link` to parsing and
+317            constructing `Link`s yourself.
+318            """
+319            self.href = href
+320            self.title = title
+321            self.rel = rel
+322            self.content_type = content_type
+323
+324        def _from_feed_link(
+325            feed_link: feedparser.FeedParserDict
+326        ) -> Result.Link:
+327            """
+328            Constructs a `Link` with link metadata specified in a link object
+329            from a feed entry.
+330
+331            See usage in `Result._from_feed_entry`.
+332            """
+333            return Result.Link(
+334                href=feed_link.href,
+335                title=feed_link.get('title'),
+336                rel=feed_link.get('rel'),
+337                content_type=feed_link.get('content_type')
+338            )
+339
+340        def __str__(self) -> str:
+341            return self.href
+342
+343        def __repr__(self) -> str:
+344            return '{}({}, title={}, rel={}, content_type={})'.format(
+345                _classname(self),
+346                repr(self.href),
+347                repr(self.title),
+348                repr(self.rel),
+349                repr(self.content_type)
+350            )
+351
+352        def __eq__(self, other) -> bool:
+353            if isinstance(other, Result.Link):
+354                return self.href == other.href
+355            return False
 
@@ -2200,23 +2202,23 @@

Example: logging

-
304        def __init__(
-305            self,
-306            href: str,
-307            title: str = None,
-308            rel: str = None,
-309            content_type: str = None
-310        ):
-311            """
-312            Constructs a `Link` with the specified link metadata.
-313
-314            In most cases, prefer using `Link._from_feed_link` to parsing and
-315            constructing `Link`s yourself.
-316            """
-317            self.href = href
-318            self.title = title
-319            self.rel = rel
-320            self.content_type = content_type
+            
306        def __init__(
+307            self,
+308            href: str,
+309            title: str = None,
+310            rel: str = None,
+311            content_type: str = None
+312        ):
+313            """
+314            Constructs a `Link` with the specified link metadata.
+315
+316            In most cases, prefer using `Link._from_feed_link` to parsing and
+317            constructing `Link`s yourself.
+318            """
+319            self.href = href
+320            self.title = title
+321            self.rel = rel
+322            self.content_type = content_type
 
@@ -2292,26 +2294,26 @@

Example: logging

-
355    class MissingFieldError(Exception):
-356        """
-357        An error indicating an entry is unparseable because it lacks required
-358        fields.
-359        """
-360
-361        missing_field: str
-362        """The required field missing from the would-be entry."""
-363        message: str
-364        """Message describing what caused this error."""
-365
-366        def __init__(self, missing_field):
-367            self.missing_field = missing_field
-368            self.message = "Entry from arXiv missing required info"
-369
-370        def __repr__(self) -> str:
-371            return '{}({})'.format(
-372                _classname(self),
-373                repr(self.missing_field)
-374            )
+            
357    class MissingFieldError(Exception):
+358        """
+359        An error indicating an entry is unparseable because it lacks required
+360        fields.
+361        """
+362
+363        missing_field: str
+364        """The required field missing from the would-be entry."""
+365        message: str
+366        """Message describing what caused this error."""
+367
+368        def __init__(self, missing_field):
+369            self.missing_field = missing_field
+370            self.message = "Entry from arXiv missing required info"
+371
+372        def __repr__(self) -> str:
+373            return '{}({})'.format(
+374                _classname(self),
+375                repr(self.missing_field)
+376            )
 
@@ -2330,9 +2332,9 @@

Example: logging

-
366        def __init__(self, missing_field):
-367            self.missing_field = missing_field
-368            self.message = "Entry from arXiv missing required info"
+            
368        def __init__(self, missing_field):
+369            self.missing_field = missing_field
+370            self.message = "Entry from arXiv missing required info"
 
@@ -2386,17 +2388,17 @@
Inherited Members
-
377class SortCriterion(Enum):
-378    """
-379    A SortCriterion identifies a property by which search results can be
-380    sorted.
-381
-382    See [the arXiv API User's Manual: sort order for return
-383    results](https://arxiv.org/help/api/user-manual#sort).
-384    """
-385    Relevance = "relevance"
-386    LastUpdatedDate = "lastUpdatedDate"
-387    SubmittedDate = "submittedDate"
+            
379class SortCriterion(Enum):
+380    """
+381    A SortCriterion identifies a property by which search results can be
+382    sorted.
+383
+384    See [the arXiv API User's Manual: sort order for return
+385    results](https://arxiv.org/help/api/user-manual#sort).
+386    """
+387    Relevance = "relevance"
+388    LastUpdatedDate = "lastUpdatedDate"
+389    SubmittedDate = "submittedDate"
 
@@ -2466,16 +2468,16 @@
Inherited Members
-
390class SortOrder(Enum):
-391    """
-392    A SortOrder indicates order in which search results are sorted according
-393    to the specified arxiv.SortCriterion.
-394
-395    See [the arXiv API User's Manual: sort order for return
-396    results](https://arxiv.org/help/api/user-manual#sort).
-397    """
-398    Ascending = "ascending"
-399    Descending = "descending"
+            
392class SortOrder(Enum):
+393    """
+394    A SortOrder indicates order in which search results are sorted according
+395    to the specified arxiv.SortCriterion.
+396
+397    See [the arXiv API User's Manual: sort order for return
+398    results](https://arxiv.org/help/api/user-manual#sort).
+399    """
+400    Ascending = "ascending"
+401    Descending = "descending"
 
@@ -2533,108 +2535,108 @@
Inherited Members
-
402class Search(object):
-403    """
-404    A specification for a search of arXiv's database.
-405
-406    To run a search, use `Search.run` to use a default client or `Client.run`
-407    with a specific client.
-408    """
-409
-410    query: str
-411    """
-412    A query string.
-413
-414    This should be unencoded. Use `au:del_maestro AND ti:checkerboard`, not
-415    `au:del_maestro+AND+ti:checkerboard`.
-416
-417    See [the arXiv API User's Manual: Details of Query
-418    Construction](https://arxiv.org/help/api/user-manual#query_details).
-419    """
-420    id_list: list
-421    """
-422    A list of arXiv article IDs to which to limit the search.
-423
-424    See [the arXiv API User's
-425    Manual](https://arxiv.org/help/api/user-manual#search_query_and_id_list)
-426    for documentation of the interaction between `query` and `id_list`.
-427    """
-428    max_results: float
-429    """
-430    The maximum number of results to be returned in an execution of this
-431    search.
-432
-433    To fetch every result available, set `max_results=float('inf')`.
-434    """
-435    sort_by: SortCriterion
-436    """The sort criterion for results."""
-437    sort_order: SortOrder
-438    """The sort order for results."""
-439
-440    def __init__(
-441        self,
-442        query: str = "",
-443        id_list: List[str] = [],
-444        max_results: float = float('inf'),
-445        sort_by: SortCriterion = SortCriterion.Relevance,
-446        sort_order: SortOrder = SortOrder.Descending
-447    ):
-448        """
-449        Constructs an arXiv API search with the specified criteria.
-450        """
-451        self.query = query
-452        self.id_list = id_list
-453        self.max_results = max_results
-454        self.sort_by = sort_by
-455        self.sort_order = sort_order
-456
-457    def __str__(self) -> str:
-458        # TODO: develop a more informative string representation.
-459        return repr(self)
-460
-461    def __repr__(self) -> str:
-462        return (
-463            '{}(query={}, id_list={}, max_results={}, sort_by={}, '
-464            'sort_order={})'
-465        ).format(
-466            _classname(self),
-467            repr(self.query),
-468            repr(self.id_list),
-469            repr(self.max_results),
-470            repr(self.sort_by),
-471            repr(self.sort_order)
-472        )
-473
-474    def _url_args(self) -> Dict[str, str]:
-475        """
-476        Returns a dict of search parameters that should be included in an API
-477        request for this search.
-478        """
-479        return {
-480            "search_query": self.query,
-481            "id_list": ','.join(self.id_list),
-482            "sortBy": self.sort_by.value,
-483            "sortOrder": self.sort_order.value
-484        }
-485
-486    def get(self) -> Generator[Result, None, None]:
-487        """
-488        **Deprecated** after 1.2.0; use `Search.results`.
-489        """
-490        warnings.warn(
-491            "The 'get' method is deprecated, use 'results' instead",
-492            DeprecationWarning,
-493            stacklevel=2
-494        )
-495        return self.results()
-496
-497    def results(self, offset: int = 0) -> Generator[Result, None, None]:
-498        """
-499        Executes the specified search using a default arXiv API client.
-500
-501        For info on default behavior, see `Client.__init__` and `Client.results`.
-502        """
-503        return Client().results(self, offset=offset)
+            
404class Search(object):
+405    """
+406    A specification for a search of arXiv's database.
+407
+408    To run a search, use `Search.run` to use a default client or `Client.run`
+409    with a specific client.
+410    """
+411
+412    query: str
+413    """
+414    A query string.
+415
+416    This should be unencoded. Use `au:del_maestro AND ti:checkerboard`, not
+417    `au:del_maestro+AND+ti:checkerboard`.
+418
+419    See [the arXiv API User's Manual: Details of Query
+420    Construction](https://arxiv.org/help/api/user-manual#query_details).
+421    """
+422    id_list: List[str]
+423    """
+424    A list of arXiv article IDs to which to limit the search.
+425
+426    See [the arXiv API User's
+427    Manual](https://arxiv.org/help/api/user-manual#search_query_and_id_list)
+428    for documentation of the interaction between `query` and `id_list`.
+429    """
+430    max_results: float
+431    """
+432    The maximum number of results to be returned in an execution of this
+433    search.
+434
+435    To fetch every result available, set `max_results=float('inf')`.
+436    """
+437    sort_by: SortCriterion
+438    """The sort criterion for results."""
+439    sort_order: SortOrder
+440    """The sort order for results."""
+441
+442    def __init__(
+443        self,
+444        query: str = "",
+445        id_list: List[str] = [],
+446        max_results: float = float('inf'),
+447        sort_by: SortCriterion = SortCriterion.Relevance,
+448        sort_order: SortOrder = SortOrder.Descending
+449    ):
+450        """
+451        Constructs an arXiv API search with the specified criteria.
+452        """
+453        self.query = query
+454        self.id_list = id_list
+455        self.max_results = max_results
+456        self.sort_by = sort_by
+457        self.sort_order = sort_order
+458
+459    def __str__(self) -> str:
+460        # TODO: develop a more informative string representation.
+461        return repr(self)
+462
+463    def __repr__(self) -> str:
+464        return (
+465            '{}(query={}, id_list={}, max_results={}, sort_by={}, '
+466            'sort_order={})'
+467        ).format(
+468            _classname(self),
+469            repr(self.query),
+470            repr(self.id_list),
+471            repr(self.max_results),
+472            repr(self.sort_by),
+473            repr(self.sort_order)
+474        )
+475
+476    def _url_args(self) -> Dict[str, str]:
+477        """
+478        Returns a dict of search parameters that should be included in an API
+479        request for this search.
+480        """
+481        return {
+482            "search_query": self.query,
+483            "id_list": ','.join(self.id_list),
+484            "sortBy": self.sort_by.value,
+485            "sortOrder": self.sort_order.value
+486        }
+487
+488    def get(self) -> Generator[Result, None, None]:
+489        """
+490        **Deprecated** after 1.2.0; use `Search.results`.
+491        """
+492        warnings.warn(
+493            "The 'get' method is deprecated, use 'results' instead",
+494            DeprecationWarning,
+495            stacklevel=2
+496        )
+497        return self.results()
+498
+499    def results(self, offset: int = 0) -> Generator[Result, None, None]:
+500        """
+501        Executes the specified search using a default arXiv API client.
+502
+503        For info on default behavior, see `Client.__init__` and `Client.results`.
+504        """
+505        return Client().results(self, offset=offset)
 
@@ -2655,22 +2657,22 @@
Inherited Members
-
440    def __init__(
-441        self,
-442        query: str = "",
-443        id_list: List[str] = [],
-444        max_results: float = float('inf'),
-445        sort_by: SortCriterion = SortCriterion.Relevance,
-446        sort_order: SortOrder = SortOrder.Descending
-447    ):
-448        """
-449        Constructs an arXiv API search with the specified criteria.
-450        """
-451        self.query = query
-452        self.id_list = id_list
-453        self.max_results = max_results
-454        self.sort_by = sort_by
-455        self.sort_order = sort_order
+            
442    def __init__(
+443        self,
+444        query: str = "",
+445        id_list: List[str] = [],
+446        max_results: float = float('inf'),
+447        sort_by: SortCriterion = SortCriterion.Relevance,
+448        sort_order: SortOrder = SortOrder.Descending
+449    ):
+450        """
+451        Constructs an arXiv API search with the specified criteria.
+452        """
+453        self.query = query
+454        self.id_list = id_list
+455        self.max_results = max_results
+456        self.sort_by = sort_by
+457        self.sort_order = sort_order
 
@@ -2700,7 +2702,7 @@
Inherited Members
- id_list: list + id_list: List[str]
@@ -2768,16 +2770,16 @@
Inherited Members
-
486    def get(self) -> Generator[Result, None, None]:
-487        """
-488        **Deprecated** after 1.2.0; use `Search.results`.
-489        """
-490        warnings.warn(
-491            "The 'get' method is deprecated, use 'results' instead",
-492            DeprecationWarning,
-493            stacklevel=2
-494        )
-495        return self.results()
+            
488    def get(self) -> Generator[Result, None, None]:
+489        """
+490        **Deprecated** after 1.2.0; use `Search.results`.
+491        """
+492        warnings.warn(
+493            "The 'get' method is deprecated, use 'results' instead",
+494            DeprecationWarning,
+495            stacklevel=2
+496        )
+497        return self.results()
 
@@ -2797,13 +2799,13 @@
Inherited Members
-
497    def results(self, offset: int = 0) -> Generator[Result, None, None]:
-498        """
-499        Executes the specified search using a default arXiv API client.
-500
-501        For info on default behavior, see `Client.__init__` and `Client.results`.
-502        """
-503        return Client().results(self, offset=offset)
+            
499    def results(self, offset: int = 0) -> Generator[Result, None, None]:
+500        """
+501        Executes the specified search using a default arXiv API client.
+502
+503        For info on default behavior, see `Client.__init__` and `Client.results`.
+504        """
+505        return Client().results(self, offset=offset)
 
@@ -2826,199 +2828,199 @@
Inherited Members
-
506class Client(object):
-507    """
-508    Specifies a strategy for fetching results from arXiv's API.
-509
-510    This class obscures pagination and retry logic, and exposes
-511    `Client.results`.
-512    """
-513
-514    query_url_format = 'http://export.arxiv.org/api/query?{}'
-515    """The arXiv query API endpoint format."""
-516    page_size: int
-517    """Maximum number of results fetched in a single API request."""
-518    delay_seconds: int
-519    """Number of seconds to wait between API requests."""
-520    num_retries: int
-521    """Number of times to retry a failing API request."""
-522    _last_request_dt: datetime
-523
-524    def __init__(
-525        self,
-526        page_size: int = 100,
-527        delay_seconds: int = 3,
-528        num_retries: int = 3
-529    ):
-530        """
-531        Constructs an arXiv API client with the specified options.
-532
-533        Note: the default parameters should provide a robust request strategy
-534        for most use cases. Extreme page sizes, delays, or retries risk
-535        violating the arXiv [API Terms of Use](https://arxiv.org/help/api/tou),
-536        brittle behavior, and inconsistent results.
-537        """
-538        self.page_size = page_size
-539        self.delay_seconds = delay_seconds
-540        self.num_retries = num_retries
-541        self._last_request_dt = None
-542
-543    def __str__(self) -> str:
-544        # TODO: develop a more informative string representation.
-545        return repr(self)
-546
-547    def __repr__(self) -> str:
-548        return '{}(page_size={}, delay_seconds={}, num_retries={})'.format(
-549            _classname(self),
-550            repr(self.page_size),
-551            repr(self.delay_seconds),
-552            repr(self.num_retries)
-553        )
-554
-555    def get(self, search: Search) -> Generator[Result, None, None]:
-556        """
-557        **Deprecated** after 1.2.0; use `Client.results`.
-558        """
-559        warnings.warn(
-560            "The 'get' method is deprecated, use 'results' instead",
-561            DeprecationWarning,
-562            stacklevel=2
-563        )
-564        return self.results(search)
-565
-566    def results(self, search: Search, offset: int = 0) -> Generator[Result, None, None]:
-567        """
-568        Uses this client configuration to fetch one page of the search results
-569        at a time, yielding the parsed `Result`s, until `max_results` results
-570        have been yielded or there are no more search results.
-571
-572        If all tries fail, raises an `UnexpectedEmptyPageError` or `HTTPError`.
+            
508class Client(object):
+509    """
+510    Specifies a strategy for fetching results from arXiv's API.
+511
+512    This class obscures pagination and retry logic, and exposes
+513    `Client.results`.
+514    """
+515
+516    query_url_format = 'http://export.arxiv.org/api/query?{}'
+517    """The arXiv query API endpoint format."""
+518    page_size: int
+519    """Maximum number of results fetched in a single API request."""
+520    delay_seconds: int
+521    """Number of seconds to wait between API requests."""
+522    num_retries: int
+523    """Number of times to retry a failing API request."""
+524    _last_request_dt: datetime
+525
+526    def __init__(
+527        self,
+528        page_size: int = 100,
+529        delay_seconds: int = 3,
+530        num_retries: int = 3
+531    ):
+532        """
+533        Constructs an arXiv API client with the specified options.
+534
+535        Note: the default parameters should provide a robust request strategy
+536        for most use cases. Extreme page sizes, delays, or retries risk
+537        violating the arXiv [API Terms of Use](https://arxiv.org/help/api/tou),
+538        brittle behavior, and inconsistent results.
+539        """
+540        self.page_size = page_size
+541        self.delay_seconds = delay_seconds
+542        self.num_retries = num_retries
+543        self._last_request_dt = None
+544
+545    def __str__(self) -> str:
+546        # TODO: develop a more informative string representation.
+547        return repr(self)
+548
+549    def __repr__(self) -> str:
+550        return '{}(page_size={}, delay_seconds={}, num_retries={})'.format(
+551            _classname(self),
+552            repr(self.page_size),
+553            repr(self.delay_seconds),
+554            repr(self.num_retries)
+555        )
+556
+557    def get(self, search: Search) -> Generator[Result, None, None]:
+558        """
+559        **Deprecated** after 1.2.0; use `Client.results`.
+560        """
+561        warnings.warn(
+562            "The 'get' method is deprecated, use 'results' instead",
+563            DeprecationWarning,
+564            stacklevel=2
+565        )
+566        return self.results(search)
+567
+568    def results(self, search: Search, offset: int = 0) -> Generator[Result, None, None]:
+569        """
+570        Uses this client configuration to fetch one page of the search results
+571        at a time, yielding the parsed `Result`s, until `max_results` results
+572        have been yielded or there are no more search results.
 573
-574        Setting a nonzero `offset` discards leading records in the result set.
-575        When `offset` is greater than or equal to `search.max_results`, the full
-576        result set is discarded.
-577
-578        For more on using generators, see
-579        [Generators](https://wiki.python.org/moin/Generators).
-580        """
-581
-582        # total_results may be reduced according to the feed's
-583        # opensearch:totalResults value.
-584        total_results = search.max_results
-585        first_page = True
-586        while offset < total_results:
-587            page_size = min(self.page_size, search.max_results - offset)
-588            logger.info("Requesting {} results at offset {}".format(
-589                page_size,
-590                offset,
-591            ))
-592            page_url = self._format_url(search, offset, page_size)
-593            feed = self._parse_feed(page_url, first_page)
-594            if first_page:
-595                # NOTE: this is an ugly fix for a known bug. The totalresults
-596                # value is set to 1 for results with zero entries. If that API
-597                # bug is fixed, we can remove this conditional and always set
-598                # `total_results = min(...)`.
-599                if len(feed.entries) == 0:
-600                    logger.info("Got empty results; stopping generation")
-601                    total_results = 0
-602                else:
-603                    total_results = min(
-604                        total_results,
-605                        int(feed.feed.opensearch_totalresults)
-606                    )
-607                    logger.info("Got first page; {} of {} results available".format(
-608                        total_results,
-609                        search.max_results
-610                    ))
-611                # Subsequent pages are not the first page.
-612                first_page = False
-613            # Update offset for next request: account for received results.
-614            offset += len(feed.entries)
-615            # Yield query results until page is exhausted.
-616            for entry in feed.entries:
-617                try:
-618                    yield Result._from_feed_entry(entry)
-619                except Result.MissingFieldError:
-620                    logger.warning("Skipping partial result")
-621                    continue
-622
-623    def _format_url(self, search: Search, start: int, page_size: int) -> str:
-624        """
-625        Construct a request API for search that returns up to `page_size`
-626        results starting with the result at index `start`.
-627        """
-628        url_args = search._url_args()
-629        url_args.update({
-630            "start": start,
-631            "max_results": page_size,
-632        })
-633        return self.query_url_format.format(urlencode(url_args))
-634
-635    def _parse_feed(
-636        self,
-637        url: str,
-638        first_page: bool = True
-639    ) -> feedparser.FeedParserDict:
-640        """
-641        Fetches the specified URL and parses it with feedparser.
-642
-643        If a request fails or is unexpectedly empty, retries the request up to
-644        `self.num_retries` times.
-645        """
-646        # Invoke the recursive helper with initial available retries.
-647        return self.__try_parse_feed(
-648            url,
-649            first_page=first_page,
-650            retries_left=self.num_retries
-651        )
-652
-653    def __try_parse_feed(
-654        self,
-655        url: str,
-656        first_page: bool,
-657        retries_left: int,
-658        last_err: Exception = None,
-659    ) -> feedparser.FeedParserDict:
-660        """
-661        Recursive helper for _parse_feed. Enforces `self.delay_seconds`: if that
-662        number of seconds has not passed since `_parse_feed` was last called,
-663        sleeps until delay_seconds seconds have passed.
-664        """
-665        retry = self.num_retries - retries_left
-666        # If this call would violate the rate limit, sleep until it doesn't.
-667        if self._last_request_dt is not None:
-668            required = timedelta(seconds=self.delay_seconds)
-669            since_last_request = datetime.now() - self._last_request_dt
-670            if since_last_request < required:
-671                to_sleep = (required - since_last_request).total_seconds()
-672                logger.info("Sleeping for %f seconds", to_sleep)
-673                time.sleep(to_sleep)
-674        logger.info("Requesting page of results", extra={
-675            'url': url,
-676            'first_page': first_page,
-677            'retry': retry,
-678            'last_err': last_err.message if last_err is not None else None,
-679        })
-680        feed = feedparser.parse(url)
-681        self._last_request_dt = datetime.now()
-682        err = None
-683        if feed.status != 200:
-684            err = HTTPError(url, retry, feed)
-685        elif len(feed.entries) == 0 and not first_page:
-686            err = UnexpectedEmptyPageError(url, retry)
-687        if err is not None:
-688            if retries_left > 0:
-689                return self.__try_parse_feed(
-690                    url,
-691                    first_page=first_page,
-692                    retries_left=retries_left-1,
-693                    last_err=err,
-694                )
-695            # Feed was never returned in self.num_retries tries. Raise the last
-696            # exception encountered.
-697            raise err
-698        return feed
+574        If all tries fail, raises an `UnexpectedEmptyPageError` or `HTTPError`.
+575
+576        Setting a nonzero `offset` discards leading records in the result set.
+577        When `offset` is greater than or equal to `search.max_results`, the full
+578        result set is discarded.
+579
+580        For more on using generators, see
+581        [Generators](https://wiki.python.org/moin/Generators).
+582        """
+583
+584        # total_results may be reduced according to the feed's
+585        # opensearch:totalResults value.
+586        total_results = search.max_results
+587        first_page = True
+588        while offset < total_results:
+589            page_size = min(self.page_size, search.max_results - offset)
+590            logger.info("Requesting {} results at offset {}".format(
+591                page_size,
+592                offset,
+593            ))
+594            page_url = self._format_url(search, offset, page_size)
+595            feed = self._parse_feed(page_url, first_page)
+596            if first_page:
+597                # NOTE: this is an ugly fix for a known bug. The totalresults
+598                # value is set to 1 for results with zero entries. If that API
+599                # bug is fixed, we can remove this conditional and always set
+600                # `total_results = min(...)`.
+601                if len(feed.entries) == 0:
+602                    logger.info("Got empty results; stopping generation")
+603                    total_results = 0
+604                else:
+605                    total_results = min(
+606                        total_results,
+607                        int(feed.feed.opensearch_totalresults)
+608                    )
+609                    logger.info("Got first page; {} of {} results available".format(
+610                        total_results,
+611                        search.max_results
+612                    ))
+613                # Subsequent pages are not the first page.
+614                first_page = False
+615            # Update offset for next request: account for received results.
+616            offset += len(feed.entries)
+617            # Yield query results until page is exhausted.
+618            for entry in feed.entries:
+619                try:
+620                    yield Result._from_feed_entry(entry)
+621                except Result.MissingFieldError:
+622                    logger.warning("Skipping partial result")
+623                    continue
+624
+625    def _format_url(self, search: Search, start: int, page_size: int) -> str:
+626        """
+627        Construct a request API for search that returns up to `page_size`
+628        results starting with the result at index `start`.
+629        """
+630        url_args = search._url_args()
+631        url_args.update({
+632            "start": start,
+633            "max_results": page_size,
+634        })
+635        return self.query_url_format.format(urlencode(url_args))
+636
+637    def _parse_feed(
+638        self,
+639        url: str,
+640        first_page: bool = True
+641    ) -> feedparser.FeedParserDict:
+642        """
+643        Fetches the specified URL and parses it with feedparser.
+644
+645        If a request fails or is unexpectedly empty, retries the request up to
+646        `self.num_retries` times.
+647        """
+648        # Invoke the recursive helper with initial available retries.
+649        return self.__try_parse_feed(
+650            url,
+651            first_page=first_page,
+652            retries_left=self.num_retries
+653        )
+654
+655    def __try_parse_feed(
+656        self,
+657        url: str,
+658        first_page: bool,
+659        retries_left: int,
+660        last_err: Exception = None,
+661    ) -> feedparser.FeedParserDict:
+662        """
+663        Recursive helper for _parse_feed. Enforces `self.delay_seconds`: if that
+664        number of seconds has not passed since `_parse_feed` was last called,
+665        sleeps until delay_seconds seconds have passed.
+666        """
+667        retry = self.num_retries - retries_left
+668        # If this call would violate the rate limit, sleep until it doesn't.
+669        if self._last_request_dt is not None:
+670            required = timedelta(seconds=self.delay_seconds)
+671            since_last_request = datetime.now() - self._last_request_dt
+672            if since_last_request < required:
+673                to_sleep = (required - since_last_request).total_seconds()
+674                logger.info("Sleeping for %f seconds", to_sleep)
+675                time.sleep(to_sleep)
+676        logger.info("Requesting page of results", extra={
+677            'url': url,
+678            'first_page': first_page,
+679            'retry': retry,
+680            'last_err': last_err.message if last_err is not None else None,
+681        })
+682        feed = feedparser.parse(url)
+683        self._last_request_dt = datetime.now()
+684        err = None
+685        if feed.status != 200:
+686            err = HTTPError(url, retry, feed)
+687        elif len(feed.entries) == 0 and not first_page:
+688            err = UnexpectedEmptyPageError(url, retry)
+689        if err is not None:
+690            if retries_left > 0:
+691                return self.__try_parse_feed(
+692                    url,
+693                    first_page=first_page,
+694                    retries_left=retries_left-1,
+695                    last_err=err,
+696                )
+697            # Feed was never returned in self.num_retries tries. Raise the last
+698            # exception encountered.
+699            raise err
+700        return feed
 
@@ -3039,24 +3041,24 @@
Inherited Members
-
524    def __init__(
-525        self,
-526        page_size: int = 100,
-527        delay_seconds: int = 3,
-528        num_retries: int = 3
-529    ):
-530        """
-531        Constructs an arXiv API client with the specified options.
-532
-533        Note: the default parameters should provide a robust request strategy
-534        for most use cases. Extreme page sizes, delays, or retries risk
-535        violating the arXiv [API Terms of Use](https://arxiv.org/help/api/tou),
-536        brittle behavior, and inconsistent results.
-537        """
-538        self.page_size = page_size
-539        self.delay_seconds = delay_seconds
-540        self.num_retries = num_retries
-541        self._last_request_dt = None
+            
526    def __init__(
+527        self,
+528        page_size: int = 100,
+529        delay_seconds: int = 3,
+530        num_retries: int = 3
+531    ):
+532        """
+533        Constructs an arXiv API client with the specified options.
+534
+535        Note: the default parameters should provide a robust request strategy
+536        for most use cases. Extreme page sizes, delays, or retries risk
+537        violating the arXiv [API Terms of Use](https://arxiv.org/help/api/tou),
+538        brittle behavior, and inconsistent results.
+539        """
+540        self.page_size = page_size
+541        self.delay_seconds = delay_seconds
+542        self.num_retries = num_retries
+543        self._last_request_dt = None
 
@@ -3134,16 +3136,16 @@
Inherited Members
-
555    def get(self, search: Search) -> Generator[Result, None, None]:
-556        """
-557        **Deprecated** after 1.2.0; use `Client.results`.
-558        """
-559        warnings.warn(
-560            "The 'get' method is deprecated, use 'results' instead",
-561            DeprecationWarning,
-562            stacklevel=2
-563        )
-564        return self.results(search)
+            
557    def get(self, search: Search) -> Generator[Result, None, None]:
+558        """
+559        **Deprecated** after 1.2.0; use `Client.results`.
+560        """
+561        warnings.warn(
+562            "The 'get' method is deprecated, use 'results' instead",
+563            DeprecationWarning,
+564            stacklevel=2
+565        )
+566        return self.results(search)
 
@@ -3163,62 +3165,62 @@
Inherited Members
-
566    def results(self, search: Search, offset: int = 0) -> Generator[Result, None, None]:
-567        """
-568        Uses this client configuration to fetch one page of the search results
-569        at a time, yielding the parsed `Result`s, until `max_results` results
-570        have been yielded or there are no more search results.
-571
-572        If all tries fail, raises an `UnexpectedEmptyPageError` or `HTTPError`.
+            
568    def results(self, search: Search, offset: int = 0) -> Generator[Result, None, None]:
+569        """
+570        Uses this client configuration to fetch one page of the search results
+571        at a time, yielding the parsed `Result`s, until `max_results` results
+572        have been yielded or there are no more search results.
 573
-574        Setting a nonzero `offset` discards leading records in the result set.
-575        When `offset` is greater than or equal to `search.max_results`, the full
-576        result set is discarded.
-577
-578        For more on using generators, see
-579        [Generators](https://wiki.python.org/moin/Generators).
-580        """
-581
-582        # total_results may be reduced according to the feed's
-583        # opensearch:totalResults value.
-584        total_results = search.max_results
-585        first_page = True
-586        while offset < total_results:
-587            page_size = min(self.page_size, search.max_results - offset)
-588            logger.info("Requesting {} results at offset {}".format(
-589                page_size,
-590                offset,
-591            ))
-592            page_url = self._format_url(search, offset, page_size)
-593            feed = self._parse_feed(page_url, first_page)
-594            if first_page:
-595                # NOTE: this is an ugly fix for a known bug. The totalresults
-596                # value is set to 1 for results with zero entries. If that API
-597                # bug is fixed, we can remove this conditional and always set
-598                # `total_results = min(...)`.
-599                if len(feed.entries) == 0:
-600                    logger.info("Got empty results; stopping generation")
-601                    total_results = 0
-602                else:
-603                    total_results = min(
-604                        total_results,
-605                        int(feed.feed.opensearch_totalresults)
-606                    )
-607                    logger.info("Got first page; {} of {} results available".format(
-608                        total_results,
-609                        search.max_results
-610                    ))
-611                # Subsequent pages are not the first page.
-612                first_page = False
-613            # Update offset for next request: account for received results.
-614            offset += len(feed.entries)
-615            # Yield query results until page is exhausted.
-616            for entry in feed.entries:
-617                try:
-618                    yield Result._from_feed_entry(entry)
-619                except Result.MissingFieldError:
-620                    logger.warning("Skipping partial result")
-621                    continue
+574        If all tries fail, raises an `UnexpectedEmptyPageError` or `HTTPError`.
+575
+576        Setting a nonzero `offset` discards leading records in the result set.
+577        When `offset` is greater than or equal to `search.max_results`, the full
+578        result set is discarded.
+579
+580        For more on using generators, see
+581        [Generators](https://wiki.python.org/moin/Generators).
+582        """
+583
+584        # total_results may be reduced according to the feed's
+585        # opensearch:totalResults value.
+586        total_results = search.max_results
+587        first_page = True
+588        while offset < total_results:
+589            page_size = min(self.page_size, search.max_results - offset)
+590            logger.info("Requesting {} results at offset {}".format(
+591                page_size,
+592                offset,
+593            ))
+594            page_url = self._format_url(search, offset, page_size)
+595            feed = self._parse_feed(page_url, first_page)
+596            if first_page:
+597                # NOTE: this is an ugly fix for a known bug. The totalresults
+598                # value is set to 1 for results with zero entries. If that API
+599                # bug is fixed, we can remove this conditional and always set
+600                # `total_results = min(...)`.
+601                if len(feed.entries) == 0:
+602                    logger.info("Got empty results; stopping generation")
+603                    total_results = 0
+604                else:
+605                    total_results = min(
+606                        total_results,
+607                        int(feed.feed.opensearch_totalresults)
+608                    )
+609                    logger.info("Got first page; {} of {} results available".format(
+610                        total_results,
+611                        search.max_results
+612                    ))
+613                # Subsequent pages are not the first page.
+614                first_page = False
+615            # Update offset for next request: account for received results.
+616            offset += len(feed.entries)
+617            # Yield query results until page is exhausted.
+618            for entry in feed.entries:
+619                try:
+620                    yield Result._from_feed_entry(entry)
+621                except Result.MissingFieldError:
+622                    logger.warning("Skipping partial result")
+623                    continue
 
@@ -3250,30 +3252,30 @@
Inherited Members
-
701class ArxivError(Exception):
-702    """This package's base Exception class."""
-703
-704    url: str
-705    """The feed URL that could not be fetched."""
-706    retry: int
-707    """
-708    The request try number which encountered this error; 0 for the initial try,
-709    1 for the first retry, and so on.
-710    """
-711    message: str
-712    """Message describing what caused this error."""
-713
-714    def __init__(self, url: str, retry: int, message: str):
-715        """
-716        Constructs an `ArxivError` encountered while fetching the specified URL.
-717        """
-718        self.url = url
-719        self.retry = retry
-720        self.message = message
-721        super().__init__(self.message)
-722
-723    def __str__(self) -> str:
-724        return '{} ({})'.format(self.message, self.url)
+            
703class ArxivError(Exception):
+704    """This package's base Exception class."""
+705
+706    url: str
+707    """The feed URL that could not be fetched."""
+708    retry: int
+709    """
+710    The request try number which encountered this error; 0 for the initial try,
+711    1 for the first retry, and so on.
+712    """
+713    message: str
+714    """Message describing what caused this error."""
+715
+716    def __init__(self, url: str, retry: int, message: str):
+717        """
+718        Constructs an `ArxivError` encountered while fetching the specified URL.
+719        """
+720        self.url = url
+721        self.retry = retry
+722        self.message = message
+723        super().__init__(self.message)
+724
+725    def __str__(self) -> str:
+726        return '{} ({})'.format(self.message, self.url)
 
@@ -3291,14 +3293,14 @@
Inherited Members
-
714    def __init__(self, url: str, retry: int, message: str):
-715        """
-716        Constructs an `ArxivError` encountered while fetching the specified URL.
-717        """
-718        self.url = url
-719        self.retry = retry
-720        self.message = message
-721        super().__init__(self.message)
+            
716    def __init__(self, url: str, retry: int, message: str):
+717        """
+718        Constructs an `ArxivError` encountered while fetching the specified URL.
+719        """
+720        self.url = url
+721        self.retry = retry
+722        self.message = message
+723        super().__init__(self.message)
 
@@ -3368,29 +3370,29 @@
Inherited Members
-
727class UnexpectedEmptyPageError(ArxivError):
-728    """
-729    An error raised when a page of results that should be non-empty is empty.
-730
-731    This should never happen in theory, but happens sporadically due to
-732    brittleness in the underlying arXiv API; usually resolved by retries.
-733
-734    See `Client.results` for usage.
-735    """
-736    def __init__(self, url: str, retry: int):
-737        """
-738        Constructs an `UnexpectedEmptyPageError` encountered for the specified
-739        API URL after `retry` tries.
-740        """
-741        self.url = url
-742        super().__init__(url, retry, "Page of results was unexpectedly empty")
-743
-744    def __repr__(self) -> str:
-745        return '{}({}, {})'.format(
-746            _classname(self),
-747            repr(self.url),
-748            repr(self.retry)
-749        )
+            
729class UnexpectedEmptyPageError(ArxivError):
+730    """
+731    An error raised when a page of results that should be non-empty is empty.
+732
+733    This should never happen in theory, but happens sporadically due to
+734    brittleness in the underlying arXiv API; usually resolved by retries.
+735
+736    See `Client.results` for usage.
+737    """
+738    def __init__(self, url: str, retry: int):
+739        """
+740        Constructs an `UnexpectedEmptyPageError` encountered for the specified
+741        API URL after `retry` tries.
+742        """
+743        self.url = url
+744        super().__init__(url, retry, "Page of results was unexpectedly empty")
+745
+746    def __repr__(self) -> str:
+747        return '{}({}, {})'.format(
+748            _classname(self),
+749            repr(self.url),
+750            repr(self.retry)
+751        )
 
@@ -3413,13 +3415,13 @@
Inherited Members
-
736    def __init__(self, url: str, retry: int):
-737        """
-738        Constructs an `UnexpectedEmptyPageError` encountered for the specified
-739        API URL after `retry` tries.
-740        """
-741        self.url = url
-742        super().__init__(url, retry, "Page of results was unexpectedly empty")
+            
738    def __init__(self, url: str, retry: int):
+739        """
+740        Constructs an `UnexpectedEmptyPageError` encountered for the specified
+741        API URL after `retry` tries.
+742        """
+743        self.url = url
+744        super().__init__(url, retry, "Page of results was unexpectedly empty")
 
@@ -3468,47 +3470,47 @@
Inherited Members
-
752class HTTPError(ArxivError):
-753    """
-754    A non-200 status encountered while fetching a page of results.
-755
-756    See `Client.results` for usage.
-757    """
-758
-759    status: int
-760    """The HTTP status reported by feedparser."""
-761    entry: feedparser.FeedParserDict
-762    """The feed entry describing the error, if present."""
-763
-764    def __init__(self, url: str, retry: int, feed: feedparser.FeedParserDict):
-765        """
-766        Constructs an `HTTPError` for the specified status code, encountered for
-767        the specified API URL after `retry` tries.
-768        """
-769        self.url = url
-770        self.status = feed.status
-771        # If the feed is valid and includes a single entry, trust it's an
-772        # explanation.
-773        if not feed.bozo and len(feed.entries) == 1:
-774            self.entry = feed.entries[0]
-775        else:
-776            self.entry = None
-777        super().__init__(
-778            url,
-779            retry,
-780            "Page request resulted in HTTP {}: {}".format(
-781                self.status,
-782                self.entry.summary if self.entry else None,
-783            ),
-784        )
-785
-786    def __repr__(self) -> str:
-787        return '{}({}, {}, {})'.format(
-788            _classname(self),
-789            repr(self.url),
-790            repr(self.retry),
-791            repr(self.status)
-792        )
+            
754class HTTPError(ArxivError):
+755    """
+756    A non-200 status encountered while fetching a page of results.
+757
+758    See `Client.results` for usage.
+759    """
+760
+761    status: int
+762    """The HTTP status reported by feedparser."""
+763    entry: feedparser.FeedParserDict
+764    """The feed entry describing the error, if present."""
+765
+766    def __init__(self, url: str, retry: int, feed: feedparser.FeedParserDict):
+767        """
+768        Constructs an `HTTPError` for the specified status code, encountered for
+769        the specified API URL after `retry` tries.
+770        """
+771        self.url = url
+772        self.status = feed.status
+773        # If the feed is valid and includes a single entry, trust it's an
+774        # explanation.
+775        if not feed.bozo and len(feed.entries) == 1:
+776            self.entry = feed.entries[0]
+777        else:
+778            self.entry = None
+779        super().__init__(
+780            url,
+781            retry,
+782            "Page request resulted in HTTP {}: {}".format(
+783                self.status,
+784                self.entry.summary if self.entry else None,
+785            ),
+786        )
+787
+788    def __repr__(self) -> str:
+789        return '{}({}, {}, {})'.format(
+790            _classname(self),
+791            repr(self.url),
+792            repr(self.retry),
+793            repr(self.status)
+794        )
 
@@ -3528,27 +3530,27 @@
Inherited Members
-
764    def __init__(self, url: str, retry: int, feed: feedparser.FeedParserDict):
-765        """
-766        Constructs an `HTTPError` for the specified status code, encountered for
-767        the specified API URL after `retry` tries.
-768        """
-769        self.url = url
-770        self.status = feed.status
-771        # If the feed is valid and includes a single entry, trust it's an
-772        # explanation.
-773        if not feed.bozo and len(feed.entries) == 1:
-774            self.entry = feed.entries[0]
-775        else:
-776            self.entry = None
-777        super().__init__(
-778            url,
-779            retry,
-780            "Page request resulted in HTTP {}: {}".format(
-781                self.status,
-782                self.entry.summary if self.entry else None,
-783            ),
-784        )
+            
766    def __init__(self, url: str, retry: int, feed: feedparser.FeedParserDict):
+767        """
+768        Constructs an `HTTPError` for the specified status code, encountered for
+769        the specified API URL after `retry` tries.
+770        """
+771        self.url = url
+772        self.status = feed.status
+773        # If the feed is valid and includes a single entry, trust it's an
+774        # explanation.
+775        if not feed.bozo and len(feed.entries) == 1:
+776            self.entry = feed.entries[0]
+777        else:
+778            self.entry = None
+779        super().__init__(
+780            url,
+781            retry,
+782            "Page request resulted in HTTP {}: {}".format(
+783                self.status,
+784                self.entry.summary if self.entry else None,
+785            ),
+786        )
 
diff --git a/setup.py b/setup.py index 947ba2a..1bd5565 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ from setuptools import setup -version = '1.4.4' +version = '1.4.5' with open('README.md', 'r') as fh: long_description = fh.read()