Skip to content

Commit

Permalink
Update docstrings (#6297)
Browse files (browse the repository at this point in the history)
  • Loading branch information
agnieszka-m authored Nov 15, 2023
1 parent f74f034 commit 414cbcf
Showing 1 changed file with 8 additions and 7 deletions.
15 changes: 8 additions & 7 deletions haystack/preview/components/fetchers/link_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,10 @@ def __init__(
"""
Initializes a LinkContentFetcher instance.
:param raise_on_failure: If True, raises an exception on failure when fetching a single URL.
For multiple URLs, errors are logged and successful fetches are returned. Default is True.
:param raise_on_failure: If True, raises an exception if it fails to fetch a single URL.
For multiple URLs, it logs errors and returns the content it successfully fetched. Default is True.
:param user_agents: A list of user agents for fetching content. If None, a default user agent is used.
:param retry_attempts: Number of retry attempts for fetching content. Default is 2.
:param retry_attempts: Specifies how many times you want it to retry to fetch the URL's content. Default is 2.
:param timeout: Timeout in seconds for the request. Default is 3.
"""
self.raise_on_failure = raise_on_failure
Expand Down Expand Up @@ -100,6 +100,7 @@ def run(self, urls: List[str]):
"""
Fetches content from a list of URLs and returns a list of extracted content streams.
Each content stream is a ByteStream object containing the extracted content as binary data.
Each ByteStream object in the returned list corresponds to the contents of a single URL.
The content type of each stream is stored in the metadata of the ByteStream object under
the key "content_type". The URL of the fetched content is stored under the key "url".
Expand Down Expand Up @@ -137,7 +138,7 @@ def fetch(self, url: str) -> Tuple[Dict[str, str], ByteStream]:
:param url: The URL to fetch content from.
:return: A tuple containing the ByteStream metadata dict and the corresponding ByteStream.
ByteStream metadata contains the URL and the content type of the fetched content.
The content type is a string indicating the type of content fetched (e.g., "text/html", "application/pdf").
The content type is a string indicating the type of content fetched (for example, "text/html", "application/pdf").
The ByteStream object contains the fetched content as binary data.
:raises: If an error occurs during content retrieval and `raise_on_failure` is set to True, this method will
Expand All @@ -155,7 +156,7 @@ def fetch(self, url: str) -> Tuple[Dict[str, str], ByteStream]:
if self.raise_on_failure:
raise e
# less verbose log as this is expected to happen often (requests failing, blocked, etc.)
logger.debug("Couldn't retrieve content from %s due to %s", url, str(e))
logger.debug("Couldn't retrieve content from %s because %s", url, str(e))

finally:
self.current_user_agent_idx = 0
Expand All @@ -166,8 +167,8 @@ def _fetch_with_exception_suppression(self, url: str) -> Tuple[Optional[Dict[str
"""
Fetches content from a URL and returns it as a ByteStream.
If `raise_on_failure` is set to True, this method will wrap the fetch method and catch any exceptions.
Otherwise, it will simply call the fetch method.
If `raise_on_failure` is set to True, this method will wrap the fetch() method and catch any exceptions.
Otherwise, it will simply call the fetch() method.
:param url: The URL to fetch content from.
:return: A tuple containing the ByteStream metadata dict and the corresponding ByteStream.
Expand Down

0 comments on commit 414cbcf

Please sign in to comment.