diff --git a/pyproject.toml b/pyproject.toml index 162d1c3..769f062 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ dev = [ "pytest-httpx>=0.28.0", "pytest>=7.4.4", "ruff>=0.1.11", + "zstd>=1.5.6.1", ] [project.urls] diff --git a/twscrape/api.py b/twscrape/api.py index b1de1fb..f57ab98 100644 --- a/twscrape/api.py +++ b/twscrape/api.py @@ -1,7 +1,9 @@ from contextlib import aclosing from httpx import Response +from json import loads from typing_extensions import deprecated +from zstd import decompress from .accounts_pool import AccountsPool from .logger import set_log_level @@ -126,7 +128,8 @@ async def _gql_items( if rep is None: return - obj = rep.json() + encoding: str | None = rep.headers.get("content-encoding") + obj = loads(decompress(rep.content)) if encoding == "zstd" else rep.json() els = get_by_path(obj, "entries") or [] els = [ x @@ -138,7 +141,7 @@ async def _gql_items( ] cur = self._get_cursor(obj, cursor_type) - rep, cnt, active = self._is_end(rep, queue, els, cur, cnt, limit) + rep, cnt, active = self._is_end(obj, queue, els, cur, cnt, limit) if rep is None: return @@ -169,7 +172,7 @@ async def search_raw(self, q: str, limit=-1, kv=None): async def search(self, q: str, limit=-1, kv=None): async with aclosing(self.search_raw(q, limit=limit, kv=kv)) as gen: async for rep in gen: - for x in parse_tweets(rep.json(), limit): + for x in parse_tweets(rep, limit): yield x # user_by_id @@ -261,7 +264,7 @@ async def tweet_replies_raw(self, twid: int, limit=-1, kv=None): async def tweet_replies(self, twid: int, limit=-1, kv=None): async with aclosing(self.tweet_replies_raw(twid, limit=limit, kv=kv)) as gen: async for rep in gen: - for x in parse_tweets(rep.json(), limit): + for x in parse_tweets(rep, limit): if x.inReplyToTweetId == twid: yield x @@ -278,7 +281,7 @@ async def followers_raw(self, uid: int, limit=-1, kv=None): async def followers(self, uid: int, limit=-1, kv=None): async with aclosing(self.followers_raw(uid, limit=limit, kv=kv)) as gen: async for rep in gen: - for x in parse_users(rep.json(), limit): + for x in parse_users(rep, limit): yield x # verified_followers @@ -296,7 +299,7 @@ async def verified_followers_raw(self, uid: int, limit=-1, kv=None): async def verified_followers(self, uid: int, limit=-1, kv=None): async with aclosing(self.verified_followers_raw(uid, limit=limit, kv=kv)) as gen: async for rep in gen: - for x in parse_users(rep.json(), limit): + for x in parse_users(rep, limit): yield x # following @@ -311,7 +314,7 @@ async def following_raw(self, uid: int, limit=-1, kv=None): async def following(self, uid: int, limit=-1, kv=None): async with aclosing(self.following_raw(uid, limit=limit, kv=kv)) as gen: async for rep in gen: - for x in parse_users(rep.json(), limit): + for x in parse_users(rep, limit): yield x # subscriptions @@ -326,7 +329,7 @@ async def subscriptions_raw(self, uid: int, limit=-1, kv=None): async def subscriptions(self, uid: int, limit=-1, kv=None): async with aclosing(self.subscriptions_raw(uid, limit=limit, kv=kv)) as gen: async for rep in gen: - for x in parse_users(rep.json(), limit): + for x in parse_users(rep, limit): yield x # retweeters @@ -341,7 +344,7 @@ async def retweeters_raw(self, twid: int, limit=-1, kv=None): async def retweeters(self, twid: int, limit=-1, kv=None): async with aclosing(self.retweeters_raw(twid, limit=limit, kv=kv)) as gen: async for rep in gen: - for x in parse_users(rep.json(), limit): + for x in parse_users(rep, limit): yield x # favoriters @@ -358,7 +361,7 @@ async def favoriters_raw(self, twid: int, limit=-1, kv=None): async def favoriters(self, twid: int, limit=-1, kv=None): async with aclosing(self.favoriters_raw(twid, limit=limit, kv=kv)) as gen: async for rep in gen: - for x in parse_users(rep.json(), limit): + for x in parse_users(rep, limit): yield x # user_tweets @@ -381,7 +384,7 @@ async def user_tweets_raw(self, uid: int, limit=-1, kv=None): async def user_tweets(self, uid: int, limit=-1, kv=None): async with aclosing(self.user_tweets_raw(uid, limit=limit, kv=kv)) as gen: async for rep in gen: - for x in parse_tweets(rep.json(), limit): + for x in parse_tweets(rep, limit): yield x # user_tweets_and_replies @@ -404,7 +407,7 @@ async def user_tweets_and_replies_raw(self, uid: int, limit=-1, kv=None): async def user_tweets_and_replies(self, uid: int, limit=-1, kv=None): async with aclosing(self.user_tweets_and_replies_raw(uid, limit=limit, kv=kv)) as gen: async for rep in gen: - for x in parse_tweets(rep.json(), limit): + for x in parse_tweets(rep, limit): yield x # user_media @@ -476,7 +479,7 @@ async def liked_tweets_raw(self, uid: int, limit=-1, kv=None): async def liked_tweets(self, uid: int, limit=-1, kv=None): async with aclosing(self.liked_tweets_raw(uid, limit=limit, kv=kv)) as gen: async for rep in gen: - for x in parse_tweets(rep.json(), limit): + for x in parse_tweets(rep, limit): yield x # Get current user bookmarks @@ -502,5 +505,5 @@ async def bookmarks_raw(self, limit=-1, kv=None): async def bookmarks(self, limit=-1, kv=None): async with aclosing(self.bookmarks_raw(limit=limit, kv=kv)) as gen: async for rep in gen: - for x in parse_tweets(rep.json(), limit): + for x in parse_tweets(rep, limit): yield x diff --git a/twscrape/queue_client.py b/twscrape/queue_client.py index b7a4dec..baa89d3 100644 --- a/twscrape/queue_client.py +++ b/twscrape/queue_client.py @@ -1,6 +1,7 @@ -import json +from json import JSONDecodeError, dumps, loads import os from typing import Any +from zstd import decompress import httpx from httpx import AsyncClient, Response @@ -56,8 +57,10 @@ def dump_rep(rep: Response): msg.append("\n") try: - msg.append(json.dumps(rep.json(), indent=2)) - except json.JSONDecodeError: + encoding: str | None = rep.headers.get("content-encoding") + obj = loads(decompress(rep.content)) if encoding == "zstd" else rep.json() + msg.append(dumps(obj, indent=2)) + except JSONDecodeError: msg.append(rep.text) txt = "\n".join(msg) @@ -120,8 +123,9 @@ async def _check_rep(self, rep: Response) -> None: dump_rep(rep) try: - res = rep.json() - except json.JSONDecodeError: + encoding: str | None = rep.headers.get("content-encoding") + res = loads(decompress(rep.content)) if encoding == "zstd" else rep.json() + except JSONDecodeError: res: Any = {"_raw": rep.text} limit_remaining = int(rep.headers.get("x-rate-limit-remaining", -1))