From 83cc5406f4dfd4e1b09bfda96beb537f9c81477b Mon Sep 17 00:00:00 2001 From: TDKorn <96394652+TDKorn@users.noreply.github.com> Date: Mon, 8 May 2023 23:04:21 -0400 Subject: [PATCH] Merge ```InstaPage``` branch Added new ```InstaPage``` and ```Hashtag``` classes * Hashtag class wraps the data from a scraped Instagram hashtag * InstaPage is an abstract class representing any scrapable Instagram page; InstaUser and Hashtag are subclasses Added new methods to ```InstaClient``` * ```scrape()``` can be used to scrape an Instagram page * ```get_user()``` and ```get_hashtag()``` scrape a profile or hashtag * ```get_username``` retrieves username from a userid Updated docs, docstrings, variables and method names to reflect the changes * Notable change is that ```user_map``` is now ```page_map``` --- InstaTweet/__init__.py | 2 +- InstaTweet/instaclient.py | 77 ++++++-- InstaTweet/instapage.py | 190 +++++++++++++++++++ InstaTweet/instapost.py | 33 ++-- InstaTweet/instatweet.py | 75 ++++---- InstaTweet/instauser.py | 81 -------- InstaTweet/profile.py | 182 ++++++++---------- InstaTweet/tweetclient.py | 2 +- README.rst | 53 +++--- README_PyPi.rst | 55 +++--- docs/source/_readme/about-instatweet.rst | 16 +- docs/source/_readme/getting-started.rst | 171 +++++------------ docs/source/_snippets/about-the-page-map.rst | 19 ++ docs/source/_snippets/about-the-user-map.rst | 20 -- docs/source/_snippets/run-profile.rst | 6 +- docs/source/_snippets/save-profile.rst | 37 +++- docs/source/_snippets/use-instaclient.rst | 18 +- docs/source/conf.py | 4 +- docs/source/index.rst | 2 +- docs/source/{instauser.rst => instapage.rst} | 4 +- docs/source/modules.rst | 2 +- 21 files changed, 592 insertions(+), 457 deletions(-) create mode 100644 InstaTweet/instapage.py delete mode 100644 InstaTweet/instauser.py create mode 100644 docs/source/_snippets/about-the-page-map.rst delete mode 100644 docs/source/_snippets/about-the-user-map.rst rename docs/source/{instauser.rst => instapage.rst} (56%) diff --git a/InstaTweet/__init__.py b/InstaTweet/__init__.py index f880572..1dd0928 100644 --- a/InstaTweet/__init__.py +++ b/InstaTweet/__init__.py @@ -3,7 +3,7 @@ from .db import DBConnection # API Interaction/Wrapper Classes from .instapost import InstaPost -from .instauser import InstaUser +from .instapage import InstaPage, InstaUser, Hashtag from .instaclient import InstaClient, USER_AGENT from .tweetclient import TweetClient # User Interface Classes diff --git a/InstaTweet/instaclient.py b/InstaTweet/instaclient.py index c1b1890..7a43b64 100644 --- a/InstaTweet/instaclient.py +++ b/InstaTweet/instaclient.py @@ -1,7 +1,9 @@ import os import requests +from requests import Response +from typing import Type, Union, Optional, Dict from json.decoder import JSONDecodeError -from . import InstaUser, InstaPost +from . import InstaPage, InstaUser, InstaPost, Hashtag USER_AGENT = "Mozilla/5.0 (Linux; Android 9; GM1903 Build/PKQ1.190110.001; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/75.0.3770.143 Mobile Safari/537.36 Instagram 103.1.0.15.119 Android (28/9; 420dpi; 1080x2260; OnePlus; GM1903; OnePlus7; qcom; sv_SE; 164094539)" @@ -17,7 +19,7 @@ class InstaClient: DOWNLOAD_DIR = os.path.abspath('downloads') #: [*Optional*] -- Directory to temporarily download media to - def __init__(self, session_id: str, user_agent: str = USER_AGENT, proxies: dict = None): + def __init__(self, session_id: str, user_agent: str = USER_AGENT, proxies: Optional[Dict] = None): """Initialize an :class:`~InstaClient` with an Instagram sessionid cookie (at minimum) .. note:: As of v2.0.0b13, the endpoint used by :meth:`~get_user` seems to require a specific :attr:`~USER_AGENT` @@ -31,7 +33,7 @@ def __init__(self, session_id: str, user_agent: str = USER_AGENT, proxies: dict raise TypeError('session_id must be a string') self.session_id = session_id - self.user_agent = user_agent # Hardcoded one works for now... + self.user_agent = user_agent self.proxies = proxies if not os.path.exists(InstaClient.DOWNLOAD_DIR): @@ -48,32 +50,81 @@ def request(self, url: str) -> requests.Response: proxies=self.proxies ) + def scrape(self, page: str) -> InstaPage: + """Scrapes an Instagram page and wraps the response data + + :param page: an Instagram hashtag (prefixed with ``#``) or username + :returns: an :class:`~.InstaUser` or :class:`~.Hashtag` + """ + if isinstance(page, str): + if page.startswith("#"): + return self.get_hashtag(page) + return self.get_user(page) + raise TypeError(f"`page` must be of type {str}") + + def get_hashtag(self, tag: str, max_id: str = '') -> Hashtag: + """Scrapes an Instagram hashtag and wraps the response with :class:`~.Hashtag` + + :param tag: the hashtag to scrape (with or without a ``#``) + :param max_id: the end cursor + """ + tag = tag.lstrip("#") + endpoint = f'https://www.instagram.com/explore/tags/{tag}/?__a=1&max_id={max_id}&__d=dis' + response = self.request(endpoint) + return self._wrap(tag, response, Hashtag) + def get_user(self, username: str) -> InstaUser: - """Scrapes an Instagram user's profile and wraps the response + """Scrapes an Instagram user's profile and wraps the response with :class:`~.InstaUser` - :param username: the username of the IG user to scrape (without the @) - :return: an :class:`~.InstaUser` object, which wraps the response data + :param username: the username of the IG user to scrape """ + username = username.lstrip('@') endpoint = f"https://i.instagram.com/api/v1/users/web_profile_info/?username={username}" response = self.request(endpoint) + return self._wrap(username, response, InstaUser) + def _wrap(self, page: str, response: Response, Wrapper: Type[InstaPage]) -> InstaPage: + """Validates and wraps the API response from an Instagram page + """ + page = f'user @{page}' if Wrapper is InstaUser else f'hashtag #{page}' if response.ok: try: - return InstaUser(response.json(), self) + return Wrapper(response.json(), self) except JSONDecodeError as e: - raise RuntimeError(f'Unable to scrape Instagram user @{username}') from e + raise RuntimeError(f'Unable to scrape Instagram {page}') from e else: try: error = response.json() except JSONDecodeError: error = response.reason raise RuntimeError( - 'Failed to scrape Instagram user @{u}\nResponse: [{code}] -- {e}'.format( - u=username, code=response.status_code, e=error + 'Failed to scrape Instagram {page}\nResponse: [{code}] -- {e}'.format( + page=page, code=response.status_code, e=error ) ) - def download_post(self, post: InstaPost, filepath: str = None) -> bool: + def get_username(self, user_id: Union[int, str]) -> str: + """Retrieves the Instagram username for the user with the provided ``user_id`` + + .. tip:: Use this with :meth:`get_user` to scrape by ``user_id``:: + + >> user_id = 51276430399 + >> username = insta.get_username(user_id) + >> user = insta.get_user(username) + >> print(user.posts[0]) + + Post 2981866202934977614 by @dailykittenig on 2022-11-29 01:44:37 + + :param user_id: the id of the Instagram user to retrieve the username of + """ + endpoint = f"https://i.instagram.com/api/v1/users/{user_id}/info" + response = self.request(endpoint) + if response.ok: + return response.json().get('user', {}).get('username', '') + else: + raise RuntimeError(f"Failed to retrieve info for Instagram user with id {user_id}") + + def download_post(self, post: InstaPost, filepath: Optional[str] = None) -> bool: """Downloads the media from an Instagram post :param post: the :class:`~.InstaPost` of the post to download @@ -98,11 +149,11 @@ def download_post(self, post: InstaPost, filepath: str = None) -> bool: return True @property - def headers(self) -> dict: + def headers(self) -> Dict: """Headers to use in :meth:`~.request`""" return {'User-Agent': self.user_agent, } @property - def cookies(self) -> dict: + def cookies(self) -> Dict: """Cookies to use in :meth:`~.request`""" return {'sessionid': self.session_id, } diff --git a/InstaTweet/instapage.py b/InstaTweet/instapage.py new file mode 100644 index 0000000..ac60211 --- /dev/null +++ b/InstaTweet/instapage.py @@ -0,0 +1,190 @@ +from __future__ import annotations +from abc import ABC, abstractmethod +from functools import cached_property +from typing import Dict, Optional, TYPE_CHECKING, List +from . import InstaPost + +if TYPE_CHECKING: + from . import InstaClient + + +class InstaPage(ABC): + + """Abstract wrapper class for wrapping API responses from Instagram pages""" + + def __init__(self, data: Dict, client: Optional[InstaClient] = None): + """Initialize an :class:`InstaPage` + + Used to wrap responses from endpoints that contain Instagram post data, + like Instagram user profiles and Instagram hashtag searches + + :param data: the API response JSON to use as source data + :param client: the :class:`~.InstaClient` to use; required for :meth:`~.get_more_posts` + """ + self.data = data + self.client = client + self._posts = [] + + @abstractmethod + def __str__(self) -> str: + pass + + @property + @abstractmethod + def name(self) -> str: + """Name of the Instagram page""" + pass + + @property + @abstractmethod + def page_data(self) -> Dict: + """Data about the Instagram page itself""" + pass + + @property + @abstractmethod + def media_data(self) -> Dict: + """Data about posts on the Instagram page""" + pass + + @property + def id(self) -> int: + """ID of the Instagram page""" + return int(self.page_data.get('id', -1)) + + @property + def posts(self) -> List[InstaPost]: + """Posts that have been scraped from the Instagram page + + To retrieve the next page of posts, call :meth:`get_more_posts` + + :returns: the page's posts as :class:`~.InstaPost` objects + """ + if not self._posts: + if edges := self.media_data.get('edges'): + self._posts = [InstaPost(edge['node'], self.client) for edge in edges] + return self._posts + + def get_more_posts(self) -> bool: + """Requests the next page of posts from the :class:`InstaPage` + + If the page :attr:`~.has_more_posts`, they'll be added to the :attr:`~.posts` list + + :returns: ``True`` if the request was successful, otherwise ``False`` + """ + if not self.client: + raise AttributeError("Must provide an InstaClient to scrape with") + + if not self.has_more_posts: + print("All posts have already been scraped") + return False + + if not (next_page := self._get_next_page()): + print("Unable to retrieve the next page of posts") + return False + + self.media_page_info.update(next_page.media_page_info) + self._posts.extend(next_page.posts) + return True + + @abstractmethod + def _get_next_page(self) -> Optional[InstaPage]: + """Makes the request for the next page of posts; wraps the response if successful""" + pass + + @property + def has_more_posts(self) -> bool: + """Returns ``True`` if more posts can be scraped using :meth:`~.get_more_posts`""" + return self.media_page_info.get('has_next_page') + + @property + def end_cursor(self) -> str: + """Cursor used in request by :meth:`~.get_more_posts`""" + return self.media_page_info.get('end_cursor', '').strip('=') + + @property + def media_page_info(self) -> Dict: + return self.media_data.get('page_info', {}) + + +class InstaUser(InstaPage): + + """API response wrapper for an Instagram user's profile""" + + def __init__(self, data: Dict, client: Optional[InstaClient] = None): + """Initialize an :class:`InstaUser` + + :param data: the API response from :meth:`~.get_user` + :param client: the :class:`~.InstaClient` to use + """ + super().__init__(data, client) + + def __str__(self) -> str: + return f"Instagram User: @{self.name}" + + @property + def name(self) -> str: + return self.page_data.get('username') + + @property + def page_data(self) -> Dict: + return self.data.get('data', {}).get('user', {}) + + @property + def media_data(self) -> Dict: + return self.page_data.get('edge_owner_to_timeline_media', {'edges': []}) + + def _get_next_page(self) -> Optional[InstaPage]: + endpoint = 'https://www.instagram.com/graphql/query/?query_hash=8c2a529969ee035a5063f2fc8602a0fd' + \ + f'&variables=%7B%22id%22%3A%22{self.id}%22%2C%22first%22%3A12%2C%22' + \ + f'after%22%3A%22{self.end_cursor}%3D%3D%22%7D' + response = self.client.request(endpoint) + if not response.ok: + return None + try: + return InstaUser(response.json()) + except Exception as e: + raise RuntimeError('Failed to get more posts') from e + + +class Hashtag(InstaPage): + + """API response wrapper for an Instagram hashtag""" + + def __init__(self, data: Dict, client: Optional[InstaClient] = None): + """Initialize a :class:`Hashtag` + + :param data: the API response from :meth:`~.get_hashtag` + :param client: the :class:`~.InstaClient` to use + """ + if (data := data.get('graphql', {}).get('hashtag')) is None: + raise ValueError(f"Hashtag response data is missing") + + super().__init__(data, client) + self._top_posts = [] + + def __str__(self) -> str: + return f"Instagram Hashtag: {self.name}" + + @property + def name(self) -> str: + return "#" + self.page_data.get('name') + + @property + def page_data(self) -> Dict: + return self.data + + @property + def media_data(self) -> Dict: + return self.page_data.get('edge_hashtag_to_media', {'count': 0, 'edges': []}) + + @cached_property + def top_posts(self) -> List[InstaPost]: + return [InstaPost(edge['node'], self.client) for edge in self.top_media_data['edges']] + + @property + def top_media_data(self) -> Dict: + return self.page_data.get("edge_hashtag_to_top_posts", {"edges": []}) + + def _get_next_page(self) -> Optional[InstaPage]: + return self.client.get_hashtag(self.name, max_id=self.end_cursor) diff --git a/InstaTweet/instapost.py b/InstaTweet/instapost.py index b88ed76..2265b28 100644 --- a/InstaTweet/instapost.py +++ b/InstaTweet/instapost.py @@ -1,26 +1,28 @@ +from __future__ import annotations import os -from functools import cached_property -from typing import Union, List from datetime import datetime from tweepy.models import Status +from functools import cached_property +from typing import Union, List, Optional, Dict class InstaPost: """Minimalistic API response wrapper for an Instagram post""" - def __init__(self, post_data: dict): + def __init__(self, data: dict, client: Optional["InstaClient"] = None): """Initialize an :class:`~InstaPost` - :param post_data: the JSON response data of a single Instagram post, found within the :attr:`~.InstaUser.user_data` + :param data: the JSON response data of a single Instagram post, found within the :attr:`~.InstaUser.user_data` """ #: Source data from API response - self.json = post_data + self.json = data + self.client = client #: The post id - self.id = post_data['id'] - self.dimensions: dict = post_data.get('dimensions', {}) - self.is_video: bool = post_data.get('is_video', False) - self.video_url = post_data.get('video_url', '') + self.id = data['id'] + self.dimensions: dict = data.get('dimensions', {}) + self.is_video: bool = data.get('is_video', False) + self.video_url = data.get('video_url', '') #: Path of downloaded media, set by :meth:`~.InstaClient.download_post` self.filepath: str = '' #: Limited data from a successful tweet based off this post, set by :meth:`~.TweetClient.send_tweet` @@ -30,7 +32,7 @@ def __str__(self): return f'Post {self.id} by @{self.owner["username"]} on {self.timestamp}' @cached_property - def children(self) -> List["InstaPost"]: + def children(self) -> List[InstaPost]: """If the post is a carousel, returns a list of child :class:`InstaPost`'s""" if self.is_carousel: edges = self.json['edge_sidecar_to_children']['edges'] @@ -51,6 +53,10 @@ def caption(self) -> str: return caption_edge[0].get('node', {}).get('text', '') return '' + @property + def likes(self) -> Optional[int]: + return self.json.get('edge_liked_by', {}).get('count') + @property def media_url(self) -> str: """The direct URL to the actual post content @@ -96,8 +102,11 @@ def filetype(self) -> str: return '.mp4' if self.is_video else '.jpg' @property - def owner(self) -> dict: - if owner := self.json.get('owner', self.json.get('user', {})): + def owner(self) -> Dict: + if owner := self.json.get('owner', {}): + if not owner.get('username'): + if self.client and (uid := owner.get('id')): + owner['username'] = self.client.get_username(uid) return owner return dict.fromkeys(['id', 'username']) diff --git a/InstaTweet/instatweet.py b/InstaTweet/instatweet.py index f3aaf28..df048f5 100644 --- a/InstaTweet/instatweet.py +++ b/InstaTweet/instatweet.py @@ -1,4 +1,4 @@ -from typing import Optional, List +from typing import Optional, List, Dict from . import utils, TweetClient, InstaClient, InstaPost, Profile @@ -11,9 +11,9 @@ class InstaTweet: .. admonition:: **InstaTweet** (`verb`): :class: instatweet - To load a :class:`~.Profile` 🠖 scrape :attr:`~.posts` from its Instagram users + To load a :class:`~.Profile` 🠖 scrape :attr:`~.posts` from its Instagram pages 🠖 :meth:`~.download_post` & :meth:`~.send_tweet` for any new content - 🠖 update the :attr:`~.user_map` + 🠖 update the :attr:`~.page_map` 🠖 :meth:`~.save` the profile if it :attr:`~.exists` .. admonition:: **Example Sentence** @@ -44,11 +44,10 @@ def load(cls, profile_name: str, local: bool = True) -> "InstaTweet": :param profile_name: name of the Profile to load :param local: whether the profile is saved locally (default) or remotely on a database - """ return cls(profile=Profile.load(name=profile_name, local=local)) - def get_proxies(self) -> Optional[dict]: + def get_proxies(self) -> Optional[Dict]: """Retrieve proxies using the loaded Profile's :attr:`~Profile.proxy_key`""" return utils.get_proxies( env_key=self.profile.proxy_key @@ -69,35 +68,40 @@ def get_tweet_client(self) -> TweetClient: proxies=self.proxies ) - def start(self) -> None: - """InstaTweets all users that have been added to the loaded :class:`~.Profile` + def start(self, max_posts: int = 12) -> None: + """InstaTweets all pages that have been added to the loaded :class:`~.Profile` + + The most recent posts from each page will be scraped, then compared to the ``scraped`` + list in the :attr:`~.PAGE_MAPPING` to determine which are new. - Each user's IG page will be scraped and compared to the ``scraped`` list in their :attr:`~.USER_MAPPING`. - Posts that weren't previously scraped will be downloaded and tweeted + Up to ``max_posts`` new posts from each page will then be downloaded and tweeted .. note:: If ``InstaTweet`` fails to :meth:`~.download_post` or :meth:`~.send_tweet`, - the :attr:`~.USER_MAPPING` won't be updated + the :attr:`~.PAGE_MAPPING` won't be updated * This ensures that failed repost attempts are retried in the next call to :meth:`~start` If a save file for the Profile already :attr:`~.exists`, successful reposts will trigger a call to :meth:`~.save` + + :param max_posts: the maximum number of new posts to download and tweet per page """ profile = self.profile profile.validate() print(f'Starting InstaTweet for Profile: {profile.name}') - for user in profile.user_map: - new_posts = self.get_new_posts(user) - if not new_posts: - print(f'No posts to tweet for @{user}') + for page in profile.page_map: + page_name = page if page.startswith("#") else "@" + page + + if not (new_posts := self.get_new_posts(page)): + print(f'No posts to tweet for {page_name}') continue - print(f'There are {len(new_posts)} posts to tweet for @{user}') - hashtags = profile.get_hashtags_for(user) + print(f'There are {len(new_posts)} posts to tweet for {page_name}') + hashtags = profile.get_hashtags_for(page) - for post in new_posts: + for post in new_posts[:max_posts]: self.insta.download_post(post) if not post.is_downloaded: continue @@ -106,38 +110,39 @@ def start(self) -> None: if not tweeted: continue - profile.get_scraped_from(user).append(post.id) - profile.get_tweets_for(user).append(post.tweet_data) + profile.get_scraped_from(page).append(post.id) + profile.get_tweets_for(page).append(post.tweet_data) if profile.exists: profile.save(alert=False) - print(f'Finished insta-tweeting for @{user}') + print(f'Finished insta-tweeting for {page_name}') + + print(f'All pages have been insta-tweeted') - print(f'All users have been insta-tweeted') + def get_new_posts(self, insta_page: str) -> Optional[List[InstaPost]]: + """Scrapes recent posts from an Instagram page and returns all posts that haven't been tweeted yet - def get_new_posts(self, username) -> Optional[List[InstaPost]]: - """Scrapes recent posts from an Instagram user and returns all posts that haven't been tweeted yet + **NOTE:** If a page's ``scraped`` list is empty, no posts will be returned. - **NOTE:** If a user's ``scraped`` list is empty, no posts will be returned. + Instead, the page is "initialized" as follows: - Instead, the user is "initialized" as follows: - * Their ``scraped`` list will be populated with the ID's from the most recent posts - * These IDs are then used in future calls to the method to determine which posts to tweet + * The ``scraped`` list will be populated with the ID's from the most recent posts + * These IDs are then used in future method calls to determine which posts to tweet - :param username: the IG username to scrape posts from - :return: a list of posts that haven't been tweeted yet, or nothing at all (if user is only initialized) + :param insta_page: the Instagram page to scrape posts from + :return: a list of posts that haven't been tweeted yet, or nothing at all (if page is only initialized) """ - print(f'Checking posts from @{username}') - scraped_posts = self.profile.get_scraped_from(username) - user = self.insta.get_user(username) + print(f'Checking posts from {insta_page}') + scraped_posts = self.profile.get_scraped_from(insta_page) + page = self.insta.scrape(insta_page) if scraped_posts: - new_posts = [post for post in user.posts if post.id not in scraped_posts] + new_posts = [post for post in page.posts if post.id not in scraped_posts] return sorted(new_posts, key=lambda post: post.timestamp) else: - scraped_posts.extend(post.id for post in user.posts) - print(f'Initialized User: @{username}') + scraped_posts.extend(post.id for post in page.posts) + print(f'Initialized {page}') return None diff --git a/InstaTweet/instauser.py b/InstaTweet/instauser.py deleted file mode 100644 index bcaf9a6..0000000 --- a/InstaTweet/instauser.py +++ /dev/null @@ -1,81 +0,0 @@ -from __future__ import annotations -from . import InstaPost - - -class InstaUser: - """Minimalistic API response wrapper for an Instagram profile""" - - def __init__(self, data: dict, client: "InstaClient" = None): - """Initialize an :class:`InstaUser` - - :param data: the API response JSON to use as source data - :param client: API client to use; only required for :meth:`~.get_more_posts` - """ - self.json = data - self.client = client - self._posts = [] - - @property - def id(self) -> int: - """Instagram User ID""" - return int(self.user_data.get('id', -1)) - - @property - def posts(self) -> [InstaPost]: - """Returns the list of posts scraped from the Instagram user""" - if not self._posts: - if edges := self.media_data.get('edges'): - self._posts = [InstaPost(edge['node']) for edge in edges] - return self._posts - - @property - def media_data(self) -> dict: - return self.user_data.get('edge_owner_to_timeline_media', {'edges': []}) - - @property - def user_data(self) -> dict: - return self.json.get('data', {}).get('user', {}) - - def get_more_posts(self) -> bool: - """Requests the next page of posts - - If the user :attr:`~.has_more_posts`, they'll be added to the :attr:`~.posts` list - - :returns: ``True`` if the request was successful, otherwise ``False`` - """ - if not self.has_more_posts: - print("All posts have already been scraped") - return False - - if not self.client: - raise AttributeError("InstaClient is required to request more posts") - - endpoint = 'https://www.instagram.com/graphql/query/?query_hash=8c2a529969ee035a5063f2fc8602a0fd' + \ - f'&variables=%7B%22id%22%3A%22{self.id}%22%2C%22first%22%3A12%2C%22' + \ - f'after%22%3A%22{self.end_cursor}%3D%3D%22%7D' - response = self.client.request(endpoint) - if not response.ok: - return False - - try: - u = InstaUser(response.json()) - except Exception as e: - raise RuntimeError('Failed to get more posts') from e - - self.page_info.update(u.page_info) - self._posts.extend(u.posts) - return True - - @property - def has_more_posts(self) -> bool: - """Returns ``True`` if more posts can be scraped using :meth:`~.get_more_posts`""" - return self.page_info.get('has_next_page') - - @property - def end_cursor(self) -> str: - """Cursor used in request by :meth:`~.get_more_posts`""" - return self.page_info.get('end_cursor', '').strip('=') - - @property - def page_info(self) -> dict: - return self.media_data.get('page_info', {}) diff --git a/InstaTweet/profile.py b/InstaTweet/profile.py index c25a616..7cfaad0 100644 --- a/InstaTweet/profile.py +++ b/InstaTweet/profile.py @@ -1,11 +1,10 @@ from __future__ import annotations - import os import copy import json import pickle from pathlib import Path -from typing import Iterable +from typing import Iterable, Dict from . import TweetClient, DBConnection, USER_AGENT @@ -13,70 +12,44 @@ class Profile: """The :class:`Profile` is a configuration class used extensively throughout the package - It consists of a :attr:`~user_map` and an associated collection of API/web scraping :ref:`settings ` + It consists of a :attr:`~page_map` and an associated collection of API/web scraping :ref:`settings ` ... - .. admonition:: About the User Map + .. admonition:: About the Page Map :class: instatweet - The :attr:`~user_map` is a dict containing info about the users added to a :class:`Profile` - - * It's used to help detect new posts and compose tweets on a per-user basis - * Entries are created when you :meth:`add_users`, which map the user to a :attr:`~USER_MAPPING` - * The :attr:`~USER_MAPPING` maintains lists of hashtags, scraped posts, and sent tweets - * The mapping is updated when you :meth:`add_hashtags` and successfully :meth:`~.send_tweet` - - You can access entries in the :attr:`~user_map` as follows: + **The** :attr:`~.page_map` **is a dict containing info about the pages added to a** :class:`~.Profile` - * :meth:`~get_user` allows you to retrieve a full entry by username - * :meth:`~get_hashtags_for`, :meth:`get_scraped_from`, :meth:`get_tweets_for` provide access - to lists + * It's used to help detect new posts and compose tweets on a per-page basis + * Entries are created when you :meth:`~.add_pages`, which map the page to a :attr:`~.PAGE_MAPPING` + * The :attr:`~.PAGE_MAPPING` maintains lists of hashtags, scraped posts, and sent tweets + * The mapping is updated when you :meth:`~.add_hashtags` and successfully :meth:`~.send_tweet` ... **[Optional]** - A unique, identifying :attr:`~name` can be assigned to the Profile, which - may then be used to :meth:`~save` and, in turn, :meth:`~load` its settings + A unique, identifying :attr:`~name` can optionally be assigned to the Profile, + which may then be used to :meth:`~save` and :meth:`~load` its settings - * This makes it extremely easy to switch between Profiles and create templates + The save location is determined by the value of :attr:`Profile.local` as follows: - Saving isn't a requirement to :meth:`~.start` InstaTweet, but... + * If ``True``, saves are made locally to the :attr:`~LOCAL_DIR` as .pickle files + * If ``False``, saves are made remotely to a database as pickle bytes - * To :meth:`~.get_new_posts`, InstaTweet makes comparisons - with the ``scraped`` list in the :attr:`~.user_map` - * Saving this list ensures you don't :meth:`~.send_tweet` - for a post more than once + See :ref:`save-profile` for more information ... - - .. admonition:: Important - :class: important-af - - If you do :meth:`~save` your profile, the save location is determined by the value of :attr:`Profile.local` - - * Local saves are made to the :attr:`~LOCAL_DIR`, as pickle files - * Remote saves are made to a database (via the :mod:`~.db` module) as pickle bytes - - **You MUST configure the** :attr:`~InstaTweet.db.DATABASE_URL` **environment variable to save/load remotely** - - * InstaTweet uses ``SQLAlchemy`` to create a :class:`~.DBConnection` -- any db it supports is compatible - * See the :mod:`~.db` module for more information - """ - - USER_MAPPING = {'hashtags': [], 'scraped': [], 'tweets': []} #: Template for an entry in the ``user_map`` - LOCAL_DIR = Path(__file__).parent.parent.joinpath("profiles") #: Directory where local profiles are saved + #: Template for an entry in the :attr:`~page_map` + PAGE_MAPPING: Dict = {'hashtags': [], 'scraped': [], 'tweets': []} + #: Directory where local profiles are saved + LOCAL_DIR: str = Path(__file__).parent.parent.joinpath("profiles") def __init__(self, name: str = 'default', local: bool = True, **kwargs): """Create a new :class:`Profile` - .. note:: :class:`Profile` creation is mandatory to use the ``InstaTweet`` package - - * Required as a parameter to initialize an :class:`~.InstaTweet` object - * Naming and saving it is ideal, but not necessary to :meth:`~.start` InstaTweet - :param name: unique profile name :param local: indicates if profile is being saved locally or on a remote database :param kwargs: see below @@ -88,30 +61,23 @@ def __init__(self, name: str = 'default', local: bool = True, **kwargs): Twitter API Keys with v1.1 endpoint access (see :attr:`~.TweetClient.DEFAULT_KEYS` for a template) * *user_agent* (``str``) -- Optional - The user agent to use for requests; uses a currently working hardcoded agent if not provided + The user agent to use for requests * *proxy_key* (``str``) -- Optional Environment variable to retrieve proxies from - * .. autoattribute:: user_map - :annotation: - :noindex: - - .. admonition:: **Profile Creation Tips** - :class: instatweet - - * All attributes can be passed as arguments at initialization or set directly afterwards - * Property setters validate data types for the :ref:`mandatory-settings` - * The :class:`~Profile` as a whole is validated by :meth:`~validate` - - """ self.local = local self.name = name # Will raise Exception if name is already used - self.session_id = kwargs.get('session_id', '') - self.twitter_keys = kwargs.get('twitter_keys', TweetClient.DEFAULT_KEYS) - self.user_agent = kwargs.get('user_agent', USER_AGENT) - self.proxy_key = kwargs.get('proxy_key', None) - self.user_map = kwargs.get('user_map', {}) #: ``dict``: Mapping of added Instagram users and their :attr:`~USER_MAPPING` + #: Instagram ``sessionid`` cookie, obtained by logging in through browser + self.session_id: str = kwargs.get('session_id', '') + #: Twitter API Keys with v1.1 endpoint access (see :attr:`~.DEFAULT_KEYS` for a template) + self.twitter_keys: Dict = kwargs.get('twitter_keys', TweetClient.DEFAULT_KEYS) + #: The user agent to use for requests + self.user_agent: str = kwargs.get('user_agent', USER_AGENT) + #: Environment variable to retrieve proxies from + self.proxy_key: str = kwargs.get('proxy_key', None) + #: Mapping of added Instagram pages and their :attr:`~PAGE_MAPPING` + self.page_map: Dict[str, Dict] = kwargs.get('page_map', {}) @classmethod def load(cls, name: str, local: bool = True) -> Profile: @@ -119,7 +85,6 @@ def load(cls, name: str, local: bool = True) -> Profile: :param name: the name of the :class:`Profile` to load :param local: whether the profile is saved locally (default, ``True``) or remotely on a database - """ if not cls.profile_exists(name, local): raise LookupError( @@ -138,7 +103,7 @@ def from_json(cls, json_str: str) -> Profile: return cls.from_dict(json.loads(json_str)) @classmethod - def from_dict(cls, d: dict) -> Profile: + def from_dict(cls, d: Dict) -> Profile: """Creates a profile from a dictionary of config settings""" return cls(**d) @@ -164,55 +129,58 @@ def get_local_path(name: str) -> str: """Returns filepath of where a local profile would be saved""" return os.path.join(Profile.LOCAL_DIR, name) + '.pickle' - def add_users(self, users: Iterable, send_tweet: bool = False): - """Add Instagram user(s) to the :attr:`~.user_map` for subsequent monitoring + def add_pages(self, pages: Iterable, send_tweet: bool = False) -> None: + """Add Instagram page(s) to the :attr:`~.page_map` for subsequent monitoring + + * An Instagram profile can be added as ``"@username"`` or ``"username"`` + * A hashtag must be added as ``"#hashtag"`` + - .. note:: By default, newly added users won't have their posts tweeted the first time they're scraped + .. note:: By default, newly added pages won't have their posts tweeted the first time they're scraped - * The IDs of the ~12 most recent posts are stored in the ``scraped`` list + * The IDs of the most recent posts are stored in the ``scraped`` list * Any new posts from that point forward will be tweeted - You can override this by setting ``send_tweet=True`` + You can scrape AND tweet posts on the first run by setting ``send_tweet=True`` - * This causes their ~12 most recent posts to be scraped AND tweeted - :param users: Instagram username(s) to automatically scrape and tweet content from + :param pages: Instagram pages to automatically scrape and tweet content from :param send_tweet: choose if tweets should be sent on the first scrape, or only for new posts going forward """ - if not isinstance(users, Iterable): - raise TypeError(f'Invalid type provided. `users` must be an Iterable') - if isinstance(users, str): - users = [users] + if not isinstance(pages, Iterable): + raise TypeError(f'Invalid type provided. `pages` must be an Iterable') + if isinstance(pages, str): + pages = [pages] - for user in users: - mapping = copy.deepcopy(Profile.USER_MAPPING) - self.user_map.setdefault(user, mapping) + for page in pages: + mapping = copy.deepcopy(Profile.PAGE_MAPPING) + self.page_map.setdefault(page.lstrip("@"), mapping) if send_tweet: # Non-empty scraped list will trigger Tweets to send - self.get_scraped_from(user).append(-1) + self.get_scraped_from(page).append(-1) - print(f'Added Instagram user @{user} to the user map') + print(f'Added Instagram page {page} to the page map') if self.exists: self._save_profile(alert=False) - def add_hashtags(self, user: str, hashtags: Iterable): - """Add hashtag(s) to a user in the :attr:`~.user_map`, which will be randomly chosen from when composing Tweets + def add_hashtags(self, page: str, hashtags: Iterable): + """Add hashtag(s) to a page in the :attr:`~.page_map`, which will be randomly chosen from when composing Tweets - :param user: the user in the user map to add hashtags to - :param hashtags: hashtags to choose from and include in any Tweets where content comes from this user + :param page: the page in the page map to add hashtags to + :param hashtags: hashtags to choose from and include in any Tweets where content comes from this page """ if not isinstance(hashtags, Iterable): raise TypeError("Hashtags must be provided as a string or iterable of strings") if isinstance(hashtags, str): hashtags = [hashtags] - tags = self.get_hashtags_for(user) # Retrieve the current hashtag list + tags = self.get_hashtags_for(page) # Retrieve the current hashtag list tags.extend(set(hashtags) - set(tags)) # Add new ones (case-sensitive) if self.exists: self._save_profile(alert=False) - print(f'Added hashtags for @{user}') + print(f'Added hashtags for {page}') def save(self, name: str = None, alert: bool = True) -> bool: """Pickles and saves the :class:`Profile` using the specified or currently set name. @@ -242,7 +210,7 @@ def _save_profile(self, alert: bool = True) -> bool: def validate(self) -> None: """Checks to see if the Profile is fully configured for InstaTweeting - :raises ValueError: if the :attr:`~.session_id`, :attr:`~.twitter_keys`, or :attr:`~.user_map` are invalid + :raises ValueError: if the :attr:`~.session_id`, :attr:`~.twitter_keys`, or :attr:`~.page_map` are invalid """ if not self.session_id: raise ValueError('Instagram sessionid cookie is required to scrape posts') @@ -250,8 +218,8 @@ def validate(self) -> None: if bad_keys := [key for key, value in self.twitter_keys.items() if value == 'string']: raise ValueError(f'Values not set for the following Twitter keys: {bad_keys}') - if not self.user_map: - raise ValueError('You must add at least one Instagram user to auto-tweet from') + if not self.page_map: + raise ValueError('You must add at least one Instagram page to auto-tweet from') def to_pickle(self) -> bytes: """Serializes profile to a pickled byte string""" @@ -280,7 +248,7 @@ def config(self) -> dict: 'twitter_keys': self.twitter_keys, 'user_agent': self.user_agent, 'proxy_key': self.proxy_key, - 'user_map': self.user_map, + 'page_map': self.page_map, } @property @@ -300,28 +268,26 @@ def profile_path(self) -> str: return Profile.get_local_path(self.name) return '' - def get_user(self, user: str) -> dict: - """Returns the specified user's dict entry in the :attr:`user_map`""" - return self.user_map[user] + def get_page(self, page: str) -> dict: + """Returns the specified page's dict entry in the :attr:`page_map`""" + return self.page_map[page.lstrip('@')] - def get_scraped_from(self, user: str) -> list: - """Returns a list of posts that have been scraped from the specified user""" - return self.user_map[user]['scraped'] + def get_scraped_from(self, page: str) -> list: + """Returns a list of posts that have been scraped from the specified page""" + return self.get_page(page)['scraped'] - def get_tweets_for(self, user: str) -> list: - """Returns a list of tweets that use the specified user's scraped content""" - return self.user_map[user]['tweets'] + def get_tweets_for(self, page: str) -> list: + """Returns a list of tweets that use the specified page's scraped content""" + return self.get_page(page)['tweets'] - def get_hashtags_for(self, user: str) -> list: - """Returns the hashtag list for the specified user""" - return self.user_map[user]['hashtags'] + def get_hashtags_for(self, page: str) -> list: + """Returns the hashtag list for the specified page""" + return self.get_page(page)['hashtags'] @property def local(self) -> bool: """Indicates if saves should be made locally (``True``) or on a remote database (``False``) - - :rtype: bool - """ + """ return self._local @local.setter @@ -359,7 +325,7 @@ def name(self) -> str: return self._name @name.setter - def name(self, profile_name): + def name(self, profile_name: str): """Sets the profile name, if a profile with that name doesn't already exist locally/remotely""" if profile_name != 'default' and self.profile_exists(profile_name, local=self.local): if self.local: @@ -392,7 +358,7 @@ def session_id(self, session_id: str): self._save_profile(alert=False) @property - def twitter_keys(self) -> dict: + def twitter_keys(self) -> Dict: """Twitter developer API keys with v1.1 endpoint access. See :attr:`~.DEFAULT_KEYS`""" return self._twitter_keys diff --git a/InstaTweet/tweetclient.py b/InstaTweet/tweetclient.py index b7d86e6..ff8b6b5 100644 --- a/InstaTweet/tweetclient.py +++ b/InstaTweet/tweetclient.py @@ -131,7 +131,7 @@ def build_tweet(self, post: InstaPost, hashtags: Optional[list[str]] = None) -> :class: instatweet - The :attr:`~.InstaPost.caption` is used as a starting point - - If you :meth:`~.add_hashtags` for the user, it will randomly :meth:`~pick_hashtags` to include + - If you :meth:`~.add_hashtags` for the page, it will randomly :meth:`~pick_hashtags` to include - Lastly, the post's :attr:`~.InstaPost.permalink` is added to the end **Example**:: diff --git a/README.rst b/README.rst index 68dcc9d..b7c9d4c 100644 --- a/README.rst +++ b/README.rst @@ -3,15 +3,15 @@ :description: A Python package to automatically repost content from Instagram to Twitter .. |.InstaTweet| replace:: ``InstaTweet`` -.. _.InstaTweet: https://github.com/tdkorn/insta-tweet/blob/master/InstaTweet/instatweet.py#L5-L142 -.. |.add_users| replace:: ``add_users()`` -.. _.add_users: https://github.com/tdkorn/insta-tweet/blob/master/InstaTweet/profile.py#L167-L197 +.. _.InstaTweet: https://github.com/tdkorn/insta-tweet/blob/master/InstaTweet/instatweet.py#L5-L147 +.. |.add_pages| replace:: ``add_pages()`` +.. _.add_pages: https://github.com/tdkorn/insta-tweet/blob/master/InstaTweet/profile.py#L132-L165 .. |.Profile| replace:: ``Profile`` -.. _.Profile: https://github.com/tdkorn/insta-tweet/blob/master/InstaTweet/profile.py#L12-L416 +.. _.Profile: https://github.com/tdkorn/insta-tweet/blob/master/InstaTweet/profile.py#L11-L382 .. |.start| replace:: ``start()`` -.. _.start: https://github.com/tdkorn/insta-tweet/blob/master/InstaTweet/instatweet.py#L72-L117 +.. _.start: https://github.com/tdkorn/insta-tweet/blob/master/InstaTweet/instatweet.py#L71-L121 .. |.InstaClient| replace:: ``InstaClient`` -.. _.InstaClient: https://github.com/tdkorn/insta-tweet/blob/master/InstaTweet/instaclient.py#L14-L108 +.. _.InstaClient: https://github.com/tdkorn/insta-tweet/blob/master/InstaTweet/instaclient.py#L16-L159 .. |mandatory-settings| replace:: mandatory settings .. _mandatory-settings: https://instatweet.readthedocs.io/en/latest/_readme/getting-started.html#mandatory-settings @@ -86,7 +86,7 @@ What's InstaTweet? -Simply create a |.Profile|_, configure the |mandatory-settings|_, and |.add_users|_ to repost from +Simply create a |.Profile|_, configure the |mandatory-settings|_, and |.add_pages|_ to repost from .. code-block:: python @@ -100,10 +100,10 @@ Simply create a |.Profile|_, configure the |mandatory-settings|_, and |.add_user >>> profile.twitter_keys = twitter_api_keys >>> profile.session_id = '6011991A' - # Add at least one Instagram account to repost from - >>> profile.add_users('the.dailykitten') + # Add at least one Instagram page (user/hashtag) to repost from + >>> profile.add_pages(['the.dailykitten', '#thedailykitten']) - # Save the Profile [optional] + # Save the Profile [optional] >>> profile.save() Saved Local Profile myProfile @@ -127,7 +127,8 @@ Once configured, the |.Profile|_ can be used to initialize and |.start|_ InstaTw | -.. image:: https://user-images.githubusercontent.com/96394652/232274766-71e87fb2-f402-466d-9624-f775d8e985ac.png +.. image:: https://user-images.githubusercontent.com/96394652/236979506-83d12d6f-114d-43ce-b4db-b062f8d0ed3a.png + :width: 700px | @@ -137,9 +138,11 @@ As ``InstaTweet`` runs, its progress will be logged to console: Starting InstaTweet for Profile: myProfile Checking posts from @the.dailykitten - - Finished insta-tweeting for @the.dailykitten - All users have been insta-tweeted + ... + Checking posts from #thedailykitten + ... + Finished insta-tweeting for #thedailykitten + All pages have been insta-tweeted ... @@ -158,9 +161,9 @@ Okay... But Why? 😟 **InstaTweet has two main use cases:** * To automatically share your own Instagram posts to Twitter -* To automatically tweet new content from other Instagram users +* To automatically tweet new content from other Instagram users/hashtags -Regardless of your intention, InstaTweet will detect new posts from the users you specify, +Regardless of your intention, InstaTweet will detect new posts from the pages you specify, download them, and repost them to Twitter. .. raw:: html @@ -182,16 +185,22 @@ The package's custom |.InstaClient|_ can also be used as a standalone Instagram from InstaTweet import InstaClient >>> ig = InstaClient(session_id="kjfdn309wredsfl") + + # Scrape Instagram user or hashtag >>> user = ig.get_user('dailykittenig') - >>> print(user) + >>> hashtag = ig.get_hashtag('#dailykitten') + >>> print(user, hashtag, sep='\n') - + Instagram User: @dailykittenig + Instagram Hashtag: #dailykitten - >>> print(user.posts) - >>> ig.download_post(user.posts[0]) + # Download most recent post + >>> post = user.posts[0] + >>> print(post) + >>> ig.download_post(post) - [, ...] - Downloaded post https://www.instagram.com/p/Clht4NRrqRO by dailykittenig to C:\\path\\to\\insta-tweet\\downloads\\2981866202934977614.mp4 + Post 2981866202934977614 by @dailykittenig on 2022-11-29 01:44:37 + Downloaded post https://www.instagram.com/p/Clht4NRrqRO by dailykittenig to C:\path\to\insta-tweet\downloads\2981866202934977614.mp4 ... diff --git a/README_PyPi.rst b/README_PyPi.rst index 7448dc3..b8aacb9 100644 --- a/README_PyPi.rst +++ b/README_PyPi.rst @@ -2,16 +2,16 @@ :title: InstaTweet - Automatically Repost Content From Instagram to Twitter :description: A Python package to automatically repost content from Instagram to Twitter -.. |.InstaTweet| replace:: ``InstaTweet`` -.. _.InstaTweet: https://github.com/tdkorn/insta-tweet/blob/master/InstaTweet/instatweet.py#L5-L142 -.. |.add_users| replace:: ``add_users()`` -.. _.add_users: https://github.com/tdkorn/insta-tweet/blob/master/InstaTweet/profile.py#L167-L197 -.. |.Profile| replace:: ``Profile`` -.. _.Profile: https://github.com/tdkorn/insta-tweet/blob/master/InstaTweet/profile.py#L12-L416 -.. |.start| replace:: ``start()`` -.. _.start: https://github.com/tdkorn/insta-tweet/blob/master/InstaTweet/instatweet.py#L72-L117 -.. |.InstaClient| replace:: ``InstaClient`` -.. _.InstaClient: https://github.com/tdkorn/insta-tweet/blob/master/InstaTweet/instaclient.py#L14-L108 +.. |.InstaTweet| replace:: InstaTweet +.. _.InstaTweet: https://github.com/tdkorn/insta-tweet/blob/master/InstaTweet/instatweet.py#L5-L147 +.. |.add_pages| replace:: add_pages() +.. _.add_pages: https://github.com/tdkorn/insta-tweet/blob/master/InstaTweet/profile.py#L132-L165 +.. |.Profile| replace:: Profile +.. _.Profile: https://github.com/tdkorn/insta-tweet/blob/master/InstaTweet/profile.py#L11-L382 +.. |.start| replace:: start() +.. _.start: https://github.com/tdkorn/insta-tweet/blob/master/InstaTweet/instatweet.py#L71-L121 +.. |.InstaClient| replace:: InstaClient +.. _.InstaClient: https://github.com/tdkorn/insta-tweet/blob/master/InstaTweet/instaclient.py#L16-L159 .. |mandatory-settings| replace:: mandatory settings .. _mandatory-settings: https://instatweet.readthedocs.io/en/latest/_readme/getting-started.html#mandatory-settings @@ -76,7 +76,7 @@ What's InstaTweet? -Simply create a |.Profile|_, configure the |mandatory-settings|_, and |.add_users|_ to repost from +Simply create a |.Profile|_, configure the |mandatory-settings|_, and |.add_pages|_ to repost from .. code-block:: python @@ -90,8 +90,8 @@ Simply create a |.Profile|_, configure the |mandatory-settings|_, and |.add_user >>> profile.twitter_keys = twitter_api_keys >>> profile.session_id = '6011991A' - # Add at least one Instagram account to repost from - >>> profile.add_users('the.dailykitten') + # Add at least one Instagram page (user/hashtag) to repost from + >>> profile.add_pages(['the.dailykitten', '#thedailykitten']) # Save the Profile [optional] >>> profile.save() @@ -117,7 +117,8 @@ Once configured, the |.Profile|_ can be used to initialize and |.start|_ InstaTw | -.. image:: https://user-images.githubusercontent.com/96394652/232274766-71e87fb2-f402-466d-9624-f775d8e985ac.png +.. image:: https://user-images.githubusercontent.com/96394652/236979506-83d12d6f-114d-43ce-b4db-b062f8d0ed3a.png + :width: 700px | @@ -127,9 +128,11 @@ As ``InstaTweet`` runs, its progress will be logged to console: Starting InstaTweet for Profile: myProfile Checking posts from @the.dailykitten - - Finished insta-tweeting for @the.dailykitten - All users have been insta-tweeted + ... + Checking posts from #thedailykitten + ... + Finished insta-tweeting for #thedailykitten + All pages have been insta-tweeted ... @@ -166,16 +169,22 @@ The package's custom |.InstaClient|_ can also be used as a standalone Instagram from InstaTweet import InstaClient >>> ig = InstaClient(session_id="kjfdn309wredsfl") + + # Scrape Instagram user or hashtag >>> user = ig.get_user('dailykittenig') - >>> print(user) + >>> hashtag = ig.get_hashtag('#dailykitten') + >>> print(user, hashtag, sep='\n') - + Instagram User: @dailykittenig + Instagram Hashtag: #dailykitten - >>> print(user.posts) - >>> ig.download_post(user.posts[0]) + # Download most recent post + >>> post = user.posts[0] + >>> print(post) + >>> ig.download_post(post) - [, ...] - Downloaded post https://www.instagram.com/p/Clht4NRrqRO by dailykittenig to C:\\path\\to\\insta-tweet\\downloads\\2981866202934977614.mp4 + Post 2981866202934977614 by @dailykittenig on 2022-11-29 01:44:37 + Downloaded post https://www.instagram.com/p/Clht4NRrqRO by dailykittenig to C:\path\to\insta-tweet\downloads\2981866202934977614.mp4 ... diff --git a/docs/source/_readme/about-instatweet.rst b/docs/source/_readme/about-instatweet.rst index 9432f61..82ade59 100644 --- a/docs/source/_readme/about-instatweet.rst +++ b/docs/source/_readme/about-instatweet.rst @@ -57,7 +57,7 @@ What's InstaTweet? Simply create a :class:`~.Profile`, configure the :ref:`mandatory-settings`, -and :meth:`~.add_users` to repost from +and :meth:`~.add_pages` to repost from .. code-block:: python @@ -70,8 +70,8 @@ and :meth:`~.add_users` to repost from >>> profile.twitter_keys = twitter_api_keys >>> profile.session_id = '6011991A' - # Add at least one Instagram account to repost from - >>> profile.add_users('the.dailykitten') + # Add at least one Instagram page (user/hashtag) to repost from + >>> profile.add_pages(['the.dailykitten', '#thedailykitten']) # Save the Profile [optional] >>> profile.save() @@ -110,8 +110,10 @@ As ``InstaTweet`` runs, its progress will be logged to console: Starting InstaTweet for Profile: myProfile Checking posts from @the.dailykitten ... - Finished insta-tweeting for @the.dailykitten - All users have been insta-tweeted + Checking posts from #thedailykitten + ... + Finished insta-tweeting for #thedailykitten + All pages have been insta-tweeted Okay... But Why? 😟 @@ -123,9 +125,9 @@ Okay... But Why? 😟 **InstaTweet has two main use cases:** * To automatically share your own Instagram posts to Twitter - * To automatically tweet new content from other Instagram users + * To automatically tweet new content from other Instagram users/hashtags - Regardless of your intention, InstaTweet will detect new posts from the users you specify, + Regardless of your intention, InstaTweet will detect new posts from the pages you specify, download them, and repost them to Twitter. ... diff --git a/docs/source/_readme/getting-started.rst b/docs/source/_readme/getting-started.rst index 8327ac7..1e65411 100644 --- a/docs/source/_readme/getting-started.rst +++ b/docs/source/_readme/getting-started.rst @@ -5,7 +5,7 @@ Getting Started InstaTweet Profiles ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -**InstaTweet** uses the :class:`~.Profile` class to help manage Twitter accounts, Instagram sessions, and user maps. +**InstaTweet** uses the :class:`~.Profile` class to help manage Twitter accounts, Instagram sessions, and page maps. .. autoclass:: InstaTweet.profile.Profile :no-members: @@ -17,11 +17,6 @@ InstaTweet Profiles Profile Settings ~~~~~~~~~~~~~~~~~~~ -All settings can be configured in two ways: - -1. By passing them as keyword arguments when initializing a :class:`~.Profile` -2. By setting them directly as object attributes after the :class:`~.Profile` object is created - .. _mandatory-settings: @@ -43,21 +38,24 @@ Entirely Optional Settings ========================================= * ``proxy_key`` — Environment variable to retrieve proxies from when making requests to Instagram/Twitter -* :attr:`~.user_map` — Fully formatted dictionary of IG usernames mapped to their ``USER_MAPPING`` +* :attr:`~.page_map` — Fully formatted dictionary of IG pages mapped to their ``PAGE_MAPPING`` Creating a Profile ~~~~~~~~~~~~~~~~~~~~~~~ +Profile settings can be configured + +1. By passing them as keyword arguments when initializing a :class:`~.Profile` +2. By setting them directly as object attributes after the :class:`~.Profile` object is created + + .. code-block:: python from InstaTweet import Profile # Initialize a profile with arguments - p = Profile( - name='myProfile', - session_id='6011991A' - ) + p = Profile('myProfile', session_id='6011991A') # Initialize a profile with no arguments q = Profile() @@ -65,17 +63,14 @@ Creating a Profile q.session_id = '6011991A' -All settings can be accessed via the :attr:`~.Profile.config` dict. -If you just want to look, call :meth:`~.view_config` +All settings can be accessed via the :attr:`~.Profile.config` dict, +which can be pretty printed using :meth:`~.view_config` .. code-block:: python # View and compare configuration settings >>> q.view_config() - >>> print(f'Same Config: {p.config==q.config}') - -Output: .. code-block:: shell @@ -85,9 +80,7 @@ Output: twitter_keys : {'Consumer Key': 'string', 'Consumer Secret': 'string', 'Access Token': 'string', 'Token Secret': 'string'} user_agent : Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.63 Safari/537.36 proxy_key : None - user_map : {} - Same Config: True - + page_map : {} ... @@ -99,61 +92,61 @@ Output: which will first :meth:`~.Profile.validate` the profile settings -Populating the User Map +Populating the Page Map ~~~~~~~~~~~~~~~~~~~~~~~~~~ -The User Map +The Page Map ============== -The :attr:`~.user_map` allows a :class:`~.Profile` to maintain -a history of package-related activity for its added IG users +The :attr:`~.page_map` allows a :class:`~.Profile` to maintain +a history of package-related activity for its added IG pages (users or hashtags). -Users are mapped to their :attr:`~.USER_MAPPING`, which contains their associated lists of: +Pages are mapped to their :attr:`~.PAGE_MAPPING`, which contains their associated lists of: .. code-block:: python - USER_MAPPING = {'hashtags': [], 'scraped': [], 'tweets': []} + PAGE_MAPPING = {'hashtags': [], 'scraped': [], 'tweets': []} -* ``hashtags`` — the user's associated hashtag list (for use when composing tweets) -* ``scraped`` — the list of posts that have been scraped from the user (only the post id) -* ``tweets`` — the list of sent tweets containing media scraped from that user (limited data) +* ``hashtags`` — the page's associated hashtag list (for use when composing tweets) +* ``scraped`` — the list of posts that have been scraped from the page (only the post id) +* ``tweets`` — the list of sent tweets containing media scraped from that page (limited data) -The mapping gets updated each time :class:`~.InstaTweet` successfully scrapes and tweets a post from the user +The mapping gets updated each time :class:`~.InstaTweet` successfully scrapes and tweets a post from the page -Adding Users +Adding Pages ================= -Use the :meth:`~.add_users` method to add one or more Instagram users -to a :class:`~.Profile`'s :attr:`~.user_map` +Use the :meth:`~.add_pages` method to add one or more Instagram pages +to a :class:`~.Profile`'s :attr:`~.page_map` .. code-block:: python from InstaTweet import Profile - # Add one user at a time + # Add one page at a time >>> p = Profile('myProfile') - >>> p.add_users('the.dailykitten', send_tweet=True) + >>> p.add_pages('the.dailykitten', send_tweet=True) - Added Instagram user @the.dailykitten to the user map + Added Instagram page @the.dailykitten to the page map - # Add multiple users at once - >>> usernames = ['dailykittenig','the.daily.kitten.ig'] - >>> p.add_users(usernames) + # Add multiple pages at once + >>> pages = ['dailykittenig','#thedailykitten'] + >>> p.add_pages(pages) - Added Instagram user @dailykittenig to the user map - Added Instagram user @the.daily.kitten.ig to the user map + Added Instagram page @dailykittenig to the page map + Added Instagram page #thedailykitten to the page map -The :meth:`~.Profile.get_user` method can be used to retrieve the full :attr:`~.USER_MAPPING` of an added user +The :meth:`~.Profile.get_page` method can be used to retrieve the full :attr:`~.PAGE_MAPPING` of an added page .. code-block:: python - >> p.get_user('the.dailykitten') + >> p.get_page('the.dailykitten') {'hashtags': [], 'scraped': [-1], 'tweets': []} @@ -161,33 +154,31 @@ The :meth:`~.Profile.get_user` method can be used to retrieve the full :attr:`~. Adding Hashtags ================= -You can :meth:`~.add_hashtags` for each user in the :attr:`~.user_map` +You can :meth:`~.add_hashtags` for each page in the :attr:`~.page_map` * They'll be chosen from at random when composing tweets based on one of their :attr:`~.posts` * For more info, see :meth:`~.pick_hashtags`, :meth:`~.build_tweet` and :meth:`~.send_tweet` .. code-block:: python - # Add a single hashtag for a specific user - >>> p.add_hashtags(user='dailykittenig', hashtags='cats') + # Add a single hashtag for a specific page + >>> p.add_hashtags('dailykittenig', 'cats') - Added hashtags for @dailykittenig + Added hashtags for dailykittenig # Add multiple hashtags at once - >>> users = ['the.dailykitten','the.daily.kitten.ig'] + >>> pages = ['the.dailykitten', '#thedailykitten'] >>> hashtags = ['kittygram', 'kittycat'] - >>> for user in users: - ... p.add_hashtags(user, hashtags) + >>> for page in pages: + ... p.add_hashtags(page, hashtags) - Added hashtags for @the.dailykitten - Added hashtags for @the.daily.kitten.ig + Added hashtags for the.dailykitten + Added hashtags for #thedailykitten >>> p.view_config() -Output: - .. code-block:: shell name : myProfile @@ -196,84 +187,26 @@ Output: twitter_keys : {'Consumer Key': 'string', 'Consumer Secret': 'string', 'Access Token': 'string', 'Token Secret': 'string'} user_agent : Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.63 Safari/537.36 proxy_key : None - user_map : {'the.dailykitten': {'hashtags': ['kittygram', 'kittycat'], 'scraped': [-1], 'tweets': []}, 'dailykittenig': {'hashtags': ['cats'], 'scraped': [], 'tweets': []}, 'the.daily.kitten.ig': {'hashtags': ['kittygram', 'kittycat'], 'scraped': [], 'tweets': []}} + page_map : {'the.dailykitten': {'hashtags': ['kittygram', 'kittycat'], 'scraped': [-1], 'tweets': []}, 'dailykittenig': {'hashtags': ['cats'], 'scraped': [], 'tweets': []}, '#thedailykitten': {'hashtags': ['kittygram', 'kittycat'], 'scraped': [], 'tweets': []}} -User Map Access Methods +Page Map Access Methods =========================== -.. admonition:: User Map Access Methods +.. admonition:: Page Map Access Methods :class: instatweet - The :class:`~.Profile` has several methods that allow for easy access to the :attr:`~.user_map` - - * :meth:`~.Profile.get_user` provides access to a particular user's :attr:`~.USER_MAPPING` - * :meth:`~.get_scraped_from` returns the list of posts scraped from a specified user - * :meth:`~.get_hashtags_for` returns the list of hashtags to use in tweets for the specified user - * :meth:`~.get_tweets_for` returns a list of tweets that use the specified user's scraped content - - -All lists returned by these methods can be modified in place. For example: - -.. code-block:: python - - # View the list of hashtags by username - >> print(p.get_hashtags_for('the.daily.kitten.ig')) - - ['kittygram', 'kittycat'] - - # Retrieve and modify the list - >> p.get_hashtags_for('the.daily.kitten.ig').append('kittypoop') - >> print(p.get_hashtags_for('the.daily.kitten.ig')) + The :class:`~.Profile` has several methods that allow for easy access to the :attr:`~.page_map` - ['kittygram', 'kittycat', 'kittypoop'] + * :meth:`~.Profile.get_page` provides access to a particular page's :attr:`~.PAGE_MAPPING` + * :meth:`~.get_scraped_from` returns the list of posts scraped from a specified page + * :meth:`~.get_hashtags_for` returns the list of hashtags to use in tweets for the specified page + * :meth:`~.get_tweets_for` returns a list of tweets that use the specified page's scraped content -Saving a Profile -~~~~~~~~~~~~~~~~~~~~~ - .. include:: ../_snippets/save-profile.rst - :start-line: 3 - - -Although you don't *need* to :meth:`~.save` the Profile to :meth:`~.start` InstaTweet, -it's highly suggested since: - - * It's an easy way to group API settings together - * It keeps track of previously scraped & tweeted posts, which is used to detect new posts - - -Example: Save a Profile -======================== - -.. note:: You can specify a new :attr:`~.Profile.name` - for the profile in the call to :meth:`~.save` - - -.. code-block:: python - - from InstaTweet import Profile - - >>> p = Profile('myProfile') - >>> p.save() - - Saved Local Profile myProfile - - >>> q = Profile() - >>> q.save('aProfile') - - Saved Local Profile aProfile - - # Try to save under a name that's already used... - >>> q.save('myProfile') - - FileExistsError: Local save file already exists for profile named "myProfile" - Please choose another name, load the profile, or delete the file. - - >>> Profile.profile_exists("aProfile") - True .. include:: /_snippets/run-profile.rst diff --git a/docs/source/_snippets/about-the-page-map.rst b/docs/source/_snippets/about-the-page-map.rst new file mode 100644 index 0000000..d7e6116 --- /dev/null +++ b/docs/source/_snippets/about-the-page-map.rst @@ -0,0 +1,19 @@ +.. _about-page-map: +About the Page Map +~~~~~~~~~~~~~~~~~~~ + +.. admonition:: About the Page Map + :class: instatweet + + **The** :attr:`~.page_map` **is a dict containing info about the pages added to a** :class:`~.Profile` + + * It's used to help detect new posts and compose tweets on a per-page basis + * Entries are created when you :meth:`~.add_pages`, which map the page to a :attr:`~.PAGE_MAPPING` + * The :attr:`~.PAGE_MAPPING` maintains lists of hashtags, scraped posts, and sent tweets + * The mapping is updated when you :meth:`~.add_hashtags` and successfully :meth:`~.send_tweet` + + **You can access entries in the** :attr:`~.page_map` **as follows:** + + * :meth:`~.Profile.get_page` allows you to retrieve a full entry by page name + * :meth:`~.get_hashtags_for`, :meth:`.get_scraped_from`, :meth:`.get_tweets_for` provide access + to lists diff --git a/docs/source/_snippets/about-the-user-map.rst b/docs/source/_snippets/about-the-user-map.rst deleted file mode 100644 index 34be67a..0000000 --- a/docs/source/_snippets/about-the-user-map.rst +++ /dev/null @@ -1,20 +0,0 @@ -.. _About the User Map: - -About the User Map -~~~~~~~~~~~~~~~~~~~ - -.. admonition:: About the User Map - :class: instatweet - - **The** :attr:`~.user_map` **is a dict containing info about the users added to a** :class:`~.Profile` - - * It's used to help detect new posts and compose tweets on a per-user basis - * Entries are created when you :meth:`~.add_users`, which map the user to a :attr:`~.USER_MAPPING` - * The :attr:`~.USER_MAPPING` maintains lists of hashtags, scraped posts, and sent tweets - * The mapping is updated when you :meth:`~.add_hashtags` and successfully :meth:`~.send_tweet` - - **You can access entries in the** :attr:`~.user_map` **as follows:** - - * :meth:`~.Profile.get_user` allows you to retrieve a full entry by username - * :meth:`~.get_hashtags_for`, :meth:`.get_scraped_from`, :meth:`.get_tweets_for` provide access - to lists diff --git a/docs/source/_snippets/run-profile.rst b/docs/source/_snippets/run-profile.rst index a9e0c81..e2e821b 100644 --- a/docs/source/_snippets/run-profile.rst +++ b/docs/source/_snippets/run-profile.rst @@ -34,6 +34,8 @@ As ``InstaTweet`` runs, its progress will be logged to console: Starting InstaTweet for Profile: myProfile Checking posts from @the.dailykitten ... - Finished insta-tweeting for @the.dailykitten - All users have been insta-tweeted + Checking posts from #thedailykitten + ... + Finished insta-tweeting for #thedailykitten + All pages have been insta-tweeted diff --git a/docs/source/_snippets/save-profile.rst b/docs/source/_snippets/save-profile.rst index ced32a8..4bf7e20 100644 --- a/docs/source/_snippets/save-profile.rst +++ b/docs/source/_snippets/save-profile.rst @@ -1,3 +1,5 @@ +.. _save-profile: + Saving a Profile ~~~~~~~~~~~~~~~~~~ @@ -30,4 +32,37 @@ Saving a Profile InstaTweet uses ``SQLAlchemy`` to create a :class:`~.DBConnection` * Any ``SQLAlchemy``-supported database is therefore also supported by ``InstaTweet`` - * See the :mod:`~.db` module for more information \ No newline at end of file + * See the :mod:`~.db` module for more information + + +Example: Save a Profile +======================== + +.. note:: You can specify a new :attr:`~.Profile.name` + for the profile in the call to :meth:`~.save` + +.. code-block:: python + + from InstaTweet import Profile + + >>> p = Profile('myProfile') + >>> p.save() + + Saved Local Profile myProfile + + >>> p.save('aProfile') + >>> print(p.name) + + Saved Local Profile aProfile + aProfile + +Profile names must be unique - you cannot save or create a profile if a +:meth:`~.profile_exists` with that name already + +.. code-block:: python + + >>> q = Profile('myProfile') + + FileExistsError: Local save file already exists for profile named "myProfile" + Please choose another name, load the profile, or delete the file. + diff --git a/docs/source/_snippets/use-instaclient.rst b/docs/source/_snippets/use-instaclient.rst index 89ec970..d5980b8 100644 --- a/docs/source/_snippets/use-instaclient.rst +++ b/docs/source/_snippets/use-instaclient.rst @@ -10,15 +10,21 @@ The package's custom :class:`~.InstaClient` can be used separately to scrape Ins from InstaTweet import InstaClient >>> ig = InstaClient(session_id="kjfdn309wredsfl") + + # Scrape Instagram user or hashtag >>> user = ig.get_user('dailykittenig') - >>> print(user) + >>> hashtag = ig.get_hashtag('#dailykitten') + >>> print(user, hashtag, sep='\n') - + Instagram User: @dailykittenig + Instagram Hashtag: #dailykitten - >>> print(user.posts) - >>> ig.download_post(user.posts[0]) + # Download most recent post + >>> post = user.posts[0] + >>> print(post) + >>> ig.download_post(post) - [, ...] - Downloaded post https://www.instagram.com/p/Clht4NRrqRO by dailykittenig to C:\\path\\to\\insta-tweet\\downloads\\2981866202934977614.mp4 + Post 2981866202934977614 by @dailykittenig on 2022-11-29 01:44:37 + Downloaded post https://www.instagram.com/p/Clht4NRrqRO by dailykittenig to C:\path\to\insta-tweet\downloads\2981866202934977614.mp4 diff --git a/docs/source/conf.py b/docs/source/conf.py index b405fe0..1986a7f 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -218,7 +218,7 @@ def replace_autodoc_refs_with_linkcode(info: dict, link: str, rst_src: str): ================================= By https://github.com/TDKorn ===================================== - For example, :meth:`~.InstaClient.get_user` would be rendered in HTML as an outlined "get_user()" link + For example, :meth:`~.InstaClient.get_page` would be rendered in HTML as an outlined "get_page()" link that contains an internal reference to the corresponding documentation entry (assuming it exists) We love it, it's great. Fr. But it's ugly and useless on GitHub and PyPi. Literally so gross. @@ -229,7 +229,7 @@ def replace_autodoc_refs_with_linkcode(info: dict, link: str, rst_src: str): .. note:: links are of the format https://github.com/user/repo/blob/branch/package/file.py#L30-L35 For example, - `get_user() `_ + `get_page() `_ :param info: the info dict from linkcode_resolve diff --git a/docs/source/index.rst b/docs/source/index.rst index d2df4cf..9d19b46 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -5,7 +5,7 @@ .. include:: _readme/about-instatweet.rst :start-line: 2 - :end-line: 144 + :end-line: 151 .. only:: html diff --git a/docs/source/instauser.rst b/docs/source/instapage.rst similarity index 56% rename from docs/source/instauser.rst rename to docs/source/instapage.rst index 66440ac..48c145d 100644 --- a/docs/source/instauser.rst +++ b/docs/source/instapage.rst @@ -1,7 +1,7 @@ -The ``InstaUser`` class +The ``InstaPage`` module ~~~~~~~~~~~~~~~~~~~~~~~~~ -.. automodule:: InstaTweet.instauser +.. automodule:: InstaTweet.instapage :members: :undoc-members: :show-inheritance: diff --git a/docs/source/modules.rst b/docs/source/modules.rst index 017f93e..a71e4a4 100644 --- a/docs/source/modules.rst +++ b/docs/source/modules.rst @@ -12,7 +12,7 @@ Below, you'll find the documentation for each class/module in the ``InstaTweet` db tweetclient instaclient - instauser + instapage instapost utils