Skip to content

Commit

Permalink
reverted removing already downloaded hentai ID from downloadme.txt, i…
Browse files Browse the repository at this point in the history
…nstead save galleries now in galleries.json
  • Loading branch information
9FS committed Oct 11, 2023
1 parent 3f377e5 commit 97c71da
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 64 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ license = "MIT"
name = "x" # can't leave empty because of bug with `poetry install` from poetry.lock file
readme = "readme.md"
repository = "https://github.com/9-FS/2021-11-15-nHentai-to-PDF"
version = "1.2.0"
version = "1.3.0"

[tool.poetry.dependencies]
kfsconfig = "^1.0.0"
Expand Down
101 changes: 71 additions & 30 deletions src/Hentai.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@ class Hentai:
represents an individual hentai
"""

def __init__(self, hentai_ID: int, cookies: dict[str, str], headers: dict[str, str]):
def __init__(self, nhentai_ID: int, cookies: dict[str, str], headers: dict[str, str]):
"""
Constructs hentai object. Downloads data from the nhentai API.
Arguments:
- hentai_ID: the hentai from nhentai.net found here: https://nhentai.net/g/{hentai_ID}
- nhentai_ID: the hentai from nhentai.net found here: https://nhentai.net/g/{hentai_ID}
- cookies: cookies to send with the request to bypass bot protection
- headers: user agent to send with the request to bypass bot protection
Expand All @@ -32,61 +32,102 @@ def __init__(self, hentai_ID: int, cookies: dict[str, str], headers: dict[str, s
- ValueError: Hentai with ID \"{self.ID}\" does not exist.
"""

gallery_page: requests.Response
NHENTAI_GALLERY_API_URL: str="https://nhentai.net/api/gallery" # URL to nhentai API
self._fails: list[int] # list of how many times individual page has failed to be downloaded or converted to PDF
self._gallery: dict # gallery from nhentai API, saved to extract data for download later
self._give_up: bool=False # give this hentai up? after failing to download or convert numerous times
self.ID: int # nhentai ID
self.page_amount: int # number of pages
self.title: str # title (unchanged)
self._fails: list[int] # list of how many times individual page has failed to be downloaded or converted to PDF
self._gallery: dict # gallery from nhentai API, saved to extract data for download later
self._give_up: bool=False # give this hentai up? after failing to download or convert numerous times
self.ID: int # nhentai ID
self.page_amount: int # number of pages
self.title: str # title (unchanged)


logging.debug(f"Creating hentai object...")
self.ID=nhentai_ID
self._gallery=self._get_gallery(self.ID, cookies, headers)
self.page_amount=int(self._gallery["num_pages"])
self.title=self._gallery["title"]["pretty"]
self._fails=[0 for _ in range(self.page_amount)] # initialise with amount of pages number of zeros
logging.debug(f"Created hentai object.")
logging.debug(self.__repr__())

return


def __str__(self) -> str:
return f"{self.ID}: \"{self.title}\""


@staticmethod
def _get_gallery(nhentai_ID: int, cookies: dict[str, str], headers: dict[str, str]) -> dict:
"""
Tries to load nhentai API gallery from file first, if not found downloads it from nhentai API.
Arguments:
- nhentai_ID: the hentai from nhentai.net found here: https://nhentai.net/g/{hentai_ID}
- cookies: cookies to send with the request to bypass bot protection
- headers: user agent to send with the request to bypass bot protection
Returns:
- gallery: gallery from nhentai API
self.ID=hentai_ID
Raises:
- requests.HTTPError: Downloading gallery from \"{NHENTAI_GALLERY_API_URL}/{self.ID}\" failed multiple times.
- ValueError: Hentai with ID \"{self.ID}\" does not exist.
"""

galleries: list[dict]=[] # list of already downloaded galleries
gallery: dict # gallery to return
gallery_page: requests.Response
NHENTAI_GALLERY_API_URL: str="https://nhentai.net/api/gallery" # URL to nhentai API


if os.path.isfile("./galleries.json")==True: # if galleries file exists:
logging.info("Loading gallery from \"galleries.json\"...")
with open("./galleries.json", "rt") as galleries_file:
galleries=json.loads(galleries_file.read()) # load already downloaded gallers

gallery=next((gallery for gallery in galleries if gallery["id"]==nhentai_ID), {}) # try to find gallery with same ID
if gallery=={}: # if gallery not found: download it
logging.info(f"\rLoaded \"./gallery.json\", but gallery with nhentai ID {nhentai_ID} was not found.")
else: # if gallery found: use that
logging.info("\rLoaded gallery from \"./galleries.json\".")
return gallery


logging.info(f"Downloading gallery from \"{NHENTAI_GALLERY_API_URL}/{self.ID}\"...")
logging.info(f"Downloading gallery from \"{NHENTAI_GALLERY_API_URL}/{nhentai_ID}\"...") # if gallery not loaded from file: download it
attempt_no: int=1
while True:
try:
gallery_page=requests.get(f"{NHENTAI_GALLERY_API_URL}/{self.ID}", cookies=cookies, headers=headers, timeout=10)
gallery_page=requests.get(f"{NHENTAI_GALLERY_API_URL}/{nhentai_ID}", cookies=cookies, headers=headers, timeout=10)
except (requests.exceptions.ConnectionError, requests.Timeout): # if connection error: try again
time.sleep(1)
if attempt_no<3: # try 3 times
continue
else: # if failed 3 times: give up
raise
if gallery_page.status_code==403: # if status code 403 (forbidden): probably cookies and headers not set correctly
logging.critical(f"Downloading gallery from \"{NHENTAI_GALLERY_API_URL}/{self.ID}\" resulted in status code {gallery_page.status_code}. Have you set \"cookies.json\" and \"headers.json\" correctly?")
raise requests.HTTPError(f"Error in {self.__init__.__name__}{inspect.signature(self.__init__)}: Downloading gallery from \"{NHENTAI_GALLERY_API_URL}/{self.ID}\" resulted in status code {gallery_page.status_code}. Have you set \"cookies.json\" and \"headers.json\" correctly?")
logging.critical(f"Downloading gallery from \"{NHENTAI_GALLERY_API_URL}/{nhentai_ID}\" resulted in status code {gallery_page.status_code}. Have you set \"cookies.json\" and \"headers.json\" correctly?")
raise requests.HTTPError(f"Error in {Hentai._get_gallery.__name__}{inspect.signature(Hentai._get_gallery)}: Downloading gallery from \"{NHENTAI_GALLERY_API_URL}/{nhentai_ID}\" resulted in status code {gallery_page.status_code}. Have you set \"cookies.json\" and \"headers.json\" correctly?")
if gallery_page.status_code==404: # if status code 404 (not found): hentai does not exist (anymore?)
logging.error(f"Hentai with ID \"{self.ID}\" does not exist.")
raise ValueError(f"Error in {self.__init__.__name__}{inspect.signature(self.__init__)}: Hentai with ID \"{self.ID}\" does not exist.")
logging.error(f"Hentai with ID \"{nhentai_ID}\" does not exist.")
raise ValueError(f"Error in {Hentai._get_gallery.__name__}{inspect.signature(Hentai._get_gallery)}: Hentai with ID \"{nhentai_ID}\" does not exist.")
if gallery_page.ok==False: # if status code not ok: try again
time.sleep(1)
if attempt_no<3: # try 3 times
continue
else: # if failed 3 times: give up
raise

self._gallery=json.loads(gallery_page.text)
gallery=json.loads(gallery_page.text)
galleries.append(gallery)
break
logging.info(f"\rDownloaded gallery from \"{NHENTAI_GALLERY_API_URL}/{self.ID}\".")
logging.info(f"\rDownloaded gallery from \"{NHENTAI_GALLERY_API_URL}/{nhentai_ID}\".")

self.page_amount=int(self._gallery["num_pages"])

self.title=self._gallery["title"]["pretty"]

self._fails=[0 for _ in range(self.page_amount)] # initialise with amount of pages number of zeros

logging.debug(f"\rCreated hentai object.")
logging.debug(self.__repr__())
return

galleries=sorted(galleries, key=lambda gallery: int(gallery["id"])) # sort galleries by ID
with open("./galleries.json", "wt") as galleries_file:
galleries_file.write(json.dumps(galleries, indent=4)) # write galleries to file

def __str__(self) -> str:
return f"{self.ID}: \"{self.title}\""
return gallery


def _increment_fails(self, image_list: list[str]) -> None:
Expand Down
5 changes: 1 addition & 4 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import os
from get_hentai_ID_list import get_hentai_ID_list
from Hentai import Hentai
from remove_hentai_ID_from_downloadme import remove_hentai_ID_from_downloadme


@KFSlog.timeit
Expand Down Expand Up @@ -54,13 +53,11 @@ def main():
try:
hentai.download(settings["dest_path"]) # download hentai
except FileExistsError: # if hentai already exists:
remove_hentai_ID_from_downloadme(hentai.ID) # remove hentai ID from downloadme.txt
continue # skip to next hentai
except KFSmedia.DownloadError:
with open("FAILURES.txt", "at") as fails_file: # append in failure file
fails_file.write(f"{hentai.ID}\n")
else: # if successful:
remove_hentai_ID_from_downloadme(hentai.ID) # remove hentai ID from downloadme.txt
continue # skip to next hentai
logging.info("--------------------------------------------------")


Expand Down
29 changes: 0 additions & 29 deletions src/remove_hentai_ID_from_downloadme.py

This file was deleted.

0 comments on commit 97c71da

Please sign in to comment.