Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug fixes & improve duplicate checks #342

Merged
merged 6 commits into from
Sep 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions nhentai/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,21 @@ def main():
else:
continue

file_type = ''
if options.is_cbz: file_type = '.cbz'
elif options.is_pdf: file_type = '.pdf'

if not options.dryrun:
doujinshi.downloader = downloader
doujinshi.download(regenerate_cbz=options.regenerate_cbz)
result = doujinshi.download(regenerate_cbz=options.regenerate_cbz, file_type=file_type)
# Already downloaded; continue on with the other doujins.
if not result: continue

if options.generate_metadata:
table = doujinshi.table
generate_metadata_file(options.output_dir, table, doujinshi)
result = generate_metadata_file(options.output_dir, table, doujinshi, file_type)
# Already downloaded; continue on with the other doujins.
if not result: continue

if options.is_save_download_history:
with DB() as db:
Expand Down
5 changes: 3 additions & 2 deletions nhentai/doujinshi.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def __repr__(self):
def show(self):
logger.info(f'Print doujinshi information of {self.id}\n{tabulate(self.table)}')

def download(self, regenerate_cbz=False):
def download(self, regenerate_cbz=False, file_type=''):
logger.info(f'Starting to download doujinshi: {self.name}')
if self.downloader:
download_queue = []
Expand All @@ -82,9 +82,10 @@ def download(self, regenerate_cbz=False):
for i in range(1, min(self.pages, len(self.ext)) + 1):
download_queue.append(f'{IMAGE_URL}/{self.img_id}/{i}.{self.ext[i-1]}')

self.downloader.start_download(download_queue, self.filename, regenerate_cbz=regenerate_cbz)
return self.downloader.start_download(download_queue, self.filename, regenerate_cbz=regenerate_cbz, file_type=file_type)
else:
logger.critical('Downloader has not been loaded')
return False


if __name__ == '__main__':
Expand Down
20 changes: 10 additions & 10 deletions nhentai/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def download(self, url, folder='', filename='', retried=0, proxy=None):
save_file_path = os.path.join(folder, base_filename.zfill(3) + extension)
try:
if os.path.exists(save_file_path):
logger.warning(f'Ignored exists file: {save_file_path}')
logger.warning(f'Skipped download: {save_file_path} already exists')
return 1, url

response = None
Expand Down Expand Up @@ -115,17 +115,17 @@ def download(self, url, folder='', filename='', retried=0, proxy=None):

return 1, url

def start_download(self, queue, folder='', regenerate_cbz=False):

def start_download(self, queue, folder='', regenerate_cbz=False, file_type='') -> bool:
if not isinstance(folder, (str, )):
folder = str(folder)

if self.path:
folder = os.path.join(self.path, folder)

if os.path.exists(folder + '.cbz'):
if not regenerate_cbz:
logger.warning(f'CBZ file "{folder}.cbz" exists, ignored download request')
return
if file_type != '' and os.path.exists(folder + file_type) and not regenerate_cbz:
logger.warning(f'Skipped download: "{folder}{file_type}" already exists')
return False

logger.info(f'Doujinshi will be saved at "{folder}"')
if not os.path.exists(folder):
Expand All @@ -134,11 +134,9 @@ def start_download(self, queue, folder='', regenerate_cbz=False):
except EnvironmentError as e:
logger.critical(str(e))

else:
logger.warning(f'Path "{folder}" already exist.')

if os.getenv('DEBUG', None) == 'NODOWNLOAD':
return
# Assuming we want to continue with rest of process.
return True
queue = [(self, url, folder, constant.CONFIG['proxy']) for url in queue]

pool = multiprocessing.Pool(self.size, init_worker)
Expand All @@ -147,6 +145,8 @@ def start_download(self, queue, folder='', regenerate_cbz=False):
pool.close()
pool.join()

return True


def download_wrapper(obj, url, folder='', proxy=None):
if sys.platform == 'darwin' or semaphore.get_value():
Expand Down
117 changes: 70 additions & 47 deletions nhentai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
import os
import zipfile
import shutil

import requests
import sqlite3
import urllib.parse
from typing import Optional, Tuple

from nhentai import constant
from nhentai.logger import logger
Expand Down Expand Up @@ -65,13 +67,41 @@ def readfile(path):
return file.read()


def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
image_html = ''
def parse_doujinshi_obj(
        output_dir: str,
        doujinshi_obj=None,
        file_type: str = '',
        write_comic_info=False
) -> Tuple[str, str, bool]:
    """Resolve the image folder and archive path for a doujinshi.

    :param output_dir: directory that holds the per-doujinshi image folders.
    :param doujinshi_obj: object exposing a ``filename`` attribute, or ``None``
        to operate on the current directory with a generic ``doujinshi`` name.
    :param file_type: archive suffix including the leading dot (``'.cbz'``,
        ``'.pdf'``) or ``''`` when no archive is being produced.
    :param write_comic_info: when true and ``file_type`` is ``'.cbz'``, the
        comic-info XML is serialized into the image folder (only if the
        archive still has to be built).
    :return: ``(doujinshi_dir, filename, already_downloaded)`` where
        ``filename`` is the archive path and ``already_downloaded`` tells the
        caller that this archive is already on disk.
    """
    if doujinshi_obj is None:
        # Nothing to look up: work in the current directory.
        return '.', './doujinshi' + file_type, False

    doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)

    if not file_type:
        # No archive was requested. The image folder itself always exists
        # right after a download, so its presence must NOT be reported as
        # "already downloaded" -- otherwise HTML/metadata generation that
        # writes *into* that folder would be skipped every time.
        return doujinshi_dir, './doujinshi', False

    _filename = f'{doujinshi_obj.filename}{file_type}'
    if file_type == '.pdf':
        # PDF names have path separators flattened.
        _filename = _filename.replace('/', '-')
    filename = os.path.join(output_dir, _filename)

    # Test the path that will actually be written, so the duplicate check
    # also works for PDF names that had '/' replaced above.
    if os.path.exists(filename):
        return doujinshi_dir, filename, True

    if file_type == '.cbz' and write_comic_info:
        serialize_comic_xml(doujinshi_obj, doujinshi_dir)

    return doujinshi_dir, filename, False


def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
doujinshi_dir, filename, already_downloaded = parse_doujinshi_obj(output_dir, doujinshi_obj)
if already_downloaded:
logger.info(f'Skipped download: {doujinshi_dir} already exists')
return

image_html = ''

if not os.path.exists(doujinshi_dir):
logger.warning(f'Path "{doujinshi_dir}" does not exist, creating.')
Expand Down Expand Up @@ -166,23 +196,16 @@ def generate_main_html(output_dir='./'):


def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, write_comic_info=True, move_to_folder=False):
if doujinshi_obj is not None:
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
if os.path.exists(doujinshi_dir+".cbz"):
logger.warning(f'Comic Book CBZ file exists, skip "{doujinshi_dir}"')
return
if write_comic_info:
serialize_comic_xml(doujinshi_obj, doujinshi_dir)
cbz_filename = os.path.join(os.path.join(doujinshi_dir, '..'), f'{doujinshi_obj.filename}.cbz')
else:
cbz_filename = './doujinshi.cbz'
doujinshi_dir = '.'
doujinshi_dir, filename, already_downloaded = parse_doujinshi_obj(output_dir, doujinshi_obj, '.cbz', write_comic_info)
if already_downloaded:
logger.info(f'Skipped download: {doujinshi_dir} already exists')
return

file_list = os.listdir(doujinshi_dir)
file_list.sort()

logger.info(f'Writing CBZ file to path: {cbz_filename}')
with zipfile.ZipFile(cbz_filename, 'w') as cbz_pf:
logger.info(f'Writing CBZ file to path: {filename}')
with zipfile.ZipFile(filename, 'w') as cbz_pf:
for image in file_list:
image_path = os.path.join(doujinshi_dir, image)
cbz_pf.write(image_path, image)
Expand All @@ -191,40 +214,34 @@ def generate_cbz(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, write_
shutil.rmtree(doujinshi_dir, ignore_errors=True)

if move_to_folder:
for filename in os.listdir(doujinshi_dir):
file_path = os.path.join(doujinshi_dir, filename)
if os.path.isfile(file_path):
try:
os.remove(file_path)
except Exception as e:
print(f"Error deleting file: {e}")
for filename in os.listdir(doujinshi_dir):
file_path = os.path.join(doujinshi_dir, filename)
if os.path.isfile(file_path):
try:
os.remove(file_path)
except Exception as e:
print(f"Error deleting file: {e}")

shutil.move(cbz_filename, doujinshi_dir)
shutil.move(filename, doujinshi_dir)

logger.log(16, f'Comic Book CBZ file has been written to "{doujinshi_dir}"')
logger.log(16, f'Comic Book CBZ file has been written to "{filename}"')


def generate_pdf(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, move_to_folder=False):
try:
import img2pdf

"""Write images to a PDF file using img2pdf."""
if doujinshi_obj is not None:
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
filename = doujinshi_obj.filename.replace('/', '-')
pdf_filename = os.path.join(
os.path.join(doujinshi_dir, '..'),
f'{filename}.pdf'
)
else:
pdf_filename = './doujinshi.pdf'
doujinshi_dir = '.'
doujinshi_dir, filename, already_downloaded = parse_doujinshi_obj(output_dir, doujinshi_obj, '.pdf')
if already_downloaded:
logger.info(f'Skipped download: {doujinshi_dir} already exists')
return

file_list = os.listdir(doujinshi_dir)
file_list = [f for f in os.listdir(doujinshi_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif'))]
file_list.sort()

logger.info(f'Writing PDF file to path: {pdf_filename}')
with open(pdf_filename, 'wb') as pdf_f:
logger.info(f'Writing PDF file to path: {filename}')
with open(filename, 'wb') as pdf_f:
full_path_list = (
[os.path.join(doujinshi_dir, image) for image in file_list]
)
Expand All @@ -242,9 +259,9 @@ def generate_pdf(output_dir='.', doujinshi_obj=None, rm_origin_dir=False, move_t
except Exception as e:
print(f"Error deleting file: {e}")

shutil.move(pdf_filename, doujinshi_dir)
shutil.move(filename, doujinshi_dir)

logger.log(16, f'PDF file has been written to "{doujinshi_dir}"')
logger.log(16, f'PDF file has been written to "{filename}"')

except ImportError:
logger.error("Please install img2pdf package by using pip.")
Expand Down Expand Up @@ -303,17 +320,21 @@ def paging(page_string):
return page_list


def generate_metadata_file(output_dir, table, doujinshi_obj=None):
def generate_metadata_file(output_dir, table, doujinshi_obj=None, check_file_type=''):
logger.info('Writing Metadata Info')

if doujinshi_obj is not None:
doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
else:
doujinshi_dir = '.'
doujinshi_dir, filename, already_downloaded = parse_doujinshi_obj(output_dir, doujinshi_obj, file_type=check_file_type)
info_txt_path = os.path.join(doujinshi_dir, 'info.txt')

if already_downloaded:
# Ensure that info.txt was generated for the folder (if it exists) before exiting.
if os.path.exists(doujinshi_dir) and os.path.exists(info_txt_path):
logger.info(f'Skipped download: {info_txt_path} already exists')
return False

logger.info(doujinshi_dir)

f = open(os.path.join(doujinshi_dir, 'info.txt'), 'w', encoding='utf-8')
f = open(info_txt_path, 'w', encoding='utf-8')

fields = ['TITLE', 'ORIGINAL TITLE', 'AUTHOR', 'ARTIST', 'GROUPS', 'CIRCLE', 'SCANLATOR',
'TRANSLATOR', 'PUBLISHER', 'DESCRIPTION', 'STATUS', 'CHAPTERS', 'PAGES',
Expand All @@ -330,6 +351,8 @@ def generate_metadata_file(output_dir, table, doujinshi_obj=None):

f.close()

return True


class DB(object):
conn = None
Expand Down