
Commit

Removes unnecessary logs
dipu-bd committed Jan 8, 2025
1 parent 6956d28 commit 3795b66
Showing 5 changed files with 46 additions and 42 deletions.
6 changes: 0 additions & 6 deletions sources/en/d/dobelyuwai.py
@@ -29,12 +29,6 @@ def read_novel_info(self):
             self.novel_cover = None
         logger.info("Novel cover: %s", self.novel_cover)
 
-        # try:
-        #     self.novel_author = soup.select_one('div.entry-content > p:nth-child(2)').text.strip()
-        # except Exception as e:
-        #     logger.warning('Failed to get novel auth. Error: %s', e)
-        # logger.info('%s', self.novel_author)
-
         # Removes none TOC links from bottom of page.
         toc_parts = soup.select_one("div.entry-content")
 
22 changes: 9 additions & 13 deletions sources/en/n/novelhall.py
@@ -23,10 +23,10 @@ def search_novel(self, query: str):
         soup = self.get_soup(self.absolute_url(search_url + quote_plus(query.lower())))
 
         results = []
-        for novel in soup.select('.section3 table tbody tr'):
-            novel = novel.findAll('a')
+        for novel in soup.select(".section3 table tbody tr"):
+            novel = novel.findAll("a")
             novel_link = novel[1]
-            latest_chapter = novel[2].text.strip().split('.')
+            latest_chapter = novel[2].text.strip().split(".")
             chapter_number = latest_chapter[0]
 
             if chapter_number.isdigit():
@@ -37,8 +37,8 @@ def search_novel(self, query: str):
             results.append(
                 {
                     "title": novel_link.text.strip(),
-                    "url": self.absolute_url(novel_link['href']),
-                    "info": latest_chapter
+                    "url": self.absolute_url(novel_link["href"]),
+                    "info": latest_chapter,
                 }
             )
 
@@ -60,20 +60,16 @@ def read_novel_info(self):
         possible_image = soup.select_one("div.book-img img")
         if possible_image:
             self.novel_cover = self.absolute_url(possible_image["src"])
-
-            if possible_image['src'] == "":
-                logger.warning("Novel cover: unavailable")
-            else:
-                logger.info("Novel cover: %s", self.novel_cover)
-        else:
-            logger.info("Novel cover: unavailable")
+        logger.info("Novel cover: %s", self.novel_cover)
 
         author = soup.select("div.book-info div.total.booktag span.blue")[0]
         author.select_one("p").extract()
         self.novel_author = author.text.replace("Author:", "").strip()
         logger.info("Novel author: %s", self.novel_author)
 
-        self.novel_tags = [soup.select_one("div.book-info div.total.booktag a.red").text.strip()]
+        self.novel_tags = [
+            soup.select_one("div.book-info div.total.booktag a.red").text.strip()
+        ]
         logger.info("Novel tags: %s", self.novel_tags)
 
         synopsis = soup.select_one(".js-close-wrap")
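
Note on the novelhall.py cover change above: the old code logged through four branches depending on whether the image tag and its src attribute were present; the new code resolves the URL under a single guard and logs once. A minimal sketch of the resulting pattern, assuming soup is any BeautifulSoup document (extract_cover and the absolute_url callable are illustrative stand-ins, not names from this repository):

import logging
from typing import Optional

from bs4 import BeautifulSoup

logger = logging.getLogger(__name__)


def extract_cover(soup: BeautifulSoup, absolute_url) -> Optional[str]:
    possible_image = soup.select_one("div.book-img img")
    novel_cover = absolute_url(possible_image["src"]) if possible_image else None
    # a single unconditional log call replaces the old empty-src branching:
    # it reports the resolved URL, or None when no cover image was found
    logger.info("Novel cover: %s", novel_cover)
    return novel_cover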
28 changes: 17 additions & 11 deletions sources/en/n/novelight.py
@@ -21,7 +21,9 @@ def initialize(self) -> None:
         self.cleaner.bad_css.update(["div.advertisment"])
 
     def search_novel(self, query) -> List[SearchResult]:
-        soup = self.get_soup(f"{self.home_url}catalog/?search={quote_plus(query.lower())}")
+        soup = self.get_soup(
+            f"{self.home_url}catalog/?search={quote_plus(query.lower())}"
+        )
 
         return [
             SearchResult(title=a.text.strip(), url=self.absolute_url(a["href"]))
@@ -47,7 +49,9 @@ def read_novel_info(self):
         if isinstance(novel_synopsis, Tag):
             self.novel_synopsis = self.cleaner.extract_contents(novel_synopsis)
 
-        novel_tags = soup.select("div#information section.tags a[href^='/catalog/?tags=']")
+        novel_tags = soup.select(
+            "div#information section.tags a[href^='/catalog/?tags=']"
+        )
         for tag in novel_tags:
             self.novel_tags.append(tag.get_text().strip())
 
@@ -57,27 +61,27 @@ def read_novel_info(self):
         logger.info("Novel author: %s", self.novel_author)
 
         page_scripts = soup.select("body > script:not([src])")
-        scripts_joined = '\n'.join(str(s) for s in page_scripts)
+        scripts_joined = "\n".join(str(s) for s in page_scripts)
         book_id = re.search(r'.*const BOOK_ID = "(\d+)".*', scripts_joined).group(1)
         if not book_id:
             raise LNException("Could not extract book_id from novel page")
         logger.debug("book_id: %s", book_id)
         # this is different token than the 'csrftoken' in cookies
-        csrfmiddlewaretoken = re.search(r'.*window.CSRF_TOKEN = "(\w+)".*', scripts_joined).group(1)
+        csrfmiddlewaretoken = re.search(
+            r'.*window.CSRF_TOKEN = "(\w+)".*', scripts_joined
+        ).group(1)
         if not csrfmiddlewaretoken:
             raise LNException("Could not extract csrfmiddlewaretoken from novel page")
         logger.debug("csrfmiddlewaretoken: %s", csrfmiddlewaretoken)
 
         headers = {
             "Accept": "*/*",
             "Referer": self.novel_url,
-            "x-requested-with": "XMLHttpRequest"
+            "x-requested-with": "XMLHttpRequest",
         }
         chapters_lists = soup.select("select#select-pagination-chapter > option")
         bar = self.progress_bar(
-            total=len(chapters_lists),
-            desc="Chapters list",
-            unit="page"
+            total=len(chapters_lists), desc="Chapters list", unit="page"
         )
         encountered_paid_chapter = False
         for page in reversed(chapters_lists):
@@ -86,11 +90,11 @@ def read_novel_info(self):
             params = {
                 "csrfmiddlewaretoken": csrfmiddlewaretoken,
                 "book_id": book_id,
-                "page": page["value"]
+                "page": page["value"],
             }
             chapters_response = self.get_json(
                 f"{self.home_url}book/ajax/chapter-pagination?{urlencode(params)}",
-                headers=headers
+                headers=headers,
             )
             chapters_soup = self.make_soup(chapters_response["html"])
             for a in reversed(chapters_soup.select("a[href^='/book/chapter/']")):
@@ -108,7 +112,9 @@ def read_novel_info(self):
             bar.update()
         bar.close()
         if encountered_paid_chapter:
-            logger.warning("WARNING: Paid chapters are not supported and will be skipped.")
+            logger.warning(
+                "WARNING: Paid chapters are not supported and will be skipped."
+            )
 
     def download_chapter_body(self, chapter: Chapter):
         soup = self.get_soup(chapter.url)
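
For reference, the BOOK_ID and CSRF_TOKEN lookups reformatted above work by scraping JavaScript constants out of the page's inline <script> tags. A self-contained sketch of that technique; the HTML snippet here is fabricated for illustration:

import re

from bs4 import BeautifulSoup

html = """
<body>
<script>const BOOK_ID = "12345";</script>
<script>window.CSRF_TOKEN = "abcdef0123456789";</script>
</body>
"""

soup = BeautifulSoup(html, "html.parser")
# join every inline (non-src) script body into one searchable string
page_scripts = soup.select("body > script:not([src])")
scripts_joined = "\n".join(str(s) for s in page_scripts)

book_id = re.search(r'const BOOK_ID = "(\d+)"', scripts_joined).group(1)
# this is a different token than the 'csrftoken' cookie
csrf_token = re.search(r'window.CSRF_TOKEN = "(\w+)"', scripts_joined).group(1)
print(book_id, csrf_token)  # -> 12345 abcdef0123456789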
24 changes: 14 additions & 10 deletions sources/zh/ddxsss.py
@@ -22,10 +22,12 @@ def initialize(self):
         # the default lxml parser cannot handle the huge gbk encoded sites (fails after 4.3k chapters)
         self.init_parser("html.parser")
         self.cleaner.bad_tags.update(["script", "a"])
-        self.cleaner.bad_css.update([
-            ".noshow",
-            "div.Readpage.pagedown",
-        ])
+        self.cleaner.bad_css.update(
+            [
+                ".noshow",
+                "div.Readpage.pagedown",
+            ]
+        )
 
         # p tags should only show up after being parsed and formatted the first time
         self.cleaner.bad_tag_text_pairs["p"] = [
@@ -44,10 +46,10 @@ def search_novel(self, query):
         data = self.get_json(
             f"{self.home_url}user/search.html?q={query}",
             # if this cookie "expires" it might return INT results again -> maybe remove search functionality
-            cookies={"hm": "7c2cee175bfbf597f805ebc48957806e"}
+            cookies={"hm": "7c2cee175bfbf597f805ebc48957806e"},
         )
         if isinstance(data, int):
-            logger.warning("Failed to get any results, likely auth failure")
+            logger.info("Failed to get any results, likely auth failure")
             return []
 
         results = []
@@ -56,7 +58,7 @@
                 SearchResult(
                     title=book["articlename"],
                     url=self.absolute_url(book["url_list"]),
-                    info=f"Author: {book['author']} | Synopsis: {book['intro']}"
+                    info=f"Author: {book['author']} | Synopsis: {book['intro']}",
                 )
             )
         return results
@@ -78,7 +80,7 @@ def read_novel_info(self):
             self.novel_cover = self.absolute_url(possible_image["src"])
         logger.info("Novel cover: %s", self.novel_cover)
 
-        possible_author = meta.find('.small span', text=r"作者:")
+        possible_author = meta.find(".small span", text=r"作者:")
         if isinstance(possible_author, Tag):
             self.novel_author = possible_author.text.strip().replace("作者:", "")
             logger.info("Novel Author: %s", self.novel_author)
@@ -96,13 +98,15 @@ def read_novel_info(self):
                 logger.info("Skipping non-chapter link: %s", a["href"])
                 continue
 
-            chap_id = int(re.match(re.compile(f".*/book/{book_id}/(\\d+).*"), a["href"])[1])
+            chap_id = int(
+                re.match(re.compile(f".*/book/{book_id}/(\\d+).*"), a["href"])[1]
+            )
             vol_id = len(self.chapters) // 100 + 1
             if len(self.chapters) % 100 == 0:
                 self.volumes.append(Volume(vol_id))
             if not a:
                 # this should not occur with html.parser, if it does, likely due to parser/encoding issue
-                logger.warning("Failed to get Chapter %d! Missing Link", chap_id)
+                logger.info("Failed to get Chapter %d! Missing Link", chap_id)
                 continue
             self.chapters.append(
                 Chapter(
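
Aside from the formatting and log-level changes, ddxsss.py keeps its 100-chapters-per-volume grouping intact. A self-contained sketch of that arithmetic, with plain tuples standing in for the project's Volume and Chapter models:

# tuples stand in for the real Volume/Chapter models; counts are fabricated
chapters = []
volumes = []

for title in (f"Chapter {n}" for n in range(1, 251)):
    vol_id = len(chapters) // 100 + 1  # chapters 0-99 -> vol 1, 100-199 -> vol 2, ...
    if len(chapters) % 100 == 0:  # the first chapter of each block opens a new volume
        volumes.append(("Volume", vol_id))
    chapters.append((title, vol_id))

print(len(volumes))  # -> 3 volumes for 250 chapters: 1-100, 101-200, 201-250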
8 changes: 6 additions & 2 deletions sources/zh/uukanshu.py
@@ -46,7 +46,9 @@ def read_novel_info(self) -> None:
             self.novel_synopsis = synopsis.select_one("p").text
 
         chapters = soup.select_one("ul#chapterList")
-        for chapter in list(chapters.children)[::-1]:  # reverse order as it's newest to oldest
+        for chapter in list(chapters.children)[
+            ::-1
+        ]:  # reverse order as it's newest to oldest
             # convince typehint that we're looking at Tags & also make sure we skip random text within the ul if any
             if not isinstance(chapter, Tag):
                 continue
@@ -61,7 +63,9 @@ def read_novel_info(self) -> None:
                 continue
             anchor = chapter.select_one("a")
             if not anchor:
-                logger.warning("Found <li> in chapter list, not volume, without link: %s", chapter)
+                logger.info(
+                    "Found <li> in chapter list, not volume, without link: %s", chapter
+                )
                 continue
             self.chapters.append(
                 Chapter(
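
Note on the loop reflowed above: ul.children yields stray whitespace text nodes as well as <li> tags, which is why the isinstance(chapter, Tag) check runs before any tag access. A runnable sketch with a fabricated chapter list:

from bs4 import BeautifulSoup, Tag

html = (
    "<ul id='chapterList'>"
    "<li><a href='/c/2'>Chapter 2</a></li>\n"
    "<li><a href='/c/1'>Chapter 1</a></li>"
    "</ul>"
)
chapters_ul = BeautifulSoup(html, "html.parser").select_one("ul#chapterList")

for chapter in list(chapters_ul.children)[::-1]:  # site lists newest first
    if not isinstance(chapter, Tag):
        continue  # skip the bare "\n" text node between the <li> tags
    anchor = chapter.select_one("a")
    if anchor:
        print(anchor["href"], anchor.text)  # -> /c/1 Chapter 1, then /c/2 Chapter 2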
