
Commit

Merge commit '098c77b543260b17b5fdb181cfbb2fec72ecd6ca' of https://gi…
KaiHuaDou committed Jun 5, 2024
2 parents 973b0ac + 098c77b commit 5f6e521
Showing 47 changed files with 770 additions and 826 deletions.
30 changes: 30 additions & 0 deletions Changelog.txt
@@ -23,6 +23,36 @@
# - title by author
# }}}

{{{ 7.9.0 2024-04-19

:: new features

- [2060886] Kobo driver: Add support for the new color Kobo devices

- Edit book: Add a setting to control cursor width under Preferences->Editor settings

- Edit book: Regex-function mode: Show a confirmation popup when closing the function editor when there are unsaved changes

:: bug fixes

- [2060314] Fix undocked Quickview dialog not being restored at startup

- [2044118] Windows: Fix an issue where closing a maximized calibre window to the system tray and then reconnecting with remote desktop would cause a blank calibre window to be displayed

:: improved recipes
- El Correo
- Eenadu
- ORFonline
- NatGeo
- Harpers Magazine
- New Yorker
- Business Today
- The Week
- Asahi Shimbun
- Outlook Magazine

}}}

{{{ 7.8.0 2024-04-05

:: new features
2 changes: 1 addition & 1 deletion bypy/windows/XUnzip.cpp
@@ -3072,7 +3072,7 @@ unzFile unzOpenInternal(LUFILE *fin)
uLong number_disk; // number of the current dist, used for spanning ZIP, unsupported, always 0
if (unzlocal_getShort(fin,&number_disk)!=UNZ_OK) err=UNZ_ERRNO;
// number of the disk with the start of the central directory
uLong number_disk_with_CD; // number the the disk with central dir, used for spanning ZIP, unsupported, always 0
uLong number_disk_with_CD; // number the disk with central dir, used for spanning ZIP, unsupported, always 0
if (unzlocal_getShort(fin,&number_disk_with_CD)!=UNZ_OK) err=UNZ_ERRNO;
// total number of entries in the central dir on this disk
if (unzlocal_getShort(fin,&us.gi.number_entry)!=UNZ_OK) err=UNZ_ERRNO;
9 changes: 7 additions & 2 deletions bypy/windows/wix.py
@@ -19,6 +19,11 @@
j, d, a, b = os.path.join, os.path.dirname, os.path.abspath, os.path.basename


def add_wix_extension(name):
if not os.path.exists(os.path.expanduser(f'~/.wix/extensions/{name}')):
run(WIX, 'extension', 'add', '-g', name)


def create_installer(env, compression_level='9'):
cl = int(compression_level)
if cl > 4:
@@ -62,8 +67,8 @@ def create_installer(env, compression_level='9'):
arch = 'x64' if is64bit else 'x86'
installer = j(env.dist, '%s%s-%s.msi' % (
calibre_constants['appname'], ('-64bit' if is64bit else ''), calibre_constants['version']))
run(WIX, 'extension', 'add', '-g', 'WixToolset.Util.wixext')
run(WIX, 'extension', 'add', '-g', 'WixToolset.UI.wixext')
add_wix_extension('WixToolset.Util.wixext')
add_wix_extension('WixToolset.UI.wixext')
cmd = [WIX, 'build', '-arch', arch, '-culture', 'en-us', '-loc', enusf, '-dcl', dcl,
'-ext', 'WixToolset.Util.wixext', '-ext', 'WixToolset.UI.wixext', '-o', installer, wxsf]
run(*cmd)
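
The add_wix_extension() helper introduced above makes installing a WiX extension idempotent: the global `wix extension add -g <name>` command is only run when the extension is not already cached under ~/.wix/extensions. A minimal standalone sketch of the same pattern, assuming a plain subprocess call in place of the build script's own run() helper and WIX constant:

import os
import subprocess

def ensure_wix_extension(name, wix_exe='wix'):
    # Skip the network-bound install if the extension is already cached locally.
    cache_dir = os.path.expanduser(f'~/.wix/extensions/{name}')
    if not os.path.exists(cache_dir):
        subprocess.check_call([wix_exe, 'extension', 'add', '-g', name])

ensure_wix_extension('WixToolset.Util.wixext')
ensure_wix_extension('WixToolset.UI.wixext')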
2 changes: 1 addition & 1 deletion format_docs/pdb/pdb_format.txt
@@ -3,7 +3,7 @@ Format

A PDB file can be broken into multiple parts. The header, record 0 and data.
values stored within the various parts are big-endian byte order. The data
part is is broken down into multiple sections. The section count and offsets
part is broken down into multiple sections. The section count and offsets
are referenced in the PDB header. Sections can be no more than 65505 bytes in
length.
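
Because all multi-byte values in a PDB file are big-endian, they can be decoded with explicit '>'-prefixed struct formats. A minimal sketch of reading the section (record) count and offsets, assuming the standard Palm Database header layout with the count stored as an unsigned 16-bit integer at byte offset 76, followed by 8-byte record-list entries:

import struct

def read_pdb_section_offsets(path):
    with open(path, 'rb') as f:
        header = f.read(78)
        # Big-endian unsigned short: number of sections/records (assumed at offset 76).
        num_records, = struct.unpack('>H', header[76:78])
        offsets = []
        for _ in range(num_records):
            entry = f.read(8)  # 4-byte offset, 1-byte attributes, 3-byte unique id
            offset, = struct.unpack('>I', entry[:4])
            offsets.append(offset)
    return num_records, offsets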

2 changes: 1 addition & 1 deletion recipes/asahi_shimbun_en.recipe
@@ -153,7 +153,7 @@ class AsahiShimbunEnglishNews(BasicNewsRecipe):
("Asia & World - World", self.get_section("asia_world/world")),
("Sci & Tech", self.get_section("sci_tech")),
("Culture - Style", self.get_section("culture/style")),
("Culture - Cooking", self.get_section("culture/cooking")),
# ("Culture - Cooking", self.get_section("culture/cooking")),
("Culture - Movies", self.get_section("culture/movies")),
("Culture - Manga & Anime", self.get_section("culture/manga_anime")),
("Travel", self.get_section("travel")),
6 changes: 5 additions & 1 deletion recipes/barrons.recipe
@@ -77,10 +77,14 @@ class barrons(BasicNewsRecipe):
return br

def parse_index(self):
self.log(
'\n***\nif this recipe fails, report it on: '
'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
)
archive = self.index_to_soup('https://www.barrons.com/magazine?archives=' + date.today().strftime('%Y'))
issue = archive.find(**prefixed_classes('BarronsTheme--archive-box--'))
self.timefmt = ' [' + self.tag_to_string(issue.find(**prefixed_classes('BarronsTheme--date--'))) + ']'
self.log(self.timefmt)
self.description = self.tag_to_string(issue.find(**prefixed_classes('BarronsTheme--headline--')))
self.cover_url = issue.img['src'].split('?')[0]

ans = defaultdict(list)
5 changes: 3 additions & 2 deletions recipes/business_today.recipe
@@ -74,15 +74,16 @@ class BT(BasicNewsRecipe):

# Insert feeds in specified order, if available

feedSort = ['Editor\'s Note']
feedSort = ['Editor\'s Note', 'Editors note']
for i in feedSort:
if i in sections:
feeds.append((i, sections[i]))

# Done with the sorted feeds

for i in feedSort:
del sections[i]
if i in sections:
del sections[i]

# Append what is left over...

4 changes: 2 additions & 2 deletions recipes/caravan_magazine.recipe
@@ -92,8 +92,8 @@ class CaravanMagazine(BasicNewsRecipe):
'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
)
api = 'https://api.caravanmagazine.in/api/trpc/magazines.getLatestIssue'
# https://api.caravanmagazine.in/api/trpc/magazines.getForMonthAndYear?batch=1&input=
# %7B%220%22%3A%7B%22json%22%3A%7B%22month%22%3A 2 %2C%22year%22%3A 2024 %7D%7D%7D
# api = 'https://api.caravanmagazine.in/api/trpc/magazines.getForMonthAndYear?batch=1&' + \
# 'input=%7B%220%22%3A%7B%22json%22%3A%7B%22month%22%3A' + '2' + '%2C%22year%22%3A' + '2024' + '%7D%7D%7D'
# input={"0":{"json":{"month":2,"year":2024}}}
raw = self.index_to_soup(api, raw=True)
data = json.loads(raw)['result']['data']['json']
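
The commented-out getForMonthAndYear endpoint above takes a URL-encoded JSON blob as its input query parameter; the decoded form is shown in the new comment. A short sketch (not part of the recipe) of how such a URL could be assembled for a given month and year, assuming the tRPC-style batched input format seen in the comments:

import json
from urllib.parse import quote

def caravan_month_url(month, year):
    # Illustrative helper only; endpoint and parameter structure are taken from the comments above.
    payload = {'0': {'json': {'month': month, 'year': year}}}
    encoded = quote(json.dumps(payload, separators=(',', ':')), safe='')
    return ('https://api.caravanmagazine.in/api/trpc/magazines.getForMonthAndYear'
            '?batch=1&input=' + encoded)

caravan_month_url(2, 2024) reproduces, modulo whitespace, the encoded string quoted in the removed comment.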
208 changes: 102 additions & 106 deletions recipes/eenadu.recipe
@@ -1,7 +1,5 @@
import re
from datetime import date, datetime, timedelta
from urllib.parse import quote

from calibre.utils.date import parse_date
from calibre.web.feeds.news import BasicNewsRecipe, classes


@@ -10,118 +8,116 @@ class eenadu_ts(BasicNewsRecipe):
__author__ = 'unkn0wn'
description = 'THE LARGEST CIRCULATED TELUGU DAILY'
language = 'te'
use_embedded_content = False
remove_javascript = True
encoding = 'utf-8'
no_stylesheets = True
remove_attributes = ['height', 'width', 'style']
ignore_duplicate_articles = {'url', 'title'}
remove_javascript = True
masthead_url = 'https://dxxd96tbpm203.cloudfront.net//img/logo.png'
cover_url = 'https://d66zsp32hue2v.cloudfront.net/Eenadu/2022/08/08/GTH/5_01/d5041804_01_mr.jpg'
encoding = 'utf-8'
remove_attributes = ['style', 'height', 'width']
ignore_duplicate_articles = {'url', 'title'}
reverse_article_order = True
remove_empty_feeds = True
extra_css = '.pub-t{font-size:small; font-style:italic;}'
simultaneous_downloads = 1
art_url = ''

keep_only_tags = [
dict(name='h1'),
dict(**classes('pub-t')),
classes('fullstory text-justify contlist-cont'),
dict(name='span', attrs={'id': 'PDSAIApbreak'}),
]
extra_css = '''
img {display:block; margin:0 auto;}
blockquote, em {color:#202020;}
.pub-t{font-size:small; font-style:italic;}
'''

remove_tags = [
dict(name='span', attrs={'style': 'float:left; margin-right:10px;'}),
dict(
name='p',
attrs={
'style':
'font-size: 18px !important; margin: 0px; margin-top: -15px; text-align: center;flex: 1;'
}
),
dict(name='aside', attrs={'class': lambda x: x and x.startswith('thumb')}),
dict(name='br'),
classes('sshare-c tags andbeyond_ad fnt20 arti more2 offset-tb1 msb-list')
]
keep_only_tags = [classes('bookWrapper fullstory')]
remove_tags = [classes('ext-link offset-tb1 sshare-c')]

articles_are_obfuscated = True

def get_obfuscated_article(self, url):
br = self.get_browser()
soup = self.index_to_soup(url)
link = soup.a['href']
skip_sections =[ # add sections you want to skip
'/video/', '/videos/', '/multimedia/', 'marathi', 'hindi', 'bangla'
]
if any(x in link for x in skip_sections):
self.abort_article('skipping video links')
self.log('Found ', link)
self.art_url = link
html = br.open(link).read()
return ({ 'data': html, 'url': link })

def parse_index(self):
section_list = [
('తెలంగాణ తాజా వార్తలు', 'telangana'),
('సంపాదకీయం', 'telangana/editorial'),
('తెలంగాణ ప్రధానాంశాలు', 'telangana/top-news'),
('తెలంగాణ జిల్లా వార్తలు', 'telangana/districts'),
# ('క్రైమ్', 'crime'),
resolve_internal_links = True
remove_empty_feeds = True

def get_cover_url(self):
import json
from datetime import date
today = quote(date.today().strftime('%d/%m/%Y'), safe='')
raw = self.index_to_soup(
'https://epaper.eenadu.net/Home/GetAllpages?editionid=1&editiondate=' + today, raw=True
)
for cov in json.loads(raw):
if cov['NewsProPageTitle'].lower().startswith('front'):
return cov['HighResolution']

feeds = []

when = '27' # hours
index = 'https://www.eenadu.net'
a = 'https://news.google.com/rss/search?q=when:{}h+allinurl:{}&hl=te-IN&gl=IN&ceid=IN:te'

news = index + '/telugu-news/'
news_list = [
('తెలంగాణ ప్రధానాంశాలు', 'ts-top-news'),
('సంపాదకీయం', 'editorial'),
('వ్యాఖ్యానం', 'vyakyanam'),
('హైదరాబాద్ జిల్లా వార్తలు', 'districts/Hyderabad'),
('క్రైమ్', 'crime'),
('పాలిటిక్స్', 'politics'),
('జాతీయం', 'india'),
('బిజినెస్', 'business'),
('అంతర్జాతీయం', 'world'),
('క్రీడలు', 'sports'),
# ('సినిమా', 'movies'),
# ('చదువు', 'education'),
# ('సుఖీభవ', 'health'),
# ('ఈ-నాడు', 'technology'),
# ('మకరందం', 'devotional'),
# ('ఈ తరం', 'youth'),
# ('ఆహా', 'recipes'),
# ('హాయ్ బుజ్జీ', 'kids-stories'),
# ('స్థిరాస్తి', 'real-estate'),
('సినిమా', 'movies'),
('వసుంధర', 'women'),
('ఈ-నాడు', 'technology'),
('వెబ్ ప్రత్యేకం', 'explained')
]
is_sunday = date.today().weekday() == 6
if is_sunday:
section_list.append(('సండే మ్యాగజైన్', 'sunday-magazine'))
feeds = []

# For each section title, fetch the article urls
for section in section_list:
section_title = section[0]
section_url = 'https://www.eenadu.net/' + section[1]
self.log(section_title, section_url)
soup = self.index_to_soup(section_url)
articles = self.articles_from_soup(soup)
if articles:
feeds.append((section_title, articles))
return feeds

def articles_from_soup(self, soup):
ans = []
for link in soup.findAll(
attrs={
'class': ['telugu_uni_body', 'thumb-content-more', 'owl-carousel']
}
):
for a in link.findAll('a', attrs={'href': True}):
url = a['href']
if url.startswith('https') is False:
url = 'https://www.eenadu.net/' + url

try:
desc = self.tag_to_string(a.find('div')).strip()
except Exception:
desc = ''

for h3 in a.findAll('h3'):
title = self.tag_to_string(h3).strip()
sub = re.escape(title)
desc = re.sub(sub, '', desc).strip()

if not title or not url:
continue

self.log('\t', title, '\n\t', desc, '\n\t\t', url)
ans.append({'title': title, 'url': url, 'description': desc})
return ans

def preprocess_html(self, soup):
div = soup.find('div', **classes('pub-t'))
if div:
date = parse_date(
self.tag_to_string(div).strip().replace('Published : ', '').replace(
'Updated : ', ''
).replace(' IST', ':00.000001')
).replace(tzinfo=None)
today = datetime.now()
if (today - date) > timedelta(1.15):
self.abort_article('Skipping old article')
else:
self.abort_article('not an article')
for img in soup.findAll('img', attrs={'data-src': True}):
img['src'] = img['data-src']
return soup
for n in news_list:
news_index = news + n[1] + '/'
feeds.append((n[0], a.format(when, quote(news_index, safe=''))))
feeds.append(('Other News', a.format(when, quote(news, safe=''))))


art = index + '/telugu-article/'
art_list = [
('చదువు', 'education'),
('సుఖీభవ', 'health'),
('ఆహా', 'recipes'),
('హాయ్ బుజ్జీ', 'kids-stories'),
('మకరందం', 'devotional'),
('దేవతార్చన', 'temples'),
('స్థిరాస్తి', 'real-estate'),
('కథామృతం', 'kathalu'),
('సండే మ్యాగజైన్', 'sunday-magazine')
]
for x in art_list:
art_index = art + x[1] + '/'
feeds.append((x[0], a.format(when, quote(art_index, safe=''))))
feeds.append(('Other Articles', a.format(when, quote(art, safe=''))))

feeds.append(('ఇతరులు', a.format(when, quote(index, safe=''))))
feeds.append(('ప్రతిభ', a.format(when, 'https://pratibha.eenadu.net/')))

def populate_article_metadata(self, article, soup, first):
article.url = self.art_url
article.title = article.title.replace(' - Eenadu', '')
desc = soup.find(attrs={'class':'srtdes'})
if desc:
article.summary = self.tag_to_string(desc)
article.text_summary = article.summary

def preprocess_raw_html(self, raw, *a):
import re
if '<!--Top Full Story Start -->' in raw:
body = re.search(r'<!--Top Full Story Start -->([^~]+?)<!--Tags Start -->', raw)
return '<html><body><div>' + body.group(1) + '</div></body></html>'
return raw
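
The rewritten recipe no longer scrapes eenadu.net section pages directly; it builds each feed from a Google News RSS search restricted to a section URL, using the `when:{}h+allinurl:{}` query template shown in parse_index(). A small sketch of how one such feed URL is formed (the helper name is illustrative only; the template and section paths come from the diff above):

from urllib.parse import quote

GNEWS = 'https://news.google.com/rss/search?q=when:{}h+allinurl:{}&hl=te-IN&gl=IN&ceid=IN:te'

def eenadu_feed_url(section_url, hours='27'):
    # e.g. section_url = 'https://www.eenadu.net/telugu-news/editorial/'
    return GNEWS.format(hours, quote(section_url, safe=''))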