diff --git a/Changelog.txt b/Changelog.txt
index 44c54a0c01ef..af54ecb21a5e 100644
--- a/Changelog.txt
+++ b/Changelog.txt
@@ -23,6 +23,36 @@
# - title by author
# }}}
+{{{ 7.12.0 2024-05-30
+
+:: new features
+
+- [2067167] E-book polishing: Add option to download external resources (images/stylesheets/etc.)
+
+- Conversion: Add an option under Structure detection to automatically fill in blank alt attributes for images that have alt text in the image file metadata
+
+- [2067437] Content server: Use the book title as the filename for downloaded highlights
+
+- Review downloaded metadata dialog: Add a "Previous" button to go back to the last reviewed book
+
+:: bug fixes
+
+- Content server viewer: Fix the bottom of the text being cut off on mobile browsers when not in full screen mode. Unfortunately, this means the viewer now requires a browser no more than two years old.
+
+- [2067168] Kindle driver: Increase the size of the cover thumbnails sent to the Scribe
+
+- [1943495] Kindle output: Fix cover images that contain EXIF data without a rotation not displaying on the Kindle lockscreen
+
+- E-book viewer: Fix ruby tags not excluded when searching for text
+
+- [2067128] Content server viewer: Fix a regression in the previous release that broke searching
+
+:: improved recipes
+- Economist Espresso
+- Slate
+
+}}}
+
{{{ 7.11.0 2024-05-24
:: new features
diff --git a/manual/server.rst b/manual/server.rst
index b69adb0e90e8..30c48b80e3aa 100644
--- a/manual/server.rst
+++ b/manual/server.rst
@@ -174,11 +174,11 @@ A much loved feature of the main program, :guilabel:`Virtual libraries` is
present in the server interface as well. Click the three vertical dots in the
top right corner to choose a Virtual library.
-The book reader
+The book viewer
^^^^^^^^^^^^^^^^
You can read any book in your calibre library by simply tapping on
-it and then tapping the :guilabel:`Read` button. The book reader
+it and then tapping the :guilabel:`Read` button. The book viewer
is very simple to operate. You can both tap and swipe to turn pages. Swiping
up/down skips between chapters. Tapping the top quarter of the screen gets you
the detailed controls and viewer preferences.
diff --git a/recipes/economist_espresso.recipe b/recipes/economist_espresso.recipe
index 522fe4a715f1..63b60ed9ca87 100644
--- a/recipes/economist_espresso.recipe
+++ b/recipes/economist_espresso.recipe
@@ -2,8 +2,92 @@
https://www.economist.com/the-world-in-brief
'''
-from calibre.ebooks.BeautifulSoup import Tag
-from calibre.web.feeds.news import BasicNewsRecipe, classes
+import json
+from urllib.parse import quote, urlencode
+
+from calibre import replace_entities
+from calibre.ebooks.BeautifulSoup import NavigableString, Tag
+from calibre.ptempfile import PersistentTemporaryFile
+from calibre.web.feeds.news import BasicNewsRecipe
+from html5_parser import parse
+from lxml import etree
+
+
+def E(parent, name, text='', **attrs):
+ ans = parent.makeelement(name, **attrs)
+ ans.text = text
+ parent.append(ans)
+ return ans
+
+
+def process_node(node, html_parent):
+ ntype = node.get('type')
+ if ntype == 'tag':
+ c = html_parent.makeelement(node['name'])
+ c.attrib.update({k: v or '' for k, v in node.get('attribs', {}).items()})
+ html_parent.append(c)
+ for nc in node.get('children', ()):
+ process_node(nc, c)
+ elif ntype == 'text':
+ text = node.get('data')
+ if text:
+ text = replace_entities(text)
+ if len(html_parent):
+ t = html_parent[-1]
+ t.tail = (t.tail or '') + text
+ else:
+ html_parent.text = (html_parent.text or '') + text
+
+
+def safe_dict(data, *names):
+ ans = data
+ for x in names:
+ ans = ans.get(x) or {}
+ return ans
+
+
+class JSONHasNoContent(ValueError):
+ pass
+
+
+def load_article_from_json(raw, root):
+ # open('/t/raw.json', 'w').write(raw)
+ data = json.loads(raw)
+ body = root.xpath('//body')[0]
+ article = E(body, 'article')
+ E(article, 'div', data['flyTitle'] , style='color: red; font-size:small; font-weight:bold;')
+ E(article, 'h1', data['title'], title=safe_dict(data, "url", "canonical") or '')
+ E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;')
+ E(article, 'div', data['byline'], style='font-style: italic; color:#202020;')
+ main_image_url = safe_dict(data, 'image', 'main', 'url').get('canonical')
+ if main_image_url:
+ div = E(article, 'div')
+ try:
+ E(div, 'img', src=main_image_url)
+ except Exception:
+ pass
+ for node in data.get('text') or ():
+ process_node(node, article)
+
+
+def cleanup_html_article(root):
+ main = root.xpath('//main')[0]
+ body = root.xpath('//body')[0]
+ for child in tuple(body):
+ body.remove(child)
+ body.append(main)
+ main.set('id', '')
+ main.tag = 'article'
+ for x in root.xpath('//*[@style]'):
+ x.set('style', '')
+ for x in root.xpath('//button'):
+ x.getparent().remove(x)
+
+
+def classes(classes):
+ q = frozenset(classes.split(' '))
+ return dict(attrs={
+ 'class': lambda x: x and frozenset(x.split()).intersection(q)})
def new_tag(soup, name, attrs=()):
@@ -13,60 +97,59 @@ def new_tag(soup, name, attrs=()):
return Tag(soup, name, attrs=attrs or None)
+class NoArticles(Exception):
+ pass
+
+
+def process_url(url):
+ if url.startswith('/'):
+ url = 'https://www.economist.com' + url
+ return url
+
+
class Espresso(BasicNewsRecipe):
title = 'The Economist Espresso'
language = 'en'
__author__ = 'unkn0wn'
+ encoding = 'utf-8'
+ masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
+ cover_url = 'https://downloadr2.apkmirror.com/wp-content/uploads/2021/10/75/615777cc6611b.png'
description = (
'Espresso is a rich, full-flavoured shot of daily global analysis'
' from the editors of The Economist to get you up to speed, fast.'
'Maximise your understanding of the most significant business, '
'economic, political and cultural developments globally.'
)
- cover_url = 'https://downloadr2.apkmirror.com/wp-content/uploads/2021/10/75/615777cc6611b.png'
- no_stylesheets = True
- remove_attributes = ['height', 'width', 'style']
- use_embedded_content = False
- masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png'
extra_css = '''
- h1 { text-align:center; }
- ._main-image, ._description, .sub { text-align:center; font-size:small; }
- ._quote-container { font-size:x-large; font-style:italic; color:#202020; }
+ em { color:#202020; }
+ img {display:block; margin:0 auto;}
'''
- keep_only_tags = [
- dict(name='main', attrs={'id':'content'})
- ]
-
remove_tags = [
- classes('_podcast-promo _newsletter-promo-container _time-last-updated')
- ]
-
- def parse_index(self):
- return [
- ('Espresso',
- [
- {
- 'title': 'The World in Brief',
- 'url': 'https://www.economist.com/the-world-in-brief',
- 'description': 'Catch up quickly on the global stories that matter'
- },
+ dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer']),
+ dict(attrs={'aria-label': "Article Teaser"}),
+ dict(attrs={
+ 'class': [
+ 'dblClkTrk', 'ec-article-info', 'share_inline_header',
+ 'related-items', 'main-content-container', 'ec-topic-widget',
+ 'teaser', 'blog-post__bottom-panel-bottom', 'blog-post__comments-label',
+ 'blog-post__foot-note', 'blog-post__sharebar', 'blog-post__bottom-panel',
+ 'newsletter-form','share-links-header','teaser--wrapped', 'latest-updates-panel__container',
+ 'latest-updates-panel__article-link','blog-post__section'
]
- ),
- ]
-
- def print_version(self, url):
- return 'https://webcache.googleusercontent.com/search?q=cache:' + url
-
- def preprocess_html(self, soup):
- if h1 := soup.find('h1'):
- if p := h1.find_next_sibling('p'):
- p['class'] = 'sub'
- for hr in soup.findAll(attrs={'class':['_gobbet', '_article']}):
- nt = new_tag(soup, 'hr')
- hr.append(nt)
- return soup
+ }
+ ),
+ dict(attrs={
+ 'class': lambda x: x and 'blog-post__siblings-list-aside' in x.split()}),
+ classes(
+ 'share-links-header teaser--wrapped latest-updates-panel__container'
+ ' latest-updates-panel__article-link blog-post__section newsletter-form blog-post__bottom-panel'
+ )
+ ]
+ keep_only_tags = [dict(name='article', id=lambda x: not x)]
+ no_stylesheets = True
+ remove_attributes = ['data-reactid', 'width', 'height']
def get_browser(self, *args, **kwargs):
# Needed to bypass cloudflare
@@ -74,3 +157,108 @@ class Espresso(BasicNewsRecipe):
br = BasicNewsRecipe.get_browser(self, *args, **kwargs)
br.addheaders += [('Accept-Language', 'en-GB,en-US;q=0.9,en;q=0.8')]
return br
+
+ def economist_return_index(self, ans):
+ if not ans:
+ raise NoArticles(
+ 'Could not find any articles, either the '
+ 'economist.com server is having trouble and you should '
+ 'try later or the website format has changed and the '
+ 'recipe needs to be updated.'
+ )
+ return ans
+
+ def parse_index(self):
+ query = {
+ 'query': 'query EspressoQuery($ref:String!){espresso:canonical(ref:$ref){...EspressoFragment __typename}}fragment EspressoFragment on Content{id type hasPart(size:1 sort:"datePublished:desc"){parts{id type rubric:description hasPart(sort:"publication.context.position:asc,datePublished:desc"){parts{...ArticleFragment __typename}__typename}__typename}__typename}__typename}fragment ArticleFragment on Content{ad{grapeshot{channels{name __typename}__typename}__typename}articleSection{internal{id title:headline __typename}__typename}audio{main{id duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished dateRevised flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment __typename}print{title:headline flyTitle:subheadline rubric:description section{id title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description source:channel{id __typename}tegID text(format:"json")title:headline type url{canonical __typename}topic contentIdentity{forceAppWebview mediaType articleType __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical __typename}id width height __typename}__typename}', # noqa
+ 'operationName': 'EspressoQuery',
+ 'variables': '{"ref":"/content/jakj5ed3rml75i8j0d5i74p8adf6eem4"}',
+ }
+ url = 'https://cp2-graphql-gateway.p.aws.economist.com/graphql?' + urlencode(query, safe='()!', quote_via=quote)
+ try:
+ raw = self.index_to_soup(url, raw=True)
+ except Exception:
+ raise ValueError('Server is not reachable, try again after some time.')
+ ans = self.economist_parse_index(raw)
+ return self.economist_return_index(ans)
+
+    def economist_parse_index(self, raw):
+        '''
+        Convert the raw GraphQL response into an index of Espresso articles.
+
+        The JSON of every article part is written to its own persistent
+        temporary file and the matching file:// URL is used as the article
+        URL. self.description is replaced by the rubric of the edition.
+
+        Returns ``[('Espresso', articles)]``, or an empty list when the
+        response holds no article parts.
+        '''
+        data = json.loads(raw)['data']['espresso']['hasPart']['parts'][0]
+        self.description = data['rubric']
+
+        ans = []
+        for part in safe_dict(data, "hasPart", "parts"):
+            title = safe_dict(part, "title")
+            pt = PersistentTemporaryFile('.html')
+            pt.write(json.dumps(part).encode('utf-8'))
+            pt.close()
+            url = 'file:///' + pt.name
+            ans.append({"title": title, "url": url})
+        # A list holding one empty section is still truthy, which would defeat
+        # the "if not ans" check in economist_return_index(), so an empty
+        # index is returned explicitly.
+        return [('Espresso', ans)] if ans else []
+
+ def populate_article_metadata(self, article, soup, first):
+ article.url = soup.find('h1')['title']
+
+    def preprocess_raw_html(self, raw, url):
+        '''
+        Build the article HTML from the JSON in raw and return it serialized
+        as a unicode string.
+
+        The JSON is rendered into an HTML skeleton by load_article_from_json(),
+        after which the tree is cleaned up: lazy-loaded images are replaced by
+        plain <img> tags, script/style/source/meta elements are dropped and a
+        few elements are restyled for e-book output.
+        '''
+        # open('/t/raw.html', 'wb').write(raw.encode('utf-8'))
+        # NOTE(review): the markup inside this literal was stripped from the
+        # patch text; the skeleton is restored here. load_article_from_json()
+        # needs a <body> element to exist - confirm against the recipe.
+        body = '<html><body><article></article></body></html>'
+        root = parse(body)
+        load_article_from_json(raw, root)
+
+        for div in root.xpath('//div[@class="lazy-image"]'):
+            noscript = list(div.iter('noscript'))
+            if noscript and noscript[0].text:
+                img = list(parse(noscript[0].text).iter('img'))
+                if img:
+                    # Swap the <noscript> fallback for a real <img> element
+                    p = noscript[0].getparent()
+                    idx = p.index(noscript[0])
+                    p.insert(idx, p.makeelement('img', src=img[0].get('src')))
+                    p.remove(noscript[0])
+        for x in root.xpath('//*[name()="script" or name()="style" or name()="source" or name()="meta"]'):
+            x.getparent().remove(x)
+        # the economist uses <small> for small caps with a custom font
+        for init in root.xpath('//span[@data-caps="initial"]'):
+            init.set('style', 'font-weight:bold;')
+        for x in root.xpath('//small'):
+            if x.text and len(x) == 0:
+                x.text = x.text.upper()
+                x.tag = 'span'
+                x.set('style', 'font-variant: small-caps')
+        for h2 in root.xpath('//h2'):
+            h2.tag = 'h4'
+        for x in root.xpath('//figcaption'):
+            x.set('style', 'text-align:center; font-size:small;')
+        for x in root.xpath('//cite'):
+            x.tag = 'blockquote'
+            x.set('style', 'color:#404040;')
+        raw = etree.tostring(root, encoding='unicode')
+        return raw
+
+
+ def eco_find_image_tables(self, soup):
+ for x in soup.findAll('table', align=['right', 'center']):
+ if len(x.findAll('font')) in (1, 2) and len(x.findAll('img')) == 1:
+ yield x
+
+ def postprocess_html(self, soup, first):
+ for img in soup.findAll('img', srcset=True):
+ del img['srcset']
+ for table in list(self.eco_find_image_tables(soup)):
+ caption = table.find('font')
+ img = table.find('img')
+ div = new_tag(soup, 'div')
+ div['style'] = 'text-align:left;font-size:70%'
+ ns = NavigableString(self.tag_to_string(caption))
+ div.insert(0, ns)
+ div.insert(1, new_tag(soup, 'br'))
+ del img['width']
+ del img['height']
+ img.extract()
+ div.insert(2, img)
+ table.replaceWith(div)
+ return soup
+
+ def canonicalize_internal_url(self, url, is_link=True):
+ if url.endswith('/print'):
+ url = url.rpartition('/')[0]
+ return BasicNewsRecipe.canonicalize_internal_url(self, url, is_link=is_link)
diff --git a/recipes/livemint.recipe b/recipes/livemint.recipe
index 53e5cb4a46a4..8eb4586b06a1 100644
--- a/recipes/livemint.recipe
+++ b/recipes/livemint.recipe
@@ -95,6 +95,7 @@ class LiveMint(BasicNewsRecipe):
keep_only_tags = [
dict(name='article', attrs={'id':lambda x: x and x.startswith(('article_', 'box_'))}),
+ dict(attrs={'class':lambda x: x and x.startswith('storyPage_storyBox__')}),
classes('contentSec')
]
remove_tags = [
diff --git a/recipes/slate.recipe b/recipes/slate.recipe
index 24ed1db877af..d262d291a3ec 100644
--- a/recipes/slate.recipe
+++ b/recipes/slate.recipe
@@ -19,13 +19,12 @@ def classes(classes):
class Slate(BasicNewsRecipe):
title = 'Slate'
description = 'A general-interest publication offering analysis and commentary about politics, news and culture.'
- __author__ = 'Kovid Goyal'
+ __author__ = 'unkn0wn'
no_stylesheets = True
language = 'en'
encoding = 'utf-8'
remove_attributes = ['style', 'height', 'width']
- oldest_article = 2 # days
- INDEX = 'https://slate.com'
+ INDEX = 'https://slate.com/'
resolve_internal_links = True
remove_empty_feeds = True
ignore_duplicate_articles = {'url'}
@@ -52,16 +51,44 @@ class Slate(BasicNewsRecipe):
img['src'] = img['data-src'] + '&width=600'
return soup
- feeds = [
- ('News & Politics', 'https://slate.com/feeds/news-and-politics.rss'),
- ('Culture', 'https://slate.com/feeds/culture.rss'),
- ('Technology', 'https://slate.com/feeds/technology.rss'),
- ('Business', 'https://slate.com/feeds/business.rss'),
- ('Human Interest', 'https://slate.com/feeds/human-interest.rss'),
- ('Others', 'https://slate.com/feeds/all.rss')
- ]
+ def parse_index(self):
+ ans = []
+ for sectitle, url in (
+ ('News & Politics', 'news-and-politics'),
+ ('Culture', 'culture'),
+ ('Technology', 'technology'),
+ ('Business', 'business'),
+ ('Life', 'life'),
+ ('Advice', 'advice'),
+ ):
+ url = self.INDEX + url
+ self.log('\nFound section:', sectitle, url)
+ articles = self.slate_section_articles(url)
+ if articles:
+ ans.append((sectitle, articles))
+ return ans
+
+    def slate_section_articles(self, url):
+        '''
+        Return the articles linked from the section index page at url.
+
+        Only links whose href starts with the section URL followed by the
+        current /YYYY/MM are considered, and only those containing one of the
+        known headline elements are kept. Each article is a dict with the
+        keys title and url.
+        '''
+        from datetime import date
+        soup = self.index_to_soup(url)
+        ans = []
+        # Compute the prefix once. The href filter must not close over a name
+        # that is rebound inside the loop below.
+        prefix = url + date.today().strftime('/%Y/%m')
+        for a in soup.findAll('a', attrs={'href':lambda x: x and x.startswith(prefix)}):
+            article_url = a['href']
+            head = a.find(attrs={'class':[
+                'section-feed-two-column__card-headline',
+                'section-feed-three-column__teaser-headline',
+                'section-feed-two-column__teaser-headline',
+                'topic-story__hed'
+            ]})
+            if head:
+                title = self.tag_to_string(head).strip()
+                self.log('\t' + title)
+                self.log('\t\t' + article_url)
+                ans.append({'title': title, 'url': article_url})
+        return ans
- def get_article_url(self, article):
- url = BasicNewsRecipe.get_article_url(self, article)
- if '/podcasts/' not in url:
- return url.split('?')[0]
+ def populate_article_metadata(self, article, soup, first):
+ summ = soup.find(attrs={'class':'article__dek'})
+ if summ:
+ article.summary = article.text_summary = self.tag_to_string(summ)
diff --git a/setup/arch-ci.sh b/setup/arch-ci.sh
index 138b5f035b26..1498dda942a0 100755
--- a/setup/arch-ci.sh
+++ b/setup/arch-ci.sh
@@ -5,8 +5,7 @@
set -xe
-pacman -S --noconfirm --needed base-devel sudo git sip pyqt-builder cmake chmlib icu jxrlib hunspell libmtp libusb libwmf optipng python-apsw python-beautifulsoup4 python-cssselect python-css-parser python-dateutil python-jeepney python-dnspython python-feedparser python-html2text python-html5-parser python-lxml python-markdown python-mechanize python-msgpack python-netifaces python-unrardll python-pillow python-psutil python-pygments python-pyqt6 python-regex python-zeroconf python-pyqt6-webengine qt6-svg qt6-imageformats udisks2 hyphen python-pychm python-pycryptodome speech-dispatcher python-sphinx python-urllib3 python-py7zr python-pip python-fonttools python-xxhash uchardet libstemmer poppler tk podofo
+pacman -S --noconfirm --needed base-devel sudo git sip pyqt-builder cmake chmlib icu jxrlib hunspell libmtp libusb libwmf optipng python-apsw python-beautifulsoup4 python-cssselect python-css-parser python-dateutil python-jeepney python-dnspython python-feedparser python-html2text python-html5-parser python-lxml python-lxml-html-clean python-markdown python-mechanize python-msgpack python-netifaces python-unrardll python-pillow python-psutil python-pygments python-pyqt6 python-regex python-zeroconf python-pyqt6-webengine qt6-svg qt6-imageformats udisks2 hyphen python-pychm python-pycryptodome speech-dispatcher python-sphinx python-urllib3 python-py7zr python-pip python-fonttools python-xxhash uchardet libstemmer poppler tk podofo
useradd -m ci
chown -R ci:users $GITHUB_WORKSPACE
-pip install --break-system-packages lxml-html-clean
diff --git a/src/calibre/constants.py b/src/calibre/constants.py
index 418e08d0b870..4184c1d72577 100644
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@@ -11,7 +11,7 @@
from polyglot.builtins import environ_item, hasenv
__appname__ = 'calibre'
-numeric_version = (7, 11, 0)
+numeric_version = (7, 12, 0)
__version__ = '.'.join(map(str, numeric_version))
git_version = None
__author__ = "Kovid Goyal "
diff --git a/src/calibre/devices/mtp/driver.py b/src/calibre/devices/mtp/driver.py
index d64b0f2e4bae..620e2701a225 100644
--- a/src/calibre/devices/mtp/driver.py
+++ b/src/calibre/devices/mtp/driver.py
@@ -27,6 +27,7 @@
BASE = importlib.import_module('calibre.devices.mtp.%s.driver'%(
'windows' if iswindows else 'unix')).MTP_DEVICE
+DEFAULT_THUMBNAIL_WIDTH, DEFAULT_THUMBNAIL_HEIGHT = 120, 160
class MTPInvalidSendPathError(PathError):
@@ -43,8 +44,8 @@ class MTP_DEVICE(BASE):
CAN_SET_METADATA = []
NEWS_IN_FOLDER = True
MAX_PATH_LEN = 230
- THUMBNAIL_HEIGHT = 160
- THUMBNAIL_WIDTH = 120
+ THUMBNAIL_HEIGHT = DEFAULT_THUMBNAIL_HEIGHT
+ THUMBNAIL_WIDTH = DEFAULT_THUMBNAIL_WIDTH
CAN_SET_METADATA = []
BACKLOADING_ERROR_MESSAGE = None
MANAGES_DEVICE_PRESENCE = True
@@ -155,7 +156,9 @@ def open(self, device, library_uuid):
self.current_device_defaults, self.current_vid, self.current_pid = self.device_defaults(device, self)
self.calibre_file_paths = self.current_device_defaults.get(
'calibre_file_paths', {'metadata':self.METADATA_CACHE, 'driveinfo':self.DRIVEINFO})
+ self.THUMBNAIL_WIDTH, self.THUMBNAIL_HEIGHT = DEFAULT_THUMBNAIL_WIDTH, DEFAULT_THUMBNAIL_HEIGHT
if self.is_kindle:
+ self.THUMBNAIL_WIDTH = self.THUMBNAIL_HEIGHT = 500 # see kindle/driver.py
try:
self.sync_kindle_thumbnails()
except Exception:
diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py
index 3a9d18ed60e5..49ddffde150e 100644
--- a/src/calibre/ebooks/conversion/cli.py
+++ b/src/calibre/ebooks/conversion/cli.py
@@ -239,7 +239,7 @@ def add_pipeline_options(parser, plumber):
'chapter', 'chapter_mark',
'prefer_metadata_cover', 'remove_first_image',
'insert_metadata', 'page_breaks_before',
- 'remove_fake_margins', 'start_reading_at',
+ 'remove_fake_margins', 'start_reading_at', 'add_alt_text_to_img',
]
)),
diff --git a/src/calibre/ebooks/conversion/config.py b/src/calibre/ebooks/conversion/config.py
index 0fff29dea587..91f46b984f76 100644
--- a/src/calibre/ebooks/conversion/config.py
+++ b/src/calibre/ebooks/conversion/config.py
@@ -260,7 +260,7 @@ def get_sorted_output_formats(preferred_fmt=None):
'structure_detection': (
'chapter', 'chapter_mark', 'start_reading_at',
'remove_first_image', 'remove_fake_margins', 'insert_metadata',
- 'page_breaks_before'),
+ 'page_breaks_before', 'add_alt_text_to_img',),
'toc': (
'level1_toc', 'level2_toc', 'level3_toc',
diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py
index adbfbb540a3d..47c4f50e64d7 100644
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@@ -420,6 +420,11 @@ def __init__(self, input, output, log, report_progress=DummyReporter(),
'case you can disable the removal.')
),
+OptionRecommendation(name='add_alt_text_to_img',
+ recommended_value=False, level=OptionRecommendation.LOW,
+ help=_('When an <img> tag has no alt attribute, check the associated image file for metadata that specifies alternate text, and'
+ ' use it to fill in the alt attribute. The alt attribute is used by screen readers for assisting the visually challenged.')
+),
OptionRecommendation(name='margin_top',
recommended_value=5.0, level=OptionRecommendation.LOW,
@@ -1203,6 +1208,12 @@ def run(self):
from calibre.ebooks.oeb.transforms.jacket import Jacket
Jacket()(self.oeb, self.opts, self.user_metadata)
+ pr(0.37)
+ self.flush()
+
+ if self.opts.add_alt_text_to_img:
+ from calibre.ebooks.oeb.transforms.alt_text import AddAltText
+ AddAltText()(self.oeb, self.opts)
pr(0.4)
self.flush()
diff --git a/src/calibre/ebooks/metadata/search_internet.py b/src/calibre/ebooks/metadata/search_internet.py
index 55b991537619..9b894d99e56d 100644
--- a/src/calibre/ebooks/metadata/search_internet.py
+++ b/src/calibre/ebooks/metadata/search_internet.py
@@ -48,7 +48,7 @@
def qquote(val, use_plus=True):
if not isinstance(val, bytes):
- val = val.encode('utf-8')
+ val = val.encode('utf-8', 'replace')
ans = quote_plus(val) if use_plus else quote(val)
if isinstance(ans, bytes):
ans = ans.decode('utf-8')
diff --git a/src/calibre/ebooks/mobi/writer2/resources.py b/src/calibre/ebooks/mobi/writer2/resources.py
index 592c1e607768..52c44e1f095e 100644
--- a/src/calibre/ebooks/mobi/writer2/resources.py
+++ b/src/calibre/ebooks/mobi/writer2/resources.py
@@ -27,12 +27,14 @@ def process_jpegs_for_amazon(data: bytes) -> bytes:
# Amazon's MOBI renderer can't render JPEG images without JFIF metadata
# and images with EXIF data dont get displayed on the cover screen
changed = not img.info
+ has_exif = False
if hasattr(img, 'getexif'):
exif = img.getexif()
+ has_exif = bool(exif)
if exif.get(0x0112) in (2,3,4,5,6,7,8):
changed = True
img = ImageOps.exif_transpose(img)
- if changed:
+ if changed or has_exif:
out = BytesIO()
img.save(out, 'JPEG')
data = out.getvalue()
diff --git a/src/calibre/ebooks/oeb/polish/main.py b/src/calibre/ebooks/oeb/polish/main.py
index fd32b77c4d66..005af7113cc7 100644
--- a/src/calibre/ebooks/oeb/polish/main.py
+++ b/src/calibre/ebooks/oeb/polish/main.py
@@ -14,6 +14,7 @@
from calibre.ebooks.oeb.polish.container import get_container
from calibre.ebooks.oeb.polish.cover import set_cover
from calibre.ebooks.oeb.polish.css import remove_unused_css
+from calibre.ebooks.oeb.polish.download import download_external_resources, get_external_resources, replace_resources
from calibre.ebooks.oeb.polish.embed import embed_all_fonts
from calibre.ebooks.oeb.polish.hyphenation import add_soft_hyphens, remove_soft_hyphens
from calibre.ebooks.oeb.polish.images import compress_images
@@ -22,6 +23,7 @@
from calibre.ebooks.oeb.polish.stats import StatsCollector
from calibre.ebooks.oeb.polish.subset import iter_subsettable_fonts, subset_all_fonts
from calibre.ebooks.oeb.polish.upgrade import upgrade_book
+from calibre.utils.localization import ngettext
from calibre.utils.logging import Log
from polyglot.builtins import iteritems
@@ -38,6 +40,7 @@
'upgrade_book': False,
'add_soft_hyphens': False,
'remove_soft_hyphens': False,
+ 'download_external_resources': False,
}
CUSTOMIZATION = {
@@ -133,6 +136,12 @@
'remove_soft_hyphens': _('''\
Remove soft hyphens from all text in the book.
'''),
+
+'download_external_resources': _('''\
+Download external resources such as images, stylesheets, etc. that point to URLs instead of files in the book.
+All such resources will be downloaded and added to the book so that the book no longer references any external resources.
+
+'''),
}
@@ -161,6 +170,30 @@ def update_metadata(ebook, new_opf):
stream.write(opfbytes)
+def download_resources(ebook, report) -> bool:
+    '''
+    Download the external resources referenced by ebook and replace the
+    references with the downloaded copies.
+
+    :param ebook: The book container to operate on
+    :param report: Callable used to emit progress messages
+    :return: True if replacing the resources changed the book
+    '''
+    changed = False
+    url_to_referrer_map = get_external_resources(ebook)
+    if url_to_referrer_map:
+        n = len(url_to_referrer_map)
+        report(ngettext('Downloading one external resource', 'Downloading {} external resources', n).format(n))
+        replacements, failures = download_external_resources(ebook, url_to_referrer_map)
+        if not failures:
+            report(_('Successfully downloaded all resources'))
+        else:
+            tb = [f'{url}\n\t{err}\n' for url, err in iteritems(failures)]
+            if replacements:
+                report(_('Failed to download some resources, see details below:'))
+            else:
+                report(_('Failed to download all resources, see details below:'))
+            # Every other report() call in this file passes a string, so the
+            # failure details are joined instead of being passed as a list.
+            report('\n'.join(tb))
+        if replacements:
+            if replace_resources(ebook, url_to_referrer_map, replacements):
+                changed = True
+    else:
+        report(_('No external resources found in book'))
+    return changed
+
+
def polish_one(ebook, opts, report, customization=None):
def rt(x):
return report('\n### ' + x)
@@ -267,6 +300,16 @@ def rt(x):
add_soft_hyphens(ebook, report)
changed = True
+    if opts.download_external_resources:
+        rt(_('Downloading external resources'))
+        try:
+            # download_resources() returns True when it modified the book;
+            # fold that into the value returned by polish_one().
+            if download_resources(ebook, report):
+                changed = True
+        except Exception:
+            import traceback
+            report(_('Failed to download resources with error:'))
+            report(traceback.format_exc())
+        report('')
+
return changed
@@ -337,6 +380,7 @@ def option_parser():
o('--add-soft-hyphens', '-H', help=CLI_HELP['add_soft_hyphens'])
o('--remove-soft-hyphens', help=CLI_HELP['remove_soft_hyphens'])
o('--upgrade-book', '-U', help=CLI_HELP['upgrade_book'])
+ o('--download-external-resources', '-d', help=CLI_HELP['download_external_resources'])
o('--verbose', help=_('Produce more verbose output, useful for debugging.'))
diff --git a/src/calibre/ebooks/oeb/transforms/alt_text.py b/src/calibre/ebooks/oeb/transforms/alt_text.py
new file mode 100644
index 000000000000..383628790c55
--- /dev/null
+++ b/src/calibre/ebooks/oeb/transforms/alt_text.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+# License: GPLv3 Copyright: 2024, Kovid Goyal
+
+
+from io import BytesIO
+
+from PIL import Image
+
+from calibre.ebooks.oeb.base import SVG_MIME, urlnormalize, xpath
+from calibre.utils.img import read_alt_text
+
+
+def process_spine_item(item, hrefs, log):
+ html = item.data
+ for elem in xpath(html, '//h:img[@src]'):
+ src = urlnormalize(elem.attrib['src'])
+ image = hrefs.get(item.abshref(src), None)
+ if image and image.media_type != SVG_MIME and not elem.attrib.get('alt'):
+ data = image.bytes_representation
+ try:
+ with Image.open(BytesIO(data)) as im:
+ alt = read_alt_text(im)
+ except Exception as err:
+ log.warn(f'Failed to read alt text from image {src} with error: {err}')
+ else:
+ if alt:
+ elem.set('alt', alt)
+
+
+class AddAltText:
+
+ def __call__(self, oeb, opts):
+ oeb.logger.info('Add alt text to images...')
+ hrefs = oeb.manifest.hrefs
+ for item in oeb.spine:
+ process_spine_item(item, hrefs, oeb.log)
diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py
index c2473bba35a5..1fa5d557770e 100644
--- a/src/calibre/gui2/actions/edit_metadata.py
+++ b/src/calibre/gui2/actions/edit_metadata.py
@@ -534,7 +534,7 @@ def do_edit_bulk_metadata(self, rows, book_ids):
changed = False
refresh_books = set(book_ids)
try:
- current_tab = 0
+ current_tab = -1
while True:
dialog = MetadataBulkDialog(self.gui, rows,
self.gui.library_view.model(), current_tab, refresh_books)
diff --git a/src/calibre/gui2/actions/polish.py b/src/calibre/gui2/actions/polish.py
index 741d90a5288f..6764d7c8a489 100644
--- a/src/calibre/gui2/actions/polish.py
+++ b/src/calibre/gui2/actions/polish.py
@@ -87,6 +87,7 @@ def __init__(self, db, book_id_map, parent=None):
'remove_jacket':_('<h3>Remove book jacket</h3>%s')%HELP['remove_jacket'],
'remove_unused_css':_('<h3>Remove unused CSS rules</h3>%s')%HELP['remove_unused_css'],
'compress_images': _('<h3>Losslessly compress images</h3>%s') % HELP['compress_images'],
+ 'download_external_resources': _('<h3>Download external resources</h3>%s') % HELP['download_external_resources'],
'add_soft_hyphens': _('<h3>Add soft-hyphens</h3>%s') % HELP['add_soft_hyphens'],
'remove_soft_hyphens': _('<h3>Remove soft-hyphens</h3>%s') % HELP['remove_soft_hyphens'],
'upgrade_book': _('<h3>Upgrade book internals</h3>%s') % HELP['upgrade_book'],
@@ -109,6 +110,7 @@ def __init__(self, db, book_id_map, parent=None):
('remove_jacket', _('&Remove a previously inserted book jacket')),
('remove_unused_css', _('Remove &unused CSS rules from the book')),
('compress_images', _('Losslessly &compress images')),
+ ('download_external_resources', _('&Download external resources')),
('add_soft_hyphens', _('Add s&oft hyphens')),
('remove_soft_hyphens', _('Remove so&ft hyphens')),
('upgrade_book', _('&Upgrade book internals')),
diff --git a/src/calibre/gui2/convert/structure_detection.ui b/src/calibre/gui2/convert/structure_detection.ui
index 9b99fd93a9e5..9d68182f9056 100644
--- a/src/calibre/gui2/convert/structure_detection.ui
+++ b/src/calibre/gui2/convert/structure_detection.ui
@@ -14,39 +14,9 @@
Form
- -
-
-
- Remove &fake margins
-
-
-
- -
-
-
- The header and footer removal options have been replaced by the Search & replace options. Click the Search & replace category in the bar to the left to use these options. Leave the replace field blank and enter your header/footer removal regexps into the search field.
-
-
- true
-
-
-
- -
+
-
- -
-
-
- Insert &metadata as page at start of book
-
-
-
- -
-
-
- -
-
-
-
@@ -57,12 +27,8 @@
- -
-
-
- 20
-
-
+
-
+
-
@@ -77,14 +43,24 @@
- -
-
+
-
+
+
+ 20
+
+
+
+ -
+
- Remove first &image
+ The header and footer removal options have been replaced by the Search & replace options. Click the Search & replace category in the bar to the left to use these options. Leave the replace field blank and enter your header/footer removal regexps into the search field.
+
+
+ true
- -
+
-
Qt::Vertical
@@ -97,6 +73,37 @@
+ -
+
+
+ Insert &metadata as page at start of book
+
+
+
+ -
+
+
+ -
+
+
+ Remove &fake margins
+
+
+
+ -
+
+
+ Remove first &image
+
+
+
+ -
+
+
+ Add &alt text to images
+
+
+
diff --git a/src/calibre/gui2/dialogs/metadata_bulk.py b/src/calibre/gui2/dialogs/metadata_bulk.py
index 6bdcb215fda9..82b85fd825b0 100644
--- a/src/calibre/gui2/dialogs/metadata_bulk.py
+++ b/src/calibre/gui2/dialogs/metadata_bulk.py
@@ -558,7 +558,7 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog):
_('Append to field'),
]
- def __init__(self, window, rows, model, tab, refresh_books):
+ def __init__(self, window, rows, model, starting_tab, refresh_books):
QDialog.__init__(self, window)
self.setupUi(self)
setup_status_actions(self.test_result)
@@ -634,8 +634,7 @@ def __init__(self, window, rows, model, tab, refresh_books):
'This operation cannot be canceled or undone'))
self.do_again = False
self.restore_geometry(gprefs, 'bulk_metadata_window_geometry')
- ct = gprefs.get('bulk_metadata_window_tab', 0)
- self.central_widget.setCurrentIndex(ct)
+
self.languages.init_langs(self.db)
self.languages.setEditText('')
self.authors.setFocus(Qt.FocusReason.OtherFocusReason)
@@ -650,7 +649,9 @@ def __init__(self, window, rows, model, tab, refresh_books):
(self.button_clear_tags_rules, self.button_clear_authors_rules, self.button_clear_publishers_rules)
))
self.update_transform_labels()
- self.central_widget.setCurrentIndex(tab)
+ if starting_tab < 0:
+ starting_tab = gprefs.get('bulk_metadata_window_tab', 0)
+ self.central_widget.setCurrentIndex(starting_tab)
self.exec()
def update_transform_labels(self):
diff --git a/src/calibre/gui2/metadata/diff.py b/src/calibre/gui2/metadata/diff.py
index fcf279b6ccb8..4e1b0df183b9 100644
--- a/src/calibre/gui2/metadata/diff.py
+++ b/src/calibre/gui2/metadata/diff.py
@@ -626,6 +626,7 @@ def set_pixmap(self, pixmap):
class CompareMany(QDialog):
+
def __init__(self, ids, get_metadata, field_metadata, parent=None,
window_title=None,
reject_button_tooltip=None,
@@ -641,6 +642,13 @@ def __init__(self, ids, get_metadata, field_metadata, parent=None,
self.l = l = QVBoxLayout(w)
s.addWidget(w)
self.next_called = False
+
+ # Initialize the previous items list. It stores the ids of the reviewed items that were rejected or accepted.
+ # When the user clicks on the next or reject button, the current item is added to the previous items list.
+ # When the user presses the back button, the last item is popped from the previous items list and set as the current item.
+ # The popped item is also removed from the rejected or accepted items (and is unmarked if it was marked).
+ self.previous_items = []
+
self.setWindowIcon(QIcon.ic('auto_author_sort.png'))
self.get_metadata = get_metadata
self.ids = list(ids)
@@ -667,7 +675,9 @@ def __init__(self, ids, get_metadata, field_metadata, parent=None,
self.bb = bb = QDialogButtonBox(QDialogButtonBox.StandardButton.Cancel)
bb.button(QDialogButtonBox.StandardButton.Cancel).setAutoDefault(False)
bb.rejected.connect(self.reject)
+
if self.total > 1:
+
self.aarb = b = bb.addButton(_('&Accept all remaining'), QDialogButtonBox.ButtonRole.YesRole)
b.setIcon(QIcon.ic('ok.png')), b.setAutoDefault(False)
if accept_all_tooltip:
@@ -696,6 +706,21 @@ def __init__(self, ids, get_metadata, field_metadata, parent=None,
b.setIcon(QIcon.ic(action_button[1]))
self.action_button_action = action_button[2]
b.clicked.connect(self.action_button_clicked)
+
+ # Add a Back button, which allows the user to go back to the previous book, cancel any reject/edit/accept that was done to it, and review it again
+ # create a Back action that will be triggered when the user presses the back button or the back shortcut
+ self.back_action = QAction(self)
+ self.back_action.setShortcut(QKeySequence(Qt.KeyboardModifier.AltModifier | Qt.Key.Key_Left))
+ self.back_action.triggered.connect(self.previous_item)
+ self.addAction(self.back_action)
+ # create the back button, set its name, tooltip, icon and action to call the previous_item method
+ self.back_button = bb.addButton(_('P&revious'), QDialogButtonBox.ButtonRole.ActionRole)
+ self.back_button.setToolTip(_('Move to previous {}').format(self.back_action.shortcut().toString(QKeySequence.SequenceFormat.NativeText)))
+ self.back_button.setIcon(QIcon.ic('back.png'))
+ self.back_button.clicked.connect(self.previous_item)
+ self.back_button.setDefault(True)
+ self.back_button.setAutoDefault(False)
+
self.nb = b = bb.addButton(_('&Next') if self.total > 1 else _('&OK'), QDialogButtonBox.ButtonRole.ActionRole)
if self.total > 1:
b.setToolTip(_('Move to next [%s]') % self.next_action.shortcut().toString(QKeySequence.SequenceFormat.NativeText))
@@ -722,22 +747,27 @@ def __init__(self, ids, get_metadata, field_metadata, parent=None,
b.setFocus(Qt.FocusReason.OtherFocusReason)
self.next_called = False
+
def show_zoomed_cover(self, pixmap):
self.cover_zoom.set_pixmap(pixmap)
self.stack.setCurrentIndex(1)
+
@property
def mark_rejected(self):
return self.markq.isChecked()
+
def action_button_clicked(self):
self.action_button_action(self.ids[0])
+
def accept(self):
self.save_geometry(gprefs, 'diff_dialog_geom')
self.compare_widget.save_comments_controls_state()
super().accept()
+
def reject(self):
if self.stack.currentIndex() == 1:
self.stack.setCurrentIndex(0)
@@ -750,27 +780,64 @@ def reject(self):
self.compare_widget.save_comments_controls_state()
super().reject()
+
@property
def current_mi(self):
return self.compare_widget.current_mi
+
+ def show_current_item(self):
+ self.setWindowTitle(self.window_title + _(' [%(num)d of %(tot)d]') % dict(
+ num=(self.total - len(self.ids) + 1), tot=self.total))
+ oldmi, newmi = self.get_metadata(self.ids[0])
+ self.compare_widget(oldmi, newmi)
+ self.update_back_button_state()
+
+ def update_back_button_state(self):
+ enabled = bool(self.previous_items)
+ self.back_action.setEnabled(enabled)
+ self.back_button.setEnabled(enabled)
+
def next_item(self, accept):
self.next_called = True
if not self.ids:
return self.accept()
+
if self.current_mi is not None:
changed = self.compare_widget.apply_changes()
if self.current_mi is not None:
old_id = self.ids.pop(0)
+
+ # Save the current book that was just reviewed and accepted or rejected to the previous_items list
+ # this book can be displayed again if the user presses the back button
+ self.previous_items.append(old_id)
+
if not accept:
self.rejected_ids.add(old_id)
self.accepted[old_id] = (changed, self.current_mi) if accept else (False, None)
if not self.ids:
return self.accept()
- self.setWindowTitle(self.window_title + _(' [%(num)d of %(tot)d]') % dict(
- num=(self.total - len(self.ids) + 1), tot=self.total))
- oldmi, newmi = self.get_metadata(self.ids[0])
- self.compare_widget(oldmi, newmi)
+ self.show_current_item()
+
+
+ def previous_item(self):
+ if self.previous_items:
+ # get the last book id from the previous items list and remove it from the previous items list
+ # this book id is the last book id that was reviewed and accepted or rejected
+ last_previous_item = self.previous_items.pop()
+
+ # if this book id was rejected, remove it from the rejected ids set
+ if last_previous_item in self.rejected_ids:
+ self.rejected_ids.remove(last_previous_item)
+ self.markq.setChecked(False)
+ # if this book id was accepted, remove it from the accepted dictionary
+ elif last_previous_item in self.accepted:
+ self.accepted.pop(last_previous_item)
+
+ # move the last previous item to the beginning of the pending list
+ self.ids.insert(0, last_previous_item)
+ self.show_current_item()
+
def accept_all_remaining(self):
self.next_item(True)
@@ -780,6 +847,7 @@ def accept_all_remaining(self):
self.ids = []
self.accept()
+
def reject_all_remaining(self):
from calibre.gui2.dialogs.confirm_delete import confirm
if not confirm(ngettext(
@@ -795,6 +863,7 @@ def reject_all_remaining(self):
self.ids = []
self.accept()
+
def keyPressEvent(self, ev):
if ev.key() in (Qt.Key.Key_Enter, Qt.Key.Key_Return):
ev.accept()
diff --git a/src/calibre/gui2/preferences/look_feel.ui b/src/calibre/gui2/preferences/look_feel.ui
index 95c628d42a83..55ca97207e98 100644
--- a/src/calibre/gui2/preferences/look_feel.ui
+++ b/src/calibre/gui2/preferences/look_feel.ui
@@ -143,7 +143,7 @@
-
- The unit for the cover corner rounding. Either pixel values or as a percetage of the cover size.
+ The unit for the cover corner rounding. Either pixel values or as a percentage of the cover size.
diff --git a/src/calibre/gui2/update.py b/src/calibre/gui2/update.py
index 61194cf5fdbb..3a0c6d5c4828 100644
--- a/src/calibre/gui2/update.py
+++ b/src/calibre/gui2/update.py
@@ -231,12 +231,14 @@ def update_found(self, calibre_version, number_of_plugin_updates, force=False, n
self._update_notification__.show()
elif has_plugin_updates:
if force:
- from calibre.gui2.dialogs.plugin_updater import FILTER_UPDATE_AVAILABLE, PluginUpdaterDialog
- d = PluginUpdaterDialog(self,
- initial_filter=FILTER_UPDATE_AVAILABLE)
- d.exec()
- if d.do_restart:
- self.quit(restart=True)
+ self.show_plugin_update_dialog()
+
+ def show_plugin_update_dialog(self):
+ from calibre.gui2.dialogs.plugin_updater import FILTER_UPDATE_AVAILABLE, PluginUpdaterDialog
+ d = PluginUpdaterDialog(self, initial_filter=FILTER_UPDATE_AVAILABLE)
+ d.exec()
+ if d.do_restart:
+ self.quit(restart=True)
def plugin_update_found(self, number_of_updates):
# Change the plugin icon to indicate there are updates available
diff --git a/src/calibre/gui2/viewer/search.py b/src/calibre/gui2/viewer/search.py
index a6bc791c69eb..91bac79a3a90 100644
--- a/src/calibre/gui2/viewer/search.py
+++ b/src/calibre/gui2/viewer/search.py
@@ -224,21 +224,24 @@ def __str__(self):
@lru_cache(maxsize=None)
def searchable_text_for_name(name):
ans = []
+ add_text = ans.append
serialized_data = json.loads(get_data(name)[0])
stack = []
+ a = stack.append
removed_tails = []
+ no_visit = frozenset({'script', 'style', 'title', 'head'})
+ ignore_text = frozenset({'img', 'math', 'rt', 'rp', 'rtc'})
for child in serialized_data['tree']['c']:
if child.get('n') == 'body':
- stack.append(child)
+ a((child, False))
# the JS code does not add the tail of body tags to flat text
removed_tails.append((child.pop('l', None), child))
- ignore_text = {'script', 'style', 'title'}
text_pos = 0
anchor_offset_map = OrderedDict()
while stack:
- node = stack.pop()
+ node, text_ignored_in_parent = stack.pop()
if isinstance(node, str):
- ans.append(node)
+ add_text(node)
text_pos += len(node)
continue
g = node.get
@@ -253,13 +256,18 @@ def searchable_text_for_name(name):
aid = x[1]
if aid not in anchor_offset_map:
anchor_offset_map[aid] = text_pos
- if name and text and name not in ignore_text:
- ans.append(text)
+ if name in no_visit:
+ continue
+ ignore_text_in_node_and_children = text_ignored_in_parent or name in ignore_text
+
+ if text and not ignore_text_in_node_and_children:
+ add_text(text)
text_pos += len(text)
- if tail:
- stack.append(tail)
+ if tail and not text_ignored_in_parent:
+ a((tail, ignore_text_in_node_and_children))
if children:
- stack.extend(reversed(children))
+ for child in reversed(children):
+ a((child, ignore_text_in_node_and_children))
for (tail, body) in removed_tails:
if tail is not None:
body['l'] = tail
diff --git a/src/calibre/utils/img.py b/src/calibre/utils/img.py
index d2be1251ad83..f45c3ad5f3da 100644
--- a/src/calibre/utils/img.py
+++ b/src/calibre/utils/img.py
@@ -719,6 +719,79 @@ def convert_PIL_image_to_pixmap(im, device_pixel_ratio=1.0):
if colortable:
qimg.setColorTable(colortable)
return QPixmap.fromImage(qimg)
+
+
+def read_xmp_from_pil_image(im) -> str:
+ fmt = im.format.lower()
+ xml = ''
+ if fmt == 'jpeg':
+ for segment, content in im.applist:
+ if segment == "APP1":
+ marker, xmp_tags = content.split(b"\x00")[:2]
+ if marker == b"http://ns.adobe.com/xap/1.0/":
+ xml = xmp_tags
+ break
+ elif fmt == 'png':
+ xml = im.info.get('XML:com.adobe.xmp', '')
+ elif fmt == 'webp':
+ xml = im.info.get("xmp", '')
+ elif fmt == 'tiff':
+ xml = im.tag_v2.get(700, '')
+ return xml
+
+
+def read_text_from_container(container, target_lang=''):
+ lang_map = {}
+ for li in container.xpath('descendant::*[local-name()="li"]'):
+ if li.text:
+ lang = li.attrib.get('{http://www.w3.org/XML/1998/namespace}lang', 'x-default')
+ lang_map[lang] = li.text
+ if not target_lang and 'x-default' in lang_map:
+ return lang_map['x-default']
+ if target_lang in lang_map:
+ return lang_map[target_lang]
+ from calibre.utils.localization import canonicalize_lang
+ target_lang = canonicalize_lang(target_lang)
+ if target_lang:
+ for lang, ans in lang_map.items():
+ if canonicalize_lang(lang) == target_lang:
+ return ans
+ return lang_map.get('x-default', '')
+
+
+def read_alt_text_from_xmp(xmp, target_lang='') -> str:
+ from lxml import etree
+ try:
+ root = etree.fromstring(xmp)
+ except Exception:
+ return ''
+ # print(etree.tostring(root, encoding='utf-8', pretty_print=True).decode())
+ for a in root.xpath('//*[local-name()="AltTextAccessibility"]'):
+ if ans := read_text_from_container(a, target_lang):
+ return ans
+
+ for d in etree.XPath('//dc:description', namespaces={'dc': 'http://purl.org/dc/elements/1.1/'})(root):
+ if ans := read_text_from_container(d, target_lang):
+ return ans
+ return ''
+
+
+def read_alt_text(pil_im_or_path, target_lang='') -> str:
+ if isinstance(pil_im_or_path, str):
+ from PIL import Image
+ im = Image.open(pil_im_or_path)
+ else:
+ im = pil_im_or_path
+ xmp = read_xmp_from_pil_image(im)
+ if xmp:
+ if alt := read_alt_text_from_xmp(xmp, target_lang).strip():
+ return alt
+ exif = im.getexif()
+ if exif:
+ if desc := exif.get(270):
+ return desc.strip()
+ return ''
+
# }}}
def test(): # {{{
diff --git a/src/pyj/book_list/conversion_widgets.pyj b/src/pyj/book_list/conversion_widgets.pyj
index b404a13cc0d9..63f915a6143c 100644
--- a/src/pyj/book_list/conversion_widgets.pyj
+++ b/src/pyj/book_list/conversion_widgets.pyj
@@ -360,6 +360,7 @@ def structure_detection(container):
g.appendChild(choices('chapter_mark', _('Chap&ter mark:'), ['pagebreak', 'rule', 'both', 'none']))
g.appendChild(checkbox('remove_first_image', _('Remove first &image')))
g.appendChild(checkbox('remove_fake_margins', _('Remove &fake margins')))
+ g.appendChild(checkbox('add_alt_text_to_img', _('Add &alt text to images')))
g.appendChild(checkbox('insert_metadata', _('Insert metadata at start of book')))
g.appendChild(lineedit('page_breaks_before', _('Insert page breaks before'), 50))
g.appendChild(lineedit('start_reading_at', _('Start reading at'), 50))
diff --git a/src/pyj/read_book/highlights.pyj b/src/pyj/read_book/highlights.pyj
index 9f637acdc151..081215dda8cc 100644
--- a/src/pyj/read_book/highlights.pyj
+++ b/src/pyj/read_book/highlights.pyj
@@ -614,7 +614,7 @@ class ChapterGroup:
sg.render_as_text(lines, link_prefix, current_query, as_markdown)
-def show_export_dialog(annotations_manager):
+def show_export_dialog(annotations_manager, book_metadata):
sd = get_session_data()
fmt = sd.get('highlights_export_format')
if v"['text', 'markdown', 'calibre_annotations_collection']".indexOf(fmt) < 0:
@@ -682,7 +682,8 @@ def show_export_dialog(annotations_manager):
text = document.getElementById(ta_id).textContent
ext = 'md' if fmt is 'markdown' else ('txt' if fmt is 'text' else 'json')
mt = 'text/markdown' if fmt is 'markdown' else ('text/plain' if fmt is 'text' else 'application/json')
- filename = f'highlights.{ext}'
+ title = book_metadata?.title or _('Unknown')
+ filename = _('{title} - highlights').format(title=title) + f'.{ext}'
file = new Blob([text], {'type': mt})
url = window.URL.createObjectURL(file)
a = E.a(href=url, download=filename)
@@ -956,7 +957,7 @@ def create_highlights_panel(annotations_manager, hide_panel, book, container, on
_('All'), 'plus', select_all, _('Select all highlights'),
class_='ac-button sel-button')
export_button = create_button(
- _('Export'), 'cloud-download', show_export_dialog.bind(None, annotations_manager), _('Export all or selected highlights'),
+ _('Export'), 'cloud-download', show_export_dialog.bind(None, annotations_manager, book.metadata), _('Export all or selected highlights'),
class_='ac-button')
c = E.div(
style='padding: 1rem',
diff --git a/src/pyj/read_book/resources.pyj b/src/pyj/read_book/resources.pyj
index c854af7642f1..cbde88a6f382 100644
--- a/src/pyj/read_book/resources.pyj
+++ b/src/pyj/read_book/resources.pyj
@@ -412,12 +412,12 @@ def text_from_serialized_html(data, get_anchor_offset_map):
ignore_text = {'img': True, 'math': True, 'rt': True, 'rp': True, 'rtc': True}
ignore_text
if tag_map:
- stack = v'[[serialized_data.tree[2], False]]'
+ stack = v'[[serialized_data.tree[2], false]]'
else:
stack = v'[]'
for child in serialized_data.tree.c:
if child.n is 'body':
- stack.push(v'[child, False]')
+ stack.push(v'[child, false]')
anchor_offset_map = {}
text_pos = 0
while stack.length:
@@ -436,7 +436,7 @@ def text_from_serialized_html(data, get_anchor_offset_map):
anchor_offset_map[aid] = text_pos
if no_visit[src.n]:
continue
- ignore_text_in_node_and_children = v'!!ignore_text[src.n]'
+ ignore_text_in_node_and_children = text_ignored_in_parent or v'!!ignore_text[src.n]'
if not ignore_text_in_node_and_children and src.x:
ans.push(src.x)
text_pos += src.x.length
diff --git a/src/pyj/read_book/view.pyj b/src/pyj/read_book/view.pyj
index 3aba8bb68dfb..8ee550467450 100644
--- a/src/pyj/read_book/view.pyj
+++ b/src/pyj/read_book/view.pyj
@@ -47,13 +47,13 @@ from read_book.toc import get_current_toc_nodes, update_visible_toc_nodes
from read_book.touch import set_left_margin_handler, set_right_margin_handler
from session import get_device_uuid, get_interface_data
from utils import (
- default_context_menu_should_be_allowed, html_escape, is_ios, parse_url_params,
+ default_context_menu_should_be_allowed, html_escape, parse_url_params,
safe_set_inner_html, username_key
)
add_extra_css(def():
sel = '.book-side-margin'
- ans = build_rule(sel, cursor='pointer', text_align='center', height='100vh', user_select='none', display='flex', align_items='center', justify_content='space-between', flex_direction='column')
+ ans = build_rule(sel, cursor='pointer', text_align='center', height='100dvh', user_select='none', display='flex', align_items='center', justify_content='space-between', flex_direction='column')
ans += build_rule(sel + ' > .arrow', display='none')
ans += build_rule(sel + ' > *', max_width='100%', overflow='hidden')
ans += build_rule(sel + ':hover > .not-arrow', display='none')
@@ -138,11 +138,11 @@ def show_controls_help():
container.removeChild(container.firstChild)
container.appendChild(E.div(
- style=f'overflow: hidden; width: 100vw; height: 100vh; text-align: center; font-size: 1.3rem; font-weight: bold; background: {get_color("window-background")};' +
+ style=f'overflow: hidden; width: 100vw; height: 100dvh; text-align: center; font-size: 1.3rem; font-weight: bold; background: {get_color("window-background")};' +
'display:flex; flex-direction: column; align-items: stretch',
E.div(
msg(_('Tap (or right click) for controls')),
- style='height: 25vh; display:flex; align-items: center; border-bottom: solid 2px currentColor',
+ style='height: 25dvh; display:flex; align-items: center; border-bottom: solid 2px currentColor',
),
E.div(
style="display: flex; align-items: stretch; flex-grow: 10",
@@ -191,12 +191,6 @@ def margin_elem(sd, which, id, onclick, oncontextmenu):
ans.addEventListener('click', onclick)
if oncontextmenu:
ans.addEventListener('contextmenu', oncontextmenu)
- if is_ios and which is 'margin_bottom' and not window.navigator.standalone and not /CriOS\//.test(window.navigator.userAgent):
- # On iOS Safari 100vh includes the size of the navbar and there is no way to
- # go fullscreen, so to make the bottom bar visible we add a margin to
- # the bottom bar. CriOS is for Chrome on iOS. And in standalone
- # (web-app mode) there is no nav bar.
- ans.style.marginBottom = '25px'
return ans
@@ -294,14 +288,14 @@ class View:
}
iframe, self.iframe_wrapper = create_wrapped_iframe(handlers, _('Bootstrapping book reader...'), entry_point, iframe_kw)
container.appendChild(
- E.div(style='max-height: 100vh; width: 100vw; height: 100vh; overflow: hidden; display: flex; align-items: stretch', # container for horizontally aligned panels
+ E.div(style='max-height: 100dvh; width: 100dvw; height: 100dvh; overflow: hidden; display: flex; align-items: stretch', # container for horizontally aligned panels
oncontextmenu=def (ev):
if not default_context_menu_should_be_allowed(ev):
ev.preventDefault()
,
- E.div(style='max-height: 100vh; display: flex; flex-direction: column; align-items: stretch; flex-grow:2', # container for iframe and any other panels in the same column
- E.div(style='max-height: 100vh; flex-grow: 2; display:flex; align-items: stretch', # container for iframe and its overlay
+ E.div(style='max-height: 100dvh; display: flex; flex-direction: column; align-items: stretch; flex-grow:2', # container for iframe and any other panels in the same column
+ E.div(style='max-height: 100dvh; flex-grow: 2; display:flex; align-items: stretch', # container for iframe and its overlay
left_margin,
E.div(style='flex-grow:2; display:flex; align-items:stretch; flex-direction: column', # container for top and bottom margins
margin_elem(sd, 'margin_top', 'book-top-margin', self.top_margin_clicked, self.margin_context_menu.bind(None, 'top')),