diff --git a/Changelog.txt b/Changelog.txt index 44c54a0c01ef..af54ecb21a5e 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -23,6 +23,36 @@ # - title by author # }}} +{{{ 7.12.0 2024-05-30 + +:: new features + +- [2067167] E-book polishing: Add option to download external resources (images/stylesheets/etc.) + +- Conversion: Add an option under Structure detection to automatically fill in blank alt attributes for images that have alt text in the image file metadata + +- [2067437] Content server: Use the book title as the filename for downloaded highlights + +- Review downloaded metadata dialog: Add a "Previous" button to go back to the last reviewed book + +:: bug fixes + +- Content server viewer: Fix the bottom of the text being cut off on mobile browsers when not in full screen mode. Unfortunately, this means the viewer now requires a browser no more than two years old. + +- [2067168] Kindle driver: Increase the size of the cover thumbnails sent to the Scribe + +- [1943495] Kindle output: Fix cover images that contain EXIF data without a rotation not displaying on the Kindle lockscreen + +- E-book viewer: Fix ruby tags not excluded when searching for text + +- [2067128] Content server viewer: Fix a regression in the previous release that broke searching + +:: improved recipes +- Economist Espresso +- Slate + +}}} + {{{ 7.11.0 2024-05-24 :: new features diff --git a/manual/server.rst b/manual/server.rst index b69adb0e90e8..30c48b80e3aa 100644 --- a/manual/server.rst +++ b/manual/server.rst @@ -174,11 +174,11 @@ A much loved feature of the main program, :guilabel:`Virtual libraries` is present in the server interface as well. Click the three vertical dots in the top right corner to choose a Virtual library. -The book reader +The book viewer ^^^^^^^^^^^^^^^^ You can read any book in your calibre library by simply tapping on -it and then tapping the :guilabel:`Read` button. The book reader +it and then tapping the :guilabel:`Read` button. 
The book viewer is very simple to operate. You can both tap and swipe to turn pages. Swiping up/down skips between chapters. Tapping the top quarter of the screen gets you the detailed controls and viewer preferences. diff --git a/recipes/economist_espresso.recipe b/recipes/economist_espresso.recipe index 522fe4a715f1..63b60ed9ca87 100644 --- a/recipes/economist_espresso.recipe +++ b/recipes/economist_espresso.recipe @@ -2,8 +2,92 @@ https://www.economist.com/the-world-in-brief ''' -from calibre.ebooks.BeautifulSoup import Tag -from calibre.web.feeds.news import BasicNewsRecipe, classes +import json +from urllib.parse import quote, urlencode + +from calibre import replace_entities +from calibre.ebooks.BeautifulSoup import NavigableString, Tag +from calibre.ptempfile import PersistentTemporaryFile +from calibre.web.feeds.news import BasicNewsRecipe +from html5_parser import parse +from lxml import etree + + +def E(parent, name, text='', **attrs): + ans = parent.makeelement(name, **attrs) + ans.text = text + parent.append(ans) + return ans + + +def process_node(node, html_parent): + ntype = node.get('type') + if ntype == 'tag': + c = html_parent.makeelement(node['name']) + c.attrib.update({k: v or '' for k, v in node.get('attribs', {}).items()}) + html_parent.append(c) + for nc in node.get('children', ()): + process_node(nc, c) + elif ntype == 'text': + text = node.get('data') + if text: + text = replace_entities(text) + if len(html_parent): + t = html_parent[-1] + t.tail = (t.tail or '') + text + else: + html_parent.text = (html_parent.text or '') + text + + +def safe_dict(data, *names): + ans = data + for x in names: + ans = ans.get(x) or {} + return ans + + +class JSONHasNoContent(ValueError): + pass + + +def load_article_from_json(raw, root): + # open('/t/raw.json', 'w').write(raw) + data = json.loads(raw) + body = root.xpath('//body')[0] + article = E(body, 'article') + E(article, 'div', data['flyTitle'] , style='color: red; font-size:small; font-weight:bold;') 
+ E(article, 'h1', data['title'], title=safe_dict(data, "url", "canonical") or '') + E(article, 'div', data['rubric'], style='font-style: italic; color:#202020;') + E(article, 'div', data['byline'], style='font-style: italic; color:#202020;') + main_image_url = safe_dict(data, 'image', 'main', 'url').get('canonical') + if main_image_url: + div = E(article, 'div') + try: + E(div, 'img', src=main_image_url) + except Exception: + pass + for node in data.get('text') or (): + process_node(node, article) + + +def cleanup_html_article(root): + main = root.xpath('//main')[0] + body = root.xpath('//body')[0] + for child in tuple(body): + body.remove(child) + body.append(main) + main.set('id', '') + main.tag = 'article' + for x in root.xpath('//*[@style]'): + x.set('style', '') + for x in root.xpath('//button'): + x.getparent().remove(x) + + +def classes(classes): + q = frozenset(classes.split(' ')) + return dict(attrs={ + 'class': lambda x: x and frozenset(x.split()).intersection(q)}) def new_tag(soup, name, attrs=()): @@ -13,60 +97,59 @@ def new_tag(soup, name, attrs=()): return Tag(soup, name, attrs=attrs or None) +class NoArticles(Exception): + pass + + +def process_url(url): + if url.startswith('/'): + url = 'https://www.economist.com' + url + return url + + class Espresso(BasicNewsRecipe): title = 'The Economist Espresso' language = 'en' __author__ = 'unkn0wn' + encoding = 'utf-8' + masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png' + cover_url = 'https://downloadr2.apkmirror.com/wp-content/uploads/2021/10/75/615777cc6611b.png' description = ( 'Espresso is a rich, full-flavoured shot of daily global analysis' ' from the editors of The Economist to get you up to speed, fast.' 'Maximise your understanding of the most significant business, ' 'economic, political and cultural developments globally.' 
) - cover_url = 'https://downloadr2.apkmirror.com/wp-content/uploads/2021/10/75/615777cc6611b.png' - no_stylesheets = True - remove_attributes = ['height', 'width', 'style'] - use_embedded_content = False - masthead_url = 'https://www.livemint.com/lm-img/dev/economist-logo-oneline.png' extra_css = ''' - h1 { text-align:center; } - ._main-image, ._description, .sub { text-align:center; font-size:small; } - ._quote-container { font-size:x-large; font-style:italic; color:#202020; } + em { color:#202020; } + img {display:block; margin:0 auto;} ''' - keep_only_tags = [ - dict(name='main', attrs={'id':'content'}) - ] - remove_tags = [ - classes('_podcast-promo _newsletter-promo-container _time-last-updated') - ] - - def parse_index(self): - return [ - ('Espresso', - [ - { - 'title': 'The World in Brief', - 'url': 'https://www.economist.com/the-world-in-brief', - 'description': 'Catch up quickly on the global stories that matter' - }, + dict(name=['script', 'noscript', 'title', 'iframe', 'cf_floatingcontent', 'aside', 'footer']), + dict(attrs={'aria-label': "Article Teaser"}), + dict(attrs={ + 'class': [ + 'dblClkTrk', 'ec-article-info', 'share_inline_header', + 'related-items', 'main-content-container', 'ec-topic-widget', + 'teaser', 'blog-post__bottom-panel-bottom', 'blog-post__comments-label', + 'blog-post__foot-note', 'blog-post__sharebar', 'blog-post__bottom-panel', + 'newsletter-form','share-links-header','teaser--wrapped', 'latest-updates-panel__container', + 'latest-updates-panel__article-link','blog-post__section' ] - ), - ] - - def print_version(self, url): - return 'https://webcache.googleusercontent.com/search?q=cache:' + url - - def preprocess_html(self, soup): - if h1 := soup.find('h1'): - if p := h1.find_next_sibling('p'): - p['class'] = 'sub' - for hr in soup.findAll(attrs={'class':['_gobbet', '_article']}): - nt = new_tag(soup, 'hr') - hr.append(nt) - return soup + } + ), + dict(attrs={ + 'class': lambda x: x and 'blog-post__siblings-list-aside' in 
x.split()}), + classes( + 'share-links-header teaser--wrapped latest-updates-panel__container' + ' latest-updates-panel__article-link blog-post__section newsletter-form blog-post__bottom-panel' + ) + ] + keep_only_tags = [dict(name='article', id=lambda x: not x)] + no_stylesheets = True + remove_attributes = ['data-reactid', 'width', 'height'] def get_browser(self, *args, **kwargs): # Needed to bypass cloudflare @@ -74,3 +157,108 @@ class Espresso(BasicNewsRecipe): br = BasicNewsRecipe.get_browser(self, *args, **kwargs) br.addheaders += [('Accept-Language', 'en-GB,en-US;q=0.9,en;q=0.8')] return br + + def economist_return_index(self, ans): + if not ans: + raise NoArticles( + 'Could not find any articles, either the ' + 'economist.com server is having trouble and you should ' + 'try later or the website format has changed and the ' + 'recipe needs to be updated.' + ) + return ans + + def parse_index(self): + query = { + 'query': 'query EspressoQuery($ref:String!){espresso:canonical(ref:$ref){...EspressoFragment __typename}}fragment EspressoFragment on Content{id type hasPart(size:1 sort:"datePublished:desc"){parts{id type rubric:description hasPart(sort:"publication.context.position:asc,datePublished:desc"){parts{...ArticleFragment __typename}__typename}__typename}__typename}__typename}fragment ArticleFragment on Content{ad{grapeshot{channels{name __typename}__typename}__typename}articleSection{internal{id title:headline __typename}__typename}audio{main{id duration(format:"seconds")source:channel{id __typename}url{canonical __typename}__typename}__typename}byline dateline dateModified datePublished dateRevised flyTitle:subheadline id image{...ImageInlineFragment ...ImageMainFragment ...ImagePromoFragment __typename}print{title:headline flyTitle:subheadline rubric:description section{id title:headline __typename}__typename}publication{id tegID title:headline flyTitle:subheadline datePublished regionsAllowed url{canonical __typename}__typename}rubric:description 
source:channel{id __typename}tegID text(format:"json")title:headline type url{canonical __typename}topic contentIdentity{forceAppWebview mediaType articleType __typename}__typename}fragment ImageInlineFragment on Media{inline{url{canonical __typename}width height __typename}__typename}fragment ImageMainFragment on Media{main{url{canonical __typename}width height __typename}__typename}fragment ImagePromoFragment on Media{promo{url{canonical __typename}id width height __typename}__typename}', # noqa + 'operationName': 'EspressoQuery', + 'variables': '{"ref":"/content/jakj5ed3rml75i8j0d5i74p8adf6eem4"}', + } + url = 'https://cp2-graphql-gateway.p.aws.economist.com/graphql?' + urlencode(query, safe='()!', quote_via=quote) + try: + raw = self.index_to_soup(url, raw=True) + except Exception: + raise ValueError('Server is not reachable, try again after some time.') + ans = self.economist_parse_index(raw) + return self.economist_return_index(ans) + + def economist_parse_index(self, raw): + data = json.loads(raw)['data']['espresso']['hasPart']['parts'][0] + self.description = data['rubric'] + + ans = [] + for part in safe_dict(data, "hasPart", "parts"): + title = safe_dict(part, "title") + pt = PersistentTemporaryFile('.html') + pt.write(json.dumps(part).encode('utf-8')) + pt.close() + url = 'file:///' + pt.name + ans.append({"title": title, "url": url}) + return [('Espresso', ans)] + + def populate_article_metadata(self, article, soup, first): + article.url = soup.find('h1')['title'] + + def preprocess_raw_html(self, raw, url): + # open('/t/raw.html', 'wb').write(raw.encode('utf-8')) + body = '
' + root = parse(body) + load_article_from_json(raw, root) + + for div in root.xpath('//div[@class="lazy-image"]'): + noscript = list(div.iter('noscript')) + if noscript and noscript[0].text: + img = list(parse(noscript[0].text).iter('img')) + if img: + p = noscript[0].getparent() + idx = p.index(noscript[0]) + p.insert(idx, p.makeelement('img', src=img[0].get('src'))) + p.remove(noscript[0]) + for x in root.xpath('//*[name()="script" or name()="style" or name()="source" or name()="meta"]'): + x.getparent().remove(x) + # the economist uses <small> for small caps with a custom font + for init in root.xpath('//span[@data-caps="initial"]'): + init.set('style', 'font-weight:bold;') + for x in root.xpath('//small'): + if x.text and len(x) == 0: + x.text = x.text.upper() + x.tag = 'span' + x.set('style', 'font-variant: small-caps') + for h2 in root.xpath('//h2'): + h2.tag = 'h4' + for x in root.xpath('//figcaption'): + x.set('style', 'text-align:center; font-size:small;') + for x in root.xpath('//cite'): + x.tag = 'blockquote' + x.set('style', 'color:#404040;') + raw = etree.tostring(root, encoding='unicode') + return raw + + + def eco_find_image_tables(self, soup): + for x in soup.findAll('table', align=['right', 'center']): + if len(x.findAll('font')) in (1, 2) and len(x.findAll('img')) == 1: + yield x + + def postprocess_html(self, soup, first): + for img in soup.findAll('img', srcset=True): + del img['srcset'] + for table in list(self.eco_find_image_tables(soup)): + caption = table.find('font') + img = table.find('img') + div = new_tag(soup, 'div') + div['style'] = 'text-align:left;font-size:70%' + ns = NavigableString(self.tag_to_string(caption)) + div.insert(0, ns) + div.insert(1, new_tag(soup, 'br')) + del img['width'] + del img['height'] + img.extract() + div.insert(2, img) + table.replaceWith(div) + return soup + + def canonicalize_internal_url(self, url, is_link=True): + if url.endswith('/print'): + url = url.rpartition('/')[0] + return 
BasicNewsRecipe.canonicalize_internal_url(self, url, is_link=is_link) diff --git a/recipes/livemint.recipe b/recipes/livemint.recipe index 53e5cb4a46a4..8eb4586b06a1 100644 --- a/recipes/livemint.recipe +++ b/recipes/livemint.recipe @@ -95,6 +95,7 @@ class LiveMint(BasicNewsRecipe): keep_only_tags = [ dict(name='article', attrs={'id':lambda x: x and x.startswith(('article_', 'box_'))}), + dict(attrs={'class':lambda x: x and x.startswith('storyPage_storyBox__')}), classes('contentSec') ] remove_tags = [ diff --git a/recipes/slate.recipe b/recipes/slate.recipe index 24ed1db877af..d262d291a3ec 100644 --- a/recipes/slate.recipe +++ b/recipes/slate.recipe @@ -19,13 +19,12 @@ def classes(classes): class Slate(BasicNewsRecipe): title = 'Slate' description = 'A general-interest publication offering analysis and commentary about politics, news and culture.' - __author__ = 'Kovid Goyal' + __author__ = 'unkn0wn' no_stylesheets = True language = 'en' encoding = 'utf-8' remove_attributes = ['style', 'height', 'width'] - oldest_article = 2 # days - INDEX = 'https://slate.com' + INDEX = 'https://slate.com/' resolve_internal_links = True remove_empty_feeds = True ignore_duplicate_articles = {'url'} @@ -52,16 +51,44 @@ class Slate(BasicNewsRecipe): img['src'] = img['data-src'] + '&width=600' return soup - feeds = [ - ('News & Politics', 'https://slate.com/feeds/news-and-politics.rss'), - ('Culture', 'https://slate.com/feeds/culture.rss'), - ('Technology', 'https://slate.com/feeds/technology.rss'), - ('Business', 'https://slate.com/feeds/business.rss'), - ('Human Interest', 'https://slate.com/feeds/human-interest.rss'), - ('Others', 'https://slate.com/feeds/all.rss') - ] + def parse_index(self): + ans = [] + for sectitle, url in ( + ('News & Politics', 'news-and-politics'), + ('Culture', 'culture'), + ('Technology', 'technology'), + ('Business', 'business'), + ('Life', 'life'), + ('Advice', 'advice'), + ): + url = self.INDEX + url + self.log('\nFound section:', sectitle, url) + 
articles = self.slate_section_articles(url) + if articles: + ans.append((sectitle, articles)) + return ans + + def slate_section_articles(self, url): + from datetime import date + soup = self.index_to_soup(url) + ans = [] + dt = date.today().strftime('/%Y/%m') + for a in soup.findAll('a', attrs={'href':lambda x: x and x.startswith(url + dt)}): + url = a['href'] + head = a.find(attrs={'class':[ + 'section-feed-two-column__card-headline', + 'section-feed-three-column__teaser-headline', + 'section-feed-two-column__teaser-headline', + 'topic-story__hed' + ]}) + if head: + title = self.tag_to_string(head).strip() + self.log('\t' + title) + self.log('\t\t' + url) + ans.append({'title': title, 'url': url}) + return ans - def get_article_url(self, article): - url = BasicNewsRecipe.get_article_url(self, article) - if '/podcasts/' not in url: - return url.split('?')[0] + def populate_article_metadata(self, article, soup, first): + summ = soup.find(attrs={'class':'article__dek'}) + if summ: + article.summary = article.text_summary = self.tag_to_string(summ) diff --git a/setup/arch-ci.sh b/setup/arch-ci.sh index 138b5f035b26..1498dda942a0 100755 --- a/setup/arch-ci.sh +++ b/setup/arch-ci.sh @@ -5,8 +5,7 @@ set -xe -pacman -S --noconfirm --needed base-devel sudo git sip pyqt-builder cmake chmlib icu jxrlib hunspell libmtp libusb libwmf optipng python-apsw python-beautifulsoup4 python-cssselect python-css-parser python-dateutil python-jeepney python-dnspython python-feedparser python-html2text python-html5-parser python-lxml python-markdown python-mechanize python-msgpack python-netifaces python-unrardll python-pillow python-psutil python-pygments python-pyqt6 python-regex python-zeroconf python-pyqt6-webengine qt6-svg qt6-imageformats udisks2 hyphen python-pychm python-pycryptodome speech-dispatcher python-sphinx python-urllib3 python-py7zr python-pip python-fonttools python-xxhash uchardet libstemmer poppler tk podofo +pacman -S --noconfirm --needed base-devel sudo git sip 
pyqt-builder cmake chmlib icu jxrlib hunspell libmtp libusb libwmf optipng python-apsw python-beautifulsoup4 python-cssselect python-css-parser python-dateutil python-jeepney python-dnspython python-feedparser python-html2text python-html5-parser python-lxml python-lxml-html-clean python-markdown python-mechanize python-msgpack python-netifaces python-unrardll python-pillow python-psutil python-pygments python-pyqt6 python-regex python-zeroconf python-pyqt6-webengine qt6-svg qt6-imageformats udisks2 hyphen python-pychm python-pycryptodome speech-dispatcher python-sphinx python-urllib3 python-py7zr python-pip python-fonttools python-xxhash uchardet libstemmer poppler tk podofo useradd -m ci chown -R ci:users $GITHUB_WORKSPACE -pip install --break-system-packages lxml-html-clean diff --git a/src/calibre/constants.py b/src/calibre/constants.py index 418e08d0b870..4184c1d72577 100644 --- a/src/calibre/constants.py +++ b/src/calibre/constants.py @@ -11,7 +11,7 @@ from polyglot.builtins import environ_item, hasenv __appname__ = 'calibre' -numeric_version = (7, 11, 0) +numeric_version = (7, 12, 0) __version__ = '.'.join(map(str, numeric_version)) git_version = None __author__ = "Kovid Goyal " diff --git a/src/calibre/devices/mtp/driver.py b/src/calibre/devices/mtp/driver.py index d64b0f2e4bae..620e2701a225 100644 --- a/src/calibre/devices/mtp/driver.py +++ b/src/calibre/devices/mtp/driver.py @@ -27,6 +27,7 @@ BASE = importlib.import_module('calibre.devices.mtp.%s.driver'%( 'windows' if iswindows else 'unix')).MTP_DEVICE +DEFAULT_THUMBNAIL_WIDTH, DEFAULT_THUMBNAIL_HEIGHT = 120, 160 class MTPInvalidSendPathError(PathError): @@ -43,8 +44,8 @@ class MTP_DEVICE(BASE): CAN_SET_METADATA = [] NEWS_IN_FOLDER = True MAX_PATH_LEN = 230 - THUMBNAIL_HEIGHT = 160 - THUMBNAIL_WIDTH = 120 + THUMBNAIL_HEIGHT = DEFAULT_THUMBNAIL_HEIGHT + THUMBNAIL_WIDTH = DEFAULT_THUMBNAIL_WIDTH CAN_SET_METADATA = [] BACKLOADING_ERROR_MESSAGE = None MANAGES_DEVICE_PRESENCE = True @@ -155,7 +156,9 @@ def 
open(self, device, library_uuid): self.current_device_defaults, self.current_vid, self.current_pid = self.device_defaults(device, self) self.calibre_file_paths = self.current_device_defaults.get( 'calibre_file_paths', {'metadata':self.METADATA_CACHE, 'driveinfo':self.DRIVEINFO}) + self.THUMBNAIL_WIDTH, self.THUMBNAIL_HEIGHT = DEFAULT_THUMBNAIL_WIDTH, DEFAULT_THUMBNAIL_HEIGHT if self.is_kindle: + self.THUMBNAIL_WIDTH = self.THUMBNAIL_HEIGHT = 500 # see kindle/driver.py try: self.sync_kindle_thumbnails() except Exception: diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py index 3a9d18ed60e5..49ddffde150e 100644 --- a/src/calibre/ebooks/conversion/cli.py +++ b/src/calibre/ebooks/conversion/cli.py @@ -239,7 +239,7 @@ def add_pipeline_options(parser, plumber): 'chapter', 'chapter_mark', 'prefer_metadata_cover', 'remove_first_image', 'insert_metadata', 'page_breaks_before', - 'remove_fake_margins', 'start_reading_at', + 'remove_fake_margins', 'start_reading_at', 'add_alt_text_to_img', ] )), diff --git a/src/calibre/ebooks/conversion/config.py b/src/calibre/ebooks/conversion/config.py index 0fff29dea587..91f46b984f76 100644 --- a/src/calibre/ebooks/conversion/config.py +++ b/src/calibre/ebooks/conversion/config.py @@ -260,7 +260,7 @@ def get_sorted_output_formats(preferred_fmt=None): 'structure_detection': ( 'chapter', 'chapter_mark', 'start_reading_at', 'remove_first_image', 'remove_fake_margins', 'insert_metadata', - 'page_breaks_before'), + 'page_breaks_before', 'add_alt_text_to_img',), 'toc': ( 'level1_toc', 'level2_toc', 'level3_toc', diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index adbfbb540a3d..47c4f50e64d7 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -420,6 +420,11 @@ def __init__(self, input, output, log, report_progress=DummyReporter(), 'case you can disable the removal.') ), 
+OptionRecommendation(name='add_alt_text_to_img', + recommended_value=False, level=OptionRecommendation.LOW, + help=_('When an <img> tag has no alt attribute, check the associated image file for metadata that specifies alternate text, and' + ' use it to fill in the alt attribute. The alt attribute is used by screen readers for assisting the visually challenged.') +), OptionRecommendation(name='margin_top', recommended_value=5.0, level=OptionRecommendation.LOW, @@ -1203,6 +1208,12 @@ def run(self): from calibre.ebooks.oeb.transforms.jacket import Jacket Jacket()(self.oeb, self.opts, self.user_metadata) + pr(0.37) + self.flush() + + if self.opts.add_alt_text_to_img: + from calibre.ebooks.oeb.transforms.alt_text import AddAltText + AddAltText()(self.oeb, self.opts) pr(0.4) self.flush() diff --git a/src/calibre/ebooks/metadata/search_internet.py b/src/calibre/ebooks/metadata/search_internet.py index 55b991537619..9b894d99e56d 100644 --- a/src/calibre/ebooks/metadata/search_internet.py +++ b/src/calibre/ebooks/metadata/search_internet.py @@ -48,7 +48,7 @@ def qquote(val, use_plus=True): if not isinstance(val, bytes): - val = val.encode('utf-8') + val = val.encode('utf-8', 'replace') ans = quote_plus(val) if use_plus else quote(val) if isinstance(ans, bytes): ans = ans.decode('utf-8') diff --git a/src/calibre/ebooks/mobi/writer2/resources.py b/src/calibre/ebooks/mobi/writer2/resources.py index 592c1e607768..52c44e1f095e 100644 --- a/src/calibre/ebooks/mobi/writer2/resources.py +++ b/src/calibre/ebooks/mobi/writer2/resources.py @@ -27,12 +27,14 @@ def process_jpegs_for_amazon(data: bytes) -> bytes: # Amazon's MOBI renderer can't render JPEG images without JFIF metadata # and images with EXIF data dont get displayed on the cover screen changed = not img.info + has_exif = False if hasattr(img, 'getexif'): exif = img.getexif() + has_exif = bool(exif) if exif.get(0x0112) in (2,3,4,5,6,7,8): changed = True img = ImageOps.exif_transpose(img) - if changed: + if changed or has_exif: 
out = BytesIO() img.save(out, 'JPEG') data = out.getvalue() diff --git a/src/calibre/ebooks/oeb/polish/main.py b/src/calibre/ebooks/oeb/polish/main.py index fd32b77c4d66..005af7113cc7 100644 --- a/src/calibre/ebooks/oeb/polish/main.py +++ b/src/calibre/ebooks/oeb/polish/main.py @@ -14,6 +14,7 @@ from calibre.ebooks.oeb.polish.container import get_container from calibre.ebooks.oeb.polish.cover import set_cover from calibre.ebooks.oeb.polish.css import remove_unused_css +from calibre.ebooks.oeb.polish.download import download_external_resources, get_external_resources, replace_resources from calibre.ebooks.oeb.polish.embed import embed_all_fonts from calibre.ebooks.oeb.polish.hyphenation import add_soft_hyphens, remove_soft_hyphens from calibre.ebooks.oeb.polish.images import compress_images @@ -22,6 +23,7 @@ from calibre.ebooks.oeb.polish.stats import StatsCollector from calibre.ebooks.oeb.polish.subset import iter_subsettable_fonts, subset_all_fonts from calibre.ebooks.oeb.polish.upgrade import upgrade_book +from calibre.utils.localization import ngettext from calibre.utils.logging import Log from polyglot.builtins import iteritems @@ -38,6 +40,7 @@ 'upgrade_book': False, 'add_soft_hyphens': False, 'remove_soft_hyphens': False, + 'download_external_resources': False, } CUSTOMIZATION = { @@ -133,6 +136,12 @@ 'remove_soft_hyphens': _('''\

Remove soft hyphens from all text in the book.

'''), + +'download_external_resources': _('''\ +

Download external resources such as images, stylesheets, etc. that point to URLs instead of files in the book. +All such resources will be downloaded and added to the book so that the book no longer references any external resources. +

+'''), } @@ -161,6 +170,31 @@ def update_metadata(ebook, new_opf): stream.write(opfbytes) +def download_resources(ebook, report) -> bool: + '''Download the external resources referenced by ebook, describing progress via report(); return True only if the book was changed.''' + changed = False + url_to_referrer_map = get_external_resources(ebook) + if url_to_referrer_map: + n = len(url_to_referrer_map) + report(ngettext('Downloading one external resource', 'Downloading {} external resources', n).format(n)) + replacements, failures = download_external_resources(ebook, url_to_referrer_map) + if not failures: + report(_('Successfully downloaded all resources')) + else: + tb = [f'{url}\n\t{err}\n' for url, err in iteritems(failures)] + if replacements: + report(_('Failed to download some resources, see details below:')) + else: + report(_('Failed to download all resources, see details below:')) + report(''.join(tb)) # every other report() call passes a single string, so join the per-URL entries (each already ends in a newline) + if replacements: + if replace_resources(ebook, url_to_referrer_map, replacements): + changed = True + else: + report(_('No external resources found in book')) + return changed + + def polish_one(ebook, opts, report, customization=None): def rt(x): return report('\n### ' + x) @@ -267,6 +301,16 @@ def rt(x): add_soft_hyphens(ebook, report) changed = True + if opts.download_external_resources: + rt(_('Downloading external resources')) + try: + changed = download_resources(ebook, report) or changed # propagate the result instead of discarding it, as the other polish steps set changed + except Exception: + import traceback + report(_('Failed to download resources with error:')) + report(traceback.format_exc()) + report('') + return changed @@ -337,6 +381,7 @@ def option_parser(): o('--add-soft-hyphens', '-H', help=CLI_HELP['add_soft_hyphens']) o('--remove-soft-hyphens', help=CLI_HELP['remove_soft_hyphens']) o('--upgrade-book', '-U', help=CLI_HELP['upgrade_book']) + o('--download-external-resources', '-d', help=CLI_HELP['download_external_resources']) o('--verbose', help=_('Produce more verbose output, useful for debugging.')) diff --git a/src/calibre/ebooks/oeb/transforms/alt_text.py b/src/calibre/ebooks/oeb/transforms/alt_text.py new file mode 100644 index 000000000000..383628790c55 --- /dev/null +++ 
b/src/calibre/ebooks/oeb/transforms/alt_text.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python +# License: GPLv3 Copyright: 2024, Kovid Goyal + + +from io import BytesIO + +from PIL import Image + +from calibre.ebooks.oeb.base import SVG_MIME, urlnormalize, xpath +from calibre.utils.img import read_alt_text + + +def process_spine_item(item, hrefs, log): + html = item.data + for elem in xpath(html, '//h:img[@src]'): + src = urlnormalize(elem.attrib['src']) + image = hrefs.get(item.abshref(src), None) + if image and image.media_type != SVG_MIME and not elem.attrib.get('alt'): + data = image.bytes_representation + try: + with Image.open(BytesIO(data)) as im: + alt = read_alt_text(im) + except Exception as err: + log.warn(f'Failed to read alt text from image {src} with error: {err}') + else: + if alt: + elem.set('alt', alt) + + +class AddAltText: + + def __call__(self, oeb, opts): + oeb.logger.info('Add alt text to images...') + hrefs = oeb.manifest.hrefs + for item in oeb.spine: + process_spine_item(item, hrefs, oeb.log) diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py index c2473bba35a5..1fa5d557770e 100644 --- a/src/calibre/gui2/actions/edit_metadata.py +++ b/src/calibre/gui2/actions/edit_metadata.py @@ -534,7 +534,7 @@ def do_edit_bulk_metadata(self, rows, book_ids): changed = False refresh_books = set(book_ids) try: - current_tab = 0 + current_tab = -1 while True: dialog = MetadataBulkDialog(self.gui, rows, self.gui.library_view.model(), current_tab, refresh_books) diff --git a/src/calibre/gui2/actions/polish.py b/src/calibre/gui2/actions/polish.py index 741d90a5288f..6764d7c8a489 100644 --- a/src/calibre/gui2/actions/polish.py +++ b/src/calibre/gui2/actions/polish.py @@ -87,6 +87,7 @@ def __init__(self, db, book_id_map, parent=None): 'remove_jacket':_('

Remove book jacket

%s')%HELP['remove_jacket'], 'remove_unused_css':_('

Remove unused CSS rules

%s')%HELP['remove_unused_css'], 'compress_images': _('

Losslessly compress images

%s') % HELP['compress_images'], + 'download_external_resources': _('

Download external resources

%s') % HELP['download_external_resources'], 'add_soft_hyphens': _('

Add soft-hyphens

%s') % HELP['add_soft_hyphens'], 'remove_soft_hyphens': _('

Remove soft-hyphens

%s') % HELP['remove_soft_hyphens'], 'upgrade_book': _('

Upgrade book internals

%s') % HELP['upgrade_book'], @@ -109,6 +110,7 @@ def __init__(self, db, book_id_map, parent=None): ('remove_jacket', _('&Remove a previously inserted book jacket')), ('remove_unused_css', _('Remove &unused CSS rules from the book')), ('compress_images', _('Losslessly &compress images')), + ('download_external_resources', _('&Download external resources')), ('add_soft_hyphens', _('Add s&oft hyphens')), ('remove_soft_hyphens', _('Remove so&ft hyphens')), ('upgrade_book', _('&Upgrade book internals')), diff --git a/src/calibre/gui2/convert/structure_detection.ui b/src/calibre/gui2/convert/structure_detection.ui index 9b99fd93a9e5..9d68182f9056 100644 --- a/src/calibre/gui2/convert/structure_detection.ui +++ b/src/calibre/gui2/convert/structure_detection.ui @@ -14,39 +14,9 @@ Form - - - - Remove &fake margins - - - - - - - The header and footer removal options have been replaced by the Search & replace options. Click the Search & replace category in the bar to the left to use these options. Leave the replace field blank and enter your header/footer removal regexps into the search field. - - - true - - - - + - - - - Insert &metadata as page at start of book - - - - - - - - - @@ -57,12 +27,8 @@ - - - - 20 - - + + @@ -77,14 +43,24 @@ - - + + + + 20 + + + + + - Remove first &image + The header and footer removal options have been replaced by the Search & replace options. Click the Search & replace category in the bar to the left to use these options. Leave the replace field blank and enter your header/footer removal regexps into the search field. 
+ + + true - + Qt::Vertical @@ -97,6 +73,37 @@ + + + + Insert &metadata as page at start of book + + + + + + + + + + Remove &fake margins + + + + + + + Remove first &image + + + + + + + Add &alt text to images + + + diff --git a/src/calibre/gui2/dialogs/metadata_bulk.py b/src/calibre/gui2/dialogs/metadata_bulk.py index 6bdcb215fda9..82b85fd825b0 100644 --- a/src/calibre/gui2/dialogs/metadata_bulk.py +++ b/src/calibre/gui2/dialogs/metadata_bulk.py @@ -558,7 +558,7 @@ class MetadataBulkDialog(QDialog, Ui_MetadataBulkDialog): _('Append to field'), ] - def __init__(self, window, rows, model, tab, refresh_books): + def __init__(self, window, rows, model, starting_tab, refresh_books): QDialog.__init__(self, window) self.setupUi(self) setup_status_actions(self.test_result) @@ -634,8 +634,7 @@ def __init__(self, window, rows, model, tab, refresh_books): 'This operation cannot be canceled or undone')) self.do_again = False self.restore_geometry(gprefs, 'bulk_metadata_window_geometry') - ct = gprefs.get('bulk_metadata_window_tab', 0) - self.central_widget.setCurrentIndex(ct) + self.languages.init_langs(self.db) self.languages.setEditText('') self.authors.setFocus(Qt.FocusReason.OtherFocusReason) @@ -650,7 +649,9 @@ def __init__(self, window, rows, model, tab, refresh_books): (self.button_clear_tags_rules, self.button_clear_authors_rules, self.button_clear_publishers_rules) )) self.update_transform_labels() - self.central_widget.setCurrentIndex(tab) + if starting_tab < 0: + starting_tab = gprefs.get('bulk_metadata_window_tab', 0) + self.central_widget.setCurrentIndex(starting_tab) self.exec() def update_transform_labels(self): diff --git a/src/calibre/gui2/metadata/diff.py b/src/calibre/gui2/metadata/diff.py index fcf279b6ccb8..4e1b0df183b9 100644 --- a/src/calibre/gui2/metadata/diff.py +++ b/src/calibre/gui2/metadata/diff.py @@ -626,6 +626,7 @@ def set_pixmap(self, pixmap): class CompareMany(QDialog): + def __init__(self, ids, get_metadata, field_metadata, parent=None, 
window_title=None, reject_button_tooltip=None, @@ -641,6 +642,13 @@ def __init__(self, ids, get_metadata, field_metadata, parent=None, self.l = l = QVBoxLayout(w) s.addWidget(w) self.next_called = False + + # initialize the previous items list, we will use it to store the watched items that were rejected or accepted + # when the user clicks on the next or reject button we will add the current item to the previous items list + # when the user presses the back button we will pop the last item from the previous items list and set it as current item + # also the popped item will be removed from the rejected or accepted items list (and will be unmarked if it was marked) + self.previous_items = [] + self.setWindowIcon(QIcon.ic('auto_author_sort.png')) self.get_metadata = get_metadata self.ids = list(ids) @@ -667,7 +675,9 @@ def __init__(self, ids, get_metadata, field_metadata, parent=None, self.bb = bb = QDialogButtonBox(QDialogButtonBox.StandardButton.Cancel) bb.button(QDialogButtonBox.StandardButton.Cancel).setAutoDefault(False) bb.rejected.connect(self.reject) + if self.total > 1: + self.aarb = b = bb.addButton(_('&Accept all remaining'), QDialogButtonBox.ButtonRole.YesRole) b.setIcon(QIcon.ic('ok.png')), b.setAutoDefault(False) if accept_all_tooltip: @@ -696,6 +706,21 @@ def __init__(self, ids, get_metadata, field_metadata, parent=None, b.setIcon(QIcon.ic(action_button[1])) self.action_button_action = action_button[2] b.clicked.connect(self.action_button_clicked) + + # Add a Back button, which allows the user to go back to the previous book, cancel any reject/edit/accept that was done to it, and review it again + # create a Back action that will be triggered when the user presses the back button or the back shortcut + self.back_action = QAction(self) + self.back_action.setShortcut(QKeySequence(Qt.KeyboardModifier.AltModifier | Qt.Key.Key_Left)) + self.back_action.triggered.connect(self.previous_item) + self.addAction(self.back_action) + # create the back button, set
its name, tooltip, icon and action to call the previous_item method + self.back_button = bb.addButton(_('P&revious'), QDialogButtonBox.ButtonRole.ActionRole) + self.back_button.setToolTip(_('Move to previous {}').format(self.back_action.shortcut().toString(QKeySequence.SequenceFormat.NativeText))) + self.back_button.setIcon(QIcon.ic('back.png')) + self.back_button.clicked.connect(self.previous_item) + self.back_button.setDefault(True) + self.back_button.setAutoDefault(False) + self.nb = b = bb.addButton(_('&Next') if self.total > 1 else _('&OK'), QDialogButtonBox.ButtonRole.ActionRole) if self.total > 1: b.setToolTip(_('Move to next [%s]') % self.next_action.shortcut().toString(QKeySequence.SequenceFormat.NativeText)) @@ -722,22 +747,27 @@ def __init__(self, ids, get_metadata, field_metadata, parent=None, b.setFocus(Qt.FocusReason.OtherFocusReason) self.next_called = False + def show_zoomed_cover(self, pixmap): self.cover_zoom.set_pixmap(pixmap) self.stack.setCurrentIndex(1) + @property def mark_rejected(self): return self.markq.isChecked() + def action_button_clicked(self): self.action_button_action(self.ids[0]) + def accept(self): self.save_geometry(gprefs, 'diff_dialog_geom') self.compare_widget.save_comments_controls_state() super().accept() + def reject(self): if self.stack.currentIndex() == 1: self.stack.setCurrentIndex(0) @@ -750,27 +780,64 @@ def reject(self): self.compare_widget.save_comments_controls_state() super().reject() + @property def current_mi(self): return self.compare_widget.current_mi + + def show_current_item(self): + self.setWindowTitle(self.window_title + _(' [%(num)d of %(tot)d]') % dict( + num=(self.total - len(self.ids) + 1), tot=self.total)) + oldmi, newmi = self.get_metadata(self.ids[0]) + self.compare_widget(oldmi, newmi) + self.update_back_button_state() + + def update_back_button_state(self): + enabled = bool(self.previous_items) + self.back_action.setEnabled(enabled) + self.back_button.setEnabled(enabled) + def next_item(self,
accept): self.next_called = True if not self.ids: return self.accept() + if self.current_mi is not None: changed = self.compare_widget.apply_changes() if self.current_mi is not None: old_id = self.ids.pop(0) + + # Save the current book that was just reviewed and accepted or rejected to the previous_items list + # this book can be displayed again if the user presses the back button + self.previous_items.append(old_id) + if not accept: self.rejected_ids.add(old_id) self.accepted[old_id] = (changed, self.current_mi) if accept else (False, None) if not self.ids: return self.accept() - self.setWindowTitle(self.window_title + _(' [%(num)d of %(tot)d]') % dict( - num=(self.total - len(self.ids) + 1), tot=self.total)) - oldmi, newmi = self.get_metadata(self.ids[0]) - self.compare_widget(oldmi, newmi) + self.show_current_item() + + + def previous_item(self): + if self.previous_items: + # get the last book id from the previous items list and remove it from the previous items list + # this book id is the last book id that was reviewed and accepted or rejected + last_previous_item = self.previous_items.pop() + + # if this book id was rejected, remove it from the rejected ids set + if last_previous_item in self.rejected_ids: + self.rejected_ids.remove(last_previous_item) + self.markq.setChecked(False) + # if this book id was accepted, remove it from the accepted dictionary + elif last_previous_item in self.accepted: + self.accepted.pop(last_previous_item) + + # move the last previous item to the beginning of the pending list + self.ids.insert(0, last_previous_item) + self.show_current_item() + def accept_all_remaining(self): self.next_item(True) @@ -780,6 +847,7 @@ def accept_all_remaining(self): self.ids = [] self.accept() + def reject_all_remaining(self): from calibre.gui2.dialogs.confirm_delete import confirm if not confirm(ngettext( @@ -795,6 +863,7 @@ def reject_all_remaining(self): self.ids = [] self.accept() + def keyPressEvent(self, ev): if ev.key() in (Qt.Key.Key_Enter,
Qt.Key.Key_Return): ev.accept() diff --git a/src/calibre/gui2/preferences/look_feel.ui b/src/calibre/gui2/preferences/look_feel.ui index 95c628d42a83..55ca97207e98 100644 --- a/src/calibre/gui2/preferences/look_feel.ui +++ b/src/calibre/gui2/preferences/look_feel.ui @@ -143,7 +143,7 @@ - The unit for the cover corner rounding. Either pixel values or as a percetage of the cover size. + The unit for the cover corner rounding. Either pixel values or as a percentage of the cover size. diff --git a/src/calibre/gui2/update.py b/src/calibre/gui2/update.py index 61194cf5fdbb..3a0c6d5c4828 100644 --- a/src/calibre/gui2/update.py +++ b/src/calibre/gui2/update.py @@ -231,12 +231,14 @@ def update_found(self, calibre_version, number_of_plugin_updates, force=False, n self._update_notification__.show() elif has_plugin_updates: if force: - from calibre.gui2.dialogs.plugin_updater import FILTER_UPDATE_AVAILABLE, PluginUpdaterDialog - d = PluginUpdaterDialog(self, - initial_filter=FILTER_UPDATE_AVAILABLE) - d.exec() - if d.do_restart: - self.quit(restart=True) + self.show_plugin_update_dialog() + + def show_plugin_update_dialog(self): + from calibre.gui2.dialogs.plugin_updater import FILTER_UPDATE_AVAILABLE, PluginUpdaterDialog + d = PluginUpdaterDialog(self, initial_filter=FILTER_UPDATE_AVAILABLE) + d.exec() + if d.do_restart: + self.quit(restart=True) def plugin_update_found(self, number_of_updates): # Change the plugin icon to indicate there are updates available diff --git a/src/calibre/gui2/viewer/search.py b/src/calibre/gui2/viewer/search.py index a6bc791c69eb..91bac79a3a90 100644 --- a/src/calibre/gui2/viewer/search.py +++ b/src/calibre/gui2/viewer/search.py @@ -224,21 +224,24 @@ def __str__(self): @lru_cache(maxsize=None) def searchable_text_for_name(name): ans = [] + add_text = ans.append serialized_data = json.loads(get_data(name)[0]) stack = [] + a = stack.append removed_tails = [] + no_visit = frozenset({'script', 'style', 'title', 'head'}) + ignore_text = 
frozenset({'img', 'math', 'rt', 'rp', 'rtc'}) for child in serialized_data['tree']['c']: if child.get('n') == 'body': - stack.append(child) + a((child, False)) # the JS code does not add the tail of body tags to flat text removed_tails.append((child.pop('l', None), child)) - ignore_text = {'script', 'style', 'title'} text_pos = 0 anchor_offset_map = OrderedDict() while stack: - node = stack.pop() + node, text_ignored_in_parent = stack.pop() if isinstance(node, str): - ans.append(node) + add_text(node) text_pos += len(node) continue g = node.get @@ -253,13 +256,18 @@ def searchable_text_for_name(name): aid = x[1] if aid not in anchor_offset_map: anchor_offset_map[aid] = text_pos - if name and text and name not in ignore_text: - ans.append(text) + if name in no_visit: + continue + ignore_text_in_node_and_children = text_ignored_in_parent or name in ignore_text + + if text and not ignore_text_in_node_and_children: + add_text(text) text_pos += len(text) - if tail: - stack.append(tail) + if tail and not text_ignored_in_parent: + a((tail, ignore_text_in_node_and_children)) if children: - stack.extend(reversed(children)) + for child in reversed(children): + a((child, ignore_text_in_node_and_children)) for (tail, body) in removed_tails: if tail is not None: body['l'] = tail diff --git a/src/calibre/utils/img.py b/src/calibre/utils/img.py index d2be1251ad83..f45c3ad5f3da 100644 --- a/src/calibre/utils/img.py +++ b/src/calibre/utils/img.py @@ -719,6 +719,79 @@ def convert_PIL_image_to_pixmap(im, device_pixel_ratio=1.0): if colortable: qimg.setColorTable(colortable) return QPixmap.fromImage(qimg) + + +def read_xmp_from_pil_image(im) -> str: + fmt = im.format.lower() + xml = '' + if fmt == 'jpeg': + for segment, content in im.applist: + if segment == "APP1": + marker, xmp_tags = content.split(b"\x00")[:2] + if marker == b"http://ns.adobe.com/xap/1.0/": + xml = xmp_tags + break + elif fmt == 'png': + xml = im.info.get('XML:com.adobe.xmp', '') + elif fmt == 'webp': + xml = 
im.info.get("xmp", '') + elif fmt == 'tiff': + xml = im.tag_v2.get(700, '') + return xml + + +def read_text_from_container(container, target_lang=''): + lang_map = {} + for li in container.xpath('descendant::*[local-name()="li"]'): + if li.text: + lang = li.attrib.get('{http://www.w3.org/XML/1998/namespace}lang', 'x-default') + lang_map[lang] = li.text + if not target_lang and 'x-default' in lang_map: + return lang_map['x-default'] + if target_lang in lang_map: + return lang_map[target_lang] + from calibre.utils.localization import canonicalize_lang + target_lang = canonicalize_lang(target_lang) + if target_lang: + for lang, ans in lang_map.items(): + if canonicalize_lang(lang) == target_lang: + return ans + return lang_map.get('x-default', '') + + +def read_alt_text_from_xmp(xmp, target_lang='') -> str: + from lxml import etree + try: + root = etree.fromstring(xmp) + except Exception: + return '' + # print(etree.tostring(root, encoding='utf-8', pretty_print=True).decode()) + for a in root.xpath('//*[local-name()="AltTextAccessibility"]'): + if ans := read_text_from_container(a, target_lang): + return ans + + for d in etree.XPath('//dc:description', namespaces={'dc': 'http://purl.org/dc/elements/1.1/'})(root): + if ans := read_text_from_container(d, target_lang): + return ans + return '' + + +def read_alt_text(pil_im_or_path, target_lang='') -> str: + if isinstance(pil_im_or_path, str): + from PIL import Image + im = Image.open(pil_im_or_path) + else: + im = pil_im_or_path + xmp = read_xmp_from_pil_image(im) + if xmp: + if alt := read_alt_text_from_xmp(xmp, target_lang).strip(): + return alt + exif = im.getexif() + if exif: + if desc := exif.get(270): + return desc.strip() + return '' + # }}} def test(): # {{{ diff --git a/src/pyj/book_list/conversion_widgets.pyj b/src/pyj/book_list/conversion_widgets.pyj index b404a13cc0d9..63f915a6143c 100644 --- a/src/pyj/book_list/conversion_widgets.pyj +++ b/src/pyj/book_list/conversion_widgets.pyj @@ -360,6 +360,7 @@ def 
structure_detection(container): g.appendChild(choices('chapter_mark', _('Chap&ter mark:'), ['pagebreak', 'rule', 'both', 'none'])) g.appendChild(checkbox('remove_first_image', _('Remove first &image'))) g.appendChild(checkbox('remove_fake_margins', _('Remove &fake margins'))) + g.appendChild(checkbox('add_alt_text_to_img', _('Add &alt text to images'))) g.appendChild(checkbox('insert_metadata', _('Insert metadata at start of book'))) g.appendChild(lineedit('page_breaks_before', _('Insert page breaks before'), 50)) g.appendChild(lineedit('start_reading_at', _('Start reading at'), 50)) diff --git a/src/pyj/read_book/highlights.pyj b/src/pyj/read_book/highlights.pyj index 9f637acdc151..081215dda8cc 100644 --- a/src/pyj/read_book/highlights.pyj +++ b/src/pyj/read_book/highlights.pyj @@ -614,7 +614,7 @@ class ChapterGroup: sg.render_as_text(lines, link_prefix, current_query, as_markdown) -def show_export_dialog(annotations_manager): +def show_export_dialog(annotations_manager, book_metadata): sd = get_session_data() fmt = sd.get('highlights_export_format') if v"['text', 'markdown', 'calibre_annotations_collection']".indexOf(fmt) < 0: @@ -682,7 +682,8 @@ def show_export_dialog(annotations_manager): text = document.getElementById(ta_id).textContent ext = 'md' if fmt is 'markdown' else ('txt' if fmt is 'text' else 'json') mt = 'text/markdown' if fmt is 'markdown' else ('text/plain' if fmt is 'text' else 'application/json') - filename = f'highlights.{ext}' + title = book_metadata?.title or _('Unknown') + filename = _('{title} - highlights').format(title=title) + f'.{ext}' file = new Blob([text], {'type': mt}) url = window.URL.createObjectURL(file) a = E.a(href=url, download=filename) @@ -956,7 +957,7 @@ def create_highlights_panel(annotations_manager, hide_panel, book, container, on _('All'), 'plus', select_all, _('Select all highlights'), class_='ac-button sel-button') export_button = create_button( - _('Export'), 'cloud-download', show_export_dialog.bind(None, 
annotations_manager), _('Export all or selected highlights'), + _('Export'), 'cloud-download', show_export_dialog.bind(None, annotations_manager, book.metadata), _('Export all or selected highlights'), class_='ac-button') c = E.div( style='padding: 1rem', diff --git a/src/pyj/read_book/resources.pyj b/src/pyj/read_book/resources.pyj index c854af7642f1..cbde88a6f382 100644 --- a/src/pyj/read_book/resources.pyj +++ b/src/pyj/read_book/resources.pyj @@ -412,12 +412,12 @@ def text_from_serialized_html(data, get_anchor_offset_map): ignore_text = {'img': True, 'math': True, 'rt': True, 'rp': True, 'rtc': True} ignore_text if tag_map: - stack = v'[[serialized_data.tree[2], False]]' + stack = v'[[serialized_data.tree[2], false]]' else: stack = v'[]' for child in serialized_data.tree.c: if child.n is 'body': - stack.push(v'[child, False]') + stack.push(v'[child, false]') anchor_offset_map = {} text_pos = 0 while stack.length: @@ -436,7 +436,7 @@ def text_from_serialized_html(data, get_anchor_offset_map): anchor_offset_map[aid] = text_pos if no_visit[src.n]: continue - ignore_text_in_node_and_children = v'!!ignore_text[src.n]' + ignore_text_in_node_and_children = text_ignored_in_parent or v'!!ignore_text[src.n]' if not ignore_text_in_node_and_children and src.x: ans.push(src.x) text_pos += src.x.length diff --git a/src/pyj/read_book/view.pyj b/src/pyj/read_book/view.pyj index 3aba8bb68dfb..8ee550467450 100644 --- a/src/pyj/read_book/view.pyj +++ b/src/pyj/read_book/view.pyj @@ -47,13 +47,13 @@ from read_book.toc import get_current_toc_nodes, update_visible_toc_nodes from read_book.touch import set_left_margin_handler, set_right_margin_handler from session import get_device_uuid, get_interface_data from utils import ( - default_context_menu_should_be_allowed, html_escape, is_ios, parse_url_params, + default_context_menu_should_be_allowed, html_escape, parse_url_params, safe_set_inner_html, username_key ) add_extra_css(def(): sel = '.book-side-margin' - ans = build_rule(sel, 
cursor='pointer', text_align='center', height='100vh', user_select='none', display='flex', align_items='center', justify_content='space-between', flex_direction='column') + ans = build_rule(sel, cursor='pointer', text_align='center', height='100dvh', user_select='none', display='flex', align_items='center', justify_content='space-between', flex_direction='column') ans += build_rule(sel + ' > .arrow', display='none') ans += build_rule(sel + ' > *', max_width='100%', overflow='hidden') ans += build_rule(sel + ':hover > .not-arrow', display='none') @@ -138,11 +138,11 @@ def show_controls_help(): container.removeChild(container.firstChild) container.appendChild(E.div( - style=f'overflow: hidden; width: 100vw; height: 100vh; text-align: center; font-size: 1.3rem; font-weight: bold; background: {get_color("window-background")};' + + style=f'overflow: hidden; width: 100vw; height: 100dvh; text-align: center; font-size: 1.3rem; font-weight: bold; background: {get_color("window-background")};' + 'display:flex; flex-direction: column; align-items: stretch', E.div( msg(_('Tap (or right click) for controls')), - style='height: 25vh; display:flex; align-items: center; border-bottom: solid 2px currentColor', + style='height: 25dvh; display:flex; align-items: center; border-bottom: solid 2px currentColor', ), E.div( style="display: flex; align-items: stretch; flex-grow: 10", @@ -191,12 +191,6 @@ def margin_elem(sd, which, id, onclick, oncontextmenu): ans.addEventListener('click', onclick) if oncontextmenu: ans.addEventListener('contextmenu', oncontextmenu) - if is_ios and which is 'margin_bottom' and not window.navigator.standalone and not /CriOS\//.test(window.navigator.userAgent): - # On iOS Safari 100vh includes the size of the navbar and there is no way to - # go fullscreen, so to make the bottom bar visible we add a margin to - # the bottom bar. CriOS is for Chrome on iOS. And in standalone - # (web-app mode) there is no nav bar. 
- ans.style.marginBottom = '25px' return ans @@ -294,14 +288,14 @@ class View: } iframe, self.iframe_wrapper = create_wrapped_iframe(handlers, _('Bootstrapping book reader...'), entry_point, iframe_kw) container.appendChild( - E.div(style='max-height: 100vh; width: 100vw; height: 100vh; overflow: hidden; display: flex; align-items: stretch', # container for horizontally aligned panels + E.div(style='max-height: 100dvh; width: 100dvw; height: 100dvh; overflow: hidden; display: flex; align-items: stretch', # container for horizontally aligned panels oncontextmenu=def (ev): if not default_context_menu_should_be_allowed(ev): ev.preventDefault() , - E.div(style='max-height: 100vh; display: flex; flex-direction: column; align-items: stretch; flex-grow:2', # container for iframe and any other panels in the same column - E.div(style='max-height: 100vh; flex-grow: 2; display:flex; align-items: stretch', # container for iframe and its overlay + E.div(style='max-height: 100dvh; display: flex; flex-direction: column; align-items: stretch; flex-grow:2', # container for iframe and any other panels in the same column + E.div(style='max-height: 100dvh; flex-grow: 2; display:flex; align-items: stretch', # container for iframe and its overlay left_margin, E.div(style='flex-grow:2; display:flex; align-items:stretch; flex-direction: column', # container for top and bottom margins margin_elem(sd, 'margin_top', 'book-top-margin', self.top_margin_clicked, self.margin_context_menu.bind(None, 'top')),