-
-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #44 from loqusion/fix/emoji
Include emoji fonts
- Loading branch information
Showing
14 changed files
with
307 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
""" | ||
Checks if emoji characters in a PDF file are rendered with an appropriate font face. | ||
This script is dumb, and only checks if the font face name for text with an | ||
emoji matches one of a set of patterns given on the command line. This | ||
assumption is naive, and may not hold if the font names change, or if a font | ||
name not supporting emoji happens to match any of the patterns. | ||
""" | ||
|
||
import re | ||
import sys | ||
from collections.abc import Iterator | ||
from dataclasses import dataclass | ||
from typing import Final | ||
|
||
import emoji | ||
import pdfplumber | ||
|
||
|
||
@dataclass
class TextElement:
    """A piece of text extracted from a PDF together with its font name."""

    # The extracted text itself.
    text: str
    # Name of the font face the text was rendered with.
    font: str
|
||
|
||
class InvalidEmojiFontException(Exception):
    """Raised when text containing an emoji uses a font matching no pattern.

    Attributes:
        text_element: The offending text element (its text and font name).
        font_patterns: The compiled patterns the font name was tested against.
        message: Human-readable description; also passed to ``Exception``.
    """

    def __init__(self, text_element: TextElement, font_patterns: list[re.Pattern]):
        self.text_element = text_element
        self.font_patterns = font_patterns
        # Build the quoted pattern list outside the f-string: reusing the
        # enclosing quote character inside a replacement field is only valid
        # syntax on Python 3.12+ (PEP 701) and is a SyntaxError before that.
        quoted_patterns = ", ".join(f"'{pat.pattern}'" for pat in font_patterns)
        self.message = (
            f'Detected invalid font for text containing emoji character: "{text_element.text}"\n'
            f'Font: "{text_element.font}"\n'
            f"Font did not match any of the following patterns: {quoted_patterns}"
        )
        super().__init__(self.message)
|
||
|
||
class PDFChecker:
    """Checks that emoji in a PDF file are rendered with an accepted font."""

    # Path of the PDF file to inspect.
    pdf_path: str

    def __init__(self, pdf_path: str):
        self.pdf_path = pdf_path

    def check_emojis(self, font_patterns: list[re.Pattern]):
        """Verify the font of every extracted text element containing an emoji.

        Args:
            font_patterns: Compiled patterns; the font name of an element with
                an emoji must be matched (via ``search``) by at least one.

        Raises:
            InvalidEmojiFontException: On the first element that contains an
                emoji but whose font name matches none of the patterns.
        """
        for element in self._extract_text_elements():
            contains_emoji = emoji.emoji_count(element.text) != 0
            # Elements without emoji are never checked against the patterns.
            if contains_emoji and all(
                pat.search(element.font) is None for pat in font_patterns
            ):
                raise InvalidEmojiFontException(element, font_patterns)

    def _extract_text_elements(self) -> Iterator[TextElement]:
        """Lazily yield one ``TextElement`` per word on every page of the PDF."""
        with pdfplumber.open(self.pdf_path) as pdf:
            for page in pdf.pages:
                # "fontname" is requested as an extra attribute so that each
                # extracted word carries the name of its font.
                extracted = page.extract_words(
                    keep_blank_chars=True,
                    use_text_flow=True,
                    extra_attrs=["fontname"],
                )
                yield from (
                    TextElement(text=entry["text"], font=entry["fontname"])
                    for entry in extracted
                )
|
||
|
||
class InvalidArgumentException(Exception):
    """Raised when the script is invoked with invalid command-line arguments.

    The usage line is appended to every message so the caller can print the
    exception as-is.
    """

    # Usage line, built once from the program name when the class is defined.
    USAGE: Final = f"usage: {sys.argv[0]} <pdf_path> <pattern> [<pattern>...]"

    def __init__(self, message: str):
        full_message = "\n".join((message, InvalidArgumentException.USAGE))
        self.message = full_message
        super().__init__(full_message)
|
||
|
||
def main():
    """Parse the command line and run the emoji font check.

    The first argument is the path of the PDF to inspect; every following
    argument is a regular expression (compiled case-insensitively) that an
    acceptable emoji font name must match.

    Raises:
        InvalidArgumentException: If the PDF path is missing, no pattern is
            given, or a pattern is not a valid regular expression.
        InvalidEmojiFontException: If text containing an emoji uses a font
            that matches none of the patterns.
    """
    args = sys.argv[1:]
    # Validate before indexing: with no arguments at all, `sys.argv[1]` would
    # raise a bare IndexError instead of the usage error.
    if not args:
        raise InvalidArgumentException(
            "expected a path to a PDF file as the first argument"
        )

    pdf_path, *raw_patterns = args
    if not raw_patterns:
        raise InvalidArgumentException(
            "expected one or more patterns given as arguments"
        )

    try:
        font_patterns = [re.compile(pat, re.IGNORECASE) for pat in raw_patterns]
    except re.error as err:
        # Report a malformed pattern as a usage error rather than a traceback.
        raise InvalidArgumentException(
            f"invalid pattern {err.pattern!r}: {err}"
        ) from err

    PDFChecker(pdf_path).check_emojis(font_patterns)
|
||
|
||
if __name__ == "__main__":
    # Exit status: 1 when the font check fails, 2 on bad command-line usage.
    # Any other exception propagates unchanged and produces a traceback.
    try:
        main()
    except (InvalidEmojiFontException, InvalidArgumentException) as failure:
        if isinstance(failure, InvalidEmojiFontException):
            label, status = "test failed", 1
        else:
            label, status = "error", 2
        print(f"{label}: {failure}", file=sys.stderr)
        sys.exit(status)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
#(emoji.tangerine)hell#(emoji.o) w#(emoji.o)rld#(emoji.face) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
Specify which emoji font to use. If `emojiFont` is `null`, no emoji font will | ||
be included. | ||
|
||
May be any of the following: | ||
|
||
- [`"twemoji"`](https://search.nixos.org/packages?channel=unstable&show=twemoji-color-font) (default) | ||
- [`"twemoji-cbdt"`](https://search.nixos.org/packages?channel=unstable&show=twitter-color-emoji) | ||
- [`"noto"`](https://search.nixos.org/packages?channel=unstable&show=noto-fonts-color-emoji) | ||
- [`"noto-monochrome"`](https://search.nixos.org/packages?channel=unstable&show=noto-fonts-monochrome-emoji) | ||
- [`"emojione"`](https://search.nixos.org/packages?channel=unstable&show=emojione) | ||
- `null` — Don't include any emoji font (e.g. so you can include your own) | ||
|
||
<details> | ||
<summary>Note about difference between <code>"twemoji"</code> and <code>"twemoji-cbdt"</code></summary> | ||
|
||
The default Twemoji font uses the SVG [font format], which may not be supported | ||
by some systems. If emojis aren't displaying properly, using `"twemoji-cbdt"` | ||
may fix it. | ||
|
||
[font format]: https://www.colorfonts.wtf/ | ||
|
||
</details> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# Resolves the `emojiFont` option to the directory containing the selected
# emoji font, or to null when no emoji font should be included.
# Throws on an unknown font name or on a value that is neither string nor null.
{
  # Not used here; kept so the function's arguments stay unchanged for callers.
  lib,
  pkgs,
}: emojiFont: let
  inherit (builtins) isString typeOf;
in
  # Compare against null directly: `builtins.isNull` is deprecated.
  if emojiFont == null
  then null
  else if isString emojiFont
  then
    (
      # "default" resolves to the same font as "twemoji".
      if emojiFont == "default" || emojiFont == "twemoji"
      then "${pkgs.twemoji-color-font}/share/fonts/truetype"
      else if emojiFont == "twemoji-cbdt"
      then "${pkgs.twitter-color-emoji}/share/fonts/truetype"
      else if emojiFont == "noto"
      then "${pkgs.noto-fonts-color-emoji}/share/fonts/noto"
      else if emojiFont == "noto-monochrome"
      then "${pkgs.noto-fonts-monochrome-emoji}/share/fonts/noto"
      else if emojiFont == "emojione"
      then "${pkgs.emojione}/share/fonts/truetype"
      else throw ''invalid value for `emojiFont`: "${emojiFont}". Must be one of: "twemoji", "twemoji-cbdt", "noto", "noto-monochrome", "emojione", null.''
    )
  else throw ''invalid type for `emojiFont`: ${typeOf emojiFont}. Must be string or null.''
Oops, something went wrong.