Skip to content

Commit

Permalink
Merge pull request #44 from loqusion/fix/emoji
Browse files Browse the repository at this point in the history
Include emoji fonts
  • Loading branch information
loqusion authored Jan 28, 2025
2 parents aadffe8 + 4f3e747 commit 9a82720
Show file tree
Hide file tree
Showing 14 changed files with 307 additions and 12 deletions.
101 changes: 101 additions & 0 deletions checks/check-emojis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
"""
Checks if emoji characters in a PDF file are rendered with an appropriate font face.
This script is deliberately simple: it only checks that the font face name of
any text containing an emoji matches at least one of the patterns given on the
command line. This heuristic is naive and may break if the font names change,
or if a font without emoji support happens to match one of the patterns.
"""

import re
import sys
from collections.abc import Iterator
from dataclasses import dataclass
from typing import Final

import emoji
import pdfplumber


@dataclass
class TextElement:
    """A piece of text extracted from a PDF, paired with the name of its font."""

    # The extracted text itself.
    text: str
    # The font face name reported for that text.
    font: str


class InvalidEmojiFontException(Exception):
    """Raised when text containing an emoji uses a font matching no pattern.

    Attributes:
        text_element: The offending text together with its font name.
        font_patterns: The compiled patterns the font name was tested against.
        message: The full human-readable report (also passed to ``Exception``).
    """

    def __init__(self, text_element: TextElement, font_patterns: list[re.Pattern]):
        self.font_patterns = font_patterns
        self.text_element = text_element

        # Built separately so the report below does not need a nested f-string
        # that reuses its own quote character.
        quoted_patterns = ", ".join(f"'{pat.pattern}'" for pat in font_patterns)
        self.message = (
            f'Detected invalid font for text containing emoji character: "{text_element.text}"\n'
            f'Font: "{text_element.font}"\n'
            f"Font did not match any of the following patterns: {quoted_patterns}"
        )
        super().__init__(self.message)


class PDFChecker:
    """Checks which fonts are used for emoji-bearing text in one PDF file."""

    pdf_path: str

    def __init__(self, pdf_path: str):
        self.pdf_path = pdf_path

    def check_emojis(self, font_patterns: list[re.Pattern]):
        """Verify that every text element containing an emoji uses an accepted font.

        Args:
            font_patterns: Compiled patterns; a font name is accepted as soon
                as any one of them is found in it via ``search``.

        Raises:
            InvalidEmojiFontException: For the first emoji-bearing element
                whose font name matches none of the patterns.
        """
        for element in self._extract_text_elements():
            # Text without any emoji is not subject to the font check.
            if emoji.emoji_count(element.text) == 0:
                continue

            for pattern in font_patterns:
                if pattern.search(element.font):
                    break
            else:
                # No pattern matched this element's font name.
                raise InvalidEmojiFontException(element, font_patterns)

    def _extract_text_elements(self) -> Iterator[TextElement]:
        """Lazily yield one ``TextElement`` per word on every page of the PDF."""
        with pdfplumber.open(self.pdf_path) as document:
            for page in document.pages:
                # "fontname" is requested as an extra attribute so that each
                # returned word dict carries the key read below.
                for entry in page.extract_words(
                    keep_blank_chars=True,
                    use_text_flow=True,
                    extra_attrs=["fontname"],
                ):
                    yield TextElement(text=entry["text"], font=entry["fontname"])


class InvalidArgumentException(Exception):
    """Raised for bad command-line arguments; the usage line is appended.

    Attributes:
        message: The caller's message followed by a newline and ``USAGE``.
    """

    # Usage line, built once at class-definition time from the script name.
    USAGE: Final = f"usage: {sys.argv[0]} <pdf_path> <pattern> [<pattern>...]"

    def __init__(self, message: str):
        with_usage = "\n".join((message, InvalidArgumentException.USAGE))
        self.message = with_usage
        super().__init__(with_usage)


def main():
    """Parse the command line and check the emoji fonts of the given PDF.

    ``sys.argv[1]`` is the path of the PDF to inspect; every following
    argument is a regular expression, compiled case-insensitively, that an
    emoji's font name is allowed to match.

    Raises:
        InvalidArgumentException: If the PDF path is missing, no pattern was
            given, or a pattern is not a valid regular expression.
        InvalidEmojiFontException: Propagated from ``PDFChecker.check_emojis``.
    """
    # Validate before indexing: a bare invocation would otherwise die with an
    # IndexError traceback instead of the usage message.
    if len(sys.argv) < 2:
        raise InvalidArgumentException(
            "expected a path to a PDF file as the first argument"
        )
    pdf_path = sys.argv[1]

    raw_patterns = sys.argv[2:]
    if len(raw_patterns) == 0:
        raise InvalidArgumentException(
            "expected one or more patterns given as arguments"
        )

    font_patterns = []
    for raw_pattern in raw_patterns:
        try:
            font_patterns.append(re.compile(raw_pattern, re.IGNORECASE))
        except re.error as err:
            # Report a malformed pattern as an argument error rather than
            # letting the raw re.error escape.
            raise InvalidArgumentException(
                f"invalid pattern '{raw_pattern}': {err}"
            ) from err

    checker = PDFChecker(pdf_path)
    checker.check_emojis(font_patterns)


if __name__ == "__main__":
    # Exit status 1: the check itself failed. Exit status 2: bad command line.
    # Any other exception propagates unchanged.
    try:
        main()
    except InvalidEmojiFontException as exc:
        print(f"test failed: {exc}", file=sys.stderr)
        sys.exit(1)
    except InvalidArgumentException as exc:
        print(f"error: {exc}", file=sys.stderr)
        sys.exit(2)
107 changes: 107 additions & 0 deletions checks/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
myLib,
}: let
inherit (pkgs) lib;
inherit (lib.strings) escapeShellArg concatMapStringsSep;
onlyDrvs = lib.filterAttrs (_: lib.isDerivation);
in
onlyDrvs (lib.makeScope myLib.newScope (self: let
Expand Down Expand Up @@ -60,6 +61,112 @@ in
};
};

checkEmojiScript = {
  # The Python interpreter plus the two libraries that `check-emojis.py`
  # imports. Every package is referenced through `pkgs` explicitly: a bare
  # `emoji` would not refer to the Python package here, because a binding of
  # that name already exists in the enclosing scope.
  inputs = [
    pkgs.python312
    pkgs.python312Packages.emoji
    pkgs.python312Packages.pdfplumber
  ];
  # Shell snippet that runs the checker on `$out`, passing each pattern as a
  # separate shell-escaped argument.
  script = patterns: ''
    python3.12 ${./check-emojis.py} "$out" ${lib.escapeShellArgs patterns}
  '';
};

emoji = {
  emojiFont,
  fontPaths ? [],
  patterns,
}: let
  # `"__OMIT__"` is a sentinel: for that value no `emojiFont` argument is
  # passed to the builder at all. Any other value (including `null`) is
  # forwarded as-is.
  emojiFontArg = lib.optionalAttrs (emojiFont != "__OMIT__") {
    inherit emojiFont;
  };
  # Build the `./emoji` project and run the emoji font checker, with the
  # given patterns, as its check phase.
  projectArgs = {
    inherit typstSource fontPaths;
    src = myLib.cleanTypstSource ./emoji;
    doCheck = true;
    nativeCheckInputs = checkEmojiScript.inputs;
    checkPhase = checkEmojiScript.script patterns;
  };
in
  myLib.buildTypstProject (projectArgs // emojiFontArg);
# One build check per `emojiFont` value. Each passes the single pattern
# `emoji`, which the checker script compiles case-insensitively and searches
# for in the font name.

# `"__OMIT__"` is the sentinel for which `emoji` passes no `emojiFont`
# argument to the builder at all.
emojiOmit = emoji {
emojiFont = "__OMIT__";
patterns = ["emoji"];
};
emojiTwemoji = emoji {
emojiFont = "twemoji";
patterns = ["emoji"];
};
emojiTwemojiCbdt = emoji {
emojiFont = "twemoji-cbdt";
patterns = ["emoji"];
};
emojiNoto = emoji {
emojiFont = "noto";
patterns = ["emoji"];
};
emojiNotoMonochrome = emoji {
emojiFont = "noto-monochrome";
patterns = ["emoji"];
};
emojiEmojiOne = emoji {
emojiFont = "emojione";
patterns = ["emoji"];
};
# Passes `emojiFont = null` and supplies the Noto color emoji font directory
# through `fontPaths` instead.
emojiFontOverride = emoji {
emojiFont = null;
fontPaths = ["${pkgs.noto-fonts-color-emoji}/share/fonts/noto"];
patterns = ["emoji"];
};

emojiWatch = {
  emojiFont,
  fontPaths ? [],
  patterns,
}: let
  # First argument to `watch`: run the emoji font checker as `postBuild`,
  # with its inputs made available as `nativeBuildInputs`.
  checkArgs = {
    nativeBuildInputs = checkEmojiScript.inputs;
    postBuild = checkEmojiScript.script patterns;
  };
  # `"__OMIT__"` is a sentinel: for that value no `emojiFont` argument is
  # passed on at all. Any other value (including `null`) is forwarded as-is.
  emojiFontArg = lib.optionalAttrs (emojiFont != "__OMIT__") {
    inherit emojiFont;
  };
  # Second argument to `watch`: the `./emoji` project itself.
  projectArgs = {
    inherit typstSource fontPaths;
    src = myLib.cleanTypstSource ./emoji;
  };
in
  watch checkArgs (projectArgs // emojiFontArg);
# One `emojiWatch` check per `emojiFont` value, using the same values and
# the same single pattern `emoji` as the `emoji*` build checks.

# `"__OMIT__"` is the sentinel for which `emojiWatch` passes no `emojiFont`
# argument at all.
emojiWatchOmit = emojiWatch {
emojiFont = "__OMIT__";
patterns = ["emoji"];
};
emojiWatchTwemoji = emojiWatch {
emojiFont = "twemoji";
patterns = ["emoji"];
};
emojiWatchTwemojiCbdt = emojiWatch {
emojiFont = "twemoji-cbdt";
patterns = ["emoji"];
};
emojiWatchNoto = emojiWatch {
emojiFont = "noto";
patterns = ["emoji"];
};
emojiWatchNotoMonochrome = emojiWatch {
emojiFont = "noto-monochrome";
patterns = ["emoji"];
};
emojiWatchEmojiOne = emojiWatch {
emojiFont = "emojione";
patterns = ["emoji"];
};
# Passes `emojiFont = null` and supplies the Noto color emoji font directory
# through `fontPaths` instead.
emojiWatchFontOverride = emojiWatch {
emojiFont = null;
fontPaths = ["${pkgs.noto-fonts-color-emoji}/share/fonts/noto"];
patterns = ["emoji"];
};

overlappingVirtualPaths = isInvariant: util: file:
util (let
op =
Expand Down
1 change: 1 addition & 0 deletions checks/emoji/main.typ
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#(emoji.tangerine)hell#(emoji.o) w#(emoji.o)rld#(emoji.face)
4 changes: 4 additions & 0 deletions docs/api/derivations/build-typst-project-local.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ All parameters accepted by

{{#include common/src.md}}

### `emojiFont` (optional) { #emojifont }

{{#include common/emoji-font.md}}

### `fontPaths` (optional) { #fontpaths }

{{#include common/font-paths.md}}
Expand Down
4 changes: 4 additions & 0 deletions docs/api/derivations/build-typst-project.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ All parameters accepted by

{{#include common/src.md}}

### `emojiFont` (optional) { #emojifont }

{{#include common/emoji-font.md}}

### `fontPaths` (optional) { #fontpaths }

{{#include common/font-paths.md}}
Expand Down
22 changes: 22 additions & 0 deletions docs/api/derivations/common/emoji-font.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
Specify which emoji font to use. If `emojiFont` is `null`, no emoji font will
be included.

May be any of the following:

- [`"twemoji"`](https://search.nixos.org/packages?channel=unstable&show=twemoji-color-font) (default)
- [`"twemoji-cbdt"`](https://search.nixos.org/packages?channel=unstable&show=twitter-color-emoji)
- [`"noto"`](https://search.nixos.org/packages?channel=unstable&show=noto-fonts-color-emoji)
- [`"noto-monochrome"`](https://search.nixos.org/packages?channel=unstable&show=noto-fonts-monochrome-emoji)
- [`"emojione"`](https://search.nixos.org/packages?channel=unstable&show=emojione)
- `null` — Don't include any emoji font (e.g. so you can include your own)

<details>
<summary>Note about difference between <code>"twemoji"</code> and <code>"twemoji-cbdt"</code></summary>

The default Twemoji font uses the SVG [font format], which may not be supported
by some systems. If emojis aren't displaying properly, using `"twemoji-cbdt"`
may fix it.

[font format]: https://www.colorfonts.wtf/

</details>
4 changes: 4 additions & 0 deletions docs/api/derivations/dev-shell.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ or [`direnv`][direnv].
**Note:** All parameters for [`mkShell`][nixpkgs-mkshell] are also
supported.

### `emojiFont` (optional) { #emojifont }

{{#include common/emoji-font.md}}

### `extraShellHook` (optional) { #extrashellhook }

Bash statements added to the [`shellHook`][nixpkgs-mkshell-attributes]
Expand Down
4 changes: 4 additions & 0 deletions docs/api/derivations/mk-typst-derivation.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ See also: [Typst CLI Usage][typst-cli-usage]

{{#include common/src.md}}

### `emojiFont` (optional) { #emojifont }

{{#include common/emoji-font.md}}

### `fontPaths` (optional) { #fontpaths }

{{#include common/font-paths.md}}
Expand Down
4 changes: 4 additions & 0 deletions docs/api/derivations/watch-typst-project.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ changes.
[`writeShellApplication`][nixpkgs-writeshellapplication] are also supported
(besides `text`).

### `emojiFont` (optional) { #emojifont }

{{#include common/emoji-font.md}}

### `fontPaths` (optional) { #fontpaths }

{{#include common/font-paths.md}}
Expand Down
1 change: 1 addition & 0 deletions lib/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ in {
coerceVirtualPathAttr = callPackage ./coerceVirtualPathAttr.nix {};
copyVirtualPathsHook = callPackage ./setupHooks/copyVirtualPaths.nix {};
devShell = callPackage ./devShell.nix {};
emojiFontPathFromString = callPackage ./emojiFontPathFromString.nix {};
inferTypstProjectOutput = callPackage ./inferTypstProjectOutput.nix {};
linkVirtualPaths = callPackage ./linkVirtualPaths.nix {};
mkTypstDerivation = callPackage ./mkTypstDerivation.nix {};
Expand Down
14 changes: 10 additions & 4 deletions lib/devShell.nix
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
{
emojiFontPathFromString,
lib,
linkVirtualPaths,
mkShellNoCC,
typst,
}: args @ {
checks ? {},
emojiFont ? "default",
extraShellHook ? "",
fontPaths ? [],
forceVirtualPaths ? false,
Expand All @@ -13,14 +15,18 @@
virtualPaths ? [],
...
}: let
inherit (builtins) removeAttrs;
inherit (lib) optionalAttrs optionalString;
inherit (builtins) isNull removeAttrs;
inherit (lib) lists optionalAttrs optionalString;
inherit (lib.strings) concatStringsSep;

emojiFontPath = emojiFontPathFromString emojiFont;
allFontPaths = fontPaths ++ lists.optional (!isNull emojiFontPath) emojiFontPath;

unsetSourceDateEpochScript = builtins.readFile ./setupHooks/unsetSourceDateEpochScript.sh;

cleanedArgs = removeAttrs args [
"checks"
"emojiFont"
"extraShellHook"
"fontPaths"
"forceVirtualPaths"
Expand All @@ -29,8 +35,8 @@
];
in
mkShellNoCC (cleanedArgs
// optionalAttrs (fontPaths != []) {
TYPST_FONT_PATHS = concatStringsSep ":" fontPaths;
// optionalAttrs (allFontPaths != []) {
TYPST_FONT_PATHS = concatStringsSep ":" allFontPaths;
}
// {
inputsFrom = builtins.attrValues checks ++ inputsFrom;
Expand Down
26 changes: 26 additions & 0 deletions lib/emojiFontPathFromString.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Maps an `emojiFont` setting to the directory holding that font's files.
#
# Accepts one of the strings listed in `fontDirs` below, or `null`.
# Returns the font directory path as a string, or `null` when `emojiFont`
# is `null`. Throws on an unknown string or on any other type.
{
  lib,
  pkgs,
}: emojiFont: let
  inherit (builtins) isNull isString typeOf;

  # Attribute values are evaluated lazily, so only the selected entry is
  # ever evaluated.
  fontDirs = {
    "default" = "${pkgs.twemoji-color-font}/share/fonts/truetype";
    "twemoji" = "${pkgs.twemoji-color-font}/share/fonts/truetype";
    "twemoji-cbdt" = "${pkgs.twitter-color-emoji}/share/fonts/truetype";
    "noto" = "${pkgs.noto-fonts-color-emoji}/share/fonts/noto";
    "noto-monochrome" = "${pkgs.noto-fonts-monochrome-emoji}/share/fonts/noto";
    "emojione" = "${pkgs.emojione}/share/fonts/truetype";
  };
in
  if isNull emojiFont
  then null
  else if !(isString emojiFont)
  then throw ''invalid type for `emojiFont`: ${typeOf emojiFont}. Must be string or null.''
  else
    fontDirs.${emojiFont}
    or (throw ''invalid value for `emojiFont`: "${emojiFont}". Must be one of: "twemoji", "twemoji-cbdt", "noto", "noto-monochrome", "emojione", null.'')
Loading

0 comments on commit 9a82720

Please sign in to comment.