From 136c74c85c4810675dc5e0b8cddf40b7ac9e1654 Mon Sep 17 00:00:00 2001 From: Michal Hoftich Date: Mon, 6 Jan 2025 14:47:42 +0100 Subject: [PATCH] use Unicode characters for MathML font styles --- CHANGELOG.md | 5 ++++ domfilters/make4ht-mathmlfixes.lua | 42 ++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ea5ee99..7981114 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +- 2025/01/06 + + - use Unicode characters for MathML font styles. + https://tex.stackexchange.com/a/734331/2891 + - 2024/12/23 - fixed bug in the `make4ht-indexing` library. It didn't handle index entries diff --git a/domfilters/make4ht-mathmlfixes.lua b/domfilters/make4ht-mathmlfixes.lua index fe83846..d2adec6 100644 --- a/domfilters/make4ht-mathmlfixes.lua +++ b/domfilters/make4ht-mathmlfixes.lua @@ -1,4 +1,7 @@ local log = logging.new("mathmlfixes") + +local mathml_chardata = require "make4ht-mathml-char-def" + -- should be inside , so we don't process it -- even though it is a token element local token = {"mi", "mn", "mo", "mtext", "mspace", "ms"} @@ -487,6 +490,41 @@ local function fix_rel_mo(el) end +local uchar = utf8.char +local ucodes = utf8.codes + +-- current version of MathML doesn't support the mathvariant attribute, so we need to replace unicode characters with the corresponding base code for the current font style +local function replace_characters(math, current_style) + -- recursively loop over all the children of the math element and replace the unicode characters with the corresponding base code for the current font style + for _, child in ipairs(math:get_children()) do + if child:is_text() then + local text = child:get_text() + local new_text = {} + for _ ,char in ucodes(text) do + -- replace the unicode characters with the corresponding base code for the current font style + local code = mathml_chardata[char] + if code then + local new_char = code[current_style] or char + table.insert(new_text, uchar(new_char)) + else + table.insert(new_text, uchar(char)) + end + end + child._text = table.concat(new_text) + elseif child:is_element() then + local current_style = child:get_attribute("mathvariant") or current_style + replace_characters(child, current_style) + end + end +end + +local function fix_mathml_chars(el) + local el_name, _ = get_element_name(el) + if el_name == "math" then + replace_characters(el, "normal") + end +end + return function(dom) dom:traverse_elements(function(el) @@ -504,6 +542,10 @@ return function(dom) fix_numbers(el) fix_operators(el) fix_mathvariant(el) + if settings.output_format ~= "odt" then + -- ODT needs older MathML version + fix_mathml_chars(el) + end fix_dcases(el) top_mrow(el) delete_last_empty_mtr(el)