From 2879df7c14c4ff8a9a54d03835b4f678c68d67ad Mon Sep 17 00:00:00 2001
From: Michal Hoftich
Date: Wed, 25 Sep 2024 16:24:09 +0200
Subject: [PATCH] Added make4ht-char-def

---
Notes (not part of the commit message):
  * tools/make_chardata.lua: compatibility tags like <compat> are removed
    before the decomposition is read, the script fails when UnicodeData.txt
    cannot be found, and the lowercase mapping is read from field 14.
  * domfilters/make4ht-sectionid.lua: the warning names the module that is
    really missing.
  * the post-image blob ids on the "index" lines were not recalculated.

 .gitignore                       |  1 +
 CHANGELOG.md                     |  7 +++++
 Makefile                         | 13 +++++---
 domfilters/make4ht-sectionid.lua |  9 +++---
 tools/make_chardata.lua          | 54 ++++++++++++++++++++++++++++++++
 5 files changed, 75 insertions(+), 9 deletions(-)
 create mode 100644 tools/make_chardata.lua

diff --git a/.gitignore b/.gitignore
index 1de04a7..fbd6c5d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,3 +21,4 @@ tags
 *.css
 readme.tex
 changelog.tex
+make4ht-char-def.lua
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0ebf5e8..e481dfe 100755
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,12 @@
 # Changelog
 
+- 2024/09/25
+
+  - added `make4ht-char-def` library to remove dependency on `char-def` from
+    ConTeXt. It is used by the `sectionid` DOM filter. The library is
+    automatically created from UnicodeData.txt by `tools/make_chardata.lua`.
+    https://tex.stackexchange.com/q/727202/2891
+
 - 2024/09/18
 
   - print error messages from the `log` file.
diff --git a/Makefile b/Makefile
index 87cde97..0a8de2b 100644
--- a/Makefile
+++ b/Makefile
@@ -54,9 +54,9 @@ ifeq ($(strip $(shell git rev-parse --is-inside-work-tree 2>/dev/null)),true)
 	git fetch --tags
 endif
 
-doc: $(doc_file) readme.tex
+doc: chardef $(doc_file) readme.tex
 
-htmldoc: ${htmldoc}
+htmldoc: chardef ${htmldoc}
 
 make4ht-doc.pdf: $(doc_sources)
 	latexmk -pdf -pdflatex='lualatex "\def\version{${VERSION}}\def\gitdate{${DATE}}\input{%S}"' make4ht-doc.tex
@@ -70,7 +70,7 @@ readme.tex: README.md
 changelog.tex: CHANGELOG.md
 	pandoc -f markdown+definition_lists -t LaTeX CHANGELOG.md > changelog.tex
 
-build: doc $(lua_content) $(filters) $(domfilters)
+build: chardef doc $(lua_content) $(filters) $(domfilters)
 	@rm -rf build
 	@mkdir -p $(BUILD_MAKE4HT)
 	@mkdir -p $(BUILD_MAKE4HT)/filters
@@ -86,10 +86,10 @@ build: doc $(lua_content) $(filters) $(domfilters)
 	@cp README.md $(BUILD_MAKE4HT)/README
 	@cd $(BUILD_DIR) && zip -r make4ht.zip make4ht
 
-install: doc $(lua_content) $(filters) $(domfilters) justinstall
+install: chardef doc $(lua_content) $(filters) $(domfilters) justinstall
 	cp $(doc_file) $(MANUAL_DIR)
 
-justinstall:
+justinstall: chardef
 	mkdir -p $(INSTALL_DIR)
 	mkdir -p $(MANUAL_DIR)
 	mkdir -p $(FILTERS_DIR)
@@ -106,6 +106,9 @@ justinstall:
 	echo $(wildcard $(EXECUTABLE))
 	$(INSTALL_COMMAND)
 
+chardef:
+	texlua tools/make_chardata.lua > make4ht-char-def.lua
+
 version:
 	echo $(VERSION), $(DATE)
 
diff --git a/domfilters/make4ht-sectionid.lua b/domfilters/make4ht-sectionid.lua
index c8e9892..25f54e6 100644
--- a/domfilters/make4ht-sectionid.lua
+++ b/domfilters/make4ht-sectionid.lua
@@ -2,13 +2,14 @@ local mkutils = require "mkutils"
 local log = logging.new("tocid")
 -- Unicode data distributed with ConTeXt
 -- defines "characters" table
-if not mkutils.isModuleAvailable("char-def") then
-  log:warning("char-def module not found")
+if not mkutils.isModuleAvailable("make4ht-char-def") then
+  log:warning("make4ht-char-def module not found")
   log:warning("cannot fix section id's")
   return function(dom) return dom end
 end
-require "char-def"
-local chardata = characters.data or {}
+
+local chardata = require "make4ht-char-def"
+
 
 local toc = nil
 
diff --git a/tools/make_chardata.lua b/tools/make_chardata.lua
new file mode 100644
index 0000000..b4d4fc3
--- /dev/null
+++ b/tools/make_chardata.lua
@@ -0,0 +1,54 @@
+kpse.set_program_name "luatex"
+-- create Lua module from UnicodeData
+-- we need mapping to lower case letters and decomposed base letters for accented characters
+-- usage: texlua tools/make_chardata.lua > make4ht-char-def.lua
+-- (it needs the kpse library and string.explode, so it must be run by texlua)
+local unicode_data = kpse.find_file("UnicodeData.txt")
+-- io.lines(nil) would silently read from stdin, so fail loudly instead
+assert(unicode_data, "make_chardata: cannot find UnicodeData.txt using kpse")
+-- positions of the fields we use in the semicolon separated UnicodeData records
+local CODE, CATEGORY, DECOMPOSITION, LOWERCASE = 1, 3, 6, 14
+local chardata = {}
+for line in io.lines(unicode_data) do
+  local record = line:explode(";")
+  local char = tonumber(record[CODE] or "", 16)
+  local category = string.lower(record[CATEGORY] or "")
+  -- keep only letters and space separators; skip malformed lines without a code point
+  if char and (category:match("^l") or category == "zs") then
+    -- the decomposed field contains charcode for the base letter and accent,
+    -- optionally preceded by a compatibility tag like <compat> or <noBreak>.
+    -- the tag must be removed first, because its name can contain hexadecimal
+    -- digits (the "c" in <compat>) that would be matched as the char code.
+    -- we care only about the base letter
+    local decomposition = (record[DECOMPOSITION] or ""):gsub("^<[^>]*>%s*", "")
+    local decomposed = decomposition:match("^(%x+)")
+    decomposed = decomposed and tonumber(decomposed, 16)
+    -- simple lowercase mapping; the field is empty when there is no lowercase variant
+    local lower = tonumber(record[LOWERCASE] or "", 16)
+    chardata[#chardata+1] = {
+      char = char,
+      shcode = decomposed,
+      lccode = lower,
+      category = category
+    }
+  end
+end
+
+print "return {"
+-- append "caption=value" to fields; fields with nil value are omitted
+local function add(fields, caption, value)
+  if value then
+    fields[#fields+1] = string.format("%s=%s", caption, value)
+  end
+end
+
+for _, data in ipairs(chardata) do
+  local fields = {}
+  -- we need to add quotes to force string
+  add(fields, "category", string.format('"%s"', data.category))
+  add(fields, "lccode", data.lccode)
+  add(fields, "shcode", data.shcode)
+  print(string.format("[%s] = {%s},", data.char, table.concat(fields, ", ")))
+end
+
+print "}"