Skip to content

Commit

Permalink
Added make4ht-char-def
Browse files Browse the repository at this point in the history
  • Loading branch information
michal-h21 committed Sep 25, 2024
1 parent e1f7223 commit 2879df7
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 8 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ tags
*.css
readme.tex
changelog.tex
make4ht-char-def.lua
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Changelog

- 2024/09/25

- added `make4ht-char-def` library to remove dependency on `char-def` from
ConTeXt. It is used by the `sectionid` DOM filter. The library is
automatically created from UnicodeData.txt by `tools/make_chardata.lua`.
https://tex.stackexchange.com/q/727202/2891

- 2024/09/18

- print error messages from the `log` file.
Expand Down
13 changes: 8 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,9 @@ ifeq ($(strip $(shell git rev-parse --is-inside-work-tree 2>/dev/null)),true)
git fetch --tags
endif

doc: $(doc_file) readme.tex
doc: chardef $(doc_file) readme.tex

htmldoc: ${htmldoc}
htmldoc: chardef ${htmldoc}

make4ht-doc.pdf: $(doc_sources)
latexmk -pdf -pdflatex='lualatex "\def\version{${VERSION}}\def\gitdate{${DATE}}\input{%S}"' make4ht-doc.tex
Expand All @@ -70,7 +70,7 @@ readme.tex: README.md
changelog.tex: CHANGELOG.md
pandoc -f markdown+definition_lists -t LaTeX CHANGELOG.md > changelog.tex

build: doc $(lua_content) $(filters) $(domfilters)
build: chardef doc $(lua_content) $(filters) $(domfilters)
@rm -rf build
@mkdir -p $(BUILD_MAKE4HT)
@mkdir -p $(BUILD_MAKE4HT)/filters
Expand All @@ -86,10 +86,10 @@ build: doc $(lua_content) $(filters) $(domfilters)
@cp README.md $(BUILD_MAKE4HT)/README
@cd $(BUILD_DIR) && zip -r make4ht.zip make4ht

install: doc $(lua_content) $(filters) $(domfilters) justinstall
install: chardef doc $(lua_content) $(filters) $(domfilters) justinstall
cp $(doc_file) $(MANUAL_DIR)

justinstall:
justinstall: chardef
mkdir -p $(INSTALL_DIR)
mkdir -p $(MANUAL_DIR)
mkdir -p $(FILTERS_DIR)
Expand All @@ -106,6 +106,9 @@ justinstall:
echo $(wildcard $(EXECUTABLE))
$(INSTALL_COMMAND)

chardef:
texlua tools/make_chardata.lua > make4ht-char-def.lua

version:
echo $(VERSION), $(DATE)

Expand Down
7 changes: 4 additions & 3 deletions domfilters/make4ht-sectionid.lua
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@ local mkutils = require "mkutils"
local log = logging.new("tocid")
-- Unicode data distributed with ConTeXt
-- defines "characters" table
if not mkutils.isModuleAvailable("char-def") then
if not mkutils.isModuleAvailable("make4ht-char-def") then
log:warning("char-def module not found")
log:warning("cannot fix section id's")
return function(dom) return dom end
end
require "char-def"
local chardata = characters.data or {}

local chardata = require "make4ht-char-def"



local toc = nil
Expand Down
43 changes: 43 additions & 0 deletions tools/make_chardata.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
kpse.set_program_name "luatex"
-- Create a Lua module from UnicodeData.txt.
-- For every letter (general category L*) and space separator (Zs) we record
-- its general category, its lowercase mapping and the base letter of its
-- decomposition.

-- 1-based positions of the semicolon separated UnicodeData.txt fields we use
local FIELD_CODE = 1
local FIELD_CATEGORY = 3
local FIELD_DECOMPOSITION = 6
local FIELD_LOWERCASE = 14

local unicode_data = kpse.find_file("UnicodeData.txt")
-- io.lines(nil) would silently iterate over the standard input, so stop here
-- with a clear message when the file cannot be located
if not unicode_data then
  error("make_chardata: cannot find UnicodeData.txt using kpse")
end

-- sequence of records {char=, shcode=, lccode=, category=}, in file order
local chardata = {}
for line in io.lines(unicode_data) do
  local record = line:explode(";")
  local char = tonumber(record[FIELD_CODE] or "", 16)
  local category = string.lower(record[FIELD_CATEGORY] or "")
  -- lines without a valid code point (e.g. empty lines) are ignored
  if char and (category:match("^l") or category == "zs") then
    -- the decomposition field contains charcodes for the base letter and the
    -- accent, we care only about the base letter. Compatibility mappings
    -- start with a tag like <noBreak> or <font>; the tag must be skipped
    -- explicitly, because its name can contain letters that look like
    -- hexadecimal digits
    local decomposition = record[FIELD_DECOMPOSITION] or ""
    local decomposed = decomposition:match("^<[^>]+>%s*(%x+)")
      or decomposition:match("^(%x+)")
    decomposed = decomposed and tonumber(decomposed, 16)
    -- the simple lowercase mapping is the second to last field. We use its
    -- fixed position, so the result doesn't depend on how the trailing empty
    -- title case field is handled when the line is split
    local lower = record[FIELD_LOWERCASE]
    lower = lower and tonumber(lower, 16) or nil
    chardata[#chardata+1] = {
      char = char,
      shcode = decomposed,
      lccode = lower,
      category = category
    }
  end
end

-- Write the collected records to the standard output as a Lua module that
-- returns a table indexed by the character code.

-- Append a "caption=value" pair to the list of fields. Values that are
-- missing (nil) are skipped, so they don't appear in the generated table.
local function add(fields, caption, value)
  if not value then
    return
  end
  table.insert(fields, string.format("%s=%s", caption, value))
end

print "return {"

for i = 1, #chardata do
  local entry = chardata[i]
  -- the category must be wrapped in quotes, so it is saved as a string
  local quoted_category = string.format('"%s"', entry.category)
  local fields = {}
  add(fields, "category", quoted_category)
  add(fields, "lccode", entry.lccode)
  add(fields, "shcode", entry.shcode)
  local body = table.concat(fields, ", ")
  print(string.format("[%s] = {%s},", entry.char, body))
end

print "}"

0 comments on commit 2879df7

Please sign in to comment.