From f927597c8c906efb4bc0959fb2fef07d4ef15c1f Mon Sep 17 00:00:00 2001 From: TANAKA Takuji Date: Wed, 1 Jan 2025 22:58:19 +0900 Subject: [PATCH] {,e}uptex: accept upTeX internal encoding also for pTeX-compatible EUC/SJIS mode (#46) --- source/texk/web2c/euptexdir/upkcat.test | 18 ++++- source/texk/web2c/uptexdir/kanji.c | 7 +- source/texk/web2c/uptexdir/tests/kcat.tex | 74 +++++++++++++++++-- source/texk/web2c/uptexdir/tests/upkcat-e.txt | 45 +++++++++++ source/texk/web2c/uptexdir/tests/upkcat-s.txt | 53 +++++++++++++ source/texk/web2c/uptexdir/tests/upkcat.txt | 18 +++-- source/texk/web2c/uptexdir/upkcat.test | 10 +++ source/texk/web2c/uptexdir/uptex-m.ch | 6 +- 8 files changed, 208 insertions(+), 23 deletions(-) create mode 100644 source/texk/web2c/uptexdir/tests/upkcat-e.txt create mode 100644 source/texk/web2c/uptexdir/tests/upkcat-s.txt diff --git a/source/texk/web2c/euptexdir/upkcat.test b/source/texk/web2c/euptexdir/upkcat.test index 602c1e5b4..3d2291366 100755 --- a/source/texk/web2c/euptexdir/upkcat.test +++ b/source/texk/web2c/euptexdir/upkcat.test @@ -1,6 +1,6 @@ #! /bin/sh -vx # $Id$ -# Copyright 2022 TANAKA Takuji +# Copyright 2022-2025 TANAKA Takuji # You may freely use, modify and/or distribute this file. BinDir=${BinDir:-.} @@ -12,8 +12,18 @@ LC_ALL=C; export LC_ALL; LANGUAGE=C; export LANGUAGE TEXMFCNF=$srcdir/../kpathsea; export TEXMFCNF TEXINPUTS="$srcdir/uptexdir/tests;."; export TEXINPUTS -$_euptex -ini -etex -interaction batchmode -jobname upkcat-log kcat.tex || exit 1 -grep '::' upkcat-log.log > upkcat-log_ed.log \ - && diff upkcat-log_ed.log $srcdir/uptexdir/tests/upkcat.txt || exit 2 +rm -f eupkcat*log*.log + +$_euptex -ini -etex -interaction batchmode -jobname eupkcat-log kcat.tex || exit 1 +grep '::' eupkcat-log.log > eupkcat-log_ed.log \ + && diff eupkcat-log_ed.log $srcdir/uptexdir/tests/upkcat.txt || exit 2 + +$_euptex -ini -etex -interaction batchmode -jobname eupkcat-e-log -kanji-internal=euc kcat.tex || exit 3 +grep '::' eupkcat-e-log.log > eupkcat-e-log_ed.log \ + && diff eupkcat-e-log_ed.log $srcdir/uptexdir/tests/upkcat-e.txt || exit 4 + +$_euptex -ini -etex -interaction batchmode -jobname eupkcat-s-log -kanji-internal=sjis kcat.tex || exit 5 +grep '::' eupkcat-s-log.log > eupkcat-s-log_ed.log \ + && diff eupkcat-s-log_ed.log $srcdir/uptexdir/tests/upkcat-s.txt || exit 6 exit 0 diff --git a/source/texk/web2c/uptexdir/kanji.c b/source/texk/web2c/uptexdir/kanji.c index b96fd48f0..4610146f7 100644 --- a/source/texk/web2c/uptexdir/kanji.c +++ b/source/texk/web2c/uptexdir/kanji.c @@ -8,7 +8,7 @@ #define CS_TOKEN_FLAG 0x1FFFFFFF #define IVS_CHAR_LIMIT 0x4400000 #define CJK_CHAR_LIMIT 0x1000000 -#define UCS_CHAR_LIMIT 0x120000 +#define UCS_CHAR_LIMIT 0x110000 #define CJK_TOKEN_FLAG 0xFFFFFF #define CAT_LEFT_BRACE 1 #define CAT_DELIM_NUM 15 @@ -48,7 +48,8 @@ boolean is_char_kanji(integer c) if (is_internalUPTEX()) return ((c >= 0)&&(c upkcat-log_ed.log \ && diff upkcat-log_ed.log $srcdir/uptexdir/tests/upkcat.txt || exit 2 +$_uptex -ini -interaction batchmode -jobname upkcat-e-log -kanji-internal=euc kcat.tex || exit 3 +grep '::' upkcat-e-log.log > upkcat-e-log_ed.log \ + && diff upkcat-e-log_ed.log $srcdir/uptexdir/tests/upkcat-e.txt || exit 4 + +$_uptex -ini -interaction batchmode -jobname upkcat-s-log -kanji-internal=sjis kcat.tex || exit 5 +grep '::' upkcat-s-log.log > upkcat-s-log_ed.log \ + && diff upkcat-s-log_ed.log $srcdir/uptexdir/tests/upkcat-s.txt || exit 6 + exit 0 diff --git a/source/texk/web2c/uptexdir/uptex-m.ch b/source/texk/web2c/uptexdir/uptex-m.ch index 5c91e61ca..a71159443 100644 --- a/source/texk/web2c/uptexdir/uptex-m.ch +++ b/source/texk/web2c/uptexdir/uptex-m.ch @@ -285,8 +285,6 @@ if (isinternalUPTEX) then begin @+@t\1@>for k:=@"115 to @"118 do kcat_code(k):=kana; { Kana Extended-B .. Small Kana Extension } @+@t\1@>for k:=@"145 to @"14F do kcat_code(k):=kanji; { CJK Unified Ideographs Extension B .. J } @t\hskip10pt@>kcat_code(@"15B):=modifier; { Variation Selectors Supplement } - @+@t\1@>for k:=@"170 to @"171 do kcat_code(k):=kana; { Kana with (Semi-)Voiced Sound Mark } - @t\hskip10pt@>kcat_code(@"175):=kanji; { Standardized Variation Sequence } @+@t\1@>for k:=@"177 to @"178 do kcat_code(k):=kanji; { Ideographic Variation Sequence } @+@t\1@>for k:=@"1F9 to @"1FC do kcat_code(k):=modifier; { Combining Katakana-Hiragana (Semi-)Voiced Sound Mark .. Emoji Modifier Fitzpatrick } @@ -302,6 +300,10 @@ end else begin { $\.{@@"20}+|k| = |kcatcodekey|(|fromKUTEN|(|HILO|(k,1))$ } @+@t\1@>for k:=16 to 94 do kcat_code(@"A0+k):=kanji; {2 men 16 ku ... 94 ku} end; +@+@t\1@>for k:=@"15F to @"162 do kcat_code(k):=kanji; { for japanese-otf, japanese-otf-uptex } +@+@t\1@>for k:=@"170 to @"171 do kcat_code(k):=kana; { Kana with (Semi-)Voiced Sound Mark } +@t\hskip10pt@>kcat_code(@"175):=kanji; { Standardized Variation Sequence } +@+@t\1@>for k:=@"177 to @"178 do kcat_code(k):=kanji; { Ideographic Variation Sequence } @z @x l.5897 - upTeX