Skip to content

Commit

Permalink
{,e}uptex: accept upTeX internal encoding also for pTeX-compatible EUC/SJIS mode (#46)
Browse files Browse the repository at this point in the history
  • Loading branch information
t-tk committed Jan 1, 2025
1 parent 8323619 commit f927597
Show file tree
Hide file tree
Showing 8 changed files with 208 additions and 23 deletions.
18 changes: 14 additions & 4 deletions source/texk/web2c/euptexdir/upkcat.test
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#! /bin/sh -vx
# $Id$
# Copyright 2022 TANAKA Takuji <[email protected]>
# Copyright 2022-2025 TANAKA Takuji <[email protected]>
# You may freely use, modify and/or distribute this file.

BinDir=${BinDir:-.}
Expand All @@ -12,8 +12,18 @@ LC_ALL=C; export LC_ALL; LANGUAGE=C; export LANGUAGE
TEXMFCNF=$srcdir/../kpathsea; export TEXMFCNF
TEXINPUTS="$srcdir/uptexdir/tests;."; export TEXINPUTS

$_euptex -ini -etex -interaction batchmode -jobname upkcat-log kcat.tex || exit 1
grep '::' upkcat-log.log > upkcat-log_ed.log \
&& diff upkcat-log_ed.log $srcdir/uptexdir/tests/upkcat.txt || exit 2
rm -f eupkcat*log*.log

$_euptex -ini -etex -interaction batchmode -jobname eupkcat-log kcat.tex || exit 1
grep '::' eupkcat-log.log > eupkcat-log_ed.log \
&& diff eupkcat-log_ed.log $srcdir/uptexdir/tests/upkcat.txt || exit 2

$_euptex -ini -etex -interaction batchmode -jobname eupkcat-e-log -kanji-internal=euc kcat.tex || exit 3
grep '::' eupkcat-e-log.log > eupkcat-e-log_ed.log \
&& diff eupkcat-e-log_ed.log $srcdir/uptexdir/tests/upkcat-e.txt || exit 4

$_euptex -ini -etex -interaction batchmode -jobname eupkcat-s-log -kanji-internal=sjis kcat.tex || exit 5
grep '::' eupkcat-s-log.log > eupkcat-s-log_ed.log \
&& diff eupkcat-s-log_ed.log $srcdir/uptexdir/tests/upkcat-s.txt || exit 6

exit 0
7 changes: 4 additions & 3 deletions source/texk/web2c/uptexdir/kanji.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#define CS_TOKEN_FLAG 0x1FFFFFFF
#define IVS_CHAR_LIMIT 0x4400000
#define CJK_CHAR_LIMIT 0x1000000
#define UCS_CHAR_LIMIT 0x120000
#define UCS_CHAR_LIMIT 0x110000
#define CJK_TOKEN_FLAG 0xFFFFFF
#define CAT_LEFT_BRACE 1
#define CAT_DELIM_NUM 15
Expand Down Expand Up @@ -48,7 +48,8 @@ boolean is_char_kanji(integer c)
if (is_internalUPTEX())
return ((c >= 0)&&(c<IVS_CHAR_LIMIT));
else
return iskanji1(Hi(c)) && iskanji2(Lo(c));
return ((iskanji1(Hi(c)) && iskanji2(Lo(c))) ||
(UCS_CHAR_LIMIT<=c && c<IVS_CHAR_LIMIT));
}

boolean ismultiprn(integer c)
Expand Down Expand Up @@ -523,7 +524,7 @@ binary_search (long x, long *a, int left, int right)
integer kcatcodekey(integer c)
{
integer block;
if (is_internalUPTEX()) {
if (is_internalUPTEX() || (UCS_CHAR_LIMIT<=c && c<IVS_CHAR_LIMIT)) {
block = binary_search((long)c, ucs_range, 0, NUCS_RANGE-1);
switch (block) {
case 0x01: /* Block : Latin-1 Supplement */
Expand Down
74 changes: 67 additions & 7 deletions source/texk/web2c/uptexdir/tests/kcat.tex
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
% other_kchar=18 {cjk symbol codes}
% hangul=19 {hangul codes}

\ifnum\jis"2121="3000
% upTeX, UCS code

% separate in blocks
\write16{0x00C0::\the\kcatcode"00C0}% U+00C0 "Latin-1 Letters" in Latin-1 Supplement
\write16{§::\the\kcatcode"00A7}% U+00A7 Latin-1 Supplement
Expand Down Expand Up @@ -48,8 +51,6 @@
\write16{丽::\the\kcatcode"2F800}% U+2F800 CJK Compatibility Ideographs Supplement
\write16{𰀀::\the\kcatcode"30000}% U+30000 CJK Unified Ideographs Extension G
\write16{𱍐::\the\kcatcode"31350}% U+31350 CJK Unified Ideographs Extension H
\write16{逸︀::\the\kcatcode"409038}% U+9038 U+FE00 Standardized Variation Sequence
\write16{逸󠄀::\the\kcatcode"809038}% U+9038 U+E0100 Ideographic Variation Sequence

% kana=17 {hiragana, katakana}
\write16{あ::\the\kcatcode"3042}% U+3042 Hiragana
Expand All @@ -59,8 +60,6 @@
\write16{𛀀::\the\kcatcode"1B000}% U+1B000 Kana Supplement
\write16{𛄀::\the\kcatcode"1B100}% U+1B100 Kana Extended-A
\write16{𛅐::\the\kcatcode"1B150}% U+1B150 Small Kana Extension
\write16{あ゙::\the\kcatcode"223042}% U+3042 U+3099 Kana with Voiced Sound Mark
\write16{か゚::\the\kcatcode"24304B}% U+304B U+309A Kana with Semi-Voiced Sound Mark

% other_kchar=18 {cjk symbol codes}
\write16{※::\the\kcatcode"203B}% U+203B General Punctuation
Expand All @@ -75,9 +74,6 @@
\write16{〠::\the\kcatcode"3020}% U+3020 CJK Symbols and Punctuation
\write16{㋐::\the\kcatcode"32D0}% U+32D0 Enclosed CJK Letters and Months
\write16{㍿::\the\kcatcode"337F}% U+337F CJK Compatibility
\write16{🇯🇵::\the\kcatcode"25EFF5}% U+1F1EF U+1F1F5 Emoji Flag Sequence
\write16{👶🏽::\the\kcatcode"2BF476}% U+1F476 U+1F3FD Emoji with Modifier Fitzpatrick
\write16{0x800030::\the\kcatcode"800030}% U+0030 U+FE0F U+20E3 Emoji Keycap Sequence

% hangul=19 {hangul codes}
\write16{ᄀ::\the\kcatcode"1100}% U+1100 Hangul Jamo
Expand All @@ -94,4 +90,68 @@
\write16{︀::\the\kcatcode"FE00}% U+FE00 Variation Selector
\write16{󠄀::\the\kcatcode"E0100}% U+E0100 Variation Selector

\else
% pTeX-compatible, EUC / Shift_JIS code, 1..94ku

\write16{1区::\the\expandafter\kcatcode\jis"2121}
\write16{2区::\the\expandafter\kcatcode\jis"2221}
\write16{3区::\the\expandafter\kcatcode\jis"2321}
\write16{4区::\the\expandafter\kcatcode\jis"2421}
\write16{5区::\the\expandafter\kcatcode\jis"2521}
\write16{6区::\the\expandafter\kcatcode\jis"2621}
\write16{7区::\the\expandafter\kcatcode\jis"2721}
\write16{8区::\the\expandafter\kcatcode\jis"2821}
\write16{9区::\the\expandafter\kcatcode\jis"2921}
\write16{10区::\the\expandafter\kcatcode\jis"2A21}
\write16{11区::\the\expandafter\kcatcode\jis"2B21}
\write16{12区::\the\expandafter\kcatcode\jis"2C21}
\write16{13区::\the\expandafter\kcatcode\jis"2D21}
\write16{14区::\the\expandafter\kcatcode\jis"2E21}
\write16{15区::\the\expandafter\kcatcode\jis"2F21}
\write16{16区::\the\expandafter\kcatcode\jis"3021}
\write16{17区::\the\expandafter\kcatcode\jis"3121}
\write16{18区::\the\expandafter\kcatcode\jis"3221}
\write16{19区::\the\expandafter\kcatcode\jis"3321}
\write16{20区::\the\expandafter\kcatcode\jis"3421}
\write16{21区::\the\expandafter\kcatcode\jis"3521}
\write16{22区::\the\expandafter\kcatcode\jis"3621}
\write16{23区::\the\expandafter\kcatcode\jis"3721}
\write16{24区::\the\expandafter\kcatcode\jis"3821}
\write16{80区::\the\expandafter\kcatcode\jis"7021}
\write16{81区::\the\expandafter\kcatcode\jis"7121}
\write16{82区::\the\expandafter\kcatcode\jis"7221}
\write16{83区::\the\expandafter\kcatcode\jis"7321}
\write16{84区::\the\expandafter\kcatcode\jis"7421}
\write16{90区::\the\expandafter\kcatcode\jis"7A21}
\write16{91区::\the\expandafter\kcatcode\jis"7B21}
\write16{92区::\the\expandafter\kcatcode\jis"7C21}
\write16{93区::\the\expandafter\kcatcode\jis"7D21}
\write16{94区::\the\expandafter\kcatcode\jis"7E21}
\ifnum\jis"2121="8140
% extended Shift_JIS code, 95..120ku
\write16{95区(ext)::\the\expandafter\kcatcode\sjis"F040}
\write16{96区(ext)::\the\expandafter\kcatcode\sjis"F09F}
\write16{97区(ext)::\the\expandafter\kcatcode\sjis"F140}
\write16{98区(ext)::\the\expandafter\kcatcode\sjis"F19F}
\write16{117区(ext)::\the\expandafter\kcatcode\sjis"FB40}
\write16{118区(ext)::\the\expandafter\kcatcode\sjis"FB9F}
\write16{119区(ext)::\the\expandafter\kcatcode\sjis"FC40}
\write16{120区(ext)::\the\expandafter\kcatcode\sjis"FC9F}
\fi
\fi

% upTeX internal encoding

\write16{0x110000::\the\kcatcode"110000}% japanese-otf-uptex
\write16{0x120000::\the\kcatcode"120000}% japanese-otf-uptex
\write16{0x130000::\the\kcatcode"130000}% japanese-otf-uptex
\write16{0x140000::\the\kcatcode"140000}% japanese-otf-uptex
\write16{あ゙::\the\kcatcode"223042}% U+3042 U+3099 Kana with Voiced Sound Mark
\write16{か゚::\the\kcatcode"24304B}% U+304B U+309A Kana with Semi-Voiced Sound Mark
\write16{🇯🇵::\the\kcatcode"25EFF5}% U+1F1EF U+1F1F5 Emoji Flag Sequence
\write16{👶🏽::\the\kcatcode"2BF476}% U+1F476 U+1F3FD Emoji with Modifier Fitzpatrick
\write16{0x800030::\the\kcatcode"800030}% U+0030 U+FE0F U+20E3 Emoji Keycap Sequence
\write16{逸︀::\the\kcatcode"409038}% U+9038 U+FE00 Standardized Variation Sequence
\write16{逸󠄀::\the\kcatcode"809038}% U+9038 U+E0100 Ideographic Variation Sequence

\relax\end
45 changes: 45 additions & 0 deletions source/texk/web2c/uptexdir/tests/upkcat-e.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
1区::18
2区::18
3区::17
4区::17
5区::17
6区::17
7区::18
8区::18
9区::18
10区::18
11区::18
12区::18
13区::18
14区::16
15区::16
16区::16
17区::16
18区::16
19区::16
20区::16
21区::16
22区::16
23区::16
24区::16
80区::16
81区::16
82区::16
83区::16
84区::16
90区::16
91区::16
92区::16
93区::16
94区::16
0x110000::16
0x120000::16
0x130000::16
0x140000::16
あ^^e3^^82^^99::17
か^^e3^^82^^9a::17
^^f0^^9f^^87^^af^^f0^^9f^^87^^b5::18
^^f0^^9f^^91^^b6^^f0^^9f^^8f^^bd::18
0x800030::18
逸^^ef^^b8^^80::16
逸^^f3^^a0^^84^^80::16
53 changes: 53 additions & 0 deletions source/texk/web2c/uptexdir/tests/upkcat-s.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
1区::18
2区::18
3区::17
4区::17
5区::17
6区::17
7区::18
8区::18
9区::18
10区::18
11区::18
12区::18
13区::18
14区::16
15区::16
16区::16
17区::16
18区::16
19区::16
20区::16
21区::16
22区::16
23区::16
24区::16
80区::16
81区::16
82区::16
83区::16
84区::16
90区::16
91区::16
92区::16
93区::16
94区::16
95区(ext)::16
96区(ext)::16
97区(ext)::16
98区(ext)::16
117区(ext)::16
118区(ext)::16
119区(ext)::16
120区(ext)::16
0x110000::16
0x120000::16
0x130000::16
0x140000::16
あ^^e3^^82^^99::17
か^^e3^^82^^9a::17
^^f0^^9f^^87^^af^^f0^^9f^^87^^b5::18
^^f0^^9f^^91^^b6^^f0^^9f^^8f^^bd::18
0x800030::18
逸^^ef^^b8^^80::16
逸^^f3^^a0^^84^^80::16
18 changes: 11 additions & 7 deletions source/texk/web2c/uptexdir/tests/upkcat.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,13 @@ A::15
丽::16
𰀀::16
𱍐::16
逸︀::16
逸󠄀::16
あ::17
ア::17
ㇰ::17
𚿰::17
𛀀::17
𛄀::17
𛅐::17
あ゙::17
か゚::17
※::18
℃::18
Ⅻ::18
Expand All @@ -48,9 +44,6 @@ A::15
〠::18
㋐::18
㍿::18
🇯🇵::18
👶🏽::18
0x800030::18
ᄀ::19
ㄱ::19
ꥠ::19
Expand All @@ -61,3 +54,14 @@ A::15
🏻::20
︀::20
󠄀::20
0x110000::16
0x120000::16
0x130000::16
0x140000::16
あ゙::17
か゚::17
🇯🇵::18
👶🏽::18
0x800030::18
逸︀::16
逸󠄀::16
10 changes: 10 additions & 0 deletions source/texk/web2c/uptexdir/upkcat.test
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,18 @@ LC_ALL=C; export LC_ALL; LANGUAGE=C; export LANGUAGE
TEXMFCNF=$srcdir/../kpathsea; export TEXMFCNF
TEXINPUTS="$srcdir/uptexdir/tests;."; export TEXINPUTS

rm -f upkcat*log*.log

$_uptex -ini -interaction batchmode -jobname upkcat-log kcat.tex || exit 1
grep '::' upkcat-log.log > upkcat-log_ed.log \
&& diff upkcat-log_ed.log $srcdir/uptexdir/tests/upkcat.txt || exit 2

$_uptex -ini -interaction batchmode -jobname upkcat-e-log -kanji-internal=euc kcat.tex || exit 3
grep '::' upkcat-e-log.log > upkcat-e-log_ed.log \
&& diff upkcat-e-log_ed.log $srcdir/uptexdir/tests/upkcat-e.txt || exit 4

$_uptex -ini -interaction batchmode -jobname upkcat-s-log -kanji-internal=sjis kcat.tex || exit 5
grep '::' upkcat-s-log.log > upkcat-s-log_ed.log \
&& diff upkcat-s-log_ed.log $srcdir/uptexdir/tests/upkcat-s.txt || exit 6

exit 0
6 changes: 4 additions & 2 deletions source/texk/web2c/uptexdir/uptex-m.ch
Original file line number Diff line number Diff line change
Expand Up @@ -285,8 +285,6 @@ if (isinternalUPTEX) then begin
@+@t\1@>for k:=@"115 to @"118 do kcat_code(k):=kana; { Kana Extended-B .. Small Kana Extension }
@+@t\1@>for k:=@"145 to @"14F do kcat_code(k):=kanji; { CJK Unified Ideographs Extension B .. J }
@t\hskip10pt@>kcat_code(@"15B):=modifier; { Variation Selectors Supplement }
@+@t\1@>for k:=@"170 to @"171 do kcat_code(k):=kana; { Kana with (Semi-)Voiced Sound Mark }
@t\hskip10pt@>kcat_code(@"175):=kanji; { Standardized Variation Sequence }
@+@t\1@>for k:=@"177 to @"178 do kcat_code(k):=kanji; { Ideographic Variation Sequence }
@+@t\1@>for k:=@"1F9 to @"1FC do kcat_code(k):=modifier;
{ Combining Katakana-Hiragana (Semi-)Voiced Sound Mark .. Emoji Modifier Fitzpatrick }
Expand All @@ -302,6 +300,10 @@ end else begin
{ $\.{@@"20}+|k| = |kcatcodekey|(|fromKUTEN|(|HILO|(k,1))$ }
@+@t\1@>for k:=16 to 94 do kcat_code(@"A0+k):=kanji; {2 men 16 ku ... 94 ku}
end;
@+@t\1@>for k:=@"15F to @"162 do kcat_code(k):=kanji; { for japanese-otf, japanese-otf-uptex }
@+@t\1@>for k:=@"170 to @"171 do kcat_code(k):=kana; { Kana with (Semi-)Voiced Sound Mark }
@t\hskip10pt@>kcat_code(@"175):=kanji; { Standardized Variation Sequence }
@+@t\1@>for k:=@"177 to @"178 do kcat_code(k):=kanji; { Ideographic Variation Sequence }
@z
@x l.5897 - upTeX
Expand Down

0 comments on commit f927597

Please sign in to comment.