From ebdfd5fda66fbb52b83728e11ab9d7b87a404e23 Mon Sep 17 00:00:00 2001 From: SSgumS Date: Tue, 19 Jul 2022 04:54:55 +0200 Subject: [PATCH] update utf8 add Extend Move feature --- README.md | 2 + ...ian Toolkit.lua => AL.Persian Toolkit.lua} | 270 +++++++++++------- include/AL/utf8/README.md | 31 ++ include/AL/utf8/functions/lua53.lua | 1 + include/AL/utf8/init.lua | 6 +- include/AL/utf8/primitives/dummy.lua | 36 ++- include/AL/utf8/primitives/native.lua | 84 +++--- include/AL/utf8/test.sh | 3 +- .../AL/utf8/test/charclass_compiletime.lua | 3 +- include/AL/utf8/test/test.lua | 4 +- include/AL/utf8/test/test_compat.lua | 4 +- include/AL/utf8/test/test_pm.lua | 13 +- include/AL/utf8/test/test_utf8data.lua | 15 + 13 files changed, 317 insertions(+), 155 deletions(-) rename autoload/{AnimeList Persian Toolkit.lua => AL.Persian Toolkit.lua} (77%) create mode 100644 include/AL/utf8/test/test_utf8data.lua diff --git a/README.md b/README.md index ef26730..a845e3b 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,8 @@ Mentioned directories are at the locations bellow: # Scripts ## PakNevis Correct common mistakes in Persian text. +## Extend Move +Extend \move based on line's time (Created for linear signs that go outside of video boundaries). ## Unretard Converts non-RTL typed text to RTL compatible one. ## RTL / RTL diff --git a/autoload/AnimeList Persian Toolkit.lua b/autoload/AL.Persian Toolkit.lua similarity index 77% rename from autoload/AnimeList Persian Toolkit.lua rename to autoload/AL.Persian Toolkit.lua index f7c45a4..7948f4e 100644 --- a/autoload/AnimeList Persian Toolkit.lua +++ b/autoload/AL.Persian Toolkit.lua @@ -1,10 +1,11 @@ -- Special thanks to Majid110 for inspiring us the great feature of RTL Editor. 
-- https://github.com/Majid110/MasafAutomation --- Special thanks to lyger for writing base of an excelent splitter +-- Special thanks to lyger for writing the base of an excellent splitter -- https://github.com/lyger/Aegisub_automation_scripts -- Authers of each section: -- PakNevis: SSgumS +-- Extend Move: SSgumS -- RTL: Shinsekai_Yuri & SSgumS -- Un-RTL: Shinsekai_Yuri & SSgumS -- Unretard: SSgumS & MD @@ -21,10 +22,11 @@ local re = require 'aegisub.re' script_name = 'AnimeList Persian Toolkit' script_description = 'A toolkit for easier persian fansubbing.' script_author = 'AnimeList Team' -script_version = '1.2.3' +script_version = '1.3.0' ----- Script Names ----- local paknevis_script_name = 'AL Persian Toolkit/PakNevis' +local extend_move_script_name = 'AL Persian Toolkit/Extend Move' local rtl_script_name = 'AL Persian Toolkit/RTL/RTL' local unrtl_script_name = 'AL Persian Toolkit/RTL/Un-RTL' local unretard_script_name = 'AL Persian Toolkit/Unretard' @@ -45,15 +47,15 @@ local function removeRleChars(text) end local function unrtl(text) - text, _ = re.sub(text, "^((?:\\{.*?\\})*)"..RLE, "\\1") - text, _ = re.sub(text, "(\\\\[Nn])((?:\\{.*?\\})*)"..RLE, "\\1\\2") + text, _ = re.sub(text, "^((?:\\{.*?\\})*)" .. RLE, "\\1") + text, _ = re.sub(text, "(\\\\[Nn])((?:\\{.*?\\})*)" .. RLE, "\\1\\2") return text end local function rtl(text) text = unrtl(text) - text, _ = re.sub(text, "^((?:\\{.*?\\})*)", "\\1"..RLE) - text, _ = re.sub(text, "(\\\\[Nn])((?:\\{.*?\\})*)", "\\1\\2"..RLE) + text, _ = re.sub(text, "^((?:\\{.*?\\})*)", "\\1" .. RLE) + text, _ = re.sub(text, "(\\\\[Nn])((?:\\{.*?\\})*)", "\\1\\2" .. RLE) return text end @@ -69,7 +71,7 @@ local function serializeTable(val, name, skipnewlines, depth) tmp = tmp .. "{" .. (not skipnewlines and "\n" or "") for k, v in pairs(val) do - tmp = tmp .. serializeTable(v, k, skipnewlines, depth + 1) .. "," .. (not skipnewlines and "\n" or "") + tmp = tmp .. serializeTable(v, k, skipnewlines, depth + 1) .. "," .. 
(not skipnewlines and "\n" or "") end tmp = tmp .. string.rep(" ", depth) .. "}" @@ -130,6 +132,13 @@ local function expand(text) return result end +-- source: https://github.com/unanimated/luaegisub/blob/master/ua.Relocator.lua#L2555 +local function round(n, dec) + dec = dec or 0 + n = math.floor(n * 10 ^ dec + 0.5) / 10 ^ dec + return n +end + ----- PakNevis ----- function PakNevis(subtitles, selected_lines, active_line) -- local translation_src = ' كي“”0123456789?⸮,’‘ﺑﺗﺛﺟﺣﺧﺳﺷﺻﺿﻃﻇﻋﻏﻓﻗﻛﻟﻣﻧﻫﻳﺋﺍﺏﺕﺙﺝﺡﺥﺩﺫﺭﺯﺱﺵﺹﺽﻁﻅﻉﻍﻑﻕﻙﻝﻡﻥﻩﻭﻱﺁﺃﺅﺇﺉˈﯿٱھ《》' @@ -140,7 +149,7 @@ function PakNevis(subtitles, selected_lines, active_line) local punc_after = '%.:!،؛؟»%]%)' local punc_before = '«%[%(' - for z, i in ipairs(selected_lines) do + for z, i in ipairs(selected_lines) do local line = subtitles[i] -- translation -- for j = 0, translation_src:len() do @@ -158,27 +167,37 @@ function PakNevis(subtitles, selected_lines, active_line) -- line.text = utf8.gsub(line.text, '-(\\[Nn])', '–%1') -- replace ending - with – -- line.text = utf8.gsub(line.text, '-$', '–') -- replace ending - with – -- punctuation spacing patterns - line.text = utf8.gsub(line.text, ' (['..punc_after..'])', '%1') -- remove space before - line.text = utf8.gsub(line.text, '(['..punc_before..']) ', '%1') -- remove space after - line.text = utf8.gsub(line.text, '([^%d'..persian_digits..']%.)([^ '..punc_after..'])', '%1 %2') -- put space after . - line.text = utf8.gsub(line.text, '([%d'..persian_digits..']%.)([^ %d'..persian_digits..punc_after..'])', '%1 %2') -- put space after . - line.text = utf8.gsub(line.text, '(['..punc_after:sub(3)..'])([^ '..punc_after..'])', '%1 %2') -- put space after - line.text = utf8.gsub(line.text, '([^ '..punc_before..'])(['..punc_before..'])', '%1 %2') -- put space before + line.text = utf8.gsub(line.text, ' ([' .. punc_after .. '])', '%1') -- remove space before + line.text = utf8.gsub(line.text, '([' .. punc_before .. 
']) ', '%1') -- remove space after + line.text = utf8.gsub(line.text, '([^%d' .. persian_digits .. ']%.)([^ ' .. punc_after .. '])', '%1 %2') -- put space after . + line.text = utf8.gsub(line.text, '([%d' .. persian_digits .. ']%.)([^ %d' .. persian_digits .. punc_after .. '])' + , '%1 %2') -- put space after . + line.text = utf8.gsub(line.text, '([' .. punc_after:sub(3) .. '])([^ ' .. punc_after .. '])', '%1 %2') -- put space after + line.text = utf8.gsub(line.text, '([^ ' .. punc_before .. '])([' .. punc_before .. '])', '%1 %2') -- put space before -- affix spacing patterns line.text = utf8.gsub(line.text, '([^ ]ه) ی ', '%1‌ی ') -- fix ی space line.text = utf8.gsub(line.text, ' (ن?می) ', ' %1‌') -- put zwnj after می, نمی line.text = utf8.gsub(line.text, '^(ن?می) ', '%1‌') -- put zwnj after می, نمی - line.text = utf8.gsub(line.text, '(['..persian_alphabets..']['..persian_alphabets..']) (های?)([^'..persian_alphabets..'])', '%1‌%2%3') -- put zwnj before تر, تری, ترین, گر, گری, ها, های - line.text = utf8.gsub(line.text, '(['..persian_alphabets..']['..persian_alphabets..']) (گری?)([^'..persian_alphabets..'])', '%1‌%2%3') -- put zwnj before تر, تری, ترین, گر, گری, ها, های - line.text = utf8.gsub(line.text, '(['..persian_alphabets..']['..persian_alphabets..']) (تری?ن?)([^'..persian_alphabets..'])', '%1‌%2%3') -- put zwnj before تر, تری, ترین, گر, گری, ها, های - line.text = utf8.gsub(line.text, '(['..persian_alphabets..']['..persian_alphabets..']) (های?)$', '%1‌%2') -- put zwnj before تر, تری, ترین, گر, گری, ها, های - line.text = utf8.gsub(line.text, '(['..persian_alphabets..']['..persian_alphabets..']) (گری?)$', '%1‌%2') -- put zwnj before تر, تری, ترین, گر, گری, ها, های - line.text = utf8.gsub(line.text, '(['..persian_alphabets..']['..persian_alphabets..']) (تری?ن?)$', '%1‌%2') -- put zwnj before تر, تری, ترین, گر, گری, ها, های - line.text = utf8.gsub(line.text, '([^ ]ه) (ا[میشنت][مد]?)([^'..persian_alphabets..'])', '%1‌%2%3') -- join ام, ایم, اش, اند, ای, اید, ات + 
line.text = utf8.gsub(line.text, + '([' .. persian_alphabets .. '][' .. persian_alphabets .. ']) (های?)([^' .. persian_alphabets .. '])', + '%1‌%2%3') -- put zwnj before تر, تری, ترین, گر, گری, ها, های + line.text = utf8.gsub(line.text, + '([' .. persian_alphabets .. '][' .. persian_alphabets .. ']) (گری?)([^' .. persian_alphabets .. '])', + '%1‌%2%3') -- put zwnj before تر, تری, ترین, گر, گری, ها, های + line.text = utf8.gsub(line.text, + '([' .. persian_alphabets .. '][' .. persian_alphabets .. ']) (تری?ن?)([^' .. persian_alphabets .. '])', + '%1‌%2%3') -- put zwnj before تر, تری, ترین, گر, گری, ها, های + line.text = utf8.gsub(line.text, '([' .. persian_alphabets .. '][' .. persian_alphabets .. ']) (های?)$', + '%1‌%2') -- put zwnj before تر, تری, ترین, گر, گری, ها, های + line.text = utf8.gsub(line.text, '([' .. persian_alphabets .. '][' .. persian_alphabets .. ']) (گری?)$', + '%1‌%2') -- put zwnj before تر, تری, ترین, گر, گری, ها, های + line.text = utf8.gsub(line.text, '([' .. persian_alphabets .. '][' .. persian_alphabets .. ']) (تری?ن?)$', + '%1‌%2') -- put zwnj before تر, تری, ترین, گر, گری, ها, های + line.text = utf8.gsub(line.text, '([^ ]ه) (ا[میشنت][مد]?)([^' .. persian_alphabets .. 
'])', '%1‌%2%3') -- join ام, ایم, اش, اند, ای, اید, ات line.text = utf8.gsub(line.text, '([^ ]ه) (ا[میشنت][مد]?)$', '%1‌%2') -- join ام, ایم, اش, اند, ای, اید, ات - subtitles[i] = line - end - aegisub.set_undo_point(paknevis_script_name) + subtitles[i] = line + end + aegisub.set_undo_point(paknevis_script_name) end ----- Unretard ----- @@ -230,40 +249,48 @@ function Unretard(subtitles, selected_lines, active_line) -- unretard -- find local linetext_copy = line.text - line.text = utf8.gsub(line.text, '^(['..ending_punc..']+)([^\\]+)$', '%2gce') -- ending puncs - line.text = utf8.gsub(line.text, '^(['..ending_punc..']+)([^\\]+)(\\[Nn])', '%2gce%3') -- ending puncs - line.text = utf8.gsub(line.text, '(\\[Nn])(['..ending_punc..']+)([^\\]+)(\\[Nn])', '%1%3gce%4') -- ending puncs - line.text = utf8.gsub(line.text, '(\\[Nn])(['..ending_punc..']+)([^\\]+)$', '%1%3gce') -- ending puncs - line.text = utf8.gsub(line.text, '^([^\\]+[^'..starting_punc..'])(['..starting_punc..']+)(g?c?e?)$', 'gcs%1%3') -- starting puncs - line.text = utf8.gsub(line.text, '^([^\\]+[^'..starting_punc..'])(['..starting_punc..']+)(g?c?e?)(\\[Nn])', 'gcs%1%3%4') -- starting puncs - line.text = utf8.gsub(line.text, '(\\[Nn])([^\\]+[^'..starting_punc..'])(['..starting_punc..']+)(g?c?e?)(\\[Nn])', '%1gcs%2%4%5') -- starting puncs - line.text = utf8.gsub(line.text, '(\\[Nn])([^\\]+[^'..starting_punc..'])(['..starting_punc..']+)(g?c?e?)$', '%1gcs%2%3') -- starting puncs + line.text = utf8.gsub(line.text, '^([' .. ending_punc .. ']+)([^\\]+)$', '%2gce') -- ending puncs + line.text = utf8.gsub(line.text, '^([' .. ending_punc .. ']+)([^\\]+)(\\[Nn])', '%2gce%3') -- ending puncs + line.text = utf8.gsub(line.text, '(\\[Nn])([' .. ending_punc .. ']+)([^\\]+)(\\[Nn])', '%1%3gce%4') -- ending puncs + line.text = utf8.gsub(line.text, '(\\[Nn])([' .. ending_punc .. ']+)([^\\]+)$', '%1%3gce') -- ending puncs + line.text = utf8.gsub(line.text, '^([^\\]+[^' .. starting_punc .. '])([' .. starting_punc .. 
']+)(g?c?e?)$', + 'gcs%1%3') -- starting puncs + line.text = utf8.gsub(line.text, '^([^\\]+[^' .. starting_punc .. + '])([' .. starting_punc .. ']+)(g?c?e?)(\\[Nn])', 'gcs%1%3%4') -- starting puncs + line.text = utf8.gsub(line.text, + '(\\[Nn])([^\\]+[^' .. starting_punc .. '])([' .. starting_punc .. ']+)(g?c?e?)(\\[Nn])', '%1gcs%2%4%5') -- starting puncs + line.text = utf8.gsub(line.text, '(\\[Nn])([^\\]+[^' .. starting_punc .. + '])([' .. starting_punc .. ']+)(g?c?e?)$', '%1gcs%2%3') -- starting puncs -- replace - line.text = replace(linetext_copy, line.text, '^(['..ending_punc..']+)[^\\]+$', 'gce') - line.text = replace(linetext_copy, line.text, '^(['..ending_punc..']+)[^\\]+\\[Nn]', 'gce') - line.text = replace(linetext_copy, line.text, '\\[Nn](['..ending_punc..']+)[^\\]+\\[Nn]', 'gce') - line.text = replace(linetext_copy, line.text, '\\[Nn](['..ending_punc..']+)[^\\]+$', 'gce') - line.text = replace(linetext_copy, line.text, '^[^\\]+[^'..starting_punc..'](['..starting_punc..']+)$', 'gcs') - line.text = replace(linetext_copy, line.text, '^[^\\]+[^'..starting_punc..'](['..starting_punc..']+)\\[Nn]', 'gcs') - line.text = replace(linetext_copy, line.text, '\\[Nn][^\\]+[^'..starting_punc..'](['..starting_punc..']+)\\[Nn]', 'gcs') - line.text = replace(linetext_copy, line.text, '\\[Nn][^\\]+[^'..starting_punc..'](['..starting_punc..']+)$', 'gcs') + line.text = replace(linetext_copy, line.text, '^([' .. ending_punc .. ']+)[^\\]+$', 'gce') + line.text = replace(linetext_copy, line.text, '^([' .. ending_punc .. ']+)[^\\]+\\[Nn]', 'gce') + line.text = replace(linetext_copy, line.text, '\\[Nn]([' .. ending_punc .. ']+)[^\\]+\\[Nn]', 'gce') + line.text = replace(linetext_copy, line.text, '\\[Nn]([' .. ending_punc .. ']+)[^\\]+$', 'gce') + line.text = replace(linetext_copy, line.text, '^[^\\]+[^' .. starting_punc .. ']([' .. starting_punc .. + ']+)$', 'gcs') + line.text = replace(linetext_copy, line.text, '^[^\\]+[^' .. starting_punc .. + ']([' .. starting_punc .. 
']+)\\[Nn]', 'gcs') + line.text = replace(linetext_copy, line.text, + '\\[Nn][^\\]+[^' .. starting_punc .. ']([' .. starting_punc .. ']+)\\[Nn]', 'gcs') + line.text = replace(linetext_copy, line.text, '\\[Nn][^\\]+[^' .. starting_punc .. + ']([' .. starting_punc .. ']+)$', 'gcs') end - subtitles[i] = line - end - aegisub.set_undo_point(unretard_script_name) + subtitles[i] = line + end + aegisub.set_undo_point(unretard_script_name) end ----- RTL ----- function Rtl(subtitles, selected_lines, active_line) - for z, i in ipairs(selected_lines) do + for z, i in ipairs(selected_lines) do local l = subtitles[i] - - l.text = rtl(l.text) - - subtitles[i] = l - end - aegisub.set_undo_point(rtl_script_name) + + l.text = rtl(l.text) + + subtitles[i] = l + end + aegisub.set_undo_point(rtl_script_name) end ----- Un-RTL ----- @@ -286,44 +313,45 @@ local editor_btn = { } local function openEditor(str) - local btns = {"OK", "OK w/o RTL", "Cancel"} + local btns = { "OK", "OK w/o RTL", "Cancel" } local btn_switch_case = {} for key, value in pairs(btns) do btn_switch_case[value] = key end - local config = { - {class="label", label="Press Ctrl+Shift at the right side of your keyboard to switch to RTL mode.", x=0, y=0}, - {class="textbox", name="editor", value=str, x=0, y=1, width=33, height=11} + local config = { + { class = "label", label = "Press Ctrl+Shift at the right side of your keyboard to switch to RTL mode.", x = 0, + y = 0 }, + { class = "textbox", name = "editor", value = str, x = 0, y = 1, width = 33, height = 11 } } - local btn, result = aegisub.dialog.display(config, btns, {ok="OK", cancel="Cancel"}) + local btn, result = aegisub.dialog.display(config, btns, { ok = "OK", cancel = "Cancel" }) if btn == true then btn = "OK" elseif btn == false then btn = "Cancel" end - return btn_switch_case[btn], result.editor + return btn_switch_case[btn], result.editor end function RtlEditor(subtitles, selected_lines) - if #selected_lines > 1 then - return - end + if #selected_lines > 1 
then + return + end local line = subtitles[selected_lines[1]] local text = unrtl(line.text) - text = utf8.gsub(text, "\\[Nn]", "\n") - local btn, newText = openEditor(text) + text = utf8.gsub(text, "\\[Nn]", "\n") + local btn, newText = openEditor(text) - if btn == editor_btn.Cancel then - return + if btn == editor_btn.Cancel then + return end - newText = utf8.gsub(newText, "\n", "\\N") - if btn == editor_btn.Ok then + newText = utf8.gsub(newText, "\n", "\\N") + if btn == editor_btn.Ok then newText = rtl(newText) - end + end line.text = newText - - subtitles[selected_lines[1]] = line - aegisub.set_undo_point(rtleditor_script_name) + subtitles[selected_lines[1]] = line + + aegisub.set_undo_point(rtleditor_script_name) end ----- Split at Tags ----- @@ -343,11 +371,11 @@ Split.non_style_tags = { 'pos', 'move', 'org', 'fad', 'fade', 't', 'clip', 'iclip', 'p', 'pbo' } Split.style_names_tags = { - {'fontname','fn'}, {'fontsize','fs'}, - {'color1','1c','1a'}, {'color2','2c','2a'}, {'color3','3c','3a'}, {'color4','4c','4a'}, - {'bold','b'}, {'italic','i'}, {'underline','u'}, {'strikeout','s'}, - {'scale_x','fscx'}, {'scale_y','fscy'}, {'spacing','fsp'}, {'angle','frz'}, - {'outline','bord'}, {'shadow','shad'}, {'align','an'}, {'encoding','fe'} + { 'fontname', 'fn' }, { 'fontsize', 'fs' }, + { 'color1', '1c', '1a' }, { 'color2', '2c', '2a' }, { 'color3', '3c', '3a' }, { 'color4', '4c', '4a' }, + { 'bold', 'b' }, { 'italic', 'i' }, { 'underline', 'u' }, { 'strikeout', 's' }, + { 'scale_x', 'fscx' }, { 'scale_y', 'fscy' }, { 'spacing', 'fsp' }, { 'angle', 'frz' }, + { 'outline', 'bord' }, { 'shadow', 'shad' }, { 'align', 'an' }, { 'encoding', 'fe' } } Split.simple_text_value_tags = { 'fn', 'alpha', '1a', '2a', '3a', '4a', 'c', '1c', '2c', '3c', '4c', 'r' @@ -391,11 +419,11 @@ end function Split:parse_tags(tags, line_tags, current_appearance) -- TODO: add r support -- handle t tags - local t_tags={} + local t_tags = {} for t in tags:gmatch("\\t%b()") do -- Thanks lyger! 
table.insert(t_tags, t) end - tags = tags:gsub("\\t%b()","") -- remove t tags + tags = tags:gsub("\\t%b()", "") -- remove t tags if #t_tags > 0 then -- add to table current_appearance["t"] = t_tags end @@ -480,15 +508,15 @@ function Split:reverse(line) for tag, value in pairs(diff) do if tag == "t" then for _, t_tag in ipairs(value) do - rebuilt_tag = rebuilt_tag:gsub("}", t_tag.."}") + rebuilt_tag = rebuilt_tag:gsub("}", t_tag .. "}") end else - rebuilt_tag = rebuilt_tag:gsub("{","{\\"..tag..value) + rebuilt_tag = rebuilt_tag:gsub("{", "{\\" .. tag .. value) end end if i == #tag_text_table then for tag, value in pairs(line_tags) do - rebuilt_tag = rebuilt_tag:gsub("{","{\\"..tag..value) + rebuilt_tag = rebuilt_tag:gsub("{", "{\\" .. tag .. value) end end val.tag = rebuilt_tag @@ -497,7 +525,7 @@ function Split:reverse(line) val.text, _ = re.sub(val.text, "^( *)(.*?)( *)$", "\\3\\2\\1") -- rebuild line - line.text = line.text..val.tag..val.text + line.text = line.text .. val.tag .. val.text end return line @@ -538,8 +566,8 @@ function Split:splitAtTags(line) posx = line.eff_margin_l elseif _temp == 2 then posx = line.eff_margin_l + - (vid_x - line.eff_margin_l - - line.eff_margin_r) / 2 + (vid_x - line.eff_margin_l - + line.eff_margin_r) / 2 else posx = vid_x - line.eff_margin_r end @@ -560,8 +588,8 @@ function Split:splitAtTags(line) posx = line.eff_margin_l elseif _temp == 2 then posx = line.eff_margin_l + - (vid_x - line.eff_margin_l - line.eff_margin_r) / - 2 + (vid_x - line.eff_margin_l - line.eff_margin_r) / + 2 else posx = vid_x - line.eff_margin_r end @@ -643,7 +671,7 @@ function Split:splitAtTags(line) local line_table_copy = util.copy(line_table) for i, e in ipairs(line_table_copy) do local m = re.match(e.text, "^( *)(.*?)( *)$") - + if m[2].str ~= "" then table.insert(line_table, i + lines_added, { tag = e.tag, text = rtl(m[2].str) }) lines_added = lines_added + 1 @@ -677,15 +705,15 @@ function Split:splitAtTags(line) -- Fix style tables to reflect 
override tags current_style.fontname = subtable["fn"] or current_style.fontname current_style.fontsize = tonumber(subtable["fs"]) or - current_style.fontsize + current_style.fontsize current_style.scale_x = tonumber(subtable["fscx"]) or - current_style.scale_x + current_style.scale_x current_style.scale_y = tonumber(subtable["fscy"]) or - current_style.scale_y + current_style.scale_y current_style.spacing = tonumber(subtable["fsp"]) or - current_style.spacing + current_style.spacing current_style.align = tonumber(subtable["an"]) or - current_style.align + current_style.align if subtable["b"] ~= nil then if subtable["b"] == "1" then current_style.bold = true @@ -718,7 +746,7 @@ function Split:splitAtTags(line) -- Get extents of the section. _sdesc is not used -- Temporarily remove all newlines first local swidth, sheight, _sdesc, sext = - aegisub.text_extents(current_style, val.text:gsub("\n", "")) + aegisub.text_extents(current_style, val.text:gsub("\n", "")) -- aegisub.log("Text: %s\n--w: %.3f\n--h: %.3f\n--d: %.3f\n--el: %.3f\n\n", -- val.text, swidth, sheight, _sdesc, sext) @@ -727,7 +755,7 @@ function Split:splitAtTags(line) cum_width = cum_width + swidth -- Total height of the line - local theight=0 + local theight = 0 -- Handle tasks for a line that has a newline --[[if val.text:match("\n")~=nil then @@ -863,7 +891,7 @@ function Split:splitAtTags(line) -- Start rebuilding text local rebuilt_tag = string.format("{\\pos(%s,%s)}", float2str(new_x), - float2str(py)) + float2str(py)) -- Add the remaining tags for tag, param in pairs(current_subtable) do @@ -878,8 +906,8 @@ function Split:splitAtTags(line) if do_org then rebuilt_tag = rebuilt_tag:gsub("{", string.format( - "{\\org(%s,%s)", - float2str(ox), float2str(oy))) + "{\\org(%s,%s)", + float2str(ox), float2str(oy))) end -- reverse back text @@ -892,7 +920,7 @@ function Split:splitAtTags(line) -- clean text val.text = re.sub(val.text, '^ +', '') -- trim redundant spaces val.text = re.sub(val.text, ' +$', '') 
- val.text = re.sub(val.text, '^['..RLE..' ]+$', '') + val.text = re.sub(val.text, '^[' .. RLE .. ' ]+$', '') new_line.text = rebuilt_tag .. val.text @@ -946,8 +974,8 @@ function SplitAtSpaces(subtitles, selected_lines, active_line) local parts = expand(line.text) line.text = "" for _, p in ipairs(parts) do - p.text, _ = re.sub(p.text, "( +)", "{}"..RLE.."\\1") - line.text = line.text..p.tag..p.text + p.text, _ = re.sub(p.text, "( +)", "{}" .. RLE .. "\\1") + line.text = line.text .. p.tag .. p.text end lines[i] = line end @@ -1014,13 +1042,55 @@ function ReverseAtTags(subtitles, selected_lines, active_line) aegisub.set_undo_point(reverse_at_tags_script_name) end +----- Extend Move ----- +function ExtendMove(subtitles, selected_lines, active_line) + for _, i in ipairs(selected_lines) do + local line = subtitles[i] + + line.text = utf8.gsub(line.text, + "\\move%(([%d%.%-]*),([%d%.%-]*),([%d%.%-]*),([%d%.%-]*),([%d%.%-]*),([%d%.%-]*)%)", + function(x1, y1, x2, y2, t1, t2) + local f1 = aegisub.frame_from_ms(line.start_time + t1) + if f1 ~= nil then + t1 = aegisub.ms_from_frame(f1) + local f2 = aegisub.frame_from_ms(line.start_time + t2) + t2 = aegisub.ms_from_frame(f2) + end + local dt = t2 - t1 + local dxdt = (x2 - x1) / dt + local dydt = (y2 - y1) / dt + + local s = aegisub.ms_from_frame(aegisub.frame_from_ms(line.start_time)) + local e = aegisub.ms_from_frame(aegisub.frame_from_ms(line.end_time)) + local ds = t1 - s + local de = e - t2 + if ds < 0 then ds = 0 end + if de < 0 then de = 0 end + + x1 = round(x1 - ds * dxdt, 2) + x2 = round(x2 + de * dxdt, 2) + y1 = round(y1 - ds * dydt, 2) + y2 = round(y2 + de * dydt, 2) + + return "\\move(" .. x1 .. "," .. y1 .. "," .. x2 .. "," .. y2 .. ")" + end) + + subtitles[i] = line + end + + aegisub.set_undo_point(extend_move_script_name) +end + ----- Register Scripts ----- aegisub.register_macro(paknevis_script_name, 'Fix your shity writing habbits! 
(Unretarded Lines Only)', PakNevis) +aegisub.register_macro(extend_move_script_name, 'Extend \\move based on line\'s time.', ExtendMove) aegisub.register_macro(unretard_script_name, 'Unretard your retarted Persian typing! (Retarded Lines Only)', Unretard) aegisub.register_macro(rtl_script_name, 'Fix RTL languages displaying issues. (Unretarded Lines Only)', Rtl) aegisub.register_macro(unrtl_script_name, 'Undo RTL function effects.', Unrtl) aegisub.register_macro(rtleditor_script_name, 'An editor for easy editing of RTL language lines.', RtlEditor) aegisub.register_macro(split_at_tags_script_name, 'A splitter (at tags) for RTL language lines.', SplitAtTags) aegisub.register_macro(split_at_spaces_script_name, 'A splitter (at spaces) for RTL language lines.', SplitAtSpaces) -aegisub.register_macro(reverse_split_at_tags_script_name, 'Split / Reverse at Tags + Split / Split at Tags.', ReverseSplitAtTags) -aegisub.register_macro(reverse_at_tags_script_name, 'Reverse line at tags to use it with other LTR automations.', ReverseAtTags) +aegisub.register_macro(reverse_split_at_tags_script_name, 'Split / Reverse at Tags + Split / Split at Tags.', + ReverseSplitAtTags) +aegisub.register_macro(reverse_at_tags_script_name, 'Reverse line at tags to use it with other LTR automations.', + ReverseAtTags) diff --git a/include/AL/utf8/README.md b/include/AL/utf8/README.md index 1b2d803..0c31574 100644 --- a/include/AL/utf8/README.md +++ b/include/AL/utf8/README.md @@ -36,10 +36,15 @@ utf8.raw.gsub(str, "ло+", "보라") It also provides all functions from Lua 5.3 UTF-8 [module](https://www.lua.org/manual/5.3/manual.html#6.5) except `utf8.len (s [, i [, j]])`. If you need to validate your strings use `utf8.validate(str, byte_pos)` or iterate over with `utf8.validator`. +Please note that library assumes regexes are valid UTF-8 strings, if you need to manipulate individual bytes use vanilla functions under `utf8.raw`. + + #### Installation: Download repository to your project folder. 
(no rockspecs yet) +Examples assume library placed under `utf8` subfolder not `utf8.lua`. + As of Lua 5.3 default `utf8` module has precedence over user-provided. In this case you can specify full module path (`.utf8`). #### Configuration: @@ -57,6 +62,32 @@ utf8.config = { } utf8:init() ``` + +For `lower` and `upper` functions to work in environments where `ffi` cannot be used, you can specify substitution tables ([data example](https://github.com/artemshein/luv/blob/master/utf8data.lua)) + +```Lua +local utf8 = require('.utf8') +utf8.config = { + conversion = { + uc_lc = utf8_uc_lc, + lc_uc = utf8_lc_uc + }, +} +utf8:init() +``` Customization is done before initialization. If you want, you can change configuration after `init`, it might work for everything but modules. All of them should be reloaded. #### [Documentation:](test/test.lua) + +#### Issue reporting: + +Please provide example script that causes error together with environment description and debug output. Debug output can be obtained like: +```Lua +local utf8 = require('.utf8') +utf8.config = { + debug = utf8:require("util").debug +} +utf8:init() +-- your code +``` +Default logger used is [`io.write`](https://www.lua.org/manual/5.3/manual.html#pdf-io.write) and can be changed by specifying `logger = my_logger` in configuration diff --git a/include/AL/utf8/functions/lua53.lua b/include/AL/utf8/functions/lua53.lua index bc31301..26e6f23 100644 --- a/include/AL/utf8/functions/lua53.lua +++ b/include/AL/utf8/functions/lua53.lua @@ -5,6 +5,7 @@ local utf8gensub = utf8.gensub local unpack = utf8.config.unpack local generate_matcher_function = utf8:require 'regex_parser' +local function get_matcher_function(regex, plain) local res if utf8.config.cache then diff --git a/include/AL/utf8/init.lua b/include/AL/utf8/init.lua index 6aea97f..d2f72a4 100644 --- a/include/AL/utf8/init.lua +++ b/include/AL/utf8/init.lua @@ -25,7 +25,11 @@ local utf8 = { else return {} end - end + end, + conversion = { + uc_lc = nil, + 
lc_uc = nil + } }, regex = { compiletime = { diff --git a/include/AL/utf8/primitives/dummy.lua b/include/AL/utf8/primitives/dummy.lua index ae3929b..a4665f5 100644 --- a/include/AL/utf8/primitives/dummy.lua +++ b/include/AL/utf8/primitives/dummy.lua @@ -12,11 +12,6 @@ -- * utf8gmatch(str, regex, all) -- * utf8gsub(str, regex, repl, limit) -- --- If utf8data.lua (containing the lower<->upper case mappings) is loaded, these --- additional functions are available: --- * utf8upper(s) --- * utf8lower(s) --- -- All functions behave as their non UTF-8 aware counterparts with the exception -- that UTF-8 characters are used instead of bytes for all units. @@ -76,6 +71,8 @@ local rep = string.rep local sub = string.sub local upper = string.upper +local utf8charpattern = '[%z\1-\127\194-\244][\128-\191]*' + local function utf8symbollen(byte) return not byte and 0 or (byte < 0x80 and 1) or (byte >= 0xF0 and 4) or (byte >= 0xE0 and 3) or (byte >= 0xC0 and 2) or 1 end @@ -494,6 +491,33 @@ local function utf8offset(str, n, bs) end +local function utf8replace (s, mapping) + if type(s) ~= "string" then + error("bad argument #1 to 'utf8replace' (string expected, got ".. type(s).. ")") + end + if type(mapping) ~= "table" then + error("bad argument #2 to 'utf8replace' (table expected, got ".. type(mapping).. 
")") + end + local result = utf8.raw.gsub( s, utf8charpattern, mapping ) + return result +end + +local function utf8upper (s) + return utf8replace(s, utf8.config.conversion.lc_uc) +end + +if utf8.config.conversion.lc_uc then + upper = utf8upper +end + +local function utf8lower (s) + return utf8replace(s, utf8.config.conversion.uc_lc) +end + +if utf8.config.conversion.uc_lc then + lower = utf8lower +end + utf8.len = utf8len utf8.sub = utf8sub utf8.reverse = utf8reverse @@ -514,7 +538,7 @@ for k,v in pairs(string) do utf8.raw[k] = v end -utf8.charpattern = '[\0-\127\194-\244][\128-\191]*' +utf8.charpattern = utf8charpattern utf8.offset = utf8offset if _VERSION == 'Lua 5.3' then local utf8_53 = require "utf8" diff --git a/include/AL/utf8/primitives/native.lua b/include/AL/utf8/primitives/native.lua index 7ad2aec..c9aca54 100644 --- a/include/AL/utf8/primitives/native.lua +++ b/include/AL/utf8/primitives/native.lua @@ -1,52 +1,56 @@ return function(utf8) - local ffi = require("ffi") - if ffi.os == "Windows" then - os.setlocale(utf8.config.locale or "english_us.65001", "ctype") - ffi.cdef[[ - short towupper(short c); - short towlower(short c); - ]] - else - os.setlocale(utf8.config.locale or "C.UTF-8", "ctype") - ffi.cdef[[ - int towupper(int c); - int towlower(int c); - ]] - end +local ffi = require("ffi") +if ffi.os == "Windows" then + os.setlocale(utf8.config.locale or "english_us.65001", "ctype") + ffi.cdef[[ + short towupper(short c); + short towlower(short c); + ]] +else + os.setlocale(utf8.config.locale or "C.UTF-8", "ctype") + ffi.cdef[[ + int towupper(int c); + int towlower(int c); + ]] +end utf8:require "primitives.dummy" -function utf8.lower(str) - local bs = 1 - local nbs - local bytes = utf8.raw.len(str) - local res = {} - - while bs <= bytes do - nbs = utf8.next(str, bs) - local cp = utf8.unicode(str, bs, nbs) - res[#res + 1] = ffi.C.towlower(cp) - bs = nbs +if not utf8.config.conversion.uc_lc then + function utf8.lower(str) + local bs = 1 + local nbs + 
local bytes = utf8.raw.len(str) + local res = {} + + while bs <= bytes do + nbs = utf8.next(str, bs) + local cp = utf8.unicode(str, bs, nbs) + res[#res + 1] = ffi.C.towlower(cp) + bs = nbs + end + + return utf8.char(utf8.config.unpack(res)) end - - return utf8.char(utf8.config.unpack(res)) end -function utf8.upper(str) - local bs = 1 - local nbs - local bytes = utf8.raw.len(str) - local res = {} - - while bs <= bytes do - nbs = utf8.next(str, bs) - local cp = utf8.unicode(str, bs, nbs) - res[#res + 1] = ffi.C.towupper(cp) - bs = nbs +if not utf8.config.conversion.lc_uc then + function utf8.upper(str) + local bs = 1 + local nbs + local bytes = utf8.raw.len(str) + local res = {} + + while bs <= bytes do + nbs = utf8.next(str, bs) + local cp = utf8.unicode(str, bs, nbs) + res[#res + 1] = ffi.C.towupper(cp) + bs = nbs + end + + return utf8.char(utf8.config.unpack(res)) end - - return utf8.char(utf8.config.unpack(res)) end return utf8 diff --git a/include/AL/utf8/test.sh b/include/AL/utf8/test.sh index 0a336e0..b8d2d63 100644 --- a/include/AL/utf8/test.sh +++ b/include/AL/utf8/test.sh @@ -12,7 +12,8 @@ for test in \ test/context_runtime.lua \ test/test.lua \ test/test_compat.lua \ - test/test_pm.lua + test/test_pm.lua \ + test/test_utf8data.lua do $lua53 $test $lua51 $test diff --git a/include/AL/utf8/test/charclass_compiletime.lua b/include/AL/utf8/test/charclass_compiletime.lua index 945e202..05d762d 100644 --- a/include/AL/utf8/test/charclass_compiletime.lua +++ b/include/AL/utf8/test/charclass_compiletime.lua @@ -1,6 +1,7 @@ local utf8 = require "init" utf8.config = { - debug = nil, --utf8:require("util").debug + debug = nil, +-- debug = utf8:require("util").debug, } utf8:init() diff --git a/include/AL/utf8/test/test.lua b/include/AL/utf8/test/test.lua index 6758033..8653b5d 100644 --- a/include/AL/utf8/test/test.lua +++ b/include/AL/utf8/test/test.lua @@ -1,8 +1,10 @@ local utf8 = require('init') utf8.config = { - debug = nil, --utf8:require("util").debug + debug = 
nil, +-- debug = utf8:require("util").debug, } utf8:init() + for k,v in pairs(utf8) do string[k] = v end diff --git a/include/AL/utf8/test/test_compat.lua b/include/AL/utf8/test/test_compat.lua index 74fc941..d5042a5 100644 --- a/include/AL/utf8/test/test_compat.lua +++ b/include/AL/utf8/test/test_compat.lua @@ -33,7 +33,7 @@ assert(utf8.sub("\000123456789", 8) == "789") print('+') assert(utf8.find("123456789", "345") == 3) -a,b = utf8.find("123456789", "345") +local a,b = utf8.find("123456789", "345") assert(utf8.sub("123456789", a, b) == "345") assert(utf8.find("1234567890123456789", "345", 3) == 3) assert(utf8.find("1234567890123456789", "345", 4) == 13) @@ -102,7 +102,7 @@ print('+') do local f = utf8.gmatch("1 2 3 4 5", "%d+") assert(f() == "1") - co = coroutine.wrap(f) + local co = coroutine.wrap(f) assert(co() == "2") end diff --git a/include/AL/utf8/test/test_pm.lua b/include/AL/utf8/test/test_pm.lua index 0bbcd31..9c8e472 100644 --- a/include/AL/utf8/test/test_pm.lua +++ b/include/AL/utf8/test/test_pm.lua @@ -30,11 +30,13 @@ utf8:init() print('testing pattern matching') +local function f(s, p) local i,e = utf8.find(s, p) if i then return utf8.sub(s, i, e) end end +local function f1(s, p) p = utf8.gsub(p, "%%([0-9])", function (s) return "%" .. (tonumber(s)+1) end) p = utf8.gsub(p, "^(^?)", "%1()", 1) @@ -43,6 +45,7 @@ function f1(s, p) return utf8.sub(s, t[1], t[#t] - 1) end +local a,b = utf8.find('', '') -- empty patterns are tricky assert(a == 1 and b == 0); a,b = utf8.find('alo', '') @@ -132,12 +135,15 @@ local abc = utf8.char(range(0, 255)); assert(utf8.len(abc) == 256) assert(string.len(abc) == 384) +local function strset (p) local res = {s=''} utf8.gsub(abc, p, function (c) res.s = res.s .. 
c end) return res.s end; +local a, b, c, d, e, t + -- local E = utf8.escape -- assert(utf8.len(strset(E'[%200-%210]')) == 11) @@ -157,7 +163,7 @@ assert(utf8.match("254 K", "(%d*)K") == "") assert(utf8.match("alo ", "(%w*)$") == "") assert(utf8.match("alo ", "(%w+)$") == nil) assert(utf8.find("(álo)", "%(á") == 1) -local a, b, c, d, e = utf8.match("âlo alo", "^(((.).).* (%w*))$") +a, b, c, d, e = utf8.match("âlo alo", "^(((.).).* (%w*))$") assert(a == 'âlo alo' and b == 'âl' and c == 'â' and d == 'alo' and e == nil) a, b, c, d = utf8.match('0123456789', '(.+(.?)())') assert(a == '0123456789' and b == '' and c == 11 and d == nil) @@ -203,6 +209,7 @@ x = utf8.gsub("$local utf8=require'init' x=utf8.gsub('alo', '.', utf8.upper)$ as "$([^$]*)%$", dostring) assert(x == ' assim vai para ALO') +local s,r t = {} s = 'a alo jose joao' r = utf8.gsub(s, '()(%w+)()', function (a,w,b) @@ -211,7 +218,7 @@ r = utf8.gsub(s, '()(%w+)()', function (a,w,b) end) assert(s == r and t[1] == 1 and t[3] == 3 and t[7] == 4 and t[13] == 4) - +local function isbalanced (s) return utf8.find(utf8.gsub(s, "%b()", ""), "[()]") == nil end @@ -273,7 +280,7 @@ Stepets: ignoring this test because it's probably bug in Lua. 
-- end -- recursive nest of gsubs -function rev (s) +local function rev (s) return utf8.gsub(s, "(.)(.+)", function (c,s1) return rev(s1)..c end) end diff --git a/include/AL/utf8/test/test_utf8data.lua b/include/AL/utf8/test/test_utf8data.lua new file mode 100644 index 0000000..e915b2b --- /dev/null +++ b/include/AL/utf8/test/test_utf8data.lua @@ -0,0 +1,15 @@ +local utf8uclc = require('init') +utf8uclc.config = { + debug = nil, +-- debug = utf8:require("util").debug, + conversion = { + uc_lc = setmetatable({}, {__index = function(self, idx) return "l" end}), + lc_uc = setmetatable({}, {__index = function(self, idx) return "u" end}), + } +} +utf8uclc:init() + +local assert_equals = require 'test.util'.assert_equals + +assert_equals(utf8uclc.lower("фыва"), "llll") +assert_equals(utf8uclc.upper("фыва"), "uuuu")