Skip to content

Commit

Permalink
Continued implementation of generating translations
Browse files Browse the repository at this point in the history
  • Loading branch information
Logonz committed Oct 4, 2024
1 parent ed07e4a commit 5652c9b
Show file tree
Hide file tree
Showing 15 changed files with 1,161 additions and 85 deletions.
192 changes: 124 additions & 68 deletions .generate_database_lua/generate_translation_trie_root.lua
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ require("cli.dump")
---@alias filepath string

-- Output layout: the generated lookup/XML files are written directly into
-- root_folder, the HTML files are written below root_folder/data_folder.
local root_folder = "Translations"
local data_folder = "_data"
-- Path handed to write_trie_structure as the base for the HTML files
local full_path = root_folder .. "/" .. data_folder

-- Variable to disable the writing of html
local write_html = true
Expand All @@ -16,9 +18,30 @@ local splitCharacter = "‡"
-- Upper bound on translations per generated file
-- NOTE(review): the code that enforces this limit is outside this excerpt — confirm in create_trie
local MAX_TRANSLATIONS_PER_FILE = 50
-- Nominal size of one segment; write_html_file compares the byte length of the
-- joined translations against SEGMENT_SIZE - REDUCE_SEGMENT_SIZE
local SEGMENT_SIZE = 4000
-- Safety margin subtracted from SEGMENT_SIZE: 5% of SEGMENT_SIZE, clamped to the
-- range [10, 100] (so 100 for the current SEGMENT_SIZE of 4000)
local REDUCE_SEGMENT_SIZE = math.max(math.min(SEGMENT_SIZE * 0.05, 100), 10)

-- Short alias for string.format
local f = string.format

-- Locale list, taken from Questie:
-- https://github.com/Questie/Questie/blob/2e2c44dc42dd66fb144be8ba0115287da5b7cd8e/Localization/l10n.lua#L22
-- The order matters: Compile_translations_to_html walks this list with ipairs,
-- skips enUS, and emits one entry per remaining locale (an empty string when a
-- locale has no string translation).
local localeOrder = {
'enUS',
'esES',
'esMX',
'ptBR',
'frFR',
'deDE',
'ruRU',
'zhCN',
'zhTW',
'koKR',
}

-- Log the active limits when the script is loaded
print("Max translations per file: " .. MAX_TRANSLATIONS_PER_FILE)
-- The second value is the effective threshold (SEGMENT_SIZE minus the safety margin)
print("Segment size: " .. SEGMENT_SIZE, "Reduced segment size: " .. SEGMENT_SIZE - REDUCE_SEGMENT_SIZE)

-- Local aliases for the table functions used throughout this file
local tConcat = table.concat
local tInsert = table.insert

--- Function to sanitize translation strings by replacing special characters with HTML entities
---@param str string
---@return string
Expand All @@ -37,49 +60,62 @@ local function mkdir(path)
os.execute("mkdir -p " .. path)
end

--- Split a list of strings into segments of at most max_chars bytes.
--- Consecutive entries are joined with splitCharacter. An entry is never cut in
--- half, so a single entry longer than max_chars ends up alone in an oversized
--- segment. Every returned segment carries a numeric prefix: the first segment is
--- prefixed with the total number of segments, every later segment with its own
--- 1-based index.
---@param tbl string[] The strings to distribute over segments
---@param max_chars number The maximum number of bytes per segment (prefix excluded)
---@return string[] segments The prefixed segments; empty when tbl is empty
local function split_into_segments(tbl, max_chars)
    local segments = {}
    local current = {}   -- entries of the segment currently being filled
    local current_len = 0 -- byte length of tConcat(current, splitCharacter)

    for _, entry in ipairs(tbl) do
        -- Length the open segment would have with this entry appended
        local candidate_len = #entry
        if #current > 0 then
            candidate_len = current_len + #splitCharacter + #entry
        end

        if #current > 0 and candidate_len > max_chars then
            -- The entry does not fit: close the open segment and start a new one
            tInsert(segments, tConcat(current, splitCharacter))
            current = { entry, }
            current_len = #entry
        else
            tInsert(current, entry)
            current_len = candidate_len
        end
    end

    -- Flush whatever is left in the open segment
    if #current > 0 then
        tInsert(segments, tConcat(current, splitCharacter))
    end

    -- Prefix the segments: total count on the first one, own index on the rest
    local total = #segments
    local ret_segments = {}
    for idx, segment in ipairs(segments) do
        local prefix = idx
        if idx == 1 then
            prefix = total
        end
        ret_segments[idx] = tostring(prefix) .. segment
    end

    return ret_segments
end

--- Function to write translations to an HTML file with segmentation
---@param key_path string The path for the translation e.g. translations/u/s/e/t/h/e
---@param translations string[]
---@param translation_func fun(string):string[] A function that takes the english string and returns all translations
---@param translation_func fun(string):string[], string A function that takes the english string and returns all translations
local function write_html_file(key_path, translations, translation_func)
-- ? This stops the function from writing in subfolders but instead writes in the root folder
-- ? Remember to activate the folder creation function in the create_trie_folders function if disabled
-- "%-" Always escape
local replaceChar = ""
key_path = key_path:gsub("/", replaceChar)
key_path = key_path:gsub(root_folder .. replaceChar, root_folder .. "/", 1) -- We also removed the root folder from the key_path, add back the /
key_path = key_path:gsub(full_path:gsub("/", replaceChar) .. replaceChar, full_path .. "/", 1) -- We also removed the root folder from the key_path, add back the /

-- Define the file path
local filename = key_path .. ".html"

-- print(filename)

-- Open the file for writing
if write_html then
local file, err = io.open(filename, "w")
Expand All @@ -91,22 +127,20 @@ local function write_html_file(key_path, translations, translation_func)
-- Write the HTML structure
file:write("<html><body>\n")
for _, translation in ipairs(translations) do
local fullTranslationTable = translation_func(translation)
local fullTranslationTable, enUS = translation_func(translation)
file:write("<!--" .. enUS .. "-->\n")



-- Check if the translation needs to be segmented
if #table.concat(fullTranslationTable, splitCharacter) > SEGMENT_SIZE - REDUCE_SEGMENT_SIZE then
if #tConcat(fullTranslationTable, splitCharacter) > SEGMENT_SIZE - REDUCE_SEGMENT_SIZE then
print("Splitting translation into segments", translation, filename)
-- Split the translation into segments
local segments = split_into_segments(table.concat(fullTranslationTable, splitCharacter), SEGMENT_SIZE - REDUCE_SEGMENT_SIZE)
-- Write each segment as a separate paragraph
local segments = split_into_segments(fullTranslationTable, SEGMENT_SIZE - REDUCE_SEGMENT_SIZE)
for _, segment in ipairs(segments) do
file:write("<p>" .. sanitize_translation(segment) .. "</p>\n")
end
else
-- Write the translation as a single paragraph
file:write("<p>" .. sanitize_translation(table.concat(fullTranslationTable, splitCharacter)) .. "</p>\n")
local concatenated = tConcat(fullTranslationTable, splitCharacter)
file:write("<p>" .. sanitize_translation(concatenated) .. "</p>\n")
end
end
file:write("</body></html>\n")
Expand All @@ -118,7 +152,7 @@ end
---comment
---@param trie trie
---@param current_path filepath
---@param translation_func fun(translation:string):string[] A function that takes the english string and returns all translations
---@param translation_func fun(translation:string):string[], string A function that takes the english string and returns all translations
local function write_trie_structure(trie, current_path, translation_func)
-- print("Current path: " .. current_path)
for trieKey, translations in pairs(trie) do
Expand Down Expand Up @@ -148,28 +182,31 @@ local function create_trie(strings, stringIndex)
-- Process each string in the input array
for i = 1, #strings do
local string = strings[i]
-- Remove all whitespaces from the string
local cleanedString = string.gsub(string, "%s", "")
-- Remove all numbers from the string
-- Remove all numbers from the string (Not in use as some strings are very short and contains numbers)
-- cleanedString = string.gsub(cleanedString, "%d+", "")
-- Remove all punctuation from the string
cleanedString = string.gsub(cleanedString, "%p", "")
-- Remove all control characters from the string
cleanedString = string.gsub(cleanedString, "%c", "")

-- Get the character at the current index
-- local char = string.sub(string.lower(cleanedString), stringIndex, stringIndex)
local char = string.sub(cleanedString, stringIndex, stringIndex)

if char == "" then
-- error(string.format("%s: %d out of range, increase MAX_TRANSLATIONS_PER_FILE", string, stringIndex))
print(string.format("%s: %d out of range", string, stringIndex))
-- error(f("%s: %d out of range, increase MAX_TRANSLATIONS_PER_FILE", string, stringIndex))
print(f("%s: %d out of range", string, stringIndex))
char = "."
-- table.insert(parent[char], string)
-- tInsert(parent[char], string)
-- else
end
-- Create a new branch for the character if it doesn't exist
if not branch[char] then
branch[char] = {}
end
table.insert(branch[char], string)
tInsert(branch[char], string)
end

-- Recursively create branches for child nodes if needed
Expand Down Expand Up @@ -216,24 +253,11 @@ local function replaceArrays(t, filepath)
end
end

local localeOrder = {
'enUS',
'esES',
'esMX',
'ptBR',
'frFR',
'deDE',
'ruRU',
'zhCN',
'zhTW',
'koKR',
}

-- Main function to compile translations to HTML
---comment
---@param strings string[]
---@param addonName string The folder name for the addon, it is for the path in the XML files
---@param translation_func fun(string):table<string, string|boolean> A function that takes the english string and returns all translations
function Compile_translations_to_html(strings, translation_func)
function Compile_translations_to_html(strings, addonName, translation_func)
-- Initialize the trie
local success, trie
repeat
Expand All @@ -246,58 +270,90 @@ function Compile_translations_to_html(strings, translation_func)
until success

mkdir(root_folder)
mkdir(root_folder .. "/" .. data_folder)

--- Collect the non-enUS translations of one enUS string in localeOrder order.
--- Each locale with a string translation becomes "<locale>[<text>]"; a locale
--- without one contributes an empty string. enUS itself is not part of the list
--- and is returned separately.
---@param enUStext string The enUS source text
---@return string[]? translations One entry per non-enUS locale, nil when translation_func reports an error
---@return string enUSorError The enUS text on success, the error from translation_func on failure
local function getTranslation(enUStext)
    local allTranslations, err = translation_func(enUStext)
    if err then
        return nil, err
    end

    local combinedTranslations = {}
    for _, locale in ipairs(localeOrder) do
        if locale ~= "enUS" then
            local text = allTranslations[locale]
            if type(text) == "string" then
                tInsert(combinedTranslations, locale .. "[" .. text .. "]")
            else
                -- Placeholder for a locale without a string translation
                tInsert(combinedTranslations, "")
            end
        end
    end

    return combinedTranslations, enUStext
end

-- Create trie folders and write translations
write_trie_structure(trie, root_folder, getTranslation)
write_trie_structure(trie, full_path, getTranslation)

-- Replace the actual string arrays with the template xml name
replaceArrays(trie, "")

-- HTML file names found by the most recent top-level printTable call
local allHTMLFiles = {}

--- Serialize a (nested) table as Lua table-constructor fields and collect every
--- value that names an HTML file into allHTMLFiles.
--- Values containing the literal text ".html" are written as quoted strings,
--- all other values are wrapped in [[ ]].
---@param t table The table to serialize
---@param indent string? Indentation prefix; "" (or nil) marks the top-level call
---@return string The serialized fields, one per line
local function printTable(t, indent)
    indent = indent or ""
    -- A top-level call starts a fresh collection of HTML files
    if indent == "" then
        allHTMLFiles = {}
    end

    local lines = {}
    for k, v in pairs(t) do
        local key = "[\"" .. tostring(k) .. "\"]"
        if type(v) == "table" then
            tInsert(lines, indent .. key .. " = {")
            tInsert(lines, printTable(v, indent .. " "))
            tInsert(lines, indent .. "},")
        else
            local value = tostring(v)
            -- Plain search: "." must match a literal dot, not any character
            if string.find(value, ".html", 1, true) then
                tInsert(lines, indent .. key .. " = \"" .. value .. "\",")
                tInsert(allHTMLFiles, value)
            else
                tInsert(lines, indent .. key .. " = [[" .. value .. "]],")
            end
        end
    end
    return tConcat(lines, "\n")
end

-- Serialize the trie once up front: this also fills allHTMLFiles, which the XML
-- file below needs even when the Lua lookup file cannot be opened.
local dump_str = printTable(trie, "") --dump_trie(point_to_html(trie), 1)

-- ? Generate the lookup file loaded in Lua
local lookup_path = root_folder .. "/TranslationsLookup_gen.lua"
local lua_file, lua_err = io.open(lookup_path, "w")
if lua_file then
    lua_file:write("-- ! File generated by generate_translation_trie_root.lua --\n")
    lua_file:write("-- ! DO NOT EDIT --\n")
    lua_file:write("---@class LibQuestieDB\n")
    lua_file:write("---@field translationsLookup table<string, table<string, any>|string> Contains lookup for HTML files for translations\n")
    lua_file:write("local LibQuestieDB = select(2, ...)\n")
    lua_file:write("\n")
    lua_file:write(f("LibQuestieDB.translationsLookup = {\n%s\n}", dump_str))
    lua_file:close()
else
    print("Could not open " .. lookup_path .. " for writing: " .. tostring(lua_err))
end

-- ? Generate the XML file that creates the virtual SimpleHTML objects
-- NOTE(review): the path below hard-codes a lowercase "translations" folder while
-- root_folder is "Translations" — confirm the game client resolves this case-insensitively
local fileString = '<SimpleHTML name="%s" file="Interface\\AddOns\\%s\\translations\\%s\\%s" virtual="true" font="GameFontNormal"/>\n'
local xml_path = root_folder .. "/TranslationsFiles_gen.xml"
local xml_file, xml_err = io.open(xml_path, "w")
if xml_file then
    xml_file:write('<Ui xsi:schemaLocation="http://www.blizzard.com/wow/ui/ ..\\FrameXML\\UI.xsd">\n')
    -- allHTMLFiles is a plain sequence, so ipairs keeps the collection order
    for _, htmlfile in ipairs(allHTMLFiles) do
        xml_file:write(f(fileString, htmlfile, addonName, data_folder, htmlfile))
    end
    xml_file:write('</Ui>')
    -- Close explicitly so the content is flushed to disk
    xml_file:close()
else
    print("Could not open " .. xml_path .. " for writing: " .. tostring(xml_err))
end
end
3 changes: 3 additions & 0 deletions .generate_database_lua/helpers.lua
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ local function find_addon_name()
print("Found Addons folder: " .. addon_dir)
end

-- Remove / or \ characters
addon_dir = addon_dir:gsub("[/\\]", "")

return addon_dir
end

Expand Down
26 changes: 21 additions & 5 deletions .generate_database_lua/main.lua
Original file line number Diff line number Diff line change
Expand Up @@ -87,14 +87,30 @@ CLI_Helpers.loadTOC(".generate_database_lua/translations.toc")

-- Collect every enUS source string (the keys of `translations`) into a list.
-- The keys are passed on unmodified so they can be used for lookups in
-- `translations` again later.
local single_translation = {}
for key, _ in pairs(translations) do
    -- local translation = string.gsub(key, "\n", "<br>")
    -- translation = string.gsub(translation, '"', '\\"')
    table.insert(single_translation, key)
end

--- Look up all locale variants of one enUS source string.
---@param enUStext string The enUS text, used as key into `translations`
---@return table<string, string|boolean>? entry The entry stored in `translations`, nil when there is none
---@return string? err Error message when no entry exists
local function getTranslation(enUStext)
    local entry = translations[enUStext]
    if entry then
        return entry, nil
    end
    return nil, "Translation not found for: " .. enUStext
end


-- Load the generator; generate_translation_trie_root.lua defines the global
-- function Compile_translations_to_html
require("generate_translation_trie_root")
-- Find the addon name (it is used for the addon path in the generated XML file)
local addon_name = helpers.find_addon_name()
print("Addon Name: " .. addon_name)
-- Generate the translation files for all collected enUS strings
Compile_translations_to_html(single_translation, addon_name, getTranslation)

-- Run the main function
-- main()
6 changes: 3 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ Database/*/**/*.html
Database/*/*/*Data*.xml
Database/*/*/*Data*.html

translations/*Lookup.lua
translations/*Files.xml
translations/_data/*.html
Translations/*_gen.xml
Translations/*_gen.lua
Translations/_data/*.html

.shit
.translator
Expand Down
Loading

0 comments on commit 5652c9b

Please sign in to comment.