Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: xlsx import and export #2829

Merged
merged 6 commits into from
Jan 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions backend/data/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,13 @@ dependencies {
implementation libs.jacksonKotlin
implementation("org.apache.commons:commons-configuration2:2.10.1")
implementation "com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:$jacksonVersion"

/**
* Table formats
*/
implementation("com.opencsv:opencsv:5.9")
implementation 'org.apache.poi:poi:5.3.0'
implementation 'org.apache.poi:poi-ooxml:5.3.0'

/**
* Google translation API
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,5 @@ enum class ExportFormat(
JSON_I18NEXT("json", "application/json"),
CSV("csv", "text/csv"),
RESX_ICU("resx", "text/microsoft-resx"),
XLSX("xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import io.tolgee.formats.po.`in`.PoFileProcessor
import io.tolgee.formats.properties.`in`.PropertiesFileProcessor
import io.tolgee.formats.resx.`in`.ResxProcessor
import io.tolgee.formats.xliff.`in`.XliffFileProcessor
import io.tolgee.formats.xlsx.`in`.XlsxFileProcessor
import io.tolgee.formats.xmlResources.`in`.XmlResourcesProcessor
import io.tolgee.formats.yaml.`in`.YamlFileProcessor
import io.tolgee.service.dataImport.processors.FileProcessorContext
Expand Down Expand Up @@ -64,6 +65,7 @@ class ImportFileProcessorFactory(
ImportFileFormat.YAML -> YamlFileProcessor(context, yamlObjectMapper)
ImportFileFormat.CSV -> CsvFileProcessor(context)
ImportFileFormat.RESX -> ResxProcessor(context)
ImportFileFormat.XLSX -> XlsxFileProcessor(context)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,60 +2,28 @@ package io.tolgee.formats.csv.`in`

import com.opencsv.CSVParserBuilder
import com.opencsv.CSVReaderBuilder
import io.tolgee.formats.csv.CsvEntry
import io.tolgee.formats.genericTable.TableEntry
import io.tolgee.formats.genericTable.`in`.TableParser
import java.io.InputStream

class CsvFileParser(
private val inputStream: InputStream,
private val delimiter: Char,
private val languageFallback: String,
) {
val rawData: List<Array<String>> by lazy {
val rawData: List<List<String>> by lazy {
val inputReader = inputStream.reader()
val parser = CSVParserBuilder().withSeparator(delimiter).build()
val reader = CSVReaderBuilder(inputReader).withCSVParser(parser).build()

return@lazy reader.readAll()
return@lazy reader.readAll().map { it.toList() }
}

val headers: Array<String>? by lazy {
rawData.firstOrNull()
val tableParser: TableParser by lazy {
TableParser(rawData, languageFallback)
}

val languages: List<String> by lazy {
headers?.takeIf { it.size > 1 }?.drop(1) ?: emptyList()
}

val languagesWithFallback: Sequence<String>
get() = languages.asSequence().plus(generateSequence { languageFallback })

val rows: List<Array<String>> by lazy {
rawData.takeIf { it.size > 1 }?.drop(1) ?: emptyList()
}

fun Array<String>.rowToCsvEntries(): Sequence<CsvEntry> {
if (isEmpty()) {
return emptySequence()
}
val keyName = getOrNull(0) ?: ""
if (size == 1) {
return sequenceOf(CsvEntry(keyName, languageFallback, null))
}
val translations = drop(1).asSequence()
return translations
.zip(languagesWithFallback)
.map { (translation, languageTag) ->
CsvEntry(
keyName,
languageTag,
translation,
)
}
}

fun parse(): List<CsvEntry> {
return rows.flatMap {
it.rowToCsvEntries()
}
fun parse(): List<TableEntry> {
return tableParser.parse()
}
}
Original file line number Diff line number Diff line change
@@ -1,46 +1,15 @@
package io.tolgee.formats.csv.`in`

import io.tolgee.exceptions.ImportCannotParseFileException
import io.tolgee.formats.ImportFileProcessor
import io.tolgee.formats.csv.CsvEntry
import io.tolgee.formats.genericTable.TableEntry
import io.tolgee.formats.genericTable.`in`.TableProcessor
import io.tolgee.formats.importCommon.ImportFormat
import io.tolgee.service.dataImport.processors.FileProcessorContext

class CsvFileProcessor(
override val context: FileProcessorContext,
) : ImportFileProcessor() {
override fun process() {
val (data, format) = parse()
data.importAll(format)
}

fun Iterable<CsvEntry>.importAll(format: ImportFormat) {
forEachIndexed { idx, it -> it.import(idx, format) }
}

fun CsvEntry.import(
index: Int,
format: ImportFormat,
) {
val converted =
format.messageConvertor.convert(
value,
language,
convertPlaceholders = context.importSettings.convertPlaceholdersToIcu,
isProjectIcuEnabled = context.projectIcuPlaceholdersEnabled,
)
context.addTranslation(
key,
language,
converted.message,
index,
pluralArgName = converted.pluralArgName,
rawData = value,
convertedBy = format,
)
}

private fun parse(): Pair<Iterable<CsvEntry>, ImportFormat> {
) : TableProcessor(context) {
override fun parse(): Pair<Iterable<TableEntry>, ImportFormat> {
try {
val detector = CsvDelimiterDetector(context.file.data.inputStream())
val parser =
Expand All @@ -50,7 +19,7 @@ class CsvFileProcessor(
languageFallback = firstLanguageTagGuessOrUnknown,
)
val data = parser.parse()
val format = getFormat(parser.rows)
val format = getFormat(parser.tableParser.rows)
return data to format
} catch (e: Exception) {
throw ImportCannotParseFileException(context.file.name, e.message ?: "", e)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,68 +1,17 @@
package io.tolgee.formats.csv.out

import io.tolgee.dtos.IExportParams
import io.tolgee.formats.ExportMessageFormat
import io.tolgee.formats.csv.CsvEntry
import io.tolgee.formats.generic.IcuToGenericFormatMessageConvertor
import io.tolgee.service.export.ExportFilePathProvider
import io.tolgee.formats.genericTable.TableEntry
import io.tolgee.formats.genericTable.out.TableExporter
import io.tolgee.service.export.dataProvider.ExportTranslationView
import io.tolgee.service.export.exporters.FileExporter
import java.io.InputStream

class CsvFileExporter(
val translations: List<ExportTranslationView>,
val exportParams: IExportParams,
private val isProjectIcuPlaceholdersEnabled: Boolean = true,
) : FileExporter {
private val pathProvider by lazy {
ExportFilePathProvider(
exportParams,
"csv",
)
}

private val messageFormat
get() = exportParams.messageFormat ?: ExportMessageFormat.ICU

private val placeholderConvertorFactory
get() = messageFormat.paramConvertorFactory

val entries =
translations.map {
val converted = convertMessage(it.text, it.key.isPlural)
val path =
pathProvider.getFilePath(it.key.namespace)
val entry =
CsvEntry(
key = it.key.name,
language = it.languageTag,
value = converted,
)
path to entry
}.groupBy({ it.first }, { it.second })

private fun convertMessage(
text: String?,
isPlural: Boolean,
): String? {
return getMessageConvertor(text, isPlural).convert()
}

private fun getMessageConvertor(
text: String?,
isPlural: Boolean,
) = IcuToGenericFormatMessageConvertor(
text,
isPlural,
isProjectIcuPlaceholdersEnabled = isProjectIcuPlaceholdersEnabled,
paramConvertorFactory = placeholderConvertorFactory,
)

override fun produceFiles(): Map<String, InputStream> {
return entries.mapValues { (_, entry) -> entry.toCsv() }
}

private fun List<CsvEntry>.toCsv(): InputStream {
translations: List<ExportTranslationView>,
exportParams: IExportParams,
isProjectIcuPlaceholdersEnabled: Boolean = true,
) : TableExporter(translations, exportParams, "csv", isProjectIcuPlaceholdersEnabled) {
override fun List<TableEntry>.toFileContents(): InputStream {
val languageTags =
exportParams.languages?.sorted()?.toTypedArray()
?: this.map { it.language }.distinct().sorted().toTypedArray()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
package io.tolgee.formats.csv.out

import com.opencsv.CSVWriterBuilder
import io.tolgee.formats.csv.CsvEntry
import io.tolgee.formats.genericTable.TableEntry
import java.io.InputStream
import java.io.StringWriter

class CsvFileWriter(
private val languageTags: Array<String>,
private val data: List<CsvEntry>,
private val data: List<TableEntry>,
private val delimiter: Char,
) {
val translations: Map<String, Map<String, String?>> by lazy {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package io.tolgee.formats.csv
package io.tolgee.formats.genericTable

data class CsvEntry(
data class TableEntry(
val key: String,
val language: String,
val value: String?,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package io.tolgee.formats.genericTable.`in`

import io.tolgee.formats.genericTable.TableEntry

/**
 * Converts a table given as a list of rows into a flat list of [TableEntry] items.
 *
 * The first row is treated as the header row; every following row is a data row.
 * In each row the first cell is the key name and the remaining cells are translations.
 * Translation cells are paired positionally with the header cells that follow the first one;
 * cells that have no matching header cell are assigned [languageFallback].
 *
 * @param rawData all rows of the table, header row included
 * @param languageFallback language tag used where the header row provides none
 */
class TableParser(
    private val rawData: List<List<String>>,
    private val languageFallback: String,
) {
    /** The first row of the table, or `null` when the table contains no rows at all. */
    val headers: List<String>? by lazy { rawData.firstOrNull() }

    /** Header cells after the first column; empty when the header row has fewer than two cells. */
    val languages: List<String> by lazy {
        val headerRow = headers
        if (headerRow != null && headerRow.size > 1) headerRow.drop(1) else emptyList()
    }

    /** [languages] followed by an endless repetition of the fallback language tag. */
    val languagesWithFallback: Sequence<String>
        get() = languages.asSequence() + generateSequence { languageFallback }

    /** Every row after the header row; empty when the table has fewer than two rows. */
    val rows: List<List<String>> by lazy {
        if (rawData.size > 1) rawData.drop(1) else emptyList()
    }

    /**
     * Expands one data row into its entries.
     *
     * An empty row yields nothing. A row holding only the key yields a single entry with the
     * fallback language and a `null` value. Otherwise one entry is produced per translation cell.
     */
    fun List<String>.rowToTableEntries(): Sequence<TableEntry> =
        when (size) {
            0 -> emptySequence()
            1 -> sequenceOf(TableEntry(getOrNull(0) ?: "", languageFallback, null))
            else -> {
                val keyName = getOrNull(0) ?: ""
                val cells = drop(1).asSequence()
                // zip stops at the shorter side, which is always the finite list of cells
                cells.zip(languagesWithFallback).map { (cell, tag) ->
                    TableEntry(keyName, tag, cell)
                }
            }
        }

    /** Returns the entries of all data rows, in row order. */
    fun parse(): List<TableEntry> = rows.flatMap { row -> row.rowToTableEntries() }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package io.tolgee.formats.genericTable.`in`

import io.tolgee.formats.ImportFileProcessor
import io.tolgee.formats.genericTable.TableEntry
import io.tolgee.formats.importCommon.ImportFormat
import io.tolgee.service.dataImport.processors.FileProcessorContext

/**
 * Base class for import processors whose input is parsed into [TableEntry] items.
 *
 * Subclasses implement [parse]; [process] then converts each entry's value with the
 * format's message convertor and adds it to [context] as a translation.
 */
abstract class TableProcessor(
    override val context: FileProcessorContext,
) : ImportFileProcessor() {
    /** Parses the input and imports every resulting entry. */
    override fun process() {
        val parsed = parse()
        parsed.first.importAll(parsed.second)
    }

    /** Imports each entry, passing its position in the iteration order as the index. */
    fun Iterable<TableEntry>.importAll(format: ImportFormat) {
        for ((position, entry) in withIndex()) {
            entry.import(position, format)
        }
    }

    /**
     * Converts this entry's value with the convertor of [format] and adds the result
     * to [context]; the unconverted value is passed along as raw data.
     */
    fun TableEntry.import(
        index: Int,
        format: ImportFormat,
    ) {
        val convertor = format.messageConvertor
        val result =
            convertor.convert(
                value,
                language,
                convertPlaceholders = context.importSettings.convertPlaceholdersToIcu,
                isProjectIcuEnabled = context.projectIcuPlaceholdersEnabled,
            )
        context.addTranslation(
            key,
            language,
            result.message,
            index,
            pluralArgName = result.pluralArgName,
            rawData = value,
            convertedBy = format,
        )
    }

    /** Produces the entries to import together with the format used to convert them. */
    protected abstract fun parse(): Pair<Iterable<TableEntry>, ImportFormat>
}
Loading
Loading