From c713b5e2c5c985c5ea6ccb9c259b02ea0a33f90d Mon Sep 17 00:00:00 2001 From: "Finn Hermansson (fihe02)" Date: Tue, 2 Apr 2024 14:43:28 +0200 Subject: [PATCH] New feature: Dialogue enhancement in AudioEncode --- .../oss/encore/model/profile/AudioEncode.kt | 67 ++++++++++++++++++- .../svt/oss/encore/EncoreIntegrationTest.kt | 6 +- .../src/test/resources/application-test.yml | 25 ++----- .../src/test/resources/profile/program.yml | 12 ++++ .../src/test/resources/application-test.yml | 13 ++-- 5 files changed, 95 insertions(+), 28 deletions(-) diff --git a/encore-common/src/main/kotlin/se/svt/oss/encore/model/profile/AudioEncode.kt b/encore-common/src/main/kotlin/se/svt/oss/encore/model/profile/AudioEncode.kt index ed6abd4..cd87e76 100644 --- a/encore-common/src/main/kotlin/se/svt/oss/encore/model/profile/AudioEncode.kt +++ b/encore-common/src/main/kotlin/se/svt/oss/encore/model/profile/AudioEncode.kt @@ -25,6 +25,7 @@ data class AudioEncode( val params: LinkedHashMap = linkedMapOf(), val filters: List = emptyList(), val audioMixPreset: String = "default", + val dialogueEnhancement: DialogueEnhancement = DialogueEnhancement(), override val optional: Boolean = false, val format: String = "mp4", val inputLabel: String = DEFAULT_AUDIO_LABEL, @@ -44,7 +45,19 @@ data class AudioEncode( val preset = encodingProperties.audioMixPresets[audioMixPreset] ?: throw RuntimeException("Audio mix preset '$audioMixPreset' not found!") val inputChannels = analyzed.channelCount() - val inputChannelLayout = audioIn.channelLayout(encodingProperties.defaultChannelLayouts) + var inputChannelLayout = audioIn.channelLayout(encodingProperties.defaultChannelLayouts) + if (dialogueEnhancement.enabled && !dialogueEnhancePossible(inputChannelLayout)) { + return logOrThrow("Can not generate $outputName! Dialogue enhancement not possible for source channel layout ${inputChannelLayout.layoutName}") + } + + val dialogueEnhanceFilters = mutableListOf() + if (shouldDialogueEnhanceStereo(inputChannelLayout)) { + inputChannelLayout = ChannelLayout.CH_LAYOUT_3POINT0 + dialogueEnhanceFilters.add(dialogueEnhancement.dialogueEnhanceStereo.filterString) + } + if (dialogueEnhancement.enabled) { + dialogueEnhanceFilters.add(dialogueEnhance(inputChannelLayout)) + } val mixFilters = mutableListOf() @@ -77,14 +90,64 @@ data class AudioEncode( AudioStreamEncode( params = outParams.toParams(), inputLabels = listOf(inputLabel), - filter = (mixFilters + filters).joinToString(",").ifEmpty { null } + filter = (dialogueEnhanceFilters + mixFilters + filters).joinToString(",").ifEmpty { null } ) ), output = outputName, ) } + private fun shouldDialogueEnhanceStereo(inputChannelLayout: ChannelLayout): Boolean = + dialogueEnhancement.enabled && + dialogueEnhancement.dialogueEnhanceStereo.enabled && + inputChannelLayout == ChannelLayout.CH_LAYOUT_STEREO + + private fun dialogueEnhancePossible(inputChannelLayout: ChannelLayout): Boolean = + inputChannelLayout.channels.size > 1 && + (inputChannelLayout.channels.contains(ChannelId.FC) || shouldDialogueEnhanceStereo(inputChannelLayout)) + + private fun dialogueEnhance(inputChannelLayout: ChannelLayout): String { + val layoutName = inputChannelLayout.layoutName + val channels = inputChannelLayout.channels + val channelSplit = + "channelsplit=channel_layout=$layoutName${channels.joinToString(separator = "") { "[CH-$suffix-$it]" }}" + val centerSplit = "[CH-$suffix-FC]asplit=2[SC-$suffix][CH-$suffix-FC-OUT]" + val bgChannels = channels - ChannelId.FC + val bgMerge = + "${bgChannels.joinToString(separator = "") { "[CH-$suffix-$it]" }}amerge=inputs=${bgChannels.size}[BG-$suffix]" + val compress = + "[BG-$suffix][SC-$suffix]${dialogueEnhancement.sidechainCompress.filterString}[COMPR-$suffix]" + val mixMerge = "[COMPR-$suffix][CH-$suffix-FC-OUT]amerge" + return listOf(channelSplit, centerSplit, bgMerge, compress, mixMerge).joinToString(";") + } + private fun isApplicable(channelCount: Int): Boolean { return channelCount > 0 && (channelLayout == ChannelLayout.CH_LAYOUT_STEREO || channelLayout.channels.size in 1..channelCount) } + + data class DialogueEnhancement( + val enabled: Boolean = false, + val sidechainCompress: SidechainCompress = SidechainCompress(), + val dialogueEnhanceStereo: DialogueEnhanceStereo = DialogueEnhanceStereo() + ) { + data class DialogueEnhanceStereo( + val enabled: Boolean = true, + val original: Int = 1, + val enhance: Int = 1, + val voice: Int = 2 + ) { + val filterString: String + get() = "dialoguenhance=original=$original:enhance=$enhance:voice=$voice" + } + + data class SidechainCompress( + val ratio: Int = 8, + val threshold: Double = 0.012, + val release: Double = 1000.0, + val attack: Double = 100.0 + ) { + val filterString: String + get() = "sidechaincompress=threshold=$threshold:ratio=$ratio:release=$release:attack=$attack" + } + } } diff --git a/encore-common/src/test/kotlin/se/svt/oss/encore/EncoreIntegrationTest.kt b/encore-common/src/test/kotlin/se/svt/oss/encore/EncoreIntegrationTest.kt index 2134dc6..e5a12d3 100644 --- a/encore-common/src/test/kotlin/se/svt/oss/encore/EncoreIntegrationTest.kt +++ b/encore-common/src/test/kotlin/se/svt/oss/encore/EncoreIntegrationTest.kt @@ -34,7 +34,8 @@ class EncoreIntegrationTest : EncoreIntegrationTestBase() { defaultExpectedOutputFiles(outputDir, testFileSurround) + listOf( expectedFile(outputDir, testFileSurround, "STEREO_DE.mp4"), - expectedFile(outputDir, testFileSurround, "SURROUND.mp4") + expectedFile(outputDir, testFileSurround, "SURROUND.mp4"), + expectedFile(outputDir, testFileSurround, "SURROUND_DE.mp4"), ) ) } @@ -133,7 +134,8 @@ class EncoreIntegrationTest : EncoreIntegrationTestBase() { fun jobIsSuccessfulStereo(@TempDir outputDir: File) { successfulTest( job(outputDir = outputDir, file = testFileStereo), - defaultExpectedOutputFiles(outputDir, testFileStereo) + defaultExpectedOutputFiles(outputDir, testFileStereo) + + listOf(expectedFile(outputDir, testFileStereo, "STEREO_DE.mp4")) ) } diff --git a/encore-common/src/test/resources/application-test.yml b/encore-common/src/test/resources/application-test.yml index c317ca6..4930311 100644 --- a/encore-common/src/test/resources/application-test.yml +++ b/encore-common/src/test/resources/application-test.yml @@ -12,12 +12,6 @@ logging: level: se.svt: debug -server: - port: ${random-port.server} - -service: - name: encore-test - encore-settings: concurrency: 3 local-temporary-encode: false @@ -30,24 +24,19 @@ encore-settings: audio-mix-presets: default: default-pan: - "[5.1]": c0 = c0 | c1 = c1 | c2 = c2 | c3 = c3 | c4 = c4 | c5 = c5 - stereo: c0 = c0 + 0.707*c2 + 0.707*c4 | c1 = c1 + 0.707*c2 + 0.707*c5 + stereo: FL=FL+0.707107*FC+0.707107*BL+0.707107*SL|FR=FR+0.707107*FC+0.707107*BR+0.707107*SR pan-mapping: - "[5.1]": - stereo: c0=1.0*c0+0.707*c2+0.707*c4|c1=1.0*c1+0.707*c2+0.707*c5 + mono: + stereo: FL=0.707*FC|FR=0.707*FC de: fallback-to-auto: false + default-pan: + stereo: FL