From c713b5e2c5c985c5ea6ccb9c259b02ea0a33f90d Mon Sep 17 00:00:00 2001
From: "Finn Hermansson (fihe02)" <finn.hermansson@svt.se>
Date: Tue, 2 Apr 2024 14:43:28 +0200
Subject: [PATCH] New feature: Dialogue enhancement in AudioEncode

---
 .../oss/encore/model/profile/AudioEncode.kt   | 67 ++++++++++++++++++-
 .../svt/oss/encore/EncoreIntegrationTest.kt   |  6 +-
 .../src/test/resources/application-test.yml   | 25 ++-----
 .../src/test/resources/profile/program.yml    | 12 ++++
 .../src/test/resources/application-test.yml   | 13 ++--
 5 files changed, 95 insertions(+), 28 deletions(-)
diff --git a/encore-common/src/main/kotlin/se/svt/oss/encore/model/profile/AudioEncode.kt b/encore-common/src/main/kotlin/se/svt/oss/encore/model/profile/AudioEncode.kt
index ed6abd4..cd87e76 100644
--- a/encore-common/src/main/kotlin/se/svt/oss/encore/model/profile/AudioEncode.kt
+++ b/encore-common/src/main/kotlin/se/svt/oss/encore/model/profile/AudioEncode.kt
@@ -25,6 +25,7 @@ data class AudioEncode(
     val params: LinkedHashMap<String, String> = linkedMapOf(),
     val filters: List<String> = emptyList(),
     val audioMixPreset: String = "default",
+    val dialogueEnhancement: DialogueEnhancement = DialogueEnhancement(),
     override val optional: Boolean = false,
     val format: String = "mp4",
     val inputLabel: String = DEFAULT_AUDIO_LABEL,
@@ -44,7 +45,19 @@ data class AudioEncode(
         val preset = encodingProperties.audioMixPresets[audioMixPreset]
             ?: throw RuntimeException("Audio mix preset '$audioMixPreset' not found!")
         val inputChannels = analyzed.channelCount()
-        val inputChannelLayout = audioIn.channelLayout(encodingProperties.defaultChannelLayouts)
+        var inputChannelLayout = audioIn.channelLayout(encodingProperties.defaultChannelLayouts)
+        if (dialogueEnhancement.enabled && !dialogueEnhancePossible(inputChannelLayout)) {
+            return logOrThrow("Can not generate $outputName! Dialogue enhancement not possible for source channel layout ${inputChannelLayout.layoutName}")
+        }
+
+        val dialogueEnhanceFilters = mutableListOf<String>()
+        if (shouldDialogueEnhanceStereo(inputChannelLayout)) {
+            inputChannelLayout = ChannelLayout.CH_LAYOUT_3POINT0
+            dialogueEnhanceFilters.add(dialogueEnhancement.dialogueEnhanceStereo.filterString)
+        }
+        if (dialogueEnhancement.enabled) {
+            dialogueEnhanceFilters.add(dialogueEnhance(inputChannelLayout))
+        }
 
         val mixFilters = mutableListOf<String>()
 
@@ -77,14 +90,64 @@ data class AudioEncode(
                 AudioStreamEncode(
                     params = outParams.toParams(),
                     inputLabels = listOf(inputLabel),
-                    filter = (mixFilters + filters).joinToString(",").ifEmpty { null }
+                    filter = (dialogueEnhanceFilters + mixFilters + filters).joinToString(",").ifEmpty { null }
                 )
             ),
             output = outputName,
         )
     }
 
+    /** True when enhancement and its stereo variant are both enabled and the input layout is plain stereo. */
+    private fun shouldDialogueEnhanceStereo(inputChannelLayout: ChannelLayout): Boolean =
+        with(dialogueEnhancement) { enabled && dialogueEnhanceStereo.enabled } &&
+            inputChannelLayout == ChannelLayout.CH_LAYOUT_STEREO
+
+    /** Requires more than one input channel and either an FC channel or applicable stereo enhancement. */
+    private fun dialogueEnhancePossible(inputChannelLayout: ChannelLayout): Boolean =
+        inputChannelLayout.channels.run { size > 1 && (ChannelId.FC in this || shouldDialogueEnhanceStereo(inputChannelLayout)) }
+
+    /** Splits the channels, feeds merged non-FC channels plus an FC copy to sidechaincompress, then merges FC back. */
+    private fun dialogueEnhance(inputChannelLayout: ChannelLayout): String {
+        val label = { id: Any -> "[CH-$suffix-$id]" }
+        val allChannels = inputChannelLayout.channels
+        val background = allChannels - ChannelId.FC
+        val sidechain = dialogueEnhancement.sidechainCompress.filterString
+        return arrayOf(
+            "channelsplit=channel_layout=${inputChannelLayout.layoutName}${allChannels.joinToString("", transform = label)}",
+            "[CH-$suffix-FC]asplit=2[SC-$suffix][CH-$suffix-FC-OUT]",
+            "${background.joinToString("", transform = label)}amerge=inputs=${background.size}[BG-$suffix]",
+            "[BG-$suffix][SC-$suffix]${sidechain}[COMPR-$suffix]",
+            "[COMPR-$suffix][CH-$suffix-FC-OUT]amerge"
+        ).joinToString(";")
+    }
+
     private fun isApplicable(channelCount: Int): Boolean {
         return channelCount > 0 && (channelLayout == ChannelLayout.CH_LAYOUT_STEREO || channelLayout.channels.size in 1..channelCount)
     }
+
+    /** Dialogue enhancement settings for an audio encode; [enabled] defaults to false. */
+    data class DialogueEnhancement(
+        val enabled: Boolean = false,
+        val sidechainCompress: SidechainCompress = SidechainCompress(),
+        val dialogueEnhanceStereo: DialogueEnhanceStereo = DialogueEnhanceStereo()
+    ) {
+        /** Parameters rendered into a `sidechaincompress` filter string by [filterString]. */
+        data class SidechainCompress(
+            val ratio: Int = 8,
+            val threshold: Double = 0.012,
+            val release: Double = 1000.0,
+            val attack: Double = 100.0
+        ) {
+            val filterString: String get() = "sidechaincompress=threshold=$threshold:ratio=$ratio:release=$release:attack=$attack"
+        }
+        /** Parameters rendered into a `dialoguenhance` filter string by [filterString]; [enabled] defaults to true. */
+        data class DialogueEnhanceStereo(
+            val enabled: Boolean = true,
+            val original: Int = 1,
+            val enhance: Int = 1,
+            val voice: Int = 2
+        ) {
+            val filterString: String get() = "dialoguenhance=original=$original:enhance=$enhance:voice=$voice"
+        }
+    }
 }
diff --git a/encore-common/src/test/kotlin/se/svt/oss/encore/EncoreIntegrationTest.kt b/encore-common/src/test/kotlin/se/svt/oss/encore/EncoreIntegrationTest.kt
index 2134dc6..e5a12d3 100644
--- a/encore-common/src/test/kotlin/se/svt/oss/encore/EncoreIntegrationTest.kt
+++ b/encore-common/src/test/kotlin/se/svt/oss/encore/EncoreIntegrationTest.kt
@@ -34,7 +34,8 @@ class EncoreIntegrationTest : EncoreIntegrationTestBase() {
             defaultExpectedOutputFiles(outputDir, testFileSurround) +
                 listOf(
                     expectedFile(outputDir, testFileSurround, "STEREO_DE.mp4"),
-                    expectedFile(outputDir, testFileSurround, "SURROUND.mp4")
+                    expectedFile(outputDir, testFileSurround, "SURROUND.mp4"),
+                    expectedFile(outputDir, testFileSurround, "SURROUND_DE.mp4"),
                 )
         )
     }
@@ -133,7 +134,8 @@ class EncoreIntegrationTest : EncoreIntegrationTestBase() {
     fun jobIsSuccessfulStereo(@TempDir outputDir: File) {
         successfulTest(
             job(outputDir = outputDir, file = testFileStereo),
-            defaultExpectedOutputFiles(outputDir, testFileStereo)
+            defaultExpectedOutputFiles(outputDir, testFileStereo) +
+                listOf(expectedFile(outputDir, testFileStereo, "STEREO_DE.mp4"))
         )
     }
 
diff --git a/encore-common/src/test/resources/application-test.yml b/encore-common/src/test/resources/application-test.yml
index c317ca6..4930311 100644
--- a/encore-common/src/test/resources/application-test.yml
+++ b/encore-common/src/test/resources/application-test.yml
@@ -12,12 +12,6 @@ logging:
   level:
     se.svt: debug
 
-server:
-  port: ${random-port.server}
-
-service:
-  name: encore-test
-
 encore-settings:
   concurrency: 3
   local-temporary-encode: false
@@ -30,24 +24,19 @@ encore-settings:
     audio-mix-presets:
       default:
         default-pan:
-          "[5.1]": c0 = c0 | c1 = c1 | c2 = c2 | c3 = c3 | c4 = c4 | c5 = c5
-          stereo: c0 = c0 + 0.707*c2 + 0.707*c4 | c1 = c1 + 0.707*c2 + 0.707*c5
+          stereo: FL=FL+0.707107*FC+0.707107*BL+0.707107*SL|FR=FR+0.707107*FC+0.707107*BR+0.707107*SR
         pan-mapping:
-          "[5.1]":
-            stereo: c0=1.0*c0+0.707*c2+0.707*c4|c1=1.0*c1+0.707*c2+0.707*c5
+          mono:
+            stereo: FL=0.707*FC|FR=0.707*FC
       de:
         fallback-to-auto: false
+        default-pan:
+          stereo: FL<FL+1.5*FC+0.707107*BL+0.707107*SL|FR<FR+1.5*FC+0.707107*BR+0.707107*SR
         pan-mapping:
           "[5.1]":
-            stereo: c0<0.25*c0+1.5*c2+0.25*c4|c1<0.25*c1+1.5*c2+0.25*c5
+            "[5.1]": c0=c0|c1=c1|c2<1.5*c2|c3=c3|c4=c4|c5=c5
           "[5.1(side)]":
-            stereo: c0<0.25*c0+1.5*c2+0.25*c4|c1<0.25*c1+1.5*c2+0.25*c5
+            "[5.1]": c0=c0|c1=c1|c2<1.5*c2|c3=c3|c4=c4|c5=c5
 
 profile:
   location: classpath:profile/profiles.yml
-
-feign:
-  client:
-    config:
-      default:
-        logger-level: basic
diff --git a/encore-common/src/test/resources/profile/program.yml b/encore-common/src/test/resources/profile/program.yml
index 42bbf5f..94d31e1 100644
--- a/encore-common/src/test/resources/profile/program.yml
+++ b/encore-common/src/test/resources/profile/program.yml
@@ -208,6 +208,8 @@ encodes:
   - type: AudioEncode
     bitrate: 128k
     suffix: _STEREO_DE
+    dialogueEnhancement:
+      enabled: true
     audioMixPreset: de
     optional: true
 
@@ -218,6 +220,16 @@ encodes:
     optional: true
     channelLayout: '5.1'
 
+  - type: AudioEncode
+    codec: ac3
+    bitrate: 448k
+    suffix: _SURROUND_DE
+    dialogueEnhancement:
+      enabled: true
+    audioMixPreset: de
+    optional: true
+    channelLayout: '5.1'
+
   - type: ThumbnailMapEncode
 
   - type: ThumbnailEncode
diff --git a/encore-web/src/test/resources/application-test.yml b/encore-web/src/test/resources/application-test.yml
index 1a22df9..475845a 100644
--- a/encore-web/src/test/resources/application-test.yml
+++ b/encore-web/src/test/resources/application-test.yml
@@ -27,18 +27,19 @@ encore-settings:
     audio-mix-presets:
       default:
         default-pan:
-          "[5.1]": c0 = c0 | c1 = c1 | c2 = c2 | c3 = c3 | c4 = c4 | c5 = c5
-          stereo: c0 = c0 + 0.707*c2 + 0.707*c4 | c1 = c1 + 0.707*c2 + 0.707*c5
+          stereo: FL=FL+0.707107*FC+0.707107*BL+0.707107*SL|FR=FR+0.707107*FC+0.707107*BR+0.707107*SR
         pan-mapping:
-          "[5.1]":
-            stereo: c0=1.0*c0+0.707*c2+0.707*c4|c1=1.0*c1+0.707*c2+0.707*c5
+          mono:
+            stereo: FL=0.707*FC|FR=0.707*FC
       de:
         fallback-to-auto: false
+        default-pan:
+          stereo: FL<FL+1.5*FC+0.707107*BL+0.707107*SL|FR<FR+1.5*FC+0.707107*BR+0.707107*SR
         pan-mapping:
           "[5.1]":
-            stereo: c0<0.25*c0+1.5*c2+0.25*c4|c1<0.25*c1+1.5*c2+0.25*c5
+            "[5.1]": c0=c0|c1=c1|c2<1.5*c2|c3=c3|c4=c4|c5=c5
           "[5.1(side)]":
-            stereo: c0<0.25*c0+1.5*c2+0.25*c4|c1<0.25*c1+1.5*c2+0.25*c5
+            "[5.1]": c0=c0|c1=c1|c2<1.5*c2|c3=c3|c4=c4|c5=c5
 
 profile:
   location: classpath:profile/profiles.yml