Skip to content

Commit

Permalink
New feature: Dialogue enhancement in AudioEncode
Browse files Browse the repository at this point in the history
  • Loading branch information
fhermansson committed Apr 2, 2024
2 parents 47f9295 + c713b5e commit 5c14a5e
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 28 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ data class AudioEncode(
val params: LinkedHashMap<String, String> = linkedMapOf(),
val filters: List<String> = emptyList(),
val audioMixPreset: String = "default",
val dialogueEnhancement: DialogueEnhancement = DialogueEnhancement(),
override val optional: Boolean = false,
val format: String = "mp4",
val inputLabel: String = DEFAULT_AUDIO_LABEL,
Expand All @@ -44,7 +45,19 @@ data class AudioEncode(
val preset = encodingProperties.audioMixPresets[audioMixPreset]
?: throw RuntimeException("Audio mix preset '$audioMixPreset' not found!")
val inputChannels = analyzed.channelCount()
val inputChannelLayout = audioIn.channelLayout(encodingProperties.defaultChannelLayouts)
var inputChannelLayout = audioIn.channelLayout(encodingProperties.defaultChannelLayouts)
if (dialogueEnhancement.enabled && !dialogueEnhancePossible(inputChannelLayout)) {
return logOrThrow("Can not generate $outputName! Dialogue enhancement not possible for source channel layout ${inputChannelLayout.layoutName}")
}

val dialogueEnhanceFilters = mutableListOf<String>()
if (shouldDialogueEnhanceStereo(inputChannelLayout)) {
inputChannelLayout = ChannelLayout.CH_LAYOUT_3POINT0
dialogueEnhanceFilters.add(dialogueEnhancement.dialogueEnhanceStereo.filterString)
}
if (dialogueEnhancement.enabled) {
dialogueEnhanceFilters.add(dialogueEnhance(inputChannelLayout))
}

val mixFilters = mutableListOf<String>()

Expand Down Expand Up @@ -77,14 +90,64 @@ data class AudioEncode(
AudioStreamEncode(
params = outParams.toParams(),
inputLabels = listOf(inputLabel),
filter = (mixFilters + filters).joinToString(",").ifEmpty { null }
filter = (dialogueEnhanceFilters + mixFilters + filters).joinToString(",").ifEmpty { null }
)
),
output = outputName,
)
}

/**
 * Tells whether the stereo-specific dialogue enhancement step applies to the given layout.
 *
 * All three conditions must hold: dialogue enhancement is enabled, its stereo variant is
 * enabled, and [inputChannelLayout] is exactly [ChannelLayout.CH_LAYOUT_STEREO].
 */
private fun shouldDialogueEnhanceStereo(inputChannelLayout: ChannelLayout): Boolean {
    // Checked in the same order as before: master switch first, then the stereo switch.
    if (!dialogueEnhancement.enabled) return false
    if (!dialogueEnhancement.dialogueEnhanceStereo.enabled) return false
    return inputChannelLayout == ChannelLayout.CH_LAYOUT_STEREO
}

/**
 * Tells whether dialogue enhancement can be applied to a source with the given layout.
 *
 * Requires more than one channel, and either an `FC` channel in the layout or a layout
 * for which [shouldDialogueEnhanceStereo] returns true.
 */
private fun dialogueEnhancePossible(inputChannelLayout: ChannelLayout): Boolean {
    val sourceChannels = inputChannelLayout.channels
    // A single-channel (or empty) layout can never be enhanced.
    if (sourceChannels.size <= 1) return false
    return ChannelId.FC in sourceChannels || shouldDialogueEnhanceStereo(inputChannelLayout)
}

/**
 * Builds the dialogue enhancement filter graph string for the given input layout.
 *
 * The result consists of five segments joined by `;`:
 * 1. `channelsplit` of the input layout into one labelled pad per channel,
 * 2. `asplit` of the `FC` pad into a sidechain pad and a pass-through pad,
 * 3. `amerge` of every non-`FC` pad into a single background pad,
 * 4. the configured sidechain compress filter fed with the background and sidechain pads,
 * 5. `amerge` of the compressed background with the pass-through `FC` pad.
 *
 * All pad labels embed `suffix`; presumably this keeps labels unique per encode — confirm
 * against how the combined filter graph is assembled. The last segment has no output label.
 *
 * @param inputChannelLayout layout of the audio entering the filter graph
 * @return the filter graph string
 */
private fun dialogueEnhance(inputChannelLayout: ChannelLayout): String {
    val sourceChannels = inputChannelLayout.channels
    val backgroundChannels = sourceChannels - ChannelId.FC

    val sourcePads = sourceChannels.joinToString(separator = "") { channel -> "[CH-$suffix-$channel]" }
    val backgroundPads = backgroundChannels.joinToString(separator = "") { channel -> "[CH-$suffix-$channel]" }
    val sidechainFilter = dialogueEnhancement.sidechainCompress.filterString

    return buildString {
        // 1. split the input into one pad per channel
        append("channelsplit=channel_layout=${inputChannelLayout.layoutName}$sourcePads")
        append(";")
        // 2. duplicate the center channel: one copy drives the sidechain, one goes to the output
        append("[CH-$suffix-FC]asplit=2[SC-$suffix][CH-$suffix-FC-OUT]")
        append(";")
        // 3. merge everything except the center channel into the background
        append("${backgroundPads}amerge=inputs=${backgroundChannels.size}[BG-$suffix]")
        append(";")
        // 4. compress the background using the center channel as sidechain
        append("[BG-$suffix][SC-$suffix]$sidechainFilter[COMPR-$suffix]")
        append(";")
        // 5. merge the compressed background with the untouched center channel
        append("[COMPR-$suffix][CH-$suffix-FC-OUT]amerge")
    }
}

/**
 * Tells whether this encode can be produced from a source with [channelCount] channels.
 *
 * A source without channels is never applicable. Otherwise a stereo target layout is always
 * applicable, and any other target layout is applicable when its channel count is between
 * 1 and [channelCount] inclusive.
 */
private fun isApplicable(channelCount: Int): Boolean =
    when {
        channelCount <= 0 -> false
        channelLayout == ChannelLayout.CH_LAYOUT_STEREO -> true
        else -> channelLayout.channels.size in 1..channelCount
    }

/**
 * Settings for dialogue enhancement.
 *
 * @property enabled master switch, off by default
 * @property sidechainCompress options rendered into the `sidechaincompress` filter string
 * @property dialogueEnhanceStereo options rendered into the `dialoguenhance` filter string
 */
data class DialogueEnhancement(
    val enabled: Boolean = false,
    val sidechainCompress: SidechainCompress = SidechainCompress(),
    val dialogueEnhanceStereo: DialogueEnhanceStereo = DialogueEnhanceStereo()
) {
    /**
     * Options for the `dialoguenhance` filter.
     *
     * @property enabled whether the stereo variant is used, on by default
     * @property original value of the filter's `original` option
     * @property enhance value of the filter's `enhance` option
     * @property voice value of the filter's `voice` option
     */
    data class DialogueEnhanceStereo(
        val enabled: Boolean = true,
        val original: Int = 1,
        val enhance: Int = 1,
        val voice: Int = 2
    ) {
        /** The filter rendered as `dialoguenhance=original=..:enhance=..:voice=..`. */
        val filterString: String
            get() {
                val options = listOf(
                    "original" to original,
                    "enhance" to enhance,
                    "voice" to voice
                ).joinToString(separator = ":") { (key, value) -> "$key=$value" }
                return "dialoguenhance=$options"
            }
    }

    /**
     * Options for the `sidechaincompress` filter.
     *
     * @property ratio value of the filter's `ratio` option
     * @property threshold value of the filter's `threshold` option
     * @property release value of the filter's `release` option
     * @property attack value of the filter's `attack` option
     */
    data class SidechainCompress(
        val ratio: Int = 8,
        val threshold: Double = 0.012,
        val release: Double = 1000.0,
        val attack: Double = 100.0
    ) {
        /**
         * The filter rendered as `sidechaincompress=threshold=..:ratio=..:release=..:attack=..`.
         * The option order differs from the constructor order and is kept as is.
         */
        val filterString: String
            get() {
                val options = listOf(
                    "threshold" to threshold,
                    "ratio" to ratio,
                    "release" to release,
                    "attack" to attack
                ).joinToString(separator = ":") { (key, value) -> "$key=$value" }
                return "sidechaincompress=$options"
            }
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ class EncoreIntegrationTest : EncoreIntegrationTestBase() {
defaultExpectedOutputFiles(outputDir, testFileSurround) +
listOf(
expectedFile(outputDir, testFileSurround, "STEREO_DE.mp4"),
expectedFile(outputDir, testFileSurround, "SURROUND.mp4")
expectedFile(outputDir, testFileSurround, "SURROUND.mp4"),
expectedFile(outputDir, testFileSurround, "SURROUND_DE.mp4"),
)
)
}
Expand Down Expand Up @@ -133,7 +134,8 @@ class EncoreIntegrationTest : EncoreIntegrationTestBase() {
fun jobIsSuccessfulStereo(@TempDir outputDir: File) {
successfulTest(
job(outputDir = outputDir, file = testFileStereo),
defaultExpectedOutputFiles(outputDir, testFileStereo)
defaultExpectedOutputFiles(outputDir, testFileStereo) +
listOf(expectedFile(outputDir, testFileStereo, "STEREO_DE.mp4"))
)
}

Expand Down
25 changes: 7 additions & 18 deletions encore-common/src/test/resources/application-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,6 @@ logging:
level:
se.svt: debug

server:
port: ${random-port.server}

service:
name: encore-test

encore-settings:
concurrency: 3
local-temporary-encode: false
Expand All @@ -30,24 +24,19 @@ encore-settings:
audio-mix-presets:
default:
default-pan:
"[5.1]": c0 = c0 | c1 = c1 | c2 = c2 | c3 = c3 | c4 = c4 | c5 = c5
stereo: c0 = c0 + 0.707*c2 + 0.707*c4 | c1 = c1 + 0.707*c2 + 0.707*c5
stereo: FL=FL+0.707107*FC+0.707107*BL+0.707107*SL|FR=FR+0.707107*FC+0.707107*BR+0.707107*SR
pan-mapping:
"[5.1]":
stereo: c0=1.0*c0+0.707*c2+0.707*c4|c1=1.0*c1+0.707*c2+0.707*c5
mono:
stereo: FL=0.707*FC|FR=0.707*FC
de:
fallback-to-auto: false
default-pan:
stereo: FL<FL+1.5*FC+0.707107*BL+0.707107*SL|FR<FR+1.5*FC+0.707107*BR+0.707107*SR
pan-mapping:
"[5.1]":
stereo: c0<0.25*c0+1.5*c2+0.25*c4|c1<0.25*c1+1.5*c2+0.25*c5
"[5.1]": c0=c0|c1=c1|c2<1.5*c2|c3=c3|c4=c4|c5=c5
"[5.1(side)]":
stereo: c0<0.25*c0+1.5*c2+0.25*c4|c1<0.25*c1+1.5*c2+0.25*c5
"[5.1]": c0=c0|c1=c1|c2<1.5*c2|c3=c3|c4=c4|c5=c5

profile:
location: classpath:profile/profiles.yml

feign:
client:
config:
default:
logger-level: basic
12 changes: 12 additions & 0 deletions encore-common/src/test/resources/profile/program.yml
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,8 @@ encodes:
- type: AudioEncode
bitrate: 128k
suffix: _STEREO_DE
dialogueEnhancement:
enabled: true
audioMixPreset: de
optional: true

Expand All @@ -218,6 +220,16 @@ encodes:
optional: true
channelLayout: '5.1'

- type: AudioEncode
codec: ac3
bitrate: 448k
suffix: _SURROUND_DE
dialogueEnhancement:
enabled: true
audioMixPreset: de
optional: true
channelLayout: '5.1'

- type: ThumbnailMapEncode

- type: ThumbnailEncode
Expand Down
13 changes: 7 additions & 6 deletions encore-web/src/test/resources/application-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,19 @@ encore-settings:
audio-mix-presets:
default:
default-pan:
"[5.1]": c0 = c0 | c1 = c1 | c2 = c2 | c3 = c3 | c4 = c4 | c5 = c5
stereo: c0 = c0 + 0.707*c2 + 0.707*c4 | c1 = c1 + 0.707*c2 + 0.707*c5
stereo: FL=FL+0.707107*FC+0.707107*BL+0.707107*SL|FR=FR+0.707107*FC+0.707107*BR+0.707107*SR
pan-mapping:
"[5.1]":
stereo: c0=1.0*c0+0.707*c2+0.707*c4|c1=1.0*c1+0.707*c2+0.707*c5
mono:
stereo: FL=0.707*FC|FR=0.707*FC
de:
fallback-to-auto: false
default-pan:
stereo: FL<FL+1.5*FC+0.707107*BL+0.707107*SL|FR<FR+1.5*FC+0.707107*BR+0.707107*SR
pan-mapping:
"[5.1]":
stereo: c0<0.25*c0+1.5*c2+0.25*c4|c1<0.25*c1+1.5*c2+0.25*c5
"[5.1]": c0=c0|c1=c1|c2<1.5*c2|c3=c3|c4=c4|c5=c5
"[5.1(side)]":
stereo: c0<0.25*c0+1.5*c2+0.25*c4|c1<0.25*c1+1.5*c2+0.25*c5
"[5.1]": c0=c0|c1=c1|c2<1.5*c2|c3=c3|c4=c4|c5=c5

profile:
location: classpath:profile/profiles.yml

0 comments on commit 5c14a5e

Please sign in to comment.