From 215992870f7bb4cc1d82da4d759668ac67efac5c Mon Sep 17 00:00:00 2001 From: "Finn Hermansson (fihe02)" Date: Thu, 16 May 2024 15:23:22 +0200 Subject: [PATCH] As a workaround for issues with thumbnails in combination with FFmpeg 7, add option decodeOutput to ThumbnailEncode and ThumbnailMapEncode, allowing them to use video output as input using the loopback decoder introduced in FFmpeg 7. --- .../se/svt/oss/encore/model/output/Output.kt | 5 +-- .../oss/encore/model/profile/ChannelLayout.kt | 16 ++++++--- .../encore/model/profile/ThumbnailEncode.kt | 25 ++++++++++---- .../model/profile/ThumbnailMapEncode.kt | 34 +++++++++++-------- .../svt/oss/encore/process/CommandBuilder.kt | 27 +++++++++++---- .../model/profile/ThumbnailMapEncodeTest.kt | 8 ++--- .../oss/encore/process/CommandBuilderTest.kt | 20 +++++++++-- .../resources/profile/multiple_inputs.yml | 2 ++ .../test/resources/profile/program-x265.yml | 2 ++ .../src/test/resources/profile/program.yml | 2 ++ .../profile/test_profile_invalid.yml | 2 ++ .../resources/profile/multiple_inputs.yml | 2 ++ .../test/resources/profile/program-x265.yml | 2 ++ .../src/test/resources/profile/program.yml | 2 ++ .../profile/test_profile_invalid.yml | 2 ++ 15 files changed, 109 insertions(+), 42 deletions(-) diff --git a/encore-common/src/main/kotlin/se/svt/oss/encore/model/output/Output.kt b/encore-common/src/main/kotlin/se/svt/oss/encore/model/output/Output.kt index 87a72b6..932fecc 100644 --- a/encore-common/src/main/kotlin/se/svt/oss/encore/model/output/Output.kt +++ b/encore-common/src/main/kotlin/se/svt/oss/encore/model/output/Output.kt @@ -13,7 +13,8 @@ data class Output( val format: String = "mp4", val postProcessor: PostProcessor = PostProcessor { outputFolder -> listOf(outputFolder.resolve(output)) }, val id: String, - val isImage: Boolean = false + val isImage: Boolean = false, + val decodeOutputStream: String? 
= null ) fun interface PostProcessor { @@ -39,7 +40,7 @@ data class AudioStreamEncode( override val params: List, override val filter: String? = null, override val inputLabels: List, - val preserveLayout: Boolean = false + val preserveLayout: Boolean = false, ) : StreamEncode { override val twoPass: Boolean get() = false diff --git a/encore-common/src/main/kotlin/se/svt/oss/encore/model/profile/ChannelLayout.kt b/encore-common/src/main/kotlin/se/svt/oss/encore/model/profile/ChannelLayout.kt index c3a5bb1..f5039d7 100644 --- a/encore-common/src/main/kotlin/se/svt/oss/encore/model/profile/ChannelLayout.kt +++ b/encore-common/src/main/kotlin/se/svt/oss/encore/model/profile/ChannelLayout.kt @@ -51,6 +51,7 @@ enum class ChannelLayout(@JsonValue val layoutName: String, val channels: List outputFolder.listFiles().orEmpty().filter { it.name.matches(fileRegex) } }, - isImage = true + isImage = true, + decodeOutputStream = decodeOutput?.let { "$it:v:0" } ) } private fun selectInterval(interval: Double, outputSeek: Double?): String { - val select = outputSeek - ?.let { "gte(t\\,$it)*(isnan(prev_selected_t)+gt(floor((t-$it)/$interval)\\,floor((prev_selected_t-$it)/$interval)))" } - ?: "isnan(prev_selected_t)+gt(floor(t/$interval)\\,floor(prev_selected_t/$interval))" + val select = if (outputSeek != null && decodeOutput == null) { + "gte(t\\,$outputSeek)*(isnan(prev_selected_t)+gt(floor((t-$outputSeek)/$interval)\\,floor((prev_selected_t-$outputSeek)/$interval)))" + } else { + "isnan(prev_selected_t)+gt(floor(t/$interval)\\,floor(prev_selected_t/$interval))" + } return "select=$select" } @@ -87,7 +91,14 @@ data class ThumbnailEncode( val outputDuration = outputDuration(videoIn, job) return percentages .map { it * outputDuration / 100 } - .map { t -> job.seekTo?.let { t + it } ?: t } + .map { t -> + val outputSeek = job.seekTo + if (outputSeek != null && decodeOutput == null) { + t + outputSeek + } else { + t + } + } } private fun selectTimes(times: List) = diff --git 
a/encore-common/src/main/kotlin/se/svt/oss/encore/model/profile/ThumbnailMapEncode.kt b/encore-common/src/main/kotlin/se/svt/oss/encore/model/profile/ThumbnailMapEncode.kt index 60314c3..720a36a 100644 --- a/encore-common/src/main/kotlin/se/svt/oss/encore/model/profile/ThumbnailMapEncode.kt +++ b/encore-common/src/main/kotlin/se/svt/oss/encore/model/profile/ThumbnailMapEncode.kt @@ -22,10 +22,12 @@ data class ThumbnailMapEncode( val tileHeight: Int = 90, val cols: Int = 12, val rows: Int = 20, + val quality: Int = 5, val optional: Boolean = true, val suffix: String = "_${cols}x${rows}_${tileWidth}x${tileHeight}_thumbnail_map", val format: String = "jpg", - val inputLabel: String = DEFAULT_VIDEO_LABEL + val inputLabel: String = DEFAULT_VIDEO_LABEL, + val decodeOutput: Int? = null ) : OutputProducer { private val log = KotlinLogging.logger { } @@ -40,8 +42,9 @@ data class ThumbnailMapEncode( ?: return logOrThrow("No input with label $inputLabel!") var inputDuration = videoStream.duration + val outputSeek = job.seekTo inputSeekTo?.let { inputDuration -= it } - job.seekTo?.let { inputDuration -= it } + outputSeek?.let { inputDuration -= it } val outputDuration = job.duration ?: inputDuration if (outputDuration <= 0) { @@ -49,22 +52,20 @@ data class ThumbnailMapEncode( } val interval = outputDuration / (cols * rows) - val select = job.seekTo - ?.let { "gte(t\\,$it)*(isnan(prev_selected_t)+gt(floor((t-$it)/$interval)\\,floor((prev_selected_t-$it)/$interval)))" } - ?: "isnan(prev_selected_t)+gt(floor(t/$interval)\\,floor(prev_selected_t/$interval))" + val select = if (outputSeek != null && decodeOutput == null) { + "gte(t\\,$outputSeek)*(isnan(prev_selected_t)+gt(floor((t-$outputSeek)/$interval)\\,floor((prev_selected_t-$outputSeek)/$interval)))" + } else { + "isnan(prev_selected_t)+gt(floor(t/$interval)\\,floor(prev_selected_t/$interval))" + } val tempFolder = createTempDir(suffix).toFile() tempFolder.deleteOnExit() val pad = "aspect=${Fraction(tileWidth, 
tileHeight).stringValue()}:x=(ow-iw)/2:y=(oh-ih)/2" - val scale = if (format == "jpg") { - "-1:$tileHeight:out_range=jpeg" - } else { - "-1:$tileHeight" - } + val scale = "-1:$tileHeight" + val params = linkedMapOf( - "q:v" to "5", "fps_mode" to "vfr" ) return Output( @@ -72,9 +73,9 @@ data class ThumbnailMapEncode( video = VideoStreamEncode( params = params.toParams(), filter = "select=$select,pad=$pad,scale=$scale", - inputLabels = listOf(inputLabel) + inputLabels = listOf(inputLabel), ), - output = tempFolder.resolve("${job.baseName}$suffix%04d.$format").toString(), + output = tempFolder.resolve("${job.baseName}$suffix%04d.png").toString(), postProcessor = { outputFolder -> try { val targetFile = outputFolder.resolve("${job.baseName}$suffix.$format") @@ -82,11 +83,13 @@ data class ThumbnailMapEncode( "ffmpeg", "-y", "-i", - "${job.baseName}$suffix%04d.$format", + "${job.baseName}$suffix%04d.png", "-vf", "tile=${cols}x$rows", "-frames:v", "1", + "-q:v", + "$quality", "$targetFile" ) .directory(tempFolder) @@ -102,7 +105,8 @@ data class ThumbnailMapEncode( emptyList() } }, - isImage = true + isImage = true, + decodeOutputStream = decodeOutput?.let { "$it:v:0" } ) } diff --git a/encore-common/src/main/kotlin/se/svt/oss/encore/process/CommandBuilder.kt b/encore-common/src/main/kotlin/se/svt/oss/encore/process/CommandBuilder.kt index d10ff46..c83af7c 100644 --- a/encore-common/src/main/kotlin/se/svt/oss/encore/process/CommandBuilder.kt +++ b/encore-common/src/main/kotlin/se/svt/oss/encore/process/CommandBuilder.kt @@ -62,12 +62,23 @@ class CommandBuilder( } private fun secondPassCommand(outputs: List): List { - val videoFilters = videoFilters(encoreJob.inputs, outputs) - val audioFilters = audioFilters(outputs) - val outputParams = outputs.flatMap(this::secondPassParams) - return inputParams(encoreJob.inputs) + filterParam(videoFilters + audioFilters) + outputParams + val (loopbackOutputs, mainOutputs) = outputs.partition { it.decodeOutputStream != null } + val 
videoFilters = videoFilters(encoreJob.inputs, mainOutputs) + val audioFilters = audioFilters(mainOutputs) + val outputParams = mainOutputs.flatMap(this::secondPassParams) + return inputParams(encoreJob.inputs) + filterParam(videoFilters + audioFilters) + outputParams + loopbackParams(loopbackOutputs) } + private fun loopbackParams(outputs: List): List = + outputs.flatMapIndexed { index: Int, output: Output -> + listOf( + "-dec", + output.decodeOutputStream ?: throw RuntimeException("No decodeOutputStream in $output!"), + "-filter_complex", + "[dec:$index]${output.video?.filter ?: ""}${MapName.VIDEO.mapLabel(output.id)}" + ) + secondPassParams(output) + } + private fun audioFilters(outputs: List): List { val audioSplits = encoreJob.inputs.mapIndexedNotNull { inputIndex, input -> if (input !is AudioIn) return@mapIndexedNotNull null @@ -230,8 +241,10 @@ class CommandBuilder( } private fun secondPassParams(output: Output): List { + val seekParams = output.decodeOutputStream?.let { emptyList() } ?: seekParams() + val durationParams = output.decodeOutputStream?.let { emptyList() } ?: durationParams() val mapV: List = - output.video?.let { listOf("-map", MapName.VIDEO.mapLabel(output.id)) + seekParams() } + output.video?.let { listOf("-map", MapName.VIDEO.mapLabel(output.id)) + seekParams } ?: emptyList() val preserveAudioLayout = output.audioStreams.any { it.preserveLayout } @@ -246,7 +259,7 @@ class CommandBuilder( } else { MapName.AUDIO.mapLabel("${output.id}-$index") } - listOf("-map", mapLabel) + seekParams() + listOf("-map", mapLabel) + seekParams } val maps = mapV + mapA @@ -264,7 +277,7 @@ class CommandBuilder( val metaDataParams = listOf("-metadata", "comment=Transcoded using Encore") return maps + - durationParams() + + durationParams + videoParams + audioParams + metaDataParams + File(outputFolder).resolve(output.output).toString() diff --git a/encore-common/src/test/kotlin/se/svt/oss/encore/model/profile/ThumbnailMapEncodeTest.kt 
b/encore-common/src/test/kotlin/se/svt/oss/encore/model/profile/ThumbnailMapEncodeTest.kt index 451dfe1..19d0466 100644 --- a/encore-common/src/test/kotlin/se/svt/oss/encore/model/profile/ThumbnailMapEncodeTest.kt +++ b/encore-common/src/test/kotlin/se/svt/oss/encore/model/profile/ThumbnailMapEncodeTest.kt @@ -29,8 +29,8 @@ class ThumbnailMapEncodeTest { .hasId("_12x20_160x90_thumbnail_map.jpg") .hasVideo( VideoStreamEncode( - params = listOf("-q:v", "5", "-fps_mode", "vfr"), - filter = "select=isnan(prev_selected_t)+gt(floor(t/0.041666666666666664)\\,floor(prev_selected_t/0.041666666666666664)),pad=aspect=16/9:x=(ow-iw)/2:y=(oh-ih)/2,scale=-1:90:out_range=jpeg", + params = listOf("-fps_mode", "vfr"), + filter = "select=isnan(prev_selected_t)+gt(floor(t/0.041666666666666664)\\,floor(prev_selected_t/0.041666666666666664)),pad=aspect=16/9:x=(ow-iw)/2:y=(oh-ih)/2,scale=-1:90", twoPass = false, inputLabels = listOf(DEFAULT_VIDEO_LABEL) ) @@ -50,8 +50,8 @@ class ThumbnailMapEncodeTest { .hasId("_6x10_160x90_thumbnail_map.jpg") .hasVideo( VideoStreamEncode( - params = listOf("-q:v", "5", "-fps_mode", "vfr"), - filter = "select=gte(t\\,1.0)*(isnan(prev_selected_t)+gt(floor((t-1.0)/0.08333333333333333)\\,floor((prev_selected_t-1.0)/0.08333333333333333))),pad=aspect=16/9:x=(ow-iw)/2:y=(oh-ih)/2,scale=-1:90:out_range=jpeg", + params = listOf("-fps_mode", "vfr"), + filter = "select=gte(t\\,1.0)*(isnan(prev_selected_t)+gt(floor((t-1.0)/0.08333333333333333)\\,floor((prev_selected_t-1.0)/0.08333333333333333))),pad=aspect=16/9:x=(ow-iw)/2:y=(oh-ih)/2,scale=-1:90", twoPass = false, inputLabels = listOf(DEFAULT_VIDEO_LABEL) ) diff --git a/encore-common/src/test/kotlin/se/svt/oss/encore/process/CommandBuilderTest.kt b/encore-common/src/test/kotlin/se/svt/oss/encore/process/CommandBuilderTest.kt index dbc64b3..6d11ae5 100644 --- a/encore-common/src/test/kotlin/se/svt/oss/encore/process/CommandBuilderTest.kt +++ 
b/encore-common/src/test/kotlin/se/svt/oss/encore/process/CommandBuilderTest.kt @@ -202,7 +202,7 @@ internal class CommandBuilderTest { encodingProperties.copy(exitOnError = false, globalParams = linkedMapOf("err_detect" to "explode")) ) - val buildCommands = commandBuilder.buildCommands(listOf(output(true), audioOutput("other", "extra"))) + val buildCommands = commandBuilder.buildCommands(listOf(output(true), audioOutput("other", "extra"), thumbnailOutput("thumb", "0:v:0"))) assertThat(buildCommands).hasSize(2) @@ -210,7 +210,7 @@ internal class CommandBuilderTest { val secondPass = buildCommands[1].joinToString(" ") assertThat(firstPass).isEqualTo("ffmpeg -err_detect explode -hide_banner -loglevel +level -y -f mp4 -t 22.5 -i /input/test.mp4 -filter_complex sws_flags=scaling;[0:v:1]yadif,setdar=16/9,scale=iw*sar:ih,crop=min(iw\\,ih*1/1):min(ih\\,iw/(1/1)),pad=aspect=16/9:x=(ow-iw)/2:y=(oh-ih)/2,video,filter,split=1[VIDEO-main-test-out];[VIDEO-main-test-out]video-filter[VIDEO-test-out] -map [VIDEO-test-out] -ss 12.1 -an -t 10.4 first pass -f mp4 /dev/null") - assertThat(secondPass).isEqualTo("ffmpeg -err_detect explode -hide_banner -loglevel +level -y -f mp4 -t 22.5 -i /input/test.mp4 -ac 4 -t 22.5 -i /input/main-audio.mp4 -t 22.5 -i /input/other-audio.mp4 -filter_complex sws_flags=scaling;[0:v:1]yadif,setdar=16/9,scale=iw*sar:ih,crop=min(iw\\,ih*1/1):min(ih\\,iw/(1/1)),pad=aspect=16/9:x=(ow-iw)/2:y=(oh-ih)/2,video,filter,split=1[VIDEO-main-test-out];[VIDEO-main-test-out]video-filter[VIDEO-test-out];[1:a]join=inputs=4:channel_layout=4.0:map=0.0-FL|1.0-FR|2.0-FC|3.0-BC,audio-main,main-filter,asplit=1[AUDIO-main-test-out-0];[2:a:3]asplit=1[AUDIO-other-extra-0];[AUDIO-main-test-out-0]audio-filter[AUDIO-test-out-0];[AUDIO-other-extra-0]audio-filter-extra[AUDIO-extra-0] -map [VIDEO-test-out] -ss 12.1 -map [AUDIO-test-out-0] -ss 12.1 -t 10.4 video params audio params -metadata comment=Transcoded using Encore /tmp/123/out.mp4 -map [AUDIO-extra-0] -ss 12.1 -t 10.4 -vn 
audio extra -metadata comment=Transcoded using Encore /tmp/123/extra.mp4") + assertThat(secondPass).isEqualTo("ffmpeg -err_detect explode -hide_banner -loglevel +level -y -f mp4 -t 22.5 -i /input/test.mp4 -ac 4 -t 22.5 -i /input/main-audio.mp4 -t 22.5 -i /input/other-audio.mp4 -filter_complex sws_flags=scaling;[0:v:1]yadif,setdar=16/9,scale=iw*sar:ih,crop=min(iw\\,ih*1/1):min(ih\\,iw/(1/1)),pad=aspect=16/9:x=(ow-iw)/2:y=(oh-ih)/2,video,filter,split=1[VIDEO-main-test-out];[VIDEO-main-test-out]video-filter[VIDEO-test-out];[1:a]join=inputs=4:channel_layout=4.0:map=0.0-FL|1.0-FR|2.0-FC|3.0-BC,audio-main,main-filter,asplit=1[AUDIO-main-test-out-0];[2:a:3]asplit=1[AUDIO-other-extra-0];[AUDIO-main-test-out-0]audio-filter[AUDIO-test-out-0];[AUDIO-other-extra-0]audio-filter-extra[AUDIO-extra-0] -map [VIDEO-test-out] -ss 12.1 -map [AUDIO-test-out-0] -ss 12.1 -t 10.4 video params audio params -metadata comment=Transcoded using Encore /tmp/123/out.mp4 -map [AUDIO-extra-0] -ss 12.1 -t 10.4 -vn audio extra -metadata comment=Transcoded using Encore /tmp/123/extra.mp4 -dec 0:v:0 -filter_complex [dec:0]thumb-filter[VIDEO-thumb] -map [VIDEO-thumb] thumb thumb -an -metadata comment=Transcoded using Encore /tmp/123/thumb.jpg") } private fun output(twoPass: Boolean): Output { @@ -234,7 +234,7 @@ internal class CommandBuilderTest { ) } - fun audioOutput(label: String, id: String): Output { + private fun audioOutput(label: String, id: String): Output { return Output( id = id, output = "$id.mp4", @@ -248,4 +248,18 @@ internal class CommandBuilderTest { ) ) } + + private fun thumbnailOutput(id: String, decodeOutputStream: String? 
= null): Output { + return Output( + id = id, + output = "$id.jpg", + video = VideoStreamEncode( + params = listOf("thumb", id), + inputLabels = listOf(DEFAULT_VIDEO_LABEL), + filter = "thumb-filter", + ), + isImage = true, + decodeOutputStream = decodeOutputStream + ) + } } diff --git a/encore-common/src/test/resources/profile/multiple_inputs.yml b/encore-common/src/test/resources/profile/multiple_inputs.yml index 30a8d3d..1d8b928 100644 --- a/encore-common/src/test/resources/profile/multiple_inputs.yml +++ b/encore-common/src/test/resources/profile/multiple_inputs.yml @@ -67,7 +67,9 @@ encodes: - type: ThumbnailMapEncode cols: 6 rows: 10 + decodeOutput: 0 - type: ThumbnailEncode + decodeOutput: 0 diff --git a/encore-common/src/test/resources/profile/program-x265.yml b/encore-common/src/test/resources/profile/program-x265.yml index baf7ad6..ec603a1 100644 --- a/encore-common/src/test/resources/profile/program-x265.yml +++ b/encore-common/src/test/resources/profile/program-x265.yml @@ -471,5 +471,7 @@ encodes: channelLayout: '5.1' - type: ThumbnailMapEncode + decodeOutput: 0 - type: ThumbnailEncode + decodeOutput: 0 diff --git a/encore-common/src/test/resources/profile/program.yml b/encore-common/src/test/resources/profile/program.yml index 94d31e1..2330e18 100644 --- a/encore-common/src/test/resources/profile/program.yml +++ b/encore-common/src/test/resources/profile/program.yml @@ -231,7 +231,9 @@ encodes: channelLayout: '5.1' - type: ThumbnailMapEncode + decodeOutput: 0 - type: ThumbnailEncode + decodeOutput: 0 diff --git a/encore-common/src/test/resources/profile/test_profile_invalid.yml b/encore-common/src/test/resources/profile/test_profile_invalid.yml index 1a2d761..33cc478 100644 --- a/encore-common/src/test/resources/profile/test_profile_invalid.yml +++ b/encore-common/src/test/resources/profile/test_profile_invalid.yml @@ -48,8 +48,10 @@ encodes: suffix: STEREO - type: ThumbnailEncode + decodeOutput: 0 - type: ThumbnailMapEncode + decodeOutput: 0 - type: 
AudioEncode codec: aac diff --git a/encore-web/src/test/resources/profile/multiple_inputs.yml b/encore-web/src/test/resources/profile/multiple_inputs.yml index 30a8d3d..1d8b928 100644 --- a/encore-web/src/test/resources/profile/multiple_inputs.yml +++ b/encore-web/src/test/resources/profile/multiple_inputs.yml @@ -67,7 +67,9 @@ encodes: - type: ThumbnailMapEncode cols: 6 rows: 10 + decodeOutput: 0 - type: ThumbnailEncode + decodeOutput: 0 diff --git a/encore-web/src/test/resources/profile/program-x265.yml b/encore-web/src/test/resources/profile/program-x265.yml index baf7ad6..ec603a1 100644 --- a/encore-web/src/test/resources/profile/program-x265.yml +++ b/encore-web/src/test/resources/profile/program-x265.yml @@ -471,5 +471,7 @@ encodes: channelLayout: '5.1' - type: ThumbnailMapEncode + decodeOutput: 0 - type: ThumbnailEncode + decodeOutput: 0 diff --git a/encore-web/src/test/resources/profile/program.yml b/encore-web/src/test/resources/profile/program.yml index 42bbf5f..e12f6c5 100644 --- a/encore-web/src/test/resources/profile/program.yml +++ b/encore-web/src/test/resources/profile/program.yml @@ -219,7 +219,9 @@ encodes: channelLayout: '5.1' - type: ThumbnailMapEncode + decodeOutput: 0 - type: ThumbnailEncode + decodeOutput: 0 diff --git a/encore-web/src/test/resources/profile/test_profile_invalid.yml b/encore-web/src/test/resources/profile/test_profile_invalid.yml index 1a2d761..33cc478 100644 --- a/encore-web/src/test/resources/profile/test_profile_invalid.yml +++ b/encore-web/src/test/resources/profile/test_profile_invalid.yml @@ -48,8 +48,10 @@ encodes: suffix: STEREO - type: ThumbnailEncode + decodeOutput: 0 - type: ThumbnailMapEncode + decodeOutput: 0 - type: AudioEncode codec: aac