Skip to content

Commit

Permalink
As a workaround for issues with thumbnails in combination with FFmpeg…
Browse files Browse the repository at this point in the history
… 7, add option decodeOutput to ThumbnailEncode and ThumbnailMapEncode, allowing them to use the video output as input via the loopback decoder introduced in FFmpeg 7.
  • Loading branch information
fhermansson committed May 16, 2024
1 parent acd9d62 commit 2159928
Show file tree
Hide file tree
Showing 15 changed files with 109 additions and 42 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ data class Output(
val format: String = "mp4",
val postProcessor: PostProcessor = PostProcessor { outputFolder -> listOf(outputFolder.resolve(output)) },
val id: String,
val isImage: Boolean = false
val isImage: Boolean = false,
val decodeOutputStream: String? = null
)

fun interface PostProcessor {
Expand All @@ -39,7 +40,7 @@ data class AudioStreamEncode(
override val params: List<String>,
override val filter: String? = null,
override val inputLabels: List<String>,
val preserveLayout: Boolean = false
val preserveLayout: Boolean = false,
) : StreamEncode {
override val twoPass: Boolean
get() = false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ enum class ChannelLayout(@JsonValue val layoutName: String, val channels: List<C
CH_LAYOUT_5POINT1_SIDE("5.1(side)", listOf(FL, FR, FC, LFE, SL, SR)),
CH_LAYOUT_6POINT0("6.0", listOf(FL, FR, FC, BC, SL, SR)),
CH_LAYOUT_6POINT0_FRONT("6.0(front)", listOf(FL, FR, FLC, FRC, SL, SR)),
CH_LAYOUT_3POINT1POINT2("3.1.2", listOf(FL, FR, FC, LFE, TFL, TFR)),
CH_LAYOUT_HEXAGONAL("hexagonal", listOf(FL, FR, FC, BL, BR, BC)),
CH_LAYOUT_6POINT1("6.1", listOf(FL, FR, FC, LFE, BC, SL, SR)),
CH_LAYOUT_6POINT1_BACK("6.1(back)", listOf(FL, FR, FC, LFE, BL, BR, BC)),
Expand All @@ -60,9 +61,16 @@ enum class ChannelLayout(@JsonValue val layoutName: String, val channels: List<C
CH_LAYOUT_7POINT1("7.1", listOf(FL, FR, FC, LFE, BL, BR, SL, SR)),
CH_LAYOUT_7POINT1_WIDE("7.1(wide)", listOf(FL, FR, FC, LFE, BL, BR, FLC, FRC)),
CH_LAYOUT_7POINT1_WIDE_SIDE("7.1(wide-side)", listOf(FL, FR, FC, LFE, FLC, FRC, SL, SR)),
CH_LAYOUT_OCTAGONAL("octagonal)", listOf(FL, FR, FC, BL, BR, BC, SL, SR)),
CH_LAYOUT_5POINT1POINT2("5.1.2", listOf(FL, FR, FC, LFE, BL, BR, TFL, TFR)),
CH_LAYOUT_OCTAGONAL("octagonal", listOf(FL, FR, FC, BL, BR, BC, SL, SR)),
CH_LAYOUT_CUBE("cube", listOf(FL, FR, BL, BR, TFL, TFR, TBL, TBR)),
CH_LAYOUT_5POINT1POINT4("5.1.4", listOf(FL, FR, FC, LFE, BL, BR, TFL, TFR, TBL, TBR)),
CH_LAYOUT_7POINT1POINT2("7.1.2", listOf(FL, FR, FC, LFE, BL, BR, SL, SR, TFL, TFR)),
CH_LAYOUT_7POINT1POINT4("7.1.4", listOf(FL, FR, FC, LFE, BL, BR, SL, SR, TFL, TFR, TBL, TBR)),
CH_LAYOUT_7POINT2POINT3("7.2.3", listOf(FL, FR, FC, LFE, BL, BR, SL, SR, TFL, TFR, TBC, LFE2)),
CH_LAYOUT_9POINT1POINT4("9.1.4", listOf(FL, FR, FC, LFE, BL, BR, FLC, FRC, SL, SR, TFL, TFR, TBL, TBR)),
CH_LAYOUT_HEXADECAGONAL(
"hexadecagonal)",
"hexadecagonal",
listOf(
FL, FR, FC, BL, BR, BC, SL, SR, TFL, TFC, TFR, TBL, TBC, TBR, WL, WR
)
Expand Down Expand Up @@ -99,7 +107,7 @@ enum class ChannelLayout(@JsonValue val layoutName: String, val channels: List<C
);

companion object {
fun defaultChannelLayout(numChannels: Int) = values().firstOrNull { it.channels.size == numChannels }
fun getByNameOrNull(layoutName: String) = values().firstOrNull { it.layoutName == layoutName }
fun defaultChannelLayout(numChannels: Int) = entries.firstOrNull { it.channels.size == numChannels }
fun getByNameOrNull(layoutName: String) = entries.firstOrNull { it.layoutName == layoutName }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ data class ThumbnailEncode(
val suffixZeroPad: Int = 2,
val inputLabel: String = DEFAULT_VIDEO_LABEL,
val optional: Boolean = false,
val intervalSeconds: Double? = null
val intervalSeconds: Double? = null,
val decodeOutput: Int? = null
) : OutputProducer {

private val log = KotlinLogging.logger { }
Expand Down Expand Up @@ -58,20 +59,23 @@ data class ThumbnailEncode(
video = VideoStreamEncode(
params = params.toParams(),
filter = filter,
inputLabels = listOf(inputLabel)
inputLabels = listOf(inputLabel),
),
output = "${job.baseName}$suffix%0${suffixZeroPad}d.jpg",
postProcessor = { outputFolder ->
outputFolder.listFiles().orEmpty().filter { it.name.matches(fileRegex) }
},
isImage = true
isImage = true,
decodeOutputStream = decodeOutput?.let { "$it:v:0" }
)
}

private fun selectInterval(interval: Double, outputSeek: Double?): String {
val select = outputSeek
?.let { "gte(t\\,$it)*(isnan(prev_selected_t)+gt(floor((t-$it)/$interval)\\,floor((prev_selected_t-$it)/$interval)))" }
?: "isnan(prev_selected_t)+gt(floor(t/$interval)\\,floor(prev_selected_t/$interval))"
val select = if (outputSeek != null && decodeOutput == null) {
"gte(t\\,$outputSeek)*(isnan(prev_selected_t)+gt(floor((t-$outputSeek)/$interval)\\,floor((prev_selected_t-$outputSeek)/$interval)))"
} else {
"isnan(prev_selected_t)+gt(floor(t/$interval)\\,floor(prev_selected_t/$interval))"
}
return "select=$select"
}

Expand All @@ -87,7 +91,14 @@ data class ThumbnailEncode(
val outputDuration = outputDuration(videoIn, job)
return percentages
.map { it * outputDuration / 100 }
.map { t -> job.seekTo?.let { t + it } ?: t }
.map { t ->
val outputSeek = job.seekTo
if (outputSeek != null && decodeOutput == null) {
t + outputSeek
} else {
t
}
}
}

private fun selectTimes(times: List<Double>) =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,12 @@ data class ThumbnailMapEncode(
val tileHeight: Int = 90,
val cols: Int = 12,
val rows: Int = 20,
val quality: Int = 5,
val optional: Boolean = true,
val suffix: String = "_${cols}x${rows}_${tileWidth}x${tileHeight}_thumbnail_map",
val format: String = "jpg",
val inputLabel: String = DEFAULT_VIDEO_LABEL
val inputLabel: String = DEFAULT_VIDEO_LABEL,
val decodeOutput: Int? = null
) : OutputProducer {

private val log = KotlinLogging.logger { }
Expand All @@ -40,53 +42,54 @@ data class ThumbnailMapEncode(
?: return logOrThrow("No input with label $inputLabel!")

var inputDuration = videoStream.duration
val outputSeek = job.seekTo
inputSeekTo?.let { inputDuration -= it }
job.seekTo?.let { inputDuration -= it }
outputSeek?.let { inputDuration -= it }
val outputDuration = job.duration ?: inputDuration

if (outputDuration <= 0) {
return logOrThrow("Cannot create thumbnail map $suffix! Could not detect duration.")
}

val interval = outputDuration / (cols * rows)
val select = job.seekTo
?.let { "gte(t\\,$it)*(isnan(prev_selected_t)+gt(floor((t-$it)/$interval)\\,floor((prev_selected_t-$it)/$interval)))" }
?: "isnan(prev_selected_t)+gt(floor(t/$interval)\\,floor(prev_selected_t/$interval))"
val select = if (outputSeek != null && decodeOutput == null) {
"gte(t\\,$outputSeek)*(isnan(prev_selected_t)+gt(floor((t-$outputSeek)/$interval)\\,floor((prev_selected_t-$outputSeek)/$interval)))"
} else {
"isnan(prev_selected_t)+gt(floor(t/$interval)\\,floor(prev_selected_t/$interval))"
}

val tempFolder = createTempDir(suffix).toFile()
tempFolder.deleteOnExit()

val pad = "aspect=${Fraction(tileWidth, tileHeight).stringValue()}:x=(ow-iw)/2:y=(oh-ih)/2"

val scale = if (format == "jpg") {
"-1:$tileHeight:out_range=jpeg"
} else {
"-1:$tileHeight"
}
val scale = "-1:$tileHeight"

val params = linkedMapOf(
"q:v" to "5",
"fps_mode" to "vfr"
)
return Output(
id = "$suffix.$format",
video = VideoStreamEncode(
params = params.toParams(),
filter = "select=$select,pad=$pad,scale=$scale",
inputLabels = listOf(inputLabel)
inputLabels = listOf(inputLabel),
),
output = tempFolder.resolve("${job.baseName}$suffix%04d.$format").toString(),
output = tempFolder.resolve("${job.baseName}$suffix%04d.png").toString(),
postProcessor = { outputFolder ->
try {
val targetFile = outputFolder.resolve("${job.baseName}$suffix.$format")
val process = ProcessBuilder(
"ffmpeg",
"-y",
"-i",
"${job.baseName}$suffix%04d.$format",
"${job.baseName}$suffix%04d.png",
"-vf",
"tile=${cols}x$rows",
"-frames:v",
"1",
"-q:v",
"$quality",
"$targetFile"
)
.directory(tempFolder)
Expand All @@ -102,7 +105,8 @@ data class ThumbnailMapEncode(
emptyList()
}
},
isImage = true
isImage = true,
decodeOutputStream = decodeOutput?.let { "$it:v:0" }
)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,23 @@ class CommandBuilder(
}

private fun secondPassCommand(outputs: List<Output>): List<String> {
val videoFilters = videoFilters(encoreJob.inputs, outputs)
val audioFilters = audioFilters(outputs)
val outputParams = outputs.flatMap(this::secondPassParams)
return inputParams(encoreJob.inputs) + filterParam(videoFilters + audioFilters) + outputParams
val (loopbackOutputs, mainOutputs) = outputs.partition { it.decodeOutputStream != null }
val videoFilters = videoFilters(encoreJob.inputs, mainOutputs)
val audioFilters = audioFilters(mainOutputs)
val outputParams = mainOutputs.flatMap(this::secondPassParams)
return inputParams(encoreJob.inputs) + filterParam(videoFilters + audioFilters) + outputParams + loopbackParams(loopbackOutputs)
}

/**
 * Builds the ffmpeg arguments for outputs that take their input from a loopback
 * decoder rather than from the shared filter graph.
 *
 * For each output this emits `-dec <decodeOutputStream>`, then a dedicated
 * `-filter_complex` that routes `[dec:<index>]` through the output's video filter
 * into the output's video map label, then the regular second pass parameters of
 * that output.
 *
 * The decoder label index is the output's position in [outputs].
 *
 * @param outputs outputs expected to have `decodeOutputStream` set.
 * @return the concatenated argument list for all given outputs.
 * @throws IllegalStateException if an output has no `decodeOutputStream`.
 */
private fun loopbackParams(outputs: List<Output>): List<String> =
    outputs.flatMapIndexed { index, output ->
        val decodeStream = checkNotNull(output.decodeOutputStream) {
            "No decodeOutputStream in $output!"
        }
        // An empty filter chain between two link labels is not a valid filtergraph,
        // so fall back to ffmpeg's pass-through "null" video filter when no filter is set.
        val filter = output.video?.filter?.takeIf { it.isNotBlank() } ?: "null"
        listOf(
            "-dec",
            decodeStream,
            "-filter_complex",
            "[dec:$index]$filter${MapName.VIDEO.mapLabel(output.id)}",
        ) + secondPassParams(output)
    }

private fun audioFilters(outputs: List<Output>): List<String> {
val audioSplits = encoreJob.inputs.mapIndexedNotNull { inputIndex, input ->
if (input !is AudioIn) return@mapIndexedNotNull null
Expand Down Expand Up @@ -230,8 +241,10 @@ class CommandBuilder(
}

private fun secondPassParams(output: Output): List<String> {
val seekParams = output.decodeOutputStream?.let { emptyList() } ?: seekParams()
val durationParams = output.decodeOutputStream?.let { emptyList() } ?: durationParams()
val mapV: List<String> =
output.video?.let { listOf("-map", MapName.VIDEO.mapLabel(output.id)) + seekParams() }
output.video?.let { listOf("-map", MapName.VIDEO.mapLabel(output.id)) + seekParams }
?: emptyList()

val preserveAudioLayout = output.audioStreams.any { it.preserveLayout }
Expand All @@ -246,7 +259,7 @@ class CommandBuilder(
} else {
MapName.AUDIO.mapLabel("${output.id}-$index")
}
listOf("-map", mapLabel) + seekParams()
listOf("-map", mapLabel) + seekParams
}

val maps = mapV + mapA
Expand All @@ -264,7 +277,7 @@ class CommandBuilder(
val metaDataParams = listOf("-metadata", "comment=Transcoded using Encore")

return maps +
durationParams() +
durationParams +
videoParams + audioParams +
metaDataParams +
File(outputFolder).resolve(output.output).toString()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ class ThumbnailMapEncodeTest {
.hasId("_12x20_160x90_thumbnail_map.jpg")
.hasVideo(
VideoStreamEncode(
params = listOf("-q:v", "5", "-fps_mode", "vfr"),
filter = "select=isnan(prev_selected_t)+gt(floor(t/0.041666666666666664)\\,floor(prev_selected_t/0.041666666666666664)),pad=aspect=16/9:x=(ow-iw)/2:y=(oh-ih)/2,scale=-1:90:out_range=jpeg",
params = listOf("-fps_mode", "vfr"),
filter = "select=isnan(prev_selected_t)+gt(floor(t/0.041666666666666664)\\,floor(prev_selected_t/0.041666666666666664)),pad=aspect=16/9:x=(ow-iw)/2:y=(oh-ih)/2,scale=-1:90",
twoPass = false,
inputLabels = listOf(DEFAULT_VIDEO_LABEL)
)
Expand All @@ -50,8 +50,8 @@ class ThumbnailMapEncodeTest {
.hasId("_6x10_160x90_thumbnail_map.jpg")
.hasVideo(
VideoStreamEncode(
params = listOf("-q:v", "5", "-fps_mode", "vfr"),
filter = "select=gte(t\\,1.0)*(isnan(prev_selected_t)+gt(floor((t-1.0)/0.08333333333333333)\\,floor((prev_selected_t-1.0)/0.08333333333333333))),pad=aspect=16/9:x=(ow-iw)/2:y=(oh-ih)/2,scale=-1:90:out_range=jpeg",
params = listOf("-fps_mode", "vfr"),
filter = "select=gte(t\\,1.0)*(isnan(prev_selected_t)+gt(floor((t-1.0)/0.08333333333333333)\\,floor((prev_selected_t-1.0)/0.08333333333333333))),pad=aspect=16/9:x=(ow-iw)/2:y=(oh-ih)/2,scale=-1:90",
twoPass = false,
inputLabels = listOf(DEFAULT_VIDEO_LABEL)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,15 +202,15 @@ internal class CommandBuilderTest {
encodingProperties.copy(exitOnError = false, globalParams = linkedMapOf("err_detect" to "explode"))
)

val buildCommands = commandBuilder.buildCommands(listOf(output(true), audioOutput("other", "extra")))
val buildCommands = commandBuilder.buildCommands(listOf(output(true), audioOutput("other", "extra"), thumbnailOutput("thumb", "0:v:0")))

assertThat(buildCommands).hasSize(2)

val firstPass = buildCommands[0].joinToString(" ")
val secondPass = buildCommands[1].joinToString(" ")

assertThat(firstPass).isEqualTo("ffmpeg -err_detect explode -hide_banner -loglevel +level -y -f mp4 -t 22.5 -i /input/test.mp4 -filter_complex sws_flags=scaling;[0:v:1]yadif,setdar=16/9,scale=iw*sar:ih,crop=min(iw\\,ih*1/1):min(ih\\,iw/(1/1)),pad=aspect=16/9:x=(ow-iw)/2:y=(oh-ih)/2,video,filter,split=1[VIDEO-main-test-out];[VIDEO-main-test-out]video-filter[VIDEO-test-out] -map [VIDEO-test-out] -ss 12.1 -an -t 10.4 first pass -f mp4 /dev/null")
assertThat(secondPass).isEqualTo("ffmpeg -err_detect explode -hide_banner -loglevel +level -y -f mp4 -t 22.5 -i /input/test.mp4 -ac 4 -t 22.5 -i /input/main-audio.mp4 -t 22.5 -i /input/other-audio.mp4 -filter_complex sws_flags=scaling;[0:v:1]yadif,setdar=16/9,scale=iw*sar:ih,crop=min(iw\\,ih*1/1):min(ih\\,iw/(1/1)),pad=aspect=16/9:x=(ow-iw)/2:y=(oh-ih)/2,video,filter,split=1[VIDEO-main-test-out];[VIDEO-main-test-out]video-filter[VIDEO-test-out];[1:a]join=inputs=4:channel_layout=4.0:map=0.0-FL|1.0-FR|2.0-FC|3.0-BC,audio-main,main-filter,asplit=1[AUDIO-main-test-out-0];[2:a:3]asplit=1[AUDIO-other-extra-0];[AUDIO-main-test-out-0]audio-filter[AUDIO-test-out-0];[AUDIO-other-extra-0]audio-filter-extra[AUDIO-extra-0] -map [VIDEO-test-out] -ss 12.1 -map [AUDIO-test-out-0] -ss 12.1 -t 10.4 video params audio params -metadata comment=Transcoded using Encore /tmp/123/out.mp4 -map [AUDIO-extra-0] -ss 12.1 -t 10.4 -vn audio extra -metadata comment=Transcoded using Encore /tmp/123/extra.mp4")
assertThat(secondPass).isEqualTo("ffmpeg -err_detect explode -hide_banner -loglevel +level -y -f mp4 -t 22.5 -i /input/test.mp4 -ac 4 -t 22.5 -i /input/main-audio.mp4 -t 22.5 -i /input/other-audio.mp4 -filter_complex sws_flags=scaling;[0:v:1]yadif,setdar=16/9,scale=iw*sar:ih,crop=min(iw\\,ih*1/1):min(ih\\,iw/(1/1)),pad=aspect=16/9:x=(ow-iw)/2:y=(oh-ih)/2,video,filter,split=1[VIDEO-main-test-out];[VIDEO-main-test-out]video-filter[VIDEO-test-out];[1:a]join=inputs=4:channel_layout=4.0:map=0.0-FL|1.0-FR|2.0-FC|3.0-BC,audio-main,main-filter,asplit=1[AUDIO-main-test-out-0];[2:a:3]asplit=1[AUDIO-other-extra-0];[AUDIO-main-test-out-0]audio-filter[AUDIO-test-out-0];[AUDIO-other-extra-0]audio-filter-extra[AUDIO-extra-0] -map [VIDEO-test-out] -ss 12.1 -map [AUDIO-test-out-0] -ss 12.1 -t 10.4 video params audio params -metadata comment=Transcoded using Encore /tmp/123/out.mp4 -map [AUDIO-extra-0] -ss 12.1 -t 10.4 -vn audio extra -metadata comment=Transcoded using Encore /tmp/123/extra.mp4 -dec 0:v:0 -filter_complex [dec:0]thumb-filter[VIDEO-thumb] -map [VIDEO-thumb] thumb thumb -an -metadata comment=Transcoded using Encore /tmp/123/thumb.jpg")
}

private fun output(twoPass: Boolean): Output {
Expand All @@ -234,7 +234,7 @@ internal class CommandBuilderTest {
)
}

fun audioOutput(label: String, id: String): Output {
private fun audioOutput(label: String, id: String): Output {
return Output(
id = id,
output = "$id.mp4",
Expand All @@ -248,4 +248,18 @@ internal class CommandBuilderTest {
)
)
}

/**
 * Test fixture: creates an image [Output] written to `<id>.jpg` whose single video
 * stream encode uses the `thumb-filter` filter and the params `thumb`, `<id>`.
 *
 * @param id output id; also used as the file base name and as the second encode param.
 * @param decodeOutputStream value passed through to the output's `decodeOutputStream`
 *   (null by default).
 */
private fun thumbnailOutput(id: String, decodeOutputStream: String? = null): Output {
    val thumbnailVideo = VideoStreamEncode(
        filter = "thumb-filter",
        inputLabels = listOf(DEFAULT_VIDEO_LABEL),
        params = listOf("thumb", id),
    )
    return Output(
        decodeOutputStream = decodeOutputStream,
        isImage = true,
        video = thumbnailVideo,
        output = "$id.jpg",
        id = id,
    )
}
}
2 changes: 2 additions & 0 deletions encore-common/src/test/resources/profile/multiple_inputs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,9 @@ encodes:
- type: ThumbnailMapEncode
cols: 6
rows: 10
decodeOutput: 0

- type: ThumbnailEncode
decodeOutput: 0


2 changes: 2 additions & 0 deletions encore-common/src/test/resources/profile/program-x265.yml
Original file line number Diff line number Diff line change
Expand Up @@ -471,5 +471,7 @@ encodes:
channelLayout: '5.1'

- type: ThumbnailMapEncode
decodeOutput: 0

- type: ThumbnailEncode
decodeOutput: 0
2 changes: 2 additions & 0 deletions encore-common/src/test/resources/profile/program.yml
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,9 @@ encodes:
channelLayout: '5.1'

- type: ThumbnailMapEncode
decodeOutput: 0

- type: ThumbnailEncode
decodeOutput: 0


Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,10 @@ encodes:
suffix: STEREO

- type: ThumbnailEncode
decodeOutput: 0

- type: ThumbnailMapEncode
decodeOutput: 0

- type: AudioEncode
codec: aac
Expand Down
2 changes: 2 additions & 0 deletions encore-web/src/test/resources/profile/multiple_inputs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,9 @@ encodes:
- type: ThumbnailMapEncode
cols: 6
rows: 10
decodeOutput: 0

- type: ThumbnailEncode
decodeOutput: 0


2 changes: 2 additions & 0 deletions encore-web/src/test/resources/profile/program-x265.yml
Original file line number Diff line number Diff line change
Expand Up @@ -471,5 +471,7 @@ encodes:
channelLayout: '5.1'

- type: ThumbnailMapEncode
decodeOutput: 0

- type: ThumbnailEncode
decodeOutput: 0
2 changes: 2 additions & 0 deletions encore-web/src/test/resources/profile/program.yml
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,9 @@ encodes:
channelLayout: '5.1'

- type: ThumbnailMapEncode
decodeOutput: 0

- type: ThumbnailEncode
decodeOutput: 0


Loading

0 comments on commit 2159928

Please sign in to comment.