diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/ProVideoEditorPlugin.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/ProVideoEditorPlugin.kt
index d41af78..e6741e9 100644
--- a/android/src/main/kotlin/ch/waio/pro_video_editor/ProVideoEditorPlugin.kt
+++ b/android/src/main/kotlin/ch/waio/pro_video_editor/ProVideoEditorPlugin.kt
@@ -303,7 +303,7 @@ class ProVideoEditorPlugin : FlutterPlugin, MethodCallHandler {
                         }
                     },
                     onError = { error ->
-                        Log.e("RenderVideo", "Error rendering video: ${error.message}")
+                        Log.e("RenderVideo", "Error rendering video: ${error.message}", error)
                         mainHandler.post {
                             val removedTask = activeRenderTasks.remove(id)
                             val code = if (removedTask?.canceled?.get() == true) {
@@ -321,9 +321,11 @@ class ProVideoEditorPlugin : FlutterPlugin, MethodCallHandler {
                     jobHandle.cancel()
                 }
             } catch (e: IllegalArgumentException) {
+                Log.e("RenderVideo", "Error rendering video: ${e.message}", e)
                 activeRenderTasks.remove(id)
                 result.error("INVALID_ARGUMENTS", e.message, null)
             } catch (e: Exception) {
+                Log.e("RenderVideo", "Error rendering video: ${e.message}", e)
                 activeRenderTasks.remove(id)
                 result.error("RENDER_ERROR", "Failed to start render: ${e.message}", null)
             }
diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/RenderVideo.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/RenderVideo.kt
index 4051de8..1abbd5f 100644
--- a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/RenderVideo.kt
+++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/RenderVideo.kt
@@ -18,8 +18,10 @@ import java.util.concurrent.atomic.AtomicReference
 import applyBitrate
 import mapFormatToMimeType
 import ch.waio.pro_video_editor.src.features.render.helpers.applyComposition
+import ch.waio.pro_video_editor.src.features.render.helpers.CompositionBuilder
 import ch.waio.pro_video_editor.src.features.render.helpers.VolumeControlAudioMixerFactory
 import ch.waio.pro_video_editor.src.features.render.helpers.ConfigurableInAppMp4Muxer
+import ch.waio.pro_video_editor.src.features.render.helpers.MediaInfoExtractor
 import ch.waio.pro_video_editor.src.features.render.helpers.VideoTranscoder
 import ch.waio.pro_video_editor.src.features.render.models.RenderConfig
 import ch.waio.pro_video_editor.src.features.render.models.RenderJobHandle
@@ -57,27 +59,59 @@ class RenderVideo(private val context: Context) {
      * Checks if transcoding is needed for video compatibility.
      *
      * Transcoding is needed when:
-     * 1. GPU effects are used with HEVC 10-bit HDR videos
+     * 1. GPU effects are used with HEVC 10-bit HDR videos.
      * 2. Multiple videos are being merged and at least one is HEVC 10-bit
-     *    (mixing different codecs in a composition can cause frame processing errors)
+     *    (mixing different codecs in a composition can cause frame processing errors).
+     * 3. Multi-channel audio (5.1/7.1) is detected AND more than one unmuted audio source
+     *    is present (mixing multi-channel with stereo or silence causes mixer reconfiguration errors).
      */
     private fun needsPreTranscoding(config: RenderConfig): Boolean {
-        // Check for GPU effects
+        // 1. Check for HEVC 10-bit / HDR compatibility with GPU effects
         if (hasGpuEffects(config)) {
-            return true
+            val hasAnyHevc10bit = config.videoClips.any { clip ->
+                val info = MediaInfoExtractor.getVideoFormatInfo(clip.inputPath)
+                info.isHevc && info.bitDepth == 10
+            }
+            if (hasAnyHevc10bit) {
+                Log.d(RENDER_TAG, "HEVC 10-bit detected with GPU effects, pre-transcoding needed")
+                return true
+            }
+        }
+
+        // 2. Check for multi-channel audio mixing complexity
+        val unmutedVideoClips = config.videoClips.filter { (it.volume ?: 1.0f) > 0.0f }
+        val audibleCustomTracks = config.audioTracks.filter { it.volume > 0.0f }
+        val totalAudibleSources = unmutedVideoClips.size + audibleCustomTracks.size
+
+        val hasMultiChannel = config.videoClips.any { clip ->
+            (MediaInfoExtractor.getAudioChannelCount(clip.inputPath) ?: 2) > 2
+        }
+
+        if (hasMultiChannel) {
+            // We only MUST transcode multi-channel audio if it needs to be mixed with something else.
+            // If it's the only source, Media3 can handle downmixing via AudioProcessors during render.
+            if (totalAudibleSources > 1) {
+                Log.d(RENDER_TAG, "Multi-channel audio detected with multiple sources, pre-transcoding for mixing safety")
+                return true
+            }
+
+            // TODO: with multiple video clips (even if muted), mixing/overlapping transitions
+            // can trigger audio reconfiguration errors if formats don't match perfectly.
+            if (config.videoClips.size > 1) {
+                Log.d(RENDER_TAG, "Multi-channel audio detected in multi-clip merge, pre-transcoding for transition stability")
+                return true
+            }
         }
 
-        // When multiple clips are being merged, check if any need transcoding
-        // Mixing different codecs (HEVC + H.264) can cause frame processing errors
+        // 3. Codec mixing check (H.264 + HEVC)
+        // Mixing different codecs (HEVC + H.264) in a single sequence can cause frame processing errors
         if (config.videoClips.size > 1) {
             val hasAnyHevc10bit = config.videoClips.any { clip ->
-                VideoTranscoder.needsTranscoding(clip.inputPath)
+                val info = MediaInfoExtractor.getVideoFormatInfo(clip.inputPath)
+                info.isHevc && info.bitDepth == 10
             }
             if (hasAnyHevc10bit) {
-                Log.d(
-                    RENDER_TAG, "Multiple video clips with HEVC 10-bit detected, " +
-                        "pre-transcoding to ensure codec compatibility"
-                )
+                Log.d(RENDER_TAG, "HEVC 10-bit detected in merge, pre-transcoding to ensure consistency")
                 return true
             }
         }
@@ -138,8 +172,8 @@ class RenderVideo(private val context: Context) {
         val updatedClips = config.videoClips.map { clip ->
             val newPath = transcodeMap[clip.inputPath] ?: clip.inputPath
             if (newPath != clip.inputPath) {
-                // If transcoded, use the new path but keep trim times and volume
-                VideoClip(newPath, clip.startUs, clip.endUs, clip.volume)
+                // If transcoded, use the new path but preserve all other parameters
+                clip.copy(inputPath = newPath)
             } else {
                 clip
             }
@@ -241,10 +275,39 @@ class RenderVideo(private val context: Context) {
         // Check if we need custom audio mixing with volume control
        val hasCustomAudio = config.audioTracks.isNotEmpty()
 
-        // Determine if video audio will be present in the mix
-        // Video audio is removed when audio is disabled or all clips have volume 0
-        val videoAudioPresent = config.enableAudio &&
-            config.videoClips.any { (it.volume ?: 1.0f) > 0.0f }
+        // Determine how many video sequences will contribute to the audio mix
+        // A video sequence has audio if audio is enabled AND at least one clip has volume > 0
+        val videoAudioSourceCount: Int
+        val videoSequenceVolumes: List<Float>
+
+        if (config.enableAudio) {
+            val needsMultipleSequences = config.videoClips.any {
+                it.x != null || it.y != null || it.width != null || it.height != null ||
+                it.segmentTimeUs != null || it.opacity != null || (it.zIndex ?: 0) != 0
+            }
+            if (needsMultipleSequences) {
+                // In PiP mode, each clip is a separate sequence.
+                // IMPORTANT: Match the exact layering logic from CompositionBuilder.
+                // 1. Higher zIndex on top.
+                // 2. Default zIndex is 0.
+                // 3. If zIndex is same, latter segment in input list is on top.
+                //
+                // In Media3, the first sequence in the list is the bottom-most layer.
+                // By using a stable ascending sort, we satisfy the rules.
+                val sortedClips = config.videoClips.sortedBy { it.zIndex ?: 0 }
+                val activeClips = sortedClips.filter { (it.volume ?: 1.0f) > 0.0f }
+                videoAudioSourceCount = activeClips.size
+                videoSequenceVolumes = activeClips.map { it.volume ?: 1.0f }
+            } else {
+                // Single sequence. Has audio if any clip is unmuted.
+                val isUnmuted = config.videoClips.any { (it.volume ?: 1.0f) > 0.0f }
+                videoAudioSourceCount = if (isUnmuted) 1 else 0
+                videoSequenceVolumes = if (isUnmuted) listOf(1.0f) else emptyList()
+            }
+        } else {
+            videoAudioSourceCount = 0
+            videoSequenceVolumes = emptyList()
+        }
 
         // Build transformer with callbacks
         val transformerBuilder = Transformer.Builder(context)
@@ -266,7 +329,8 @@ class RenderVideo(private val context: Context) {
             transformerBuilder.setAudioMixerFactory(
                 VolumeControlAudioMixerFactory(
                     trackVolumes = trackVolumes,
-                    videoAudioPresent = videoAudioPresent
+                    videoAudioSourceCount = videoAudioSourceCount,
+                    videoSequenceVolumes = videoSequenceVolumes
                 )
             )
         }
@@ -310,16 +374,25 @@ class RenderVideo(private val context: Context) {
         // Create composition (now fast - no manual audio mixing needed, Media3 handles it natively)
         Thread {
             try {
-                val composition = applyComposition(
-                    context = context,
-                    config = config,
-                    videoEffects = videoEffects,
-                    audioEffects = audioEffects
-                )
+                val compositionBuilder = CompositionBuilder(config, context)
+                val composition = compositionBuilder
+                    .setVideoEffects(videoEffects)
+                    .setAudioEffects(audioEffects)
+                    .build()
 
                 mainHandler.post {
                     if (composition != null) {
                         transformer.start(composition, outputFile.absolutePath)
+
+                        // Register release on transformer end
+                        transformer.addListener(object : Transformer.Listener {
+                            override fun onCompleted(c: Composition, r: ExportResult) {
+                                compositionBuilder.release()
+                            }
+
+                            override fun onError(c: Composition, r: ExportResult, e: ExportException) {
+                                compositionBuilder.release()
+                            }
+                        })
 
                         // Start progress tracking loop
                         val progressHolder = ProgressHolder()
@@ -339,6 +412,7 @@ class RenderVideo(private val context: Context) {
                             }
                         })
                     } else {
+                        compositionBuilder.release()
                        onError(IllegalStateException("Failed to create composition"))
                     }
                 }
diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/ApplyComposition.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/ApplyComposition.kt
index 3582caf..0205b90 100644
--- a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/ApplyComposition.kt
+++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/ApplyComposition.kt
@@ -27,7 +27,7 @@ fun applyComposition(
     videoEffects: List<Effect>,
     audioEffects: List<AudioProcessor>
 ): Composition? {
-    return CompositionBuilder(context, config)
+    return CompositionBuilder(config, context)
         .setVideoEffects(videoEffects)
         .setAudioEffects(audioEffects)
         .build()
diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/ApplyImageLayer.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/ApplyImageLayer.kt
index 0e08a58..c935f7a 100644
--- a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/ApplyImageLayer.kt
+++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/ApplyImageLayer.kt
@@ -67,7 +67,7 @@ fun applyImageLayer(
 @UnstableApi
 fun applyTimedImageLayers(
     videoEffects: MutableList<Effect>,
-    imageLayers: List<VideoSequenceBuilder.ImageLayerConfig>,
+    imageLayers: List<ImageLayer>,
     videoWidth: Int,
     videoHeight: Int
 ) {
@@ -79,7 +79,7 @@ fun applyTimedImageLayers(
     )
     for (layer in imageLayers) {
         try {
-            val imageBytes = layer.imageBytes ?: continue
+            val imageBytes = layer.imageData
             val options = BitmapFactory.Options().apply {
                 inPreferredConfig = Bitmap.Config.ARGB_8888
             }
diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/ApplyOpacity.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/ApplyOpacity.kt
new file mode 100644
index 0000000..d79ab0d
--- /dev/null
+++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/ApplyOpacity.kt
@@ -0,0 +1,24 @@
+package ch.waio.pro_video_editor.src.features.render.helpers
+
+import RENDER_TAG
+import androidx.media3.common.Effect
+import androidx.media3.common.util.UnstableApi
+import androidx.media3.effect.AlphaScale
+import ch.waio.pro_video_editor.src.shared.logging.PluginLog as Log
+
+/**
+ * Applies opacity to a video segment.
+ *
+ * @param videoEffects List to add opacity effect to
+ * @param opacity Transparency factor (0.0 to 1.0)
+ */
+@UnstableApi
+fun applyOpacity(
+    videoEffects: MutableList<Effect>,
+    opacity: Float?
+) {
+    if (opacity == null || opacity >= 1.0f) return
+
+    Log.d(RENDER_TAG, "Applying opacity: $opacity")
+    videoEffects += AlphaScale(opacity)
+}
diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/AudioMixingUtils.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/AudioMixingUtils.kt
new file mode 100644
index 0000000..96548e8
--- /dev/null
+++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/AudioMixingUtils.kt
@@ -0,0 +1,75 @@
+package ch.waio.pro_video_editor.src.features.render.helpers
+
+import RENDER_TAG
+import androidx.media3.common.audio.AudioProcessor
+import androidx.media3.common.audio.ChannelMixingAudioProcessor
+import androidx.media3.common.audio.ChannelMixingMatrix
+import androidx.media3.common.util.UnstableApi
+import ch.waio.pro_video_editor.src.shared.logging.PluginLog as Log
+
+/**
+ * Utility functions for audio channel mixing and normalization.
+ */
+@UnstableApi
+object AudioMixingUtils {
+
+    /**
+     * Creates a ChannelMixingAudioProcessor configured with standard mixing matrices
+     * to downmix common multi-channel formats to Stereo (2 channels).
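+     *
+     * Each matrix below is laid out row-major per input channel: every input
+     * channel contributes a (left, right) coefficient pair, so the 5.1 matrix
+     * has 6 rows of 2 values and, for example, its front-center row
+     * (0.707, 0.707) spreads the center channel equally into both outputs.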
+     *
+     * Supports:
+     * - 1 channel (Mono) -> Stereo
+     * - 2 channels (Stereo) -> Stereo (Identity)
+     * - 4 channels (Quad) -> Stereo
+     * - 6 channels (5.1 Surround) -> Stereo (ITU-R BS.775)
+     * - 8 channels (7.1 Surround) -> Stereo
+     */
+    fun createStandardStereoMixer(): ChannelMixingAudioProcessor {
+        val channelMixer = ChannelMixingAudioProcessor()
+        val boost = 1.4f // Slight boost to compensate for downmixing volume loss
+
+        // 8 channels (7.1) -> 2 channels (Stereo)
+        // FL, FR, FC, LFE, BL, BR, SL, SR
+        val eightToTwo = floatArrayOf(
+            1.0f * boost, 0.0f,             // FL -> L, R
+            0.0f, 1.0f * boost,             // FR -> L, R
+            0.707f * boost, 0.707f * boost, // FC -> L, R
+            0.0f, 0.0f,                     // LFE
+            0.707f * boost, 0.0f,           // BL -> L
+            0.0f, 0.707f * boost,           // BR -> R
+            0.707f * boost, 0.0f,           // SL -> L
+            0.0f, 0.707f * boost            // SR -> R
+        )
+        channelMixer.putChannelMixingMatrix(ChannelMixingMatrix(8, 2, eightToTwo))
+
+        // 6 channels (5.1) -> 2 channels (Stereo)
+        // FL, FR, FC, LFE, BL, BR
+        val sixToTwo = floatArrayOf(
+            1.0f * boost, 0.0f,             // FL -> L, R
+            0.0f, 1.0f * boost,             // FR -> L, R
+            0.707f * boost, 0.707f * boost, // FC -> L, R
+            0.0f, 0.0f,                     // LFE
+            0.707f * boost, 0.0f,           // BL -> L
+            0.0f, 0.707f * boost            // BR -> R
+        )
+        channelMixer.putChannelMixingMatrix(ChannelMixingMatrix(6, 2, sixToTwo))
+
+        // 4 channels (Quad) -> 2 channels (Stereo)
+        // FL, FR, BL, BR
+        val fourToTwo = floatArrayOf(
+            1.0f, 0.0f,   // FL -> L
+            0.0f, 1.0f,   // FR -> R
+            0.707f, 0.0f, // BL -> L
+            0.0f, 0.707f  // BR -> R
+        )
+        channelMixer.putChannelMixingMatrix(ChannelMixingMatrix(4, 2, fourToTwo))
+
+        // 2 channels -> 2 channels (Stereo passthrough)
+        channelMixer.putChannelMixingMatrix(ChannelMixingMatrix.createForConstantGain(2, 2))
+
+        // 1 channel (Mono) -> 2 channels (Stereo)
+        channelMixer.putChannelMixingMatrix(ChannelMixingMatrix.createForConstantGain(1, 2))
+
+        return channelMixer
+    }
+}
diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/AudioSequenceBuilder.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/AudioSequenceBuilder.kt
index 8a72386..a1ec32c 100644
--- a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/AudioSequenceBuilder.kt
+++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/AudioSequenceBuilder.kt
@@ -2,6 +2,7 @@ package ch.waio.pro_video_editor.src.features.render.helpers
 
 import RENDER_TAG
 import android.net.Uri
+import androidx.media3.common.C
 import androidx.media3.common.MediaItem
 import androidx.media3.common.audio.AudioProcessor
 import androidx.media3.common.audio.ChannelMixingAudioProcessor
@@ -143,51 +144,45 @@ class AudioSequenceBuilder(
         val compEnd = compositionEndTimeUs ?: videoDurationUs
         val targetDurationUs = (compEnd - compStart).coerceAtLeast(0L)
 
-        // Build audio effects
-        val audioProcessors = buildAudioProcessors()
-        val audioEffects = Effects(audioProcessors, emptyList())
-
-        // Create audio content items with looping or single play
+        // Create audio content items with looping or single play.
+        // NOTE: AudioProcessor instances cannot be shared across multiple EditedMediaItems.
+        // We create fresh effects for each item inside the creation methods.
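+        // (A configured AudioProcessor carries per-stream state, so reusing one
+        // instance across items could leak state from one item into the next.)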
         val audioContentItems = if (loopAudio) {
             createLoopedAudioItems(
                 audioFile,
                 sourceEndUs,
                 effectiveAudioDurationUs,
-                targetDurationUs,
-                audioEffects
+                targetDurationUs
             )
         } else {
-            createSingleAudioItem(audioFile, sourceEndUs, effectiveAudioDurationUs, targetDurationUs, audioEffects)
+            createSingleAudioItem(audioFile, sourceEndUs, effectiveAudioDurationUs, targetDurationUs)
         }
 
-        val allItems = mutableListOf<EditedMediaItem>()
+        // Build audio sequence using addGap() for leading and trailing silence.
+        // This is more efficient than generating temporary silent WAV files
+        // and avoids potential NPEs with empty MediaItems.
+        val trackTypes = setOf<@C.TrackType Int>(C.TRACK_TYPE_AUDIO)
+        val sequenceBuilder = EditedMediaItemSequence.Builder(trackTypes)
 
         // Add leading silence if audio starts after composition time 0.
-        // Media3 parallel sequences always start at time 0, so we need
-        // silence padding to offset the audio to the correct position.
         if (compStart > 0) {
-            val silentItem = createSilentAudioItem(compStart, audioEffects)
-            if (silentItem != null) {
-                allItems.add(silentItem)
-                Log.d(RENDER_TAG, "Added ${compStart / 1000}ms leading silence for composition offset")
-            }
+            sequenceBuilder.addGap(compStart)
+            Log.d(RENDER_TAG, "Added ${compStart / 1000}ms leading gap for composition offset")
         }
 
-        allItems.addAll(audioContentItems)
+        for (item in audioContentItems) {
+            sequenceBuilder.addItem(item)
+        }
 
         // Add trailing silence so the sequence spans the full video duration.
-        // This ensures all parallel sequences have matching lengths.
         val totalContentDurationUs = compStart + targetDurationUs
         if (totalContentDurationUs < videoDurationUs) {
             val trailingDurationUs = videoDurationUs - totalContentDurationUs
-            val silentItem = createSilentAudioItem(trailingDurationUs, audioEffects)
-            if (silentItem != null) {
-                allItems.add(silentItem)
-                Log.d(RENDER_TAG, "Added ${trailingDurationUs / 1000}ms trailing silence")
-            }
+            sequenceBuilder.addGap(trailingDurationUs)
+            Log.d(RENDER_TAG, "Added ${trailingDurationUs / 1000}ms trailing gap")
         }
 
-        return EditedMediaItemSequence.Builder(allItems).build()
+        return sequenceBuilder.build()
     }
 
     /**
@@ -200,48 +195,7 @@ class AudioSequenceBuilder(
 
         // Add channel mixing if needed
         if (needsNormalization) {
-            val channelMixer = ChannelMixingAudioProcessor()
-
-            // 7.1 Surround (8 channels) to Stereo (2 channels)
-            // Channel order: FL, FR, FC, LFE, BL, BR, SL, SR
-            val eightToTwo = floatArrayOf(
-                1.0f, 0.0f, 0.707f, 0.0f, 0.707f, 0.0f, 0.707f, 0.0f, // Left output
-                0.0f, 1.0f, 0.707f, 0.0f, 0.0f, 0.707f, 0.0f, 0.707f  // Right output
-            )
-            channelMixer.putChannelMixingMatrix(
-                ChannelMixingMatrix(8, 2, eightToTwo)
-            )
-
-            // 5.1 Surround (6 channels) to Stereo (2 channels)
-            // ITU-R BS.775 standard
-            val sixToTwo = floatArrayOf(
-                1.0f, 0.0f, 0.707f, 0.0f, 0.707f, 0.0f, // Left output
-                0.0f, 1.0f, 0.707f, 0.0f, 0.0f, 0.707f  // Right output
-            )
-            channelMixer.putChannelMixingMatrix(
-                ChannelMixingMatrix(6, 2, sixToTwo)
-            )
-
-            // Quad (4 channels) to Stereo (2 channels)
-            val fourToTwo = floatArrayOf(
-                1.0f, 0.0f, 0.707f, 0.0f, // Left output
-                0.0f, 1.0f, 0.0f, 0.707f  // Right output
-            )
-            channelMixer.putChannelMixingMatrix(
-                ChannelMixingMatrix(4, 2, fourToTwo)
-            )
-
-            // Stereo (2 channels) to Stereo (2 channels) - passthrough
-            channelMixer.putChannelMixingMatrix(
-                ChannelMixingMatrix.createForConstantGain(2, 2)
-            )
-
-            // Mono (1 channel) to Stereo (2 channels)
-            channelMixer.putChannelMixingMatrix(
-                ChannelMixingMatrix.createForConstantGain(1, 2)
-            )
-
-            processors.add(channelMixer)
+            processors.add(AudioMixingUtils.createStandardStereoMixer())
             Log.d(RENDER_TAG, "Added channel normalization for custom audio")
         }
 
@@ -268,14 +222,13 @@ class AudioSequenceBuilder(
         audioFile: File,
         sourceEndUs: Long,
         effectiveAudioDurationUs: Long,
-        targetDurationUs: Long,
-        effects: Effects
+        targetDurationUs: Long
     ): List<EditedMediaItem> {
         val audioItems = mutableListOf<EditedMediaItem>()
 
         if (effectiveAudioDurationUs <= 0 || targetDurationUs <= 0) {
             // Fallback: add audio once without duration constraints
-            val audioItem = createAudioItem(audioFile, startTimeUs, null, effects)
+            val audioItem = createAudioItem(audioFile, startTimeUs, null, Effects(buildAudioProcessors(), emptyList()))
             audioItems.add(audioItem)
             return audioItems
         }
@@ -308,7 +261,7 @@ class AudioSequenceBuilder(
                 if (audioEndTimeUs != null) loopEndUs else null
             }
 
-            val audioItem = createAudioItem(audioFile, loopStartUs, endPositionUs, effects)
+            val audioItem = createAudioItem(audioFile, loopStartUs, endPositionUs, Effects(buildAudioProcessors(), emptyList()))
             audioItems.add(audioItem)
             remainingDurationUs -= loopAudioDurationUs
             isFirstLoop = false
@@ -325,8 +278,7 @@ class AudioSequenceBuilder(
         audioFile: File,
         sourceEndUs: Long,
         effectiveAudioDurationUs: Long,
-        targetDurationUs: Long,
-        effects: Effects
+        targetDurationUs: Long
     ): List<EditedMediaItem> {
         val endPositionUs = if (effectiveAudioDurationUs > targetDurationUs && targetDurationUs > 0) {
             Log.d(RENDER_TAG, "Trimming audio to ${targetDurationUs / 1000} ms (no loop)")
             )
             null
         }
-        return listOf(createAudioItem(audioFile, startTimeUs, endPositionUs, effects))
+        return listOf(createAudioItem(audioFile, startTimeUs, endPositionUs, Effects(buildAudioProcessors(), emptyList())))
     }
 
     /**
@@ -376,86 +328,4 @@ class AudioSequenceBuilder(
             .build()
     }
 
-    /**
-     * Creates a silent audio EditedMediaItem of the specified duration.
-     *
-     * Media3 parallel sequences always start at time 0, so we use silence
-     * to offset audio to the correct composition position.
-     */
-    private fun createSilentAudioItem(durationUs: Long, effects: Effects): EditedMediaItem? {
-        if (durationUs <= 0) return null
-
-        val silentFile = generateSilentWavFile(durationUs)
-        if (silentFile == null) {
-            Log.e(RENDER_TAG, "Failed to create silent audio item")
-            return null
-        }
-
-        val mediaItem = MediaItem.Builder().setUri(Uri.fromFile(silentFile)).build()
-        return EditedMediaItem.Builder(mediaItem)
-            .setRemoveVideo(true)
-            .setEffects(effects)
-            .build()
-    }
-
-    /**
-     * Generates a temporary WAV file containing silence of the specified duration.
-     *
-     * Creates a valid PCM WAV file with stereo 44100Hz 16-bit silence.
-     */
-    private fun generateSilentWavFile(durationUs: Long): File? {
-        try {
-            val sampleRate = 44100
-            val channels = 2
-            val bitsPerSample = 16
-            val bytesPerSample = bitsPerSample / 8
-            val numSamples = (sampleRate * durationUs / 1_000_000.0).toInt()
-            val dataSize = numSamples * channels * bytesPerSample
-            val fileSize = 36 + dataSize
-
-            val file = File.createTempFile("silence_", ".wav")
-            file.deleteOnExit()
-
-            file.outputStream().use { out ->
-                // RIFF header
-                out.write("RIFF".toByteArray(Charsets.US_ASCII))
-                out.write(toLittleEndian(fileSize, 4))
-                out.write("WAVE".toByteArray(Charsets.US_ASCII))
-
-                // fmt subchunk
-                out.write("fmt ".toByteArray(Charsets.US_ASCII))
-                out.write(toLittleEndian(16, 4)) // Subchunk1Size (PCM)
-                out.write(toLittleEndian(1, 2)) // AudioFormat (PCM = 1)
-                out.write(toLittleEndian(channels, 2))
-                out.write(toLittleEndian(sampleRate, 4))
-                out.write(toLittleEndian(sampleRate * channels * bytesPerSample, 4))
-                out.write(toLittleEndian(channels * bytesPerSample, 2))
-                out.write(toLittleEndian(bitsPerSample, 2))
-
-                // data subchunk
-                out.write("data".toByteArray(Charsets.US_ASCII))
-                out.write(toLittleEndian(dataSize, 4))
-
-                // Write silence (all zeros)
-                val buffer = ByteArray(8192)
-                var remaining = dataSize
-                while (remaining > 0) {
-                    val toWrite = minOf(remaining, buffer.size)
-                    out.write(buffer, 0, toWrite)
-                    remaining -= toWrite
-                }
-            }
-
-            Log.d(RENDER_TAG, "Generated ${durationUs / 1000}ms silent WAV: ${file.absolutePath}")
-            return file
-        } catch (e: Exception) {
-            Log.e(RENDER_TAG, "Failed to generate silent WAV: ${e.message}")
-            return null
-        }
-    }
-
-    /** Converts an integer to little-endian byte array. */
-    private fun toLittleEndian(value: Int, numBytes: Int): ByteArray {
-        return ByteArray(numBytes) { i -> ((value shr (8 * i)) and 0xFF).toByte() }
-    }
 }
diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/CompositionBuilder.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/CompositionBuilder.kt
index a83ae3b..32b02a6 100644
--- a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/CompositionBuilder.kt
+++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/CompositionBuilder.kt
@@ -2,14 +2,27 @@ package ch.waio.pro_video_editor.src.features.render.helpers
 
 import RENDER_TAG
 import android.content.Context
+import android.graphics.Bitmap
+import android.graphics.Color
+import android.net.Uri
 import androidx.media3.common.Effect
+import androidx.media3.common.MediaItem
 import androidx.media3.common.audio.AudioProcessor
+import androidx.media3.transformer.EditedMediaItem
 import androidx.media3.common.util.UnstableApi
+import androidx.media3.effect.AlphaScale
+import androidx.media3.effect.Presentation
 import androidx.media3.transformer.Composition
 import androidx.media3.transformer.EditedMediaItemSequence
-import ch.waio.pro_video_editor.src.features.render.models.AudioTrackConfig
+import androidx.media3.transformer.Effects
 import ch.waio.pro_video_editor.src.features.render.models.RenderConfig
+import ch.waio.pro_video_editor.src.features.render.models.VideoClip
+import ch.waio.pro_video_editor.src.features.render.utils.getRotatedVideoDimensions
 import ch.waio.pro_video_editor.src.shared.logging.PluginLog as Log
+import java.io.File
+import kotlin.math.max
+import kotlin.math.min
+import androidx.core.graphics.createBitmap
 
 /**
  * Main builder class for creating Media3 Compositions from render configurations.
@@ -21,8 +34,8 @@ import ch.waio.pro_video_editor.src.shared.logging.PluginLog as Log
  */
 @UnstableApi
 class CompositionBuilder(
-    private val context: Context,
-    private val config: RenderConfig
+    private val config: RenderConfig,
+    private val context: Context
 ) {
     private var videoEffects: List<Effect> = emptyList()
@@ -58,68 +71,183 @@ class CompositionBuilder(
         Log.d(RENDER_TAG, "Audio enabled: ${config.enableAudio}")
         Log.d(RENDER_TAG, "Audio tracks: ${config.audioTracks.size}")
 
+        // Default render dimensions if not provided
+        var renderWidth = config.renderWidth
+        var renderHeight = config.renderHeight
         val rotationDegrees = (4 - (config.rotateTurns ?: 0)) * 90f
 
+        // If any clip has x, y, width, height, segmentTimeUs, opacity or zIndex, we use multiple sequences.
+        val needsMultipleSequences = config.videoClips.any {
+            it.x != null || it.y != null || it.width != null || it.height != null ||
+                it.segmentTimeUs != null || it.opacity != null || (it.zIndex ?: 0) != 0
+        }
+
+        val hasImageLayers = config.imageLayers.isNotEmpty()
+        if ((renderWidth == null || renderHeight == null) && (needsMultipleSequences || hasImageLayers)) {
+            // Use the first clip for dimensions.
+            // Prefer explicitly set width/height if available, otherwise fallback to file dimensions.
+            val backgroundClip = config.videoClips.first()
+            if (backgroundClip.width != null && backgroundClip.height != null) {
+                renderWidth = backgroundClip.width.toInt()
+                renderHeight = backgroundClip.height.toInt()
+                Log.d(RENDER_TAG, "Defaulting render dimensions to first clip's size: ${renderWidth}x${renderHeight}")
+            } else {
+                val (w, h, _) = getRotatedVideoDimensions(File(backgroundClip.inputPath), rotationDegrees)
+                renderWidth = w
+                renderHeight = h
+                Log.d(RENDER_TAG, "Defaulting render dimensions to first clip's file: ${renderWidth}x${renderHeight}")
+            }
+        }
+
+        Log.d(RENDER_TAG, "Render dimensions: ${renderWidth}x${renderHeight}")
+
         val hasCustomAudio = config.audioTracks.isNotEmpty()
 
-        // Build video sequence
-        val videoBuilder = VideoSequenceBuilder(config.videoClips)
-            .setVideoEffects(videoEffects)
-            .setAudioEffects(audioEffects)
-            .setRotation(rotationDegrees)
-            .setFlip(config.flipX, config.flipY)
-            .setScale(config.scaleX, config.scaleY)
-            .setCrop(config.cropWidth, config.cropHeight, config.cropX, config.cropY)
-            .setTimedImageLayers(config.imageLayers.map { imageLayer ->
-                VideoSequenceBuilder.ImageLayerConfig(
-                    imageBytes = imageLayer.imageData,
-                    scaleX = config.scaleX,
-                    scaleY = config.scaleY,
-                    withCropping = config.imageBytesWithCropping,
-                    startUs = imageLayer.startUs,
-                    endUs = imageLayer.endUs,
-                    x = imageLayer.x,
-                    y = imageLayer.y,
-                    width = imageLayer.width,
-                    height = imageLayer.height,
-                    animations = imageLayer.animations
-                )
-            })
+        // Detect if audio normalization is needed (check both video and custom audio)
+        // This MUST be done before building sequences so they all use consistent channel counts.
+        // We now rely on pre-transcoding for multi-channel video clips,
+        // so videoNeedsNormalization will usually be false here.
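+        // (For example, a 5.1 clip mixed with a stereo music track still yields
+        // needsNormalization = true below, so every sequence downmixes to stereo
+        // before Media3 mixes them together.)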
+        val videoMetadataBuilder = VideoSequenceBuilder(config.videoClips)
            .setEnableAudio(config.enableAudio)
-            .setGlobalTrim(config.startUs, config.endUs)
-            .setHasCustomAudio(hasCustomAudio)
+        val videoNeedsNormalization = videoMetadataBuilder.detectAudioNormalizationNeeded()
+        val needsNormalization = videoNeedsNormalization || hasCustomAudio
 
-        // Detect if audio normalization is needed (check both video and custom audio)
-        val needsNormalization = videoBuilder.detectAudioNormalizationNeeded() || hasCustomAudio
-        videoBuilder.setAudioNormalization(needsNormalization)
+        // 1. Calculate total duration of the entire composition.
+        // Even in complex compositions, some clips might not have segmentTimeUs,
+        // in which case they should follow the previous clip in the input list.
+        var totalDurationUs = 0L
+        var runningSequentialTimeUs = 0L
+
+        // Map to store calculated start/end times for each clip by its identity (original index)
+        val clipTimings = mutableMapOf<Int, Pair<Long, Long>>()
 
-        // Video keeps its audio - Media3 will mix it natively with custom audio sequence
-        videoBuilder.setForceRemoveAudio(false)
+        for ((index, clip) in config.videoClips.withIndex()) {
+            val clipDurationUs = when {
+                clip.endUs != null -> clip.endUs - (clip.startUs ?: 0L)
+                else -> MediaInfoExtractor.getVideoDuration(clip.inputPath) - (clip.startUs ?: 0L)
+            }
 
-        // Build video sequence (with audio intact)
-        val videoSequence = videoBuilder.build()
+            val clipStartInComposition = if (needsMultipleSequences) {
+                clip.segmentTimeUs ?: runningSequentialTimeUs
+            } else {
+                runningSequentialTimeUs
+            }
+
+            val clipEndInComposition = clipStartInComposition + clipDurationUs
+            clipTimings[index] = Pair(clipStartInComposition, clipEndInComposition)
+
+            totalDurationUs = max(totalDurationUs, clipEndInComposition)
+
+            // Sequential time only increments if we are NOT using explicit segment timing,
+            // or if we are in sequential mode.
+            if (!needsMultipleSequences || clip.segmentTimeUs == null) {
+                runningSequentialTimeUs = clipEndInComposition
+            }
+        }
+
+        // 2. Calculate global timing
+        val globalStartUs = config.startUs ?: 0L
+        val globalEndUs = config.endUs ?: totalDurationUs
+        val globalDurationUs = globalEndUs - globalStartUs
+
+        Log.d(RENDER_TAG, "Total composition duration: ${totalDurationUs / 1000}ms")
+        Log.d(RENDER_TAG, "Global trim: ${globalStartUs / 1000}ms to ${globalEndUs / 1000}ms (duration: ${globalDurationUs / 1000}ms)")
 
-        // Prepare sequences list
         val sequences = mutableListOf<EditedMediaItemSequence>()
-        sequences.add(videoSequence)
-        Log.d(
-            RENDER_TAG,
-            "Created video EditedMediaItemSequence with ${config.videoClips.size} items"
-        )
+
+        if (needsMultipleSequences) {
+            Log.d(RENDER_TAG, "Complex composition detected, building multiple video sequences")
+
+            // 3. Sort clips for correct layering based on zIndex and original order.
+            // Rules:
+            // 1. Higher zIndex on top.
+            // 2. Default zIndex is 0.
+            // 3. If zIndex is same, latter segment in input list is on top.
+            //
+            // In Media3, the first sequence in the list is the BOTTOM-MOST layer (Index 0).
+            // By using a stable ascending sort, we satisfy the rules.
+            val indexedClips = config.videoClips.mapIndexed { index, clip -> index to clip }
+            val sortedIndexedClips = indexedClips.sortedBy { it.second.zIndex ?: 0 }
+
+            Log.d(RENDER_TAG, "Sorted clips for composition (bottom to top):")
+            for ((index, clip) in sortedIndexedClips) {
+                Log.d(RENDER_TAG, "  Sequence ${index + 1}: path=${clip.inputPath}, zIndex=${clip.zIndex ?: 0}")
+
+                val (clipStartUs, clipEndUs) = clipTimings[index]!!
+
+                // Check if clip overlaps with global trim range
+                if (clipEndUs <= globalStartUs || clipStartUs >= globalEndUs) {
+                    Log.d(RENDER_TAG, "Skipping clip outside global trim: ${clip.inputPath}")
+                    continue
+                }
+
+                // Adjust clip boundaries and calculate leading gap
+                val adjustedStartInComposition = max(clipStartUs, globalStartUs)
+                val adjustedEndInComposition = min(clipEndUs, globalEndUs)
+                val leadingGapUs = adjustedStartInComposition - globalStartUs
+
+                // Adjust trim relative to source
+                var clipTrimStartUs = clip.startUs ?: 0L
+                if (clipStartUs < globalStartUs) {
+                    clipTrimStartUs += (globalStartUs - clipStartUs)
+                }
+                val clipTrimEndUs = clipTrimStartUs + (adjustedEndInComposition - adjustedStartInComposition)
+
+                // Build sequence with pre-trimmed clip
+                val trimmedClip = clip.copy(startUs = clipTrimStartUs, endUs = clipTrimEndUs)
+                val videoBuilder = VideoSequenceBuilder(listOf(trimmedClip))
+                    .setVideoEffects(emptyList())
+                    .setAudioEffects(emptyList())
+                    .setRotation(rotationDegrees)
+                    .setFlip(config.flipX, config.flipY)
+                    .setScale(config.scaleX, config.scaleY)
+                    .setCrop(config.cropWidth, config.cropHeight, config.cropX, config.cropY)
+                    .setEnableAudio(config.enableAudio && (clip.volume ?: 1.0f) > 0 && !hasCustomAudio)
+                    .setHasCustomAudio(hasCustomAudio)
+                    .setForceRemoveAudio(false)
+                    .setRenderDimensions(renderWidth, renderHeight)
+                    // Ensure consistency across multiple sequences in complex compositions.
+                    .setAudioNormalization(needsNormalization)
+
+                val baseSequence = videoBuilder.build()
+                val sequenceBuilder = EditedMediaItemSequence.Builder(baseSequence.trackTypes)
+
+                // Prepend leading gap relative to globalStartUs
+                if (leadingGapUs > 0) {
+                    sequenceBuilder.addItem(createTransparentGapItem(leadingGapUs, renderWidth, renderHeight))
+                }
+
+                sequenceBuilder.addItems(baseSequence.editedMediaItems)
+
+                // Pad remaining duration to prevent frozen frames
+                val sequenceDurationUs = leadingGapUs + (adjustedEndInComposition - adjustedStartInComposition)
+                if (sequenceDurationUs < globalDurationUs) {
+                    sequenceBuilder.addItem(createTransparentGapItem(globalDurationUs - sequenceDurationUs, renderWidth, renderHeight))
+                }
+
+                sequences.add(sequenceBuilder.build())
+            }
+        } else {
+            // Build single optimized video sequence
+            val videoBuilder = createVideoBuilder(config.videoClips, rotationDegrees, hasCustomAudio)
+                .setVideoEffects(emptyList())
+                .setAudioEffects(emptyList())
+                .setRenderDimensions(renderWidth, renderHeight)
+                .setAudioNormalization(needsNormalization)
+            sequences.add(videoBuilder.build())
+        }
 
         // Add audio tracks as separate sequences - Media3 will mix all tracks natively
         if (hasCustomAudio) {
-            val totalVideoDuration = videoBuilder.calculateTotalDuration()
-
             for ((index, track) in config.audioTracks.withIndex()) {
                 Log.d(
                     RENDER_TAG,
                     "🎵 Adding audio track $index: path=${track.path}, volume=${track.volume}, loop=${track.loop}"
                 )
 
-                val audioSequence = AudioSequenceBuilder(track.path, totalVideoDuration)
+                val audioSequence = AudioSequenceBuilder(track.path, globalDurationUs)
                     .setVolume(track.volume)
-                    .setNormalization(needsNormalization)
+                    .setNormalization(false) // Custom tracks are usually stereo; allow native resampling
                     .setLoop(track.loop)
                     .setStartTime(track.audioStartUs)
                     .setAudioEndTime(track.audioEndUs)
@@ -135,9 +263,109 @@ class CompositionBuilder(
         }
 
         // Build final composition
-        val composition = Composition.Builder(sequences).build()
+        val compositionBuilder = Composition.Builder(sequences.toList())
+
+        // Add Global effects and Presentation effect.
+        // Moving effects to Composition level ensures they apply to the final combined video.
+        val combinedVideoEffects = mutableListOf<Effect>()
+        combinedVideoEffects.addAll(videoEffects)
+
+        // Apply Image Layers at the Composition level so they are truly global
+        if (config.imageLayers.isNotEmpty() && renderWidth != null && renderHeight != null) {
+            applyTimedImageLayers(
+                combinedVideoEffects,
+                config.imageLayers,
+                renderWidth,
+                renderHeight
+            )
+        }
+
+        if (renderWidth != null && renderHeight != null) {
+            combinedVideoEffects += Presentation.createForWidthAndHeight(
+                renderWidth,
+                renderHeight,
+                Presentation.LAYOUT_SCALE_TO_FIT
+            )
+            Log.d(RENDER_TAG, "Global Presentation effect applied: ${renderWidth}x${renderHeight}")
+        }
+
+        // Prepare global audio effects
+        val finalAudioEffects = mutableListOf<AudioProcessor>()
+        finalAudioEffects.addAll(audioEffects)
+
+        compositionBuilder.setEffects(Effects(finalAudioEffects, combinedVideoEffects))
+
+        val composition = compositionBuilder.build()
         Log.d(RENDER_TAG, "Composition created successfully with ${sequences.size} sequences")
 
         return composition
     }
-}
\ No newline at end of file
+
+    /**
+     * Cleans up temporary resources used during composition building.
+     */
+    fun release() {
+        // No per-render temp files to clean up; the cached transparent gap PNG is reused across renders.
+    }
+
+    private fun createTransparentGapItem(durationUs: Long, renderWidth: Int?, renderHeight: Int?): EditedMediaItem {
+        val gapFile = File(context.cacheDir, "pve_transparent_gap.png")
+        if (!gapFile.exists()) {
+            try {
+                val bitmap = createBitmap(1, 1)
+                bitmap.eraseColor(Color.TRANSPARENT)
+                gapFile.outputStream().use {
+                    bitmap.compress(Bitmap.CompressFormat.PNG, 100, it)
+                }
+                Log.d(RENDER_TAG, "Created transparent gap PNG at: ${gapFile.absolutePath}")
+            } catch (e: Exception) {
+                Log.e(RENDER_TAG, "Failed to create transparent gap PNG: ${e.message}")
+            }
+        }
+
+        val mediaItem = MediaItem.Builder()
+            .setUri(Uri.fromFile(gapFile))
+            .setImageDurationMs(maxOf(1, (durationUs + 999) / 1000))
+            .build()
+
+        val videoEffects = mutableListOf<Effect>()
+        videoEffects.add(AlphaScale(0f))
+
+        // Move the gap item off-screen to ensure it doesn't obscure anything even if transparency fails
+        if (renderWidth != null && renderHeight != null) {
+            videoEffects.add(VideoCompositionTransformation(
+                x = -100.0, // Off-screen
+                y = -100.0,
+                width = 1.0,
+                height = 1.0,
+                videoWidth = 1,
+                videoHeight = 1,
+                renderWidth = renderWidth,
+                renderHeight = renderHeight
+            ))
+        }
+
+        return EditedMediaItem.Builder(mediaItem)
+            .setFrameRate(30)
+            .setEffects(Effects(emptyList(), videoEffects))
+            .build()
+    }
+
+    private fun createVideoBuilder(
+        clips: List<VideoClip>,
+        rotationDegrees: Float,
+        hasCustomAudio: Boolean
+    ): VideoSequenceBuilder {
+        return VideoSequenceBuilder(clips)
+            .setVideoEffects(videoEffects)
+            .setAudioEffects(audioEffects)
+            .setRotation(rotationDegrees)
+            .setFlip(config.flipX, config.flipY)
+            .setScale(config.scaleX, config.scaleY)
+            .setCrop(config.cropWidth, config.cropHeight, config.cropX, config.cropY)
+            .setEnableAudio(config.enableAudio)
+            .setGlobalTrim(config.startUs, config.endUs)
+            .setHasCustomAudio(hasCustomAudio)
+            .setForceRemoveAudio(false)
+    }
+}
diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/VideoCompositionTransformation.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/VideoCompositionTransformation.kt
new file mode 100644
index 0000000..b59bee4
--- /dev/null
+++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/VideoCompositionTransformation.kt
@@ -0,0 +1,136 @@
+package ch.waio.pro_video_editor.src.features.render.helpers
+
+import android.content.Context
+import android.opengl.GLES20
+import android.opengl.Matrix
+import androidx.media3.common.VideoFrameProcessingException
+import androidx.media3.common.util.GlProgram
+import androidx.media3.common.util.GlUtil
+import androidx.media3.common.util.Size
+import androidx.media3.common.util.UnstableApi
+import androidx.media3.effect.BaseGlShaderProgram
+import androidx.media3.effect.GlEffect
+import androidx.media3.effect.GlShaderProgram
+
+/**
+ * A GlEffect that handles positioning and scaling of a video segment
+ * within a larger render canvas.
+ *
+ * It converts pixel-based offsets and sizes from the Flutter side into
+ * the normalized OpenGL coordinates used by Media3 effects.
+ */
+@UnstableApi
+class VideoCompositionTransformation(
+    private val x: Double?,
+    private val y: Double?,
+    private val width: Double?,
+    private val height: Double?,
+    private val videoWidth: Int,
+    private val videoHeight: Int,
+    private val renderWidth: Int,
+    private val renderHeight: Int
+) : GlEffect {
+
+    override fun toGlShaderProgram(context: Context, useHdr: Boolean): GlShaderProgram {
+        return VideoCompositionShaderProgram(context, useHdr, this)
+    }
+
+    @UnstableApi
+    private class VideoCompositionShaderProgram(
+        context: Context,
+        useHdr: Boolean,
+        private val effect: VideoCompositionTransformation
+    ) : BaseGlShaderProgram(useHdr, /* texturePoolCapacity= */ 1) {
+
+        private val glProgram: GlProgram
+
+        companion object {
+            private const val VERTEX_SHADER_SOURCE =
+                "attribute vec4 aFramePosition;\n" +
+                "attribute vec4 aTexSamplingCoord;\n" +
+                "varying vec2 vTexSamplingCoord;\n" +
+                "uniform mat4 uTransformationMatrix;\n" +
+                "void main() {\n" +
+                "  gl_Position = uTransformationMatrix * aFramePosition;\n" +
+                "  vTexSamplingCoord = aTexSamplingCoord.xy;\n" +
+                "}"
+
+            private const val FRAGMENT_SHADER_SOURCE =
+                "precision mediump float;\n" +
+                "uniform sampler2D uTexSampler;\n" +
+                "varying vec2 vTexSamplingCoord;\n" +
+                "void main() {\n" +
+                "  gl_FragColor = texture2D(uTexSampler, vTexSamplingCoord);\n" +
+                "}"
+        }
+
+        init {
+            try {
+                glProgram = GlProgram(VERTEX_SHADER_SOURCE, FRAGMENT_SHADER_SOURCE)
+            } catch (e: Exception) {
+                throw VideoFrameProcessingException(e)
+            }
+        }
+
+        override fun configure(inputWidth: Int, inputHeight: Int): Size {
+            return Size(effect.renderWidth, effect.renderHeight)
+        }
+
+        override fun drawFrame(inputTexId: Int, presentationTimeUs: Long) {
+            try {
+                glProgram.use()
+
+                // Clear the target framebuffer to transparent before drawing the segment.
+                // This ensures that segments that don't cover the full canvas don't show garbage.
+                GLES20.glClearColor(0f, 0f, 0f, 0f)
+                GLES20.glClear(GLES20.GL_COLOR_BUFFER_BIT)
+
+                // Enable alpha blending to support transparent layers and overlays.
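+                // (SRC_ALPHA / ONE_MINUS_SRC_ALPHA is the standard non-premultiplied
+                // "over" blend: out = src.a * src.rgb + (1 - src.a) * dst.rgb.)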
+                GLES20.glEnable(GLES20.GL_BLEND)
+                GLES20.glBlendFunc(GLES20.GL_SRC_ALPHA, GLES20.GL_ONE_MINUS_SRC_ALPHA)
+
+                val glMatrix = FloatArray(16)
+                Matrix.setIdentityM(glMatrix, 0)
+
+                val targetWidth = (effect.width ?: effect.videoWidth.toDouble()).toFloat()
+                val targetHeight = (effect.height ?: effect.videoHeight.toDouble()).toFloat()
+
+                // sx and sy are half-widths in NDC (relative to a 2.0 wide NDC space)
+                val sx = if (effect.renderWidth > 0) targetWidth / effect.renderWidth else 1.0f
+                val sy = if (effect.renderHeight > 0) targetHeight / effect.renderHeight else 1.0f
+
+                // Convert pixel (x, y) to NDC top-left
+                val leftNDC = if (effect.renderWidth > 0) (2f * (effect.x ?: 0.0).toFloat() / effect.renderWidth) - 1f else -1.0f
+                val topNDC = if (effect.renderHeight > 0) 1f - (2f * (effect.y ?: 0.0).toFloat() / effect.renderHeight) else 1.0f
+
+                // Target center in NDC for a quad that is 2x2 centered at 0,0
+                val centerX = leftNDC + sx
+                val centerY = topNDC - sy
+
+                Matrix.translateM(glMatrix, 0, centerX, centerY, 0f)
+                Matrix.scaleM(glMatrix, 0, sx, sy, 1f)
+
+                glProgram.setFloatsUniform("uTransformationMatrix", glMatrix)
+                glProgram.setSamplerTexIdUniform("uTexSampler", inputTexId, 0)
+
+                // Set attribute buffers with robust size detection
+                val vertexData = GlUtil.getNormalizedCoordinateBounds()
+                val vertexSize = if (vertexData.size == 8) 2 else 4
+                glProgram.setBufferAttribute("aFramePosition", vertexData, vertexSize)
+
+                val texData = GlUtil.getTextureCoordinateBounds()
+                val texSize = if (texData.size == 8) 2 else 4
+                glProgram.setBufferAttribute("aTexSamplingCoord", texData, texSize)
+
+                glProgram.bindAttributesAndUniforms()
+
+                GLES20.glDrawArrays(GLES20.GL_TRIANGLE_STRIP, 0, 4)
+
+                GLES20.glDisable(GLES20.GL_BLEND)
+                GlUtil.checkGlError()
+            } catch (e: Exception) {
+                throw VideoFrameProcessingException(e, presentationTimeUs)
+            }
+        }
+    }
+}
diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/VideoSequenceBuilder.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/VideoSequenceBuilder.kt
index 8193e0c..15305f9 100644
--- a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/VideoSequenceBuilder.kt
+++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/VideoSequenceBuilder.kt
@@ -3,6 +3,7 @@ package ch.waio.pro_video_editor.src.features.render.helpers
 import RENDER_TAG
 import android.net.Uri
 import applyScale
+import applyRotation
 import androidx.media3.common.C
 import androidx.media3.common.Effect
 import androidx.media3.common.MediaItem
@@ -35,7 +36,6 @@ class VideoSequenceBuilder(
     private var flipX: Boolean = false
     private var flipY: Boolean = false
     private var cropConfig: CropConfig? = null
-    private var timedImageLayers: List<ImageLayerConfig> = emptyList()
     private var enableAudio: Boolean = true
     private var needsAudioNormalization: Boolean = false
     private var forceRemoveAudio: Boolean = false
@@ -44,6 +44,8 @@ class VideoSequenceBuilder(
     private var hasCustomAudio: Boolean = false
     private var scaleX: Float? = null
     private var scaleY: Float? = null
+    private var renderWidth: Int? = null
+    private var renderHeight: Int? = null
 
     data class CropConfig(
         val width: Int?,
@@ -52,19 +54,14 @@ class VideoSequenceBuilder(
         val y: Int?
     )
 
-    data class ImageLayerConfig(
-        val imageBytes: ByteArray?,
-        val scaleX: Float?,
-        val scaleY: Float?,
-        val withCropping: Boolean = false,
-        val startUs: Long = 0,
-        val endUs: Long = -1,
-        val x: Int? = null,
-        val y: Int? = null,
-        val width: Double? = null,
-        val height: Double? = null,
-        val animations: List = emptyList()
-    )
+    /**
+     * Sets target render dimensions for composition-based positioning.
+     */
+    fun setRenderDimensions(width: Int?, height: Int?): VideoSequenceBuilder {
+        this.renderWidth = width
+        this.renderHeight = height
+        return this
+    }
 
     /**
      * Sets the video effects to apply to all clips.
@@ -116,14 +113,6 @@ class VideoSequenceBuilder(
         return this
     }
 
-    /**
-     * Sets time-based image layer overlays configuration.
-     */
-    fun setTimedImageLayers(layers: List<ImageLayerConfig>): VideoSequenceBuilder {
-        this.timedImageLayers = layers
-        return this
-    }
-
     /**
      * Enables or disables audio in the output.
      */
@@ -179,10 +168,17 @@ class VideoSequenceBuilder(
     /**
     * Detects if audio normalization is needed across video clips.
      *
-     * @return true if clips have different audio channel counts
+     * Only considers clips that have audio enabled (not muted).
+     *
+     * @return true if any clip has non-stereo audio (needs downmixing)
      */
     fun detectAudioNormalizationNeeded(): Boolean {
-        if (!enableAudio || videoClips.size <= 1) {
+        if (!enableAudio) {
             return false
         }
 
@@ -190,18 +186,15 @@ class VideoSequenceBuilder(
             MediaInfoExtractor.getAudioChannelCount(clip.inputPath)
         }
 
-        val needsNormalization = audioChannelCounts.isNotEmpty() &&
-            audioChannelCounts.toSet().size > 1
+        // Normalize if any clip is NOT Stereo (2 channels).
+        // This includes Mono (1 channel) and Multi-channel (5.1/7.1).
+        // Forcing Stereo consistency prevents reconfiguration errors in Media3 AudioGraph.
+        val needsNormalization = audioChannelCounts.any { it != 2 }
 
         if (needsNormalization) {
             Log.d(
                 RENDER_TAG,
-                "Audio normalization needed - detected different channel counts: $audioChannelCounts"
-            )
-        } else if (audioChannelCounts.isNotEmpty()) {
-            Log.d(
-                RENDER_TAG,
-                "Audio normalization NOT needed - all videos have same channel count: ${audioChannelCounts.firstOrNull()}"
+                "Audio normalization needed - non-stereo audio detected: $audioChannelCounts"
             )
         }
 
@@ -225,9 +218,8 @@ class VideoSequenceBuilder(
         var totalDurationUs = 0L
         trimmedClips.forEach { clip ->
             val clipDurationUs = when {
-                clip.endUs != null && clip.startUs != null -> clip.endUs - clip.startUs
-                clip.endUs != null -> clip.endUs
-                else -> MediaInfoExtractor.getVideoDuration(clip.inputPath)
+                clip.endUs != null -> clip.endUs - (clip.startUs ?: 0L)
+                else -> MediaInfoExtractor.getVideoDuration(clip.inputPath) - (clip.startUs ?: 0L)
             }
             totalDurationUs += clipDurationUs
         }
@@ -248,17 +240,17 @@ class VideoSequenceBuilder(
         val trimmedClips = applyGlobalTrim(videoClips)
         Log.d(RENDER_TAG, "After global trim: ${trimmedClips.size} clips (was ${videoClips.size})")
 
-        // Prepare normalized audio effects with channel mixing if needed
-        val normalizedAudioEffects = if (needsAudioNormalization) {
-            Log.d(RENDER_TAG, "Adding ChannelMixingAudioProcessor to normalize audio to stereo")
-            buildChannelNormalizationEffects()
-        } else {
-            audioEffects.toList()
-        }
-
         // Build EditedMediaItems for each clip
         val editedMediaItems = trimmedClips.mapIndexed { index, clip ->
+            // Audio normalization is now handled primarily via pre-transcoding.
+            // We keep the processor for edge cases where pre-transcoding was skipped.
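+            // (createStandardStereoMixer() is effectively a passthrough for clips
+            // that are already stereo, via its 2 -> 2 constant-gain matrix.)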
+            val itemAudioProcessors = mutableListOf<AudioProcessor>()
+            if (needsAudioNormalization) {
+                itemAudioProcessors.add(AudioMixingUtils.createStandardStereoMixer())
+            }
+            itemAudioProcessors.addAll(audioEffects)
+
+            buildEditedMediaItem(index, clip, itemAudioProcessors)
         }
 
         Log.d(RENDER_TAG, "Total EditedMediaItems created: ${editedMediaItems.size}")
@@ -281,8 +273,13 @@ class VideoSequenceBuilder(
         // Determine track types for the sequence
         val trackTypes = mutableSetOf<@C.TrackType Int>(C.TRACK_TYPE_VIDEO)
-        if (enableAudio) {
+
+        // ONLY add audio track type if at least one item provides audio
+        if (enableAudio && finalVideoItems.any { !it.removeAudio }) {
             trackTypes.add(C.TRACK_TYPE_AUDIO)
+            Log.d(RENDER_TAG, "Sequence will include AUDIO track")
+        } else {
+            Log.d(RENDER_TAG, "Sequence will NOT include AUDIO track (muted or disabled)")
         }
 
         return EditedMediaItemSequence.Builder(trackTypes)
@@ -292,93 +289,13 @@ class VideoSequenceBuilder(
     }
 
     /**
-     * Builds channel normalization effects (channel mixer + audio processors).
-     *
-     * Uses boosted ITU-R BS.775 coefficients for multi-channel downmixing.
-     *
-     * The standard ITU-R BS.775 coefficients (1.0, 0.707, 0.707) cause volume loss
-     * because the energy distributed across multiple channels doesn't fully translate
-     * to stereo. We apply a boost factor of ~1.4 (sqrt(2)) to compensate.
-     *
-     * This ensures that surround content maintains similar perceived loudness
-     * when mixed with stereo custom audio tracks.
+     * Returns true when the given path points to a still image, based on its file extension.
      */
-    private fun buildChannelNormalizationEffects(): List<AudioProcessor> {
-        val channelMixer = ChannelMixingAudioProcessor()
-
-        // Boost factor to compensate for energy loss during downmixing
-        // sqrt(2) ≈ 1.414 compensates for the typical ~70% volume loss
-        val boost = 1.4f
-
-        // 7.1 Surround (8 channels) to Stereo (2 channels)
-        // Channel order: FL, FR, FC, LFE, BL, BR, SL, SR
-        // Boosted coefficients to maintain loudness
-        val eightToTwo = floatArrayOf(
-            1.0f * boost,
-            0.0f,
-            0.707f * boost,
-            0.0f,
-            0.707f * boost,
-            0.0f,
-            0.707f * boost,
-            0.0f, // Left output
-            0.0f,
-            1.0f * boost,
-            0.707f * boost,
-            0.0f,
-            0.0f,
-            0.707f * boost,
-            0.0f,
-            0.707f * boost // Right output
-        )
-        channelMixer.putChannelMixingMatrix(
-            ChannelMixingMatrix(8, 2, eightToTwo)
-        )
-
-        // 5.1 Surround (6 channels) to Stereo (2 channels)
-        // Channel order: FL, FR, FC, LFE, BL, BR
-        // Boosted ITU-R BS.775: L' = (L + 0.707*C + 0.707*Ls) * boost
-        val sixToTwo = floatArrayOf(
-            1.0f * boost, 0.0f, 0.707f * boost, 0.0f, 0.707f * boost, 0.0f, // Left output
-            0.0f, 1.0f * boost, 0.707f * boost, 0.0f, 0.0f, 0.707f * boost // Right output
-        )
-        channelMixer.putChannelMixingMatrix(
-            ChannelMixingMatrix(6, 2, sixToTwo)
-        )
-
-        // Quad (4 channels) to Stereo (2 channels)
-        // Channel order: FL, FR, BL, BR
-        // Slightly lower boost for quad (less energy distributed)
-        val boostQuad = 1.2f
-        val fourToTwo = floatArrayOf(
-            1.0f * boostQuad, 0.0f, 0.707f * boostQuad, 0.0f, // Left output
-            0.0f, 1.0f * boostQuad, 0.0f, 0.707f * boostQuad // Right output
-        )
-        channelMixer.putChannelMixingMatrix(
-            ChannelMixingMatrix(4, 2, fourToTwo)
-        )
-
-        // Stereo (2 channels) to Stereo (2 channels) - passthrough (no boost needed)
-        channelMixer.putChannelMixingMatrix(
-            ChannelMixingMatrix.createForConstantGain(2, 2)
-        )
-
-        // Mono (1 channel) to Stereo (2 channels)
-        channelMixer.putChannelMixingMatrix(
-            ChannelMixingMatrix.createForConstantGain(1, 2)
-        )
-
-        Log.d(
-            RENDER_TAG,
-            "Channel normalization configured with boosted coefficients for loudness preservation"
-        )
-
-        return mutableListOf(channelMixer).apply { addAll(audioEffects) }
+    private fun isImageFile(path: String): Boolean {
+        val extension = path.substringAfterLast('.', "").lowercase()
+        return extension in listOf("jpg", "jpeg", "png", "webp", "heic", "heif")
     }
 
     /**
     * Builds an EditedMediaItem for a single video clip with all effects.
     */
     private fun buildEditedMediaItem(
         index: Int,
         clip: VideoClip,
@@ -389,27 +306,44 @@ class VideoSequenceBuilder(
 
         val inputFile = File(clip.inputPath)
         if (!inputFile.exists()) {
             Log.e(RENDER_TAG, "ERROR: Video file does not exist: ${clip.inputPath}")
-        } else {
-            Log.d(RENDER_TAG, "Video file exists, size: ${inputFile.length()} bytes")
         }
 
         // Build MediaItem with optional trimming
         val mediaItemBuilder = MediaItem.Builder().setUri(Uri.fromFile(inputFile))
 
+        val isImage = isImageFile(clip.inputPath)
+        if (isImage) {
+            val durationUs = when {
+                clip.endUs != null -> clip.endUs - (clip.startUs ?: 0L)
+                else -> MediaInfoExtractor.getVideoDuration(clip.inputPath) - (clip.startUs ?: 0L)
+            }
+            mediaItemBuilder.setImageDurationMs(maxOf(1, durationUs / 1000))
+
+            // Map common extensions to MIME types for Transformer
+            val extension = clip.inputPath.substringAfterLast('.', "").lowercase()
+            val mimeType = when (extension) {
+                "png" -> "image/png"
+                "webp" -> "image/webp"
+                "heic", "heif" -> "image/heif"
+                else -> "image/jpeg"
+            }
+            mediaItemBuilder.setMimeType(mimeType)
+        }
+
         if (clip.startUs != null || clip.endUs != null) {
             val startMs = (clip.startUs ?: 0L) / 1000
-            val endMs = clip.endUs?.div(1000) ?: C.TIME_END_OF_SOURCE
-            val expectedDurationMs = if (clip.endUs != null && clip.startUs != null) {
-                (clip.endUs - clip.startUs) / 1000
-            } else if (clip.endUs != null) {
+
+            // Explicitly use C.TIME_END_OF_SOURCE only if endUs is null.
+            // If it's provided, ensure it's not accidentally set to Long.MIN_VALUE via overflow/underflow.
+            val endMs = if (clip.endUs != null) {
                 clip.endUs / 1000
             } else {
-                -1L
+                C.TIME_END_OF_SOURCE
            }
 
             Log.d(
                 RENDER_TAG,
-                "Applying trim to clip ${clip.inputPath}: start=$startMs ms, end=$endMs ms, expectedDuration=$expectedDurationMs ms"
+                "Applying trim to clip ${clip.inputPath}: start=$startMs ms, end=$endMs ms"
             )
 
             val clippingConfig = MediaItem.ClippingConfiguration.Builder()
@@ -433,30 +367,16 @@ class VideoSequenceBuilder(
                 rotationDegrees
             )
 
+        // Apply rotation early so subsequent effects (Crop, Composition) see correctly oriented frames
+        applyRotation(clipVideoEffects, videoRotation.toFloat())
+
         // Adjust dimensions based on rotation
         val isRotated90Deg = videoRotation == 90 || videoRotation == 270
 
         // If crop is applied, update dimensions for AFTER crop scenario
-        val croppedWidth: Int?
-        val croppedHeight: Int?
         val crop = cropConfig
         if (crop != null) {
-            croppedWidth = if (isRotated90Deg) crop.height else crop.width
-            croppedHeight = if (isRotated90Deg) crop.width else crop.height
-        } else {
-            croppedWidth = null
-            croppedHeight = null
-        }
-
-        // Apply timed image layers BEFORE crop if withCropping is enabled
-        // This makes the images get cropped together with the video
-        val hasWithCropping = timedImageLayers.any { it.withCropping }
-        if (hasWithCropping && timedImageLayers.isNotEmpty()) {
-            applyTimedImageLayers(clipVideoEffects, timedImageLayers, videoWidth, videoHeight)
-        }
-
-        // Apply crop if configured
-        cropConfig?.let { crop ->
+            // Apply crop if configured
             applyCrop(
                 clipVideoEffects,
                 inputFile,
@@ -470,20 +390,34 @@ class VideoSequenceBuilder(
             )
 
             // Update dimensions after crop for image layers applied AFTER crop
+            val croppedWidth: Int? = if (isRotated90Deg) crop.height else crop.width
+            val croppedHeight: Int? = if (isRotated90Deg) crop.width else crop.height
             if (croppedWidth != null) videoWidth = croppedWidth
             if (croppedHeight != null) videoHeight = croppedHeight
         }
 
-        // Apply timed image layers AFTER crop if withCropping is disabled (default)
-        // This makes the images stretch to the final cropped size
-        if (!hasWithCropping && timedImageLayers.isNotEmpty()) {
-            applyTimedImageLayers(clipVideoEffects, timedImageLayers, videoWidth, videoHeight)
+        // Apply composition transformation if render dimensions are set.
+        // This ensures consistent canvas sizing.
+        if (renderWidth != null && renderHeight != null) {
+            clipVideoEffects += VideoCompositionTransformation(
+                x = clip.x,
+                y = clip.y,
+                width = clip.width,
+                height = clip.height,
+                videoWidth = videoWidth,
+                videoHeight = videoHeight,
+                renderWidth = renderWidth!!,
+                renderHeight = renderHeight!!
+            )
         }
 
         // Apply scale AFTER overlay and crop to match the iOS/macOS pipeline.
         // This prevents the overlay from being distorted by a pre-applied scale.
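+        // (Final canvas sizing is handled by the composition-level Presentation
+        // effect added in CompositionBuilder, so per-clip effects end here.)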
applyScale(clipVideoEffects, scaleX, scaleY) + // Apply opacity + applyOpacity(clipVideoEffects, clip.opacity) + // Per-clip volume control: // - Without custom audio: VolumeAudioProcessor per clip works (single sequence) // - With custom audio: AudioProcessors don't work with parallel sequences, @@ -521,6 +455,11 @@ class VideoSequenceBuilder( return EditedMediaItem.Builder(mediaItem) .setEffects(effects) .setRemoveAudio(shouldRemoveAudio) + .apply { + if (isImage) { + setFrameRate(30) + } + } .build() } @@ -599,7 +538,14 @@ class VideoSequenceBuilder( inputPath = clip.inputPath, startUs = newStartInSource, endUs = newEndInSource, - volume = clip.volume + volume = clip.volume, + x = clip.x, + y = clip.y, + width = clip.width, + height = clip.height, + zIndex = clip.zIndex, + opacity = clip.opacity, + segmentTimeUs = clip.segmentTimeUs ) ) val trimmedDuration = newEndInSource - newStartInSource diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/VideoTranscoder.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/VideoTranscoder.kt index 84d9a06..b5a2194 100644 --- a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/VideoTranscoder.kt +++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/VideoTranscoder.kt @@ -15,6 +15,7 @@ import androidx.media3.transformer.ExportException import androidx.media3.transformer.ExportResult import androidx.media3.transformer.Transformer import ch.waio.pro_video_editor.src.shared.logging.PluginLog as Log +import androidx.media3.transformer.Effects import java.io.File import java.util.concurrent.CountDownLatch import java.util.concurrent.atomic.AtomicReference @@ -44,19 +45,21 @@ object VideoTranscoder { } /** - * Checks if a video needs transcoding for effect compatibility. + * Checks if a video needs transcoding for compatibility. * * @param videoPath Path to the video file * @return True if transcoding is needed */ fun needsTranscoding(videoPath: String): Boolean { val formatInfo = MediaInfoExtractor.getVideoFormatInfo(videoPath) - val needsTranscode = formatInfo.needsTranscodingForEffects() + val audioChannels = MediaInfoExtractor.getAudioChannelCount(videoPath) ?: 2 + + val needsTranscode = formatInfo.needsTranscodingForEffects() || audioChannels > 2 Log.d( RENDER_TAG, "Video transcoding check: path=$videoPath, " + "isHevc=${formatInfo.isHevc}, bitDepth=${formatInfo.bitDepth}, " + - "isHdr=${formatInfo.isHdr}, needsTranscoding=$needsTranscode" + "isHdr=${formatInfo.isHdr}, channels=$audioChannels, needsTranscoding=$needsTranscode" ) return needsTranscode @@ -82,7 +85,7 @@ object VideoTranscoder { return TranscodeResult.NotNeeded(inputPath) } - Log.i(RENDER_TAG, "Starting HEVC 10-bit HDR -> H.264 8-bit SDR transcoding for: $inputPath") + Log.i(RENDER_TAG, "Starting HEVC 10-bit HDR or Multi-channel -> H.264 8-bit Stereo transcoding for: $inputPath") val outputFile = File( context.cacheDir, @@ -138,10 +141,12 @@ object VideoTranscoder { .build() // Use HDR_MODE_TONE_MAP_HDR_TO_SDR_USING_OPEN_GL to convert HDR to SDR - // This forces 8-bit output which then allows H.264 encoding + // This forces 8-bit output which then allows H.264 encoding. + // Also add standard audio normalization to ensure stereo output. 
val editedMediaItem = EditedMediaItem.Builder(mediaItem)
    .setRemoveAudio(false)
    .setRemoveVideo(false)
+   .setEffects(Effects(listOf(AudioMixingUtils.createStandardStereoMixer()), emptyList()))
    .build()

// Build composition with HDR tonemapping enabled
diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/VolumeControlAudioMixer.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/VolumeControlAudioMixer.kt
index 11af2ee..04d9bb6 100644
--- a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/VolumeControlAudioMixer.kt
+++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/VolumeControlAudioMixer.kt
@@ -20,24 +20,27 @@ import java.nio.ByteBuffer
 * to each audio source during the mixing process.
 *
 * @property trackVolumes Volume multipliers for each audio track sequence (0.0-1.0+)
- * @property videoAudioPresent Whether video audio is present (not removed due to volume=0)
+ * @property videoAudioSourceCount Number of video sequences that have active audio
+ * @property videoSequenceVolumes Volume multipliers for each video sequence
 */
@UnstableApi
class VolumeControlAudioMixerFactory(
    private val trackVolumes: List<Float>,
-   private val videoAudioPresent: Boolean
+   private val videoAudioSourceCount: Int,
+   private val videoSequenceVolumes: List<Float>
) : AudioMixer.Factory {

    init {
        Log.d(
            RENDER_TAG,
-           "VolumeControlAudioMixerFactory created: trackVolumes=$trackVolumes, videoAudioPresent=$videoAudioPresent"
+           "VolumeControlAudioMixerFactory created: trackVolumes=$trackVolumes, " +
+               "videoAudioSourceCount=$videoAudioSourceCount, videoSequenceVolumes=$videoSequenceVolumes"
        )
    }

    override fun create(): AudioMixer {
        Log.d(RENDER_TAG, "Creating VolumeControlAudioMixer")
-       return VolumeControlAudioMixer(trackVolumes, videoAudioPresent)
+       return VolumeControlAudioMixer(trackVolumes, videoAudioSourceCount, videoSequenceVolumes)
    }
}

@@ -47,17 +50,15 @@ class VolumeControlAudioMixerFactory(
 * When sources are added, it tracks their IDs and applies the appropriate volume
 * using DefaultAudioMixer.setSourceVolume() after each source is added.
 *
- * If videoAudioPresent is true (mixing video + audio tracks):
- *   Source 0 = Video audio (first sequence) - volume 1.0 (per-clip volume not available in mixer mode)
- *   Source 1..N = Audio tracks - applies trackVolumes[0], trackVolumes[1], etc.
- *
- * If videoAudioPresent is false (no video audio):
- *   Source 0..N = Audio tracks - applies trackVolumes[0], trackVolumes[1], etc.
+ * Source order in Media3 Composition (Mixed):
+ *   Source 0..N-1 = Video audio (from each sequence that has an AUDIO track)
+ *   Source N..M = Audio tracks
 */
@UnstableApi
private class VolumeControlAudioMixer(
    private val trackVolumes: List<Float>,
-   private val videoAudioPresent: Boolean
+   private val videoAudioSourceCount: Int,
+   private val videoSequenceVolumes: List<Float>
) : AudioMixer {

    private val delegate: DefaultAudioMixer =
@@ -90,26 +91,19 @@ private class VolumeControlAudioMixer(
    override fun addSource(sourceFormat: AudioProcessor.AudioFormat, startTimeUs: Long): Int {
        val sourceId = delegate.addSource(sourceFormat, startTimeUs)

-       // Determine which volume to apply based on source order and whether video audio is present
+       // Determine which volume to apply based on source order
        val volume: Float
        val sourceType: String

-       if (videoAudioPresent) {
-           // Both video and audio tracks present (mixing mode)
-           // Source 0 = Video audio, Source 1..N = Audio tracks
-           if (sourceCount == 0) {
-               volume = 1.0f // Video audio at full volume (per-clip volume not available in mixer mode)
-               sourceType = "VIDEO AUDIO"
-           } else {
-               val trackIndex = sourceCount - 1
-               volume = trackVolumes.getOrElse(trackIndex) { 1.0f }
-               sourceType = "AUDIO TRACK $trackIndex"
-           }
+       if (sourceCount < videoAudioSourceCount) {
+           // Source is a video audio track
+           volume = videoSequenceVolumes.getOrElse(sourceCount) { 1.0f }
+           sourceType = "VIDEO AUDIO (Sequence $sourceCount)"
        } else {
-           // Video audio was removed - only audio tracks present
-           val trackIndex = sourceCount
+           // Source is an audio track
+           val trackIndex = sourceCount - videoAudioSourceCount
            volume = trackVolumes.getOrElse(trackIndex) { 1.0f }
-           sourceType = "AUDIO TRACK $trackIndex (no video audio)"
+           sourceType = "AUDIO TRACK $trackIndex"
        }

        Log.d(
diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/models/RenderConfig.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/models/RenderConfig.kt
index bb71bc3..a6f443c 100644
--- a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/models/RenderConfig.kt
+++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/models/RenderConfig.kt
@@ -5,18 +5,32 @@ import ch.waio.pro_video_editor.src.shared.logging.PluginLog as Log
import io.flutter.plugin.common.MethodCall

/**
- * Represents a video clip segment with optional trimming.
+ * Represents a video clip segment with optional trimming and composition parameters.
 *
 * @property inputPath Absolute path to video file
 * @property startUs Start time in microseconds (null = from beginning)
 * @property endUs End time in microseconds (null = until end)
 * @property volume Volume multiplier for this clip (null = unchanged, 0.0=mute, 1.0=original)
+ * @property x Horizontal offset in pixels from the left edge
+ * @property y Vertical offset in pixels from the top edge
+ * @property width Target width of the segment in pixels (null = original)
+ * @property height Target height of the segment in pixels (null = original)
+ * @property zIndex Layer order (higher values on top)
+ * @property opacity Transparency (0.0=invisible, 1.0=opaque)
+ * @property segmentTimeUs Absolute start time in the composition (null = sequential)
 */
data class VideoClip(
    val inputPath: String,
    val startUs: Long?,
    val endUs: Long?,
-   val volume: Float? = null
+   val volume: Float? = null,
+   val x: Double? = null,
+   val y: Double? = null,
+   val width: Double? = null,
+   val height: Double? = null,
+   val zIndex: Int?
= null, + val opacity: Float? = null, + val segmentTimeUs: Long? = null ) /** @@ -176,6 +190,8 @@ data class RenderConfig( val cropY: Int? = null, val scaleX: Float? = null, val scaleY: Float? = null, + val renderWidth: Int? = null, + val renderHeight: Int? = null, val bitrate: Int? = null, val enableAudio: Boolean = true, val playbackSpeed: Float? = null, @@ -231,11 +247,20 @@ data class RenderConfig( inputPath = clipMap["inputPath"] as String, startUs = (clipMap["startUs"] as? Number)?.toLong(), endUs = (clipMap["endUs"] as? Number)?.toLong(), - volume = (clipMap["volume"] as? Number)?.toFloat() + volume = (clipMap["volume"] as? Number)?.toFloat(), + x = (clipMap["x"] as? Number)?.toDouble(), + y = (clipMap["y"] as? Number)?.toDouble(), + width = (clipMap["width"] as? Number)?.toDouble(), + height = (clipMap["height"] as? Number)?.toDouble(), + zIndex = (clipMap["zIndex"] as? Number)?.toInt(), + opacity = (clipMap["opacity"] as? Number)?.toFloat(), + segmentTimeUs = (clipMap["segmentTimeUs"] as? Number)?.toLong() ) Log.d( PACKAGE_TAG, - "Clip $index: path=${clip.inputPath}, start=${clip.startUs}, end=${clip.endUs}, volume=${clip.volume}" + "Clip $index: path=${clip.inputPath}, start=${clip.startUs}, end=${clip.endUs}, " + + "volume=${clip.volume}, pos=(${clip.x}, ${clip.y}), size=${clip.width}x${clip.height}, " + + "zIndex=${clip.zIndex}, opacity=${clip.opacity}, time=${clip.segmentTimeUs}" ) clip } @@ -292,6 +317,8 @@ data class RenderConfig( cropY = call.argument("cropY")?.toInt(), scaleX = call.argument("scaleX")?.toFloat(), scaleY = call.argument("scaleY")?.toFloat(), + renderWidth = call.argument("renderWidth")?.toInt(), + renderHeight = call.argument("renderHeight")?.toInt(), bitrate = call.argument("bitrate")?.toInt(), enableAudio = call.argument("enableAudio") ?: true, playbackSpeed = call.argument("playbackSpeed")?.toFloat(), diff --git a/example/lib/features/render/video_renderer_page.dart b/example/lib/features/render/video_renderer_page.dart index 42ca328..d691a6c 100644 --- a/example/lib/features/render/video_renderer_page.dart +++ b/example/lib/features/render/video_renderer_page.dart @@ -47,11 +47,10 @@ class _VideoRendererPageState extends State { final double _blurFactor = 0; final List> _colorFilters = []; - // kBasicFilterMatrix kComplexFilterMatrix - VideoMetadata? _outputMetadata; String _taskId = DateTime.now().microsecondsSinceEpoch.toString(); + String? _error; late final EditorVideo _video; @@ -373,7 +372,7 @@ class _VideoRendererPageState extends State { videoSegments: [ VideoSegment( video: _video, - startTime: const Duration(seconds: 0), + startTime: Duration.zero, endTime: const Duration(seconds: 7), volume: 1.0, ), @@ -636,6 +635,215 @@ class _VideoRendererPageState extends State { await _renderVideo(data); } + /// Picture in picture example. Combined video segments. 
+  ///
+  /// This example demonstrates how to render a composite video from multiple
+  /// overlapping video segments.
+  Future<void> _combinedPip() async {
+    VideoMetadata meta = await _pve.getMetadata(_video);
+    var Size(width: width, height: height) = meta.resolution;
+
+    var data = VideoRenderData(
+      videoSegments: [
+        // The first segment drives the output resolution.
+        VideoSegment(
+          video: _video,
+          segmentTime: Duration.zero,
+          startTime: Duration.zero,
+          endTime: const Duration(seconds: 10),
+          offset: const Offset(100, 0),
+          // size: Size(width, height),
+          zIndex: 0,
+          volume: 1.0,
+        ),
+        VideoSegment(
+          video: _video,
+          segmentTime: const Duration(seconds: 5),
+          startTime: const Duration(seconds: 5),
+          endTime: const Duration(seconds: 15),
+          offset: const Offset(20, 20),
+          size: Size(width / 2, height / 2),
+          zIndex: 10,
+          volume: 0,
+        ),
+      ],
+
+      // imageLayers: [
+      //   // Stretched overlay for entire video
+      //   ImageLayer(image: EditorLayerImage.memory(imageBytes)),
+      //   // Sticker visible only from 3s–8s
+      //   ImageLayer(
+      //     image: stickerImage,
+      //     offset: const Offset(500, 150),
+      //     startTime: const Duration(seconds: 3),
+      //     endTime: const Duration(seconds: 8),
+      //   ),
+      // ],
+      // audioTracks: [
+      //   // Background music in second half at low volume
+      //   VideoAudioTrack(
+      //     path: audioFile.path,
+      //     volume: 0.4,
+      //     startTime: const Duration(seconds: 10),
+      //   ),
+      // ],
+    );
+
+    await _renderVideo(data);
+  }
+
+  /// Video stack example. Combined video segments.
+  ///
+  /// This example demonstrates how to render a composite video with multiple
+  /// stacked video segments.
+  Future<void> _combinedStack() async {
+    VideoMetadata meta1 = await _pve.getMetadata(_video);
+    var Size(width: width1, height: height1) = meta1.resolution;
+
+    EditorVideo video2 = EditorVideo.asset(kVideoEditorExampleAssetWorldPath);
+    VideoMetadata meta2 = await _pve.getMetadata(video2);
+    var Size(width: width2, height: height2) = meta2.resolution;
+
+    double width = max(width1, width2);
+    double scale1 = width / width1;
+    double scale2 = width / width2;
+
+    // Resize one video to match the other's width, then stack them
+    // vertically (video1 on top of video2).
+    double height = height1 * scale1 + height2 * scale2;
+
+    // int duration = min(meta1.duration.inSeconds, meta2.duration.inSeconds);
+    debugPrint('target resolution: $width x $height');
+
+    var data = VideoRenderData(
+      qualityConfig: VideoQualityConfig.custom(
+        bitrate: meta1.bitrate,
+        resolution: Size(width, height),
+      ),
+      videoSegments: [
+        VideoSegment(
+          video: _video,
+          segmentTime: Duration.zero,
+          startTime: Duration.zero,
+          // endTime: Duration(seconds: duration),
+          endTime: const Duration(seconds: 5),
+          offset: const Offset(0, 0),
+          size: Size(width1 * scale1, height1 * scale1),
+          zIndex: 10,
+          volume: 1.0,
+        ),
+
+        VideoSegment(
+          video: video2,
+          segmentTime: Duration.zero,
+          startTime: Duration.zero,
+          // endTime: Duration(seconds: duration),
+          endTime: const Duration(seconds: 5),
+          offset: Offset(0, height1 * scale1),
+          size: Size(width2 * scale2, height2 * scale2),
+          volume: 0,
+        ),
+      ],
+    );
+
+    await _renderVideo(data);
+  }
+
+  /// Video grid example. Combined video segments.
+ /// + /// This example demonstrates how to render composite video from multiple + /// overlapping video segments + Future _combinedGrid() async { + VideoMetadata metadata = await _pve.getMetadata(_video); + + Size resolution = metadata.resolution; + double width = resolution.width; + double height = resolution.height; + + // red image with opacity bytes for interleaving test + final redRecorder = ui.PictureRecorder(); + ui.Canvas(redRecorder).drawRect( + const ui.Rect.fromLTWH(0, 0, 500, 500), + ui.Paint()..color = Colors.red.withValues(alpha: 0.5), + ); + ui.Picture redPicture = redRecorder.endRecording(); + ui.Image redImg = await redPicture.toImage(500, 500); + ByteData? bytes = await redImg.toByteData(format: ui.ImageByteFormat.png); + Uint8List redImageBytes = bytes!.buffer.asUint8List(); + redPicture.dispose(); + + var data = VideoRenderData( + qualityConfig: VideoQualityConfig.custom( + bitrate: metadata.bitrate, + resolution: Size(width * 2, height * 2), + ), + videoSegments: [ + VideoSegment( + video: _video, + segmentTime: Duration.zero, + startTime: Duration.zero, + endTime: const Duration(seconds: 10), + offset: const Offset(0, 0), + size: Size(width, height), + zIndex: 10, + volume: 1.0, + ), + VideoSegment( + video: _video, + segmentTime: const Duration(seconds: 3), + startTime: const Duration(seconds: 3), + endTime: const Duration(seconds: 13), + offset: Offset(width - 50, 50), + size: Size(width, height), + zIndex: 20, + volume: 1.0, + ), + VideoSegment( + video: _video, + segmentTime: const Duration(seconds: 6), + startTime: const Duration(seconds: 6), + endTime: const Duration(seconds: 16), + offset: Offset(width, height), + size: Size(width, height), + zIndex: 30, + volume: 1.0, + ), + VideoSegment( + video: _video, + segmentTime: const Duration(seconds: 9), + startTime: const Duration(seconds: 9), + endTime: const Duration(seconds: 20), + offset: Offset(50, height - 50), + size: Size(width, height), + zIndex: 40, + volume: 1.0, + ), + + // 480 × 270 + VideoSegment( + video: EditorVideo.asset(kVideoEditorExampleAssetWorldPath), + segmentTime: Duration.zero, + zIndex: 50, + opacity: 0.5, + offset: Offset(width - 600, height - 200), + size: const Size(480 * 2, 270 * 2), + volume: 0, + ), + ], + + imageLayers: [ + // Transparent red square at the top + ImageLayer( + image: EditorLayerImage.memory(redImageBytes), + offset: Offset(width - 250, height - 300), + size: const Size(500, 500), + ), + ], + ); + + await _renderVideo(data); + } + /// Fade animation on image layer. 
/// /// This example demonstrates a simple fade-in and fade-out animation @@ -805,7 +1013,7 @@ class _VideoRendererPageState extends State { videoSegments: [ VideoSegment( video: _video, - startTime: const Duration(seconds: 0), + startTime: Duration.zero, endTime: const Duration(seconds: 5), ), VideoSegment( @@ -831,7 +1039,7 @@ class _VideoRendererPageState extends State { videoSegments: [ VideoSegment( video: _video, - startTime: const Duration(seconds: 0), + startTime: Duration.zero, endTime: const Duration(seconds: 5), ), VideoSegment( @@ -934,6 +1142,7 @@ class _VideoRendererPageState extends State { Future _renderVideo(VideoRenderData value) async { _taskId = DateTime.now().microsecondsSinceEpoch.toString(); + _error = null; setState(() => _isExporting = true); final directory = await getTemporaryDirectory(); @@ -949,8 +1158,15 @@ class _VideoRendererPageState extends State { } on RenderCanceledException { setState(() => _isExporting = false); return; + } catch (ex) { + setState(() { + _error = 'Failed to render video: $ex'; + _isExporting = false; + }); + return; } + debugPrint('output $outputPath'); final result = File(outputPath).readAsBytesSync(); _generationTime = sp.elapsed; @@ -1112,10 +1328,13 @@ class _VideoRendererPageState extends State { ), Text( 'Result: ${formatBytes(_videoBytes!.lengthInBytes)} ' - 'bytes in ${_generationTime.inMilliseconds}ms', + 'bytes in ${_generationTime.inMilliseconds}ms, ' + '${_outputMetadata?.resolution.width ?? 0}' + ' x ${_outputMetadata?.resolution.height ?? 0}', ), if (_outputMetadata?.isOptimizedForStreaming != null) Row( + spacing: 6, children: [ Icon( _outputMetadata!.isOptimizedForStreaming! @@ -1126,7 +1345,6 @@ class _VideoRendererPageState extends State { : Colors.red, size: 18, ), - const SizedBox(width: 6), Text( _outputMetadata!.isOptimizedForStreaming! ? 
'Optimized for streaming (moov before mdat)' @@ -1134,6 +1352,19 @@ class _VideoRendererPageState extends State { ), ], ), + if (_error != null) + Row( + crossAxisAlignment: CrossAxisAlignment.start, + spacing: 6, + children: [ + const Icon( + Icons.warning_rounded, + color: Colors.red, + size: 20, + ), + Expanded(child: Text('$_error')), + ], + ), ], ); } @@ -1183,11 +1414,13 @@ class _VideoRendererPageState extends State { onTap: _layers, leading: const Icon(Icons.layers_outlined), title: const Text('Parse with layers'), + subtitle: const Text('Layer for the whole video duration'), ), ListTile( onTap: _layersTimed, leading: const Icon(Icons.av_timer_outlined), title: const Text('Parse with timed layers'), + subtitle: const Text('Layers at 0, 5, 7 + random one every second'), ), ListTile( onTap: _layersWithSize, @@ -1215,6 +1448,7 @@ class _VideoRendererPageState extends State { onTap: _multipleChanges, leading: const Icon(Icons.web_stories_outlined), title: const Text('Multiple changes'), + subtitle: const Text('FlipX, image, color filter, crop'), ), ListTile( onTap: _combinedTimeBased, @@ -1222,6 +1456,24 @@ class _VideoRendererPageState extends State { title: const Text('Combined Time-Based'), subtitle: const Text('Clips + filters + layers + audio, all timed'), ), + ListTile( + onTap: _combinedPip, + leading: const Icon(Icons.picture_in_picture_alt), + title: const Text('Picture in picture'), + subtitle: const Text('Pip starts at 5, main ends at 10'), + ), + ListTile( + onTap: _combinedStack, + leading: const Icon(Icons.stacked_line_chart), + title: const Text('Video stack'), + subtitle: const Text('Two videos stacked together'), + ), + ListTile( + onTap: _combinedGrid, + leading: const Icon(Icons.dashboard_outlined), + title: const Text('Video grid'), + subtitle: const Text('Grid of videos, with overlays and opacity'), + ), ListTile( onTap: _bitrate, leading: const Icon(Icons.animation), diff --git a/ios/Classes/src/features/render/RenderVideo.swift b/ios/Classes/src/features/render/RenderVideo.swift index 5a78609..4eddb3f 100644 --- a/ios/Classes/src/features/render/RenderVideo.swift +++ b/ios/Classes/src/features/render/RenderVideo.swift @@ -74,7 +74,14 @@ class RenderVideo { inputPath: newPath, startUs: clip.startUs, endUs: clip.endUs, - volume: clip.volume + volume: clip.volume, + opacity: clip.opacity, + x: clip.x, + y: clip.y, + width: clip.width, + height: clip.height, + segmentTimeUs: clip.segmentTimeUs, + zIndex: clip.zIndex ) } return clip @@ -138,13 +145,16 @@ class RenderVideo { var effectsConfig = VideoCompositorConfig() // Use composition helper to merge multiple video clips - let (composition, videoCompData, renderSize, audioMix, sourceTrackID) = + let (composition, videoCompData, renderSize, audioMix, sourceTrackID, updatedEffectsConfig) = try await applyComposition( videoClips: workingConfig.videoClips, videoEffects: effectsConfig, enableAudio: workingConfig.enableAudio, - audioTracks: workingConfig.audioTracks + audioTracks: workingConfig.audioTracks, + renderWidth: workingConfig.renderWidth, + renderHeight: workingConfig.renderHeight ) + effectsConfig = updatedEffectsConfig var videoCompConfig = videoCompData // Set source track ID for fallback on older iOS versions (e.g., iPhone 7) @@ -235,9 +245,17 @@ class RenderVideo { videoComposition.frameDuration = videoCompConfig.frameDuration videoComposition.renderSize = finalRenderSize videoComposition.instructions = videoCompConfig.instructions + + // Ensure compositor knows the intended logical size for coordinate 
mapping + effectsConfig.intendedRenderSize = finalRenderSize + videoComposition.customVideoCompositorClass = makeVideoCompositorSubclass(with: effectsConfig) - let preset = applyBitrate(requestedBitrate: workingConfig.bitrate) + let preset = applyBitrate( + requestedBitrate: workingConfig.bitrate, + renderWidth: workingConfig.renderWidth, + renderHeight: workingConfig.renderHeight + ) let export = try await prepareExportSession( composition: composition, diff --git a/ios/Classes/src/features/render/helpers/ApplyBitrate.swift b/ios/Classes/src/features/render/helpers/ApplyBitrate.swift index 9b84b4a..d441852 100644 --- a/ios/Classes/src/features/render/helpers/ApplyBitrate.swift +++ b/ios/Classes/src/features/render/helpers/ApplyBitrate.swift @@ -7,8 +7,9 @@ import AVFoundation /// resolution/quality presets. /// /// - Parameters: -/// - requestedBitrate: Target bitrate in bits per second. If nil, returns preset hint or highest quality. -/// - presetHint: Optional preset to use as fallback. If nil, defaults to highest quality. +/// - requestedBitrate: Target bitrate in bits per second. If nil, returns highest quality. +/// - renderWidth: Optional target render width to ensure preset supports the resolution. +/// - renderHeight: Optional target render height to ensure preset supports the resolution. /// - Returns: AVAssetExportPreset string matching the requested quality level. /// /// Bitrate mapping: @@ -23,7 +24,28 @@ import AVFoundation /// - ≥2 Mbps: 480p /// - ≥1 Mbps: Medium quality /// - <1 Mbps: Low quality -public func applyBitrate(requestedBitrate: Int?, presetHint: String? = nil) -> String { +public func applyBitrate( + requestedBitrate: Int?, + renderWidth: Double? = nil, + renderHeight: Double? = nil +) -> String { + // If a custom resolution is provided, we should ideally use a "HighestQuality" + // preset to avoid resolution constraints from bitrate-based presets. + // However, if a bitrate is also specified, we'll try to pick the best matching one. + if let rw = renderWidth, let rh = renderHeight { + let maxDim = max(rw, rh) + + if maxDim > 1920 { + if #available(iOS 11.0, *) { + return AVAssetExportPresetHEVC3840x2160 + } else { + return AVAssetExportPreset3840x2160 + } + } else if maxDim > 1280 { + return AVAssetExportPreset1920x1080 + } + } + if let bitrate = requestedBitrate { PluginLog.print( "[\(Tags.render)] 📊 Requested bitrate: \(bitrate) bps (\(String(format: "%.1f", Double(bitrate) / 1_000_000)) Mbps)" @@ -73,5 +95,5 @@ public func applyBitrate(requestedBitrate: Int?, presetHint: String? = nil) -> S } } - return presetHint ?? AVAssetExportPresetHighestQuality + return AVAssetExportPresetHighestQuality } diff --git a/ios/Classes/src/features/render/helpers/ApplyComposition.swift b/ios/Classes/src/features/render/helpers/ApplyComposition.swift index 98d3080..115d3c8 100644 --- a/ios/Classes/src/features/render/helpers/ApplyComposition.swift +++ b/ios/Classes/src/features/render/helpers/ApplyComposition.swift @@ -19,18 +19,22 @@ import Foundation /// - CGSize: Final render size (max dimensions from all clips) /// - AVAudioMix?: Audio mix with volume controls (nil if no audio mixing needed) /// - CMPersistentTrackID: The track ID of the video composition track (for fallback on older iOS) +/// - VideoCompositorConfig: Updated compositor configuration with track info /// /// - Throws: NSError if video clips are empty, files don't exist, or tracks can't be loaded. 
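/*
 Usage sketch for the widened helper below — illustrative only: `clips`,
 `config`, and `tracks` are assumed caller-side values, and the render size is
 an arbitrary example.

 let (composition, videoData, renderSize, audioMix, fallbackTrackID, compositorConfig) =
     try await applyComposition(
         videoClips: clips,
         videoEffects: config,
         enableAudio: true,
         audioTracks: tracks,
         renderWidth: 1920,
         renderHeight: 1080
     )
 // compositorConfig.videoClipConfigs maps composition track IDs to their
 // VideoClip, which the custom compositor uses to place each frame.
*/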
func applyComposition( videoClips: [VideoClip], videoEffects: VideoCompositorConfig, enableAudio: Bool, - audioTracks: [AudioTrackConfig] + audioTracks: [AudioTrackConfig], + renderWidth: Double? = nil, + renderHeight: Double? = nil ) async throws -> ( - AVMutableComposition, VideoCompositionData, CGSize, AVAudioMix?, CMPersistentTrackID + AVMutableComposition, VideoCompositionData, CGSize, AVAudioMix?, CMPersistentTrackID, VideoCompositorConfig ) { return try await CompositionBuilder(videoClips: videoClips, videoEffects: videoEffects) .setEnableAudio(enableAudio) .setAudioTracks(audioTracks) + .setRenderSize(width: renderWidth, height: renderHeight) .build() } diff --git a/ios/Classes/src/features/render/helpers/ApplyPlaybackSpeed.swift b/ios/Classes/src/features/render/helpers/ApplyPlaybackSpeed.swift index 4099370..6d54575 100644 --- a/ios/Classes/src/features/render/helpers/ApplyPlaybackSpeed.swift +++ b/ios/Classes/src/features/render/helpers/ApplyPlaybackSpeed.swift @@ -43,10 +43,10 @@ public func applyPlaybackSpeed( } let scaledStart = CMTimeMultiplyByFloat64(custom.timeRange.start, multiplier: multiplier) let scaledDuration = CMTimeMultiplyByFloat64(custom.timeRange.duration, multiplier: multiplier) - let trackID = (custom.requiredSourceTrackIDs?.first as? NSNumber)?.int32Value ?? kCMPersistentTrackID_Invalid + let trackIDs = custom.requiredSourceTrackIDs?.compactMap { ($0 as? NSNumber)?.int32Value } ?? [] return CustomVideoCompositionInstruction( timeRange: CMTimeRange(start: scaledStart, duration: scaledDuration), - sourceTrackID: trackID, + sourceTrackIDs: trackIDs, layerInstructions: custom.layerInstructions, backgroundColor: custom.backgroundColor ) diff --git a/ios/Classes/src/features/render/helpers/CompositionBuilder.swift b/ios/Classes/src/features/render/helpers/CompositionBuilder.swift index 0590ec2..e2ef6d8 100644 --- a/ios/Classes/src/features/render/helpers/CompositionBuilder.swift +++ b/ios/Classes/src/features/render/helpers/CompositionBuilder.swift @@ -12,6 +12,8 @@ internal class CompositionBuilder { private let videoEffects: VideoCompositorConfig private var enableAudio: Bool = true private var audioTracks: [AudioTrackConfig] = [] + private var renderWidth: Double? + private var renderHeight: Double? /// Initializes builder with configuration. /// @@ -23,6 +25,13 @@ internal class CompositionBuilder { self.videoEffects = videoEffects } + /// Sets the target render size. + func setRenderSize(width: Double?, height: Double?) -> CompositionBuilder { + self.renderWidth = width + self.renderHeight = height + return self + } + /// Enables or disables audio. 
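/*
 Shape of the reworked multi-track instruction — a minimal sketch assuming two
 overlapping clips whose composition tracks are `trackA` and `trackB`
 (hypothetical names, as are the layer instructions):

 let instruction = CustomVideoCompositionInstruction(
     timeRange: CMTimeRange(
         start: .zero,
         duration: CMTime(seconds: 5, preferredTimescale: 600)
     ),
     sourceTrackIDs: [trackA.trackID, trackB.trackID],
     layerInstructions: [layerInstructionA, layerInstructionB],
     backgroundColor: CGColor(red: 0, green: 0, blue: 0, alpha: 1)
 )
 // requiredSourceTrackIDs now reports both IDs, so the compositor receives a
 // source frame for every clip that is active in this segment.
*/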
/// /// - Parameter enabled: If true, includes original audio from video clips @@ -46,7 +55,7 @@ internal class CompositionBuilder { /// - Returns: Tuple containing composition, video composition, render size, audio mix, and source track ID /// - Throws: Error if composition creation fails func build() async throws -> ( - AVMutableComposition, VideoCompositionData, CGSize, AVAudioMix?, CMPersistentTrackID + AVMutableComposition, VideoCompositionData, CGSize, AVAudioMix?, CMPersistentTrackID, VideoCompositorConfig ) { guard !videoClips.isEmpty else { throw NSError( @@ -64,9 +73,14 @@ internal class CompositionBuilder { // Build video sequence let videoBuilder = VideoSequenceBuilder(videoClips: videoClips) .setEnableAudio(enableAudio) + .setRenderSize(width: renderWidth, height: renderHeight) let videoResult = try await videoBuilder.build(in: composition) + // Store track configs for compositor + var updatedVideoEffects = videoEffects + updatedVideoEffects.videoClipConfigs = videoResult.trackConfigs + // Add custom audio tracks var customAudioTracks: [(track: AVMutableCompositionTrack, config: AudioTrackConfig)] = [] for trackConfig in audioTracks { @@ -106,9 +120,7 @@ internal class CompositionBuilder { ) let compositionRenderSize = videoResult.renderSize - // Create instructions for each clip segment - // Use custom instruction class to ensure requiredSourceTrackIDs is properly set - // This fixes issues on older iOS versions (e.g., iPhone 7, iOS 15) + // Create instructions for each non-overlapping time segment var instructions: [AVVideoCompositionInstructionProtocol] = [] PluginLog.print("") @@ -120,6 +132,8 @@ internal class CompositionBuilder { PluginLog.print("==========================================") PluginLog.print("") + // Calculate pre-determined transforms for all clips + var clipTransforms: [CGAffineTransform] = [] for (index, clipInstruction) in videoResult.clipInstructions.enumerated() { PluginLog.print("🎬 Processing instruction for clip \(index)") PluginLog.print( @@ -133,36 +147,55 @@ internal class CompositionBuilder { with: clipInstruction.transform, clipIndex: index ) + clipTransforms.append(transform) + } - let layerInstruction: AVVideoCompositionLayerInstruction - if #available(iOS 26.0, *) { - var config = AVVideoCompositionLayerInstruction.Configuration( - assetTrack: videoResult.videoTrack - ) - config.setTransform(transform, at: .zero) - layerInstruction = AVVideoCompositionLayerInstruction(configuration: config) - } else { - let mutableInstruction = AVMutableVideoCompositionLayerInstruction( - assetTrack: videoResult.videoTrack - ) - mutableInstruction.setTransform(transform, at: .zero) - layerInstruction = mutableInstruction - } - - // Use custom instruction that explicitly provides requiredSourceTrackIDs - let instruction = CustomVideoCompositionInstruction( - timeRange: clipInstruction.timeRange, - sourceTrackID: videoResult.videoTrack.trackID, - layerInstructions: [layerInstruction], - backgroundColor: CGColor(red: 0, green: 0, blue: 0, alpha: 1) - ) + // Calculate non-overlapping time segments + let segments = calculateSegments( + from: videoResult.clipInstructions, + totalDuration: videoResult.totalDuration + ) + for (segIndex, segmentRange) in segments.enumerated() { + PluginLog.print("🎬 Processing segment \(segIndex)") PluginLog.print( - " ⚙️ Layer instruction configured with transform (trackID: \(videoResult.videoTrack.trackID))" + " Time range: \(String(format: "%.2f", segmentRange.start.seconds))s - \(String(format: "%.2f", (segmentRange.start + 
segmentRange.duration).seconds))s" ) - PluginLog.print("") - instructions.append(instruction) + var activeTrackIDs: [CMPersistentTrackID] = [] + var layerInstructions: [AVVideoCompositionLayerInstruction] = [] + + for (clipIndex, clipInstruction) in videoResult.clipInstructions.enumerated() { + // Check if this clip is active during this segment + let clipRange = clipInstruction.timeRange + let intersection = CMTimeRangeGetIntersection(segmentRange, otherRange: clipRange) + + if CMTimeGetSeconds(intersection.duration) > 0 { + activeTrackIDs.append(clipInstruction.trackID) + + let transform = clipTransforms[clipIndex] + let mutableLayerInstruction = AVMutableVideoCompositionLayerInstruction( + assetTrack: composition.track(withTrackID: clipInstruction.trackID)! + ) + mutableLayerInstruction.setTransform(transform, at: .zero) + layerInstructions.append(mutableLayerInstruction) + + PluginLog.print(" - Added trackID \(clipInstruction.trackID) (Clip \(clipIndex))") + } + } + + if !layerInstructions.isEmpty { + // Use custom instruction that explicitly provides requiredSourceTrackIDs + let instruction = CustomVideoCompositionInstruction( + timeRange: segmentRange, + sourceTrackIDs: activeTrackIDs, + layerInstructions: layerInstructions, + backgroundColor: CGColor(red: 0, green: 0, blue: 0, alpha: 1) + ) + instructions.append(instruction) + PluginLog.print(" ✅ Segment instruction created with \(layerInstructions.count) layers") + } + PluginLog.print("") } let videoCompositionData = VideoCompositionData( @@ -173,10 +206,10 @@ internal class CompositionBuilder { PluginLog.print("✅ Composition created successfully with \(videoClips.count) clips") - // Return the track ID for fallback on older iOS versions - let sourceTrackID = videoResult.videoTrack.trackID + // Return the first track ID for fallback on older iOS versions + let sourceTrackID = videoResult.clipInstructions.first?.trackID ?? kCMPersistentTrackID_Invalid - return (composition, videoCompositionData, videoResult.renderSize, audioMix, sourceTrackID) + return (composition, videoCompositionData, videoResult.renderSize, audioMix, sourceTrackID, updatedVideoEffects) } /// Creates audio mix with per-clip and per-track volume parameters. @@ -191,11 +224,17 @@ internal class CompositionBuilder { for track in originalTracks { let inputParameters = AVMutableAudioMixInputParameters(track: track) - // Use setVolumeRamp for each clip's time range to ensure - // volume changes are applied precisely per segment - for (index, clipInstruction) in clipInstructions.enumerated() { + // Find all instructions that apply to this specific audio track + let relevantInstructions = clipInstructions.enumerated().filter { _, instruction in + instruction.audioTrackID == track.trackID + } + + for (index, clipInstruction) in relevantInstructions { let clipVolume = index < videoClips.count ? (videoClips[index].volume ?? 
1.0) : 1.0
+
+               PluginLog.print("🔊 Setting volume ramp for track \(track.trackID): volume=\(clipVolume) at \(String(format: "%.2f", clipInstruction.timeRange.start.seconds))s")
+
                inputParameters.setVolumeRamp(
                    fromStartVolume: clipVolume,
                    toEndVolume: clipVolume,
@@ -204,7 +243,7 @@
            }
            audioMixInputParameters.append(inputParameters)
-           PluginLog.print("🔊 Applied per-clip volume to original audio track")
+           PluginLog.print("🔊 Applied per-clip volume to original audio track (ID: \(track.trackID))")
        }

        // Apply volume to custom audio tracks
@@ -234,6 +273,16 @@
        with preferredTransform: CGAffineTransform,
        clipIndex: Int
    ) -> CGAffineTransform {
+       // If manual positioning is requested, use the preferred transform as-is.
+       // The compositor will handle custom scaling and positioning based on the VideoClip config.
+       if clipIndex < videoClips.count {
+           let clip = videoClips[clipIndex]
+           if clip.x != nil || clip.y != nil || clip.width != nil || clip.height != nil {
+               PluginLog.print("  🎯 Manual positioning detected for clip \(clipIndex), skipping fit and center transform")
+               return preferredTransform
+           }
+       }
+
        // Get the display size after applying the original transform (handles rotation)
        let displaySize = naturalSize.applying(preferredTransform)
        let videoWidth = abs(displaySize.width)
@@ -315,4 +364,39 @@
        return transform
    }
+
+   /// Calculates non-overlapping time segments from clip instructions.
+   private func calculateSegments(from instructions: [ClipInstruction], totalDuration: CMTime) -> [CMTimeRange] {
+       var points: [CMTime] = [.zero, totalDuration]
+       for instruction in instructions {
+           points.append(instruction.timeRange.start)
+           points.append(CMTimeAdd(instruction.timeRange.start, instruction.timeRange.duration))
+       }
+
+       let sortedPoints = points
+           .filter { CMTimeCompare($0, totalDuration) <= 0 }
+           .sorted { CMTimeCompare($0, $1) < 0 }
+
+       var uniquePoints: [CMTime] = []
+       for point in sortedPoints {
+           if let last = uniquePoints.last {
+               if CMTimeCompare(last, point) != 0 {
+                   uniquePoints.append(point)
+               }
+           } else {
+               uniquePoints.append(point)
+           }
+       }
+
+       var segments: [CMTimeRange] = []
+       for i in 0..<(uniquePoints.count - 1) {
+           let start = uniquePoints[i]
+           let duration = CMTimeSubtract(uniquePoints[i + 1], start)
+           if CMTimeGetSeconds(duration) > 0 {
+               segments.append(CMTimeRange(start: start, duration: duration))
+           }
+       }
+       return segments
+   }
}
diff --git a/ios/Classes/src/features/render/helpers/VideoSequenceBuilder.swift b/ios/Classes/src/features/render/helpers/VideoSequenceBuilder.swift
index 56f93b3..32c010a 100644
--- a/ios/Classes/src/features/render/helpers/VideoSequenceBuilder.swift
+++ b/ios/Classes/src/features/render/helpers/VideoSequenceBuilder.swift
@@ -9,6 +9,8 @@ internal class VideoSequenceBuilder {
    private let videoClips: [VideoClip]
    private var enableAudio: Bool = true
+   private var renderWidth: Double?
+   private var renderHeight: Double?

    /// Initializes builder with video clips.
    ///
@@ -17,6 +19,13 @@
        self.videoClips = videoClips
    }

+   /// Sets the target render size.
+   func setRenderSize(width: Double?, height: Double?) -> VideoSequenceBuilder {
+       self.renderWidth = width
+       self.renderHeight = height
+       return self
+   }
+
    /// Enables or disables audio in the output.
    ///
    /// - Parameter enabled: If true, includes original audio from video clips
@@ -67,7 +76,7 @@
    /// Builds the video composition with all clips.
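/*
 Worked example for calculateSegments, with illustrative values: clip 1 covers
 0–10 s and clip 2 covers 5–15 s, so the boundary points are {0, 5, 10, 15}
 and three non-overlapping segments come out — 0–5 s (clip 1 only), 5–10 s
 (both clips), 10–15 s (clip 2 only). Each segment later gets exactly one
 instruction listing the track IDs active within it.

 let clip1 = CMTimeRange(start: .zero,
                         duration: CMTime(seconds: 10, preferredTimescale: 600))
 let clip2 = CMTimeRange(start: CMTime(seconds: 5, preferredTimescale: 600),
                         duration: CMTime(seconds: 10, preferredTimescale: 600))
 // points: 0, 5, 10, 15  →  segments: [0,5), [5,10), [10,15)
*/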
/// /// - Parameter composition: Composition to build into - /// - Returns: Tuple containing video track, audio tracks, render size, frame rate, and clip instructions + /// - Returns: Tuple containing video tracks, audio tracks, render size, frame rate, and clip instructions func build(in composition: AVMutableComposition) async throws -> VideoSequenceResult { guard !videoClips.isEmpty else { throw NSError( @@ -85,32 +94,7 @@ internal class VideoSequenceBuilder { var maxFrameRate: Float = 30.0 var originalAudioTracks: [AVMutableCompositionTrack] = [] var clipInstructions: [ClipInstruction] = [] - - // Create single video track for all clips - guard - let compositionVideoTrack = composition.addMutableTrack( - withMediaType: .video, - preferredTrackID: kCMPersistentTrackID_Invalid - ) - else { - throw NSError( - domain: "VideoSequenceBuilder", - code: 2, - userInfo: [NSLocalizedDescriptionKey: "Failed to create video track"] - ) - } - - // Create single shared audio track for all clips (if enabled) - var sharedAudioTrack: AVMutableCompositionTrack? - if enableAudio { - sharedAudioTrack = composition.addMutableTrack( - withMediaType: .audio, - preferredTrackID: kCMPersistentTrackID_Invalid - ) - if sharedAudioTrack != nil { - PluginLog.print("🔊 Created SHARED audio track for all clips (will prevent empty segments)") - } - } + var trackConfigs: [CMPersistentTrackID: VideoClip] = [:] // Process each video clip for (index, clip) in videoClips.enumerated() { @@ -156,15 +140,12 @@ internal class VideoSequenceBuilder { PluginLog.print(" - Display size: \(correctedSize.width) x \(correctedSize.height)") PluginLog.print(" - Frame rate: \(nominalFrameRate) fps") - // Update max render size - if correctedSize.width > maxRenderSize.width - || correctedSize.height > maxRenderSize.height - { - let oldSize = maxRenderSize + // Update max render size (only if not explicitly provided) + if index == 0 && (renderWidth == nil || renderHeight == nil) { maxRenderSize = correctedSize - PluginLog.print( - " - ⬆️ Max render size updated: \(oldSize.width)x\(oldSize.height) → \(maxRenderSize.width)x\(maxRenderSize.height)" - ) + PluginLog.print(" - 📏 Base render size set from first clip: \(maxRenderSize.width)x\(maxRenderSize.height)") + } else if renderWidth != nil && renderHeight != nil { + maxRenderSize = CGSize(width: renderWidth!, height: renderHeight!) } // Update max frame rate @@ -176,62 +157,87 @@ internal class VideoSequenceBuilder { let clipTimeRange = await calculateTimeRange(for: clip, from: asset) let clipDuration = clipTimeRange.duration + // Determine insertion time in composition + let insertionTime: CMTime + if let segmentTimeUs = clip.segmentTimeUs { + insertionTime = CMTime(value: segmentTimeUs, timescale: 1_000_000) + } else { + insertionTime = totalDuration + } + + // Create a new track for each clip to support overlapping and independent positioning + guard let compositionVideoTrack = composition.addMutableTrack( + withMediaType: .video, + preferredTrackID: kCMPersistentTrackID_Invalid + ) else { + throw NSError( + domain: "VideoSequenceBuilder", + code: 2, + userInfo: [NSLocalizedDescriptionKey: "Failed to create video track for clip \(index)"] + ) + } + // Insert video clip into the composition track try compositionVideoTrack.insertTimeRange( clipTimeRange, of: videoTrack, - at: totalDuration + at: insertionTime ) + // Track mapping for compositor + trackConfigs[compositionVideoTrack.trackID] = clip + + // Add audio if enabled + var audioTrackID: CMPersistentTrackID? 
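/*
 Timeline sketch for segmentTimeUs, with illustrative numbers: clip A is
 sequential (segmentTimeUs = nil) and 10 s long, clip B sets
 segmentTimeUs = 5_000_000, so B is inserted at 5 s instead of being appended
 at 10 s, and the two clips overlap for 5 s:

 let insertionTime = CMTime(value: 5_000_000, timescale: 1_000_000)  // 5 s
 // endInComposition = 5 s + 10 s = 15 s, so totalDuration grows to 15 s
 // even though the sequential part of the timeline ends at 10 s.
*/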
= nil + if enableAudio, + let audioTrack = try? await MediaInfoExtractor.loadAudioTrack(from: asset) + { + if let compositionAudioTrack = composition.addMutableTrack( + withMediaType: .audio, + preferredTrackID: kCMPersistentTrackID_Invalid + ) { + do { + try compositionAudioTrack.insertTimeRange( + clipTimeRange, + of: audioTrack, + at: insertionTime + ) + originalAudioTracks.append(compositionAudioTrack) + audioTrackID = compositionAudioTrack.trackID + PluginLog.print(" 🔊 Audio inserted into its own track (ID: \(audioTrackID!))") + } catch { + PluginLog.print(" ❌ ERROR inserting audio: \(error.localizedDescription)") + } + } else { + PluginLog.print(" ⚠️ WARNING: Failed to create audio track for clip \(index)") + } + } + // Store instruction for this clip segment clipInstructions.append( ClipInstruction( - timeRange: CMTimeRange(start: totalDuration, duration: clipDuration), + timeRange: CMTimeRange(start: insertionTime, duration: clipDuration), transform: preferredTransform, naturalSize: naturalSize, - renderSize: correctedSize + renderSize: correctedSize, + trackID: compositionVideoTrack.trackID, + audioTrackID: audioTrackID )) - // Add audio to shared track if enabled - if enableAudio, - let audioTrack = try? await MediaInfoExtractor.loadAudioTrack(from: asset), - let sharedAudioTrack = sharedAudioTrack - { - PluginLog.print("🔊 Processing audio for clip \(index)...") - PluginLog.print(" ✅ Audio track loaded from asset") - PluginLog.print(" Track ID: \(audioTrack.trackID)") - PluginLog.print( - " Duration: \(String(format: "%.2f", audioTrack.timeRange.duration.seconds))s" - ) - PluginLog.print(" Format: \(audioTrack.mediaType)") - - do { - try sharedAudioTrack.insertTimeRange( - clipTimeRange, - of: audioTrack, - at: totalDuration - ) - PluginLog.print(" ✅ Audio inserted into SHARED track!") - PluginLog.print( - " Source time range: \(String(format: "%.2f", clipTimeRange.start.seconds))s - \(String(format: "%.2f", (clipTimeRange.start + clipTimeRange.duration).seconds))s" - ) - PluginLog.print( - " Inserted at composition time: \(String(format: "%.2f", totalDuration.seconds))s" - ) - PluginLog.print( - " Audio duration: \(String(format: "%.2f", clipTimeRange.duration.seconds))s" - ) - } catch { - PluginLog.print(" ❌ ERROR inserting audio: \(error.localizedDescription)") - PluginLog.print(" Error details: \(error)") + // Update total duration (sequential part) + if clip.segmentTimeUs == nil { + totalDuration = CMTimeAdd(totalDuration, clipDuration) + } else { + let endInComposition = CMTimeAdd(insertionTime, clipDuration) + if CMTimeCompare(endInComposition, totalDuration) > 0 { + totalDuration = endInComposition } } - totalDuration = CMTimeAdd(totalDuration, clipDuration) PluginLog.print("✅ Clip \(index) added successfully") PluginLog.print(" - Duration: \(String(format: "%.2f", clipDuration.seconds))s") PluginLog.print( - " - Time range in composition: \(String(format: "%.2f", totalDuration.seconds - clipDuration.seconds))s - \(String(format: "%.2f", totalDuration.seconds))s" + " - Time range in composition: \(String(format: "%.2f", insertionTime.seconds))s - \(String(format: "%.2f", CMTimeAdd(insertionTime, clipDuration).seconds))s" ) } @@ -239,32 +245,20 @@ internal class VideoSequenceBuilder { PluginLog.print("📊 ===== VIDEO SEQUENCE SUMMARY =====") PluginLog.print(" Total clips: \(videoClips.count)") PluginLog.print(" Total duration: \(String(format: "%.2f", totalDuration.seconds))s") - PluginLog.print(" Max render size: \(maxRenderSize.width) x \(maxRenderSize.height)") + 
PluginLog.print(" Render size: \(maxRenderSize.width) x \(maxRenderSize.height)") PluginLog.print(" Max frame rate: \(maxFrameRate) fps") PluginLog.print(" Clip instructions: \(clipInstructions.count)") - - // Handle shared audio track - add to result if it has segments, otherwise remove from composition - if let audioTrack = sharedAudioTrack { - if !audioTrack.segments.isEmpty { - originalAudioTracks.append(audioTrack) - } else { - PluginLog.print(" ⚠️ Shared audio track has no segments - removing from composition") - composition.removeTrack(audioTrack) - } - } else { - PluginLog.print(" 🔊 AUDIO TRACKS: 0 (no audio track created)") - } - + PluginLog.print(" Audio tracks: \(originalAudioTracks.count)") PluginLog.print("=====================================") PluginLog.print("") return VideoSequenceResult( - videoTrack: compositionVideoTrack, audioTracks: originalAudioTracks, totalDuration: totalDuration, renderSize: maxRenderSize, frameRate: maxFrameRate, - clipInstructions: clipInstructions + clipInstructions: clipInstructions, + trackConfigs: trackConfigs ) } @@ -302,16 +296,18 @@ internal struct ClipInstruction { let transform: CGAffineTransform let naturalSize: CGSize let renderSize: CGSize + let trackID: CMPersistentTrackID + let audioTrackID: CMPersistentTrackID? } /// Result of building a video sequence. internal struct VideoSequenceResult { - let videoTrack: AVMutableCompositionTrack let audioTracks: [AVMutableCompositionTrack] let totalDuration: CMTime let renderSize: CGSize let frameRate: Float let clipInstructions: [ClipInstruction] + let trackConfigs: [CMPersistentTrackID: VideoClip] } /// Holds the data needed to construct an AVMutableVideoComposition without @@ -346,12 +342,12 @@ internal class CustomVideoCompositionInstruction: NSObject, AVVideoCompositionIn init( timeRange: CMTimeRange, - sourceTrackID: CMPersistentTrackID, + sourceTrackIDs: [CMPersistentTrackID], layerInstructions: [AVVideoCompositionLayerInstruction], backgroundColor: CGColor? = nil ) { self.timeRange = timeRange - self._requiredSourceTrackIDs = [NSNumber(value: sourceTrackID)] + self._requiredSourceTrackIDs = sourceTrackIDs.map { NSNumber(value: $0) } self.layerInstructions = layerInstructions self.backgroundColor = backgroundColor super.init() diff --git a/ios/Classes/src/features/render/models/RenderConfig.swift b/ios/Classes/src/features/render/models/RenderConfig.swift index 2e8b12d..d274b08 100644 --- a/ios/Classes/src/features/render/models/RenderConfig.swift +++ b/ios/Classes/src/features/render/models/RenderConfig.swift @@ -212,6 +212,12 @@ struct RenderConfig { /// Global end time in microseconds for trimming the final composition let endUs: Int64? + /// Target render width + let renderWidth: Double? + + /// Target render height + let renderHeight: Double? + /// Whether to optimize the video for network streaming (fast start). /// When true, moves the moov atom to the beginning of the file. let shouldOptimizeForNetworkUse: Bool @@ -248,6 +254,8 @@ struct RenderConfig { blur: self.blur, startUs: self.startUs, endUs: self.endUs, + renderWidth: self.renderWidth, + renderHeight: self.renderHeight, shouldOptimizeForNetworkUse: self.shouldOptimizeForNetworkUse, imageBytesWithCropping: self.imageBytesWithCropping ) @@ -269,7 +277,14 @@ struct RenderConfig { inputPath: inputPath, startUs: (clipMap["startUs"] as? NSNumber)?.int64Value, endUs: (clipMap["endUs"] as? NSNumber)?.int64Value, - volume: (clipMap["volume"] as? NSNumber)?.floatValue + volume: (clipMap["volume"] as? 
NSNumber)?.floatValue, + opacity: (clipMap["opacity"] as? NSNumber)?.doubleValue, + x: (clipMap["x"] as? NSNumber)?.doubleValue, + y: (clipMap["y"] as? NSNumber)?.doubleValue, + width: (clipMap["width"] as? NSNumber)?.doubleValue, + height: (clipMap["height"] as? NSNumber)?.doubleValue, + segmentTimeUs: (clipMap["segmentTimeUs"] as? NSNumber)?.int64Value, + zIndex: clipMap["zIndex"] as? Int ) } } @@ -320,6 +335,8 @@ struct RenderConfig { blur: (args["blur"] as? NSNumber)?.doubleValue, startUs: (args["startUs"] as? NSNumber)?.int64Value, endUs: (args["endUs"] as? NSNumber)?.int64Value, + renderWidth: (args["renderWidth"] as? NSNumber)?.doubleValue, + renderHeight: (args["renderHeight"] as? NSNumber)?.doubleValue, shouldOptimizeForNetworkUse: args["shouldOptimizeForNetworkUse"] as? Bool ?? true, imageBytesWithCropping: args["imageBytesWithCropping"] as? Bool ?? false ) diff --git a/ios/Classes/src/features/render/models/VideoClip.swift b/ios/Classes/src/features/render/models/VideoClip.swift index 6241247..dd59540 100644 --- a/ios/Classes/src/features/render/models/VideoClip.swift +++ b/ios/Classes/src/features/render/models/VideoClip.swift @@ -1,16 +1,44 @@ import Foundation -/// Represents a video clip with optional trimming and volume control +/// Represents a video clip with optional trimming, volume control, and positioning internal struct VideoClip { let inputPath: String let startUs: Int64? let endUs: Int64? let volume: Float? + let opacity: Double? - init(inputPath: String, startUs: Int64? = nil, endUs: Int64? = nil, volume: Float? = nil) { + // New fields for composition support + let x: Double? + let y: Double? + let width: Double? + let height: Double? + let segmentTimeUs: Int64? + let zIndex: Int? + + init( + inputPath: String, + startUs: Int64? = nil, + endUs: Int64? = nil, + volume: Float? = nil, + opacity: Double? = nil, + x: Double? = nil, + y: Double? = nil, + width: Double? = nil, + height: Double? = nil, + segmentTimeUs: Int64? = nil, + zIndex: Int? = nil + ) { self.inputPath = inputPath self.startUs = startUs self.endUs = endUs self.volume = volume + self.opacity = opacity + self.x = x + self.y = y + self.width = width + self.height = height + self.segmentTimeUs = segmentTimeUs + self.zIndex = zIndex } } diff --git a/ios/Classes/src/features/render/models/VideoCompositorConfig.swift b/ios/Classes/src/features/render/models/VideoCompositorConfig.swift index 374b5a0..f3ea65c 100644 --- a/ios/Classes/src/features/render/models/VideoCompositorConfig.swift +++ b/ios/Classes/src/features/render/models/VideoCompositorConfig.swift @@ -35,4 +35,12 @@ struct VideoCompositorConfig { /// Fallback source track ID for older iOS versions where sourceTrackIDs may be empty. /// This is used when the custom compositor doesn't receive track IDs properly. var sourceTrackID: CMPersistentTrackID = kCMPersistentTrackID_Invalid + + /// Mapping of track ID to video clip configuration for multi-track compositing + var videoClipConfigs: [CMPersistentTrackID: VideoClip] = [:] + + /// The intended render size of the composition (logical coordinate space). + /// This is used to calculate scale factors if the actual render context size + /// differs from the intended size (e.g. due to AVAssetExportSession presets). 
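/*
 Scale-factor example, with illustrative values: a composition authored at
 intendedRenderSize = 3840×2160 but exported through a 1080p preset renders
 at 1920×1080, so logical sizes are halved on both axes:

 let scaleFactorX = 1920.0 / 3840.0  // 0.5
 let scaleFactorY = 1080.0 / 2160.0  // 0.5
 // A clip configured with width 960 is drawn 480 px wide in the actual
 // render context.
*/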
+ var intendedRenderSize: CGSize = .zero } diff --git a/ios/Classes/src/features/render/utils/VideoCompositor.swift b/ios/Classes/src/features/render/utils/VideoCompositor.swift index 21fe06f..4d96cac 100644 --- a/ios/Classes/src/features/render/utils/VideoCompositor.swift +++ b/ios/Classes/src/features/render/utils/VideoCompositor.swift @@ -34,12 +34,15 @@ class VideoCompositor: NSObject, AVVideoCompositing { var cropWidth: CGFloat? var cropHeight: CGFloat? - // New properties for handling iPhone orientation var originalNaturalSize: CGSize = .zero + var intendedRenderSize: CGSize = .zero /// Fallback source track ID for older iOS versions var sourceTrackID: CMPersistentTrackID = kCMPersistentTrackID_Invalid + /// Track configurations for multi-track compositing + var videoClipConfigs: [CMPersistentTrackID: VideoClip] = [:] + /// Color filter configs for per-frame LUT computation private var colorFilterConfigs: [ColorFilterConfig] = [] @@ -58,7 +61,6 @@ class VideoCompositor: NSObject, AVVideoCompositing { var videoRotationDegrees: Double = 0.0 var shouldApplyOrientationCorrection: Bool = false - // Update the apply function: func apply(_ config: VideoCompositorConfig) { self.blurSigma = config.blurSigma self.rotateRadians = config.rotateRadians @@ -77,7 +79,9 @@ class VideoCompositor: NSObject, AVVideoCompositing { self.videoRotationDegrees = config.videoRotationDegrees self.shouldApplyOrientationCorrection = config.shouldApplyOrientationCorrection self.originalNaturalSize = config.originalNaturalSize + self.intendedRenderSize = config.intendedRenderSize self.sourceTrackID = config.sourceTrackID + self.videoClipConfigs = config.videoClipConfigs self.setOverlayImageLayers(from: config.imageLayerConfigs) self.colorFilterConfigs = config.colorFilterConfigs @@ -178,326 +182,186 @@ class VideoCompositor: NSObject, AVVideoCompositing { func renderContextChanged(_ newRenderContext: AVVideoCompositionRenderContext) {} func startRequest(_ request: AVAsynchronousVideoCompositionRequest) { - // Try to get source buffer from the first available track - var sourceBuffer: CVPixelBuffer? - - if !request.sourceTrackIDs.isEmpty { - sourceBuffer = request.sourceFrame(byTrackID: request.sourceTrackIDs[0].int32Value) - } - - // Fallback 1: Try to get track ID from layer instruction if sourceTrackIDs is empty - // This can happen on older iOS versions (iPhone 7, iOS 15) - if sourceBuffer == nil, - let instruction = request.videoCompositionInstruction - as? CustomVideoCompositionInstruction, - let layerInstruction = instruction.layerInstructions.first - { - let trackID = layerInstruction.trackID - if trackID != kCMPersistentTrackID_Invalid { - sourceBuffer = request.sourceFrame(byTrackID: trackID) + let renderSize = request.renderContext.size + let currentTimeUs = Int64(CMTimeGetSeconds(request.compositionTime) * 1_000_000) + + // Calculate scale factors between intended logical resolution and actual render size. + // This handles cases where AVAssetExportSession forces a different resolution + // (e.g. 1080p preset for a 4K composition). + let scaleFactorX = intendedRenderSize.width > 0 ? renderSize.width / intendedRenderSize.width : 1.0 + let scaleFactorY = intendedRenderSize.height > 0 ? renderSize.height / intendedRenderSize.height : 1.0 + + // 1. 
Define a common structure for all renderable items + enum RenderableItem { + case video(image: CIImage, clip: VideoClip, trackID: CMPersistentTrackID) + case imageLayer(layer: ImageLayer) + + var zIndex: Int { + switch self { + case .video(_, let clip, _): return clip.zIndex ?? 0 + case .imageLayer: return Int.max + } } } - // Fallback 2: Use the pre-configured sourceTrackID from VideoCompositorConfig - // This is set during composition building and guarantees we have the correct track ID - if sourceBuffer == nil && sourceTrackID != kCMPersistentTrackID_Invalid { - sourceBuffer = request.sourceFrame(byTrackID: sourceTrackID) - } - - guard let sourceBuffer = sourceBuffer else { - request.finish( - with: NSError( - domain: "VideoCompositor", code: 0, - userInfo: [ - NSLocalizedDescriptionKey: - "No source tracks available for compositing (sourceTrackIDs: \(request.sourceTrackIDs.count), configTrackID: \(sourceTrackID))" - ])) - return - } - var outputImage = CIImage(cvPixelBuffer: sourceBuffer) - - // Apply layer instruction transform first (video scaling/centering/rotation) - // This ensures all videos are properly sized and oriented before applying user effects. - // The layerInstruction contains the preferredTransform which already handles video rotation - // from portrait to landscape or vice versa, so no additional orientation correction is needed. - // - // IMPORTANT: AVFoundation uses a top-left origin coordinate system (Y points down), - // while CIImage uses a bottom-left origin (Y points up). We need to convert the transform - // to work correctly with CIImage's coordinate system. - - // Extract layer instruction from CustomVideoCompositionInstruction - var layerInstruction: AVVideoCompositionLayerInstruction? - if let customInstruction = request.videoCompositionInstruction - as? CustomVideoCompositionInstruction, - let firstLayerInstruction = customInstruction.layerInstructions.first - { - layerInstruction = firstLayerInstruction - } - - if let layerInstruction = layerInstruction { - var startTransform = CGAffineTransform.identity - var endTransform = CGAffineTransform.identity - var timeRange = CMTimeRange.zero - - // Get the transform at the current composition time - let hasTransform = layerInstruction.getTransformRamp( - for: request.compositionTime, - start: &startTransform, - end: &endTransform, - timeRange: &timeRange - ) - - if hasTransform && !startTransform.isIdentity { - // Convert AVFoundation transform to CIImage coordinate system: - // 1. Flip Y axis before transform (go from CIImage coords to AVFoundation coords) - // 2. Apply the AVFoundation transform - // 3. 
Flip Y axis after transform (go back to CIImage coords) - let imageHeight = outputImage.extent.height - - // Flip Y: translate to top, scale Y by -1 - let flipY = CGAffineTransform(scaleX: 1, y: -1) - .translatedBy(x: 0, y: -imageHeight) - - // Convert transform: flipY * transform * flipY^-1 - // But since flipY is its own inverse (when combined with translate), we use: - // result = flipY * transform * flipY (adjusted for new height after transform) - let convertedTransform = - flipY - .concatenating(startTransform) - - outputImage = outputImage.transformed(by: convertedTransform) - - // After transform, we need to flip back and normalize - let transformedExtent = outputImage.extent - let newHeight = transformedExtent.height - let flipBack = CGAffineTransform(scaleX: 1, y: -1) - .translatedBy(x: 0, y: -newHeight) - - outputImage = outputImage.transformed(by: flipBack) - - // Normalize position to origin - let finalExtent = outputImage.extent - if finalExtent.origin.x != 0 || finalExtent.origin.y != 0 { - let translation = CGAffineTransform( - translationX: -finalExtent.origin.x, - y: -finalExtent.origin.y - ) - outputImage = outputImage.transformed(by: translation) + var items: [RenderableItem] = [] + + // 2. Collect active video frames + for trackIDValue in request.sourceTrackIDs { + let trackID = trackIDValue.int32Value + if let sourceBuffer = request.sourceFrame(byTrackID: trackID), + let clipConfig = videoClipConfigs[trackID] { + + var frameImage = CIImage(cvPixelBuffer: sourceBuffer) + + // Apply individual track transform from layer instructions + if let customInstruction = request.videoCompositionInstruction as? CustomVideoCompositionInstruction { + for layerInstruction in customInstruction.layerInstructions { + if layerInstruction.trackID == trackID { + var startTransform = CGAffineTransform.identity + var endTransform = CGAffineTransform.identity + var timeRange = CMTimeRange.zero + + let hasTransform = layerInstruction.getTransformRamp( + for: request.compositionTime, + start: &startTransform, + end: &endTransform, + timeRange: &timeRange + ) + + if hasTransform && !startTransform.isIdentity { + let imageHeight = frameImage.extent.height + let flipY = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: -imageHeight) + let convertedTransform = flipY.concatenating(startTransform) + frameImage = frameImage.transformed(by: convertedTransform) + + let transformedExtent = frameImage.extent + let flipBack = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: -transformedExtent.height) + frameImage = frameImage.transformed(by: flipBack) + + // Normalize + let finalExtent = frameImage.extent + if finalExtent.origin.x != 0 || finalExtent.origin.y != 0 { + frameImage = frameImage.transformed(by: CGAffineTransform(translationX: -finalExtent.origin.x, y: -finalExtent.origin.y)) + } + } + break + } + } } + + items.append(.video(image: frameImage, clip: clipConfig, trackID: trackID)) } } - var center = CGPoint(x: outputImage.extent.midX, y: outputImage.extent.midY) - - // Apply user-defined effects (crop, rotation, flip, scale) - var transform = CGAffineTransform.identity - - // Apply LUT, blur, and flip BEFORE overlay when imageBytesWithCropping is enabled - // This ensures these effects only affect the video, not the overlay - if imageBytesWithCropping { - // Apply color filter (timed LUT) to video only - outputImage = applyColorFilter(to: outputImage, at: request.compositionTime) - - // Apply blur to video only - if blurSigma > 0 { - outputImage = 
outputImage.applyingGaussianBlur(sigma: blurSigma) + // 3. Collect active image layers + for layer in overlayImageLayers { + let inRange = (layer.startUs == -1 || currentTimeUs >= layer.startUs) && (layer.endUs == -1 || currentTimeUs <= layer.endUs) + if inRange { + items.append(.imageLayer(layer: layer)) } + } - // Apply flip to video only (before adding overlay) - if flipX || flipY { - let flipScaleX: CGFloat = flipX ? -1 : 1 - let flipScaleY: CGFloat = flipY ? -1 : 1 - - let flipTransform = CGAffineTransform(translationX: center.x, y: center.y) - .scaledBy(x: flipScaleX, y: flipScaleY) - .translatedBy(x: -center.x, y: -center.y) - - outputImage = outputImage.transformed(by: flipTransform) - - // Normalize position after flip - let flippedExtent = outputImage.extent - if flippedExtent.origin.x != 0 || flippedExtent.origin.y != 0 { - let translation = CGAffineTransform( - translationX: -flippedExtent.origin.x, - y: -flippedExtent.origin.y - ) - outputImage = outputImage.transformed(by: translation) - } - center = CGPoint(x: outputImage.extent.midX, y: outputImage.extent.midY) - } + if items.isEmpty { + PluginLog.print("⚠️ VideoCompositor: No active items found at time \(request.compositionTime.seconds)s") + request.finish(with: NSError(domain: "VideoCompositor", code: 0, userInfo: [NSLocalizedDescriptionKey: "No active items found"])) + return } - // Apply overlay BEFORE crop if imageBytesWithCropping is enabled - if imageBytesWithCropping { - let imageRect = outputImage.extent - - // Apply time-based overlay layers - let currentTimeUs = Int64(CMTimeGetSeconds(request.compositionTime) * 1_000_000) - for layer in overlayImageLayers { - // Check if current time is within the layer's time range - // startUs of -1 means "from the start of the video" - // endUs of -1 means "until the end of the video" - let inTimeRange = - (layer.startUs == -1 || currentTimeUs >= layer.startUs) - && (layer.endUs == -1 || currentTimeUs <= layer.endUs) - - if inTimeRange { - var img = layer.image - - if let w = layer.width, let h = layer.height { - let sx = CGFloat(w) / img.extent.width - let sy = CGFloat(h) / img.extent.height - img = img.transformed(by: CGAffineTransform(scaleX: sx, y: sy)) - } + // 4. Sort all items by zIndex + let sortedItems = items.sorted { $0.zIndex < $1.zIndex } + + // 5. Initialize background image (black frame) + var outputImage = CIImage(color: .black).cropped(to: CGRect(origin: .zero, size: renderSize)) + + // 6. Composite each item + for item in sortedItems { + switch item { + case .video(let img, let clip, _): + var frameImg = img + + // Apply custom size if provided, otherwise scale by global factor + if let w = clip.width, let h = clip.height { + let targetW = CGFloat(w) * scaleFactorX + let targetH = CGFloat(h) * scaleFactorY + let sx = targetW / frameImg.extent.width + let sy = targetH / frameImg.extent.height + frameImg = frameImg.transformed(by: CGAffineTransform(scaleX: sx, y: sy)) + } else if scaleFactorX != 1.0 || scaleFactorY != 1.0 { + frameImg = frameImg.transformed(by: CGAffineTransform(scaleX: scaleFactorX, y: scaleFactorY)) + } - let overlay: CIImage - if layer.x == nil && layer.y == nil { - // Stretch to fill frame when no position is specified - overlay = img.transformed( - by: CGAffineTransform( - scaleX: imageRect.width / img.extent.width, - y: imageRect.height / img.extent.height)) - } else { - // Position at specific coordinates - let posX = CGFloat(layer.x ?? 0) - let posY = CGFloat(layer.y ?? 
0) - // Convert y from top-left (Dart) to bottom-left (Core Graphics) - let cgY = imageRect.height - posY - img.extent.height - overlay = img.transformed( - by: CGAffineTransform(translationX: posX, y: cgY)) + // Apply custom offset if provided + if clip.x != nil || clip.y != nil { + let posX = CGFloat(clip.x ?? 0) * scaleFactorX + let posY = CGFloat(clip.y ?? 0) * scaleFactorY + // Convert from top-left (Flutter) to bottom-left (Core Image) + let cgY = renderSize.height - posY - frameImg.extent.height + frameImg = frameImg.transformed(by: CGAffineTransform(translationX: posX, y: cgY)) + } else if scaleFactorX != 1.0 || scaleFactorY != 1.0 { + // Normalize position if we scaled but didn't translate manually + let extent = frameImg.extent + if extent.origin.x != 0 || extent.origin.y != 0 { + frameImg = frameImg.transformed(by: CGAffineTransform(translationX: -extent.origin.x, y: -extent.origin.y)) } - - let (opacity, animTransform) = computeAnimation( - layer: layer, - currentTimeUs: currentTimeUs, - overlayExtent: overlay.extent, - frameExtent: imageRect - ) - outputImage = compositeOverlay( - overlay, over: outputImage, opacity: opacity, transform: animTransform) } - } - } - - // Cropping - if cropX != 0 || cropY != 0 || cropWidth != nil || cropHeight != nil { - let inputExtent = outputImage.extent - let videoWidth = inputExtent.width - let videoHeight = inputExtent.height - let x = cropX - var y = cropY - let width = cropWidth ?? (videoWidth - x) - let height = cropHeight ?? (videoHeight - y) + // Apply opacity if needed + if let opacity = clip.opacity, opacity < 1.0 { + frameImg = frameImg.applyingFilter("CIColorMatrix", parameters: [ + "inputAVector": CIVector(x: 0, y: 0, z: 0, w: CGFloat(opacity)), + ]) + } - y = videoHeight - height - y + outputImage = frameImg.composited(over: outputImage) - let cropRect = CGRect(x: x, y: y, width: width, height: height) + case .imageLayer(let layer): + var layerImg = layer.image + if let w = layer.width, let h = layer.height { + let targetW = CGFloat(w) * scaleFactorX + let targetH = CGFloat(h) * scaleFactorY + layerImg = layerImg.transformed(by: CGAffineTransform(scaleX: targetW/layerImg.extent.width, y: targetH/layerImg.extent.height)) + } - outputImage = outputImage.cropped(to: cropRect) - outputImage = outputImage.transformed( - by: CGAffineTransform( - translationX: -cropRect.origin.x, - y: -cropRect.origin.y + let overlay: CIImage + if layer.x == nil && layer.y == nil { + overlay = layerImg.transformed(by: CGAffineTransform(scaleX: renderSize.width/layerImg.extent.width, y: renderSize.height/layerImg.extent.height)) + } else { + let posX = CGFloat(layer.x ?? 0) * scaleFactorX + let posY = CGFloat(layer.y ?? 
0) * scaleFactorY + let cgY = renderSize.height - posY - layerImg.extent.height + overlay = layerImg.transformed(by: CGAffineTransform(translationX: posX, y: cgY)) + } - )) - center = CGPoint(x: outputImage.extent.midX, y: outputImage.extent.midY) + let (opacity, animTransform) = computeAnimation(layer: layer, currentTimeUs: currentTimeUs, overlayExtent: overlay.extent, frameExtent: CGRect(origin: .zero, size: renderSize)) + outputImage = compositeOverlay(overlay, over: outputImage, opacity: opacity, transform: animTransform) + } } - // Rotation - if rotateRadians != 0 { - // Rotate the image - let rotation = CGAffineTransform(rotationAngle: rotateRadians) - let rotatedImage = outputImage.transformed(by: rotation) - - // Get the new bounding box after rotation - let rotatedExtent = rotatedImage.extent - - // Translate to (0, 0) - let translation = CGAffineTransform( - translationX: -rotatedExtent.origin.x, y: -rotatedExtent.origin.y) - outputImage = rotatedImage.transformed(by: translation) - center = CGPoint(x: outputImage.extent.midX, y: outputImage.extent.midY) - } + // 7. Apply global effects (if any) + let center = CGPoint(x: outputImage.extent.midX, y: outputImage.extent.midY) + var transform = CGAffineTransform.identity - // Flipping (only if NOT imageBytesWithCropping - otherwise already applied before overlay) - if !imageBytesWithCropping && (flipX || flipY) { + // Apply flip (Global) + if flipX || flipY { let scaleX: CGFloat = flipX ? -1 : 1 let scaleY: CGFloat = flipY ? -1 : 1 - - let flipTransform = CGAffineTransform(translationX: center.x, y: center.y) + transform = transform.concatenating(CGAffineTransform(translationX: center.x, y: center.y) .scaledBy(x: scaleX, y: scaleY) - .translatedBy(x: -center.x, y: -center.y) - - transform = transform.concatenating(flipTransform) + .translatedBy(x: -center.x, y: -center.y)) } - // Apply Scale + // Apply Global Scale if scaleX != 1 || scaleY != 1 { transform = transform.scaledBy(x: scaleX, y: scaleY) } outputImage = outputImage.transformed(by: transform) - // Apply color filter (only if NOT imageBytesWithCropping - otherwise already applied before overlay) - if !imageBytesWithCropping { - outputImage = applyColorFilter(to: outputImage, at: request.compositionTime) - - // Apply blur - if blurSigma > 0 { - outputImage = outputImage.applyingGaussianBlur(sigma: blurSigma) - } - } - - // Apply overlay image layers (only if not already applied before crop) - if !imageBytesWithCropping { - let imageRect = outputImage.extent - - // Apply time-based overlay layers with positioning - let currentTimeUs = Int64(CMTimeGetSeconds(request.compositionTime) * 1_000_000) - for layer in overlayImageLayers { - // Check if current time is within the layer's time range - // startUs of -1 means "from the start of the video" - // endUs of -1 means "until the end of the video" - let inTimeRange = - (layer.startUs == -1 || currentTimeUs >= layer.startUs) - && (layer.endUs == -1 || currentTimeUs <= layer.endUs) - if inTimeRange { - var img = layer.image - - if let w = layer.width, let h = layer.height { - let sx = CGFloat(w) / img.extent.width - let sy = CGFloat(h) / img.extent.height - img = img.transformed(by: CGAffineTransform(scaleX: sx, y: sy)) - } - - let overlay: CIImage - if layer.x == nil && layer.y == nil { - // Stretch to fill frame when no position is specified - overlay = img.transformed( - by: CGAffineTransform( - scaleX: imageRect.width / img.extent.width, - y: imageRect.height / img.extent.height)) - } else { - // Position at specific coordinates 
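Both platform compositors repeat this top-left to bottom-left Y conversion for overlays and, in the new multi-track path, for positioned clips. As a standalone illustration of the math (helper name and values are illustrative, not part of the plugin):

```swift
import CoreGraphics

/// Flutter/Dart positions use a top-left origin; CIImage composites in a
/// bottom-left origin. An item whose top edge sits `posY` below the top of
/// the frame must therefore be placed at `frameHeight - posY - itemHeight`.
func bottomLeftY(topLeftY posY: CGFloat, frameHeight: CGFloat, itemHeight: CGFloat) -> CGFloat {
    frameHeight - posY - itemHeight
}

// Example: a 200 pt tall overlay at y = 50 in a 1080 pt high frame lands at
// CI y = 1080 - 50 - 200 = 830.
```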
-            let posX = CGFloat(layer.x ?? 0)
-            let posY = CGFloat(layer.y ?? 0)
-            // Convert y from top-left (Dart) to bottom-left (Core Graphics)
-            let cgY = imageRect.height - posY - img.extent.height
-            overlay = img.transformed(
-              by: CGAffineTransform(translationX: posX, y: cgY))
-          }
-
-          let (opacity, animTransform) = computeAnimation(
-            layer: layer,
-            currentTimeUs: currentTimeUs,
-            overlayExtent: overlay.extent,
-            frameExtent: imageRect
-          )
-          outputImage = compositeOverlay(
-            overlay, over: outputImage, opacity: opacity, transform: animTransform)
-        }
-      }
+    // Apply LUT and Blur (Global)
+    outputImage = applyColorFilter(to: outputImage, at: request.compositionTime)
+    if blurSigma > 0 {
+      outputImage = outputImage.applyingGaussianBlur(sigma: blurSigma)
+    }

     guard let outputBuffer = request.renderContext.newPixelBuffer() else {
diff --git a/lib/core/models/video/video_render_data_model.dart b/lib/core/models/video/video_render_data_model.dart
index 7399245..3e270a6 100644
--- a/lib/core/models/video/video_render_data_model.dart
+++ b/lib/core/models/video/video_render_data_model.dart
@@ -408,25 +408,6 @@ class VideoRenderData {
     double? scaleX = transform.scaleX;
     double? scaleY = transform.scaleY;

-    // Handle quality config
-    if (qualityConfig != null && scaleX == null && scaleY == null) {
-      final targetVideo = video ??
-          (videoSegments != null && videoSegments!.isNotEmpty
-              ? videoSegments!.first.video
-              : null);
-      if (targetVideo != null) {
-        final meta = await ProVideoEditor.instance.getMetadata(targetVideo);
-        final originalResolution = meta.resolution;
-        final targetResolution =
-            qualityConfig!.resolution ?? originalResolution;
-        final sx = targetResolution.width / originalResolution.width;
-        final sy = targetResolution.height / originalResolution.height;
-        final scale = sx < sy ? sx : sy;
-        scaleX = scale;
-        scaleY = scale;
-      }
-    }
-
     // Convert video clips to map format
     // ignore: deprecated_member_use_from_same_package
     final fallbackVolume = originalAudioVolume;
@@ -536,6 +517,8 @@
       'bitrate': bitrate,
       'scaleX': scaleX,
       'scaleY': scaleY,
+      'renderWidth': qualityConfig?.resolution?.width,
+      'renderHeight': qualityConfig?.resolution?.height,
       // Global trim for entire composition (only for videoSegments,
       // not single video). For single video, startTime/endTime are already
       // applied to the clip itself
diff --git a/lib/core/models/video/video_segment_model.dart b/lib/core/models/video/video_segment_model.dart
index c927e0d..2f31b5b 100644
--- a/lib/core/models/video/video_segment_model.dart
+++ b/lib/core/models/video/video_segment_model.dart
@@ -1,5 +1,6 @@
 // ignore_for_file: public_member_api_docs, sort_constructors_first
 import 'dart:convert';
+import 'dart:ui';

 import 'package:pro_video_editor/pro_video_editor.dart';
 import 'package:pro_video_editor/shared/utils/parser/double_parser.dart';
@@ -16,6 +17,11 @@ class VideoSegment {
     this.startTime,
     this.endTime,
     this.volume,
+    this.offset,
+    this.size,
+    this.zIndex,
+    this.opacity,
+    this.segmentTime,
   }) : assert(
           startTime == null || endTime == null || startTime < endTime,
           'startTime must be before endTime',
@@ -50,6 +56,42 @@
   /// If null, the original volume is used.
   final double? volume;

+  /// The stacking order of overlapping elements along the z-axis.
+  /// Segments with a higher zIndex are rendered on top.
+  /// Defaults to 0 if not specified.
+  ///
+  /// If multiple segments share the same zIndex, their list order determines
+  /// stacking (the later segment is rendered on top).
+  final int? zIndex;
+
+  /// The opacity of this video segment when it overlaps other elements.
+  final double? opacity;
+
+  /// Position offset from the top-left corner of the video frame, in pixels.
+  ///
+  /// [Offset.dx] is the horizontal offset from the left edge.
+  /// [Offset.dy] is the vertical offset from the top edge.
+  ///
+  /// When `null`, the segment is stretched to fill the entire video frame.
+  /// When set to a specific value (e.g., [Offset.zero]), the segment is
+  /// placed at that position at its original size.
+  final Offset? offset;
+
+  /// The display size of this video segment, in pixels.
+  ///
+  /// [Size.width] is the target width of the segment.
+  /// [Size.height] is the target height of the segment.
+  ///
+  /// When `null`, the segment is used at its original size (or stretched to
+  /// fill the frame when [offset] is also `null`).
+  final Size? size;
+
+  /// Optional start time for this video segment in the rendered video.
+  ///
+  /// If null, the clip starts right after the previous video segment.
+  final Duration? segmentTime;
+
   /// Converts this clip to a map for platform channel communication.
   Future<Map<String, dynamic>> toAsyncMap() async {
     final inputPath = await video.safeFilePath();
@@ -59,6 +101,13 @@
       'startUs': startTime?.inMicroseconds,
       'endUs': endTime?.inMicroseconds,
       'volume': volume,
+      'zIndex': zIndex,
+      'opacity': opacity,
+      'x': offset?.dx,
+      'y': offset?.dy,
+      'width': size?.width,
+      'height': size?.height,
+      'segmentTimeUs': segmentTime?.inMicroseconds,
     };
   }
@@ -68,12 +117,22 @@
     Duration? startTime,
     Duration? endTime,
     double? volume,
+    double? opacity,
+    int? zIndex,
+    Offset? offset,
+    Size? size,
+    Duration? segmentTime,
   }) {
     return VideoSegment(
       video: video ?? this.video,
       startTime: startTime ?? this.startTime,
       endTime: endTime ?? this.endTime,
       volume: volume ?? this.volume,
+      zIndex: zIndex ?? this.zIndex,
+      opacity: opacity ?? this.opacity,
+      offset: offset ?? this.offset,
+      size: size ?? this.size,
+      segmentTime: segmentTime ?? this.segmentTime,
     );
   }
@@ -84,7 +143,12 @@
     return other.video == video &&
         other.startTime == startTime &&
         other.endTime == endTime &&
-        other.volume == volume;
+        other.volume == volume &&
+        other.zIndex == zIndex &&
+        other.opacity == opacity &&
+        other.offset == offset &&
+        other.size == size &&
+        other.segmentTime == segmentTime;
   }

   @override
@@ -92,7 +156,12 @@
     return video.hashCode ^
         startTime.hashCode ^
         endTime.hashCode ^
-        volume.hashCode;
+        volume.hashCode ^
+        zIndex.hashCode ^
+        opacity.hashCode ^
+        offset.hashCode ^
+        size.hashCode ^
+        segmentTime.hashCode;
   }

   @override
@@ -100,7 +169,12 @@
     return 'VideoSegment(video: $video, '
         'startTime: $startTime, '
         'endTime: $endTime, '
-        'volume: $volume)';
+        'volume: $volume, '
+        'zIndex: $zIndex, '
+        'opacity: $opacity, '
+        'offset: $offset, '
+        'size: $size, '
+        'segmentTime: $segmentTime)';
   }

   Map<String, dynamic> toMap() {
@@ -109,6 +183,12 @@
       'startTime': startTime?.inMicroseconds,
       'endTime': endTime?.inMicroseconds,
       'volume': volume,
+      'zIndex': zIndex,
+      'opacity': opacity,
+      'offset': offset != null ? {'dx': offset!.dx, 'dy': offset!.dy} : null,
+      'size':
+          size != null ? {'width': size!.width, 'height': size!.height} : null,
+      'segmentTime': segmentTime?.inMicroseconds,
     };
   }
@@ -122,6 +202,23 @@
           ?
Duration(microseconds: safeParseInt(map['endTime'])) : null, volume: tryParseDouble(map['volume']), + zIndex: tryParseInt(map['zIndex']), + opacity: tryParseDouble(map['opacity']), + offset: map['offset'] != null + ? Offset( + safeParseDouble((map['offset'] as Map)['dx']), + safeParseDouble((map['offset'] as Map)['dy']), + ) + : null, + size: map['size'] != null + ? Size( + safeParseDouble((map['size'] as Map)['width']), + safeParseDouble((map['size'] as Map)['height']), + ) + : null, + segmentTime: map['segmentTime'] != null + ? Duration(microseconds: safeParseInt(map['segmentTime'])) + : null, ); } diff --git a/macos/Classes/src/features/render/RenderVideo.swift b/macos/Classes/src/features/render/RenderVideo.swift index 4c51432..3a95f8b 100644 --- a/macos/Classes/src/features/render/RenderVideo.swift +++ b/macos/Classes/src/features/render/RenderVideo.swift @@ -75,7 +75,14 @@ class RenderVideo { inputPath: newPath, startUs: clip.startUs, endUs: clip.endUs, - volume: clip.volume + volume: clip.volume, + opacity: clip.opacity, + x: clip.x, + y: clip.y, + width: clip.width, + height: clip.height, + segmentTimeUs: clip.segmentTimeUs, + zIndex: clip.zIndex ) } return clip @@ -139,13 +146,16 @@ class RenderVideo { var effectsConfig = VideoCompositorConfig() // Use composition helper to merge multiple video clips - let (composition, videoCompData, renderSize, audioMix, sourceTrackID) = + let (composition, videoCompData, renderSize, audioMix, sourceTrackID, updatedEffectsConfig) = try await applyComposition( videoClips: workingConfig.videoClips, videoEffects: effectsConfig, enableAudio: workingConfig.enableAudio, - audioTracks: workingConfig.audioTracks + audioTracks: workingConfig.audioTracks, + renderWidth: workingConfig.renderWidth, + renderHeight: workingConfig.renderHeight ) + effectsConfig = updatedEffectsConfig var videoCompConfig = videoCompData // Set source track ID for fallback on older macOS versions @@ -236,10 +246,18 @@ class RenderVideo { videoComposition.frameDuration = videoCompConfig.frameDuration videoComposition.renderSize = finalRenderSize videoComposition.instructions = videoCompConfig.instructions + + // Ensure compositor knows the intended logical size for coordinate mapping + effectsConfig.intendedRenderSize = finalRenderSize + videoComposition.customVideoCompositorClass = makeVideoCompositorSubclass( with: effectsConfig) - let preset = applyBitrate(requestedBitrate: workingConfig.bitrate) + let preset = applyBitrate( + requestedBitrate: workingConfig.bitrate, + renderWidth: workingConfig.renderWidth, + renderHeight: workingConfig.renderHeight + ) let export = try await prepareExportSession( composition: composition, diff --git a/macos/Classes/src/features/render/helpers/ApplyBitrate.swift b/macos/Classes/src/features/render/helpers/ApplyBitrate.swift index 9eaf01b..00a6287 100644 --- a/macos/Classes/src/features/render/helpers/ApplyBitrate.swift +++ b/macos/Classes/src/features/render/helpers/ApplyBitrate.swift @@ -9,7 +9,8 @@ import Foundation /// /// - Parameters: /// - requestedBitrate: Target bitrate in bits per second. If nil, returns highest quality. -/// - presetHint: Optional preset hint (currently unused). +/// - renderWidth: Optional target render width to ensure preset supports the resolution. +/// - renderHeight: Optional target render height to ensure preset supports the resolution. /// - Returns: AVAssetExportPreset string matching the requested quality level. 
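Read on its own, the resolution branch added below maps the larger target dimension to the smallest preset that can hold it, gating HEVC presets behind availability checks. A hedged sketch of that mapping (the helper name is hypothetical; the real function also falls through to the bitrate table below):

```swift
import AVFoundation

func presetForResolutionSketch(maxDim: Double) -> String {
    if maxDim > 3840 {
        // 8K export requires the HEVC preset introduced in macOS 12.1.
        if #available(macOS 12.1, *) {
            return AVAssetExportPresetHEVC7680x4320
        }
    } else if maxDim > 1920 {
        // Prefer HEVC 4K where available, otherwise H.264 4K.
        if #available(macOS 10.13, *) {
            return AVAssetExportPresetHEVC3840x2160
        }
        return AVAssetExportPreset3840x2160
    } else if maxDim > 1280 {
        return AVAssetExportPreset1920x1080
    }
    // No resolution match: defer to the bitrate-based mapping.
    return AVAssetExportPresetHighestQuality
}
```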
/// /// Bitrate mapping: @@ -24,14 +25,30 @@ import Foundation /// - ≥2 Mbps: 480p /// - ≥1 Mbps: Medium quality /// - <1 Mbps: Low quality -public func applyBitrate(requestedBitrate: Int?, presetHint: String? = nil) -> String { - if let bitrate = requestedBitrate { - PluginLog.print( - "[\(Tags.render)] 📊 Requested bitrate: \(bitrate) bps (\(String(format: "%.1f", Double(bitrate) / 1_000_000)) Mbps)" - ) - PluginLog.print( - "[\(Tags.render)] ⚠️ AVAssetExportSession does not support custom bitrate directly - using closest preset" - ) +public func applyBitrate( + requestedBitrate: Int?, + renderWidth: Double? = nil, + renderHeight: Double? = nil +) -> String { + // If a custom resolution is provided, we should ideally use a "HighestQuality" + // preset to avoid resolution constraints from bitrate-based presets. + // However, if a bitrate is also specified, we'll try to pick the best matching one. + if let rw = renderWidth, let rh = renderHeight { + let maxDim = max(rw, rh) + + if maxDim > 3840 { + if #available(macOS 12.1, *) { + return AVAssetExportPresetHEVC7680x4320 + } + } else if maxDim > 1920 { + if #available(macOS 10.13, *) { + return AVAssetExportPresetHEVC3840x2160 + } else { + return AVAssetExportPreset3840x2160 + } + } else if maxDim > 1280 { + return AVAssetExportPreset1920x1080 + } } if let bitrate = requestedBitrate { diff --git a/macos/Classes/src/features/render/helpers/ApplyComposition.swift b/macos/Classes/src/features/render/helpers/ApplyComposition.swift index d1c28b9..bd1a926 100644 --- a/macos/Classes/src/features/render/helpers/ApplyComposition.swift +++ b/macos/Classes/src/features/render/helpers/ApplyComposition.swift @@ -19,18 +19,22 @@ import Foundation /// - CGSize: Final render size (max dimensions from all clips) /// - AVAudioMix?: Audio mix with volume controls (nil if no audio mixing needed) /// - CMPersistentTrackID: The track ID of the video composition track (for fallback on older macOS) +/// - VideoCompositorConfig: Updated compositor configuration with track info /// /// - Throws: NSError if video clips are empty, files don't exist, or tracks can't be loaded. func applyComposition( videoClips: [VideoClip], videoEffects: VideoCompositorConfig, enableAudio: Bool, - audioTracks: [AudioTrackConfig] + audioTracks: [AudioTrackConfig], + renderWidth: Double? = nil, + renderHeight: Double? = nil ) async throws -> ( - AVMutableComposition, VideoCompositionData, CGSize, AVAudioMix?, CMPersistentTrackID + AVMutableComposition, VideoCompositionData, CGSize, AVAudioMix?, CMPersistentTrackID, VideoCompositorConfig ) { return try await CompositionBuilder(videoClips: videoClips, videoEffects: videoEffects) .setEnableAudio(enableAudio) .setAudioTracks(audioTracks) + .setRenderSize(width: renderWidth, height: renderHeight) .build() } diff --git a/macos/Classes/src/features/render/helpers/ApplyPlaybackSpeed.swift b/macos/Classes/src/features/render/helpers/ApplyPlaybackSpeed.swift index 440117b..6d54575 100644 --- a/macos/Classes/src/features/render/helpers/ApplyPlaybackSpeed.swift +++ b/macos/Classes/src/features/render/helpers/ApplyPlaybackSpeed.swift @@ -25,9 +25,7 @@ public func applyPlaybackSpeed( guard let speed = speed, speed > 0, speed != 1 else { return instructions } let speedType = speed < 1 ? 
"slow motion" : "fast forward" - PluginLog.print( - "[\(Tags.render)] ⚡ Applying playback speed: \(String(format: "%.2f", speed))x (\(speedType))" - ) + PluginLog.print("[\(Tags.render)] ⚡ Applying playback speed: \(String(format: "%.2f", speed))x (\(speedType))") let multiplier = 1.0 / Double(speed) @@ -44,14 +42,11 @@ public func applyPlaybackSpeed( return instruction } let scaledStart = CMTimeMultiplyByFloat64(custom.timeRange.start, multiplier: multiplier) - let scaledDuration = CMTimeMultiplyByFloat64( - custom.timeRange.duration, multiplier: multiplier) - let trackID = - (custom.requiredSourceTrackIDs?.first as? NSNumber)?.int32Value - ?? kCMPersistentTrackID_Invalid + let scaledDuration = CMTimeMultiplyByFloat64(custom.timeRange.duration, multiplier: multiplier) + let trackIDs = custom.requiredSourceTrackIDs?.compactMap { ($0 as? NSNumber)?.int32Value } ?? [] return CustomVideoCompositionInstruction( timeRange: CMTimeRange(start: scaledStart, duration: scaledDuration), - sourceTrackID: trackID, + sourceTrackIDs: trackIDs, layerInstructions: custom.layerInstructions, backgroundColor: custom.backgroundColor ) diff --git a/macos/Classes/src/features/render/helpers/CompositionBuilder.swift b/macos/Classes/src/features/render/helpers/CompositionBuilder.swift index 224909d..64b183d 100644 --- a/macos/Classes/src/features/render/helpers/CompositionBuilder.swift +++ b/macos/Classes/src/features/render/helpers/CompositionBuilder.swift @@ -12,6 +12,8 @@ internal class CompositionBuilder { private let videoEffects: VideoCompositorConfig private var enableAudio: Bool = true private var audioTracks: [AudioTrackConfig] = [] + private var renderWidth: Double? + private var renderHeight: Double? /// Initializes builder with configuration. /// @@ -23,6 +25,13 @@ internal class CompositionBuilder { self.videoEffects = videoEffects } + /// Sets the target render size. + func setRenderSize(width: Double?, height: Double?) -> CompositionBuilder { + self.renderWidth = width + self.renderHeight = height + return self + } + /// Enables or disables audio. 
/// /// - Parameter enabled: If true, includes original audio from video clips @@ -46,7 +55,7 @@ internal class CompositionBuilder { /// - Returns: Tuple containing composition, video composition, render size, audio mix, and source track ID /// - Throws: Error if composition creation fails func build() async throws -> ( - AVMutableComposition, VideoCompositionData, CGSize, AVAudioMix?, CMPersistentTrackID + AVMutableComposition, VideoCompositionData, CGSize, AVAudioMix?, CMPersistentTrackID, VideoCompositorConfig ) { guard !videoClips.isEmpty else { throw NSError( @@ -64,9 +73,14 @@ internal class CompositionBuilder { // Build video sequence let videoBuilder = VideoSequenceBuilder(videoClips: videoClips) .setEnableAudio(enableAudio) + .setRenderSize(width: renderWidth, height: renderHeight) let videoResult = try await videoBuilder.build(in: composition) + // Store track configs for compositor + var updatedVideoEffects = videoEffects + updatedVideoEffects.videoClipConfigs = videoResult.trackConfigs + // Add custom audio tracks var customAudioTracks: [(track: AVMutableCompositionTrack, config: AudioTrackConfig)] = [] for trackConfig in audioTracks { @@ -106,9 +120,7 @@ internal class CompositionBuilder { ) let compositionRenderSize = videoResult.renderSize - // Create instructions for each clip segment - // Use custom instruction class to ensure requiredSourceTrackIDs is properly set - // This fixes issues on older macOS versions + // Create instructions for each non-overlapping time segment var instructions: [AVVideoCompositionInstructionProtocol] = [] PluginLog.print("") @@ -120,6 +132,8 @@ internal class CompositionBuilder { PluginLog.print("==========================================") PluginLog.print("") + // Calculate pre-determined transforms for all clips + var clipTransforms: [CGAffineTransform] = [] for (index, clipInstruction) in videoResult.clipInstructions.enumerated() { PluginLog.print("🎬 Processing instruction for clip \(index)") PluginLog.print( @@ -133,36 +147,55 @@ internal class CompositionBuilder { with: clipInstruction.transform, clipIndex: index ) + clipTransforms.append(transform) + } - let layerInstruction: AVVideoCompositionLayerInstruction - if #available(macOS 26.0, *) { - var config = AVVideoCompositionLayerInstruction.Configuration( - assetTrack: videoResult.videoTrack - ) - config.setTransform(transform, at: .zero) - layerInstruction = AVVideoCompositionLayerInstruction(configuration: config) - } else { - let mutableInstruction = AVMutableVideoCompositionLayerInstruction( - assetTrack: videoResult.videoTrack - ) - mutableInstruction.setTransform(transform, at: .zero) - layerInstruction = mutableInstruction - } - - // Use custom instruction that explicitly provides requiredSourceTrackIDs - let instruction = CustomVideoCompositionInstruction( - timeRange: clipInstruction.timeRange, - sourceTrackID: videoResult.videoTrack.trackID, - layerInstructions: [layerInstruction], - backgroundColor: CGColor(red: 0, green: 0, blue: 0, alpha: 1) - ) + // Calculate non-overlapping time segments + let segments = calculateSegments( + from: videoResult.clipInstructions, + totalDuration: videoResult.totalDuration + ) + for (segIndex, segmentRange) in segments.enumerated() { + PluginLog.print("🎬 Processing segment \(segIndex)") PluginLog.print( - " ⚙️ Layer instruction configured with transform (trackID: \(videoResult.videoTrack.trackID))" + " Time range: \(String(format: "%.2f", segmentRange.start.seconds))s - \(String(format: "%.2f", (segmentRange.start + 
segmentRange.duration).seconds))s" ) - PluginLog.print("") - instructions.append(instruction) + var activeTrackIDs: [CMPersistentTrackID] = [] + var layerInstructions: [AVVideoCompositionLayerInstruction] = [] + + for (clipIndex, clipInstruction) in videoResult.clipInstructions.enumerated() { + // Check if this clip is active during this segment + let clipRange = clipInstruction.timeRange + let intersection = CMTimeRangeGetIntersection(segmentRange, otherRange: clipRange) + + if CMTimeGetSeconds(intersection.duration) > 0 { + activeTrackIDs.append(clipInstruction.trackID) + + let transform = clipTransforms[clipIndex] + let mutableLayerInstruction = AVMutableVideoCompositionLayerInstruction( + assetTrack: composition.track(withTrackID: clipInstruction.trackID)! + ) + mutableLayerInstruction.setTransform(transform, at: .zero) + layerInstructions.append(mutableLayerInstruction) + + PluginLog.print(" - Added trackID \(clipInstruction.trackID) (Clip \(clipIndex))") + } + } + + if !layerInstructions.isEmpty { + // Use custom instruction that explicitly provides requiredSourceTrackIDs + let instruction = CustomVideoCompositionInstruction( + timeRange: segmentRange, + sourceTrackIDs: activeTrackIDs, + layerInstructions: layerInstructions, + backgroundColor: CGColor(red: 0, green: 0, blue: 0, alpha: 1) + ) + instructions.append(instruction) + PluginLog.print(" ✅ Segment instruction created with \(layerInstructions.count) layers") + } + PluginLog.print("") } let videoCompositionData = VideoCompositionData( @@ -173,10 +206,10 @@ internal class CompositionBuilder { PluginLog.print("✅ Composition created successfully with \(videoClips.count) clips") - // Return the track ID for fallback on older macOS versions - let sourceTrackID = videoResult.videoTrack.trackID + // Return the first track ID for fallback on older macOS versions + let sourceTrackID = videoResult.clipInstructions.first?.trackID ?? kCMPersistentTrackID_Invalid - return (composition, videoCompositionData, videoResult.renderSize, audioMix, sourceTrackID) + return (composition, videoCompositionData, videoResult.renderSize, audioMix, sourceTrackID, updatedVideoEffects) } /// Creates audio mix with per-clip and per-track volume parameters. @@ -191,11 +224,17 @@ internal class CompositionBuilder { for track in originalTracks { let inputParameters = AVMutableAudioMixInputParameters(track: track) - // Use setVolumeRamp for each clip's time range to ensure - // volume changes are applied precisely per segment - for (index, clipInstruction) in clipInstructions.enumerated() { + // Find all instructions that apply to this specific audio track + let relevantInstructions = clipInstructions.enumerated().filter { _, instruction in + instruction.audioTrackID == track.trackID + } + + for (index, clipInstruction) in relevantInstructions { let clipVolume = index < videoClips.count ? (videoClips[index].volume ?? 
1.0) : 1.0
+
+                PluginLog.print("🔊 Setting volume ramp for track \(track.trackID): volume=\(clipVolume) at \(String(format: "%.2f", clipInstruction.timeRange.start.seconds))s")
+
                inputParameters.setVolumeRamp(
                    fromStartVolume: clipVolume,
                    toEndVolume: clipVolume,
@@ -204,7 +243,7 @@
            }

            audioMixInputParameters.append(inputParameters)
-            PluginLog.print("🔊 Applied per-clip volume to original audio track")
+            PluginLog.print("🔊 Applied per-clip volume to original audio track (ID: \(track.trackID))")
        }

        // Apply volume to custom audio tracks
@@ -234,6 +273,16 @@
        with preferredTransform: CGAffineTransform,
        clipIndex: Int
    ) -> CGAffineTransform {
+        // If manual positioning is requested, use the preferred transform as-is.
+        // The compositor will handle custom scaling and positioning based on VideoClip config.
+        if clipIndex < videoClips.count {
+            let clip = videoClips[clipIndex]
+            if clip.x != nil || clip.y != nil || clip.width != nil || clip.height != nil {
+                PluginLog.print("    🎯 Manual positioning detected for clip \(clipIndex), skipping fit and center transform")
+                return preferredTransform
+            }
+        }
+
        // Get the display size after applying the original transform (handles rotation)
        let displaySize = naturalSize.applying(preferredTransform)
        let videoWidth = abs(displaySize.width)
@@ -315,4 +364,39 @@
        return transform
    }
+
+    /// Calculates non-overlapping time segments from clip instructions.
+    private func calculateSegments(from instructions: [ClipInstruction], totalDuration: CMTime) -> [CMTimeRange] {
+        var points: [CMTime] = [.zero, totalDuration]
+        for instruction in instructions {
+            points.append(instruction.timeRange.start)
+            points.append(CMTimeAdd(instruction.timeRange.start, instruction.timeRange.duration))
+        }
+
+        let sortedPoints = points
+            .filter { CMTimeCompare($0, totalDuration) <= 0 }
+            .sorted { CMTimeCompare($0, $1) < 0 }
+
+        var uniquePoints: [CMTime] = []
+        for point in sortedPoints {
+            if let last = uniquePoints.last {
+                if CMTimeCompare(last, point) != 0 {
+                    uniquePoints.append(point)
+                }
+            } else {
+                uniquePoints.append(point)
+            }
+        }
+
+        var segments: [CMTimeRange] = []
+        for i in 0..<uniquePoints.count - 1 {
+            let start = uniquePoints[i]
+            let duration = CMTimeSubtract(uniquePoints[i + 1], start)
+            if CMTimeGetSeconds(duration) > 0 {
+                segments.append(CMTimeRange(start: start, duration: duration))
+            }
+        }
+        return segments
+    }
}
diff --git a/macos/Classes/src/features/render/helpers/VideoSequenceBuilder.swift b/macos/Classes/src/features/render/helpers/VideoSequenceBuilder.swift
index 8fdc973..3cca21b 100644
--- a/macos/Classes/src/features/render/helpers/VideoSequenceBuilder.swift
+++ b/macos/Classes/src/features/render/helpers/VideoSequenceBuilder.swift
@@ -9,6 +9,8 @@
internal class VideoSequenceBuilder {
    private let videoClips: [VideoClip]
    private var enableAudio: Bool = true
+    private var renderWidth: Double?
+    private var renderHeight: Double?

    /// Initializes builder with video clips.
    ///
@@ -17,6 +19,13 @@
        self.videoClips = videoClips
    }

+    /// Sets the target render size.
+    func setRenderSize(width: Double?, height: Double?) -> VideoSequenceBuilder {
+        self.renderWidth = width
+        self.renderHeight = height
+        return self
+    }
+
    /// Enables or disables audio in the output.
    ///
    /// - Parameter enabled: If true, includes original audio from video clips
@@ -67,7 +76,7 @@
    /// Builds the video composition with all clips.
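To make the boundary-point segmentation concrete: with clip A spanning 0–10 s and clip B overlapping at 5–8 s, the collected points {0, 5, 8, 10} produce three segments, each with a stable set of active tracks ([0,5) has A, [5,8) has A and B, [8,10) has A). A minimal worked example (values illustrative):

```swift
import CoreMedia

let boundaryPoints: [CMTime] = [0.0, 5.0, 8.0, 10.0].map {
    CMTime(seconds: $0, preferredTimescale: 600)
}

// Adjacent boundary points become non-overlapping segment ranges, exactly as
// calculateSegments does after sorting and de-duplicating.
let segments = zip(boundaryPoints, boundaryPoints.dropFirst()).map { pair in
    CMTimeRange(start: pair.0, end: pair.1)
}
// segments.count == 3; each range later gets one CustomVideoCompositionInstruction
// listing every track whose clip intersects it.
```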
/// /// - Parameter composition: Composition to build into - /// - Returns: Tuple containing video track, audio tracks, render size, frame rate, and clip instructions + /// - Returns: Tuple containing video tracks, audio tracks, render size, frame rate, and clip instructions func build(in composition: AVMutableComposition) async throws -> VideoSequenceResult { guard !videoClips.isEmpty else { throw NSError( @@ -85,32 +94,7 @@ internal class VideoSequenceBuilder { var maxFrameRate: Float = 30.0 var originalAudioTracks: [AVMutableCompositionTrack] = [] var clipInstructions: [ClipInstruction] = [] - - // Create single video track for all clips - guard - let compositionVideoTrack = composition.addMutableTrack( - withMediaType: .video, - preferredTrackID: kCMPersistentTrackID_Invalid - ) - else { - throw NSError( - domain: "VideoSequenceBuilder", - code: 2, - userInfo: [NSLocalizedDescriptionKey: "Failed to create video track"] - ) - } - - // Create single shared audio track for all clips (if enabled) - var sharedAudioTrack: AVMutableCompositionTrack? - if enableAudio { - sharedAudioTrack = composition.addMutableTrack( - withMediaType: .audio, - preferredTrackID: kCMPersistentTrackID_Invalid - ) - if sharedAudioTrack != nil { - PluginLog.print("🔊 Created SHARED audio track for all clips (will prevent empty segments)") - } - } + var trackConfigs: [CMPersistentTrackID: VideoClip] = [:] // Process each video clip for (index, clip) in videoClips.enumerated() { @@ -156,15 +140,12 @@ internal class VideoSequenceBuilder { PluginLog.print(" - Display size: \(correctedSize.width) x \(correctedSize.height)") PluginLog.print(" - Frame rate: \(nominalFrameRate) fps") - // Update max render size - if correctedSize.width > maxRenderSize.width - || correctedSize.height > maxRenderSize.height - { - let oldSize = maxRenderSize + // Update max render size (only if not explicitly provided) + if index == 0 && (renderWidth == nil || renderHeight == nil) { maxRenderSize = correctedSize - PluginLog.print( - " - ⬆️ Max render size updated: \(oldSize.width)x\(oldSize.height) → \(maxRenderSize.width)x\(maxRenderSize.height)" - ) + PluginLog.print(" - 📏 Base render size set from first clip: \(maxRenderSize.width)x\(maxRenderSize.height)") + } else if renderWidth != nil && renderHeight != nil { + maxRenderSize = CGSize(width: renderWidth!, height: renderHeight!) } // Update max frame rate @@ -176,62 +157,87 @@ internal class VideoSequenceBuilder { let clipTimeRange = await calculateTimeRange(for: clip, from: asset) let clipDuration = clipTimeRange.duration + // Determine insertion time in composition + let insertionTime: CMTime + if let segmentTimeUs = clip.segmentTimeUs { + insertionTime = CMTime(value: segmentTimeUs, timescale: 1_000_000) + } else { + insertionTime = totalDuration + } + + // Create a new track for each clip to support overlapping and independent positioning + guard let compositionVideoTrack = composition.addMutableTrack( + withMediaType: .video, + preferredTrackID: kCMPersistentTrackID_Invalid + ) else { + throw NSError( + domain: "VideoSequenceBuilder", + code: 2, + userInfo: [NSLocalizedDescriptionKey: "Failed to create video track for clip \(index)"] + ) + } + // Insert video clip into the composition track try compositionVideoTrack.insertTimeRange( clipTimeRange, of: videoTrack, - at: totalDuration + at: insertionTime ) + // Track mapping for compositor + trackConfigs[compositionVideoTrack.trackID] = clip + + // Add audio if enabled + var audioTrackID: CMPersistentTrackID? 
= nil + if enableAudio, + let audioTrack = try? await MediaInfoExtractor.loadAudioTrack(from: asset) + { + if let compositionAudioTrack = composition.addMutableTrack( + withMediaType: .audio, + preferredTrackID: kCMPersistentTrackID_Invalid + ) { + do { + try compositionAudioTrack.insertTimeRange( + clipTimeRange, + of: audioTrack, + at: insertionTime + ) + originalAudioTracks.append(compositionAudioTrack) + audioTrackID = compositionAudioTrack.trackID + PluginLog.print(" 🔊 Audio inserted into its own track (ID: \(audioTrackID!))") + } catch { + PluginLog.print(" ❌ ERROR inserting audio: \(error.localizedDescription)") + } + } else { + PluginLog.print(" ⚠️ WARNING: Failed to create audio track for clip \(index)") + } + } + // Store instruction for this clip segment clipInstructions.append( ClipInstruction( - timeRange: CMTimeRange(start: totalDuration, duration: clipDuration), + timeRange: CMTimeRange(start: insertionTime, duration: clipDuration), transform: preferredTransform, naturalSize: naturalSize, - renderSize: correctedSize + renderSize: correctedSize, + trackID: compositionVideoTrack.trackID, + audioTrackID: audioTrackID )) - // Add audio to shared track if enabled - if enableAudio, - let audioTrack = try? await MediaInfoExtractor.loadAudioTrack(from: asset), - let sharedAudioTrack = sharedAudioTrack - { - PluginLog.print("🔊 Processing audio for clip \(index)...") - PluginLog.print(" ✅ Audio track loaded from asset") - PluginLog.print(" Track ID: \(audioTrack.trackID)") - PluginLog.print( - " Duration: \(String(format: "%.2f", audioTrack.timeRange.duration.seconds))s" - ) - PluginLog.print(" Format: \(audioTrack.mediaType)") - - do { - try sharedAudioTrack.insertTimeRange( - clipTimeRange, - of: audioTrack, - at: totalDuration - ) - PluginLog.print(" ✅ Audio inserted into SHARED track!") - PluginLog.print( - " Source time range: \(String(format: "%.2f", clipTimeRange.start.seconds))s - \(String(format: "%.2f", (clipTimeRange.start + clipTimeRange.duration).seconds))s" - ) - PluginLog.print( - " Inserted at composition time: \(String(format: "%.2f", totalDuration.seconds))s" - ) - PluginLog.print( - " Audio duration: \(String(format: "%.2f", clipTimeRange.duration.seconds))s" - ) - } catch { - PluginLog.print(" ❌ ERROR inserting audio: \(error.localizedDescription)") - PluginLog.print(" Error details: \(error)") + // Update total duration (sequential part) + if clip.segmentTimeUs == nil { + totalDuration = CMTimeAdd(totalDuration, clipDuration) + } else { + let endInComposition = CMTimeAdd(insertionTime, clipDuration) + if CMTimeCompare(endInComposition, totalDuration) > 0 { + totalDuration = endInComposition } } - totalDuration = CMTimeAdd(totalDuration, clipDuration) PluginLog.print("✅ Clip \(index) added successfully") PluginLog.print(" - Duration: \(String(format: "%.2f", clipDuration.seconds))s") PluginLog.print( - " - Time range in composition: \(String(format: "%.2f", totalDuration.seconds - clipDuration.seconds))s - \(String(format: "%.2f", totalDuration.seconds))s" + " - Time range in composition: \(String(format: "%.2f", insertionTime.seconds))s - \(String(format: "%.2f", CMTimeAdd(insertionTime, clipDuration).seconds))s" ) } @@ -239,42 +245,20 @@ internal class VideoSequenceBuilder { PluginLog.print("📊 ===== VIDEO SEQUENCE SUMMARY =====") PluginLog.print(" Total clips: \(videoClips.count)") PluginLog.print(" Total duration: \(String(format: "%.2f", totalDuration.seconds))s") - PluginLog.print(" Max render size: \(maxRenderSize.width) x \(maxRenderSize.height)") + 
PluginLog.print(" Render size: \(maxRenderSize.width) x \(maxRenderSize.height)") PluginLog.print(" Max frame rate: \(maxFrameRate) fps") PluginLog.print(" Clip instructions: \(clipInstructions.count)") - - // Handle shared audio track - add to result if it has segments, otherwise remove from composition - if let audioTrack = sharedAudioTrack { - if !audioTrack.segments.isEmpty { - originalAudioTracks.append(audioTrack) - PluginLog.print( - " 🔊 AUDIO TRACKS: 1 (shared track with \(audioTrack.segments.count) segment(s))" - ) - for (segIdx, segment) in audioTrack.segments.enumerated() { - let timeMapping = segment as AVCompositionTrackSegment - PluginLog.print( - " Segment \(segIdx): \(String(format: "%.2f", timeMapping.timeMapping.target.start.seconds))s - \(String(format: "%.2f", (timeMapping.timeMapping.target.start + timeMapping.timeMapping.target.duration).seconds))s (duration: \(String(format: "%.2f", timeMapping.timeMapping.target.duration.seconds))s)" - ) - } - } else { - // Remove empty audio track from composition to prevent export errors - composition.removeTrack(audioTrack) - PluginLog.print(" 🔊 AUDIO TRACKS: 0 (shared track was empty and removed from composition)") - } - } else { - PluginLog.print(" 🔊 AUDIO TRACKS: 0 (no audio track created)") - } - + PluginLog.print(" Audio tracks: \(originalAudioTracks.count)") PluginLog.print("=====================================") PluginLog.print("") return VideoSequenceResult( - videoTrack: compositionVideoTrack, audioTracks: originalAudioTracks, totalDuration: totalDuration, renderSize: maxRenderSize, frameRate: maxFrameRate, - clipInstructions: clipInstructions + clipInstructions: clipInstructions, + trackConfigs: trackConfigs ) } @@ -312,16 +296,18 @@ internal struct ClipInstruction { let transform: CGAffineTransform let naturalSize: CGSize let renderSize: CGSize + let trackID: CMPersistentTrackID + let audioTrackID: CMPersistentTrackID? } /// Result of building a video sequence. internal struct VideoSequenceResult { - let videoTrack: AVMutableCompositionTrack let audioTracks: [AVMutableCompositionTrack] let totalDuration: CMTime let renderSize: CGSize let frameRate: Float let clipInstructions: [ClipInstruction] + let trackConfigs: [CMPersistentTrackID: VideoClip] } /// Holds the data needed to construct an AVMutableVideoComposition without @@ -356,12 +342,12 @@ internal class CustomVideoCompositionInstruction: NSObject, AVVideoCompositionIn init( timeRange: CMTimeRange, - sourceTrackID: CMPersistentTrackID, + sourceTrackIDs: [CMPersistentTrackID], layerInstructions: [AVVideoCompositionLayerInstruction], backgroundColor: CGColor? = nil ) { self.timeRange = timeRange - self._requiredSourceTrackIDs = [NSNumber(value: sourceTrackID)] + self._requiredSourceTrackIDs = sourceTrackIDs.map { NSNumber(value: $0) } self.layerInstructions = layerInstructions self.backgroundColor = backgroundColor super.init() diff --git a/macos/Classes/src/features/render/models/RenderConfig.swift b/macos/Classes/src/features/render/models/RenderConfig.swift index 36ede4a..722e981 100644 --- a/macos/Classes/src/features/render/models/RenderConfig.swift +++ b/macos/Classes/src/features/render/models/RenderConfig.swift @@ -212,6 +212,12 @@ struct RenderConfig { /// Global end time in microseconds for trimming the final composition let endUs: Int64? + /// Target render width + let renderWidth: Double? + + /// Target render height + let renderHeight: Double? + /// Whether to optimize the video for network streaming (fast start). 
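The compositor pairs these fields with intendedRenderSize to remap logical coordinates when the chosen export preset forces a different output resolution. A small numeric sketch of that mapping (values illustrative):

```swift
import CoreGraphics

let intended = CGSize(width: 3840, height: 2160)  // renderWidth / renderHeight
let actual = CGSize(width: 1920, height: 1080)    // preset-constrained buffer

let scaleFactorX = intended.width > 0 ? actual.width / intended.width : 1.0    // 0.5
let scaleFactorY = intended.height > 0 ? actual.height / intended.height : 1.0 // 0.5

// A clip positioned at (400, 200) in the logical 4K space lands at (200, 100)
// in the actual 1080p pixel buffer.
let mapped = CGPoint(x: 400 * scaleFactorX, y: 200 * scaleFactorY)
```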
/// When true, moves the moov atom to the beginning of the file. let shouldOptimizeForNetworkUse: Bool @@ -248,6 +254,8 @@ struct RenderConfig { blur: self.blur, startUs: self.startUs, endUs: self.endUs, + renderWidth: self.renderWidth, + renderHeight: self.renderHeight, shouldOptimizeForNetworkUse: self.shouldOptimizeForNetworkUse, imageBytesWithCropping: self.imageBytesWithCropping ) @@ -269,7 +277,14 @@ struct RenderConfig { inputPath: inputPath, startUs: (clipMap["startUs"] as? NSNumber)?.int64Value, endUs: (clipMap["endUs"] as? NSNumber)?.int64Value, - volume: (clipMap["volume"] as? NSNumber)?.floatValue + volume: (clipMap["volume"] as? NSNumber)?.floatValue, + opacity: (clipMap["opacity"] as? NSNumber)?.doubleValue, + x: (clipMap["x"] as? NSNumber)?.doubleValue, + y: (clipMap["y"] as? NSNumber)?.doubleValue, + width: (clipMap["width"] as? NSNumber)?.doubleValue, + height: (clipMap["height"] as? NSNumber)?.doubleValue, + segmentTimeUs: (clipMap["segmentTimeUs"] as? NSNumber)?.int64Value, + zIndex: clipMap["zIndex"] as? Int ) } } @@ -320,6 +335,8 @@ struct RenderConfig { blur: (args["blur"] as? NSNumber)?.doubleValue, startUs: (args["startUs"] as? NSNumber)?.int64Value, endUs: (args["endUs"] as? NSNumber)?.int64Value, + renderWidth: (args["renderWidth"] as? NSNumber)?.doubleValue, + renderHeight: (args["renderHeight"] as? NSNumber)?.doubleValue, shouldOptimizeForNetworkUse: args["shouldOptimizeForNetworkUse"] as? Bool ?? true, imageBytesWithCropping: args["imageBytesWithCropping"] as? Bool ?? false ) diff --git a/macos/Classes/src/features/render/models/VideoClip.swift b/macos/Classes/src/features/render/models/VideoClip.swift index 6241247..dd59540 100644 --- a/macos/Classes/src/features/render/models/VideoClip.swift +++ b/macos/Classes/src/features/render/models/VideoClip.swift @@ -1,16 +1,44 @@ import Foundation -/// Represents a video clip with optional trimming and volume control +/// Represents a video clip with optional trimming, volume control, and positioning internal struct VideoClip { let inputPath: String let startUs: Int64? let endUs: Int64? let volume: Float? + let opacity: Double? - init(inputPath: String, startUs: Int64? = nil, endUs: Int64? = nil, volume: Float? = nil) { + // New fields for composition support + let x: Double? + let y: Double? + let width: Double? + let height: Double? + let segmentTimeUs: Int64? + let zIndex: Int? + + init( + inputPath: String, + startUs: Int64? = nil, + endUs: Int64? = nil, + volume: Float? = nil, + opacity: Double? = nil, + x: Double? = nil, + y: Double? = nil, + width: Double? = nil, + height: Double? = nil, + segmentTimeUs: Int64? = nil, + zIndex: Int? = nil + ) { self.inputPath = inputPath self.startUs = startUs self.endUs = endUs self.volume = volume + self.opacity = opacity + self.x = x + self.y = y + self.width = width + self.height = height + self.segmentTimeUs = segmentTimeUs + self.zIndex = zIndex } } diff --git a/macos/Classes/src/features/render/models/VideoCompositorConfig.swift b/macos/Classes/src/features/render/models/VideoCompositorConfig.swift index 5d01bbb..6e210c9 100644 --- a/macos/Classes/src/features/render/models/VideoCompositorConfig.swift +++ b/macos/Classes/src/features/render/models/VideoCompositorConfig.swift @@ -35,4 +35,12 @@ struct VideoCompositorConfig { /// Fallback source track ID for older macOS versions where sourceTrackIDs may be empty. /// This is used when the custom compositor doesn't receive track IDs properly. 
var sourceTrackID: CMPersistentTrackID = kCMPersistentTrackID_Invalid + + /// Mapping of track ID to video clip configuration for multi-track compositing + var videoClipConfigs: [CMPersistentTrackID: VideoClip] = [:] + + /// The intended render size of the composition (logical coordinate space). + /// This is used to calculate scale factors if the actual render context size + /// differs from the intended size (e.g. due to AVAssetExportSession presets). + var intendedRenderSize: CGSize = .zero } diff --git a/macos/Classes/src/features/render/utils/VideoCompositor.swift b/macos/Classes/src/features/render/utils/VideoCompositor.swift index e6c2862..6d5530f 100644 --- a/macos/Classes/src/features/render/utils/VideoCompositor.swift +++ b/macos/Classes/src/features/render/utils/VideoCompositor.swift @@ -35,10 +35,14 @@ class VideoCompositor: NSObject, AVVideoCompositing { var cropHeight: CGFloat? var originalNaturalSize: CGSize = .zero + var intendedRenderSize: CGSize = .zero /// Fallback source track ID for older macOS versions var sourceTrackID: CMPersistentTrackID = kCMPersistentTrackID_Invalid + /// Track configurations for multi-track compositing + var videoClipConfigs: [CMPersistentTrackID: VideoClip] = [:] + /// Color filter configs for per-frame LUT computation private var colorFilterConfigs: [ColorFilterConfig] = [] @@ -75,7 +79,9 @@ class VideoCompositor: NSObject, AVVideoCompositing { self.videoRotationDegrees = config.videoRotationDegrees self.shouldApplyOrientationCorrection = config.shouldApplyOrientationCorrection self.originalNaturalSize = config.originalNaturalSize + self.intendedRenderSize = config.intendedRenderSize self.sourceTrackID = config.sourceTrackID + self.videoClipConfigs = config.videoClipConfigs self.setOverlayImageLayers(from: config.imageLayerConfigs) self.colorFilterConfigs = config.colorFilterConfigs @@ -176,313 +182,186 @@ class VideoCompositor: NSObject, AVVideoCompositing { func renderContextChanged(_ newRenderContext: AVVideoCompositionRenderContext) {} func startRequest(_ request: AVAsynchronousVideoCompositionRequest) { - // Try to get source buffer from the first available track - var sourceBuffer: CVPixelBuffer? - - if !request.sourceTrackIDs.isEmpty { - sourceBuffer = request.sourceFrame(byTrackID: request.sourceTrackIDs[0].int32Value) - } - - // Fallback 1: Try to get track ID from layer instruction if sourceTrackIDs is empty - // This can happen on older macOS versions - if sourceBuffer == nil, - let instruction = request.videoCompositionInstruction - as? 
CustomVideoCompositionInstruction, - let layerInstruction = instruction.layerInstructions.first - { - let trackID = layerInstruction.trackID - if trackID != kCMPersistentTrackID_Invalid { - sourceBuffer = request.sourceFrame(byTrackID: trackID) - } - } - - // Fallback 2: Use the pre-configured sourceTrackID from VideoCompositorConfig - // This is set during composition building and guarantees we have the correct track ID - if sourceBuffer == nil && sourceTrackID != kCMPersistentTrackID_Invalid { - sourceBuffer = request.sourceFrame(byTrackID: sourceTrackID) - } - - guard let sourceBuffer = sourceBuffer else { - request.finish( - with: NSError( - domain: "VideoCompositor", code: 0, - userInfo: [ - NSLocalizedDescriptionKey: - "No source tracks available for compositing (sourceTrackIDs: \(request.sourceTrackIDs.count), configTrackID: \(sourceTrackID))" - ])) - return - } - var outputImage = CIImage(cvPixelBuffer: sourceBuffer) - - // Apply layer instruction transform first (video scaling/centering/rotation) - // This ensures all videos are properly sized and oriented before applying user effects. - // The layerInstruction contains the preferredTransform which already handles video rotation - // from portrait to landscape or vice versa, so no additional orientation correction is needed. - // - // IMPORTANT: AVFoundation uses a top-left origin coordinate system (Y points down), - // while CIImage uses a bottom-left origin (Y points up). We need to convert the transform - // to work correctly with CIImage's coordinate system. - - // Extract layer instruction from CustomVideoCompositionInstruction - var layerInstruction: AVVideoCompositionLayerInstruction? - if let customInstruction = request.videoCompositionInstruction - as? CustomVideoCompositionInstruction, - let firstLayerInstruction = customInstruction.layerInstructions.first - { - layerInstruction = firstLayerInstruction - } - - if let layerInstruction = layerInstruction { - var startTransform = CGAffineTransform.identity - var endTransform = CGAffineTransform.identity - var timeRange = CMTimeRange.zero - - // Get the transform at the current composition time - let hasTransform = layerInstruction.getTransformRamp( - for: request.compositionTime, - start: &startTransform, - end: &endTransform, - timeRange: &timeRange - ) - - if hasTransform && !startTransform.isIdentity { - // Convert AVFoundation transform to CIImage coordinate system: - // 1. Flip Y axis before transform (go from CIImage coords to AVFoundation coords) - // 2. Apply the AVFoundation transform - // 3. 
Flip Y axis after transform (go back to CIImage coords) - let imageHeight = outputImage.extent.height - - // Flip Y: translate to top, scale Y by -1 - let flipY = CGAffineTransform(scaleX: 1, y: -1) - .translatedBy(x: 0, y: -imageHeight) - - // Convert transform: flipY * transform * flipY^-1 - // But since flipY is its own inverse (when combined with translate), we use: - // result = flipY * transform * flipY (adjusted for new height after transform) - let convertedTransform = - flipY - .concatenating(startTransform) - - outputImage = outputImage.transformed(by: convertedTransform) - - // After transform, we need to flip back and normalize - let transformedExtent = outputImage.extent - let newHeight = transformedExtent.height - let flipBack = CGAffineTransform(scaleX: 1, y: -1) - .translatedBy(x: 0, y: -newHeight) - - outputImage = outputImage.transformed(by: flipBack) - - // Normalize position to origin - let finalExtent = outputImage.extent - if finalExtent.origin.x != 0 || finalExtent.origin.y != 0 { - let translation = CGAffineTransform( - translationX: -finalExtent.origin.x, - y: -finalExtent.origin.y - ) - outputImage = outputImage.transformed(by: translation) + let renderSize = request.renderContext.size + let currentTimeUs = Int64(CMTimeGetSeconds(request.compositionTime) * 1_000_000) + + // Calculate scale factors between intended logical resolution and actual render size. + // This handles cases where AVAssetExportSession forces a different resolution + // (e.g. 1080p preset for a 4K composition). + let scaleFactorX = intendedRenderSize.width > 0 ? renderSize.width / intendedRenderSize.width : 1.0 + let scaleFactorY = intendedRenderSize.height > 0 ? renderSize.height / intendedRenderSize.height : 1.0 + + // 1. Define a common structure for all renderable items + enum RenderableItem { + case video(image: CIImage, clip: VideoClip, trackID: CMPersistentTrackID) + case imageLayer(layer: ImageLayer) + + var zIndex: Int { + switch self { + case .video(_, let clip, _): return clip.zIndex ?? 0 + case .imageLayer: return Int.max } } } - var center = CGPoint(x: outputImage.extent.midX, y: outputImage.extent.midY) - - // Apply user-defined effects (crop, rotation, flip, scale) - var transform = CGAffineTransform.identity - - // Apply LUT, blur, and flip BEFORE overlay when imageBytesWithCropping is enabled - // This ensures these effects only affect the video, not the overlay - if imageBytesWithCropping { - // Apply color filter (timed LUT) to video only - outputImage = applyColorFilter(to: outputImage, at: request.compositionTime) + var items: [RenderableItem] = [] + + // 2. Collect active video frames + for trackIDValue in request.sourceTrackIDs { + let trackID = trackIDValue.int32Value + if let sourceBuffer = request.sourceFrame(byTrackID: trackID), + let clipConfig = videoClipConfigs[trackID] { + + var frameImage = CIImage(cvPixelBuffer: sourceBuffer) + + // Apply individual track transform from layer instructions + if let customInstruction = request.videoCompositionInstruction as? 
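The scale factors above exist because `AVAssetExportSession` presets can hand the compositor a render context that differs from the composition's logical size. A worked example, assuming a 4K composition exported with a 1080p preset:

```swift
import CoreGraphics

let intended = CGSize(width: 3840, height: 2160) // logical composition size
let actual   = CGSize(width: 1920, height: 1080) // render context from the preset

let scaleFactorX = intended.width  > 0 ? actual.width  / intended.width  : 1.0 // 0.5
let scaleFactorY = intended.height > 0 ? actual.height / intended.height : 1.0 // 0.5

// A clip placed at (200, 100) with size 800x600 in logical coordinates
// lands at (100, 50) with size 400x300 in the actual render context.
let origin = CGPoint(x: 200 * scaleFactorX, y: 100 * scaleFactorY)
let size   = CGSize(width: 800 * scaleFactorX, height: 600 * scaleFactorY)
```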
CustomVideoCompositionInstruction { + for layerInstruction in customInstruction.layerInstructions { + if layerInstruction.trackID == trackID { + var startTransform = CGAffineTransform.identity + var endTransform = CGAffineTransform.identity + var timeRange = CMTimeRange.zero + + let hasTransform = layerInstruction.getTransformRamp( + for: request.compositionTime, + start: &startTransform, + end: &endTransform, + timeRange: &timeRange + ) + + if hasTransform && !startTransform.isIdentity { + let imageHeight = frameImage.extent.height + let flipY = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: -imageHeight) + let convertedTransform = flipY.concatenating(startTransform) + frameImage = frameImage.transformed(by: convertedTransform) + + let transformedExtent = frameImage.extent + let flipBack = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: -transformedExtent.height) + frameImage = frameImage.transformed(by: flipBack) + + // Normalize + let finalExtent = frameImage.extent + if finalExtent.origin.x != 0 || finalExtent.origin.y != 0 { + frameImage = frameImage.transformed(by: CGAffineTransform(translationX: -finalExtent.origin.x, y: -finalExtent.origin.y)) + } + } + break + } + } + } - // Apply blur to video only - if blurSigma > 0 { - outputImage = outputImage.applyingGaussianBlur(sigma: blurSigma) + items.append(.video(image: frameImage, clip: clipConfig, trackID: trackID)) } + } - // Apply flip to video only (before adding overlay) - if flipX || flipY { - let flipScaleX: CGFloat = flipX ? -1 : 1 - let flipScaleY: CGFloat = flipY ? -1 : 1 - - let flipTransform = CGAffineTransform(translationX: center.x, y: center.y) - .scaledBy(x: flipScaleX, y: flipScaleY) - .translatedBy(x: -center.x, y: -center.y) - - outputImage = outputImage.transformed(by: flipTransform) - - // Normalize position after flip - let flippedExtent = outputImage.extent - if flippedExtent.origin.x != 0 || flippedExtent.origin.y != 0 { - let translation = CGAffineTransform( - translationX: -flippedExtent.origin.x, - y: -flippedExtent.origin.y - ) - outputImage = outputImage.transformed(by: translation) - } - center = CGPoint(x: outputImage.extent.midX, y: outputImage.extent.midY) + // 3. Collect active image layers + for layer in overlayImageLayers { + let inRange = (layer.startUs == -1 || currentTimeUs >= layer.startUs) && (layer.endUs == -1 || currentTimeUs <= layer.endUs) + if inRange { + items.append(.imageLayer(layer: layer)) } } - // Apply overlay BEFORE crop if imageBytesWithCropping is enabled - if imageBytesWithCropping { - let imageRect = outputImage.extent - - // Apply time-based overlay layers - let currentTimeUs = Int64(CMTimeGetSeconds(request.compositionTime) * 1_000_000) - for layer in overlayImageLayers { - let inTimeRange = - (layer.startUs == -1 || currentTimeUs >= layer.startUs) - && (layer.endUs == -1 || currentTimeUs <= layer.endUs) - - if inTimeRange { - var img = layer.image + if items.isEmpty { + PluginLog.print("⚠️ VideoCompositor: No active items found at time \(request.compositionTime.seconds)s") + request.finish(with: NSError(domain: "VideoCompositor", code: 0, userInfo: [NSLocalizedDescriptionKey: "No active items found"])) + return + } - if let w = layer.width, let h = layer.height { - let sx = CGFloat(w) / img.extent.width - let sy = CGFloat(h) / img.extent.height - img = img.transformed(by: CGAffineTransform(scaleX: sx, y: sy)) - } + // 4. Sort all items by zIndex + let sortedItems = items.sorted { $0.zIndex < $1.zIndex } + + // 5. 
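One note on the sort just above: every image layer reports `Int.max`, and clips may share a default `zIndex` of 0, so ties are common. Swift's `sorted` is documented as stable in Swift 5 and later, which preserves track order for ties, but an explicit tie-breaker makes that intent visible. A self-contained sketch with a stand-in item type:

```swift
struct Item { let zIndex: Int; let name: String }

let items = [
    Item(zIndex: 0, name: "clip A"),
    Item(zIndex: 0, name: "clip B"),      // ties with clip A
    Item(zIndex: Int.max, name: "layer"), // image layers always on top
]

// Tie-break on the original index so equal-zIndex items keep declaration
// order regardless of the sort implementation.
let ordered = items.enumerated()
    .sorted { ($0.element.zIndex, $0.offset) < ($1.element.zIndex, $1.offset) }
    .map { $0.element }
// ordered: clip A, clip B, layer
```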
Initialize background image (black frame) + var outputImage = CIImage(color: .black).cropped(to: CGRect(origin: .zero, size: renderSize)) + + // 6. Composite each item + for item in sortedItems { + switch item { + case .video(let img, let clip, _): + var frameImg = img + + // Apply custom size if provided, otherwise scale by global factor + if let w = clip.width, let h = clip.height { + let targetW = CGFloat(w) * scaleFactorX + let targetH = CGFloat(h) * scaleFactorY + let sx = targetW / frameImg.extent.width + let sy = targetH / frameImg.extent.height + frameImg = frameImg.transformed(by: CGAffineTransform(scaleX: sx, y: sy)) + } else if scaleFactorX != 1.0 || scaleFactorY != 1.0 { + frameImg = frameImg.transformed(by: CGAffineTransform(scaleX: scaleFactorX, y: scaleFactorY)) + } - let overlay: CIImage - if layer.x == nil && layer.y == nil { - overlay = img.transformed( - by: CGAffineTransform( - scaleX: imageRect.width / img.extent.width, - y: imageRect.height / img.extent.height)) - } else { - let posX = CGFloat(layer.x ?? 0) - let posY = CGFloat(layer.y ?? 0) - let cgY = imageRect.height - posY - img.extent.height - overlay = img.transformed( - by: CGAffineTransform(translationX: posX, y: cgY)) + // Apply custom offset if provided + if clip.x != nil || clip.y != nil { + let posX = CGFloat(clip.x ?? 0) * scaleFactorX + let posY = CGFloat(clip.y ?? 0) * scaleFactorY + // Convert from top-left (Flutter) to bottom-left (Core Image) + let cgY = renderSize.height - posY - frameImg.extent.height + frameImg = frameImg.transformed(by: CGAffineTransform(translationX: posX, y: cgY)) + } else if scaleFactorX != 1.0 || scaleFactorY != 1.0 { + // Normalize position if we scaled but didn't translate manually + let extent = frameImg.extent + if extent.origin.x != 0 || extent.origin.y != 0 { + frameImg = frameImg.transformed(by: CGAffineTransform(translationX: -extent.origin.x, y: -extent.origin.y)) } - - let (opacity, animTransform) = computeAnimation( - layer: layer, - currentTimeUs: currentTimeUs, - overlayExtent: overlay.extent, - frameExtent: imageRect - ) - outputImage = compositeOverlay( - overlay, over: outputImage, opacity: opacity, transform: animTransform) } - } - } - // Cropping - if cropX != 0 || cropY != 0 || cropWidth != nil || cropHeight != nil { - let inputExtent = outputImage.extent - let videoWidth = inputExtent.width - let videoHeight = inputExtent.height - - let x = cropX - var y = cropY - let width = cropWidth ?? (videoWidth - x) - let height = cropHeight ?? 
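The `cgY` line above is the single point where Flutter's top-left coordinate convention meets Core Image's bottom-left one. A quick numeric check of the conversion:

```swift
import CoreGraphics

// A 300 pt tall clip at Flutter y = 100 in a 1080 pt high render context
// must sit 100 pt below the top edge, so its Core Image origin (measured
// from the bottom) is 1080 - 100 - 300 = 680.
let renderHeight: CGFloat = 1080
let flutterY: CGFloat = 100
let clipHeight: CGFloat = 300
let coreImageY = renderHeight - flutterY - clipHeight // 680
```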
(videoHeight - y) + // Apply opacity if needed + if let opacity = clip.opacity, opacity < 1.0 { + frameImg = frameImg.applyingFilter("CIColorMatrix", parameters: [ + "inputAVector": CIVector(x: 0, y: 0, z: 0, w: CGFloat(opacity)), + ]) + } - y = videoHeight - height - y + outputImage = frameImg.composited(over: outputImage) - let cropRect = CGRect(x: x, y: y, width: width, height: height) + case .imageLayer(let layer): + var layerImg = layer.image + if let w = layer.width, let h = layer.height { + let targetW = CGFloat(w) * scaleFactorX + let targetH = CGFloat(h) * scaleFactorY + layerImg = layerImg.transformed(by: CGAffineTransform(scaleX: targetW/layerImg.extent.width, y: targetH/layerImg.extent.height)) + } - outputImage = outputImage.cropped(to: cropRect) - outputImage = outputImage.transformed( - by: CGAffineTransform( - translationX: -cropRect.origin.x, - y: -cropRect.origin.y + let overlay: CIImage + if layer.x == nil && layer.y == nil { + overlay = layerImg.transformed(by: CGAffineTransform(scaleX: renderSize.width/layerImg.extent.width, y: renderSize.height/layerImg.extent.height)) + } else { + let posX = CGFloat(layer.x ?? 0) * scaleFactorX + let posY = CGFloat(layer.y ?? 0) * scaleFactorY + let cgY = renderSize.height - posY - layerImg.extent.height + overlay = layerImg.transformed(by: CGAffineTransform(translationX: posX, y: cgY)) + } - )) - center = CGPoint(x: outputImage.extent.midX, y: outputImage.extent.midY) + let (opacity, animTransform) = computeAnimation(layer: layer, currentTimeUs: currentTimeUs, overlayExtent: overlay.extent, frameExtent: CGRect(origin: .zero, size: renderSize)) + outputImage = compositeOverlay(overlay, over: outputImage, opacity: opacity, transform: animTransform) + } } - // Rotation - if rotateRadians != 0 { - // Rotate the image - let rotation = CGAffineTransform(rotationAngle: rotateRadians) - let rotatedImage = outputImage.transformed(by: rotation) - - // Get the new bounding box after rotation - let rotatedExtent = rotatedImage.extent - - // Translate to (0, 0) - let translation = CGAffineTransform( - translationX: -rotatedExtent.origin.x, y: -rotatedExtent.origin.y) - outputImage = rotatedImage.transformed(by: translation) - center = CGPoint(x: outputImage.extent.midX, y: outputImage.extent.midY) - } + // 7. Apply global effects (if any) + let center = CGPoint(x: outputImage.extent.midX, y: outputImage.extent.midY) + var transform = CGAffineTransform.identity - // Flipping (only if NOT imageBytesWithCropping - otherwise already applied before overlay) - if !imageBytesWithCropping && (flipX || flipY) { + // Apply flip (Global) + if flipX || flipY { let scaleX: CGFloat = flipX ? -1 : 1 let scaleY: CGFloat = flipY ? 
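A caveat on the per-clip opacity above: frames decoded from video buffers are typically premultiplied-alpha, and `CIColorMatrix` with only `inputAVector` scaled leaves the color channels at full strength, which can read as overly bright fringes during a fade. If that shows up, scaling all four vectors keeps the premultiplication consistent; a minimal sketch:

```swift
import CoreImage

// Fade a premultiplied-alpha image by scaling RGB and A together.
// `opacity` is assumed to be in 0...1.
func faded(_ image: CIImage, opacity: CGFloat) -> CIImage {
    image.applyingFilter("CIColorMatrix", parameters: [
        "inputRVector": CIVector(x: opacity, y: 0, z: 0, w: 0),
        "inputGVector": CIVector(x: 0, y: opacity, z: 0, w: 0),
        "inputBVector": CIVector(x: 0, y: 0, z: opacity, w: 0),
        "inputAVector": CIVector(x: 0, y: 0, z: 0, w: opacity),
    ])
}
```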
-1 : 1 - - let flipTransform = CGAffineTransform(translationX: center.x, y: center.y) + transform = transform.concatenating(CGAffineTransform(translationX: center.x, y: center.y) .scaledBy(x: scaleX, y: scaleY) - .translatedBy(x: -center.x, y: -center.y) - - transform = transform.concatenating(flipTransform) + .translatedBy(x: -center.x, y: -center.y)) } - // Apply Scale + // Apply Global Scale if scaleX != 1 || scaleY != 1 { transform = transform.scaledBy(x: scaleX, y: scaleY) } outputImage = outputImage.transformed(by: transform) - // Apply color filter (only if NOT imageBytesWithCropping - otherwise already applied before overlay) - if !imageBytesWithCropping { - outputImage = applyColorFilter(to: outputImage, at: request.compositionTime) - - // Apply blur - if blurSigma > 0 { - outputImage = outputImage.applyingGaussianBlur(sigma: blurSigma) - } - } - - // Apply overlay image layers (only if not already applied before crop) - if !imageBytesWithCropping { - let imageRect = outputImage.extent - - let currentTimeUs = Int64(CMTimeGetSeconds(request.compositionTime) * 1_000_000) - for layer in overlayImageLayers { - let inTimeRange = - (layer.startUs == -1 || currentTimeUs >= layer.startUs) - && (layer.endUs == -1 || currentTimeUs <= layer.endUs) - if inTimeRange { - var img = layer.image - - if let w = layer.width, let h = layer.height { - let sx = CGFloat(w) / img.extent.width - let sy = CGFloat(h) / img.extent.height - img = img.transformed(by: CGAffineTransform(scaleX: sx, y: sy)) - } - - let overlay: CIImage - if layer.x == nil && layer.y == nil { - overlay = img.transformed( - by: CGAffineTransform( - scaleX: imageRect.width / img.extent.width, - y: imageRect.height / img.extent.height)) - } else { - let posX = CGFloat(layer.x ?? 0) - let posY = CGFloat(layer.y ?? 0) - let cgY = imageRect.height - posY - img.extent.height - overlay = img.transformed( - by: CGAffineTransform(translationX: posX, y: cgY)) - } - - let (opacity, animTransform) = computeAnimation( - layer: layer, - currentTimeUs: currentTimeUs, - overlayExtent: overlay.extent, - frameExtent: imageRect - ) - outputImage = compositeOverlay( - overlay, over: outputImage, opacity: opacity, transform: animTransform) - } - } + // Apply LUT and Blur (Global) + outputImage = applyColorFilter(to: outputImage, at: request.compositionTime) + if blurSigma > 0 { + outputImage = outputImage.applyingGaussianBlur(sigma: blurSigma) } guard let outputBuffer = request.renderContext.newPixelBuffer() else {
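The global flip above is the usual translate-scale-translate conjugation, so the mirror happens about the image center rather than the origin. A standalone check that a point on the left edge lands on the right edge:

```swift
import CoreGraphics

let center = CGPoint(x: 960, y: 540) // center of a 1920x1080 frame

// Translate the center to the origin, mirror horizontally, translate back.
let flip = CGAffineTransform(translationX: center.x, y: center.y)
    .scaledBy(x: -1, y: 1)
    .translatedBy(x: -center.x, y: -center.y)

let mirrored = CGPoint(x: 0, y: 540).applying(flip) // (1920.0, 540.0)
```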