diff --git a/CHANGELOG.md b/CHANGELOG.md index b8fddd9..332d4cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 1.16.2 +- **FIX**(android, iOS, macOS): Eliminate audible clicks/gaps at custom audio loop boundaries and clip transitions. Each custom audio track is now pre-rendered to a single gap-less PCM WAV file before being inserted into the composition, avoiding AAC encoder frame realignment (Android Media3) and codec priming/padding artifacts (AVFoundation `insertTimeRange` per loop iteration on iOS/macOS). Volume control remains in the mixer layer for post-hoc adjustments without re-decoding. + ## 1.16.1 - **FIX**(iOS, macOS): Fix crash `No source tracks available for compositing` when rendering H.264 MP4 files whose container duration slightly exceeds the video track's actual frame duration. The compositor time range is now clamped to the video track's real `timeRange` before inserting into the composition, preventing AVFoundation from requesting frames that have no pixel buffer. A black-frame fallback was also added as a safety net. 
diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/RenderVideo.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/RenderVideo.kt index 7335ec2..ba7f7f0 100644 --- a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/RenderVideo.kt +++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/RenderVideo.kt @@ -310,7 +310,7 @@ class RenderVideo(private val context: Context) { // Create composition (now fast - no manual audio mixing needed, Media3 handles it natively) Thread { try { - val composition = applyComposition( + val compositionResult = applyComposition( context = context, config = config, videoEffects = videoEffects, @@ -318,7 +318,27 @@ class RenderVideo(private val context: Context) { ) mainHandler.post { - if (composition != null) { + if (compositionResult != null) { + val composition = compositionResult.composition + val audioTempFiles = compositionResult.temporaryFiles + + transformer.addListener(object : Transformer.Listener { + override fun onCompleted( + composition: Composition, + result: ExportResult + ) { + cleanupAudioTempFiles(audioTempFiles) + } + + override fun onError( + composition: Composition, + result: ExportResult, + exception: ExportException + ) { + cleanupAudioTempFiles(audioTempFiles) + } + }) + transformer.start(composition, outputFile.absolutePath) // Start progress tracking loop @@ -349,4 +369,27 @@ class RenderVideo(private val context: Context) { } }.start() } + + /** + * Deletes pre-rendered audio temp files (typically WAVs from + * AudioPreRenderer) created while building the composition. 
+ */ + private fun cleanupAudioTempFiles(files: List) { + for (file in files) { + try { + if (file.exists()) { + val deleted = file.delete() + Log.d( + RENDER_TAG, + "Cleanup pre-rendered audio file: ${file.name}, deleted=$deleted" + ) + } + } catch (e: Exception) { + Log.w( + RENDER_TAG, + "Failed to delete pre-rendered audio file ${file.name}: ${e.message}" + ) + } + } + } } diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/ApplyComposition.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/ApplyComposition.kt index 3582caf..b84893a 100644 --- a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/ApplyComposition.kt +++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/ApplyComposition.kt @@ -6,6 +6,18 @@ import androidx.media3.common.audio.AudioProcessor import androidx.media3.common.util.UnstableApi import androidx.media3.transformer.Composition import ch.waio.pro_video_editor.src.features.render.models.RenderConfig +import java.io.File + +/** + * Result of building a composition: the composition itself plus a list of + * temporary files that were created during the build (e.g. pre-rendered + * audio WAVs). The caller MUST delete these temporary files after the + * Transformer export finishes (success or failure). + */ +data class CompositionResult( + val composition: Composition, + val temporaryFiles: List +) /** * Creates a Media3 Composition from render configuration. 
@@ -18,7 +30,8 @@ import ch.waio.pro_video_editor.src.features.render.models.RenderConfig * @param config The render configuration containing all composition parameters * @param videoEffects List of video effects to apply (from EffectsProcessor) * @param audioEffects List of audio effects to apply (from EffectsProcessor) - * @return Composition ready for transformer, or null if no video clips provided + * @return [CompositionResult] with the composition and any temporary files + * created during the build, or null if no video clips were provided. */ @UnstableApi fun applyComposition( @@ -26,9 +39,10 @@ fun applyComposition( config: RenderConfig, videoEffects: List, audioEffects: List -): Composition? { - return CompositionBuilder(context, config) +): CompositionResult? { + val builder = CompositionBuilder(context, config) .setVideoEffects(videoEffects) .setAudioEffects(audioEffects) - .build() + val composition = builder.build() ?: return null + return CompositionResult(composition, builder.temporaryFiles.toList()) } \ No newline at end of file diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/AudioPreRenderer.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/AudioPreRenderer.kt new file mode 100644 index 0000000..d40cd73 --- /dev/null +++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/AudioPreRenderer.kt @@ -0,0 +1,521 @@ +package ch.waio.pro_video_editor.src.features.render.helpers + +import RENDER_TAG +import android.content.Context +import android.media.AudioFormat +import android.media.MediaCodec +import android.media.MediaExtractor +import android.media.MediaFormat +import android.os.Build +import androidx.media3.common.util.UnstableApi +import ch.waio.pro_video_editor.src.shared.logging.PluginLog as Log +import java.io.File +import java.io.RandomAccessFile +import java.nio.ByteBuffer +import java.nio.ByteOrder + +/** + * Pre-renders a custom audio track 
into a single, gap-less PCM WAV file + * that is ready to be inserted as ONE EditedMediaItem in the Media3 + * composition. + * + * This is the core fix for audible clicks/gaps that occurred at every + * loop boundary and silence/audio transition with the previous + * implementation, where each loop iteration and silence segment was a + * separate `EditedMediaItem`. Each item boundary forced AAC encoder + * frame realignment, producing audible artifacts. + * + * The output file contains, in order: + * 1. Leading silence (matching `compositionStartUs`) + * 2. The trimmed source audio looped (or played once) to cover + * `compositionDurationUs`, with sample-exact tail trimming + * 3. Trailing silence (matching `videoDurationUs - compositionStartUs - + * compositionDurationUs`) + * + * The output sample rate, channel count and bit depth match the decoder + * output of the source file. Float PCM is converted to 16-bit signed PCM. + * Final resampling/mixing happens later inside Media3's encoder pipeline. + */ +@UnstableApi +object AudioPreRenderer { + + /** + * Result of a successful pre-render operation. + * + * @property outputFile The pre-rendered PCM WAV file (caller is + * responsible for deleting it when no longer needed). + * @property sampleRate Sample rate of the WAV body (Hz). + * @property channelCount Number of channels in the WAV body. + */ + data class Result( + val outputFile: File, + val sampleRate: Int, + val channelCount: Int + ) + + /** + * Pre-renders the audio track described by the parameters. + * + * @param context Android context (used for `cacheDir`). + * @param audioPath Absolute path to the source audio file. + * @param audioStartUs Trim start within the source (microseconds, >=0). + * @param audioEndUs Trim end within the source (microseconds, null + * = use full source duration). + * @param loop If true, the trimmed window repeats to fill + * `compositionDurationUs`. 
If false, plays once and any remaining + * composition time is filled with silence. + * @param compositionStartUs Where on the composition timeline the + * audio body should start. The output file contains this much + * leading silence. + * @param compositionDurationUs How long the audio body should sound + * on the composition timeline. + * @param videoDurationUs Total duration of the composition (used to + * determine trailing silence). + * @return [Result] on success, null on failure (file missing, decode + * error, invalid parameters). + */ + fun render( + context: Context, + audioPath: String, + audioStartUs: Long, + audioEndUs: Long?, + loop: Boolean, + compositionStartUs: Long, + compositionDurationUs: Long, + videoDurationUs: Long + ): Result? { + val sourceFile = File(audioPath) + if (!sourceFile.exists()) { + Log.e(RENDER_TAG, "AudioPreRenderer: source file not found: $audioPath") + return null + } + + if (compositionDurationUs <= 0L) { + Log.w(RENDER_TAG, "AudioPreRenderer: compositionDurationUs <= 0, skipping") + return null + } + + // Step 1: Decode the trimmed source range into a PCM byte array. + val decoded = try { + decodeRange(audioPath, audioStartUs.coerceAtLeast(0L), audioEndUs) + } catch (e: Exception) { + Log.e(RENDER_TAG, "AudioPreRenderer: decode failed: ${e.message}") + return null + } ?: return null + + if (decoded.pcmBytes.isEmpty()) { + Log.e(RENDER_TAG, "AudioPreRenderer: decoder produced no PCM data") + return null + } + + val sampleRate = decoded.sampleRate + val channelCount = decoded.channelCount + val bytesPerFrame = channelCount * 2 // 16-bit PCM + + // Step 2: Compute byte sizes for leading silence, audio body and + // trailing silence using the native sample rate. 
+ val leadingSilenceBytes = alignToFrame( + usToBytes(compositionStartUs, sampleRate, bytesPerFrame), + bytesPerFrame + ) + val bodyBytes = alignToFrame( + usToBytes(compositionDurationUs, sampleRate, bytesPerFrame), + bytesPerFrame + ) + val totalCompositionBytes = leadingSilenceBytes + bodyBytes + val totalVideoBytes = alignToFrame( + usToBytes(videoDurationUs, sampleRate, bytesPerFrame), + bytesPerFrame + ) + val trailingSilenceBytes = (totalVideoBytes - totalCompositionBytes) + .coerceAtLeast(0L) + + // Step 3: Open the output WAV file and stream the data. + val outputFile = File( + context.cacheDir, + "prerender_audio_${System.currentTimeMillis()}_${System.nanoTime()}.wav" + ) + + try { + RandomAccessFile(outputFile, "rw").use { raf -> + writeWavHeader(raf, sampleRate, channelCount, dataSize = 0) + + writeSilence(raf, leadingSilenceBytes) + + writeAudioBody( + raf = raf, + sourcePcm = decoded.pcmBytes, + targetBytes = bodyBytes, + loop = loop, + bytesPerFrame = bytesPerFrame + ) + + writeSilence(raf, trailingSilenceBytes) + + // Update RIFF/data chunk sizes in the header. 
+ val totalDataBytes = leadingSilenceBytes + + actualBodyBytesWritten(decoded.pcmBytes.size.toLong(), bodyBytes, loop) + + trailingSilenceBytes + updateWavSizes(raf, totalDataBytes) + } + } catch (e: Exception) { + Log.e(RENDER_TAG, "AudioPreRenderer: write failed: ${e.message}") + outputFile.delete() + return null + } + + Log.d( + RENDER_TAG, + "AudioPreRenderer: rendered ${outputFile.length()} bytes, " + + "${sampleRate}Hz x ${channelCount}ch, " + + "leadSilence=${compositionStartUs / 1000}ms, " + + "body=${compositionDurationUs / 1000}ms, " + + "loop=$loop" + ) + + return Result(outputFile, sampleRate, channelCount) + } + + // --------------------------------------------------------------------- + // Internal: decoding + // --------------------------------------------------------------------- + + private data class DecodedAudio( + val pcmBytes: ByteArray, + val sampleRate: Int, + val channelCount: Int + ) + + /** + * Decodes the audio range `[startUs, endUs)` from `path` into a + * 16-bit signed little-endian PCM byte array. + * + * Float PCM is converted to int16. Output sample rate / channel + * count match the decoder output. + */ + private fun decodeRange( + path: String, + startUs: Long, + endUs: Long? + ): DecodedAudio? { + val extractor = MediaExtractor() + var decoder: MediaCodec? = null + + try { + extractor.setDataSource(path) + + var audioTrackIndex = -1 + var inputFormat: MediaFormat? 
= null
+            // Pick the first track whose MIME type is audio/*.
+            for (i in 0 until extractor.trackCount) {
+                val format = extractor.getTrackFormat(i)
+                val mime = format.getString(MediaFormat.KEY_MIME) ?: continue
+                if (mime.startsWith("audio/")) {
+                    audioTrackIndex = i
+                    inputFormat = format
+                    break
+                }
+            }
+
+            if (audioTrackIndex < 0 || inputFormat == null) {
+                Log.e(RENDER_TAG, "AudioPreRenderer: no audio track in $path")
+                return null
+            }
+
+            extractor.selectTrack(audioTrackIndex)
+            if (startUs > 0) {
+                // Seek to the sync sample at or BEFORE startUs. The output
+                // loop below discards the decoded samples that precede
+                // startUs; it cannot recover samples if decoding begins
+                // after startUs (which SEEK_TO_CLOSEST_SYNC may do), so the
+                // trimmed audio would silently start late.
+                extractor.seekTo(startUs, MediaExtractor.SEEK_TO_PREVIOUS_SYNC)
+            }
+
+            // Non-null: the track was selected above because its MIME
+            // string starts with "audio/".
+            val mime = inputFormat.getString(MediaFormat.KEY_MIME)!!
+            decoder = MediaCodec.createDecoderByType(mime)
+            decoder.configure(inputFormat, null, null, 0)
+            decoder.start()
+
+            // Start from the extractor's track format, then prefer whatever
+            // the decoder reports as its output format. Both may still be
+            // replaced later by INFO_OUTPUT_FORMAT_CHANGED.
+            var sampleRate = inputFormat.getInteger(MediaFormat.KEY_SAMPLE_RATE)
+            var channelCount = inputFormat.getInteger(MediaFormat.KEY_CHANNEL_COUNT)
+
+            val decoderInitialFormat = decoder.outputFormat
+            if (decoderInitialFormat.containsKey(MediaFormat.KEY_SAMPLE_RATE)) {
+                sampleRate = decoderInitialFormat.getInteger(MediaFormat.KEY_SAMPLE_RATE)
+            }
+            if (decoderInitialFormat.containsKey(MediaFormat.KEY_CHANNEL_COUNT)) {
+                channelCount = decoderInitialFormat.getInteger(MediaFormat.KEY_CHANNEL_COUNT)
+            }
+            // Unknown encoding (null) is treated as 16-bit integer PCM.
+            var isFloatPcm = readIsFloatPcm(decoderInitialFormat) ?: false
+
+            val pcmOutput = java.io.ByteArrayOutputStream()
+            val effectiveEndUs = endUs ?: Long.MAX_VALUE
+            val timeoutUs = 10_000L
+            var inputEos = false
+            var outputEos = false
+
+            // Becomes true once the first PCM bytes have been written.
+            // Until then every output buffer is checked against `startUs`
+            // and its leading samples are skipped, so the resulting buffer
+            // is sample-aligned with the requested trim.
+            var hasCrossedStart = false
+
+            while (!outputEos) {
+                if (!inputEos) {
+                    val inputBufferId = decoder.dequeueInputBuffer(timeoutUs)
+                    if (inputBufferId >= 0) {
+                        val inputBuffer = decoder.getInputBuffer(inputBufferId)!!
+ inputBuffer.clear() + + val sampleSize = extractor.readSampleData(inputBuffer, 0) + val presentationTimeUs = extractor.sampleTime + + if (sampleSize < 0 || presentationTimeUs > effectiveEndUs) { + decoder.queueInputBuffer( + inputBufferId, 0, 0, 0, + MediaCodec.BUFFER_FLAG_END_OF_STREAM + ) + inputEos = true + } else { + decoder.queueInputBuffer( + inputBufferId, 0, sampleSize, presentationTimeUs, 0 + ) + extractor.advance() + } + } + } + + val info = MediaCodec.BufferInfo() + val outputBufferId = decoder.dequeueOutputBuffer(info, timeoutUs) + when { + outputBufferId == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED -> { + val newFormat = decoder.outputFormat + if (newFormat.containsKey(MediaFormat.KEY_SAMPLE_RATE)) { + sampleRate = newFormat.getInteger(MediaFormat.KEY_SAMPLE_RATE) + } + if (newFormat.containsKey(MediaFormat.KEY_CHANNEL_COUNT)) { + channelCount = newFormat.getInteger(MediaFormat.KEY_CHANNEL_COUNT) + } + readIsFloatPcm(newFormat)?.let { isFloatPcm = it } + } + + outputBufferId >= 0 -> { + val outputBuffer = decoder.getOutputBuffer(outputBufferId)!! + + if (info.flags and MediaCodec.BUFFER_FLAG_END_OF_STREAM != 0) { + outputEos = true + } + + if (info.size > 0) { + val bufferStartUs = info.presentationTimeUs + val bytesPerFrameOut = channelCount * 2 + val frameDurationUs = + if (sampleRate > 0) 1_000_000.0 / sampleRate else 0.0 + + outputBuffer.position(info.offset) + outputBuffer.limit(info.offset + info.size) + + // Convert decoder bytes to int16 PCM bytes. + val pcmChunk = if (isFloatPcm) { + convertFloatToInt16(outputBuffer, info.size) + } else { + val arr = ByteArray(info.size) + outputBuffer.get(arr) + arr + } + + // Determine how many leading bytes to skip so + // the output starts exactly at `startUs`. 
+ val skipBytes = if (!hasCrossedStart) { + val bytesPerFrameLocal = bytesPerFrameOut.coerceAtLeast(1) + val deltaUs = (startUs - bufferStartUs).coerceAtLeast(0L) + val rawSkip = (deltaUs * sampleRate / 1_000_000L) * + bytesPerFrameLocal + rawSkip.coerceAtMost(pcmChunk.size.toLong()).toInt() + } else 0 + + // Determine how many trailing bytes to drop so + // the output ends exactly at `endUs`. + val dropBytes = if (effectiveEndUs != Long.MAX_VALUE) { + val bufferEndUs = bufferStartUs + + ((pcmChunk.size / bytesPerFrameOut) * frameDurationUs).toLong() + if (bufferEndUs > effectiveEndUs) { + val overUs = bufferEndUs - effectiveEndUs + val rawDrop = (overUs * sampleRate / 1_000_000L) * + bytesPerFrameOut + rawDrop.coerceAtMost((pcmChunk.size - skipBytes).toLong()) + .toInt() + } else 0 + } else 0 + + val writeLen = pcmChunk.size - skipBytes - dropBytes + if (writeLen > 0) { + pcmOutput.write(pcmChunk, skipBytes, writeLen) + hasCrossedStart = true + } + } + + decoder.releaseOutputBuffer(outputBufferId, false) + + if (effectiveEndUs != Long.MAX_VALUE && + info.presentationTimeUs >= effectiveEndUs + ) { + // We have decoded past the requested end; signal + // EOS so we can finish quickly. + outputEos = true + } + } + } + } + + return DecodedAudio( + pcmBytes = pcmOutput.toByteArray(), + sampleRate = sampleRate, + channelCount = channelCount + ) + } finally { + try { + decoder?.stop() + } catch (_: Exception) { + } + try { + decoder?.release() + } catch (_: Exception) { + } + try { + extractor.release() + } catch (_: Exception) { + } + } + } + + private fun readIsFloatPcm(format: MediaFormat): Boolean? 
{ + if (Build.VERSION.SDK_INT < Build.VERSION_CODES.N) return null + if (!format.containsKey(MediaFormat.KEY_PCM_ENCODING)) return null + return when (format.getInteger(MediaFormat.KEY_PCM_ENCODING)) { + AudioFormat.ENCODING_PCM_FLOAT -> true + else -> false + } + } + + private fun convertFloatToInt16(buffer: ByteBuffer, byteCount: Int): ByteArray { + val floatCount = byteCount / 4 + val out = ByteBuffer.allocate(floatCount * 2).order(ByteOrder.LITTLE_ENDIAN) + val savedOrder = buffer.order() + buffer.order(ByteOrder.nativeOrder()) + for (i in 0 until floatCount) { + val sample = buffer.float.coerceIn(-1.0f, 1.0f) + out.putShort((sample * 32767.0f).toInt().toShort()) + } + buffer.order(savedOrder) + return out.array() + } + + // --------------------------------------------------------------------- + // Internal: WAV writing + // --------------------------------------------------------------------- + + private fun writeWavHeader( + raf: RandomAccessFile, + sampleRate: Int, + channelCount: Int, + dataSize: Int + ) { + val bitsPerSample = 16 + val byteRate = sampleRate * channelCount * bitsPerSample / 8 + val blockAlign = (channelCount * bitsPerSample / 8).toShort() + + val header = ByteBuffer.allocate(44).order(ByteOrder.LITTLE_ENDIAN) + header.put("RIFF".toByteArray(Charsets.US_ASCII)) + header.putInt(36 + dataSize) // RIFF chunk size + header.put("WAVE".toByteArray(Charsets.US_ASCII)) + header.put("fmt ".toByteArray(Charsets.US_ASCII)) + header.putInt(16) // fmt subchunk size (PCM) + header.putShort(1) // PCM format + header.putShort(channelCount.toShort()) + header.putInt(sampleRate) + header.putInt(byteRate) + header.putShort(blockAlign) + header.putShort(bitsPerSample.toShort()) + header.put("data".toByteArray(Charsets.US_ASCII)) + header.putInt(dataSize) + + raf.seek(0) + raf.write(header.array()) + } + + private fun updateWavSizes(raf: RandomAccessFile, dataSize: Long) { + val safeDataSize = dataSize.coerceAtMost(Int.MAX_VALUE.toLong()).toInt() + // RIFF chunk 
+        // size at offset 4 (little-endian).
+        raf.seek(4)
+        raf.write(intToLittleEndian(36 + safeDataSize))
+        // data chunk size at offset 40 (little-endian).
+        raf.seek(40)
+        raf.write(intToLittleEndian(safeDataSize))
+        // Move back to end so subsequent writes append correctly.
+        raf.seek(raf.length())
+    }
+
+    /**
+     * Appends `byteCount` zero bytes (digital silence for signed 16-bit
+     * PCM) at the current file position. Does nothing for counts <= 0.
+     */
+    private fun writeSilence(raf: RandomAccessFile, byteCount: Long) {
+        if (byteCount <= 0L) return
+        // A freshly allocated ByteArray is all zeros; reuse it per write.
+        val chunk = ByteArray(8192)
+        var remaining = byteCount
+        while (remaining > 0) {
+            val toWrite = minOf(remaining, chunk.size.toLong()).toInt()
+            raf.write(chunk, 0, toWrite)
+            remaining -= toWrite
+        }
+    }
+
+    /**
+     * Writes the audio body at the current file position.
+     *
+     * Exactly `targetBytes` (rounded down to a whole frame) are written
+     * whenever `sourcePcm` is non-empty and `targetBytes` is positive:
+     * - `loop == true`: the source repeats back-to-back and the last
+     *   repetition is cut at the target length.
+     * - `loop == false`: the source plays once (cut at the target length if
+     *   longer) and the remainder is padded with silence, so the silence
+     *   that follows the body still starts at the intended offset.
+     *
+     * Nothing is written when `sourcePcm` is empty or `targetBytes <= 0`.
+     *
+     * @param raf Output file, positioned where the body should start.
+     * @param sourcePcm Decoded 16-bit PCM of the trimmed source.
+     * @param targetBytes Desired body length in bytes.
+     * @param loop Whether the source repeats to fill the body.
+     * @param bytesPerFrame Size of one PCM frame (channels * 2 bytes).
+     */
+    private fun writeAudioBody(
+        raf: RandomAccessFile,
+        sourcePcm: ByteArray,
+        targetBytes: Long,
+        loop: Boolean,
+        bytesPerFrame: Int
+    ) {
+        if (sourcePcm.isEmpty() || targetBytes <= 0L) return
+
+        val frameSize = bytesPerFrame.coerceAtLeast(1)
+        // Align targetBytes to frame boundary (defensive).
+        val alignedTarget = (targetBytes / frameSize) * frameSize
+        // Use only whole frames of the source: a trailing partial frame
+        // would shift the channel/byte alignment of everything after it.
+        val usableSourceBytes = (sourcePcm.size / frameSize) * frameSize
+        if (usableSourceBytes == 0) {
+            // Less than one frame of audio: keep the timeline intact.
+            writeSilence(raf, alignedTarget)
+            return
+        }
+
+        if (loop) {
+            var written = 0L
+            while (written < alignedTarget) {
+                val remaining = alignedTarget - written
+                val toWrite = minOf(remaining, usableSourceBytes.toLong()).toInt()
+                raf.write(sourcePcm, 0, toWrite)
+                written += toWrite
+            }
+        } else {
+            val toWrite = minOf(alignedTarget, usableSourceBytes.toLong()).toInt()
+            raf.write(sourcePcm, 0, toWrite)
+            // Fill the rest of the play range with silence so the total
+            // track length does not shrink when the source is short.
+            writeSilence(raf, alignedTarget - toWrite)
+        }
+    }
+
+    /**
+     * Number of body bytes that [writeAudioBody] emits for the same
+     * arguments, used to compute the WAV header sizes.
+     *
+     * `targetBytes` is expected to be frame-aligned already. The body is
+     * always filled up to the target (looped audio, or audio followed by
+     * silence), so the result does not depend on `loop`; the parameter is
+     * kept so existing call sites stay valid.
+     */
+    @Suppress("UNUSED_PARAMETER")
+    private fun actualBodyBytesWritten(
+        sourceSize: Long,
+        targetBytes: Long,
+        loop: Boolean
+    ): Long {
+        if (sourceSize <= 0L || targetBytes <= 0L) return 0L
+        return targetBytes
+    }
+
+    // ---------------------------------------------------------------------
+    // Internal: math helpers
+    // ---------------------------------------------------------------------
+
+    private fun usToBytes(durationUs: Long, sampleRate: Int, bytesPerFrame: Int): Long {
+        if (durationUs <= 0L) return 0L
+        // (durationUs * sampleRate / 1_000_000) frames * bytesPerFrame
+        // The product is computed in Double, so it cannot overflow Long;
+        // the frame count is then truncated toward zero.
+ val frames = (durationUs.toDouble() * sampleRate / 1_000_000.0).toLong() + return frames * bytesPerFrame + } + + private fun alignToFrame(byteCount: Long, bytesPerFrame: Int): Long { + if (bytesPerFrame <= 1) return byteCount + return (byteCount / bytesPerFrame) * bytesPerFrame + } + + private fun intToLittleEndian(value: Int): ByteArray { + return ByteArray(4) { i -> ((value ushr (8 * i)) and 0xFF).toByte() } + } +} diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/AudioSequenceBuilder.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/AudioSequenceBuilder.kt index 8a72386..c9adf1e 100644 --- a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/AudioSequenceBuilder.kt +++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/AudioSequenceBuilder.kt @@ -1,11 +1,10 @@ package ch.waio.pro_video_editor.src.features.render.helpers import RENDER_TAG +import android.content.Context import android.net.Uri import androidx.media3.common.MediaItem import androidx.media3.common.audio.AudioProcessor -import androidx.media3.common.audio.ChannelMixingAudioProcessor -import androidx.media3.common.audio.ChannelMixingMatrix import androidx.media3.common.util.UnstableApi import androidx.media3.transformer.EditedMediaItem import androidx.media3.transformer.EditedMediaItemSequence @@ -16,16 +15,36 @@ import java.io.File /** * Builder class for creating custom audio sequences in video compositions. * - * Handles looping, volume control, and channel normalization for custom - * audio tracks that play alongside or replace original video audio. + * Pre-renders the entire audio track (leading silence + looped/clipped + * source + trailing silence) into a single PCM WAV file via + * [AudioPreRenderer], then exposes it as ONE [EditedMediaItem] inside an + * [EditedMediaItemSequence]. 
+ * + * Using a single item per audio track removes the AAC frame realignment + * artifacts (clicks/gaps) that previously occurred at every loop boundary + * and silence/audio transition when the sequence contained multiple + * items. + * + * The generated WAV file path is returned from [build] alongside the + * sequence so the caller can register it for cleanup once the + * Transformer export finishes. */ @UnstableApi class AudioSequenceBuilder( + private val context: Context, private val audioPath: String, private val videoDurationUs: Long ) { - private var volume: Float = 1.0f - private var needsNormalization: Boolean = false + /** + * The result of a successful [build]: the audio sequence ready to be + * added to the composition, plus the temporary WAV file that must be + * deleted by the caller after rendering completes. + */ + data class BuildResult( + val sequence: EditedMediaItemSequence, + val temporaryFile: File + ) + private var loopAudio: Boolean = true private var startTimeUs: Long = 0 private var audioEndTimeUs: Long? = null @@ -33,30 +52,7 @@ class AudioSequenceBuilder( private var compositionEndTimeUs: Long? = null /** - * Sets the volume multiplier for the custom audio. - * - * @param volume Volume factor (0.0=silent, 1.0=unchanged, >1.0=amplified) - */ - fun setVolume(volume: Float): AudioSequenceBuilder { - this.volume = volume - return this - } - - /** - * Enables channel normalization (convert to stereo). - * - * Should be enabled when video clips have different channel counts - * to ensure compatibility. - */ - fun setNormalization(enabled: Boolean): AudioSequenceBuilder { - this.needsNormalization = enabled - return this - } - - /** - * Sets whether the audio should loop to match video duration. - * - * @param loop If true, audio repeats; if false, plays once + * Sets whether the audio should loop to fill the play range. 
*/ fun setLoop(loop: Boolean): AudioSequenceBuilder { this.loopAudio = loop @@ -64,19 +60,15 @@ class AudioSequenceBuilder( } /** - * Sets the start time offset for the custom audio. - * - * @param startTimeUs Start time in microseconds from the beginning of the audio file + * Sets the start time offset within the source audio file. */ fun setStartTime(startTimeUs: Long?): AudioSequenceBuilder { - this.startTimeUs = startTimeUs ?: 0 + this.startTimeUs = (startTimeUs ?: 0L).coerceAtLeast(0L) return this } /** - * Sets the end time within the audio source file. - * - * @param endTimeUs End time in microseconds within the audio file (null = use full file) + * Sets the end time within the source audio file. */ fun setAudioEndTime(endTimeUs: Long?): AudioSequenceBuilder { this.audioEndTimeUs = endTimeUs @@ -84,9 +76,8 @@ class AudioSequenceBuilder( } /** - * Sets when this audio track should start playing in the composition timeline. - * - * @param startTimeUs Composition time in microseconds (null = from beginning) + * Sets when this audio track should start playing on the composition + * timeline. */ fun setCompositionStartTime(startTimeUs: Long?): AudioSequenceBuilder { this.compositionStartTimeUs = startTimeUs @@ -94,9 +85,8 @@ class AudioSequenceBuilder( } /** - * Sets when this audio track should stop playing in the composition timeline. - * - * @param endTimeUs Composition time in microseconds (null = until end) + * Sets when this audio track should stop playing on the composition + * timeline. */ fun setCompositionEndTime(endTimeUs: Long?): AudioSequenceBuilder { this.compositionEndTimeUs = endTimeUs @@ -104,358 +94,51 @@ class AudioSequenceBuilder( } /** - * Builds the audio sequence with looping to match video duration. + * Builds the audio sequence by pre-rendering the source into a single + * gap-less PCM WAV file. 
* - * @return EditedMediaItemSequence for custom audio, or null if file not found + * @return [BuildResult] with the sequence and the temporary file to + * delete after export, or null if the audio could not be prepared. */ - fun build(): EditedMediaItemSequence? { - Log.d(RENDER_TAG, "Building custom audio sequence: $audioPath") - Log.d(RENDER_TAG, "Custom audio volume: $volume") - if (startTimeUs > 0) { - Log.d(RENDER_TAG, "Custom audio start offset: ${startTimeUs / 1000} ms") - } - - val audioFile = File(audioPath) - if (!audioFile.exists()) { + fun build(): BuildResult? { + val sourceFile = File(audioPath) + if (!sourceFile.exists()) { Log.e(RENDER_TAG, "Custom audio file not found: $audioPath") return null } - val totalAudioDurationUs = MediaInfoExtractor.getAudioDuration(audioPath) - if (totalAudioDurationUs == 0L) { - Log.w(RENDER_TAG, "Cannot determine custom audio duration") - return null - } + val compStart = (compositionStartTimeUs ?: 0L).coerceAtLeast(0L) + val compEnd = (compositionEndTimeUs ?: videoDurationUs) + .coerceAtMost(videoDurationUs) + val playDurationUs = (compEnd - compStart).coerceAtLeast(0L) - // Calculate effective audio duration considering source clipping - val sourceEndUs = audioEndTimeUs?.coerceAtMost(totalAudioDurationUs) ?: totalAudioDurationUs - val effectiveAudioDurationUs = sourceEndUs - startTimeUs - if (effectiveAudioDurationUs <= 0) { - Log.w( - RENDER_TAG, - "Start time ($startTimeUs us) exceeds audio end ($sourceEndUs us)" - ) + if (playDurationUs <= 0L) { + Log.w(RENDER_TAG, "Custom audio play duration <= 0, skipping track") return null } - // Calculate target duration based on composition placement - val compStart = compositionStartTimeUs ?: 0L - val compEnd = compositionEndTimeUs ?: videoDurationUs - val targetDurationUs = (compEnd - compStart).coerceAtLeast(0L) - - // Build audio effects - val audioProcessors = buildAudioProcessors() - val audioEffects = Effects(audioProcessors, emptyList()) - - // Create audio content 
items with looping or single play - val audioContentItems = if (loopAudio) { - createLoopedAudioItems( - audioFile, - sourceEndUs, - effectiveAudioDurationUs, - targetDurationUs, - audioEffects - ) - } else { - createSingleAudioItem(audioFile, sourceEndUs, effectiveAudioDurationUs, targetDurationUs, audioEffects) - } - - val allItems = mutableListOf() - - // Add leading silence if audio starts after composition time 0. - // Media3 parallel sequences always start at time 0, so we need - // silence padding to offset the audio to the correct position. - if (compStart > 0) { - val silentItem = createSilentAudioItem(compStart, audioEffects) - if (silentItem != null) { - allItems.add(silentItem) - Log.d(RENDER_TAG, "Added ${compStart / 1000}ms leading silence for composition offset") - } - } - - allItems.addAll(audioContentItems) - - // Add trailing silence so the sequence spans the full video duration. - // This ensures all parallel sequences have matching lengths. - val totalContentDurationUs = compStart + targetDurationUs - if (totalContentDurationUs < videoDurationUs) { - val trailingDurationUs = videoDurationUs - totalContentDurationUs - val silentItem = createSilentAudioItem(trailingDurationUs, audioEffects) - if (silentItem != null) { - allItems.add(silentItem) - Log.d(RENDER_TAG, "Added ${trailingDurationUs / 1000}ms trailing silence") - } - } - - return EditedMediaItemSequence.Builder(allItems).build() - } - - /** - * Builds audio processors for custom audio (channel mixing + volume). - * - * Uses ITU-R BS.775 standard coefficients for multi-channel downmixing. 
- */ - private fun buildAudioProcessors(): List { - val processors = mutableListOf() - - // Add channel mixing if needed - if (needsNormalization) { - val channelMixer = ChannelMixingAudioProcessor() - - // 7.1 Surround (8 channels) to Stereo (2 channels) - // Channel order: FL, FR, FC, LFE, BL, BR, SL, SR - val eightToTwo = floatArrayOf( - 1.0f, 0.0f, 0.707f, 0.0f, 0.707f, 0.0f, 0.707f, 0.0f, // Left output - 0.0f, 1.0f, 0.707f, 0.0f, 0.0f, 0.707f, 0.0f, 0.707f // Right output - ) - channelMixer.putChannelMixingMatrix( - ChannelMixingMatrix(8, 2, eightToTwo) - ) - - // 5.1 Surround (6 channels) to Stereo (2 channels) - // ITU-R BS.775 standard - val sixToTwo = floatArrayOf( - 1.0f, 0.0f, 0.707f, 0.0f, 0.707f, 0.0f, // Left output - 0.0f, 1.0f, 0.707f, 0.0f, 0.0f, 0.707f // Right output - ) - channelMixer.putChannelMixingMatrix( - ChannelMixingMatrix(6, 2, sixToTwo) - ) - - // Quad (4 channels) to Stereo (2 channels) - val fourToTwo = floatArrayOf( - 1.0f, 0.0f, 0.707f, 0.0f, // Left output - 0.0f, 1.0f, 0.0f, 0.707f // Right output - ) - channelMixer.putChannelMixingMatrix( - ChannelMixingMatrix(4, 2, fourToTwo) - ) - - // Stereo (2 channels) to Stereo (2 channels) - passthrough - channelMixer.putChannelMixingMatrix( - ChannelMixingMatrix.createForConstantGain(2, 2) - ) - - // Mono (1 channel) to Stereo (2 channels) - channelMixer.putChannelMixingMatrix( - ChannelMixingMatrix.createForConstantGain(1, 2) - ) - - processors.add(channelMixer) - Log.d(RENDER_TAG, "Added channel normalization for custom audio") - } - - // NOTE: Volume control is now handled by VolumeControlAudioMixerFactory - // because Media3's AudioProcessors on EditedMediaItems are NOT invoked - // when using parallel sequences (multiple EditedMediaItemSequence). - // The VolumeAudioProcessor was being configured but never actually processing audio. - // See VolumeControlAudioMixer which applies volumes during the mixing stage. 
- if (volume != 1.0f) { - Log.d( - RENDER_TAG, - "Custom audio volume: ${volume}x (applied via VolumeControlAudioMixer)" - ) - } - - return processors - } - - /** - * Creates audio items with looping to match target duration. - * First iteration uses startTimeUs offset, subsequent loops start from beginning. - */ - private fun createLoopedAudioItems( - audioFile: File, - sourceEndUs: Long, - effectiveAudioDurationUs: Long, - targetDurationUs: Long, - effects: Effects - ): List { - val audioItems = mutableListOf() - - if (effectiveAudioDurationUs <= 0 || targetDurationUs <= 0) { - // Fallback: add audio once without duration constraints - val audioItem = createAudioItem(audioFile, startTimeUs, null, effects) - audioItems.add(audioItem) - return audioItems - } - - var remainingDurationUs = targetDurationUs - var loopCount = 0 - var isFirstLoop = true - - while (remainingDurationUs > 0) { - loopCount++ - - // First loop uses startTimeUs offset, subsequent loops start from 0 - val loopStartUs = if (isFirstLoop) startTimeUs else 0L - val loopEndUs = if (isFirstLoop) sourceEndUs else sourceEndUs - val loopAudioDurationUs = - if (isFirstLoop) effectiveAudioDurationUs else (sourceEndUs - 0L) - - val endPositionUs = if (remainingDurationUs < loopAudioDurationUs) { - Log.d( - RENDER_TAG, - "Loop $loopCount: Trimming audio to ${remainingDurationUs / 1000} ms (final loop)" - ) - loopStartUs + remainingDurationUs - } else { - Log.d( - RENDER_TAG, - "Loop $loopCount: Using audio duration ${loopAudioDurationUs / 1000} ms" + - if (isFirstLoop && startTimeUs > 0) " (starting at ${startTimeUs / 1000} ms)" else "" - ) - if (audioEndTimeUs != null) loopEndUs else null - } - - val audioItem = createAudioItem(audioFile, loopStartUs, endPositionUs, effects) - audioItems.add(audioItem) - remainingDurationUs -= loopAudioDurationUs - isFirstLoop = false - } - - Log.d(RENDER_TAG, "Custom audio will loop $loopCount times to match target duration") - return audioItems - } - - /** - * Creates a 
single audio item (no looping). Trims if audio is longer than target duration. - */ - private fun createSingleAudioItem( - audioFile: File, - sourceEndUs: Long, - effectiveAudioDurationUs: Long, - targetDurationUs: Long, - effects: Effects - ): List { - val endPositionUs = if (effectiveAudioDurationUs > targetDurationUs && targetDurationUs > 0) { - Log.d(RENDER_TAG, "Trimming audio to ${targetDurationUs / 1000} ms (no loop)") - startTimeUs + targetDurationUs - } else if (audioEndTimeUs != null) { - Log.d( - RENDER_TAG, "Playing audio once (${effectiveAudioDurationUs / 1000} ms, no loop)" + - if (startTimeUs > 0) " starting at ${startTimeUs / 1000} ms" else "" - ) - sourceEndUs - } else { - Log.d( - RENDER_TAG, "Playing audio once (${effectiveAudioDurationUs / 1000} ms, no loop)" + - if (startTimeUs > 0) " starting at ${startTimeUs / 1000} ms" else "" - ) - null - } - return listOf(createAudioItem(audioFile, startTimeUs, endPositionUs, effects)) - } - - /** - * Creates a single audio EditedMediaItem with start offset and optional end position. 
- */ - private fun createAudioItem( - audioFile: File, - startPositionUs: Long, - endPositionUs: Long?, - effects: Effects - ): EditedMediaItem { - val mediaItemBuilder = MediaItem.Builder().setUri(Uri.fromFile(audioFile)) - - if (startPositionUs > 0 || endPositionUs != null) { - val clippingConfig = MediaItem.ClippingConfiguration.Builder() - .setStartPositionMs(startPositionUs / 1000) - - if (endPositionUs != null) { - clippingConfig.setEndPositionMs(endPositionUs / 1000) - } - - mediaItemBuilder.setClippingConfiguration(clippingConfig.build()) - } - - val mediaItem = mediaItemBuilder.build() - return EditedMediaItem.Builder(mediaItem) - .setRemoveVideo(true) - .setEffects(effects) + val preRender = AudioPreRenderer.render( + context = context, + audioPath = audioPath, + audioStartUs = startTimeUs, + audioEndUs = audioEndTimeUs, + loop = loopAudio, + compositionStartUs = compStart, + compositionDurationUs = playDurationUs, + videoDurationUs = videoDurationUs + ) ?: return null + + val mediaItem = MediaItem.Builder() + .setUri(Uri.fromFile(preRender.outputFile)) .build() - } - /** - * Creates a silent audio EditedMediaItem of the specified duration. - * - * Media3 parallel sequences always start at time 0, so we use silence - * to offset audio to the correct composition position. - */ - private fun createSilentAudioItem(durationUs: Long, effects: Effects): EditedMediaItem? { - if (durationUs <= 0) return null - - val silentFile = generateSilentWavFile(durationUs) - if (silentFile == null) { - Log.e(RENDER_TAG, "Failed to create silent audio item") - return null - } - - val mediaItem = MediaItem.Builder().setUri(Uri.fromFile(silentFile)).build() - return EditedMediaItem.Builder(mediaItem) + val editedItem = EditedMediaItem.Builder(mediaItem) .setRemoveVideo(true) - .setEffects(effects) + .setEffects(Effects(emptyList(), emptyList())) .build() - } - /** - * Generates a temporary WAV file containing silence of the specified duration. 
- * - * Creates a valid PCM WAV file with stereo 44100Hz 16-bit silence. - */ - private fun generateSilentWavFile(durationUs: Long): File? { - try { - val sampleRate = 44100 - val channels = 2 - val bitsPerSample = 16 - val bytesPerSample = bitsPerSample / 8 - val numSamples = (sampleRate * durationUs / 1_000_000.0).toInt() - val dataSize = numSamples * channels * bytesPerSample - val fileSize = 36 + dataSize - - val file = File.createTempFile("silence_", ".wav") - file.deleteOnExit() - - file.outputStream().use { out -> - // RIFF header - out.write("RIFF".toByteArray(Charsets.US_ASCII)) - out.write(toLittleEndian(fileSize, 4)) - out.write("WAVE".toByteArray(Charsets.US_ASCII)) - - // fmt subchunk - out.write("fmt ".toByteArray(Charsets.US_ASCII)) - out.write(toLittleEndian(16, 4)) // Subchunk1Size (PCM) - out.write(toLittleEndian(1, 2)) // AudioFormat (PCM = 1) - out.write(toLittleEndian(channels, 2)) - out.write(toLittleEndian(sampleRate, 4)) - out.write(toLittleEndian(sampleRate * channels * bytesPerSample, 4)) - out.write(toLittleEndian(channels * bytesPerSample, 2)) - out.write(toLittleEndian(bitsPerSample, 2)) - - // data subchunk - out.write("data".toByteArray(Charsets.US_ASCII)) - out.write(toLittleEndian(dataSize, 4)) - - // Write silence (all zeros) - val buffer = ByteArray(8192) - var remaining = dataSize - while (remaining > 0) { - val toWrite = minOf(remaining, buffer.size) - out.write(buffer, 0, toWrite) - remaining -= toWrite - } - } - - Log.d(RENDER_TAG, "Generated ${durationUs / 1000}ms silent WAV: ${file.absolutePath}") - return file - } catch (e: Exception) { - Log.e(RENDER_TAG, "Failed to generate silent WAV: ${e.message}") - return null - } - } + val sequence = EditedMediaItemSequence.Builder(editedItem).build() - /** Converts an integer to little-endian byte array. 
*/ - private fun toLittleEndian(value: Int, numBytes: Int): ByteArray { - return ByteArray(numBytes) { i -> ((value shr (8 * i)) and 0xFF).toByte() } + return BuildResult(sequence, preRender.outputFile) } } diff --git a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/CompositionBuilder.kt b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/CompositionBuilder.kt index a83ae3b..53a1f6a 100644 --- a/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/CompositionBuilder.kt +++ b/android/src/main/kotlin/ch/waio/pro_video_editor/src/features/render/helpers/CompositionBuilder.kt @@ -10,6 +10,7 @@ import androidx.media3.transformer.EditedMediaItemSequence import ch.waio.pro_video_editor.src.features.render.models.AudioTrackConfig import ch.waio.pro_video_editor.src.features.render.models.RenderConfig import ch.waio.pro_video_editor.src.shared.logging.PluginLog as Log +import java.io.File /** * Main builder class for creating Media3 Compositions from render configurations. @@ -28,6 +29,13 @@ class CompositionBuilder( private var videoEffects: List = emptyList() private var audioEffects: List = emptyList() + /** + * Temporary files (e.g. pre-rendered audio WAVs) created while building + * the composition. The caller MUST delete these files after the + * Transformer export finishes (success or failure). + */ + val temporaryFiles: MutableList = mutableListOf() + /** * Sets the video effects to apply from EffectsProcessor. */ @@ -107,19 +115,20 @@ class CompositionBuilder( "Created video EditedMediaItemSequence with ${config.videoClips.size} items" ) - // Add audio tracks as separate sequences - Media3 will mix all tracks natively + // Add audio tracks as separate sequences - Media3 will mix all tracks natively. 
+ // Each audio track is pre-rendered to a single gap-less PCM WAV file via + // AudioPreRenderer to avoid encoder frame realignment artifacts (clicks/gaps) + // at loop and silence boundaries. if (hasCustomAudio) { val totalVideoDuration = videoBuilder.calculateTotalDuration() for ((index, track) in config.audioTracks.withIndex()) { Log.d( RENDER_TAG, - "๐ŸŽต Adding audio track $index: path=${track.path}, volume=${track.volume}, loop=${track.loop}" + "๐ŸŽต Pre-rendering audio track $index: path=${track.path}, volume=${track.volume}, loop=${track.loop}" ) - val audioSequence = AudioSequenceBuilder(track.path, totalVideoDuration) - .setVolume(track.volume) - .setNormalization(needsNormalization) + val result = AudioSequenceBuilder(context, track.path, totalVideoDuration) .setLoop(track.loop) .setStartTime(track.audioStartUs) .setAudioEndTime(track.audioEndUs) @@ -127,9 +136,14 @@ class CompositionBuilder( .setCompositionEndTime(track.endUs) .build() - if (audioSequence != null) { - sequences.add(audioSequence) - Log.d(RENDER_TAG, "Audio track $index added (will be mixed natively by Media3)") + if (result != null) { + sequences.add(result.sequence) + temporaryFiles.add(result.temporaryFile) + Log.d( + RENDER_TAG, + "Audio track $index pre-rendered to ${result.temporaryFile.name} " + + "(${result.temporaryFile.length()} bytes)" + ) } } } diff --git a/example/lib/features/render/video_renderer_page.dart b/example/lib/features/render/video_renderer_page.dart index b9880b1..070f4e1 100644 --- a/example/lib/features/render/video_renderer_page.dart +++ b/example/lib/features/render/video_renderer_page.dart @@ -389,6 +389,71 @@ class _VideoRendererPageState extends State { await _renderVideo(data); } + /// **Loop seam stress test** โ€” short clipped audio looped many times. + /// + /// Extracts only a ~1 second window from the source audio file and lets it + /// loop continuously across the full video duration. 
With a typical 21s + /// demo video this produces ~20 loop boundaries โ€” the most aggressive + /// scenario to expose audible clicks/gaps at each loop restart. + /// + /// Use this to A/B compare the audio quality at every seam before and + /// after the seamless audio pre-render implementation. + Future _loopSeamStressShort() async { + final customAudioFile = await _writeAssetAudioToFile( + kVideoEditorExampleAudio1Path, + ); + + var data = VideoRenderData( + videoSegments: [VideoSegment(video: _video, volume: 0)], + audioTracks: [ + VideoAudioTrack( + path: customAudioFile.path, + volume: 1.0, + loop: true, + audioStartTime: const Duration(seconds: 2), + audioEndTime: const Duration(milliseconds: 3000), + ), + ], + ); + + await _renderVideo(data); + } + + /// **Loop seam stress test** โ€” looped audio across multiple video clips. + /// + /// Concatenates the same video three times (~63s total) and lets a + /// short 2-second audio window loop continuously across the entire + /// timeline. This stresses both: + /// - Loop boundaries inside the audio track + /// - Audio continuity across video clip transitions + /// + /// Use this to A/B compare seamless audio behaviour before and after + /// the pre-render implementation. + Future _loopSeamStressMultiClip() async { + final customAudioFile = await _writeAssetAudioToFile( + kVideoEditorExampleAudio1Path, + ); + + var data = VideoRenderData( + videoSegments: [ + VideoSegment(video: _video, volume: 0), + VideoSegment(video: _video, volume: 0), + VideoSegment(video: _video, volume: 0), + ], + audioTracks: [ + VideoAudioTrack( + path: customAudioFile.path, + volume: 1.0, + loop: true, + audioStartTime: const Duration(seconds: 1), + audioEndTime: const Duration(seconds: 3), + ), + ], + ); + + await _renderVideo(data); + } + /// Different volume levels per video segment. 
/// /// This example demonstrates per-clip volume control when concatenating @@ -994,6 +1059,7 @@ class _VideoRendererPageState extends State { await _playerPreview.open(Media(outputPath)); await _playerPreview.play(); + await _playerPreview.setPlaylistMode(.loop); } Future _cancelRender() async { @@ -1349,6 +1415,22 @@ class _VideoRendererPageState extends State { title: const Text('Audio Clip Range'), subtitle: const Text('Extract 3sโ€“8s from audio file'), ), + ListTile( + onTap: _loopSeamStressShort, + leading: const Icon(Icons.repeat_outlined), + title: const Text('Loop Seam Stress (short)'), + subtitle: const Text( + '1s audio window looped across full video โ€” exposes loop clicks', + ), + ), + ListTile( + onTap: _loopSeamStressMultiClip, + leading: const Icon(Icons.repeat_on_outlined), + title: const Text('Loop Seam Stress (multi-clip)'), + subtitle: const Text( + '2s audio window looped across 3 concatenated clips', + ), + ), ListTile( onTap: _perClipVolume, leading: const Icon(Icons.tune_outlined), diff --git a/ios/Classes/src/features/render/RenderVideo.swift b/ios/Classes/src/features/render/RenderVideo.swift index c06f243..972bec8 100644 --- a/ios/Classes/src/features/render/RenderVideo.swift +++ b/ios/Classes/src/features/render/RenderVideo.swift @@ -86,11 +86,17 @@ class RenderVideo { } var outputURL: URL! + var temporaryAudioURLs: [URL] = [] let finalize: () -> Void = { try? cleanup(config.outputPath == nil ? [outputURL] : []) // Clean up transcoded files VideoTranscoder.cleanupTranscodedFiles(transcodedFiles) + // Clean up pre-rendered audio temp files + for url in temporaryAudioURLs { + try? 
FileManager.default.removeItem(at: url) + PluginLog.print("๐Ÿงน Removed pre-rendered audio: \(url.lastPathComponent)") + } } let handleCompletion: (Result) -> Void = { result in @@ -139,13 +145,14 @@ class RenderVideo { var effectsConfig = VideoCompositorConfig() // Use composition helper to merge multiple video clips - let (composition, videoCompData, renderSize, audioMix, sourceTrackID) = + let (composition, videoCompData, renderSize, audioMix, sourceTrackID, audioTempURLs) = try await applyComposition( videoClips: workingConfig.videoClips, videoEffects: effectsConfig, enableAudio: workingConfig.enableAudio, audioTracks: workingConfig.audioTracks ) + temporaryAudioURLs = audioTempURLs var videoCompConfig = videoCompData // Set source track ID for fallback on older iOS versions (e.g., iPhone 7) diff --git a/ios/Classes/src/features/render/helpers/ApplyComposition.swift b/ios/Classes/src/features/render/helpers/ApplyComposition.swift index 98d3080..0142618 100644 --- a/ios/Classes/src/features/render/helpers/ApplyComposition.swift +++ b/ios/Classes/src/features/render/helpers/ApplyComposition.swift @@ -19,6 +19,7 @@ import Foundation /// - CGSize: Final render size (max dimensions from all clips) /// - AVAudioMix?: Audio mix with volume controls (nil if no audio mixing needed) /// - CMPersistentTrackID: The track ID of the video composition track (for fallback on older iOS) +/// - [URL]: Temporary file URLs (e.g. pre-rendered audio WAVs) the caller MUST delete after export /// /// - Throws: NSError if video clips are empty, files don't exist, or tracks can't be loaded. 
func applyComposition( @@ -27,7 +28,7 @@ func applyComposition( enableAudio: Bool, audioTracks: [AudioTrackConfig] ) async throws -> ( - AVMutableComposition, VideoCompositionData, CGSize, AVAudioMix?, CMPersistentTrackID + AVMutableComposition, VideoCompositionData, CGSize, AVAudioMix?, CMPersistentTrackID, [URL] ) { return try await CompositionBuilder(videoClips: videoClips, videoEffects: videoEffects) .setEnableAudio(enableAudio) diff --git a/ios/Classes/src/features/render/helpers/AudioPreRenderer.swift b/ios/Classes/src/features/render/helpers/AudioPreRenderer.swift new file mode 100644 index 0000000..43551d1 --- /dev/null +++ b/ios/Classes/src/features/render/helpers/AudioPreRenderer.swift @@ -0,0 +1,327 @@ +import AVFoundation +import Foundation + +/// Pre-renders a custom audio track into a single, gap-less PCM WAV file +/// that is ready to be inserted into an AVMutableComposition with ONE +/// `insertTimeRange` call. +/// +/// This avoids audible clicks at every loop restart when the source audio +/// is a compressed format (AAC, MP3) and `AVMutableComposition.insertTimeRange` +/// is called multiple times on the source โ€” each call respects encoder +/// priming/padding samples and aligns to compressed-frame boundaries +/// (~1024 samples for AAC, ~1152 for MP3), producing audible artifacts. +/// +/// By decoding to PCM once and looping/trimming on raw samples, every +/// loop boundary is sample-exact and silent transitions are perfectly +/// continuous. +internal enum AudioPreRenderer { + + /// Result of a successful pre-render operation. + struct Result { + /// The pre-rendered PCM WAV file URL. The caller is responsible + /// for deleting this file when no longer needed. + let outputURL: URL + /// Total duration of the pre-rendered audio. + let duration: CMTime + } + + /// Pre-renders the audio described by the parameters. 
+ /// + /// The output file contains exactly `targetBodyDuration` of audio: + /// the trimmed source `[audioStartTime, audioEndTime)` looped (or + /// played once) to fill the duration, with sample-exact tail trim. + /// + /// No silence padding is added โ€” leading/trailing silence on the + /// composition timeline is handled implicitly by inserting this file + /// at the correct `compositionInsertTime`. + /// + /// - Parameters: + /// - audioPath: Absolute path to the source audio file. + /// - audioStartTime: Trim start within the source. + /// - audioEndTime: Trim end within the source (nil = use full + /// source duration). + /// - loop: If true, the trimmed window repeats to fill + /// `targetBodyDuration`. If false, the source plays once and the + /// remaining time is filled with silence. + /// - targetBodyDuration: How long the output audio should sound. + /// - Returns: A [Result] on success, nil on failure. + static func render( + audioPath: String, + audioStartTime: CMTime, + audioEndTime: CMTime?, + loop: Bool, + targetBodyDuration: CMTime + ) async -> Result? { + let sourceURL = URL(fileURLWithPath: audioPath) + guard FileManager.default.fileExists(atPath: sourceURL.path) else { + PluginLog.print("โš ๏ธ AudioPreRenderer: source file not found: \(audioPath)") + return nil + } + + if CMTimeCompare(targetBodyDuration, .zero) <= 0 { + PluginLog.print("โš ๏ธ AudioPreRenderer: targetBodyDuration <= 0, skipping") + return nil + } + + // Step 1: decode the trimmed range to PCM bytes. + let asset = AVURLAsset(url: sourceURL) + + // Resolve the source duration. + let sourceDuration: CMTime + if #available(iOS 15.0, *) { + sourceDuration = (try? await asset.load(.duration)) ?? .zero + } else { + sourceDuration = asset.duration + } + + let effectiveStart = CMTimeMaximum(audioStartTime, .zero) + let effectiveEnd = CMTimeMinimum(audioEndTime ?? 
sourceDuration, sourceDuration) + let trimDuration = CMTimeSubtract(effectiveEnd, effectiveStart) + if CMTimeCompare(trimDuration, .zero) <= 0 { + PluginLog.print( + "โš ๏ธ AudioPreRenderer: invalid trim range start=\(effectiveStart.seconds)s end=\(effectiveEnd.seconds)s" + ) + return nil + } + + let audioTracks: [AVAssetTrack] + do { + if #available(iOS 15.0, *) { + audioTracks = try await asset.loadTracks(withMediaType: .audio) + } else { + audioTracks = asset.tracks(withMediaType: .audio) + } + } catch { + PluginLog.print("โš ๏ธ AudioPreRenderer: failed to load tracks: \(error)") + return nil + } + + guard let audioTrack = audioTracks.first else { + PluginLog.print("โš ๏ธ AudioPreRenderer: no audio tracks in source") + return nil + } + + // Output PCM format: 44.1kHz stereo 16-bit signed little-endian + // (matches typical AAC/MP3 source rate; AVAssetWriter will + // resample internally during the final video export if needed). + // Using a fixed format keeps the pre-render simple and + // predictable for the AVMutableComposition consumer. + let sampleRate: Double = 44100 + let channelCount: Int = 2 + let bitsPerSample: Int = 16 + let bytesPerFrame = channelCount * (bitsPerSample / 8) + + let outputSettings: [String: Any] = [ + AVFormatIDKey: kAudioFormatLinearPCM, + AVSampleRateKey: sampleRate, + AVNumberOfChannelsKey: channelCount, + AVLinearPCMBitDepthKey: bitsPerSample, + AVLinearPCMIsFloatKey: false, + AVLinearPCMIsBigEndianKey: false, + AVLinearPCMIsNonInterleaved: false, + ] + + let trimmedPcm: Data + do { + trimmedPcm = try await readPcm( + from: asset, + track: audioTrack, + start: effectiveStart, + duration: trimDuration, + outputSettings: outputSettings + ) + } catch { + PluginLog.print("โš ๏ธ AudioPreRenderer: PCM read failed: \(error)") + return nil + } + + if trimmedPcm.isEmpty { + PluginLog.print("โš ๏ธ AudioPreRenderer: decoded PCM is empty") + return nil + } + + // Step 2: build the output PCM data (loop / trim / pad). 
+ let targetBytes = bytesForDuration( + targetBodyDuration, + sampleRate: sampleRate, + bytesPerFrame: bytesPerFrame + ) + + var outputBytes = Data(capacity: targetBytes) + + if loop { + // Repeat trimmedPcm until we have exactly targetBytes. + while outputBytes.count < targetBytes { + let remaining = targetBytes - outputBytes.count + if remaining >= trimmedPcm.count { + outputBytes.append(trimmedPcm) + } else { + outputBytes.append(trimmedPcm.subdata(in: 0.. Data { + let reader = try AVAssetReader(asset: asset) + reader.timeRange = CMTimeRange(start: start, duration: duration) + + let trackOutput = AVAssetReaderTrackOutput( + track: track, + outputSettings: outputSettings + ) + trackOutput.alwaysCopiesSampleData = false + guard reader.canAdd(trackOutput) else { + throw NSError( + domain: "AudioPreRenderer", + code: 1, + userInfo: [NSLocalizedDescriptionKey: "Cannot add track output to reader"] + ) + } + reader.add(trackOutput) + + guard reader.startReading() else { + throw reader.error ?? NSError( + domain: "AudioPreRenderer", + code: 2, + userInfo: [NSLocalizedDescriptionKey: "AVAssetReader.startReading failed"] + ) + } + + var pcm = Data() + while reader.status == .reading, + let sampleBuffer = trackOutput.copyNextSampleBuffer() { + if let blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer) { + let length = CMBlockBufferGetDataLength(blockBuffer) + if length > 0 { + var tempBytes = [UInt8](repeating: 0, count: length) + let status = CMBlockBufferCopyDataBytes( + blockBuffer, + atOffset: 0, + dataLength: length, + destination: &tempBytes + ) + if status == kCMBlockBufferNoErr { + pcm.append(contentsOf: tempBytes) + } + } + } + } + + if reader.status == .failed, let error = reader.error { + throw error + } + + return pcm + } + + /// Computes the number of PCM bytes that represent `duration` at the + /// given sample rate and bytes-per-frame, aligned to a frame + /// boundary. 
+ private static func bytesForDuration( + _ duration: CMTime, + sampleRate: Double, + bytesPerFrame: Int + ) -> Int { + let seconds = duration.seconds + if !seconds.isFinite || seconds <= 0 { return 0 } + let frames = Int(seconds * sampleRate) + return frames * bytesPerFrame + } + + /// Creates a unique temporary WAV file URL in the cache directory. + private static func makeTemporaryWavURL() -> URL { + let tmpDir = URL(fileURLWithPath: NSTemporaryDirectory()) + let name = "prerender_audio_\(Int(Date().timeIntervalSince1970 * 1000))_\(UUID().uuidString).wav" + return tmpDir.appendingPathComponent(name) + } + + /// Builds a complete WAV file (RIFF header + PCM data) as `Data`. + private static func makeWav( + pcmBytes: Data, + sampleRate: Int, + channelCount: Int, + bitsPerSample: Int + ) -> Data { + let byteRate = sampleRate * channelCount * bitsPerSample / 8 + let blockAlign = channelCount * bitsPerSample / 8 + let dataSize = UInt32(pcmBytes.count) + let chunkSize = UInt32(36) + dataSize + + var header = Data(capacity: 44) + header.append(contentsOf: [0x52, 0x49, 0x46, 0x46]) // "RIFF" + header.appendLittleEndian(UInt32(chunkSize)) + header.append(contentsOf: [0x57, 0x41, 0x56, 0x45]) // "WAVE" + header.append(contentsOf: [0x66, 0x6D, 0x74, 0x20]) // "fmt " + header.appendLittleEndian(UInt32(16)) // PCM fmt chunk size + header.appendLittleEndian(UInt16(1)) // PCM format + header.appendLittleEndian(UInt16(channelCount)) + header.appendLittleEndian(UInt32(sampleRate)) + header.appendLittleEndian(UInt32(byteRate)) + header.appendLittleEndian(UInt16(blockAlign)) + header.appendLittleEndian(UInt16(bitsPerSample)) + header.append(contentsOf: [0x64, 0x61, 0x74, 0x61]) // "data" + header.appendLittleEndian(dataSize) + + var output = Data(capacity: header.count + pcmBytes.count) + output.append(header) + output.append(pcmBytes) + return output + } +} + +private extension Data { + mutating func appendLittleEndian(_ value: UInt32) { + var v = value.littleEndian + 
Swift.withUnsafeBytes(of: &v) { buffer in + append(contentsOf: buffer) + } + } + mutating func appendLittleEndian(_ value: UInt16) { + var v = value.littleEndian + Swift.withUnsafeBytes(of: &v) { buffer in + append(contentsOf: buffer) + } + } +} diff --git a/ios/Classes/src/features/render/helpers/AudioSequenceBuilder.swift b/ios/Classes/src/features/render/helpers/AudioSequenceBuilder.swift index 71431a9..c257423 100644 --- a/ios/Classes/src/features/render/helpers/AudioSequenceBuilder.swift +++ b/ios/Classes/src/features/render/helpers/AudioSequenceBuilder.swift @@ -1,15 +1,26 @@ import AVFoundation import Foundation -/// Builder class for creating custom audio sequences. +/// Builder for inserting a custom audio track into an `AVMutableComposition`. /// -/// Handles custom audio track with volume control, looping/trimming -/// to match video duration or a specific time range in the composition. +/// The track is first **pre-rendered to a single, gap-less PCM WAV file** +/// via [AudioPreRenderer]. The composition then inserts this WAV with a +/// single `insertTimeRange` call, which avoids audible clicks at every +/// loop restart caused by AAC/MP3 codec priming when the source is +/// inserted multiple times directly into the composition. internal class AudioSequenceBuilder { + /// Result of a successful build operation. + struct BuildResult { + let track: AVMutableCompositionTrack + /// Temporary pre-rendered audio file. The owner of the + /// composition (RenderVideo) MUST delete this file once the + /// export has finished (success or failure). + let temporaryURL: URL + } + private let audioPath: String private let targetDuration: CMTime - private var volume: Float = 1.0 private var loopAudio: Bool = true private var audioStartTime: CMTime = .zero private var audioEndTime: CMTime? @@ -19,38 +30,19 @@ internal class AudioSequenceBuilder { /// If nil, uses targetDuration minus compositionInsertTime. private var compositionPlayDuration: CMTime? 
- /// Initializes builder with audio path and target duration. - /// - /// - Parameters: - /// - audioPath: Absolute path to audio file - /// - targetDuration: Target duration to match (total video duration) + /// Initializes builder with audio path and target (full video) duration. init(audioPath: String, targetDuration: CMTime) { self.audioPath = audioPath self.targetDuration = targetDuration } - /// Sets volume for custom audio. - /// - /// - Parameter volume: Volume multiplier (0.0 to 1.0+) - /// - Returns: Self for chaining - func setVolume(_ volume: Float) -> AudioSequenceBuilder { - self.volume = volume - return self - } - - /// Sets whether the audio should loop to match video duration. - /// - /// - Parameter loop: If true, audio repeats; if false, plays once - /// - Returns: Self for chaining + @discardableResult func setLoop(_ loop: Bool) -> AudioSequenceBuilder { self.loopAudio = loop return self } - /// Sets the start time offset within the audio file. - /// - /// - Parameter startTimeUs: Start time in microseconds from the beginning of the audio file - /// - Returns: Self for chaining + @discardableResult func setAudioStartTime(_ startTimeUs: Int64?) -> AudioSequenceBuilder { if let startTimeUs = startTimeUs, startTimeUs > 0 { self.audioStartTime = CMTime(value: startTimeUs, timescale: 1_000_000) @@ -58,10 +50,7 @@ internal class AudioSequenceBuilder { return self } - /// Sets the end time offset within the audio file. - /// - /// - Parameter endTimeUs: End time in microseconds within the audio file - /// - Returns: Self for chaining + @discardableResult func setAudioEndTime(_ endTimeUs: Int64?) -> AudioSequenceBuilder { if let endTimeUs = endTimeUs, endTimeUs > 0 { self.audioEndTime = CMTime(value: endTimeUs, timescale: 1_000_000) @@ -69,10 +58,7 @@ internal class AudioSequenceBuilder { return self } - /// Sets where in the composition timeline this audio should start playing. 
- /// - /// - Parameter startUs: Composition start time in microseconds (-1 or nil = from start) - /// - Returns: Self for chaining + @discardableResult func setCompositionStartTime(_ startUs: Int64?) -> AudioSequenceBuilder { if let startUs = startUs, startUs > 0 { self.compositionInsertTime = CMTime(value: startUs, timescale: 1_000_000) @@ -80,10 +66,7 @@ internal class AudioSequenceBuilder { return self } - /// Sets the duration this audio should play in the composition. - /// - /// - Parameter endUs: Composition end time in microseconds (-1 or nil = until end) - /// - Returns: Self for chaining + @discardableResult func setCompositionEndTime(_ endUs: Int64?) -> AudioSequenceBuilder { if let endUs = endUs, endUs > 0 { let endTime = CMTime(value: endUs, timescale: 1_000_000) @@ -92,50 +75,10 @@ internal class AudioSequenceBuilder { return self } - /// Builds custom audio track and adds it to composition. - /// - /// Trims or loops the audio to match target duration and applies volume. - /// - /// - Parameter composition: Composition to add audio track to - /// - Returns: The created composition track, or nil if failed - func build(in composition: AVMutableComposition) async throws -> AVMutableCompositionTrack? { - let audioURL = URL(fileURLWithPath: audioPath) - guard FileManager.default.fileExists(atPath: audioURL.path) else { - PluginLog.print("โš ๏ธ Custom audio file does not exist: \(audioPath)") - return nil - } - - let audioAsset = AVURLAsset(url: audioURL) - - guard let audioTrack = try? await MediaInfoExtractor.loadAudioTrack(from: audioAsset), - let compositionAudioTrack = composition.addMutableTrack( - withMediaType: .audio, - preferredTrackID: kCMPersistentTrackID_Invalid - ) - else { - PluginLog.print("โš ๏ธ Failed to add custom audio track") - return nil - } - - // Get audio duration - let audioDuration: CMTime - if #available(iOS 15.0, *) { - audioDuration = (try? await audioAsset.load(.duration)) ?? 
.zero - } else { - audioDuration = audioAsset.duration - } - - // Calculate effective audio source range - let effectiveAudioEnd = audioEndTime ?? audioDuration - let effectiveAudioDuration = CMTimeSubtract(effectiveAudioEnd, audioStartTime) - if CMTimeCompare(effectiveAudioDuration, .zero) <= 0 { - PluginLog.print( - "โš ๏ธ Audio start/end time range is invalid (start: \(audioStartTime.seconds)s, end: \(effectiveAudioEnd.seconds)s)" - ) - return nil - } - - // Calculate how long this track should play in the composition + /// Pre-renders the audio and inserts it into `composition` with a + /// single `insertTimeRange` call. + func build(in composition: AVMutableComposition) async throws -> BuildResult? { + // Compute play duration in the composition. let remainingCompositionTime = CMTimeSubtract(targetDuration, compositionInsertTime) let playDuration = compositionPlayDuration ?? remainingCompositionTime let effectivePlayDuration = CMTimeMinimum(playDuration, remainingCompositionTime) @@ -145,136 +88,70 @@ internal class AudioSequenceBuilder { return nil } - if CMTimeCompare(audioStartTime, .zero) > 0 { - PluginLog.print("๐ŸŽต Custom audio start offset: \(audioStartTime.seconds)s") - } - if audioEndTime != nil { - PluginLog.print("๐ŸŽต Custom audio end offset: \(effectiveAudioEnd.seconds)s") - } - if CMTimeCompare(compositionInsertTime, .zero) > 0 { - PluginLog.print( - "๐ŸŽต Audio placed at composition time: \(compositionInsertTime.seconds)s" - ) + // Pre-render the audio: handles trim, loop and silence-padding + // entirely on PCM samples. 
+ guard let prerender = await AudioPreRenderer.render( + audioPath: audioPath, + audioStartTime: audioStartTime, + audioEndTime: audioEndTime, + loop: loopAudio, + targetBodyDuration: effectivePlayDuration + ) else { + return nil } - // Trim or loop custom audio to match the effective play duration - if CMTimeCompare(effectiveAudioDuration, effectivePlayDuration) > 0 { - // Trim audio to match play duration (starting from audioStartTime) - let timeRange = CMTimeRange(start: audioStartTime, duration: effectivePlayDuration) - try compositionAudioTrack.insertTimeRange( - timeRange, of: audioTrack, at: compositionInsertTime) - PluginLog.print("โœ‚๏ธ Custom audio trimmed to \(effectivePlayDuration.seconds)s") - } else if loopAudio { - // Loop audio to match play duration - var currentTime = compositionInsertTime - let compositionEndTime = CMTimeAdd(compositionInsertTime, effectivePlayDuration) - var loopCount = 0 - var isFirstLoop = true - - while CMTimeCompare(currentTime, compositionEndTime) < 0 { - loopCount += 1 - let remainingDuration = CMTimeSubtract(compositionEndTime, currentTime) - - // First loop uses audioStartTime offset, subsequent loops start from beginning of source range - let loopStartTime = isFirstLoop ? audioStartTime : audioStartTime - let loopAudioDuration = effectiveAudioDuration - - let insertDuration = CMTimeMinimum(loopAudioDuration, remainingDuration) - let timeRange = CMTimeRange(start: loopStartTime, duration: insertDuration) + // Load the pre-rendered audio and insert it once into the composition. + let prerenderAsset = AVURLAsset(url: prerender.outputURL) - try compositionAudioTrack.insertTimeRange( - timeRange, of: audioTrack, at: currentTime) - currentTime = CMTimeAdd(currentTime, insertDuration) - isFirstLoop = false - } - - PluginLog.print( - "๐Ÿ”„ Custom audio looped \(loopCount) times to match \(effectivePlayDuration.seconds)s duration" - ) + let prerenderTracks: [AVAssetTrack] + if #available(iOS 15.0, *) { + prerenderTracks = (try? 
await prerenderAsset.loadTracks(withMediaType: .audio)) ?? [] } else { - // Play audio once without looping (starting from audioStartTime) - let insertDuration = CMTimeMinimum(effectiveAudioDuration, effectivePlayDuration) - let timeRange = CMTimeRange(start: audioStartTime, duration: insertDuration) - try compositionAudioTrack.insertTimeRange( - timeRange, of: audioTrack, at: compositionInsertTime) - PluginLog.print( - "โ–ถ๏ธ Custom audio plays once (\(insertDuration.seconds)s, no loop)" - + (CMTimeCompare(audioStartTime, .zero) > 0 - ? " starting at \(audioStartTime.seconds)s" : "")) - } - - if volume != 1.0 { - PluginLog.print("๐Ÿ”Š Custom audio volume: \(volume)") + prerenderTracks = prerenderAsset.tracks(withMediaType: .audio) } - - return compositionAudioTrack - } - - /// Checks if custom audio sample rate is compatible with video audio. - /// - /// - Parameter videoClips: Array of video clips to check against - /// - Returns: true if compatible or no video audio exists - func checkSampleRateCompatibility(videoClips: [VideoClip]) async -> Bool { - let customSampleRate = await MediaInfoExtractor.getAudioSampleRate(audioPath) - - guard customSampleRate > 0 else { - PluginLog.print("โš ๏ธ Could not detect custom audio sample rate") - return true // Assume compatible if we can't detect - } - - for clip in videoClips { - if let videoSampleRate = await getVideoAudioSampleRate(clip.inputPath), - videoSampleRate > 0 && videoSampleRate != customSampleRate - { - PluginLog.print( - "โŒ Sample rate mismatch: custom audio (\(customSampleRate) Hz) vs video (\(videoSampleRate) Hz)" - ) - return false - } + guard let sourceTrack = prerenderTracks.first else { + PluginLog.print("โš ๏ธ Pre-rendered audio has no audio track") + try? FileManager.default.removeItem(at: prerender.outputURL) + return nil } - PluginLog.print("โœ… Sample rates are compatible") - return true - } - - /// Gets sample rate of audio track in video file. 
- private func getVideoAudioSampleRate(_ videoPath: String) async -> Int? { - let url = URL(fileURLWithPath: videoPath) - guard FileManager.default.fileExists(atPath: url.path) else { + guard let compositionAudioTrack = composition.addMutableTrack( + withMediaType: .audio, + preferredTrackID: kCMPersistentTrackID_Invalid + ) else { + PluginLog.print("โš ๏ธ Failed to add custom audio track") + try? FileManager.default.removeItem(at: prerender.outputURL) return nil } - let asset = AVURLAsset(url: url) + // Insert the entire pre-rendered file at the composition offset. + // The pre-render duration is already aligned to the requested + // play duration (loop+trim handled at PCM level). + let insertDuration = CMTimeMinimum(prerender.duration, effectivePlayDuration) + let timeRange = CMTimeRange(start: .zero, duration: insertDuration) do { - let tracks: [AVAssetTrack] - if #available(iOS 15.0, *) { - tracks = try await asset.loadTracks(withMediaType: .audio) - } else { - tracks = asset.tracks(withMediaType: .audio) - } - - guard let audioTrack = tracks.first else { - return nil - } - - let formatDescriptions: [Any] - if #available(iOS 15.0, *) { - formatDescriptions = try await audioTrack.load(.formatDescriptions) - } else { - formatDescriptions = audioTrack.formatDescriptions - } - - for description in formatDescriptions { - let formatDesc = description as! CMFormatDescription - if let basicDesc = CMAudioFormatDescriptionGetStreamBasicDescription(formatDesc) { - return Int(basicDesc.pointee.mSampleRate) - } - } - - return nil + try compositionAudioTrack.insertTimeRange( + timeRange, of: sourceTrack, at: compositionInsertTime + ) } catch { - return nil + PluginLog.print("โš ๏ธ Failed to insert pre-rendered audio: \(error)") + try? 
FileManager.default.removeItem(at: prerender.outputURL) + throw error + } + + if CMTimeCompare(compositionInsertTime, .zero) > 0 { + PluginLog.print( + "๐ŸŽต Audio placed at composition time: \(compositionInsertTime.seconds)s" + ) } + PluginLog.print( + "๐ŸŽผ Pre-rendered audio inserted: \(insertDuration.seconds)s (loop=\(loopAudio))" + ) + + return BuildResult( + track: compositionAudioTrack, + temporaryURL: prerender.outputURL + ) } } diff --git a/ios/Classes/src/features/render/helpers/CompositionBuilder.swift b/ios/Classes/src/features/render/helpers/CompositionBuilder.swift index 0590ec2..9bbe07d 100644 --- a/ios/Classes/src/features/render/helpers/CompositionBuilder.swift +++ b/ios/Classes/src/features/render/helpers/CompositionBuilder.swift @@ -43,10 +43,10 @@ internal class CompositionBuilder { /// Builds the complete composition. /// - /// - Returns: Tuple containing composition, video composition, render size, audio mix, and source track ID + /// - Returns: Tuple containing composition, video composition, render size, audio mix, source track ID, and temporary file URLs to clean up after export /// - Throws: Error if composition creation fails func build() async throws -> ( - AVMutableComposition, VideoCompositionData, CGSize, AVAudioMix?, CMPersistentTrackID + AVMutableComposition, VideoCompositionData, CGSize, AVAudioMix?, CMPersistentTrackID, [URL] ) { guard !videoClips.isEmpty else { throw NSError( @@ -67,22 +67,23 @@ internal class CompositionBuilder { let videoResult = try await videoBuilder.build(in: composition) - // Add custom audio tracks + // Add custom audio tracks (each pre-rendered to a single PCM WAV). 
var customAudioTracks: [(track: AVMutableCompositionTrack, config: AudioTrackConfig)] = [] + var temporaryAudioURLs: [URL] = [] for trackConfig in audioTracks { PluginLog.print("๐ŸŽต Adding audio track: \(trackConfig.path)") let audioBuilder = AudioSequenceBuilder( audioPath: trackConfig.path, targetDuration: videoResult.totalDuration - ).setVolume(trackConfig.volume) - .setLoop(trackConfig.loop) + ).setLoop(trackConfig.loop) .setAudioStartTime(trackConfig.audioStartUs) .setAudioEndTime(trackConfig.audioEndUs) .setCompositionStartTime(trackConfig.startUs == -1 ? nil : trackConfig.startUs) .setCompositionEndTime(trackConfig.endUs == -1 ? nil : trackConfig.endUs) - if let track = try await audioBuilder.build(in: composition) { - customAudioTracks.append((track: track, config: trackConfig)) + if let result = try await audioBuilder.build(in: composition) { + customAudioTracks.append((track: result.track, config: trackConfig)) + temporaryAudioURLs.append(result.temporaryURL) } } @@ -176,7 +177,7 @@ internal class CompositionBuilder { // Return the track ID for fallback on older iOS versions let sourceTrackID = videoResult.videoTrack.trackID - return (composition, videoCompositionData, videoResult.renderSize, audioMix, sourceTrackID) + return (composition, videoCompositionData, videoResult.renderSize, audioMix, sourceTrackID, temporaryAudioURLs) } /// Creates audio mix with per-clip and per-track volume parameters. diff --git a/macos/Classes/src/features/render/RenderVideo.swift b/macos/Classes/src/features/render/RenderVideo.swift index 30f37fb..8566d9e 100644 --- a/macos/Classes/src/features/render/RenderVideo.swift +++ b/macos/Classes/src/features/render/RenderVideo.swift @@ -87,11 +87,17 @@ class RenderVideo { } var outputURL: URL! + var temporaryAudioURLs: [URL] = [] let finalize: () -> Void = { try? cleanup(config.outputPath == nil ? 
[outputURL] : []) // Clean up transcoded files VideoTranscoder.cleanupTranscodedFiles(transcodedFiles) + // Clean up pre-rendered audio temp files + for url in temporaryAudioURLs { + try? FileManager.default.removeItem(at: url) + PluginLog.print("๐Ÿงน Removed pre-rendered audio: \(url.lastPathComponent)") + } } let handleCompletion: (Result) -> Void = { result in @@ -140,13 +146,14 @@ class RenderVideo { var effectsConfig = VideoCompositorConfig() // Use composition helper to merge multiple video clips - let (composition, videoCompData, renderSize, audioMix, sourceTrackID) = + let (composition, videoCompData, renderSize, audioMix, sourceTrackID, audioTempURLs) = try await applyComposition( videoClips: workingConfig.videoClips, videoEffects: effectsConfig, enableAudio: workingConfig.enableAudio, audioTracks: workingConfig.audioTracks ) + temporaryAudioURLs = audioTempURLs var videoCompConfig = videoCompData // Set source track ID for fallback on older macOS versions diff --git a/macos/Classes/src/features/render/helpers/ApplyComposition.swift b/macos/Classes/src/features/render/helpers/ApplyComposition.swift index d1c28b9..28ca982 100644 --- a/macos/Classes/src/features/render/helpers/ApplyComposition.swift +++ b/macos/Classes/src/features/render/helpers/ApplyComposition.swift @@ -19,6 +19,7 @@ import Foundation /// - CGSize: Final render size (max dimensions from all clips) /// - AVAudioMix?: Audio mix with volume controls (nil if no audio mixing needed) /// - CMPersistentTrackID: The track ID of the video composition track (for fallback on older macOS) +/// - [URL]: Temporary file URLs (e.g. pre-rendered audio WAVs) the caller MUST delete after export /// /// - Throws: NSError if video clips are empty, files don't exist, or tracks can't be loaded. 
func applyComposition( @@ -27,7 +28,7 @@ func applyComposition( enableAudio: Bool, audioTracks: [AudioTrackConfig] ) async throws -> ( - AVMutableComposition, VideoCompositionData, CGSize, AVAudioMix?, CMPersistentTrackID + AVMutableComposition, VideoCompositionData, CGSize, AVAudioMix?, CMPersistentTrackID, [URL] ) { return try await CompositionBuilder(videoClips: videoClips, videoEffects: videoEffects) .setEnableAudio(enableAudio) diff --git a/macos/Classes/src/features/render/helpers/AudioPreRenderer.swift b/macos/Classes/src/features/render/helpers/AudioPreRenderer.swift new file mode 100644 index 0000000..eca8e40 --- /dev/null +++ b/macos/Classes/src/features/render/helpers/AudioPreRenderer.swift @@ -0,0 +1,327 @@ +import AVFoundation +import Foundation + +/// Pre-renders a custom audio track into a single, gap-less PCM WAV file +/// that is ready to be inserted into an AVMutableComposition with ONE +/// `insertTimeRange` call. +/// +/// This avoids audible clicks at every loop restart when the source audio +/// is a compressed format (AAC, MP3) and `AVMutableComposition.insertTimeRange` +/// is called multiple times on the source โ€” each call respects encoder +/// priming/padding samples and aligns to compressed-frame boundaries +/// (~1024 samples for AAC, ~1152 for MP3), producing audible artifacts. +/// +/// By decoding to PCM once and looping/trimming on raw samples, every +/// loop boundary is sample-exact and silent transitions are perfectly +/// continuous. +internal enum AudioPreRenderer { + + /// Result of a successful pre-render operation. + struct Result { + /// The pre-rendered PCM WAV file URL. The caller is responsible + /// for deleting this file when no longer needed. + let outputURL: URL + /// Total duration of the pre-rendered audio. + let duration: CMTime + } + + /// Pre-renders the audio described by the parameters. 
+ /// + /// The output file contains exactly `targetBodyDuration` of audio: + /// the trimmed source `[audioStartTime, audioEndTime)` looped (or + /// played once) to fill the duration, with sample-exact tail trim. + /// + /// No silence padding is added โ€” leading/trailing silence on the + /// composition timeline is handled implicitly by inserting this file + /// at the correct `compositionInsertTime`. + /// + /// - Parameters: + /// - audioPath: Absolute path to the source audio file. + /// - audioStartTime: Trim start within the source. + /// - audioEndTime: Trim end within the source (nil = use full + /// source duration). + /// - loop: If true, the trimmed window repeats to fill + /// `targetBodyDuration`. If false, the source plays once and the + /// remaining time is filled with silence. + /// - targetBodyDuration: How long the output audio should sound. + /// - Returns: A [Result] on success, nil on failure. + static func render( + audioPath: String, + audioStartTime: CMTime, + audioEndTime: CMTime?, + loop: Bool, + targetBodyDuration: CMTime + ) async -> Result? { + let sourceURL = URL(fileURLWithPath: audioPath) + guard FileManager.default.fileExists(atPath: sourceURL.path) else { + PluginLog.print("โš ๏ธ AudioPreRenderer: source file not found: \(audioPath)") + return nil + } + + if CMTimeCompare(targetBodyDuration, .zero) <= 0 { + PluginLog.print("โš ๏ธ AudioPreRenderer: targetBodyDuration <= 0, skipping") + return nil + } + + // Step 1: decode the trimmed range to PCM bytes. + let asset = AVURLAsset(url: sourceURL) + + // Resolve the source duration. + let sourceDuration: CMTime + if #available(macOS 13.0, *) { + sourceDuration = (try? await asset.load(.duration)) ?? .zero + } else { + sourceDuration = asset.duration + } + + let effectiveStart = CMTimeMaximum(audioStartTime, .zero) + let effectiveEnd = CMTimeMinimum(audioEndTime ?? 
sourceDuration, sourceDuration) + let trimDuration = CMTimeSubtract(effectiveEnd, effectiveStart) + if CMTimeCompare(trimDuration, .zero) <= 0 { + PluginLog.print( + "โš ๏ธ AudioPreRenderer: invalid trim range start=\(effectiveStart.seconds)s end=\(effectiveEnd.seconds)s" + ) + return nil + } + + let audioTracks: [AVAssetTrack] + do { + if #available(macOS 13.0, *) { + audioTracks = try await asset.loadTracks(withMediaType: .audio) + } else { + audioTracks = asset.tracks(withMediaType: .audio) + } + } catch { + PluginLog.print("โš ๏ธ AudioPreRenderer: failed to load tracks: \(error)") + return nil + } + + guard let audioTrack = audioTracks.first else { + PluginLog.print("โš ๏ธ AudioPreRenderer: no audio tracks in source") + return nil + } + + // Output PCM format: 44.1kHz stereo 16-bit signed little-endian + // (matches typical AAC/MP3 source rate; AVAssetWriter will + // resample internally during the final video export if needed). + // Using a fixed format keeps the pre-render simple and + // predictable for the AVMutableComposition consumer. + let sampleRate: Double = 44100 + let channelCount: Int = 2 + let bitsPerSample: Int = 16 + let bytesPerFrame = channelCount * (bitsPerSample / 8) + + let outputSettings: [String: Any] = [ + AVFormatIDKey: kAudioFormatLinearPCM, + AVSampleRateKey: sampleRate, + AVNumberOfChannelsKey: channelCount, + AVLinearPCMBitDepthKey: bitsPerSample, + AVLinearPCMIsFloatKey: false, + AVLinearPCMIsBigEndianKey: false, + AVLinearPCMIsNonInterleaved: false, + ] + + let trimmedPcm: Data + do { + trimmedPcm = try await readPcm( + from: asset, + track: audioTrack, + start: effectiveStart, + duration: trimDuration, + outputSettings: outputSettings + ) + } catch { + PluginLog.print("โš ๏ธ AudioPreRenderer: PCM read failed: \(error)") + return nil + } + + if trimmedPcm.isEmpty { + PluginLog.print("โš ๏ธ AudioPreRenderer: decoded PCM is empty") + return nil + } + + // Step 2: build the output PCM data (loop / trim / pad). 
+ let targetBytes = bytesForDuration( + targetBodyDuration, + sampleRate: sampleRate, + bytesPerFrame: bytesPerFrame + ) + + var outputBytes = Data(capacity: targetBytes) + + if loop { + // Repeat trimmedPcm until we have exactly targetBytes. + while outputBytes.count < targetBytes { + let remaining = targetBytes - outputBytes.count + if remaining >= trimmedPcm.count { + outputBytes.append(trimmedPcm) + } else { + outputBytes.append(trimmedPcm.subdata(in: 0.. Data { + let reader = try AVAssetReader(asset: asset) + reader.timeRange = CMTimeRange(start: start, duration: duration) + + let trackOutput = AVAssetReaderTrackOutput( + track: track, + outputSettings: outputSettings + ) + trackOutput.alwaysCopiesSampleData = false + guard reader.canAdd(trackOutput) else { + throw NSError( + domain: "AudioPreRenderer", + code: 1, + userInfo: [NSLocalizedDescriptionKey: "Cannot add track output to reader"] + ) + } + reader.add(trackOutput) + + guard reader.startReading() else { + throw reader.error ?? NSError( + domain: "AudioPreRenderer", + code: 2, + userInfo: [NSLocalizedDescriptionKey: "AVAssetReader.startReading failed"] + ) + } + + var pcm = Data() + while reader.status == .reading, + let sampleBuffer = trackOutput.copyNextSampleBuffer() { + if let blockBuffer = CMSampleBufferGetDataBuffer(sampleBuffer) { + let length = CMBlockBufferGetDataLength(blockBuffer) + if length > 0 { + var tempBytes = [UInt8](repeating: 0, count: length) + let status = CMBlockBufferCopyDataBytes( + blockBuffer, + atOffset: 0, + dataLength: length, + destination: &tempBytes + ) + if status == kCMBlockBufferNoErr { + pcm.append(contentsOf: tempBytes) + } + } + } + } + + if reader.status == .failed, let error = reader.error { + throw error + } + + return pcm + } + + /// Computes the number of PCM bytes that represent `duration` at the + /// given sample rate and bytes-per-frame, aligned to a frame + /// boundary. 
+ private static func bytesForDuration( + _ duration: CMTime, + sampleRate: Double, + bytesPerFrame: Int + ) -> Int { + let seconds = duration.seconds + if !seconds.isFinite || seconds <= 0 { return 0 } + let frames = Int(seconds * sampleRate) + return frames * bytesPerFrame + } + + /// Creates a unique temporary WAV file URL in the cache directory. + private static func makeTemporaryWavURL() -> URL { + let tmpDir = URL(fileURLWithPath: NSTemporaryDirectory()) + let name = "prerender_audio_\(Int(Date().timeIntervalSince1970 * 1000))_\(UUID().uuidString).wav" + return tmpDir.appendingPathComponent(name) + } + + /// Builds a complete WAV file (RIFF header + PCM data) as `Data`. + private static func makeWav( + pcmBytes: Data, + sampleRate: Int, + channelCount: Int, + bitsPerSample: Int + ) -> Data { + let byteRate = sampleRate * channelCount * bitsPerSample / 8 + let blockAlign = channelCount * bitsPerSample / 8 + let dataSize = UInt32(pcmBytes.count) + let chunkSize = UInt32(36) + dataSize + + var header = Data(capacity: 44) + header.append(contentsOf: [0x52, 0x49, 0x46, 0x46]) // "RIFF" + header.appendLittleEndian(UInt32(chunkSize)) + header.append(contentsOf: [0x57, 0x41, 0x56, 0x45]) // "WAVE" + header.append(contentsOf: [0x66, 0x6D, 0x74, 0x20]) // "fmt " + header.appendLittleEndian(UInt32(16)) // PCM fmt chunk size + header.appendLittleEndian(UInt16(1)) // PCM format + header.appendLittleEndian(UInt16(channelCount)) + header.appendLittleEndian(UInt32(sampleRate)) + header.appendLittleEndian(UInt32(byteRate)) + header.appendLittleEndian(UInt16(blockAlign)) + header.appendLittleEndian(UInt16(bitsPerSample)) + header.append(contentsOf: [0x64, 0x61, 0x74, 0x61]) // "data" + header.appendLittleEndian(dataSize) + + var output = Data(capacity: header.count + pcmBytes.count) + output.append(header) + output.append(pcmBytes) + return output + } +} + +private extension Data { + mutating func appendLittleEndian(_ value: UInt32) { + var v = value.littleEndian + 
Swift.withUnsafeBytes(of: &v) { buffer in + append(contentsOf: buffer) + } + } + mutating func appendLittleEndian(_ value: UInt16) { + var v = value.littleEndian + Swift.withUnsafeBytes(of: &v) { buffer in + append(contentsOf: buffer) + } + } +} diff --git a/macos/Classes/src/features/render/helpers/AudioSequenceBuilder.swift b/macos/Classes/src/features/render/helpers/AudioSequenceBuilder.swift index 9f22a65..2dc6829 100644 --- a/macos/Classes/src/features/render/helpers/AudioSequenceBuilder.swift +++ b/macos/Classes/src/features/render/helpers/AudioSequenceBuilder.swift @@ -1,15 +1,26 @@ import AVFoundation import Foundation -/// Builder class for creating custom audio sequences. +/// Builder for inserting a custom audio track into an `AVMutableComposition`. /// -/// Handles custom audio track with volume control, looping/trimming -/// to match video duration or a specific time range in the composition. +/// The track is first **pre-rendered to a single, gap-less PCM WAV file** +/// via [AudioPreRenderer]. The composition then inserts this WAV with a +/// single `insertTimeRange` call, which avoids audible clicks at every +/// loop restart caused by AAC/MP3 codec priming when the source is +/// inserted multiple times directly into the composition. internal class AudioSequenceBuilder { + /// Result of a successful build operation. + struct BuildResult { + let track: AVMutableCompositionTrack + /// Temporary pre-rendered audio file. The owner of the + /// composition (RenderVideo) MUST delete this file once the + /// export has finished (success or failure). + let temporaryURL: URL + } + private let audioPath: String private let targetDuration: CMTime - private var volume: Float = 1.0 private var loopAudio: Bool = true private var audioStartTime: CMTime = .zero private var audioEndTime: CMTime? @@ -19,38 +30,19 @@ internal class AudioSequenceBuilder { /// If nil, uses targetDuration minus compositionInsertTime. private var compositionPlayDuration: CMTime? 
- /// Initializes builder with audio path and target duration. - /// - /// - Parameters: - /// - audioPath: Absolute path to audio file - /// - targetDuration: Target duration to match (total video duration) + /// Initializes builder with audio path and target (full video) duration. init(audioPath: String, targetDuration: CMTime) { self.audioPath = audioPath self.targetDuration = targetDuration } - /// Sets volume for custom audio. - /// - /// - Parameter volume: Volume multiplier (0.0 to 1.0+) - /// - Returns: Self for chaining - func setVolume(_ volume: Float) -> AudioSequenceBuilder { - self.volume = volume - return self - } - - /// Sets whether the audio should loop to match video duration. - /// - /// - Parameter loop: If true, audio repeats; if false, plays once - /// - Returns: Self for chaining + @discardableResult func setLoop(_ loop: Bool) -> AudioSequenceBuilder { self.loopAudio = loop return self } - /// Sets the start time offset within the audio file. - /// - /// - Parameter startTimeUs: Start time in microseconds from the beginning of the audio file - /// - Returns: Self for chaining + @discardableResult func setAudioStartTime(_ startTimeUs: Int64?) -> AudioSequenceBuilder { if let startTimeUs = startTimeUs, startTimeUs > 0 { self.audioStartTime = CMTime(value: startTimeUs, timescale: 1_000_000) @@ -58,10 +50,7 @@ internal class AudioSequenceBuilder { return self } - /// Sets the end time offset within the audio file. - /// - /// - Parameter endTimeUs: End time in microseconds within the audio file - /// - Returns: Self for chaining + @discardableResult func setAudioEndTime(_ endTimeUs: Int64?) -> AudioSequenceBuilder { if let endTimeUs = endTimeUs, endTimeUs > 0 { self.audioEndTime = CMTime(value: endTimeUs, timescale: 1_000_000) @@ -69,10 +58,7 @@ internal class AudioSequenceBuilder { return self } - /// Sets where in the composition timeline this audio should start playing. 
- /// - /// - Parameter startUs: Composition start time in microseconds (-1 or nil = from start) - /// - Returns: Self for chaining + @discardableResult func setCompositionStartTime(_ startUs: Int64?) -> AudioSequenceBuilder { if let startUs = startUs, startUs > 0 { self.compositionInsertTime = CMTime(value: startUs, timescale: 1_000_000) @@ -80,10 +66,7 @@ internal class AudioSequenceBuilder { return self } - /// Sets the duration this audio should play in the composition. - /// - /// - Parameter endUs: Composition end time in microseconds (-1 or nil = until end) - /// - Returns: Self for chaining + @discardableResult func setCompositionEndTime(_ endUs: Int64?) -> AudioSequenceBuilder { if let endUs = endUs, endUs > 0 { let endTime = CMTime(value: endUs, timescale: 1_000_000) @@ -92,50 +75,10 @@ internal class AudioSequenceBuilder { return self } - /// Builds custom audio track and adds it to composition. - /// - /// Trims or loops the audio to match target duration and applies volume. - /// - /// - Parameter composition: Composition to add audio track to - /// - Returns: The created composition track, or nil if failed - func build(in composition: AVMutableComposition) async throws -> AVMutableCompositionTrack? { - let audioURL = URL(fileURLWithPath: audioPath) - guard FileManager.default.fileExists(atPath: audioURL.path) else { - PluginLog.print("โš ๏ธ Custom audio file does not exist: \(audioPath)") - return nil - } - - let audioAsset = AVURLAsset(url: audioURL) - - guard let audioTrack = try? 
await MediaInfoExtractor.loadAudioTrack(from: audioAsset), - let compositionAudioTrack = composition.addMutableTrack( - withMediaType: .audio, - preferredTrackID: kCMPersistentTrackID_Invalid - ) - else { - PluginLog.print("โš ๏ธ Failed to add custom audio track") - return nil - } - - // Get audio duration - let audioDuration: CMTime - if #available(macOS 13.0, *) { - audioDuration = try await audioAsset.load(.duration) - } else { - audioDuration = audioAsset.duration - } - - // Calculate effective audio source range - let effectiveAudioEnd = audioEndTime ?? audioDuration - let effectiveAudioDuration = CMTimeSubtract(effectiveAudioEnd, audioStartTime) - if CMTimeCompare(effectiveAudioDuration, .zero) <= 0 { - PluginLog.print( - "โš ๏ธ Audio start/end time range is invalid (start: \(audioStartTime.seconds)s, end: \(effectiveAudioEnd.seconds)s)" - ) - return nil - } - - // Calculate how long this track should play in the composition + /// Pre-renders the audio and inserts it into `composition` with a + /// single `insertTimeRange` call. + func build(in composition: AVMutableComposition) async throws -> BuildResult? { + // Compute play duration in the composition. let remainingCompositionTime = CMTimeSubtract(targetDuration, compositionInsertTime) let playDuration = compositionPlayDuration ?? remainingCompositionTime let effectivePlayDuration = CMTimeMinimum(playDuration, remainingCompositionTime) @@ -145,136 +88,70 @@ internal class AudioSequenceBuilder { return nil } - if CMTimeCompare(audioStartTime, .zero) > 0 { - PluginLog.print("๐ŸŽต Custom audio start offset: \(audioStartTime.seconds)s") - } - if audioEndTime != nil { - PluginLog.print("๐ŸŽต Custom audio end offset: \(effectiveAudioEnd.seconds)s") - } - if CMTimeCompare(compositionInsertTime, .zero) > 0 { - PluginLog.print( - "๐ŸŽต Audio placed at composition time: \(compositionInsertTime.seconds)s" - ) + // Pre-render the audio: handles trim, loop and silence-padding + // entirely on PCM samples. 
+ guard let prerender = await AudioPreRenderer.render( + audioPath: audioPath, + audioStartTime: audioStartTime, + audioEndTime: audioEndTime, + loop: loopAudio, + targetBodyDuration: effectivePlayDuration + ) else { + return nil } - // Trim or loop custom audio to match the effective play duration - if CMTimeCompare(effectiveAudioDuration, effectivePlayDuration) > 0 { - // Trim audio to match play duration (starting from audioStartTime) - let timeRange = CMTimeRange(start: audioStartTime, duration: effectivePlayDuration) - try compositionAudioTrack.insertTimeRange( - timeRange, of: audioTrack, at: compositionInsertTime) - PluginLog.print("โœ‚๏ธ Custom audio trimmed to \(effectivePlayDuration.seconds)s") - } else if loopAudio { - // Loop audio to match play duration - var currentTime = compositionInsertTime - let compositionEndTime = CMTimeAdd(compositionInsertTime, effectivePlayDuration) - var loopCount = 0 - var isFirstLoop = true - - while CMTimeCompare(currentTime, compositionEndTime) < 0 { - loopCount += 1 - let remainingDuration = CMTimeSubtract(compositionEndTime, currentTime) - - // First loop uses audioStartTime offset, subsequent loops start from beginning of source range - let loopStartTime = isFirstLoop ? audioStartTime : audioStartTime - let loopAudioDuration = effectiveAudioDuration - - let insertDuration = CMTimeMinimum(loopAudioDuration, remainingDuration) - let timeRange = CMTimeRange(start: loopStartTime, duration: insertDuration) + // Load the pre-rendered audio and insert it once into the composition. 
+ let prerenderAsset = AVURLAsset(url: prerender.outputURL) - try compositionAudioTrack.insertTimeRange( - timeRange, of: audioTrack, at: currentTime) - currentTime = CMTimeAdd(currentTime, insertDuration) - isFirstLoop = false - } - - PluginLog.print( - "๐Ÿ”„ Custom audio looped \(loopCount) times to match \(effectivePlayDuration.seconds)s duration" - ) + let prerenderTracks: [AVAssetTrack] + if #available(macOS 13.0, *) { + prerenderTracks = (try? await prerenderAsset.loadTracks(withMediaType: .audio)) ?? [] } else { - // Play audio once without looping (starting from audioStartTime) - let insertDuration = CMTimeMinimum(effectiveAudioDuration, effectivePlayDuration) - let timeRange = CMTimeRange(start: audioStartTime, duration: insertDuration) - try compositionAudioTrack.insertTimeRange( - timeRange, of: audioTrack, at: compositionInsertTime) - PluginLog.print( - "โ–ถ๏ธ Custom audio plays once (\(insertDuration.seconds)s, no loop)" - + (CMTimeCompare(audioStartTime, .zero) > 0 - ? " starting at \(audioStartTime.seconds)s" : "")) - } - - if volume != 1.0 { - PluginLog.print("๐Ÿ”Š Custom audio volume: \(volume)") + prerenderTracks = prerenderAsset.tracks(withMediaType: .audio) } - - return compositionAudioTrack - } - - /// Checks if custom audio sample rate is compatible with video audio. 
- /// - /// - Parameter videoClips: Array of video clips to check against - /// - Returns: true if compatible or no video audio exists - func checkSampleRateCompatibility(videoClips: [VideoClip]) async -> Bool { - let customSampleRate = await MediaInfoExtractor.getAudioSampleRate(audioPath) - - guard customSampleRate > 0 else { - PluginLog.print("โš ๏ธ Could not detect custom audio sample rate") - return true // Assume compatible if we can't detect - } - - for clip in videoClips { - if let videoSampleRate = await getVideoAudioSampleRate(clip.inputPath), - videoSampleRate > 0 && videoSampleRate != customSampleRate - { - PluginLog.print( - "โŒ Sample rate mismatch: custom audio (\(customSampleRate) Hz) vs video (\(videoSampleRate) Hz)" - ) - return false - } + guard let sourceTrack = prerenderTracks.first else { + PluginLog.print("โš ๏ธ Pre-rendered audio has no audio track") + try? FileManager.default.removeItem(at: prerender.outputURL) + return nil } - PluginLog.print("โœ… Sample rates are compatible") - return true - } - - /// Gets sample rate of audio track in video file. - private func getVideoAudioSampleRate(_ videoPath: String) async -> Int? { - let url = URL(fileURLWithPath: videoPath) - guard FileManager.default.fileExists(atPath: url.path) else { + guard let compositionAudioTrack = composition.addMutableTrack( + withMediaType: .audio, + preferredTrackID: kCMPersistentTrackID_Invalid + ) else { + PluginLog.print("โš ๏ธ Failed to add custom audio track") + try? FileManager.default.removeItem(at: prerender.outputURL) return nil } - let asset = AVURLAsset(url: url) + // Insert the entire pre-rendered file at the composition offset. + // The pre-render duration is already aligned to the requested + // play duration (loop+trim handled at PCM level). 
+ let insertDuration = CMTimeMinimum(prerender.duration, effectivePlayDuration) + let timeRange = CMTimeRange(start: .zero, duration: insertDuration) do { - let tracks: [AVAssetTrack] - if #available(macOS 13.0, *) { - tracks = try await asset.loadTracks(withMediaType: .audio) - } else { - tracks = asset.tracks(withMediaType: .audio) - } - - guard let audioTrack = tracks.first else { - return nil - } - - let formatDescriptions: [Any] - if #available(macOS 13.0, *) { - formatDescriptions = try await audioTrack.load(.formatDescriptions) - } else { - formatDescriptions = audioTrack.formatDescriptions - } - - for description in formatDescriptions { - let formatDesc = description as! CMFormatDescription - if let basicDesc = CMAudioFormatDescriptionGetStreamBasicDescription(formatDesc) { - return Int(basicDesc.pointee.mSampleRate) - } - } - - return nil + try compositionAudioTrack.insertTimeRange( + timeRange, of: sourceTrack, at: compositionInsertTime + ) } catch { - return nil + PluginLog.print("โš ๏ธ Failed to insert pre-rendered audio: \(error)") + try? FileManager.default.removeItem(at: prerender.outputURL) + throw error + } + + if CMTimeCompare(compositionInsertTime, .zero) > 0 { + PluginLog.print( + "๐ŸŽต Audio placed at composition time: \(compositionInsertTime.seconds)s" + ) } + PluginLog.print( + "๐ŸŽผ Pre-rendered audio inserted: \(insertDuration.seconds)s (loop=\(loopAudio))" + ) + + return BuildResult( + track: compositionAudioTrack, + temporaryURL: prerender.outputURL + ) } } diff --git a/macos/Classes/src/features/render/helpers/CompositionBuilder.swift b/macos/Classes/src/features/render/helpers/CompositionBuilder.swift index 224909d..e1b9e2f 100644 --- a/macos/Classes/src/features/render/helpers/CompositionBuilder.swift +++ b/macos/Classes/src/features/render/helpers/CompositionBuilder.swift @@ -43,10 +43,10 @@ internal class CompositionBuilder { /// Builds the complete composition. 
/// - /// - Returns: Tuple containing composition, video composition, render size, audio mix, and source track ID + /// - Returns: Tuple containing composition, video composition, render size, audio mix, source track ID, and temporary file URLs to clean up after export /// - Throws: Error if composition creation fails func build() async throws -> ( - AVMutableComposition, VideoCompositionData, CGSize, AVAudioMix?, CMPersistentTrackID + AVMutableComposition, VideoCompositionData, CGSize, AVAudioMix?, CMPersistentTrackID, [URL] ) { guard !videoClips.isEmpty else { throw NSError( @@ -67,22 +67,23 @@ internal class CompositionBuilder { let videoResult = try await videoBuilder.build(in: composition) - // Add custom audio tracks + // Add custom audio tracks (each pre-rendered to a single PCM WAV). var customAudioTracks: [(track: AVMutableCompositionTrack, config: AudioTrackConfig)] = [] + var temporaryAudioURLs: [URL] = [] for trackConfig in audioTracks { PluginLog.print("๐ŸŽต Adding audio track: \(trackConfig.path)") let audioBuilder = AudioSequenceBuilder( audioPath: trackConfig.path, targetDuration: videoResult.totalDuration - ).setVolume(trackConfig.volume) - .setLoop(trackConfig.loop) + ).setLoop(trackConfig.loop) .setAudioStartTime(trackConfig.audioStartUs) .setAudioEndTime(trackConfig.audioEndUs) .setCompositionStartTime(trackConfig.startUs == -1 ? nil : trackConfig.startUs) .setCompositionEndTime(trackConfig.endUs == -1 ? 
nil : trackConfig.endUs) - if let track = try await audioBuilder.build(in: composition) { - customAudioTracks.append((track: track, config: trackConfig)) + if let result = try await audioBuilder.build(in: composition) { + customAudioTracks.append((track: result.track, config: trackConfig)) + temporaryAudioURLs.append(result.temporaryURL) } } @@ -176,7 +177,7 @@ internal class CompositionBuilder { // Return the track ID for fallback on older macOS versions let sourceTrackID = videoResult.videoTrack.trackID - return (composition, videoCompositionData, videoResult.renderSize, audioMix, sourceTrackID) + return (composition, videoCompositionData, videoResult.renderSize, audioMix, sourceTrackID, temporaryAudioURLs) } /// Creates audio mix with per-clip and per-track volume parameters. diff --git a/pubspec.yaml b/pubspec.yaml index 482d691..f0c06e3 100644 --- a/pubspec.yaml +++ b/pubspec.yaml @@ -1,6 +1,6 @@ name: pro_video_editor description: "A Flutter video editor: Seamlessly enhance your videos with user-friendly editing features." -version: 1.16.1 +version: 1.16.2 homepage: https://github.com/hm21/pro_video_editor/ repository: https://github.com/hm21/pro_video_editor/ documentation: https://github.com/hm21/pro_video_editor/