diff --git a/Fluid.xcodeproj/project.pbxproj b/Fluid.xcodeproj/project.pbxproj index d72a6a01..f3060480 100644 --- a/Fluid.xcodeproj/project.pbxproj +++ b/Fluid.xcodeproj/project.pbxproj @@ -12,6 +12,7 @@ 7C3697892ED70F9C005874CE /* DynamicNotchKit in Frameworks */ = {isa = PBXBuildFile; productRef = 7C3697882ED70F9C005874CE /* DynamicNotchKit */; }; 7C5AF14B2F15041600DE21B0 /* MediaRemoteAdapter in Frameworks */ = {isa = PBXBuildFile; productRef = 7C5AF14A2F15041600DE21B0 /* MediaRemoteAdapter */; }; 7C91B0012F42AA0100C0DEF0 /* HotkeyShortcutTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7C91B0022F42AA0100C0DEF0 /* HotkeyShortcutTests.swift */; }; + 7CFA1D0A2F500000C0DEF001 /* LocalAPIAudioDecoderTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7CFA1D0A2F500000C0DEF002 /* LocalAPIAudioDecoderTests.swift */; }; 7CDB0A2D2F3C4D5600FB7CAD /* DictationE2ETests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7CDB0A292F3C4D5600FB7CAD /* DictationE2ETests.swift */; }; 7CDB0A2E2F3C4D5600FB7CAD /* AudioFixtureLoader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7CDB0A2A2F3C4D5600FB7CAD /* AudioFixtureLoader.swift */; }; 7CDB0A2F2F3C4D5600FB7CAD /* dictation_fixture.wav in Resources */ = {isa = PBXBuildFile; fileRef = 7CDB0A2B2F3C4D5600FB7CAD /* dictation_fixture.wav */; }; @@ -33,6 +34,7 @@ 7C078D8F2E3B339200FB7CAC /* FluidVoice Debug.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "FluidVoice Debug.app"; sourceTree = BUILT_PRODUCTS_DIR; }; 7CDB0A202F3C4D5600FB7CAD /* FluidDictationIntegrationTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = FluidDictationIntegrationTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; 7C91B0022F42AA0100C0DEF0 /* HotkeyShortcutTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HotkeyShortcutTests.swift; sourceTree = ""; }; + 7CFA1D0A2F500000C0DEF002 /* 
LocalAPIAudioDecoderTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LocalAPIAudioDecoderTests.swift; sourceTree = ""; }; 7CDB0A292F3C4D5600FB7CAD /* DictationE2ETests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DictationE2ETests.swift; sourceTree = ""; }; 7CDB0A2A2F3C4D5600FB7CAD /* AudioFixtureLoader.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AudioFixtureLoader.swift; sourceTree = ""; }; 7CDB0A2B2F3C4D5600FB7CAD /* dictation_fixture.wav */ = {isa = PBXFileReference; lastKnownFileType = audio.wav; path = dictation_fixture.wav; sourceTree = ""; }; @@ -104,6 +106,7 @@ 7CDB0A272F3C4D5600FB7CAD /* Resources */, 7CDB0A292F3C4D5600FB7CAD /* DictationE2ETests.swift */, 7C91B0022F42AA0100C0DEF0 /* HotkeyShortcutTests.swift */, + 7CFA1D0A2F500000C0DEF002 /* LocalAPIAudioDecoderTests.swift */, ); path = FluidDictationIntegrationTests; sourceTree = ""; @@ -258,6 +261,7 @@ 7CDB0A2E2F3C4D5600FB7CAD /* AudioFixtureLoader.swift in Sources */, 7CDB0A2D2F3C4D5600FB7CAD /* DictationE2ETests.swift in Sources */, 7C91B0012F42AA0100C0DEF0 /* HotkeyShortcutTests.swift in Sources */, + 7CFA1D0A2F500000C0DEF001 /* LocalAPIAudioDecoderTests.swift in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/Sources/Fluid/Services/LocalAPI/LocalAPIAudioDecoder.swift b/Sources/Fluid/Services/LocalAPI/LocalAPIAudioDecoder.swift index 83b0b5a4..026dc569 100644 --- a/Sources/Fluid/Services/LocalAPI/LocalAPIAudioDecoder.swift +++ b/Sources/Fluid/Services/LocalAPI/LocalAPIAudioDecoder.swift @@ -9,7 +9,10 @@ enum LocalAPIAudioDecoder { let file = try AVAudioFile(forReading: fileURL) let sourceFormat = file.processingFormat let maxFrames = AVAudioFramePosition(sourceFormat.sampleRate * self.maxDurationSeconds) - let framesToRead = min(file.length, maxFrames) + guard file.length <= maxFrames else { + throw self.durationLimitExceededError() + } + let framesToRead = file.length 
guard framesToRead > 0 else { return [] } guard let sourceBuffer = AVAudioPCMBuffer( @@ -44,16 +47,20 @@ enum LocalAPIAudioDecoder { let maxFrames = AVAudioFramePosition(sourceFormat.sampleRate * self.maxDurationSeconds) guard file.length <= maxFrames else { - throw NSError( - domain: "LocalAPIAudioDecoder", - code: -5, - userInfo: [NSLocalizedDescriptionKey: "Audio file exceeds the \(Int(self.maxDurationSeconds)) second API limit."] - ) + throw self.durationLimitExceededError() } return Int((Double(file.length) * self.sampleRate / sourceFormat.sampleRate).rounded()) } + private static func durationLimitExceededError() -> NSError { + NSError( + domain: "LocalAPIAudioDecoder", + code: -5, + userInfo: [NSLocalizedDescriptionKey: "Audio file exceeds the \(Int(self.maxDurationSeconds)) second API limit."] + ) + } + private static func convertToMono16k(_ sourceBuffer: AVAudioPCMBuffer) throws -> [Float] { guard let targetFormat = AVAudioFormat( commonFormat: .pcmFormatFloat32, diff --git a/Tests/FluidDictationIntegrationTests/LocalAPIAudioDecoderTests.swift b/Tests/FluidDictationIntegrationTests/LocalAPIAudioDecoderTests.swift new file mode 100644 index 00000000..6d7a15e2 --- /dev/null +++ b/Tests/FluidDictationIntegrationTests/LocalAPIAudioDecoderTests.swift @@ -0,0 +1,105 @@ +import AVFoundation +@testable import FluidVoice_Debug +import Foundation +import XCTest + +final class LocalAPIAudioDecoderTests: XCTestCase { + // The LocalAPI enforces a 300-second limit on transcription audio. 
+    /// Fixture duration just past the 300-second limit, so decoding must be rejected.
+    private let overLimitSeconds: Double = 305
+    /// Fixture duration well inside the limit, so decoding must succeed.
+    private let underLimitSeconds: Double = 1
+    /// Sample rate of the generated fixtures. At 8 kHz, 16-bit mono, the 305-second
+    /// fixture is 2,440,000 frames (roughly 4.9 MB of PCM data).
+    private let fixtureSampleRate: Double = 8_000
+
+    /// Inline (in-memory) audio longer than the limit must fail with the decoder's
+    /// duration-limit error (domain `LocalAPIAudioDecoder`, code `-5`) rather than
+    /// returning a shortened sample array.
+    func testInlineAudioOverLimitThrowsInsteadOfTruncating() throws {
+        let data = try Self.makeSilentWavData(durationSeconds: self.overLimitSeconds, sampleRate: self.fixtureSampleRate)
+
+        XCTAssertThrowsError(
+            try LocalAPIAudioDecoder.samples(fromAudioData: data, suggestedExtension: "wav"),
+            "Inline audio over the 300s limit must throw instead of silently truncating."
+        ) { error in
+            let nsError = error as NSError
+            XCTAssertEqual(nsError.domain, "LocalAPIAudioDecoder")
+            XCTAssertEqual(nsError.code, -5)
+        }
+    }
+
+    /// Inline audio shorter than the limit must decode to a non-trivial number of samples.
+    func testInlineAudioUnderLimitDecodesNormally() throws {
+        let data = try Self.makeSilentWavData(durationSeconds: self.underLimitSeconds, sampleRate: self.fixtureSampleRate)
+
+        let samples = try LocalAPIAudioDecoder.samples(fromAudioData: data, suggestedExtension: "wav")
+
+        // Source is 1s of mono audio resampled to 16 kHz, so expect roughly 16k samples.
+        // The bounds are deliberately loose so resampler edge effects cannot fail the test.
+        XCTAssertGreaterThan(samples.count, 8_000, "Under-limit inline audio should decode to samples.")
+        XCTAssertLessThan(samples.count, 24_000, "Decoded sample count should stay near the 16 kHz expectation.")
+    }
+
+    /// Decoding from a file URL must reject over-limit audio with the same
+    /// duration-limit error as the inline path.
+    func testFilePathDecodeRejectsOverLimitAudio() throws {
+        let url = try Self.makeSilentWavFile(durationSeconds: self.overLimitSeconds, sampleRate: self.fixtureSampleRate)
+        defer { try? FileManager.default.removeItem(at: url) }
+
+        XCTAssertThrowsError(try LocalAPIAudioDecoder.samples(from: url)) { error in
+            let nsError = error as NSError
+            XCTAssertEqual(nsError.domain, "LocalAPIAudioDecoder")
+            XCTAssertEqual(nsError.code, -5)
+        }
+    }
+
+    // MARK: - Helpers
+
+    /// Builds a silent WAV fixture and returns its bytes.
+    ///
+    /// The fixture is written to a temporary file, read back into memory, and the
+    /// temporary file is removed before returning.
+    ///
+    /// - Parameters:
+    ///   - durationSeconds: Length of the fixture in seconds.
+    ///   - sampleRate: Sample rate of the fixture in Hz.
+    /// - Returns: The complete contents of the generated WAV file.
+    /// - Throws: Any error raised while writing or reading the temporary file.
+    private static func makeSilentWavData(durationSeconds: Double, sampleRate: Double) throws -> Data {
+        let url = try makeSilentWavFile(durationSeconds: durationSeconds, sampleRate: sampleRate)
+        defer { try? FileManager.default.removeItem(at: url) }
+        return try Data(contentsOf: url)
+    }
+
+    /// Writes a silent WAV fixture to a uniquely named file in the temporary directory.
+    ///
+    /// On success the caller owns the file and is responsible for deleting it.
+    /// On failure no file is left behind.
+    ///
+    /// - Parameters:
+    ///   - durationSeconds: Length of the fixture in seconds.
+    ///   - sampleRate: Sample rate of the fixture in Hz.
+    /// - Returns: The URL of the written file.
+    /// - Throws: Any error raised by `writeSilentWav(to:durationSeconds:sampleRate:)`.
+    private static func makeSilentWavFile(durationSeconds: Double, sampleRate: Double) throws -> URL {
+        let url = FileManager.default.temporaryDirectory
+            .appendingPathComponent("fluidvoice-decoder-test-\(UUID().uuidString)")
+            .appendingPathExtension("wav")
+        do {
+            try writeSilentWav(to: url, durationSeconds: durationSeconds, sampleRate: sampleRate)
+        } catch {
+            // The caller never receives the URL when writing fails, so its own cleanup
+            // cannot run; remove any partially written file before propagating the error.
+            try? FileManager.default.removeItem(at: url)
+            throw error
+        }
+        return url
+    }
+
+    /// Writes `durationSeconds` of silence to `url` as 16-bit little-endian mono linear PCM.
+    ///
+    /// Frames are written in chunks of at most 16,000 frames from a single reusable,
+    /// zero-filled buffer, so memory use stays small even for the over-limit fixture.
+    ///
+    /// - Parameters:
+    ///   - url: Destination of the audio file.
+    ///   - durationSeconds: Length of the audio in seconds; the frame count is
+    ///     `sampleRate * durationSeconds`, rounded to the nearest frame.
+    ///   - sampleRate: Sample rate of the audio in Hz.
+    /// - Throws: An `NSError` in domain `LocalAPIAudioDecoderTests` when the writer format
+    ///   (code `-1`) or the writer buffer (code `-2`) cannot be created, or any error
+    ///   raised by `AVAudioFile` while creating or writing the file.
+    private static func writeSilentWav(to url: URL, durationSeconds: Double, sampleRate: Double) throws {
+        let settings: [String: Any] = [
+            AVFormatIDKey: kAudioFormatLinearPCM,
+            AVSampleRateKey: sampleRate,
+            AVNumberOfChannelsKey: 1,
+            AVLinearPCMBitDepthKey: 16,
+            AVLinearPCMIsFloatKey: false,
+            AVLinearPCMIsBigEndianKey: false
+        ]
+        // AVAudioFile flushes and closes the file when this local reference deallocates,
+        // which happens no later than the return of this function, so callers can safely
+        // read the bytes afterwards.
+        let file = try AVAudioFile(forWriting: url, settings: settings)
+
+        // Buffers handed to the file are Float32, non-interleaved, mono.
+        // NOTE(review): this presumably mirrors the file's default processing format — confirm.
+        guard let format = AVAudioFormat(
+            commonFormat: .pcmFormatFloat32,
+            sampleRate: sampleRate,
+            channels: 1,
+            interleaved: false
+        ) else {
+            throw NSError(
+                domain: "LocalAPIAudioDecoderTests",
+                code: -1,
+                userInfo: [NSLocalizedDescriptionKey: "Unable to create writer audio format."]
+            )
+        }
+
+        let totalFrames = AVAudioFrameCount((sampleRate * durationSeconds).rounded())
+        // Nothing to write: leave the freshly created, empty file in place.
+        guard totalFrames > 0 else { return }
+
+        // One buffer is allocated up front and reused for every chunk instead of
+        // allocating a new buffer on each loop iteration.
+        let chunkSize = min(16_000, totalFrames)
+        guard let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: chunkSize) else {
+            throw NSError(
+                domain: "LocalAPIAudioDecoderTests",
+                code: -2,
+                userInfo: [NSLocalizedDescriptionKey: "Unable to allocate writer buffer."]
+            )
+        }
+        // Zero the samples explicitly rather than relying on the buffer's initial contents.
+        if let channels = buffer.floatChannelData {
+            channels[0].initialize(repeating: 0, count: Int(chunkSize))
+        }
+
+        var remaining = totalFrames
+        while remaining > 0 {
+            let thisChunk = min(chunkSize, remaining)
+            // Only the first `thisChunk` frames of the shared buffer are written.
+            buffer.frameLength = thisChunk
+            try file.write(from: buffer)
+            remaining -= thisChunk
+        }
+    }
+}