Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Fluid.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
7C3697892ED70F9C005874CE /* DynamicNotchKit in Frameworks */ = {isa = PBXBuildFile; productRef = 7C3697882ED70F9C005874CE /* DynamicNotchKit */; };
7C5AF14B2F15041600DE21B0 /* MediaRemoteAdapter in Frameworks */ = {isa = PBXBuildFile; productRef = 7C5AF14A2F15041600DE21B0 /* MediaRemoteAdapter */; };
7C91B0012F42AA0100C0DEF0 /* HotkeyShortcutTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7C91B0022F42AA0100C0DEF0 /* HotkeyShortcutTests.swift */; };
7CFA1D0A2F500000C0DEF001 /* LocalAPIAudioDecoderTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7CFA1D0A2F500000C0DEF002 /* LocalAPIAudioDecoderTests.swift */; };
7CDB0A2D2F3C4D5600FB7CAD /* DictationE2ETests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7CDB0A292F3C4D5600FB7CAD /* DictationE2ETests.swift */; };
7CDB0A2E2F3C4D5600FB7CAD /* AudioFixtureLoader.swift in Sources */ = {isa = PBXBuildFile; fileRef = 7CDB0A2A2F3C4D5600FB7CAD /* AudioFixtureLoader.swift */; };
7CDB0A2F2F3C4D5600FB7CAD /* dictation_fixture.wav in Resources */ = {isa = PBXBuildFile; fileRef = 7CDB0A2B2F3C4D5600FB7CAD /* dictation_fixture.wav */; };
Expand All @@ -33,6 +34,7 @@
7C078D8F2E3B339200FB7CAC /* FluidVoice Debug.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "FluidVoice Debug.app"; sourceTree = BUILT_PRODUCTS_DIR; };
7CDB0A202F3C4D5600FB7CAD /* FluidDictationIntegrationTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = FluidDictationIntegrationTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
7C91B0022F42AA0100C0DEF0 /* HotkeyShortcutTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HotkeyShortcutTests.swift; sourceTree = "<group>"; };
7CFA1D0A2F500000C0DEF002 /* LocalAPIAudioDecoderTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LocalAPIAudioDecoderTests.swift; sourceTree = "<group>"; };
7CDB0A292F3C4D5600FB7CAD /* DictationE2ETests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DictationE2ETests.swift; sourceTree = "<group>"; };
7CDB0A2A2F3C4D5600FB7CAD /* AudioFixtureLoader.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AudioFixtureLoader.swift; sourceTree = "<group>"; };
7CDB0A2B2F3C4D5600FB7CAD /* dictation_fixture.wav */ = {isa = PBXFileReference; lastKnownFileType = audio.wav; path = dictation_fixture.wav; sourceTree = "<group>"; };
Expand Down Expand Up @@ -104,6 +106,7 @@
7CDB0A272F3C4D5600FB7CAD /* Resources */,
7CDB0A292F3C4D5600FB7CAD /* DictationE2ETests.swift */,
7C91B0022F42AA0100C0DEF0 /* HotkeyShortcutTests.swift */,
7CFA1D0A2F500000C0DEF002 /* LocalAPIAudioDecoderTests.swift */,
);
path = FluidDictationIntegrationTests;
sourceTree = "<group>";
Expand Down Expand Up @@ -258,6 +261,7 @@
7CDB0A2E2F3C4D5600FB7CAD /* AudioFixtureLoader.swift in Sources */,
7CDB0A2D2F3C4D5600FB7CAD /* DictationE2ETests.swift in Sources */,
7C91B0012F42AA0100C0DEF0 /* HotkeyShortcutTests.swift in Sources */,
7CFA1D0A2F500000C0DEF001 /* LocalAPIAudioDecoderTests.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
Expand Down
19 changes: 13 additions & 6 deletions Sources/Fluid/Services/LocalAPI/LocalAPIAudioDecoder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@ enum LocalAPIAudioDecoder {
let file = try AVAudioFile(forReading: fileURL)
let sourceFormat = file.processingFormat
let maxFrames = AVAudioFramePosition(sourceFormat.sampleRate * self.maxDurationSeconds)
let framesToRead = min(file.length, maxFrames)
guard file.length <= maxFrames else {
throw self.durationLimitExceededError()
}
let framesToRead = file.length
guard framesToRead > 0 else { return [] }

guard let sourceBuffer = AVAudioPCMBuffer(
Expand Down Expand Up @@ -44,16 +47,20 @@ enum LocalAPIAudioDecoder {

let maxFrames = AVAudioFramePosition(sourceFormat.sampleRate * self.maxDurationSeconds)
guard file.length <= maxFrames else {
throw NSError(
domain: "LocalAPIAudioDecoder",
code: -5,
userInfo: [NSLocalizedDescriptionKey: "Audio file exceeds the \(Int(self.maxDurationSeconds)) second API limit."]
)
throw self.durationLimitExceededError()
}

return Int((Double(file.length) * self.sampleRate / sourceFormat.sampleRate).rounded())
}

/// Builds the error reported when an audio file is longer than `maxDurationSeconds`.
///
/// - Returns: An `NSError` in the `LocalAPIAudioDecoder` domain with code `-5` whose
///   localized description states the limit truncated to whole seconds.
private static func durationLimitExceededError() -> NSError {
    let limitInSeconds = Int(self.maxDurationSeconds)
    let description = "Audio file exceeds the \(limitInSeconds) second API limit."
    return NSError(
        domain: "LocalAPIAudioDecoder",
        code: -5,
        userInfo: [NSLocalizedDescriptionKey: description]
    )
}

private static func convertToMono16k(_ sourceBuffer: AVAudioPCMBuffer) throws -> [Float] {
guard let targetFormat = AVAudioFormat(
commonFormat: .pcmFormatFloat32,
Expand Down
105 changes: 105 additions & 0 deletions Tests/FluidDictationIntegrationTests/LocalAPIAudioDecoderTests.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import AVFoundation
@testable import FluidVoice_Debug
import Foundation
import XCTest

/// Tests for the duration limit applied by `LocalAPIAudioDecoder`.
///
/// Fixtures are mono 16-bit PCM WAV files containing silence, synthesized at runtime
/// in the temporary directory so no large audio assets have to be checked in.
final class LocalAPIAudioDecoderTests: XCTestCase {
    // The LocalAPI enforces a 300-second limit on transcription audio.
    private let overLimitSeconds: Double = 305
    private let underLimitSeconds: Double = 1
    // A low sample rate keeps the over-limit fixture small.
    private let fixtureSampleRate: Double = 8_000

    /// Inline (in-memory) audio longer than the limit must be rejected with the
    /// decoder's duration error rather than being cut down to the limit.
    func testInlineAudioOverLimitThrowsInsteadOfTruncating() throws {
        let data = try Self.makeSilentWavData(durationSeconds: self.overLimitSeconds, sampleRate: self.fixtureSampleRate)

        XCTAssertThrowsError(
            try LocalAPIAudioDecoder.samples(fromAudioData: data, suggestedExtension: "wav"),
            "Inline audio over the 300s limit must throw instead of silently truncating."
        ) { error in
            let nsError = error as NSError
            XCTAssertEqual(nsError.domain, "LocalAPIAudioDecoder")
            XCTAssertEqual(nsError.code, -5)
        }
    }

    /// Inline audio shorter than the limit must still decode to samples.
    func testInlineAudioUnderLimitDecodesNormally() throws {
        let data = try Self.makeSilentWavData(durationSeconds: self.underLimitSeconds, sampleRate: self.fixtureSampleRate)

        let samples = try LocalAPIAudioDecoder.samples(fromAudioData: data, suggestedExtension: "wav")

        // Source is 1s of mono audio resampled to 16 kHz, so expect roughly 16k samples.
        XCTAssertGreaterThan(samples.count, 8_000, "Under-limit inline audio should decode to samples.")
        XCTAssertLessThan(samples.count, 24_000, "Decoded sample count should stay near the 16 kHz expectation.")
    }

    /// Decoding from a file URL must reject over-limit audio with the same error.
    func testFilePathDecodeRejectsOverLimitAudio() throws {
        let url = try Self.makeSilentWavFile(durationSeconds: self.overLimitSeconds, sampleRate: self.fixtureSampleRate)
        defer { try? FileManager.default.removeItem(at: url) }

        XCTAssertThrowsError(
            try LocalAPIAudioDecoder.samples(from: url),
            "File-based audio over the 300s limit must throw."
        ) { error in
            let nsError = error as NSError
            XCTAssertEqual(nsError.domain, "LocalAPIAudioDecoder")
            XCTAssertEqual(nsError.code, -5)
        }
    }

    // MARK: - Helpers

    /// Returns the bytes of a freshly generated silent WAV file.
    ///
    /// The backing temporary file is removed before this function returns.
    private static func makeSilentWavData(durationSeconds: Double, sampleRate: Double) throws -> Data {
        let url = try makeSilentWavFile(durationSeconds: durationSeconds, sampleRate: sampleRate)
        defer { try? FileManager.default.removeItem(at: url) }
        return try Data(contentsOf: url)
    }

    /// Writes a silent WAV file to a unique path in the temporary directory.
    ///
    /// - Returns: The URL of the written file. The caller is responsible for deleting it.
    /// - Throws: Any error raised while writing; in that case the partially written
    ///   file is removed before the error propagates.
    private static func makeSilentWavFile(durationSeconds: Double, sampleRate: Double) throws -> URL {
        let url = FileManager.default.temporaryDirectory
            .appendingPathComponent("fluidvoice-decoder-test-\(UUID().uuidString)")
            .appendingPathExtension("wav")
        do {
            try writeSilentWav(to: url, durationSeconds: durationSeconds, sampleRate: sampleRate)
        } catch {
            // The caller never receives the URL on failure, so clean up here to
            // avoid leaking a partial fixture into the temporary directory.
            try? FileManager.default.removeItem(at: url)
            throw error
        }
        return url
    }

    /// Writes `durationSeconds` of silence as mono 16-bit little-endian PCM to `url`.
    ///
    /// - Parameters:
    ///   - url: Destination file URL.
    ///   - durationSeconds: Length of the audio to write.
    ///   - sampleRate: Sample rate of the written file, in Hz.
    /// - Throws: An `AVAudioFile` error when opening or writing fails, or an `NSError`
    ///   in the `LocalAPIAudioDecoderTests` domain when the buffer cannot be allocated.
    private static func writeSilentWav(to url: URL, durationSeconds: Double, sampleRate: Double) throws {
        let settings: [String: Any] = [
            AVFormatIDKey: kAudioFormatLinearPCM,
            AVSampleRateKey: sampleRate,
            AVNumberOfChannelsKey: 1,
            AVLinearPCMBitDepthKey: 16,
            AVLinearPCMIsFloatKey: false,
            AVLinearPCMIsBigEndianKey: false
        ]
        // AVAudioFile flushes and closes the file when this local reference is released,
        // which happens no later than the return of this function, so callers can safely
        // read the bytes afterwards.
        let file = try AVAudioFile(forWriting: url, settings: settings)

        let totalFrames = AVAudioFrameCount((sampleRate * durationSeconds).rounded())
        let chunkSize: AVAudioFrameCount = 16_000

        // Buffers passed to `write(from:)` must use the file's processing format, so take
        // the format from the file instead of rebuilding it by hand. One buffer is
        // allocated up front and reused for every chunk.
        guard let buffer = AVAudioPCMBuffer(pcmFormat: file.processingFormat, frameCapacity: chunkSize) else {
            throw NSError(
                domain: "LocalAPIAudioDecoderTests",
                code: -2,
                userInfo: [NSLocalizedDescriptionKey: "Unable to allocate writer buffer."]
            )
        }

        // Zero the samples explicitly so the fixture is silent regardless of the
        // buffer's initial contents.
        if let channels = buffer.floatChannelData {
            for channel in 0..<Int(buffer.format.channelCount) {
                channels[channel].initialize(repeating: 0, count: Int(chunkSize))
            }
        }

        var remaining = totalFrames
        while remaining > 0 {
            let thisChunk = min(chunkSize, remaining)
            // Only the first `frameLength` frames of the buffer are written.
            buffer.frameLength = thisChunk
            try file.write(from: buffer)
            remaining -= thisChunk
        }
    }
}
Loading