MATLAB-audio-denoising/denoising.m at master · FechL/MATLAB-audio-denoising · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
%% Noise Reduction on Audio using FFT
% This program reduces noise from an audio file using spectrum analysis
% through FFT (Fast Fourier Transform)
% Steps:
% 1. Read the audio file
% 2. Perform FFT to convert the time-domain signal to frequency-domain
% 3. Identify and reduce noise based on a threshold
% 4. Perform IFFT to return to the time-domain
% 5. Save the processed audio result

clear all;
close all;
clc;

%% PART 1: Reading and Preparing the Audio

% Read the audio file
[audioIn, Fs] = audioread('rekaman.wav');

% Convert stereo audio to mono if necessary
if size(audioIn, 2) > 1
    audioIn = mean(audioIn, 2); % Convert stereo to mono by averaging channels
end

% Parameters for FFT processing
frameLength = 2048; % Frame length (must be power of 2 for efficient FFT)
hopSize = frameLength / 4; % Overlap between frames (75% overlap)
numFrames = floor((length(audioIn) - frameLength) / hopSize) + 1; % Number of frames

% Prepare window function to reduce spectral leakage
window = hann(frameLength, 'periodic'); % Hanning window
windowSum = sum(window.^2); % For later normalization

% Prepare output audio
audioOut = zeros(size(audioIn));

%% PART 2: Noise Reduction Process using FFT

% Estimate noise from the first 5 frames (assume beginning is noise)
noiseEstStart = 1;
noiseEstEnd = min(5, numFrames);
noiseProfile = zeros(frameLength, 1);

% Build noise profile from the first few frames
for frameIdx = noiseEstStart:noiseEstEnd
    % Starting index of the current frame
    startIdx = (frameIdx - 1) * hopSize + 1;

    % Extract frame and apply window
    frame = audioIn(startIdx:startIdx+frameLength-1) .* window;

    % FFT transformation
    frameFFT = fft(frame);

    % Accumulate magnitude spectrum for noise profile
    noiseProfile = noiseProfile + abs(frameFFT);
end

% Average the noise profile
noiseProfile = noiseProfile / (noiseEstEnd - noiseEstStart + 1);

% Noise reduction parameters
% Threshold factor (higher = more aggressive noise reduction)
noiseReductionFactor = 3.0;
% Minimum gain/attenuation to be applied
minGain = 0.05;

% Spectral subtraction parameters
alpha = 100; % Oversubtraction factor
beta = 0.0000001; % Noise floor

% Process each frame
for frameIdx = 1:numFrames
    % Starting index of the current frame
    startIdx = (frameIdx - 1) * hopSize + 1;

    % Extract frame and apply window
    frame = audioIn(startIdx:startIdx+frameLength-1) .* window;

    % FFT transformation to frequency domain
    frameFFT = fft(frame);

    % Compute magnitude and phase of FFT
    magFrame = abs(frameFFT);
    phaseFrame = angle(frameFFT);

    % Spectral Subtraction with enhancements
    % Method subtracts estimated noise from magnitude spectrum

    % 1. Spectral subtraction with alpha factor (oversubtraction)
    magEnhanced = magFrame - alpha * noiseProfile;

    % 2. Ensure non-negative values and apply noise floor
    magEnhanced = max(magEnhanced, beta * magFrame);

    % 3. Compute gain mask based on local SNR
    gainMask = magEnhanced ./ max(magFrame, eps);

    % 4. Limit gain to avoid musical noise
    gainMask = max(gainMask, minGain);

    % 5. Smooth gain mask using moving average to reduce artifacts
    smoothLength = 5; % Length of smoothing filter
    smoothFilter = ones(smoothLength, 1) / smoothLength;

    % Apply smoothing to the first half of the spectrum (due to symmetry)
    halfLength = floor(frameLength/2) + 1;
    gainMask(1:halfLength) = filter(smoothFilter, 1, gainMask(1:halfLength));

    % Mirror the gain mask to the second half of the spectrum (maintain symmetry)
    gainMask(halfLength+1:end) = flipud(gainMask(2:frameLength-halfLength+1));

    % 6. Apply gain mask to the spectrum while preserving phase
    enhancedFFT = gainMask .* frameFFT;

    % 7. Inverse FFT back to time domain
    enhancedFrame = real(ifft(enhancedFFT));

    % 8. Apply window again for overlap-add method
    enhancedFrame = enhancedFrame .* window;

    % 9. Add to output using overlap-add
    audioOut(startIdx:startIdx+frameLength-1) = audioOut(startIdx:startIdx+frameLength-1) + enhancedFrame;
end

% Normalize output based on window overlap
% Compute normalization factor for overlap-add
overlapFactor = frameLength / hopSize;
normalizeCoeff = zeros(size(audioOut));

for frameIdx = 1:numFrames
    startIdx = (frameIdx - 1) * hopSize + 1;
    normalizeCoeff(startIdx:startIdx+frameLength-1) = normalizeCoeff(startIdx:startIdx+frameLength-1) + window;
end

% Normalize based on overlap
validIdx = normalizeCoeff > eps;
audioOut(validIdx) = audioOut(validIdx) ./ normalizeCoeff(validIdx);

% Match the amplitude of output to input (final normalization)
audioOut = audioOut * (rms(audioIn) / max(eps, rms(audioOut)));

%% PART 3: Plot Results and Save Output

% Trim audio to match input length
audioOut = audioOut(1:length(audioIn));

% Plot comparison between original and noise-reduced signal
t = (0:length(audioIn)-1) / Fs; % Time in seconds

figure('Name', 'Time Domain Comparison', 'Position', [100, 100, 900, 600]);

% Plot original signal
subplot(2, 1, 1);
plot(t, audioIn);
title('Original Audio Signal');
xlabel('Time (seconds)');
ylabel('Amplitude');
grid on;

% Plot noise-reduced signal
subplot(2, 1, 2);
plot(t, audioOut);
title('Audio Signal After Noise Reduction');
xlabel('Time (seconds)');
ylabel('Amplitude');
grid on;

% Frequency domain comparison plot
figure('Name', 'Frequency Domain Comparison', 'Position', [100, 100, 900, 600]);

% Frequency for plotting (only first half is meaningful)
f = Fs * (0:(frameLength/2)) / frameLength;

% Original spectrum
audioInSegment = audioIn(1:min(length(audioIn), frameLength)) .* hann(min(length(audioIn), frameLength));
audioInFFT = fft(audioInSegment, frameLength);
audioInMag = abs(audioInFFT(1:frameLength/2+1));

% Spectrum after noise reduction
audioOutSegment = audioOut(1:min(length(audioOut), frameLength)) .* hann(min(length(audioOut), frameLength));
audioOutFFT = fft(audioOutSegment, frameLength);
audioOutMag = abs(audioOutFFT(1:frameLength/2+1));

% Plot original spectrum
subplot(2, 1, 1);
plot(f, 20*log10(audioInMag + eps));
title('Original Audio Spectrum');
xlabel('Frequency (Hz)');
ylabel('Magnitude (dB)');
grid on;
xlim([0, Fs/2]);

% Plot spectrum after noise reduction
subplot(2, 1, 2);
plot(f, 20*log10(audioOutMag + eps));
title('Spectrum After Noise Reduction');
xlabel('Frequency (Hz)');
ylabel('Magnitude (dB)');
grid on;
xlim([0, Fs/2]);

% Compute SNR before and after
% Assume noise is the first second of the audio
noiseSegment = audioIn(1:min(length(audioIn), Fs)); % First 1 second as noise
signalPower = mean(audioIn.^2);
noisePower = mean(noiseSegment.^2);
originalSNR = 10 * log10(signalPower / noisePower);

% Remaining noise after processing
remainingNoise = audioIn - audioOut;
remainingNoisePower = mean(remainingNoise.^2);
enhancedSNR = 10 * log10(signalPower / remainingNoisePower);

fprintf('Original SNR: %.2f dB\n', originalSNR);
fprintf('SNR After Noise Reduction: %.2f dB\n', enhancedSNR);
fprintf('SNR Improvement: %.2f dB\n', enhancedSNR - originalSNR);

% Save the output audio
audiowrite('record_filtered.wav', audioOut, Fs);
fprintf('Noise-reduced audio file saved as record_filtered.wav\n');

% Prepare time and frequency
t = (0:length(audioIn)-1) / Fs; % Time (seconds)
segmentLength = frameLength;

% Take short segment for spectrum
audioInSegment = audioIn(1:segmentLength) .* hann(segmentLength);
audioOutSegment = audioOut(1:segmentLength) .* hann(segmentLength);

% FFT without dB conversion
audioInFFT = fft(audioInSegment);
audioOutFFT = fft(audioOutSegment);
audioInMag = abs(audioInFFT(1:segmentLength/2+1));
audioOutMag = abs(audioOutFFT(1:segmentLength/2+1));
f = Fs * (0:(segmentLength/2)) / segmentLength;

figure('Name', 'Original vs Denoised Audio Comparison', 'Position', [100, 100, 1000, 700]);

% Subplot 1: Time Domain
subplot(2,1,1);
plot(t, audioIn, 'b', 'DisplayName', 'Original'); hold on;
plot(t, audioOut, 'r', 'DisplayName', 'After Denoising');
title('Audio Signal Comparison (Time Domain)');
xlabel('Time (seconds)');
ylabel('Amplitude');
legend('Location', 'best');
grid on;

% Subplot 2: Frequency Domain (Linear scale, not dB, with zoom)
subplot(2,1,2);
plot(f, 20*log10(audioInMag + eps), 'b', 'DisplayName', 'Original'); hold on;
plot(f, 20*log10(audioOutMag + eps), 'r', 'DisplayName', 'After Denoising');
title('Audio Spectrum Comparison');
xlabel('Frequency (Hz)');
ylabel('Magnitude (dB)');
legend('show');
grid on;
xlim([0, Fs/2]);

%% PART 4: Additional Functions (Optional)

% Function to objectively evaluate audio quality
function [snr, segmentalSNR] = evaluateAudioQuality(clean, noisy, Fs)
% Compute Signal-to-Noise Ratio (SNR)
signalPower = sum(clean.^2);
noisePower = sum((clean - noisy).^2);
snr = 10 * log10(signalPower / max(noisePower, eps));

% Compute Segmental SNR
frameLength = round(0.03 * Fs); % 30ms frame
hopSize = frameLength / 2; % 50% overlap
numFrames = floor((length(clean) - frameLength) / hopSize) + 1;

segmentalSNRValues = zeros(numFrames, 1);

for i = 1:numFrames
    startIdx = (i - 1) * hopSize + 1;
    endIdx = startIdx + frameLength - 1;

    cleanFrame = clean(startIdx:endIdx);
    noisyFrame = noisy(startIdx:endIdx);

    signalFramePower = sum(cleanFrame.^2);
    noiseFramePower = sum((cleanFrame - noisyFrame).^2);

    frameSnr = 10 * log10(signalFramePower / max(noiseFramePower, eps));

    % Limit SNR to a reasonable range
    frameSnr = min(max(frameSnr, -10), 35);

    segmentalSNRValues(i) = frameSnr;
end

segmentalSNR = mean(segmentalSNRValues);
end