Skip to content

Commit a350d7a

Browse files
authored
Merge pull request #47 from tbfly/master
*): Adjust for coding style.
2 parents 3cd379e + 8dd2bfe commit a350d7a

2 files changed

Lines changed: 47 additions & 47 deletions

File tree

python_speech_features/base.py

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,12 @@ def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
1919
:param nfft: the FFT size. Default is 512.
2020
:param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
2121
:param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
22-
:param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
23-
:param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
22+
:param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
23+
:param ceplifter: apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22.
2424
:param appendEnergy: if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.
25-
:param winfunc: the analysis window to apply to each frame. By default no window is applied. You can use numpy window functions here e.g. winfunc=numpy.hamming
25+
:param winfunc: the analysis window to apply to each frame. By default no window is applied. You can use numpy window functions here e.g. winfunc=numpy.hamming
2626
:returns: A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.
27-
"""
27+
"""
2828
feat,energy = fbank(signal,samplerate,winlen,winstep,nfilt,nfft,lowfreq,highfreq,preemph,winfunc)
2929
feat = numpy.log(feat)
3030
feat = dct(feat, type=2, axis=1, norm='ortho')[:,:numcep]
@@ -39,7 +39,7 @@ def fbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
3939
4040
:param signal: the audio signal from which to compute features. Should be an N*1 array
4141
:param samplerate: the samplerate of the signal we are working with.
42-
:param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
42+
:param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
4343
:param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
4444
:param nfilt: the number of filters in the filterbank, default 26.
4545
:param nfft: the FFT size. Default is 512.
@@ -49,18 +49,18 @@ def fbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
4949
:param winfunc: the analysis window to apply to each frame. By default no window is applied. You can use numpy window functions here e.g. winfunc=numpy.hamming
5050
:returns: 2 values. The first is a numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector. The
5151
second return value is the energy in each frame (total energy, unwindowed)
52-
"""
52+
"""
5353
highfreq= highfreq or samplerate/2
5454
signal = sigproc.preemphasis(signal,preemph)
5555
frames = sigproc.framesig(signal, winlen*samplerate, winstep*samplerate, winfunc)
5656
pspec = sigproc.powspec(frames,nfft)
5757
energy = numpy.sum(pspec,1) # this stores the total energy in each frame
5858
energy = numpy.where(energy == 0,numpy.finfo(float).eps,energy) # if energy is zero, we get problems with log
59-
59+
6060
fb = get_filterbanks(nfilt,nfft,samplerate,lowfreq,highfreq)
6161
feat = numpy.dot(pspec,fb.T) # compute the filterbank energies
6262
feat = numpy.where(feat == 0,numpy.finfo(float).eps,feat) # if feat is zero, we get problems with log
63-
63+
6464
return feat,energy
6565

6666
def logfbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
@@ -69,15 +69,15 @@ def logfbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
6969
7070
:param signal: the audio signal from which to compute features. Should be an N*1 array
7171
:param samplerate: the samplerate of the signal we are working with.
72-
:param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
73-
:param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
72+
:param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
73+
:param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
7474
:param nfilt: the number of filters in the filterbank, default 26.
7575
:param nfft: the FFT size. Default is 512.
7676
:param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
7777
:param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
78-
:param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
79-
:returns: A numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector.
80-
"""
78+
:param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
79+
:returns: A numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector.
80+
"""
8181
feat,energy = fbank(signal,samplerate,winlen,winstep,nfilt,nfft,lowfreq,highfreq,preemph)
8282
return numpy.log(feat)
8383

@@ -88,36 +88,36 @@ def ssc(signal,samplerate=16000,winlen=0.025,winstep=0.01,
8888
8989
:param signal: the audio signal from which to compute features. Should be an N*1 array
9090
:param samplerate: the samplerate of the signal we are working with.
91-
:param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
92-
:param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
91+
:param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
92+
:param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
9393
:param nfilt: the number of filters in the filterbank, default 26.
9494
:param nfft: the FFT size. Default is 512.
9595
:param lowfreq: lowest band edge of mel filters. In Hz, default is 0.
9696
:param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2
9797
:param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
9898
:param winfunc: the analysis window to apply to each frame. By default no window is applied. You can use numpy window functions here e.g. winfunc=numpy.hamming
99-
:returns: A numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector.
100-
"""
99+
:returns: A numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector.
100+
"""
101101
highfreq= highfreq or samplerate/2
102102
signal = sigproc.preemphasis(signal,preemph)
103103
frames = sigproc.framesig(signal, winlen*samplerate, winstep*samplerate, winfunc)
104104
pspec = sigproc.powspec(frames,nfft)
105105
pspec = numpy.where(pspec == 0,numpy.finfo(float).eps,pspec) # if things are all zeros we get problems
106-
106+
107107
fb = get_filterbanks(nfilt,nfft,samplerate,lowfreq,highfreq)
108108
feat = numpy.dot(pspec,fb.T) # compute the filterbank energies
109109
R = numpy.tile(numpy.linspace(1,samplerate/2,numpy.size(pspec,1)),(numpy.size(pspec,0),1))
110-
110+
111111
return numpy.dot(pspec*R,fb.T) / feat
112-
112+
113113
def hz2mel(hz):
114114
"""Convert a value in Hertz to Mels
115115
116116
:param hz: a value in Hz. This can also be a numpy array, conversion proceeds element-wise.
117117
:returns: a value in Mels. If an array was passed in, an identical sized array is returned.
118118
"""
119119
return 2595 * numpy.log10(1+hz/700.)
120-
120+
121121
def mel2hz(mel):
122122
"""Convert a value in Mels to Hertz
123123
@@ -139,7 +139,7 @@ def get_filterbanks(nfilt=20,nfft=512,samplerate=16000,lowfreq=0,highfreq=None):
139139
"""
140140
highfreq= highfreq or samplerate/2
141141
assert highfreq <= samplerate/2, "highfreq is greater than samplerate/2"
142-
142+
143143
# compute points evenly spaced in mels
144144
lowmel = hz2mel(lowfreq)
145145
highmel = hz2mel(highfreq)
@@ -154,12 +154,12 @@ def get_filterbanks(nfilt=20,nfft=512,samplerate=16000,lowfreq=0,highfreq=None):
154154
fbank[j,i] = (i - bin[j]) / (bin[j+1]-bin[j])
155155
for i in range(int(bin[j+1]), int(bin[j+2])):
156156
fbank[j,i] = (bin[j+2]-i) / (bin[j+2]-bin[j+1])
157-
return fbank
158-
157+
return fbank
158+
159159
def lifter(cepstra, L=22):
160160
"""Apply a cepstral lifter to the matrix of cepstra. This has the effect of increasing the
161161
magnitude of the high frequency DCT coeffs.
162-
162+
163163
:param cepstra: the matrix of mel-cepstra, will be numframes * numcep in size.
164164
:param L: the liftering coefficient to use. Default is 22. L <= 0 disables lifter.
165165
"""

python_speech_features/sigproc.py

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -53,68 +53,68 @@ def deframesig(frames,siglen,frame_len,frame_step,winfunc=lambda x:numpy.ones((x
5353
frame_step = round_half_up(frame_step)
5454
numframes = numpy.shape(frames)[0]
5555
assert numpy.shape(frames)[1] == frame_len, '"frames" matrix is wrong size, 2nd dim is not equal to frame_len'
56-
56+
5757
indices = numpy.tile(numpy.arange(0,frame_len),(numframes,1)) + numpy.tile(numpy.arange(0,numframes*frame_step,frame_step),(frame_len,1)).T
5858
indices = numpy.array(indices,dtype=numpy.int32)
59-
padlen = (numframes-1)*frame_step + frame_len
60-
59+
padlen = (numframes-1)*frame_step + frame_len
60+
6161
if siglen <= 0: siglen = padlen
62-
62+
6363
rec_signal = numpy.zeros((padlen,))
6464
window_correction = numpy.zeros((padlen,))
6565
win = winfunc(frame_len)
66-
66+
6767
for i in range(0,numframes):
6868
window_correction[indices[i,:]] = window_correction[indices[i,:]] + win + 1e-15 #add a little bit so it is never zero
6969
rec_signal[indices[i,:]] = rec_signal[indices[i,:]] + frames[i,:]
70-
70+
7171
rec_signal = rec_signal/window_correction
7272
return rec_signal[0:siglen]
73-
73+
7474
def magspec(frames,NFFT):
75-
"""Compute the magnitude spectrum of each frame in frames. If frames is an NxD matrix, output will be Nx(NFFT/2+1).
75+
"""Compute the magnitude spectrum of each frame in frames. If frames is an NxD matrix, output will be Nx(NFFT/2+1).
7676
7777
:param frames: the array of frames. Each row is a frame.
78-
:param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded.
78+
:param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded.
7979
:returns: If frames is an NxD matrix, output will be Nx(NFFT/2+1). Each row will be the magnitude spectrum of the corresponding frame.
80-
"""
81-
if numpy.shape(frames)[1] > NFFT:
80+
"""
81+
if numpy.shape(frames)[1] > NFFT:
8282
logging.warn('frame length (%d) is greater than FFT size (%d), frame will be truncated. Increase NFFT to avoid.', numpy.shape(frames)[1], NFFT)
8383
complex_spec = numpy.fft.rfft(frames,NFFT)
8484
return numpy.absolute(complex_spec)
85-
85+
8686
def powspec(frames,NFFT):
87-
"""Compute the power spectrum of each frame in frames. If frames is an NxD matrix, output will be Nx(NFFT/2+1).
87+
"""Compute the power spectrum of each frame in frames. If frames is an NxD matrix, output will be Nx(NFFT/2+1).
8888
8989
:param frames: the array of frames. Each row is a frame.
90-
:param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded.
90+
:param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded.
9191
:returns: If frames is an NxD matrix, output will be Nx(NFFT/2+1). Each row will be the power spectrum of the corresponding frame.
92-
"""
92+
"""
9393
return 1.0/NFFT * numpy.square(magspec(frames,NFFT))
94-
94+
9595
def logpowspec(frames,NFFT,norm=1):
96-
"""Compute the log power spectrum of each frame in frames. If frames is an NxD matrix, output will be Nx(NFFT/2+1).
96+
"""Compute the log power spectrum of each frame in frames. If frames is an NxD matrix, output will be Nx(NFFT/2+1).
9797
9898
:param frames: the array of frames. Each row is a frame.
99-
:param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded.
99+
:param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded.
100100
:param norm: If norm=1, the log power spectrum is normalised so that the max value (across all frames) is 0.
101101
:returns: If frames is an NxD matrix, output will be Nx(NFFT/2+1). Each row will be the log power spectrum of the corresponding frame.
102-
"""
102+
"""
103103
ps = powspec(frames,NFFT);
104104
ps[ps<=1e-30] = 1e-30
105105
lps = 10*numpy.log10(ps)
106106
if norm:
107107
return lps - numpy.max(lps)
108108
else:
109109
return lps
110-
110+
111111
def preemphasis(signal,coeff=0.95):
112112
"""perform preemphasis on the input signal.
113-
113+
114114
:param signal: The signal to filter.
115115
:param coeff: The preemphasis coefficient. 0 is no filter, default is 0.95.
116116
:returns: the filtered signal.
117-
"""
117+
"""
118118
return numpy.append(signal[0],signal[1:]-coeff*signal[:-1])
119119

120120

0 commit comments

Comments
 (0)