Skip to content

Commit e51df9e

Browse files
authored
Merge pull request #48 from chanansh/faster_framesig_by_stride_trick
Faster framesig by stride trick
2 parents a8da5ac + 9f244cf commit e51df9e

2 files changed

Lines changed: 81 additions & 31 deletions

File tree

python_speech_features/sigproc.py

Lines changed: 50 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,26 @@
66
import math
77
import logging
88

9+
910
def round_half_up(number):
1011
return int(decimal.Decimal(number).quantize(decimal.Decimal('1'), rounding=decimal.ROUND_HALF_UP))
1112

1213

13-
def framesig(sig,frame_len,frame_step,winfunc=lambda x:numpy.ones((x,))):
14+
def rolling_window(a, window, step=1):
15+
# http://ellisvalentiner.com/post/2017-03-21-np-strides-trick
16+
shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
17+
strides = a.strides + (a.strides[-1],)
18+
return numpy.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)[::step]
19+
20+
21+
def framesig(sig, frame_len, frame_step, winfunc=lambda x: numpy.ones((x,)), stride_trick=True):
1422
"""Frame a signal into overlapping frames.
1523
1624
:param sig: the audio signal to frame.
1725
:param frame_len: length of each frame measured in samples.
1826
:param frame_step: number of samples after the start of the previous frame that the next frame should begin.
1927
:param winfunc: the analysis window to apply to each frame. By default no window is applied.
28+
:param stride_trick: use stride trick to compute the rolling window and window multiplication faster
2029
:returns: an array of frames. Size is NUMFRAMES by frame_len.
2130
"""
2231
slen = len(sig)
@@ -25,21 +34,26 @@ def framesig(sig,frame_len,frame_step,winfunc=lambda x:numpy.ones((x,))):
2534
if slen <= frame_len:
2635
numframes = 1
2736
else:
28-
numframes = 1 + int(math.ceil((1.0*slen - frame_len)/frame_step))
37+
numframes = 1 + int(math.ceil((1.0 * slen - frame_len) / frame_step))
2938

30-
padlen = int((numframes-1)*frame_step + frame_len)
39+
padlen = int((numframes - 1) * frame_step + frame_len)
3140

3241
zeros = numpy.zeros((padlen - slen,))
33-
padsignal = numpy.concatenate((sig,zeros))
42+
padsignal = numpy.concatenate((sig, zeros))
43+
if stride_trick:
44+
win = winfunc(frame_len)
45+
frames = rolling_window(padsignal, window=frame_len, step=frame_step)
46+
else:
47+
indices = numpy.tile(numpy.arange(0, frame_len), (numframes, 1)) + numpy.tile(
48+
numpy.arange(0, numframes * frame_step, frame_step), (frame_len, 1)).T
49+
indices = numpy.array(indices, dtype=numpy.int32)
50+
frames = padsignal[indices]
51+
win = numpy.tile(winfunc(frame_len), (numframes, 1))
3452

35-
indices = numpy.tile(numpy.arange(0,frame_len),(numframes,1)) + numpy.tile(numpy.arange(0,numframes*frame_step,frame_step),(frame_len,1)).T
36-
indices = numpy.array(indices,dtype=numpy.int32)
37-
frames = padsignal[indices]
38-
win = numpy.tile(winfunc(frame_len),(numframes,1))
39-
return frames*win
53+
return frames * win
4054

4155

42-
def deframesig(frames,siglen,frame_len,frame_step,winfunc=lambda x:numpy.ones((x,))):
56+
def deframesig(frames, siglen, frame_len, frame_step, winfunc=lambda x: numpy.ones((x,))):
4357
"""Does overlap-add procedure to undo the action of framesig.
4458
4559
:param frames: the array of frames.
@@ -54,68 +68,73 @@ def deframesig(frames,siglen,frame_len,frame_step,winfunc=lambda x:numpy.ones((x
5468
numframes = numpy.shape(frames)[0]
5569
assert numpy.shape(frames)[1] == frame_len, '"frames" matrix is wrong size, 2nd dim is not equal to frame_len'
5670

57-
indices = numpy.tile(numpy.arange(0,frame_len),(numframes,1)) + numpy.tile(numpy.arange(0,numframes*frame_step,frame_step),(frame_len,1)).T
58-
indices = numpy.array(indices,dtype=numpy.int32)
59-
padlen = (numframes-1)*frame_step + frame_len
71+
indices = numpy.tile(numpy.arange(0, frame_len), (numframes, 1)) + numpy.tile(
72+
numpy.arange(0, numframes * frame_step, frame_step), (frame_len, 1)).T
73+
indices = numpy.array(indices, dtype=numpy.int32)
74+
padlen = (numframes - 1) * frame_step + frame_len
6075

6176
if siglen <= 0: siglen = padlen
6277

6378
rec_signal = numpy.zeros((padlen,))
6479
window_correction = numpy.zeros((padlen,))
6580
win = winfunc(frame_len)
6681

67-
for i in range(0,numframes):
68-
window_correction[indices[i,:]] = window_correction[indices[i,:]] + win + 1e-15 #add a little bit so it is never zero
69-
rec_signal[indices[i,:]] = rec_signal[indices[i,:]] + frames[i,:]
82+
for i in range(0, numframes):
83+
window_correction[indices[i, :]] = window_correction[
84+
indices[i, :]] + win + 1e-15 # add a little bit so it is never zero
85+
rec_signal[indices[i, :]] = rec_signal[indices[i, :]] + frames[i, :]
7086

71-
rec_signal = rec_signal/window_correction
87+
rec_signal = rec_signal / window_correction
7288
return rec_signal[0:siglen]
7389

74-
def magspec(frames,NFFT):
90+
91+
def magspec(frames, NFFT):
7592
"""Compute the magnitude spectrum of each frame in frames. If frames is an NxD matrix, output will be Nx(NFFT/2+1).
7693
7794
:param frames: the array of frames. Each row is a frame.
7895
:param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded.
7996
:returns: If frames is an NxD matrix, output will be Nx(NFFT/2+1). Each row will be the magnitude spectrum of the corresponding frame.
8097
"""
8198
if numpy.shape(frames)[1] > NFFT:
82-
logging.warn('frame length (%d) is greater than FFT size (%d), frame will be truncated. Increase NFFT to avoid.', numpy.shape(frames)[1], NFFT)
83-
complex_spec = numpy.fft.rfft(frames,NFFT)
99+
logging.warn(
100+
'frame length (%d) is greater than FFT size (%d), frame will be truncated. Increase NFFT to avoid.',
101+
numpy.shape(frames)[1], NFFT)
102+
complex_spec = numpy.fft.rfft(frames, NFFT)
84103
return numpy.absolute(complex_spec)
85104

86-
def powspec(frames,NFFT):
105+
106+
def powspec(frames, NFFT):
87107
"""Compute the power spectrum of each frame in frames. If frames is an NxD matrix, output will be Nx(NFFT/2+1).
88108
89109
:param frames: the array of frames. Each row is a frame.
90110
:param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded.
91111
:returns: If frames is an NxD matrix, output will be Nx(NFFT/2+1). Each row will be the power spectrum of the corresponding frame.
92112
"""
93-
return 1.0/NFFT * numpy.square(magspec(frames,NFFT))
113+
return 1.0 / NFFT * numpy.square(magspec(frames, NFFT))
94114

95-
def logpowspec(frames,NFFT,norm=1):
115+
116+
def logpowspec(frames, NFFT, norm=1):
96117
"""Compute the log power spectrum of each frame in frames. If frames is an NxD matrix, output will be Nx(NFFT/2+1).
97118
98119
:param frames: the array of frames. Each row is a frame.
99120
:param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded.
100121
:param norm: If norm=1, the log power spectrum is normalised so that the max value (across all frames) is 0.
101122
:returns: If frames is an NxD matrix, output will be Nx(NFFT/2+1). Each row will be the log power spectrum of the corresponding frame.
102123
"""
103-
ps = powspec(frames,NFFT);
104-
ps[ps<=1e-30] = 1e-30
105-
lps = 10*numpy.log10(ps)
124+
ps = powspec(frames, NFFT);
125+
ps[ps <= 1e-30] = 1e-30
126+
lps = 10 * numpy.log10(ps)
106127
if norm:
107128
return lps - numpy.max(lps)
108129
else:
109130
return lps
110131

111-
def preemphasis(signal,coeff=0.95):
132+
133+
def preemphasis(signal, coeff=0.95):
112134
"""perform preemphasis on the input signal.
113135
114136
:param signal: The signal to filter.
115137
:param coeff: The preemphasis coefficient. 0 is no filter, default is 0.95.
116138
:returns: the filtered signal.
117139
"""
118-
return numpy.append(signal[0],signal[1:]-coeff*signal[:-1])
119-
120-
121-
140+
return numpy.append(signal[0], signal[1:] - coeff * signal[:-1])

test/test_sigproc.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from python_speech_features import sigproc
2+
import unittest
3+
import numpy as np
4+
import time
5+
6+
7+
class test_case(unittest.TestCase):
8+
def test_frame_sig(self):
9+
n = 10000124
10+
frame_len = 37
11+
frame_step = 13
12+
x = np.random.rand(n)
13+
t0 = time.time()
14+
y_old = sigproc.framesig(x, frame_len=frame_len, frame_step=frame_step, stride_trick=False)
15+
t1 = time.time()
16+
y_new = sigproc.framesig(x, frame_len=frame_len, frame_step=frame_step, stride_trick=True)
17+
t_new = time.time() - t1
18+
t_old = t1 - t0
19+
self.assertTupleEqual(y_old.shape, y_new.shape)
20+
np.testing.assert_array_equal(y_old, y_new)
21+
self.assertLess(t_new, t_old)
22+
print('new run time %3.2f < %3.2f sec' % (t_new, t_old))
23+
24+
def test_rolling(self):
25+
x = np.arange(10)
26+
y = sigproc.rolling_window(x, window=4, step=3)
27+
y_expected = np.array([[0, 1, 2, 3],
28+
[3, 4, 5, 6],
29+
[6, 7, 8, 9]]
30+
)
31+
y = np.testing.assert_array_equal(y, y_expected)

0 commit comments

Comments
 (0)