Merge pull request #48 from chanansh/faster_framesig_by_stride_trick

jameslyons · web-flow · commit e51df9e484da · 2017-08-31T15:44:35.000+10:00
Faster framesig by stride trick
diff --git a/python_speech_features/sigproc.py b/python_speech_features/sigproc.py
@@ -6,17 +6,26 @@
 import math
 import logging
 
+
 def round_half_up(number):
     return int(decimal.Decimal(number).quantize(decimal.Decimal('1'), rounding=decimal.ROUND_HALF_UP))
 
 
-def framesig(sig,frame_len,frame_step,winfunc=lambda x:numpy.ones((x,))):
+def rolling_window(a, window, step=1):
+    # No-copy view of every step-th window on the last axis: http://ellisvalentiner.com/post/2017-03-21-np-strides-trick
+    shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
+    strides = a.strides + (a.strides[-1],)
+    return numpy.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)[..., ::step, :]
+
+
+def framesig(sig, frame_len, frame_step, winfunc=lambda x: numpy.ones((x,)), stride_trick=True):
     """Frame a signal into overlapping frames.
 
     :param sig: the audio signal to frame.
     :param frame_len: length of each frame measured in samples.
     :param frame_step: number of samples after the start of the previous frame that the next frame should begin.
     :param winfunc: the analysis window to apply to each frame. By default no window is applied.
+    :param stride_trick: use stride trick to compute the rolling window and window multiplication faster
     :returns: an array of frames. Size is NUMFRAMES by frame_len.
     """
     slen = len(sig)
@@ -25,21 +34,26 @@ def framesig(sig,frame_len,frame_step,winfunc=lambda x:numpy.ones((x,))):
     if slen <= frame_len:
         numframes = 1
     else:
-        numframes = 1 + int(math.ceil((1.0*slen - frame_len)/frame_step))
+        numframes = 1 + int(math.ceil((1.0 * slen - frame_len) / frame_step))
 
-    padlen = int((numframes-1)*frame_step + frame_len)
+    padlen = int((numframes - 1) * frame_step + frame_len)
 
     zeros = numpy.zeros((padlen - slen,))
-    padsignal = numpy.concatenate((sig,zeros))
+    padsignal = numpy.concatenate((sig, zeros))
+    if stride_trick:
+        win = winfunc(frame_len)
+        frames = rolling_window(padsignal, window=frame_len, step=frame_step)
+    else:
+        indices = numpy.tile(numpy.arange(0, frame_len), (numframes, 1)) + numpy.tile(
+            numpy.arange(0, numframes * frame_step, frame_step), (frame_len, 1)).T
+        indices = numpy.array(indices, dtype=numpy.int32)
+        frames = padsignal[indices]
+        win = numpy.tile(winfunc(frame_len), (numframes, 1))
 
-    indices = numpy.tile(numpy.arange(0,frame_len),(numframes,1)) + numpy.tile(numpy.arange(0,numframes*frame_step,frame_step),(frame_len,1)).T
-    indices = numpy.array(indices,dtype=numpy.int32)
-    frames = padsignal[indices]
-    win = numpy.tile(winfunc(frame_len),(numframes,1))
-    return frames*win
+    return frames * win
 
 
-def deframesig(frames,siglen,frame_len,frame_step,winfunc=lambda x:numpy.ones((x,))):
+def deframesig(frames, siglen, frame_len, frame_step, winfunc=lambda x: numpy.ones((x,))):
     """Does overlap-add procedure to undo the action of framesig.
 
     :param frames: the array of frames.
@@ -54,68 +68,73 @@ def deframesig(frames,siglen,frame_len,frame_step,winfunc=lambda x:numpy.ones((x
     numframes = numpy.shape(frames)[0]
     assert numpy.shape(frames)[1] == frame_len, '"frames" matrix is wrong size, 2nd dim is not equal to frame_len'
 
-    indices = numpy.tile(numpy.arange(0,frame_len),(numframes,1)) + numpy.tile(numpy.arange(0,numframes*frame_step,frame_step),(frame_len,1)).T
-    indices = numpy.array(indices,dtype=numpy.int32)
-    padlen = (numframes-1)*frame_step + frame_len
+    indices = numpy.tile(numpy.arange(0, frame_len), (numframes, 1)) + numpy.tile(
+        numpy.arange(0, numframes * frame_step, frame_step), (frame_len, 1)).T
+    indices = numpy.array(indices, dtype=numpy.int32)
+    padlen = (numframes - 1) * frame_step + frame_len
 
     if siglen <= 0: siglen = padlen
 
     rec_signal = numpy.zeros((padlen,))
     window_correction = numpy.zeros((padlen,))
     win = winfunc(frame_len)
 
-    for i in range(0,numframes):
-        window_correction[indices[i,:]] = window_correction[indices[i,:]] + win + 1e-15 #add a little bit so it is never zero
-        rec_signal[indices[i,:]] = rec_signal[indices[i,:]] + frames[i,:]
+    for i in range(0, numframes):
+        window_correction[indices[i, :]] = window_correction[
+                                               indices[i, :]] + win + 1e-15  # add a little bit so it is never zero
+        rec_signal[indices[i, :]] = rec_signal[indices[i, :]] + frames[i, :]
 
-    rec_signal = rec_signal/window_correction
+    rec_signal = rec_signal / window_correction
     return rec_signal[0:siglen]
 
-def magspec(frames,NFFT):
+
+def magspec(frames, NFFT):
     """Compute the magnitude spectrum of each frame in frames. If frames is an NxD matrix, output will be Nx(NFFT/2+1).
 
     :param frames: the array of frames. Each row is a frame.
     :param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded.
     :returns: If frames is an NxD matrix, output will be Nx(NFFT/2+1). Each row will be the magnitude spectrum of the corresponding frame.
     """
     if numpy.shape(frames)[1] > NFFT:
-        logging.warn('frame length (%d) is greater than FFT size (%d), frame will be truncated. Increase NFFT to avoid.', numpy.shape(frames)[1], NFFT)
-    complex_spec = numpy.fft.rfft(frames,NFFT)
+        logging.warning(
+            'frame length (%d) is greater than FFT size (%d), frame will be truncated. Increase NFFT to avoid.',
+            numpy.shape(frames)[1], NFFT)
+    complex_spec = numpy.fft.rfft(frames, NFFT)
     return numpy.absolute(complex_spec)
 
-def powspec(frames,NFFT):
+
+def powspec(frames, NFFT):
     """Compute the power spectrum of each frame in frames. If frames is an NxD matrix, output will be Nx(NFFT/2+1).
 
     :param frames: the array of frames. Each row is a frame.
     :param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded.
     :returns: If frames is an NxD matrix, output will be Nx(NFFT/2+1). Each row will be the power spectrum of the corresponding frame.
     """
-    return 1.0/NFFT * numpy.square(magspec(frames,NFFT))
+    return 1.0 / NFFT * numpy.square(magspec(frames, NFFT))  # squared magnitudes scaled by 1/NFFT
 
-def logpowspec(frames,NFFT,norm=1):
+
+def logpowspec(frames, NFFT, norm=1):
     """Compute the log power spectrum of each frame in frames. If frames is an NxD matrix, output will be Nx(NFFT/2+1).
 
     :param frames: the array of frames. Each row is a frame.
     :param NFFT: the FFT length to use. If NFFT > frame_len, the frames are zero-padded.
     :param norm: If norm=1, the log power spectrum is normalised so that the max value (across all frames) is 0.
     :returns: If frames is an NxD matrix, output will be Nx(NFFT/2+1). Each row will be the log power spectrum of the corresponding frame.
     """
-    ps = powspec(frames,NFFT);
-    ps[ps<=1e-30] = 1e-30
-    lps = 10*numpy.log10(ps)
+    ps = powspec(frames, NFFT)
+    ps[ps <= 1e-30] = 1e-30  # floor the power so log10 never receives zero
+    lps = 10 * numpy.log10(ps)
     if norm:
         return lps - numpy.max(lps)
     else:
         return lps
 
-def preemphasis(signal,coeff=0.95):
+
+def preemphasis(signal, coeff=0.95):
     """perform preemphasis on the input signal.
 
     :param signal: The signal to filter.
     :param coeff: The preemphasis coefficient. 0 is no filter, default is 0.95.
     :returns: the filtered signal.
     """
-    return numpy.append(signal[0],signal[1:]-coeff*signal[:-1])
-
-
-
+    return numpy.append(signal[0], signal[1:] - coeff * signal[:-1])  # y[0] = x[0]; y[n] = x[n] - coeff * x[n-1]
diff --git a/test/test_sigproc.py b/test/test_sigproc.py
@@ -0,0 +1,31 @@
+from python_speech_features import sigproc
+import unittest
+import numpy as np
+import time
+
+
+class test_case(unittest.TestCase):
+    def test_frame_sig(self):
+        n = 100130  # leaves a partial last frame, so zero-padding is exercised, while keeping the test fast
+        frame_len = 37
+        frame_step = 13
+        x = np.random.rand(n)
+        t0 = time.time()
+        y_old = sigproc.framesig(x, frame_len=frame_len, frame_step=frame_step, stride_trick=False)
+        t1 = time.time()
+        y_new = sigproc.framesig(x, frame_len=frame_len, frame_step=frame_step, stride_trick=True)
+        t_new = time.time() - t1
+        t_old = t1 - t0
+        self.assertTupleEqual(y_old.shape, y_new.shape)
+        np.testing.assert_array_equal(y_old, y_new)
+        # Timing is reported, not asserted: a wall-clock comparison fails spuriously on a loaded machine.
+        print('stride trick %3.2f sec, index tiling %3.2f sec' % (t_new, t_old))
+
+    def test_rolling(self):
+        x = np.arange(10)
+        y = sigproc.rolling_window(x, window=4, step=3)
+        y_expected = np.array([[0, 1, 2, 3],
+                               [3, 4, 5, 6],
+                               [6, 7, 8, 9]]
+                              )
+        np.testing.assert_array_equal(y, y_expected)