1212import xml .etree .ElementTree as ET
1313from collections import OrderedDict , defaultdict
1414import logging
15+ from pathlib import Path
1516
1617import numpy as np
1718
1819__all__ = ['load_xdf' ]
19- __version__ = '1.14.0'
2020
2121logger = logging .getLogger (__name__ )
2222
2323
class StreamData:
    """Temporary per-stream data accumulated while parsing an XDF file.

    Built from a parsed stream header (a nested dict of lists of strings);
    raw sample/time-stamp/clock-offset chunks are appended to the list
    attributes by the reader and consolidated later.
    """

    def __init__(self, xml):
        """Init a new StreamData object from a stream header.

        Parameters
        ----------
        xml : dict
            Nested dict form of the stream header XML; leaf values are
            lists of strings (e.g. ``xml['info']['channel_count'][0]``).
        """
        # Map XDF channel formats to numpy scalar types. Strings are kept
        # as generic Python objects: the builtin ``object`` is used because
        # ``np.object`` was deprecated in NumPy 1.20 and removed in 1.24.
        fmts = {
            'double64': np.float64,
            'float32': np.float32,
            'string': object,
            'int32': np.int32,
            'int16': np.int16,
            'int8': np.int8,
            'int64': np.int64,
        }
        # number of channels
        self.nchns = int(xml['info']['channel_count'][0])
        # nominal sampling rate in Hz (0 for irregular-rate streams)
        self.srate = round(float(xml['info']['nominal_srate'][0]))
        # format string (int8, int16, int32, int64, float32, double64, string)
        self.fmt = xml['info']['channel_format'][0]
        # list of time-stamp chunks (each an ndarray, in seconds)
        self.time_stamps = []
        # list of time-series chunks (each an ndarray or list of lists)
        self.time_series = []
        # list of clock offset measurement times (in seconds)
        self.clock_times = []
        # list of clock offset measurement values (in seconds)
        self.clock_values = []
        # last observed time stamp, for delta decompression
        self.last_timestamp = 0.0
        # nominal sampling interval, in seconds, for delta decompression
        self.tdiff = 1.0 / self.srate if self.srate > 0 else 0.0
        # filled in after jitter removal / clock sync
        self.effective_srate = 0.0
        # pre-calc some parsing parameters for efficiency
        if self.fmt != 'string':
            self.dtype = np.dtype(fmts[self.fmt])
            # number of bytes to read from stream to handle one sample
            self.samplebytes = self.nchns * self.dtype.itemsize
62+
63+
2464def load_xdf (filename ,
2565 on_chunk = None ,
2666 synchronize_clocks = True ,
@@ -189,39 +229,6 @@ def load_xdf(filename,
189229
190230 """
191231
192- class StreamData :
193- """Temporary per-stream data."""
194- def __init__ (self , xml ):
195- """Init a new StreamData object from a stream header."""
196- fmt2char = {'int8' : 'b' , 'int16' : 'h' , 'int32' : 'i' , 'int64' : 'q' ,
197- 'float32' : 'f' , 'double64' : 'd' }
198- fmt2nbytes = {'int8' : 1 , 'int16' : 2 , 'int32' : 4 , 'int64' : 8 ,
199- 'float32' : 4 , 'double64' : 8 }
200- # number of channels
201- self .nchns = int (xml ['info' ]['channel_count' ][0 ])
202- # nominal sampling rate in Hz
203- self .srate = round (float (xml ['info' ]['nominal_srate' ][0 ]))
204- # format string (int8, int16, int32, float32, double64, string)
205- self .fmt = xml ['info' ]['channel_format' ][0 ]
206- # list of time-stamp chunks (each an ndarray, in seconds)
207- self .time_stamps = []
208- # list of time-series chunks (each an ndarray or list of lists)
209- self .time_series = []
210- # list of clock offset measurement times (in seconds)
211- self .clock_times = []
212- # list of clock offset measurement values (in seconds)
213- self .clock_values = []
214- # last observed time stamp, for delta decompression
215- self .last_timestamp = 0.0
216- # nominal sampling interval, in seconds, for delta decompression
217- self .tdiff = 1.0 / self .srate if self .srate > 0 else 0.0
218- # pre-calc some parsing parameters for efficiency
219- if self .fmt != 'string' :
220- # number of bytes to read from stream to handle one sample
221- self .samplebytes = self .nchns * fmt2nbytes [self .fmt ]
222- # format string to pass to struct.unpack() to handle one sample
223- self .structfmt = '<%s%s' % (self .nchns , fmt2char [self .fmt ])
224-
225232 logger .info ('Importing XDF file %s...' % filename )
226233 if not os .path .exists (filename ):
227234 raise Exception ('file %s does not exist.' % filename )
@@ -236,8 +243,13 @@ def __init__(self, xml):
236243 filesize = os .path .getsize (filename )
237244
238245 # read file contents ([SomeText] below refers to items in the XDF Spec)
239- with gzip .GzipFile (filename , 'rb' ) if filename .endswith ('.xdfz' ) else open (filename , 'rb' ) as f :
246+ filename = Path (filename ) # convert to pathlib object
247+ if filename .suffix == '.xdfz' or filename .suffixes == ['.xdf' , '.gz' ]:
248+ f_open = gzip .open
249+ else :
250+ f_open = open
240251
252+ with f_open (filename , 'rb' ) as f :
241253 # read [MagicCode]
242254 if f .read (4 ) != b'XDF:' :
243255 raise Exception ('not a valid XDF file: %s' % filename )
@@ -288,48 +300,14 @@ def __init__(self, xml):
288300 # read [Samples] chunk...
289301 # noinspection PyBroadException
290302 try :
291- # read [NumSampleBytes], [NumSamples]
292- nsamples = _read_varlen_int (f )
293- # allocate space
294- stamps = np .zeros ((nsamples ,))
295- if temp [StreamId ].fmt == 'string' :
296- # read a sample comprised of strings
297- values = [[None ] * temp [StreamId ].nchns
298- for _ in range (nsamples )]
299- # for each sample...
300- for k in range (nsamples ):
301- # read or deduce time stamp
302- if struct .unpack ('B' , f .read (1 ))[0 ]:
303- stamps [k ] = struct .unpack ('<d' , f .read (8 ))[0 ]
304- else :
305- stamps [k ] = (temp [StreamId ].last_timestamp +
306- temp [StreamId ].tdiff )
307- temp [StreamId ].last_timestamp = stamps [k ]
308- # read the values
309- for ch in range (temp [StreamId ].nchns ):
310- raw = f .read (_read_varlen_int (f ))
311- values [k ][ch ] = raw .decode (errors = 'replace' )
312- else :
313- # read a sample comprised of numeric values
314- values = np .zeros ((nsamples , temp [StreamId ].nchns ))
315- # for each sample...
316- for k in range (nsamples ):
317- # read or deduce time stamp
318- if struct .unpack ('B' , f .read (1 ))[0 ]:
319- stamps [k ] = struct .unpack ('<d' , f .read (8 ))[0 ]
320- else :
321- stamps [k ] = (temp [StreamId ].last_timestamp +
322- temp [StreamId ].tdiff )
323- temp [StreamId ].last_timestamp = stamps [k ]
324- # read the values
325- raw = f .read (temp [StreamId ].samplebytes )
326- values [k , :] = struct .unpack (temp [StreamId ].structfmt , raw )
303+ nsamples , stamps , values = _read_chunk3 (f , temp [StreamId ])
304+
327305 logger .debug (' reading [%s,%s]' % (temp [StreamId ].nchns ,
328306 nsamples ))
329307 # optionally send through the on_chunk function
330308 if on_chunk is not None :
331309 values , stamps , streams [StreamId ] = on_chunk (values , stamps ,
332- streams [StreamId ], s )
310+ streams [StreamId ], StreamId )
333311 # append to the time series...
334312 temp [StreamId ].time_series .append (values )
335313 temp [StreamId ].time_stamps .append (stamps )
@@ -409,19 +387,59 @@ def __init__(self, xml):
409387
410388
411389 streams = [s for s in streams .values ()]
412- sort_data = [s ['info' ]['name' ][0 ] for s in streams ]
413- streams = [x for _ , x in sorted (zip (sort_data , streams ))]
414390 return streams , fileheader
415391
416392
def _read_chunk3(f, s):
    """Read the payload of one [Samples] chunk for stream *s*.

    Returns a tuple ``(nsamples, stamps, values)`` where ``stamps`` is a
    float ndarray of time stamps (seconds) and ``values`` is an ndarray
    (numeric formats) or a list of lists of str (string format).
    Updates ``s.last_timestamp`` in place for delta decompression.
    """
    # read [NumSamples] (the [NumSampleBytes] marker precedes it on disk)
    nsamples = _read_varlen_int(f)
    stamps = np.zeros((nsamples,))

    def next_stamp():
        # A non-zero lead byte means an explicit little-endian double
        # time stamp follows; a zero byte means the stamp is deduced
        # from the previous one plus the nominal sampling interval.
        if f.read(1) != b'\x00':
            ts = struct.unpack('<d', f.read(8))[0]
        else:
            ts = s.last_timestamp + s.tdiff
        s.last_timestamp = ts
        return ts

    if s.fmt == 'string':
        # one decoded string per channel, one row per sample
        values = [[None] * s.nchns for _ in range(nsamples)]
        for k, row in enumerate(values):
            stamps[k] = next_stamp()
            for ch in range(s.nchns):
                raw = f.read(_read_varlen_int(f))
                row[ch] = raw.decode(errors='replace')
    else:
        values = np.zeros((nsamples, s.nchns), dtype=s.dtype)
        # bytes per sample is loop-invariant
        nbytes = s.nchns * values.dtype.itemsize
        for k in range(nsamples):
            stamps[k] = next_stamp()
            # frombuffer rather than fromfile, see
            # https://github.com/numpy/numpy/issues/13319
            values[k, :] = np.frombuffer(f.read(nbytes),
                                         dtype=s.dtype,
                                         count=s.nchns)
    return nsamples, stamps, values
433+
434+
417435def _read_varlen_int (f ):
418436 """Read a variable-length integer."""
419- nbytes = struct . unpack ( 'B' , f .read (1 ))[ 0 ]
420- if nbytes == 1 :
421- return struct . unpack ( 'B' , f .read (1 ))[ 0 ]
422- elif nbytes == 4 :
437+ nbytes = f .read (1 )
438+ if nbytes == b' \x01 ' :
439+ return ord ( f .read (1 ))
440+ elif nbytes == b' \x04 ' :
423441 return struct .unpack ('<I' , f .read (4 ))[0 ]
424- elif nbytes == 8 :
442+ elif nbytes == b' \x08 ' :
425443 return struct .unpack ('<Q' , f .read (8 ))[0 ]
426444 else :
427445 raise RuntimeError ('invalid variable-length integer encountered.' )
@@ -437,8 +455,7 @@ def _xml2dict(t):
437455
438456
439457def _scan_forward (f ):
440- """Scan forward through the given file object until after the next
441- boundary chunk."""
458+ """Scan forward through file object until after the next boundary chunk."""
442459 blocklen = 2 ** 20
443460 signature = bytes ([0x43 , 0xA5 , 0x46 , 0xDC , 0xCB , 0xF5 , 0x41 , 0x0F ,
444461 0xB3 , 0x0E , 0xD5 , 0x46 , 0x73 , 0x83 , 0xCB , 0xE4 ])
@@ -447,7 +464,7 @@ def _scan_forward(f):
447464 block = f .read (blocklen )
448465 matchpos = block .find (signature )
449466 if matchpos != - 1 :
450- f .seek (curpos + matchpos + 15 )
467+ f .seek (curpos + matchpos + len ( signature ) )
451468 logger .debug (' scan forward found a boundary chunk.' )
452469 break
453470 if len (block ) < blocklen :
@@ -572,7 +589,7 @@ def _jitter_removal(streams,
572589 indices = np .arange (range_i [0 ], range_i [1 ] + 1 , 1 )[:, None ]
573590 X = np .concatenate ((np .ones_like (indices ), indices ), axis = 1 )
574591 y = stream .time_stamps [indices ]
575- mapping = np .linalg .lstsq (X , y , rcond = None )[0 ]
592+ mapping = np .linalg .lstsq (X , y , rcond = - 1 )[0 ]
576593 stream .time_stamps [indices ] = (mapping [0 ] + mapping [1 ] *
577594 indices )
578595 # Store num_samples and segment duration
0 commit comments