1010
1111import numpy as np
1212
13- from ...utils import warn
13+ from ...utils import _check_option , _validate_type , logger , warn
14+
15+ # Offsets from SETUP structure in http://paulbourke.net/dataformats/eeg/
16+ _NCHANNELS_OFFSET = 370  # number of channels; read below as little-endian uint16 ("<u2")
17+ _NSAMPLES_OFFSET = 864  # number of samples; read below as little-endian int32 ("<i4"), may be unreliable
18+ _RATE_OFFSET = 376  # presumably the sampling-rate field; not read in this part of the file -- TODO confirm
19+ _EVENTTABLEPOS_OFFSET = 886  # event table position; read below as little-endian int32 ("<i4")
20+ _DATA_OFFSET = 900 # Size of the 'SETUP' header; sample data starts at _DATA_OFFSET + _CH_SIZE * n_channels.
21+ _CH_SIZE = 75 # Size of each channel's entry (in bytes) between the SETUP header and the sample data
1422
1523
1624def _read_teeg (f , teeg_offset ):
@@ -105,8 +113,8 @@ def _session_date_2_meas_date(session_date, date_format):
105113 return (int_part , frac_part )
106114
107115
def _compute_robust_sizes(*, fid, data_format, recompute_n_samples):
    """Compute n_channels, n_samples, n_bytes, and event_table_position.

    While recording, the event table position stored in the SETUP header is
    computed as an accumulation. If the recording is large, this value
    overflows and ends up pointing to the wrong place.

    If the file is smaller than 2G the value in the SETUP header is used
    (unless it points past the end of the file, in which case it is
    recomputed). Otherwise, the address of the event table is computed from
    n_samples, n_channels, and the number of bytes per sample.

    Reference: https://paulbourke.net/dataformats/eeg/
    The header has a field for the number of samples, but it does not seem to
    be too reliable.

    Parameters
    ----------
    fid : file-like
        Open, seekable binary file handle of the CNT file. Its position is
        moved by this function and is not restored.
        NOTE(review): ``np.fromfile`` is used for the channel count, which
        presumably requires a real file object rather than e.g. ``BytesIO``.
    data_format : "auto" | "int16" | "int32"
        Sample format. With ``"auto"`` the number of bytes per sample is
        inferred from the header values (only possible for files < 2GB).
    recompute_n_samples : bool | None
        If True, replace the header's number of samples with the value
        computed from the size of the data section. If None, do so only when
        the header value is not positive. Must be falsy for files >= 2GB.

    Returns
    -------
    n_channels : int
        Number of channels read from the header.
    n_samples : int
        Number of samples (from the header, or recomputed).
    n_bytes : int
        Number of bytes per sample (2 or 4).
    event_offset : int
        Byte offset of the event table.

    Raises
    ------
    RuntimeError
        If ``data_format="auto"`` and the format cannot be inferred (offset
        beyond the end of the file, inconsistent sizes, or a file >= 2GB).
    ValueError
        If ``recompute_n_samples`` is truthy for a file >= 2GB.
    """
    _check_option("data_format", data_format, ["auto", "int16", "int32"])
    # Read the number of channels and samples from the header
    fid.seek(_NCHANNELS_OFFSET)
    n_channels = int(np.fromfile(fid, dtype="<u2", count=1).item())
    logger.debug("Number of channels: %d", n_channels)
    fid.seek(_NSAMPLES_OFFSET)
    # The header value may be unreliable (see docstring)
    n_samples = int(np.frombuffer(fid.read(4), dtype="<i4").item())
    logger.debug("Header number of samples: %d", n_samples)
    file_size = fid.seek(0, SEEK_END)
    workaround = "pass data_format='int16' or 'int32' explicitly"
    # Sample data starts after the SETUP header and the per-channel entries
    samples_offset = _DATA_OFFSET + _CH_SIZE * n_channels
    _validate_type(recompute_n_samples, (bool, None), "recompute_n_samples")
    if file_size < 2e9:
        logger.debug("File size < 2GB, using header values")
        fid.seek(_EVENTTABLEPOS_OFFSET)
        event_offset = int(np.frombuffer(fid.read(4), dtype="<i4").item())
        logger.debug("Event table offset from header: %d", event_offset)
        if event_offset > file_size:
            # The header offset cannot be right; fall back to computing it,
            # which requires knowing the sample size up front.
            problem = (
                f"Event table offset from header ({event_offset}) is larger than file "
                f"size ({file_size})"
            )
            if data_format == "auto":
                raise RuntimeError(
                    f"{problem}, cannot automatically compute data format, {workaround}"
                )
            warn(f"{problem}, recomputing event table offset.")
            n_bytes = 2 if data_format == "int16" else 4
            event_offset = samples_offset + n_samples * n_channels * n_bytes
        n_data_bytes = event_offset - samples_offset
        if data_format == "auto":
            n_bytes_per_chan, rem = divmod(n_data_bytes, n_channels)
            why = ""
            # starting assumption is 16-bit ints
            n_bytes = 2
            if rem != 0:
                why = (
                    f"number of data bytes {n_data_bytes} is not evenly divisible by "
                    f"{n_channels=}"
                )
            elif n_samples == 0:
                why = "number of samples (according to header) is 0"
            else:
                # we know `n_channels` divides evenly into `n_data_bytes`, and header
                # said `n_samples` was non-zero, so try to infer `n_bytes`:
                n_bytes, rem = divmod(n_bytes_per_chan, n_samples)
                if rem != 0 or n_bytes not in [2, 4]:
                    why = (
                        f"number of bytes per channel {n_bytes_per_chan} is not evenly "
                        f"divisible by {n_samples=} or does not result in 2 or 4 bytes "
                        f"per sample ({n_bytes=})"
                    )
                logger.debug("Inferred data format with %d bytes per sample", n_bytes)
            if why:
                raise RuntimeError(
                    "Could not automatically compute number of bytes per sample as the "
                    f"{why}. set data_format manually."
                )
        else:
            n_bytes = 2 if data_format == "int16" else 4
            logger.debug(
                "Using %d bytes per sample from data_format=%s", n_bytes, data_format
            )
        # Our most reliable way to get the number of samples is to compute it
        recomputed_n_samples, rem = divmod(n_data_bytes, (n_channels * n_bytes))
        logger.debug("Computed number of samples: %d", recomputed_n_samples)
        if recompute_n_samples is None:
            # Only override the header automatically when it is clearly bogus
            recompute_n_samples = n_samples <= 0
            if recompute_n_samples:
                logger.info(
                    "Number of samples in header (%d) is not positive, setting "
                    "recompute_n_samples=True",
                    n_samples,
                )
        if recompute_n_samples:
            n_samples = recomputed_n_samples
        if rem != 0:
            warn(
                "Inconsistent file information detected, number of data bytes "
                f"({n_data_bytes}) not evenly divisible by number of channels "
                f"({n_channels}) times number of bytes ({n_bytes})"
            )
    else:
        logger.debug("File size >= 2GB, computing event table offset")
        if recompute_n_samples:
            # The header offset has overflowed, so there is nothing reliable
            # to recompute the number of samples from.
            raise ValueError(
                "Cannot recompute number of samples for files larger than 2GB, set "
                "recompute_n_samples=False"
            )
        if data_format == "auto":
            raise RuntimeError(
                "Using `data_format='auto' for a CNT file larger"
                " than 2Gb is not supported, explicitly pass data_format as "
                "'int16' or 'int32'"
            )
        n_bytes = 2 if data_format == "int16" else 4
        event_offset = (
            _DATA_OFFSET + _CH_SIZE * n_channels + n_bytes * n_channels * n_samples
        )
        logger.debug("Computed event table offset: %d", event_offset)

    return n_channels, n_samples, n_bytes, event_offset
0 commit comments