Skip to content

Commit 6d91c76

Browse files
authored
Fix MAG L1A performance (IMAP-Science-Operations-Center#2276)
* Move from concating an array to using a list * Adding new log message * PR updates * Fix docs
1 parent 45a4723 commit 6d91c76

2 files changed

Lines changed: 73 additions & 15 deletions

File tree

imap_processing/mag/l1a/mag_l1a.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def mag_l1a(packet_filepath: Path) -> list[xr.Dataset]:
4343
A list of generated filenames.
4444
"""
4545
packets = decom_mag.decom_packets(packet_filepath)
46-
46+
logger.info("Packet decoding complete, beginning L1A processing.")
4747
norm_data = packets["norm"]
4848
burst_data = packets["burst"]
4949

@@ -188,7 +188,7 @@ def process_packets(
188188
secondary_packet_data.start_time,
189189
)
190190

191-
# Sort primary and secondary into MAGo and MAGi by 24 hour chunks
191+
# Sort primary and secondary into MAGo and MAGi
192192

193193
if mago is None:
194194
mago = MagL1a(

imap_processing/mag/l1a/mag_l1a_data.py

Lines changed: 71 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -205,13 +205,25 @@ class MagL1a:
205205
1 if the sensor is active, 0 if not
206206
shcoarse : int
207207
Mission elapsed time for the first packet, the start time for the whole day
208-
vectors : numpy.ndarray
209-
List of magnetic vector samples, starting at start_time. [x, y, z, range, time],
210-
where time is numpy.datetime64[ns]
208+
starting_vectors : InitVar[numpy.ndarray]
209+
InitVar to create the first entry in the vector list. This is to preserve the
210+
external API of creating the object with the first set of vectors.
211+
This cannot be accessed from an instance of the class. Instead, vectors
212+
should be used.
211213
starting_packet : InitVar[MagL1aPacketProperties]
212214
The packet properties for the first packet in the day. As an InitVar, this
213215
cannot be accessed from an instance of the class. Instead, packet_definitions
214216
should be used.
217+
vectors : numpy.ndarray
218+
List of magnetic vector samples, starting at start_time. [x, y, z, range, time],
219+
where time is numpy.datetime64[ns]. This is a property that concatenates the
220+
internal vector list on demand.
221+
compression_flags : numpy.ndarray
222+
Array of flags to indicate compression and width for all timestamps in the
223+
L1A file. Shaped like (n, 2) where n is the number of vectors. First value
224+
is a boolean for compressed/uncompressed, second value is a number between 0-20
225+
if the data is compressed, which is the width in bits of the compressed data.
226+
This is a property that concatenates the internal compression flags list.
215227
packet_definitions : dict[numpy.datetime64, MagL1aPacketProperties]
216228
Dictionary of packet properties for each packet in the day. The key is the start
217229
time of the packet, and the value is a dataclass of packet properties.
@@ -221,11 +233,20 @@ class MagL1a:
221233
List of missing sequence numbers in the day
222234
start_time : numpy.int64
223235
Start time of the day, in ns since J2000 epoch
224-
compression_flags : np.ndarray
236+
_compression_flags_list : list
225237
Array of flags to indicate compression and width for all timestamps in the
226238
L1A file. Shaped like (n, 2) where n is the number of vectors. First value
227239
is a boolean for compressed/uncompressed, second value is a number between 0-20
228240
if the data is compressed, which is the width in bits of the compressed data.
241+
Transformed into a numpy array upon retrieval.
242+
_vector_list : list
243+
Internal list of vectors, used to build the final vectors attribute.
244+
This is a list of numpy arrays, each with shape (n, 5) where n is the
245+
number of vectors in that packet, and each vector is (x, y, z, range, time).
246+
_vector_cache : numpy.ndarray | None
247+
A cache of the concatenated vector list. This is None until the vectors
248+
property is accessed, at which point it is created and stored here for future
249+
access.
229250
230251
Methods
231252
-------
@@ -248,30 +269,67 @@ class MagL1a:
248269
is_mago: bool
249270
is_active: int
250271
shcoarse: int
251-
vectors: np.ndarray
272+
starting_vectors: InitVar[np.ndarray]
252273
starting_packet: InitVar[MagL1aPacketProperties]
253274
packet_definitions: dict[np.int64, MagL1aPacketProperties] = field(init=False)
254275
most_recent_sequence: int = field(init=False)
255276
missing_sequences: list[int] = field(default_factory=list)
256277
start_time: np.int64 = field(init=False)
257-
compression_flags: np.ndarray | None = field(init=False, default=None)
278+
_compression_flags_list: list = field(default_factory=list)
279+
_vector_list: list = field(init=False)
280+
_vector_cache: np.ndarray | None = field(init=False, default=None)
258281

259-
def __post_init__(self, starting_packet: MagL1aPacketProperties) -> None:
282+
def __post_init__(
283+
self, starting_vectors: np.ndarray, starting_packet: MagL1aPacketProperties
284+
) -> None:
260285
"""
261-
Initialize the packet_definition dictionary and most_recent_sequence.
286+
Initialize the vector list, packet_definition dictionary & most_recent_sequence.
262287
263288
Parameters
264289
----------
290+
starting_vectors : numpy.ndarray
291+
The vectors for the first packet in the day.
265292
starting_packet : MagL1aPacketProperties
266293
The packet properties for the first packet in the day, including start time.
267294
"""
295+
self._vector_list = [starting_vectors]
268296
self.start_time = np.int64(met_to_ttj2000ns(starting_packet.shcoarse))
269297
self.packet_definitions = {self.start_time: starting_packet}
270298
# most_recent_sequence is the sequence number of the packet used to initialize
271299
# the object
272300
self.most_recent_sequence = starting_packet.src_seq_ctr
273301
self.update_compression_array(starting_packet, self.vectors.shape[0])
274302

303+
@property
304+
def vectors(self) -> np.ndarray:
305+
"""
306+
Concatenate the internal vector list into a numpy array.
307+
308+
If the array has already been created, return the cached version.
309+
310+
Returns
311+
-------
312+
np.ndarray
313+
Array of vectors with shape (n, 5) where n is the number of vectors,
314+
and each vector is (x, y, z, range, time).
315+
"""
316+
if self._vector_cache is None:
317+
self._vector_cache = np.concatenate(self._vector_list, axis=0)
318+
return self._vector_cache
319+
320+
@property
321+
def compression_flags(self) -> np.ndarray:
322+
"""
323+
Return the compression flags array.
324+
325+
Returns
326+
-------
327+
np.ndarray
328+
Array of compression flags with shape (n, 2) where n is the number of
329+
vectors, and each entry is (is_compressed, compression_width).
330+
"""
331+
return np.concatenate(self._compression_flags_list, axis=0)
332+
275333
def append_vectors(
276334
self, additional_vectors: np.ndarray, packet_properties: MagL1aPacketProperties
277335
) -> None:
@@ -285,9 +343,12 @@ def append_vectors(
285343
packet_properties : MagL1aPacketProperties
286344
Additional vector definition to add to the packet_definitions dictionary.
287345
"""
346+
self._vector_list.append(additional_vectors)
347+
# Invalidate the cache
348+
self._vector_cache = None
349+
288350
vector_sequence = packet_properties.src_seq_ctr
289351

290-
self.vectors = np.concatenate([self.vectors, additional_vectors])
291352
start_time = np.int64(met_to_ttj2000ns(packet_properties.shcoarse))
292353
self.packet_definitions[start_time] = packet_properties
293354

@@ -322,10 +383,7 @@ def update_compression_array(
322383
[packet_properties.compression, packet_properties.compression_width],
323384
dtype=np.int8,
324385
)
325-
if self.compression_flags is None:
326-
self.compression_flags = new_flags
327-
else:
328-
self.compression_flags = np.concatenate([self.compression_flags, new_flags])
386+
self._compression_flags_list.append(new_flags)
329387

330388
@staticmethod
331389
def calculate_vector_time(

0 commit comments

Comments
 (0)