1- """Example of converting I3-files to SQLite, Parquet, and LMDB."""
1+ """Example of converting I3-files to SQLite, Parquet, and LMDB.
22
3- from glob import glob
3+ When using the LMDB backend, the ``--precompute-representation`` flag can be
4+ used to pre-compute a DataRepresentation and store it alongside the raw
5+ data. Pre-computed representations can later be loaded directly,
6+ avoiding the cost of real-time DataRepresentation construction during training.
7+ """
48
9+ from glob import glob
10+ from typing import Any , Dict
511from graphnet .constants import EXAMPLE_OUTPUT_DIR , TEST_DATA_DIR
12+ from graphnet .data .constants import FEATURES , TRUTH
613from graphnet .data .extractors .icecube import (
714 I3FeatureExtractorIceCubeUpgrade ,
815 I3FeatureExtractorIceCube86 ,
1320from graphnet .data .parquet import ParquetDataConverter
1421from graphnet .data .sqlite import SQLiteDataConverter
1522from graphnet .data .pre_configured .dataconverters import I3ToLMDBConverter
23+ from graphnet .models .detector .icecube import IceCube86 , IceCubeUpgrade
24+ from graphnet .models .graphs import KNNGraph
1625from graphnet .utilities .argparse import ArgumentParser
1726from graphnet .utilities .imports import has_icecube_package
1827from graphnet .utilities .logging import Logger
3645}
3746
3847
39- def main_icecube86 (backend : str ) -> None :
48+ def main_icecube86 (
49+ backend : str ,
50+ precompute_representation : bool = False ,
51+ num_workers : int = 1 ,
52+ ) -> None :
4053 """Convert IceCube-86 I3 files to intermediate `backend` format."""
41- # Check(s)
4254 assert backend in CONVERTER_CLASS
4355
4456 inputs = [f"{ TEST_DATA_DIR } /i3/oscNext_genie_level7_v02" ]
@@ -47,43 +59,97 @@ def main_icecube86(backend: str) -> None:
4759 f"{ TEST_DATA_DIR } /i3/oscNext_genie_level7_v02/*GeoCalib*"
4860 )[0 ]
4961
50- converter = CONVERTER_CLASS [backend ](
51- extractors = [
52- I3FeatureExtractorIceCube86 ("SRTInIcePulses" ),
53- I3TruthExtractor (),
54- ],
55- outdir = outdir ,
56- gcd_rescue = gcd_rescue ,
57- workers = 1 ,
58- )
62+ extractors = [
63+ I3FeatureExtractorIceCube86 ("SRTInIcePulses" ),
64+ I3TruthExtractor (),
65+ ]
66+
67+ if backend == "lmdb" :
68+ lmdb_kwargs : Dict [str , Any ] = {}
69+ if precompute_representation :
70+ # Could be any DataRepresentation, not just KNNGraph
71+ data_representation = KNNGraph (
72+ detector = IceCube86 (),
73+ nb_nearest_neighbours = 8 ,
74+ input_feature_names = FEATURES .ICECUBE86 ,
75+ )
76+ lmdb_kwargs .update (
77+ data_representation = data_representation ,
78+ pulsemap_extractor_name = "SRTInIcePulses" ,
79+ truth_extractor_name = "truth" ,
80+ truth_label_names = TRUTH .ICECUBE86 ,
81+ )
82+ converter : DataConverter = I3ToLMDBConverter (
83+ extractors = extractors ,
84+ outdir = outdir ,
85+ gcd_rescue = gcd_rescue ,
86+ num_workers = num_workers ,
87+ ** lmdb_kwargs ,
88+ )
89+ else :
90+ converter = CONVERTER_CLASS [backend ](
91+ extractors = extractors ,
92+ outdir = outdir ,
93+ gcd_rescue = gcd_rescue ,
94+ workers = num_workers ,
95+ )
96+
5997 converter (inputs )
6098 if backend in ["sqlite" , "lmdb" ]:
6199 converter .merge_files ()
62100
63101
def main_icecube_upgrade(
    backend: str,
    precompute_representation: bool = False,
    num_workers: int = 1,
) -> None:
    """Convert IceCube-Upgrade I3 files to intermediate `backend` format.

    Args:
        backend: Key into `CONVERTER_CLASS` selecting the output format.
        precompute_representation: If True and `backend` is "lmdb", build a
            KNNGraph on the mDOM pulsemap and pass it to the LMDB converter
            together with the truth label names. Not used for other
            backends.
        num_workers: Number of workers passed on to the converter.
    """
    assert backend in CONVERTER_CLASS

    inputs = [f"{TEST_DATA_DIR}/i3/upgrade_genie_step4_140028_000998"]
    outdir = f"{EXAMPLE_OUTPUT_DIR}/convert_i3_files/upgrade"
    # The f-prefix is required so that TEST_DATA_DIR is interpolated; with a
    # plain string the pattern matches nothing and `[0]` raises IndexError.
    gcd_rescue = glob(
        f"{TEST_DATA_DIR}/i3/upgrade_genie_step4_140028_000998/*GeoCalib*"
    )[0]

    # The mDOM pulsemap name is shared between its extractor and the
    # optional pre-computed representation, so keep it in one place.
    pulsemap = "I3RecoPulseSeriesMap_mDOM"
    extractors = [
        I3TruthExtractor(),
        I3RetroExtractor(),
        I3FeatureExtractorIceCubeUpgrade(pulsemap),
        I3FeatureExtractorIceCubeUpgrade("I3RecoPulseSeriesMap_DEgg"),
    ]

    if backend == "lmdb":
        # Extra keyword arguments are only supplied when a representation
        # should be pre-computed; otherwise the converter gets none.
        lmdb_kwargs: Dict[str, Any] = {}
        if precompute_representation:
            data_representation = KNNGraph(
                detector=IceCubeUpgrade(),
                nb_nearest_neighbours=8,
                input_feature_names=FEATURES.UPGRADE,
            )
            lmdb_kwargs.update(
                data_representation=data_representation,
                pulsemap_extractor_name=pulsemap,
                truth_extractor_name="truth",
                truth_label_names=TRUTH.UPGRADE,
            )
        # Note: the LMDB converter takes `num_workers`, whereas the
        # converters looked up in CONVERTER_CLASS take `workers`.
        converter: DataConverter = I3ToLMDBConverter(
            extractors=extractors,
            outdir=outdir,
            gcd_rescue=gcd_rescue,
            num_workers=num_workers,
            **lmdb_kwargs,
        )
    else:
        converter = CONVERTER_CLASS[backend](
            extractors=extractors,
            outdir=outdir,
            gcd_rescue=gcd_rescue,
            workers=num_workers,
        )

    converter(inputs)
    if backend in ["sqlite", "lmdb"]:
        converter.merge_files()
@@ -94,7 +160,6 @@ def main_icecube_upgrade(backend: str) -> None:
94160 if not has_icecube_package ():
95161 Logger (log_folder = None ).error (ERROR_MESSAGE_MISSING_ICETRAY )
96162 else :
97- # Parse command-line arguments
98163 parser = ArgumentParser (
99164 description = """
100165Convert I3 files to an intermediate format.
@@ -111,11 +176,39 @@ def main_icecube_upgrade(backend: str) -> None:
111176 parser .add_argument (
112177 "detector" , choices = ["icecube-86" , "icecube-upgrade" ]
113178 )
179+ parser .add_argument (
180+ "--precompute-representation" ,
181+ action = "store_true" ,
182+ default = False ,
183+ help = "Pre-compute a KNN graph representation and store it in "
184+ "the LMDB database. Only supported with the lmdb backend." ,
185+ )
186+ parser .add_argument (
187+ "--workers" ,
188+ type = int ,
189+ default = 1 ,
190+ help = "Number of worker processes for parallel conversion "
191+ "(default: %(default)s)." ,
192+ )
114193
115194 args , unknown = parser .parse_known_args ()
116195
117- # Run example script
196+ if args .precompute_representation and args .backend != "lmdb" :
197+ Logger (log_folder = None ).warning (
198+ "--precompute-representation is only supported with the lmdb "
199+ "backend. Ignoring."
200+ )
201+ args .precompute_representation = False
202+
118203 if args .detector == "icecube-86" :
119- main_icecube86 (args .backend )
204+ main_icecube86 (
205+ args .backend ,
206+ args .precompute_representation ,
207+ args .workers ,
208+ )
120209 else :
121- main_icecube_upgrade (args .backend )
210+ main_icecube_upgrade (
211+ args .backend ,
212+ args .precompute_representation ,
213+ args .workers ,
214+ )
0 commit comments