Skip to content

Commit 36672d0

Browse files
committed
Add precomputed representation for conversion example.
1 parent e6bc3c3 commit 36672d0

1 file changed

Lines changed: 125 additions & 32 deletions

File tree

Lines changed: 125 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,15 @@
1-
"""Example of converting I3-files to SQLite, Parquet, and LMDB."""
1+
"""Example of converting I3-files to SQLite, Parquet, and LMDB.
22
3-
from glob import glob
3+
When using the LMDB backend, the ``--precompute-representation`` flag can be
4+
used to pre-compute a DataRepresentation and store it alongside the raw
5+
data. Pre-computed representations can later be loaded directly,
6+
avoiding the cost of real-time DataRepresentation construction during training.
7+
"""
48

9+
from glob import glob
10+
from typing import Any, Dict
511
from graphnet.constants import EXAMPLE_OUTPUT_DIR, TEST_DATA_DIR
12+
from graphnet.data.constants import FEATURES, TRUTH
613
from graphnet.data.extractors.icecube import (
714
I3FeatureExtractorIceCubeUpgrade,
815
I3FeatureExtractorIceCube86,
@@ -13,6 +20,8 @@
1320
from graphnet.data.parquet import ParquetDataConverter
1421
from graphnet.data.sqlite import SQLiteDataConverter
1522
from graphnet.data.pre_configured.dataconverters import I3ToLMDBConverter
23+
from graphnet.models.detector.icecube import IceCube86, IceCubeUpgrade
24+
from graphnet.models.graphs import KNNGraph
1625
from graphnet.utilities.argparse import ArgumentParser
1726
from graphnet.utilities.imports import has_icecube_package
1827
from graphnet.utilities.logging import Logger
@@ -36,9 +45,12 @@
3645
}
3746

3847

39-
def main_icecube86(backend: str) -> None:
48+
def main_icecube86(
49+
backend: str,
50+
precompute_representation: bool = False,
51+
num_workers: int = 1,
52+
) -> None:
4053
"""Convert IceCube-86 I3 files to intermediate `backend` format."""
41-
# Check(s)
4254
assert backend in CONVERTER_CLASS
4355

4456
inputs = [f"{TEST_DATA_DIR}/i3/oscNext_genie_level7_v02"]
@@ -47,43 +59,97 @@ def main_icecube86(backend: str) -> None:
4759
f"{TEST_DATA_DIR}/i3/oscNext_genie_level7_v02/*GeoCalib*"
4860
)[0]
4961

50-
converter = CONVERTER_CLASS[backend](
51-
extractors=[
52-
I3FeatureExtractorIceCube86("SRTInIcePulses"),
53-
I3TruthExtractor(),
54-
],
55-
outdir=outdir,
56-
gcd_rescue=gcd_rescue,
57-
workers=1,
58-
)
62+
extractors = [
63+
I3FeatureExtractorIceCube86("SRTInIcePulses"),
64+
I3TruthExtractor(),
65+
]
66+
67+
if backend == "lmdb":
68+
lmdb_kwargs: Dict[str, Any] = {}
69+
if precompute_representation:
70+
# Could be any DataRepresentation, not just KNNGraph
71+
data_representation = KNNGraph(
72+
detector=IceCube86(),
73+
nb_nearest_neighbours=8,
74+
input_feature_names=FEATURES.ICECUBE86,
75+
)
76+
lmdb_kwargs.update(
77+
data_representation=data_representation,
78+
pulsemap_extractor_name="SRTInIcePulses",
79+
truth_extractor_name="truth",
80+
truth_label_names=TRUTH.ICECUBE86,
81+
)
82+
converter: DataConverter = I3ToLMDBConverter(
83+
extractors=extractors,
84+
outdir=outdir,
85+
gcd_rescue=gcd_rescue,
86+
num_workers=num_workers,
87+
**lmdb_kwargs,
88+
)
89+
else:
90+
converter = CONVERTER_CLASS[backend](
91+
extractors=extractors,
92+
outdir=outdir,
93+
gcd_rescue=gcd_rescue,
94+
workers=num_workers,
95+
)
96+
5997
converter(inputs)
6098
if backend in ["sqlite", "lmdb"]:
6199
converter.merge_files()
62100

63101

def main_icecube_upgrade(
    backend: str,
    precompute_representation: bool = False,
    num_workers: int = 1,
) -> None:
    """Convert IceCube-Upgrade I3 files to intermediate `backend` format.

    Reads the example IceCube-Upgrade I3 files below `TEST_DATA_DIR` and
    writes the converted output below `EXAMPLE_OUTPUT_DIR`.

    Args:
        backend: Key into `CONVERTER_CLASS` selecting the output format.
        precompute_representation: If True and `backend` is "lmdb", a
            `KNNGraph` built on the `IceCubeUpgrade` detector is passed to
            the LMDB converter together with the pulsemap/truth extractor
            names. Has no effect for any other backend.
        num_workers: Number of workers handed to the converter.

    Raises:
        AssertionError: If `backend` is not a key of `CONVERTER_CLASS`.
        IndexError: If no GCD file matching `*GeoCalib*` is found in the
            input directory.
    """
    assert backend in CONVERTER_CLASS

    inputs = [f"{TEST_DATA_DIR}/i3/upgrade_genie_step4_140028_000998"]
    outdir = f"{EXAMPLE_OUTPUT_DIR}/convert_i3_files/upgrade"
    # The pattern must be an f-string: without the prefix the literal text
    # "{TEST_DATA_DIR}" is globbed, nothing matches, and `[0]` raises
    # IndexError before any conversion starts.
    gcd_rescue = glob(
        f"{TEST_DATA_DIR}/i3/upgrade_genie_step4_140028_000998/*GeoCalib*"
    )[0]

    # The mDOM pulsemap name is reused below as `pulsemap_extractor_name`
    # when a representation is pre-computed.
    pulsemap = "I3RecoPulseSeriesMap_mDOM"
    extractors = [
        I3TruthExtractor(),
        I3RetroExtractor(),
        I3FeatureExtractorIceCubeUpgrade(pulsemap),
        I3FeatureExtractorIceCubeUpgrade("I3RecoPulseSeriesMap_DEgg"),
    ]

    if backend == "lmdb":
        # Extra keyword arguments are only supplied when a representation is
        # requested; otherwise the LMDB converter is built with its defaults.
        lmdb_kwargs: Dict[str, Any] = {}
        if precompute_representation:
            data_representation = KNNGraph(
                detector=IceCubeUpgrade(),
                nb_nearest_neighbours=8,
                input_feature_names=FEATURES.UPGRADE,
            )
            lmdb_kwargs.update(
                data_representation=data_representation,
                pulsemap_extractor_name=pulsemap,
                truth_extractor_name="truth",
                truth_label_names=TRUTH.UPGRADE,
            )
        converter: DataConverter = I3ToLMDBConverter(
            extractors=extractors,
            outdir=outdir,
            gcd_rescue=gcd_rescue,
            num_workers=num_workers,
            **lmdb_kwargs,
        )
    else:
        # The classes in CONVERTER_CLASS are called with `workers`, whereas
        # I3ToLMDBConverter above is called with `num_workers`.
        converter = CONVERTER_CLASS[backend](
            extractors=extractors,
            outdir=outdir,
            gcd_rescue=gcd_rescue,
            workers=num_workers,
        )

    converter(inputs)
    # Only these two backends get an explicit merge step after conversion.
    if backend in ["sqlite", "lmdb"]:
        converter.merge_files()
@@ -94,7 +160,6 @@ def main_icecube_upgrade(backend: str) -> None:
94160
if not has_icecube_package():
95161
Logger(log_folder=None).error(ERROR_MESSAGE_MISSING_ICETRAY)
96162
else:
97-
# Parse command-line arguments
98163
parser = ArgumentParser(
99164
description="""
100165
Convert I3 files to an intermediate format.
@@ -111,11 +176,39 @@ def main_icecube_upgrade(backend: str) -> None:
111176
parser.add_argument(
112177
"detector", choices=["icecube-86", "icecube-upgrade"]
113178
)
179+
parser.add_argument(
180+
"--precompute-representation",
181+
action="store_true",
182+
default=False,
183+
help="Pre-compute a KNN graph representation and store it in "
184+
"the LMDB database. Only supported with the lmdb backend.",
185+
)
186+
parser.add_argument(
187+
"--workers",
188+
type=int,
189+
default=1,
190+
help="Number of worker processes for parallel conversion "
191+
"(default: %(default)s).",
192+
)
114193

115194
args, unknown = parser.parse_known_args()
116195

117-
# Run example script
196+
if args.precompute_representation and args.backend != "lmdb":
197+
Logger(log_folder=None).warning(
198+
"--precompute-representation is only supported with the lmdb "
199+
"backend. Ignoring."
200+
)
201+
args.precompute_representation = False
202+
118203
if args.detector == "icecube-86":
119-
main_icecube86(args.backend)
204+
main_icecube86(
205+
args.backend,
206+
args.precompute_representation,
207+
args.workers,
208+
)
120209
else:
121-
main_icecube_upgrade(args.backend)
210+
main_icecube_upgrade(
211+
args.backend,
212+
args.precompute_representation,
213+
args.workers,
214+
)

0 commit comments

Comments
 (0)