-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathh5_handler.py
More file actions
96 lines (81 loc) · 2.95 KB
/
h5_handler.py
File metadata and controls
96 lines (81 loc) · 2.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#
# Copyright (c) 2022-2023 Geosiris.
# SPDX-License-Identifier: Apache-2.0
#
import logging
from typing import Dict, Optional

import h5py
from etpclient.utils import (
    search_all_element_value,
    get_xml_dict_from_string,
    xml_dict_get_uri,
)
from etptypes.energistics.etp.v12.protocol.data_array.put_data_arrays import (
    PutDataArrays,
)
def descend_obj(obj, sep="\t"):
    """Recursively print the groups, datasets and dataset attributes of an HDF5 node.

    :param obj: an open ``h5py`` ``File``, ``Group`` or ``Dataset`` to walk.
    :param sep: indentation prefix; one tab is appended per nesting level.
    """
    # Use isinstance() with the public h5py classes instead of exact type
    # comparison against private ``h5py._hl.*`` implementation modules
    # (``File`` is itself a ``Group`` subclass, so both cases are covered).
    if isinstance(obj, (h5py.Group, h5py.File)):
        for key in obj.keys():
            print(sep, "-", key, ":", obj[key])
            descend_obj(obj[key], sep=sep + "\t")
    elif isinstance(obj, h5py.Dataset):
        print("-------------------")
        for key in obj.attrs.keys():
            print(sep + "\t", "-", key, ":", obj.attrs[key])
        print(obj[...])
def h5dump(path, group="/"):
    """Print the metadata (groups, datasets, attributes) of an HDF5 file.

    :param path: path of the HDF5 file to inspect.
    :param group: group to start the dump from; defaults to the root
        group ``"/"``.
    """
    with h5py.File(path, "r") as h5_file:
        descend_obj(h5_file[group])
def h5_search_dataset(h5_file_path, path_in_hdf):
    """Read one dataset from an HDF5 file.

    :param h5_file_path: path of the HDF5 file on disk.
    :param path_in_hdf: internal path of the dataset inside the file.
    :return: a ``(data, shape, dtype)`` tuple, or ``None`` when the path
        does not exist in the file (callers must handle the ``None`` case).
    """
    with h5py.File(h5_file_path, "r") as f:
        # ``f[path]`` raises KeyError for a missing path (it never returns
        # None), so catch it explicitly instead of the previous unreachable
        # ``is not None`` check.
        try:
            dataset = f[path_in_hdf]
            return dataset[...], dataset.shape, dataset.dtype
        except KeyError:
            # Report through the module's logging setup, not a bare print().
            logging.warning(
                "Dataset %s not found in %s", path_in_hdf, h5_file_path
            )
            return None
def generate_put_data_arrays(
    xml_obj: str, h5_file_path: str, dataspace: Optional[str] = None
):
    """Create one ETP ``PutDataArrays`` message per HDF5 path referenced in an XML object.

    :param xml_obj: the XML document of the object, as a string.
    :param h5_file_path: path of the HDF5 file holding the referenced arrays.
    :param dataspace: optional dataspace name forwarded to
        :func:`xml_dict_get_uri` when building the array URI.
    :return: a list of :class:`PutDataArrays`, one per dataset successfully
        read; paths whose data cannot be read are logged and skipped.
    """
    res = []
    obj_dict = get_xml_dict_from_string(xml_obj)
    # Both element names that may reference an HDF5 path are searched.
    for path_in_hdf in search_all_element_value(
        obj_dict, "PathInExternalFile"
    ) + search_all_element_value(obj_dict, "PathInHdfFile"):
        logging.info(f"search in h5 {path_in_hdf}")
        try:
            data, shape, dtype = h5_search_dataset(h5_file_path, path_in_hdf)
            logging.info(f"\t==> shape {shape}")
            pda_dict = {
                "dataArrays": {
                    "0": {
                        "uid": {
                            "uri": xml_dict_get_uri(obj_dict, dataspace),
                            "pathInResource": path_in_hdf,
                        },
                        "array": {
                            "dimensions": list(shape),
                            "data": {
                                "item": {"values": data.flatten().tolist()}
                            },
                        },
                        "customData": {},
                    }
                }
            }
            res.append(PutDataArrays.parse_obj(pda_dict))
        except Exception:
            # A missing dataset makes the tuple unpacking above raise;
            # log with the traceback and continue with the remaining paths.
            logging.exception("Failed to find data")
    return res
def write_h5(h5_path: str, arrays: Dict):
    """Write every entry of ``arrays`` as a dataset of an HDF5 file.

    The file is opened in append mode (created if missing), so existing
    datasets with other names are kept.

    :param h5_path: path of the HDF5 file to write to.
    :param arrays: mapping of dataset name to an array-like value exposing
        ``shape`` and ``dtype``.
    """
    with h5py.File(h5_path, "a") as h5_file:
        for name, values in arrays.items():
            dataset = h5_file.create_dataset(name, values.shape, values.dtype)
            dataset[()] = values