Skip to content

Commit 995838e

Browse files
authored
Merge pull request hed-standard#755 from IanCa/develop
Add BaseInput.series_filtered
2 parents 0e443ef + 833812b commit 995838e

15 files changed

Lines changed: 290 additions & 87 deletions

hed/errors/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from .error_reporter import ErrorHandler, get_printable_issue_string, sort_issues
1+
from .error_reporter import ErrorHandler, get_printable_issue_string, sort_issues, replace_tag_references
22
from .error_types import DefinitionErrors, OnsetErrors, SchemaErrors, SchemaWarnings, SidecarErrors, \
33
ValidationErrors, ColumnErrors
44
from .error_types import ErrorContext, ErrorSeverity

hed/errors/error_messages.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,11 @@ def onset_error_offset_before_onset(tag):
339339
return f"Offset tag '{tag}' does not have a matching onset."
340340

341341

342+
@hed_tag_error(OnsetErrors.ONSET_SAME_DEFS_ONE_ROW, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR)
def onset_error_same_defs_one_row(tag, def_name):
    """Build the message for a tag that reuses a definition name at one onset time.

    Parameters:
        tag: The offending tag; interpolated into the message.
        def_name: The definition name that was already used.

    Returns:
        str: The formatted error message.
    """
    message = f"'{tag}' uses name '{def_name}', which was already used at this onset time."
    return message
345+
346+
342347
@hed_tag_error(OnsetErrors.INSET_BEFORE_ONSET, actual_code=ValidationErrors.ONSET_OFFSET_INSET_ERROR)
343348
def onset_error_inset_before_onset(tag):
344349
return f"Inset tag '{tag}' does not have a matching onset."

hed/errors/error_reporter.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -671,3 +671,31 @@ def _create_error_tree(error_dict, parent_element=None, add_link=True):
671671
_create_error_tree(value, context_ul, add_link)
672672

673673
return parent_element
674+
675+
676+
def replace_tag_references(list_or_dict):
    """Utility function to remove any references to tags, strings, etc. from any type of nested list or dict.

    Use this if you want to save out issues to a file.

    The structure is modified in place: nested dicts and lists are walked recursively and
    every leaf that is not a bool, int or float is replaced by ``str(leaf)``.  Note that a
    None leaf therefore becomes the string "None".

    A shallow ``list_or_dict.copy()`` only protects the top level; nested containers are
    shared with the original and are still modified.  Pass a deep copy if the original
    structure must stay untouched.

    Parameters:
        list_or_dict(list or dict): An arbitrarily nested list/dict structure.

    Returns:
        list or dict: The same object that was passed in, after modification.
    """
    if isinstance(list_or_dict, dict):
        entries = list_or_dict.items()
    elif isinstance(list_or_dict, list):
        entries = enumerate(list_or_dict)
    else:
        # Not a container this function knows how to walk; leave it untouched.
        return list_or_dict

    for key, value in entries:
        if isinstance(value, (dict, list)):
            replace_tag_references(value)
        elif not isinstance(value, (bool, float, int)):
            # Only existing keys/indexes are reassigned, so the container never
            # changes size while it is being iterated.
            list_or_dict[key] = str(value)
    return list_or_dict

hed/errors/error_types.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,8 @@ class OnsetErrors:
160160
ONSET_TOO_MANY_DEFS = "ONSET_TOO_MANY_DEFS"
161161
ONSET_TAG_OUTSIDE_OF_GROUP = "ONSET_TAG_OUTSIDE_OF_GROUP"
162162
INSET_BEFORE_ONSET = "INSET_BEFORE_ONSET"
163+
ONSET_SAME_DEFS_ONE_ROW = "ONSET_SAME_DEFS_ONE_ROW"
164+
163165

164166
class ColumnErrors:
165167
INVALID_COLUMN_REF = "INVALID_COLUMN_REF"

hed/models/base_input.py

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,12 +105,54 @@ def dataframe_a(self):
105105
@property
106106
def series_a(self):
107107
"""Return the assembled dataframe as a series
108-
Probably a placeholder name.
109108
110109
Returns:
111-
Series: the assembled dataframe with columns merged"""
110+
Series: the assembled dataframe with columns merged
111+
"""
112112
return self.combine_dataframe(self.assemble())
113113

114+
@property
def series_filtered(self):
    """Return the assembled series with rows that share an onset merged together.

    Returns:
        list or None: Whatever _filter_by_index_list produces for series_a, or None when
            there is no onset column.
    """
    if self.onsets is None:
        return None

    onset_groups = self._indexed_dict_from_onsets(self.onsets.astype(float))
    return self._filter_by_index_list(self.series_a, indexed_dict=onset_groups)
124+
125+
@staticmethod
126+
def _indexed_dict_from_onsets(onsets):
127+
current_onset = -1000000.0
128+
tol = 1e-9
129+
from collections import defaultdict
130+
indexed_dict = defaultdict(list)
131+
for i, onset in enumerate(onsets):
132+
if abs(onset - current_onset) > tol:
133+
current_onset = onset
134+
indexed_dict[current_onset].append(i)
135+
136+
return indexed_dict
137+
138+
@staticmethod
139+
def _filter_by_index_list(original_series, indexed_dict):
140+
new_series = ["n/a"] * len(original_series) # Initialize new_series with "n/a"
141+
142+
for onset, indices in indexed_dict.items():
143+
if indices:
144+
first_index = indices[0] # Take the first index of each onset group
145+
# Join the corresponding original series entries and place them at the first index
146+
new_series[first_index] = ",".join([str(original_series[i]) for i in indices])
147+
148+
return new_series
149+
150+
@property
def onsets(self):
    """Return the "onset" column of the underlying dataframe, or None if there is none."""
    return self._dataframe["onset"] if "onset" in self.columns else None
155+
114156
@property
115157
def name(self):
116158
""" Name of the data. """

hed/validator/def_validator.py

Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
from hed.models.hed_string import HedString
22
from hed.models.hed_tag import HedTag
3+
from hed.models.hed_group import HedGroup
34
from hed.models.definition_dict import DefinitionDict
45
from hed.errors.error_types import ValidationErrors
56
from hed.errors.error_reporter import ErrorHandler
7+
from hed.models.model_constants import DefTagNames
8+
from hed.errors.error_types import OnsetErrors
69

710

811
class DefValidator(DefinitionDict):
9-
""" Handles validating Def/ and Def-expand/.
12+
""" Handles validating Def/ and Def-expand/, as well as Temporal groups: Onset, Inset, and Offset
1013
1114
"""
1215
def __init__(self, def_dicts=None, hed_schema=None):
@@ -128,3 +131,71 @@ def _validate_def_contents(self, def_tag, def_expand_group, tag_validator):
128131
def_issues += self._report_missing_or_invalid_value(def_tag, def_entry, is_def_expand_tag)
129132

130133
return def_issues
134+
135+
def validate_onset_offset(self, hed_string_obj):
    """ Check the structure of every top-level temporal group in a hed string.

    Parameters:
        hed_string_obj (HedString): The hed string to check.

    Returns:
        list: Issues found in the temporal groups: no def tag, more than one def tag, too
            many additional children, a bare tag where a group is required, an unknown
            def name, or a placeholder mismatch.
    """
    issues = []
    for anchor_tag, anchor_group in self._find_onset_tags(hed_string_obj):
        if not anchor_tag:
            return []

        found_defs = anchor_group.find_def_tags()
        if not found_defs:
            issues.extend(ErrorHandler.format_error(OnsetErrors.ONSET_NO_DEF_TAG_FOUND, anchor_tag))
            continue

        if len(found_defs) > 1:
            extra_def_tags = [entry[0] for entry in found_defs[1:]]
            issues.extend(ErrorHandler.format_error(OnsetErrors.ONSET_TOO_MANY_DEFS,
                                                    tag=found_defs[0][0],
                                                    tag_list=extra_def_tags))
            continue

        # Everything in the group besides the def (or its expanded group) and the
        # temporal tag itself counts as an additional child.
        def_tag, def_group, _ = found_defs[0]
        def_item = def_tag if def_group is None else def_group
        others = [item for item in anchor_group.children
                  if def_item is not item and anchor_tag is not item]

        # Offset groups may not carry anything extra; the other temporal tags allow one child.
        allowed = 0 if anchor_tag.short_base_tag == DefTagNames.OFFSET_ORG_KEY else 1
        if len(others) > allowed:
            issues.extend(ErrorHandler.format_error(OnsetErrors.ONSET_WRONG_NUMBER_GROUPS,
                                                    def_tag,
                                                    anchor_group.children))
            continue

        # Only the first extra child is inspected; turn this into a loop if more than
        # one child is ever allowed.
        if others and not isinstance(others[0], HedGroup):
            issues.extend(ErrorHandler.format_error(OnsetErrors.ONSET_TAG_OUTSIDE_OF_GROUP,
                                                    others[0],
                                                    def_tag))

        # The group has exactly one def tag at this point; check its name and placeholder.
        issues.extend(self._handle_onset_or_offset(def_tag))

    return issues
187+
188+
def _find_onset_tags(self, hed_string_obj):
    """ Return the top-level tags of hed_string_obj anchored on DefTagNames.TEMPORAL_KEYS. """
    temporal_matches = hed_string_obj.find_top_level_tags(anchor_tags=DefTagNames.TEMPORAL_KEYS)
    return temporal_matches
190+
191+
def _handle_onset_or_offset(self, def_tag):
192+
def_name, _, placeholder = def_tag.extension.partition('/')
193+
194+
def_entry = self.defs.get(def_name.lower())
195+
if def_entry is None:
196+
return ErrorHandler.format_error(OnsetErrors.ONSET_DEF_UNMATCHED, tag=def_tag)
197+
if bool(def_entry.takes_value) != bool(placeholder):
198+
return ErrorHandler.format_error(OnsetErrors.ONSET_PLACEHOLDER_WRONG, tag=def_tag,
199+
has_placeholder=bool(def_entry.takes_value))
200+
201+
return []

hed/validator/hed_validator.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,19 +12,17 @@
1212
from hed.models import HedTag
1313
from hed.validator.tag_validator import TagValidator
1414
from hed.validator.def_validator import DefValidator
15-
from hed.validator.onset_validator import OnsetValidator
1615

1716

1817
class HedValidator:
1918
""" Top level validation of HED strings. """
2019

21-
def __init__(self, hed_schema, def_dicts=None, run_full_onset_checks=True, definitions_allowed=False):
20+
def __init__(self, hed_schema, def_dicts=None, definitions_allowed=False):
2221
""" Constructor for the HedValidator class.
2322
2423
Parameters:
2524
hed_schema (HedSchema or HedSchemaGroup): HedSchema object to use for validation.
2625
def_dicts(DefinitionDict or list or dict): the def dicts to use for validation
27-
run_full_onset_checks(bool): If True, check for matching onset/offset tags
2826
definitions_allowed(bool): If False, flag definitions found as errors
2927
"""
3028
super().__init__()
@@ -33,8 +31,6 @@ def __init__(self, hed_schema, def_dicts=None, run_full_onset_checks=True, defin
3331

3432
self._tag_validator = TagValidator(hed_schema=self._hed_schema)
3533
self._def_validator = DefValidator(def_dicts, hed_schema)
36-
self._onset_validator = OnsetValidator(def_dict=self._def_validator,
37-
run_full_onset_checks=run_full_onset_checks)
3834
self._definitions_allowed = definitions_allowed
3935

4036
def validate(self, hed_string, allow_placeholders, error_handler=None):
@@ -80,7 +76,7 @@ def run_full_string_checks(self, hed_string):
8076
issues = []
8177
issues += self._validate_tags_in_hed_string(hed_string)
8278
issues += self._validate_groups_in_hed_string(hed_string)
83-
issues += self._onset_validator.validate_onset_offset(hed_string)
79+
issues += self._def_validator.validate_onset_offset(hed_string)
8480
return issues
8581

8682
def _validate_groups_in_hed_string(self, hed_string_obj):

hed/validator/onset_validator.py

Lines changed: 27 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,11 @@
77
class OnsetValidator:
88
""" Validates onset/offset pairs. """
99

10-
def __init__(self, def_dict, run_full_onset_checks=True):
11-
self._defs = def_dict
10+
def __init__(self):
1211
self._onsets = {}
13-
self._run_full_onset_checks = run_full_onset_checks
1412

15-
def validate_onset_offset(self, hed_string_obj):
16-
""" Validate onset/offset
13+
def validate_temporal_relations(self, hed_string_obj):
14+
""" Validate onset/offset/inset tag relations
1715
1816
Parameters:
1917
hed_string_obj (HedString): The hed string to check.
@@ -22,76 +20,46 @@ def validate_onset_offset(self, hed_string_obj):
2220
list: A list of issues found in validating onsets (i.e., out of order onsets, unknown def names).
2321
"""
2422
onset_issues = []
25-
for found_onset, found_group in self._find_onset_tags(hed_string_obj):
26-
if not found_onset:
23+
used_def_names = set()
24+
for temporal_tag, temporal_group in self._find_temporal_tags(hed_string_obj):
25+
if not temporal_tag:
2726
return []
2827

29-
def_tags = found_group.find_def_tags()
28+
def_tags = temporal_group.find_def_tags(include_groups=0)
3029
if not def_tags:
31-
onset_issues += ErrorHandler.format_error(OnsetErrors.ONSET_NO_DEF_TAG_FOUND, found_onset)
3230
continue
3331

34-
if len(def_tags) > 1:
35-
onset_issues += ErrorHandler.format_error(OnsetErrors.ONSET_TOO_MANY_DEFS,
36-
tag=def_tags[0][0],
37-
tag_list=[tag[0] for tag in def_tags[1:]])
32+
def_tag = def_tags[0]
33+
def_name = def_tag.extension
34+
if def_name.lower() in used_def_names:
35+
onset_issues += ErrorHandler.format_error(OnsetErrors.ONSET_SAME_DEFS_ONE_ROW, tag=temporal_tag,
36+
def_name=def_name)
3837
continue
3938

40-
# Get all children but def group and onset/offset, then validate #/type of children.
41-
def_tag, def_group, _ = def_tags[0]
42-
if def_group is None:
43-
def_group = def_tag
44-
children = [child for child in found_group.children if
45-
def_group is not child and found_onset is not child]
46-
max_children = 1
47-
if found_onset.short_base_tag == DefTagNames.OFFSET_ORG_KEY:
48-
max_children = 0
49-
if len(children) > max_children:
50-
onset_issues += ErrorHandler.format_error(OnsetErrors.ONSET_WRONG_NUMBER_GROUPS,
51-
def_tag,
52-
found_group.children)
53-
continue
54-
55-
if children:
56-
# Make this a loop if max_children can be > 1
57-
child = children[0]
58-
if not isinstance(child, HedGroup):
59-
onset_issues += ErrorHandler.format_error(OnsetErrors.ONSET_TAG_OUTSIDE_OF_GROUP,
60-
child,
61-
def_tag)
39+
used_def_names.add(def_tag.extension.lower())
6240

6341
# At this point we have either an onset or offset tag and it's name
64-
onset_issues += self._handle_onset_or_offset(def_tag, found_onset)
42+
onset_issues += self._handle_onset_or_offset(def_tag, temporal_tag)
6543

6644
return onset_issues
6745

68-
def _find_onset_tags(self, hed_string_obj):
46+
def _find_temporal_tags(self, hed_string_obj):
6947
return hed_string_obj.find_top_level_tags(anchor_tags=DefTagNames.TEMPORAL_KEYS)
7048

7149
def _handle_onset_or_offset(self, def_tag, onset_offset_tag):
7250
is_onset = onset_offset_tag.short_base_tag == DefTagNames.ONSET_ORG_KEY
7351
full_def_name = def_tag.extension
74-
def_name, _, placeholder = def_tag.extension.partition('/')
75-
76-
def_entry = self._defs.get(def_name)
77-
if def_entry is None:
78-
return ErrorHandler.format_error(OnsetErrors.ONSET_DEF_UNMATCHED, tag=def_tag)
79-
if bool(def_entry.takes_value) != bool(placeholder):
80-
return ErrorHandler.format_error(OnsetErrors.ONSET_PLACEHOLDER_WRONG, tag=def_tag,
81-
has_placeholder=bool(def_entry.takes_value))
82-
83-
if self._run_full_onset_checks:
84-
if is_onset:
85-
# onset can never fail as it implies an offset
86-
self._onsets[full_def_name.lower()] = full_def_name
87-
else:
88-
is_offset = onset_offset_tag.short_base_tag == DefTagNames.OFFSET_ORG_KEY
89-
if full_def_name.lower() not in self._onsets:
90-
if is_offset:
91-
return ErrorHandler.format_error(OnsetErrors.OFFSET_BEFORE_ONSET, tag=def_tag)
92-
else:
93-
return ErrorHandler.format_error(OnsetErrors.INSET_BEFORE_ONSET, tag=def_tag)
94-
elif is_offset:
95-
del self._onsets[full_def_name.lower()]
52+
if is_onset:
53+
# onset can never fail as it implies an offset
54+
self._onsets[full_def_name.lower()] = full_def_name
55+
else:
56+
is_offset = onset_offset_tag.short_base_tag == DefTagNames.OFFSET_ORG_KEY
57+
if full_def_name.lower() not in self._onsets:
58+
if is_offset:
59+
return ErrorHandler.format_error(OnsetErrors.OFFSET_BEFORE_ONSET, tag=def_tag)
60+
else:
61+
return ErrorHandler.format_error(OnsetErrors.INSET_BEFORE_ONSET, tag=def_tag)
62+
elif is_offset:
63+
del self._onsets[full_def_name.lower()]
9664

9765
return []

hed/validator/sidecar_validator.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,6 @@ def validate(self, sidecar, extra_def_dicts=None, name=None, error_handler=None)
5151
sidecar_def_dict = sidecar.get_def_dict(hed_schema=self._schema, extra_def_dicts=extra_def_dicts)
5252
hed_validator = HedValidator(self._schema,
5353
def_dicts=sidecar_def_dict,
54-
run_full_onset_checks=False,
5554
definitions_allowed=True)
5655

5756
issues += sidecar._extract_definition_issues

0 commit comments

Comments
 (0)