forked from hed-standard/hed-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhed_tag.py
More file actions
659 lines (485 loc) · 19.8 KB
/
hed_tag.py
File metadata and controls
659 lines (485 loc) · 19.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
from hed.schema.hed_schema_constants import HedKey
import copy
class HedTag:
    """ A single HED tag.

    Notes:
        - HedTag is a smart class in that it keeps track of its original value and positioning
          as well as pointers to the relevant HED schema information, if relevant.
        - Equality and hashing are case-insensitive and based on the short form of the tag.
    """

    def __init__(self, hed_string, hed_schema, span=None, def_dict=None):
        """ Creates a HedTag.

        Parameters:
            hed_string (str): Source hed string for this tag.
            hed_schema (HedSchema): A parameter for calculating canonical forms on creation.
            span (int, int): The start and end indexes of the tag in the hed_string.
                Defaults to the whole of hed_string.
            def_dict(DefinitionDict or None): The def dict to use to identify def/def expand tags.
        """
        self._hed_string = hed_string
        if span is None:
            span = (0, len(hed_string))
        # This is the span into the original hed string for this tag
        self.span = span

        # If this is present, use this as the org tag for most purposes.
        # This is not generally used anymore, but you can use it to replace a tag in place.
        self._tag = None

        self._namespace = self._get_schema_namespace(self.org_tag)

        # This is the schema this tag was converted to.
        self._schema = None
        self._schema_entry = None

        # Stored with its leading slash (e.g. "/3 ms"); empty string when there is none.
        self._extension_value = ""
        self._parent = None

        self._expandable = None
        self._expanded = False

        self._calculate_to_canonical_forms(hed_schema)
        if def_dict:
            def_dict.construct_def_tag(self)

    def copy(self):
        """ Return a deep copy of this tag.

        Returns:
            HedTag: The copied tag. Its parent is None.

        Notes:
            - The parent is detached while copying so that it is not copied along with the tag.
        """
        save_parent = self._parent
        self._parent = None
        try:
            return copy.deepcopy(self)
        finally:
            # Always re-attach the parent, even if the deep copy raised.
            self._parent = save_parent

    @property
    def schema_namespace(self):
        """ Library namespace for this tag if one exists.

        Returns:
            namespace (str): The library namespace, including the colon.
        """
        return self._namespace

    @property
    def short_tag(self):
        """ Short form including value or extension.

        Returns:
            short_tag (str): The short form of the tag, including value or extension.
        """
        if self._schema_entry:
            return f"{self._namespace}{self._schema_entry.short_tag_name}{self._extension_value}"

        return str(self)

    @property
    def base_tag(self):
        """ Long form without value or extension.

        Returns:
            base_tag (str): The long form of the tag, without value or extension.
        """
        if self._schema_entry:
            return self._schema_entry.long_tag_name
        return str(self)

    @property
    def short_base_tag(self):
        """ Short form without value or extension

        Returns:
            base_tag (str): The short non-extension port of a tag.

        Notes:
            - ParentNodes/Def/DefName would return just "Def".
        """
        if self._schema_entry:
            return self._schema_entry.short_tag_name
        return str(self)

    @short_base_tag.setter
    def short_base_tag(self, new_tag_val):
        """ Change base tag, leaving extension or value.

        Parameters:
            new_tag_val (str): The new short_base_tag for this tag.

        :raises ValueError:
            - If the tag wasn't already identified

        Note:
            - Generally this is used to swap def to def-expand.
        """
        if not self._schema_entry:
            raise ValueError("Cannot set unidentified tags")

        tag_entry = None
        if self._schema:
            # Takes-value entries are looked up with the "/#" placeholder suffix.
            if self.is_takes_value_tag():
                new_tag_val = new_tag_val + "/#"
            tag_entry = self._schema.get_tag_entry(new_tag_val, schema_namespace=self.schema_namespace)

        self._schema_entry = tag_entry

    @property
    def org_base_tag(self):
        """ Original form without value or extension.

        Returns:
            base_tag (str): The original form of the tag, without value or extension.

        Notes:
            - Warning: This could be empty if the original tag had a name_prefix prepended.
              e.g. a column where "Label/" is prepended, thus the column value has zero base portion.
        """
        if self._schema_entry:
            extension_len = len(self._extension_value)
            if not extension_len:
                return self.tag

            org_len = len(self.tag)
            if org_len == extension_len:
                return ""

            return self.tag[:org_len - extension_len]
        return str(self)

    def tag_modified(self):
        """ Return true if tag has been modified from original.

        Returns:
            bool: Return True if the tag is modified.

        Notes:
            - Modifications can include adding a column name_prefix.
        """
        return bool(self._tag)

    @property
    def tag(self):
        """ Returns the tag.

        Returns the original tag if no user form set.

        Returns:
            tag (str): The custom set user form of the tag.
        """
        if self._tag:
            return self._tag

        return self.org_tag

    @tag.setter
    def tag(self, new_tag_val):
        """ Allow you to overwrite the tag output text.

        Parameters:
            new_tag_val (str): New (implicitly long form) of tag to set.

        Notes:
            - You probably don't actually want to call this.
            - The schema entry is recalculated against the schema this tag was created with.
        """
        self._tag = new_tag_val
        self._schema_entry = None
        self._calculate_to_canonical_forms(self._schema)

    @property
    def extension(self):
        """ Get the extension or value of tag

        Generally this is just the portion after the last slash.
        Returns an empty string if no extension or value.

        Returns:
            str: The extension or value, without the leading slash.

        Notes:
            - This tag must have been computed first.
        """
        if self._extension_value:
            return self._extension_value[1:]

        return ""

    @extension.setter
    def extension(self, x):
        """ Set the extension or value of the tag.

        Parameters:
            x (str): The new extension, without a leading slash (one is added).
        """
        self._extension_value = f"/{x}"

    @property
    def long_tag(self):
        """ Long form including value or extension.

        Returns:
            str: The long form of this tag.
        """
        if self._schema_entry:
            return f"{self._namespace}{self._schema_entry.long_tag_name}{self._extension_value}"
        return str(self)

    @property
    def org_tag(self):
        """ Return the original unmodified tag.

        Returns:
            str: The original unmodified tag.
        """
        return self._hed_string[self.span[0]:self.span[1]]

    @property
    def tag_terms(self):
        """ Return a tuple of all the terms in this tag Lowercase.

        Returns:
            tag_terms (tuple): Tuple of terms or empty tuple for unidentified tag.

        Notes:
            - Does not include any extension.
        """
        if self._schema_entry:
            return self._schema_entry.tag_terms

        return tuple()

    @property
    def expanded(self):
        """Returns if this is currently expanded or not.

        Will always be false unless expandable is set.  This is primarily used for Def/Def-expand tags at present.

        Returns:
            bool: Returns true if this is currently expanded
        """
        return self._expanded

    @property
    def expandable(self):
        """Returns what this expands to

        This is primarily used for Def/Def-expand tags at present.

        Returns:
            HedGroup or HedTag or None: Returns the expanded form of this tag
        """
        return self._expandable

    def is_column_ref(self):
        """ Returns if this tag is a column reference from a sidecar.

        You should only see these if you are directly accessing sidecar strings, tools should remove them otherwise.

        Returns:
            bool: Returns True if this is a column ref
        """
        return self.org_tag.startswith('{') and self.org_tag.endswith('}')

    def __str__(self):
        """ Convert this HedTag to a string.

        Returns:
            str: The short form if the tag was identified in the schema, otherwise the
                 user-set tag if there is one, otherwise the original tag text.
        """
        if self._schema_entry:
            return self.short_tag

        if self._tag:
            return self._tag

        return self._hed_string[self.span[0]:self.span[1]]

    def lower(self):
        """ Convenience function, equivalent to str(self).lower(). """
        return str(self).lower()

    def _calculate_to_canonical_forms(self, hed_schema):
        """ Update internal state based on schema.

        Parameters:
            hed_schema (HedSchema or HedSchemaGroup): The schema to use to validate this tag

        Returns:
            list: A list of issues found during conversion. Each element is a dictionary.
        """
        tag_entry, remainder, tag_issues = hed_schema.find_tag_entry(self, self.schema_namespace)
        self._schema_entry = tag_entry
        self._schema = hed_schema
        # The extension is only recorded for identified tags that have a non-empty remainder.
        if self._schema_entry:
            if remainder:
                self._extension_value = remainder

        return tag_issues

    def get_stripped_unit_value(self):
        """ Return the extension divided into value and units, if the units are valid.

        Returns:
            stripped_unit_value (str): The extension portion with the units removed.
            unit (UnitEntry or None): None if no valid unit found, in which case the
                                      whole extension is returned as the value.

        Examples:
            'Duration/3 ms' will return '3'
        """
        tag_unit_classes = self.unit_classes
        stripped_value, unit = self._get_tag_units_portion(tag_unit_classes)
        if stripped_value:
            return stripped_value, unit

        return self.extension, None

    def value_as_default_unit(self):
        """ Returns the value converted to default units if possible.

        Returns None if the units are invalid.(No default unit or invalid)

        Returns:
            value (float or None): The extension value as default units.
                                   If there are not default units, returns None.

        Examples:
            'Duration/300 ms' will return .3
        """
        tag_unit_classes = self.unit_classes
        value, _, units = self.extension.rpartition(" ")
        if not value:
            # No space-separated unit portion: treat the whole extension as the value
            # and fall back to the default unit of the tag.
            stripped_value = units
            unit = self.default_unit
        else:
            stripped_value, unit = self._get_tag_units_portion(tag_unit_classes)

        # Without a value or a resolved unit entry there is nothing to convert.
        if not stripped_value or unit is None:
            return None

        conversion_factor = unit.attributes.get("conversionFactor")
        if not conversion_factor:
            return None

        return float(stripped_value) * float(conversion_factor)

    @property
    def unit_classes(self):
        """ Return a dict of all the unit classes this tag accepts.

        Returns:
            unit_classes (dict): A dict of unit classes this tag accepts.

        Notes:
            - Returns empty dict if this is not a unit class tag.
            - The dictionary has unit name as the key and HedSchemaEntry as value.
        """
        if self._schema_entry:
            return self._schema_entry.unit_classes
        return {}

    @property
    def value_classes(self):
        """ Return a dict of all the value classes this tag accepts.

        Returns:
            dict: A dictionary of HedSchemaEntry value classes this tag accepts.

        Notes:
            - Returns empty dict if this tag was not identified in the schema.
        """
        if self._schema_entry:
            return self._schema_entry.value_classes
        return {}

    @property
    def attributes(self):
        """ Return a dict of all the attributes this tag has.

        Returns:
            dict: The attributes of the schema entry for this tag.

        Notes:
            - Returns empty dict if this tag was not identified in the schema.
        """
        if self._schema_entry:
            return self._schema_entry.attributes
        return {}

    def tag_exists_in_schema(self):
        """ Return whether a schema entry was found for this tag.

        Returns:
            bool: True if this tag exists.

        Notes:
            - This does NOT assure this is a valid tag.
        """
        return bool(self._schema_entry)

    def is_takes_value_tag(self):
        """ Return true if this is a takes value tag.

        Returns:
            bool: True if this is a takes value tag.
        """
        if self._schema_entry:
            return self._schema_entry.has_attribute(HedKey.TakesValue)
        return False

    def is_unit_class_tag(self):
        """ Return true if this is a unit class tag.

        Returns:
            bool: True if this is a unit class tag.
        """
        if self._schema_entry:
            return bool(self._schema_entry.unit_classes)
        return False

    def is_value_class_tag(self):
        """ Return true if this is a value class tag.

        Returns:
            bool: True if this is a tag with a value class.
        """
        if self._schema_entry:
            return bool(self._schema_entry.value_classes)
        return False

    def is_basic_tag(self):
        """ Return True if a known tag with no extension or value.

        Returns:
            bool: True if this is a known tag without extension or value.
        """
        return bool(self._schema_entry and not self.extension)

    def has_attribute(self, attribute):
        """ Return true if this is an attribute this tag has.

        Parameters:
            attribute (str): Name of the attribute.

        Returns:
            bool: True if this tag has the attribute.
        """
        if self._schema_entry:
            return self._schema_entry.has_attribute(attribute)
        return False

    def get_tag_unit_class_units(self):
        """ Get the unit class units associated with a particular tag.

        Returns:
            list: A list containing the unit class units associated with a particular tag or an empty list.
        """
        units = []
        for unit_class_entry in self.unit_classes.values():
            units.extend(unit_class_entry.units.keys())

        return units

    @property
    def default_unit(self):
        """ Get the default unit class unit for this tag.

        Only a tag with a single unit class can have default units.

        Returns:
            unit(UnitEntry or None): the default unit entry for this tag, or None
        """
        unit_classes = list(self.unit_classes.values())
        if len(unit_classes) != 1:
            return None

        only_unit_class_entry = unit_classes[0]
        default_unit = only_unit_class_entry.has_attribute(HedKey.DefaultUnits, return_value=True)
        return only_unit_class_entry.units.get(default_unit, None)

    def base_tag_has_attribute(self, tag_attribute):
        """ Check to see if the tag has a specific attribute.

        This is primarily used to check for things like TopLevelTag on Definitions and similar.

        Parameters:
            tag_attribute (str): A tag attribute.

        Returns:
            bool: True if the tag has the specified attribute. False, if otherwise.
        """
        if not self._schema_entry:
            return False

        return self._schema_entry.base_tag_has_attribute(tag_attribute)

    @staticmethod
    def _get_schema_namespace(org_tag):
        """ Finds the library namespace for the tag.

        Parameters:
            org_tag (str): A string representing a tag.

        Returns:
            str: Library namespace string (including the colon) or empty.

        Notes:
            - A colon that only appears after the first slash is not a namespace separator.
        """
        first_slash = org_tag.find("/")
        first_colon = org_tag.find(":")

        if first_colon != -1:
            if first_slash != -1 and first_colon > first_slash:
                return ""

            return org_tag[:first_colon + 1]
        return ""

    def _get_tag_units_portion(self, tag_unit_classes):
        """ Check that this string has valid units and remove them.

        Parameters:
            tag_unit_classes (dict): Dictionary of valid UnitClassEntry objects for this tag.

        Returns:
            stripped_value (str or None): The value with the units removed.
                                          None if no valid unit was found.
            unit (UnitEntry or None): The matching unit entry if one is found
        """
        value, _, units = self.extension.rpartition(" ")
        if not units:
            return None, None

        for unit_class_entry in tag_unit_classes.values():
            all_valid_unit_permutations = unit_class_entry.derivative_units

            # Normal case: "<value> <unit>".
            possible_match = self._find_modifier_unit_entry(units, all_valid_unit_permutations)
            if possible_match and not possible_match.has_attribute(HedKey.UnitPrefix):
                return value, possible_match

            # Repeat the above, but as a prefix: "<unit> <value>".
            # Here the text before the space is the unit and the text after it is the value.
            possible_match = self._find_modifier_unit_entry(value, all_valid_unit_permutations)
            if possible_match and possible_match.has_attribute(HedKey.UnitPrefix):
                return units, possible_match

        return None, None

    @staticmethod
    def _find_modifier_unit_entry(units, all_valid_unit_permutations):
        """ Look up a unit string among the valid unit permutations.

        Parameters:
            units (str): The unit text to look up.
            all_valid_unit_permutations (dict): Mapping of unit text to unit entries.

        Returns:
            UnitEntry or None: The matching entry, or None if there is no valid match.
        """
        possible_match = all_valid_unit_permutations.get(units)
        # If we have a match that's a unit symbol, we're done, return it.
        if possible_match and possible_match.has_attribute(HedKey.UnitSymbol):
            return possible_match

        possible_match = all_valid_unit_permutations.get(units.lower())
        # Unit symbols must match including case, a match of a unit symbol now is something like M becoming m.
        if possible_match and possible_match.has_attribute(HedKey.UnitSymbol):
            possible_match = None

        return possible_match

    def is_placeholder(self):
        """ Return True if the original tag or its extension contains the placeholder character(#).

        Returns:
            bool: True if a '#' is present.
        """
        return "#" in self.org_tag or "#" in self._extension_value

    def replace_placeholder(self, placeholder_value):
        """ If tag has a placeholder character(#), replace with value.

        Parameters:
            placeholder_value (str): Value to replace placeholder with.
        """
        if self.is_placeholder():
            if self._schema_entry:
                self._extension_value = self._extension_value.replace("#", placeholder_value)
            else:
                # Unidentified tag: there is no separate extension, so rewrite the tag text itself.
                self._tag = self.tag.replace("#", placeholder_value)

    def __hash__(self):
        """ Hash on the lowercased string form so that hashing agrees with __eq__. """
        # For identified tags str(self) is the short form (namespace + short name + extension),
        # which is the same text __eq__ compares case-insensitively.
        return hash(self.lower())

    def __eq__(self, other):
        """ Case-insensitive comparison against another HedTag or a string.

        Parameters:
            other (HedTag or str): The object to compare with.

        Returns:
            bool: True if the short forms or the original tags match, ignoring case.
        """
        if self is other:
            return True

        if isinstance(other, str):
            # Lowercase both sides so the comparison is case-insensitive regardless of operand case.
            return self.lower() == other.lower()

        if not isinstance(other, HedTag):
            return False

        if self.short_tag.lower() == other.short_tag.lower():
            return True

        if self.org_tag.lower() == other.org_tag.lower():
            return True
        return False

    def __deepcopy__(self, memo):
        """ Copy this tag, sharing the schema and schema entry rather than copying them.

        Parameters:
            memo (dict): The memo dictionary supplied by copy.deepcopy.

        Returns:
            HedTag: The copied tag.
        """
        # check if the object has already been copied
        if id(self) in memo:
            return memo[id(self)]

        # create a new instance of HedTag class
        new_tag = self.__class__.__new__(self.__class__)
        new_tag.__dict__.update(self.__dict__)

        # add the new object to the memo dictionary
        memo[id(self)] = new_tag

        # Deep copy the attributes that need it(most notably, we don't copy schema/schema entry)
        new_tag._parent = copy.deepcopy(self._parent, memo)
        new_tag._expandable = copy.deepcopy(self._expandable, memo)
        new_tag._expanded = copy.deepcopy(self._expanded, memo)

        return new_tag