Skip to content

Commit ac7b51f

Browse files
committed
stripped down handler interface
1 parent 7f71863 commit ac7b51f

9 files changed

Lines changed: 61 additions & 100 deletions

File tree

src/nexus/handlers/__init__.py

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,14 @@ class GenericHandler(object):
1919
2020
Handlers have (at least) the following attributes:
2121
22-
1. parse(self, data) - the function for parsing the block
23-
2. write(self, data) - a function for returning the block to a text
22+
1. __init__(self, **kw) - the function for parsing the block
23+
2. iter_lines(self) - a function for returning the block to a text
2424
representation (used to regenerate a nexus file).
2525
3. block - a list of raw strings in this block
2626
"""
27-
def __init__(self, data=None):
27+
def __init__(self, name=None, data=None):
2828
"""Initialise datastore in <block> under <keyname>"""
29+
self.name = name
2930
self.block = data or []
3031
self.comments = []
3132

@@ -34,13 +35,22 @@ def __init__(self, data=None):
3435
if line.strip().startswith("[") and line.strip().endswith("]"):
3536
self.comments.append(line)
3637

38+
def iter_lines(self):
    """
    Yields the raw lines of this block without its delimiter lines.

    The first line of ``self.block`` is skipped when it matches
    BEGIN_PATTERN and the last line is skipped when it matches
    END_PATTERN; every other line is yielded unchanged, so that
    ``write`` can add its own begin/end lines around the content.
    """
    for position, text in enumerate(self.block):
        # Only the very first line may act as the opening delimiter ...
        if position == 0 and BEGIN_PATTERN.search(text):
            continue
        # ... and only the very last line as the closing delimiter.
        if position == len(self.block) - 1 and END_PATTERN.search(text):
            continue
        yield text
44+
3745
def write(self):
    """
    Generates a string containing a nexus block.

    The result consists of a ``begin <name>;`` line built from
    ``self.name``, every line produced by ``self.iter_lines()`` and a
    closing ``end;`` line. Each of these lines is terminated by a newline.

    :return: String
    """
    parts = ['begin {0};\n'.format(self.name)]
    for text in self.iter_lines():
        parts.append(text + '\n')
    parts.append('end;\n')
    return "".join(parts)
4454

4555
@staticmethod
4656
def remove_comments(line):
@@ -64,11 +74,5 @@ def remove_comments(line):
6474
def is_mesquite_attribute(line):
    """
    Returns True if the line is a mesquite attribute, i.e. if it matches
    MESQUITE_TITLE_PATTERN or MESQUITE_LINK_PATTERN at its start.

    :return: Boolean
    """
    # `or` short-circuits, so the link pattern is only tried when the
    # title pattern did not match; bool() turns the match object (or None)
    # into a plain True/False.
    return bool(
        MESQUITE_TITLE_PATTERN.match(line) or MESQUITE_LINK_PATTERN.match(line))

src/nexus/handlers/data.py

Lines changed: 16 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ class DataHandler(GenericHandler):
2121
re.IGNORECASE | re.DOTALL | re.MULTILINE
2222
)
2323

24-
def __init__(self, data=None):
25-
super(DataHandler, self).__init__(data)
24+
def __init__(self, **kw):
25+
super(DataHandler, self).__init__(**kw)
2626
self.charlabels = {}
2727
self.attributes = []
2828
self.format = {}
@@ -118,7 +118,7 @@ def characters(self):
118118
return self._characters
119119

120120
def is_missing_or_gap(self, state):
    """
    Returns True if `state` is one of the two placeholder symbols
    '-' or '?', and False for any other value.

    :return: Boolean
    """
    return state in ('-', '?')
122122

123123
def parse_format_line(self, data):
124124
"""
@@ -136,11 +136,10 @@ def parse_format_line(self, data):
136136
except IndexError:
137137
return None
138138

139-
line = line.lower()
140-
line = line.replace(" =", "=").replace("= ", "=") # standardise
139+
line = line.lower().replace(" =", "=").replace("= ", "=") # standardise
141140
for chunk in WHITESPACE_PATTERN.split(line):
142141
try:
143-
key, value = chunk.split("=")
142+
key, value = chunk.split("=", maxsplit=1)
144143
value = QUOTED_PATTERN.sub('\\1', value)
145144
except ValueError:
146145
key, value = chunk, True
@@ -240,7 +239,7 @@ def _parse_charstate_block(self, data):
240239
char_index += 1
241240
return new_data.split("\n")
242241

243-
def write(self):
242+
def iter_lines(self):
244243
"""
245244
Generates a string containing a nexus data block.
246245
@@ -269,34 +268,23 @@ def _make_format_line(self):
269268
fstring.append("%s=%s" % (key, value))
270269
return " ".join(fstring) + ";"
271270

272-
out = []
273-
out.append('begin data;')
274271
for att in self.attributes:
275-
out.append("\t%s" % att)
276-
out.append('\tdimensions ntax=%d nchar=%d;' % (self.ntaxa, self.nchar))
277-
out.append(_make_format_line(self))
272+
yield "\t%s" % att
273+
yield '\tdimensions ntax=%d nchar=%d;' % (self.ntaxa, self.nchar)
274+
yield _make_format_line(self)
278275
# handle char block
279276
if self.charlabels:
280-
out.append('\tcharstatelabels')
277+
yield '\tcharstatelabels'
281278
max_id = max(self.charlabels)
282279
for char_id in sorted(self.charlabels):
283-
out.append('\t\t%d %s%s' % (
284-
char_id + 1, # zero-indexing
285-
self.charlabels[char_id],
286-
',' if char_id < max_id else ''
287-
))
288-
out.append('\t;')
289-
out.append("matrix")
280+
yield '\t\t%d %s%s' % ( # zero-indexing
281+
char_id + 1, self.charlabels[char_id], ',' if char_id < max_id else '')
282+
yield '\t;'
283+
yield "matrix"
290284
max_taxon_len = max([len(_) for _ in self.matrix])
291285
for taxon in sorted(self.matrix):
292-
out.append("%s %s" % (taxon.ljust(max_taxon_len), ''.join(self.matrix[taxon])))
293-
out.append(" ;")
294-
out.append("end;")
295-
return "\n".join(out)
296-
297-
def __repr__(self):
298-
return "<NexusDataBlock: %d characters from %d taxa>" % \
299-
(self.nchar, self.ntaxa)
286+
yield "%s %s" % (taxon.ljust(max_taxon_len), ''.join(self.matrix[taxon]))
287+
yield " ;"
300288

301289

302290
class CharacterHandler(DataHandler):

src/nexus/handlers/taxa.py

Lines changed: 8 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ class TaxaHandler(GenericHandler):
1313
is_dimensions = re.compile(r"""dimensions\s*ntax\s*=\s*(\d+)""", re.IGNORECASE)
1414
is_taxlabel_block = re.compile(r"""\btaxlabels\b""", re.IGNORECASE)
1515

16-
def __init__(self, data=None):
17-
super(TaxaHandler, self).__init__(data)
16+
def __init__(self, **kw):
17+
super(TaxaHandler, self).__init__(**kw)
1818
self.taxa = []
1919
self.attributes = []
2020
self.annotations = {}
@@ -69,28 +69,16 @@ def _parse_taxa(self, line):
6969
if taxon and taxon not in self.taxa:
7070
yield (taxon, annot)
7171

72-
def write(self):
73-
"""
74-
Generates a string containing a taxa block for this data.
75-
76-
:return: String
77-
"""
72+
def iter_lines(self):
    """
    Yields the content lines of a taxa block for this data.

    In order: every entry of ``self.attributes`` (tab-indented), the
    ``dimensions`` line built from ``self.ntaxa``, the ``taxlabels``
    keyword, one numbered line per taxon and a closing ``;``. The
    surrounding begin/end lines are not produced here.
    """
    for attribute in self.attributes:
        yield "\t%s" % attribute
    yield '\tdimensions ntax=%d;' % self.ntaxa
    yield '\ttaxlabels'
    # taxa labels, numbered from 1
    for number, taxon in enumerate(self.taxa, start=1):
        # Append the taxon's annotation (if any) directly to its name.
        label = "%s%s" % (taxon, self.annotations.get(taxon, ''))
        if ' ' in label:
            # Labels containing a space are wrapped in single quotes.
            label = "'%s'" % label
        yield "\t[%d] %s" % (number, label)
    yield ';'

src/nexus/handlers/tree.py

Lines changed: 14 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,8 @@ class TreeHandler(GenericHandler):
4848
(?=[),])? # end boundary
4949
""", re.IGNORECASE + re.VERBOSE + re.DOTALL)
5050

51-
def __init__(self, data=None):
52-
super(TreeHandler, self).__init__(data)
51+
def __init__(self, **kw):
52+
super(TreeHandler, self).__init__(**kw)
5353
# does the treefile have a translate block?
5454
self.was_translated = False
5555
# has detranslate been called?
@@ -115,12 +115,13 @@ def detranslate(self):
115115
self.trees[idx] = Tree(self._detranslate_tree(tree, self.translators))
116116
self._been_detranslated = True
117117

118-
def _findall_chunks(self, tree):
118+
@staticmethod
119+
def _findall_chunks(tree):
119120
"""Helper function to find groups used by detranslate."""
120121
matches = []
121122
index = 0
122123
while True:
123-
match = self.translate_regex.search(tree, index)
124+
match = TreeHandler.translate_regex.search(tree, index)
124125
if not match:
125126
break
126127
m = dict(zip(['start', 'taxon', 'comment', 'branch'], match.groups()))
@@ -163,27 +164,16 @@ def _detranslate_tree(self, tree, translatetable):
163164
tree = tree.replace(found['match'], sub)
164165
return tree
165166

166-
def write(self):
167-
"""
168-
Generates a string containing a trees block.
169-
170-
:return: String
171-
"""
172-
out = ['begin trees;']
167+
def iter_lines(self):
    """
    Yields the content lines of a trees block.

    In order: every entry of ``self.attributes`` (tab-indented), then,
    only if the block was read with a translate table that has not been
    applied via detranslate, the ``translate`` table, and finally every
    tree in ``self.trees`` (tab-indented). The surrounding begin/end
    lines are not produced here.
    """
    for attribute in self.attributes:
        yield "\t" + attribute
    if self.was_translated and not self._been_detranslated:
        yield '\ttranslate'
        # Translate keys are stored as strings; sort them numerically.
        ordered_keys = sorted(int(key) for key in self.translators.keys())
        last_position = len(ordered_keys)
        for position, number in enumerate(ordered_keys, start=1):
            # Every entry except the last one is followed by a comma.
            separator = '' if position == last_position else ','
            yield "\t%d %s%s" % (
                number, self.translators[str(number)], separator)
        # work around bug https://github.com/CompEvol/beast2/issues/713
        yield ';'
    for tree in self.trees:
        yield "\t" + tree

src/nexus/reader.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@ def _set_blocks(self, blocks):
7575
for block, lines in (blocks.items() if isinstance(blocks, dict) else blocks):
7676
if block in self.blocks:
7777
raise NexusFormatException("Duplicate Block %s" % block)
78-
self.blocks[block] = HANDLERS.get(block, GenericHandler)(lines)
78+
self.blocks[block] = HANDLERS.get(block, GenericHandler)(
79+
name='data' if block == 'characters' else block, data=lines)
7980

8081
if self.blocks.get('characters') and not self.blocks.get('data'):
8182
self.blocks['data'] = self.blocks['characters']

tests/test_handler_DataHandler.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,6 @@ def test_DataHandler_parse_sites(input, expected):
3030
'Louise': ['1', '1'],
3131
}
3232

33-
def test_repr(nex):
34-
assert repr(nex.data) == "<NexusDataBlock: 2 characters from 4 taxa>"
35-
3633
def test_block_find(nex):
3734
assert 'data' in nex.blocks
3835
assert hasattr(nex, 'data')

tests/test_handler_GenericHandler.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,9 @@ def test_generic_readwrite():
1515
]
1616
nex = NexusReader.from_string("\n".join(expected))
1717
for line in nex.sets.write().split("\n"):
18-
e = expected.pop(0).strip()
19-
assert line.strip() == e
18+
if line:
19+
e = expected.pop(0).strip()
20+
assert line.strip() == e
2021

2122

2223
def test_write_produces_end():

tests/test_handler_TaxaHandler.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,6 @@ def test_iterable(nex2):
2222
assert taxa == expected[idx]
2323

2424

25-
def test_repr(nex2):
26-
assert repr(nex2.blocks['taxa']) == "<NexusTaxaBlock: 4 taxa>"
27-
28-
2925
def test_wrap_label_in_quotes_only_when_needed(nex2):
3026
nex2.taxa.taxa[0] = "long name"
3127
output = nex2.taxa.write()

tests/test_handler_TreeHandler.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,6 @@ def test_write(trees, examples):
4242
assert expected == written
4343

4444

45-
def test_repr(trees):
46-
assert repr(trees.trees) == '<NexusTreeBlock: 3 trees>'
47-
48-
4945
def test_write_produces_end(trees):
5046
assert "end;" in trees.trees.write()
5147
assert len([_ for _ in trees.trees[0].newick_tree.walk()]) == 25

0 commit comments

Comments
 (0)