Skip to content

Commit c69153b

Browse files
committed
Update parse_mtg.jl
use Symbol for feature names directly
1 parent 459f1da commit c69153b

1 file changed

Lines changed: 15 additions & 25 deletions

File tree

src/read_MTG/parse_mtg.jl

Lines changed: 15 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -71,14 +71,15 @@ function parse_mtg!(f, classes, features, line, l, mtg_type)
7171
node_id = [1]
7272

7373
tree_dict = Dict{Int,Node}()
74+
feature_names = isnothing(features) ? nothing : Symbol.(features.NAME)
7475

7576
# for i in Iterators.drop(eachindex(splitted_MTG), 1)
7677
# node_attributes(tree_dict[4])
7778
try
7879
while !eof(f)
7980
l[1] = next_line!(f, line; whitespace=false)
8081
length(l[1]) == 0 && continue # ignore empty line
81-
parse_line_to_node!(tree_dict, l, line, attr_column_start, last_node_column, node_id, mtg_type, features, classes)
82+
parse_line_to_node!(tree_dict, l, line, attr_column_start, last_node_column, node_id, mtg_type, features, feature_names, classes)
8283
end
8384
catch e
8485
error(
@@ -142,7 +143,7 @@ Parse MTG node attributes names, values and type
142143
A list of attributes
143144
144145
"""
145-
function parse_MTG_node_attr(node_data, features, attr_column_start, line; force=false)
146+
function parse_MTG_node_attr(node_data, features, feature_names, attr_column_start, line; force=false)
146147

147148
if length(node_data) < attr_column_start
148149
return init_empty_attr()
@@ -155,75 +156,64 @@ function parse_MTG_node_attr(node_data, features, attr_column_start, line; force
155156
". Please check line ", line, " of the MTG:\n", join(node_data, "\t"))
156157
end
157158

158-
node_attr = Dict{String,Any}(zip(features.NAME[eachindex(node_data_attr)],
159-
fill(missing, length(node_data_attr))))
159+
node_attr = Dict{Symbol,Any}()
160+
sizehint!(node_attr, length(node_data_attr))
160161

161162
node_type = features.TYPE
162163

163164
# node_data_attr is always read in order so names and types correspond to values in features
164165
for i in eachindex(node_data_attr)
166+
feature_name = feature_names[i]
165167
if node_data_attr[i] == "" || node_data_attr[i] == "NA"
166-
pop!(node_attr, features.NAME[i])
167168
continue
168169
end
169170

170171
if node_type[i] == "INT"
171172
try
172-
node_attr[features.NAME[i]] = parse(Int, node_data_attr[i])
173+
node_attr[feature_name] = parse(Int, node_data_attr[i])
173174
catch e
174175
if !force
175176
error("Found issue in the MTG when converting column $(features[i,1]) ",
176177
"with value $(node_data_attr[i]) into Integer.",
177178
" Please check line ", line, " of the MTG:\n", join(node_data, "\t"))
178179
end
179-
pop!(node_attr, features.NAME[i])
180180
end
181181
elseif node_type[i] == "BOOLEAN"
182182
try
183-
node_attr[features.NAME[i]] = parse(Bool, node_data_attr[i])
183+
node_attr[feature_name] = parse(Bool, node_data_attr[i])
184184
catch e
185185
if !force
186186
error("Found issue in the MTG when converting column $(features[i,1]) ",
187187
"with value $(node_data_attr[i]) into Boolean.",
188188
" Please check line ", line, " of the MTG:\n", join(node_data, "\t"))
189189
end
190-
pop!(node_attr, features.NAME[i])
191190
end
192191
elseif node_type[i] == "DD/MM/YY"
193192
try
194-
node_attr[features.NAME[i]] = Date(node_data_attr[i], dateformat"d/m/y")
193+
node_attr[feature_name] = Date(node_data_attr[i], dateformat"d/m/y")
195194
catch e
196195
if !force
197196
error("Found issue in the MTG when converting column $(features[i,1]) ",
198197
"with value $(node_data_attr[i]) into a date with format 'day/month/year'.",
199198
" Please check line ", line, " of the MTG:\n", join(node_data, "\t"))
200199
end
201-
pop!(node_attr, features.NAME[i])
202200
end
203-
elseif node_type[i] == "REAL" || (node_type[i] == "ALPHA" && in(features.NAME[i], ("Width", "Length")))
201+
elseif node_type[i] == "REAL" || (node_type[i] == "ALPHA" && feature_name in (:Width, :Length))
204202
try
205-
node_attr[features.NAME[i]] = parse(Float64, node_data_attr[i])
203+
node_attr[feature_name] = parse(Float64, node_data_attr[i])
206204
catch e
207205
if !force
208206
error("Found issue in the MTG when converting column $(features[i,1]) ",
209207
"with value $(node_data_attr[i]) into Floating point number.",
210208
" Please check line ", line, " of the MTG:\n", join(node_data, "\t"))
211209
end
212-
pop!(node_attr, features.NAME[i])
213210
end
214211
else
215-
node_attr[features.NAME[i]] = node_data_attr[i]
212+
node_attr[feature_name] = node_data_attr[i]
216213
end
217214
end
218215

219-
parse_node_attributes(node_attr)
220-
end
221-
222-
"""
223-
Instantiate a `ColumnarAttrs` struct with `node_attr` keys and values.
224-
"""
225-
function parse_node_attributes(node_attr)
226-
ColumnarAttrs(Dict{Symbol,Any}(zip(Symbol.(keys(node_attr)), values(node_attr))))
216+
ColumnarAttrs(node_attr)
227217
end
228218

229219
"""
    init_empty_attr()

Return a `ColumnarAttrs` built with its zero-argument constructor. It is used
as the attribute value of a node for which no attribute is parsed.
"""
function init_empty_attr()
    return ColumnarAttrs()
end
@@ -235,7 +225,7 @@ init_empty_attr() = ColumnarAttrs()
235225
Parse a line of the MTG file to a node and add it to the tree dictionary.
236226
It may also add several nodes if the line contains several MTG elements.
237227
"""
238-
function parse_line_to_node!(tree_dict, l, line, attr_column_start, last_node_column, node_id, mtg_type, features, classes)
228+
function parse_line_to_node!(tree_dict, l, line, attr_column_start, last_node_column, node_id, mtg_type, features, feature_names, classes)
239229

240230
splitted_MTG = split(l[1], "\t")
241231
node_column = findfirst(x -> length(x) > 0, splitted_MTG)
@@ -259,7 +249,7 @@ function parse_line_to_node!(tree_dict, l, line, attr_column_start, last_node_co
259249

260250
# Get node attributes:
261251
if features !== nothing
262-
node_attr = parse_MTG_node_attr(node_data, features, node_attr_column_start, line)
252+
node_attr = parse_MTG_node_attr(node_data, features, feature_names, node_attr_column_start, line)
263253
else
264254
# if there are no attribute in the MTG, we create an empty attribute:
265255
node_attr = init_empty_attr()

0 commit comments

Comments
 (0)