@@ -71,14 +71,15 @@ function parse_mtg!(f, classes, features, line, l, mtg_type)
7171 node_id = [1 ]
7272
7373 tree_dict = Dict {Int,Node} ()
74+ feature_names = isnothing (features) ? nothing : Symbol .(features. NAME)
7475
7576 # for i in Iterators.drop(eachindex(splitted_MTG), 1)
7677 # node_attributes(tree_dict[4])
7778 try
7879 while ! eof (f)
7980 l[1 ] = next_line! (f, line; whitespace= false )
8081 length (l[1 ]) == 0 && continue # ignore empty line
81- parse_line_to_node! (tree_dict, l, line, attr_column_start, last_node_column, node_id, mtg_type, features, classes)
82+ parse_line_to_node! (tree_dict, l, line, attr_column_start, last_node_column, node_id, mtg_type, features, feature_names, classes)
8283 end
8384 catch e
8485 error (
@@ -142,7 +143,7 @@ Parse MTG node attributes names, values and type
142143A list of attributes
143144
144145"""
145- function parse_MTG_node_attr (node_data, features, attr_column_start, line; force= false )
146+ function parse_MTG_node_attr (node_data, features, feature_names, attr_column_start, line; force= false )
146147
147148 if length (node_data) < attr_column_start
148149 return init_empty_attr ()
@@ -155,75 +156,64 @@ function parse_MTG_node_attr(node_data, features, attr_column_start, line; force
155156 " . Please check line " , line, " of the MTG:\n " , join (node_data, " \t " ))
156157 end
157158
158- node_attr = Dict {String ,Any} (zip (features . NAME[ eachindex (node_data_attr)],
159- fill ( missing , length (node_data_attr)) ))
159+ node_attr = Dict {Symbol ,Any} ()
160+ sizehint! (node_attr , length (node_data_attr))
160161
161162 node_type = features. TYPE
162163
163164 # node_data_attr is always read in order so names and types correspond to values in features
164165 for i in eachindex (node_data_attr)
166+ feature_name = feature_names[i]
165167 if node_data_attr[i] == " " || node_data_attr[i] == " NA"
166- pop! (node_attr, features. NAME[i])
167168 continue
168169 end
169170
170171 if node_type[i] == " INT"
171172 try
172- node_attr[features . NAME[i] ] = parse (Int, node_data_attr[i])
173+ node_attr[feature_name ] = parse (Int, node_data_attr[i])
173174 catch e
174175 if ! force
175176 error (" Found issue in the MTG when converting column $(features[i,1 ]) " ,
176177 " with value $(node_data_attr[i]) into Integer." ,
177178 " Please check line " , line, " of the MTG:\n " , join (node_data, " \t " ))
178179 end
179- pop! (node_attr, features. NAME[i])
180180 end
181181 elseif node_type[i] == " BOOLEAN"
182182 try
183- node_attr[features . NAME[i] ] = parse (Bool, node_data_attr[i])
183+ node_attr[feature_name ] = parse (Bool, node_data_attr[i])
184184 catch e
185185 if ! force
186186 error (" Found issue in the MTG when converting column $(features[i,1 ]) " ,
187187 " with value $(node_data_attr[i]) into Boolean." ,
188188 " Please check line " , line, " of the MTG:\n " , join (node_data, " \t " ))
189189 end
190- pop! (node_attr, features. NAME[i])
191190 end
192191 elseif node_type[i] == " DD/MM/YY"
193192 try
194- node_attr[features . NAME[i] ] = Date (node_data_attr[i], dateformat " d/m/y" )
193+ node_attr[feature_name ] = Date (node_data_attr[i], dateformat " d/m/y" )
195194 catch e
196195 if ! force
197196 error (" Found issue in the MTG when converting column $(features[i,1 ]) " ,
198197 " with value $(node_data_attr[i]) into a date with format 'day/month/year'." ,
199198 " Please check line " , line, " of the MTG:\n " , join (node_data, " \t " ))
200199 end
201- pop! (node_attr, features. NAME[i])
202200 end
203- elseif node_type[i] == " REAL" || (node_type[i] == " ALPHA" && in (features . NAME[i], ( " Width" , " Length" ) ))
201+ elseif node_type[i] == " REAL" || (node_type[i] == " ALPHA" && feature_name in ( : Width, : Length ))
204202 try
205- node_attr[features . NAME[i] ] = parse (Float64, node_data_attr[i])
203+ node_attr[feature_name ] = parse (Float64, node_data_attr[i])
206204 catch e
207205 if ! force
208206 error (" Found issue in the MTG when converting column $(features[i,1 ]) " ,
209207 " with value $(node_data_attr[i]) into Floating point number." ,
210208 " Please check line " , line, " of the MTG:\n " , join (node_data, " \t " ))
211209 end
212- pop! (node_attr, features. NAME[i])
213210 end
214211 else
215- node_attr[features . NAME[i] ] = node_data_attr[i]
212+ node_attr[feature_name ] = node_data_attr[i]
216213 end
217214 end
218215
219- parse_node_attributes (node_attr)
220- end
221-
222- """
223- Instantiate a `ColumnarAttrs` struct with `node_attr` keys and values.
224- """
225- function parse_node_attributes (node_attr)
226- ColumnarAttrs (Dict {Symbol,Any} (zip (Symbol .(keys (node_attr)), values (node_attr))))
216+ ColumnarAttrs (node_attr)
227217end
228218
# Attribute container for a node that carries no attribute values: it simply calls
# `ColumnarAttrs` with no arguments. Used when the MTG declares no features, and when a
# line has fewer columns than the attribute start column.
229219init_empty_attr () = ColumnarAttrs ()
@@ -235,7 +225,7 @@ init_empty_attr() = ColumnarAttrs()
235225Parse a line of the MTG file to a node and add it to the tree dictionary.
236226It may also add several nodes if the line contains several MTG elements.
237227"""
238- function parse_line_to_node! (tree_dict, l, line, attr_column_start, last_node_column, node_id, mtg_type, features, classes)
228+ function parse_line_to_node! (tree_dict, l, line, attr_column_start, last_node_column, node_id, mtg_type, features, feature_names, classes)
239229
240230 splitted_MTG = split (l[1 ], " \t " )
241231 node_column = findfirst (x -> length (x) > 0 , splitted_MTG)
@@ -259,7 +249,7 @@ function parse_line_to_node!(tree_dict, l, line, attr_column_start, last_node_co
259249
260250 # Get node attributes:
261251 if features != = nothing
262- node_attr = parse_MTG_node_attr (node_data, features, node_attr_column_start, line)
252+ node_attr = parse_MTG_node_attr (node_data, features, feature_names, node_attr_column_start, line)
263253 else
264254 # if there are no attributes in the MTG, we create an empty attribute set:
265255 node_attr = init_empty_attr ()
0 commit comments