Skip to content

Commit 152289a

Browse files
authored
Merge pull request #93 from VEZY/bulk-node-creation-optimisations
Optimize bulk node creation
2 parents b8462ce + d83b33e commit 152289a

5 files changed

Lines changed: 50 additions & 43 deletions

File tree

src/compute_MTG/caching.jl

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,16 +57,19 @@ function cache_nodes!(node; scale=nothing, symbol=nothing, link=nothing, filter_
5757
symbol = normalize_symbol_filter(symbol)
5858
link = normalize_link_filter(link)
5959
# The cache is already present:
60-
if length(node_traversal_cache(node)) != 0 && haskey(node_traversal_cache(node), cache_name(scale, symbol, link, all, filter_fun))
60+
cache_key = cache_name(scale, symbol, link, all, filter_fun)
61+
cache = _maybe_traversal_cache(node)
62+
if cache !== nothing && !isempty(cache) && haskey(cache, cache_key)
6163
if !overwrite
6264
error("The node already has a cache for this combination of filters. Hint: use `overwrite=true` if needed.")
6365
else
6466
# We have to delete the cache first because otherwise it would be used in the traversal below:
65-
delete!(node_traversal_cache(node), cache_name(scale, symbol, link, all, filter_fun))
67+
delete!(cache, cache_key)
6668
end
6769
end
6870

69-
node_traversal_cache(node)[cache_name(scale, symbol, link, all, filter_fun)] = traverse(
71+
cache = node_traversal_cache(node)
72+
cache[cache_key] = traverse(
7073
node,
7174
node -> node,
7275
scale=scale, symbol=symbol, link=link, filter_fun=filter_fun, all=all

src/compute_MTG/traverse.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,8 @@ function traverse!(node::Node, f::Function, args...; scale=nothing, symbol=nothi
113113
link = normalize_link_filter(link)
114114

115115
# If the node already has a cache of the traversal, we use it instead of traversing the mtg:
116-
cache = node_traversal_cache(node)
117-
if !isempty(cache)
116+
cache = _maybe_traversal_cache(node)
117+
if cache !== nothing && !isempty(cache)
118118
cache_key = cache_name(scale, symbol, link, all, filter_fun)
119119
cached_nodes = get(cache, cache_key, nothing)
120120
if cached_nodes !== nothing
@@ -172,8 +172,8 @@ function traverse(node::Node, f::Function, args...; scale=nothing, symbol=nothin
172172
# NB: f has to return something here; if it's a mutating function, use traverse!
173173

174174
# If the node already has a cache of the traversal, we use it instead of traversing the mtg:
175-
cache = node_traversal_cache(node)
176-
if !isempty(cache)
175+
cache = _maybe_traversal_cache(node)
176+
if cache !== nothing && !isempty(cache)
177177
cache_key = cache_name(scale, symbol, link, all, filter_fun)
178178
cached_nodes = get(cache, cache_key, nothing)
179179

src/read_MTG/parse_mtg.jl

Lines changed: 15 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -71,14 +71,15 @@ function parse_mtg!(f, classes, features, line, l, mtg_type)
7171
node_id = [1]
7272

7373
tree_dict = Dict{Int,Node}()
74+
feature_names = isnothing(features) ? nothing : Symbol.(features.NAME)
7475

7576
# for i in Iterators.drop(eachindex(splitted_MTG), 1)
7677
# node_attributes(tree_dict[4])
7778
try
7879
while !eof(f)
7980
l[1] = next_line!(f, line; whitespace=false)
8081
length(l[1]) == 0 && continue # ignore empty line
81-
parse_line_to_node!(tree_dict, l, line, attr_column_start, last_node_column, node_id, mtg_type, features, classes)
82+
parse_line_to_node!(tree_dict, l, line, attr_column_start, last_node_column, node_id, mtg_type, features, feature_names, classes)
8283
end
8384
catch e
8485
error(
@@ -142,7 +143,7 @@ Parse MTG node attributes names, values and type
142143
A list of attributes
143144
144145
"""
145-
function parse_MTG_node_attr(node_data, features, attr_column_start, line; force=false)
146+
function parse_MTG_node_attr(node_data, features, feature_names, attr_column_start, line; force=false)
146147

147148
if length(node_data) < attr_column_start
148149
return init_empty_attr()
@@ -155,75 +156,64 @@ function parse_MTG_node_attr(node_data, features, attr_column_start, line; force
155156
". Please check line ", line, " of the MTG:\n", join(node_data, "\t"))
156157
end
157158

158-
node_attr = Dict{String,Any}(zip(features.NAME[eachindex(node_data_attr)],
159-
fill(missing, length(node_data_attr))))
159+
node_attr = Dict{Symbol,Any}()
160+
sizehint!(node_attr, length(node_data_attr))
160161

161162
node_type = features.TYPE
162163

163164
# node_data_attr is always read in order so names and types correspond to values in features
164165
for i in eachindex(node_data_attr)
166+
feature_name = feature_names[i]
165167
if node_data_attr[i] == "" || node_data_attr[i] == "NA"
166-
pop!(node_attr, features.NAME[i])
167168
continue
168169
end
169170

170171
if node_type[i] == "INT"
171172
try
172-
node_attr[features.NAME[i]] = parse(Int, node_data_attr[i])
173+
node_attr[feature_name] = parse(Int, node_data_attr[i])
173174
catch e
174175
if !force
175176
error("Found issue in the MTG when converting column $(features[i,1]) ",
176177
"with value $(node_data_attr[i]) into Integer.",
177178
" Please check line ", line, " of the MTG:\n", join(node_data, "\t"))
178179
end
179-
pop!(node_attr, features.NAME[i])
180180
end
181181
elseif node_type[i] == "BOOLEAN"
182182
try
183-
node_attr[features.NAME[i]] = parse(Bool, node_data_attr[i])
183+
node_attr[feature_name] = parse(Bool, node_data_attr[i])
184184
catch e
185185
if !force
186186
error("Found issue in the MTG when converting column $(features[i,1]) ",
187187
"with value $(node_data_attr[i]) into Boolean.",
188188
" Please check line ", line, " of the MTG:\n", join(node_data, "\t"))
189189
end
190-
pop!(node_attr, features.NAME[i])
191190
end
192191
elseif node_type[i] == "DD/MM/YY"
193192
try
194-
node_attr[features.NAME[i]] = Date(node_data_attr[i], dateformat"d/m/y")
193+
node_attr[feature_name] = Date(node_data_attr[i], dateformat"d/m/y")
195194
catch e
196195
if !force
197196
error("Found issue in the MTG when converting column $(features[i,1]) ",
198197
"with value $(node_data_attr[i]) into a date with format 'day/month/year'.",
199198
" Please check line ", line, " of the MTG:\n", join(node_data, "\t"))
200199
end
201-
pop!(node_attr, features.NAME[i])
202200
end
203-
elseif node_type[i] == "REAL" || (node_type[i] == "ALPHA" && in(features.NAME[i], ("Width", "Length")))
201+
elseif node_type[i] == "REAL" || (node_type[i] == "ALPHA" && feature_name in (:Width, :Length))
204202
try
205-
node_attr[features.NAME[i]] = parse(Float64, node_data_attr[i])
203+
node_attr[feature_name] = parse(Float64, node_data_attr[i])
206204
catch e
207205
if !force
208206
error("Found issue in the MTG when converting column $(features[i,1]) ",
209207
"with value $(node_data_attr[i]) into Floating point number.",
210208
" Please check line ", line, " of the MTG:\n", join(node_data, "\t"))
211209
end
212-
pop!(node_attr, features.NAME[i])
213210
end
214211
else
215-
node_attr[features.NAME[i]] = node_data_attr[i]
212+
node_attr[feature_name] = node_data_attr[i]
216213
end
217214
end
218215

219-
parse_node_attributes(node_attr)
220-
end
221-
222-
"""
223-
Instantiate a `ColumnarAttrs` struct with `node_attr` keys and values.
224-
"""
225-
function parse_node_attributes(node_attr)
226-
ColumnarAttrs(Dict{Symbol,Any}(zip(Symbol.(keys(node_attr)), values(node_attr))))
216+
ColumnarAttrs(node_attr)
227217
end
228218

229219
init_empty_attr() = ColumnarAttrs()
@@ -235,7 +225,7 @@ init_empty_attr() = ColumnarAttrs()
235225
Parse a line of the MTG file to a node and add it to the tree dictionary.
236226
It may also add several nodes if the line contains several MTG elements.
237227
"""
238-
function parse_line_to_node!(tree_dict, l, line, attr_column_start, last_node_column, node_id, mtg_type, features, classes)
228+
function parse_line_to_node!(tree_dict, l, line, attr_column_start, last_node_column, node_id, mtg_type, features, feature_names, classes)
239229

240230
splitted_MTG = split(l[1], "\t")
241231
node_column = findfirst(x -> length(x) > 0, splitted_MTG)
@@ -259,7 +249,7 @@ function parse_line_to_node!(tree_dict, l, line, attr_column_start, last_node_co
259249

260250
# Get node attributes:
261251
if features !== nothing
262-
node_attr = parse_MTG_node_attr(node_data, features, node_attr_column_start, line)
252+
node_attr = parse_MTG_node_attr(node_data, features, feature_names, node_attr_column_start, line)
263253
else
264254
# if there are no attributes in the MTG, we create an empty attribute:
265255
node_attr = init_empty_attr()

src/types/Attributes.jl

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -641,13 +641,7 @@ function Base.get(attrs::ColumnarAttrs, key::Symbol, default::T) where {T}
641641
end
642642
Base.get(attrs::ColumnarAttrs, key, default) = get(attrs, _normalize_attr_key(key), default)
643643

644-
function Base.setindex!(attrs::ColumnarAttrs, value::T, key::Symbol) where {T}
645-
if !_isbound(attrs)
646-
attrs.staged[key] = value
647-
return value
648-
end
649-
store, bid, row = _bound_store_bid_row(attrs.ref)
650-
bucket = store.buckets[bid]
644+
@inline function _set_value_bound!(bucket::SymbolBucket, row::Int, key::Symbol, value::T) where {T}
651645
col_idx = get(bucket.col_index, key, 0)
652646
if col_idx == 0
653647
_set_value!(bucket, row, key, value)
@@ -666,6 +660,17 @@ function Base.setindex!(attrs::ColumnarAttrs, value::T, key::Symbol) where {T}
666660

667661
return value
668662
end
663+
664+
function Base.setindex!(attrs::ColumnarAttrs, value::T, key::Symbol) where {T}
665+
if !_isbound(attrs)
666+
attrs.staged[key] = value
667+
return value
668+
end
669+
store, bid, row = _bound_store_bid_row(attrs.ref)
670+
bucket = store.buckets[bid]
671+
_set_value_bound!(bucket, row, key, value)
672+
return value
673+
end
669674
Base.setindex!(attrs::ColumnarAttrs, value, key) = setindex!(attrs, value, _normalize_attr_key(key))
670675

671676
function Base.iterate(attrs::ColumnarAttrs, state=nothing)

src/types/Node.jl

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ mutable struct Node{N<:AbstractNodeMTG,A}
5555
"Node attributes. Can be anything really"
5656
attributes::A
5757
"Cache for mtg nodes traversal"
58-
traversal_cache::Dict{String,Vector{Node{N,A}}}
58+
traversal_cache::Union{Nothing,Dict{String,Vector{Node{N,A}}}}
5959
end
6060

6161
# All deprecated methods (the ones with a node name) :
@@ -68,7 +68,7 @@ end
6868

6969
function Node(id::Int, MTG::T, attributes::ColumnarAttrs) where {T<:AbstractNodeMTG}
7070
node = Node{T,ColumnarAttrs}(
71-
id, nothing, Vector{Node{T,ColumnarAttrs}}(), MTG, attributes, Dict{String,Vector{Node{T,ColumnarAttrs}}}()
71+
id, nothing, Vector{Node{T,ColumnarAttrs}}(), MTG, attributes, nothing
7272
)
7373
init_columnar_root!(attributes, id, getfield(MTG, :symbol))
7474
return node
@@ -101,7 +101,7 @@ end
101101

102102
function Node(id::Int, parent::Node{M,ColumnarAttrs}, MTG::M, attributes::ColumnarAttrs) where {M<:AbstractNodeMTG}
103103
node = Node{M,ColumnarAttrs}(
104-
id, parent, Vector{Node{M,ColumnarAttrs}}(), MTG, attributes, Dict{String,Vector{Node{M,ColumnarAttrs}}}()
104+
id, parent, Vector{Node{M,ColumnarAttrs}}(), MTG, attributes, nothing
105105
)
106106
addchild!(parent, node)
107107
bind_columnar_child!(node_attributes(parent), attributes, id, getfield(MTG, :symbol))
@@ -517,7 +517,16 @@ Base.names(mtg::T) where {T<:MultiScaleTreeGraph.Node} = get_attributes(mtg)
517517
518518
Get the traversal cache of the node if any.
519519
"""
520-
node_traversal_cache(node::Node) = getfield(node, :traversal_cache)
520+
@inline _maybe_traversal_cache(node::Node) = getfield(node, :traversal_cache)
521+
522+
function node_traversal_cache(node::Node{T,A}) where {T,A}
523+
cache = getfield(node, :traversal_cache)
524+
if cache === nothing
525+
cache = Dict{String,Vector{Node{T,A}}}()
526+
setfield!(node, :traversal_cache, cache)
527+
end
528+
return cache
529+
end
521530

522531
Base.getproperty(node::Node, key::Symbol) = unsafe_getindex(node, key)
523532
Base.hasproperty(node::Node, key::Symbol) = haskey(node_attributes(node), key)

0 commit comments

Comments
 (0)