Skip to content

Commit 98f865b

Browse files
authored
Merge pull request #88 from VEZY/optimizations
Optimize traversal and queries
2 parents 652f815 + eefcea4 commit 98f865b

11 files changed

Lines changed: 247 additions & 31 deletions

File tree

benchmark/benchmarks.jl

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ using Tables
66
const SUITE = BenchmarkGroup()
77
const HAS_EXPLICIT_ATTRIBUTE_API = isdefined(MultiScaleTreeGraph, :attribute) && isdefined(MultiScaleTreeGraph, :attribute!)
88
const HAS_TABLE_VIEWS_API = isdefined(MultiScaleTreeGraph, :symbol_table) && isdefined(MultiScaleTreeGraph, :mtg_table)
9+
const HAS_ATTRIBUTE_POSITIONAL_DEFAULT = HAS_EXPLICIT_ATTRIBUTE_API &&
10+
hasmethod(attribute, Tuple{MultiScaleTreeGraph.Node,Symbol,Any})
911
const DEFAULT_ATTR_KEY_IS_SYMBOL = true # always true
1012

1113
const SIZE_TIERS = (
@@ -37,6 +39,15 @@ end
3739
return sym_is_symbol ? syms : Tuple(String(s) for s in syms)
3840
end
3941

42+
@inline function _attribute_default(node, key, default)
43+
HAS_EXPLICIT_ATTRIBUTE_API || return node[key]
44+
if HAS_ATTRIBUTE_POSITIONAL_DEFAULT
45+
return attribute(node, key, default)
46+
else
47+
return attribute(node, key; default=default)
48+
end
49+
end
50+
4051
function synthetic_mtg(; n_nodes::Int=10_000, seed::Int=42)
4152
rng = MersenneTwister(seed)
4253
mass_key = _default_attr_key(:mass)
@@ -204,7 +215,7 @@ end
204215

205216
function traverse_update_one_explicit_api!(root, key_mass)
206217
traverse!(root) do node
207-
m = HAS_EXPLICIT_ATTRIBUTE_API ? attribute(node, key_mass, default=0.0) : node[key_mass]
218+
m = _attribute_default(node, key_mass, 0.0)
208219
m === nothing && (m = 0.0)
209220
HAS_EXPLICIT_ATTRIBUTE_API ? attribute!(node, key_mass, m + 0.1) : (node[key_mass] = m + 0.1)
210221
end
@@ -225,8 +236,8 @@ end
225236

226237
function traverse_update_multi_leaf_explicit_api!(root, key_width, key_area, symbol_leaf)
227238
traverse!(root, symbol=symbol_leaf) do node
228-
width = HAS_EXPLICIT_ATTRIBUTE_API ? attribute(node, key_width, default=0.0) : node[key_width]
229-
area = HAS_EXPLICIT_ATTRIBUTE_API ? attribute(node, key_area, default=0.0) : node[key_area]
239+
width = _attribute_default(node, key_width, 0.0)
240+
area = _attribute_default(node, key_area, 0.0)
230241
width === nothing && (width = 0.0)
231242
area === nothing && (area = 0.0)
232243
if HAS_EXPLICIT_ATTRIBUTE_API
@@ -254,9 +265,9 @@ end
254265

255266
function traverse_update_multi_mixed_explicit_api!(root, key_mass, key_counter, symbol_leaf_internode)
256267
traverse!(root, symbol=symbol_leaf_internode) do node
257-
m = HAS_EXPLICIT_ATTRIBUTE_API ? attribute(node, key_mass, default=0.0) : node[key_mass]
268+
m = _attribute_default(node, key_mass, 0.0)
258269
m === nothing && (m = 0.0)
259-
counter = HAS_EXPLICIT_ATTRIBUTE_API ? attribute(node, key_counter, default=0) : node[key_counter]
270+
counter = _attribute_default(node, key_counter, 0)
260271
isnothing(counter) && (counter = 0)
261272
if HAS_EXPLICIT_ATTRIBUTE_API
262273
attribute!(node, key_mass, m * 0.999 + 0.0001)

docs/Project.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
11
[deps]
22
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
33
MultiScaleTreeGraph = "dd4a991b-8a45-4075-bede-262ee62d5583"
4+
5+
[sources]
6+
MultiScaleTreeGraph = { path = ".." }

docs/src/tutorials/0.read_write.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,18 +22,18 @@ The file given in input can be either a `.mtg`, `.csv`, `.xlsx` or `.xlsm` file.
2222

2323
### Options
2424

25-
The function has two optional arguments to set the type used for the attributes, and the type used for the MTG field (see next section for more details). It also has a keyword argument to choose the sheet name in case you're reading an `xlsx` or `xlsm` file.
25+
The function has one optional argument to set the type used for the MTG field (see next section for more details). It also has a keyword argument to choose the sheet name in case you're reading an `xlsx` or `xlsm` file.
2626

2727
#### Attributes type
2828

29-
The type used for the attributes should be a `NamedTuple`-alike or a `Dict`-alike type. Here is a more in-depth recommendation, use:
29+
Attributes are stored using the columnar backend (`ColumnarAttrs`) by default.
3030

31-
- `NamedTuple` if you don't plan to modify the attributes of the MTG, *e.g.* to use them for plotting or computing statistics...
32-
- `MutableNamedTuple` if you plan to modify the attributes values but not adding new attributes very often, *e.g.* recompute an attribute value...
33-
- `Dict` or similar (*e.g.* `OrderedDict`) if you plan to heavily modify the attributes, *e.g.* adding/removing attributes a lot
31+
This backend keeps one typed table per symbol and is optimized for repeated traversal and attribute retrieval. In practice:
3432

35-
!!! note
36-
If you don't know what to use, just use the default.
33+
- users can still create nodes with `Dict`/`NamedTuple`-like attribute inputs;
34+
- those inputs are converted to the columnar representation when nodes are attached to an MTG.
35+
36+
So for most workflows, there is no extra option to choose here: use the default.
3737

3838
#### MTG encoding type
3939

docs/src/tutorials/2.descendants_ancestors_filters.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,9 @@ The previous notations are both equivalent to:
5454
node_attributes(node_5)[:Length]
5555
```
5656

57-
But we strongly recommend to avoid this last notation. First, in our case the attributes are stored in a Dictionary (Dict, the default), so we access their values using the Dict notation: `node_attributes(node_5)[:Length]`. But if the attributes are stored as a NamedTuple-alike structure, we must use the dot notation instead: `node_attributes(node_5).Length` (see [Attributes type](@ref) for more details). Second, the attributes of the nodes are not stored in the structure returned by `node_attributes`, so updating the value of an attribute using this notation does not update the value in the MTG. This is a common mistake that can lead to very hard-to-debug issues.
57+
`node_attributes(node_5)` returns the node attribute container view for this node (columnar backend by default), so reading and writing through it does update the MTG.
58+
59+
However, we still recommend the higher-level APIs (`node[:attr]`, `attribute(node, :attr)`, and `attribute!(node, :attr, value)`) because they are clearer and keep user code backend-agnostic.
5860

5961
That is why the package implements the more generic `node_5[:Length]` notation that works with any structure used for the attributes, which helps develop more generic code.
6062

src/compute_MTG/ancestors.jl

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,48 @@
1+
@inline function _ancestors_values_no_filter_ignore!(node, key::Symbol, val, recursivity_level, ignore_nothing::Bool)
2+
current = node
3+
remaining = recursivity_level
4+
5+
while !isroot(current) && remaining != 0
6+
parent_ = parent(current)
7+
v = unsafe_getindex(parent_, key)
8+
ignore_nothing && v === nothing || push!(val, v)
9+
remaining -= 1
10+
current = parent_
11+
end
12+
return val
13+
end
14+
15+
@inline function _ancestors_values_no_filter_collect(node, key::Symbol, recursivity_level, ignore_nothing::Bool)
16+
vals = Any[]
17+
current = node
18+
remaining = recursivity_level
19+
20+
while !isroot(current) && remaining != 0
21+
parent_ = parent(current)
22+
v = unsafe_getindex(parent_, key)
23+
ignore_nothing && v === nothing || push!(vals, v)
24+
remaining -= 1
25+
current = parent_
26+
end
27+
return vals
28+
end
29+
30+
@inline function _typed_from_any(vals::Vector{Any}, fallback_type=Any)
31+
if isempty(vals)
32+
return Array{fallback_type,1}()
33+
end
34+
T = typeof(vals[1])
35+
@inbounds for i in 2:length(vals)
36+
T = Union{T,typeof(vals[i])}
37+
end
38+
out = Vector{T}(undef, length(vals))
39+
@inbounds for i in eachindex(vals)
40+
out[i] = vals[i]
41+
end
42+
return out
43+
end
44+
45+
146
"""
247
ancestors(node::Node,key,<keyword arguments>)
348
ancestors(node::Node,<keyword arguments>)
@@ -60,6 +105,13 @@ function ancestors(
60105
recursivity_level=-1,
61106
ignore_nothing=false,
62107
type::Union{Union,DataType}=Any)
108+
if no_node_filters(scale, symbol, link, filter_fun) && all && !self && type === Any
109+
key_ = Symbol(key)
110+
vals = _ancestors_values_no_filter_collect(node, key_, recursivity_level, ignore_nothing)
111+
fallback_T = infer_columnar_attr_type(node, key_, nothing, ignore_nothing)
112+
return _typed_from_any(vals, fallback_T)
113+
end
114+
63115
symbol = normalize_symbol_filter(symbol)
64116
link = normalize_link_filter(link)
65117
_maybe_depwarn_traversal_type_kw(:ancestors, type)
@@ -216,6 +268,13 @@ function ancestors!(
216268
ignore_nothing=false,
217269
type::Union{Union,DataType}=Any,
218270
)
271+
if no_node_filters(scale, symbol, link, filter_fun) && all && !self && type === Any
272+
key_ = Symbol(key)
273+
empty!(out)
274+
_ancestors_values_no_filter_ignore!(node, key_, out, recursivity_level, ignore_nothing)
275+
return out
276+
end
277+
219278
symbol = normalize_symbol_filter(symbol)
220279
link = normalize_link_filter(link)
221280
_maybe_depwarn_traversal_type_kw(:ancestors!, type)

src/compute_MTG/descendants.jl

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,33 @@ end
2626
return mask
2727
end
2828

29+
@inline function _single_bucket_id(store::MTGAttributeStore, symbol_filter)
30+
symbol_filter === nothing && return 0
31+
bids = _symbol_bucket_ids(store, symbol_filter)
32+
return length(bids) == 1 ? bids[1] : 0
33+
end
34+
35+
@inline function _collect_descendant_values_indexed_single_bucket!(
36+
out::AbstractVector,
37+
idx::SubtreeIndexCache,
38+
store::MTGAttributeStore,
39+
bid::Int,
40+
col::Column{T},
41+
left::Int,
42+
right::Int,
43+
ignore_nothing::Bool,
44+
) where {T}
45+
sizehint!(out, length(out) + (right - left + 1))
46+
@inbounds for i in left:right
47+
nid = idx.dfs_order[i]
48+
store.node_bucket[nid] == bid || continue
49+
v = col.data[store.node_row[nid]]
50+
ignore_nothing && v === nothing && continue
51+
push!(out, v)
52+
end
53+
return true
54+
end
55+
2956
function _collect_descendant_values_indexed!(
3057
out::AbstractVector,
3158
node,
@@ -49,6 +76,34 @@ function _collect_descendant_values_indexed!(
4976
left > right && return true
5077

5178
allow_mask = _bucket_allow_mask(store, symbol_filter)
79+
single_bid = _single_bucket_id(store, symbol_filter)
80+
81+
if single_bid != 0
82+
col_idx = key_plan.col_idx_by_bucket[single_bid]
83+
if col_idx == 0
84+
ignore_nothing && return true
85+
sizehint!(out, length(out) + (right - left + 1))
86+
@inbounds for i in left:right
87+
nid = idx.dfs_order[i]
88+
store.node_bucket[nid] == single_bid || continue
89+
push!(out, nothing)
90+
end
91+
return true
92+
end
93+
94+
col = store.buckets[single_bid].columns[col_idx]
95+
return _collect_descendant_values_indexed_single_bucket!(
96+
out,
97+
idx,
98+
store,
99+
single_bid,
100+
col,
101+
left,
102+
right,
103+
ignore_nothing,
104+
)
105+
end
106+
52107
sizehint!(out, length(out) + (right - left + 1))
53108

54109
@inbounds for i in left:right

0 commit comments

Comments
 (0)