-
Notifications
You must be signed in to change notification settings - Fork 74
Expand file tree
/
Copy pathtree.ex
More file actions
225 lines (197 loc) · 8.25 KB
/
tree.ex
File metadata and controls
225 lines (197 loc) · 8.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
defmodule Graph.Tree do
defmodule Node do
@moduledoc """
A container to hold arbitrary data within a tree.
An implementation detail: Every node is given a unique identifier so that
duplicate data (for example, two nodes that have data as 4) will not be considered identical nodes.
"""
defstruct id: nil, data: nil
@type t :: %__MODULE__{}
def init(data), do: %__MODULE__{id: make_ref(), data: data}
end
@moduledoc """
An abstraction over a graph, this tree implementation can have an arbitrary number of children.
This module can be used as a base for more complex data transformations on data structures that
if you squint hard enough can look like trees (e.g: maps)
"""
alias __MODULE__.Node
@doc """
A stuct representing a Tree. Under the hood, it uses a graph to store the nodes and edges, as a tree is a
special type of graph. Also has the option to define if the tree was created using a `virtual_root` which is
just a word I invented. But to explain it:
A lot of data structures don't quite follow a tree-like structure. Take this map:
%{"foo" => "bar", "baz" => "bux"}
If we wanted to process this map as a tree, we would have to add some top-level key to it such that it can act as the
root. Like this:
%{"root" => %{
"foo" => "bar",
"baz" => "bux"
}
}
That's kind of a pain in the tuckus if you have to do it a lot, plus you have to strip off the root when you're done with
processing to get back to your original data structure. A *`virtual_root`* provides this top-level key behind the scenes
so you can kind of coerce your data structure into a tree, but the guarantee provided by this module is that the virtual root
will not be included in any processing/iterating/reducing/etc. So you can have all the benefits of using this module without
worrying too much about if you have a well-formed tree when you pass the data into a constructor for a `Tree.t()`
"""
defstruct root: nil, graph: nil, has_virtual_root?: false
@type t :: %__MODULE__{}
@typedoc """
A keyword of atoms that form a tree, e.g: [a: [b: [:c, :d]]].
For the purposes of great humor, I have named this subset of keyword lists
treeword lists.
"""
@type treeword :: keyword(treeword()) | list(atom())
@doc """
A quick check to see if something might be a treeword. Should not be relied upon over-much, as it does not
fully check to see if something is a proper treeword. This guard should just be used as a hueristic to make sure
the input is not a completely different type
"""
# You can't pattern match inside guards, so here's what this is supposed to mean:
# [{key, _value}] when is_atom(key)
defguard maybe_treeword?(list)
when is_list(list) and is_tuple(hd(list)) and is_atom(elem(hd(list), 0)) and
tl(list) == []
@doc """
Takes a treeword list.
Returns a tree with atoms as data and there is a parent-child relationship
between atom key and sublist.
"""
@spec from_treeword!(treeword() | keyword(atom()), opts :: keyword()) :: t() | no_return()
def from_treeword!(_, opts \\ [virtual_root: false])
def from_treeword!(keyword, [virtual_root: true] = opts) do
parent = Node.init(:root)
initializer = &do_keyword_init(&1, parent, keyword)
do_init(parent, initializer, opts)
end
def from_treeword!([{key, value}], [virtual_root: false] = opts) do
parent = Node.init(key)
initializer = &do_keyword_init(&1, parent, value)
do_init(parent, initializer, opts)
end
def from_treeword!(_, _) do
raise ArgumentError, """
Treewords can only have one element as the root and must
be comprised of only atoms as the leaves
"""
end
defp do_init(parent, initializer, opts) do
Graph.new()
|> Graph.add_vertex(parent)
|> initializer.()
|> then(&%__MODULE__{root: parent, graph: &1, has_virtual_root?: opts[:virtual_root]})
end
defp do_keyword_init(graph, parent, {key, rest}) do
child = Node.init(key)
graph
|> Graph.add_edge(parent, child)
|> do_keyword_init(child, rest)
end
defp do_keyword_init(graph, parent, children) when is_list(children) do
Enum.reduce(children, graph, fn child, acc -> do_keyword_init(acc, parent, child) end)
end
defp do_keyword_init(graph, parent, child_data) when is_atom(child_data) do
Graph.add_edge(graph, parent, Node.init(child_data))
end
defp do_keyword_init(_graph, _parent, _child_data) do
raise ArgumentError, "Treewords can only contain atoms as leaves"
end
@doc """
Given a node N and tree T, returns true if N is a leaf of T and false otherwise.
"""
@spec is_leaf?(t(), Node.t()) :: boolean()
def is_leaf?(%__MODULE__{} = t, %__MODULE__.Node{} = l) do
Graph.out_degree(t.graph, l) == 0
end
@doc """
Iterates through all of a tree's nodes. For exemplar node (A):
- (A) is guaranteed to be processed before its children
- (A)'s children will be processed before its siblings are processed
- (A)'s children will be processed in an arbitraty order
This is similar to a DFS of the tree, except the nodes visited along the way to the leaves are processed before the leaf nodes.
The reducer is a little different than most reducers you might see. The first argument is the current Node.t() being processed.
The second argument is a list of the ancestors of the current node. The immediate parent is the first element of this list. The
root of the tree is the last element. This format forms a stack. The third argument is the accumulator value. The reducer must return {:next | acc} to continue
or {:halt, acc} to stop processing.
"""
@spec reduce(
tree :: t(),
acc :: term(),
reducer ::
(Node.t(), path_stack :: [], acc :: term() ->
{:next, acc :: term()} | {:halt, acc :: term()})
) :: term()
def reduce(%__MODULE__{has_virtual_root?: false} = t, initial_acc, reducer) do
t.graph
|> reduce(t.root, {:next, initial_acc}, [], reducer)
|> elem(1)
end
def reduce(%__MODULE__{has_virtual_root?: true} = t, initial_acc, reducer) do
t.graph
|> Graph.out_neighbors(t.root)
|> Enum.reduce({:next, initial_acc}, fn %Node{} = n, acc ->
reduce(t.graph, n, acc, [], reducer)
end)
|> elem(1)
end
defp reduce(%Graph{} = g, %Node{} = current_node, {:next, acc}, path_stack, reducer) do
case reducer.(current_node, path_stack, acc) do
{:next, _new_acc} = res ->
g
|> Graph.out_neighbors(current_node)
|> Enum.reduce(res, fn %Node{} = n, acc ->
reduce(g, n, acc, [current_node | path_stack], reducer)
end)
{:halt, _new_acc} = res ->
res
end
end
defp reduce(%Graph{}, %Node{}, {:halt, acc}, _path_stack, _reducer), do: {:halt, acc}
@doc """
A convience function to do a reduce where the reducer is only called when
traversing over a leaf node.
"""
@spec leaf_reduce(
t(),
acc :: term(),
(Node.t(), path_stack :: list(), acc :: term() ->
{:next, acc :: term()} | {:halt, acc :: term()})
) :: term()
def leaf_reduce(%__MODULE__{} = t, initial_acc, reducer) do
modified_reducer = fn node, path_stack, acc ->
if is_leaf?(t, node) do
reducer.(node, path_stack, acc)
else
{:next, acc}
end
end
reduce(t, initial_acc, modified_reducer)
end
@doc """
Map each path to a leaf to something else using `mapper`
"""
@spec leaf_map(
t(),
(current_path :: list(), mapper :: list() -> term())
) :: term()
def leaf_map(%__MODULE__{} = t, mapper) do
reducer = fn node, path_stack, acc ->
current_path = current_path(node, path_stack)
res = mapper.(current_path)
{:next, [res | acc]}
end
leaf_reduce(t, [], reducer)
end
@doc """
A convience function. Given a node that's currently being visited and a path_stack,
this function will return a list of the current path (starting with the root and
ending with the current element). Further, it will strip the `Node`s in the path
to just their data elements. This can be useful when used in conjunction with
reduce and treeword lists do use things like get_in.
"""
def current_path(%Node{} = n, path_stack) do
[n | path_stack]
|> Enum.map(& &1.data)
|> Enum.reverse()
end
end