Skip to content

Commit 4b932b4

Browse files
authored
Merge pull request #7 from COBREXA/mk-proper-grr-parsing
add proper grr parsing
2 parents f969b2e + b19cfc6 commit 4b932b4

6 files changed

Lines changed: 215 additions & 28 deletions

File tree

Project.toml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,20 @@
11
name = "JSONFBCModels"
22
uuid = "475c1105-d6ed-49c1-9b32-c11adca6d3e8"
3-
authors = ["Mirek Kratochvil <miroslav.kratochvil@uni.lu>"]
4-
version = "0.1.0"
3+
authors = ["The authors of JSONFBCModels.jl"]
4+
version = "0.1.1"
55

66
[deps]
77
AbstractFBCModels = "5a4f3dfa-1789-40f8-8221-69268c29937c"
88
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
99
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
10+
PikaParser = "3bbf5609-3e7b-44cd-8549-7c69f321e792"
1011
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
1112

1213
[compat]
1314
AbstractFBCModels = "0.1, 0.2"
1415
DocStringExtensions = "0.8, 0.9"
1516
JSON = "0.21"
17+
PikaParser = "0.6"
1618
SparseArrays = "1"
1719
Test = "1"
1820
julia = "1"

src/JSONFBCModels.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,6 @@ include("constants.jl")
1212
include("interface.jl")
1313
include("io.jl")
1414
include("utils.jl")
15+
include("grr_utils.jl")
1516

1617
end

src/grr_utils.jl

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
2+
import PikaParser as PP
3+
4+
"""
5+
`PikaParser.jl` grammar for stringy GRR expressions.
6+
"""
7+
const grr_grammar = begin
    # Characters that typically form the identifiers: letters, digits and a
    # handful of punctuation marks.
    isident(x::Char) =
        isletter(x) || isdigit(x) || x in ('_', '-', ':', '.', '\'', '[', ']')

    # Scanner helper. Builds a scanner that reports the index of the last item
    # in the leading run of `m` that satisfies predicate `p`; the result is 0
    # when `m` is empty or its first item already fails `p`.
    eat(p) = function (m)
        stop = 0
        for i in eachindex(m)
            p(m[i]) || break
            stop = i
        end
        return stop
    end

    # Scanner helper for keywords. The whole leading identifier-like run must
    # be equal to one of the words in `w`; a longer identifier that merely
    # starts with a keyword gives 0.
    kws(w...) = function (m)
        stop = eat(isident)(m)
        return m[begin:stop] in w ? stop : 0
    end

    PP.make_grammar(
        [:expr],
        PP.flatten(
            Dict(
                # lexical rules
                :space => PP.first(PP.scan(eat(isspace)), PP.epsilon),
                :id => PP.scan(eat(isident)),
                :orop =>
                    PP.first(PP.tokens("||"), PP.token('|'), PP.scan(kws("OR", "or"))),
                :andop => PP.first(
                    PP.tokens("&&"),
                    PP.token('&'),
                    PP.scan(kws("AND", "and")),
                ),
                # top-level rule; it has to reach the end of the input
                :expr => PP.seq(:space, :orexpr, :space, PP.end_of_input),
                # `or` is tried first and is built on top of `and` expressions
                :orexpr => PP.first(
                    :or => PP.seq(:andexpr, :space, :orop, :space, :orexpr),
                    :andexpr,
                ),
                :andexpr => PP.first(
                    :and => PP.seq(:baseexpr, :space, :andop, :space, :andexpr),
                    :baseexpr,
                ),
                # a base expression is an identifier or a parenthesized `orexpr`
                :baseexpr => PP.first(
                    :id,
                    :parenexpr => PP.seq(
                        PP.token('('),
                        :space,
                        :orexpr,
                        :space,
                        PP.token(')'),
                    ),
                ),
            ),
            Char,
        ),
    )
end
73+
74+
# Selects which submatches of match `m` get descended into: returns a Bool
# mask (one flag per submatch) for the rules named below, and an all-`false`
# generator over `m.submatches` for any other rule. The second argument is
# ignored.
function grr_grammar_open(m, _)
    rule = m.rule
    if rule == :expr
        # only the 2nd of the 4 submatches is opened
        return Bool[0, 1, 0, 0]
    elseif rule == :parenexpr
        # only the 3rd of the 5 submatches is opened
        return Bool[0, 0, 1, 0, 0]
    elseif rule in [:or, :and]
        # the 1st and 5th submatches are opened
        return Bool[1, 0, 0, 0, 1]
    elseif rule in [:andexpr, :orexpr, :notexpr, :baseexpr]
        # NOTE(review): no `:notexpr` rule is visible in `grr_grammar`; this
        # entry looks unused -- confirm before removing.
        return Bool[1]
    else
        return (false for _ in m.submatches)
    end
end
80+
81+
# Folds match `m` and the already-folded values of its submatches (`subvals`)
# into a gene association `Expr`. The second argument is ignored.
function grr_grammar_fold(m, _, subvals)
    rule = m.rule
    # an identifier becomes a `gene("...")` call
    rule == :id && return Expr(:call, :gene, String(m.view))
    # binary operators combine their 1st and 5th submatch values
    rule == :and && return Expr(:call, :and, subvals[1], subvals[5])
    rule == :or && return Expr(:call, :or, subvals[1], subvals[5])
    # wrappers pass through the value of their single interesting submatch
    rule == :parenexpr && return subvals[3]
    rule == :expr && return subvals[2]
    # anything else forwards its first value, or `nothing` if it has none
    return isempty(subvals) ? nothing : subvals[1]
end
87+
88+
"""
89+
$(TYPEDSIGNATURES)
90+
91+
Parses a JSON-ish data reference to an `Expr`-typed gene association. Contains
92+
"calls" to `gene`, `and` and `or` functions that describe the association.
93+
"""
94+
function parse_gene_association(str::String)::Maybe{Expr}
    # Empty and whitespace-only rules carry no association at all.
    if all(isspace, str)
        return nothing
    end

    state = PP.parse_lex(grr_grammar, str)

    # The top-level `:expr` rule has to match from the first position; a
    # non-positive result is reported as a parse failure.
    root = PP.find_match_at!(state, :expr, 1)
    root > 0 || throw(DomainError(str, "cannot parse GRR"))

    return PP.traverse_match(
        state,
        root;
        open = grr_grammar_open,
        fold = grr_grammar_fold,
    )
end
101+
102+
"""
103+
$(TYPEDSIGNATURES)
104+
105+
Evaluate the gene association expression with the reference values given by the
106+
`val` function.
107+
"""
108+
function eval_gene_association(ga::Expr, val::Function)::Bool
    # `ga` must be a call expression with at least one argument after the
    # function name; a `DomainError` is thrown otherwise.
    if ga.head != :call || length(ga.args) < 2
        throw(DomainError(ga, "invalid gene association expr"))
    end

    op = ga.args[1]
    operands = ga.args[2:end]

    # `gene(x)` with exactly one argument is a leaf: ask `val` about `x`.
    if op == :gene && length(operands) == 1
        return val(operands[1])
    end

    # All operands are evaluated before being combined (no short-circuiting),
    # so an unsupported sub-expression is reported regardless of the values
    # of its siblings.
    if op == :and
        return all([eval_gene_association(sub, val) for sub in operands])
    end
    if op == :or
        return any([eval_gene_association(sub, val) for sub in operands])
    end

    throw(DomainError(ga, "unsupported gene association function"))
end
121+
122+
"""
123+
$(TYPEDSIGNATURES)
124+
125+
A helper for producing predictable unique sequences. Might be faster if
126+
compacting would be done directly in sort().
127+
"""
128+
function sortunique(x)
    # Returns a freshly allocated, sorted vector with the distinct elements
    # of `x`. `x` may be any iterable that `collect` turns into a sortable
    # vector, and it is never modified because `collect` copies it.
    #
    # Sorting first makes the result order predictable; `unique!` then drops
    # the repeated elements in place, so no second copy of the data is made.
    # Elements are considered duplicates when they are `isequal`, as usual
    # for `unique!`.
    return unique!(sort!(collect(x)))
end
145+
146+
"""
147+
$(TYPEDSIGNATURES)
148+
149+
Convert the given gene association expression to DNF.
150+
"""
151+
function flatten_gene_association(ga::Expr)::A.GeneAssociationDNF
    # Converts the `gene`/`and`/`or` call tree in `ga` into a DNF: a sorted,
    # duplicate-free vector of conjunctions, each of which is a sorted,
    # duplicate-free vector of gene identifiers. Throws `DomainError` when
    # `ga` is not a call with at least one argument, or when it calls
    # anything other than `gene`, `and` or `or`.

    # Conjunction of several DNFs: picks one term from each DNF in every
    # possible way and merges the picked terms.
    function fold_and(dnfs::Vector{Vector{Vector{String}}})::Vector{Vector{String}}
        # The empty conjunction is a single term with no genes.
        isempty(dnfs) && return [String[]]
        # The conjunction of the tail does not depend on the term picked
        # from the head, so it is computed once here rather than once per
        # head term.
        rest = fold_and(dnfs[2:end])
        return sortunique(sortunique(String[l; r]) for l in dnfs[1] for r in rest)
    end

    (ga.head == :call && length(ga.args) >= 2) ||
        throw(DomainError(ga, "invalid gene association expr"))

    if ga.args[1] == :gene && length(ga.args) == 2
        # a single gene is a DNF with one single-gene term
        return [[ga.args[2]]]
    elseif ga.args[1] == :and
        return fold_and(flatten_gene_association.(ga.args[2:end]))
    elseif ga.args[1] == :or
        # A disjunction of DNFs is the concatenation of their terms. There is
        # always at least one operand here, so `reduce` needs no `init`.
        return sortunique(reduce(vcat, flatten_gene_association.(ga.args[2:end])))
    else
        throw(DomainError(ga, "unsupported gene association function"))
    end
end
174+
175+
"""
176+
$(TYPEDSIGNATURES)
177+
178+
Formats a DNF gene association as a `String`.
179+
"""
180+
function format_gene_association_dnf(
    grr::A.GeneAssociationDNF;
    and = " && ",
    or = " || ",
)::String
    # Each conjunction is rendered as its genes joined by `and` and wrapped
    # in parentheses; the rendered conjunctions are then joined by `or`.
    terms = map(gr -> string("(", join(gr, and), ")"), grr)
    return join(terms, or)
end

src/interface.jl

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -90,12 +90,25 @@ A.objective(model::JSONFBCModel) = sparsevec(
9090
Float64[float(get(rxn, "objective_coefficient", 0.0)) for rxn in model.reactions],
9191
)
9292

93-
A.reaction_gene_products_available(model::JSONFBCModel, rid::String, available::Function) =
94-
A.reaction_gene_products_available_from_dnf(model, rid, available)
95-
96-
A.reaction_gene_association_dnf(model::JSONFBCModel, rid::String) = parse_grr(
97-
get(model.reactions[model.reaction_index[rid]], "gene_reaction_rule", nothing),
93+
function A.reaction_gene_products_available(
    model::JSONFBCModel,
    rid::String,
    available::Function,
)
    # Fetch the raw rule stored with the reaction; a missing key (or a stored
    # `nothing`) means there is nothing to evaluate.
    rxn = model.reactions[model.reaction_index[rid]]
    rule = get(rxn, "gene_reaction_rule", nothing)
    rule === nothing && return nothing

    # The parser returns `nothing` for blank rules; that is passed on as-is.
    ga = parse_gene_association(rule)
    return ga === nothing ? nothing : eval_gene_association(ga, available)
end
104+
105+
function A.reaction_gene_association_dnf(model::JSONFBCModel, rid::String)
    # Fetch the raw rule stored with the reaction; a missing key (or a stored
    # `nothing`) yields `nothing`.
    rxn = model.reactions[model.reaction_index[rid]]
    rule = get(rxn, "gene_reaction_rule", nothing)
    rule === nothing && return nothing

    # The parser returns `nothing` for blank rules; otherwise the parsed
    # expression is converted to DNF.
    ga = parse_gene_association(rule)
    ga === nothing && return nothing
    return flatten_gene_association(ga)
end
99112

100113
function A.metabolite_formula(model::JSONFBCModel, mid::String)
    # Look the metabolite record up by its identifier and hand its "formula"
    # entry (or `nothing` when the key is absent) to `parse_formula`.
    met = model.metabolites[model.metabolite_index[mid]]
    return parse_formula(get(met, "formula", nothing))
end
@@ -188,7 +201,7 @@ function Base.convert(::Type{JSONFBCModel}, mm::A.AbstractFBCModel)
188201

189202
grr = A.reaction_gene_association_dnf(mm, rid)
190203
if !isnothing(grr)
191-
res["gene_reaction_rule"] = unparse_grr(grr)
204+
res["gene_reaction_rule"] = format_gene_association_dnf(grr)
192205
end
193206

194207
res["lower_bound"] = lbs[ri]

src/utils.jl

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,20 +5,6 @@ extract_json_metabolite_id(m, i) = string(get(m, "id", "met$i"))
55

66
# Returns the "id" entry of gene record `g` as a string; when the record has
# no "id" key, the fallback name "gene" followed by `i` is used instead.
function extract_json_gene_id(g, i)
    fallback = "gene$i"
    return string(get(g, "id", fallback))
end
77

8-
function parse_grr(str::Maybe{String})
9-
isnothing(str) && return nothing
10-
isempty(str) && return nothing
11-
12-
dnf = A.GeneAssociationDNF()
13-
for isozyme in string.(split(str, " or "))
14-
push!(
15-
dnf,
16-
string.(split(replace(isozyme, "(" => "", ")" => "", " and " => " "), " ")),
17-
)
18-
end
19-
return dnf
20-
end
21-
228
function parse_formula(x::Maybe{String})
239
isnothing(x) && return nothing
2410
x == "" && return nothing
@@ -66,8 +52,3 @@ function unparse_formula(x::Maybe{A.MetaboliteFormula})
6652
ks = sort(collect(keys(x)))
6753
join(k * string(x[k]) for k in ks)
6854
end
69-
70-
function unparse_grr(xs::Maybe{A.GeneAssociationDNF})
71-
isnothing(xs) && return nothing
72-
join((join(x, " and ") for x in xs), " or ")
73-
end

test/misc.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@
1313
end
1414

1515
@testset "Corner cases" begin
16-
import JSONFBCModels: parse_charge
16+
import JSONFBCModels:
17+
eval_gene_association, flatten_gene_association, parse_charge, sortunique
1718

1819
@test parse_charge(1) == 1
1920
@test parse_charge(2.0) == 2
@@ -22,4 +23,7 @@ end
2223
@test parse_charge(nothing) == nothing
2324
@test_throws ArgumentError parse_charge("totally positive charge")
2425
@test_throws DomainError parse_charge(["very charged"])
26+
@test_throws DomainError eval_gene_association(:(xor(gene("a"), gene("b"))), _ -> false)
27+
@test_throws DomainError flatten_gene_association(:(xor(gene("a"), gene("b"))))
28+
@test sortunique([3, 2, 2, 1]) == [1, 2, 3]
2529
end

0 commit comments

Comments
 (0)