|
| 1 | + |
| 2 | +import PikaParser as PP |
| 3 | + |
"""
`PikaParser.jl` grammar for stringy GRR expressions: identifiers combined with
the "or" operators (`||`, `|`, `OR`, `or`), the "and" operators (`&&`, `&`,
`AND`, `and`) and parentheses. The top-level rule is `:expr`.
"""
const grr_grammar = begin
    # characters that typically form the identifiers
    isident(x::Char) =
        isletter(x) || isdigit(x) || x in ('_', '-', ':', '.', '\'', '[', ']')

    # scanner helper: returns the index of the last item in the leading run of
    # `m` whose items all satisfy `p`, or 0 if the first item already fails
    eat(p) = function (m)
        stop = 0
        for i in eachindex(m)
            if p(m[i])
                stop = i
            else
                break
            end
        end
        return stop
    end

    # scanner helper: accepts the leading identifier-like run of `m` only if it
    # is exactly one of the keywords in `w`; returns 0 otherwise
    kws(w...) = function (m)
        stop = eat(isident)(m)
        if m[begin:stop] in w
            return stop
        else
            return 0
        end
    end

    PP.make_grammar(
        [:expr],
        PP.flatten(
            Dict(
                # optional whitespace
                :space => PP.first(PP.scan(eat(isspace)), PP.epsilon),
                :id => PP.scan(eat(isident)),
                :orop =>
                    PP.first(PP.tokens("||"), PP.token('|'), PP.scan(kws("OR", "or"))),
                :andop => PP.first(
                    PP.tokens("&&"),
                    PP.token('&'),
                    PP.scan(kws("AND", "and")),
                ),
                # the whole input must be consumed by a single expression
                :expr => PP.seq(:space, :orexpr, :space, PP.end_of_input),
                # `or` is built from `and`-level operands, right-recursively
                :orexpr => PP.first(
                    :or => PP.seq(:andexpr, :space, :orop, :space, :orexpr),
                    :andexpr,
                ),
                # `and` is built from base operands, right-recursively
                :andexpr => PP.first(
                    :and => PP.seq(:baseexpr, :space, :andop, :space, :andexpr),
                    :baseexpr,
                ),
                :baseexpr => PP.first(
                    :id,
                    :parenexpr => PP.seq(
                        PP.token('('),
                        :space,
                        :orexpr,
                        :space,
                        PP.token(')'),
                    ),
                ),
            ),
            Char,
        ),
    )
end
| 73 | + |
function grr_grammar_open(m, _)
    # Selects which submatches of match `m` should be descended into. The masks
    # line up with the positions of the sequences defined in `grr_grammar`.
    rule = m.rule
    if rule == :expr
        # space, orexpr, space, end-of-input -> only the inner expression
        return Bool[0, 1, 0, 0]
    elseif rule == :parenexpr
        # '(', space, orexpr, space, ')' -> only the inner expression
        return Bool[0, 0, 1, 0, 0]
    elseif rule == :or || rule == :and
        # lhs, space, operator, space, rhs -> both operands
        return Bool[1, 0, 0, 0, 1]
    elseif rule in (:andexpr, :orexpr, :notexpr, :baseexpr)
        # pass-through alternatives with a single submatch
        # NOTE(review): `:notexpr` is not defined by the grammar visible in this
        # file; it is kept so the behavior stays unchanged.
        return Bool[1]
    else
        # anything else: open none of the submatches
        return (false for _ in m.submatches)
    end
end
| 80 | + |
function grr_grammar_fold(m, _, subvals)
    # Builds the value of match `m` from the values of its submatches. The
    # indices used below are the positions of the operands in the sequences
    # defined in `grr_grammar`.
    rule = m.rule
    # an identifier becomes a `gene("...")` call
    rule == :id && return Expr(:call, :gene, String(m.view))
    # binary operators combine the first and the last item of their sequence
    rule == :and && return Expr(:call, :and, subvals[1], subvals[5])
    rule == :or && return Expr(:call, :or, subvals[1], subvals[5])
    # parentheses and the top-level rule just forward the wrapped expression
    rule == :parenexpr && return subvals[3]
    rule == :expr && return subvals[2]
    # everything else forwards its first value, or `nothing` if it has none
    return isempty(subvals) ? nothing : subvals[1]
end
| 87 | + |
"""
$(TYPEDSIGNATURES)

Parse the GRR string `str` into an `Expr`-typed gene association. The result
consists of "calls" to `gene`, `and` and `or` functions that describe the
association.

Returns `nothing` if `str` is empty or contains only whitespace. Throws a
`DomainError` if `str` is not matched by the `:expr` rule of `grr_grammar`.
"""
function parse_gene_association(str::String)::Maybe{Expr}
    # nothing to parse in blank input
    if all(isspace, str)
        return nothing
    end

    tree = PP.parse_lex(grr_grammar, str)
    # the whole input has to match `:expr` starting at the first position
    root = PP.find_match_at!(tree, :expr, 1)
    if !(root > 0)
        throw(DomainError(str, "cannot parse GRR"))
    end

    return PP.traverse_match(tree, root; open = grr_grammar_open, fold = grr_grammar_fold)
end
| 101 | + |
| 102 | +""" |
| 103 | +$(TYPEDSIGNATURES) |
| 104 | +
|
| 105 | +Evaluate the gene association expression with the reference values given by the |
| 106 | +`val` function. |
| 107 | +""" |
function eval_gene_association(ga::Expr, val::Function)::Bool
    # `ga` must be a `:call` expression with a function name and at least one
    # argument: `gene(id)`, `and(args...)` or `or(args...)`. `val` is called
    # with the argument of `gene` calls and supplies their value.
    # Throws `DomainError` for any other shape of `ga`.
    (ga.head == :call && length(ga.args) >= 2) ||
        throw(DomainError(ga, "invalid gene association expr"))

    op = ga.args[1]
    if op == :gene && length(ga.args) == 2
        return val(ga.args[2])
    elseif op == :and
        # predicate form of `all` stops at the first false operand and avoids
        # materializing a temporary vector of results
        return all(arg -> eval_gene_association(arg, val), ga.args[2:end])
    elseif op == :or
        # predicate form of `any` stops at the first true operand
        return any(arg -> eval_gene_association(arg, val), ga.args[2:end])
    else
        throw(DomainError(ga, "unsupported gene association function"))
    end
end
| 121 | + |
| 122 | +""" |
| 123 | +$(TYPEDSIGNATURES) |
| 124 | +
|
| 125 | +A helper for producing predictable unique sequences. Might be faster if |
| 126 | +compacting would be done directly in sort(). |
| 127 | +""" |
function sortunique(x)
    # Collects `x` into a fresh array, sorts it, and compacts runs of equal
    # (`==`) neighbours in place; returns the sorted sequence without repeats.
    sorted = sort!(collect(x))
    lo = firstindex(sorted)
    # index of the most recently kept element; starts just before the first
    # slot so that empty input yields an empty result
    kept = prevind(sorted, lo)
    for i in eachindex(sorted)
        # a repeat of the last kept element is dropped
        kept >= lo && sorted[i] == sorted[kept] && continue
        kept = nextind(sorted, kept)
        # move the element down only if a gap has opened
        kept == i || (sorted[kept] = sorted[i])
    end
    return sorted[begin:kept]
end
| 145 | + |
"""
$(TYPEDSIGNATURES)

Convert the given gene association expression to DNF.

`ga` must be a `:call` expression of the form `gene(id)`, `and(args...)` or
`or(args...)`. The result is a collection of conjunctions, each a vector of
gene identifiers; both the conjunctions and their contents are sorted and
deduplicated with `sortunique`. Throws a `DomainError` for any other
expression.
"""
function flatten_gene_association(ga::Expr)::A.GeneAssociationDNF
    # Conjunction of several DNFs: every clause of the first DNF is merged with
    # every clause of the conjunction of the remaining DNFs.
    function fold_and(dnfs::Vector{Vector{Vector{String}}})::Vector{Vector{String}}
        # empty conjunction: a single clause with no genes
        isempty(dnfs) && return [String[]]
        # The conjunction of the tail does not depend on the clause picked from
        # the head, so compute it once instead of once per head clause.
        rest = fold_and(dnfs[2:end])
        return sortunique(sortunique(String[l; r]) for l in dnfs[1] for r in rest)
    end

    (ga.head == :call && length(ga.args) >= 2) ||
        throw(DomainError(ga, "invalid gene association expr"))

    op = ga.args[1]
    if op == :gene && length(ga.args) == 2
        # single gene: one clause containing just that gene
        return [[ga.args[2]]]
    elseif op == :and
        return fold_and(flatten_gene_association.(ga.args[2:end]))
    elseif op == :or
        # disjunction: concatenate the clauses of all operands
        return sortunique(vcat(flatten_gene_association.(ga.args[2:end])...))
    else
        throw(DomainError(ga, "unsupported gene association function"))
    end
end
| 174 | + |
"""
$(TYPEDSIGNATURES)

Formats a DNF gene association as a `String`. The items of each group in `grr`
are joined with `and` and wrapped in parentheses; the resulting groups are
joined with `or`.
"""
function format_gene_association_dnf(
    grr::A.GeneAssociationDNF;
    and = " && ",
    or = " || ",
)::String
    clauses = ["(" * join(genes, and) * ")" for genes in grr]
    return join(clauses, or)
end
0 commit comments