-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathgene_summary_stats.sql
More file actions
80 lines (73 loc) · 3.35 KB
/
gene_summary_stats.sql
File metadata and controls
80 lines (73 loc) · 3.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
/**
SQL used to generate the counts of gene summaries for all genes in Chado.
e.g.
psql -f gene_summary_stats.sql -h chado.flybase.org -U flybase flybase > gene_summary_chado_stats.tsv
This file is then consumed by merge_chado_alliance_gene_summary_counts.py to produce the final file.
*/
copy (
select
f.uniquename,
flybase.current_symbol(f.uniquename),
gene_snapshot,
uniprot_function,
flybase_pathway,
flybase_group,
interactive_fly
from feature f
left join lateral (
select count(*) as gene_snapshot
from flybase.get_featureprop(f.uniquename,'gene_summary_text')
) gs on true
left join lateral (
select count(*) as uniprot_function
from feature_dbxref fdbx join dbxref dbx on fdbx.dbxref_id = dbx.dbxref_id
join dbxrefprop dbxp on dbx.dbxref_id = dbxp.dbxref_id
join db on dbx.db_id = db.db_id
join cvterm dbxpt on dbxp.type_id = dbxpt.cvterm_id
where f.feature_id = fdbx.feature_id
and lower(dbxpt.name) = 'uniprot_function_comment'
and lower(db.name) = 'uniprot/swiss-prot'
) us on true
left join lateral (
select count(*) as flybase_pathway
from feature_grpmember fgm join grpmember gm on fgm.grpmember_id = gm.grpmember_id
join cvterm gmt on gm.type_id = gmt.cvterm_id
join grp on gm.grp_id = grp.grp_id
join grp_cvterm g_cvt on grp.grp_id = g_cvt.grp_id
join cvterm gt on g_cvt.cvterm_id = gt.cvterm_id
join dbxref dbx on gt.dbxref_id = dbx.dbxref_id
join db on dbx.db_id = db.db_id
where f.feature_id = fgm.feature_id
and gmt.name = 'grpmember_feature'
and db.name = 'FBcv'
and dbx.accession = '0007034'
) fbpath on true
left join lateral (
select count(*) as flybase_group
from feature_grpmember fgm join grpmember gm on fgm.grpmember_id = gm.grpmember_id
join cvterm gmt on gm.type_id = gmt.cvterm_id
join grp on gm.grp_id = grp.grp_id
join grp_cvterm g_cvt on grp.grp_id = g_cvt.grp_id
join cvterm gt on g_cvt.cvterm_id = gt.cvterm_id
join dbxref dbx on gt.dbxref_id = dbx.dbxref_id
join db on dbx.db_id = db.db_id
where f.feature_id = fgm.feature_id
and gmt.name = 'grpmember_feature'
and db.name = 'FBcv'
and dbx.accession != '0007034'
) fbgroup on true
left join lateral (
select count(*) as interactive_fly
from feature_dbxref fdbx join dbxref dbx on fdbx.dbxref_id = dbx.dbxref_id
join dbxrefprop dbxp on dbx.dbxref_id = dbxp.dbxref_id
join db on dbx.db_id = db.db_id
join cvterm dbxpt on dbxp.type_id = dbxpt.cvterm_id
where f.feature_id = fdbx.feature_id
and lower(dbxpt.name) = 'if_summary'
and lower(db.name) = 'interactivefly'
and dbxp.value is not null
) if on true
where f.is_obsolete = false
and f.is_analysis = false
and f.uniquename ~ '^FBgn\d+$'
) to stdout;