Skip to content

Commit c0803b7

Browse files
jbrestel and iodice authored
Diamond branch for beta site (#28)
* schema changes * schema changes * no project column * bug fixes --------- Co-authored-by: John Iodice <iodice@upenn.edu>
1 parent 02c83c8 commit c0803b7

3 files changed

Lines changed: 47 additions & 133 deletions

File tree

Model/lib/wdk/OrthoMCL/groupRecordQueries.xml

Lines changed: 31 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,9 @@
1313
<column name="group_name"/>
1414
<sql>
1515
<![CDATA[
16-
select distinct name as group_name
16+
select distinct group_id as group_name
1717
from apidb.OrthologGroup
18-
where core_peripheral_residual in ('P','R')
19-
and name like '%' || $$numeral$$
18+
where group_id like '%' || $$numeral$$
2019
]]>
2120
</sql>
2221
</sqlQuery>
@@ -26,9 +25,8 @@
2625
<column name="old_group_name"/>
2726
<sql>
2827
<![CDATA[
29-
select distinct name as group_name, name as old_group_name
28+
select distinct group_id as group_name, group_id as old_group_name
3029
from apidb.OrthologGroup
31-
where core_peripheral_residual in ('P','R')
3230
]]>
3331
</sql>
3432
</sqlQuery>
@@ -51,50 +49,20 @@
5149

5250
<sqlQuery name="GroupAttrs">
5351
<column name="group_name"/>
54-
<column name="group_type"/>
55-
<column name="project"/>
52+
<column name="is_residual"/>
5653
<column name="number_of_members"/>
57-
<column name="avg_percent_identity"/>
58-
<column name="number_of_match_pairs"/>
59-
<column name="percent_match_pairs"/>
60-
<column name="avg_percent_match"/>
61-
<column name="evalue"/>
62-
<column name="evalue_display"/>
63-
<column name="avg_connectivity"/>
54+
<column name="number_of_core_members"/>
55+
<column name="number_of_peripheral_members"/>
6456
<sql>
6557
<![CDATA[
66-
SELECT pr.group_name, number_of_members, number_of_match_pairs,
67-
avg_percent_identity, percent_match_pairs, avg_percent_match,
68-
evalue, evalue_display, avg_connectivity, 'OrthoMCL' as project
69-
FROM (SELECT name as group_name, number_of_members, number_of_match_pairs,
70-
decode(avg_percent_identity,null,'',round(avg_percent_identity,1)) as avg_percent_identity,
71-
decode(percent_match_pairs,null,'',round(percent_match_pairs,1)) as percent_match_pairs,
72-
decode(avg_percent_match,null,'',round(avg_percent_match,1)) as avg_percent_match,
73-
CASE WHEN (number_of_members=1) THEN 1 WHEN (avg_evalue_mant IS NULL OR avg_evalue_exp IS NULL) THEN 0 ELSE ROUND(avg_evalue_mant,1) * power(10,avg_evalue_exp) END AS evalue,
74-
CASE WHEN (number_of_members=1) THEN NULL WHEN (avg_evalue_mant IS NULL OR avg_evalue_exp IS NULL) THEN '0' ELSE ROUND(avg_evalue_mant,1) || 'E' || avg_evalue_exp END AS evalue_display
75-
FROM apidb.OrthologGroup
76-
WHERE core_peripheral_residual in ('P','R') ) pr,
77-
(SELECT name as group_name,
78-
decode(avg_connectivity,null,'',round(100*avg_connectivity/(number_of_members-1),1)) as avg_connectivity
79-
FROM apidb.OrthologGroup
80-
WHERE core_peripheral_residual = 'C' ) c
81-
WHERE pr.group_name = c.group_name(+)
58+
/* GroupAttrs: group-level attributes read directly from apidb.OrthologGroup.
   Every selected column must match a <column> declared for this query.
   The comma after is_residual is required: without it Oracle treats
   number_of_members as an alias for is_residual, so the is_residual column
   disappears and number_of_members returns the wrong values. */
SELECT group_id as group_name, is_residual,
59+
number_of_members, number_of_core_members,
60+
number_of_peripheral_members
61+
FROM apidb.OrthologGroup
8262
]]>
8363
</sql>
8464
</sqlQuery>
8565

86-
<sqlQuery name="MSA">
87-
<column name="group_name"/>
88-
<column name="msa"/>
89-
<sql>
90-
<![CDATA[
91-
SELECT g.name AS group_name, g.multiple_sequence_alignment AS msa
92-
FROM apidb.OrthologGroup g
93-
where core_peripheral_residual in ('C','R')
94-
]]>
95-
</sql>
96-
</sqlQuery>
97-
9866

9967
<sqlQuery name="MaxSequenceLength">
10068
<column name="group_name"/>
@@ -109,48 +77,19 @@
10977
</sqlQuery>
11078

11179

112-
<sqlQuery name="Layout">
113-
<column name="group_name"/>
114-
<column name="layout"/>
115-
<sql>
116-
<![CDATA[
117-
SELECT g.name AS group_name, l.layout
118-
FROM apidb.OrthologGroup g, apidb.OrthologGroupLayoutCore l
119-
WHERE g.ortholog_group_id = l.ortholog_group_id(+)
120-
and g.core_peripheral_residual in ('P','R')
121-
]]>
122-
</sql>
123-
</sqlQuery>
124-
12580
<sqlQuery name="EcNumbers">
12681
<column name="group_name" />
12782
<column name="ec_numbers" />
12883
<sql>
129-
<!-- formatted in SQL Developer -->
130-
SELECT og.name AS group_name,
131-
ecListMap.ec_list as ec_numbers
132-
FROM apidb.OrthologGroup og,
133-
(SELECT ec_map.peripheral_ortholog_group_id,
134-
listagg(ec.ec_number
135-
|| ' ('
136-
|| ec_count
137-
|| ')', ', ') within GROUP (
138-
ORDER BY ec.ec_number) AS ec_list
139-
FROM sres.EnzymeClass ec,
140-
(SELECT ogs.peripheral_ortholog_group_id,
141-
asec.enzyme_class_id,
142-
COUNT(*) AS ec_count
143-
FROM ApidbTuning.SequenceAttributes ogs,
144-
dots.AaSequenceEnzymeClass asec
145-
WHERE asec.aa_sequence_id = ogs.aa_sequence_id
146-
GROUP BY ogs.peripheral_ortholog_group_id,
147-
asec.enzyme_class_id
148-
) ec_map
149-
WHERE ec_map.enzyme_class_id = ec.enzyme_class_id
150-
GROUP BY ec_map.peripheral_ortholog_group_id
151-
) ecListMap
152-
WHERE og.ortholog_group_id = ecListMap.peripheral_ortholog_group_id(+)
153-
and og.core_peripheral_residual in ('P','R')
84+
<![CDATA[
85+
/* EcNumbers: one row per group_name, with ec_numbers holding the group's
   EC numbers joined by ', ' in ec_number order. The inner DISTINCT collapses
   sequences that share an EC number so each number is listed once per group.
   All three tables are inner-joined, so a group whose sequences have no
   AaSequenceEnzymeClass row produces no row here.
   NOTE(review): the previous version outer-joined from apidb.OrthologGroup and
   returned a row for every group - confirm that dropping those rows is intended. */
select group_name,
86+
listagg(ec_number, ', ') within group (order by ec_number) as ec_numbers
87+
from (select distinct sa.group_name, ec.ec_number
88+
from sres.EnzymeClass ec, dots.AaSequenceEnzymeClass asec, apidbTuning.SequenceAttributes sa
89+
where ec.enzyme_class_id = asec.enzyme_class_id
90+
and asec.aa_sequence_id = sa.aa_sequence_id)
91+
group by group_name
92+
]]>
15493
</sql>
15594
</sqlQuery>
15695

@@ -159,15 +98,14 @@ WHERE og.ortholog_group_id = ecListMap.peripheral_ortholog_group_id(+)
15998
<column name="keywords" />
16099
<sql>
161100
<![CDATA[
162-
select og.name as group_name, verbiage.keywords
101+
select og.group_id as group_name, verbiage.keywords
163102
from apidb.OrthologGroup og,
164103
(select ortholog_group_id,
165104
listagg(keyword, '; ') within group (order by to_number(frequency) desc) as keywords
166105
from apidb.OrthomclGroupKeyword
167106
group by ortholog_group_id
168107
) verbiage
169-
where og.ortholog_group_id = verbiage.ortholog_group_id(+)
170-
and og.core_peripheral_residual in ('P','R')
108+
where og.group_id = verbiage.ortholog_group_id(+)
171109
]]>
172110
</sql>
173111
</sqlQuery>
@@ -177,15 +115,14 @@ WHERE og.ortholog_group_id = ecListMap.peripheral_ortholog_group_id(+)
177115
<column name="descriptions" />
178116
<sql>
179117
<![CDATA[
180-
select og.name as group_name, domains.descriptions
118+
select og.group_id as group_name, domains.descriptions
181119
from apidb.OrthologGroup og,
182120
(select ortholog_group_id,
183121
listagg(description, '; ') within group (order by frequency desc) as descriptions
184122
from apidb.OrthomclGroupDomain
185123
group by ortholog_group_id
186124
) domains
187125
where og.ortholog_group_id = domains.ortholog_group_id(+)
188-
and og.core_peripheral_residual in ('P','R')
189126
]]>
190127
</sql>
191128
</sqlQuery>
@@ -206,15 +143,12 @@ WHERE og.ortholog_group_id = ecListMap.peripheral_ortholog_group_id(+)
206143
<column name="taxon_name"/>
207144
<column name="taxon_abbrev"/>
208145
<column name="length"/>
209-
<column name="previous_groups"/>
210146
<column name="description"/>
211-
<column name="ec_numbers"/>
212147
<sql>
213148
<![CDATA[
214149
select case sa.core_peripheral when 'C' then 'Core' when 'P' then 'Peripheral' else '' end as core_peripheral,
215150
sa.group_name, sa.full_id, sa.organism_name AS taxon_name, sa.taxon_abbreviation AS taxon_abbrev,
216-
sa.length, sa.product as description,
217-
sa.old_groups as previous_groups, sa.ec_numbers, sa.source_id
151+
sa.length, sa.product as description, sa.source_id
218152
from ApidbTuning.SequenceAttributes sa
219153
]]>
220154
</sql>
@@ -223,17 +157,16 @@ WHERE og.ortholog_group_id = ecListMap.peripheral_ortholog_group_id(+)
223157
<sqlQuery name="PFams">
224158
<column name="group_name"/>
225159
<column name="accession"/>
226-
<column name="symbol"/>
227160
<column name="description"/>
228161
<column name="num_proteins"/>
229162
<column name="domain_index"/>
230163
<column name="max_index"/>
231164
<sql>
232165
<![CDATA[
233-
SELECT group_name, accession, symbol, description, domain_index, COUNT(distinct full_id) as num_proteins,
166+
SELECT group_name, accession, description, domain_index, COUNT(distinct full_id) as num_proteins,
234167
(SELECT MAX(domain_index) FROM ApidbTuning.DomainAssignment) AS max_index
235168
FROM ApidbTuning.DomainAssignment
236-
GROUP BY group_name, accession, symbol, description, domain_index
169+
GROUP BY group_name, accession, description, domain_index
237170
ORDER BY accession
238171
]]>
239172
</sql>
@@ -242,7 +175,6 @@ WHERE og.ortholog_group_id = ecListMap.peripheral_ortholog_group_id(+)
242175
<sqlQuery name="ProteinPFams">
243176
<column name="group_name"/>
244177
<column name="accession"/>
245-
<column name="symbol"/>
246178
<column name="full_id"/>
247179
<column name="protein_length"/>
248180
<column name="core_peripheral"/>
@@ -256,7 +188,7 @@ WHERE og.ortholog_group_id = ecListMap.peripheral_ortholog_group_id(+)
256188
<![CDATA[
257189
SELECT sa.group_name, sa.full_id, round(sa.length,0) as protein_length,
258190
case sa.core_peripheral when 'C' then 'Core' when 'P' then 'Peripheral' else '' end as core_peripheral,
259-
da.accession, da.symbol, da.domain_index, da.start_min, da.end_max
191+
da.accession, da.domain_index, da.start_min, da.end_max
260192
from ApidbTuning.SequenceAttributes sa
261193
Left outer join ApidbTuning.DomainAssignment da
262194
on da.full_id = sa.full_id
@@ -290,9 +222,8 @@ WHERE og.ortholog_group_id = ecListMap.peripheral_ortholog_group_id(+)
290222

291223
<sql>
292224
<![CDATA[
293-
select name as group_name, name as name_string
225+
select group_id as group_name, group_id as name_string
294226
from apidb.OrthologGroup
295-
where core_peripheral_residual in ('P','R')
296227
]]>
297228
</sql>
298229
</sqlQuery>
@@ -304,10 +235,9 @@ WHERE og.ortholog_group_id = ecListMap.peripheral_ortholog_group_id(+)
304235
<column name="frequency" />
305236
<sql>
306237
<![CDATA[
307-
select og.name as group_name, k.keyword, k.frequency
238+
select og.group_id as group_name, k.keyword, k.frequency
308239
from apidb.OrthologGroup og, apidb.OrthomclGroupKeyword k
309-
WHERE og.ortholog_group_id = k.ortholog_group_id
310-
and og.core_peripheral_residual in ('P','R')
240+
where og.group_id = k.ortholog_group_id
311241
]]>
312242
</sql>
313243
</sqlQuery>
@@ -318,10 +248,9 @@ WHERE og.ortholog_group_id = ecListMap.peripheral_ortholog_group_id(+)
318248
<column name="frequency" />
319249
<sql>
320250
<![CDATA[
321-
select og.name as group_name, d.description AS keyword, d.frequency
251+
select og.group_id as group_name, d.description AS keyword, d.frequency
322252
from apidb.OrthologGroup og, apidb.OrthomclGroupDomain d
323-
WHERE og.ortholog_group_id = d.ortholog_group_id
324-
and og.core_peripheral_residual in ('P','R')
253+
where og.ortholog_group_id = d.ortholog_group_id
325254
]]>
326255
</sql>
327256
</sqlQuery>

Model/lib/wdk/OrthoMCL/sequenceRecordQueries.xml

Lines changed: 7 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -73,34 +73,17 @@
7373
<column name="full_id"/>
7474
<column name="source_id"/>
7575
<column name="project"/>
76-
<column name="length"/>
7776
<column name="product"/>
78-
<column name="molecular_weight"/>
7977
<column name="abbreviation"/>
8078
<column name="taxon_name"/>
81-
<column name="ec_numbers"/>
82-
<column name="pfam_domains"/>
8379
<sql>
8480
<![CDATA[
85-
SELECT eas.secondary_identifier AS full_id, eas.source_id,
86-
eas.length, eas.description AS product,
87-
eas.molecular_weight,
88-
ot.three_letter_abbrev AS abbreviation,
89-
ot.name AS taxon_name,
90-
(SELECT apidb.tab_to_string(set(cast(collect(ec.description || ' (' || ec.ec_number || ')') as apidb.varchartab)), ', ')
91-
FROM dots.AaSequenceEnzymeClass asec, sres.enzymeClass ec
92-
WHERE eas.aa_sequence_id = asec.aa_sequence_id
93-
AND asec.enzyme_class_id = ec.enzyme_class_id
94-
GROUP by eas.secondary_identifier
95-
) as ec_numbers,
96-
(SELECT apidb.tab_to_string(set(cast(collect(da.description || ' (' || da.accession || ')') as apidb.varchartab)), ', ')
97-
FROM ApidbTuning.DomainAssignment da
98-
WHERE da.aa_sequence_id = eas.aa_sequence_id
99-
GROUP BY da.full_id ) as pfam_domains,
81+
select full_id, source_id,
82+
product, organism_name, group_name,
83+
taxon_abbreviation as abbreviation,
84+
organism_name as taxon_name,
10085
'OrthoMCL' as project
101-
FROM dots.ExternalAaSequence eas, apidb.orthomcltaxon ot
102-
WHERE NVL(SUBSTR(eas.secondary_identifier, 0, INSTR(eas.secondary_identifier, '|')-1), eas.secondary_identifier)
103-
= ot.three_letter_abbrev
86+
from apidbTuning.SequenceAttributes
10487
]]>
10588
</sql>
10689
</sqlQuery>
@@ -115,10 +98,7 @@ WHERE NVL(SUBSTR(eas.secondary_identifier, 0, INSTR(eas.secondary_identifier, '|
11598
<column name="link_text"/>
11699
<sql>
117100
<![CDATA[
118-
SELECT sa.full_id, sa.group_name, sa.source_url,
119-
CASE sa.core_peripheral WHEN 'C' THEN 'Core' WHEN 'P' THEN 'Peripheral' END AS core_peripheral,
120-
CASE WHEN sa.group_name LIKE 'OG%r%_%' THEN 0 ELSE sa.group_size END AS num_core,
121-
CASE WHEN sa.group_name LIKE 'OG%r%_%' THEN sa.group_size ELSE sa.peripheral_group_size-sa.group_size END AS num_peripheral,
101+
SELECT sa.full_id, sa.group_name, sa.source_url as url,
122102
CASE WHEN sa.source_url is null THEN null ELSE sa.full_id END AS link_text
123103
FROM ApidbTuning.SequenceAttributes sa
124104
]]>
@@ -169,7 +149,6 @@ WHERE NVL(SUBSTR(eas.secondary_identifier, 0, INSTR(eas.secondary_identifier, '|
169149
<sqlQuery name="PFamDomains">
170150
<column name="full_id"/>
171151
<column name="accession"/>
172-
<column name="symbol"/>
173152
<column name="description"/>
174153
<column name="domain_index"/>
175154
<column name="max_index"/>
@@ -178,7 +157,7 @@ WHERE NVL(SUBSTR(eas.secondary_identifier, 0, INSTR(eas.secondary_identifier, '|
178157
<column name="length"/>
179158
<sql>
180159
<![CDATA[
181-
SELECT da.full_id, da.accession, da.symbol, da.description, da.domain_index,
160+
SELECT da.full_id, da.accession, da.description, da.domain_index,
182161
(SELECT MAX(domain_index) FROM ApidbTuning.DomainAssignment) AS max_index,
183162
da.start_min, da.end_max, sa.length
184163
FROM ApidbTuning.DomainAssignment da, ApidbTuning.SequenceAttributes sa

Model/lib/wdk/OrthoMCL/siteSearchRecords.xml

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,9 @@
4242

4343
<attributeQueryRef ref="SequenceAttributes.SequenceAttrs">
4444
<!-- columnAttribute name="full_id" displayName="Full ID"/ -->
45-
<!-- special field for project filtering -->
45+
<!-- special field for project filtering
4646
<columnAttribute name="project" displayName="dontcare" internal="true"/>
47+
-->
4748
<columnAttribute name="source_id" displayName="Source ID">
4849
<propertyList name="boost">
4950
<value>100</value>
@@ -60,6 +61,7 @@
6061
<value>10</value>
6162
</propertyList>
6263
</columnAttribute>
64+
<!--
6365
<columnAttribute name="ec_numbers" displayName="EC Numbers">
6466
<propertyList name="isSummary">
6567
<value>true</value>
@@ -70,6 +72,7 @@
7072
<value>true</value>
7173
</propertyList>
7274
</columnAttribute>
75+
-->
7376
<columnAttribute name="abbreviation" displayName="Taxon Abbreviation"/>
7477
<columnAttribute name="taxon_name" displayName="Taxon Name">
7578
<propertyList name="isSummary">
@@ -125,8 +128,9 @@
125128

126129
<attributeQueryRef ref="GroupAttributes.GroupAttrs">
127130
<!-- columnAttribute name="group_name" displayName="Group Name" / -->
128-
<!-- special field for project filtering -->
131+
<!-- special field for project filtering
129132
<columnAttribute name="project" displayName="dontcare" internal="true"/>
133+
-->
130134
</attributeQueryRef>
131135

132136
<attributeQueryRef ref="GroupAttributes.Keywords">
@@ -180,17 +184,19 @@
180184
<columnAttribute name="taxon_name" displayName="Protein Taxon Name" />
181185
</table>
182186

187+
<!--
183188
<table name="ProteinPreviousGroups"
184189
displayName="Protein Previous Groups"
185190
queryRef="GroupTables.Proteins">
186191
<columnAttribute name="previous_groups" displayName="Protein Previous Groups" />
187192
</table>
193+
-->
188194

189195
<table name="PFams"
190196
displayName="Pfam Domains"
191197
queryRef="GroupTables.PFams">
192198
<columnAttribute name="accession" displayName="Accession Number" />
193-
<columnAttribute name="symbol" displayName="Symbol" />
199+
<!-- <columnAttribute name="symbol" displayName="Symbol" /> -->
194200
<columnAttribute name="description" displayName="Description" />
195201
</table>
196202

0 commit comments

Comments
 (0)