Skip to content

Commit 94c2d4a

Browse files
Community studies (#27)
* introduce community studies * introduce community studies * debug model for community studies * work in progress * w.i.p. * w.i.p. * wip * w.i.p. * Update ssCreateWdkRecordsBatch lose unneeded variable * Update siteSearchModel.xml adjust constant name * Update siteSearchModel.xml fix comment * w.i.p. * adjust document name * use list instead of map for IDs * remove unneeded import * use new VDI_CONTROL_SCHEMA * add community_studies_update.sh * add comm dataset to apicomm * add comm datasets
1 parent 1a36c94 commit 94c2d4a

13 files changed

Lines changed: 471 additions & 19 deletions

File tree

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
#!/bin/sh
#
# Start a local WDK server, dump the "community-datasets" batch from it with
# ssCreateWdkRecordsBatch, and load the produced batch into Solr with
# ssLoadBatch.
#
# Required environment:
#   PROJECT_ID  project passed to ssCreateWdkRecordsBatch
#   SOLR_URL    Solr endpoint passed to ssLoadBatch

set -e
set -x

# Fail fast with a clear message instead of letting an empty expansion
# silently shift the positional arguments of the commands below.
: "${PROJECT_ID:?PROJECT_ID must be set}"
: "${SOLR_URL:?SOLR_URL must be set}"

SERVER_PORT=7783
DESTINATION_DIRECTORY=/tmp/output

# loop across websites (eg plasmodb, clinepidb) and:
#  - configure WDK server to know about that project and its database
#  - call the script below

# 'PlasmoDB:plas

# start WDK server
echo "$(date -u) starting server"
wdkServer SiteSearchData "$SERVER_PORT" -cleanCacheAtStartup &
WDK_PID=$!

# Make sure the background server never outlives this script, including when
# `set -e` aborts it part-way through.
trap 'kill "$WDK_PID" 2>/dev/null || true' EXIT
trap 'exit 1' INT TERM

echo "waiting for server to be available"

while true
do
  echo "checking port $SERVER_PORT..."
  # nc takes host and port as separate arguments
  if nc -zv localhost "$SERVER_PORT"
  then
    echo 'server available'
    break
  fi
  # give up if the server process has died; otherwise we would wait forever
  if ! kill -0 "$WDK_PID" 2>/dev/null
  then
    echo "$(date -u) WDK server exited before becoming available" >&2
    exit 1
  fi
  sleep 1
done
echo "$(date -u) server available"

# make output dir and run commands to produce output
#
# These are deliberately separate commands, not an `&&` chain: `set -e` does
# not abort on a failure inside an AND list (except its last command), which
# would let the Solr load below run with --replace on missing or stale output.
# Plain `mkdir` (no -p) is kept so that a leftover directory from an earlier
# run stops the script instead of being loaded again.
mkdir "$DESTINATION_DIRECTORY"
echo "$(date -u) starting ssCreateWdkRecordsBatch"
ssCreateWdkRecordsBatch community-datasets "$PROJECT_ID" "http://localhost:$SERVER_PORT" "$DESTINATION_DIRECTORY"
echo "produced files:"
echo
find "$DESTINATION_DIRECTORY" -type f -print0 | xargs -0 ls -al

# load produced output into solr
echo "$(date -u) starting ssLoadBatch"
ssLoadBatch "$SOLR_URL" index --batch-dir "$DESTINATION_DIRECTORY" --replace

# shut down running WDK server started above
echo "$(date -u) Shutting down WDK"
kill "$WDK_PID"
trap - EXIT

echo "$(date -u) DONE community_studies_update"

Model/bin/dumpEdaWdkBatchesForSolr

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ my $modelProps = SiteSearchData::Model::Utils::getPropsFromFile("$ENV{GUS_HOME}/
2424

2525
SiteSearchData::Model::Utils::runWdkReport($wdkServiceUrl, $targetDir, $BATCH_DIR_PREFIX, "dataset-presenter", $modelProps->{PROJECT_ID});
2626

27+
SiteSearchData::Model::Utils::runWdkReport($wdkServiceUrl, $targetDir, $BATCH_DIR_PREFIX, "community-datasets", $modelProps->{PROJECT_ID});
28+
2729
print STDOUT "Done.\n";
2830
exit 0;
2931

Model/bin/ssCreateWdkRecordsBatch

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -68,16 +68,21 @@ def validateSearchParam(paramName, searchName, searchParamNames):
6868
elif paramName != searchParamNames[0]:
6969
utils.error(msgPrefix + " does not have a paramter with name " + paramName)
7070

71-
def getSearchUrlName(recordType, recordTypeName, paramName):
71+
# return the search url segment and list of dynamic attributes (if any)
def getSearchInfo(recordType, recordTypeName, paramName):
    """Describe the single non-internal search of a record type.

    Args:
        recordType: record type structure; its 'searches' entries are read
            for 'fullName', 'urlSegment', 'paramNames' and (optionally)
            'dynamicAttributes'.
        recordTypeName: name of the record type, used only in error messages.
        paramName: parameter name to check against the search, passed through
            to validateSearchParam.

    Returns:
        A dict with 'urlSegment' (the search's URL segment) and 'dynAttrs'
        (names of the search's dynamic attributes, excluding 'weight').
    """
    # searches whose full name contains 'InternalQuestions' are ignored
    filteredQuestions = [search for search in recordType['searches']
                         if 'InternalQuestions' not in search['fullName']]
    if len(filteredQuestions) != 1:
        # also reached when there is no search at all, so report the count
        utils.error("RecordType " + recordTypeName
                    + " must have exactly one search, but has "
                    + str(len(filteredQuestions)))
    search = filteredQuestions[0]
    # a search without dynamic attributes may omit the key entirely
    dynAttrs = [dynAttr['name']
                for dynAttr in (search.get('dynamicAttributes') or [])
                if dynAttr['name'] != 'weight']
    validateSearchParam(paramName, search['urlSegment'], search['paramNames'])
    return {'urlSegment': search['urlSegment'], 'dynAttrs': dynAttrs}
7984

80-
def composeUrlParams(paramName, paramValue, recordType, batchType, batchName, batchTimestamp, batchId):
85+
def composeUrlParams(paramName, paramValue, recordType, dynAttrs, batchType, batchName, batchTimestamp, batchId):
8186
urlParams = {}
8287
if paramName is not None:
8388
urlParams[paramName] = paramValue
@@ -87,15 +92,15 @@ def composeUrlParams(paramName, paramValue, recordType, batchType, batchName, ba
8792
reportConfig['batch-name'] = batchName
8893
reportConfig['batch-timestamp'] = batchTimestamp
8994
reportConfig['batch-id'] = batchId
90-
attributeNames = list(map(lambda attribute: attribute['name'], recordType['attributes']))
95+
attributeNames = dynAttrs + list(map(lambda attribute: attribute['name'], recordType['attributes']))
9196
reportConfig['attributes'] = [attr for attr in attributeNames if attr not in recordType['primaryKeyColumnRefs']]
9297
reportConfig['tables'] = list(map(lambda table: table['name'], recordType['tables']))
9398
urlParams['reportConfig'] = json.dumps(reportConfig)
9499
return urlParams
95100

96-
def runReportToFile(wdkServiceUrl, workingDir, recordTypeName, searchName, paramName, paramValue, recordType, batchType, batchName, batchTimestamp, batchId):
101+
def runReportToFile(wdkServiceUrl, workingDir, recordTypeName, searchName, dynAttrs, paramName, paramValue, recordType, batchType, batchName, batchTimestamp, batchId):
97102
reportUrl = wdkServiceUrl + '/record-types/' + recordTypeName + '/searches/' + searchName + '/reports/' + REPORTNAME + '?'
98-
urlParams = composeUrlParams(paramName, paramValue, recordType, batchType, batchName, batchTimestamp, batchId)
103+
urlParams = composeUrlParams(paramName, paramValue, recordType, dynAttrs, batchType, batchName, batchTimestamp, batchId)
99104
targetFilename = workingDir + "/" + recordTypeName + ".json"
100105
with requests.get(url=reportUrl, params=urlParams, stream=True) as response:
101106
if (response.status_code != 200):
@@ -125,8 +130,8 @@ for recordTypeName in utils.getRecordTypeNames(wdkServiceUrl):
125130
recordType = utils.getRecordType(wdkServiceUrl, recordTypeName)
126131
if (checkRecordTypeBatch(recordType, batchType)):
127132
print(str(datetime.datetime.now()) + " Processing record type: " + recordType['nativeDisplayName'], flush=True)
128-
searchUrlName = getSearchUrlName(recordType, recordTypeName, paramName)
129-
runReportToFile(wdkServiceUrl, outputDir, recordTypeName, searchUrlName, paramName, paramValue, recordType, batchType, batchName, batchTimestamp, batchId)
133+
searchInfo = getSearchInfo(recordType, recordTypeName, paramName)
134+
runReportToFile(wdkServiceUrl, outputDir, recordTypeName, searchInfo['urlSegment'], searchInfo['dynAttrs'], paramName, paramValue, recordType, batchType, batchName, batchTimestamp, batchId)
130135

131136
# write meta file about this batch
132137
print(str(datetime.datetime.now()) + " writing batch.json", flush=True)

Model/bin/ssLoadBatch

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -410,7 +410,7 @@ def solr_request(method, url, data=None, auth=None, headers=None):
410410
print('Solr response:')
411411
print(request.text, flush=True)
412412
except Exception:
413-
print('Solr error response:', file=sys.stderr)
413+
print('Solr error response for url ' + str(url) + ": ", file=sys.stderr)
414414
print(request.text, file=sys.stderr, flush=True)
415415
raise
416416

Model/config/SiteSearchData/model.prop.tmpl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,5 @@
44
PROJECT_ID=$PROJECT_ID
55
COMMENT_DBLINK=@prodn.login_comment
66
COMMENT_SCHEMA=userlogins5.
7+
VDI_CONTROL_SCHEMA=$VDI_CONTROL_SCHEMA
78

Model/data/ApiCommon/documentTypeCategories.json

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,12 @@
7979
"displayName": "Search",
8080
"displayNamePlural": "Searches",
8181
"hasOrganismField": false
82-
}
82+
},
83+
{ "id": "community-dataset",
84+
"displayName": "Community Dataset",
85+
"displayNamePlural": "Community Datasets",
86+
"hasOrganismField": true
87+
}
8388
]
8489
},
8590
{ "name": "Instructional",

Model/data/EDA/documentTypeCategories.json

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,15 @@
22
{ "name": "Study",
33
"documentTypes": [
44
{ "id": "dataset",
5-
"displayName": "Study",
6-
"displayNamePlural": "Studies",
5+
"displayName": "Curated Study",
6+
"displayNamePlural": "Curated Studies",
77
"hasOrganismField": false
8-
}
8+
},
9+
{ "id": "community-dataset",
10+
"displayName": "Community Study",
11+
"displayNamePlural": "Community Studies",
12+
"hasOrganismField": false
13+
}
914
]
1015
},
1116
{ "name": "Variable",

Model/lib/wdk/EDA/siteSearchModel.xml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99
<wdkModel>
1010

11+
<constant name="projectIdPropList">projectId</constant> <!-- used by the community studies process query -->
12+
1113
<modelName displayName="SiteSearch"
1214
version="%%buildNumber%%"
1315
releaseDate="%%releaseDate%%"
@@ -20,6 +22,20 @@
2022
recordClassRef="datasetRecordClasses.dataset">
2123
</question>
2224

25+
<question name="CommunityDatasets" displayName="dontcare"
26+
queryRef="CommunityDatasetIdQueries.AllCommunityDatasets"
27+
recordClassRef="communityDatasetRecordClasses.communityDataset">
28+
<!-- use a property list to pass model.prop's PROJECT_ID through to the process query -->
29+
<!-- the string "projectId" is used by the process query to find this property list-->
30+
<propertyList name="%%projectIdPropList%%">
31+
<value>@PROJECT_ID@</value>
32+
</propertyList>
33+
<dynamicAttributes>
34+
<columnAttribute name="owner_name" displayName="Owner name"/>
35+
<columnAttribute name="owner_institution" displayName="Owner institution"/>
36+
</dynamicAttributes>
37+
</question>
38+
2339
<question name="Variables" displayName="dontcare"
2440
queryRef="VariableIdQueries.AllVariables"
2541
recordClassRef="variableRecordClasses.variable">
@@ -35,6 +51,9 @@
3551
<import file="Shared/datasetRecord.xml"/>
3652
<import file="Shared/datasetQueries.xml"/>
3753

54+
<import file="Shared/commDatasetRecord.xml"/>
55+
<import file="Shared/commDatasetQueries.xml"/>
56+
3857
<import file="EDA/variableRecord.xml"/>
3958
<import file="EDA/variableQueries.xml"/>
4059

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
<wdkModel>

  <!-- ID query for community datasets: delegates to the
       CommunityStudyIdsPlugin process plugin, which supplies dataset_id
       plus the owner_name and owner_institution columns. -->
  <querySet name="CommunityDatasetIdQueries" queryType="id" isCacheable="true">

    <processQuery name="AllCommunityDatasets"
                  processName="org.eupathdb.sitesearch.wsfplugin.CommunityStudyIdsPlugin">
      <wsColumn name="dataset_id" width="50" wsName="dataset_id"/>
      <wsColumn name="owner_name" width="100" wsName="owner_name"/>
      <wsColumn name="owner_institution" width="200" wsName="owner_institution"/>
    </processQuery>
  </querySet>

  <!-- Attribute query. Like every SQL query in this file it reads
       @VDI_CONTROL_SCHEMA@.AvailableUserDatasets restricted to
       project_id = '@PROJECT_ID@', is_public = 1 and is_owner = 1. -->
  <querySet name="CommunityDatasetAttributes" queryType="attribute" isCacheable="false">
    <sqlQuery name="All">
      <column name="hyperlinkName"/>
      <column name="dataset_id"/>
      <column name="display_name"/>
      <column name="project"/>
      <column name="description"/>
      <column name="summary"/>
      <sql>
        <![CDATA[
          select user_dataset_id as dataset_id, project_id as project,
                 name, name as display_name, name as hyperlinkName, description, summary
          from @VDI_CONTROL_SCHEMA@.AvailableUserDatasets
          where project_id = '@PROJECT_ID@'
            and is_public = 1 and is_owner = 1
        ]]>
      </sql>
    </sqlQuery>

  </querySet>

  <!-- Table queries: each joins AvailableUserDatasets (alias aud) to one
       per-dataset detail table on dataset_id. -->
  <querySet name="CommunityDatasetTables" queryType="table" isCacheable="false">
    <sqlQuery name="publications">
      <column name="dataset_id"/>
      <column name="pubmed_id"/>
      <column name="citation"/>
      <sql>
        <![CDATA[
          select aud.user_dataset_id as dataset_id, pubmed_id, citation
          from @VDI_CONTROL_SCHEMA@.AvailableUserDatasets aud,
               @VDI_CONTROL_SCHEMA@.dataset_publication dp
          where aud.project_id = '@PROJECT_ID@'
            and aud.is_public = 1 and aud.is_owner = 1
            and aud.dataset_id = dp.dataset_id
        ]]>
      </sql>
    </sqlQuery>

    <sqlQuery name="hyperlinks">
      <column name="dataset_id"/>
      <column name="url"/>
      <column name="text"/>
      <column name="description"/>
      <sql>
        <![CDATA[
          select aud.user_dataset_id as dataset_id, url, text, description
          from @VDI_CONTROL_SCHEMA@.AvailableUserDatasets aud,
               @VDI_CONTROL_SCHEMA@.dataset_hyperlinks dh
          where aud.project_id = '@PROJECT_ID@'
            and aud.is_public = 1 and aud.is_owner = 1
            and aud.dataset_id = dh.dataset_id
        ]]>
      </sql>
    </sqlQuery>

    <!-- The dataset_organism alias is "dorg" rather than "do" because DO is
         a reserved word in some SQL dialects (e.g. PostgreSQL). -->
    <sqlQuery name="organisms">
      <column name="dataset_id"/>
      <column name="organism_name"/>
      <sql>
        <![CDATA[
          select aud.user_dataset_id as dataset_id, organism_name
          from @VDI_CONTROL_SCHEMA@.AvailableUserDatasets aud,
               @VDI_CONTROL_SCHEMA@.dataset_organism dorg, apidbtuning.organismAttributes oa
          where aud.project_id = '@PROJECT_ID@'
            and aud.is_public = 1 and aud.is_owner = 1
            and aud.dataset_id = dorg.dataset_id
            and dorg.organism_abbrev = oa.internal_abbrev
        ]]>
      </sql>
    </sqlQuery>

    <!-- Joins on the dataset_contact alias "dc"; the alias "dh" belongs to
         the hyperlinks query and is not defined here. -->
    <sqlQuery name="contacts">
      <column name="dataset_id"/>
      <column name="contact_name"/>
      <column name="affiliation"/>
      <sql>
        <![CDATA[
          select aud.user_dataset_id as dataset_id,
                 dc.name as contact_name, affiliation
          from @VDI_CONTROL_SCHEMA@.AvailableUserDatasets aud,
               @VDI_CONTROL_SCHEMA@.dataset_contact dc
          where aud.project_id = '@PROJECT_ID@'
            and aud.is_public = 1 and aud.is_owner = 1
            and aud.dataset_id = dc.dataset_id
        ]]>
      </sql>
    </sqlQuery>


  </querySet>

</wdkModel>

0 commit comments

Comments
 (0)