Skip to content

Commit 61ee678

Browse files
committed
Simplify mGAP ETL
1 parent 5c19329 commit 61ee678

1 file changed

Lines changed: 26 additions & 114 deletions

File tree

mGAP/resources/etls/clinicalData.xml

Lines changed: 26 additions & 114 deletions
Original file line number | Diff line number | Diff line change
@@ -3,23 +3,6 @@
33
<name>ClinicalData</name>
44
<description>EHR Clinical Data</description>
55
<transforms>
6-
<transform id="dataset1" type="RemoteQueryTransformStep">
7-
<source schemaName="lists" queryName="chemistryResults" remoteSource="EHR_ClinicalSource" timestampColumnName="modified">
8-
<sourceColumns>
9-
<column>Id</column>
10-
<column>date</column>
11-
<column>ageAtTime</column>
12-
<column>testId</column>
13-
<column>result</column>
14-
<column>units</column>
15-
<column>runid</column>
16-
<column>objectid</column>
17-
</sourceColumns>
18-
</source>
19-
<destination schemaName="study" queryName="chemistryResults" targetOption="merge" bulkLoad="true" batchSize="2500">
20-
21-
</destination>
22-
</transform>
236
<transform id="dataset2" type="RemoteQueryTransformStep">
247
<source schemaName="lists" queryName="demographics" remoteSource="EHR_ClinicalSource" timestampColumnName="modified">
258
<sourceColumns>
@@ -33,111 +16,40 @@
3316
<column>avgKinship</column>
3417
<column>objectid</column>
3518
</sourceColumns>
19+
<sourceFilters>
20+
<sourceFilter column="QCState/Label" operator="eq" value="Completed"/>
21+
</sourceFilters>
3622
</source>
3723
<destination schemaName="study" queryName="demographics" targetOption="merge" bulkLoad="true" batchSize="2500">
3824

3925
</destination>
4026
</transform>
41-
<transform id="dataset3" type="RemoteQueryTransformStep">
42-
<source schemaName="lists" queryName="microbiology" remoteSource="EHR_ClinicalSource" timestampColumnName="modified">
43-
<sourceColumns>
44-
<column>Id</column>
45-
<column>date</column>
46-
<column>ageAtTime</column>
47-
<column>tissue</column>
48-
<column>organism</column>
49-
<column>quantity</column>
50-
<column>runid</column>
51-
<column>objectid</column>
52-
</sourceColumns>
53-
</source>
54-
<destination schemaName="study" queryName="microbiology" targetOption="merge" bulkLoad="true" batchSize="2500">
27+
<transform id="rc_2" type="TaskrefTransformStep">
28+
<taskref ref="org.labkey.primeseq.etl.VerifyRowCount">
29+
<settings>
30+
<setting name="sourceRemoteSource" value="EHR_ClinicalSource"/>
31+
<setting name="sourceSchema" value="lists"/>
32+
<setting name="sourceQuery" value="demographics"/>
33+
<setting name="sourceColumn" value="objectId"/>
34+
<setting name="sourceAdditionalFilters" value="qcstate/label~eq=Completed"/>
5535

56-
</destination>
57-
</transform>
58-
<transform id="dataset4" type="RemoteQueryTransformStep">
59-
<source schemaName="lists" queryName="weight" remoteSource="EHR_ClinicalSource" timestampColumnName="modified">
60-
<sourceColumns>
61-
<column>Id</column>
62-
<column>date</column>
63-
<column>ageAtTime</column>
64-
<column>weight</column>
65-
<column>objectid</column>
66-
</sourceColumns>
67-
</source>
68-
<destination schemaName="study" queryName="weight" targetOption="merge" bulkLoad="true" batchSize="2500">
69-
70-
</destination>
71-
</transform>
72-
<transform id="dataset5" type="RemoteQueryTransformStep">
73-
<source schemaName="lists" queryName="clinpathRuns" remoteSource="EHR_ClinicalSource" timestampColumnName="modified">
74-
<sourceColumns>
75-
<column>Id</column>
76-
<column>date</column>
77-
<column>ageAtTime</column>
78-
<column>type</column>
79-
<column>tissue</column>
80-
<column>units</column>
81-
<column>servicerequested</column>
82-
<column>collectionMethod</column>
83-
<column>method</column>
84-
<column>objectid</column>
85-
</sourceColumns>
86-
</source>
87-
<destination schemaName="study" queryName="clinpathRuns" bulkLoad="true" targetOption="merge">
88-
89-
</destination>
90-
</transform>
91-
<transform id="dataset6" type="RemoteQueryTransformStep">
92-
<source schemaName="lists" queryName="hematologyResults" remoteSource="EHR_ClinicalSource" timestampColumnName="modified">
93-
<sourceColumns>
94-
<column>Id</column>
95-
<column>date</column>
96-
<column>ageAtTime</column>
97-
<column>testid</column>
98-
<column>result</column>
99-
<column>units</column>
100-
<column>runid</column>
101-
<column>objectid</column>
102-
</sourceColumns>
103-
</source>
104-
<destination schemaName="study" queryName="hematologyResults" targetOption="merge" bulkLoad="true" batchSize="2500">
105-
106-
</destination>
107-
</transform>
108-
<transform id="dataset7" type="RemoteQueryTransformStep">
109-
<source schemaName="lists" queryName="pathologyDiagnoses" remoteSource="EHR_ClinicalSource" timestampColumnName="modified" sourceTimeout="0">
110-
<sourceColumns>
111-
<column>Id</column>
112-
<column>date</column>
113-
<column>ageAtTime</column>
114-
<column>sort_order</column>
115-
<column>codes</column>
116-
<column>objectid</column>
117-
</sourceColumns>
118-
</source>
119-
<destination schemaName="study" queryName="pathologyDiagnoses" targetOption="merge" bulkLoad="true" batchSize="2500">
120-
121-
</destination>
122-
</transform>
123-
<transform id="dataset8" type="RemoteQueryTransformStep">
124-
<source schemaName="lists" queryName="histology" remoteSource="EHR_ClinicalSource" timestampColumnName="modified" sourceTimeout="0">
125-
<sourceColumns>
126-
<column>Id</column>
127-
<column>date</column>
128-
<column>ageAtTime</column>
129-
<column>sort_order</column>
130-
<column>tissue</column>
131-
<column>codes</column>
132-
<column>objectid</column>
133-
</sourceColumns>
134-
</source>
135-
<destination schemaName="study" queryName="histology" targetOption="merge" bulkLoad="true" batchSize="2500">
136-
137-
</destination>
36+
<setting name="destSchema" value="study"/>
37+
<setting name="destQuery" value="demographics"/>
38+
<setting name="destColumn" value="objectId"/>
39+
</settings>
40+
</taskref>
13841
</transform>
13942
</transforms>
140-
<incrementalFilter className="ModifiedSinceFilterStrategy" timestampColumnName="modified" pkColumnName="objectid"/>
43+
<incrementalFilter className="ModifiedSinceFilterStrategy" timestampColumnName="modified" pkColumnName="objectid">
44+
<deletedRowsSource remoteSource="EHR_ClinicalSource" schemaName="AuditSummary" queryName="DatasetUpdateAuditLog" timestampColumnName="Created" deletedSourceKeyColumnName="primaryKey" targetKeyColumnName="objectid">
45+
<sourceFilters>
46+
<sourceFilter column="Comment" operator="contains" value="Delete"/>
47+
</sourceFilters>
48+
<sourceColumns>
49+
<column>primaryKey</column>
50+
</sourceColumns>
51+
</deletedRowsSource>
52+
</incrementalFilter>
14153
<schedule>
14254
<cron expression="0 30 1 * * ?"/>
14355
</schedule>

0 commit comments

Comments
 (0)