11-- Script to generate csv files for import into neo4j
22-- This is the affymetrix case study tracing the affy seedset of <= 1991 to the DMET Plus
3- -- Panel of 2017
3+ -- Panel of <= 2017 based on keyword searches in PubMed
44-- Author: George Chacko 3/17/2018
55
66-- End point is the garfield_hgraph series, which contains 23 wos_ids from Garfield's microarray historiograph
77-- Starting point is all papers identified in a keyword search in PubMed for DMET Plus
8- -- Publications are connected/related by citation. The target is cited by the source.
8+ -- Publications are connected/related by citation. The target is cited by the source. Two generations of cited references from the
9+ -- start points and two generations of citing references from the end points are included in this network.
910
1011-- Citation endpoint is 23 pubs in the garfield_historiograph
1112DROP TABLE IF EXISTS garfield_hgraph_end;
@@ -15,7 +16,9 @@ FROM wos_publications WHERE source_id IN
1516(select distinct wos_id from garfield_hgraph2) AND
1617publication_year <= 1992 ;
1718
18- -- get first gen of citing references
19+ -- get first gen of citing references (note: source_id is the citing paper (source) and cited_source_uid the cited paper (target), which preserves "a cites b")
20+ -- the 9 endrefs are cited by gen1 pubs
21+
1922DROP TABLE IF EXISTS garfield_gen1;
2023CREATE TABLE garfield_gen1 AS
2124SELECT source_id AS source, cited_source_uid AS target,
@@ -34,7 +37,7 @@ FROM wos_references WHERE cited_source_uid IN
3437CREATE INDEX garfield_gen2_idx ON garfield_gen2(source);
3538
3639-- Citation starting point is publications DMET Plus keyword search
37- -- get one generation of cited references
40+ -- get two generations of cited references; polarity is not reversed here, since these are cited (not citing) references
3841DROP TABLE IF EXISTS garfield_dmet_begina;
3942CREATE TABLE garfield_dmet_begina AS
4043SELECT source_id AS source, cited_source_uid AS target,
@@ -98,7 +101,7 @@ SELECT DISTINCT 'n'||substring(target,5),target,ttype
98101FROM garfield_dmet_begin;
99102
100103-- gen1_cited
101- INSERT INTO garfield_node_assembly(node_id,node_name,ttype )
104+ INSERT INTO garfield_node_assembly(node_id,node_name,stype )
102105SELECT DISTINCT ' n' || substring (source,5 ),source,stype
103106FROM garfield_dmet_twog;
104107
@@ -141,8 +144,12 @@ CREATE INDEX garfield_edgelist_idx ON garfield_edgelist(source,target);
141144
142145-- create formatted nodelist with unique node_ids
143146DROP TABLE IF EXISTS garfield_nodelist_formatted_a;
144- CREATE TABLE garfield_nodelist_formatted_a (node_id varchar (16 ), node_name varchar (19 ), stype varchar (10 ), ttype varchar (10 ), startref varchar (10 ), endref varchar (10 ));
145- INSERT INTO garfield_nodelist_formatted_a (node_id,node_name,stype,ttype) SELECT DISTINCT * FROM garfield_nodelist;
147+ CREATE TABLE garfield_nodelist_formatted_a (node_id varchar (16 ), node_name varchar (19 ),
148+ stype varchar (10 ), ttype varchar (10 ),
149+ startref varchar (10 ), endref varchar (10 ));
150+ INSERT INTO garfield_nodelist_formatted_a (node_id,node_name,stype,ttype)
151+ SELECT DISTINCT * FROM garfield_nodelist;
152+
146153UPDATE garfield_nodelist_formatted_a SET startref= 1 WHERE stype= ' startref' ;
147154UPDATE garfield_nodelist_formatted_a SET startref= 0 WHERE stype= ' source' OR stype IS NULL ;
148155UPDATE garfield_nodelist_formatted_a SET endref= 1 WHERE ttype= ' endref' ;
0 commit comments