Skip to content
This repository was archived by the owner on Jun 18, 2022. It is now read-only.

Commit ba87c9e

Browse files
author
Dmitriy "DK" Korobskiy
committed
Merge remote-tracking branch 'origin/master'
2 parents 2445a41 + dea555f commit ba87c9e

1 file changed

Lines changed: 14 additions & 7 deletions

File tree

Analysis/affymetrix/garfield_import_dmet_twog.sql

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
-- Script to generate csv files for import into neo4j
22
-- This is the affymetrix case study tracing the affy seedset of <= 1991 to the DMET Plus
3-
-- Panel of 2017
3+
-- Panel of <= 2017 based on keyword searches in PubMed
44
-- Author: George Chacko 3/17/2018
55

66
-- End point is the garfield_hgraph series, which contains 23 wos_ids from Garfield's microarray historiograph
77
-- Starting point is all papers identified in a keyword search in PubMed for DMET Plus
8-
-- Publications are connected/related by citation. The target is cited by the source.
8+
-- Publications are connected/related by citation. The target is cited by the source. Two generations each of cited and citing references from
9+
-- start and endpoints respectively are included in this network
910

1011
-- Citation endpoint is 23 pubs in the garfield_historiograph
1112
DROP TABLE IF EXISTS garfield_hgraph_end;
@@ -15,7 +16,9 @@ FROM wos_publications WHERE source_id IN
1516
(select distinct wos_id from garfield_hgraph2) AND
1617
publication_year <= 1992;
1718

18-
-- get first gen of citing references
19+
-- get first gen of citing references (note target:cited_source_uid polarity to preserve a cites b)
20+
-- the 9 endrefs are cited by gen1 pubs
21+
1922
DROP TABLE IF EXISTS garfield_gen1;
2023
CREATE TABLE garfield_gen1 AS
2124
SELECT source_id AS source, cited_source_uid AS target,
@@ -34,7 +37,7 @@ FROM wos_references WHERE cited_source_uid IN
3437
CREATE INDEX garfield_gen2_idx ON garfield_gen2(source);
3538

3639
-- Citation starting point is the publications from the DMET Plus keyword search
37-
-- get one generation of cited references
40+
-- get two generations of cited references; polarity is not reversed, since these are cited references, not citing ones
3841
DROP TABLE IF EXISTS garfield_dmet_begina;
3942
CREATE TABLE garfield_dmet_begina AS
4043
SELECT source_id AS source, cited_source_uid AS target,
@@ -98,7 +101,7 @@ SELECT DISTINCT 'n'||substring(target,5),target,ttype
98101
FROM garfield_dmet_begin;
99102

100103
-- gen1_cited
101-
INSERT INTO garfield_node_assembly(node_id,node_name,ttype)
104+
INSERT INTO garfield_node_assembly(node_id,node_name,stype)
102105
SELECT DISTINCT 'n'||substring(source,5),source,stype
103106
FROM garfield_dmet_twog;
104107

@@ -141,8 +144,12 @@ CREATE INDEX garfield_edgelist_idx ON garfield_edgelist(source,target);
141144

142145
-- create formatted nodelist with unique node_ids
143146
DROP TABLE IF EXISTS garfield_nodelist_formatted_a;
144-
CREATE TABLE garfield_nodelist_formatted_a (node_id varchar(16), node_name varchar(19), stype varchar(10), ttype varchar(10), startref varchar(10), endref varchar(10));
145-
INSERT INTO garfield_nodelist_formatted_a (node_id,node_name,stype,ttype) SELECT DISTINCT * FROM garfield_nodelist;
147+
CREATE TABLE garfield_nodelist_formatted_a (node_id varchar(16), node_name varchar(19),
148+
stype varchar(10), ttype varchar(10),
149+
startref varchar(10), endref varchar(10));
150+
INSERT INTO garfield_nodelist_formatted_a (node_id,node_name,stype,ttype)
151+
SELECT DISTINCT * FROM garfield_nodelist;
152+
146153
UPDATE garfield_nodelist_formatted_a SET startref=1 WHERE stype='startref';
147154
UPDATE garfield_nodelist_formatted_a SET startref=0 WHERE stype='source' OR stype IS NULL;
148155
UPDATE garfield_nodelist_formatted_a SET endref=1 WHERE ttype='endref';

0 commit comments

Comments
 (0)