forked from USGCRP/gcis-scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrefs-to-orgs.pl
More file actions
executable file
·104 lines (86 loc) · 2.69 KB
/
refs-to-orgs.pl
File metadata and controls
executable file
·104 lines (86 loc) · 2.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/env perl
# Convert organizations in bibliographic entries (which are
# of type 'Report') into orgs. Associate those orgs with
# the reference from which they came.
use v5.14;
use Gcis::Client;
use Data::Dumper;
use Smart::Comments;
use Encode;
# Print a short usage summary to STDOUT and terminate the script.
# Takes no arguments and never returns: it ends with a bare exit
# (exit status 0).
sub usage {
    my @help_lines = (
        '',
        'Usage : ./refs-to-orgs [-n] <url> <report>',
        'Example : ./refs-to-orgs -n http://data.gcis-dev-front.joss.ucar.edu nca3draft',
        '',
    );
    # Joining with newlines yields a leading blank line and a trailing newline.
    print join "\n", @help_lines;
    exit;
}
# Command-line handling: an optional leading -n selects dry-run mode, then
# comes the URL (required) and the report identifier (defaults to 'nca3').
usage() unless @ARGV;

# $dry_run holds the literal '-n' when the flag was given, undef otherwise.
my $dry_run = $ARGV[0] eq '-n' ? shift @ARGV : undef;

my ($url, $report) = @ARGV;
usage() unless $url;
$report ||= 'nca3';

# Client handle used for every request made by the rest of the script.
my $c = Gcis::Client->connect(url => $url);

say "Dry run" if $dry_run;
# Split a comma-separated string of organization names into a cleaned list.
#
# Parameter : $str - the raw string; may be undef or empty.
# Returns   : an array reference of cleaned names (possibly empty).
#
# Each comma-separated piece is cleaned as follows:
#   - surrounding whitespace of any kind is trimmed (not only spaces),
#   - a leading "and" followed by whitespace is dropped,
#   - the first "; " sequence is removed,
#   - one trailing period is removed,
#   - a leading "US " / "U.S " / "US. " / "U.S. " is normalized to "U.S. ".
# Pieces that end up empty, or that are just "inc" or "llc" (any case), are
# discarded.
sub split_orgs {
    my $str = shift;

    # No source string at all means no organizations.
    return [] unless defined $str;

    my @orgs;
    for my $piece (split /,/, $str) {
        my $name = $piece;

        $name =~ s/^\s+|\s+$//g;
        $name =~ s/^and\s+//;
        # NOTE(review): this deletes the separator outright rather than
        # splitting on it; the intent is not evident from this file - confirm.
        $name =~ s/; //;
        $name =~ s/\.$//;
        # Trim again: removing "and" or the period can expose new whitespace.
        $name =~ s/^\s+|\s+$//g;
        $name =~ s/^U\.?S\.? /U.S. /;

        next unless length $name;
        next if $name =~ /\A(?:inc|llc)\z/i;
        push @orgs, $name;
    }
    return \@orgs;
}
# Counters for organization lookups: 'existing' names were found by the
# lookup call; 'new' names were not (and are created unless in dry-run mode).
my %stats = ( existing => 0, new => 0 );

# Walk every reference of the report; only those whose reftype is 'Report'
# are processed. For each organization name extracted from the reference,
# look the organization up (creating it if missing) and post it as an
# 'author' contributor on the reference's child publication.
#
# NOTE: the trailing triple-hash comment on the loop's opening line is a
# Smart::Comments progress directive, not an ordinary comment. Keep it on
# that line, unchanged.
for my $ref ($c->get("/report/$report/reference?all=1")) { ### Processing--->[%] done
    next unless $ref->{attrs}{reftype} eq 'Report';
    my $reference_identifier = $ref->{identifier};

    my $organizations = split_orgs($ref->{attrs}{Institution} || $ref->{attrs}{institution} || $ref->{attrs}{publisher});

    # The contributor endpoint depends only on the reference, not on the
    # organization, so it is resolved at most once per reference. It is
    # resolved lazily (on first use) so that the organization lookup and
    # creation for the first name still happen before the publication fetch.
    my $add_contributor_uri;
    my $uri_resolved = 0;

    for my $organization_name (@$organizations) {
        my $org = $c->post_quiet("/organization/lookup/name",
            {name => $organization_name});
        if ($org) {
            $stats{existing}++;
        } else {
            $stats{new}++;
            unless ($dry_run) {
                $org = $c->post("/organization", {name => $organization_name}) or do {
                    warn $c->error;
                    next;
                };
            }
        }

        # Work out where contributors for this reference's publication are
        # posted: the publication's uri with "/report" rewritten to
        # "/report/contributors".
        unless ($uri_resolved) {
            $uri_resolved = 1;
            my $pub = $c->get("/publication/$ref->{child_publication_id}");
            # Anything other than a hash (failed or unexpected response) is
            # treated as having no uri instead of being dereferenced.
            my $uri = ref($pub) eq 'HASH' ? $pub->{uri} : undef;
            if (defined $uri && $uri =~ s[/report][/report/contributors]) {
                $add_contributor_uri = $uri;
            } else {
                # Reported once per reference; remaining organizations of
                # this reference are still looked up or created below.
                warn "could not match update_contributors in uri : " . ($uri // '');
            }
        }
        next unless defined $add_contributor_uri;

        # A dry run performs lookups only; nothing is posted.
        next if $dry_run;

        $c->post(
            $add_contributor_uri => {
                organization_identifier => $org->{identifier},
                role                    => 'author',
                reference_identifier    => $reference_identifier
            }
        ) or warn $c->error;
    }
}
say "stats : ".Dumper(\%stats);