Skip to content

Commit 664187c

Browse files
authored
Merge pull request #1501 from MITLibraries/etd-685
Add support for DSpace 8 metadata
2 parents e6bf8c9 + 3f47348 commit 664187c

5 files changed

Lines changed: 124 additions & 25 deletions

File tree

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,8 @@ We use AWS SQS queues to publish theses to DSpace and read data about published
147147
`DSPACE_DOCTORAL_HANDLE` - The handle for the collection to use for depositing Doctoral theses.
148148
`DSPACE_GRADUATE_HANDLE` - The handle for the collection to use for depositing Graduate theses.
149149
`DSPACE_UNDERGRADUATE_HANDLE` - The handle for the collection to use for depositing Undergraduate theses.
150-
150+
`DSPACE_V8_METADATA` - Toggle metadata format for publication payloads. Set to `true` for DSpace 8 format.
151+
Set to `false` for DSpace 6 `metadata` array format. Default is `false`.
151152
`SQS_INPUT_QUEUE_URL` - The URL of the SQS input queue used for publication to DSpace.
152153
`SQS_OUTPUT_QUEUE_NAME` - The name of the SQS output queue. This is used to build the SQS message attributes.
153154
`SQS_OUTPUT_QUEUE_URL` - The URL of the SQS output queue used to read the results from a publication run.

app.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
"AWS_SECRET_ACCESS_KEY": {
1818
"required": true
1919
},
20+
"DSPACE_V8_METADATA": "false",
2021
"DISABLE_ALL_EMAIL": "true",
2122
"FAKE_AUTH_ENABLED": "true",
2223
"HEROKU_APP_NAME": {

app/models/dspace_metadata.rb

Lines changed: 77 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44

55
class DspaceMetadata
66
def initialize(thesis)
7-
@dc = {}.compare_by_identity
8-
@dc['dc.publisher'] = 'Massachusetts Institute of Technology'
9-
@dc['dc.type'] = 'Thesis'
7+
@metadata_entries = []
8+
add_metadata('dc.publisher', 'Massachusetts Institute of Technology')
9+
add_metadata('dc.type', 'Thesis')
1010
title(thesis)
1111
contributors(thesis.users, thesis.advisors)
1212
departments(thesis.departments)
@@ -17,22 +17,26 @@ def initialize(thesis)
1717

1818
# Generates JSON metadata file required for submission to DSS.
1919
def serialize_dss_metadata
20-
{ 'metadata' => @dc.map { |k, v| { 'key' => k, 'value' => v } } }.to_json
20+
if Flipflop.enabled?(:dspace_v8_metadata)
21+
serialize_dspace8.to_json
22+
else
23+
{ 'metadata' => serialize_dspace6 }.to_json
24+
end
2125
end
2226

2327
def title(thesis)
24-
@dc['dc.title'] = thesis.title
25-
@dc['dc.description.abstract'] = thesis.abstract if thesis.abstract
26-
@dc['dc.date.issued'] = thesis.grad_date.strftime('%Y-%m')
28+
add_metadata('dc.title', thesis.title)
29+
add_metadata('dc.description.abstract', thesis.abstract) if thesis.abstract
30+
add_metadata('dc.date.issued', thesis.grad_date.strftime('%Y-%m'))
2731
end
2832

2933
def contributors(thesis_users, thesis_advisors)
3034
thesis_users.each do |a|
31-
@dc['dc.contributor.author'] = a.preferred_name
35+
add_metadata('dc.contributor.author', a.preferred_name)
3236
end
3337
parse_orcids(thesis_users)
3438
thesis_advisors.each do |adv|
35-
@dc['dc.contributor.advisor'] = adv.name
39+
add_metadata('dc.contributor.advisor', adv.name)
3640
end
3741
end
3842

@@ -44,49 +48,98 @@ def parse_orcids(thesis_users)
4448
return unless orcids.present?
4549

4650
orcids.each do |orcid|
47-
@dc['dc.identifier.orcid'] = orcid
51+
add_metadata('dc.identifier.orcid', orcid)
4852
end
4953
end
5054

5155
def departments(thesis_depts)
5256
thesis_depts.each do |d|
53-
@dc['dc.contributor.department'] = d.name_dspace
57+
add_metadata('dc.contributor.department', d.name_dspace)
5458
end
5559
end
5660

5761
def degrees(thesis_degrees)
5862
thesis_degrees.each do |degree|
59-
@dc['dc.description.degree'] = degree.abbreviation
60-
@dc['thesis.degree.name'] = degree.name_dspace
63+
add_metadata('dc.description.degree', degree.abbreviation)
64+
add_metadata('thesis.degree.name', degree.name_dspace)
6165
end
6266

6367
# Degree types should not be repeated if they are the same type.
6468
types = thesis_degrees.map { |degree| degree.degree_type.name }.uniq
6569
types.each do |t|
66-
@dc['mit.thesis.degree'] = t
70+
add_metadata('mit.thesis.degree', t)
6771
end
6872
end
6973

7074
def copyright(thesis_copyright, thesis_license)
7175
if thesis_copyright.holder != 'Author' # copyright holder is anyone but author
72-
@dc['dc.rights'] = thesis_copyright.statement_dspace
73-
@dc['dc.rights'] = "Copyright #{thesis_copyright.holder}"
74-
@dc['dc.rights.uri'] = thesis_copyright.url if thesis_copyright.url
76+
add_metadata('dc.rights', thesis_copyright.statement_dspace)
77+
add_metadata('dc.rights', "Copyright #{thesis_copyright.holder}")
78+
add_metadata('dc.rights.uri', thesis_copyright.url) if thesis_copyright.url
7579
elsif thesis_license # author holds copyright and provides a license
76-
@dc['dc.rights'] = thesis_license.map_license_type
77-
@dc['dc.rights'] = 'Copyright retained by author(s)'
80+
add_metadata('dc.rights', thesis_license.map_license_type)
81+
add_metadata('dc.rights', 'Copyright retained by author(s)')
7882

7983
# Theoretically both license and copyright URLs are required for publication, but there are no constraints on
8084
# the models, and we want to future-proof this.
81-
@dc['dc.rights.uri'] = thesis_license.evaluate_license_url
85+
add_metadata('dc.rights.uri', thesis_license.evaluate_license_url)
8286
else # author holds copyright and no license provided
83-
@dc['dc.rights'] = thesis_copyright.statement_dspace
84-
@dc['dc.rights'] = 'Copyright retained by author(s)'
85-
@dc['dc.rights.uri'] = thesis_copyright.url if thesis_copyright.url
87+
add_metadata('dc.rights', thesis_copyright.statement_dspace)
88+
add_metadata('dc.rights', 'Copyright retained by author(s)')
89+
add_metadata('dc.rights.uri', thesis_copyright.url) if thesis_copyright.url
8690
end
8791
end
8892

8993
def date_transferred(files)
90-
@dc['dc.date.submitted'] = files.select { |file| file.purpose == 'thesis_pdf' }.first.blob.created_at
94+
add_metadata('dc.date.submitted', files.select { |file| file.purpose == 'thesis_pdf' }.first.blob.created_at)
95+
end
96+
97+
private
98+
99+
def add_metadata(key, value)
100+
return if value.nil?
101+
102+
@metadata_entries << { 'key' => key, 'value' => value }
103+
end
104+
105+
# DSpace 6 expects metadata to be sent as a flat array of key/value pairs under
106+
# a top-level "metadata" key (added by serialize_dss_metadata).
107+
#
108+
# Example returned by this method:
109+
# [
110+
# { 'key' => 'dc.title', 'value' => 'My Thesis' },
111+
# { 'key' => 'dc.contributor.author', 'value' => 'Student, Second' },
112+
# { 'key' => 'dc.contributor.author', 'value' => 'Student, Third' }
113+
# ]
114+
def serialize_dspace6
115+
@metadata_entries
116+
end
117+
118+
# DSpace 8 expects top-level metadata keys, where each key maps to an array of
119+
# value objects. We convert from our internal flat entries so both DSpace 6 and
120+
# DSpace 8 serializers can share the same source data.
121+
#
122+
# Example returned by this method:
123+
# {
124+
# 'dc.title' => [{ 'value' => 'My Thesis' }],
125+
# 'dc.contributor.author' => [
126+
# { 'value' => 'Student, Second' },
127+
# { 'value' => 'Student, Third' }
128+
# ]
129+
# }
130+
#
131+
# Note: language is intentionally omitted for now (out of scope).
132+
def serialize_dspace8
133+
result = {}
134+
135+
@metadata_entries.each do |entry|
136+
key = entry['key']
137+
value = entry['value']
138+
139+
result[key] ||= []
140+
result[key] << { 'value' => value }
141+
end
142+
143+
result
91144
end
92145
end

config/features.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,8 @@
66
feature :maintenance_mode,
77
default: ENV.fetch('MAINTENANCE_MODE', false),
88
description: "Put application in maintenance mode, disabling file transfer uploads."
9+
10+
feature :dspace_v8_metadata,
11+
default: ENV.fetch('DSPACE_V8_METADATA', false),
12+
description: "Use DSpace 8 metadata format instead of DSpace 6 metadata format."
913
end

test/models/dspace_metadata_test.rb

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
require 'test_helper'
22

33
class DspaceMetadataTest < ActiveSupport::TestCase
4+
setup do
5+
Flipflop::FeatureSet.current.test!.switch!(:dspace_v8_metadata, false)
6+
end
7+
48
# Attaching thesis file so tests will pass
59
def dss_friendly_thesis(thesis)
610
file = Rails.root.join('test', 'fixtures', 'files', 'a_pdf.pdf')
@@ -347,4 +351,40 @@ def dss_friendly_thesis(thesis)
347351
assert_equal unserialized['metadata'].first, { 'key' => 'dc.publisher',
348352
'value' => 'Massachusetts Institute of Technology' }
349353
end
354+
355+
test 'metadata serializes in DSpace 6 format when feature flag is disabled' do
356+
test_strategy = Flipflop::FeatureSet.current.test!
357+
test_strategy.switch!(:dspace_v8_metadata, false)
358+
359+
t = theses(:one)
360+
dss_friendly_thesis(t)
361+
serialized = DspaceMetadata.new(t).serialize_dss_metadata
362+
unserialized = JSON.parse(serialized)
363+
364+
assert_equal ['metadata'], unserialized.keys
365+
assert_kind_of Array, unserialized['metadata']
366+
assert unserialized['metadata'].include?({ 'key' => 'dc.title', 'value' => 'MyString' })
367+
end
368+
369+
test 'metadata serializes in DSpace 8 format when feature flag is enabled' do
370+
test_strategy = Flipflop::FeatureSet.current.test!
371+
test_strategy.switch!(:dspace_v8_metadata, true)
372+
373+
t = Thesis.create(title: 'Who cares', graduation_year: '2021', graduation_month: 'February',
374+
advisors: [advisors(:first), advisors(:second)],
375+
users: [users(:second), users(:third)],
376+
degrees: [degrees(:one), degrees(:two)],
377+
departments: [departments(:one), departments(:two)],
378+
copyright: copyrights(:mit))
379+
dss_friendly_thesis(t)
380+
serialized = DspaceMetadata.new(t).serialize_dss_metadata
381+
unserialized = JSON.parse(serialized)
382+
383+
refute unserialized.key?('metadata')
384+
assert_kind_of Array, unserialized['dc.contributor.author']
385+
assert_equal({ 'value' => 'Student, Second' }, unserialized['dc.contributor.author'].first)
386+
assert_includes unserialized['dc.contributor.author'], { 'value' => 'Student, Third' }
387+
assert_includes unserialized['dc.contributor.advisor'], { 'value' => 'Addy McAdvisor' }
388+
assert_includes unserialized['dc.contributor.advisor'], { 'value' => 'Viola McAdvisor' }
389+
end
350390
end

0 commit comments

Comments
 (0)