Skip to content

Commit a9c7c87

Browse files
authored
Merge pull request #3090 from pulibrary/author_roles
Rewrite process_author_roles in Rust
2 parents 3b9a061 + 141d414 commit a9c7c87

14 files changed

Lines changed: 270 additions & 152 deletions

File tree

lib/bibdata_rs/benches/marc_bench.rs

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,27 @@
1-
use bibdata_rs::marc::{call_number::call_number_labels_for_display, genre::genres};
1+
use bibdata_rs::{
2+
marc::{call_number::call_number_labels_for_display, genre::genres},
3+
solr::AuthorRoles,
4+
};
25
use criterion::{Criterion, criterion_group, criterion_main};
36
use marctk::Record;
47

8+
fn author_role_benchmark(c: &mut Criterion) {
9+
let record = Record::from_xml_file("../../spec/fixtures/99100026953506421.mrx")
10+
.unwrap()
11+
.next()
12+
.unwrap()
13+
.unwrap();
14+
let expected = AuthorRoles {
15+
editors: vec!["Nakanishi, Naoki".to_string()],
16+
..Default::default()
17+
};
18+
c.bench_function("author_roles", |b| {
19+
b.iter(|| {
20+
assert_eq!(AuthorRoles::from(&record), expected);
21+
})
22+
});
23+
}
24+
525
fn genre_facet_benchmark(c: &mut Criterion) {
626
let record = Record::from_xml_file("../../spec/fixtures/99100026953506421.mrx")
727
.unwrap()
@@ -31,5 +51,10 @@ fn call_number_benchmark(c: &mut Criterion) {
3151
});
3252
}
3353

34-
criterion_group!(benches, genre_facet_benchmark, call_number_benchmark);
54+
criterion_group!(
55+
benches,
56+
author_role_benchmark,
57+
call_number_benchmark,
58+
genre_facet_benchmark
59+
);
3560
criterion_main!(benches);

lib/bibdata_rs/src/ephemera/ephemera_folder.rs

Lines changed: 7 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
use crate::{
22
ephemera::ephemera_folder::subject::log_subjects_without_exact_match,
3-
solr::{self, AccessFacet},
3+
solr::{self, AccessFacet, AuthorRoles},
44
};
5-
use serde::{Deserializer, Serialize};
5+
use serde::Deserializer;
66

77
use super::{
88
born_digital_collection::ephemera_folders_iterator,
@@ -63,14 +63,6 @@ impl Thumbnail {
6363
.replace("/full/!200,150/0/default.jpg", "/square/225,/0/default.jpg")
6464
}
6565
}
66-
#[derive(Debug, Serialize, PartialEq, Clone)]
67-
pub struct AuthorRoles {
68-
pub secondary_authors: Vec<String>,
69-
pub translators: Vec<String>,
70-
pub editors: Vec<String>,
71-
pub compilers: Vec<String>,
72-
pub primary_author: String,
73-
}
7466

7567
// VecSafe is a wrapper around Vec<T> that provides safe deserialization from JSON arrays
7668
// containing objects. It ignores any elements that fail to deserialize into T.
@@ -225,27 +217,14 @@ impl EphemeraFolder {
225217
None => vec![],
226218
}
227219
}
228-
pub fn group_contributors(&self) -> Option<String> {
229-
let primary_author = self
230-
.creator
231-
.as_ref()
232-
.and_then(|v| v.first())
233-
.cloned()
234-
.unwrap_or_default();
235-
let secondary_authors = self.contributor.clone().unwrap_or_default();
236-
if primary_author.is_empty() && secondary_authors.is_empty() {
237-
return None;
238-
}
239-
240-
let roles = AuthorRoles {
241-
secondary_authors,
220+
pub fn group_contributors(&self) -> AuthorRoles {
221+
AuthorRoles {
222+
primary_author: self.creator.as_ref().and_then(|v| v.first()).cloned(),
223+
secondary_authors: self.contributor.clone().unwrap_or_default(),
242224
translators: vec![],
243225
editors: vec![],
244226
compilers: vec![],
245-
primary_author,
246-
};
247-
248-
serde_json::to_string(&roles).ok()
227+
}
249228
}
250229

251230
pub fn all_contributors(&self) -> Vec<String> {

lib/bibdata_rs/src/marc.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use marctk::Record;
44

55
pub mod call_number;
66
pub mod cjk;
7+
pub mod contributors;
78
pub mod control_field;
89
pub mod fixed_field;
910
pub mod genre;
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
use marctk::{Field, Record};
2+
3+
use crate::{marc::trim_punctuation, solr::AuthorRoles};
4+
5+
impl From<&Record> for AuthorRoles {
6+
fn from(record: &Record) -> Self {
7+
let primary_author = record
8+
.extract_values("100a")
9+
.first()
10+
.map(|name| trim_punctuation(name));
11+
let mut secondary_authors: Vec<String> = Default::default();
12+
let mut compilers: Vec<String> = Default::default();
13+
let mut editors: Vec<String> = Default::default();
14+
let mut translators: Vec<String> = Default::default();
15+
let other_author_fields = record.fields().iter().filter(|field| {
16+
["110", "111", "700", "710", "711"].contains(&field.tag()) && field.has_subfield("a")
17+
});
18+
for field in other_author_fields {
19+
match (ContributorType::from(field), field.first_subfield("a")) {
20+
(ContributorType::Compiler, Some(contributor_subfield)) => {
21+
compilers.push(trim_punctuation(contributor_subfield.content()));
22+
}
23+
(ContributorType::Editor, Some(contributor_subfield)) => {
24+
editors.push(trim_punctuation(contributor_subfield.content()));
25+
}
26+
(ContributorType::Translator, Some(contributor_subfield)) => {
27+
translators.push(trim_punctuation(contributor_subfield.content()));
28+
}
29+
(_, Some(contributor_subfield)) => {
30+
secondary_authors.push(trim_punctuation(contributor_subfield.content()));
31+
}
32+
_ => {}
33+
}
34+
}
35+
Self {
36+
primary_author,
37+
secondary_authors,
38+
compilers,
39+
editors,
40+
translators,
41+
}
42+
}
43+
}
44+
45+
/// The role a contributor plays, as far as the author roles grouping cares.
///
/// Anything that is not recognized as a compiler, editor or translator is
/// `Other`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum ContributorType {
    Compiler,
    Editor,
    Translator,
    Other,
}
51+
52+
impl From<&Field> for ContributorType {
53+
fn from(field: &Field) -> Self {
54+
let relator = find_potential_relator(field, "4").or(find_potential_relator(field, "e"));
55+
match relator.as_deref() {
56+
Some("COM") => Self::Compiler,
57+
Some("COMPILER") => Self::Compiler,
58+
Some("EDT") => Self::Editor,
59+
Some("EDITOR") => Self::Editor,
60+
Some("TRL") => Self::Translator,
61+
Some("TRANSLATOR") => Self::Translator,
62+
_ => Self::Other,
63+
}
64+
}
65+
}
66+
67+
fn find_potential_relator(field: &Field, subfield: &str) -> Option<String> {
68+
field
69+
.first_subfield(subfield)
70+
.map(|subfield| clean_potential_relator(subfield.content()))
71+
}
72+
73+
/// Normalizes a raw relator value: keeps only the ASCII letters and
/// uppercases them, dropping whitespace, punctuation, digits and any
/// non-ASCII characters.
fn clean_potential_relator(raw: &str) -> String {
    raw.chars()
        .filter(char::is_ascii_alphabetic)
        .map(|letter| letter.to_ascii_uppercase())
        .collect()
}
84+
85+
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses a MARC breaker string and converts the record to `AuthorRoles`.
    fn roles_from_breaker(breaker: &str) -> AuthorRoles {
        let record = Record::from_breaker(breaker).unwrap();
        AuthorRoles::from(&record)
    }

    #[test]
    fn it_can_find_primary_author_from_marc_record() {
        let expected = AuthorRoles {
            primary_author: Some("Primary".to_owned()),
            ..Default::default()
        };
        assert_eq!(roles_from_breaker("=100 \\$aPrimary"), expected);
    }

    #[test]
    fn it_trims_spaces_and_punctuation_from_primary_author() {
        let expected = AuthorRoles {
            primary_author: Some("Primary".to_owned()),
            ..Default::default()
        };
        assert_eq!(
            roles_from_breaker("=100 \\$a Primary, $e author"),
            expected
        );
    }

    #[test]
    fn it_can_find_all_types_of_author() {
        // Covers relator codes in $4, relator terms in $e, an unknown code,
        // a missing relator and an unrecognized abbreviation.
        let breaker = r#"=100 \\$aLahiri, Jhumpa
=700 \\$aEugenides, Jeffrey$4edt
=700 \\$aCole, Teju$4com
=700 \\$aNikolakopoulou, Evangelia$4trl
=700 \\$aMorrison, Toni$4aaa
=700 \\$aOates, Joyce Carol
=700 \\$aMarchesi, Simone$etranslator.
=700 \\$aFitzgerald, F. Scott$eed."#;
        let expected = AuthorRoles {
            primary_author: Some("Lahiri, Jhumpa".to_owned()),
            secondary_authors: vec![
                "Morrison, Toni".to_owned(),
                "Oates, Joyce Carol".to_owned(),
                "Fitzgerald, F. Scott".to_owned(),
            ],
            translators: vec![
                "Nikolakopoulou, Evangelia".to_owned(),
                "Marchesi, Simone".to_owned(),
            ],
            editors: vec!["Eugenides, Jeffrey".to_owned()],
            compilers: vec!["Cole, Teju".to_owned()],
        };
        assert_eq!(roles_from_breaker(breaker), expected);
    }
}

lib/bibdata_rs/src/marc/ruby_bindings.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use crate::solr::AuthorRoles;
2+
13
use super::*;
24
use magnus::{Module, Object, RModule, function};
35

@@ -9,6 +11,8 @@ pub fn register_ruby_methods(parent_module: &RModule) -> Result<(), magnus::Erro
911
submodule_marc.define_singleton_method("access_notes", function!(access_notes, 1))?;
1012
submodule_marc
1113
.define_singleton_method("alma_code_start_22?", function!(alma_code_start_22, 1))?;
14+
submodule_marc
15+
.define_singleton_method("author_roles", function!(author_roles_from_marc_breaker, 1))?;
1216
submodule_marc.define_singleton_method("build_call_number", function!(build_call_number, 1))?;
1317
submodule_marc.define_singleton_method(
1418
"call_number_labels_for_browse",
@@ -82,6 +86,19 @@ fn icpsr_subjects_from_marc_breaker(
8286
Ok(subject::icpsr_subjects(&record))
8387
}
8488

89+
fn author_roles_from_marc_breaker(
90+
ruby: &Ruby,
91+
record_string: String,
92+
) -> Result<String, magnus::Error> {
93+
let record = get_record(ruby, &record_string)?;
94+
serde_json::to_string(&AuthorRoles::from(&record)).map_err(|err| {
95+
magnus::Error::new(
96+
ruby.exception_runtime_error(),
97+
format!("Found error {} while serializing author roles", err),
98+
)
99+
})
100+
}
101+
85102
#[cfg(test)]
86103
mod tests {
87104
use super::*;

lib/bibdata_rs/src/solr.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ pub mod index;
22
pub mod solr_document;
33

44
mod access_facet;
5+
mod author_roles;
56
mod builder;
67
mod dataspace_solr_mapping;
78
mod electronic_access;
@@ -10,6 +11,7 @@ mod format_facet;
1011
mod library_facet;
1112

1213
pub use access_facet::AccessFacet;
14+
pub use author_roles::AuthorRoles;
1315
pub use builder::SolrDocumentBuilder;
1416
pub use electronic_access::DigitalContent;
1517
pub use electronic_access::ElectronicAccess;
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
// This module is responsible for creating a JSON formatted list of authors
2+
// and their roles.
3+
4+
use serde::{Deserialize, Serialize};
5+
6+
#[derive(Clone, Debug, Default, Deserialize, PartialEq, Serialize)]
7+
pub struct AuthorRoles {
8+
#[serde(skip_serializing_if = "Option::is_none")]
9+
pub primary_author: Option<String>,
10+
pub secondary_authors: Vec<String>,
11+
pub translators: Vec<String>,
12+
pub editors: Vec<String>,
13+
pub compilers: Vec<String>,
14+
}
15+
16+
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn it_serializes_correctly() {
        // Each case pairs a value with the exact JSON it must serialize to.
        let cases = [
            (
                AuthorRoles::default(),
                r#"{"secondary_authors":[],"translators":[],"editors":[],"compilers":[]}"#,
            ),
            (
                AuthorRoles {
                    primary_author: Some("Ginger".to_owned()),
                    ..Default::default()
                },
                r#"{"primary_author":"Ginger","secondary_authors":[],"translators":[],"editors":[],"compilers":[]}"#,
            ),
            (
                AuthorRoles {
                    primary_author: Some("Ginger".to_owned()),
                    compilers: vec!["Galangal".to_owned()],
                    ..Default::default()
                },
                r#"{"primary_author":"Ginger","secondary_authors":[],"translators":[],"editors":[],"compilers":["Galangal"]}"#,
            ),
            (
                AuthorRoles {
                    secondary_authors: vec!["Cardamom".to_owned(), "Turmeric".to_owned()],
                    ..Default::default()
                },
                r#"{"secondary_authors":["Cardamom","Turmeric"],"translators":[],"editors":[],"compilers":[]}"#,
            ),
        ];

        for (roles, expected_json) in &cases {
            assert_eq!(serde_json::to_string(roles).unwrap(), *expected_json);
        }
    }
}

lib/bibdata_rs/src/solr/builder.rs

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
// This module provides a convenient way to create a SolrDocument using the builder pattern
22

3-
use super::{AccessFacet, ElectronicAccess, FormatFacet, LibraryFacet, SolrDocument};
3+
use super::{AccessFacet, AuthorRoles, ElectronicAccess, FormatFacet, LibraryFacet, SolrDocument};
44

55
#[derive(Debug, Default)]
66
pub struct SolrDocumentBuilder {
77
author_s: Option<Vec<String>>,
88
author_sort: Option<String>,
99
author_display: Option<Vec<String>>,
10-
author_roles_1display: Option<String>,
10+
author_roles_1display: Option<AuthorRoles>,
1111
author_citation_display: Option<Vec<String>>,
1212
advisor_display: Option<Vec<String>>,
1313
format: Option<Vec<FormatFacet>>,
@@ -60,11 +60,8 @@ impl SolrDocumentBuilder {
6060
self.id = id.into();
6161
self
6262
}
63-
pub fn with_author_roles_1display(
64-
&mut self,
65-
author_roles_1display: Option<String>,
66-
) -> &mut Self {
67-
self.author_roles_1display = author_roles_1display;
63+
pub fn with_author_roles_1display(&mut self, author_roles_1display: AuthorRoles) -> &mut Self {
64+
self.author_roles_1display = Some(author_roles_1display);
6865
self
6966
}
7067
pub fn with_author_citation_display(

0 commit comments

Comments
 (0)