Skip to content

Commit ac9c10a

Browse files
authored
Merge pull request #2 from marmoure/feature/schema-list
Schema list endpoint
2 parents d862d6f + 76b2e95 commit ac9c10a

8 files changed

Lines changed: 300 additions & 0 deletions

pom.xml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,13 @@
9090
<version>2.0</version>
9191
<scope>runtime</scope>
9292
</dependency>
93+
94+
<!-- Jackson Databind for JSON parsing -->
95+
<dependency>
96+
<groupId>com.fasterxml.jackson.core</groupId>
97+
<artifactId>jackson-databind</artifactId>
98+
<version>2.15.3</version>
99+
</dependency>
93100
</dependencies>
94101

95102
<build>
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
package com.evolvedbinary.bblValidator.controller;
2+
3+
import com.evolvedbinary.bblValidator.dto.SchemaInfo;
4+
import com.evolvedbinary.bblValidator.service.SchemaService;
5+
import io.micronaut.http.MediaType;
6+
import io.micronaut.http.annotation.Controller;
7+
import io.micronaut.http.annotation.Get;
8+
import io.micronaut.http.annotation.Produces;
9+
10+
import java.util.List;
11+
12+
@Controller("/schema")
13+
public class SchemaController {
14+
15+
private final SchemaService schemaService;
16+
17+
public SchemaController(SchemaService schemaService) {
18+
this.schemaService = schemaService;
19+
}
20+
21+
@Get
22+
@Produces(MediaType.APPLICATION_JSON)
23+
public List<SchemaInfo> listSchemas() {
24+
return schemaService.listSchemas();
25+
}
26+
}
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
package com.evolvedbinary.bblValidator.dto;
2+
3+
import io.micronaut.serde.annotation.Serdeable;
4+
5+
@Serdeable
6+
public class SchemaInfo {
7+
8+
private String id;
9+
private String name;
10+
private String version;
11+
private String date;
12+
private String url;
13+
private String description;
14+
15+
public SchemaInfo() {
16+
}
17+
18+
public SchemaInfo(String id, String name, String version, String date, String url, String description) {
19+
this.id = id;
20+
this.name = name;
21+
this.version = version;
22+
this.date = date;
23+
this.url = url;
24+
this.description = description;
25+
}
26+
27+
public String getId() {
28+
return id;
29+
}
30+
31+
public void setId(String id) {
32+
this.id = id;
33+
}
34+
35+
public String getName() {
36+
return name;
37+
}
38+
39+
public void setName(String name) {
40+
this.name = name;
41+
}
42+
43+
public String getVersion() {
44+
return version;
45+
}
46+
47+
public void setVersion(String version) {
48+
this.version = version;
49+
}
50+
51+
public String getDate() {
52+
return date;
53+
}
54+
55+
public void setDate(String date) {
56+
this.date = date;
57+
}
58+
59+
public String getUrl() {
60+
return url;
61+
}
62+
63+
public void setUrl(String url) {
64+
this.url = url;
65+
}
66+
67+
public String getDescription() {
68+
return description;
69+
}
70+
71+
public void setDescription(String description) {
72+
this.description = description;
73+
}
74+
}
75+
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
package com.evolvedbinary.bblValidator.service;
2+
3+
import com.evolvedbinary.bblValidator.dto.SchemaInfo;
4+
import com.fasterxml.jackson.databind.ObjectMapper;
5+
import jakarta.annotation.PostConstruct;
6+
import jakarta.inject.Singleton;
7+
import org.slf4j.Logger;
8+
import org.slf4j.LoggerFactory;
9+
10+
import java.io.IOException;
11+
import java.io.InputStream;
12+
import java.nio.charset.StandardCharsets;
13+
import java.nio.file.Files;
14+
import java.nio.file.Path;
15+
import java.nio.file.Paths;
16+
import java.util.ArrayList;
17+
import java.util.List;
18+
import java.util.stream.Stream;
19+
20+
@Singleton
21+
public class SchemaService {
22+
23+
private static final Logger LOG = LoggerFactory.getLogger(SchemaService.class);
24+
private static final String SCHEMA_DIRECTORY = "schemas";
25+
26+
private final List<SchemaInfo> schemas = new ArrayList<>();
27+
private final ObjectMapper objectMapper = new ObjectMapper();
28+
29+
@PostConstruct
30+
public void loadSchemas() {
31+
try {
32+
// Load schemas from classpath
33+
ClassLoader classLoader = getClass().getClassLoader();
34+
35+
// Get all .json files from the schemas directory
36+
try (InputStream is = classLoader.getResourceAsStream(SCHEMA_DIRECTORY)) {
37+
if (is == null) {
38+
LOG.warn("Schemas directory not found in classpath");
39+
return;
40+
}
41+
}
42+
43+
// Scan for schema metadata files
44+
loadSchemasFromClasspath();
45+
46+
LOG.info("Loaded {} schemas from disk", schemas.size());
47+
} catch (Exception e) {
48+
LOG.error("Error loading schemas from disk", e);
49+
}
50+
}
51+
52+
private void loadSchemasFromClasspath() {
53+
try {
54+
// Get resource URL and list files
55+
ClassLoader classLoader = getClass().getClassLoader();
56+
var resource = classLoader.getResource(SCHEMA_DIRECTORY);
57+
58+
if (resource != null) {
59+
Path schemaPath = Paths.get(resource.toURI());
60+
61+
try (Stream<Path> paths = Files.walk(schemaPath, 1)) {
62+
paths.filter(path -> path.toString().endsWith(".json"))
63+
.forEach(this::loadSchemaMetadata);
64+
}
65+
}
66+
} catch (Exception e) {
67+
LOG.error("Error scanning schema directory", e);
68+
}
69+
}
70+
71+
private void loadSchemaMetadata(Path metadataPath) {
72+
try {
73+
String content = Files.readString(metadataPath, StandardCharsets.UTF_8);
74+
SchemaInfo schemaInfo = objectMapper.readValue(content, SchemaInfo.class);
75+
76+
// Load corresponding schema file
77+
String schemaFileName = metadataPath.getFileName().toString().replace(".json", ".csvs");
78+
Path schemaFilePath = metadataPath.getParent().resolve(schemaFileName);
79+
80+
if (Files.exists(schemaFilePath)) {
81+
schemas.add(schemaInfo);
82+
LOG.debug("Loaded schema: {}", schemaInfo.getId());
83+
} else {
84+
LOG.warn("Schema file not found for metadata: {}", schemaFileName);
85+
}
86+
} catch (IOException e) {
87+
LOG.error("Error loading schema metadata from: {}", metadataPath, e);
88+
}
89+
}
90+
91+
public List<SchemaInfo> listSchemas() {
92+
return new ArrayList<>(schemas);
93+
}
94+
}
95+
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
version 1.0
2+
@totalColumns 42
3+
/*-------------------------------------------------------------------------------
4+
|Schema: ADM_362-technical-acquisition-with-minimal-transcription.csvs |
5+
|Authors: Nicki Welch |
6+
| David Underdown |
7+
|Purpose: To capture metadata about the digitisation of the ADM 362 series |
8+
| Primarily technical metadata, but with a minimal amount of |
9+
| transcription to verify that the records may be publicly released |
10+
| after receipt by The National Archives |
11+
|Revision: 1.0 first release |
12+
| 1.1 update as some official numbers only single digit |
13+
| 1.2 allow M as official number prefix too |
14+
| 1.3 further additions to prefixes, L, S, SS, SSX |
15+
| 1.4 allow for asterisk and ? in official number |
16+
| 1.5 further prefixes MX, KX, JX, and longer volume number |
17+
| 1.6 add explicit check that checksum is not that for a 0 byte file |
18+
| 1.7 Fix errors eg use correct not(), rather than isNot() |
19+
| 1.8 Allow brackets etc in comments, range checking for birth year |
20+
| ???? for birth year |
21+
| 1.9 Add piece check in ordinal: unique($piece,$item,$ordinal) |
22+
| Remove and in($resource_uri) from item: |
23+
| resource_uri, change starts(...) to |
24+
| regex("...") |
25+
| 2.0 Allow LX as a prefix too |
26+
|-------------------------------------------------------------------------------*/
27+
batch_code: length(10) regex("^ADM362B([0-9]{3})$")
28+
department: (is("ADM") if($file_path/notEmpty,in($file_path) and in($resource_uri)))
29+
series: is("362") and if($file_path/notEmpty,in($file_path) and in($resource_uri))
30+
piece: range(1,69720) if($file_path/notEmpty,in($file_path) and in($resource_uri))
31+
item: ((positiveInteger unique($piece,$item,$ordinal)) or empty) if($file_path/notEmpty,in($file_path))
32+
ordinal: if($item/empty,empty,unique($piece,$item,$ordinal))
33+
file_uuid: if($ordinal/empty,empty,uuid4 unique)
34+
file_path: uri if($ordinal/empty,empty,unique fileExists regex("^file:\/\/\/ADM_362\/[0-9]{1,5}\/[1-9][0-9]{0,4}\/[1-9][0-9]{0,4}_[0-9]{1,4}\.jp2$"))
35+
file_checksum: if($ordinal/empty,empty,not("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855") and checksum(file($file_path),"SHA-256"))
36+
resource_uri: if($ordinal/notEmpty,uri and regex("^http://datagov.nationalarchives.gov.uk/66/ADM/362/[1-9][0-9]*/[a-f0-9]{8}-[a-f0-9]{4}-4[a-f0-9]{3}-[89ab][a-f0-9]{3}-[a-f0-9]{12}$"))
37+
scan_operator: if($ordinal/empty,empty,length(1,12) regex("^[0-9a-zA-Z]{1,12}$"))
38+
scan_id: if($ordinal/empty,empty,length(1,12) regex("^[0-9a-zA-Z_]{1,12}$"))
39+
scan_location: if($ordinal/empty,empty,regex("[-\w\s,]+"))
40+
scan_native_format: if($ordinal/empty,empty,regex("[0-9\w\s,.:]+"))
41+
scan_timestamp: if($ordinal/empty,empty,xDateTime)
42+
image_resolution: if($ordinal/empty,empty,is("300"))
43+
image_width: if($ordinal/empty,empty,positiveInteger)
44+
image_height: if($ordinal/empty,empty,positiveInteger)
45+
image_tonal_resolution: if($ordinal/empty,empty,is("24-bit colour"))
46+
image_format: if($ordinal/empty,empty,is("x-fmt/392"))
47+
image_colour_space: if($ordinal/empty,empty,is("sRGB"))
48+
process_location: if($ordinal/empty,empty,regex("[-\w\s,]+"))
49+
jp2_creation_timestamp: if($ordinal/empty,empty,xDateTime)
50+
uuid_timestamp: if($ordinal/empty,empty,xDateTime)
51+
embed_timestamp: if($ordinal/empty,empty,xDateTime)
52+
image_split: if($ordinal/empty,empty,is("yes") or is("no"))
53+
image_split_other_uuid: if($ordinal/empty,empty,if($image_split/is("yes"),uuid4,is("")))
54+
image_split_operator: if($ordinal/empty,empty,if($image_split/is("yes"),length(1,12) and regex("^[0-9a-zA-Z]{1,12}$"),is("")))
55+
image_split_timestamp: if($ordinal/empty,empty,if($image_split/is("yes"),xDateTime,is("")))
56+
image_crop: if($ordinal/empty,empty,is("auto") or is("manual") or is("none"))
57+
image_crop_operator: if($ordinal/empty,empty,if($image_split/is("manual"),length(1,12) and regex("^[0-9a-zA-Z]{1,12}$"),is("")))
58+
image_crop_timestamp: if($ordinal/empty,empty,if($image_crop/is("none"),empty,xDateTime))
59+
image_deskew: if($ordinal/empty,empty,is("yes") or is("no"))
60+
image_deskew_operator: if($ordinal/empty,empty,if($image_deskew/is("yes"),regex("^[0-9a-zA-Z]{1,12}$"),is("")))
61+
image_deskew_timestamp: if($ordinal/empty,empty,if($image_deskew/is("yes"),xDateTime,is("")))
62+
QA-code: regex("^[0-9/,]{1,2}$") @optional
63+
comments: regex("[-\w\s,\.\(\)\/'":\?]+") @optional
64+
transcribed_volume_number: if($item/empty,regex("[0-9A-Z\-\s]{1,19}"),is(""))
65+
transcribed_birth_date_day: if(($ordinal/empty and $item/notEmpty),regex("^\*|([0\?][1-9\?])|([1-2\?][0-9\?])|([3\?][0-1\?])$"),is(""))
66+
transcribed_birth_date_month: if(($ordinal/empty and $item/notEmpty),is("*") or is("?") or is("January") or is("February") or is("March") or is("April") or is("May") or is("June") or is("July") or is("August") or is("September") or is("October") or is("November") or is("December"), is(""))
67+
transcribed_birth_date_year: if(($ordinal/empty and $item/notEmpty),if(positiveInteger,range(1850,1914),regex("^1[7-9][0-9\?]{2}|\*|\?{4}$")),is(""))
68+
transcribed_official_number: if(($ordinal/empty and $item/notEmpty),regex("^(([CDP]\/)?([FJKLMS]|LX|MX|JX|KX|SS|SSX)[/?0-9]{1,6}|[/?1-9][/?0-9]{5}|\*)$"),is(""))
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"id": "ADM_362-technical-acquisition-with-minimal-transcription",
3+
"name": "ADM_362-technical-acquisition-with-minimal-transcription",
4+
"version": "1.0.0",
5+
"date": "2015-11-01",
6+
"url": "https://github.com/digital-preservation/csv-schema/blob/master/example-schemas/ADM_362-technical-acquisition-with-minimal-transcription.csvs",
7+
"description": "Minor updates and bug fixes"
8+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
database /tmp/testdb
2+
table customer
3+
# indicate csv format with a delimiter of |
4+
csv |
5+
# Name Type Tag
6+
field CustID varchar(10) 1
7+
field Company varchar(80) 2
8+
field Address varchar(80) 3
9+
field City varchar(20) 4
10+
field State varchar(10) 5
11+
field Zip varchar(10) 6
12+
field Country varchar(10) 7
13+
field Phone varchar(20) 8
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"id": "thunder-stone-sample-csvs",
3+
"name": "thunder-stone-sample-csvs",
4+
"version": "1.0.0",
5+
"date": "2015-11-01",
6+
"url": "https://docs.thunderstone.com/site/texisman/example_schema_comma_separated.html",
7+
"description": "sample file for testing"
8+
}

0 commit comments

Comments
 (0)