Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cds_migrator_kit/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ class MultipleModelsMatched(CDSMigrationException):

description = "[Record matched multiple models]"


class UnexpectedValue(CDSMigrationException):
"""The corresponding value is unexpected."""

Expand Down
11 changes: 9 additions & 2 deletions cds_migrator_kit/rdm/records/load/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def __init__(
dry_run=False,
legacy_pids_to_redirect=None,
collection=None,
update_new_version_publication_date=True,
migration_logger=None,
record_state_logger=None,
):
Expand All @@ -69,6 +70,7 @@ def __init__(
self.legacy_pids_to_redirect = {}
self.clc_sync = False
self.collection = collection
self.update_new_version_publication_date = update_new_version_publication_date
self.migration_logger = migration_logger
self.record_state_logger = record_state_logger
if legacy_pids_to_redirect is not None:
Expand Down Expand Up @@ -455,7 +457,6 @@ def _pre_publish(self, identity, entry, version, draft, uow):
"""Create and process draft before publish."""
versions = entry["versions"]
files = versions[version]["files"]
publication_date = versions[version]["publication_date"]
access = versions[version]["access"]

if version == 1 or (version > 1 and draft is None):
Expand Down Expand Up @@ -487,6 +488,12 @@ def _pre_publish(self, identity, entry, version, draft, uow):
identity, draft["id"], uow=uow
)
draft_dict = draft.to_dict()
if not self.update_new_version_publication_date:
publication_date = arrow.get(
entry["record"]["json"]["metadata"]["publication_date"]
)
else:
publication_date = versions[version]["publication_date"]
missing_data = {
**draft_dict,
"metadata": {
Expand Down Expand Up @@ -527,7 +534,7 @@ def _load_versions(self, entry, uow):
)
# Run after publish fixes
self._after_publish(identity, published_record, entry, version, uow)
records.append(published_record._record)
records.append(published_record._record)

if records:
record_state_context = self._load_record_state(legacy_recid, records)
Expand Down
5 changes: 3 additions & 2 deletions cds_migrator_kit/rdm/records/transform/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@
MissingRequiredField,
RecordFlaggedCuration,
RestrictedFileDetected,
UnexpectedValue, MultipleModelsMatched,
UnexpectedValue,
MultipleModelsMatched,
)
from cds_migrator_kit.rdm.migration_config import (
RDM_RECORDS_IDENTIFIERS_SCHEMES,
Expand Down Expand Up @@ -847,7 +848,7 @@ def _transform(self, entry):
UnexpectedValue,
ManualImportRequired,
MissingRequiredField,
MultipleModelsMatched
MultipleModelsMatched,
) as e:
migration_logger.add_log(e, record=entry)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -816,10 +816,10 @@ def related_identifiers_787(self, key, value):
"relation_type": {"id": "references"},
"resource_type": {"id": "publication-conferencepaper"},
},
"article":{
"article": {
"relation_type": {"id": "references"},
"resource_type": {"id": "publication-article"},
}
},
}

if recid:
Expand Down
4 changes: 4 additions & 0 deletions cds_migrator_kit/runner/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ def __init__(
stream_config = config.get(definition.name) or {}
self.data_dir = Path(stream_config[collection].get("data_dir"))
self.restricted = stream_config[collection].get("restricted", False)
self.update_new_version_publication_date = stream_config[
collection
].get("update_new_version_publication_date", True)
self.access_grants_view = stream_config[collection].get(
"access_grants_view", False
)
Expand Down Expand Up @@ -112,6 +115,7 @@ def __init__(
tmp_dir=tmp_dir,
dry_run=dry_run,
collection=collection,
update_new_version_publication_date=self.update_new_version_publication_date,
migration_logger=self.migration_logger,
record_state_logger=self.record_state_logger,
**stream_config[collection].get("load", {}),
Expand Down
15 changes: 15 additions & 0 deletions tests/cds-rdm/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,21 @@ def config(mocker, community, orcid_name_data):
"legacy_pids_to_redirect": "cds_migrator_kit/rdm/data/summer_student_reports/duplicated_pids.json"
},
},
"sspn_publication_date_consistency": {
"data_dir": "tests/cds-rdm/data/sspn",
"tmp_dir": "tests/cds-rdm/data/sspn",
"log_dir": "tests/cds-rdm/data/log/sspn",
"extract": {"dirpath": "tests/cds-rdm/data/sspn/dumps/"},
"update_new_version_publication_date": False,
"transform": {
"files_dump_dir": "tests/cds-rdm/data/sspn/files/",
"missing_users": "tests/cds-rdm/data/users",
"communities_ids": [f"{str(community.id)}"],
},
"load": {
"legacy_pids_to_redirect": "cds_migrator_kit/rdm/data/summer_student_reports/duplicated_pids.json"
},
},
"bulletin_issue": {
"data_dir": "tests/cds-rdm/data/bulletin_issue",
"tmp_dir": "tests/cds-rdm/data/bulletin_issue",
Expand Down
4 changes: 3 additions & 1 deletion tests/cds-rdm/test_full_migration.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,11 +241,13 @@ def multiple_versions(record, record_state):
)
dict_first_version = first_version.to_dict()
# It matches record created date instead of the file creation date
assert dict_first_version["created"] == "2024-02-19T12:42:58+00:00"
assert dict_first_version["created"] == "2024-02-19T13:51:23+00:00"
assert dict_first_version["metadata"]["publication_date"] == "2022-08-31"

assert dict_rec["versions"]["index"] == 2
# Check that the record creation date matches the files creation date
assert dict_rec["created"] == "2024-02-19T12:47:01+00:00"
assert dict_rec["metadata"]["publication_date"] == "2024-02-19"


def multiple_versions_with_cs(record):
Expand Down
74 changes: 74 additions & 0 deletions tests/cds-rdm/test_new_version_publication_date.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2026 CERN.
#
# CDS-RDM is free software; you can redistribute it and/or modify it under
# the terms of the MIT License; see LICENSE file for more details.

"""Tests suites."""

import json
from pathlib import Path

from cds_rdm.legacy.models import CDSMigrationLegacyRecord
from helpers import config
from invenio_access.permissions import system_identity
from invenio_rdm_records.proxies import current_rdm_records_service

from cds_migrator_kit.rdm.records.streams import RecordStreamDefinition
from cds_migrator_kit.runner.runner import Runner


def publication_date_consistency_across_versions(record, record_state):
    """Assert that all versions of a record share one publication date.

    Used by the migration test for legacy record 2889522.

    :param record: loaded record result; only ``to_dict()`` is called on it
        and ``["metadata"]["publication_date"]`` is read from the result.
    :param record_state: state-log entry for the record; its ``"versions"``
        list is iterated and every item's ``"new_recid"`` is read through
        ``current_rdm_records_service`` with ``system_identity``.
    :raises AssertionError: if no version is listed, if the versions carry
        different publication dates, or if ``record`` disagrees with them.
    """
    dict_rec = record.to_dict()

    # Collect the publication date of every version listed in the state log.
    version_dates = [
        current_rdm_records_service.read(
            system_identity, version_state["new_recid"]
        ).to_dict()["metadata"]["publication_date"]
        for version_state in record_state["versions"]
    ]

    # Without at least one version the comparison below would be meaningless.
    assert version_dates, "record state lists no versions to compare"

    # Check all versions have the same publication date
    assert (
        len(set(version_dates)) == 1
    ), f"publication dates differ across versions: {version_dates}"
    assert dict_rec["metadata"]["publication_date"] == version_dates[0], (
        "publication date of the given record does not match its versions: "
        f"{dict_rec['metadata']['publication_date']} != {version_dates[0]}"
    )


def test_new_version_publication_date(
    test_app,
    orcid_name_data,
    community,
    mocker,
    groups,
):
    """Migrate ``sspn_publication_date_consistency`` and check version dates.

    Runs the record stream for the collection, reads the resulting record
    state log and verifies, for legacy record 2889522, that every version
    has the same publication date.

    ``test_app`` and ``groups`` are not referenced in the body; presumably
    they are requested for their fixture setup -- confirm against conftest.
    """
    stream_config = config(mocker, community, orcid_name_data)

    runner = Runner(
        stream_definitions=[RecordStreamDefinition],
        config_filepath=Path(stream_config).absolute(),
        dry_run=False,
        collection="sspn_publication_date_consistency",
        keep_logs=False,
    )
    runner.run()

    with open(
        "tests/cds-rdm/tmp/logs/sspn_publication_date_consistency/rdm_records_state.json",
        "r",
        encoding="utf-8",
    ) as state_logs:
        records = json.load(state_logs)

    target_recid = "2889522"
    target_checked = False

    for record in records:
        # Every migrated record's latest version is read, not only the target.
        loaded_rec = current_rdm_records_service.read(
            system_identity, record["latest_version"]
        )
        if record["legacy_recid"] == target_recid:
            publication_date_consistency_across_versions(loaded_rec, record)
            target_checked = True

    # Guard against a vacuous pass: if the target record never reached the
    # state log, the consistency check above was never executed.
    assert (
        target_checked
    ), f"legacy record {target_recid} not found in the record state log"
Loading