Skip to content

Commit f5b21c6

Browse files
Added P019 to the output results; patched issues #37, #36, and #35
1 parent 5a7b724 commit f5b21c6

4 files changed

Lines changed: 51 additions & 28 deletions

File tree

src/metacheck/detect_pitfalls_main.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from metacheck.scripts.pitfalls.p016 import detect_different_repository_pitfall
2424
from metacheck.scripts.pitfalls.p017 import detect_codemeta_version_mismatch_pitfall
2525
from metacheck.scripts.pitfalls.p018 import detect_raw_swhid_pitfall
26+
from metacheck.scripts.pitfalls.p019 import detect_inconsistent_author_count
2627

2728
# Warnings
2829
from metacheck.scripts.warnings.w001 import detect_unversioned_requirements
@@ -190,6 +191,13 @@ def detect_all_pitfalls(json_files: Iterable[Path], pitfalls_output_dir: Union[s
190191
"percentage": 0.0,
191192
"languages": {}
192193
},
194+
{
195+
"pitfall_code": "P019",
196+
"pitfall_desc": "Inconsistent author counts found across metadata files",
197+
"count": 0,
198+
"percentage": 0.0,
199+
"languages": {}
200+
},
193201
{
194202
"warning_code": "W001",
195203
"warning_desc": "Software requirements in metadata files don't have version specifications",
@@ -268,7 +276,7 @@ def detect_all_pitfalls(json_files: Iterable[Path], pitfalls_output_dir: Union[s
268276
total_repos = 0
269277
repos_with_target_languages = 0
270278
jsonld_files_created = 0
271-
pitfall_counts = [0] * 27
279+
pitfall_counts = [0] * 29
272280

273281
pitfall_detectors = [
274282
(detect_version_mismatch, "P001"), # Index 0 -> P001
@@ -289,16 +297,17 @@ def detect_all_pitfalls(json_files: Iterable[Path], pitfalls_output_dir: Union[s
289297
(detect_different_repository_pitfall, "P016"), # Index 15 -> P016
290298
(detect_codemeta_version_mismatch_pitfall, "P017"), # Index 16 -> P017
291299
(detect_raw_swhid_pitfall, "P018"), # Index 17 -> P018
292-
(detect_unversioned_requirements, "W001"), # Index 18 -> W001
293-
(detect_outdated_datemodified, "W002"), # Index 19 -> W002
300+
(detect_inconsistent_author_count, "P019"), # Index 18 -> P019
301+
(detect_unversioned_requirements, "W001"), # Index 19 -> W001
302+
(detect_outdated_datemodified, "W002"), # Index 20 -> W002
294303
(detect_dual_license_missing_codemeta_pitfall, "W003"),
295-
(detect_programming_language_no_version_pitfall, "W004"), # Index 20 -> W004
296-
(detect_multiple_requirements_string_warning, "W005"), # Index 21 -> W005
297-
(detect_identifier_name_warning, "W006"), # Index 22 -> W006
298-
(detect_empty_identifier_warning, "W007"), # Index 23 -> W007
299-
(detect_author_name_list_warning, "W008"), # Index 24 -> W008
300-
(detect_development_status_url_pitfall, "W009"), # Index 25 -> W009
301-
(detect_git_remote_shorthand_pitfall, "W010"), # Index 26 -> W010
304+
(detect_programming_language_no_version_pitfall, "W004"), # Index 22 -> W004
305+
(detect_multiple_requirements_string_warning, "W005"), # Index 23 -> W005
306+
(detect_identifier_name_warning, "W006"), # Index 24 -> W006
307+
(detect_empty_identifier_warning, "W007"), # Index 25 -> W007
308+
(detect_author_name_list_warning, "W008"), # Index 26 -> W008
309+
(detect_development_status_url_pitfall, "W009"), # Index 27 -> W009
310+
(detect_git_remote_shorthand_pitfall, "W010"), # Index 28 -> W010
302311
]
303312

304313
for json_file in json_files:

src/metacheck/scripts/pitfalls/p013.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,12 @@ def detect_license_no_version_pitfall(somef_data: Dict, file_name: str) -> Dict:
2626
"requirements.txt", "setup.py"]
2727

2828
versioned_patterns = {
29-
"GPL": r"GPL[-\s]?\d+(\.\d+)?",
30-
"LGPL": r"LGPL[-\s]?\d+(\.\d+)?",
31-
"AGPL": r"AGPL[-\s]?\d+(\.\d+)?",
32-
"Apache": r"Apache[-\s]?\d+(\.\d+)?",
33-
"CC": r"CC[- ]BY[-\s]?\d+(\.\d+)?",
34-
"BSD": r"BSD[-\s]\d+[-\s]Clause"
29+
"GPL": r"\bGPL[-\s]?\d+(\.\d+)?",
30+
"LGPL": r"\bLGPL[-\s]?\d+(\.\d+)?",
31+
"AGPL": r"\bAGPL[-\s]?\d+(\.\d+)?",
32+
"Apache": r"\bApache[-\s]?\d+(\.\d+)?",
33+
"CC": r"\bCC[- ]BY[-\s]?\d+(\.\d+)?",
34+
"BSD": r"\bBSD[-\s]\d+[-\s]Clause"
3535
}
3636

3737
for entry in license_entries:
@@ -52,14 +52,17 @@ def detect_license_no_version_pitfall(somef_data: Dict, file_name: str) -> Dict:
5252

5353
if "0BSD" in license_value:
5454
continue
55+
56+
if "LICENSEREF-" in license_upper:
57+
continue
5558

5659
for license_name, version_pattern in versioned_patterns.items():
57-
if license_name in license_upper:
60+
if re.search(rf"\b{license_name}\b", license_upper):
5861
if not re.search(version_pattern, license_upper, re.IGNORECASE):
5962
result["has_pitfall"] = True
6063
result["license_value"] = license_value
6164
result["source"] = source
6265
result["metadata_source_file"] = extract_metadata_source_filename(source)
63-
return result # Stop at first match
66+
return result
6467

6568
return result

src/metacheck/scripts/warnings/w003.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ def detect_dual_license_missing_codemeta_pitfall(somef_data: Dict, file_name: st
77
Detect when repository has multiple licenses but codemeta.json only lists one.
88
"""
99
result = {
10-
"has_pitfall": False,
10+
"has_warning": False,
1111
"file_name": file_name,
1212
"has_dual_license_indicator": False,
1313
"codemeta_license_count": 0,
@@ -25,13 +25,13 @@ def detect_dual_license_missing_codemeta_pitfall(somef_data: Dict, file_name: st
2525
r"dual[\s-]?licen[cs]ed?",
2626
r"dually[\s-]?licen[cs]ed?",
2727
r"multiple[\s-]?licen[cs]es?",
28-
r"licen[cs]ed?\s+under.*(?:and|or)",
29-
r"choose.*licen[cs]e",
30-
r"either.*licen[cs]e",
28+
r"(?:is|are)\s+licen[cs]ed?\s+under.*(?:and|or).*licen[cs]e",
29+
r"choose.*(?:between|from).*licen[cs]e",
30+
r"either.*or.*licen[cs]e",
3131
r"\d+\..*licen[cs]e.*\n.*\d+\..*licen[cs]e",
3232
r"licen[cs]e.*options?",
33-
r"available\s+under.*licen[cs]es?"
34-
]
33+
r"available\s+under.*(?:two|multiple|either).*licen[cs]es?"
34+
]
3535

3636
has_dual_license_indicator = False
3737
dual_license_source = None
@@ -59,6 +59,6 @@ def detect_dual_license_missing_codemeta_pitfall(somef_data: Dict, file_name: st
5959
result["dual_license_source"] = dual_license_source
6060

6161
if has_dual_license_indicator and codemeta_license_count <= 1:
62-
result["has_pitfall"] = True
62+
result["has_warning"] = True
6363

6464
return result

src/metacheck/utils/json_ld_utils.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,16 @@ def format_evidence_text(pitfall_code: str, pitfall_result: Dict) -> str:
253253
identifier_value = pitfall_result.get('identifier_value') or 'unknown'
254254
return f"{evidence_base}codemeta.json Identifier uses raw SWHID without resolvable URL: '{identifier_value}'"
255255

256+
elif pitfall_code == "P019":
257+
if "inconsistencies" in pitfall_result:
258+
inconsistency = pitfall_result["inconsistencies"][0]
259+
source_fewer = inconsistency.get('source_with_fewer', 'unknown')
260+
count_fewer = inconsistency.get('fewer_count', 0)
261+
source_more = inconsistency.get('source_with_more', 'unknown')
262+
count_more = inconsistency.get('more_count', 0)
263+
return f"{evidence_base}Author count mismatch: {source_fewer} has {count_fewer} while {source_more} has {count_more}"
264+
return f"{evidence_base}Inconsistent author counts found across metadata files"
265+
256266
# Warnings
257267
elif pitfall_code == "W001":
258268
if "unversioned_requirements" in pitfall_result:
@@ -288,9 +298,9 @@ def format_evidence_text(pitfall_code: str, pitfall_result: Dict) -> str:
288298
return f"{evidence_base}codemeta.json Programming languages in metadata do not have version specifications"
289299

290300
elif pitfall_code == "W005":
291-
if "requirements_string" in pitfall_result:
301+
if "requirement_string" in pitfall_result:
292302
metadata_source = extract_metadata_source(pitfall_result)
293-
requirements_string = pitfall_result.get('requirements_string') or 'unknown'
303+
requirements_string = pitfall_result.get('requirement_string') or 'unknown'
294304
return f"{evidence_base}{metadata_source} Multiple requirements written as single string: '{requirements_string}'"
295305

296306
elif pitfall_code == "W006":
@@ -348,6 +358,7 @@ def get_pitfall_category(pitfall_code: str) -> str:
348358
"P016": "metadatafile", # metadata file codeRepository different repo
349359
"P017": "codemeta", # codemeta.json version mismatch
350360
"P018": "codemeta", # codemeta.json raw SWHIDs
361+
"P019": "metadatafile", # inconsistent author counts
351362

352363
# Warnings
353364
"W001": "metadatafile", # metadata file requirements
@@ -389,6 +400,7 @@ def get_suggestion_text(pitfall_code: str) -> str:
389400
"P016": "Make sure that the codeRepository URL in your metadata exactly matches the repository hosting your source code.",
390401
"P017": "You need to synchronize all version references across metadata and build configuration files.",
391402
"P018": "Always use the full resolvable SWHID URL (e.g., https://archive.softwareheritage.org/swh:1:dir:abcd.../). This will ensure that both humans and machines can access the archived software snapshot directly",
403+
"P019": "Ensure that the number of authors is consistent across all metadata files. Inconsistencies may signal that some contributors are missing in certain files.",
392404

393405
# Warnings
394406
"W001": "Add version numbers to your dependencies. This provides stability for users and allows reproducibility across different environments.",
@@ -491,7 +503,6 @@ def save_individual_pitfall_jsonld(jsonld_data: Dict, output_dir: Path, file_nam
491503
output_file = output_dir / f"{base_name}_pitfalls.jsonld"
492504

493505
try:
494-
# Convert any sets to lists before JSON serialization
495506
serializable_data = convert_sets_to_lists(jsonld_data)
496507

497508
with open(output_file, 'w', encoding='utf-8') as f:

0 commit comments

Comments
 (0)