Skip to content

Commit dedc2a6

Browse files
Added --verbose flag for detailed report, tweaked W003 regex, and updated codemeta.json
1 parent f366df7 commit dedc2a6

9 files changed

Lines changed: 462 additions & 398 deletions

File tree

codemeta.json

Lines changed: 28 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,29 @@
11
{
2-
"@context": "https://w3id.org/codemeta/3.0",
3-
"type": "SoftwareSourceCode",
4-
"author": [
5-
{
6-
"id": "https://orcid.org/0009-0005-6868-4511",
7-
"type": "Person",
8-
"affiliation": {
9-
"type": "Organization",
10-
"name": "Ontology Engineering Group, Universidad Politecnica de Madrid"
11-
},
12-
"email": "a.elhounsri@upm.es",
13-
"familyName": "El Hounsri",
14-
"givenName": "Anas"
15-
}
16-
],
17-
"codeRepository": "git+https://github.com/SoftwareUnderstanding/RsMetaCheck.git",
18-
"dateModified": "2025-09-19",
19-
"description": "Automated tool to detect metadata quality pitfalls in software repositories (Python, Java, C++, etc.). Analyzes SoMEF output files for version mismatches, license issues, broken URLs, and more. ",
20-
"downloadUrl": "https://github.com/SoftwareUnderstanding/RsMetaCheck/archive/refs/heads/main.tar.gz",
21-
"keywords": [
22-
"codemeta",
23-
"pitfalls",
24-
"metadata",
25-
"analysis"
26-
],
27-
"license": "https://spdx.org/licenses/MIT",
28-
"name": "RsMetaCheck",
29-
"operatingSystem": "Linux",
30-
"programmingLanguage": "Python",
31-
"version": "0.1.1",
32-
"developmentStatus": "active",
33-
"issueTracker": "https://github.com/SoftwareUnderstanding/RsMetaCheck/issues"
34-
}
2+
"@context": "https://w3id.org/codemeta/3.0",
3+
"type": "SoftwareSourceCode",
4+
"author": [
5+
{
6+
"id": "https://orcid.org/0009-0005-6868-4511",
7+
"type": "Person",
8+
"affiliation": {
9+
"type": "Organization",
10+
"name": "Ontology Engineering Group, Universidad Politecnica de Madrid"
11+
},
12+
"email": "a.elhounsri@upm.es",
13+
"familyName": "El Hounsri",
14+
"givenName": "Anas"
15+
}
16+
],
17+
"codeRepository": "git+https://github.com/SoftwareUnderstanding/RsMetaCheck.git",
18+
"dateModified": "2025-09-19",
19+
"description": "Automated tool to detect metadata quality pitfalls in software repositories (Python, Java, C++, etc.). Analyzes SoMEF output files for version mismatches, license issues, broken URLs, and more. ",
20+
"downloadUrl": "https://github.com/SoftwareUnderstanding/RsMetaCheck/archive/refs/heads/main.tar.gz",
21+
"keywords": ["codemeta", "pitfalls", "metadata", "analysis"],
22+
"license": "https://spdx.org/licenses/MIT",
23+
"name": "RsMetaCheck",
24+
"operatingSystem": "Linux",
25+
"programmingLanguage": "Python",
26+
"version": "0.2.0",
27+
"developmentStatus": "active",
28+
"issueTracker": "https://github.com/SoftwareUnderstanding/RsMetaCheck/issues"
29+
}

poetry.lock

Lines changed: 391 additions & 343 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/metacheck/cli.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,12 @@ def cli():
3535
help="SoMEF confidence threshold (default: 0.8). Only used when running SoMEF."
3636
)
3737

38+
parser.add_argument(
39+
"--verbose",
40+
action="store_true",
41+
help="Include both detected AND undetected pitfalls in the output JSON-LD."
42+
)
43+
3844
args = parser.parse_args()
3945

4046
if args.skip_somef:
@@ -52,7 +58,7 @@ def cli():
5258
return
5359

5460
print(f"Analyzing {len(somef_json_paths)} SoMEF output files...")
55-
run_analysis(somef_json_paths, args.pitfalls_output, args.analysis_output)
61+
run_analysis(somef_json_paths, args.pitfalls_output, args.analysis_output, verbose=args.verbose)
5662

5763
else:
5864
threshold = args.threshold
@@ -71,7 +77,7 @@ def cli():
7177
print(f"Warning: Skipping invalid input (not a URL or existing file): {input_item}")
7278

7379
print(f"\nRunning analysis on outputs in {somef_output_dir}...")
74-
run_analysis(somef_output_dir, args.pitfalls_output, args.analysis_output)
80+
run_analysis(somef_output_dir, args.pitfalls_output, args.analysis_output, verbose=args.verbose)
7581

7682

7783
if __name__ == "__main__":

src/metacheck/detect_pitfalls_main.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
from metacheck.scripts.warnings.w010 import detect_git_remote_shorthand_pitfall
3939

4040

41-
def detect_all_pitfalls(json_files: Iterable[Path], pitfalls_output_dir: Union[str, Path], output_file: Union[str, Path]):
41+
def detect_all_pitfalls(json_files: Iterable[Path], pitfalls_output_dir: Union[str, Path], output_file: Union[str, Path], verbose: bool = False):
4242
"""
4343
Detect all software repository pitfalls in SoMEF output files using modular detectors.
4444
Now also generates individual JSON-LD files for each repository.
@@ -363,8 +363,8 @@ def detect_all_pitfalls(json_files: Iterable[Path], pitfalls_output_dir: Union[s
363363
for result in repo_pitfall_results
364364
)
365365

366-
if has_any_issue:
367-
jsonld_data = create_pitfall_jsonld(somef_data, repo_pitfall_results, json_file.name)
366+
if has_any_issue or verbose:
367+
jsonld_data = create_pitfall_jsonld(somef_data, repo_pitfall_results, json_file.name, verbose=verbose)
368368
saved_file = save_individual_pitfall_jsonld(jsonld_data, pitfalls_output_dir, json_file.name)
369369

370370
if saved_file:
@@ -412,7 +412,7 @@ def detect_all_pitfalls(json_files: Iterable[Path], pitfalls_output_dir: Union[s
412412
print(f"Error writing output file: {e}")
413413

414414

415-
def main(input_dir=None, somef_json_paths=None, pitfalls_dir=None, analysis_output=None):
415+
def main(input_dir=None, somef_json_paths=None, pitfalls_dir=None, analysis_output=None, verbose=False):
416416
"""
417417
Main function to run all pitfall detections.
418418
@@ -421,6 +421,7 @@ def main(input_dir=None, somef_json_paths=None, pitfalls_dir=None, analysis_outp
421421
somef_json_paths (Iterable[Path], optional): Explicit list of SoMEF output JSON files.
422422
pitfalls_dir (str|Path, optional): Directory to save pitfall JSON-LD files.
423423
analysis_output (str|Path, optional): Path to save summary results JSON.
424+
verbose (bool, optional): Include both detected AND undetected pitfalls in JSON-LD.
424425
425426
Note: Provide either input_dir OR somef_json_paths, not both.
426427
If both are provided, somef_json_paths takes precedence.
@@ -448,7 +449,7 @@ def main(input_dir=None, somef_json_paths=None, pitfalls_dir=None, analysis_outp
448449
print("No JSON files found for analysis.")
449450
return
450451

451-
detect_all_pitfalls(json_files, pitfalls_directory, output_file)
452+
detect_all_pitfalls(json_files, pitfalls_directory, output_file, verbose)
452453

453454
if __name__ == "__main__":
454455
main()

src/metacheck/run_analyzer.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from metacheck.detect_pitfalls_main import main
44

55

6-
def run_analysis(somef_input: Union[str, Path, Iterable[Path]], pitfalls_dir: Union[str, Path], analysis_file: Union[str, Path]):
6+
def run_analysis(somef_input: Union[str, Path, Iterable[Path]], pitfalls_dir: Union[str, Path], analysis_file: Union[str, Path], verbose: bool = False):
77
"""
88
Run metadata analysis using existing code.
99
@@ -12,17 +12,18 @@ def run_analysis(somef_input: Union[str, Path, Iterable[Path]], pitfalls_dir: Un
1212
or an iterable of Path objects pointing to specific SoMEF JSON files
1313
pitfalls_dir: Directory to save pitfall JSON-LD files
1414
analysis_file: Path to save summary results JSON
15+
verbose: If True, include both detected and undetected pitfalls in the output JSON-LD.
1516
"""
1617
print(f"\nRunning analysis...")
1718

1819
if isinstance(somef_input, (str, Path)):
1920
somef_path = Path(somef_input)
2021
if somef_path.is_dir():
2122
print(f"Using directory: {somef_input}")
22-
main(input_dir=somef_input, pitfalls_dir=pitfalls_dir, analysis_output=analysis_file)
23+
main(input_dir=somef_input, pitfalls_dir=pitfalls_dir, analysis_output=analysis_file, verbose=verbose)
2324
else:
2425
print(f"Error: {somef_input} is not a valid directory")
2526
else:
2627
json_files = list(somef_input)
2728
print(f"Using {len(json_files)} specified JSON files")
28-
main(somef_json_paths=json_files, pitfalls_dir=pitfalls_dir, analysis_output=analysis_file)
29+
main(somef_json_paths=json_files, pitfalls_dir=pitfalls_dir, analysis_output=analysis_file, verbose=verbose)

src/metacheck/scripts/warnings/w003.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,13 @@ def detect_dual_license_missing_codemeta_pitfall(somef_data: Dict, file_name: st
2626
r"dually[\s-]?licen[cs]ed?",
2727
r"multiple[\s-]?licen[cs]es?",
2828
r"(?:is|are)\s+licen[cs]ed?\s+under.*(?:and|or).*licen[cs]e",
29-
r"choose.*(?:between|from).*licen[cs]e",
29+
r"choose.*(?:between|from|your).*licen[cs]e",
3030
r"either.*or.*licen[cs]e",
3131
r"\d+\..*licen[cs]e.*\n.*\d+\..*licen[cs]e",
3232
r"licen[cs]e.*options?",
33-
r"available\s+under.*(?:two|multiple|either).*licen[cs]es?"
34-
]
33+
r"available\s+under.*(?:two|multiple|either).*licen[cs]es?",
34+
r"licen[cs]ed? under.*(?:and|or)"
35+
]
3536

3637
has_dual_license_indicator = False
3738
dual_license_source = None

src/metacheck/scripts/warnings/w009.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def detect_development_status_url_pitfall(somef_data: Dict, file_name: str) -> D
3131
Detect when codemeta.json developmentStatus is a URL instead of a string.
3232
"""
3333
result = {
34-
"has_pitfall": False,
34+
"has_warning": False,
3535
"file_name": file_name,
3636
"development_status": None,
3737
"source": None,
@@ -54,7 +54,7 @@ def detect_development_status_url_pitfall(somef_data: Dict, file_name: str) -> D
5454
dev_status = entry["result"]["value"]
5555

5656
if is_url(dev_status):
57-
result["has_pitfall"] = True
57+
result["has_warning"] = True
5858
result["development_status"] = dev_status
5959
result["source"] = source
6060
result["is_url"] = True

src/metacheck/scripts/warnings/w010.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def detect_git_remote_shorthand_pitfall(somef_data: Dict, file_name: str) -> Dic
3232
Detect when metadata files use Git remote-style shorthand in codeRepository.
3333
"""
3434
result = {
35-
"has_pitfall": False,
35+
"has_warning": False,
3636
"file_name": file_name,
3737
"repository_url": None,
3838
"source": None,
@@ -64,7 +64,7 @@ def detect_git_remote_shorthand_pitfall(somef_data: Dict, file_name: str) -> Dic
6464
repo_url = entry["result"]["value"]
6565

6666
if is_git_remote_shorthand(repo_url):
67-
result["has_pitfall"] = True
67+
result["has_warning"] = True
6868
result["repository_url"] = repo_url
6969
result["source"] = source if source else f"technique: {technique}"
7070
result["metadata_source_file"] = extract_metadata_source_filename(source)

src/metacheck/utils/json_ld_utils.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -444,10 +444,11 @@ def convert_sets_to_lists(obj):
444444
return obj
445445

446446

447-
def create_pitfall_jsonld(somef_data: Dict, pitfall_results: List[Dict], file_name: str) -> Dict:
447+
def create_pitfall_jsonld(somef_data: Dict, pitfall_results: List[Dict], file_name: str, verbose: bool = False) -> Dict:
448448
"""
449449
Create a JSON-LD structure for detected pitfalls following the sample format.
450450
"""
451+
import hashlib
451452
software_info = extract_software_info_from_somef(somef_data)
452453
description_info = extract_description_info(somef_data)
453454

@@ -468,9 +469,17 @@ def create_pitfall_jsonld(somef_data: Dict, pitfall_results: List[Dict], file_na
468469
}
469470

470471
for pitfall_result in pitfall_results:
471-
if pitfall_result.get("has_pitfall", False) or pitfall_result.get("has_warning", False):
472+
has_pitfall = pitfall_result.get("has_pitfall", False)
473+
has_warning = pitfall_result.get("has_warning", False)
474+
has_issue = has_pitfall or has_warning
475+
476+
if has_issue or verbose:
472477
pitfall_code = pitfall_result.get("pitfall_code", "Unknown")
473478
category = get_pitfall_category(pitfall_code)
479+
480+
output_val = "true" if has_issue else "false"
481+
evidence_val = format_evidence_text(pitfall_code, pitfall_result) if has_issue else f"{pitfall_code} not detected:"
482+
suggestion_val = get_suggestion_text(pitfall_code) if has_issue else ""
474483

475484
check_result = {
476485
"@type": "CheckResult",
@@ -483,10 +492,13 @@ def create_pitfall_jsonld(somef_data: Dict, pitfall_results: List[Dict], file_na
483492
},
484493
"process": get_pitfall_description(pitfall_code),
485494
"status": {"@id": "schema:CompletedActionStatus"},
486-
"checkId": pitfall_code,
487-
"evidence": format_evidence_text(pitfall_code, pitfall_result),
488-
"suggestion": get_suggestion_text(pitfall_code)
495+
"output": output_val,
496+
"evidence": evidence_val,
497+
"suggestion": suggestion_val
489498
}
499+
500+
check_hash = hashlib.sha256(json.dumps(check_result, sort_keys=True).encode("utf-8")).hexdigest()
501+
check_result["checkId"] = check_hash
490502

491503
jsonld_output["checks"].append(check_result)
492504

0 commit comments

Comments
 (0)