Skip to content

Commit dc2780e

Browse files
Merge pull request #17 from Anas-Elhounsri/dev
Skip somef feature added and updated the README
2 parents ef3f7ee + 165f332 commit dc2780e

4 files changed

Lines changed: 550 additions & 470 deletions

File tree

README.md

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,13 +48,13 @@ repositories you want to analyze.
4848
### 2. Directory Structure [NEEDS AN UPDATE]
4949
```
5050
project/ detect_pitfalls_main.py
51-
+-- somef_outputs/ # Directory containing SoMEF JSON files ¦
52-
+-- repository1.json ¦
53-
+-- repository2.json ¦
51+
+-- somef_outputs/ # Directory containing SoMEF JSON files
52+
+-- repository1.json
53+
+-- repository2.json
5454
+-- ...
55-
+-- scripts/ # Individual pitfall detector modules ¦
56-
+-- p001.py ¦
57-
+-- p002.py ¦
55+
+-- scripts/ # Individual pitfall detector modules
56+
+-- p001.py
57+
+-- p002.py
5858
+-- ...
5959
+-- all_pitfalls_results.json # Generated output file
6060
@@ -108,6 +108,15 @@ The results will be like the following:
108108
./results/pitfalls/
109109
./results/summary.json
110110
```
111+
112+
If you have already run SoMEF separately and only want to run the analysis, you can skip the SoMEF step with this command:
113+
114+
`python -m metacheck.cli --skip-somef --input somef_outputs/*.json
115+
`
116+
or if you wish to run for multiple paths:
117+
118+
`python -m metacheck.cli --skip-somef --input my_somef_outputs_1/*.json my_somef_outputs_2/*.json
119+
`
111120
### 4. Output
112121

113122
The tool will:
@@ -130,7 +139,7 @@ The output file contains:
130139

131140
1. **"There is no valid repository URL" error**: Ensure the JSON file that contains the repositories
132141
has a valid structure and that you are inputting the correct path
133-
2. **Network timeouts**: Some pitfalls validate URLs and may time out — this is normal behavior
142+
2. **Network timeouts**: Some pitfalls validate URLs and may time out; this is normal behavior
134143

135144
### Performance Notes
136145

src/metacheck/cli.py

Lines changed: 73 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,46 +1,73 @@
1-
import argparse
2-
import os
3-
from metacheck.run_somef import run_somef_batch
4-
from metacheck.run_analyzer import run_analysis
5-
6-
def cli():
    """Command-line entry point for detecting metadata pitfalls with SoMEF.

    Parses the command-line arguments, passes every existing ``--input``
    file to ``run_somef_batch`` (with ``./somef_outputs`` as the output
    directory argument) and then calls ``run_analysis`` on that directory.

    If none of the given input files exist, an error is printed and the
    analysis step is skipped, so that a missing or stale ``somef_outputs``
    directory is never analyzed by accident.

    Returns:
        None.
    """
    parser = argparse.ArgumentParser(description="Detect metadata pitfalls in software repositories using SoMEF.")
    parser.add_argument(
        "--input",
        nargs="+",  # accepts multiple files
        required=True,
        help="One or more JSON files containing repositories (e.g., GitHub, GitLab)."
    )
    parser.add_argument(
        "--pitfalls-output",
        default=os.path.join(os.getcwd(), "pitfalls_outputs"),
        help="Directory to store pitfall JSON-LD files (default: ./pitfalls_outputs)."
    )
    parser.add_argument(
        "--analysis-output",
        default=os.path.join(os.getcwd(), "analysis_results.json"),
        help="File path for summary results (default: ./analysis_results.json)."
    )
    parser.add_argument(
        "--threshold",
        type=float,
        default=0.8,
        help="SoMEF confidence threshold (default: 0.8)."
    )

    args = parser.parse_args()
    threshold = args.threshold
    somef_output_dir = os.path.join(os.getcwd(), "somef_outputs")

    print(f"Detected {len(args.input)} input files:")
    processed = 0
    for json_path in args.input:
        if not os.path.exists(json_path):
            print(f"Skipping missing file: {json_path}")
            continue
        print(f"Processing repositories from {json_path}")
        run_somef_batch(json_path, somef_output_dir, threshold)
        processed += 1

    # Without at least one processed input there is nothing fresh to analyze.
    if not processed:
        print("Error: No valid input files found.")
        return

    run_analysis(somef_output_dir, args.pitfalls_output, args.analysis_output)
44-
45-
# Run the command-line interface only when this module is executed as a script.
if __name__ == "__main__":
    cli()
1+
import argparse
2+
import os
3+
from pathlib import Path
4+
from metacheck.run_somef import run_somef_batch
5+
from metacheck.run_analyzer import run_analysis
6+
7+
8+
def cli():
    """Command-line entry point for detecting metadata pitfalls with SoMEF.

    Two modes are supported:

    * Default: every existing ``--input`` file is passed to
      ``run_somef_batch`` (with ``./somef_outputs`` as the output directory
      argument), then ``run_analysis`` is called on that directory.
    * ``--skip-somef``: the existing ``--input`` files are treated as SoMEF
      output files and handed to ``run_analysis`` directly as a list of
      ``Path`` objects.

    In both modes an error is printed and the analysis step is skipped when
    none of the given input files exist.

    Returns:
        None.
    """
    parser = argparse.ArgumentParser(description="Detect metadata pitfalls in software repositories using SoMEF.")
    parser.add_argument(
        "--input",
        nargs="+",
        required=True,
        help="One or more JSON files containing repositories (e.g., GitHub, GitLab) OR existing SoMEF output files when using --skip-somef."
    )
    parser.add_argument(
        "--skip-somef",
        action="store_true",
        help="Skip SoMEF execution and analyze existing SoMEF output files directly. --input should point to SoMEF JSON files."
    )
    parser.add_argument(
        "--pitfalls-output",
        default=os.path.join(os.getcwd(), "pitfalls_outputs"),
        help="Directory to store pitfall JSON-LD files (default: ./pitfalls_outputs)."
    )
    parser.add_argument(
        "--analysis-output",
        default=os.path.join(os.getcwd(), "analysis_results.json"),
        help="File path for summary results (default: ./analysis_results.json)."
    )
    parser.add_argument(
        "--threshold",
        type=float,
        default=0.8,
        help="SoMEF confidence threshold (default: 0.8). Only used when running SoMEF."
    )

    args = parser.parse_args()

    if args.skip_somef:
        print(f"Skipping SoMEF execution. Analyzing {len(args.input)} existing SoMEF output files...")

        somef_json_paths = []
        for json_path in args.input:
            if not os.path.exists(json_path):
                print(f"Warning: File not found, skipping: {json_path}")
                continue
            somef_json_paths.append(Path(json_path))

        if not somef_json_paths:
            print("Error: No valid SoMEF output files found.")
            return

        print(f"Analyzing {len(somef_json_paths)} SoMEF output files...")
        run_analysis(somef_json_paths, args.pitfalls_output, args.analysis_output)
        return

    threshold = args.threshold
    somef_output_dir = os.path.join(os.getcwd(), "somef_outputs")

    print(f"Detected {len(args.input)} input files:")
    processed = 0
    for json_path in args.input:
        if not os.path.exists(json_path):
            print(f"Skipping missing file: {json_path}")
            continue
        print(f"Processing repositories from {json_path}")
        run_somef_batch(json_path, somef_output_dir, threshold)
        processed += 1

    # Mirror the --skip-somef guard: without at least one processed input,
    # the output directory may be missing or hold stale results.
    if not processed:
        print("Error: No valid input files found.")
        return

    run_analysis(somef_output_dir, args.pitfalls_output, args.analysis_output)
70+
71+
72+
# Run the command-line interface only when this module is executed as a script.
if __name__ == "__main__":
    cli()

0 commit comments

Comments
 (0)