Skip to content

Commit ff4945b

Browse files
Correct final patch for #15, now passes all tests
1 parent 0d8da69 commit ff4945b

1 file changed

Lines changed: 79 additions & 83 deletions

File tree

src/metacheck/scripts/p006.py

Lines changed: 79 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -1,84 +1,80 @@
1-
from typing import Dict, List
2-
import re
3-
from urllib.parse import urlparse
4-
5-
6-
def is_homepage_url(url: str) -> bool:
7-
"""
8-
Check if a URL appears to be a homepage/wiki rather than a README file.
9-
Returns True if it's likely a homepage.
10-
"""
11-
if not url:
12-
return False
13-
14-
url_lower = url.lower()
15-
16-
if 'raw.githubusercontent.com' in url_lower:
17-
return False
18-
19-
# Check for documentation sites and wikis
20-
homepage_indicators = [
21-
'.readthedocs.io',
22-
'.github.io',
23-
'wiki',
24-
'docs.',
25-
'documentation'
26-
]
27-
28-
if ('github.com' in url_lower or 'gitlab.com' in url_lower):
29-
30-
if 'readme' in url_lower or 'blob/' in url_lower:
31-
return False
32-
33-
return True
34-
35-
# Check for other homepage indicators
36-
for indicator in homepage_indicators:
37-
if indicator in url_lower:
38-
return True
39-
40-
# Check for generic domains (.org, .com, .net) but more specific
41-
if any(domain in url_lower for domain in ['.org', '.com', '.net']):
42-
if any(ext in url_lower for ext in ['.md', '.txt', '.rst', '.html', 'readme']):
43-
return False
44-
45-
if '/' in url_lower.split('.')[-1]:
46-
return False
47-
return True
48-
49-
return False
50-
51-
52-
def detect_readme_homepage_pitfall(somef_data: Dict, file_name: str) -> Dict:
53-
"""
54-
Detect when README property in codemeta.json points to homepage/wiki instead of README file.
55-
"""
56-
result = {
57-
"has_pitfall": False,
58-
"file_name": file_name,
59-
"readme_url": None,
60-
"source": None,
61-
"is_homepage": False
62-
}
63-
64-
if "readme_url" not in somef_data:
65-
return result
66-
67-
readme_entries = somef_data["readme_url"]
68-
if not isinstance(readme_entries, list):
69-
return result
70-
71-
for entry in readme_entries:
72-
if "technique" in entry and entry["technique"] == "code_parser":
73-
if "source" in entry and "codemeta.json" in entry["source"]:
74-
if "result" in entry and "value" in entry["result"]:
75-
readme_url = entry["result"]["value"]
76-
77-
if is_homepage_url(readme_url):
78-
result["has_pitfall"] = True
79-
result["readme_url"] = readme_url
80-
result["source"] = entry["source"]
81-
result["is_homepage"] = True
82-
break
83-
1+
from typing import Dict, List
2+
import re
3+
from urllib.parse import urlparse
4+
5+
6+
7+
def is_homepage_url(url: str) -> bool:
    """
    Check if a URL appears to be a homepage/wiki rather than a README file.

    The check is a case-insensitive *substring* heuristic applied to the whole
    URL string (host, path and query alike), evaluated in this order:

    1. Empty or non-string values are never homepages.
    2. ``raw.githubusercontent.com`` URLs are not homepages.
    3. ``github.com`` / ``gitlab.com`` URLs are homepages unless they contain
       ``readme`` or ``blob/``.
    4. URLs containing a documentation/wiki indicator are homepages.
    5. URLs containing ``.org``, ``.com`` or ``.net`` are homepages unless
       they also contain a file-like marker (``.md``, ``.txt``, ``.rst``,
       ``.html`` or ``readme``).

    Args:
        url: The URL to classify. Falsy or non-string values yield False.

    Returns:
        True if it's likely a homepage, False otherwise.
    """
    # Guard against non-string values (e.g. a list or dict taken from parsed
    # metadata): calling .lower() on those would raise AttributeError.
    if not url or not isinstance(url, str):
        return False

    url_lower = url.lower()

    if 'raw.githubusercontent.com' in url_lower:
        return False

    # Code-hosting sites: anything that is not a README / blob link is
    # treated as a repository landing page.
    if 'github.com' in url_lower or 'gitlab.com' in url_lower:
        return not ('readme' in url_lower or 'blob/' in url_lower)

    # Check for documentation sites and wikis
    homepage_indicators = (
        '.readthedocs.io',
        '.github.io',
        'wiki',
        'docs.',
        'documentation',
    )
    if any(indicator in url_lower for indicator in homepage_indicators):
        return True

    # Check for generic domains (.org, .com, .net); a file-like marker
    # anywhere in the URL means it points at a document, not a homepage.
    generic_domains = ('.org', '.com', '.net')
    file_markers = ('.md', '.txt', '.rst', '.html', 'readme')
    if any(domain in url_lower for domain in generic_domains):
        return not any(marker in url_lower for marker in file_markers)

    return False
46+
47+
48+
def detect_readme_homepage_pitfall(somef_data: Dict, file_name: str) -> Dict:
    """
    Detect when README property in codemeta.json points to homepage/wiki instead of README file.

    Scans ``somef_data["readme_url"]`` for the first entry whose ``technique``
    is ``"code_parser"``, whose ``source`` contains ``"codemeta.json"`` and
    whose ``result["value"]`` is classified as a homepage by
    ``is_homepage_url``. Entries that are malformed (not a dict, missing
    keys, ``None`` fields) are skipped instead of raising.

    Args:
        somef_data: Mapping that may hold a ``"readme_url"`` list of entries.
        file_name: Name echoed back in the result under ``"file_name"``.

    Returns:
        A dict with keys ``has_pitfall``, ``file_name``, ``readme_url``,
        ``source`` and ``is_homepage``. ``has_pitfall`` and ``is_homepage``
        are True only when a matching entry was found, in which case
        ``readme_url`` and ``source`` are taken from that entry.
    """
    result = {
        "has_pitfall": False,
        "file_name": file_name,
        "readme_url": None,
        "source": None,
        "is_homepage": False
    }

    # Nothing to inspect for None / empty input or a missing key.
    if not somef_data or "readme_url" not in somef_data:
        return result

    readme_entries = somef_data["readme_url"]
    if not isinstance(readme_entries, list):
        return result

    for entry in readme_entries:
        # Skip anything that is not a well-formed entry rather than raising
        # TypeError on `in` / subscripting.
        if not isinstance(entry, dict):
            continue
        if entry.get("technique") != "code_parser":
            continue

        source = entry.get("source")
        # `in` is only meaningful on strings/sequences; None or numbers would raise.
        if not isinstance(source, (str, list, tuple)) or "codemeta.json" not in source:
            continue

        entry_result = entry.get("result")
        if not isinstance(entry_result, dict) or "value" not in entry_result:
            continue

        readme_url = entry_result["value"]
        if is_homepage_url(readme_url):
            result["has_pitfall"] = True
            result["readme_url"] = readme_url
            result["source"] = source
            result["is_homepage"] = True
            break  # first offending entry is enough

    return result

0 commit comments

Comments
 (0)