"""Offline unit tests for the pure parsers in ``site_audit`` (no network access)."""
import os
import sys
import unittest

# The audit script lives outside the tests directory; put it on the import path.
SCRIPTS = os.path.join(os.path.dirname(__file__), "..", "wordpress-api-pro", "scripts")
sys.path.insert(0, os.path.abspath(SCRIPTS))

import site_audit as sa  # noqa: E402


class CmsTest(unittest.TestCase):
    """CMS / PHP detection from page markup and response headers."""

    def test_detects_wordpress_and_version_from_generator(self):
        html = '<meta name="generator" content="WordPress 6.5.2" />'
        out = sa.parse_cms(html, {})
        self.assertTrue(out["is_wordpress"])
        self.assertEqual(out["wp_version"], "6.5.2")

    def test_detects_wp_from_wp_content_when_no_generator(self):
        html = '<link rel="stylesheet" href="/wp-content/themes/acme/style.css">'
        out = sa.parse_cms(html, {})
        self.assertTrue(out["is_wordpress"])

    def test_php_version_from_x_powered_by(self):
        out = sa.parse_cms("", {"X-Powered-By": "PHP/8.1.27"})
        self.assertEqual(out["php_version"], "8.1.27")

    def test_non_wp(self):
        self.assertFalse(sa.parse_cms("nothing", {})["is_wordpress"])


class SeoTest(unittest.TestCase):
    """Extraction of the basic on-page SEO signals."""

    def test_extracts_title_and_description_and_h1_and_canonical(self):
        html = ('<html><head><title>Acme — Home</title>'
                '<meta name="description" content="We build things.">'
                '<link rel="canonical" href="https://acme.test/"></head>'
                '<body><h1>Hi</h1></body></html>')
        out = sa.parse_seo(html)
        self.assertEqual(out["title"], "Acme — Home")
        self.assertEqual(out["meta_description"], "We build things.")
        self.assertEqual(out["h1_count"], 1)
        self.assertTrue(out["has_canonical"])

    def test_missing_fields(self):
        out = sa.parse_seo("")
        self.assertIsNone(out["title"])
        self.assertIsNone(out["meta_description"])
        self.assertEqual(out["h1_count"], 0)
        self.assertFalse(out["has_canonical"])


class HeadersTest(unittest.TestCase):
    """Classification of security headers into present / missing."""

    def test_present_and_missing_security_headers(self):
        out = sa.analyze_headers({
            "Strict-Transport-Security": "max-age=63072000",
            "X-Content-Type-Options": "nosniff",
        })
        self.assertIn("Strict-Transport-Security", out["present"])
        self.assertIn("Content-Security-Policy", out["missing"])
        self.assertIn("X-Frame-Options", out["missing"])


class SslTest(unittest.TestCase):
    """Certificate expiry arithmetic with an injected 'now'."""

    def test_days_left_positive(self):
        now = sa._parse_cert_time("Jan 1 00:00:00 2026 GMT")
        days = sa.ssl_days_left("Mar 2 00:00:00 2026 GMT", now=now)
        self.assertEqual(days, 60)


class PageSpeedTest(unittest.TestCase):
    """Lighthouse score to pass/warn/fail grading."""

    def test_grade(self):
        self.assertEqual(sa.grade_pagespeed(0.95), "pass")
        self.assertEqual(sa.grade_pagespeed(0.80), "warn")
        self.assertEqual(sa.grade_pagespeed(0.50), "fail")


if __name__ == "__main__":
    unittest.main()
#!/usr/bin/env python3
"""No-auth Tier-1 website audit — public signals only (PageSpeed/SSL/headers/CMS/SEO).

Run cold, before any engagement, as the sales-hook quick scan. Read-only public
fetches; no credentials. Outputs findings JSON (default) or a 1-page --summary.

Usage:
    python3 site_audit.py https://example.com
    python3 site_audit.py https://example.com --summary
Env (optional): PAGESPEED_API_KEY (higher PageSpeed Insights quota)

Exit codes: 0 = no failing finding (also used when the site is unreachable),
1 = no URL given, 2 = site reachable and at least one finding has status "fail".
"""
import argparse
import json
import os
import re
import socket
import ssl
import sys
import urllib.error
import urllib.parse
import urllib.request
from datetime import datetime, timezone

UA = "Mozilla/5.0 (compatible; DigitizerAudit/1.0)"
SECURITY_HEADERS = [
    "Strict-Transport-Security", "Content-Security-Policy", "X-Frame-Options",
    "X-Content-Type-Options", "Referrer-Policy",
]
# Lowest PHP (major, minor) that audit() does not flag as end-of-life.
MIN_SUPPORTED_PHP = (8, 1)
# Upper bound on how much of a response body _get() reads into memory.
MAX_BODY_BYTES = 5 * 1024 * 1024

# A whole <meta ...> tag; quoted attribute values may themselves contain '>'.
_META_TAG_RE = re.compile(r"""<meta\b(?:[^>"']|"[^"]*"|'[^']*')*>""", re.I)
# One quoted attribute: name="value" or name='value'.
_ATTR_RE = re.compile(r"""([^\s"'<>/=]+)\s*=\s*(?:"([^"]*)"|'([^']*)')""")


# ---- pure parsers (unit-tested, no network) --------------------------------
def _meta_content(html, name):
    """Return the ``content`` of the first ``<meta name=NAME ...>`` tag, or None.

    Attributes may appear in any order and use either quote style; the value is
    delimited by its own opening quote, so an apostrophe inside a double-quoted
    value does not truncate it.
    """
    for tag in _META_TAG_RE.finditer(html):
        attrs = {}
        for m in _ATTR_RE.finditer(tag.group(0)):
            value = m.group(2) if m.group(2) is not None else m.group(3)
            # First occurrence of a repeated attribute wins.
            attrs.setdefault(m.group(1).lower(), value)
        if attrs.get("name", "").lower() == name and "content" in attrs:
            return attrs["content"]
    return None


def parse_cms(html, headers):
    """Detect WordPress and PHP from page markup and response headers.

    Args:
        html: page source (None is treated as an empty string).
        headers: mapping of response header names to values (None allowed);
            names are matched case-insensitively.

    Returns:
        dict with ``is_wordpress`` (bool), ``wp_version`` and ``php_version``
        (str or None) and ``generator`` (raw generator meta content or None).
    """
    html = html or ""
    lowered = {k.lower(): v for k, v in (headers or {}).items()}

    generator = _meta_content(html, "generator") or None
    is_wp = (
        bool(generator and "wordpress" in generator.lower())
        or "/wp-content/" in html
        or "/wp-includes/" in html
        or "/wp-json" in html
    )

    wp_version = None
    if generator:
        m = re.search(r'WordPress\s+([0-9.]+)', generator, re.I)
        if m:
            wp_version = m.group(1)

    php = None
    mp = re.search(r'PHP/([0-9.]+)', lowered.get("x-powered-by", "") or "")
    if mp:
        php = mp.group(1)

    return {"is_wordpress": is_wp, "wp_version": wp_version, "php_version": php, "generator": generator}


def parse_seo(html):
    """Extract basic on-page SEO signals from page source.

    Returns:
        dict with ``title`` and ``meta_description`` (stripped str, or None when
        the tag is absent), ``h1_count`` (number of opening <h1> tags) and
        ``has_canonical`` (True when a <link rel="canonical"> is present).
    """
    html = html or ""
    t = re.search(r'<title[^>]*>(.*?)</title>', html, re.I | re.S)
    desc = _meta_content(html, "description")
    return {
        "title": t.group(1).strip() if t else None,
        "meta_description": desc.strip() if desc is not None else None,
        "h1_count": len(re.findall(r'<h1[\s>]', html, re.I)),
        "has_canonical": bool(re.search(r'<link[^>]+rel=["\']canonical["\']', html, re.I)),
    }


def analyze_headers(headers):
    """Split SECURITY_HEADERS into those present and those missing.

    Header names are compared case-insensitively; both result lists keep the
    order of SECURITY_HEADERS.
    """
    present_keys = {k.lower() for k in (headers or {})}
    present, missing = [], []
    for h in SECURITY_HEADERS:
        (present if h.lower() in present_keys else missing).append(h)
    return {"present": present, "missing": missing}


def _parse_cert_time(s):
    """Parse a certificate ``notAfter`` string into an aware UTC datetime.

    Expects the OpenSSL format, e.g. "Mar  2 00:00:00 2026 GMT". Uses
    ssl.cert_time_to_seconds so month names are parsed independently of the
    process locale. Raises ValueError on any other format.
    """
    return datetime.fromtimestamp(ssl.cert_time_to_seconds(s), tz=timezone.utc)


def ssl_days_left(notafter, now=None):
    """Return whole days from ``now`` (default: current UTC time) until ``notafter``.

    The result is negative once the certificate has expired.
    """
    end = _parse_cert_time(notafter)
    if now is None:
        now = datetime.now(timezone.utc)
    return (end - now).days


def grade_pagespeed(score):
    """Grade a 0..1 Lighthouse score: >=0.9 pass, >=0.7 warn, else fail.

    ``None`` (no score available) grades as "skipped".
    """
    if score is None:
        return "skipped"
    if score >= 0.9:
        return "pass"
    if score >= 0.7:
        return "warn"
    return "fail"


def php_is_supported(version):
    """Return True when ``version`` ("8.2.12") is at least MIN_SUPPORTED_PHP.

    Compares numerically on (major, minor) so releases newer than the ones
    known today are not misreported as end-of-life. Unparseable input is
    treated as unsupported.
    """
    m = re.match(r'(\d+)\.(\d+)', version or "")
    if not m:
        return False
    return (int(m.group(1)), int(m.group(2))) >= MIN_SUPPORTED_PHP


def origin_of(url):
    """Return ``scheme://host[:port]`` for ``url``, keeping an explicit port."""
    parsed = urllib.parse.urlparse(url)
    host = parsed.hostname or ""
    if ":" in host:
        # IPv6 literals must be bracketed inside a URL.
        host = f"[{host}]"
    if parsed.port is not None:
        host = f"{host}:{parsed.port}"
    return f"{parsed.scheme}://{host}"


# ---- fetching (network; not unit-tested) -----------------------------------
def _get(url, method="GET", timeout=15, max_bytes=MAX_BODY_BYTES):
    """Fetch ``url`` and return ``(status, headers, final_url, body)``.

    HTTP error statuses (4xx/5xx) are returned like any other response instead
    of raising, so callers can report the status and still inspect the headers.
    Connection-level failures (DNS, TLS, timeout) still raise. The body is
    empty for non-GET methods and is capped at ``max_bytes``.
    """
    req = urllib.request.Request(url, method=method, headers={"User-Agent": UA})
    try:
        resp = urllib.request.urlopen(req, timeout=timeout)
    except urllib.error.HTTPError as err:
        # HTTPError doubles as a response object (status, headers, body).
        resp = err
    try:
        raw = resp.read(max_bytes) if method == "GET" else b""
        charset = resp.headers.get_content_charset() or "utf-8"
        try:
            body = raw.decode(charset, "replace")
        except LookupError:
            # Server advertised an encoding Python does not know.
            body = raw.decode("utf-8", "replace")
        return resp.getcode(), dict(resp.headers), resp.geturl(), body
    finally:
        resp.close()


def _ssl_notafter(host, port=443, timeout=10):
    """Return the ``notAfter`` string of the certificate served by host:port."""
    ctx = ssl.create_default_context()
    with socket.create_connection((host, port), timeout=timeout) as sock:
        with ctx.wrap_socket(sock, server_hostname=host) as ssock:
            return ssock.getpeercert().get("notAfter")


def _url_exists(url):
    """Return True when a GET of ``url`` ends in a 2xx/3xx status, else False."""
    try:
        code, _, _, _ = _get(url, method="GET", timeout=10)
        return 200 <= code < 400
    except Exception:
        # Best-effort probe: any failure just means "not found".
        return False


def _pagespeed(url, strategy, api_key=None):
    """Return the Lighthouse performance score (0..1) from PageSpeed Insights.

    Returns None when the API call fails or the response lacks a score; the
    audit then reports the check as skipped rather than aborting.
    """
    base = "https://www.googleapis.com/pagespeedonline/v5/runPagespeed"
    q = {"url": url, "strategy": strategy}
    if api_key:
        q["key"] = api_key
    try:
        _, _, _, body = _get(base + "?" + urllib.parse.urlencode(q), timeout=60)
        data = json.loads(body)
        return data["lighthouseResult"]["categories"]["performance"]["score"]
    except Exception:
        return None


def audit(url, api_key=None):
    """Run every public check against ``url`` and collect the findings.

    Args:
        url: absolute http(s) URL of the site.
        api_key: optional PageSpeed Insights API key.

    Returns:
        dict with ``url`` (final URL after redirects, or the input when the
        site is unreachable), ``reachable`` (bool) and ``findings``, a list of
        dicts with keys ``group``, ``check``, ``value``, ``status``
        ("pass"/"warn"/"fail"/"skipped") and ``note``.
    """
    findings = []

    def add(group, check, value, status, note=""):
        findings.append({"group": group, "check": check, "value": value, "status": status, "note": note})

    try:
        code, headers, final_url, html = _get(url)
    except Exception as e:
        add("reach", "reachable", str(e), "fail", "site did not respond")
        return {"url": url, "reachable": False, "findings": findings}

    parsed = urllib.parse.urlparse(final_url)
    is_https = parsed.scheme == "https"
    host = parsed.hostname

    add("reach", "status", code, "pass" if code < 400 else "fail")
    add("reach", "https", is_https, "pass" if is_https else "fail",
        "" if is_https else "no HTTPS redirect")

    if is_https and host:
        try:
            # Probe the port the site actually answers on, not always 443.
            na = _ssl_notafter(host, port=parsed.port or 443)
            days = ssl_days_left(na) if na else None
            if days is None:
                add("security", "ssl_days_left", None, "fail", "certificate expiry unavailable")
            else:
                st = "pass" if days > 20 else ("warn" if days > 0 else "fail")
                add("security", "ssl_days_left", days, st, f"expires {na}")
        except Exception as e:
            add("security", "ssl", str(e), "fail", "SSL check failed")

    hdr = analyze_headers(headers)
    n_present = len(hdr["present"])
    add("security", "security_headers", f"{n_present}/{len(SECURITY_HEADERS)}",
        "pass" if n_present >= 4 else ("warn" if n_present else "fail"),
        "missing: " + ", ".join(hdr["missing"]) if hdr["missing"] else "")

    cms = parse_cms(html, headers)
    if cms["is_wordpress"]:
        wp_note = f"version {cms['wp_version']}" if cms["wp_version"] else "version hidden"
    else:
        # "version hidden" would wrongly imply the site is WordPress.
        wp_note = "WordPress not detected"
    add("cms", "wordpress", cms["is_wordpress"], "pass" if cms["is_wordpress"] else "warn", wp_note)
    if cms["php_version"]:
        php_ok = php_is_supported(cms["php_version"])
        add("security", "php_version", cms["php_version"], "pass" if php_ok else "fail",
            "" if php_ok else "EOL PHP")

    seo = parse_seo(html)
    add("seo", "title", seo["title"], "pass" if seo["title"] else "fail")
    add("seo", "meta_description", seo["meta_description"], "pass" if seo["meta_description"] else "fail")
    add("seo", "single_h1", seo["h1_count"], "pass" if seo["h1_count"] == 1 else "warn",
        f"{seo['h1_count']} H1s")
    add("seo", "canonical", seo["has_canonical"], "pass" if seo["has_canonical"] else "warn")

    origin = origin_of(final_url)
    # Probe each file once so the reported value and its status cannot disagree.
    has_sitemap = _url_exists(origin + "/sitemap.xml")
    add("seo", "sitemap.xml", has_sitemap, "pass" if has_sitemap else "fail")
    has_robots = _url_exists(origin + "/robots.txt")
    add("seo", "robots.txt", has_robots, "pass" if has_robots else "warn")

    for strat in ("mobile", "desktop"):
        score = _pagespeed(final_url, strat, api_key)
        add("performance", f"pagespeed_{strat}", round(score * 100) if score is not None else None,
            grade_pagespeed(score), "PSI unavailable" if score is None else "")

    return {"url": final_url, "reachable": True, "findings": findings}


def _summary(result):
    """Render an audit() result as a short text report, worst findings first."""
    lines = [f"# Quick audit — {result['url']}", ""]
    if not result["reachable"]:
        return "\n".join(lines + ["Site unreachable."])
    order = {"fail": 0, "warn": 1, "skipped": 2, "pass": 3}
    icon = {"pass": "🟢", "warn": "🟡", "fail": "🔴", "skipped": "⚪"}
    for f in sorted(result["findings"], key=lambda x: order.get(x["status"], 9)):
        note = f" — {f['note']}" if f["note"] else ""
        lines.append(f"{icon.get(f['status'],'')} [{f['group']}] {f['check']}: {f['value']}{note}")
    return "\n".join(lines)


def main(argv=None):
    """CLI entry point; ``argv`` defaults to ``sys.argv[1:]``.

    Exits with status 1 when no URL is given and 2 when the site was reachable
    and at least one finding failed. An unreachable site does not trigger
    exit status 2.
    """
    p = argparse.ArgumentParser(description="No-auth Tier-1 website audit")
    p.add_argument("url", nargs="?", help="Site URL (http(s)://...)")
    p.add_argument("--url", dest="url_opt")
    p.add_argument("--summary", action="store_true", help="Human 1-page summary instead of JSON")
    a = p.parse_args(argv)
    url = a.url or a.url_opt
    if not url:
        print(json.dumps({"error": "URL required"}), file=sys.stderr)
        sys.exit(1)
    # Scheme names are case-insensitive; do not double-prefix "HTTPS://...".
    if not url.lower().startswith(("http://", "https://")):
        url = "https://" + url
    result = audit(url, api_key=os.getenv("PAGESPEED_API_KEY"))
    print(_summary(result) if a.summary else json.dumps(result, indent=2))
    if result["reachable"] and any(f["status"] == "fail" for f in result["findings"]):
        sys.exit(2)


if __name__ == "__main__":
    main()