Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@ jobs:
run: python -m compileall wordpress-api-pro/scripts

- name: Unit tests
run: python3 tests/test_cpt_seeding.py
run: |
python3 tests/test_cpt_seeding.py
python3 tests/test_site_audit.py

- name: Seed dry-run smoke (no network)
run: |
Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

## 3.7.0 - 2026-06-02

- Add `site_audit.py` — no-auth Tier-1 website audit (PageSpeed, SSL, security headers, CMS/PHP detection, SEO basics) emitting findings against the audit-engine thresholds. Stdlib-only; the sales-hook quick scan. Pure parsers unit-tested offline + wired into CI.

## 3.6.0 - 2026-06-02

CPT content seeding (Tier-1 dynamic content).
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "wordpress-api-pro",
"version": "3.6.0",
"version": "3.7.0",
"description": "WordPress REST API integration skill for OpenClaw - manage posts, pages, media, WooCommerce, Elementor, and metadata with explicit safety boundaries",
"private": true,
"main": "wordpress-api-pro/SKILL.md",
Expand Down
74 changes: 74 additions & 0 deletions tests/test_site_audit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import os, sys, unittest

# Put the skill's scripts directory on sys.path so site_audit can be imported
# straight from the source tree.
SCRIPTS = os.path.join(os.path.dirname(__file__), "..", "wordpress-api-pro", "scripts")
sys.path.insert(0, os.path.abspath(SCRIPTS))

# Must come after the sys.path insertion above, hence the E402 suppression.
import site_audit as sa  # noqa: E402


class CmsTest(unittest.TestCase):
    """Offline checks for sa.parse_cms (WordPress / PHP fingerprinting)."""

    def test_detects_wordpress_and_version_from_generator(self):
        # The generator meta tag should yield both the flag and the version.
        markup = '<meta name="generator" content="WordPress 6.5.2" />'
        result = sa.parse_cms(markup, {})
        self.assertTrue(result["is_wordpress"])
        self.assertEqual(result["wp_version"], "6.5.2")

    def test_detects_wp_from_wp_content_when_no_generator(self):
        # A /wp-content/ asset path alone is enough to flag WordPress.
        markup = '<link href="/wp-content/themes/x/style.css">'
        result = sa.parse_cms(markup, {})
        self.assertTrue(result["is_wordpress"])

    def test_php_version_from_x_powered_by(self):
        # The PHP version comes from the X-Powered-By response header.
        response_headers = {"X-Powered-By": "PHP/8.1.27"}
        result = sa.parse_cms("", response_headers)
        self.assertEqual(result["php_version"], "8.1.27")

    def test_non_wp(self):
        # Markup without any WordPress marker must not be flagged.
        result = sa.parse_cms("<html>nothing</html>", {})
        self.assertFalse(result["is_wordpress"])


class SeoTest(unittest.TestCase):
    """Offline checks for sa.parse_seo (title, description, H1, canonical)."""

    def test_extracts_title_and_description_and_h1_and_canonical(self):
        # One document carrying every signal parse_seo reports.
        fragments = [
            '<title>Acme — Home</title>',
            '<meta name="description" content="We build things.">',
            '<link rel="canonical" href="https://acme/"><h1>Hi</h1>',
        ]
        result = sa.parse_seo("".join(fragments))
        self.assertEqual(result["title"], "Acme — Home")
        self.assertEqual(result["meta_description"], "We build things.")
        self.assertEqual(result["h1_count"], 1)
        self.assertTrue(result["has_canonical"])

    def test_missing_fields(self):
        # An empty document reports None / 0 / False rather than raising.
        result = sa.parse_seo("<html></html>")
        self.assertIsNone(result["title"])
        self.assertIsNone(result["meta_description"])
        self.assertEqual(result["h1_count"], 0)
        self.assertFalse(result["has_canonical"])


class HeadersTest(unittest.TestCase):
    """Offline checks for sa.analyze_headers (security-header inventory)."""

    def test_present_and_missing_security_headers(self):
        # Two headers supplied; the others must be listed as missing.
        supplied = {
            "Strict-Transport-Security": "max-age=63072000",
            "X-Content-Type-Options": "nosniff",
        }
        report = sa.analyze_headers(supplied)
        self.assertIn("Strict-Transport-Security", report["present"])
        for absent in ("Content-Security-Policy", "X-Frame-Options"):
            self.assertIn(absent, report["missing"])


class SslTest(unittest.TestCase):
    """Offline checks for the certificate-expiry arithmetic."""

    def test_days_left_positive(self):
        # Jan 1 -> Mar 2 of 2026 is 31 + 28 + 1 = 60 whole days.
        reference = sa._parse_cert_time("Jan 1 00:00:00 2026 GMT")
        remaining = sa.ssl_days_left("Mar 2 00:00:00 2026 GMT", now=reference)
        self.assertEqual(remaining, 60)


class PageSpeedTest(unittest.TestCase):
    """Offline checks for sa.grade_pagespeed score bucketing."""

    def test_grade(self):
        # One representative score per grade bucket.
        expectations = ((0.95, "pass"), (0.80, "warn"), (0.50, "fail"))
        for score, grade in expectations:
            self.assertEqual(sa.grade_pagespeed(score), grade)


# Allow running this file directly, e.g. `python3 tests/test_site_audit.py`.
if __name__ == "__main__":
    unittest.main()
3 changes: 2 additions & 1 deletion wordpress-api-pro/SKILL.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
name: wordpress-api-pro
version: 3.6.0
version: 3.7.0
license: MIT-0
description: |
WordPress REST API integration for managing posts, pages, media, WooCommerce products, Elementor content, SEO meta, ACF, and JetEngine fields.
Expand Down Expand Up @@ -199,6 +199,7 @@ python3 scripts/upload_media.py \
- `scripts/acf_fields.py` — read/write ACF fields.
- `scripts/seo_meta.py` — read/write Rank Math and Yoast SEO metadata.
- `scripts/jetengine_fields.py` — read/write JetEngine custom fields.
- `scripts/site_audit.py` — no-auth Tier-1 website audit (PageSpeed/SSL/security headers/CMS+PHP/SEO basics). Public probes only; run cold pre-sale.
- `scripts/describe_cpt.py` — discover a CPT's rest_base, taxonomies, and field keys (read-only).
- `scripts/seed_content.py` — batch-create CPT entries with ACF/Jet fields, taxonomies, and featured images from a JSON dataset. **Dry-run by default; pass `--execute` to write.**
- `scripts/elementor_content.py` — read/update Elementor `_elementor_data`.
Expand Down
208 changes: 208 additions & 0 deletions wordpress-api-pro/scripts/site_audit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
#!/usr/bin/env python3
"""No-auth Tier-1 website audit — public signals only (PageSpeed/SSL/headers/CMS/SEO).

Run cold, before any engagement, as the sales-hook quick scan. Read-only public
fetches; no credentials. Outputs findings JSON (default) or a 1-page --summary.

Usage:
python3 site_audit.py https://example.com
python3 site_audit.py https://example.com --summary
Env (optional): PAGESPEED_API_KEY (higher PageSpeed Insights quota)
"""
import argparse, json, os, re, ssl, socket, sys, urllib.request, urllib.parse
from datetime import datetime, timezone

# User-Agent header sent with every request issued through _get().
UA = "Mozilla/5.0 (compatible; DigitizerAudit/1.0)"
# Response headers that analyze_headers() sorts into "present" / "missing".
SECURITY_HEADERS = [
    "Strict-Transport-Security", "Content-Security-Policy", "X-Frame-Options",
    "X-Content-Type-Options", "Referrer-Policy",
]


# ---- pure parsers (unit-tested, no network) --------------------------------
def _meta_generator(html):
    """Return the content of the first <meta name="generator"> tag, or None.

    Attributes are read in any order and each value is taken up to its own
    closing quote, so `content="..." name="generator"` is recognised too.
    """
    for tag in re.findall(r'<meta\b[^>]*>', html, re.I):
        attrs = {}
        for key, dq, sq in re.findall(r'([\w:-]+)\s*=\s*(?:"([^"]*)"|\'([^\']*)\')', tag):
            # First occurrence of an attribute wins.
            attrs.setdefault(key.lower(), dq or sq)
        if attrs.get("name", "").strip().lower() == "generator" and attrs.get("content"):
            return attrs["content"]
    return None


def parse_cms(html, headers):
    """Fingerprint the CMS and PHP version from a page body and its headers.

    Args:
        html: Page markup; None is treated as an empty string.
        headers: Mapping of response header names to values; None is treated
            as empty. Names are matched case-insensitively.

    Returns:
        dict with keys:
            is_wordpress: True when the generator mentions WordPress or the
                markup references /wp-content/ or /wp-json.
            wp_version: dotted version taken from the generator, else None.
            php_version: dotted version taken from X-Powered-By, else None.
            generator: raw generator meta content, else None.
    """
    html = html or ""
    headers = {str(k).lower(): v for k, v in (headers or {}).items()}

    generator = _meta_generator(html)
    is_wp = (
        bool(generator and "wordpress" in generator.lower())
        or "/wp-content/" in html
        or "/wp-json" in html
    )

    wp_version = None
    if generator:
        # \d+(?:\.\d+)* avoids capturing a trailing dot ("WordPress 6.5.").
        m = re.search(r'WordPress\s+(\d+(?:\.\d+)*)', generator, re.I)
        if m:
            wp_version = m.group(1)

    php = None
    xpb = str(headers.get("x-powered-by") or "")
    mp = re.search(r'PHP/(\d+(?:\.\d+)*)', xpb, re.I)
    if mp:
        php = mp.group(1)

    return {"is_wordpress": is_wp, "wp_version": wp_version, "php_version": php, "generator": generator}


def _meta_description(html):
    """Return the content of the first <meta name="description"> tag, or None.

    Attributes are read in any order and each value runs to its own matching
    quote, so an apostrophe inside a double-quoted value is preserved.
    """
    for tag in re.findall(r'<meta\b[^>]*>', html, re.I):
        attrs = {}
        for key, dq, sq in re.findall(r'([\w:-]+)\s*=\s*(?:"([^"]*)"|\'([^\']*)\')', tag):
            # First occurrence of an attribute wins.
            attrs.setdefault(key.lower(), dq or sq)
        if attrs.get("name", "").strip().lower() == "description" and "content" in attrs:
            return attrs["content"]
    return None


def parse_seo(html):
    """Extract basic on-page SEO signals from markup.

    Args:
        html: Page markup; None is treated as an empty string.

    Returns:
        dict with keys:
            title: stripped text of the first <title>, else None.
            meta_description: stripped description meta content, else None.
            h1_count: number of opening <h1> tags.
            has_canonical: True when a <link rel="canonical"> is present.
    """
    html = html or ""
    title = re.search(r'<title\b[^>]*>(.*?)</title>', html, re.I | re.S)
    description = _meta_description(html)
    return {
        "title": title.group(1).strip() if title else None,
        "meta_description": description.strip() if description is not None else None,
        "h1_count": len(re.findall(r'<h1[\s>]', html, re.I)),
        "has_canonical": bool(re.search(r'<link[^>]+rel=["\']canonical["\']', html, re.I)),
    }


def analyze_headers(headers):
    """Sort SECURITY_HEADERS into those present in `headers` and those missing.

    Header names are compared case-insensitively; `headers` may be None.
    Both result lists keep the order of SECURITY_HEADERS.
    """
    seen = set()
    for name in (headers or {}):
        seen.add(name.lower())
    found = [h for h in SECURITY_HEADERS if h.lower() in seen]
    absent = [h for h in SECURITY_HEADERS if h.lower() not in seen]
    return {"present": found, "missing": absent}


def _parse_cert_time(s):
# OpenSSL notAfter format, e.g. "Mar 2 00:00:00 2026 GMT"
return datetime.strptime(s, "%b %d %H:%M:%S %Y %Z").replace(tzinfo=timezone.utc)


def ssl_days_left(notafter, now=None):
    """Return the whole days between `now` and a certificate's notAfter time.

    `now` defaults to the current UTC time. The result is negative once the
    certificate has expired.
    """
    expiry = _parse_cert_time(notafter)
    if not now:
        now = datetime.now(timezone.utc)
    remaining = expiry - now
    return remaining.days


def grade_pagespeed(score):
    """Map a Lighthouse performance score (0..1) to a finding status.

    None -> "skipped"; >= 0.9 -> "pass"; >= 0.7 -> "warn"; otherwise "fail".
    """
    if score is None:
        return "skipped"
    # Thresholds are checked from the strictest down.
    for floor, label in ((0.9, "pass"), (0.7, "warn")):
        if score >= floor:
            return label
    return "fail"


# ---- fetching (network; not unit-tested) -----------------------------------
def _get(url, method="GET", timeout=15):
    """Fetch `url` and return `(status, headers, final_url, body)`.

    A 4xx/5xx answer is returned like any other response instead of being
    raised: an error page still proves the site is reachable, and the caller
    can grade its status code, headers and markup.

    Args:
        url: Absolute URL to request.
        method: HTTP method; the body is only read for "GET".
        timeout: Timeout in seconds passed to urlopen.

    Returns:
        Tuple of (status code, headers as a plain dict, URL reported by the
        response, body decoded as UTF-8 with replacement; "" for non-GET
        methods).

    Raises:
        urllib.error.URLError / OSError: when no HTTP response is obtained
            (DNS failure, refused connection, timeout, TLS error).
    """
    from urllib.error import HTTPError  # local import keeps the block self-contained

    req = urllib.request.Request(url, method=method, headers={"User-Agent": UA})
    try:
        resp = urllib.request.urlopen(req, timeout=timeout)
    except HTTPError as err:
        # HTTPError doubles as the response object (code, headers, body).
        resp = err
    with resp:
        body = resp.read().decode("utf-8", "replace") if method == "GET" else ""
        return resp.getcode(), dict(resp.headers), resp.geturl(), body
Comment on lines +87 to +89
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Treat HTTP error pages as reachable responses

For sites whose homepage returns a 4xx/5xx response, urllib.request.urlopen raises HTTPError here instead of returning a response object, so audit() falls into the generic unreachable path and skips the intended status/header/SEO checks. In a quick audit, an HTTP 500 or 404 page is still a reachable site with a failing status; catch urllib.error.HTTPError and return its code, headers, URL, and body so line 132 can report it accurately.

Useful? React with 👍 / 👎.



def _ssl_notafter(host, port=443, timeout=10):
    """Return the `notAfter` field of the certificate served by `host:port`.

    Connects with the default SSL context and `host` as the server hostname.
    The result is None when the peer certificate dict has no `notAfter` entry.
    """
    context = ssl.create_default_context()
    raw = socket.create_connection((host, port), timeout=timeout)
    # Both sockets are closed on exit, including when the handshake raises.
    with raw, context.wrap_socket(raw, server_hostname=host) as tls:
        certificate = tls.getpeercert()
        return certificate.get("notAfter")


def _url_exists(url):
    """Return True when a GET of `url` yields a status in the 200-399 range.

    Best-effort probe: any exception raised while fetching or checking the
    status is treated as "does not exist".
    """
    try:
        response = _get(url, method="GET", timeout=10)
        status = response[0]
        found = status >= 200 and status < 400
    except Exception:
        found = False
    return found


def _pagespeed(url, strategy, api_key=None):
    """Return the PageSpeed Insights performance score for `url`, or None.

    `strategy` is sent as the API's `strategy` parameter and `api_key`, when
    given, as `key`. Any failure (request, JSON decoding, unexpected payload
    shape) yields None.
    """
    endpoint = "https://www.googleapis.com/pagespeedonline/v5/runPagespeed"
    params = {"url": url, "strategy": strategy}
    if api_key:
        params["key"] = api_key
    try:
        request_url = endpoint + "?" + urllib.parse.urlencode(params)
        raw = _get(request_url, timeout=60)[3]
        report = json.loads(raw)
        performance = report["lighthouseResult"]["categories"]["performance"]
        return performance["score"]
    except Exception:
        return None


def audit(url, api_key=None):
    """Run the public, no-auth audit of `url` and collect graded findings.

    Args:
        url: Absolute http(s) URL of the site to audit.
        api_key: Optional PageSpeed Insights API key.

    Returns:
        dict with keys:
            url: the final URL after redirects (the input URL if unreachable).
            reachable: False when the initial fetch raised, else True.
            findings: list of dicts with keys group, check, value, status
                ("pass" / "warn" / "fail" / "skipped") and note.
    """
    findings = []

    def add(group, check, value, status, note=""):
        findings.append({"group": group, "check": check, "value": value, "status": status, "note": note})

    try:
        code, headers, final_url, html = _get(url)
    except Exception as e:
        add("reach", "reachable", str(e), "fail", "site did not respond")
        return {"url": url, "reachable": False, "findings": findings}

    add("reach", "status", code, "pass" if code < 400 else "fail")
    is_https = final_url.startswith("https://")
    add("reach", "https", is_https, "pass" if is_https else "fail",
        "" if is_https else "no HTTPS redirect")

    parsed = urllib.parse.urlparse(final_url)
    host = parsed.hostname
    port = parsed.port
    if is_https and host:
        try:
            # Probe the port the site is actually served on, not always 443.
            na = _ssl_notafter(host, port=port or 443)
            days = ssl_days_left(na) if na else None
            st = "pass" if (days or 0) > 20 else ("warn" if (days or 0) > 0 else "fail")
            add("security", "ssl_days_left", days, st,
                f"expires {na}" if na else "certificate reported no expiry date")
        except Exception as e:
            add("security", "ssl", str(e), "fail", "SSL check failed")

    hdr = analyze_headers(headers)
    n_present = len(hdr["present"])
    # Denominator follows SECURITY_HEADERS so it stays right if the list changes.
    add("security", "security_headers", f"{n_present}/{len(SECURITY_HEADERS)}",
        "pass" if n_present >= 4 else ("warn" if n_present else "fail"),
        "missing: " + ", ".join(hdr["missing"]) if hdr["missing"] else "")

    cms = parse_cms(html, headers)
    if cms["is_wordpress"]:
        wp_note = f"version {cms['wp_version']}" if cms["wp_version"] else "version hidden"
    else:
        # "version hidden" would wrongly imply a WordPress site.
        wp_note = "WordPress not detected"
    add("cms", "wordpress", cms["is_wordpress"], "pass" if cms["is_wordpress"] else "warn", wp_note)
    if cms["php_version"]:
        php_ok = cms["php_version"].startswith(("8.1", "8.2", "8.3", "8.4"))
        add("security", "php_version", cms["php_version"], "pass" if php_ok else "fail",
            "EOL PHP" if not php_ok else "")

    seo = parse_seo(html)
    add("seo", "title", seo["title"], "pass" if seo["title"] else "fail")
    add("seo", "meta_description", seo["meta_description"], "pass" if seo["meta_description"] else "fail")
    add("seo", "single_h1", seo["h1_count"], "pass" if seo["h1_count"] == 1 else "warn",
        f"{seo['h1_count']} H1s")
    add("seo", "canonical", seo["has_canonical"], "pass" if seo["has_canonical"] else "warn")

    # Rebuild the origin without userinfo but keeping an explicit port;
    # IPv6 literals need their brackets back.
    if host:
        authority = f"[{host}]" if ":" in host else host
        if port:
            authority = f"{authority}:{port}"
    else:
        authority = parsed.netloc
    origin = f"{parsed.scheme}://{authority}"

    # Probe each well-known file once and reuse the result for value and status.
    has_sitemap = _url_exists(origin + "/sitemap.xml")
    add("seo", "sitemap.xml", has_sitemap, "pass" if has_sitemap else "fail")
    has_robots = _url_exists(origin + "/robots.txt")
    add("seo", "robots.txt", has_robots, "pass" if has_robots else "warn")

    for strat in ("mobile", "desktop"):
        score = _pagespeed(final_url, strat, api_key)
        add("performance", f"pagespeed_{strat}", round(score * 100) if score is not None else None,
            grade_pagespeed(score), "PSI unavailable" if score is None else "")

    return {"url": final_url, "reachable": True, "findings": findings}


def _summary(result):
lines = [f"# Quick audit — {result['url']}", ""]
if not result["reachable"]:
return "\n".join(lines + ["Site unreachable."])
order = {"fail": 0, "warn": 1, "skipped": 2, "pass": 3}
icon = {"pass": "🟢", "warn": "🟡", "fail": "🔴", "skipped": "⚪"}
for f in sorted(result["findings"], key=lambda x: order.get(x["status"], 9)):
note = f" — {f['note']}" if f["note"] else ""
lines.append(f"{icon.get(f['status'],'')} [{f['group']}] {f['check']}: {f['value']}{note}")
return "\n".join(lines)


def main():
    """Command-line entry point.

    Accepts the site URL positionally or via --url; a missing scheme
    defaults to https://. Prints the findings as JSON, or as a text summary
    with --summary.

    Exit status:
        0: no finding has status "fail".
        1: no URL was supplied.
        2: at least one finding has status "fail"; this includes the case
           where the site could not be reached at all.
    """
    p = argparse.ArgumentParser(description="No-auth Tier-1 website audit")
    p.add_argument("url", nargs="?", help="Site URL (http(s)://...)")
    p.add_argument("--url", dest="url_opt")
    p.add_argument("--summary", action="store_true", help="Human 1-page summary instead of JSON")
    a = p.parse_args()

    url = (a.url or a.url_opt or "").strip()
    if not url:
        print(json.dumps({"error": "URL required"}), file=sys.stderr)
        sys.exit(1)
    # Scheme comparison is case-insensitive so "HTTPS://host" is not re-prefixed.
    if not url.lower().startswith(("http://", "https://")):
        url = "https://" + url

    result = audit(url, api_key=os.getenv("PAGESPEED_API_KEY"))
    print(_summary(result) if a.summary else json.dumps(result, indent=2))

    # An unreachable target carries a "fail" finding as well, so it must not
    # look like a clean run to calling automation.
    if any(f["status"] == "fail" for f in result["findings"]):
        sys.exit(2)
Comment on lines +203 to +204
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Exit nonzero when the target is unreachable

When the initial fetch fails due to DNS, timeout, or connection refusal, audit() returns reachable: false with a fail finding, but this guard suppresses the nonzero exit and the CLI exits 0. In automation or CI-style sales scans, completely unreachable targets therefore look successful; exit nonzero whenever there is a fail finding, or handle not result['reachable'] separately.

Useful? React with 👍 / 👎.



# Run the CLI only when executed as a script, not when imported (the unit
# tests import this module for its pure parsers).
if __name__ == "__main__":
    main()
Loading