-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathsections.py
More file actions
98 lines (81 loc) · 3.16 KB
/
sections.py
File metadata and controls
98 lines (81 loc) · 3.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
"""Detect logical sections from parsed HTML tree."""
from __future__ import annotations
from typing import Iterator
# Tag names that detect_sections() accepts as section candidates (a node
# must also have children to qualify).
SECTION_TAGS = {"section", "header", "footer", "nav", "main", "article"}
def iter_tree(node: dict) -> Iterator[dict]:
    """Yield ``node`` and every node below it in depth-first pre-order.

    Children are read from each node's ``"children"`` entry; a missing,
    empty or ``None`` entry is treated as "no children".  The walk keeps an
    explicit stack of child iterators instead of recursing, so deeply
    nested trees do not hit the interpreter's recursion limit.

    Args:
        node: Root of the subtree to walk.

    Yields:
        The root first, then each descendant; a parent is always yielded
        before its children, and siblings are yielded in list order.
    """
    exhausted = object()  # private sentinel: can never be a real node
    pending = [iter((node,))]
    while pending:
        current = next(pending[-1], exhausted)
        if current is exhausted:
            # Every sibling at this level has been visited; go back up.
            pending.pop()
            continue
        yield current
        # Children are looked up only after the consumer resumes us, which
        # keeps the traversal as lazy as a recursive generator would be.
        pending.append(iter(current.get("children") or ()))
def detect_sections(capture: dict) -> list[dict]:
    """Select the nodes of a parsed capture that should be treated as sections.

    The capture's root nodes are read from ``capture["sections"]``.  The
    result is chosen as follows:

    1. Collect every node whose tag is in ``SECTION_TAGS`` and that has
       children, then keep only the outermost ones (a ``header`` that
       contains a ``nav`` keeps just the ``header``).
    2. Add root-level ``div`` nodes that have a background style and some
       text, either their own or a direct child's (announcement bars,
       cookie bars).  Each one is inserted at the front of the result.
    3. If anything was kept, return it.  A single semantic section (e.g. a
       lone ``footer`` converted in isolation) is returned as-is rather
       than being split into its children, so its own background and
       padding are not lost.
    4. Otherwise fall back to the direct children of the first root that
       themselves have children, or an empty list.

    Args:
        capture: Parsed page; missing or ``None`` ``"sections"``,
            ``"styles"`` and ``"children"`` entries are treated as empty.

    Returns:
        The selected nodes (the same dict objects found in ``capture``).
    """
    roots = capture.get("sections") or []

    # Pre-order walk of every root: an ancestor is always listed before the
    # nodes nested inside it, which the outermost-only pass relies on.
    candidates = [
        n
        for root in roots
        for n in iter_tree(root)
        if n.get("tag") in SECTION_TAGS and n.get("children")
    ]

    # Keep outermost sections only.  Nodes are matched by identity: when a
    # node is kept, the ids of everything strictly below it are recorded so
    # that later candidates nested inside it are skipped with one set lookup
    # instead of re-walking every kept subtree.
    kept: list[dict] = []
    nested_ids: set[int] = set()
    for n in candidates:
        if id(n) in nested_ids:
            continue
        kept.append(n)
        nested_ids.update(id(m) for m in iter_tree(n) if m is not n)

    # Also include body-level divs with a background and some text.
    for root in roots:
        if root.get("tag") != "div" or root in kept:
            continue
        styles = root.get("styles") or {}
        has_bg = bool(styles.get("background-color") or styles.get("background"))
        has_text = bool(root.get("text")) or any(
            c.get("text") for c in root.get("children") or []
        )
        if has_bg and has_text:
            # NOTE(review): inserting at index 0 each time means that when
            # several roots qualify, later ones end up before earlier ones.
            kept.insert(0, root)

    if kept:
        return kept

    # Fallback (no semantic sections found): direct children of first root.
    if roots and roots[0].get("children"):
        return [c for c in roots[0]["children"] if c.get("children")]
    return []
def _is_descendant_of(child: dict, parent: dict) -> bool:
    """Tell whether ``child`` is a node strictly below ``parent``.

    Nodes are matched by object identity rather than equality, and
    ``parent`` itself never counts as its own descendant.
    """
    return any(
        candidate is child and candidate is not parent
        for candidate in iter_tree(parent)
    )
def classify_section(node: dict) -> str:
    """Return a coarse layout label for a section node.

    Rules are tried in this order and the first match wins:

    1. ``header`` or ``nav`` tag -> ``"nav"``; ``footer`` tag -> ``"footer"``.
    2. ``hero`` in the class names, or any ``h1`` in the subtree -> ``"hero"``.
    3. ``testimon`` in the class names -> ``"testimonials"``.
    4. ``cta`` or ``contact`` in the class names -> ``"cta"``.
    5. at least three ``h3`` nodes or at least three ``svg`` nodes in the
       subtree -> ``"features"``.
    6. anything else -> ``"content"``.

    Class matching is a case-insensitive *substring* test against the
    space-joined class names, so a keyword also matches inside a longer
    class name.

    Args:
        node: Section node.  ``"classes"`` may be a list of names, a single
            string, or missing/``None``.

    Returns:
        One of ``"nav"``, ``"footer"``, ``"hero"``, ``"testimonials"``,
        ``"cta"``, ``"features"`` or ``"content"``.
    """
    tag = node.get("tag", "")
    raw_classes = node.get("classes") or []
    if isinstance(raw_classes, str):
        # Joining a bare string would interleave spaces between its
        # characters and make every keyword test below fail.
        raw_classes = [raw_classes]
    classes = " ".join(raw_classes).lower()

    if tag in ("header", "nav"):
        return "nav"
    if tag == "footer":
        return "footer"
    if "hero" in classes:
        return "hero"

    # One pass over the subtree gathers everything the remaining rules need.
    h3s = 0
    svgs = 0
    for n in iter_tree(node):
        inner_tag = n.get("tag")
        if inner_tag == "h1":
            # A single h1 decides the result, and this rule outranks all the
            # ones below, so there is no need to look any further.
            return "hero"
        if inner_tag == "h3":
            h3s += 1
        elif inner_tag == "svg":
            svgs += 1

    if "testimon" in classes:
        return "testimonials"
    if "cta" in classes or "contact" in classes:
        return "cta"
    if h3s >= 3 or svgs >= 3:
        return "features"
    return "content"