From 777cac2b5f006fa03e8df232e3983ab8c6bd6480 Mon Sep 17 00:00:00 2001 From: Mule Date: Sat, 28 Mar 2026 15:18:02 +0000 Subject: [PATCH 1/4] Extract media playback state from audio/video elements in snapshots Closes #2 Add media-specific state detection in the content script's extractAttrs() for HTMLMediaElement instances, capturing playback state, current time, duration, muted flag, and video resolution. On the Rust side, add a Media node variant to structure.rs with corresponding XML rendering in xml.rs, and handle the new variant in runner.rs and commands.rs match arms. --- extension/src/content/content-script.ts | 18 +++++++++ src/page/structure.rs | 54 ++++++++++++++++++++++++- src/page/xml.rs | 29 +++++++++++++ src/plugin/runner.rs | 4 +- 4 files changed, 101 insertions(+), 4 deletions(-) diff --git a/extension/src/content/content-script.ts b/extension/src/content/content-script.ts index 96e7956..f11d138 100644 --- a/extension/src/content/content-script.ts +++ b/extension/src/content/content-script.ts @@ -1027,6 +1027,24 @@ function extractAttrs(element: Element): Record { attrs.selected = 'true'; } + // Media playback state + if (element instanceof HTMLMediaElement) { + attrs['media-state'] = element.paused ? 'paused' : 'playing'; + if (element.ended) { + attrs['media-state'] = 'ended'; + } + attrs['media-current-time'] = String(Math.round(element.currentTime)); + if (Number.isFinite(element.duration)) { + attrs['media-duration'] = String(Math.round(element.duration)); + } + if (element.muted) { + attrs['media-muted'] = 'true'; + } + if (element instanceof HTMLVideoElement && element.videoWidth > 0) { + attrs['media-resolution'] = `${element.videoWidth}x${element.videoHeight}`; + } + } + if ( element instanceof HTMLInputElement || element instanceof HTMLTextAreaElement || diff --git a/src/page/structure.rs b/src/page/structure.rs index 37355b5..90b3558 100644 --- a/src/page/structure.rs +++ b/src/page/structure.rs @@ -112,6 +112,15 @@ pub enum Node { Cell { children: Vec, }, + Media { + id: String, + tag: String, + media_state: String, + current_time: u64, + duration: Option, + muted: bool, + resolution: Option, + }, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -780,7 +789,8 @@ fn node_has_id(node: &Node, id: &str) -> bool { | Node::Checkbox { id: nid, .. } | Node::Radio { id: nid, .. } | Node::Select { id: nid, .. } - | Node::Textarea { id: nid, .. } => nid == id, + | Node::Textarea { id: nid, .. } + | Node::Media { id: nid, .. } => nid == id, Node::Text { id: Some(nid), .. } => nid == id, Node::List { id: Some(nid), .. } | Node::Table { id: Some(nid), .. } => nid == id, _ => false, @@ -921,6 +931,45 @@ fn build_node<'a>( let table = build_table_node(raw, children_by_parent, node_by_ref, filter, state)?; return Some(table); } + "audio" | "video" if raw.attrs.contains_key("media-state") => { + if !visible_here { + return None; + } + let id = format!("e{}", state.next_element_id); + state.next_element_id += 1; + state + .element_refs + .insert(id.clone(), raw.ref_id.clone()); + let media_state = raw + .attrs + .get("media-state") + .cloned() + .unwrap_or_else(|| "paused".to_string()); + let current_time = raw + .attrs + .get("media-current-time") + .and_then(|v| v.parse::().ok()) + .unwrap_or(0); + let duration = raw + .attrs + .get("media-duration") + .and_then(|v| v.parse::().ok()); + let muted = raw + .attrs + .get("media-muted") + .map(|v| v == "true") + .unwrap_or(false); + let resolution = raw.attrs.get("media-resolution").cloned(); + return Some(Node::Media { + id, + tag: raw.tag.clone(), + media_state, + current_time, + duration, + muted, + resolution, + }); + } _ => {} } @@ -1725,7 +1774,8 @@ fn estimate_node_lines(node: &Node) -> usize { | Node::Checkbox { .. } | Node::Radio { .. } | Node::Select { .. } - | Node::Textarea { .. } => 1, + | Node::Textarea { .. } + | Node::Media { .. } => 1, Node::Container { children, .. } | Node::List { children, .. } | Node::Item { children, .. } diff --git a/src/page/xml.rs b/src/page/xml.rs index 0764386..03cd9c7 100644 --- a/src/page/xml.rs +++ b/src/page/xml.rs @@ -414,6 +414,33 @@ fn render_node( } Node::Row { children } => render_row(out, children, indent), Node::Cell { children } => render_cell(out, children, indent), + Node::Media { + id, + tag, + media_state, + current_time, + duration, + muted, + resolution, + } => { + out.push_str(&format!( + "{indent_str}\n"); + } } } @@ -611,6 +638,7 @@ fn node_type_tag(node: &Node) -> &str { Node::Table { .. } => "table", Node::Row { .. } => "row", Node::Cell { .. } => "cell", + Node::Media { .. } => "media", } } @@ -691,6 +719,7 @@ fn item_can_inline_single_child(node: &Node) -> bool { | Node::Radio { .. } | Node::Select { .. } | Node::Textarea { .. } + | Node::Media { .. } ) } diff --git a/src/plugin/runner.rs b/src/plugin/runner.rs index 2d8a259..7ffaf66 100644 --- a/src/plugin/runner.rs +++ b/src/plugin/runner.rs @@ -411,9 +411,8 @@ fn collect_interactive_targets(node: &Node, out: &mut Vec<(String, String)>) { collect_interactive_targets(child, out); } } - Node::Text { .. } | Node::Heading { .. } => {} + Node::Text { .. } | Node::Heading { .. } | Node::Media { .. } => {} } -} fn node_text(node: &Node) -> String { match node { @@ -446,6 +445,7 @@ fn node_text(node: &Node) -> String { .filter(|value| !value.is_empty()) .collect::>() .join(" "), + Node::Media { tag, media_state, .. } => format!("{} ({})", tag, media_state), } } From 3f063458565e07440c18f0889e99257e4ee7a5b5 Mon Sep 17 00:00:00 2001 From: Mule Date: Sat, 28 Mar 2026 15:42:17 +0000 Subject: [PATCH 2/4] Fix unclosed delimiter in collect_interactive_targets function Add missing closing brace for the function body, which was causing a compilation error due to an unclosed delimiter. --- src/plugin/runner.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/plugin/runner.rs b/src/plugin/runner.rs index 7ffaf66..f9cdf57 100644 --- a/src/plugin/runner.rs +++ b/src/plugin/runner.rs @@ -413,6 +413,7 @@ fn collect_interactive_targets(node: &Node, out: &mut Vec<(String, String)>) { } Node::Text { .. } | Node::Heading { .. } | Node::Media { .. } => {} } +} fn node_text(node: &Node) -> String { match node { From 8100d0cae48b8de568527143794c48fb48b41c12 Mon Sep 17 00:00:00 2001 From: Mule Date: Sat, 28 Mar 2026 16:07:34 +0000 Subject: [PATCH 3/4] Address review: include Media nodes in interactive target collection for plugins --- src/plugin/runner.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/plugin/runner.rs b/src/plugin/runner.rs index f9cdf57..388a7b4 100644 --- a/src/plugin/runner.rs +++ b/src/plugin/runner.rs @@ -411,7 +411,18 @@ fn collect_interactive_targets(node: &Node, out: &mut Vec<(String, String)>) { collect_interactive_targets(child, out); } } - Node::Text { .. } | Node::Heading { .. } | Node::Media { .. } => {} + Node::Media { + id, + tag, + media_state, + .. + } => { + out.push(( + id.clone(), + join_parts([Some(tag.as_str()), Some(media_state.as_str())]), + )); + } + Node::Text { .. } | Node::Heading { .. } => {} } } From da09002de60fb153c47448faa3b52e2465d10f5f Mon Sep 17 00:00:00 2001 From: Mule Date: Sat, 28 Mar 2026 17:01:33 +0000 Subject: [PATCH 4/4] Add tests for Media node structuring and XML rendering --- src/page/structure.rs | 89 +++++++++++++++++++++++++++++++++++++++++++ src/page/xml.rs | 37 ++++++++++++++++++ 2 files changed, 126 insertions(+) diff --git a/src/page/structure.rs b/src/page/structure.rs index 90b3558..9f6baad 100644 --- a/src/page/structure.rs +++ b/src/page/structure.rs @@ -2240,4 +2240,93 @@ mod tests { other => panic!("unexpected: {other:?}"), } } + + #[test] + fn audio_with_media_state_becomes_media_node() { + let body = node("r1", None, "body", "", 0.0); + let mut audio = node("r2", Some("r1"), "audio", "", 10.0); + audio.attrs.insert("media-state".into(), "playing".into()); + audio + .attrs + .insert("media-current-time".into(), "30".into()); + audio.attrs.insert("media-duration".into(), "180".into()); + audio.attrs.insert("media-muted".into(), "true".into()); + + let page = parse_page_from_snapshot(&snapshot(vec![body, audio]), Some(1)).unwrap(); + match &page.nodes[0] { + Node::Media { + id, + tag, + media_state, + current_time, + duration, + muted, + resolution, + } => { + assert_eq!(id, "e1"); + assert_eq!(tag, "audio"); + assert_eq!(media_state, "playing"); + assert_eq!(*current_time, 30); + assert_eq!(*duration, Some(180)); + assert!(*muted); + assert!(resolution.is_none()); + } + other => panic!("expected Media, got: {other:?}"), + } + assert_eq!(page.element_refs.get("e1").map(String::as_str), Some("r2")); + } + + #[test] + fn video_with_all_media_attrs_becomes_media_node() { + let body = node("r1", None, "body", "", 0.0); + let mut video = node("r2", Some("r1"), "video", "", 10.0); + video.attrs.insert("media-state".into(), "playing".into()); + video + .attrs + .insert("media-current-time".into(), "42".into()); + video.attrs.insert("media-duration".into(), "120".into()); + video.attrs.insert("media-muted".into(), "false".into()); + video + .attrs + .insert("media-resolution".into(), "1920x1080".into()); + + let page = parse_page_from_snapshot(&snapshot(vec![body, video]), Some(1)).unwrap(); + match &page.nodes[0] { + Node::Media { + id, + tag, + media_state, + current_time, + duration, + muted, + resolution, + } => { + assert_eq!(id, "e1"); + assert_eq!(tag, "video"); + assert_eq!(media_state, "playing"); + assert_eq!(*current_time, 42); + assert_eq!(*duration, Some(120)); + assert!(!*muted); + assert_eq!(resolution.as_deref(), Some("1920x1080")); + } + other => panic!("expected Media, got: {other:?}"), + } + assert_eq!(page.element_refs.get("e1").map(String::as_str), Some("r2")); + } + + #[test] + fn video_without_media_state_is_not_media_node() { + let body = node("r1", None, "body", "", 0.0); + let video = node("r2", Some("r1"), "video", "Some video text", 10.0); + + let page = parse_page_from_snapshot(&snapshot(vec![body, video]), Some(1)).unwrap(); + // Without media-state attr, the video element should fall through + // to normal node handling (e.g. Text or Container), not Media. + for n in &page.nodes { + assert!( + !matches!(n, Node::Media { .. }), + "video without media-state should not produce Media node" + ); + } + } } diff --git a/src/page/xml.rs b/src/page/xml.rs index 03cd9c7..33f36ef 100644 --- a/src/page/xml.rs +++ b/src/page/xml.rs @@ -1186,4 +1186,41 @@ mod tests { assert!(xml.contains("class=\"type-a\"")); assert!(xml.contains("class=\"type-b\"")); } + + #[test] + fn render_xml_media_node_with_all_fields() { + let xml = render_xml(&page(vec![Node::Media { + id: "e1".into(), + tag: "video".into(), + media_state: "playing".into(), + current_time: 42, + duration: Some(120), + muted: true, + resolution: Some("1920x1080".into()), + }])); + + assert!(xml.contains( + "" + )); + } + + #[test] + fn render_xml_media_node_minimal_fields() { + let xml = render_xml(&page(vec![Node::Media { + id: "e2".into(), + tag: "audio".into(), + media_state: "paused".into(), + current_time: 0, + duration: None, + muted: false, + resolution: None, + }])); + + assert!(xml.contains( + "" + )); + assert!(!xml.contains("duration=")); + assert!(!xml.contains("muted=")); + assert!(!xml.contains("resolution=")); + } }