Skip to content

Commit 3baa32f

Browse files
committed
to text
1 parent b09d7d9 commit 3baa32f

2 files changed

Lines changed: 58 additions & 1 deletion

File tree

src/dom/element.rs

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@ use super::node::{Node, write_html_list};
22
#[cfg(feature = "source-span")]
33
use super::span::SourceSpan;
44
use crate::dom::vecmap::VecMap;
5-
use crate::{Attribute, VecSet};
5+
use crate::{Attribute, Text, VecSet};
6+
use html_escape::decode_html_entities;
67
use ownable::{IntoOwned, ToBorrowed, ToOwned};
78
use serde::Serialize;
89
use std::borrow::Cow;
@@ -151,6 +152,39 @@ impl<'a> Element<'a> {
151152
writer.push('>');
152153
}
153154
}
155+
156+
/// Strip all tags.
157+
///
158+
/// Note that HTML entities are left encoded; only tags and comments are dropped.
159+
/// To decode entities as well, use [`Self::to_text`], or call [`Text::decode`] on the result.
160+
pub fn strip_tags(&self) -> Text<'_> {
161+
if let &[Node::Text(t)] = &self.children.as_slice() {
162+
t.to_borrowed()
163+
} else {
164+
let mut result = String::new();
165+
self.write_strip_tags(&mut result);
166+
Text(result.into())
167+
}
168+
}
169+
170+
fn write_strip_tags(&self, writer: &mut String) {
171+
for c in &self.children {
172+
match c {
173+
Node::Text(t) => writer.push_str(t),
174+
Node::Element(e) => e.write_strip_tags(writer),
175+
Node::Comment(_) => (),
176+
}
177+
}
178+
}
179+
180+
/// Strip tags and decode HTML entities.
181+
pub fn to_text(&self) -> Cow<'_, str> {
182+
let t = self.strip_tags().0;
183+
match decode_html_entities(&t) {
184+
Cow::Borrowed(_) => t, // `decode_html_entities` only returns `Borrowed` if the whole input is passed through unchanged
185+
Cow::Owned(o) => Cow::Owned(o),
186+
}
187+
}
154188
}
155189

156190
pub(crate) fn for_each_element<'a, F>(tree: &mut [Node<'a>], f: &mut F) -> ControlFlow<(), ()>

src/dom/node.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
use super::element::{Element, for_each_element_async};
22
use crate::dom::element::for_each_element;
33
use crate::{Text, WalkResult};
4+
use html_escape::decode_html_entities;
45
use ownable::{IntoOwned, ToBorrowed, ToOwned};
56
use serde::Serialize;
7+
use std::borrow::Cow;
68

79
#[derive(Debug, Serialize, PartialEq, IntoOwned, ToBorrowed, ToOwned)]
810
#[serde(untagged)]
@@ -89,6 +91,27 @@ impl<'a> Node<'a> {
8991
}
9092
}
9193
}
94+
95+
/// Strip all tags.
96+
///
97+
/// Note that HTML entities are left encoded; only tags and comments are dropped.
98+
/// To decode entities as well, use [`Self::to_text`], or call [`Text::decode`] on the result.
99+
pub fn strip_tags(&self) -> Text<'_> {
100+
match self {
101+
Node::Text(t) => t.to_borrowed(),
102+
Node::Element(e) => e.strip_tags(),
103+
Node::Comment(_) => "".into(),
104+
}
105+
}
106+
107+
/// Strip tags and decode HTML entities.
108+
pub fn to_text(&self) -> Cow<'_, str> {
109+
let t = self.strip_tags().0;
110+
match decode_html_entities(&t) {
111+
Cow::Borrowed(_) => t, // `decode_html_entities` only returns `Borrowed` if the whole input is passed through unchanged
112+
Cow::Owned(o) => Cow::Owned(o),
113+
}
114+
}
92115
}
93116

94117
pub(crate) fn write_html_list(writer: &mut String, list: &[Node]) {

0 commit comments

Comments
 (0)