Skip to content

Commit a4fefb4

Browse files
committed
to text
1 parent 60df894 commit a4fefb4

2 files changed

Lines changed: 58 additions & 1 deletion

File tree

src/dom/element.rs

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@ use super::node::{Node, write_html_list};
33
use super::span::SourceSpan;
44
use crate::dom::vecmap::VecMap;
55
use crate::dom::walk::Children;
6-
use crate::{Attribute, VecSet};
6+
use crate::{Attribute, Text, VecSet};
7+
use html_escape::decode_html_entities;
78
use ownable::{IntoOwned, ToBorrowed, ToOwned};
89
use serde::Serialize;
910
use std::borrow::Cow;
@@ -141,4 +142,37 @@ impl Element<'_> {
141142
writer.push('>');
142143
}
143144
}
145+
146+
/// Strip all tags.
147+
///
148+
/// Note that HTML entities are not decoded; only tags are removed.
149+
/// Use [`Self::to_text`], or [`Text::decode`] on the result.
150+
pub fn strip_tags(&self) -> Text<'_> {
151+
if let &[Node::Text(t)] = &self.children.as_slice() {
152+
t.to_borrowed()
153+
} else {
154+
let mut result = String::new();
155+
self.write_strip_tags(&mut result);
156+
Text(result.into())
157+
}
158+
}
159+
160+
fn write_strip_tags(&self, writer: &mut String) {
161+
for c in &self.children {
162+
match c {
163+
Node::Text(t) => writer.push_str(t),
164+
Node::Element(e) => e.write_strip_tags(writer),
165+
Node::Comment(_) => (),
166+
}
167+
}
168+
}
169+
170+
/// Strip tags and decode html-entities.
171+
pub fn to_text(&self) -> Cow<'_, str> {
172+
let t = self.strip_tags().0;
173+
match decode_html_entities(&t) {
174+
Cow::Borrowed(_) => t, // `Borrowed` is only returned if the whole input is passed through unchanged
175+
Cow::Owned(o) => Cow::Owned(o),
176+
}
177+
}
144178
}

src/dom/node.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
use super::element::Element;
22
use crate::Text;
33
use crate::dom::walk::Children;
4+
use html_escape::decode_html_entities;
45
use ownable::{IntoOwned, ToBorrowed, ToOwned};
56
use serde::Serialize;
67
use std::array;
8+
use std::borrow::Cow;
79

810
#[derive(Debug, Serialize, PartialEq, IntoOwned, ToBorrowed, ToOwned)]
911
#[serde(untagged)]
@@ -84,6 +86,27 @@ impl Node<'_> {
8486
}
8587
}
8688
}
89+
90+
/// Strip all tags.
91+
///
92+
/// Note that HTML entities are not decoded; only tags are removed.
93+
/// Use [`Self::to_text`], or [`Text::decode`] on the result.
94+
pub fn strip_tags(&self) -> Text<'_> {
95+
match self {
96+
Node::Text(t) => t.to_borrowed(),
97+
Node::Element(e) => e.strip_tags(),
98+
Node::Comment(_) => "".into(),
99+
}
100+
}
101+
102+
/// Strip tags and decode html-entities.
103+
pub fn to_text(&self) -> Cow<'_, str> {
104+
let t = self.strip_tags().0;
105+
match decode_html_entities(&t) {
106+
Cow::Borrowed(_) => t, // `Borrowed` is only returned if the whole input is passed through unchanged
107+
Cow::Owned(o) => Cow::Owned(o),
108+
}
109+
}
87110
}
88111

89112
pub(crate) fn write_html_list(writer: &mut String, list: &[Node]) {

0 commit comments

Comments
 (0)