Skip to content

Commit 6abb86c

Browse files
committed
disable serde and debug by default
serde and debug are not helpful except for tests
1 parent 1b226d2 commit 6abb86c

21 files changed

Lines changed: 132 additions & 78 deletions

Cargo.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,15 @@ readme = "README.md"
1414
pest = "2.5.7"
1515
pest_derive = "2.5.7"
1616
thiserror = "1.0.40"
17-
serde = { version = "1.0.159", features = ["derive"] }
18-
serde_json = "1.0.95"
17+
serde = { version = "1.0.159", features = ["derive"], optional = true }
18+
serde_json = { version = "1.0.95", optional = true }
1919
html-escape = "0.2"
2020
ownable = "1.0.0"
2121

2222
[features]
2323
default = ["source-span"]
2424
source-span = []
25+
test = ["dep:serde", "dep:serde_json"]
2526

2627
[dev-dependencies]
2728
indoc = "2.0.1"

examples/simple_parser/main.rs

Lines changed: 63 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,61 +1,74 @@
1-
use clap::Parser;
2-
use html_parser::{Dom, Result};
3-
use std::{
4-
fs::File,
5-
io::{self, Read},
6-
path::PathBuf,
7-
};
8-
9-
#[derive(Debug, Parser)]
10-
/// A simple and general purpose html/xhtml parser.
11-
struct Opt {
12-
#[arg(short, long)]
13-
/// Pretty-print the output.
14-
pretty_print: bool,
15-
16-
#[arg(short, long)]
17-
/// Debug the parser, this will print errors to the console.
18-
debug: bool,
19-
20-
/// Path to the file, or stdin (piped content).
21-
///
22-
/// This argument can either be a path to the html-file that you would like to parse or the
23-
/// result of stdin. Note: Content over stdin needs to be finite, for now, as it is collected
24-
/// into a string and then processed by the parser.
25-
input: Option<PathBuf>,
1+
fn main() -> html_parser::Result<()> {
2+
#[cfg(feature = "test")]
3+
{
4+
real::main()
5+
}
6+
7+
#[cfg(not(feature = "test"))]
8+
panic!("this example requires the `test` feature to be enabled");
269
}
2710

28-
fn main() -> Result<()> {
29-
let opt = Opt::parse();
11+
#[cfg(feature = "test")]
12+
mod real {
13+
use clap::Parser;
14+
use html_parser::{Dom, Result};
15+
use std::{
16+
fs::File,
17+
io::{self, Read},
18+
path::PathBuf,
19+
};
3020

31-
let mut content = String::with_capacity(100_000);
21+
#[derive(Debug, Parser)]
22+
/// A simple and general purpose html/xhtml parser.
23+
struct Opt {
24+
#[arg(short, long)]
25+
/// Pretty-print the output.
26+
pretty_print: bool,
3227

33-
// If input is provided then use that as a path
34-
if let Some(path) = opt.input {
35-
let mut file = File::open(path)?;
36-
file.read_to_string(&mut content)?;
28+
#[arg(short, long)]
29+
/// Debug the parser, this will print errors to the console.
30+
debug: bool,
3731

38-
// Else read from stdin, this enables piping
39-
// ex: `cat index.html | html_parser`
40-
} else {
41-
let stdin = io::stdin();
42-
let mut handle = stdin.lock();
43-
handle.read_to_string(&mut content)?;
44-
};
32+
/// Path to the file, or stdin (piped content).
33+
///
34+
/// This argument can either be a path to the html-file that you would like to parse or the
35+
/// result of stdin. Note: Content over stdin needs to be finite, for now, as it is collected
36+
/// into a string and then processed by the parser.
37+
input: Option<PathBuf>,
38+
}
39+
40+
pub(super) fn main() -> Result<()> {
41+
let opt = Opt::parse();
42+
43+
let mut content = String::with_capacity(100_000);
4544

46-
let dom = Dom::parse(&content)?;
45+
// If input is provided then use that as a path
46+
if let Some(path) = opt.input {
47+
let mut file = File::open(path)?;
48+
file.read_to_string(&mut content)?;
4749

48-
if opt.debug {
49-
for error in &dom.errors {
50-
println!("# {}", error);
50+
// Else read from stdin, this enables piping
51+
// ex: `cat index.html | html_parser`
52+
} else {
53+
let stdin = io::stdin();
54+
let mut handle = stdin.lock();
55+
handle.read_to_string(&mut content)?;
56+
};
57+
58+
let dom = Dom::parse(&content)?;
59+
60+
if opt.debug {
61+
for error in &dom.errors {
62+
println!("# {}", error);
63+
}
5164
}
52-
}
5365

54-
if opt.pretty_print {
55-
println!("{}", dom.to_json_pretty()?);
56-
} else {
57-
println!("{}", dom.to_json()?);
58-
}
66+
if opt.pretty_print {
67+
println!("{}", dom.to_json_pretty()?);
68+
} else {
69+
println!("{}", dom.to_json()?);
70+
}
5971

60-
Ok(())
72+
Ok(())
73+
}
6174
}

src/dom/element.rs

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,15 @@ use crate::dom::vecmap::VecMap;
66
use crate::{Attribute, Text, VecSet};
77
use html_escape::decode_html_entities;
88
use ownable::{IntoOwned, ToBorrowed, ToOwned};
9+
#[cfg(feature = "test")]
910
use serde::Serialize;
1011
use std::borrow::Cow;
1112
use std::default::Default;
1213

1314
/// Normal: `<div></div>` or Void: `<meta/>` and `<meta>`
14-
#[derive(Debug, Clone, Serialize, PartialEq)]
15-
#[serde(rename_all = "camelCase")]
15+
#[derive(Clone, PartialEq)]
16+
#[cfg_attr(feature = "test", derive(Debug, Serialize))]
17+
#[cfg_attr(feature = "test", serde(rename_all = "camelCase"))]
1618
// TODO: Align with: https://html.spec.whatwg.org/multipage/syntax.html#elements-2
1719
pub enum ElementVariant {
1820
/// A normal element can have children, ex: <div></div>.
@@ -24,11 +26,12 @@ pub enum ElementVariant {
2426
pub type Attributes<'a> = VecMap<Cow<'a, str>, Option<Attribute<'a>>>;
2527

2628
/// Most of the parsed html nodes are elements, except for text
27-
#[derive(Debug, Serialize, PartialEq, IntoOwned, ToBorrowed, ToOwned)]
28-
#[serde(rename_all = "camelCase")]
29+
#[derive(PartialEq, IntoOwned, ToBorrowed, ToOwned)]
30+
#[cfg_attr(feature = "test", derive(Debug, Serialize))]
31+
#[cfg_attr(feature = "test", serde(rename_all = "camelCase"))]
2932
pub struct Element<'a> {
3033
/// The id of the element
31-
#[serde(skip_serializing_if = "Option::is_none")]
34+
#[cfg_attr(feature = "test", serde(skip_serializing_if = "Option::is_none"))]
3235
pub id: Option<Attribute<'a>>,
3336

3437
/// The name / tag of the element
@@ -39,20 +42,20 @@ pub struct Element<'a> {
3942
pub variant: ElementVariant,
4043

4144
/// All of the elements attributes, except id and class
42-
#[serde(skip_serializing_if = "VecMap::is_empty")]
45+
#[cfg_attr(feature = "test", serde(skip_serializing_if = "VecMap::is_empty"))]
4346
pub attributes: Attributes<'a>,
4447

4548
/// All of the elements classes
46-
#[serde(skip_serializing_if = "VecSet::is_empty")]
49+
#[cfg_attr(feature = "test", serde(skip_serializing_if = "VecSet::is_empty"))]
4750
pub classes: VecSet<Attribute<'a>>,
4851

4952
/// All of the elements child nodes
50-
#[serde(skip_serializing_if = "Vec::is_empty")]
53+
#[cfg_attr(feature = "test", serde(skip_serializing_if = "Vec::is_empty"))]
5154
pub children: Vec<Node<'a>>,
5255

5356
#[cfg(feature = "source-span")]
5457
/// Span of the element in the parsed source
55-
#[serde(skip)]
58+
#[cfg_attr(feature = "test", serde(skip))]
5659
pub source_span: SourceSpan<'a>,
5760
}
5861

src/dom/html.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
use html_escape::{decode_html_entities, encode_text};
22
use ownable::{IntoOwned, ToBorrowed, ToOwned};
3+
#[cfg(feature = "test")]
34
use serde::Serialize;
45
use std::borrow::{Borrow, Cow};
56
use std::fmt::{Debug, Formatter};
67
use std::ops::Deref;
78

8-
#[derive(Default, Debug, Serialize, PartialEq, Eq, IntoOwned, ToBorrowed, ToOwned)]
9-
#[serde(transparent)]
9+
#[derive(Default, PartialEq, Eq, IntoOwned, ToBorrowed, ToOwned)]
10+
#[cfg_attr(feature = "test", derive(Debug, Serialize))]
11+
#[cfg_attr(feature = "test", serde(transparent))]
1012
pub struct Attribute<'a>(pub Cow<'a, str>);
1113

1214
impl<'a> Attribute<'a> {
@@ -53,8 +55,9 @@ impl Borrow<str> for Attribute<'_> {
5355
}
5456
}
5557

56-
#[derive(Default, Serialize, PartialEq, Eq, IntoOwned, ToBorrowed, ToOwned)]
57-
#[serde(transparent)]
58+
#[derive(Default, PartialEq, Eq, IntoOwned, ToBorrowed, ToOwned)]
59+
#[cfg_attr(feature = "test", derive(Serialize))]
60+
#[cfg_attr(feature = "test", serde(transparent))]
5861
pub struct Text<'a>(pub Cow<'a, str>);
5962

6063
impl<'a> Text<'a> {

src/dom/mod.rs

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use ownable::{IntoOwned, ToBorrowed, ToOwned};
22
use pest::{Parser, iterators::Pair, iterators::Pairs};
3+
#[cfg(feature = "test")]
34
use serde::Serialize;
45
use std::borrow::Cow;
56
use std::default::Default;
@@ -27,8 +28,9 @@ use element::{Element, ElementVariant};
2728
use node::Node;
2829

2930
/// Document, DocumentFragment or Empty
30-
#[derive(Debug, Clone, Copy, PartialEq, Serialize)]
31-
#[serde(rename_all = "camelCase")]
31+
#[derive(Clone, Copy, PartialEq)]
32+
#[cfg_attr(feature = "test", derive(Debug, Serialize))]
33+
#[cfg_attr(feature = "test", serde(rename_all = "camelCase"))]
3234
pub enum DomVariant {
3335
/// This means that the parsed html had the representation of an html document. The doctype is optional but a document should only have one root node with the name of html.
3436
/// Example:
@@ -53,19 +55,20 @@ pub enum DomVariant {
5355
}
5456

5557
/// **The main struct** & the result of the parsed html
56-
#[derive(Debug, Serialize, PartialEq, ToBorrowed, ToOwned, IntoOwned)]
57-
#[serde(rename_all = "camelCase")]
58+
#[derive(PartialEq, ToBorrowed, ToOwned, IntoOwned)]
59+
#[cfg_attr(feature = "test", derive(Debug, Serialize))]
60+
#[cfg_attr(feature = "test", serde(rename_all = "camelCase"))]
5861
pub struct Dom<'a> {
5962
/// The type of the tree that was parsed
6063
#[ownable(clone)]
6164
pub tree_type: DomVariant,
6265

6366
/// All of the root children in the tree
64-
#[serde(skip_serializing_if = "Vec::is_empty")]
67+
#[cfg_attr(feature = "test", serde(skip_serializing_if = "Vec::is_empty"))]
6568
pub children: Vec<Node<'a>>,
6669

6770
/// A collection of all errors during parsing
68-
#[serde(skip_serializing)]
71+
#[cfg_attr(feature = "test", serde(skip_serializing))]
6972
#[ownable(clone)]
7073
pub errors: Vec<String>,
7174
}
@@ -89,10 +92,12 @@ impl<'a> Dom<'a> {
8992
Self::build_dom(pairs)
9093
}
9194

95+
#[cfg(feature = "test")]
9296
pub fn to_json(&self) -> Result<String> {
9397
Ok(serde_json::to_string(self)?)
9498
}
9599

100+
#[cfg(feature = "test")]
96101
pub fn to_json_pretty(&self) -> Result<String> {
97102
Ok(serde_json::to_string_pretty(self)?)
98103
}
@@ -178,7 +183,10 @@ impl<'a> Dom<'a> {
178183
} else {
179184
// Anything else (i.e. Text() or Element() ) can't happen at the top level;
180185
// if we had seen one, we would have set the document type above
181-
unreachable!("[build dom] empty document with an Element {:?}", node)
186+
unreachable!(
187+
"[build dom] empty document with an Element {:?}",
188+
node.to_html()
189+
)
182190
}
183191
}
184192
}

src/dom/node.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,14 @@ use crate::Text;
33
use crate::dom::for_each::ForEach;
44
use html_escape::decode_html_entities;
55
use ownable::{IntoOwned, ToBorrowed, ToOwned};
6+
#[cfg(feature = "test")]
67
use serde::Serialize;
78
use std::array;
89
use std::borrow::Cow;
910

10-
#[derive(Debug, Serialize, PartialEq, IntoOwned, ToBorrowed, ToOwned)]
11-
#[serde(untagged)]
11+
#[derive(PartialEq, IntoOwned, ToBorrowed, ToOwned)]
12+
#[cfg_attr(feature = "test", derive(Debug, Serialize))]
13+
#[cfg_attr(feature = "test", serde(untagged))]
1214
pub enum Node<'a> {
1315
Text(Text<'a>),
1416
Element(Element<'a>),
@@ -188,6 +190,7 @@ impl<'a> Iterator for NodeIntoIterator<'a> {
188190
}
189191
}
190192

193+
#[cfg(feature = "test")]
191194
#[cfg(test)]
192195
mod tests {
193196
use super::*;

src/dom/span.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
use ownable::{IntoOwned, ToBorrowed, ToOwned};
2+
#[cfg(feature = "test")]
23
use serde::Serialize;
34
use std::borrow::Cow;
45

56
/// Span of the information in the parsed source.
6-
#[derive(Debug, Default, Clone, Serialize, PartialEq, IntoOwned, ToBorrowed, ToOwned)]
7-
#[serde(rename_all = "camelCase")]
7+
#[derive(Default, Clone, PartialEq, IntoOwned, ToBorrowed, ToOwned)]
8+
#[cfg_attr(feature = "test", derive(Debug, Serialize))]
9+
#[cfg_attr(feature = "test", serde(rename_all = "camelCase"))]
810
pub struct SourceSpan<'a> {
911
pub text: Cow<'a, str>,
1012
pub start_line: usize,

src/dom/vecmap.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use ownable::traits::{IntoOwned, ToBorrowed, ToOwned};
2+
#[cfg(feature = "test")]
23
use serde::{Serialize, Serializer};
34
use std::borrow::Borrow;
45
use std::fmt::{Debug, Formatter};
@@ -166,6 +167,7 @@ where
166167
}
167168
}
168169

170+
#[cfg(feature = "test")]
169171
impl<K, V> Serialize for VecMap<K, V>
170172
where
171173
K: Serialize,

src/dom/vecset.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use crate::VecMap;
22
use ownable::traits::{IntoOwned, ToBorrowed, ToOwned};
3+
#[cfg(feature = "test")]
34
use serde::{Serialize, Serializer};
45
use std::borrow::Borrow;
56
use std::fmt::{Debug, Formatter};
@@ -109,6 +110,7 @@ impl<K: IntoOwned> IntoOwned for VecSet<K> {
109110
}
110111
}
111112

113+
#[cfg(feature = "test")]
112114
impl<K> Serialize for VecSet<K>
113115
where
114116
K: Serialize,

src/error.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ pub enum Error {
88
Cli(String),
99
#[error("{0}")]
1010
IO(#[from] std::io::Error),
11+
#[cfg(feature = "test")]
1112
#[error("{0}")]
1213
Serde(#[from] serde_json::Error),
1314
}

0 commit comments

Comments
 (0)