forked from HenrikJoreteg/html-parse-stringify
-
Notifications
You must be signed in to change notification settings - Fork 11
Expand file tree
/
Copy pathparse.js
More file actions
103 lines (89 loc) · 3.42 KB
/
parse.js
File metadata and controls
103 lines (89 loc) · 3.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
/*jshint -W030 */
// Matches one piece of markup per hit: either a whole comment (`<!-- ... -->`,
// shortest possible match) or a tag (`<...>`). Inside a tag, quoted runs are
// consumed as a unit, so a `>` inside a quoted attribute value does not end
// the tag.
var tagRE = /(?:<!--[\S\s]*?-->|<(?:"[^"]*"['"]*|'[^']*'['"]*|[^'">])+>)/g;
// called by parse() on every opening tag; returns the node object for that tag
var parseTag = require('./parse-tag');
// re-used obj for quick lookups of components: the default for
// `options.components`. Created without a prototype when Object.create is
// available, so a lookup by tag name can only hit keys that were put there.
var empty = Object.create ? Object.create(null) : {};
// shared helper: slice out the text that begins at `start` and, unless it is
// whitespace that has to be dropped, append it to `list` as a text node
function pushTextNode(list, html, level, start, ignoreWhitespace) {
    // the text runs up to the next '<'; when no further tag exists it runs
    // to the end of the input.
    var nextTagAt = html.indexOf('<', start);
    var isFollowedByTag = nextTagAt > -1;
    var text = isFollowedByTag ? html.slice(start, nextTagAt) : html.slice(start);

    if (/^\s*$/.test(text)) {
        // whitespace-only text is skipped when:
        // * the caller asked for whitespace to be ignored, or
        // * it is trailing text (nothing but end of input follows), or
        // * it is leading text at the root (level is -1 and list is empty)
        if (ignoreWhitespace || !isFollowedByTag || !(level + list.length >= 0)) {
            return;
        }
        // otherwise collapse it to a single space, as the spec states:
        // https://www.w3.org/TR/html4/struct/text.html#h-9.1
        text = ' ';
    }

    list.push({
        type: 'text',
        content: text
    });
}
module.exports = function parse(html, options) {
options || (options = {});
options.components || (options.components = empty);
var result = [];
var current;
var level = -1;
var arr = [];
var byTag = {};
var inComponent = false;
html.replace(tagRE, function (tag, index) {
if (inComponent) {
if (tag !== ('</' + current.name + '>')) {
return;
} else {
inComponent = false;
}
}
var isOpen = tag.charAt(1) !== '/';
var isComment = tag.indexOf('<!--') === 0;
var start = index + tag.length;
var nextChar = html.charAt(start);
var parent;
if (isOpen && !isComment) {
level++;
current = parseTag(tag);
if (current.type === 'tag' && options.components[current.name]) {
current.type = 'component';
inComponent = true;
}
if (!current.voidElement && !inComponent && nextChar && nextChar !== '<') {
pushTextNode(current.children, html, level, start, options.ignoreWhitespace);
}
byTag[current.tagName] = current;
// if we're at root, push new base node
if (level === 0) {
result.push(current);
}
parent = arr[level - 1];
if (parent) {
parent.children.push(current);
}
arr[level] = current;
}
if (isComment || !isOpen || current.voidElement) {
if (!isComment && level > -1) {
level--;
}
if (!inComponent && nextChar !== '<' && nextChar) {
// trailing text node
// if we're at the root, push a base text node. otherwise add as
// a child to the current node.
parent = level === -1 ? result : arr[level].children;
pushTextNode(parent, html, level, start, options.ignoreWhitespace);
}
}
});
// If the "html" passed isn't actually html, add it as a text node.
if (!result.length && html.length) {
pushTextNode(result, html, 0, 0, options.ignoreWhitespace);
}
return result;
};