Skip to content

Commit bf3bfcf

Browse files
committed
fix #104,#102
1 parent 0f42df8 commit bf3bfcf

2 files changed

Lines changed: 35 additions & 37 deletions

File tree

node_test.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,21 @@ func TestOutputXMLWithNamespacePrefix(t *testing.T) {
420420
}
421421
}
422422

423+
func TestQueryWithPrefix(t *testing.T) {
424+
s := `<?xml version="1.0" encoding="UTF-8"?><S:Envelope xmlns:S="http://schemas.xmlsoap.org/soap/envelope/"><S:Body test="1"><ns2:Fault xmlns:ns2="http://schemas.xmlsoap.org/soap/envelope/" xmlns:ns3="http://www.w3.org/2003/05/soap-envelope"><faultcode>ns2:Client</faultcode><faultstring>This is a client fault</faultstring></ns2:Fault></S:Body></S:Envelope>`
425+
doc, _ := Parse(strings.NewReader(s))
426+
n, err := Query(doc, `//S:Envelope/S:Body/ns2:Fault/faultcode`)
427+
if err != nil {
428+
t.Fatal(err)
429+
}
430+
if n == nil {
431+
t.Fatalf("should found one but got nil")
432+
}
433+
if expected, v := "ns2:Client", n.InnerText(); expected != v {
434+
t.Fatalf("expected %s but got %s", expected, v)
435+
}
436+
}
437+
423438
func TestOutputXMLWithCommentNode(t *testing.T) {
424439
s := `<?xml version="1.0" encoding="utf-8"?>
425440
<!-- Students grades are updated bi-monthly -->

parse.go

Lines changed: 20 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@ func ParseWithOptions(r io.Reader, options ParserOptions) (*Node, error) {
5252
type parser struct {
5353
decoder *xml.Decoder
5454
doc *Node
55-
space2prefix map[string]string
5655
level int
5756
prev *Node
5857
streamElementXPath *xpath.Expr // Under streaming mode, this specifies the xpath to the target element node(s).
@@ -65,24 +64,24 @@ type parser struct {
6564
func createParser(r io.Reader) *parser {
6665
reader := newCachedReader(bufio.NewReader(r))
6766
p := &parser{
68-
decoder: xml.NewDecoder(reader),
69-
doc: &Node{Type: DocumentNode},
70-
space2prefix: make(map[string]string),
71-
level: 0,
72-
reader: reader,
67+
decoder: xml.NewDecoder(reader),
68+
doc: &Node{Type: DocumentNode},
69+
level: 0,
70+
reader: reader,
7371
}
74-
// http://www.w3.org/XML/1998/namespace is bound by definition to the prefix xml.
75-
p.space2prefix["http://www.w3.org/XML/1998/namespace"] = "xml"
7672
p.decoder.CharsetReader = charset.NewReaderLabel
7773
p.prev = p.doc
7874
return p
7975
}
8076

8177
func (p *parser) parse() (*Node, error) {
8278
var streamElementNodeCounter int
79+
space2prefix := map[string]string{"http://www.w3.org/XML/1998/namespace": "xml"}
8380

8481
for {
82+
p.reader.StartCaching()
8583
tok, err := p.decoder.Token()
84+
p.reader.StopCaching()
8685
if err != nil {
8786
return nil, err
8887
}
@@ -104,35 +103,27 @@ func (p *parser) parse() (*Node, error) {
104103
p.level = 1
105104
p.prev = node
106105
}
107-
// https://www.w3.org/TR/xml-names/#scoping-defaulting
108-
var defaultNamespaceURL string
109-
// [#102], If found the duplicate NamespaceURL, we should saving into the loca
110-
// and use it instead of p.space2prefix.
111-
local_space2prefix := make(map[string]string)
106+
112107
for _, att := range tok.Attr {
113108
if att.Name.Local == "xmlns" {
114-
p.space2prefix[att.Value] = "" // reset empty if exist the default namespace
115-
local_space2prefix[att.Value] = ""
116-
defaultNamespaceURL = att.Value
109+
space2prefix[att.Value] = "" // reset empty if exist the default namespace
110+
// defaultNamespaceURL = att.Value
117111
} else if att.Name.Space == "xmlns" {
118-
if _, ok := p.space2prefix[att.Value]; !ok {
119-
p.space2prefix[att.Value] = att.Name.Local
120-
} else if defaultNamespaceURL != att.Value {
121-
local_space2prefix[att.Value] = att.Name.Local
122-
}
112+
// The same namespace URL may be declared under more than one prefix; the most recent declaration overwrites the earlier one.
113+
space2prefix[att.Value] = att.Name.Local
123114
}
124115
}
125116

126117
if space := tok.Name.Space; space != "" {
127-
if _, found := p.space2prefix[space]; !found && p.decoder.Strict {
118+
if _, found := space2prefix[space]; !found && p.decoder.Strict {
128119
return nil, fmt.Errorf("xmlquery: invalid XML document, namespace %s is missing", space)
129120
}
130121
}
131122

132123
attributes := make([]Attr, len(tok.Attr))
133124
for i, att := range tok.Attr {
134125
name := att.Name
135-
if prefix, ok := p.space2prefix[name.Space]; ok {
126+
if prefix, ok := space2prefix[name.Space]; ok {
136127
name.Space = prefix
137128
}
138129
attributes[i] = Attr{
@@ -160,18 +151,13 @@ func (p *parser) parse() (*Node, error) {
160151
}
161152
AddSibling(p.prev.Parent, node)
162153
}
154+
163155
if node.NamespaceURI != "" {
164-
var keepPrefix bool = true
165-
if prefix, ok := local_space2prefix[node.NamespaceURI]; ok {
166-
keepPrefix = true
167-
node.Prefix = prefix
168-
} else {
169-
node.Prefix = p.space2prefix[node.NamespaceURI]
170-
}
171-
if defaultNamespaceURL != "" && node.NamespaceURI == defaultNamespaceURL {
172-
node.Prefix = ""
173-
} else if n := node.Parent; n != nil && !keepPrefix && node.NamespaceURI == n.NamespaceURI {
174-
node.Prefix = n.Prefix
156+
if v, ok := space2prefix[node.NamespaceURI]; ok {
157+
cached := string(p.reader.Cache())
158+
if strings.HasPrefix(cached, fmt.Sprintf("%s:%s", v, node.Data)) || strings.HasPrefix(cached, fmt.Sprintf("<%s:%s", v, node.Data)) {
159+
node.Prefix = v
160+
}
175161
}
176162
}
177163
// If we're in the streaming mode, we need to remember the node if it is the target node
@@ -191,7 +177,6 @@ func (p *parser) parse() (*Node, error) {
191177
}
192178
p.prev = node
193179
p.level++
194-
p.reader.StartCaching()
195180
case xml.EndElement:
196181
p.level--
197182
// If we're in streaming mode, and we already have a potential streaming
@@ -228,7 +213,6 @@ func (p *parser) parse() (*Node, error) {
228213
}
229214
}
230215
case xml.CharData:
231-
p.reader.StopCaching()
232216
// First, normalize the cache...
233217
cached := strings.ToUpper(string(p.reader.Cache()))
234218
nodeType := TextNode
@@ -247,7 +231,6 @@ func (p *parser) parse() (*Node, error) {
247231
}
248232
AddSibling(p.prev.Parent, node)
249233
}
250-
p.reader.StartCaching()
251234
case xml.Comment:
252235
node := &Node{Type: CommentNode, Data: string(tok), level: p.level}
253236
if p.level == p.prev.level {

0 commit comments

Comments
 (0)