@@ -52,7 +52,6 @@ func ParseWithOptions(r io.Reader, options ParserOptions) (*Node, error) {
5252type parser struct {
5353 decoder * xml.Decoder
5454 doc * Node
55- space2prefix map [string ]string
5655 level int
5756 prev * Node
5857 streamElementXPath * xpath.Expr // Under streaming mode, this specifies the xpath to the target element node(s).
@@ -65,24 +64,24 @@ type parser struct {
6564func createParser (r io.Reader ) * parser {
6665 reader := newCachedReader (bufio .NewReader (r ))
6766 p := & parser {
68- decoder : xml .NewDecoder (reader ),
69- doc : & Node {Type : DocumentNode },
70- space2prefix : make (map [string ]string ),
71- level : 0 ,
72- reader : reader ,
67+ decoder : xml .NewDecoder (reader ),
68+ doc : & Node {Type : DocumentNode },
69+ level : 0 ,
70+ reader : reader ,
7371 }
74- // http://www.w3.org/XML/1998/namespace is bound by definition to the prefix xml.
75- p .space2prefix ["http://www.w3.org/XML/1998/namespace" ] = "xml"
7672 p .decoder .CharsetReader = charset .NewReaderLabel
7773 p .prev = p .doc
7874 return p
7975}
8076
8177func (p * parser ) parse () (* Node , error ) {
8278 var streamElementNodeCounter int
79+ space2prefix := map [string ]string {"http://www.w3.org/XML/1998/namespace" : "xml" }
8380
8481 for {
82+ p .reader .StartCaching ()
8583 tok , err := p .decoder .Token ()
84+ p .reader .StopCaching ()
8685 if err != nil {
8786 return nil , err
8887 }
@@ -104,35 +103,27 @@ func (p *parser) parse() (*Node, error) {
104103 p .level = 1
105104 p .prev = node
106105 }
107- // https://www.w3.org/TR/xml-names/#scoping-defaulting
108- var defaultNamespaceURL string
109- // [#102], If found the duplicate NamespaceURL, we should saving into the loca
110- // and use it instead of p.space2prefix.
111- local_space2prefix := make (map [string ]string )
106+
112107 for _ , att := range tok .Attr {
113108 if att .Name .Local == "xmlns" {
114- p .space2prefix [att .Value ] = "" // reset empty if exist the default namespace
115- local_space2prefix [att .Value ] = ""
116- defaultNamespaceURL = att .Value
109+ space2prefix [att .Value ] = "" // reset empty if exist the default namespace
110+ // defaultNamespaceURL = att.Value
117111 } else if att .Name .Space == "xmlns" {
118- if _ , ok := p .space2prefix [att .Value ]; ! ok {
119- p .space2prefix [att .Value ] = att .Name .Local
120- } else if defaultNamespaceURL != att .Value {
121- local_space2prefix [att .Value ] = att .Name .Local
122- }
112+ // The same namespace URL may be declared under more than one prefix; the assignment below keeps only the most recent prefix.
113+ space2prefix [att .Value ] = att .Name .Local
123114 }
124115 }
125116
126117 if space := tok .Name .Space ; space != "" {
127- if _ , found := p . space2prefix [space ]; ! found && p .decoder .Strict {
118+ if _ , found := space2prefix [space ]; ! found && p .decoder .Strict {
128119 return nil , fmt .Errorf ("xmlquery: invalid XML document, namespace %s is missing" , space )
129120 }
130121 }
131122
132123 attributes := make ([]Attr , len (tok .Attr ))
133124 for i , att := range tok .Attr {
134125 name := att .Name
135- if prefix , ok := p . space2prefix [name .Space ]; ok {
126+ if prefix , ok := space2prefix [name .Space ]; ok {
136127 name .Space = prefix
137128 }
138129 attributes [i ] = Attr {
@@ -160,18 +151,13 @@ func (p *parser) parse() (*Node, error) {
160151 }
161152 AddSibling (p .prev .Parent , node )
162153 }
154+
163155 if node .NamespaceURI != "" {
164- var keepPrefix bool = true
165- if prefix , ok := local_space2prefix [node .NamespaceURI ]; ok {
166- keepPrefix = true
167- node .Prefix = prefix
168- } else {
169- node .Prefix = p .space2prefix [node .NamespaceURI ]
170- }
171- if defaultNamespaceURL != "" && node .NamespaceURI == defaultNamespaceURL {
172- node .Prefix = ""
173- } else if n := node .Parent ; n != nil && ! keepPrefix && node .NamespaceURI == n .NamespaceURI {
174- node .Prefix = n .Prefix
156+ if v , ok := space2prefix [node .NamespaceURI ]; ok {
157+ cached := string (p .reader .Cache ())
158+ if strings .HasPrefix (cached , fmt .Sprintf ("%s:%s" , v , node .Data )) || strings .HasPrefix (cached , fmt .Sprintf ("<%s:%s" , v , node .Data )) {
159+ node .Prefix = v
160+ }
175161 }
176162 }
177163 // If we're in the streaming mode, we need to remember the node if it is the target node
@@ -191,7 +177,6 @@ func (p *parser) parse() (*Node, error) {
191177 }
192178 p .prev = node
193179 p .level ++
194- p .reader .StartCaching ()
195180 case xml.EndElement :
196181 p .level --
197182 // If we're in streaming mode, and we already have a potential streaming
@@ -228,7 +213,6 @@ func (p *parser) parse() (*Node, error) {
228213 }
229214 }
230215 case xml.CharData :
231- p .reader .StopCaching ()
232216 // First, normalize the cache...
233217 cached := strings .ToUpper (string (p .reader .Cache ()))
234218 nodeType := TextNode
@@ -247,7 +231,6 @@ func (p *parser) parse() (*Node, error) {
247231 }
248232 AddSibling (p .prev .Parent , node )
249233 }
250- p .reader .StartCaching ()
251234 case xml.Comment :
252235 node := & Node {Type : CommentNode , Data : string (tok ), level : p .level }
253236 if p .level == p .prev .level {
0 commit comments