Skip to content

Commit 3955b6d

Browse files
Maintain empty/non-empty elements in stringAsXml output
The stringAsXml implementation currently converts both `<foo/>` and `<foo></foo>` XML strings to `<foo/>`. Although stringAsXml does not guarantee the resulting XML will always be the same, we do try to keep the result as close to the original as is reasonable. By casting the existing XMLStreamReader/XMLStreamWriter instances to XMLStreamReader2 and XMLStreamWriter2, we can use additional APIs to detect empty elements and write them as empty, with all other elements written with full endings. DAFFODIL-3074
1 parent deb3f6d commit 3955b6d

5 files changed

Lines changed: 33 additions & 17 deletions

File tree

daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/XMLTextInfosetInputter.scala

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,6 @@ import javax.xml.XMLConstants
2424
import javax.xml.stream.XMLInputFactory
2525
import javax.xml.stream.XMLStreamConstants.*
2626
import javax.xml.stream.XMLStreamException
27-
import javax.xml.stream.XMLStreamReader
28-
import javax.xml.stream.XMLStreamWriter
2927

3028
import org.apache.daffodil.api
3129
import org.apache.daffodil.api.Daffodil.InfosetInputterEventType
@@ -34,7 +32,9 @@ import org.apache.daffodil.lib.exceptions.Assert
3432
import org.apache.daffodil.lib.xml.XMLUtils
3533
import org.apache.daffodil.runtime1.dpath.NodeInfo
3634

37-
import com.ctc.wstx.cfg.ErrorConsts;
35+
import com.ctc.wstx.cfg.ErrorConsts
36+
import org.codehaus.stax2.XMLStreamReader2
37+
import org.codehaus.stax2.XMLStreamWriter2
3838

3939
object XMLTextInfoset {
4040
lazy val xmlInputFactory = {
@@ -140,10 +140,14 @@ object XMLTextInfoset {
140140
*
141141
* Both a lone CR and CRLF are converted to LF.
142142
*/
143-
def writeXMLStreamEvent(xsr: XMLStreamReader, xsw: XMLStreamWriter): Unit = {
143+
def writeXMLStreamEvent(xsr: XMLStreamReader2, xsw: XMLStreamWriter2): Unit = {
144144
xsr.getEventType() match {
145145
case START_ELEMENT => {
146-
xsw.writeStartElement(xsr.getPrefix(), xsr.getLocalName(), xsr.getNamespaceURI())
146+
if (xsr.isEmptyElement()) {
147+
xsw.writeEmptyElement(xsr.getPrefix(), xsr.getLocalName(), xsr.getNamespaceURI())
148+
} else {
149+
xsw.writeStartElement(xsr.getPrefix(), xsr.getLocalName(), xsr.getNamespaceURI())
150+
}
147151
for (i <- 0 until xsr.getNamespaceCount()) {
148152
xsw.writeNamespace(xsr.getNamespacePrefix(i), xsr.getNamespaceURI(i))
149153
}
@@ -155,8 +159,13 @@ object XMLTextInfoset {
155159
xsr.getAttributeValue(i)
156160
)
157161
}
162+
if (xsr.isEmptyElement()) {
163+
// skip the next END_ELEMENT event since writeEmptyElement above causes the
164+
// XMLStreamWriter to handle closing the empty element
165+
xsr.next()
166+
}
158167
}
159-
case END_ELEMENT => xsw.writeEndElement()
168+
case END_ELEMENT => xsw.writeFullEndElement()
160169
case CHARACTERS => xsw.writeCharacters(xsr.getText())
161170
case COMMENT => xsw.writeComment(xsr.getText())
162171
case CDATA => xsw.writeCData(xsr.getText())
@@ -189,8 +198,10 @@ object XMLTextInfoset {
189198

190199
class XMLTextInfosetInputter(input: java.io.InputStream) extends api.infoset.InfosetInputter {
191200

192-
private lazy val xsr: XMLStreamReader = {
193-
val xsr = XMLTextInfoset.xmlInputFactory.createXMLStreamReader(input)
201+
private lazy val xsr: XMLStreamReader2 = {
202+
val xsr = XMLTextInfoset.xmlInputFactory
203+
.createXMLStreamReader(input)
204+
.asInstanceOf[XMLStreamReader2]
194205

195206
// no need for UnparseError here. If the XML syntax is bad, parser catches it before we get here.
196207
Assert.invariant(xsr.hasNext())
@@ -256,8 +267,9 @@ class XMLTextInfosetInputter(input: java.io.InputStream) extends api.infoset.Inf
256267
// wrapper tag. We trim the result to remove whitespace that the outputter
257268
// may have written with pretty mode enabled.
258269
val sw = new StringWriter()
259-
val xsw =
260-
XMLTextInfoset.xmlOutputFactory.createXMLStreamWriter(sw, StandardCharsets.UTF_8.toString)
270+
val xsw = XMLTextInfoset.xmlOutputFactory
271+
.createXMLStreamWriter(sw, StandardCharsets.UTF_8.toString)
272+
.asInstanceOf[XMLStreamWriter2]
261273
xsw.writeStartDocument()
262274
while (
263275
xsr.getEventType() != END_ELEMENT || xsr.getLocalName() != XMLTextInfoset.stringAsXml

daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/XMLTextInfosetOutputter.scala

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ import org.apache.daffodil.lib.util.Indentable
3131
import org.apache.daffodil.lib.xml.XMLUtils
3232
import org.apache.daffodil.runtime1.dpath.NodeInfo
3333

34+
import org.codehaus.stax2.XMLStreamReader2
35+
import org.codehaus.stax2.XMLStreamWriter2
36+
3437
/**
3538
* Writes the infoset to a java.io.BufferedWriter as XML text.
3639
*
@@ -148,11 +151,12 @@ class XMLTextInfosetOutputter private (
148151
// logic also skips the START_DOCUMENT event so that the XML declaration is
149152
// not written in the middle of our XML infoset
150153
val sr = new StringReader(str)
151-
val xsr = XMLTextInfoset.xmlInputFactory.createXMLStreamReader(sr)
152-
val xsw = XMLTextInfoset.xmlOutputFactory.createXMLStreamWriter(
153-
writer,
154-
StandardCharsets.UTF_8.toString
155-
)
154+
val xsr = XMLTextInfoset.xmlInputFactory
155+
.createXMLStreamReader(sr)
156+
.asInstanceOf[XMLStreamReader2]
157+
val xsw = XMLTextInfoset.xmlOutputFactory
158+
.createXMLStreamWriter(writer, StandardCharsets.UTF_8.toString)
159+
.asInstanceOf[XMLStreamWriter2]
156160
Assert.invariant(xsr.getEventType() == START_DOCUMENT)
157161
while (xsr.hasNext()) {
158162
xsr.next()

daffodil-test/src/test/resources/org/apache/daffodil/infoset/stringAsXml/namespaced/binMessage_01.dat.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99
<?processing instruction?>
1010
<field><![CDATA[Field ]]> with <![CDATA[cdata]]> </field> here is mixed content
1111
<field>spaces</field> <field> spaces </field> and more mixed content
12-
<field/> and more mixed content
13-
<field/>
12+
<field attr="foo"/> and more mixed content
13+
<field attr="bar"></field>
1414
<field>entity references: &lt; > &amp; " ' ©</field>
1515
<field>CR</field>&#xd;<field>LF</field>
1616
<field>CRLF</field>&#xd;

0 commit comments

Comments
 (0)