Skip to content

Commit 4623076

Browse files
Add support for infoset dataType member
- Add new setIncludeDataType and getIncludeDataType functions to the InfosetOutputter API to configure if InfosetOutputters should include the dataType member when creating infosets. It is left up to specific InfosetOutputter implementations if and how to represent this member in their infosets. - The XMLTextInfosetOutputter and ScalaXMLInfosetOutputter are updated to include the xsi:type attribute when this is enabled. They also define the "xsi" namespace prefix if not already defined. Other InfosetOutputters do not currently implement this. - Update the TDML Runner to enable this flag for all infoset outputters. The TDML Runner already supports type-aware comparisons when xsi:type is provided, so enabling this allows all TDML tests to use type-aware logic. This means expected infosets no longer need to include xsi:type for type-aware comparisons. - Add new "infosetIncludeDataType" tunable, which calls setIncludeDataType if set. This allows enabling this feature without needing to modify code to call the new API function. - Fix TDML tests that used an xsi:type value that did not match the actual value. - Discovered a bug where xs:decimal elements could be output with scientific notation. This is not legal in XSD so these elements are now output using .toPlainString. Deprecation/Compatibility - Previous versions of Daffodil could sometimes use scientific notation when outputting elements with an xs:decimal type, which XSD does not allow. Daffodil now always outputs xs:decimal types using standard decimal notation without any exponent part. DAFFODIL-182
1 parent deb3f6d commit 4623076

10 files changed

Lines changed: 105 additions & 34 deletions

File tree

daffodil-core/src/main/java/org/apache/daffodil/api/infoset/InfosetOutputter.java

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,19 +47,28 @@ public abstract class InfosetOutputter {
4747
* blob directory path which defaults to java temp dir
4848
*/
4949
private Path blobDirectory = Paths.get(System.getProperty("java.io.tmpdir"));
50+
5051
/**
5152
* blob prefix which defaults to daffodil-
5253
*/
5354
private String blobPrefix = "daffodil-";
55+
5456
/**
5557
* blob suffix which defaults to .blob
5658
*/
5759
private String blobSuffix = ".blob";
60+
5861
/**
5962
* list of blob paths output in the infoset
6063
*/
6164
private List<Path> blobPaths;
6265

66+
/**
67+
* whether or not to enable the dataType infoset member. It is up to
68+
* InfosetOutputter implementations if and how to include this member.
69+
*/
70+
private boolean includeDataType = false;
71+
6372
/**
6473
* Reset the internal state of this InfosetOutputter. This should be called
6574
* in between calls to the parse method.
@@ -205,4 +214,22 @@ final public List<Path> getBlobPaths() {
205214
final public void setBlobPaths(List<Path> blobPaths) {
206215
this.blobPaths = blobPaths;
207216
}
217+
218+
/**
219+
* Set whether the InfosetOutputter should include the dataType member when
220+
* it outputs infoset elements. It is up to InfosetOutputter implementations
221+
* if and how to output the dataType
222+
*/
223+
final public void setIncludeDataType(boolean includeDataType) {
224+
this.includeDataType = includeDataType;
225+
}
226+
227+
/**
228+
* Get whether the InfosetOutputter should include the dataType member when
229+
* it outputs infoset elements. It is up to InfosetOutputter implementations
230+
* if and how to output this dataType
231+
*/
232+
final public boolean getIncludeDataType() {
233+
return includeDataType;
234+
}
208235
}

daffodil-core/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -939,8 +939,14 @@ Differences were (path, expected, actual):
939939
(prefixB, labelB, attribsB, nsbB, childrenB)
940940
case x => Assert.invariantFailed(s"Expected elem, found $x")
941941
}
942-
val typeA: Option[String] = getXSIType(a)
943-
val typeB: Option[String] = getXSIType(b)
942+
// some TDML files use xsd prefixes for xsi:type values. Instead of trying to resolve
943+
// the prefix, just replace "xsd:" with "xs:"--the rest of our code assumes xs prefixes
944+
val typeA: Option[String] = getXSIType(a).map { t =>
945+
if (t.startsWith("xsd:")) "xs:" + t.substring(4) else t
946+
}
947+
val typeB: Option[String] = getXSIType(b).map { t =>
948+
if (t.startsWith("xsd:")) "xs:" + t.substring(4) else t
949+
}
944950
val maybeType: Option[String] = Option(typeA.getOrElse(typeB.getOrElse(null)))
945951
val nilledA = a.attribute(XSI_NAMESPACE.toString, "nil")
946952
val nilledB = b.attribute(XSI_NAMESPACE.toString, "nil")
@@ -965,15 +971,9 @@ Differences were (path, expected, actual):
965971
nilledB.map(_.toString).getOrElse("")
966972
)
967973
)
968-
} else if (typeA != typeB && typeA.isDefined && typeB.isDefined) {
974+
} else if (typeA.isDefined && typeB.isDefined && typeA.get != typeB.get) {
969975
// different xsi:type (if both supplied)
970-
List(
971-
(
972-
zPath + "/" + labelA + "@xsi:type",
973-
typeA.map(_.toString).getOrElse(""),
974-
typeA.map(_.toString).getOrElse("")
975-
)
976-
)
976+
List((zPath + "/" + labelA + "@xsi:type", typeA.get, typeB.get))
977977
} else {
978978
val pathLabel = labelA + maybeIndex.map("[" + _ + "]").getOrElse("")
979979
val thisPathStep = pathLabel +: parentPathSteps

daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/InfosetImpl.scala

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1543,6 +1543,10 @@ sealed class DISimple(override val erd: ElementRuntimeData)
15431543
else if (f == Float.NegativeInfinity) XMLUtils.NegativeInfinityString
15441544
else f.toString
15451545
}
1546+
case d: java.math.BigDecimal => {
1547+
// scientific notation is not allowed by XSD for xs:decimal
1548+
d.toPlainString
1549+
}
15461550
case x => x.toString
15471551
}
15481552
}

daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/ScalaXMLInfosetOutputter.scala

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@ package org.apache.daffodil.runtime1.infoset
1919

2020
import scala.collection.mutable.ListBuffer
2121
import scala.xml.MetaData
22+
import scala.xml.NamespaceBinding
2223
import scala.xml.Null
24+
import scala.xml.PrefixedAttribute
2325
import scala.xml.UnprefixedAttribute
2426

2527
import org.apache.daffodil.api.DFDLPrimType
@@ -54,6 +56,16 @@ class ScalaXMLInfosetOutputter(showFreedInfo: Boolean = false)
5456
resultNode = Maybe(root(0))
5557
}
5658

59+
private def getScope(diElem: DIElement): NamespaceBinding = {
60+
val minScope = diElem.metadata.minimizedScope
61+
// if including xsi:type is enabled, ensure the xsi namespace is defined on the root element
62+
if (getIncludeDataType() && stack.length == 1 && minScope.getPrefix("xsi") == null) {
63+
NamespaceBinding("xsi", XMLUtils.XSI_NAMESPACE, minScope)
64+
} else {
65+
minScope
66+
}
67+
}
68+
5769
private def getAttributes(diElem: DIElement): MetaData = {
5870
val nilAttr = if (diElem.isNilled) XMLUtils.xmlNilAttribute else Null
5971
val freedAttr =
@@ -80,7 +92,14 @@ class ScalaXMLInfosetOutputter(showFreedInfo: Boolean = false)
8092
} else {
8193
nilAttr
8294
}
83-
freedAttr
95+
val typedAttr =
96+
if (getIncludeDataType() && diElem.isSimple) {
97+
val primName = diElem.erd.optPrimType.get.name
98+
new PrefixedAttribute("xsi", "type", "xs:" + primName, freedAttr)
99+
} else {
100+
freedAttr
101+
}
102+
typedAttr
84103
}
85104

86105
override def startSimple(se: InfosetSimpleElement): Unit = {
@@ -105,7 +124,7 @@ class ScalaXMLInfosetOutputter(showFreedInfo: Boolean = false)
105124
diSimple.metadata.prefix,
106125
diSimple.metadata.name,
107126
attributes,
108-
diSimple.metadata.minimizedScope,
127+
getScope(diSimple),
109128
minimizeEmpty = true,
110129
children*
111130
)
@@ -130,7 +149,7 @@ class ScalaXMLInfosetOutputter(showFreedInfo: Boolean = false)
130149
diComplex.metadata.prefix,
131150
diComplex.metadata.name,
132151
attributes,
133-
diComplex.metadata.minimizedScope,
152+
getScope(diComplex),
134153
minimizeEmpty = true,
135154
children*
136155
)

daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/XMLTextInfosetOutputter.scala

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,12 @@ class XMLTextInfosetOutputter private (
8282
*/
8383
private var inScopeComplexElementHasChildren = false
8484

85+
private var hasStartedRoot = false
86+
8587
override def reset(): Unit = {
8688
resetIndentation()
8789
inScopeComplexElementHasChildren = false
90+
hasStartedRoot = false
8891
}
8992

9093
private def outputTagName(elem: DIElement): Unit = {
@@ -109,6 +112,16 @@ class XMLTextInfosetOutputter private (
109112
nsbStart.buildString(sb, nsbEnd)
110113
writer.write(sb.toString)
111114
}
115+
// if including xsi:type is enabled, ensure the xsi namespace is defined on the root
116+
// element
117+
if (getIncludeDataType() && !hasStartedRoot && nsbStart.getPrefix("xsi") == null) {
118+
writer.write(" xmlns:xsi=\"" + XMLUtils.XSI_NAMESPACE + "\"")
119+
}
120+
}
121+
122+
if (getIncludeDataType() && elem.isSimple) {
123+
val primName = elem.erd.optPrimType.get.name
124+
writer.write(" xsi:type=\"xs:" + primName + "\"")
112125
}
113126

114127
if (elem.isNilled) {
@@ -223,6 +236,7 @@ class XMLTextInfosetOutputter private (
223236

224237
outputEndTag(simple)
225238
inScopeComplexElementHasChildren = true
239+
hasStartedRoot = true
226240
}
227241

228242
override def endSimple(simple: InfosetSimpleElement): Unit = {
@@ -238,6 +252,7 @@ class XMLTextInfosetOutputter private (
238252
outputStartTag(complex)
239253
incrementIndentation()
240254
inScopeComplexElementHasChildren = false
255+
hasStartedRoot = true
241256
}
242257

243258
override def endComplex(ce: InfosetComplexElement): Unit = {

daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/DataProcessor.scala

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,11 @@ class DataProcessor(
287287
output: api.infoset.InfosetOutputter
288288
): DFDL.ParseResult = {
289289
checkNotError()
290+
291+
if (tunables.infosetIncludeDataType) {
292+
output.setIncludeDataType(true)
293+
}
294+
290295
// If full validation is enabled, tee all the infoset events to a second
291296
// infoset outputter that writes the infoset to a byte array, and then
292297
// we'll validate that byte array upon a successful parse.

daffodil-propgen/src/main/resources/org/apache/daffodil/xsd/dafext.xsd

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,14 @@
273273
</xs:restriction>
274274
</xs:simpleType>
275275
</xs:element>
276+
<xs:element name="infosetIncludeDataType" type="xs:boolean" default="false" minOccurs="0">
277+
<xs:annotation>
278+
<xs:documentation>
279+
Whether or not to include the dataType infoset member when outputting an infoset. If
280+
and how this is represented in the infoset is up to InfosetOutputter implementations.
281+
</xs:documentation>
282+
</xs:annotation>
283+
</xs:element>
276284
<xs:element name="inputFileMemoryMapLowThreshold" type="xs:int" default="33554432" minOccurs="0">
277285
<xs:annotation>
278286
<xs:documentation>

daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ class TDMLInfosetOutputterScala(scalaOut: ScalaXMLInfosetOutputter)
5757
object TDMLInfosetOutputterScala {
5858
def apply(): TDMLInfosetOutputterScala = {
5959
val scalaOut = new ScalaXMLInfosetOutputter()
60+
scalaOut.setIncludeDataType(true)
6061
new TDMLInfosetOutputterScala(scalaOut)
6162
}
6263
}
@@ -99,6 +100,10 @@ object TDMLInfosetOutputterAll {
99100
val jsonOut = new JsonInfosetOutputter(jsonStream, false)
100101
val xmlOut = new XMLTextInfosetOutputter(xmlStream, false)
101102

103+
Seq(scalaOut, jdomOut, w3cdomOut, jsonOut, xmlOut).foreach { out =>
104+
out.setIncludeDataType(true)
105+
}
106+
102107
new TDMLInfosetOutputterAll(
103108
jsonStream,
104109
xmlStream,

daffodil-tdml-processor/src/test/scala/org/apache/daffodil/processor/tdml/TestTDMLRunner.scala

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -690,7 +690,7 @@ f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb fc fd fe ff
690690
runner.runOneTest("testTDMLHexBinaryTypeAwareSuccess")
691691
}
692692

693-
@Test def testTDMLHexBinaryTypeAwareFailure(): Unit = {
693+
@Test def testTDMLHexBinaryTypeAwareSuccess_03(): Unit = {
694694
val testSuite = <ts:testSuite xmlns:ts={tdml} suiteName="theSuiteName" xmlns:xs={
695695
xsd
696696
} xmlns:dfdl={dfdl} xmlns:tns={example}>
@@ -699,7 +699,7 @@ f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb fc fd fe ff
699699
<dfdl:format ref="tns:GeneralFormat"/>
700700
<xs:element name="data" type="xs:hexBinary" dfdl:lengthKind="explicit" dfdl:length="4"/>
701701
</ts:defineSchema>
702-
<ts:parserTestCase ID="some identifier" name="testTDMLHexBinaryTypeAwareFailure"
702+
<ts:parserTestCase ID="some identifier" name="testTDMLHexBinaryTypeAwareSuccess"
703703
root="data" model="mySchema">
704704
<ts:document>
705705
<ts:documentPart type="byte">A1B2C3D4</ts:documentPart>
@@ -712,13 +712,7 @@ f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb fc fd fe ff
712712
</ts:parserTestCase>
713713
</ts:testSuite>
714714
val runner = new Runner(testSuite)
715-
val e = intercept[Exception] {
716-
runner.runOneTest("testTDMLHexBinaryTypeAwareFailure")
717-
}
718-
val msg = e.getMessage()
719-
assertTrue(msg.contains("Comparison failed"))
720-
assertTrue(msg.contains("a1b2c3d4"))
721-
assertTrue(msg.contains("A1B2C3D4"))
715+
runner.runOneTest("testTDMLHexBinaryTypeAwareSuccess")
722716
}
723717

724718
@Test def testTDMLDateTimeTypeAwareSuccess_01(): Unit = {
@@ -825,7 +819,7 @@ f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb fc fd fe ff
825819
runner.runOneTest("testTDMLDateTimeTypeAwareSuccess")
826820
}
827821

828-
@Test def testTDMLDateTimeTypeAwareFailure(): Unit = {
822+
@Test def testTDMLDateTimeTypeAwareSuccess_05(): Unit = {
829823
val testSuite = <ts:testSuite xmlns:ts={tdml} suiteName="theSuiteName" xmlns:xs={
830824
xsd
831825
} xmlns:dfdl={dfdl} xmlns:tns={example}>
@@ -836,7 +830,7 @@ f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb fc fd fe ff
836830
dfdl:calendarPatternKind="explicit"
837831
dfdl:calendarPattern="uuuu-MM-dd'T'HH:mm:ss.SSSSSSxxxxx" />
838832
</ts:defineSchema>
839-
<ts:parserTestCase ID="some identifier" name="testTDMLDateTimeTypeAwareFailure"
833+
<ts:parserTestCase ID="some identifier" name="testTDMLDateTimeTypeAwareSuccess"
840834
root="data" model="mySchema">
841835
<ts:document>1995-03-24T01:30:00.000000+00:00</ts:document>
842836
<ts:infoset>
@@ -847,13 +841,7 @@ f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb fc fd fe ff
847841
</ts:parserTestCase>
848842
</ts:testSuite>
849843
val runner = new Runner(testSuite)
850-
val e = intercept[Exception] {
851-
runner.runOneTest("testTDMLDateTimeTypeAwareFailure")
852-
}
853-
val msg = e.getMessage()
854-
assertTrue(msg.contains("Comparison failed"))
855-
assertTrue(msg.contains("1995-03-24T01:30:00Z"))
856-
assertTrue(msg.contains("1995-03-24T01:30:00+00:00"))
844+
runner.runOneTest("testTDMLDateTimeTypeAwareSuccess")
857845
}
858846

859847
/**

daffodil-test/src/test/resources/org/apache/daffodil/section05/simple_types/SimpleTypes.tdml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3345,7 +3345,7 @@
33453345
</tdml:document>
33463346
<tdml:infoset>
33473347
<tdml:dfdlInfoset>
3348-
<dec_03 xsi:type="xs:double">1.23456789E+13</dec_03>
3348+
<dec_03 xsi:type="xs:decimal">12345678900000</dec_03>
33493349
</tdml:dfdlInfoset>
33503350
</tdml:infoset>
33513351
</tdml:parserTestCase>
@@ -3357,7 +3357,7 @@
33573357
</tdml:document>
33583358
<tdml:infoset>
33593359
<tdml:dfdlInfoset>
3360-
<dec_04 xsi:type="xs:double">1E-200</dec_04>
3360+
<dec_04 xsi:type="xs:decimal">0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001</dec_04>
33613361
</tdml:dfdlInfoset>
33623362
</tdml:infoset>
33633363
</tdml:parserTestCase>
@@ -3369,7 +3369,7 @@
33693369
</tdml:document>
33703370
<tdml:infoset>
33713371
<tdml:dfdlInfoset>
3372-
<dec_05 xsi:type="xs:double">1E+200</dec_05>
3372+
<dec_05 xsi:type="xs:decimal">100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000</dec_05>
33733373
</tdml:dfdlInfoset>
33743374
</tdml:infoset>
33753375
</tdml:parserTestCase>

0 commit comments

Comments
 (0)