Skip to content

Commit 15c9e0d

Browse files
stephenamar-db and jnyi authored
Implement quote_keys and c_document_end for yaml functions (#231)
Forked from PR #143 --------- Co-authored-by: Yi Jin <yi.jin@databricks.com>
1 parent 29bcc9a commit 15c9e0d

6 files changed

Lines changed: 738 additions & 669 deletions

File tree

sjsonnet/src/sjsonnet/BaseCharRenderer.scala

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,16 @@ package sjsonnet
44
// with some private definitions made accessible to subclasses
55

66
import ujson._
7+
78
import scala.annotation.switch
8-
import upickle.core.{ArrVisitor, ObjVisitor}
9+
import upickle.core.{ArrVisitor, ObjVisitor, Visitor}
910
class BaseCharRenderer[T <: upickle.core.CharOps.Output]
1011
(out: T,
1112
indent: Int = -1,
1213
escapeUnicode: Boolean = false,
1314
newline: Array[Char] = Array('\n')) extends JsVisitor[T, T]{
1415
protected[this] val elemBuilder = new upickle.core.CharBuilder
15-
protected[this] val unicodeCharBuilder = new upickle.core.CharBuilder()
16-
/** Hands the characters accumulated in `elemBuilder` over to `out`.
  *
  * The length threshold passed to `writeOutToIfLongerThan` is 0 while at
  * depth 0 and 1000 otherwise (presumably so nested output is written in
  * larger batches -- confirm against upickle's CharBuilder).
  */
def flushCharBuilder(): Unit = {
  val threshold = if (depth == 0) 0 else 1000
  elemBuilder.writeOutToIfLongerThan(out, threshold)
}
1919

@@ -22,25 +22,29 @@ class BaseCharRenderer[T <: upickle.core.CharOps.Output]
2222

2323
protected[this] var commaBuffered = false
2424

25-
/** Emits the separator that was deferred by the previous value, if any:
  * a ',' followed by the indentation for the next element.
  */
def flushBuffer(): Unit =
  if (commaBuffered) {
    // Reset the flag before writing so the pending comma is emitted once.
    commaBuffered = false
    elemBuilder.append(',')
    renderIndent()
  }
32-
def visitArray(length: Int, index: Int) = new ArrVisitor[T, T] {
32+
33+
def visitArray(length: Int, index: Int): ArrVisitor[T, T] = new ArrVisitor[T, T] {
3334
flushBuffer()
3435
elemBuilder.append('[')
3536

3637
depth += 1
3738
renderIndent()
38-
def subVisitor = BaseCharRenderer.this
39+
40+
def subVisitor: Visitor[T, T] = BaseCharRenderer.this
41+
3942
def visitValue(v: T, index: Int): Unit = {
4043
flushBuffer()
4144
commaBuffered = true
4245
}
43-
def visitEnd(index: Int) = {
46+
47+
def visitEnd(index: Int): T = {
4448
commaBuffered = false
4549
depth -= 1
4650
renderIndent()
@@ -50,21 +54,26 @@ class BaseCharRenderer[T <: upickle.core.CharOps.Output]
5054
}
5155
}
5256

53-
def visitObject(length: Int, index: Int) = new ObjVisitor[T, T] {
57+
def visitObject(length: Int, index: Int): ObjVisitor[T, T] = new ObjVisitor[T, T] {
5458
flushBuffer()
5559
elemBuilder.append('{')
5660
depth += 1
5761
renderIndent()
58-
def subVisitor = BaseCharRenderer.this
59-
def visitKey(index: Int) = BaseCharRenderer.this
62+
63+
def subVisitor: Visitor[T, T] = BaseCharRenderer.this
64+
65+
def visitKey(index: Int): Visitor[T, T] = BaseCharRenderer.this
66+
6067
def visitKeyValue(s: Any): Unit = {
6168
elemBuilder.append(':')
6269
if (indent != -1) elemBuilder.append(' ')
6370
}
71+
6472
def visitValue(v: T, index: Int): Unit = {
6573
commaBuffered = true
6674
}
67-
def visitEnd(index: Int) = {
75+
76+
def visitEnd(index: Int): T = {
6877
commaBuffered = false
6978
depth -= 1
7079
renderIndent()
@@ -74,7 +83,7 @@ class BaseCharRenderer[T <: upickle.core.CharOps.Output]
7483
}
7584
}
7685

77-
def visitNull(index: Int) = {
86+
def visitNull(index: Int): T = {
7887
flushBuffer()
7988
elemBuilder.ensureLength(4)
8089
elemBuilder.appendUnsafe('n')
@@ -85,7 +94,7 @@ class BaseCharRenderer[T <: upickle.core.CharOps.Output]
8594
out
8695
}
8796

88-
def visitFalse(index: Int) = {
97+
def visitFalse(index: Int): T = {
8998
flushBuffer()
9099
elemBuilder.ensureLength(5)
91100
elemBuilder.appendUnsafe('f')
@@ -97,7 +106,7 @@ class BaseCharRenderer[T <: upickle.core.CharOps.Output]
97106
out
98107
}
99108

100-
def visitTrue(index: Int) = {
109+
def visitTrue(index: Int): T = {
101110
flushBuffer()
102111
elemBuilder.ensureLength(4)
103112
elemBuilder.appendUnsafe('t')
@@ -108,7 +117,7 @@ class BaseCharRenderer[T <: upickle.core.CharOps.Output]
108117
out
109118
}
110119

111-
def visitFloat64StringParts(s: CharSequence, decIndex: Int, expIndex: Int, index: Int) = {
120+
def visitFloat64StringParts(s: CharSequence, decIndex: Int, expIndex: Int, index: Int): T = {
112121
flushBuffer()
113122
elemBuilder.ensureLength(s.length())
114123
var i = 0
@@ -121,7 +130,7 @@ class BaseCharRenderer[T <: upickle.core.CharOps.Output]
121130
out
122131
}
123132

124-
override def visitFloat64(d: Double, index: Int) = {
133+
override def visitFloat64(d: Double, index: Int): T = {
125134
d match{
126135
case Double.PositiveInfinity => visitNonNullString("Infinity", -1)
127136
case Double.NegativeInfinity => visitNonNullString("-Infinity", -1)
@@ -137,20 +146,20 @@ class BaseCharRenderer[T <: upickle.core.CharOps.Output]
137146
}
138147

139148

140-
/** Renders a string value; a `null` reference is rendered through
  * `visitNull`, anything else through `visitNonNullString`.
  */
def visitString(s: CharSequence, index: Int): T = s match {
  case null => visitNull(index)
  case nonNull => visitNonNullString(nonNull, index)
}
145154

146-
/** Writes `s` through `RenderUtils.escapeChar` into `elemBuilder`, after
  * flushing any pending separator, then flushes the builder and returns `out`.
  */
private def visitNonNullString(s: CharSequence, index: Int): T = {
  flushBuffer()
  // NOTE(review): the first argument (the unicode char builder) is passed as
  // null; this assumes escapeChar never dereferences it -- confirm against
  // the upickle version in use.
  upickle.core.RenderUtils.escapeChar(null, elemBuilder, s, unicode = escapeUnicode)
  flushCharBuilder()
  out
}
152161

153-
final def renderIndent() = {
162+
final def renderIndent(): Unit = {
154163
if (indent == -1) ()
155164
else {
156165
var i = indent * depth

sjsonnet/src/sjsonnet/Std.scala

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1121,36 +1121,54 @@ class Std {
11211121
builtin(ManifestTomlEx),
11221122
// std.manifestYamlDoc(v, indent_array_in_object=false, quote_keys=true)
// Renders `v` as a single YAML document and returns it as a string.
// Both option arguments must be booleans; anything else fails evaluation.
builtinWithDefaults("manifestYamlDoc",
  "v" -> null,
  "indent_array_in_object" -> Val.False(dummyPos),
  "quote_keys" -> Val.True(dummyPos)){ (args, _, ev) =>
  val v = args(0)
  // The error reports the class of the offending argument itself rather than
  // the class of `v`, so the message points at the value that failed.
  val indentArrayInObject = args(1) match {
    case Val.False(_) => false
    case Val.True(_) => true
    case other => Error.fail("indent_array_in_object has to be a boolean, got " + other.getClass)
  }
  val quoteKeys = args(2) match {
    case Val.False(_) => false
    case Val.True(_) => true
    case other => Error.fail("quote_keys has to be a boolean, got " + other.getClass)
  }
  Materializer.apply0(
    v,
    new YamlRenderer(indentArrayInObject = indentArrayInObject, quoteKeys = quoteKeys)
  )(ev).toString
},
11361142
// std.manifestYamlStream(v, indent_array_in_object=false, c_document_end=true, quote_keys=true)
// Renders every element of the array `v` as its own YAML document, joined
// with "---" separators. When c_document_end is true the stream is terminated
// with the "..." document-end marker; otherwise it ends with a plain newline.
builtinWithDefaults("manifestYamlStream",
  "v" -> null,
  "indent_array_in_object" -> Val.False(dummyPos),
  "c_document_end" -> Val.True(dummyPos),
  "quote_keys" -> Val.True(dummyPos)){ (args, _, ev) =>
  val v = args(0)
  // Each validator reports the class of the offending argument itself rather
  // than the class of `v`, so the message points at the value that failed.
  val indentArrayInObject = args(1) match {
    case Val.False(_) => false
    case Val.True(_) => true
    case other => Error.fail("indent_array_in_object has to be a boolean, got " + other.getClass)
  }
  val cDocumentEnd = args(2) match {
    case Val.False(_) => false
    case Val.True(_) => true
    case other => Error.fail("c_document_end has to be a boolean, got " + other.getClass)
  }
  val quoteKeys = args(3) match {
    case Val.False(_) => false
    case Val.True(_) => true
    case other => Error.fail("quote_keys has to be a boolean, got " + other.getClass)
  }
  v match {
    case arr: Val.Arr => arr.asLazyArray
      .map { item =>
        Materializer.apply0(
          item.force,
          new YamlRenderer(indentArrayInObject = indentArrayInObject, quoteKeys = quoteKeys)
        )(ev).toString()
      }
      .mkString("---\n", "\n---\n", if (cDocumentEnd) "\n...\n" else "\n")
    case _ => Error.fail("manifestYamlStream only takes arrays, got " + v.getClass)
  }
},

sjsonnet/src/sjsonnet/YamlRenderer.scala

Lines changed: 58 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,40 @@
11
package sjsonnet
22

3-
import java.io.{StringWriter, Writer}
3+
import java.io.StringWriter
44
import java.util.regex.Pattern
5+
import upickle.core.{ArrVisitor, ObjVisitor, SimpleVisitor, Visitor}
56

6-
import upickle.core.{ArrVisitor, ObjVisitor}
7+
import scala.util.Try
78

89

910

1011
class YamlRenderer(_out: StringWriter = new java.io.StringWriter(), indentArrayInObject: Boolean = false,
11-
indent: Int = 2) extends BaseCharRenderer(_out, indent){
12+
quoteKeys: Boolean = true, indent: Int = 2) extends BaseCharRenderer(_out, indent){
1213
var newlineBuffered = false
1314
var dashBuffered = false
1415
var afterKey = false
1516
private var topLevel = true
17+
private val outBuffer = _out.getBuffer
18+
19+
// Visitor used for object keys. Only string keys are handled here. A key is
// written bare only when quoting is disabled AND the key passes
// YamlRenderer.isSafeBareKey; otherwise it goes through RenderUtils.escapeChar.
private val yamlKeyVisitor = new SimpleVisitor[StringWriter, StringWriter]() {
  override def expectedMsg = "Expected a string key"

  override def visitString(s: CharSequence, index: Int): StringWriter = {
    YamlRenderer.this.flushBuffer()
    // Short-circuits: the safety check is skipped entirely when quoteKeys is set.
    val emitBare = !quoteKeys && YamlRenderer.isSafeBareKey(s.toString)
    if (emitBare) {
      YamlRenderer.this.appendString(s.toString)
    } else {
      upickle.core.RenderUtils.escapeChar(null, YamlRenderer.this.elemBuilder, s, unicode = true)
    }
    YamlRenderer.this.flushCharBuilder()
    _out
  }
}
1632

17-
private val outBuffer = _out.getBuffer()
18-
19-
/** Hands the characters accumulated in `elemBuilder` over to `_out`.
  *
  * Uses a threshold of 0 while at the top level (or at non-positive depth)
  * and 1000 otherwise.
  */
override def flushCharBuilder(): Unit = {
  val atOutermostLevel = depth <= 0 || topLevel
  val threshold = if (atOutermostLevel) 0 else 1000
  elemBuilder.writeOutToIfLongerThan(_out, threshold)
}
2236

23-
private[this] def appendString(s: String) = {
37+
private[this] def appendString(s: String): Unit = {
2438
val len = s.length
2539
var i = 0
2640
elemBuilder.ensureLength(len)
@@ -48,20 +62,20 @@ class YamlRenderer(_out: StringWriter = new java.io.StringWriter(), indentArrayI
4862
}
4963
depth -= 1
5064
} else {
51-
upickle.core.RenderUtils.escapeChar(unicodeCharBuilder, elemBuilder, s, true)
65+
upickle.core.RenderUtils.escapeChar(null, elemBuilder, s, unicode=true)
5266
}
5367
flushCharBuilder()
5468
_out
5569
}
5670

57-
/** Renders a number: flushes pending separators, appends the text produced
  * by `RenderUtils.renderDouble`, flushes the builder and returns `_out`.
  */
override def visitFloat64(d: Double, index: Int): StringWriter = {
  flushBuffer()
  val rendered = RenderUtils.renderDouble(d)
  appendString(rendered)
  flushCharBuilder()
  _out
}
6377

64-
override def flushBuffer() = {
78+
override def flushBuffer(): Unit = {
6579
if (newlineBuffered) {
6680
// drop space between colon and newline
6781
elemBuilder.writeOutToIfLongerThan(_out, 0)
@@ -81,7 +95,7 @@ class YamlRenderer(_out: StringWriter = new java.io.StringWriter(), indentArrayI
8195
dashBuffered = false
8296
}
8397

84-
override def visitArray(length: Int, index: Int) = new ArrVisitor[StringWriter, StringWriter] {
98+
override def visitArray(length: Int, index: Int): ArrVisitor[StringWriter, StringWriter] = new ArrVisitor[StringWriter, StringWriter] {
8599
var empty = true
86100
flushBuffer()
87101

@@ -91,19 +105,19 @@ class YamlRenderer(_out: StringWriter = new java.io.StringWriter(), indentArrayI
91105
}
92106
topLevel = false
93107

94-
val dedentInObject = afterKey && !indentArrayInObject
108+
private val dedentInObject = afterKey && !indentArrayInObject
95109
afterKey = false
96110
if (dedentInObject) depth -= 1
97111
dashBuffered = true
98112

99-
def subVisitor = YamlRenderer.this
113+
def subVisitor: Visitor[StringWriter, StringWriter] = YamlRenderer.this
100114
def visitValue(v: StringWriter, index: Int): Unit = {
101115
empty = false
102116
flushBuffer()
103117
newlineBuffered = true
104118
dashBuffered = true
105119
}
106-
def visitEnd(index: Int) = {
120+
def visitEnd(index: Int): StringWriter = {
107121
if (!dedentInObject) depth -= 1
108122
if (empty) {
109123
elemBuilder.ensureLength(2)
@@ -116,16 +130,19 @@ class YamlRenderer(_out: StringWriter = new java.io.StringWriter(), indentArrayI
116130
_out
117131
}
118132
}
119-
override def visitObject(length: Int, index: Int) = new ObjVisitor[StringWriter, StringWriter] {
133+
134+
override def visitObject(length: Int, index: Int): ObjVisitor[StringWriter, StringWriter] = new ObjVisitor[StringWriter, StringWriter] {
120135
var empty = true
121136
flushBuffer()
122137
if (!topLevel) depth += 1
123138
topLevel = false
124139

125140
if (afterKey) newlineBuffered = true
126141

127-
def subVisitor = YamlRenderer.this
128-
def visitKey(index: Int) = YamlRenderer.this
142+
def subVisitor: Visitor[StringWriter, StringWriter] = YamlRenderer.this
143+
144+
def visitKey(index: Int): Visitor[StringWriter, StringWriter] = yamlKeyVisitor
145+
129146
def visitKeyValue(s: Any): Unit = {
130147
empty = false
131148
flushBuffer()
@@ -136,11 +153,13 @@ class YamlRenderer(_out: StringWriter = new java.io.StringWriter(), indentArrayI
136153
afterKey = true
137154
newlineBuffered = false
138155
}
156+
139157
def visitValue(v: StringWriter, index: Int): Unit = {
140158
newlineBuffered = true
141159
afterKey = false
142160
}
143-
def visitEnd(index: Int) = {
161+
162+
def visitEnd(index: Int): StringWriter = {
144163
if (empty) {
145164
elemBuilder.ensureLength(2)
146165
elemBuilder.append('{')
@@ -155,9 +174,28 @@ class YamlRenderer(_out: StringWriter = new java.io.StringWriter(), indentArrayI
155174
}
156175
}
157176
object YamlRenderer{
158-
val newlinePattern = Pattern.compile("\n")
177+
val newlinePattern: Pattern = Pattern.compile("\n")
178+
// Characters a key may consist of to be considered for bare (unquoted) output.
private val safeYamlKeyPattern = Pattern.compile("^[a-zA-Z0-9/._-]+$")
// Lower-cased scalars that must stay quoted; the lookup is done on the
// lower-cased key.
private val yamlReserved = Set("true", "false", "null", "yes", "no", "on", "off", "y", "n", ".nan",
  "+.inf", "-.inf", ".inf", "-", "---", "''")
private val yamlTimestampPattern = Pattern.compile("^(?:[0-9]*-){2}[0-9]*$")
private val yamlBinaryPattern = Pattern.compile("^[-+]?0b[0-1_]+$")
// No explicit anchors here: Matcher.matches() already requires a full match.
private val yamlHexPattern = Pattern.compile("[-+]?0x[0-9a-fA-F_]+")
// The integer part is a single `[0-9_]*`. The nested form `([0-9_]*)*` accepts
// exactly the same strings but can backtrack exponentially on long digit runs
// followed by a non-matching character.
private val yamlFloatPattern = Pattern.compile("^-?[0-9_]*(\\.[0-9_]*)?(e[-+][0-9_]+)?$")
private val yamlIntPattern = Pattern.compile("^[-+]?[0-9_]+$")

/** Decides whether an object key can be written to YAML without quotes.
  *
  * A key is safe when it consists only of the characters allowed by
  * `safeYamlKeyPattern`, is not one of the reserved scalars, and does not
  * match the timestamp, binary, hex, float or int patterns.
  *
  * @param k the key as it would be written
  * @return true if `k` may be emitted bare, false if it must be quoted
  */
private def isSafeBareKey(k: String): Boolean = {
  // Locale.ROOT keeps the lower-casing locale-independent: under e.g. a
  // Turkish default locale "I" lower-cases to a dotless i, so ".INF" would
  // no longer hit the reserved set.
  val l = k.toLowerCase(java.util.Locale.ROOT)
  !yamlReserved.contains(l) &&
    safeYamlKeyPattern.matcher(k).matches() &&
    !yamlTimestampPattern.matcher(l).matches() &&
    !yamlBinaryPattern.matcher(k).matches() &&
    !yamlHexPattern.matcher(k).matches() &&
    !yamlFloatPattern.matcher(l).matches() &&
    !yamlIntPattern.matcher(l).matches()
}
159197

160-
def writeIndentation(out: upickle.core.CharBuilder, n: Int) = {
198+
def writeIndentation(out: upickle.core.CharBuilder, n: Int): Unit = {
161199
out.ensureLength(n+1)
162200
out.append('\n')
163201
var i = n

0 commit comments

Comments
 (0)