Skip to content

Commit 191e35c

Browse files
authored
Optimize JSON codec (#1148)
* Better Json benchmarks * Reuse JsonMapSerializer instance instead of allocating per writeMap() * Override serialize() in JsonCodec with larger initial buffer * Close deserializer in deserializeShape to enable Jackson buffer recycling * Pre-compute SerializableString per Schema member for field name writes * Cache resolved TimestampFormatter per Schema in UseTimestampFormatTrait * Optimize fieldToMember cache with get-before-compute pattern * Replace InterceptingSerializer with flat struct serializer * Add Schema extension system with SPI and migrate JSON caches * Use bounded recycler pool for Jackson buffer recycling * Remove FAST_DOUBLE_WRITER * Add some more thread safety guards in DeferredRootSchema * Make Schema Extensions lazy * Add a Smithy Json serializer and deserializer * Remove publication guard * Correctness fixes * More perf improvements * More perf improvements * Improve timestamp serialization * Use copy of Schubfach for direct conversion of Double/Float to byte * Add SmithyJson to fuzz tests * Use Arrays.equals in the fast path * Use FastDoubleParser for direct byte-to-double parsing * Reuse Schubfach DoubleToDecimal/FloatToDecimal instances * Write ISO-8601 timestamps directly to byte buffer * Fast-path BigInteger/BigDecimal when value fits in long * Use parseLong for integer epoch-seconds timestamps * Parse ISO-8601 and HTTP-date timestamps directly from bytes * Write HTTP-date timestamps directly to byte buffer * Localize pos to register in readStruct hot loop * Fast-path BigDecimal with non-zero scale when unscaled fits in long * Localize pos in readList and readStringMap hot loops * Handle out-of-range epoch seconds in parseLong fast path * Cleanup and add differential fuzzing * More tests and correctness changes * Codegen perf improvements * Fuse speculative field name check with scan in readStruct * Pool SmithyJsonSerializer to avoid per-call allocation * Eliminate OffsetDateTime allocation in timestamp writes * Write BigInteger directly to 
byte buffer via 18-digit groups * Add equals/hashCode to JsonSettings for serializer pool matching * Merge buffer pool into serializer pool and fix exception safety * Fuse field name and value capacity checks in StructSerializer * Use getPlain/compareAndExchangeAcquire for cheaper pool operations * Cleanups and refactor fuzz test to use one codec * Tighten fuzz tests, reduce allocations while reading blobs * Address PR comments * Clear out needsComma if there is an exception * Replace streams with loop while loading Schema extensions
1 parent 14bada8 commit 191e35c

53 files changed

Lines changed: 11060 additions & 441 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

codecs/cbor-codec/src/fuzz/java/software/amazon/smithy/java/cbor/DeserializationFuzzTest.java

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,6 @@
88
import software.amazon.smithy.java.core.serde.Codec;
99
import software.amazon.smithy.java.fuzz.CodecDeserializationFuzzTestBase;
1010

11-
/**
12-
* Fuzz tests for Rpcv2CborCodec with various configurations.
13-
*/
1411
class DeserializationFuzzTest {
1512

1613
static class DefaultTest extends CodecDeserializationFuzzTestBase {
@@ -20,5 +17,4 @@ protected Codec codecToFuzz() {
2017
return Rpcv2CborCodec.builder().build();
2118
}
2219
}
23-
2420
}

codecs/json-codec/build.gradle.kts

Lines changed: 58 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ plugins {
22
id("smithy-java.module-conventions")
33
id("smithy-java.fuzz-test")
44
id("me.champeau.jmh") version "0.7.3"
5+
id("software.amazon.smithy.gradle.smithy-base")
56
alias(libs.plugins.shadow)
67
}
78

@@ -13,7 +14,10 @@ extra["moduleName"] = "software.amazon.smithy.java.json"
1314
dependencies {
1415
api(project(":core"))
1516
compileOnly(libs.jackson.core)
17+
compileOnly(libs.fastdoubleparser)
1618
testRuntimeOnly(libs.jackson.core)
19+
testRuntimeOnly(libs.fastdoubleparser)
20+
smithyBuild(project(":codegen:codegen-plugin"))
1721
}
1822

1923
tasks {
@@ -29,7 +33,15 @@ tasks {
2933
.toString(),
3034
),
3135
)
36+
include(
37+
dependency(
38+
libs.fastdoubleparser
39+
.get()
40+
.toString(),
41+
),
42+
)
3243
relocate("tools.jackson.core", "software.amazon.smithy.java.internal.shaded.tools.jackson.core")
44+
relocate("ch.randelshofer", "software.amazon.smithy.java.internal.shaded.ch.randelshofer")
3345
}
3446
}
3547
jar {
@@ -56,10 +68,53 @@ afterEvaluate {
5668
}
5769
}
5870

71+
afterEvaluate {
72+
val typePath = smithy.getPluginProjectionPath(smithy.sourceProjection.get(), "java-codegen").get()
73+
sourceSets.named("jmh") {
74+
java {
75+
srcDir("$typePath/java")
76+
}
77+
resources {
78+
srcDir("$typePath/resources")
79+
}
80+
}
81+
sourceSets.named("test") {
82+
java {
83+
srcDir("$typePath/java")
84+
}
85+
resources {
86+
srcDir("$typePath/resources")
87+
}
88+
}
89+
}
90+
91+
tasks.named("compileJmhJava") {
92+
dependsOn("smithyBuild")
93+
}
94+
95+
tasks.named("compileTestJava") {
96+
dependsOn("smithyBuild")
97+
}
98+
99+
tasks.named("processJmhResources") {
100+
dependsOn("smithyBuild")
101+
}
102+
103+
tasks.named("processTestResources") {
104+
dependsOn("smithyBuild")
105+
}
106+
59107
jmh {
60108
warmupIterations = 3
61-
iterations = 3
62-
fork = 3
63-
// profilers.add("async:output=flamegraph")
109+
iterations = 5
110+
fork = 1
111+
jvmArgs.addAll("-Xms1g", "-Xmx1g")
112+
includes.addAll(
113+
providers
114+
.gradleProperty("jmh.includes")
115+
.map { listOf(it) }
116+
.orElse(emptyList()),
117+
)
118+
profilers.add("async:output=jfr;dir=${layout.buildDirectory.get()}/jmh-profiler")
64119
// profilers.add("gc")
65120
}
Lines changed: 268 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,268 @@
1+
$version: "2"
2+
3+
namespace smithy.java.json.bench
4+
5+
/// A simple structure with only scalar fields for baseline benchmarks.
6+
structure SimpleStruct {
7+
@required
8+
name: String
9+
10+
@required
11+
age: Integer
12+
13+
active: Boolean
14+
15+
score: Double
16+
17+
createdAt: Timestamp
18+
}
19+
20+
/// A complex structure that exercises many Smithy type features.
21+
structure ComplexStruct {
22+
@required
23+
id: String
24+
25+
@required
26+
count: Integer
27+
28+
@required
29+
enabled: PrimitiveBoolean = false
30+
31+
@required
32+
ratio: PrimitiveDouble = 0
33+
34+
@required
35+
score: PrimitiveFloat = 0
36+
37+
@required
38+
bigCount: PrimitiveLong = 0
39+
40+
optionalString: String
41+
42+
optionalInt: Integer
43+
44+
createdAt: Timestamp
45+
46+
@timestampFormat("date-time")
47+
updatedAt: Timestamp
48+
49+
@timestampFormat("http-date")
50+
expiresAt: Timestamp
51+
52+
payload: Blob
53+
54+
tags: StringList
55+
56+
intList: IntegerList
57+
58+
metadata: StringMap
59+
60+
intMap: IntegerMap
61+
62+
@required
63+
nested: NestedStruct
64+
65+
optionalNested: NestedStruct
66+
67+
structList: NestedStructList
68+
69+
structMap: NestedStructMap
70+
71+
choice: BenchUnion
72+
73+
color: Color
74+
75+
colorList: ColorList
76+
77+
sparseStrings: SparseStringList
78+
79+
sparseMap: SparseStringMap
80+
81+
bigIntValue: BigInteger
82+
83+
bigDecValue: BigDecimal
84+
85+
freeformData: Document
86+
}
87+
88+
structure NestedStruct {
89+
@required
90+
field1: String
91+
92+
@required
93+
field2: Integer
94+
95+
inner: InnerStruct
96+
}
97+
98+
structure InnerStruct {
99+
value: String
100+
numbers: IntegerList
101+
}
102+
103+
union BenchUnion {
104+
stringValue: String
105+
intValue: Integer
106+
structValue: NestedStruct
107+
}
108+
109+
enum Color {
110+
RED
111+
GREEN
112+
BLUE
113+
YELLOW
114+
}
115+
116+
list StringList {
117+
member: String
118+
}
119+
120+
list IntegerList {
121+
member: Integer
122+
}
123+
124+
list NestedStructList {
125+
member: NestedStruct
126+
}
127+
128+
list ColorList {
129+
member: Color
130+
}
131+
132+
@sparse
133+
list SparseStringList {
134+
member: String
135+
}
136+
137+
map StringMap {
138+
key: String
139+
value: String
140+
}
141+
142+
map IntegerMap {
143+
key: String
144+
value: Integer
145+
}
146+
147+
map NestedStructMap {
148+
key: String
149+
value: NestedStruct
150+
}
151+
152+
@sparse
153+
map SparseStringMap {
154+
key: String
155+
value: String
156+
}
157+
158+
/// Structure focused on numeric boundary testing
159+
structure NumericStruct {
160+
byteVal: Byte
161+
shortVal: Short
162+
intVal: Integer
163+
longVal: Long
164+
floatVal: Float
165+
doubleVal: Double
166+
bigIntVal: BigInteger
167+
bigDecVal: BigDecimal
168+
}
169+
170+
/// Structure focused on string edge cases
171+
structure StringStruct {
172+
@required
173+
value: String
174+
}
175+
176+
/// Structure with all three timestamp formats
177+
structure TimestampStruct {
178+
epochSeconds: Timestamp
179+
180+
@timestampFormat("date-time")
181+
dateTime: Timestamp
182+
183+
@timestampFormat("http-date")
184+
httpDate: Timestamp
185+
}
186+
187+
/// Structure with jsonName trait on multiple fields
188+
structure JsonNameStruct {
189+
@jsonName("ID")
190+
id: String
191+
192+
@jsonName("DisplayName")
193+
displayName: String
194+
195+
normalField: String
196+
}
197+
198+
/// Structure that nests itself for depth testing
199+
structure RecursiveStruct {
200+
value: String
201+
child: RecursiveStruct
202+
}
203+
204+
/// Structure for blob testing
205+
structure BlobStruct {
206+
data: Blob
207+
}
208+
209+
list DoubleList {
210+
member: Double
211+
}
212+
213+
list BigDecimalList {
214+
member: BigDecimal
215+
}
216+
217+
map IntToStructMap {
218+
key: String
219+
value: InnerStruct
220+
}
221+
222+
list BooleanList {
223+
member: Boolean
224+
}
225+
226+
list ByteList {
227+
member: Byte
228+
}
229+
230+
list ShortList {
231+
member: Short
232+
}
233+
234+
list LongList {
235+
member: Long
236+
}
237+
238+
list FloatList {
239+
member: Float
240+
}
241+
242+
list BigIntegerList {
243+
member: BigInteger
244+
}
245+
246+
list BlobList {
247+
member: Blob
248+
}
249+
250+
list TimestampList {
251+
member: Timestamp
252+
}
253+
254+
/// Structure containing lists of all types to exercise ListElementSerializer
255+
structure AllListsStruct {
256+
booleans: BooleanList
257+
bytes: ByteList
258+
shorts: ShortList
259+
ints: IntegerList
260+
longs: LongList
261+
floats: FloatList
262+
doubles: DoubleList
263+
bigInts: BigIntegerList
264+
bigDecs: BigDecimalList
265+
strings: StringList
266+
blobs: BlobList
267+
timestamps: TimestampList
268+
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"version": "1.0",
3+
"plugins": {
4+
"java-codegen": {
5+
"namespace": "software.amazon.smithy.java.json.bench",
6+
"modes": ["types"]
7+
}
8+
}
9+
}

0 commit comments

Comments
 (0)