@@ -40,13 +40,24 @@ var KEY = C.KEY = 0x72;
4040// Parser Modes
4141var OBJECT = C . OBJECT = 0x81 ;
4242var ARRAY = C . ARRAY = 0x82 ;
// Character constants: the byte values written into the string buffer for
// JSON escape sequences. Spelled as hex literals, the same form the tokenizer
// uses when it compares input bytes.
var BACK_SLASH = 0x5c;      // "\\" reverse solidus
var FORWARD_SLASH = 0x2f;   // "/"  solidus
var BACKSPACE = 0x08;       // "\b"
var FORM_FEED = 0x0c;       // "\f"
var NEWLINE = 0x0a;         // "\n"
var CARRIAGE_RETURN = 0x0d; // "\r"
var TAB = 0x09;             // "\t"

// Size in bytes of the buffer each Parser allocates to stage string data.
var STRING_BUFFER_SIZE = 64 * 1024;
4453
4554function Parser ( ) {
4655 this . tState = START ;
4756 this . value = undefined ;
4857
4958 this . string = undefined ; // string data
59+ this . stringBuffer = Buffer . alloc ? Buffer . alloc ( STRING_BUFFER_SIZE ) : new Buffer ( STRING_BUFFER_SIZE ) ;
60+ this . stringBufferOffset = 0 ;
5061 this . unicode = undefined ; // unicode escapes
5162
5263 this . key = undefined ;
@@ -77,6 +88,41 @@ proto.charError = function (buffer, i) {
7788 this . tState = STOP ;
7889 this . onError ( new Error ( "Unexpected " + JSON . stringify ( String . fromCharCode ( buffer [ i ] ) ) + " at position " + i + " in state " + Parser . toknam ( this . tState ) ) ) ;
7990} ;
/**
 * Append a single byte to the string currently being parsed.
 *
 * Bytes are staged in this.stringBuffer instead of being concatenated onto
 * this.string one at a time. When the staging buffer is full, its contents
 * are decoded as UTF-8 and appended to this.string, and staging restarts at
 * offset 0.
 *
 * @param {number} char - byte value to store at the current buffer offset.
 */
proto.appendStringChar = function (char) {
  if (this.stringBufferOffset >= STRING_BUFFER_SIZE) {
    // Decode exactly the bytes staged so far, the same explicit range the
    // other flush sites in this file pass to toString.
    this.string += this.stringBuffer.toString('utf8', 0, this.stringBufferOffset);
    this.stringBufferOffset = 0;
  }

  this.stringBuffer[this.stringBufferOffset++] = char;
};
/**
 * Append the bytes buf[start, end) to the string currently being parsed.
 *
 * The bytes are staged in this.stringBuffer. If they would not fit after the
 * bytes already staged, the staged bytes are first decoded as UTF-8 onto
 * this.string and staging restarts at offset 0.
 *
 * start and end follow Buffer#slice conventions: both are optional, and a
 * negative value counts back from the end of buf.
 *
 * @param {Buffer} buf - source bytes.
 * @param {number} [start=0] - index of the first byte to append.
 * @param {number} [end=buf.length] - index one past the last byte to append.
 */
proto.appendStringBuf = function (buf, start, end) {
  var from = typeof start === 'number' ? start : 0;
  var to = typeof end === 'number' ? end : buf.length;

  // Resolve relative and out-of-range bounds up front so that the byte count
  // and the copy below always describe the same range. Buffer#copy does not
  // accept negative indices, so they must not be forwarded as-is.
  if (from < 0) {
    from = Math.max(buf.length + from, 0);
  }
  if (to < 0) {
    // a negative end decreases the size
    to = buf.length + to;
  }
  if (to > buf.length) {
    to = buf.length;
  }

  var size = to - from;
  if (size <= 0) {
    return;
  }

  // The staging buffer is allocated with STRING_BUFFER_SIZE bytes, so its
  // length is the capacity.
  var capacity = this.stringBuffer.length;

  if (this.stringBufferOffset + size > capacity) {
    this.string += this.stringBuffer.toString('utf8', 0, this.stringBufferOffset);
    this.stringBufferOffset = 0;
  }

  if (size > capacity) {
    // The chunk can never fit in the staging buffer. Copying would truncate
    // it while the offset advanced by the full size, so decode it directly.
    this.string += buf.toString('utf8', from, to);
    return;
  }

  buf.copy(this.stringBuffer, this.stringBufferOffset, from, to);
  this.stringBufferOffset += size;
};
80126proto . write = function ( buffer ) {
81127 if ( typeof buffer === "string" ) buffer = new Buffer ( buffer ) ;
82128 var n ;
@@ -93,7 +139,10 @@ proto.write = function (buffer) {
93139 } else if ( n === 0x74 ) { this . tState = TRUE1 ; // t
94140 } else if ( n === 0x66 ) { this . tState = FALSE1 ; // f
95141 } else if ( n === 0x6e ) { this . tState = NULL1 ; // n
96- } else if ( n === 0x22 ) { this . string = "" ; this . tState = STRING1 ; // "
142+ } else if ( n === 0x22 ) { // "
143+ this . string = "" ;
144+ this . stringBufferOffset = 0 ;
145+ this . tState = STRING1 ;
97146 } else if ( n === 0x2d ) { this . string = "-" ; this . tState = NUMBER1 ; // -
98147 } else {
99148 if ( n >= 0x30 && n < 0x40 ) { // 1-9
@@ -112,7 +161,8 @@ proto.write = function (buffer) {
112161 for ( var j = 0 ; j < this . bytes_remaining ; j ++ ) {
113162 this . temp_buffs [ this . bytes_in_sequence ] [ this . bytes_in_sequence - this . bytes_remaining + j ] = buffer [ j ] ;
114163 }
115- this . string += this . temp_buffs [ this . bytes_in_sequence ] . toString ( ) ;
164+
165+ this . appendStringBuf ( this . temp_buffs [ this . bytes_in_sequence ] ) ;
116166 this . bytes_in_sequence = this . bytes_remaining = 0 ;
117167 i = i + j - 1 ;
118168 } else if ( this . bytes_remaining === 0 && n >= 128 ) { // else if no remainder bytes carried over, parse multi byte (>=128) chars one at a time
@@ -129,38 +179,47 @@ proto.write = function (buffer) {
129179 this . bytes_remaining = ( i + this . bytes_in_sequence ) - buffer . length ;
130180 i = buffer . length - 1 ;
131181 } else {
132- this . string += buffer . slice ( i , ( i + this . bytes_in_sequence ) ) . toString ( ) ;
182+ this . appendStringBuf ( buffer , i , i + this . bytes_in_sequence ) ;
133183 i = i + this . bytes_in_sequence - 1 ;
134184 }
135- } else if ( n === 0x22 ) { this . tState = START ; this . onToken ( STRING , this . string ) ; this . offset += Buffer . byteLength ( this . string , 'utf8' ) + 1 ; this . string = undefined ; }
136- else if ( n === 0x5c ) { this . tState = STRING2 ; }
137- else if ( n >= 0x20 ) { this . string += String . fromCharCode ( n ) ; }
185+ } else if ( n === 0x22 ) {
186+ this . tState = START ;
187+ this . string += this . stringBuffer . toString ( 'utf8' , 0 , this . stringBufferOffset ) ;
188+ this . stringBufferOffset = 0 ;
189+ this . onToken ( STRING , this . string ) ;
190+ this . offset += Buffer . byteLength ( this . string , 'utf8' ) + 1 ;
191+ this . string = undefined ;
192+ }
193+ else if ( n === 0x5c ) {
194+ this . tState = STRING2 ;
195+ }
196+ else if ( n >= 0x20 ) { this . appendStringChar ( n ) ; }
138197 else {
139198 return this . charError ( buffer , i ) ;
140199 }
141200 } else if ( this . tState === STRING2 ) { // After backslash
142201 n = buffer [ i ] ;
143- if ( n === 0x22 ) { this . string += "\"" ; this . tState = STRING1 ;
144- } else if ( n === 0x5c ) { this . string += "\\" ; this . tState = STRING1 ;
145- } else if ( n === 0x2f ) { this . string += "\/" ; this . tState = STRING1 ;
146- } else if ( n === 0x62 ) { this . string += "\b" ; this . tState = STRING1 ;
147- } else if ( n === 0x66 ) { this . string += "\f" ; this . tState = STRING1 ;
148- } else if ( n === 0x6e ) { this . string += "\n" ; this . tState = STRING1 ;
149- } else if ( n === 0x72 ) { this . string += "\r" ; this . tState = STRING1 ;
150- } else if ( n === 0x74 ) { this . string += "\t" ; this . tState = STRING1 ;
202+ if ( n === 0x22 ) { this . appendStringChar ( n ) ; this . tState = STRING1 ;
203+ } else if ( n === 0x5c ) { this . appendStringChar ( BACK_SLASH ) ; this . tState = STRING1 ;
204+ } else if ( n === 0x2f ) { this . appendStringChar ( FORWARD_SLASH ) ; this . tState = STRING1 ;
205+ } else if ( n === 0x62 ) { this . appendStringChar ( BACKSPACE ) ; this . tState = STRING1 ;
206+ } else if ( n === 0x66 ) { this . appendStringChar ( FORM_FEED ) ; this . tState = STRING1 ;
207+ } else if ( n === 0x6e ) { this . appendStringChar ( NEWLINE ) ; this . tState = STRING1 ;
208+ } else if ( n === 0x72 ) { this . appendStringChar ( CARRIAGE_RETURN ) ; this . tState = STRING1 ;
209+ } else if ( n === 0x74 ) { this . appendStringChar ( TAB ) ; this . tState = STRING1 ;
151210 } else if ( n === 0x75 ) { this . unicode = "" ; this . tState = STRING3 ;
152- } else {
153- return this . charError ( buffer , i ) ;
211+ } else {
212+ return this . charError ( buffer , i ) ;
154213 }
155214 } else if ( this . tState === STRING3 || this . tState === STRING4 || this . tState === STRING5 || this . tState === STRING6 ) { // unicode hex codes
156215 n = buffer [ i ] ;
157216 // 0-9 A-F a-f
158217 if ( ( n >= 0x30 && n < 0x40 ) || ( n > 0x40 && n <= 0x46 ) || ( n > 0x60 && n <= 0x66 ) ) {
159218 this . unicode += String . fromCharCode ( n ) ;
160219 if ( this . tState ++ === STRING6 ) {
161- this . string += String . fromCharCode ( parseInt ( this . unicode , 16 ) ) ;
220+ this . appendStringBuf ( Buffer ( String . fromCharCode ( parseInt ( this . unicode , 16 ) ) ) ) ;
162221 this . unicode = undefined ;
163- this . tState = STRING1 ;
222+ this . tState = STRING1 ;
164223 }
165224 } else {
166225 return this . charError ( buffer , i ) ;
@@ -266,14 +325,14 @@ proto.emit = function (value) {
266325} ;
/**
 * Default value callback. It does nothing and returns undefined; it is a
 * placeholder meant to be replaced by the user of the parser.
 *
 * @param {*} value - the value handed to the callback (unused here).
 */
proto.onValue = function (value) {
  // Override me
};
270329proto . onToken = function ( token , value ) {
271330 if ( this . state === VALUE ) {
272331 if ( token === STRING || token === NUMBER || token === TRUE || token === FALSE || token === NULL ) {
273332 if ( this . value ) {
274333 this . value [ this . key ] = value ;
275334 }
276- this . emit ( value ) ;
335+ this . emit ( value ) ;
277336 } else if ( token === LEFT_BRACE ) {
278337 this . push ( ) ;
279338 if ( this . value ) {
@@ -322,7 +381,7 @@ proto.onToken = function (token, value) {
322381 if ( token === COLON ) { this . state = VALUE ; }
323382 else { return this . parseError ( token , value ) ; }
324383 } else if ( this . state === COMMA ) {
325- if ( token === COMMA ) {
384+ if ( token === COMMA ) {
326385 if ( this . mode === ARRAY ) { this . key ++ ; this . state = VALUE ; }
327386 else if ( this . mode === OBJECT ) { this . state = KEY ; }
328387
0 commit comments