diff --git a/lib/strtok.js b/lib/strtok.js index 4884b6f..9f43168 100644 --- a/lib/strtok.js +++ b/lib/strtok.js @@ -45,7 +45,7 @@ var UINT8 = { assert.ok(o >= 0); assert.ok(this.len <= b.length); - no = maybeFlush(b, o, this.len, flush); + var no = maybeFlush(b, o, this.len, flush); b[no] = v & 0xff; return (no - o) + this.len; @@ -65,7 +65,7 @@ var UINT16_LE = { assert.ok(o >= 0); assert.ok(this.len <= b.length); - no = maybeFlush(b, o, this.len, flush); + var no = maybeFlush(b, o, this.len, flush); b[no] = v & 0xff; b[no + 1] = (v >>> 8) & 0xff; @@ -86,7 +86,7 @@ var UINT16_BE = { assert.ok(o >= 0); assert.ok(this.len <= b.length); - no = maybeFlush(b, o, this.len, flush); + var no = maybeFlush(b, o, this.len, flush); b[no] = (v >>> 8) & 0xff; b[no + 1] = v & 0xff; @@ -112,7 +112,7 @@ var UINT32_LE = { assert.ok(o >= 0); assert.ok(this.len <= b.length); - no = maybeFlush(b, o, this.len, flush); + var no = maybeFlush(b, o, this.len, flush); b[no] = v & 0xff; b[no + 1] = (v >>> 8) & 0xff; b[no + 2] = (v >>> 16) & 0xff; @@ -137,7 +137,7 @@ var UINT32_BE = { assert.ok(o >= 0); assert.ok(this.len <= b.length); - no = maybeFlush(b, o, this.len, flush); + var no = maybeFlush(b, o, this.len, flush); b[no] = (v >>> 24) & 0xff; b[no + 1] = (v >>> 16) & 0xff; b[no + 2] = (v >>> 8) & 0xff; @@ -163,7 +163,7 @@ var INT8 = { assert.ok(o >= 0); assert.ok(this.len <= b.length); - no = maybeFlush(b, o, this.len, flush); + var no = maybeFlush(b, o, this.len, flush); b[no] = v & 0xff; return (no - o) + this.len; @@ -186,7 +186,7 @@ var INT16_BE = { assert.ok(o >= 0); assert.ok(this.len <= b.length); - no = maybeFlush(b, o, this.len, flush); + var no = maybeFlush(b, o, this.len, flush); b[no] = ((v & 0xffff) >>> 8) & 0xff; b[no + 1] = v & 0xff; @@ -214,7 +214,7 @@ var INT32_BE = { assert.ok(o >= 0); assert.ok(this.len <= b.length); - no = maybeFlush(b, o, this.len, flush); + var no = maybeFlush(b, o, this.len, flush); b[no] = (v >>> 24) & 0xff; b[no + 1] = (v >>> 16) & 0xff; b[no + 
2] = (v >>> 8) & 0xff; @@ -230,6 +230,15 @@ exports.INT32_BE = INT32_BE; // These types are intended to allow callers to re-use them by manipulating // the 'len' and other properties directly. +var IgnoreType = function(l) { + this.len = l; + this.get = function() { + return null; + }; +}; +exports.IgnoreType = IgnoreType; + + var BufferType = function(l) { var self = this; @@ -265,6 +274,7 @@ var parse = function(s, cb) { var bufs = []; var bufsLen = 0; var bufOffset = 0; + var ignoreLen = 0; // Callback for FSM to tell us what type to expect next var typeCallback = function(t) { @@ -283,8 +293,9 @@ var parse = function(s, cb) { // Out strategy for handling buffers is to shift them off of the bufs[] // array until we have enough accumulated to account for type.len bytes. var emitData = function() { + var b; while (type !== DONE && type !== DEFER && bufsLen >= type.len) { - var b = bufs[0]; + b = bufs[0]; var bo = bufOffset; assert.ok(bufOffset >= 0 && bufOffset < b.length); @@ -337,6 +348,31 @@ var parse = function(s, cb) { bufsLen -= type.len; type = cb(type.get(b, bo), typeCallback); + if (type instanceof IgnoreType) { + ignoreLen += type.len; + if (ignoreLen >= bufsLen) { + // clear all buffers + ignoreLen -= bufsLen; + bufsLen = 0; + bufs = []; + bufOffset = 0; + } else if (ignoreLen < bufs[0].length - bufOffset) { + // set bufOffset correctly + bufsLen -= ignoreLen; + bufOffset += ignoreLen; + ignoreLen = 0; + } else if (bufsLen > 0) { + // shift some buffers and set bufOffset correctly. + bufsLen -= ignoreLen; + ignoreLen += bufOffset; + while (ignoreLen >= bufs[0].length) { + ignoreLen -= bufs.shift().length; + } + bufOffset = ignoreLen; + ignoreLen = 0; + } + type = cb(type.get(), typeCallback); + } } if (type === DONE) { @@ -346,7 +382,7 @@ var parse = function(s, cb) { // stream; the protocol layer will have set up listeners for this // event if it cares about the remaining data. 
while (bufs.length > 0) { - var b = bufs.shift(); + b = bufs.shift(); if (bufOffset > 0) { b = b.slice(bufOffset, b.length); @@ -360,10 +396,22 @@ var parse = function(s, cb) { // Listen for data from our stream var dataListener = function(d) { - bufs.push(d); - bufsLen += d.length; - - emitData(); + if (d.length <= ignoreLen) { + // ignore this data + assert.strictEqual(bufsLen, 0); + assert.strictEqual(bufs.length, 0); + ignoreLen -= d.length; + } else if (ignoreLen > 0) { + assert.strictEqual(bufsLen, 0); + bufsLen = d.length - ignoreLen; + bufs.push(d.slice(ignoreLen)); + ignoreLen = 0; + emitData(); + } else { + bufs.push(d); + bufsLen += d.length; + emitData(); + } }; // Get the initial type diff --git a/package.json b/package.json index 3788b34..0d7c870 100644 --- a/package.json +++ b/package.json @@ -1,22 +1,21 @@ { - "name" : "strtok", - "version" : "0.1.0", - "description" : "A streaming tokenizer", - "author" : "Peter Griess ", - "engines" : { - "node" : ">=0.1.98" - }, - "repositories" : [ - { - "type" : "git", - "url" : "http://github.com/pgriess/node-strtok" - } - ], - "licenses" : [ - { - "type" : "BSD", - "url" : "http://github.com/pgriess/node-strtok/blob/master/LICENSE" - } - ], - "main" : "./lib/strtok.js" + "name": "strtok", + "version": "0.1.0", + "description": "A streaming tokenizer", + "author": "Peter Griess ", + "scripts": { + "test": "find test/ -name 'test-*.js' -exec node {} \\;" + }, + "engines": { + "node": ">=0.1.98" + }, + "repository": { + "type": "git", + "url": "http://github.com/pgriess/node-strtok" + }, + "license": { + "type": "BSD", + "url": "http://github.com/pgriess/node-strtok/blob/master/LICENSE" + }, + "main": "./lib/strtok.js" } diff --git a/test/test-ignore.js b/test/test-ignore.js new file mode 100644 index 0000000..734db01 --- /dev/null +++ b/test/test-ignore.js @@ -0,0 +1,26 @@ +// Test reading an array of bytes. 
+// Checks that returning a strtok.IgnoreType from a parse callback skips bytes of the input.
+var assert = require('assert');
+var util = require('./util'); // local test helper that provides runParseTests
+var strtok = require('../lib/strtok');
+// Input bytes: 0x04, then 'asdf' (expected to be skipped), then 'aoeu' (expected to be read).
+util.runParseTests('\x04asdfaoeu', [ // presumably the callbacks run in order, one per token; confirm in test/util.js
+    function(v) {
+        assert.ok(v === undefined); // the first callback is expected to receive no value
+        return strtok.UINT8; // request one unsigned byte next
+    },
+    function(v) {
+        assert.strictEqual(v, 4); // the leading '\x04' byte read as a UINT8
+        return new strtok.IgnoreType(4); // request that the next 4 bytes ('asdf') be skipped
+    },
+    function(v) {
+        assert.equal(v, null); // loose equality: passes for null (what IgnoreType.get returns) or undefined
+        return new strtok.BufferType(4); // request the following 4 bytes as a Buffer
+    },
+    function(v) {
+        assert.ok(Buffer.isBuffer(v));
+        assert.equal(v.toString('utf8'), 'aoeu'); // the bytes after the skipped 'asdf'
+        return strtok.DONE; // tell the parser no further tokens are wanted
+    }
+]);
+