From deaa5e1e75c57d0d5c6c996a2379a3a6b01ce2d2 Mon Sep 17 00:00:00 2001
From: Mert Can Altin
Date: Mon, 18 May 2026 21:56:33 +0300
Subject: [PATCH] src: dispatch ToV8Value(string_view) via StringBytes::Encode

Move the std::string_view overload of ToV8Value() out of util-inl.h
and define it in util.cc; its declaration in util.h loses the `inline`
specifier accordingly.

The new definition keeps the existing v8::String::kMaxLength guard
(ThrowErrStringTooLong() and an empty MaybeLocal on overflow), but
creates the string with StringBytes::Encode(..., UTF8) instead of
calling v8::String::NewFromUtf8() directly.

Also add:
- benchmark/fs/readfile-utf8-fastpath.js, which times
  fs.readFileSync(arg, 'utf8') for several file sizes, three content
  kinds (ascii, latin1, utf8_mixed) and both path and fd sources.
- test/parallel/test-fs-readfile-utf8-fast-path.js, which checks that
  fs.readFileSync(path, 'utf8') equals Buffer#toString('utf8') for
  empty, ASCII, multibyte, Latin-1-range, invalid and truncated UTF-8
  input, read by path and by file descriptor.

Signed-off-by: Mert Can Altin
---
NOTE(review): with source='fd' the benchmark opens one descriptor and
passes it to fs.readFileSync() on every iteration. Reading from a
descriptor is documented to start at its current position, so
iterations after the first presumably read zero bytes -- confirm
before relying on the fd numbers.

NOTE(review): angle-bracketed text (author e-mail, C++ template
arguments such as the one after static_cast) appears to be missing
from this copy of the patch -- restore it from the original commit
before applying.

 benchmark/fs/readfile-utf8-fastpath.js        |  62 +++++++++++
 src/util-inl.h                                |  16 ---
 src/util.cc                                   |  11 ++
 src/util.h                                    |   6 +-
 .../test-fs-readfile-utf8-fast-path.js        | 103 ++++++++++++++++++
 5 files changed, 179 insertions(+), 19 deletions(-)
 create mode 100644 benchmark/fs/readfile-utf8-fastpath.js
 create mode 100644 test/parallel/test-fs-readfile-utf8-fast-path.js

diff --git a/benchmark/fs/readfile-utf8-fastpath.js b/benchmark/fs/readfile-utf8-fastpath.js
new file mode 100644
index 00000000000000..9bf00717c5f0b2
--- /dev/null
+++ b/benchmark/fs/readfile-utf8-fastpath.js
@@ -0,0 +1,62 @@
+'use strict';
+
+const common = require('../common.js');
+const fs = require('fs');
+const path = require('path');
+const tmpdir = require('../../test/common/tmpdir');
+
+const bench = common.createBenchmark(main, {
+  size: [64, 1024, 16384, 262144, 4194304],
+  content: ['ascii', 'latin1', 'utf8_mixed'],
+  source: ['path', 'fd'],
+  n: [3e3],
+});
+
+function buildContent(kind, size) {
+  if (kind === 'ascii') {
+    return Buffer.alloc(size, 0x61); // 'a'
+  }
+  if (kind === 'latin1') {
+    // 'é' in UTF-8 is 0xC3 0xA9 (2 bytes per char)
+    const pair = Buffer.from([0xC3, 0xA9]);
+    const buf = Buffer.alloc(size);
+    for (let i = 0; i + 2 <= size; i += 2) pair.copy(buf, i);
+    return buf;
+  }
+  if (kind === 'utf8_mixed') {
+    // mixed ASCII + 3-byte CJK (U+4E2D 中 = E4 B8 AD)
+    const cjk = Buffer.from([0xE4, 0xB8, 0xAD]);
+    const buf = Buffer.alloc(size);
+    let i = 0;
+    while (i + 4 <= size) {
+      buf[i++] = 0x61;
+      cjk.copy(buf, i);
+      i += 3;
+    }
+    return buf;
+  }
+  throw new Error('unknown content: ' + kind);
+}
+
+function main({ n, size, content, source }) {
+  tmpdir.refresh();
+  const file = path.join(tmpdir.path, `bench-${content}-${size}.bin`);
+  fs.writeFileSync(file, buildContent(content, size));
+
+  let arg;
+  let shouldClose = false;
+  if (source === 'fd') {
+    arg = fs.openSync(file, 'r');
+    shouldClose = true;
+  } else {
+    arg = file;
+  }
+
+  bench.start();
+  for (let i = 0; i < n; i++) {
+    fs.readFileSync(arg, 'utf8');
+  }
+  bench.end(n);
+
+  if (shouldClose) fs.closeSync(arg);
+}
diff --git a/src/util-inl.h b/src/util-inl.h
index d59e30a635b08b..e357d15a14496d 100644
--- a/src/util-inl.h
+++ b/src/util-inl.h
@@ -341,22 +341,6 @@ v8::Maybe FromV8Array(v8::Local context,
   return js_array->Iterate(context, PushItemToVector, &data);
 }
 
-v8::MaybeLocal ToV8Value(v8::Local context,
-                         std::string_view str,
-                         v8::Isolate* isolate) {
-  if (isolate == nullptr) isolate = v8::Isolate::GetCurrent();
-  if (str.size() >= static_cast(v8::String::kMaxLength)) [[unlikely]] {
-    // V8 only has a TODO comment about adding an exception when the maximum
-    // string size is exceeded.
-    ThrowErrStringTooLong(isolate);
-    return v8::MaybeLocal();
-  }
-
-  return v8::String::NewFromUtf8(
-             isolate, str.data(), v8::NewStringType::kNormal, str.size())
-      .FromMaybe(v8::Local());
-}
-
 v8::MaybeLocal ToV8Value(v8::Local context,
                          std::u16string_view str,
                          v8::Isolate* isolate) {
diff --git a/src/util.cc b/src/util.cc
index 1ea51cf7012963..317b8db0daac69 100644
--- a/src/util.cc
+++ b/src/util.cc
@@ -812,4 +812,15 @@ v8::Maybe GetValidFileMode(Environment* env,
   return v8::Just(mode);
 }
 
+v8::MaybeLocal ToV8Value(v8::Local context,
+                         std::string_view str,
+                         v8::Isolate* isolate) {
+  if (isolate == nullptr) isolate = v8::Isolate::GetCurrent();
+  if (str.size() >= static_cast(v8::String::kMaxLength)) [[unlikely]] {
+    ThrowErrStringTooLong(isolate);
+    return v8::MaybeLocal();
+  }
+  return StringBytes::Encode(isolate, str.data(), str.size(), UTF8);
+}
+
 } // namespace node
diff --git a/src/util.h b/src/util.h
index 3dedeca4d227e9..48305bfdc13143 100644
--- a/src/util.h
+++ b/src/util.h
@@ -701,9 +701,9 @@ inline v8::Maybe FromV8Array(v8::Local context,
                              v8::Local js_array,
                              std::vector>* out);
 
-inline v8::MaybeLocal ToV8Value(v8::Local context,
-                                std::string_view str,
-                                v8::Isolate* isolate = nullptr);
+v8::MaybeLocal ToV8Value(v8::Local context,
+                         std::string_view str,
+                         v8::Isolate* isolate = nullptr);
 inline v8::MaybeLocal ToV8Value(v8::Local context,
                                 std::u16string_view str,
                                 v8::Isolate* isolate = nullptr);
diff --git a/test/parallel/test-fs-readfile-utf8-fast-path.js b/test/parallel/test-fs-readfile-utf8-fast-path.js
new file mode 100644
index 00000000000000..18d0d884dfa455
--- /dev/null
+++ b/test/parallel/test-fs-readfile-utf8-fast-path.js
@@ -0,0 +1,103 @@
+'use strict';
+
+require('../common');
+const fs = require('node:fs');
+const path = require('node:path');
+const assert = require('node:assert');
+const { describe, it } = require('node:test');
+const tmpdir = require('../common/tmpdir');
+
+tmpdir.refresh();
+
+function writeFile(name, buf) {
+  const p = path.join(tmpdir.path, name);
+  fs.writeFileSync(p, buf);
+  return p;
+}
+
+function expectMatches(filePath, rawBuf) {
+  assert.strictEqual(
+    fs.readFileSync(filePath, 'utf8'),
+    rawBuf.toString('utf8'),
+  );
+}
+
+describe('fs.readFileSync utf8 simdutf dispatch', () => {
+  it('empty file', () => {
+    const p = writeFile('empty.txt', Buffer.alloc(0));
+    assert.strictEqual(fs.readFileSync(p, 'utf8'), '');
+  });
+
+  it('ascii small', () => {
+    const buf = Buffer.from('hello');
+    expectMatches(writeFile('tiny-ascii.txt', buf), buf);
+  });
+
+  it('ascii 20KB', () => {
+    const buf = Buffer.alloc(20 * 1024, 0x41);
+    expectMatches(writeFile('medium-ascii.txt', buf), buf);
+  });
+
+  it('ascii 1MB', () => {
+    const buf = Buffer.alloc(1024 * 1024, 0x61);
+    expectMatches(writeFile('large-ascii.txt', buf), buf);
+  });
+
+  it('fd input', () => {
+    const buf = Buffer.alloc(50 * 1024, 0x62);
+    const p = writeFile('fd-ascii.txt', buf);
+    const fd = fs.openSync(p, 'r');
+    try {
+      assert.strictEqual(fs.readFileSync(fd, 'utf8'), buf.toString('utf8'));
+    } finally {
+      fs.closeSync(fd);
+    }
+  });
+
+  it('multibyte UTF-8', () => {
+    const buf = Buffer.from('中文测试 — café — 🚀'.repeat(500), 'utf8');
+    expectMatches(writeFile('multibyte.txt', buf), buf);
+  });
+
+  it('latin1-fits utf8', () => {
+    const buf = Buffer.from('naïve café résumé — niño Köln '.repeat(500), 'utf8');
+    expectMatches(writeFile('latin1-fits.txt', buf), buf);
+  });
+
+  it('invalid: lone continuation byte', () => {
+    const buf = Buffer.from([0x68, 0x69, 0x80, 0x21]);
+    expectMatches(writeFile('invalid-cont.txt', buf), buf);
+  });
+
+  it('invalid: overlong', () => {
+    const buf = Buffer.from([0x41, 0xC0, 0xAF, 0x42]);
+    expectMatches(writeFile('invalid-overlong.txt', buf), buf);
+  });
+
+  it('invalid: surrogate', () => {
+    const buf = Buffer.from([0x41, 0xED, 0xA0, 0x80, 0x42]);
+    expectMatches(writeFile('invalid-surrogate.txt', buf), buf);
+  });
+
+  it('latin1 boundary U+00FF', () => {
+    const buf = Buffer.from('ÿ'.repeat(2048), 'utf8');
+    expectMatches(writeFile('latin1-boundary.txt', buf), buf);
+  });
+
+  it('above latin1 U+0100', () => {
+    const buf = Buffer.from('ĀāĂ'.repeat(1024), 'utf8');
+    expectMatches(writeFile('above-latin1.txt', buf), buf);
+  });
+
+  it('single codepoint each UTF-8 length', () => {
+    for (const cp of [0x41, 0x00E9, 0x4E2D, 0x1F600]) {
+      const buf = Buffer.from(String.fromCodePoint(cp), 'utf8');
+      expectMatches(writeFile(`single-cp-${cp.toString(16)}.txt`, buf), buf);
+    }
+  });
+
+  it('truncated multibyte at EOF', () => {
+    const buf = Buffer.from([0x41, 0xE4, 0xB8]);
+    expectMatches(writeFile('truncated-multibyte.txt', buf), buf);
+  });
+});