nodejs · mertcanaltin · May 18, 2026
diff --git a/benchmark/fs/readfile-utf8-fastpath.js b/benchmark/fs/readfile-utf8-fastpath.js
@@ -0,0 +1,62 @@
+'use strict';
+
+const common = require('../common.js');
+const fs = require('fs');
+const path = require('path');
+const tmpdir = require('../../test/common/tmpdir');
+
+// Parameter matrix: one run of main() per combination of fixture size in
+// bytes, byte pattern written to the fixture, and how the file is handed to
+// fs.readFileSync() (by path or by file descriptor).
+const options = {
+  size: [64, 1024, 16384, 262144, 4194304],
+  content: ['ascii', 'latin1', 'utf8_mixed'],
+  source: ['path', 'fd'],
+  n: [3e3],
+};
+
+const bench = common.createBenchmark(main, options);
+
+/**
+ * Builds the bytes written to the benchmark fixture.
+ *
+ * The buffer is filled with as many whole copies of the pattern for `kind`
+ * as fit in `size` bytes; any leftover tail bytes stay zero.
+ * @param {string} kind One of 'ascii', 'latin1' or 'utf8_mixed'.
+ * @param {number} size Buffer length in bytes.
+ * @returns {Buffer}
+ * @throws {Error} If `kind` is not one of the supported names.
+ */
+function buildContent(kind, size) {
+  const patterns = new Map([
+    // 'a'
+    ['ascii', [0x61]],
+    // 'é' in UTF-8 is 0xC3 0xA9 (2 bytes per char)
+    ['latin1', [0xC3, 0xA9]],
+    // One ASCII 'a' followed by a 3-byte CJK char (U+4E2D 中 = E4 B8 AD)
+    ['utf8_mixed', [0x61, 0xE4, 0xB8, 0xAD]],
+  ]);
+
+  const pattern = patterns.get(kind);
+  if (pattern === undefined) {
+    throw new Error('unknown content: ' + kind);
+  }
+
+  const out = Buffer.alloc(size);
+  // Never write a partial pattern: stop at the last whole multiple.
+  const filledLength = out.length - (out.length % pattern.length);
+  if (filledLength > 0) {
+    out.fill(Buffer.from(pattern), 0, filledLength);
+  }
+  return out;
+}
+
+/**
+ * Runs one benchmark configuration: writes a `size`-byte fixture filled with
+ * the `content` pattern, then times `n` calls to
+ * `fs.readFileSync(..., 'utf8')` against it.
+ * @param {object} conf
+ * @param {number} conf.n Number of timed reads.
+ * @param {number} conf.size Fixture size in bytes.
+ * @param {string} conf.content Pattern name passed to buildContent().
+ * @param {string} conf.source 'fd' reads through a file descriptor; any other
+ *   value reads by path.
+ */
+function main({ n, size, content, source }) {
+  tmpdir.refresh();
+  const file = path.join(tmpdir.path, `bench-${content}-${size}.bin`);
+  fs.writeFileSync(file, buildContent(content, size));
+
+  if (source !== 'fd') {
+    bench.start();
+    for (let i = 0; i < n; i++) {
+      fs.readFileSync(file, 'utf8');
+    }
+    bench.end(n);
+    return;
+  }
+
+  // readFileSync(fd) starts at the descriptor's current position and leaves it
+  // at EOF, so one shared descriptor would make every read after the first
+  // return ''. Open a fresh descriptor per iteration so each read decodes the
+  // whole file; the open/close cost is therefore part of the measurement.
+  bench.start();
+  for (let i = 0; i < n; i++) {
+    const fd = fs.openSync(file, 'r');
+    try {
+      fs.readFileSync(fd, 'utf8');
+    } finally {
+      fs.closeSync(fd);
+    }
+  }
+  bench.end(n);
+}
diff --git a/src/util-inl.h b/src/util-inl.h
@@ -341,22 +341,6 @@ v8::Maybe<void> FromV8Array(v8::Local<v8::Context> context,
   return js_array->Iterate(context, PushItemToVector, &data);
 }
 
-v8::MaybeLocal<v8::Value> ToV8Value(v8::Local<v8::Context> context,
-                                    std::string_view str,
-                                    v8::Isolate* isolate) {
-  if (isolate == nullptr) isolate = v8::Isolate::GetCurrent();
-  if (str.size() >= static_cast<size_t>(v8::String::kMaxLength)) [[unlikely]] {
-    // V8 only has a TODO comment about adding an exception when the maximum
-    // string size is exceeded.
-    ThrowErrStringTooLong(isolate);
-    return v8::MaybeLocal<v8::Value>();
-  }
-
-  return v8::String::NewFromUtf8(
-             isolate, str.data(), v8::NewStringType::kNormal, str.size())
-      .FromMaybe(v8::Local<v8::String>());
-}
-
 v8::MaybeLocal<v8::Value> ToV8Value(v8::Local<v8::Context> context,
                                     std::u16string_view str,
                                     v8::Isolate* isolate) {

diff --git a/src/util.cc b/src/util.cc
@@ -812,4 +812,15 @@ v8::Maybe<int> GetValidFileMode(Environment* env,
   return v8::Just(mode);
 }
 
+// Converts the bytes of `str` to a V8 value through
+// StringBytes::Encode(..., UTF8). When `isolate` is null the current isolate
+// is used. If the byte length is at or above v8::String::kMaxLength,
+// ThrowErrStringTooLong() is called and an empty MaybeLocal is returned.
+v8::MaybeLocal<v8::Value> ToV8Value(v8::Local<v8::Context> context,
+                                    std::string_view str,
+                                    v8::Isolate* isolate) {
+  if (isolate == nullptr) {
+    isolate = v8::Isolate::GetCurrent();
+  }
+
+  const bool too_long =
+      str.size() >= static_cast<size_t>(v8::String::kMaxLength);
+  if (too_long) [[unlikely]] {
+    ThrowErrStringTooLong(isolate);
+    return v8::MaybeLocal<v8::Value>();
+  }
+
+  return StringBytes::Encode(isolate, str.data(), str.size(), UTF8);
+}
+
 }  // namespace node
diff --git a/src/util.h b/src/util.h
@@ -701,9 +701,9 @@ inline v8::Maybe<void> FromV8Array(v8::Local<v8::Context> context,
                                    v8::Local<v8::Array> js_array,
                                    std::vector<v8::Global<v8::Value>>* out);
 
-inline v8::MaybeLocal<v8::Value> ToV8Value(v8::Local<v8::Context> context,
-                                           std::string_view str,
-                                           v8::Isolate* isolate = nullptr);
+v8::MaybeLocal<v8::Value> ToV8Value(v8::Local<v8::Context> context,
+                                    std::string_view str,
+                                    v8::Isolate* isolate = nullptr);
 inline v8::MaybeLocal<v8::Value> ToV8Value(v8::Local<v8::Context> context,
                                            std::u16string_view str,
                                            v8::Isolate* isolate = nullptr);

diff --git a/test/parallel/test-fs-readfile-utf8-fast-path.js b/test/parallel/test-fs-readfile-utf8-fast-path.js
@@ -0,0 +1,103 @@
+'use strict';
+
+require('../common');
+const fs = require('node:fs');
+const path = require('node:path');
+const assert = require('node:assert');
+const { describe, it } = require('node:test');
+const tmpdir = require('../common/tmpdir');
+
+tmpdir.refresh();
+
+/**
+ * Writes `buf` to a file called `name` under `tmpdir.path`.
+ * @param {string} name File name, joined onto `tmpdir.path`.
+ * @param {Buffer} buf Bytes to write.
+ * @returns {string} Path of the file that was written.
+ */
+function writeFile(name, buf) {
+  const target = path.join(tmpdir.path, name);
+  fs.writeFileSync(target, buf);
+  return target;
+}
+
+/**
+ * Asserts that reading `filePath` with the 'utf8' encoding yields the same
+ * string as decoding `rawBuf` with `Buffer#toString('utf8')`.
+ * @param {string} filePath File to read.
+ * @param {Buffer} rawBuf The bytes that were written to the file.
+ */
+function expectMatches(filePath, rawBuf) {
+  const actual = fs.readFileSync(filePath, 'utf8');
+  const expected = rawBuf.toString('utf8');
+  assert.strictEqual(actual, expected);
+}
+
+describe('fs.readFileSync utf8 simdutf dispatch', () => {
+  // Registers a test that builds a buffer, writes it to `fileName` and checks
+  // the utf8 read of that file with expectMatches(). The buffer is built
+  // lazily, inside the test body.
+  const roundTrip = (title, fileName, makeBuf) => {
+    it(title, () => {
+      const bytes = makeBuf();
+      expectMatches(writeFile(fileName, bytes), bytes);
+    });
+  };
+
+  it('empty file', () => {
+    const emptyPath = writeFile('empty.txt', Buffer.alloc(0));
+    assert.strictEqual(fs.readFileSync(emptyPath, 'utf8'), '');
+  });
+
+  // Pure ASCII at several sizes.
+  roundTrip('ascii small', 'tiny-ascii.txt', () => Buffer.from('hello'));
+  roundTrip('ascii 20KB', 'medium-ascii.txt', () => Buffer.alloc(20 * 1024, 0x41));
+  roundTrip('ascii 1MB', 'large-ascii.txt', () => Buffer.alloc(1024 * 1024, 0x61));
+
+  it('fd input', () => {
+    const bytes = Buffer.alloc(50 * 1024, 0x62);
+    const fd = fs.openSync(writeFile('fd-ascii.txt', bytes), 'r');
+    try {
+      const actual = fs.readFileSync(fd, 'utf8');
+      assert.strictEqual(actual, bytes.toString('utf8'));
+    } finally {
+      fs.closeSync(fd);
+    }
+  });
+
+  // Valid multi-byte input.
+  roundTrip(
+    'multibyte UTF-8',
+    'multibyte.txt',
+    () => Buffer.from('中文测试 — café — 🚀'.repeat(500), 'utf8'),
+  );
+  roundTrip(
+    'latin1-fits utf8',
+    'latin1-fits.txt',
+    () => Buffer.from('naïve café résumé — niño Köln '.repeat(500), 'utf8'),
+  );
+
+  // Byte sequences that are not well-formed UTF-8.
+  roundTrip(
+    'invalid: lone continuation byte',
+    'invalid-cont.txt',
+    () => Buffer.from([0x68, 0x69, 0x80, 0x21]),
+  );
+  roundTrip(
+    'invalid: overlong',
+    'invalid-overlong.txt',
+    () => Buffer.from([0x41, 0xC0, 0xAF, 0x42]),
+  );
+  roundTrip(
+    'invalid: surrogate',
+    'invalid-surrogate.txt',
+    () => Buffer.from([0x41, 0xED, 0xA0, 0x80, 0x42]),
+  );
+
+  // Code points on either side of U+00FF.
+  roundTrip(
+    'latin1 boundary U+00FF',
+    'latin1-boundary.txt',
+    () => Buffer.from('ÿ'.repeat(2048), 'utf8'),
+  );
+  roundTrip(
+    'above latin1 U+0100',
+    'above-latin1.txt',
+    () => Buffer.from('ĀāĂ'.repeat(1024), 'utf8'),
+  );
+
+  it('single codepoint each UTF-8 length', () => {
+    // One code point for each encoded length: 1, 2, 3 and 4 bytes.
+    [0x41, 0x00E9, 0x4E2D, 0x1F600].forEach((cp) => {
+      const bytes = Buffer.from(String.fromCodePoint(cp), 'utf8');
+      const fileName = `single-cp-${cp.toString(16)}.txt`;
+      expectMatches(writeFile(fileName, bytes), bytes);
+    });
+  });
+
+  roundTrip(
+    'truncated multibyte at EOF',
+    'truncated-multibyte.txt',
+    () => Buffer.from([0x41, 0xE4, 0xB8]),
+  );
+});