Skip to content

Commit c5c434b

Browse files
committed
fix: replace regex with literal search in Excel/DOCX paths
Per review feedback from es-dc: the simplest and safest approach is to force literal matching for Office file content search, eliminating the ReDoS attack surface entirely.
- searchExcelFiles: uses String.indexOf() instead of RegExp
- searchDocxFiles: uses String.indexOf() instead of RegExp
- Case-insensitive matching via toLowerCase()
- File pattern glob matching still uses RegExp (safe — generated from sanitized extensions, not user input)
- isSafeRegex/buildSafeRegex kept as exports for text search path
1 parent 57884ae commit c5c434b

1 file changed

Lines changed: 19 additions & 31 deletions

File tree

src/search-manager.ts

Lines changed: 19 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -389,17 +389,13 @@ export interface SearchSessionOptions {
389389
ignoreCase: boolean,
390390
maxResults?: number,
391391
filePattern?: string,
392-
literalSearch?: boolean
392+
_literalSearch?: boolean
393393
): Promise<SearchResult[]> {
394394
const results: SearchResult[] = [];
395395

396-
// Build regex for matching content, with ReDoS protection
397-
// When literalSearch is true, escape the pattern so it's matched literally
398-
const flags = ignoreCase ? 'i' : '';
399-
const effectivePattern = literalSearch
400-
? pattern.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
401-
: pattern;
402-
const { regex } = buildSafeRegex(effectivePattern, flags);
396+
// Office file search always uses literal matching to prevent ReDoS.
397+
// Regex patterns are treated as literal strings — this is intentional.
398+
const searchTerm = ignoreCase ? pattern.toLowerCase() : pattern;
403399

404400
// Find Excel files recursively
405401
let excelFiles = await this.findExcelFiles(rootPath);
@@ -412,9 +408,9 @@ export interface SearchSessionOptions {
412408
return patterns.some(pat => {
413409
// Support glob-like patterns
414410
if (pat.includes('*')) {
411+
// Glob patterns are safe (generated from sanitized file extensions, not user regex)
415412
const regexPat = pat.replace(/\./g, '\\.').replace(/\*/g, '.*');
416-
const { regex: globRegex } = buildSafeRegex(`^${regexPat}$`, 'i');
417-
return globRegex.test(fileName);
413+
return new RegExp(`^${regexPat}$`, 'i').test(fileName);
418414
}
419415
// Exact match (case-insensitive)
420416
return fileName.toLowerCase() === pat.toLowerCase();
@@ -470,12 +466,10 @@ export interface SearchSessionOptions {
470466
// Join all cell values with space for cross-column matching
471467
const rowText = rowValues.join(' ');
472468

473-
if (regex.test(rowText)) {
474-
// Extract the matching portion for display
475-
const match = rowText.match(regex);
476-
const matchContext = match
477-
? this.getMatchContext(rowText, match.index || 0, match[0].length)
478-
: rowText.substring(0, 150);
469+
const textToSearch = ignoreCase ? rowText.toLowerCase() : rowText;
470+
const matchIndex = textToSearch.indexOf(searchTerm);
471+
if (matchIndex !== -1) {
472+
const matchContext = this.getMatchContext(rowText, matchIndex, searchTerm.length);
479473

480474
results.push({
481475
file: `${filePath}:${sheetName}!Row${rowNumber}`,
@@ -572,17 +566,13 @@ export interface SearchSessionOptions {
572566
ignoreCase: boolean,
573567
maxResults?: number,
574568
filePattern?: string,
575-
literalSearch?: boolean
569+
_literalSearch?: boolean
576570
): Promise<SearchResult[]> {
577571
const results: SearchResult[] = [];
578572

579-
// Build regex for matching content, with ReDoS protection
580-
// When literalSearch is true, escape the pattern so it's matched literally
581-
const flags = ignoreCase ? 'i' : '';
582-
const effectivePattern = literalSearch
583-
? pattern.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
584-
: pattern;
585-
const { regex } = buildSafeRegex(effectivePattern, flags);
573+
// Office file search always uses literal matching to prevent ReDoS.
574+
// Regex patterns are treated as literal strings — this is intentional.
575+
const searchTerm = ignoreCase ? pattern.toLowerCase() : pattern;
586576

587577
let docxFiles = await this.findDocxFiles(rootPath);
588578

@@ -593,8 +583,7 @@ export interface SearchSessionOptions {
593583
return patterns.some(pat => {
594584
if (pat.includes('*')) {
595585
const regexPat = pat.replace(/\./g, '\\.').replace(/\*/g, '.*');
596-
const { regex: globRegex } = buildSafeRegex(`^${regexPat}$`, 'i');
597-
return globRegex.test(fileName);
586+
return new RegExp(`^${regexPat}$`, 'i').test(fileName);
598587
}
599588
return fileName.toLowerCase() === pat.toLowerCase();
600589
});
@@ -630,11 +619,10 @@ export interface SearchSessionOptions {
630619
if (!text || !text.trim()) continue;
631620
lineNum++;
632621

633-
if (regex.test(text)) {
634-
const match = text.match(regex);
635-
const matchContext = match
636-
? this.getMatchContext(text, match.index || 0, match[0].length)
637-
: text.substring(0, 150);
622+
const textToSearch = ignoreCase ? text.toLowerCase() : text;
623+
const matchIndex = textToSearch.indexOf(searchTerm);
624+
if (matchIndex !== -1) {
625+
const matchContext = this.getMatchContext(text, matchIndex, searchTerm.length);
638626

639627
const partName = xmlPath === 'word/document.xml' ? '' : `:${xmlPath.replace('word/', '')}`;
640628
results.push({

0 commit comments

Comments
 (0)