Skip to content

Commit 3fc78c6

Browse files
committed
string
add boyerMoore add kmp add rabin karp
1 parent 12954e4 commit 3fc78c6

8 files changed

Lines changed: 466 additions & 47 deletions

File tree

src/string/boyerMoore.js

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/**
 * Precomputed shift tables used by the Boyer-Moore search.
 *
 * @typedef {Object} PatternTable
 * @property {Map<string, number>[]} badCharacter badCharacter[i] maps every
 *   character occurring in pattern[0..i-1] to its right-most index there.
 * @property {number[]} goodSuffix goodSuffix[i] is the shift to apply when the
 *   comparison fails at pattern index i (pattern[i+1..] has already matched).
 */

/**
 * Build the (strong) good-suffix shift table for a pattern.
 *
 * @param {string} pattern pattern
 * @returns {number[]} table with one entry per pattern index; every entry is >= 1
 */
function buildGoodSuffix(pattern) {
  const length = pattern.length;
  // Both arrays are indexed 0..length; index j stands for the suffix that
  // starts at pattern[j] (index `length` is the empty suffix).
  const shift = new Array(length + 1).fill(0);
  const border = new Array(length + 1).fill(0);

  // Case 1: the matched suffix re-occurs inside the pattern, preceded by a
  // different character.
  let i = length;
  let j = length + 1;
  border[i] = j;
  while (i > 0) {
    while (j <= length && pattern[i - 1] !== pattern[j - 1]) {
      if (shift[j] === 0) {
        shift[j] = j - i;
      }
      j = border[j];
    }
    i -= 1;
    j -= 1;
    border[i] = j;
  }

  // Case 2: only a prefix of the pattern matches a tail of the matched suffix.
  let widest = border[0];
  for (let k = 0; k <= length; k += 1) {
    if (shift[k] === 0) {
      shift[k] = widest;
    }
    if (k === widest) {
      widest = border[widest];
    }
  }

  // shift[0] is the shift after a full match, which this search never needs;
  // dropping it lets callers index the table by the mismatching pattern index.
  return shift.slice(1);
}

/**
 * Build Pattern Table for Pattern
 *
 * @param {string} pattern pattern
 * @returns {PatternTable} pattern table
 */
function buildPattern(pattern) {
  const badCharacter = [new Map()];
  for (let i = 1; i < pattern.length; i += 1) {
    // Table i = table i-1 plus the character that just moved into the prefix.
    const seen = new Map(badCharacter[i - 1]);
    seen.set(pattern[i - 1], i - 1);
    badCharacter[i] = seen;
  }
  return {
    badCharacter,
    goodSuffix: buildGoodSuffix(pattern),
  };
}

/**
 * Calculate how far the pattern may be shifted after a mismatch.
 *
 * @param {PatternTable} patternTable pattern table
 * @param {number} dismatchIndex pattern index at which the comparison failed
 * @param {string} badChar text character that caused the mismatch
 * @returns {number} shift (>= 1) for the pattern alignment
 */
function getOffset(patternTable, dismatchIndex, badChar) {
  const lastSeen = patternTable.badCharacter[dismatchIndex].get(badChar);
  // Index 0 is a valid position, so test for `undefined` explicitly instead
  // of relying on truthiness.
  const posBadChar = lastSeen === undefined ? -1 : lastSeen;
  return Math.max(dismatchIndex - posBadChar, patternTable.goodSuffix[dismatchIndex]);
}

/**
 * Word search
 *
 * @param {string} text - Text that may contain the searchable pattern.
 * @param {string} pattern - Pattern that is being searched in text.
 * @return {number} - Position of the first occurrence of the pattern in text,
 *   or -1 when the pattern does not occur. An empty pattern matches at 0.
 */
function boyerMoore(text, pattern) {
  if (pattern.length === 0) {
    return 0;
  }
  const patternTable = buildPattern(pattern);
  const lastIndex = pattern.length - 1;

  let pIndex = lastIndex;
  let tIndex = lastIndex;
  while (tIndex < text.length) {
    if (text[tIndex] === pattern[pIndex]) {
      if (pIndex === 0) {
        return tIndex;
      }
      tIndex -= 1;
      pIndex -= 1;
    } else {
      // tIndex has walked back over the (lastIndex - pIndex) characters that
      // matched; return to the end of the current alignment before shifting.
      const alreadyMatched = lastIndex - pIndex;
      tIndex += alreadyMatched + getOffset(patternTable, pIndex, text[tIndex]);
      pIndex = lastIndex;
    }
  }
  return -1;
}
70+
71+
module.exports = boyerMoore;

src/string/knuthMorrisPratt.js

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/**
 * Build the Knuth-Morris-Pratt failure table.
 *
 * Entry i holds the length of the longest proper prefix of
 * pattern[0..i] that is also a suffix of it.
 *
 * @param {string} pattern word
 * @return {number[]} pattern table
 */
function buildPattern(pattern) {
  const size = pattern.length;
  const patternTable = Array.from({ length: size }, () => 0);

  let matched = 0;
  let position = 1;
  while (position < size) {
    if (pattern[position] === pattern[matched]) {
      matched += 1;
      patternTable[position] = matched;
      position += 1;
    } else if (matched !== 0) {
      // Fall back to the next shorter border and retry the same position.
      matched = patternTable[matched - 1];
    } else {
      // No border left to try; the entry stays 0.
      position += 1;
    }
  }
  return patternTable;
}
19+
20+
/**
 * Knuth-Morris-Pratt word search.
 *
 * @param {string} text - Text that may contain the searchable pattern.
 * @param {string} pattern - Pattern that is being searched in text.
 * @return {number} - Position of the first occurrence of the pattern in text,
 *   or -1 when it does not occur. An empty pattern matches at 0.
 */
function knuthMorrisPratt(text, pattern) {
  const patternSize = pattern.length;
  if (patternSize === 0) {
    return 0;
  }

  const fallback = buildPattern(pattern);
  const lastPatternIndex = patternSize - 1;
  let cursor = 0;
  let matched = 0;

  while (cursor < text.length) {
    if (text[cursor] !== pattern[matched]) {
      if (matched === 0) {
        // Nothing matched so far: just move on in the text.
        cursor += 1;
      } else {
        // Reuse the longest border of what has matched; the text cursor stays.
        matched = fallback[matched - 1];
      }
      continue;
    }

    if (matched === lastPatternIndex) {
      return cursor - matched;
    }
    cursor += 1;
    matched += 1;
  }
  return -1;
}
52+
53+
module.exports = knuthMorrisPratt;

src/string/rabinKarp.js

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
// Base and modulus of the polynomial rolling hash.
const DEFAULT_BASE = 37;
const DEFAULT_MODULUS = 101;

/**
 * Converts char to number.
 *
 * The value is the UTF-16 code unit, because every index used by the search
 * (string length, substring bounds, word[0]) counts code units as well.
 *
 * @param {string} char
 * @return {number}
 */
function charToNumber(char) {
  return char.charCodeAt(0);
}

/**
 * Creates hash code for a string.
 *
 * The word is hashed code unit by code unit so the result always agrees with
 * what `roll` produces for the same window.
 *
 * @param {string} word
 * @return {number} hash in the range [0, DEFAULT_MODULUS)
 */
function hash(word) {
  let result = 0;
  for (let charIndex = 0; charIndex < word.length; charIndex += 1) {
    result = (result * DEFAULT_BASE + word.charCodeAt(charIndex)) % DEFAULT_MODULUS;
  }
  return result;
}

/**
 * Weight (modulo DEFAULT_MODULUS) of the first character in a window of the
 * given length: DEFAULT_BASE ** (length - 1).
 *
 * @param {number} length window length
 * @return {number}
 */
function highOrderMultiplier(length) {
  let multiplier = 1;
  for (let i = 1; i < length; i += 1) {
    multiplier = (multiplier * DEFAULT_BASE) % DEFAULT_MODULUS;
  }
  return multiplier;
}

/**
 * Function that creates hash representation of the word
 * based on previous word (shifted by one character left) hash value.
 *
 * Recalculates the hash representation of a word so that it isn't
 * necessary to traverse the whole word again.
 *
 * @param {number} prevHash
 * @param {string} prevWord
 * @param {string} newWord
 * @param {number} [multiplier] weight of prevWord's first character; callers
 *   rolling many windows of one length can pass it to avoid recomputation
 * @return {number} hash of newWord
 */
function roll(prevHash, prevWord, newWord, multiplier = highOrderMultiplier(prevWord.length)) {
  const outgoing = charToNumber(prevWord[0]);
  const incoming = charToNumber(newWord[newWord.length - 1]);

  // Add the modulus before subtracting so the intermediate value never
  // becomes negative.
  const withoutOutgoing = prevHash + DEFAULT_MODULUS
    - ((outgoing * multiplier) % DEFAULT_MODULUS);
  return (withoutOutgoing * DEFAULT_BASE + incoming) % DEFAULT_MODULUS;
}

/**
 * Word search
 *
 * @param {string} text - Text that may contain the searchable pattern.
 * @param {string} pattern - Pattern that is being searched in text.
 * @return {number} - Position of the first occurrence of the pattern in text,
 *   or -1 when it does not occur. An empty pattern matches at 0.
 */
function rabinKarp(text, pattern) {
  const frameLength = pattern.length;
  const lastStart = text.length - frameLength;
  if (lastStart < 0) {
    return -1;
  }

  // Hash of the word that every text window is compared against.
  const patternHash = hash(pattern);
  // Identical for every window, so compute it once instead of on every roll.
  const multiplier = highOrderMultiplier(frameLength);

  let prevFrame = null;
  let currentFrameHash = null;
  for (let charIndex = 0; charIndex <= lastStart; charIndex += 1) {
    const currentFrame = text.substring(charIndex, charIndex + frameLength);
    currentFrameHash = prevFrame === null
      ? hash(currentFrame)
      : roll(currentFrameHash, prevFrame, currentFrame, multiplier);
    prevFrame = currentFrame;

    // Equal hashes may still be a collision, so confirm with a real comparison.
    if (currentFrameHash === patternHash && currentFrame === pattern) {
      return charIndex;
    }
  }
  return -1;
}
103+
104+
module.exports = rabinKarp;

test/index.js

Lines changed: 45 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,52 @@
1-
// // TREE
2-
// require("./tree/tree.BinarySearchTreeCommon.test")
3-
// require("./tree/tree.BinarySearchTree.test");
4-
// require("./tree/tree.AVLTree.test");
5-
// require("./tree/tree.SplayTree.test");
6-
// require("./tree/tree.RedBlackTree.test");
7-
// require("./tree/tree.Treap.test");
8-
// require('./tree/tree.DisjoinSet.test');
1+
// TREE
2+
require("./tree/tree.BinarySearchTreeCommon.test")
3+
require("./tree/tree.BinarySearchTree.test");
4+
require("./tree/tree.AVLTree.test");
5+
require("./tree/tree.SplayTree.test");
6+
require("./tree/tree.RedBlackTree.test");
7+
require("./tree/tree.Treap.test");
8+
require('./tree/tree.DisjoinSet.test');
99

10-
// // HEAP
11-
// require("./heap/heap.HeapCommon.test");
12-
// require("./heap/heap.MinHeap.test");
13-
// require("./heap/heap.MaxHeap.test");
14-
// require("./heap/heap.LeftistHeap.test");
15-
// require("./heap/heap.BinomialHeap.test");
16-
// require("./heap/heap.FibonacciHeap.test");
17-
// require('./heap/heap.PriorityQueue.test');
10+
// HEAP
11+
require("./heap/heap.HeapCommon.test");
12+
require("./heap/heap.MinHeap.test");
13+
require("./heap/heap.MaxHeap.test");
14+
require("./heap/heap.LeftistHeap.test");
15+
require("./heap/heap.BinomialHeap.test");
16+
require("./heap/heap.FibonacciHeap.test");
17+
require('./heap/heap.PriorityQueue.test');
1818

19-
// // HASH
20-
// require("./hash/hash.HashSet.test");
21-
// require("./hash/hash.HashTable.test");
19+
// HASH
20+
require("./hash/hash.HashSet.test");
21+
require("./hash/hash.HashTable.test");
2222

23-
// // Sort
24-
// require("./sort");
23+
// Sort
24+
require("./sort");
2525

26-
// // Graph
27-
// require("./graph/graph.GraphEdge.test");
28-
// require("./graph/graph.GraphVertex.test");
29-
// require("./graph/graph.Graph.test");
30-
// require("./graph/graph.depthFirstSearch.test");
31-
// require("./graph/graph.breadthFirstSearch.test");
32-
// require("./graph/graph.articulationPoints.test");
33-
// require("./graph/graph.graphBridges.test");
34-
// require('./graph/graph.scc.test');
35-
// require('./graph/graph.topologicalSort.test');
36-
// require('./graph/graph.detectCircle.test');
37-
// require('./graph/graph.eulerianPath.test');
38-
// require('./graph/graph.hamiltonianPath.test');
39-
// require('./graph/graph.kruskal.test');
40-
// require('./graph/graph.prim.test');
41-
// require('./graph/graph.floydWarshall.test');
42-
// require('./graph/graph.dijkstra.test');
43-
// require('./graph/graph.bellmanFord.test');
44-
// require('./graph/graph.spfa.test');
45-
// require('./graph/graph.astar.test');
26+
// Graph
27+
require("./graph/graph.GraphEdge.test");
28+
require("./graph/graph.GraphVertex.test");
29+
require("./graph/graph.Graph.test");
30+
require("./graph/graph.depthFirstSearch.test");
31+
require("./graph/graph.breadthFirstSearch.test");
32+
require("./graph/graph.articulationPoints.test");
33+
require("./graph/graph.graphBridges.test");
34+
require('./graph/graph.scc.test');
35+
require('./graph/graph.topologicalSort.test');
36+
require('./graph/graph.detectCircle.test');
37+
require('./graph/graph.eulerianPath.test');
38+
require('./graph/graph.hamiltonianPath.test');
39+
require('./graph/graph.kruskal.test');
40+
require('./graph/graph.prim.test');
41+
require('./graph/graph.floydWarshall.test');
42+
require('./graph/graph.dijkstra.test');
43+
require('./graph/graph.bellmanFord.test');
44+
require('./graph/graph.spfa.test');
45+
require('./graph/graph.astar.test');
4646

4747
// String
4848
require('./string/string.levenshteinDistance.test');
49-
require('./string/string.hammingDistance.test');
49+
require('./string/string.hammingDistance.test');
50+
require('./string/string.rabinKarp.test');
51+
require('./string/string.knuthMorrisPratt.test');
52+
require('./string/string.boyerMoore.test');
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
const expect = require('chai').expect;
2+
const boyerMoore = require('../../src/string/boyerMoore');
3+
4+
// Spec for the Boyer-Moore search: each case checks the index returned for
// the first occurrence of a pattern, or -1 when the pattern is absent.
describe('boyerMoore', () => {
  it('should find substring in a string', () => {
    expect(boyerMoore('HERE IS A SIMPLE EXAMPLE', 'EXAMPLE')).to.equal(17);
  });

  // Titled differently from the case above so a failure report identifies
  // which group of expectations broke.
  it('should handle empty input, missing patterns and repeated prefixes', () => {
    expect(boyerMoore('', '')).to.equal(0);
    expect(boyerMoore('a', '')).to.equal(0);
    expect(boyerMoore('a', 'a')).to.equal(0);
    expect(boyerMoore('ab', 'b')).to.equal(1);
    expect(boyerMoore('abcbcglx', 'abca')).to.equal(-1);
    expect(boyerMoore('abcbcglx', 'bcgl')).to.equal(3);
    expect(boyerMoore('abcxabcdabxabcdabcdabcy', 'abcdabcy')).to.equal(15);
    expect(boyerMoore('abcxabcdabxabcdabcdabcy', 'abcdabca')).to.equal(-1);
    expect(boyerMoore('abcxabcdabxaabcdabcabcdabcdabcy', 'abcdabca')).to.equal(12);
    expect(boyerMoore('abcxabcdabxaabaabaaaabcdabcdabcy', 'aabaabaaa')).to.equal(11);
    expect(boyerMoore('^ !/\'#\'pp', ' !/\'#\'pp')).to.equal(1);
  });

  it('should work with bigger texts', () => {
    // NOTE: 'desktop' + 'publishing' is joined without a space; the expected
    // indexes below depend on the text exactly as written.
    const text = 'Lorem Ipsum is simply dummy text of the printing and ' +
      'typesetting industry. Lorem Ipsum has been the industry\'s standard ' +
      'dummy text ever since the 1500s, when an unknown printer took a ' +
      'galley of type and scrambled it to make a type specimen book. It ' +
      'has survived not only five centuries, but also the leap into ' +
      'electronic typesetting, remaining essentially unchanged. It was ' +
      'popularised in the 1960s with the release of Letraset sheets ' +
      'containing Lorem Ipsum passages, and more recently with desktop' +
      'publishing software like Aldus PageMaker including versions of Lorem ' +
      'Ipsum.';

    expect(boyerMoore(text, 'Lorem')).to.equal(0);
    expect(boyerMoore(text, 'versions')).to.equal(549);
    expect(boyerMoore(text, 'versions of Lorem Ipsum.')).to.equal(549);
    expect(boyerMoore(text, 'versions of Lorem Ipsum:')).to.equal(-1);
    expect(boyerMoore(text, 'Lorem Ipsum passages, and more recently with')).to.equal(446);
  });
});

0 commit comments

Comments
 (0)