77 */
88class SpamSum
99{
10- /**
11- * Compute the SpamSum of string using default parameters:
12- * length = 64 characters
13- * 64 possible letters (Base64)
14- * min blocksize = 3
15- * block size computed automatically
16- *
17- * @param string $string
18- * @return \webd\language\SpamSum
19- */
20- public static function Hash ($ string )
10+ /**
11+ * Compute the SpamSum of string using default parameters:
12+ * length = 64 characters
13+ * 64 possible letters (Base64)
14+ * min blocksize = 3
15+ * block size computed automatically
16+ *
17+ * @param string $string
18+ * @return \webd\language\SpamSum
19+ */
20+ public static function hash ($ string )
2121 {
2222 $ ss = new SpamSum ();
23- $ ss ->HashString ($ string );
23+ $ ss ->hashString ($ string );
2424 return $ ss ;
2525 }
2626
2727 const HASH_PRIME = 0x01000193 ;
2828 const HASH_INIT = 0x28021967 ;
2929 const B64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/ " ;
30-
30+
3131 protected $ SPAMSUM_LENGTH = 64 ;
3232 protected $ LETTERS = 64 ;
3333 protected $ BLOCKSIZE = 0 ;
3434 protected $ MIN_BLOCKSIZE = 3 ;
3535 protected $ auto_blocksize = true ;
36-
36+
3737 protected $ left ;
3838 protected $ right ;
39-
40- /**
41- * Set a different hash length
42- * min = 1, default = 64
43- *
44- * @param int $l
45- */
46- public function SetHashLength ($ l )
39+
40+ /**
41+ * Set a different hash length
42+ * min = 1, default = 64
43+ *
44+ * @param int $l
45+ */
46+ public function setHashLength ($ l )
4747 {
4848 $ this ->SPAMSUM_LENGTH = $ l ;
4949 }
50-
51- /**
52- * Set the number of letters to use to create the hash
53- * min = 2, max = 64, default = 64 (base64)
54- * @param int $l
55- */
56- public function SetLetters ($ l )
50+
51+ /**
52+ * Set the number of letters to use to create the hash
53+ * min = 2, max = 64, default = 64 (base64)
54+ * @param int $l
55+ */
56+ public function setLetters ($ l )
5757 {
5858 $ this ->LETTERS = $ l ;
5959 }
60-
61- /**
62- * Manually set the minimum block size
63- * min = 1, default = 3
64- * @param int $s
65- */
66- public function SetMinBlocksize ($ s )
60+
61+ /**
62+ * Manually set the minimum block size
63+ * min = 1, default = 3
64+ * @param int $s
65+ */
66+ public function setMinBlocksize ($ s )
6767 {
6868 $ this ->MIN_BLOCKSIZE = $ s ;
6969 }
70-
71- /**
72- * Set the blok size manually, so that it won't be computed from the length of
73- * the string
74- * @param int $s
75- */
76- public function SetBlockSize ($ s )
70+
71+ /**
72+ * Set the block size manually, so that it won't be computed from the length of
73+ * the string
74+ * @param int $s
75+ */
76+ public function setBlockSize ($ s )
7777 {
7878 $ this ->BLOCKSIZE = $ s ;
7979 $ this ->auto_blocksize = false ;
8080 }
81-
82- /**
83- *
84- * @param string $string
85- * @return \webd\language\SpamSum
86- */
87- public function HashString ($ string )
81+
82+ /**
83+ * Compute the SpamSum of $string and store the result in this object
84+ * @param string $string
85+ * @return \webd\language\SpamSum
86+ */
87+ public function hashString ($ string )
8888 {
8989 $ b64 = self ::B64 ;
9090 $ length = strlen ($ string );
9191
92- $ in = unpack (' C* ' , $ string );
92+ $ in = unpack (" C* " , $ string );
9393
94- // Reindex (to start from 0)
94+ // Reindex (to start from 0)
9595 foreach ($ in as $ k => $ v ) {
9696 $ in [$ k - 1 ] = $ v ;
9797 }
9898 unset($ in [count ($ in )]);
9999
100- // Guess a a reasonable block size
100+ // Guess a reasonable block size
101101 if ($ this ->auto_blocksize ) {
102102 $ this ->BLOCKSIZE = $ this ->MIN_BLOCKSIZE ;
103-
103+
104104 while ($ this ->BLOCKSIZE * $ this ->SPAMSUM_LENGTH < $ length ) {
105105 $ this ->BLOCKSIZE = $ this ->BLOCKSIZE * 2 ;
106106 }
107107 }
108-
108+
109109 again:
110110
111- $ this ->left = array () ;
112- $ this ->right = array () ;
111+ $ this ->left = [] ;
112+ $ this ->right = [] ;
113113
114114 $ k = $ j = 0 ;
115115 $ h3 = $ h2 = self ::HASH_INIT ;
116- $ h = $ this ->rolling_hash_reset ();
116+ $ h = $ this ->rollingHashReset ();
117117
118118 for ($ i = 0 ; $ i < $ length ; $ i ++) {
119- /* at each character we update the rolling hash and the normal
120- * hash. When the rolling hash hits the reset value then we emit
121- * the normal hash as a element of the signature and reset both
122- * hashes
123- */
124- $ h = $ this ->rolling_hash ($ in [$ i ]);
125- $ h2 = self ::sum_hash ($ in [$ i ], $ h2 );
126- $ h3 = self ::sum_hash ($ in [$ i ], $ h3 );
127-
128- if ($ h % $ this ->BLOCKSIZE == ( $ this ->BLOCKSIZE - 1 ) ) {
129- /* we have hit a reset point. We now emit a hash which is based
130- * on all chacaters in the piece of the string between the last
131- * reset point and this one
132- */
119+ /* at each character we update the rolling hash and the normal
120+ * hash. When the rolling hash hits the reset value then we emit
121+ * the normal hash as an element of the signature and reset both
122+ * hashes
123+ */
124+ $ h = $ this ->rollingHash ($ in [$ i ]);
125+ $ h2 = self ::sumHash ($ in [$ i ], $ h2 );
126+ $ h3 = self ::sumHash ($ in [$ i ], $ h3 );
127+
128+ if ($ h % $ this ->BLOCKSIZE == $ this ->BLOCKSIZE - 1 ) {
129+ /* we have hit a reset point. We now emit a hash which is based
130+ * on all characters in the piece of the string between the last
131+ * reset point and this one
132+ */
133133 $ this ->left [$ j ] = $ b64 [$ h2 % $ this ->LETTERS ];
134134 if ($ j < $ this ->SPAMSUM_LENGTH - 1 ) {
135135 /* we can have a problem with the tail overflowing. The easiest way
@@ -142,11 +142,11 @@ public function HashString($string)
142142 }
143143 }
144144
145- /* this produces a second signature with a block size of block_size*2.
146- * By producing dual signatures in this way the effect of small changes
147- * in the string near a block size boundary is greatly reduced.
148- */
149- if ($ h % ($ this ->BLOCKSIZE * 2 ) == (( $ this ->BLOCKSIZE * 2 ) - 1 ) ) {
145+ /* this produces a second signature with a block size of block_size*2.
146+ * By producing dual signatures in this way the effect of small changes
147+ * in the string near a block size boundary is greatly reduced.
148+ */
149+ if ($ h % ($ this ->BLOCKSIZE * 2 ) == $ this ->BLOCKSIZE * 2 - 1 ) {
150150 $ this ->right [$ k ] = $ b64 [$ h3 % $ this ->LETTERS ];
151151 if ($ k < $ this ->SPAMSUM_LENGTH / 2 - 1 ) {
152152 $ h3 = self ::HASH_INIT ;
@@ -155,89 +155,89 @@ public function HashString($string)
155155 }
156156 }
157157
158- /* If we have anything left then add it to the end. This ensures that the
159- * last part of the string is always considered
160- */
158+ /* If we have anything left then add it to the end. This ensures that the
159+ * last part of the string is always considered
160+ */
161161 if ($ h != 0 ) {
162162 $ this ->left [$ j ] = $ b64 [$ h2 % $ this ->LETTERS ];
163163 $ this ->right [$ k ] = $ b64 [$ h3 % $ this ->LETTERS ];
164164 }
165165
166- /* Our blocksize guess may have been way off - repeat if necessary
167- */
168- if ($ this ->auto_blocksize
169- && $ this ->BLOCKSIZE > $ this ->MIN_BLOCKSIZE
170- && $ j < $ this ->SPAMSUM_LENGTH / 2 ) {
166+ /* Our blocksize guess may have been way off - repeat if necessary
167+ */
168+ if ($ this ->auto_blocksize &&
169+ $ this ->BLOCKSIZE > $ this ->MIN_BLOCKSIZE &&
170+ $ j < $ this ->SPAMSUM_LENGTH / 2
171+ ) {
171172 $ this ->BLOCKSIZE = $ this ->BLOCKSIZE / 2 ;
172173 goto again;
173174 }
174175
175176 return $ this ;
176177 }
177-
178+
178179 public function __toString ()
179180 {
180- return
181- $ this ->BLOCKSIZE . ": " . $ this ->Left () . ": " . $ this ->Right ();
181+ return $ this ->BLOCKSIZE . ": " . $ this ->left () . ": " . $ this ->right ();
182182 }
183-
184- public function BlockSize ()
183+
184+ public function blockSize ()
185185 {
186186 return $ this ->BLOCKSIZE ;
187187 }
188-
189- public function Left ()
188+
189+ public function left ()
190190 {
191191 return implode ("" , $ this ->left );
192192 }
193-
194- public function Right ()
193+
194+ public function right ()
195195 {
196196 return implode ("" , $ this ->right );
197197 }
198-
199- /* A simple non-rolling hash, based on the FNV hash
200- */
201- protected static function sum_hash ($ c , $ h )
198+
199+ /* A simple non-rolling hash, based on the FNV hash
200+ */
201+ protected static function sumHash ($ c , $ h )
202202 {
203203 $ h = ($ h * self ::HASH_PRIME ) % pow (2 , 32 );
204204 $ h = ($ h ^ $ c ) % pow (2 , 32 );
205205 return $ h ;
206206 }
207-
208-
209- /* A rolling hash, based on the Adler checksum. By using a rolling hash
210- * we can perform auto resynchronisation after inserts/deletes internally,
211- * h1 is the sum of the bytes in the window and h2 is the sum of the bytes
212- * times the index h3 is a shift/xor based rolling hash, and is mostly
213- * needed to ensure that we can cope with large blocksize values
214- */
207+
208+ /* A rolling hash, based on the Adler checksum. By using a rolling hash
209+ * we can perform auto resynchronisation after inserts/deletes. Internally,
210+ * h1 is the sum of the bytes in the window and h2 is the sum of the bytes
211+ * times the index. h3 is a shift/xor based rolling hash, and is mostly
212+ * needed to ensure that we can cope with large blocksize values
213+ */
215214 const ROLLING_WINDOW = 7 ;
216-
217- protected $ rolling_window = array () ;
215+
216+ protected $ rolling_window = [] ;
218217 protected $ rolling_h1 ;
219218 protected $ rolling_h2 ;
220219 protected $ rolling_h3 ;
221220 protected $ rolling_n ;
222221
223- protected function rolling_hash ($ c )
222+ protected function rollingHash ($ c )
224223 {
225224 $ this ->rolling_h2 -= $ this ->rolling_h1 ;
226225 $ this ->rolling_h2 += self ::ROLLING_WINDOW * $ c ;
227226
228227 $ this ->rolling_h1 += $ c ;
229- $ this ->rolling_h1 -= $ this ->rolling_window [$ this ->rolling_n % self ::ROLLING_WINDOW ];
228+ $ this ->rolling_h1 -=
229+ $ this ->rolling_window [$ this ->rolling_n % self ::ROLLING_WINDOW ];
230230
231231 $ this ->rolling_window [$ this ->rolling_n % self ::ROLLING_WINDOW ] = $ c ;
232232 $ this ->rolling_n ++;
233233
234- $ this ->rolling_h3 = ($ this ->rolling_h3 << 5 ) & 0xFFFFFFFF ;
234+ $ this ->rolling_h3 = ($ this ->rolling_h3 << 5 ) & 0xffffffff ;
235235 $ this ->rolling_h3 ^= $ c ;
236236
237237 return $ this ->rolling_h1 + $ this ->rolling_h2 + $ this ->rolling_h3 ;
238238 }
239239
240- protected function rolling_hash_reset ()
240+ protected function rollingHashReset ()
241241 {
242242 for ($ i = 0 ; $ i < self ::ROLLING_WINDOW ; $ i ++) {
243243 $ this ->rolling_window [$ i ] = 0 ;
0 commit comments