2626using F23 . StringSimilarity . Interfaces ;
2727// ReSharper disable SuggestVarOrType_Elsewhere
2828// ReSharper disable TooWideLocalVariableScope
29+ // ReSharper disable IntroduceOptionalParameters.Global
2930
3031namespace F23 . StringSimilarity
3132{
@@ -34,14 +35,28 @@ namespace F23.StringSimilarity
3435 public class WeightedLevenshtein : IStringDistance
3536 {
3637 private readonly ICharacterSubstitution _characterSubstitution ;
38+ private readonly ICharacterInsDel _characterInsDel ;
3739
/// <summary>
/// Creates an instance that uses the given substitution strategy and no
/// custom insertion/deletion strategy, so every insertion and deletion
/// costs 1.0.
/// </summary>
/// <param name="characterSubstitution">The strategy to determine character substitution weights.</param>
public WeightedLevenshtein(ICharacterSubstitution characterSubstitution)
    : this(characterSubstitution, characterInsDel: null)
{
    // Nothing to do here: the two-argument constructor stores both strategies.
}
48+
/// <summary>
/// Creates an instance with one strategy for substitution weights and
/// another for insertion and deletion weights.
/// </summary>
/// <param name="characterSubstitution">The strategy to determine character substitution weights.</param>
/// <param name="characterInsDel">
/// The strategy to determine character insertion/deletion weights.
/// May be null, in which case every insertion and deletion costs 1.0.
/// </param>
public WeightedLevenshtein(
    ICharacterSubstitution characterSubstitution,
    ICharacterInsDel characterInsDel)
{
    _characterInsDel = characterInsDel;
    _characterSubstitution = characterSubstitution;
}
4661
4762 /// <summary>
@@ -84,45 +99,64 @@ public double Distance(string s1, string s2)
8499 double [ ] vtemp ;
85100
86101 // initialize v0 (the previous row of distances)
87- // this row is A[0][i]: edit distance for an empty s
88- // the distance is just the number of characters to delete from t
89- for ( int i = 0 ; i < v0 . Length ; i ++ )
102+ // this row is A[0][i]: edit distance for an empty s1
103+ // the distance is the cost of inserting each character of s2
104+ v0 [ 0 ] = 0 ;
105+ for ( int i = 1 ; i < v0 . Length ; i ++ )
90106 {
91- v0 [ i ] = i ;
107+ v0 [ i ] = v0 [ i - 1 ] + InsertionCost ( s2 [ i - 1 ] ) ;
92108 }
93109
94110 for ( int i = 0 ; i < s1 . Length ; i ++ )
95111 {
112+ char s1i = s1 [ i ] ;
113+ double deletionCost = DeletionCost ( s1i ) ;
114+
96115 // calculate v1 (current row distances) from the previous row v0
97116 // first element of v1 is A[i+1][0]
98- // edit distance is delete (i+1) chars from s to match empty t
99- v1 [ 0 ] = i + 1 ;
117+ // Edit distance is the cost of deleting characters from s1
118+ // to match empty s2.
119+ v1 [ 0 ] = v0 [ 0 ] + deletionCost ;
100120
101121 // use formula to fill in the rest of the row
102122 for ( int j = 0 ; j < s2 . Length ; j ++ )
103123 {
124+ char s2j = s2 [ j ] ;
104125 double cost = 0 ;
105- if ( s1 [ i ] != s2 [ j ] )
126+
127+ if ( s1i != s2j )
106128 {
107- cost = _characterSubstitution . Cost ( s1 [ i ] , s2 [ j ] ) ;
129+ cost = _characterSubstitution . Cost ( s1i , s2j ) ;
108130 }
131+
132+ double insertionCost = InsertionCost ( s2j ) ;
133+
109134 v1 [ j + 1 ] = Math . Min (
110- v1 [ j ] + 1 , // Cost of insertion
135+ v1 [ j ] + insertionCost , // Cost of insertion
111136 Math . Min (
112- v0 [ j + 1 ] + 1 , // Cost of remove
137+ v0 [ j + 1 ] + deletionCost , // Cost of deletion
113138 v0 [ j ] + cost ) ) ; // Cost of substitution
114139 }
115140
116141 // copy v1 (current row) to v0 (previous row) for next iteration
117- //System.arraycopy(v1, 0, v0, 0, v0.length);
142+ // System.arraycopy(v1, 0, v0, 0, v0.length);
118143 // Flip references to current and previous row
119144 vtemp = v0 ;
120145 v0 = v1 ;
121146 v1 = vtemp ;
122-
123147 }
124148
125149 return v0 [ s2 . Length ] ;
126150 }
151+
/// <summary>
/// Returns the cost of inserting <paramref name="c"/>: the value supplied
/// by the insertion/deletion strategy, or 1.0 when no strategy was provided.
/// </summary>
/// <param name="c">The character being inserted.</param>
/// <returns>The insertion weight for the character.</returns>
private double InsertionCost(char c) =>
    _characterInsDel?.InsertionCost(c) ?? 1.0;
156+
/// <summary>
/// Returns the cost of deleting <paramref name="c"/>: the value supplied
/// by the insertion/deletion strategy, or 1.0 when no strategy was provided.
/// </summary>
/// <param name="c">The character being deleted.</param>
/// <returns>The deletion weight for the character.</returns>
private double DeletionCost(char c) =>
    _characterInsDel?.DeletionCost(c) ?? 1.0;
127161 }
128162}
0 commit comments