@@ -76,7 +76,11 @@ def orderise_string(s)
7676 # This all very much relies on ASCII character numbering. A copy of `ascii`(7)
7777 # up on a convenient terminal may assist in understanding what's going
7878 # on here.
79- n = s
79+
80+ # First up, let's transmogrify the string we were given into one that only contains
81+ # a controlled subset of characters, that we can easily map into a smaller numeric
82+ # space.
83+ s = s
8084 # We care not for your capitals!
8185 . downcase
8286 # Any group of rando characters sort at the end
@@ -85,6 +89,11 @@ def orderise_string(s)
8589 . gsub ( /[[:space:]]+/ , '{' )
8690 # Numbers come after spaces
8791 . gsub ( /[0-9]/ , '|' )
92+
93+ # Next, we turn that string of characters into a "packed" number that represents the
94+ # whole string, but in a more compact form than would be used if each character took
95+ # up the full seven or eight bits used by regular ASCII.
96+ n = s
8897 . each_char
8998 # 'a' => 1, 'b' => 2, ..., 'z' => 26, '{' => 27, '|' => 28,
9099 # '}' => 29 (unused), '~' => 30. 0 is kept as "no character" so
@@ -94,13 +103,13 @@ def orderise_string(s)
94103 # occupying five bits of said number.
95104 . inject ( 0 ) { |i , c | ( i << 5 ) + c }
96105
97- # Now we need to turn the number into one whose in-memory representation
106+ # Thirdly, we need to turn the number into one whose in-memory representation
98107 # has a length in bits that is a multiple of 64. This is to ensure that
99108 # the first character has the most-significant bits possible, so it
100109 # sorts the highest.
101110 n = n << ( 64 - ( s . length * 5 ) % 64 )
102111
103- # And now we can turn all that gigantic mess into an array of terms
112+ # And now, semi-finally, we can turn all that gigantic mess into an array of terms
104113 [ ] . tap do |terms |
105114 while n > 0
106115 terms . unshift ( n % 2 **64 )
0 commit comments