Skip to content
This repository was archived by the owner on Jan 27, 2023. It is now read-only.

Commit 7af1d2b

Browse files
committed
update text processor to accept min and max length for ngram filter
1 parent e5d5a3e commit 7af1d2b

1 file changed

Lines changed: 9 additions & 1 deletion

File tree

lib/cipherstash/analysis/text_processor.rb

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ class TextProcessor
1616
# Processor.new({
1717
# "tokenFilters"=>[
1818
# {"kind"=>"downcase"},
19-
# {"kind"=>"ngram", "tokenLength"=>3}
19+
# {"kind"=>"ngram", "minLength"=>3, "maxLength"=>8}
2020
# ],
2121
# "tokenizer"=>{"kind"=>"standard"}
2222
# })
@@ -46,6 +46,14 @@ def build_token_filters(array)
4646
TokenFilters::Downcase.new(obj)
4747

4848
when "ngram"
49+
unless obj["minLength"].instance_of?(Integer) && obj["maxLength"].instance_of?(Integer)
50+
raise CipherStash::Client::Error::InternalError, "The values provided to the min and max length must be of type Integer."
51+
end
52+
53+
unless obj["maxLength"] >= obj["minLength"]
54+
raise CipherStash::Client::Error::InternalError, "The ngram filter min length must be less than or equal to the max length"
55+
end
56+
4957
TokenFilters::NGram.new(obj)
5058

5159
else

0 commit comments

Comments
 (0)