|
18 | 18 | end |
19 | 19 |
|
20 | 20 | describe "Standard text processor with an ngram filter" do |
21 | | - ["1", { foo: "bar" }, Object.new].each do |length| |
22 | | - it "raises an error if invalid length of #{length.inspect} provided" do |
23 | | - expect { |
24 | | - CipherStash::Analysis::TextProcessor.new({ |
25 | | - "tokenFilters" => [ |
26 | | - { "kind" => "downcase" }, |
27 | | - { "kind" => "ngram", "minLength" => length, "maxLength" => length } |
28 | | - ], |
29 | | - "tokenizer" => { "kind" => "standard" } |
30 | | - }) |
31 | | - }.to raise_error(CipherStash::Client::Error::InternalError, "The values provided to the min and max length must be of type Integer.") |
32 | | - end |
33 | | - end |
34 | | - |
35 | | - it "raises an error if the min length is greater than the max length" do |
36 | | - expect { |
37 | | - CipherStash::Analysis::TextProcessor.new({ |
38 | | - "tokenFilters" => [ |
39 | | - { "kind" => "downcase" }, |
40 | | - { "kind" => "ngram", "minLength" => 4, "maxLength" => 3 } |
41 | | - ], |
42 | | - "tokenizer" => { "kind" => "standard" } |
43 | | - }) |
44 | | - }.to raise_error(CipherStash::Client::Error::InternalError, "The ngram filter min length must be less than or equal to the max length") |
45 | | - end |
46 | | - |
47 | | - it "raises an error if tokenLength is provided" do |
48 | | - expect { |
49 | | - CipherStash::Analysis::TextProcessor.new({ |
50 | | - "tokenFilters" => [ |
51 | | - { "kind" => "downcase" }, |
52 | | - { "kind" => "ngram", "tokenLength" => 3 } |
53 | | - ], |
54 | | - "tokenizer" => { "kind" => "standard" } |
55 | | - }) |
56 | | - }.to raise_error(CipherStash::Client::Error::InternalError, "'tokenLength' is deprecated. Use 'minLength' and 'maxLength' for the ngram filter.") |
57 | | - end |
58 | | - |
59 | 21 | it "splits text into ngrams using min length of 3 and max length of 8" do |
60 | 22 | tokenizer = |
61 | 23 | CipherStash::Analysis::TextProcessor.new({ |
|
0 commit comments