DC-2101: Initial Token class and tests. Tokens are strings with special meaning that will be used to make up expressions.

emetsger · emetsger · commit ba3dba1dc50d · 2015-09-10T11:03:01.000-04:00
diff --git a/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/Token.java b/dcs-bagit/dcs-bagit-support/src/main/java/org/dataconservancy/bagit/rules/Token.java
@@ -0,0 +1,138 @@
+/*
+ *
+ *  * Copyright 2015 Johns Hopkins University
+ *  *
+ *  * Licensed under the Apache License, Version 2.0 (the "License");
+ *  * you may not use this file except in compliance with the License.
+ *  * You may obtain a copy of the License at
+ *  *
+ *  *     http://www.apache.org/licenses/LICENSE-2.0
+ *  *
+ *  * Unless required by applicable law or agreed to in writing, software
+ *  * distributed under the License is distributed on an "AS IS" BASIS,
+ *  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  * See the License for the specific language governing permissions and
+ *  * limitations under the License.
+ *
+ */
+
+package org.dataconservancy.bagit.rules;
+
+import static org.dataconservancy.bagit.rules.Message.ERR_NULL;
+
+/**
+ * Tokens are strings that make up location expressions.  Location expressions are patterns that are matched against
+ * paths.  Location expressions are inspired by Apache Ant file pattern matching.
+ */
+enum Token {
+
+    /**
+     * A token matching exactly one character in an expression.
+     */
+    EXACTLY_ONE_CHARACTER("?"),
+
+    /**
+     * A token that will match multiple directory levels in an expression.
+     */
+    MULTIPLE_DIRECTORIES("**"),
+
+    /**
+     * A token matching zero or more characters in an expression.  <em>Must always be declared after
+     * {@link #MULTIPLE_DIRECTORIES}</em>: {@link #parse(CharSequence)} walks the constants in declaration
+     * order, and {@code "**"} would otherwise be rejected as multiple {@code "*"} tokens.
+     */
+    ZERO_OR_MORE_CHARACTERS("*"),
+
+    /**
+     * A token that separates path segments in an expression.
+     */
+    PATH_SEPARATOR("/"),
+
+    /**
+     * A special token with a {@code null} token string.  <em>Must always be defined last</em>
+     */
+    LITERAL();
+
+    private static final String ERR_MULTIPLE_TOKENS = "Candidate sequence '%s' contains multiple tokens.  " +
+            "Try splitting up the tokens and submitting the tokens one at a time.";
+
+    /**
+     * String representation of the token; {@code null} only for {@link #LITERAL}.
+     */
+    private final String tokenString;
+
+    /**
+     * Construct a Token with no string representation.  Currently reserved for {@link #LITERAL} tokens.
+     */
+    private Token() {
+        this.tokenString = null;
+    }
+
+    /**
+     * Construct a token with the supplied string representation.
+     *
+     * @param tokenString the string representation of the token.
+     * @throws java.lang.IllegalArgumentException if the {@code tokenString} is {@code null}
+     */
+    private Token(String tokenString) {
+        if (tokenString == null) {
+            throw new IllegalArgumentException(String.format(ERR_NULL, "tokenString"));
+        }
+        this.tokenString = tokenString;
+    }
+
+    /**
+     * Obtain the string form of the token, may be {@code null}.  {@link #LITERAL} tokens will not
+     * have a string form, because a literal is the set of characters that <em>do not</em> represent a token.
+     *
+     * @return the string form of the token, or {@code null} in the case of {@code LITERAL} tokens.
+     */
+    String getTokenString() {
+        return tokenString;
+    }
+
+    /**
+     * Attempts to parse a character sequence which represents a <em>single</em> token into a {@code Token}.
+     * Any {@code CharSequence} is compared by content, so a {@code StringBuilder} holding {@code "*"} parses
+     * the same as the {@code String} {@code "*"}.  Sequences containing no token characters, including the
+     * empty sequence, parse as {@link #LITERAL}.
+     *
+     * @param candidate the candidate token string
+     * @return a {@code Token} if {@code candidate} represents a valid token
+     * @throws java.lang.IllegalArgumentException if {@code candidate} is {@code null}, or if it contains a
+     *                                            token character without being exactly one token
+     */
+    static Token parse(CharSequence candidate) {
+        if (candidate == null) {
+            throw new IllegalArgumentException(String.format(ERR_NULL, "candidate"));
+        }
+
+        // Constants are visited in declaration order; LITERAL has no token string and is the fallback.
+        for (Token m : Token.values()) {
+            if (m.tokenString == null) {
+                continue;
+            }
+
+            // String.equals(Object) is false for any non-String CharSequence, so compare by content.
+            if (m.tokenString.contentEquals(candidate)) {
+                return m;
+            }
+
+            // A longer candidate holding any character of this token is several tokens (or a token
+            // mixed with literal text) run together, which isn't allowed.
+            if (candidate.length() > 1 && candidate.chars().anyMatch(c -> m.tokenString.indexOf(c) >= 0)) {
+                throw new IllegalArgumentException(String.format(ERR_MULTIPLE_TOKENS, candidate));
+            }
+        }
+
+        // No token string matched and no token character was present: the candidate is a LITERAL.
+        return LITERAL;
+    }
+
+    @Override
+    public String toString() {
+        return "Token{" +
+                "tokenString='" + tokenString + '\'' +
+                '}';
+    }
+}
diff --git a/dcs-bagit/dcs-bagit-support/src/test/java/org/dataconservancy/bagit/rules/TokenTest.java b/dcs-bagit/dcs-bagit-support/src/test/java/org/dataconservancy/bagit/rules/TokenTest.java
@@ -0,0 +1,92 @@
+/*
+ *
+ *  * Copyright 2015 Johns Hopkins University
+ *  *
+ *  * Licensed under the Apache License, Version 2.0 (the "License");
+ *  * you may not use this file except in compliance with the License.
+ *  * You may obtain a copy of the License at
+ *  *
+ *  *     http://www.apache.org/licenses/LICENSE-2.0
+ *  *
+ *  * Unless required by applicable law or agreed to in writing, software
+ *  * distributed under the License is distributed on an "AS IS" BASIS,
+ *  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  * See the License for the specific language governing permissions and
+ *  * limitations under the License.
+ *
+ */
+
+package org.dataconservancy.bagit.rules;
+
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Ensures that the {@link Token} class properly parses tokens.
+ */
+public class TokenTest {
+
+    /**
+     * Each single-character token string parses to its own token; any other lone character is a literal.
+     */
+    @Test
+    public void testParseSingleCharacterString() throws Exception {
+        assertParsesTo(Token.EXACTLY_ONE_CHARACTER, "?");
+        assertParsesTo(Token.ZERO_OR_MORE_CHARACTERS, "*");
+        assertParsesTo(Token.PATH_SEPARATOR, "/");
+        assertParsesTo(Token.LITERAL, "f");
+    }
+
+    /**
+     * The two-character token parses to its own token; a longer run of ordinary characters is a literal.
+     */
+    @Test
+    public void testParseMultipleCharacterStrings() throws Exception {
+        assertParsesTo(Token.MULTIPLE_DIRECTORIES, "**");
+        assertParsesTo(Token.LITERAL, "foobarbaz");
+    }
+
+    /** A string made of several different tokens run together is rejected. */
+    @Test(expected = IllegalArgumentException.class)
+    public void testParseSingleStringContainingDifferentTokens() throws Exception {
+        Token.parse("*/?**abc");
+    }
+
+    /** A literal immediately followed by a path separator is also several tokens, and is rejected. */
+    @Test(expected = IllegalArgumentException.class)
+    public void testParseLiteralEndingWithPathSep() throws Exception {
+        Token.parse("directory/");
+    }
+
+    /**
+     * The zero-length string parses as a literal.
+     */
+    @Test
+    public void testParseZeroLengthString() throws Exception {
+        assertParsesTo(Token.LITERAL, "");
+    }
+
+    /**
+     * A string holding only a space contains no token characters, so it parses as a literal.
+     */
+    @Test
+    public void testParseEmptyString() throws Exception {
+        assertParsesTo(Token.LITERAL, " ");
+    }
+
+    /**
+     * Parsing {@code null} is an error.
+     */
+    @Test(expected = IllegalArgumentException.class)
+    public void testParseNull() throws Exception {
+        // The call is expected to throw, so there is no result to assert on.
+        Token.parse(null);
+    }
+
+    /** Asserts that parsing {@code input} yields exactly {@code expected}. */
+    private static void assertParsesTo(Token expected, String input) {
+        assertEquals(expected, Token.parse(input));
+    }
+}