Skip to content
This repository was archived by the owner on Jan 30, 2026. It is now read-only.

Commit ba3dba1

Browse files
committed
DC-2101: Initial Token class and tests. Tokens are strings with special meaning that will be used to make up expressions.
1 parent e0c38db commit ba3dba1

2 files changed

Lines changed: 230 additions & 0 deletions

File tree

  • dcs-bagit/dcs-bagit-support/src
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
/*
2+
*
3+
* * Copyright 2015 Johns Hopkins University
4+
* *
5+
* * Licensed under the Apache License, Version 2.0 (the "License");
6+
* * you may not use this file except in compliance with the License.
7+
* * You may obtain a copy of the License at
8+
* *
9+
* * http://www.apache.org/licenses/LICENSE-2.0
10+
* *
11+
* * Unless required by applicable law or agreed to in writing, software
12+
* * distributed under the License is distributed on an "AS IS" BASIS,
13+
* * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* * See the License for the specific language governing permissions and
15+
* * limitations under the License.
16+
*
17+
*/
18+
19+
package org.dataconservancy.bagit.rules;
20+
21+
import static org.dataconservancy.bagit.rules.Message.ERR_NULL;
22+
23+
/**
24+
* Tokens are strings that make up a location expressions. Location expressions are patterns that are matched against
25+
* paths. Location expressions are inspired by Apache Ant file pattern matching.
26+
*/
27+
enum Token {
28+
29+
/**
30+
* A token matching exactly one character in an expression.
31+
*/
32+
EXACTLY_ONE_CHARACTER("?"),
33+
34+
/**
35+
* A token that will match multiple directory levels in an expression.
36+
*/
37+
MULTIPLE_DIRECTORIES("**"),
38+
39+
/**
40+
* A token matching zero or more characters in an expression. <em>Must always be defined sometime after
41+
* {@link #MULTIPLE_DIRECTORIES}</em>
42+
*/
43+
ZERO_OR_MORE_CHARACTERS("*"),
44+
45+
46+
/**
47+
* A token that separates path segments in an expression.
48+
*/
49+
PATH_SEPARATOR("/"),
50+
51+
/**
52+
* A special token with a {@code null} token string. <em>Must always be defined last</em>
53+
*/
54+
LITERAL();
55+
56+
private static final String ERR_MULTIPLE_TOKENS = "Candidate sequence '%s' contains multiple tokens. " +
57+
"Try splitting up the tokens and submitting the tokens one at a time.";
58+
59+
/**
60+
* String representation of the token, if there is one.
61+
*/
62+
private String tokenString;
63+
64+
/**
65+
* Construct a Token with no string representation. Currently reserved for {@link #LITERAL} tokens.
66+
*/
67+
private Token() {
68+
this.tokenString = null;
69+
}
70+
71+
/**
72+
* Construct a token with the supplied string representation.
73+
*
74+
* @param tokenString the string representation of the token.
75+
* @throws java.lang.IllegalArgumentException if the {@code tokenString} is {@code null}
76+
*/
77+
private Token(String tokenString) {
78+
if (tokenString == null) {
79+
throw new IllegalArgumentException(String.format(ERR_NULL, "tokenString"));
80+
}
81+
this.tokenString = tokenString;
82+
}
83+
84+
/**
85+
* Obtain the string form of the token, may be {@code null}. {@link #LITERAL} tokens will not
86+
* have a string form, because a literal is the set of characters that <em>do not</em> represent a token.
87+
*
88+
* @return the string form of the token, or {@code null} in the case of {@code LITERAL} tokens.
89+
*/
90+
String getTokenString() {
91+
return tokenString;
92+
}
93+
94+
/**
95+
* Attempts to parse a string which represents a <em>single</em> token into a {@code Token}
96+
*
97+
* @param candidate the candidate token string
98+
* @return a {@code Token} if {@code candidate} represents a valid token
99+
* @throws java.lang.IllegalArgumentException if {@code candidate} does not represent a valid token
100+
*/
101+
static Token parse(CharSequence candidate) {
102+
if (candidate == null) {
103+
throw new IllegalArgumentException(String.format(ERR_NULL, "candidate"));
104+
}
105+
106+
for (Token m : Token.values()) {
107+
108+
// See if the candidate token string equals the string representation
109+
// of the token (except LITERAL), and return it
110+
if (m.tokenString != null && m.tokenString.equals(candidate)) {
111+
return m;
112+
}
113+
114+
// Check to see if the candidate token string _contains_ the string representation
115+
// of the token (except LITERAL). If so, that means that the candidate contains multiple
116+
// tokens, which isn't allowed.
117+
if (candidate.length() > 1 &&
118+
m.tokenString != null &&
119+
candidate.chars().anyMatch(
120+
c -> m.tokenString.contains(Character.toString((char) c)))) {
121+
throw new IllegalArgumentException(String.format(ERR_MULTIPLE_TOKENS, candidate));
122+
}
123+
}
124+
125+
// None of our Token string representations equaled the candidate string.
126+
// The candidate string did not _contain_ any of the Token string representations
127+
// We must be left with a LITERAL.
128+
129+
return LITERAL;
130+
}
131+
132+
@Override
133+
public String toString() {
134+
return "Token{" +
135+
"tokenString='" + tokenString + '\'' +
136+
'}';
137+
}
138+
}
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
/*
2+
*
3+
* * Copyright 2015 Johns Hopkins University
4+
* *
5+
* * Licensed under the Apache License, Version 2.0 (the "License");
6+
* * you may not use this file except in compliance with the License.
7+
* * You may obtain a copy of the License at
8+
* *
9+
* * http://www.apache.org/licenses/LICENSE-2.0
10+
* *
11+
* * Unless required by applicable law or agreed to in writing, software
12+
* * distributed under the License is distributed on an "AS IS" BASIS,
13+
* * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* * See the License for the specific language governing permissions and
15+
* * limitations under the License.
16+
*
17+
*/
18+
19+
package org.dataconservancy.bagit.rules;
20+
21+
22+
import org.junit.Test;
23+
24+
import static org.junit.Assert.assertEquals;
25+
26+
/**
27+
* Insures that the {@link Token} class properly parses tokens.
28+
*/
29+
public class TokenTest {
30+
31+
/**
32+
* Tokens are strings with special meanings. Insure all single-character tokens can be parsed.
33+
*/
34+
@Test
35+
public void testParseSingleCharacterString() throws Exception {
36+
assertEquals(Token.ZERO_OR_MORE_CHARACTERS, Token.parse("*"));
37+
assertEquals(Token.EXACTLY_ONE_CHARACTER, Token.parse("?"));
38+
assertEquals(Token.LITERAL, Token.parse("f"));
39+
assertEquals(Token.PATH_SEPARATOR, Token.parse("/"));
40+
}
41+
42+
/**
43+
* Tokens are strings with special meanings. Insure all multi-character tokens can be parsed.
44+
*/
45+
@Test
46+
public void testParseMultipleCharacterStrings() throws Exception {
47+
assertEquals(Token.MULTIPLE_DIRECTORIES, Token.parse("**"));
48+
assertEquals(Token.LITERAL, Token.parse("foobarbaz"));
49+
}
50+
51+
/**
52+
* Attempting to parse a string with multiple tokens is an error.
53+
*/
54+
@Test(expected = IllegalArgumentException.class)
55+
public void testParseSingleStringContainingDifferentTokens() throws Exception {
56+
Token.parse("*/?**abc");
57+
}
58+
59+
/**
60+
* Attempting to parse a string with multiple tokens is an error. (Just another case similar to above)
61+
*/
62+
@Test(expected = IllegalArgumentException.class)
63+
public void testParseLiteralEndingWithPathSep() throws Exception {
64+
Token.parse("directory/");
65+
}
66+
67+
/**
68+
* Zero length strings would be parsed as a literal.
69+
*/
70+
@Test
71+
public void testParseZeroLengthString() throws Exception {
72+
assertEquals(Token.LITERAL, Token.parse(""));
73+
}
74+
75+
/**
76+
* Empty strings would be parsed as a literal.
77+
*/
78+
@Test
79+
public void testParseEmptyString() throws Exception {
80+
assertEquals(Token.LITERAL, Token.parse(" "));
81+
}
82+
83+
/**
84+
* Parsing {@code null} results in an error
85+
*
86+
* @throws Exception
87+
*/
88+
@Test(expected = IllegalArgumentException.class)
89+
public void testParseNull() throws Exception {
90+
assertEquals(Token.LITERAL, Token.parse(null));
91+
}
92+
}

0 commit comments

Comments
 (0)