diff --git a/basex-core/.checkstyle b/basex-core/.checkstyle index 39a2c081f3..0b44be49b2 100644 --- a/basex-core/.checkstyle +++ b/basex-core/.checkstyle @@ -9,6 +9,7 @@ + diff --git a/basex-core/src/main/java/org/basex/query/QueryParser.java b/basex-core/src/main/java/org/basex/query/QueryParser.java index 385bf77a80..43e6986bd6 100644 --- a/basex-core/src/main/java/org/basex/query/QueryParser.java +++ b/basex-core/src/main/java/org/basex/query/QueryParser.java @@ -36,6 +36,7 @@ import org.basex.query.util.hash.*; import org.basex.query.util.list.*; import org.basex.query.util.parse.*; +import org.basex.query.util.rex.*; import org.basex.query.value.array.*; import org.basex.query.value.item.*; import org.basex.query.value.seq.*; @@ -94,8 +95,6 @@ public class QueryParser extends InputParser { private final HashSet decl = new HashSet<>(); /** Output declarations. */ private final HashMap sparams = new HashMap<>(); - /** QName cache. */ - private final QNmResolver qnames = new QNmResolver(); /** Local variable. */ private final LocalVars localVars = new LocalVars(this); @@ -110,6 +109,8 @@ public class QueryParser extends InputParser { private QueryError alter; /** Alternative position. */ private int alterPos; + /** Attribute value scanner. */ + private AttributeValueScanner attributeValueScanner; /** * Constructor. @@ -261,7 +262,6 @@ private void finish(final MainModule mm) throws QueryException { } // completes the parsing step - qnames.resolve(this, 0, sc.elemNS); if(sc.elemNS != null) sc.ns.add(EMPTY, sc.elemNS, null); RecordType.resolveRefs(recordTypeRefs, namedRecordTypes); } @@ -2085,10 +2085,7 @@ private void validate() throws QueryException { final int p = pos; if(!wsConsumeWs(VALIDATE)) return; - if(consume(TYPE)) { - final InputInfo ii = info(); - qnames.add(eQName(SKIPCHECK, QNAME_X), sc, ii); - } + if(consume(TYPE)) eQName(sc.elemNS, QNAME_X); consume(STRICT); consume(LAX); skipWs(); @@ -2471,7 +2468,7 @@ private ExprInfo simpleNodeTest(final Kind kind, final boolean all) throws Query } } // name test: prefix:name, name, Q{uri}name - if(kind == Kind.ELEMENT) qnames.add(name, sc, ii); else qnames.add(name, false, ii); + resolveQNm(name, kind == Kind.ELEMENT ? sc.elemNS : null, ii); return nameTest.apply(name, scope); } } @@ -3092,10 +3089,6 @@ private Expr dirElement(final boolean root) throws QueryException { final int size = qc.ns.size(); final byte[] nse = sc.elemNS; final byte[] nsd = sc.dirNS; - final int npos = qnames.size(); - - final QNm name = new QNm(qnm); - qnames.add(name, true, ii); final Atts ns = new Atts(); final ExprList cont = new ExprList(); @@ -3103,104 +3096,126 @@ private Expr dirElement(final boolean root) throws QueryException { // parse attributes boolean xmlDecl = false; // xml prefix explicitly declared? ArrayList atts = null; - while(true) { - final byte[] atn = qName(null); - if(atn.length == 0) break; - - final ExprList attv = new ExprList(); - consumeWS(); - if(root) { - if(!consume('=')) return null; - } else { - check('='); - } - consumeWS(); - final int delim = consume(); - if(!quote(delim)) throw error(NOQUOTE_X, found()); - final TokenBuilder tb = new TokenBuilder(); - boolean simple = true; + // remember start of attribute list + final int attrPos = pos; + for(int pass = 0; pass < 2; pass++) { + final boolean processXmlns = pass == 0; + pos = attrPos; while(true) { - while(!consume(delim)) { - cp = current(); - switch(cp) { - case '{': - if(next() == '{') { - tb.add(consume()); - consume(); - } else { - final byte[] text = tb.next(); - if(text.length == 0) { - add(attv, enclosedExpr()); - simple = false; + final byte[] atn = qName(null); + if(atn.length == 0) break; + consumeWS(); + if(root) { + if(!consume('=')) return null; + } else { + check('='); + } + consumeWS(); + + final int len = attributeValueScanner().length(pos); + final boolean hasXmlnsPrefix = startsWith(atn, XMLNS_COLON); + final boolean isXmlns = hasXmlnsPrefix || eq(atn, XMLNS); + if(processXmlns != isXmlns) { + if(len >= 0) { + pos += len; + consumeWS(); + continue; + } + Util.debugln("Failed to detect attribute value length: " + + new String(input, pos, Math.min(20, input.length - pos)) + "..."); + } + + final ExprList attv = new ExprList(); + final int delim = consume(); + if(!quote(delim)) throw error(NOQUOTE_X, found()); + final TokenBuilder tb = new TokenBuilder(); + + boolean simple = true; + while(true) { + while(!consume(delim)) { + cp = current(); + switch(cp) { + case '{': + if(next() == '{') { + tb.add(consume()); + consume(); } else { - add(attv, Str.get(text)); + final byte[] text = tb.next(); + if(text.length == 0) { + add(attv, enclosedExpr()); + simple = false; + } else { + add(attv, Str.get(text)); + } } - } - break; - case '}': - consume(); - check('}'); - tb.add('}'); - break; - case '<': - case 0: - throw error(NOQUOTE_X, found()); - case '\n': - case '\t': - tb.add(' '); - consume(); - break; - case '\r': - if(next() != '\n') tb.add(' '); - consume(); - break; - default: - entity(tb); - break; + break; + case '}': + consume(); + check('}'); + tb.add('}'); + break; + case '<': + case 0: + throw error(NOQUOTE_X, found()); + case '\n': + case '\t': + tb.add(' '); + consume(); + break; + case '\r': + if(next() != '\n') tb.add(' '); + consume(); + break; + default: + entity(tb); + break; + } } + if(!consume(delim)) break; + tb.add(delim); } - if(!consume(delim)) break; - tb.add(delim); - } - if(!tb.isEmpty()) add(attv, Str.get(tb.finish())); - - // parse namespace declarations - final boolean pr = startsWith(atn, XMLNS_COLON); - if(pr || eq(atn, XMLNS)) { - if(!simple) throw error(NSCONS); - final byte[] prefix = pr ? local(atn) : EMPTY; - final byte[] uri = attv.isEmpty() ? EMPTY : ((Str) attv.get(0)).string(); - if(eq(prefix, XML) && eq(uri, XML_URI)) { - if(xmlDecl) throw error(DUPLNSDEF_X, XML); - xmlDecl = true; - } else { - if(!Uri.get(uri).isValid()) throw error(INVURI_X, uri); - if(pr) { - if(uri.length == 0) throw error(NSEMPTYURI); - if(eq(prefix, XML, XMLNS)) throw error(BINDXML_X, prefix); - if(eq(uri, XML_URI)) throw error(BINDXMLURI_X_X, uri, XML); - if(eq(uri, XMLNS_URI)) throw error(BINDXMLURI_X_X, uri, XMLNS); - qc.ns.add(prefix, uri); + if(!tb.isEmpty()) add(attv, Str.get(tb.finish())); + + // parse namespace declarations + if(isXmlns) { + if(!simple) throw error(NSCONS); + final byte[] prefix = hasXmlnsPrefix ? local(atn) : EMPTY; + final byte[] uri = attv.isEmpty() ? EMPTY : ((Str) attv.get(0)).string(); + if(eq(prefix, XML) && eq(uri, XML_URI)) { + if(xmlDecl) throw error(DUPLNSDEF_X, XML); + xmlDecl = true; } else { - if(eq(uri, XML_URI)) throw error(XMLNSDEF_X, uri); - sc.dirNS = uri; - if(!sc.elemNsFixed) sc.elemNS = sc.dirNS; + if(!Uri.get(uri).isValid()) throw error(INVURI_X, uri); + if(hasXmlnsPrefix) { + if(uri.length == 0) throw error(NSEMPTYURI); + if(eq(prefix, XML, XMLNS)) throw error(BINDXML_X, prefix); + if(eq(uri, XML_URI)) throw error(BINDXMLURI_X_X, uri, XML); + if(eq(uri, XMLNS_URI)) throw error(BINDXMLURI_X_X, uri, XMLNS); + qc.ns.add(prefix, uri); + } else { + if(eq(uri, XML_URI)) throw error(XMLNSDEF_X, uri); + sc.dirNS = uri; + if(!sc.elemNsFixed) sc.elemNS = sc.dirNS; + } + if(ns.contains(prefix)) throw error(DUPLNSDEF_X, prefix); + ns.add(prefix, uri); } - if(ns.contains(prefix)) throw error(DUPLNSDEF_X, prefix); - ns.add(prefix, uri); + } else { + final QNm attn = new QNm(atn); + if(atts == null) atts = new ArrayList<>(1); + atts.add(attn); + resolveQNm(attn, null, info()); + add(cont, new CAttr(info(), false, attn, attv.finish())); } - } else { - final QNm attn = new QNm(atn); - if(atts == null) atts = new ArrayList<>(1); - atts.add(attn); - qnames.add(attn, false, info()); - add(cont, new CAttr(info(), false, attn, attv.finish())); + if(!consumeWS()) break; } - if(!consumeWS()) break; } + final QNm name = new QNm(qnm); + resolveQNm(name, sc.dirNS, ii); + if(consume('/')) { check('>'); } else { @@ -3217,8 +3232,6 @@ private Expr dirElement(final boolean root) throws QueryException { if(!eq(name.string(), close)) throw error(TAGWRONG_X_X, name.string(), close); } - qnames.resolve(this, npos, sc.dirNS); - // check for duplicate attribute names if(atts != null) { final int as = atts.size(); @@ -3235,6 +3248,15 @@ private Expr dirElement(final boolean root) throws QueryException { return new CElem(info(), false, name, ns, cont.finish()); } + /** + * Returns the attribute value scanner. + * @return attribute value scanner + */ + private AttributeValueScanner attributeValueScanner() { + if(attributeValueScanner == null) attributeValueScanner = new AttributeValueScanner(input); + return attributeValueScanner; + } + /** * Parses the "DirElemContent" rule. * @param name name of opening element @@ -3371,9 +3393,8 @@ private Expr compConstructor() throws QueryException { * @throws QueryException query exception */ private Expr compElement() throws QueryException { - final Expr name = compName(NOELEMNAME, true); + final Expr name = compName(NOELEMNAME, true, sc.elemNS); if(name == null) return null; - if(name instanceof final QNm qnm) qnames.add(qnm, sc, info()); skipWs(); return current('{') ? new CElem(info(), true, name, new Atts(), enclosedExpr()) : null; } @@ -3384,9 +3405,8 @@ private Expr compElement() throws QueryException { * @throws QueryException query exception */ private Expr compAttribute() throws QueryException { - final Expr name = compName(NOATTNAME, true); + final Expr name = compName(NOATTNAME, true, null); if(name == null) return null; - if(name instanceof final QNm qnm) qnames.add(qnm, false, info()); skipWs(); return current('{') ? new CAttr(info(), true, name, enclosedExpr()) : null; } @@ -3397,7 +3417,7 @@ private Expr compAttribute() throws QueryException { * @throws QueryException query exception */ private Expr compNamespace() throws QueryException { - final Expr name = compName(NONSNAME, false); + final Expr name = compName(NONSNAME, false, null); if(name == null) return null; skipWs(); return current('{') ? new CNSpace(info(), true, name, enclosedExpr()) : null; @@ -3409,7 +3429,7 @@ private Expr compNamespace() throws QueryException { * @throws QueryException query exception */ private Expr compPI() throws QueryException { - final Expr name = compName(NOPINAME, false); + final Expr name = compName(NOPINAME, false, null); if(name == null) return null; skipWs(); return current('{') ? new CPI(info(), true, name, enclosedExpr()) : null; @@ -3419,10 +3439,12 @@ private Expr compPI() throws QueryException { * Parses a computed name. * @param error error message * @param qname QName or NCName + * @param ns default namespace (can be {@code null}) * @return name or {@code null} * @throws QueryException query exception */ - private Expr compName(final QueryError error, final boolean qname) throws QueryException { + private Expr compName(final QueryError error, final boolean qname, final byte[] ns) + throws QueryException { // parse name enclosed in curly braces if(consume("{")) { final Expr name = check(expr(), error); @@ -3432,7 +3454,7 @@ private Expr compName(final QueryError error, final boolean qname) throws QueryE // parse literal name consume("#"); skipWs(); - if(qname) return eQName(SKIPCHECK, null); + if(qname) return eQName(ns, null); // parse name enclosed in quotes final byte[] string = ncName(null, false); @@ -4801,4 +4823,21 @@ public static String removeComments(final String query, final int max) { public final InputInfo info() { return new InputInfo(this, sc); } + + /** + * Finalizes the given QName by assigning a namespace URI. + * @param name QName to be resolved + * @param elemNS default element namespace (may be {@code null}, when resolving attribute names) + * @param info input info + * @throws QueryException query exception + */ + private void resolveQNm(final QNm name, final byte[] elemNS, final InputInfo info) + throws QueryException { + if(name.hasPrefix()) { + name.uri(qc.ns.resolve(name.prefix(), sc)); + if(!name.hasURI()) throw error(NOURI_X, info, name.prefix()); + } else if(elemNS != null) { + name.uri(elemNS); + } + } } diff --git a/basex-core/src/main/java/org/basex/query/util/parse/QNmResolver.java b/basex-core/src/main/java/org/basex/query/util/parse/QNmResolver.java deleted file mode 100644 index 888dd76870..0000000000 --- a/basex-core/src/main/java/org/basex/query/util/parse/QNmResolver.java +++ /dev/null @@ -1,81 +0,0 @@ -package org.basex.query.util.parse; - -import static org.basex.query.QueryError.*; - -import java.util.*; - -import org.basex.query.*; -import org.basex.query.value.item.*; -import org.basex.util.*; - -/** - * Resolves namespace URIs of QNames whose resolution must be deferred until the surrounding - * namespace context is known. - * - * @author BaseX Team, BSD License - * @author Christian Gruen - */ -public final class QNmResolver { - /** - * Entry for a QName whose namespace URI still needs to be resolved. - * @param name QName to be resolved - * @param nsElem flag for assigning default element namespace - * @param info input info (can be {@code null}) - */ - private record Entry(QNm name, boolean nsElem, InputInfo info) { } - /** QNames to be resolved. */ - private final ArrayList entries = new ArrayList<>(); - - /** - * Adds a QName unless it already has a namespace URI or it can be immediately assigned the fixed - * default namespace URI. This method must not be called for the element names of direct element - * constructors. - * @param name QName - * @param sc static context - * @param info input info (can be {@code null}) - */ - public void add(final QNm name, final StaticContext sc, final InputInfo info) { - if(sc.elemNsFixed && !name.hasPrefix() && !name.hasURI()) name.uri(sc.elemNS); - else add(name, true, info); - } - - /** - * Adds a QName unless it already has a namespace URI. - * @param name qname - * @param nsElem default check - * @param info input info (can be {@code null}) - */ - public void add(final QNm name, final boolean nsElem, final InputInfo info) { - if(!name.hasURI()) entries.add(new Entry(name, nsElem, info)); - } - - /** - * Finalizes the QNames by assigning namespace URIs. - * @param qp query parser - * @param npos first entry to be checked - * @param elemNS default element namespace - * @throws QueryException query exception - */ - public void resolve(final QueryParser qp, final int npos, final byte[] elemNS) - throws QueryException { - for(int i = entries.size() - 1; i >= npos; --i) { - final Entry entry = entries.get(i); - if(entry.name.hasPrefix()) { - entry.name.uri(qp.qc.ns.resolve(entry.name.prefix(), qp.sc)); - if(npos == 0 && !entry.name.hasURI()) - throw qp.error(NOURI_X, entry.info, entry.name.prefix()); - } else if(entry.nsElem) { - entry.name.uri(elemNS); - } - if(entry.name.hasURI()) entries.remove(i); - } - } - - /** - * Returns the number of remaining QNames. - * @return number - */ - public int size() { - return entries.size(); - } -} diff --git a/basex-core/src/main/java/org/basex/query/util/rex/AttributeValueScanner.ebnf b/basex-core/src/main/java/org/basex/query/util/rex/AttributeValueScanner.ebnf new file mode 100644 index 0000000000..ad0e8a5bda --- /dev/null +++ b/basex-core/src/main/java/org/basex/query/util/rex/AttributeValueScanner.ebnf @@ -0,0 +1,219 @@ + +DirAttributeValue + ::= '"' ( EscapeQuot | QuotAttrValueContent )* '"' + | "'" ( EscapeApos | AposAttrValueContent )* "'" +EscapeQuot + ::= '""' +EscapeApos + ::= "''" +QuotAttrValueContent + ::= QuotAttrContentChar + | CommonContent +CommonContent + ::= '{{' + | '}}' + | EnclosedExpr +AposAttrValueContent + ::= AposAttrContentChar + | CommonContent +DirElemContent + ::= DirectConstructor + | CDataSection + | CommonContent + | ElementContentChar +EnclosedExpr + ::= '{' EnclosedExprContent* '}' +EnclosedExprContent + ::= Comment + | StringLiteral + | StringConstructor + | StringTemplate + | Pragma + | DirectConstructor + | EnclosedExpr + | '<'^GeneralComp + | OtherEnclosedExprContent +Comment ::= '(:' ( CommentContents | Comment )* ':)' +StringLiteral + ::= AposStringLiteral + | QuotStringLiteral +StringConstructor + ::= '``[' StringConstructorContent ']``' +StringConstructorContent + ::= StringConstructorChars ( StringInterpolation StringConstructorChars )* +StringInterpolation + ::= '`' EnclosedExpr '`' +StringTemplate + ::= '`' ( StringTemplateFixedPart | StringTemplateVariablePart )* '`' +StringTemplateVariablePart + ::= EnclosedExpr +Pragma ::= '(#' PragmaContents '#)' +DirectConstructor + ::= DirElemConstructor + | DirCommentConstructor + | DirPIConstructor +DirElemConstructor + ::= '<'^DirElemConstructor QName DirAttributeList ( '/>' | '>' DirElemContent* '' ) +DirAttributeList + ::= ( S ( QName S? '=' S? DirAttributeValue )? )* +CDataSection + ::= '' +DirCommentConstructor + ::= '' +DirPIConstructor + ::= '' + + + +CommentContents + ::= ( Char+ - ( Char* ( '(:' | ':)' ) Char* ) ) - ( Char* '(' ) & ':' + | Char+ - ( Char* ( '(:' | ':)' ) Char* ) & '(' +AposStringLiteral + ::= "'" ( EscapeApos | [^'] )* "'" +EscapeApos + ::= "''" +QuotStringLiteral + ::= '"' ( EscapeQuot | [^"] )* '"' +EscapeQuot + ::= '""' +StringTemplateFixedPart + ::= ( Char - ( '{' | '}' | '`' ) | '{{' | '}}' | '``' )+ +StringConstructorChars + ::= Char* - ( Char* ( '`{' | ']``' ) Char* ) & ( '`{' | ']`' ) +PragmaContents + ::= Char* - ( Char* '#)' Char* ) & '#' +QuotAttrContentChar + ::= Char - ["{}<] +AposAttrContentChar + ::= Char - ['{}<] +ElementContentChar + ::= Char - [{}<] +CDataSectionContents + ::= Char* - ( Char* ']]>' Char* ) & ']]' +DirCommentContents + ::= ( Char - '-' | '-' ( Char - '-' ) )* +DirPIContents + ::= Char* - ( Char* '?>' Char* ) & '?' +QName ::= PrefixedName + | UnprefixedName +PrefixedName + ::= Prefix ':' LocalPart +Prefix ::= NCName +LocalPart + ::= NCName +UnprefixedName + ::= LocalPart +NCName ::= Name - ( Char* ':' Char* ) +Name ::= NameStartChar NameChar* +NameStartChar + ::= ':' + | [A-Z] + | '_' + | [a-z] + | [#xC0-#xD6] + | [#xD8-#xF6] + | [#xF8-#x2FF] + | [#x370-#x37D] + | [#x37F-#x1FFF] + | [#x200C-#x200D] + | [#x2070-#x218F] + | [#x2C00-#x2FEF] + | [#x3001-#xD7FF] + | [#xF900-#xFDCF] + | [#xFDF0-#xFFFD] + | [#x10000-#xEFFFF] +NameChar ::= NameStartChar + | '-' + | '.' + | [0-9] + | #xB7 + | [#x0300-#x036F] + | [#x203F-#x2040] +Char ::= #x9 + | #xA + | #xD + | [#x20-#xD7FF] + | [#xE000-#xFFFD] + | [#x10000-#x10FFFF] +S ::= ( #x20 | #x9 | #xD | #xA )+ +OtherEnclosedExprContent + ::= ( Char* '(' ) - ( Char* ( ["'`{}<] | '(#' | '(:' | '' + consume(23); // '-->' + } + + private void parse_DirPIConstructor() + { + consume(31); // '' + consume(34); // '?>' + } + + private void consume(int t) + { + if (l1 == t) + { + b0 = b1; e0 = e1; l1 = 0; + } + else + { + error(b1, e1, 0, l1, t); + } + } + + private void lookahead1(int tokenSetId) + { + if (l1 == 0) + { + l1 = match(tokenSetId); + b1 = begin; + e1 = end; + } + } + + private int error(int b, int e, int s, int l, int t) + { + throw new ParseException(b, e, s, l, t); + } + + private int b0, e0; + private int l1, b1, e1; + private CharSequence input = null; + private int size = 0; + private int begin = 0; + private int end = 0; + + private int match(int tokenSetId) + { + boolean nonbmp = false; + begin = end; + int current = end; + int result = INITIAL[tokenSetId]; + int state = 0; + + for (int code = result & 127; code != 0; ) + { + int charclass; + int c0 = current < size ? input.charAt(current) : 0; + ++current; + if (c0 < 0x80) + { + charclass = MAP0[c0]; + } + else if (c0 < 0xd800) + { + int c1 = c0 >> 4; + charclass = MAP1[(c0 & 15) + MAP1[(c1 & 31) + MAP1[c1 >> 5]]]; + } + else + { + if (c0 < 0xdc00) + { + int c1 = current < size ? input.charAt(current) : 0; + if (c1 >= 0xdc00 && c1 < 0xe000) + { + nonbmp = true; + ++current; + c0 = ((c0 & 0x3ff) << 10) + (c1 & 0x3ff) + 0x10000; + } + } + + int lo = 0, hi = 5; + for (int m = 3; ; m = (hi + lo) >> 1) + { + if (MAP2[m] > c0) {hi = m - 1;} + else if (MAP2[6 + m] < c0) {lo = m + 1;} + else {charclass = MAP2[12 + m]; break;} + if (lo > hi) {charclass = 0; break;} + } + } + + state = code; + int i0 = (charclass << 7) + code - 1; + code = TRANSITION[(i0 & 7) + TRANSITION[i0 >> 3]]; + + if (code > 127) + { + result = code; + code &= 127; + end = current; + } + } + + result >>= 7; + if (result == 0) + { + end = current - 1; + int c1 = end < size ? input.charAt(end) : 0; + if (c1 >= 0xdc00 && c1 < 0xe000) + { + --end; + } + return error(begin, end, state, -1, -1); + } + else if ((result & 64) != 0) + { + end = begin; + if (nonbmp) + { + for (int i = result >> 7; i > 0; --i) + { + int c1 = end < size ? input.charAt(end) : 0; + ++end; + if (c1 >= 0xd800 && c1 < 0xdc000) + { + ++end; + } + } + } + else + { + end += (result >> 7); + } + } + else if (nonbmp) + { + for (int i = result >> 7; i > 0; --i) + { + --end; + int c1 = end < size ? input.charAt(end) : 0; + if (c1 >= 0xdc00 && c1 < 0xe000) + { + --end; + } + } + } + else + { + end -= result >> 7; + } + + if (end > size) end = size; + return (result & 63) - 1; + } + + private static String[] getTokenSet(int tokenSetId) + { + java.util.ArrayList expected = new java.util.ArrayList<>(); + int s = tokenSetId < 0 ? - tokenSetId : INITIAL[tokenSetId] & 127; + for (int i = 0; i < 43; i += 32) + { + int j = i; + int i0 = (i >> 5) * 93 + s - 1; + int f = EXPECTED[(i0 & 3) + EXPECTED[i0 >> 2]]; + for ( ; f != 0; f >>>= 1, ++j) + { + if ((f & 1) != 0) + { + expected.add(TOKEN[j]); + } + } + } + return expected.toArray(new String[]{}); + } + + private static final int[] MAP0 = + { + /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 26, 26, 0, 0, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26, 1, + /* 34 */ 2, 3, 4, 4, 4, 5, 6, 7, 4, 4, 4, 8, 9, 10, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 11, 4, 12, 13, 14, 15, 4, 16, 17, + /* 67 */ 18, 19, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 20, 17, 17, 17, 17, 17, 17, 21, 4, 22, + /* 94 */ 4, 17, 23, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + /* 121 */ 17, 17, 24, 4, 25, 4, 4 + }; + + private static final int[] MAP1 = + { + /* 0 */ 108, 124, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 156, 181, 181, 181, 181, + /* 21 */ 181, 214, 215, 213, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, + /* 42 */ 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, + /* 63 */ 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, + /* 84 */ 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, + /* 105 */ 214, 214, 214, 254, 247, 270, 370, 286, 307, 347, 291, 408, 408, 408, 400, 348, 322, 348, 322, 348, 348, + /* 126 */ 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 364, 364, 364, 364, 364, 364, 364, + /* 147 */ 331, 348, 348, 348, 348, 348, 348, 348, 348, 386, 408, 408, 409, 407, 408, 408, 348, 348, 348, 348, 348, + /* 168 */ 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 408, 408, 408, 408, 408, 408, 408, 408, + /* 189 */ 408, 408, 408, 408, 408, 408, 408, 408, 408, 408, 408, 408, 408, 408, 408, 408, 408, 408, 408, 408, 408, + /* 210 */ 408, 408, 408, 329, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, + /* 231 */ 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 408, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 256 */ 0, 0, 0, 0, 0, 0, 0, 26, 26, 0, 0, 26, 0, 0, 26, 1, 2, 3, 4, 4, 4, 5, 6, 7, 4, 4, 4, 8, 9, 10, 4, 16, 17, + /* 289 */ 18, 19, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 24, 4, 25, 4, 4, 17, 17, 17, 17, 20, 17, 17, 17, 17, + /* 316 */ 17, 17, 21, 4, 22, 4, 17, 17, 17, 17, 17, 17, 17, 4, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + /* 343 */ 17, 17, 4, 17, 23, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 9, 9, 9, 9, 9, 9, 9, 9, + /* 372 */ 9, 9, 9, 9, 9, 9, 9, 9, 11, 4, 12, 13, 14, 15, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 17, 17, 4, 4, 4, 4, 4, + /* 405 */ 4, 4, 9, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 9 + }; + + private static final int[] MAP2 = + { + /* 0 */ 57344, 63744, 64976, 65008, 65536, 983040, 63743, 64975, 65007, 65533, 983039, 1114111, 4, 17, 4, 17, 17, + /* 17 */ 4 + }; + + private static final int[] INITIAL = + { + /* 0 */ 1, 2, 3, 1540, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27 + }; + + private static final int[] TRANSITION = + { + /* 0 */ 441, 441, 441, 441, 441, 441, 441, 441, 441, 441, 441, 441, 441, 441, 441, 441, 436, 441, 483, 432, 440, + /* 21 */ 449, 636, 491, 1152, 509, 441, 441, 441, 441, 441, 441, 436, 441, 520, 528, 440, 449, 547, 555, 676, 569, + /* 42 */ 441, 441, 441, 441, 441, 441, 584, 441, 483, 580, 440, 449, 572, 592, 1152, 606, 441, 441, 441, 441, 441, + /* 63 */ 441, 436, 441, 483, 432, 440, 449, 701, 592, 1152, 509, 441, 441, 441, 441, 441, 441, 436, 441, 616, 624, + /* 84 */ 440, 449, 1162, 555, 1152, 644, 441, 441, 441, 441, 441, 441, 436, 441, 561, 655, 440, 663, 720, 684, + /* 104 */ 826, 698, 441, 441, 441, 441, 441, 441, 436, 441, 483, 709, 717, 728, 701, 592, 1134, 509, 441, 441, 441, + /* 125 */ 441, 441, 441, 746, 441, 483, 754, 773, 449, 701, 592, 470, 785, 777, 735, 441, 441, 441, 441, 436, 441, + /* 146 */ 483, 432, 536, 449, 701, 592, 1199, 793, 777, 735, 441, 441, 441, 441, 436, 441, 1008, 432, 440, 449, + /* 166 */ 501, 592, 463, 509, 846, 441, 441, 441, 441, 441, 436, 441, 598, 432, 808, 819, 512, 592, 831, 606, 738, + /* 187 */ 441, 441, 441, 441, 441, 436, 441, 909, 940, 440, 449, 647, 555, 1152, 569, 441, 441, 441, 441, 441, 441, + /* 208 */ 436, 839, 483, 432, 440, 449, 701, 592, 1152, 509, 811, 848, 441, 441, 441, 441, 436, 856, 1022, 432, + /* 228 */ 765, 864, 701, 872, 880, 509, 846, 441, 441, 441, 441, 441, 628, 497, 483, 895, 632, 449, 925, 903, 1152, + /* 249 */ 921, 441, 441, 441, 441, 441, 441, 532, 441, 974, 432, 536, 449, 1138, 933, 1199, 793, 1112, 761, 441, + /* 269 */ 441, 441, 441, 532, 441, 974, 432, 536, 449, 1138, 933, 1199, 793, 1061, 735, 441, 441, 441, 441, 532, + /* 289 */ 441, 974, 432, 536, 449, 1138, 933, 1199, 793, 948, 735, 441, 441, 441, 441, 532, 441, 974, 432, 536, + /* 309 */ 449, 1138, 933, 1199, 793, 1185, 735, 441, 441, 441, 441, 532, 441, 974, 432, 536, 449, 1138, 933, 1199, + /* 329 */ 793, 1061, 956, 441, 441, 441, 441, 436, 441, 483, 432, 440, 449, 701, 592, 477, 968, 441, 913, 441, 441, + /* 350 */ 441, 441, 982, 1157, 887, 994, 1002, 449, 701, 1016, 1152, 509, 441, 441, 441, 441, 441, 441, 1042, 960, + /* 370 */ 1030, 1038, 986, 1054, 647, 1069, 1083, 569, 441, 441, 441, 441, 441, 441, 436, 539, 1089, 1097, 440, + /* 389 */ 1105, 1120, 1128, 1152, 1146, 441, 441, 441, 441, 441, 441, 436, 441, 690, 1170, 440, 1178, 800, 555, + /* 408 */ 1152, 569, 441, 441, 441, 441, 441, 441, 436, 608, 1075, 432, 1046, 449, 701, 592, 456, 509, 670, 1193, + /* 428 */ 441, 441, 441, 441, 1152, 1280, 60, 1, 1, 2, 3, 1540, 5, 0, 0, 0, 0, 0, 0, 0, 0, 43, 43, 43, 0, 0, 0, + /* 455 */ 687, 0, 0, 43, 19328, 43, 0, 83, 0, 0, 43, 19328, 43, 0, 28160, 0, 0, 43, 19328, 43, 80, 71, 0, 0, 43, + /* 480 */ 19328, 43, 81, 0, 0, 43, 687, 0, 0, 0, 1024, 19916, 60, 0, 18492, 1, 3, 0, 0, 37, 0, 0, 0, 0, 0, 3968, + /* 506 */ 54, 55, 18492, 0, 19200, 18492, 0, 0, 0, 0, 0, 54, 55, 68, 2176, 0, 43, 687, 2176, 0, 0, 2225, 1152, + /* 529 */ 1280, 54, 1, 1, 2, 3, 1540, 5, 1826, 0, 0, 0, 0, 0, 0, 5120, 0, 0, 2304, 0, 0, 0, 0, 584, 55, 18432, + /* 555 */ 19840, 0, 0, 18432, 1, 3, 0, 0, 41, 687, 0, 0, 0, 1024, 0, 19200, 18432, 0, 0, 0, 0, 0, 54, 55, 74, 1152, + /* 581 */ 1280, 60, 1, 1, 17310, 3, 1540, 5, 0, 35, 0, 19840, 60, 0, 18492, 1, 3, 0, 0, 42, 687, 0, 0, 0, 1024, 0, + /* 607 */ 19200, 0, 0, 0, 0, 0, 0, 1959, 1959, 2560, 0, 43, 687, 2560, 0, 0, 1024, 2612, 1280, 55, 1, 1, 2, 3, + /* 631 */ 1540, 18081, 0, 0, 0, 0, 0, 0, 0, 70, 54, 55, 18492, 55, 19200, 18432, 0, 0, 0, 0, 0, 54, 55, 18432, + /* 655 */ 1152, 1280, 56, 1, 1, 2, 3, 1540, 16707, 16707, 16707, 0, 0, 0, 687, 0, 0, 83, 0, 83, 89, 0, 0, 43, + /* 679 */ 19328, 43, 0, 0, 54, 19840, 75, 0, 18507, 1, 3, 0, 0, 43, 46, 0, 0, 0, 51, 0, 19200, 18507, 0, 0, 0, 0, + /* 705 */ 0, 54, 55, 18492, 1152, 1280, 60, 1, 1, 0, 3, 1540, 5, 0, 2432, 0, 0, 0, 0, 0, 54, 55, 18507, 43, 3328, + /* 730 */ 43, 0, 0, 0, 687, 0, 0, 91, 0, 0, 0, 0, 0, 88, 0, 0, 1, 2, 3, 32, 5, 0, 0, 36, 1152, 1280, 60, 1, 1, 2, + /* 760 */ 3, 0, 0, 91, 93, 0, 0, 0, 0, 4480, 0, 0, 0, 5, 1826, 0, 64, 0, 0, 0, 0, 85, 86, 0, 0, 0, 19200, 18492, + /* 788 */ 80, 0, 0, 1871, 3712, 0, 19200, 18492, 0, 0, 0, 1871, 0, 0, 5504, 0, 0, 54, 55, 18432, 5, 63, 0, 0, 0, 0, + /* 814 */ 0, 0, 28160, 0, 0, 68, 16709, 16709, 0, 0, 0, 687, 0, 0, 16707, 19328, 16707, 0, 0, 0, 19328, 16709, 0, + /* 837 */ 82, 0, 4224, 0, 0, 0, 0, 0, 4224, 0, 0, 28160, 0, 28160, 0, 0, 0, 0, 0, 0, 4352, 0, 0, 0, 0, 0, 4352, 43, + /* 865 */ 43, 43, 0, 0, 0, 687, 3200, 19840, 60, 0, 18492, 1, 0, 0, 3072, 4608, 0, 43, 19328, 43, 0, 28160, 0, 40, + /* 889 */ 43, 687, 0, 0, 0, 1024, 1152, 1280, 58, 1, 1, 2, 3, 1540, 4096, 58, 0, 18490, 1, 3, 0, 0, 43, 687, 0, 0, + /* 915 */ 0, 0, 3840, 0, 0, 0, 0, 19200, 18490, 0, 0, 0, 0, 0, 4096, 54, 55, 18490, 19911, 60, 0, 18492, 1, 3, + /* 939 */ 1871, 0, 53, 57, 1, 1, 2, 3, 1540, 84, 85, 86, 0, 85, 86, 0, 91, 0, 92, 91, 0, 0, 0, 0, 0, 4864, 0, 0, 0, + /* 968 */ 0, 19200, 18492, 0, 4992, 0, 0, 0, 43, 687, 0, 0, 1826, 1024, 28, 2, 31, 1540, 5, 0, 0, 0, 0, 0, 0, 66, + /* 994 */ 1152, 1280, 60, 28, 28, 2, 34238, 1540, 5, 0, 0, 0, 0, 65, 0, 0, 43, 687, 0, 48, 48, 1024, 19840, 60, 0, + /* 1019 */ 18492, 28, 34238, 0, 0, 43, 687, 0, 4352, 4352, 1024, 0, 4864, 43, 4908, 0, 0, 0, 1024, 1152, 1280, 4923, + /* 1041 */ 33597, 29, 2, 3, 1540, 5, 0, 0, 0, 0, 0, 1959, 0, 43, 43, 43, 687, 0, 0, 44, 0, 85, 86, 0, 85, 86, 0, 91, + /* 1069 */ 19840, 0, 77, 18432, 78, 3, 0, 0, 43, 687, 1959, 1959, 1959, 1024, 0, 4736, 43, 19328, 43, 0, 0, 0, 43, + /* 1092 */ 5165, 0, 0, 0, 5170, 5170, 5170, 5120, 1, 33536, 2, 3, 1540, 43, 43, 43, 0, 687, 0, 45, 0, 85, 86, 0, 85, + /* 1117 */ 86, 90, 91, 0, 5248, 0, 0, 0, 54, 55, 18432, 19840, 0, 0, 18432, 33536, 3, 0, 0, 43, 19328, 0, 0, 0, 0, + /* 1142 */ 71, 54, 55, 18492, 0, 19200, 18432, 0, 0, 33536, 0, 0, 43, 19328, 43, 0, 0, 0, 38, 0, 0, 0, 0, 2688, 0, + /* 1167 */ 54, 457, 18432, 51, 51, 5376, 1, 1, 2, 3, 1540, 43, 43, 43, 0, 0, 687, 46, 0, 85, 86, 87, 85, 86, 0, 91, + /* 1193 */ 89, 0, 89, 0, 0, 0, 0, 0, 43, 19328, 43, 0, 71, 0 + }; + + private static final int[] EXPECTED = + { + /* 0 */ 65, 47, 107, 121, 51, 58, 62, 65, 47, 118, 69, 54, 73, 77, 81, 85, 143, 89, 135, 96, 100, 101, 102, 106, + /* 24 */ 107, 132, 92, 115, 112, 125, 107, 107, 129, 141, 107, 149, 107, 108, 107, 139, 107, 107, 147, 107, 107, + /* 45 */ 107, 107, 4096, 8192, 262144, 8388608, 589824, 0, 37748738, 16, 16, 16, 16777216, 606208, 16793600, + /* 60 */ 16801792, 196736, 1573120, -134217216, -1671397364, 32, 64, 1024, 2048, 4194306, 33554434, 2, 16, 131072, + /* 74 */ 0, 0, 1048576, -134217728, 8, 4, 6324224, -1677721600, 32768, 0, 32768, 32, 1024, 8192, 8388608, 2, + /* 90 */ 805306368, 134217728, 8, 32, 128, 1, 0, 32, 8192, 268435456, 536870912, 134217728, 134217728, 536870912, + /* 104 */ 134217728, 536870912, 536870912, 0, 0, 0, 0, 64, 160, 0, 2, 2, 0, 48, 0, 0, 16384, 0, 0, 16384, 16384, + /* 125 */ 1408, 1408, 1408, 736, 0, 4, 8, 0, 1, 2, 4, 2097152, 32768, 268435456, 0, 8, 16, 0, 0, 0, 2, 4194304, 0, + /* 148 */ 64, 0, 0, 256, 1024 + }; + + private static final String[] TOKEN = + { + "%ERROR", + "CommentContents", + "AposStringLiteral", + "QuotStringLiteral", + "StringTemplateFixedPart", + "StringConstructorChars", + "PragmaContents", + "QuotAttrContentChar", + "AposAttrContentChar", + "ElementContentChar", + "CDataSectionContents", + "DirCommentContents", + "DirPIContents", + "QName", + "S", + "OtherEnclosedExprContent", + "'\"'", + "'\"\"'", + "'#)'", + "''''", + "''''''", + "'(#'", + "'(:'", + "'-->'", + "'/>'", + "':)'", + "'<'", + "'<'", + "'