diff --git a/basex-core/.checkstyle b/basex-core/.checkstyle
index 39a2c081f3..0b44be49b2 100644
--- a/basex-core/.checkstyle
+++ b/basex-core/.checkstyle
@@ -9,6 +9,7 @@
+
diff --git a/basex-core/src/main/java/org/basex/query/QueryParser.java b/basex-core/src/main/java/org/basex/query/QueryParser.java
index 385bf77a80..43e6986bd6 100644
--- a/basex-core/src/main/java/org/basex/query/QueryParser.java
+++ b/basex-core/src/main/java/org/basex/query/QueryParser.java
@@ -36,6 +36,7 @@
import org.basex.query.util.hash.*;
import org.basex.query.util.list.*;
import org.basex.query.util.parse.*;
+import org.basex.query.util.rex.*;
import org.basex.query.value.array.*;
import org.basex.query.value.item.*;
import org.basex.query.value.seq.*;
@@ -94,8 +95,6 @@ public class QueryParser extends InputParser {
private final HashSet decl = new HashSet<>();
/** Output declarations. */
private final HashMap sparams = new HashMap<>();
- /** QName cache. */
- private final QNmResolver qnames = new QNmResolver();
/** Local variable. */
private final LocalVars localVars = new LocalVars(this);
@@ -110,6 +109,8 @@ public class QueryParser extends InputParser {
private QueryError alter;
/** Alternative position. */
private int alterPos;
+ /** Attribute value scanner. */
+ private AttributeValueScanner attributeValueScanner;
/**
* Constructor.
@@ -261,7 +262,6 @@ private void finish(final MainModule mm) throws QueryException {
}
// completes the parsing step
- qnames.resolve(this, 0, sc.elemNS);
if(sc.elemNS != null) sc.ns.add(EMPTY, sc.elemNS, null);
RecordType.resolveRefs(recordTypeRefs, namedRecordTypes);
}
@@ -2085,10 +2085,7 @@ private void validate() throws QueryException {
final int p = pos;
if(!wsConsumeWs(VALIDATE)) return;
- if(consume(TYPE)) {
- final InputInfo ii = info();
- qnames.add(eQName(SKIPCHECK, QNAME_X), sc, ii);
- }
+ if(consume(TYPE)) eQName(sc.elemNS, QNAME_X);
consume(STRICT);
consume(LAX);
skipWs();
@@ -2471,7 +2468,7 @@ private ExprInfo simpleNodeTest(final Kind kind, final boolean all) throws Query
}
}
// name test: prefix:name, name, Q{uri}name
- if(kind == Kind.ELEMENT) qnames.add(name, sc, ii); else qnames.add(name, false, ii);
+ resolveQNm(name, kind == Kind.ELEMENT ? sc.elemNS : null, ii);
return nameTest.apply(name, scope);
}
}
@@ -3092,10 +3089,6 @@ private Expr dirElement(final boolean root) throws QueryException {
final int size = qc.ns.size();
final byte[] nse = sc.elemNS;
final byte[] nsd = sc.dirNS;
- final int npos = qnames.size();
-
- final QNm name = new QNm(qnm);
- qnames.add(name, true, ii);
final Atts ns = new Atts();
final ExprList cont = new ExprList();
@@ -3103,104 +3096,126 @@ private Expr dirElement(final boolean root) throws QueryException {
// parse attributes
boolean xmlDecl = false; // xml prefix explicitly declared?
ArrayList atts = null;
- while(true) {
- final byte[] atn = qName(null);
- if(atn.length == 0) break;
-
- final ExprList attv = new ExprList();
- consumeWS();
- if(root) {
- if(!consume('=')) return null;
- } else {
- check('=');
- }
- consumeWS();
- final int delim = consume();
- if(!quote(delim)) throw error(NOQUOTE_X, found());
- final TokenBuilder tb = new TokenBuilder();
- boolean simple = true;
+ // remember start of attribute list
+ final int attrPos = pos;
+ for(int pass = 0; pass < 2; pass++) {
+ final boolean processXmlns = pass == 0;
+ pos = attrPos;
while(true) {
- while(!consume(delim)) {
- cp = current();
- switch(cp) {
- case '{':
- if(next() == '{') {
- tb.add(consume());
- consume();
- } else {
- final byte[] text = tb.next();
- if(text.length == 0) {
- add(attv, enclosedExpr());
- simple = false;
+ final byte[] atn = qName(null);
+ if(atn.length == 0) break;
+ consumeWS();
+ if(root) {
+ if(!consume('=')) return null;
+ } else {
+ check('=');
+ }
+ consumeWS();
+
+ final int len = attributeValueScanner().length(pos);
+ final boolean hasXmlnsPrefix = startsWith(atn, XMLNS_COLON);
+ final boolean isXmlns = hasXmlnsPrefix || eq(atn, XMLNS);
+ if(processXmlns != isXmlns) {
+ if(len >= 0) {
+ pos += len;
+ consumeWS();
+ continue;
+ }
+ Util.debugln("Failed to detect attribute value length: "
+ + new String(input, pos, Math.min(20, input.length - pos)) + "...");
+ }
+
+ final ExprList attv = new ExprList();
+ final int delim = consume();
+ if(!quote(delim)) throw error(NOQUOTE_X, found());
+ final TokenBuilder tb = new TokenBuilder();
+
+ boolean simple = true;
+ while(true) {
+ while(!consume(delim)) {
+ cp = current();
+ switch(cp) {
+ case '{':
+ if(next() == '{') {
+ tb.add(consume());
+ consume();
} else {
- add(attv, Str.get(text));
+ final byte[] text = tb.next();
+ if(text.length == 0) {
+ add(attv, enclosedExpr());
+ simple = false;
+ } else {
+ add(attv, Str.get(text));
+ }
}
- }
- break;
- case '}':
- consume();
- check('}');
- tb.add('}');
- break;
- case '<':
- case 0:
- throw error(NOQUOTE_X, found());
- case '\n':
- case '\t':
- tb.add(' ');
- consume();
- break;
- case '\r':
- if(next() != '\n') tb.add(' ');
- consume();
- break;
- default:
- entity(tb);
- break;
+ break;
+ case '}':
+ consume();
+ check('}');
+ tb.add('}');
+ break;
+ case '<':
+ case 0:
+ throw error(NOQUOTE_X, found());
+ case '\n':
+ case '\t':
+ tb.add(' ');
+ consume();
+ break;
+ case '\r':
+ if(next() != '\n') tb.add(' ');
+ consume();
+ break;
+ default:
+ entity(tb);
+ break;
+ }
}
+ if(!consume(delim)) break;
+ tb.add(delim);
}
- if(!consume(delim)) break;
- tb.add(delim);
- }
- if(!tb.isEmpty()) add(attv, Str.get(tb.finish()));
-
- // parse namespace declarations
- final boolean pr = startsWith(atn, XMLNS_COLON);
- if(pr || eq(atn, XMLNS)) {
- if(!simple) throw error(NSCONS);
- final byte[] prefix = pr ? local(atn) : EMPTY;
- final byte[] uri = attv.isEmpty() ? EMPTY : ((Str) attv.get(0)).string();
- if(eq(prefix, XML) && eq(uri, XML_URI)) {
- if(xmlDecl) throw error(DUPLNSDEF_X, XML);
- xmlDecl = true;
- } else {
- if(!Uri.get(uri).isValid()) throw error(INVURI_X, uri);
- if(pr) {
- if(uri.length == 0) throw error(NSEMPTYURI);
- if(eq(prefix, XML, XMLNS)) throw error(BINDXML_X, prefix);
- if(eq(uri, XML_URI)) throw error(BINDXMLURI_X_X, uri, XML);
- if(eq(uri, XMLNS_URI)) throw error(BINDXMLURI_X_X, uri, XMLNS);
- qc.ns.add(prefix, uri);
+ if(!tb.isEmpty()) add(attv, Str.get(tb.finish()));
+
+ // parse namespace declarations
+ if(isXmlns) {
+ if(!simple) throw error(NSCONS);
+ final byte[] prefix = hasXmlnsPrefix ? local(atn) : EMPTY;
+ final byte[] uri = attv.isEmpty() ? EMPTY : ((Str) attv.get(0)).string();
+ if(eq(prefix, XML) && eq(uri, XML_URI)) {
+ if(xmlDecl) throw error(DUPLNSDEF_X, XML);
+ xmlDecl = true;
} else {
- if(eq(uri, XML_URI)) throw error(XMLNSDEF_X, uri);
- sc.dirNS = uri;
- if(!sc.elemNsFixed) sc.elemNS = sc.dirNS;
+ if(!Uri.get(uri).isValid()) throw error(INVURI_X, uri);
+ if(hasXmlnsPrefix) {
+ if(uri.length == 0) throw error(NSEMPTYURI);
+ if(eq(prefix, XML, XMLNS)) throw error(BINDXML_X, prefix);
+ if(eq(uri, XML_URI)) throw error(BINDXMLURI_X_X, uri, XML);
+ if(eq(uri, XMLNS_URI)) throw error(BINDXMLURI_X_X, uri, XMLNS);
+ qc.ns.add(prefix, uri);
+ } else {
+ if(eq(uri, XML_URI)) throw error(XMLNSDEF_X, uri);
+ sc.dirNS = uri;
+ if(!sc.elemNsFixed) sc.elemNS = sc.dirNS;
+ }
+ if(ns.contains(prefix)) throw error(DUPLNSDEF_X, prefix);
+ ns.add(prefix, uri);
}
- if(ns.contains(prefix)) throw error(DUPLNSDEF_X, prefix);
- ns.add(prefix, uri);
+ } else {
+ final QNm attn = new QNm(atn);
+ if(atts == null) atts = new ArrayList<>(1);
+ atts.add(attn);
+ resolveQNm(attn, null, info());
+ add(cont, new CAttr(info(), false, attn, attv.finish()));
}
- } else {
- final QNm attn = new QNm(atn);
- if(atts == null) atts = new ArrayList<>(1);
- atts.add(attn);
- qnames.add(attn, false, info());
- add(cont, new CAttr(info(), false, attn, attv.finish()));
+ if(!consumeWS()) break;
}
- if(!consumeWS()) break;
}
+ final QNm name = new QNm(qnm);
+ resolveQNm(name, sc.dirNS, ii);
+
if(consume('/')) {
check('>');
} else {
@@ -3217,8 +3232,6 @@ private Expr dirElement(final boolean root) throws QueryException {
if(!eq(name.string(), close)) throw error(TAGWRONG_X_X, name.string(), close);
}
- qnames.resolve(this, npos, sc.dirNS);
-
// check for duplicate attribute names
if(atts != null) {
final int as = atts.size();
@@ -3235,6 +3248,15 @@ private Expr dirElement(final boolean root) throws QueryException {
return new CElem(info(), false, name, ns, cont.finish());
}
+ /**
+  * Provides the attribute value scanner, creating it from the parser input on first access
+  * and reusing the same instance afterwards.
+  * @return attribute value scanner
+  */
+ private AttributeValueScanner attributeValueScanner() {
+   AttributeValueScanner scanner = attributeValueScanner;
+   if(scanner == null) {
+     scanner = new AttributeValueScanner(input);
+     attributeValueScanner = scanner;
+   }
+   return scanner;
+ }
+
/**
* Parses the "DirElemContent" rule.
* @param name name of opening element
@@ -3371,9 +3393,8 @@ private Expr compConstructor() throws QueryException {
* @throws QueryException query exception
*/
private Expr compElement() throws QueryException {
- final Expr name = compName(NOELEMNAME, true);
+ final Expr name = compName(NOELEMNAME, true, sc.elemNS);
if(name == null) return null;
- if(name instanceof final QNm qnm) qnames.add(qnm, sc, info());
skipWs();
return current('{') ? new CElem(info(), true, name, new Atts(), enclosedExpr()) : null;
}
@@ -3384,9 +3405,8 @@ private Expr compElement() throws QueryException {
* @throws QueryException query exception
*/
private Expr compAttribute() throws QueryException {
- final Expr name = compName(NOATTNAME, true);
+ final Expr name = compName(NOATTNAME, true, null);
if(name == null) return null;
- if(name instanceof final QNm qnm) qnames.add(qnm, false, info());
skipWs();
return current('{') ? new CAttr(info(), true, name, enclosedExpr()) : null;
}
@@ -3397,7 +3417,7 @@ private Expr compAttribute() throws QueryException {
* @throws QueryException query exception
*/
private Expr compNamespace() throws QueryException {
- final Expr name = compName(NONSNAME, false);
+ final Expr name = compName(NONSNAME, false, null);
if(name == null) return null;
skipWs();
return current('{') ? new CNSpace(info(), true, name, enclosedExpr()) : null;
@@ -3409,7 +3429,7 @@ private Expr compNamespace() throws QueryException {
* @throws QueryException query exception
*/
private Expr compPI() throws QueryException {
- final Expr name = compName(NOPINAME, false);
+ final Expr name = compName(NOPINAME, false, null);
if(name == null) return null;
skipWs();
return current('{') ? new CPI(info(), true, name, enclosedExpr()) : null;
@@ -3419,10 +3439,12 @@ private Expr compPI() throws QueryException {
* Parses a computed name.
* @param error error message
* @param qname QName or NCName
+ * @param ns default namespace (can be {@code null})
* @return name or {@code null}
* @throws QueryException query exception
*/
- private Expr compName(final QueryError error, final boolean qname) throws QueryException {
+ private Expr compName(final QueryError error, final boolean qname, final byte[] ns)
+ throws QueryException {
// parse name enclosed in curly braces
if(consume("{")) {
final Expr name = check(expr(), error);
@@ -3432,7 +3454,7 @@ private Expr compName(final QueryError error, final boolean qname) throws QueryE
// parse literal name
consume("#");
skipWs();
- if(qname) return eQName(SKIPCHECK, null);
+ if(qname) return eQName(ns, null);
// parse name enclosed in quotes
final byte[] string = ncName(null, false);
@@ -4801,4 +4823,21 @@ public static String removeComments(final String query, final int max) {
public final InputInfo info() {
return new InputInfo(this, sc);
}
+
+ /**
+  * Completes the given QName by assigning its namespace URI. A prefixed name receives the URI
+  * bound to its prefix; an unprefixed name receives the supplied default element namespace,
+  * if there is one.
+  * @param name QName to be resolved
+  * @param elemNS default element namespace (may be {@code null}, when resolving attribute names)
+  * @param info input info
+  * @throws QueryException query exception (raised if the prefix yields no namespace URI)
+  */
+ private void resolveQNm(final QNm name, final byte[] elemNS, final InputInfo info)
+     throws QueryException {
+   if(!name.hasPrefix()) {
+     // unprefixed name: only touched when a default namespace was passed in
+     if(elemNS != null) name.uri(elemNS);
+     return;
+   }
+   // prefixed name: look up the prefix and reject names that remain without a URI
+   name.uri(qc.ns.resolve(name.prefix(), sc));
+   if(!name.hasURI()) throw error(NOURI_X, info, name.prefix());
+ }
}
diff --git a/basex-core/src/main/java/org/basex/query/util/parse/QNmResolver.java b/basex-core/src/main/java/org/basex/query/util/parse/QNmResolver.java
deleted file mode 100644
index 888dd76870..0000000000
--- a/basex-core/src/main/java/org/basex/query/util/parse/QNmResolver.java
+++ /dev/null
@@ -1,81 +0,0 @@
-package org.basex.query.util.parse;
-
-import static org.basex.query.QueryError.*;
-
-import java.util.*;
-
-import org.basex.query.*;
-import org.basex.query.value.item.*;
-import org.basex.util.*;
-
-/**
- * Resolves namespace URIs of QNames whose resolution must be deferred until the surrounding
- * namespace context is known.
- *
- * @author BaseX Team, BSD License
- * @author Christian Gruen
- */
-public final class QNmResolver {
- /**
- * Entry for a QName whose namespace URI still needs to be resolved.
- * @param name QName to be resolved
- * @param nsElem flag for assigning default element namespace
- * @param info input info (can be {@code null})
- */
- private record Entry(QNm name, boolean nsElem, InputInfo info) { }
- /** QNames to be resolved. */
- private final ArrayList entries = new ArrayList<>();
-
- /**
- * Adds a QName unless it already has a namespace URI or it can be immediately assigned the fixed
- * default namespace URI. This method must not be called for the element names of direct element
- * constructors.
- * @param name QName
- * @param sc static context
- * @param info input info (can be {@code null})
- */
- public void add(final QNm name, final StaticContext sc, final InputInfo info) {
- if(sc.elemNsFixed && !name.hasPrefix() && !name.hasURI()) name.uri(sc.elemNS);
- else add(name, true, info);
- }
-
- /**
- * Adds a QName unless it already has a namespace URI.
- * @param name qname
- * @param nsElem default check
- * @param info input info (can be {@code null})
- */
- public void add(final QNm name, final boolean nsElem, final InputInfo info) {
- if(!name.hasURI()) entries.add(new Entry(name, nsElem, info));
- }
-
- /**
- * Finalizes the QNames by assigning namespace URIs.
- * @param qp query parser
- * @param npos first entry to be checked
- * @param elemNS default element namespace
- * @throws QueryException query exception
- */
- public void resolve(final QueryParser qp, final int npos, final byte[] elemNS)
- throws QueryException {
- for(int i = entries.size() - 1; i >= npos; --i) {
- final Entry entry = entries.get(i);
- if(entry.name.hasPrefix()) {
- entry.name.uri(qp.qc.ns.resolve(entry.name.prefix(), qp.sc));
- if(npos == 0 && !entry.name.hasURI())
- throw qp.error(NOURI_X, entry.info, entry.name.prefix());
- } else if(entry.nsElem) {
- entry.name.uri(elemNS);
- }
- if(entry.name.hasURI()) entries.remove(i);
- }
- }
-
- /**
- * Returns the number of remaining QNames.
- * @return number
- */
- public int size() {
- return entries.size();
- }
-}
diff --git a/basex-core/src/main/java/org/basex/query/util/rex/AttributeValueScanner.ebnf b/basex-core/src/main/java/org/basex/query/util/rex/AttributeValueScanner.ebnf
new file mode 100644
index 0000000000..ad0e8a5bda
--- /dev/null
+++ b/basex-core/src/main/java/org/basex/query/util/rex/AttributeValueScanner.ebnf
@@ -0,0 +1,219 @@
+
+DirAttributeValue
+ ::= '"' ( EscapeQuot | QuotAttrValueContent )* '"'
+ | "'" ( EscapeApos | AposAttrValueContent )* "'"
+EscapeQuot
+ ::= '""'
+EscapeApos
+ ::= "''"
+QuotAttrValueContent
+ ::= QuotAttrContentChar
+ | CommonContent
+CommonContent
+ ::= '{{'
+ | '}}'
+ | EnclosedExpr
+AposAttrValueContent
+ ::= AposAttrContentChar
+ | CommonContent
+DirElemContent
+ ::= DirectConstructor
+ | CDataSection
+ | CommonContent
+ | ElementContentChar
+EnclosedExpr
+ ::= '{' EnclosedExprContent* '}'
+EnclosedExprContent
+ ::= Comment
+ | StringLiteral
+ | StringConstructor
+ | StringTemplate
+ | Pragma
+ | DirectConstructor
+ | EnclosedExpr
+ | '<'^GeneralComp
+ | OtherEnclosedExprContent
+Comment ::= '(:' ( CommentContents | Comment )* ':)'
+StringLiteral
+ ::= AposStringLiteral
+ | QuotStringLiteral
+StringConstructor
+ ::= '``[' StringConstructorContent ']``'
+StringConstructorContent
+ ::= StringConstructorChars ( StringInterpolation StringConstructorChars )*
+StringInterpolation
+ ::= '`' EnclosedExpr '`'
+StringTemplate
+ ::= '`' ( StringTemplateFixedPart | StringTemplateVariablePart )* '`'
+StringTemplateVariablePart
+ ::= EnclosedExpr
+Pragma ::= '(#' PragmaContents '#)'
+DirectConstructor
+ ::= DirElemConstructor
+ | DirCommentConstructor
+ | DirPIConstructor
+DirElemConstructor
+ ::= '<'^DirElemConstructor QName DirAttributeList ( '/>' | '>' DirElemContent* '</' QName S? '>' )
+DirAttributeList
+ ::= ( S ( QName S? '=' S? DirAttributeValue )? )*
+CDataSection
+ ::= '<![CDATA[' CDataSectionContents ']]>'
+DirCommentConstructor
+ ::= '<!--' DirCommentContents '-->'
+DirPIConstructor
+ ::= '<?' DirPIContents '?>'
+
+
+
+CommentContents
+ ::= ( Char+ - ( Char* ( '(:' | ':)' ) Char* ) ) - ( Char* '(' ) & ':'
+ | Char+ - ( Char* ( '(:' | ':)' ) Char* ) & '('
+AposStringLiteral
+ ::= "'" ( EscapeApos | [^'] )* "'"
+EscapeApos
+ ::= "''"
+QuotStringLiteral
+ ::= '"' ( EscapeQuot | [^"] )* '"'
+EscapeQuot
+ ::= '""'
+StringTemplateFixedPart
+ ::= ( Char - ( '{' | '}' | '`' ) | '{{' | '}}' | '``' )+
+StringConstructorChars
+ ::= Char* - ( Char* ( '`{' | ']``' ) Char* ) & ( '`{' | ']`' )
+PragmaContents
+ ::= Char* - ( Char* '#)' Char* ) & '#'
+QuotAttrContentChar
+ ::= Char - ["{}<]
+AposAttrContentChar
+ ::= Char - ['{}<]
+ElementContentChar
+ ::= Char - [{}<]
+CDataSectionContents
+ ::= Char* - ( Char* ']]>' Char* ) & ']]'
+DirCommentContents
+ ::= ( Char - '-' | '-' ( Char - '-' ) )*
+DirPIContents
+ ::= Char* - ( Char* '?>' Char* ) & '?'
+QName ::= PrefixedName
+ | UnprefixedName
+PrefixedName
+ ::= Prefix ':' LocalPart
+Prefix ::= NCName
+LocalPart
+ ::= NCName
+UnprefixedName
+ ::= LocalPart
+NCName ::= Name - ( Char* ':' Char* )
+Name ::= NameStartChar NameChar*
+NameStartChar
+ ::= ':'
+ | [A-Z]
+ | '_'
+ | [a-z]
+ | [#xC0-#xD6]
+ | [#xD8-#xF6]
+ | [#xF8-#x2FF]
+ | [#x370-#x37D]
+ | [#x37F-#x1FFF]
+ | [#x200C-#x200D]
+ | [#x2070-#x218F]
+ | [#x2C00-#x2FEF]
+ | [#x3001-#xD7FF]
+ | [#xF900-#xFDCF]
+ | [#xFDF0-#xFFFD]
+ | [#x10000-#xEFFFF]
+NameChar ::= NameStartChar
+ | '-'
+ | '.'
+ | [0-9]
+ | #xB7
+ | [#x0300-#x036F]
+ | [#x203F-#x2040]
+Char ::= #x9
+ | #xA
+ | #xD
+ | [#x20-#xD7FF]
+ | [#xE000-#xFFFD]
+ | [#x10000-#x10FFFF]
+S ::= ( #x20 | #x9 | #xD | #xA )+
+OtherEnclosedExprContent
+ ::= ( Char* '(' ) - ( Char* ( ["'`{}<] | '(#' | '(:' | ''
+ consume(23); // '-->'
+ }
+
+ /**
+  * Parses the DirPIConstructor rule: an opening delimiter (token code 31), the DirPIContents
+  * token (code 12) and the closing '?>' token (code 34). Each consume() call throws via
+  * error() if the lookahead token does not have the expected code.
+  * NOTE(review): no lookahead1() call precedes consume(31), so the caller is presumably
+  * expected to have fetched that token already - confirm.
+  */
+ private void parse_DirPIConstructor()
+ {
+ consume(31); // '<?'
+ lookahead1(4); // DirPIContents
+ consume(12); // DirPIContents
+ lookahead1(10); // '?>'
+ consume(34); // '?>'
+ }
+
+ /**
+  * Accepts the current lookahead token if it has the expected code; otherwise reports
+  * a parse error.
+  * @param t expected token code
+  */
+ private void consume(int t)
+ {
+   if (l1 != t)
+   {
+     // mismatch: error() throws a ParseException describing lookahead and expected token
+     error(b1, e1, 0, l1, t);
+     return;
+   }
+   // remember the bounds of the accepted token and clear the lookahead slot
+   b0 = b1;
+   e0 = e1;
+   l1 = 0;
+ }
+
+ /**
+  * Fetches the next token into the lookahead slot, unless a lookahead token is already
+  * present.
+  * @param tokenSetId token set passed on to match()
+  */
+ private void lookahead1(int tokenSetId)
+ {
+   // a non-zero code means a token has already been fetched and not yet consumed
+   if (l1 != 0) return;
+   l1 = match(tokenSetId);
+   b1 = begin;
+   e1 = end;
+ }
+
+ private int error(int b, int e, int s, int l, int t)
+ {
+ throw new ParseException(b, e, s, l, t);
+ }
+
+ // Begin/end offsets of the most recently consumed token; copied from b1/e1 by consume().
+ private int b0, e0;
+ // Lookahead token code (0 = none fetched) and its begin/end offsets, set by lookahead1().
+ private int l1, b1, e1;
+ // Character sequence being scanned.
+ private CharSequence input = null;
+ // Upper bound for reads from input; match() treats positions >= size as character 0.
+ private int size = 0;
+ // Begin and end offsets of the token most recently matched by match().
+ private int begin = 0;
+ private int end = 0;
+
+ /**
+  * Matches the next token of the given token set, starting at the current {@code end}
+  * offset. On success, {@code begin} and {@code end} hold the bounds of the matched token.
+  * If no token matches, {@code error()} is called, which throws a ParseException.
+  * @param tokenSetId index into {@code INITIAL} selecting the start state
+  * @return code of the matched token
+  */
+ private int match(int tokenSetId)
+ {
+   boolean nonbmp = false;
+   begin = end;
+   int current = end;
+   int result = INITIAL[tokenSetId];
+   int state = 0;
+
+   // run the automaton; the low 7 bits of a table entry are the next state (0 = stop)
+   for (int code = result & 127; code != 0; )
+   {
+     int charclass;
+     // positions past the end of the input are read as character 0
+     int c0 = current < size ? input.charAt(current) : 0;
+     ++current;
+     if (c0 < 0x80)
+     {
+       charclass = MAP0[c0];
+     }
+     else if (c0 < 0xd800)
+     {
+       int c1 = c0 >> 4;
+       charclass = MAP1[(c0 & 15) + MAP1[(c1 & 31) + MAP1[c1 >> 5]]];
+     }
+     else
+     {
+       if (c0 < 0xdc00)
+       {
+         // high surrogate: combine with a following low surrogate into one code point
+         int c1 = current < size ? input.charAt(current) : 0;
+         if (c1 >= 0xdc00 && c1 < 0xe000)
+         {
+           nonbmp = true;
+           ++current;
+           c0 = ((c0 & 0x3ff) << 10) + (c1 & 0x3ff) + 0x10000;
+         }
+       }
+
+       // binary search over the ranges stored in MAP2 (lower bounds, upper bounds, classes)
+       int lo = 0, hi = 5;
+       for (int m = 3; ; m = (hi + lo) >> 1)
+       {
+         if (MAP2[m] > c0) {hi = m - 1;}
+         else if (MAP2[6 + m] < c0) {lo = m + 1;}
+         else {charclass = MAP2[12 + m]; break;}
+         if (lo > hi) {charclass = 0; break;}
+       }
+     }
+
+     state = code;
+     int i0 = (charclass << 7) + code - 1;
+     code = TRANSITION[(i0 & 7) + TRANSITION[i0 >> 3]];
+
+     // an entry above 127 carries a result: record it together with the current offset
+     if (code > 127)
+     {
+       result = code;
+       code &= 127;
+       end = current;
+     }
+   }
+
+   result >>= 7;
+   if (result == 0)
+   {
+     // no result recorded: report the error at the last character read
+     end = current - 1;
+     int c1 = end < size ? input.charAt(end) : 0;
+     if (c1 >= 0xdc00 && c1 < 0xe000)
+     {
+       --end;
+     }
+     return error(begin, end, state, -1, -1);
+   }
+   else if ((result & 64) != 0)
+   {
+     // token end is counted forward from begin, in code points
+     end = begin;
+     if (nonbmp)
+     {
+       for (int i = result >> 7; i > 0; --i)
+       {
+         int c1 = end < size ? input.charAt(end) : 0;
+         ++end;
+         // fixed: high surrogates are 0xd800..0xdbff; the former bound 0xdc000 also
+         // matched low surrogates and U+E000..U+FFFF, skipping one char too many
+         if (c1 >= 0xd800 && c1 < 0xdc00)
+         {
+           ++end;
+         }
+       }
+     }
+     else
+     {
+       end += (result >> 7);
+     }
+   }
+   else if (nonbmp)
+   {
+     // token end is moved backwards, in code points; step over low surrogates
+     for (int i = result >> 7; i > 0; --i)
+     {
+       --end;
+       int c1 = end < size ? input.charAt(end) : 0;
+       if (c1 >= 0xdc00 && c1 < 0xe000)
+       {
+         --end;
+       }
+     }
+   }
+   else
+   {
+     end -= result >> 7;
+   }
+
+   if (end > size) end = size;
+   return (result & 63) - 1;
+ }
+
+ /**
+  * Collects the names of the tokens that are expected for a token set or scanner state.
+  * @param tokenSetId token set index; a negative value is negated and used directly as
+  *   the state
+  * @return names of the expected tokens, taken from {@code TOKEN}
+  */
+ private static String[] getTokenSet(int tokenSetId)
+ {
+   // typed list: with a raw ArrayList, toArray(String[]) would yield Object[] and not compile
+   java.util.ArrayList<String> expected = new java.util.ArrayList<>();
+   int s = tokenSetId < 0 ? - tokenSetId : INITIAL[tokenSetId] & 127;
+   // each EXPECTED entry is a mask covering 32 token codes, starting at code i
+   for (int i = 0; i < 43; i += 32)
+   {
+     int j = i;
+     int i0 = (i >> 5) * 93 + s - 1;
+     int f = EXPECTED[(i0 & 3) + EXPECTED[i0 >> 2]];
+     // a set bit at position (j - i) marks TOKEN[j] as expected
+     for ( ; f != 0; f >>>= 1, ++j)
+     {
+       if ((f & 1) != 0)
+       {
+         expected.add(TOKEN[j]);
+       }
+     }
+   }
+   return expected.toArray(new String[]{});
+ }
+
+ // Character class for each code point below 0x80; match() indexes it directly
+ // with the code point.
+ private static final int[] MAP0 =
+ {
+ /* 0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 26, 26, 0, 0, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 26, 1,
+ /* 34 */ 2, 3, 4, 4, 4, 5, 6, 7, 4, 4, 4, 8, 9, 10, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 11, 4, 12, 13, 14, 15, 4, 16, 17,
+ /* 67 */ 18, 19, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 20, 17, 17, 17, 17, 17, 17, 21, 4, 22,
+ /* 94 */ 4, 17, 23, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+ /* 121 */ 17, 17, 24, 4, 25, 4, 4
+ };
+
+ private static final int[] MAP1 =
+ {
+ /* 0 */ 108, 124, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 156, 181, 181, 181, 181,
+ /* 21 */ 181, 214, 215, 213, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214,
+ /* 42 */ 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214,
+ /* 63 */ 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214,
+ /* 84 */ 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214,
+ /* 105 */ 214, 214, 214, 254, 247, 270, 370, 286, 307, 347, 291, 408, 408, 408, 400, 348, 322, 348, 322, 348, 348,
+ /* 126 */ 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 364, 364, 364, 364, 364, 364, 364,
+ /* 147 */ 331, 348, 348, 348, 348, 348, 348, 348, 348, 386, 408, 408, 409, 407, 408, 408, 348, 348, 348, 348, 348,
+ /* 168 */ 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 408, 408, 408, 408, 408, 408, 408, 408,
+ /* 189 */ 408, 408, 408, 408, 408, 408, 408, 408, 408, 408, 408, 408, 408, 408, 408, 408, 408, 408, 408, 408, 408,
+ /* 210 */ 408, 408, 408, 329, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348,
+ /* 231 */ 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 348, 408, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 256 */ 0, 0, 0, 0, 0, 0, 0, 26, 26, 0, 0, 26, 0, 0, 26, 1, 2, 3, 4, 4, 4, 5, 6, 7, 4, 4, 4, 8, 9, 10, 4, 16, 17,
+ /* 289 */ 18, 19, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 24, 4, 25, 4, 4, 17, 17, 17, 17, 20, 17, 17, 17, 17,
+ /* 316 */ 17, 17, 21, 4, 22, 4, 17, 17, 17, 17, 17, 17, 17, 4, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
+ /* 343 */ 17, 17, 4, 17, 23, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 9, 9, 9, 9, 9, 9, 9, 9,
+ /* 372 */ 9, 9, 9, 9, 9, 9, 9, 9, 11, 4, 12, 13, 14, 15, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 17, 17, 4, 4, 4, 4, 4,
+ /* 405 */ 4, 4, 9, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 9
+ };
+
+ // Range table for code points of 0xd800 and above, searched by match() with a binary
+ // search. Three consecutive groups of six entries: [0..5] lower bounds of the ranges,
+ // [6..11] upper bounds, [12..17] character class of each range. Code points outside
+ // all ranges get character class 0.
+ private static final int[] MAP2 =
+ {
+ /* 0 */ 57344, 63744, 64976, 65008, 65536, 983040, 63743, 64975, 65007, 65533, 983039, 1114111, 4, 17, 4, 17, 17,
+ /* 17 */ 4
+ };
+
+ // Start value of match() for each token set, indexed by token set id. The low 7 bits
+ // are the initial automaton state; match() shifts the remaining bits down by 7 when
+ // computing its result.
+ private static final int[] INITIAL =
+ {
+ /* 0 */ 1, 2, 3, 1540, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27
+ };
+
+ private static final int[] TRANSITION =
+ {
+ /* 0 */ 441, 441, 441, 441, 441, 441, 441, 441, 441, 441, 441, 441, 441, 441, 441, 441, 436, 441, 483, 432, 440,
+ /* 21 */ 449, 636, 491, 1152, 509, 441, 441, 441, 441, 441, 441, 436, 441, 520, 528, 440, 449, 547, 555, 676, 569,
+ /* 42 */ 441, 441, 441, 441, 441, 441, 584, 441, 483, 580, 440, 449, 572, 592, 1152, 606, 441, 441, 441, 441, 441,
+ /* 63 */ 441, 436, 441, 483, 432, 440, 449, 701, 592, 1152, 509, 441, 441, 441, 441, 441, 441, 436, 441, 616, 624,
+ /* 84 */ 440, 449, 1162, 555, 1152, 644, 441, 441, 441, 441, 441, 441, 436, 441, 561, 655, 440, 663, 720, 684,
+ /* 104 */ 826, 698, 441, 441, 441, 441, 441, 441, 436, 441, 483, 709, 717, 728, 701, 592, 1134, 509, 441, 441, 441,
+ /* 125 */ 441, 441, 441, 746, 441, 483, 754, 773, 449, 701, 592, 470, 785, 777, 735, 441, 441, 441, 441, 436, 441,
+ /* 146 */ 483, 432, 536, 449, 701, 592, 1199, 793, 777, 735, 441, 441, 441, 441, 436, 441, 1008, 432, 440, 449,
+ /* 166 */ 501, 592, 463, 509, 846, 441, 441, 441, 441, 441, 436, 441, 598, 432, 808, 819, 512, 592, 831, 606, 738,
+ /* 187 */ 441, 441, 441, 441, 441, 436, 441, 909, 940, 440, 449, 647, 555, 1152, 569, 441, 441, 441, 441, 441, 441,
+ /* 208 */ 436, 839, 483, 432, 440, 449, 701, 592, 1152, 509, 811, 848, 441, 441, 441, 441, 436, 856, 1022, 432,
+ /* 228 */ 765, 864, 701, 872, 880, 509, 846, 441, 441, 441, 441, 441, 628, 497, 483, 895, 632, 449, 925, 903, 1152,
+ /* 249 */ 921, 441, 441, 441, 441, 441, 441, 532, 441, 974, 432, 536, 449, 1138, 933, 1199, 793, 1112, 761, 441,
+ /* 269 */ 441, 441, 441, 532, 441, 974, 432, 536, 449, 1138, 933, 1199, 793, 1061, 735, 441, 441, 441, 441, 532,
+ /* 289 */ 441, 974, 432, 536, 449, 1138, 933, 1199, 793, 948, 735, 441, 441, 441, 441, 532, 441, 974, 432, 536,
+ /* 309 */ 449, 1138, 933, 1199, 793, 1185, 735, 441, 441, 441, 441, 532, 441, 974, 432, 536, 449, 1138, 933, 1199,
+ /* 329 */ 793, 1061, 956, 441, 441, 441, 441, 436, 441, 483, 432, 440, 449, 701, 592, 477, 968, 441, 913, 441, 441,
+ /* 350 */ 441, 441, 982, 1157, 887, 994, 1002, 449, 701, 1016, 1152, 509, 441, 441, 441, 441, 441, 441, 1042, 960,
+ /* 370 */ 1030, 1038, 986, 1054, 647, 1069, 1083, 569, 441, 441, 441, 441, 441, 441, 436, 539, 1089, 1097, 440,
+ /* 389 */ 1105, 1120, 1128, 1152, 1146, 441, 441, 441, 441, 441, 441, 436, 441, 690, 1170, 440, 1178, 800, 555,
+ /* 408 */ 1152, 569, 441, 441, 441, 441, 441, 441, 436, 608, 1075, 432, 1046, 449, 701, 592, 456, 509, 670, 1193,
+ /* 428 */ 441, 441, 441, 441, 1152, 1280, 60, 1, 1, 2, 3, 1540, 5, 0, 0, 0, 0, 0, 0, 0, 0, 43, 43, 43, 0, 0, 0,
+ /* 455 */ 687, 0, 0, 43, 19328, 43, 0, 83, 0, 0, 43, 19328, 43, 0, 28160, 0, 0, 43, 19328, 43, 80, 71, 0, 0, 43,
+ /* 480 */ 19328, 43, 81, 0, 0, 43, 687, 0, 0, 0, 1024, 19916, 60, 0, 18492, 1, 3, 0, 0, 37, 0, 0, 0, 0, 0, 3968,
+ /* 506 */ 54, 55, 18492, 0, 19200, 18492, 0, 0, 0, 0, 0, 54, 55, 68, 2176, 0, 43, 687, 2176, 0, 0, 2225, 1152,
+ /* 529 */ 1280, 54, 1, 1, 2, 3, 1540, 5, 1826, 0, 0, 0, 0, 0, 0, 5120, 0, 0, 2304, 0, 0, 0, 0, 584, 55, 18432,
+ /* 555 */ 19840, 0, 0, 18432, 1, 3, 0, 0, 41, 687, 0, 0, 0, 1024, 0, 19200, 18432, 0, 0, 0, 0, 0, 54, 55, 74, 1152,
+ /* 581 */ 1280, 60, 1, 1, 17310, 3, 1540, 5, 0, 35, 0, 19840, 60, 0, 18492, 1, 3, 0, 0, 42, 687, 0, 0, 0, 1024, 0,
+ /* 607 */ 19200, 0, 0, 0, 0, 0, 0, 1959, 1959, 2560, 0, 43, 687, 2560, 0, 0, 1024, 2612, 1280, 55, 1, 1, 2, 3,
+ /* 631 */ 1540, 18081, 0, 0, 0, 0, 0, 0, 0, 70, 54, 55, 18492, 55, 19200, 18432, 0, 0, 0, 0, 0, 54, 55, 18432,
+ /* 655 */ 1152, 1280, 56, 1, 1, 2, 3, 1540, 16707, 16707, 16707, 0, 0, 0, 687, 0, 0, 83, 0, 83, 89, 0, 0, 43,
+ /* 679 */ 19328, 43, 0, 0, 54, 19840, 75, 0, 18507, 1, 3, 0, 0, 43, 46, 0, 0, 0, 51, 0, 19200, 18507, 0, 0, 0, 0,
+ /* 705 */ 0, 54, 55, 18492, 1152, 1280, 60, 1, 1, 0, 3, 1540, 5, 0, 2432, 0, 0, 0, 0, 0, 54, 55, 18507, 43, 3328,
+ /* 730 */ 43, 0, 0, 0, 687, 0, 0, 91, 0, 0, 0, 0, 0, 88, 0, 0, 1, 2, 3, 32, 5, 0, 0, 36, 1152, 1280, 60, 1, 1, 2,
+ /* 760 */ 3, 0, 0, 91, 93, 0, 0, 0, 0, 4480, 0, 0, 0, 5, 1826, 0, 64, 0, 0, 0, 0, 85, 86, 0, 0, 0, 19200, 18492,
+ /* 788 */ 80, 0, 0, 1871, 3712, 0, 19200, 18492, 0, 0, 0, 1871, 0, 0, 5504, 0, 0, 54, 55, 18432, 5, 63, 0, 0, 0, 0,
+ /* 814 */ 0, 0, 28160, 0, 0, 68, 16709, 16709, 0, 0, 0, 687, 0, 0, 16707, 19328, 16707, 0, 0, 0, 19328, 16709, 0,
+ /* 837 */ 82, 0, 4224, 0, 0, 0, 0, 0, 4224, 0, 0, 28160, 0, 28160, 0, 0, 0, 0, 0, 0, 4352, 0, 0, 0, 0, 0, 4352, 43,
+ /* 865 */ 43, 43, 0, 0, 0, 687, 3200, 19840, 60, 0, 18492, 1, 0, 0, 3072, 4608, 0, 43, 19328, 43, 0, 28160, 0, 40,
+ /* 889 */ 43, 687, 0, 0, 0, 1024, 1152, 1280, 58, 1, 1, 2, 3, 1540, 4096, 58, 0, 18490, 1, 3, 0, 0, 43, 687, 0, 0,
+ /* 915 */ 0, 0, 3840, 0, 0, 0, 0, 19200, 18490, 0, 0, 0, 0, 0, 4096, 54, 55, 18490, 19911, 60, 0, 18492, 1, 3,
+ /* 939 */ 1871, 0, 53, 57, 1, 1, 2, 3, 1540, 84, 85, 86, 0, 85, 86, 0, 91, 0, 92, 91, 0, 0, 0, 0, 0, 4864, 0, 0, 0,
+ /* 968 */ 0, 19200, 18492, 0, 4992, 0, 0, 0, 43, 687, 0, 0, 1826, 1024, 28, 2, 31, 1540, 5, 0, 0, 0, 0, 0, 0, 66,
+ /* 994 */ 1152, 1280, 60, 28, 28, 2, 34238, 1540, 5, 0, 0, 0, 0, 65, 0, 0, 43, 687, 0, 48, 48, 1024, 19840, 60, 0,
+ /* 1019 */ 18492, 28, 34238, 0, 0, 43, 687, 0, 4352, 4352, 1024, 0, 4864, 43, 4908, 0, 0, 0, 1024, 1152, 1280, 4923,
+ /* 1041 */ 33597, 29, 2, 3, 1540, 5, 0, 0, 0, 0, 0, 1959, 0, 43, 43, 43, 687, 0, 0, 44, 0, 85, 86, 0, 85, 86, 0, 91,
+ /* 1069 */ 19840, 0, 77, 18432, 78, 3, 0, 0, 43, 687, 1959, 1959, 1959, 1024, 0, 4736, 43, 19328, 43, 0, 0, 0, 43,
+ /* 1092 */ 5165, 0, 0, 0, 5170, 5170, 5170, 5120, 1, 33536, 2, 3, 1540, 43, 43, 43, 0, 687, 0, 45, 0, 85, 86, 0, 85,
+ /* 1117 */ 86, 90, 91, 0, 5248, 0, 0, 0, 54, 55, 18432, 19840, 0, 0, 18432, 33536, 3, 0, 0, 43, 19328, 0, 0, 0, 0,
+ /* 1142 */ 71, 54, 55, 18492, 0, 19200, 18432, 0, 0, 33536, 0, 0, 43, 19328, 43, 0, 0, 0, 38, 0, 0, 0, 0, 2688, 0,
+ /* 1167 */ 54, 457, 18432, 51, 51, 5376, 1, 1, 2, 3, 1540, 43, 43, 43, 0, 0, 687, 46, 0, 85, 86, 87, 85, 86, 0, 91,
+ /* 1193 */ 89, 0, 89, 0, 0, 0, 0, 0, 43, 19328, 43, 0, 71, 0
+ };
+
+ private static final int[] EXPECTED =
+ {
+ /* 0 */ 65, 47, 107, 121, 51, 58, 62, 65, 47, 118, 69, 54, 73, 77, 81, 85, 143, 89, 135, 96, 100, 101, 102, 106,
+ /* 24 */ 107, 132, 92, 115, 112, 125, 107, 107, 129, 141, 107, 149, 107, 108, 107, 139, 107, 107, 147, 107, 107,
+ /* 45 */ 107, 107, 4096, 8192, 262144, 8388608, 589824, 0, 37748738, 16, 16, 16, 16777216, 606208, 16793600,
+ /* 60 */ 16801792, 196736, 1573120, -134217216, -1671397364, 32, 64, 1024, 2048, 4194306, 33554434, 2, 16, 131072,
+ /* 74 */ 0, 0, 1048576, -134217728, 8, 4, 6324224, -1677721600, 32768, 0, 32768, 32, 1024, 8192, 8388608, 2,
+ /* 90 */ 805306368, 134217728, 8, 32, 128, 1, 0, 32, 8192, 268435456, 536870912, 134217728, 134217728, 536870912,
+ /* 104 */ 134217728, 536870912, 536870912, 0, 0, 0, 0, 64, 160, 0, 2, 2, 0, 48, 0, 0, 16384, 0, 0, 16384, 16384,
+ /* 125 */ 1408, 1408, 1408, 736, 0, 4, 8, 0, 1, 2, 4, 2097152, 32768, 268435456, 0, 8, 16, 0, 0, 0, 2, 4194304, 0,
+ /* 148 */ 64, 0, 0, 256, 1024
+ };
+
+ private static final String[] TOKEN =
+ {
+ "%ERROR",
+ "CommentContents",
+ "AposStringLiteral",
+ "QuotStringLiteral",
+ "StringTemplateFixedPart",
+ "StringConstructorChars",
+ "PragmaContents",
+ "QuotAttrContentChar",
+ "AposAttrContentChar",
+ "ElementContentChar",
+ "CDataSectionContents",
+ "DirCommentContents",
+ "DirPIContents",
+ "QName",
+ "S",
+ "OtherEnclosedExprContent",
+ "'\"'",
+ "'\"\"'",
+ "'#)'",
+ "''''",
+ "''''''",
+ "'(#'",
+ "'(:'",
+ "'-->'",
+ "'/>'",
+ "':)'",
+ "'<'",
+ "'<'",
+ "'