Skip to content

Commit b802ae1

Browse files
[MOD] file:read, fn:unparsed-text: options unified
1 parent 7f3e25b commit b802ae1

23 files changed

Lines changed: 198 additions & 129 deletions

basex-core/src/main/java/org/basex/core/CommonOptions.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,4 +42,6 @@ public interface CommonOptions {
4242
StringOption ENCODING = new StringOption("encoding");
4343
/** Normalize newlines. */
4444
BooleanOption NORMALIZE_NEWLINES = new BooleanOption("normalize-newlines");
45+
/** Fallback: replace invalid input with the Unicode replacement character. */
46+
BooleanOption FALLBACK = new BooleanOption("fallback", false);
4547
}

basex-core/src/main/java/org/basex/gui/view/editor/EditorView.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -649,7 +649,7 @@ private EditorArea execEditor() {
649649
private byte[] read(final IOFile file) throws IOException {
650650
try {
651651
// try to open as validated UTF-8 document
652-
return new NewlineInput(file).validate(true).content();
652+
return new NewlineInput(file).fallback(false).content();
653653
} catch(final InputException ex) {
654654
// error...
655655
Util.debug(ex);

basex-core/src/main/java/org/basex/io/in/NewlineInput.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,8 @@ public NewlineInput(final IO in, final String encoding) throws IOException {
5757
}
5858

5959
@Override
60-
public NewlineInput validate(final boolean flag) {
61-
super.validate(flag);
60+
public NewlineInput fallback(final boolean flag) {
61+
super.fallback(flag);
6262
return this;
6363
}
6464

basex-core/src/main/java/org/basex/io/in/TextDecoder.java

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@
2121
abstract class TextDecoder {
2222
/** Encoding. */
2323
final String encoding;
24-
/** Indicates if input must be valid. */
25-
boolean validate;
24+
/** Replace invalid input with Unicode replacement character. */
25+
boolean fallback = true;
2626

2727
/**
2828
* Constructor.
@@ -91,17 +91,16 @@ final int readUTF16(final TextInput ti, final boolean be) throws IOException {
9191
* @throws IOException I/O exception
9292
*/
9393
final int invalid(final boolean incomplete, final byte... bytes) throws IOException {
94-
if(validate) {
95-
final TokenBuilder tb = new TokenBuilder();
96-
final IntUnaryOperator toHex = c -> c + (c > 9 ? '7' : '0');
97-
for(final int b : bytes) {
98-
if(!tb.isEmpty()) tb.add(", ");
99-
tb.add(toHex.applyAsInt(b >> 4 & 0x0F)).add(toHex.applyAsInt(b & 0x0F));
100-
}
101-
if(incomplete) tb.add(", ??");
102-
throw new DecodingException("Invalid " + encoding + " character encoding: " + tb);
94+
if(fallback) return Token.REPLACEMENT;
95+
96+
final TokenBuilder tb = new TokenBuilder();
97+
final IntUnaryOperator toHex = c -> c + (c > 9 ? '7' : '0');
98+
for(final int b : bytes) {
99+
if(!tb.isEmpty()) tb.add(", ");
100+
tb.add(toHex.applyAsInt(b >> 4 & 0x0F)).add(toHex.applyAsInt(b & 0x0F));
103101
}
104-
return Token.REPLACEMENT;
102+
if(incomplete) tb.add(", ??");
103+
throw new DecodingException("Invalid " + encoding + " character encoding: " + tb);
105104
}
106105

107106
/** UTF8 Decoder. */

basex-core/src/main/java/org/basex/io/in/TextInput.java

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919
public class TextInput extends BufferInput {
2020
/** Decoder. */
2121
private TextDecoder decoder;
22-
/** Indicates if the input is to be checked for valid XML 1.0.5 characters. */
23-
private boolean validate;
22+
/** Replace invalid input with Unicode replacement character. */
23+
private boolean fallback = true;
2424

2525
/**
2626
* Constructor.
@@ -105,9 +105,9 @@ private void guess(final String encoding) throws IOException {
105105
* @param flag flag to be set
106106
* @return self reference
107107
*/
108-
public TextInput validate(final boolean flag) {
109-
validate = flag;
110-
decoder.validate = flag;
108+
public TextInput fallback(final boolean flag) {
109+
fallback = flag;
110+
decoder.fallback = flag;
111111
return this;
112112
}
113113

@@ -122,7 +122,7 @@ public TextInput encoding(final String encoding) throws IOException {
122122
final String enc = normEncoding(encoding, false);
123123
decoder = TextDecoder.get(enc != UTF16 ? enc :
124124
decoder.encoding.equals(UTF16LE) ? UTF16LE : UTF16BE);
125-
decoder.validate = validate;
125+
decoder.fallback = fallback;
126126
}
127127
return this;
128128
}
@@ -144,8 +144,8 @@ public String encoding() {
144144
public int read() throws IOException {
145145
final int cp = decoder.read(this);
146146
if(cp != -1 && !XMLToken.valid(cp)) {
147-
if(validate) throw new InputException(cp);
148-
return Token.REPLACEMENT;
147+
if(fallback) return Token.REPLACEMENT;
148+
throw new InputException(cp);
149149
}
150150
return cp;
151151
}

basex-core/src/main/java/org/basex/query/func/archive/ArchiveFn.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ final byte[] encode(final byte[] value, final String encoding, final boolean enf
3737
if(encoding == Strings.UTF8 && !enforce) return value;
3838
try {
3939
final boolean validate = qc.context.options.get(MainOptions.CHECKSTRINGS);
40-
return ConvertFn.toString(new ArrayInput(value), encoding, validate);
40+
return ConvertFn.toString(new ArrayInput(value), encoding, !validate);
4141
} catch(final IOException ex) {
4242
throw ARCHIVE_ENCODE2_X.get(info, ex);
4343
}

basex-core/src/main/java/org/basex/query/func/bin/BinDecodeString.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ public Item item(final QueryContext qc, final InputInfo ii) throws QueryExceptio
3636
}
3737

3838
try {
39-
return Str.get(ConvertFn.toString(new ArrayInput(bytes), encoding, true));
39+
return Str.get(ConvertFn.toString(new ArrayInput(bytes), encoding, false));
4040
} catch(final IOException ex) {
4141
throw BIN_CE_X.get(info, ex);
4242
}

basex-core/src/main/java/org/basex/query/func/convert/ConvertBinaryToString.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ public Item item(final QueryContext qc, final InputInfo ii) throws QueryExceptio
2323
final boolean fallback = toBooleanOrFalse(arg(2), qc);
2424

2525
try(BufferInput bi = value.input(info)) {
26-
return Str.get(toString(bi, encoding, !fallback));
26+
return Str.get(toString(bi, encoding, fallback));
2727
} catch(final IOException ex) {
2828
throw CONVERT_STRING_X.get(info, ex);
2929
}

basex-core/src/main/java/org/basex/query/func/convert/ConvertFn.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,14 +56,14 @@ public static byte[] toBinary(final byte[] in, final String encoding)
5656
* Converts the specified input to a string in the specified encoding.
5757
* @param is input stream
5858
* @param encoding encoding (can be {@code null})
59-
* @param validate validate string
59+
* @param fallback replace invalid characters with the Unicode replacement character
6060
* @return resulting value
6161
* @throws IOException I/O exception
6262
*/
63-
public static byte[] toString(final InputStream is, final String encoding, final boolean validate)
63+
public static byte[] toString(final InputStream is, final String encoding, final boolean fallback)
6464
throws IOException {
6565
try(TextInput ti = new TextInput(is, encoding)) {
66-
return ti.validate(validate).content();
66+
return ti.fallback(fallback).content();
6767
}
6868
}
6969
}

basex-core/src/main/java/org/basex/query/func/fetch/FetchText.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,6 @@ public Item item(final QueryContext qc, final InputInfo ii) throws QueryExceptio
2020
final String encoding = toEncodingOrNull(arg(1), FETCH_ENCODING_X, qc);
2121
final boolean fallback = toBooleanOrFalse(arg(2), qc);
2222

23-
return new StrLazy(source, encoding, FETCH_OPEN_X, !fallback);
23+
return new StrLazy(source, encoding, FETCH_OPEN_X, fallback);
2424
}
2525
}

0 commit comments

Comments
 (0)