Skip to content

Commit 0e919a4

Browse files
committed
refact: minor cleanup of Chars/DocumentInputStream
1 parent 8d18ebd commit 0e919a4

5 files changed

Lines changed: 123 additions & 57 deletions

File tree

org.eclipse.tm4e.ui.tests/src/main/java/org/eclipse/tm4e/ui/tests/internal/utils/DocumentInputStreamTest.java

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import org.eclipse.core.resources.ResourcesPlugin;
2424
import org.eclipse.core.runtime.CoreException;
2525
import org.eclipse.jface.text.IDocument;
26+
import org.eclipse.tm4e.ui.internal.utils.CharsInputStream;
2627
import org.eclipse.tm4e.ui.internal.utils.DocumentInputStream;
2728
import org.eclipse.ui.editors.text.FileDocumentProvider;
2829
import org.eclipse.ui.part.FileEditorInput;
@@ -62,7 +63,7 @@ public void testAvailable() throws IOException {
6263
try (var is = new DocumentInputStream(document)) {
6364
assertEquals(UTF_8, is.getCharset());
6465
assertEquals(TEST_ASCII.length(), is.available());
65-
final byte[] buffer = new byte[4];
66+
final var buffer = new byte[4];
6667
is.read(buffer);
6768
assertEquals(TEST_ASCII.length() - 4, is.available());
6869
is.readAllBytes();
@@ -99,7 +100,7 @@ public void testReadEachByte() throws IOException {
99100
bytesRead.add((byte) b);
100101
}
101102

102-
final byte[] byteArray = new byte[bytesRead.size()];
103+
final var byteArray = new byte[bytesRead.size()];
103104
for (int i = 0; i < bytesRead.size(); i++) {
104105
byteArray[i] = bytesRead.get(i);
105106
}
@@ -109,7 +110,7 @@ public void testReadEachByte() throws IOException {
109110

110111
@Test
111112
public void testReadIntoByteArray() throws IOException {
112-
final byte[] buffer = new byte[1024]; // Buffer to read a portion of the text
113+
final var buffer = new byte[1024]; // Buffer to read a portion of the text
113114

114115
try (var is = new DocumentInputStream(document)) {
115116
assertEquals(UTF_8, is.getCharset());
@@ -127,7 +128,7 @@ public void testSkip() throws IOException {
127128
final long skipped = is.skip(EMOJI_BYTES_LEN);
128129
assertEquals(EMOJI_BYTES_LEN, skipped);
129130

130-
final byte[] japanese = new byte[TEST_UNICODE_BYTES_LEN];
131+
final var japanese = new byte[TEST_UNICODE_BYTES_LEN];
131132
final int bytesRead = is.read(japanese);
132133

133134
assertEquals(JAPANESE, new String(japanese, 0, bytesRead, UTF_8));
@@ -140,11 +141,10 @@ public void testHighSurrogateAtEndOfInput() throws IOException {
140141
try (var is = new DocumentInputStream(document)) {
141142
assertEquals(UTF_8, is.getCharset());
142143
final byte[] result = is.readAllBytes();
143-
final String output = new String(result, UTF_8);
144+
final var output = new String(result, UTF_8);
144145

145-
// the high surrogate at the end should be replaced by the
146-
// Unicode replacement char
147-
assertEquals("A\uFFFD", output);
146+
// the high surrogate at the end should be replaced by the Unicode replacement char
147+
assertEquals("A" + CharsInputStream.UNICODE_REPLACEMENT_CHAR, output);
148148
}
149149
}
150150

@@ -154,10 +154,10 @@ public void testHighSurrogateWithoutLowSurrogate() throws IOException {
154154
try (var is = new DocumentInputStream(document)) {
155155
assertEquals(UTF_8, is.getCharset());
156156
final byte[] result = is.readAllBytes();
157-
final String output = new String(result, UTF_8);
157+
final var output = new String(result, UTF_8);
158158

159159
// the invalid surrogate pair should be replaced by the Unicode replacement char
160-
assertEquals("\uFFFD" + "A", output);
160+
assertEquals(CharsInputStream.UNICODE_REPLACEMENT_CHAR + "A", output);
161161
}
162162
}
163163
}

org.eclipse.tm4e.ui/src/main/java/org/eclipse/tm4e/ui/internal/utils/CharsInputStream.java

Lines changed: 97 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -15,27 +15,46 @@
1515
import java.io.InputStream;
1616
import java.nio.ByteBuffer;
1717
import java.nio.CharBuffer;
18+
import java.nio.charset.CharacterCodingException;
1819
import java.nio.charset.Charset;
1920
import java.nio.charset.CharsetEncoder;
2021
import java.nio.charset.CoderResult;
21-
import java.nio.charset.StandardCharsets;
2222
import java.util.Objects;
2323
import java.util.function.IntSupplier;
2424

25-
import org.eclipse.jdt.annotation.Nullable;
25+
public class CharsInputStream extends InputStream {
2626

27-
class CharsInputStream extends InputStream {
27+
/**
28+
* Functional interface for supplying characters at a specified index.
29+
* Implementations can define how characters are fetched.
30+
*/
2831
@FunctionalInterface
2932
public interface CharsSupplier {
3033
char charAt(int index) throws Exception;
3134
}
3235

3336
private enum EncoderState {
34-
ENCODING,
35-
FLUSHING,
37+
/**
38+
* The {@link #encoder} is actively encoding characters into bytes. This is the
39+
* initial state of the encoder.
40+
*/
41+
ENCODING, //
42+
43+
/**
44+
* The {@link #encoder} has finished processing all characters and is now
45+
* flushing any remaining bytes in its internal buffer.
46+
*/
47+
FLUSHING, //
48+
49+
/**
50+
* The {@link #encoder} has completed both the encoding and flushing processes.
51+
* No more data is left to be read from the encoder.
52+
*/
3653
DONE
3754
}
3855

56+
public static final char UNICODE_REPLACEMENT_CHAR = '\uFFFD';
57+
3958
/** 512 surrogate character pairs */
4059
private static final int DEFAULT_BUFFER_SIZE = 512;
4160
private static final int EOF = -1;
@@ -50,27 +69,27 @@ private enum EncoderState {
5069
private final CharsSupplier chars;
5170
private final IntSupplier charsLength;
5271

53-
CharsInputStream(final CharSequence chars) {
54-
this(chars, null);
72+
public CharsInputStream(final CharSequence chars) {
73+
this(chars, Charset.defaultCharset());
5574
}
5675

57-
CharsInputStream(final CharSequence chars, final @Nullable Charset charset) {
76+
public CharsInputStream(final CharSequence chars, final Charset charset) {
5877
this(chars, charset, DEFAULT_BUFFER_SIZE);
5978
}
6079

61-
CharsInputStream(final CharSequence chars, final @Nullable Charset charset, final int bufferSize) {
80+
public CharsInputStream(final CharSequence chars, final Charset charset, final int bufferSize) {
6281
this(chars::charAt, chars::length, charset, bufferSize);
6382
}
6483

65-
CharsInputStream(final CharsSupplier chars, final IntSupplier charsLength) {
66-
this(chars, charsLength, null);
84+
public CharsInputStream(final CharsSupplier chars, final IntSupplier charsLength) {
85+
this(chars, charsLength, Charset.defaultCharset());
6786
}
6887

6988
/**
7089
* @param chars function to access indexed chars.
7190
* @param charsLength function to get the number of indexed chars provided by the <code>chars</code> parameter.
7291
*/
73-
CharsInputStream(final CharsSupplier chars, final IntSupplier charsLength, final @Nullable Charset charset) {
92+
CharsInputStream(final CharsSupplier chars, final IntSupplier charsLength, final Charset charset) {
7493
this(chars, charsLength, charset, DEFAULT_BUFFER_SIZE);
7594
}
7695

@@ -79,10 +98,10 @@ private enum EncoderState {
7998
* @param charsLength function to get the number of indexed chars provided by the <code>chars</code> parameter.
8099
* @param bufferSize number of surrogate character pairs to encode at once.
81100
*/
82-
CharsInputStream(final CharsSupplier chars, final IntSupplier charsLength, final @Nullable Charset charset, final int bufferSize) {
101+
public CharsInputStream(final CharsSupplier chars, final IntSupplier charsLength, final Charset charset, final int bufferSize) {
83102
if (bufferSize < 1)
84103
throw new IllegalArgumentException("[bufferSize] must be 1 or larger");
85-
encoder = (charset == null ? StandardCharsets.UTF_8 : charset).newEncoder();
104+
encoder = charset.newEncoder();
86105

87106
this.bufferSize = bufferSize;
88107
charBuffer = CharBuffer.allocate(bufferSize * 2); // buffer for 2 chars (high/low surrogate)
@@ -100,10 +119,47 @@ public int available() {
100119
return remaining == 0 ? charsLength.getAsInt() - charIndex : remaining;
101120
}
102121

103-
public Charset getCharset() {
104-
return encoder.charset();
122+
/**
123+
* This method is called by {@link #refillByteBuffer()} to encode characters
124+
* from the given {@link CharBuffer} into bytes and store them in the
125+
* {@link #byteBuffer}.
126+
*
127+
* <p>
128+
* The method can be used either to encode characters in the middle of input
129+
* (with {@code isEndOfInput=false}) or to finalize the encoding process at the
130+
* end of input (with {@code isEndOfInput=true}).
131+
* </p>
132+
*
133+
* @param in
134+
* the {@link CharBuffer} containing characters to encode.
135+
* @param isEndOfInput
136+
* if {@code true}, signals that no more input will be provided,
137+
* allowing the encoder to complete its final encoding steps.
138+
*/
139+
private void encodeChars(final CharBuffer in, final boolean isEndOfInput) throws CharacterCodingException {
140+
byteBuffer.clear();
141+
final CoderResult result = encoder.encode(in, byteBuffer, isEndOfInput);
142+
byteBuffer.flip();
143+
if (result.isError()) {
144+
result.throwException();
145+
}
105146
}
106147

148+
/**
149+
* Flushes the remaining bytes from the encoder to the {@link #byteBuffer}.
150+
*
151+
* <p>
152+
* This method is called by {@link #refillByteBuffer()} when all characters have
153+
* been processed, and the encoder needs to output any remaining bytes. It
154+
* transitions the encoder state from {@link EncoderState#ENCODING} to
155+
* {@link EncoderState#FLUSHING}, and eventually to {@link EncoderState#DONE}
156+
* once all bytes have been flushed.
157+
* </p>
158+
*
159+
* @return {@code true} if there are still bytes left in the {@link #byteBuffer}
160+
* after flushing, or if the encoder still has more bytes to flush;
161+
* {@code false} if the flush is complete and no bytes remain.
162+
*/
107163
private boolean flushEncoder() throws IOException {
108164
if (encoderState == EncoderState.DONE)
109165
return false;
@@ -117,8 +173,12 @@ private boolean flushEncoder() throws IOException {
117173
final CoderResult result = encoder.flush(byteBuffer);
118174
byteBuffer.flip();
119175

120-
if (result.isOverflow()) // byteBuffer too small
176+
if (result.isOverflow()) {
177+
// the byteBuffer has been filled, but there are more bytes to be flushed.
178+
// after reading all available bytes from byteBuffer, flushEncoder() needs to
179+
// be called again to process the remaining data.
121180
return true;
181+
}
122182

123183
if (result.isError()) {
124184
result.throwException();
@@ -128,9 +188,13 @@ private boolean flushEncoder() throws IOException {
128188
return byteBuffer.hasRemaining();
129189
}
130190

191+
public Charset getCharset() {
192+
return encoder.charset();
193+
}
194+
131195
@Override
132196
public int read() throws IOException {
133-
if (!byteBuffer.hasRemaining() && !refillBuffer())
197+
if (!byteBuffer.hasRemaining() && !refillByteBuffer())
134198
return EOF;
135199
return byteBuffer.get() & 0xFF; // next byte as an unsigned integer (0 to 255)
136200
}
@@ -146,7 +210,7 @@ public int read(final byte[] buf, final int off, final int bytesToRead) throws I
146210

147211
while (bytesRead < bytesToRead) {
148212
if (bytesReadable == 0) {
149-
if (refillBuffer()) {
213+
if (refillByteBuffer()) {
150214
bytesReadable = byteBuffer.remaining();
151215
} else
152216
return bytesRead == 0 ? EOF : bytesRead;
@@ -161,7 +225,16 @@ public int read(final byte[] buf, final int off, final int bytesToRead) throws I
161225
return bytesRead;
162226
}
163227

164-
private boolean refillBuffer() throws IOException {
228+
/**
229+
* Refills the {@link #byteBuffer} by reading characters from the character
230+
* supplier, encoding them, and storing the resulting bytes into the
231+
* {@link #byteBuffer}.
232+
*
233+
* @return {@code true} if the buffer was successfully refilled and has bytes
234+
* available for reading, {@code false} if the end of the stream is
235+
* reached and there are no more bytes to read.
236+
*/
237+
private boolean refillByteBuffer() throws IOException {
165238
if (encoderState == EncoderState.DONE)
166239
return false;
167240

@@ -173,12 +246,7 @@ private boolean refillBuffer() throws IOException {
173246
// if EOF is reached transition to flushing
174247
if (charIndex >= charsLen) {
175248
// finalize encoding before switching to flushing
176-
byteBuffer.clear();
177-
final CoderResult result = encoder.encode(CharBuffer.allocate(0), byteBuffer, true /* signal EOF */);
178-
byteBuffer.flip();
179-
if (result.isError()) {
180-
result.throwException();
181-
}
249+
encodeChars(CharBuffer.allocate(0), true /* signal EOF */);
182250
return flushEncoder();
183251
}
184252

@@ -195,11 +263,11 @@ private boolean refillBuffer() throws IOException {
195263
charBuffer.put(lowSurrogate);
196264
} else {
197265
// missing low surrogate - fallback to replacement character
198-
charBuffer.put('\uFFFD');
266+
charBuffer.put(UNICODE_REPLACEMENT_CHAR);
199267
}
200268
} else {
201269
// missing low surrogate - fallback to replacement character
202-
charBuffer.put('\uFFFD');
270+
charBuffer.put(UNICODE_REPLACEMENT_CHAR);
203271
break;
204272
}
205273
} else {
@@ -209,12 +277,7 @@ private boolean refillBuffer() throws IOException {
209277
charBuffer.flip();
210278

211279
// encode chars into bytes
212-
byteBuffer.clear();
213-
final CoderResult result = encoder.encode(charBuffer, byteBuffer, false);
214-
byteBuffer.flip();
215-
if (result.isError()) {
216-
result.throwException();
217-
}
280+
encodeChars(charBuffer, false);
218281
} catch (final Exception ex) {
219282
throw new IOException(ex);
220283
}

org.eclipse.tm4e.ui/src/main/java/org/eclipse/tm4e/ui/internal/utils/DocumentInputStream.java

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,27 +16,26 @@
1616
import org.eclipse.core.filebuffers.FileBuffers;
1717
import org.eclipse.core.filebuffers.ITextFileBuffer;
1818
import org.eclipse.core.filebuffers.ITextFileBufferManager;
19-
import org.eclipse.jdt.annotation.Nullable;
2019
import org.eclipse.jface.text.IDocument;
2120
import org.eclipse.tm4e.ui.TMUIPlugin;
2221

2322
public final class DocumentInputStream extends CharsInputStream {
2423

25-
private static @Nullable Charset getCharset(final IDocument document) {
24+
private static Charset getCharset(final IDocument document) {
2625
final ITextFileBufferManager bufferManager = FileBuffers.getTextFileBufferManager();
2726
if (bufferManager == null)
28-
return null;
27+
return Charset.defaultCharset();
2928
final ITextFileBuffer buffer = bufferManager.getTextFileBuffer(document);
3029
if (buffer == null)
31-
return null;
30+
return Charset.defaultCharset();
3231
try {
3332
final String charsetName = buffer.getEncoding();
3433
if (charsetName != null)
3534
return Charset.forName(charsetName);
3635
} catch (final Exception ex) {
3736
TMUIPlugin.logError(ex);
3837
}
39-
return null;
38+
return Charset.defaultCharset();
4039
}
4140

4241
public DocumentInputStream(final IDocument doc) {

0 commit comments

Comments
 (0)