1515import java .io .InputStream ;
1616import java .nio .ByteBuffer ;
1717import java .nio .CharBuffer ;
18+ import java .nio .charset .CharacterCodingException ;
1819import java .nio .charset .Charset ;
1920import java .nio .charset .CharsetEncoder ;
2021import java .nio .charset .CoderResult ;
21- import java .nio .charset .StandardCharsets ;
2222import java .util .Objects ;
2323import java .util .function .IntSupplier ;
2424
25- import org . eclipse . jdt . annotation . Nullable ;
25+ public class CharsInputStream extends InputStream {
2626
27- class CharsInputStream extends InputStream {
/**
 * Index-based source of characters.
 *
 * <p>
 * Allows the stream to read from any backing store that can hand out the
 * character at a requested position, for example a {@link CharSequence} via
 * its {@code charAt} method.
 * </p>
 */
@FunctionalInterface
public interface CharsSupplier {

    /**
     * Returns the character located at the given position.
     *
     * @param index
     *            position of the requested character
     * @return the character at {@code index}
     * @throws Exception
     *             if the character cannot be provided
     */
    char charAt(int index) throws Exception;
}
3235
3336 private enum EncoderState {
34- ENCODING ,
35- FLUSHING ,
37+ /**
38+ * The {@link #encoder} is actively encoding characters into bytes. This is the
39+ * initial state of the encoder.
40+ */
41+ ENCODING , //
42+
43+ /**
44+ * The {@link #encoder} has finished processing all characters and is now
45+ * flushing any remaining bytes in its internal buffer.
46+ */
47+ FLUSHING , //
48+
49+ /**
50+ * The {@link #encoder} has completed both the encoding and flushing processes.
51+ * No more data is left to be read from the encoder.
52+ */
3653 DONE
3754 }
3855
56+ public static final char UNICODE_REPLACEMENT_CHAR = '\uFFFD' ;
57+
3958 /** 512 surrogate character pairs */
4059 private static final int DEFAULT_BUFFER_SIZE = 512 ;
4160 private static final int EOF = -1 ;
@@ -50,27 +69,27 @@ private enum EncoderState {
5069 private final CharsSupplier chars ;
5170 private final IntSupplier charsLength ;
5271
/**
 * Creates a stream over the given characters, encoded with the
 * {@link Charset#defaultCharset() default charset} and the default buffer
 * size.
 *
 * @param chars
 *            the characters to expose as a byte stream.
 */
public CharsInputStream(final CharSequence chars) {
    this(chars, Charset.defaultCharset());
}
5675
/**
 * Creates a stream over the given characters using the default buffer size.
 *
 * @param chars
 *            the characters to expose as a byte stream.
 * @param charset
 *            the charset used to encode the characters into bytes.
 */
public CharsInputStream(final CharSequence chars, final Charset charset) {
    this(chars, charset, DEFAULT_BUFFER_SIZE);
}
6079
/**
 * Creates a stream over the given characters.
 *
 * @param chars
 *            the characters to expose as a byte stream.
 * @param charset
 *            the charset used to encode the characters into bytes.
 * @param bufferSize
 *            number of surrogate character pairs to encode at once.
 */
public CharsInputStream(final CharSequence chars, final Charset charset, final int bufferSize) {
    // adapt the sequence to the index-based supplier form
    this(chars::charAt, chars::length, charset, bufferSize);
}
6483
/**
 * Creates a stream over indexed characters, encoded with the
 * {@link Charset#defaultCharset() default charset} and the default buffer
 * size.
 *
 * @param chars
 *            function to access indexed chars.
 * @param charsLength
 *            function to get the number of indexed chars provided by the
 *            <code>chars</code> parameter.
 */
public CharsInputStream(final CharsSupplier chars, final IntSupplier charsLength) {
    this(chars, charsLength, Charset.defaultCharset());
}
6887
/**
 * Creates a stream over indexed characters using the default buffer size.
 *
 * <p>
 * Declared {@code public} like the other constructors of this public class so
 * that callers outside the package can select a charset without also having
 * to specify a buffer size.
 * </p>
 *
 * @param chars function to access indexed chars.
 * @param charsLength function to get the number of indexed chars provided by the <code>chars</code> parameter.
 * @param charset the charset used to encode the characters into bytes.
 */
public CharsInputStream(final CharsSupplier chars, final IntSupplier charsLength, final Charset charset) {
    this(chars, charsLength, charset, DEFAULT_BUFFER_SIZE);
}
7695
@@ -79,10 +98,10 @@ private enum EncoderState {
7998 * @param charsLength function to get the number of indexed chars provided by the <code>chars</code> parameter.
8099 * @param bufferSize number of surrogate character pairs to encode at once.
81100 */
82- CharsInputStream (final CharsSupplier chars , final IntSupplier charsLength , final @ Nullable Charset charset , final int bufferSize ) {
101+ public CharsInputStream (final CharsSupplier chars , final IntSupplier charsLength , final Charset charset , final int bufferSize ) {
83102 if (bufferSize < 1 )
84103 throw new IllegalArgumentException ("[bufferSize] must be 1 or larger" );
85- encoder = ( charset == null ? StandardCharsets . UTF_8 : charset ) .newEncoder ();
104+ encoder = charset .newEncoder ();
86105
87106 this .bufferSize = bufferSize ;
88107 charBuffer = CharBuffer .allocate (bufferSize * 2 ); // buffer for 2 chars (high/low surrogate)
@@ -100,10 +119,47 @@ public int available() {
100119 return remaining == 0 ? charsLength .getAsInt () - charIndex : remaining ;
101120 }
102121
103- public Charset getCharset () {
104- return encoder .charset ();
122+ /**
123+ * This method is called by {@link #refillByteBuffer()} to encode characters
124+ * from the given {@link CharBuffer} into bytes and stores them in the
125+ * {@link #byteBuffer}.
126+ *
127+ * <p>
128+ * The method can be used either to encode characters in the middle of input
129+ * (with {@code isEndOfInput=false}) or to finalize the encoding process at the
130+ * end of input (with {@code isEndOfInput=true}).
131+ * </p>
132+ *
133+ * @param in
134+ * the {@link CharBuffer} containing characters to encode.
135+ * @param isEndOfInput
136+ * if {@code true}, signals that no more input will be provided,
137+ * allowing the encoder to complete its final encoding steps.
138+ */
139+ private void encodeChars (final CharBuffer in , final boolean isEndOfInput ) throws CharacterCodingException {
140+ byteBuffer .clear ();
141+ final CoderResult result = encoder .encode (in , byteBuffer , isEndOfInput );
142+ byteBuffer .flip ();
143+ if (result .isError ()) {
144+ result .throwException ();
145+ }
105146 }
106147
148+ /**
149+ * Flushes the remaining bytes from the encoder to the {@link #byteBuffer}.
150+ *
151+ * <p>
152+ * This method is called by {@link #refillByteBuffer()} when all characters have
153+ * been processed, and the encoder needs to output any remaining bytes. It
154+ * transitions the encoder state from {@link EncoderState#ENCODING} to
155+ * {@link EncoderState#FLUSHING}, and eventually to {@link EncoderState#DONE}
156+ * once all bytes have been flushed.
157+ * </p>
158+ *
159+ * @return {@code true} if there are still bytes left in the {@link #byteBuffer}
160+ * after flushing, or if the encoder still has more bytes to flush;
161+ * {@code false} if the flush is complete and no bytes remain.
162+ */
107163 private boolean flushEncoder () throws IOException {
108164 if (encoderState == EncoderState .DONE )
109165 return false ;
@@ -117,8 +173,12 @@ private boolean flushEncoder() throws IOException {
117173 final CoderResult result = encoder .flush (byteBuffer );
118174 byteBuffer .flip ();
119175
120- if (result .isOverflow ()) // byteBuffer too small
176+ if (result .isOverflow ()) {
177+ // the byteBuffer has been filled, but there are more bytes to be flushed.
178+ // after reading all available bytes from byteBuffer, flushEncoder() needs to
179+ // be called again to process the remaining data.
121180 return true ;
181+ }
122182
123183 if (result .isError ()) {
124184 result .throwException ();
@@ -128,9 +188,13 @@ private boolean flushEncoder() throws IOException {
128188 return byteBuffer .hasRemaining ();
129189 }
130190
191+ public Charset getCharset () {
192+ return encoder .charset ();
193+ }
194+
131195 @ Override
132196 public int read () throws IOException {
133- if (!byteBuffer .hasRemaining () && !refillBuffer ())
197+ if (!byteBuffer .hasRemaining () && !refillByteBuffer ())
134198 return EOF ;
135199 return byteBuffer .get () & 0xFF ; // next byte as an unsigned integer (0 to 255)
136200 }
@@ -146,7 +210,7 @@ public int read(final byte[] buf, final int off, final int bytesToRead) throws I
146210
147211 while (bytesRead < bytesToRead ) {
148212 if (bytesReadable == 0 ) {
149- if (refillBuffer ()) {
213+ if (refillByteBuffer ()) {
150214 bytesReadable = byteBuffer .remaining ();
151215 } else
152216 return bytesRead == 0 ? EOF : bytesRead ;
@@ -161,7 +225,16 @@ public int read(final byte[] buf, final int off, final int bytesToRead) throws I
161225 return bytesRead ;
162226 }
163227
164- private boolean refillBuffer () throws IOException {
228+ /**
229+ * Refills the {@link #byteBuffer} by reading characters from the character
230+ * supplier, encoding them, and storing the resulting bytes into the
231+ * {@link #byteBuffer}.
232+ *
233+ * @return {@code true} if the buffer was successfully refilled and has bytes
234+ * available for reading, {@code false} if the end of the stream is
235+ * reached and there are no more bytes to read.
236+ */
237+ private boolean refillByteBuffer () throws IOException {
165238 if (encoderState == EncoderState .DONE )
166239 return false ;
167240
@@ -173,12 +246,7 @@ private boolean refillBuffer() throws IOException {
173246 // if EOF is reached transition to flushing
174247 if (charIndex >= charsLen ) {
175248 // finalize encoding before switching to flushing
176- byteBuffer .clear ();
177- final CoderResult result = encoder .encode (CharBuffer .allocate (0 ), byteBuffer , true /* signal EOF */ );
178- byteBuffer .flip ();
179- if (result .isError ()) {
180- result .throwException ();
181- }
249+ encodeChars (CharBuffer .allocate (0 ), true /* signal EOF */ );
182250 return flushEncoder ();
183251 }
184252
@@ -195,11 +263,11 @@ private boolean refillBuffer() throws IOException {
195263 charBuffer .put (lowSurrogate );
196264 } else {
197265 // missing low surrogate - fallback to replacement character
198- charBuffer .put ('\uFFFD' );
266+ charBuffer .put (UNICODE_REPLACEMENT_CHAR );
199267 }
200268 } else {
201269 // missing low surrogate - fallback to replacement character
202- charBuffer .put ('\uFFFD' );
270+ charBuffer .put (UNICODE_REPLACEMENT_CHAR );
203271 break ;
204272 }
205273 } else {
@@ -209,12 +277,7 @@ private boolean refillBuffer() throws IOException {
209277 charBuffer .flip ();
210278
211279 // encode chars into bytes
212- byteBuffer .clear ();
213- final CoderResult result = encoder .encode (charBuffer , byteBuffer , false );
214- byteBuffer .flip ();
215- if (result .isError ()) {
216- result .throwException ();
217- }
280+ encodeChars (charBuffer , false );
218281 } catch (final Exception ex ) {
219282 throw new IOException (ex );
220283 }
0 commit comments