@@ -2,11 +2,27 @@ import { parse as parseComment, Block } from 'comment-parser';
22import { Schema } from './ir' ;
33
/**
 * Convert a UTF-8 byte offset into a JavaScript string offset.
 *
 * SWC (written in Rust) reports spans as UTF-8 byte offsets, whereas
 * JavaScript string APIs such as `String.prototype.slice` index by UTF-16
 * code unit. This walks the string one Unicode code point at a time,
 * accumulating the UTF-8 size of each code point until the requested byte
 * offset is reached, and returns the number of UTF-16 code units consumed.
 *
 * If `byteOffset` falls inside a multibyte character, the result is rounded
 * down to the start of that character. Offsets at or below zero yield 0 and
 * offsets past the end of the string yield `str.length`.
 *
 * @param str The source string
 * @param byteOffset The UTF-8 byte offset to convert
 * @returns The corresponding UTF-16 code unit offset into `str`
 */
function byteOffsetToCharOffset(str: string, byteOffset: number): number {
  let charOffset = 0;
  let byteCount = 0;

  // Iterating a string yields whole code points (surrogate pairs stay together).
  for (const char of str) {
    const codePoint = char.codePointAt(0) ?? 0;

    // UTF-8 encoded size of this code point. A lone surrogate falls in the
    // 3-byte range, matching the size of the U+FFFD replacement an encoder emits.
    let charBytes: number;
    if (codePoint < 0x80) {
      charBytes = 1;
    } else if (codePoint < 0x800) {
      charBytes = 2;
    } else if (codePoint < 0x10000) {
      charBytes = 3;
    } else {
      charBytes = 4;
    }

    if (byteCount + charBytes > byteOffset) break;

    byteCount += charBytes;
    // Advance by UTF-16 code units, not code points: characters outside the
    // BMP occupy two code units, and `slice` counts code units.
    charOffset += char.length;
  }

  return charOffset;
}
1127
1228export function leadingComment (
@@ -18,20 +34,13 @@ export function leadingComment(
1834 // SWC uses byte offsets, but JavaScript strings use character offsets.
1935 // When there are multibyte UTF-8 characters, we need to adjust.
2036 // Convert the start and end byte offsets into character offsets before slicing.
21- const prefixLength = Math . min ( start - srcSpanStart , src . length ) ;
22- const prefix = src . slice ( 0 , prefixLength ) ;
23- const byteDiff = computeByteLengthDiff ( prefix ) ;
24-
25- // Adjust the slice offsets by the byte difference
26- const adjustedStart = start - srcSpanStart - byteDiff ;
27- const adjustedEnd =
28- end -
29- srcSpanStart -
30- computeByteLengthDiff ( src . slice ( 0 , Math . min ( end - srcSpanStart , src . length ) ) ) ;
31-
32- let commentString = src
33- . slice ( Math . max ( 0 , adjustedStart ) , Math . max ( 0 , adjustedEnd ) )
34- . trim ( ) ;
37+ const startByteOffset = start - srcSpanStart ;
38+ const endByteOffset = end - srcSpanStart ;
39+
40+ const startCharOffset = byteOffsetToCharOffset ( src , startByteOffset ) ;
41+ const endCharOffset = byteOffsetToCharOffset ( src , endByteOffset ) ;
42+
43+ let commentString = src . slice ( startCharOffset , endCharOffset ) . trim ( ) ;
3544
3645 if ( commentString . includes ( ' * ' ) && ! / \/ \* \* ( [ \s \S ] * ?) \* \/ / . test ( commentString ) ) {
3746 // The comment block seems to be JSDoc but was sliced incorrectly
0 commit comments