@@ -34,6 +34,12 @@ private sealed class EmbeddedFontInfo
3434 public int [ ] Bbox = [ - 166 , - 225 , 1000 , 931 ] ;
3535 /// <summary>Maps Unicode code point → CID. BMP chars use identity; non-BMP use PUA slots.</summary>
3636 public Dictionary < int , int > CpToCid = new ( ) ;
37+ /// <summary>Glyph advance widths indexed by glyph ID (from hmtx table).</summary>
38+ public ushort [ ] Advances = [ ] ;
39+ /// <summary>Maps Unicode code point → glyph ID (from cmap table).</summary>
40+ public Dictionary < int , ushort > Cmap = new ( ) ;
41+ /// <summary>Font units per em (from head table).</summary>
42+ public int UnitsPerEm = 1000 ;
3743 // PDF object numbers (assigned during Write)
3844 public int ToUnicodeObj , DescriptorObj , CidFontObj , Type0Obj , FontFileObj , CidToGidObj ;
3945 }
@@ -420,6 +426,9 @@ void LoadFontFile(string path)
420426 CapHeight = ( int ) ( capH * scale ) ,
421427 Bbox = [ .. bbox . Select ( v => ( int ) ( v * scale ) ) ] ,
422428 CpToCid = cpToCid ,
429+ Advances = advances ,
430+ Cmap = cmap ,
431+ UnitsPerEm = upm ,
423432 } ) ;
424433 }
425434
@@ -992,25 +1001,74 @@ private static string BuildContentStream(PdfPage page, bool hasUnicodeFont, Dict
9921001 sb . Append ( "\n " ) ;
9931002 sb . Append ( "2 Tr\n " ) ; // rendering mode: fill + stroke
9941003 }
995- // Apply word spacing (Tw) for justified text
996- if ( block . WordSpacing != 0 )
997- sb . Append ( $ "{ block . WordSpacing . ToString ( "F2" , CultureInfo . InvariantCulture ) } Tw\n ") ;
998- // Always set character spacing to prevent Tc from previous
999- // text blocks leaking through the graphics state.
1004+ // Determine the block's preferred font slot for font-aware
1005+ // width computation and Tz scaling.
1006+ var blockPrefSlot = - 1 ;
1007+ if ( fontNameToSlot != null && ! string . IsNullOrWhiteSpace ( block . PreferredFontName ) )
1008+ {
1009+ if ( hasBoldItalicFontVariant )
1010+ fontNameToSlot . TryGetValue ( boldItalicFontKey ! , out blockPrefSlot ) ;
1011+ if ( blockPrefSlot < 0 && hasBoldFontVariant )
1012+ fontNameToSlot . TryGetValue ( boldFontKey ! , out blockPrefSlot ) ;
1013+ if ( blockPrefSlot < 0 && hasItalicFontVariant )
1014+ fontNameToSlot . TryGetValue ( italicFontKey ! , out blockPrefSlot ) ;
1015+ if ( blockPrefSlot < 0 )
1016+ fontNameToSlot . TryGetValue ( block . PreferredFontName ! , out blockPrefSlot ) ;
1017+ }
1018+
1019+ // For CID/Identity-H fonts, Tw (word spacing) does NOT work —
1020+ // the PDF spec applies Tw only to single-byte 0x20.
1021+ // Instead: use Tz to correct glyph width (actual vs layout estimate),
1022+ // and TJ displacement values to add word spacing at space boundaries.
1023+ // TJ displacements are scaled by Tz/100, so we compensate for that.
1024+ var wordSpacingTJ = 0 ; // for CID path only
1025+ var cidTzPercent = 100.0 ;
1026+ {
1027+ EmbeddedFontInfo ? efForCid = null ;
1028+ if ( blockPrefSlot >= 0 && embeddedFonts != null && blockPrefSlot < embeddedFonts . Count )
1029+ efForCid = embeddedFonts [ blockPrefSlot ] ;
1030+ if ( efForCid != null && block . MaxWidth . HasValue )
1031+ {
1032+ var actualGlyphWidth = MeasureEmbeddedFontWidth ( block . Text , block . FontSize , efForCid ) ;
1033+ if ( actualGlyphWidth > 0 )
1034+ cidTzPercent = ( double ) block . MaxWidth . Value / actualGlyphWidth * 100.0 ;
1035+ // Clamp: only compress, never expand beyond 100%
1036+ if ( cidTzPercent > 100.0 ) cidTzPercent = 100.0 ;
1037+ }
1038+ if ( block . WordSpacing > 0 )
1039+ {
1040+ // TJ displacement = -(ws / fontSize / (Tz/100)) * 1000
1041+ // because PDF applies × Tz/100 to TJ values in text space
1042+ var tzFactor = cidTzPercent / 100.0 ;
1043+ wordSpacingTJ = - ( int ) Math . Round ( ( double ) block . WordSpacing / block . FontSize / tzFactor * 1000.0 ) ;
1044+ }
1045+ }
1046+ // Don't emit Tw for CID path (handled by TJ). Tc is still needed.
10001047 sb . Append ( $ "{ block . CharSpacing . ToString ( "F2" , CultureInfo . InvariantCulture ) } Tc\n ") ;
1001- // Apply horizontal scaling if text overflows MaxWidth;
1002- // always reset Tz to prevent scaling from previous blocks leaking .
1048+ // Tz: for CID path, use computed cidTzPercent.
1049+ // For WinAnsi fallback with no embedded font, keep compress-only Tz .
10031050 if ( block . MaxWidth . HasValue )
10041051 {
1005- var naturalWidth = MeasureTextWidth ( block . Text , block . FontSize , block . CharSpacing , bold : block . Bold ) ;
1006- if ( naturalWidth > block . MaxWidth . Value && naturalWidth > 0 )
1052+ EmbeddedFontInfo ? efForTz = null ;
1053+ if ( blockPrefSlot >= 0 && embeddedFonts != null && blockPrefSlot < embeddedFonts . Count )
1054+ efForTz = embeddedFonts [ blockPrefSlot ] ;
1055+ if ( efForTz != null )
10071056 {
1008- var tzPercent = ( block . MaxWidth . Value / naturalWidth ) * 100.0 ;
1009- sb . Append ( $ "{ tzPercent . ToString ( "F1" , CultureInfo . InvariantCulture ) } Tz\n ") ;
1057+ sb . Append ( $ "{ cidTzPercent . ToString ( "F1" , CultureInfo . InvariantCulture ) } Tz\n ") ;
10101058 }
10111059 else
10121060 {
1013- sb . Append ( "100.0 Tz\n " ) ;
1061+ // Fallback: Helvetica metrics, compress-only Tz
1062+ var naturalWidth = MeasureTextWidth ( block . Text , block . FontSize , block . CharSpacing , bold : block . Bold ) ;
1063+ if ( naturalWidth > block . MaxWidth . Value && naturalWidth > 0 )
1064+ {
1065+ var tzPercent = ( block . MaxWidth . Value / naturalWidth ) * 100.0 ;
1066+ sb . Append ( $ "{ tzPercent . ToString ( "F1" , CultureInfo . InvariantCulture ) } Tz\n ") ;
1067+ }
1068+ else
1069+ {
1070+ sb . Append ( "100.0 Tz\n " ) ;
1071+ }
10141072 }
10151073 }
10161074 else
@@ -1021,23 +1079,8 @@ private static string BuildContentStream(PdfPage page, bool hasUnicodeFont, Dict
10211079
10221080 // Split text into runs by font slot. Default all chars to slot 0 (F2).
10231081 var codePoints = ShapeArabicCodePoints ( EnumerateCodePoints ( block . Text ) . ToList ( ) ) ;
1024- // Per-block font preference: if the block specifies a preferred font,
1025- // try to use that font's slot for each codepoint (if the font includes it).
1026- var blockPrefSlot = - 1 ;
1027- if ( fontNameToSlot != null && ! string . IsNullOrWhiteSpace ( block . PreferredFontName ) )
1028- {
1029- // Use the bold italic font variant slot if available (highest priority).
1030- if ( hasBoldItalicFontVariant )
1031- fontNameToSlot . TryGetValue ( boldItalicFontKey ! , out blockPrefSlot ) ;
1032- // Use the bold font variant slot if available; otherwise fall back to regular.
1033- if ( blockPrefSlot < 0 && hasBoldFontVariant )
1034- fontNameToSlot . TryGetValue ( boldFontKey ! , out blockPrefSlot ) ;
1035- // Use the italic font variant slot if available.
1036- if ( blockPrefSlot < 0 && hasItalicFontVariant )
1037- fontNameToSlot . TryGetValue ( italicFontKey ! , out blockPrefSlot ) ;
1038- if ( blockPrefSlot < 0 )
1039- fontNameToSlot . TryGetValue ( block . PreferredFontName ! , out blockPrefSlot ) ;
1040- }
1082+ // Per-block font preference: blockPrefSlot was already determined
1083+ // above for Tz computation. Re-use it for run assignment.
10411084 var runs = new List < ( int fontSlot , List < int > cps ) > ( ) ;
10421085 foreach ( var cp in codePoints )
10431086 {
@@ -1062,20 +1105,50 @@ private static string BuildContentStream(PdfPage page, bool hasUnicodeFont, Dict
10621105 {
10631106 var fontName = $ "F{ run . fontSlot + 2 } ";
10641107 sb . Append ( $ "/{ fontName } { fontSize } Tf\n ") ;
1065- sb . Append ( '<' ) ;
1066- foreach ( var cp in run . cps )
1108+ // Use TJ (array form) to insert word spacing at space boundaries.
1109+ // Tw doesn't work for CID/Identity-H fonts, so we use TJ
1110+ // displacement values to add spacing after each space character.
1111+ if ( wordSpacingTJ != 0 )
1112+ {
1113+ sb . Append ( '[' ) ;
1114+ sb . Append ( '<' ) ;
1115+ foreach ( var cp in run . cps )
1116+ {
1117+ var cid = cp ;
1118+ if ( embeddedFonts != null && run . fontSlot < embeddedFonts . Count )
1119+ {
1120+ var ef = embeddedFonts [ run . fontSlot ] ;
1121+ if ( ef . CpToCid . TryGetValue ( cp , out var mapped ) )
1122+ cid = mapped ;
1123+ }
1124+ sb . Append ( cid . ToString ( "X4" ) ) ;
1125+ // Insert TJ displacement after space characters
1126+ if ( cp == ' ' )
1127+ {
1128+ sb . Append ( '>' ) ;
1129+ sb . Append ( wordSpacingTJ . ToString ( CultureInfo . InvariantCulture ) ) ;
1130+ sb . Append ( '<' ) ;
1131+ }
1132+ }
1133+ sb . Append ( ">] TJ\n " ) ;
1134+ }
1135+ else
10671136 {
1068- // Map code point to CID via the font's CpToCid table
1069- var cid = cp ;
1070- if ( embeddedFonts != null && run . fontSlot < embeddedFonts . Count )
1137+ // No word spacing — use simple Tj
1138+ sb . Append ( '<' ) ;
1139+ foreach ( var cp in run . cps )
10711140 {
1072- var ef = embeddedFonts [ run . fontSlot ] ;
1073- if ( ef . CpToCid . TryGetValue ( cp , out var mapped ) )
1074- cid = mapped ;
1141+ var cid = cp ;
1142+ if ( embeddedFonts != null && run . fontSlot < embeddedFonts . Count )
1143+ {
1144+ var ef = embeddedFonts [ run . fontSlot ] ;
1145+ if ( ef . CpToCid . TryGetValue ( cp , out var mapped ) )
1146+ cid = mapped ;
1147+ }
1148+ sb . Append ( cid . ToString ( "X4" ) ) ;
10751149 }
1076- sb . Append ( cid . ToString ( "X4" ) ) ;
1150+ sb . Append ( "> Tj \n " ) ;
10771151 }
1078- sb . Append ( "> Tj\n " ) ;
10791152 }
10801153
10811154 if ( block . Bold )
@@ -1110,6 +1183,24 @@ private static string BuildContentStream(PdfPage page, bool hasUnicodeFont, Dict
11101183 return sb . ToString ( ) ;
11111184 }
11121185
/// <summary>
/// Measures text width using an embedded font's actual glyph advance widths.
/// Returns the width in points for the given font size, excluding Tc/Tw contributions.
/// </summary>
/// <param name="text">Text to measure. A null or empty string measures as 0.</param>
/// <param name="fontSize">Font size in points used to scale font units.</param>
/// <param name="ef">
/// Embedded font whose <c>Cmap</c> (code point → glyph ID), <c>Advances</c>
/// (advance per glyph ID) and <c>UnitsPerEm</c> are used for the measurement.
/// </param>
/// <returns>
/// The summed advance of all code points scaled to <paramref name="fontSize"/>,
/// or 0 when the text is empty or the font reports a non-positive units-per-em.
/// </returns>
private static double MeasureEmbeddedFontWidth(string text, float fontSize, EmbeddedFontInfo ef)
{
    // A non-positive UnitsPerEm would make the final division produce
    // Infinity/NaN; report "no measurement" instead.
    if (string.IsNullOrEmpty(text) || ef.UnitsPerEm <= 0)
        return 0;

    double total = 0;
    for (var i = 0; i < text.Length; i++)
    {
        // Cmap is keyed by Unicode code point, so a surrogate pair must be
        // combined into one code point rather than looked up as two UTF-16 units.
        int cp = text[i];
        if (char.IsHighSurrogate(text[i]) && i + 1 < text.Length && char.IsLowSurrogate(text[i + 1]))
        {
            cp = char.ConvertToUtf32(text[i], text[i + 1]);
            i++; // skip the low surrogate that was just consumed
        }

        if (ef.Cmap.TryGetValue(cp, out var gid) && gid < ef.Advances.Length)
            total += ef.Advances[gid];
        else
            total += ef.UnitsPerEm / 2.0; // fallback: half em (floating-point, no truncation)
    }

    // Convert from font design units to points at the requested size.
    return total * fontSize / ef.UnitsPerEm;
}
1203+
11131204 /// <summary>
11141205 /// Measures the natural rendering width of text in Helvetica at the given font size.
11151206 /// Uses the standard Helvetica character width table.
0 commit comments