Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 75 additions & 6 deletions src/MiniWord/MiniWord.Implment.cs
Original file line number Diff line number Diff line change
@@ -1,25 +1,26 @@
namespace MiniSoftware
{
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Drawing.Charts;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using Extensions;
using Utility;
using HtmlToOpenXml;
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using System.Xml;
using System.Xml.Linq;
using Utility;
using A = DocumentFormat.OpenXml.Drawing;
using DW = DocumentFormat.OpenXml.Drawing.Wordprocessing;
using PIC = DocumentFormat.OpenXml.Drawing.Pictures;
using System.Xml;
using System.Xml.Linq;
using DocumentFormat.OpenXml.Drawing.Charts;
using System.Threading.Tasks;
using System.Threading;

public static partial class MiniWord
{
Expand Down Expand Up @@ -653,6 +654,21 @@ private static void ReplaceText(Paragraph p, WordprocessingDocument docx, Dictio

t.Remove();
}
else if (value is MiniWordHtml html)
{
AddHtmls(docx, run, new[] { html });
t.Remove();
}
else if (value is MiniWordHtml[] htmls)
{
AddHtmls(docx, run, htmls);
t.Remove();
}
else if (value is IEnumerable<MiniWordHtml> htmlList)
{
AddHtmls(docx, run, htmlList.ToArray());
t.Remove();
}
Comment on lines +657 to +666
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

对 MiniWordHtml、MiniWordHtml[] 和 IEnumerable<MiniWordHtml> 的处理存在代码重复,并且对 IEnumerable<MiniWordHtml> 调用 ToArray() 会带来不必要的性能开销。可以通过修改 AddHtmls 方法的签名,使其接受 IEnumerable<MiniWordHtml>,来简化这部分逻辑。这样可以合并 MiniWordHtml[] 和 IEnumerable<MiniWordHtml> 的处理分支,并避免不必要的内存分配和集合转换。

                            else if (value is MiniWordHtml html)
                            {
                                AddHtmls(docx, run, new[] { html });
                                t.Remove();
                            }
                            else if (value is IEnumerable<MiniWordHtml> htmlList)
                            {
                                AddHtmls(docx, run, htmlList);
                                t.Remove();
                            }

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

已调整

else
{
var newText = value is DateTime
Expand Down Expand Up @@ -1175,5 +1191,58 @@ private static byte[] GetBytes(string path)
return ms.ToArray();
}
}

#region html支持

/// <summary>
/// Converts each HTML fragment to OpenXML elements and inserts them into the document,
/// directly after the body-level paragraph that contains <paramref name="run"/>.
/// </summary>
/// <remarks>
/// Fragments, and the elements produced for each fragment, end up in the document in the
/// same order in which they were supplied. Insertion is best-effort: a fragment that fails
/// to parse or insert is reported through <see cref="System.Diagnostics.Trace"/> and skipped,
/// so the remaining fragments are still processed.
/// </remarks>
/// <param name="docx">Document whose main document part is handed to the HTML converter.</param>
/// <param name="run">Run that holds the placeholder; used to locate the insertion point.</param>
/// <param name="miniWordHtmls">
/// HTML fragments to insert. A <c>null</c> array, <c>null</c> entries and entries with a
/// null or empty <c>HtmlText</c> are ignored.
/// </param>
private static void AddHtmls(WordprocessingDocument docx, Run run, MiniWordHtml[] miniWordHtmls)
{
    if (miniWordHtmls == null || miniWordHtmls.Length == 0)
    {
        return;
    }

    // HTML block content (tables, for instance) cannot be placed inside a run or a paragraph,
    // so it is added as a sibling of the body-level paragraph that contains the placeholder.
    Paragraph topPara = FindTopPara(run);
    OpenXmlElement container = topPara?.Parent;
    if (container == null)
    {
        // No body-level paragraph was found for this run, so there is no valid insertion point.
        System.Diagnostics.Trace.TraceWarning(
            "MiniWord: HTML content was not inserted because the placeholder is not inside a body-level paragraph.");
        return;
    }

    // Each element is inserted after the previously inserted one. Anchoring every insert on
    // topPara itself would put later fragments in front of earlier ones.
    OpenXmlElement anchor = topPara;
    foreach (var miniWordHtml in miniWordHtmls)
    {
        if (string.IsNullOrEmpty(miniWordHtml?.HtmlText))
        {
            continue;
        }

        try
        {
            var converter = new HtmlConverter(docx.MainDocumentPart);
            var elements = converter.Parse(miniWordHtml.HtmlText);
            foreach (var element in elements)
            {
                anchor = container.InsertAfter(element, anchor);
            }
        }
        catch (Exception ex)
        {
            // Deliberately best-effort: one bad fragment must not abort the whole template
            // fill, but the failure is surfaced instead of being silently dropped.
            System.Diagnostics.Trace.TraceWarning($"MiniWord: failed to insert an HTML fragment: {ex}");
        }
    }
}
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

AddHtmls 方法存在两个主要问题:

  1. 潜在的 NullReferenceException: 当占位符在表格等嵌套结构中时,FindTopPara 会返回 null。当前代码没有检查 topPara 是否为 null,会导致程序在尝试访问 topPara.Parent 时崩溃。
  2. 异常被静默吞噬: catch (Exception) 块是空的,这意味着任何在HTML解析或插入过程中发生的错误都将被忽略,这会使问题排查变得极为困难。

建议重构此方法以解决这些问题,提高代码的健壮性和可维护性。

        /// <summary>
        /// Converts each HTML fragment to OpenXML elements and inserts them directly after the
        /// body-level paragraph that contains <paramref name="run"/>, keeping the supplied order.
        /// </summary>
        /// <param name="docx">Document whose main document part is handed to the HTML converter.</param>
        /// <param name="run">Run that holds the placeholder; used to locate the insertion point.</param>
        /// <param name="miniWordHtmls">HTML fragments to insert; a <c>null</c> array or <c>null</c> entries are ignored.</param>
        private static void AddHtmls(WordprocessingDocument docx, Run run, MiniWordHtml[] miniWordHtmls)
        {
            if (miniWordHtmls == null)
            {
                return;
            }

            // Find the body-level paragraph: tables produced from HTML cannot be placed
            // directly inside a run or a paragraph, so they are added as its siblings.
            Paragraph topPara = FindTopPara(run);
            if (topPara?.Parent == null)
            {
                System.Diagnostics.Trace.TraceWarning(
                    "MiniWord: HTML content was not inserted because the placeholder is not inside a body-level paragraph.");
                return;
            }

            // Track the last inserted element so that several fragments keep their order;
            // inserting every fragment right after topPara would reverse them.
            OpenXmlElement insertAfter = topPara;
            foreach (var miniWordHtml in miniWordHtmls)
            {
                if (miniWordHtml == null)
                {
                    continue;
                }

                try
                {
                    // Create the converter for this fragment and parse it.
                    HtmlConverter converter = new HtmlConverter(docx.MainDocumentPart);
                    var paras = converter.Parse(miniWordHtml.HtmlText);
                    for (var i = 0; i < paras.Count; i++)
                    {
                        insertAfter = topPara.Parent.InsertAfter(paras[i], insertAfter);
                    }
                }
                catch (Exception ex)
                {
                    // Best-effort: report the failure and continue with the next fragment.
                    System.Diagnostics.Trace.TraceWarning($"MiniWord: failed to insert an HTML fragment: {ex}");
                }
            }
        }


/// <summary>
/// Walks up the ancestors of <paramref name="run"/> and returns the paragraph that is a
/// direct child of the document <see cref="Body"/>.
/// </summary>
/// <param name="run">Run to start from; may be <c>null</c>.</param>
/// <returns>
/// The body-level paragraph that contains the run, or <c>null</c> when no ancestor paragraph
/// has the body as its parent (for example when the run's paragraph is nested in another
/// element instead of sitting directly in the body).
/// </returns>
private static Paragraph FindTopPara(Run run)
{
    for (var node = run?.Parent; node != null; node = node.Parent)
    {
        // "is Body" is already false for a null parent, so no separate null check is needed.
        if (node is Paragraph para && node.Parent is Body)
        {
            // Stop at the match; walking further up to the root would do no useful work.
            return para;
        }
    }

    return null;
}
Comment on lines +1227 to +1237
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

FindTopPara 方法中的 for 循环在找到匹配的段落后没有立即返回,而是继续向上遍历父元素直到根节点。这造成了不必要的性能开销。此外,循环的写法(for (var pnode = run.Parent; pnode != null;),并在循环体内更新 pnode)也略显不寻常,可以改写为更常规的 for 或 while 循环以提高可读性。

        /// <summary>
        /// Returns the nearest ancestor paragraph of <paramref name="run"/> whose parent is the
        /// document <see cref="Body"/>, or <c>null</c> when there is no such paragraph.
        /// </summary>
        /// <param name="run">Run whose ancestors are searched.</param>
        /// <returns>The matching paragraph, or <c>null</c>.</returns>
        private static Paragraph FindTopPara(Run run)
        {
            var current = run.Parent;
            while (current != null)
            {
                var owner = current.Parent;
                if (owner is Body && current is Paragraph match)
                {
                    return match;
                }

                // Not a body-level paragraph: continue with the next ancestor.
                current = owner;
            }

            return null;
        }

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

已调整


#endregion

}
}
6 changes: 4 additions & 2 deletions src/MiniWord/MiniWord.csproj
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFrameworks>net45;netstandard2.0;</TargetFrameworks>
<TargetFrameworks>net462;netstandard2.0;</TargetFrameworks>
<Version>0.9.1</Version>
</PropertyGroup>
<PropertyGroup>
Expand Down Expand Up @@ -33,6 +33,8 @@
</ItemGroup>
<ItemGroup>
<!--<PackageReference Include="DocumentFormat.OpenXml" Version="3.0.1" />-->
<PackageReference Include="DocumentFormat.OpenXml" Version="[3.1.1,4.0.0)" />
<PackageReference Include="DocumentFormat.OpenXml" Version="3.4.1" />
<PackageReference Include="HtmlToOpenXml.dll" Version="3.3.1" />
<PackageReference Include="System.Text.Encoding.CodePages" Version="10.0.3" />
</ItemGroup>
</Project>
10 changes: 10 additions & 0 deletions src/MiniWord/MiniWordHtml.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
namespace MiniSoftware
{
/// <summary>
/// Parameter object that carries an HTML fragment.
/// </summary>
public class MiniWordHtml
{
/// <summary>
/// Gets or sets the HTML markup text of the fragment.
/// </summary>
public string HtmlText { get; set; }
}
}