Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions coverage/emitted-schema-known-failures.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,4 @@
[
{
"id": "ilpa-orphaned-ppr-close",
"issue": "#450",
"match": "not well-formed: Opening and ending tag mismatch: \"w:body\" != \"w:pPr\"",
"reason": "ILPA-pair comparison drops the opening <w:p><w:pPr> of a paragraph-mark sectPr paragraph, leaving orphaned close tags at body level"
},
{
"id": "duplicate-rprchange-in-rpr",
"issue": "#451",
Expand Down
32 changes: 30 additions & 2 deletions packages/docx-core/src/baselines/diffmatch/xmlParser.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,36 @@ describe('diffmatch xml parser (xmldom)', () => {
await then('the body parts and sectPr are correctly separated', () => {
expect(parts.beforeBody).toContain('<w:body>');
expect(parts.afterBody).toContain('</w:body>');
expect(extracted.content).toContain('<w:p>');
expect(extracted.sectPr).toContain('<w:sectPr>');
expect(extracted.content).toContain('<w:p');
expect(extracted.content).toContain('<w:t>A</w:t>');
expect(extracted.sectPr).toContain('<w:sectPr');
expect(extracted.sectPr).toContain('<w:pgSz');
});
});

test('does not extract a paragraph-mark sectPr as a body-level sectPr', async ({ given, when, then }: AllureBddContext) => {
  // State shared between the BDD steps; each step fills in what the next one reads.
  let bodyParts: ReturnType<typeof getBodyContent>;
  let result: ReturnType<typeof extractSectPr>;

  await given('a body ending with a paragraph whose pPr contains sectPr and run content', () => {
    // The sectPr sits inside the last paragraph's pPr and is followed by a run,
    // so the body text does not end with the sectPr element itself.
    const documentXml = `<?xml version="1.0" encoding="UTF-8"?>
<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:body>
<w:p><w:r><w:t>A</w:t></w:r></w:p>
<w:p><w:pPr><w:sectPr><w:pgSz w:w="12240" w:h="15840"/></w:sectPr></w:pPr><w:r><w:br w:type="page"/></w:r></w:p>
</w:body>
</w:document>`;
    bodyParts = getBodyContent(documentXml);
  });

  await when('sectPr is extracted for document reconstruction', () => {
    result = extractSectPr(bodyParts.bodyContent);
  });

  await then('the paragraph remains intact and no body-level sectPr is returned', () => {
    const { sectPr, content } = result;
    // Nothing may be split off as a body-level sectPr...
    expect(sectPr).toBeNull();
    // ...and the paragraph's pPr/sectPr/run sequence must still be present in the content.
    expect(content).toContain('<w:pPr><w:sectPr>');
    expect(content).toContain('</w:pPr><w:r>');
  });
});
});
20 changes: 12 additions & 8 deletions packages/docx-core/src/baselines/diffmatch/xmlParser.ts
Original file line number Diff line number Diff line change
/**
 * Splits a trailing body-level `<w:sectPr>` element off serialized body XML
 * (e.g. the inner content of `<w:body>`).
 *
 * The element is extracted only when `bodyContent`, ignoring trailing
 * whitespace, ends with `</w:sectPr>` and the opening tag that pairs with that
 * closing tag is not preceded by an unclosed `<w:p>`. A sectPr that belongs to
 * a paragraph (`<w:p><w:pPr><w:sectPr>…`) is therefore left in place.
 *
 * @param bodyContent - XML text to inspect.
 * @returns `sectPr` is the extracted element text, or `null` when nothing was
 *   extracted. `content` is `bodyContent` with the element removed (trailing
 *   whitespace is kept), or the unchanged input when `sectPr` is `null`.
 */
export function extractSectPr(bodyContent: string): {
  content: string;
  sectPr: string | null;
} {
  const trimmed = bodyContent.trimEnd();
  if (!trimmed.endsWith('</w:sectPr>')) return { content: bodyContent, sectPr: null };

  // Pair the trailing close tag with its real opening tag; a plain
  // lastIndexOf('<w:sectPr') would land on a sectPr nested in <w:sectPrChange>.
  const sectPrStart = findTrailingSectPrStart(trimmed);
  if (sectPrStart < 0) return { content: bodyContent, sectPr: null };

  // Defensive: an unclosed <w:p> right before the sectPr means it is a
  // paragraph-mark sectPr (or a broken fragment), not a body-level one.
  if (hasUnclosedParagraph(trimmed.slice(0, sectPrStart))) {
    return { content: bodyContent, sectPr: null };
  }

  const trailingWhitespace = bodyContent.slice(trimmed.length);
  return {
    content: bodyContent.slice(0, sectPrStart) + trailingWhitespace,
    sectPr: trimmed.slice(sectPrStart),
  };
}

/**
 * Returns the index of the `<w:sectPr` opening tag that pairs with the last
 * `</w:sectPr>` in `xml`, or -1 when that closing tag has no opening partner.
 */
function findTrailingSectPrStart(xml: string): number {
  // The lookahead keeps <w:sectPrChange> from being treated as <w:sectPr>.
  const sectPrTag = /<(\/?)w:sectPr(?=[\s/>])[^>]*>/g;
  const openStarts: number[] = [];
  let lastClosedStart = -1;

  let tag: RegExpExecArray | null;
  while ((tag = sectPrTag.exec(xml)) !== null) {
    if (tag[1] === '/') {
      // An unmatched close tag yields -1; scanning continues so that a later,
      // well-formed element can still be paired.
      lastClosedStart = openStarts.pop() ?? -1;
    } else if (!tag[0].endsWith('/>')) {
      // Self-closing <w:sectPr/> opens nothing and is skipped.
      openStarts.push(tag.index);
    }
  }
  return lastClosedStart;
}

/**
 * Reports whether `xml` ends inside a paragraph, i.e. a non-self-closing
 * `<w:p>` / `<w:p …>` tag appears after the last `</w:p>`.
 */
function hasUnclosedParagraph(xml: string): boolean {
  const lastClose = xml.lastIndexOf('</w:p>');
  const tail = lastClose < 0 ? xml : xml.slice(lastClose);
  // Exact element name only: <w:pPr>, <w:pgSz>, <w:permEnd>, <w:proofErr>, …
  // do not count, and neither does a self-closing empty paragraph <w:p …/>.
  return /<w:p(?:\s[^>]*)?(?<!\/)>/.test(tail);
}

Expand Down
Binary file modified packages/docx-core/src/testing/outputs/typescript_redline.docx
Binary file not shown.