-
Notifications
You must be signed in to change notification settings - Fork 724
feat(doc): add v2 XML content guards for bare ampersands and deprecated tags #822
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
a29a8bf
9fa94ab
259bc0e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -279,3 +279,78 @@ func leadingRun(s string, c byte) string { | |
| } | ||
| return s[:i] | ||
| } | ||
|
|
||
| // ── v2 XML content guards ────────────────────────────────────────────────── | ||
|
|
||
// xmlEntityRe matches a valid XML entity reference: &amp; &lt; &gt; &apos;
// &quot; &#N; or &#xH;. Used to skip over valid references when scanning for
// bare ampersands.
var xmlEntityRe = regexp.MustCompile(`&(amp|lt|gt|apos|quot|#\d+|#x[0-9a-fA-F]+);`)

// CheckV2XMLBareAmpersand returns a non-empty error message when content
// contains a bare & that would cause the v2 XML parser to reject the request,
// and the empty string otherwise (including for empty content).
// Only runs when --doc-format xml (the default). Callers in Validate should
// return this as a hard error.
//
// An ampersand counts as "bare" when it does not start one of the references
// accepted by xmlEntityRe; unknown named entities such as &nbsp; are therefore
// flagged too.
//
// Go's regexp package does not support lookahead, so we detect bare ampersands
// by replacing all valid entity references with a placeholder and then
// checking whether any & remains.
func CheckV2XMLBareAmpersand(content string) string {
	// Fast path: no ampersand at all (this also covers empty content).
	if !strings.Contains(content, "&") {
		return ""
	}
	// Replace every valid entity reference with a fixed placeholder so that
	// the subsequent Contains check only fires on truly bare ampersands.
	// ("ENTITY" is not the same length as each entity; byte offsets are not
	// preserved, but that is fine — we only need a yes/no bare-& answer.)
	stripped := xmlEntityRe.ReplaceAllString(content, "ENTITY")
	if !strings.Contains(stripped, "&") {
		return ""
	}
	return "content contains a bare & character that is not a valid XML entity reference; " +
		"the v2 XML parser will reject the request. " +
		"Escape it as &amp; (and < as &lt;, > as &gt; where needed)."
}
|
|
||
// quoteContainerTagRe matches the opening of a <quote-container> element:
// the literal "<quote-container" followed by whitespace, ">", or "/". Requiring
// that delimiter avoids false positives on longer element names that merely
// start with "quote-container" (e.g. <quote-containerized>). Closing tags
// ("</quote-container>") do not match on their own because of the leading "/".
var quoteContainerTagRe = regexp.MustCompile(`<quote-container(?:\s|>|/)`)
|
|
||
// columnIntWidthRe matches a <column … width="N" …> attribute where N is a
// plain integer (not a float). The pattern requires:
//   - the element name to be exactly "column": the name must be followed by
//     whitespace, so <column-group width="5"> is not matched (a plain \b would
//     accept it because "-" is a non-word character)
//   - whitespace before "width" to exclude attributes like data-width
//   - the value to consist only of digits between a matching pair of quotes,
//     so width="0.5" does NOT match (the dot prevents \d+ from reaching the
//     closing quote) and neither does a mismatched pair such as "50'.
//
// Go's regexp (RE2) has no backreferences, so the two quote styles are spelled
// out as an alternation instead of (["'])…\1.
var columnIntWidthRe = regexp.MustCompile(`<column(?:\s[^>]*)?\swidth\s*=\s*(?:"\d+"|'\d+')`)
|
|
||
| // CheckV2XMLWarnings returns a list of non-fatal warnings for v2 XML content. | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The regex Suggested fix: use var columnWidthRe = regexp.MustCompile(`<column\b[^>]*\swidth\s*=\s*(['"])([^'"]+)\1`)
// then in CheckV2XMLWarnings:
for _, m := range columnWidthRe.FindAllStringSubmatch(content, -1) {
val := m[2]
if _, err := strconv.Atoi(val); err == nil {
// pure integer → warn
}
}Alternatively, if the regex approach is preferred, add a negative lookahead equivalent by excluding values containing a dot:
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed in 259bc0e. Used |
||
| // These describe constructs that are silently dropped or ignored by the v2 API | ||
| // but do not cause the request to fail. Callers should print these to stderr | ||
| // before executing the API call. | ||
| // | ||
| // Warnings emitted: | ||
| // | ||
| // 1. <quote-container> is not recognised by the v2 XML parser; the block is | ||
| // silently dropped. Use <blockquote> instead. | ||
| // | ||
| // 2. <column width="N"> with an integer value has no effect in v2. The | ||
| // correct attribute is width-ratio="0.N" (e.g. width-ratio="0.5"). | ||
| func CheckV2XMLWarnings(content string) []string { | ||
| if content == "" { | ||
| return nil | ||
| } | ||
| var warnings []string | ||
| if quoteContainerTagRe.MatchString(content) { | ||
| warnings = append(warnings, | ||
| "<quote-container> is not supported in v2 XML and will be silently dropped; "+ | ||
| "use <blockquote> instead.") | ||
| } | ||
| if columnIntWidthRe.MatchString(content) { | ||
| warnings = append(warnings, | ||
| "<column width=\"N\"> with an integer value has no effect in v2 XML; "+ | ||
| "use width-ratio=\"0.5\" (float 0–1) to set column width.") | ||
| } | ||
| return warnings | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -373,3 +373,121 @@ func TestDocsUpdateWarningsEmpty(t *testing.T) { | |
| t.Fatalf("expected no warnings, got: %v", warnings) | ||
| } | ||
| } | ||
|
|
||
| func TestCheckV2XMLBareAmpersand(t *testing.T) { | ||
| t.Parallel() | ||
|
|
||
| tests := []struct { | ||
| name string | ||
| content string | ||
| wantErr bool | ||
| }{ | ||
| {name: "empty is fine", content: "", wantErr: false}, | ||
| {name: "no ampersand", content: "<text>hello world</text>", wantErr: false}, | ||
| {name: "amp entity is fine", content: "<text>a & b</text>", wantErr: false}, | ||
| {name: "lt entity is fine", content: "<tag>", wantErr: false}, | ||
| {name: "gt entity is fine", content: "a > b", wantErr: false}, | ||
| {name: "apos entity is fine", content: "'", wantErr: false}, | ||
| {name: "quot entity is fine", content: """, wantErr: false}, | ||
| {name: "decimal numeric ref is fine", content: "A", wantErr: false}, | ||
| {name: "hex numeric ref is fine", content: "A", wantErr: false}, | ||
| {name: "bare ampersand flagged", content: "a & b", wantErr: true}, | ||
| {name: "bare ampersand in tag flagged", content: `<text color="blue">R&D</text>`, wantErr: true}, | ||
| {name: "unknown entity flagged", content: " ", wantErr: true}, | ||
| // mixed: valid entity alongside a bare & — the bare one must still be caught | ||
| {name: "valid entity mixed with bare ampersand flagged", content: "a & b & c", wantErr: true}, | ||
| } | ||
| for _, tt := range tests { | ||
| t.Run(tt.name, func(t *testing.T) { | ||
| t.Parallel() | ||
| got := CheckV2XMLBareAmpersand(tt.content) | ||
| if (got != "") != tt.wantErr { | ||
| t.Fatalf("CheckV2XMLBareAmpersand(%q) = %q, wantErr=%v", tt.content, got, tt.wantErr) | ||
| } | ||
| }) | ||
| } | ||
| } | ||
|
|
||
| func TestCheckV2XMLWarnings(t *testing.T) { | ||
| t.Parallel() | ||
|
|
||
| tests := []struct { | ||
| name string | ||
| content string | ||
| wantContains []string | ||
| wantLen int | ||
| }{ | ||
| {name: "empty returns nil", content: "", wantLen: 0}, | ||
| {name: "clean XML no warnings", content: "<blockquote><p>text</p></blockquote>", wantLen: 0}, | ||
| { | ||
| name: "quote-container triggers warning", | ||
| content: `<quote-container><p>text</p></quote-container>`, | ||
| wantContains: []string{"quote-container", "blockquote"}, | ||
| wantLen: 1, | ||
| }, | ||
| { | ||
| name: "column integer width triggers warning", | ||
| content: `<grid><column width="50"><p>A</p></column></grid>`, | ||
| wantContains: []string{"width-ratio"}, | ||
| wantLen: 1, | ||
| }, | ||
| { | ||
| name: "column float width-ratio is fine", | ||
| content: `<grid><column width-ratio="0.5"><p>A</p></column></grid>`, | ||
| wantLen: 0, | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Missing test case: mixed valid entities and bare ampersand. There's no test for content that contains both valid entities and a bare `&`. Suggested addition: {name: "mixed valid entity and bare ampersand flagged", content: "a &amp; b & c", wantErr: true},
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed in 259bc0e. Added test case |
||
| }, | ||
| { | ||
| name: "both issues produce two warnings", | ||
| content: `<quote-container/><grid><column width="30"/></grid>`, | ||
| wantLen: 2, | ||
| }, | ||
| // false-positive guards: names that start with "quote-container" but aren't the tag | ||
| { | ||
| name: "quote-containerized attribute prefix is not flagged", | ||
| content: `<block quote-containerized="true"/>`, | ||
| wantLen: 0, | ||
| }, | ||
| // false-positive guard: data-width should not trigger column warning | ||
| { | ||
| name: "data-width attribute is not flagged", | ||
| content: `<column data-width="50"/>`, | ||
| wantLen: 0, | ||
| }, | ||
| // single-quoted width should be caught | ||
| { | ||
| name: "column single-quoted integer width triggers warning", | ||
| content: `<grid><column width='30'/></grid>`, | ||
| wantLen: 1, | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Missing test case: a float `width` value such as `<column width="0.5">` must not be flagged. This is the flip side of the integer-width case. Suggested addition: {
name: "column float width value is fine",
content: `<grid><column width="0.5"><p>A</p></column></grid>`,
wantLen: 0,
},
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed in 259bc0e. Added test case |
||
| }, | ||
| // width with spaces around = should be caught | ||
| { | ||
| name: "column width with spaces around equals triggers warning", | ||
| content: `<grid><column width = "40"/></grid>`, | ||
| wantLen: 1, | ||
| }, | ||
| // float value must NOT trigger warning — width="0.5" is valid width-ratio syntax | ||
| { | ||
| name: "column float width value is not flagged", | ||
| content: `<grid><column width="0.5"><p>A</p></column></grid>`, | ||
| wantLen: 0, | ||
| }, | ||
| } | ||
| for _, tt := range tests { | ||
| t.Run(tt.name, func(t *testing.T) { | ||
| t.Parallel() | ||
| got := CheckV2XMLWarnings(tt.content) | ||
| if len(got) != tt.wantLen { | ||
| t.Fatalf("CheckV2XMLWarnings(%q) returned %d warnings, want %d: %v", tt.content, len(got), tt.wantLen, got) | ||
| } | ||
| combined := "" | ||
| for _, w := range got { | ||
| combined += w | ||
| } | ||
| for _, sub := range tt.wantContains { | ||
| if !strings.Contains(combined, sub) { | ||
| t.Errorf("expected warning to contain %q, got: %s", sub, combined) | ||
| } | ||
| } | ||
| }) | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.