From a29a8bffb521b4b9446700aede4dd52847207c9c Mon Sep 17 00:00:00 2001 From: baiqing Date: Mon, 11 May 2026 17:32:10 +0800 Subject: [PATCH 1/3] feat(doc): add v2 XML content guards for bare ampersands and deprecated tags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add pre-flight checks for the v2 XML document path: - CheckV2XMLBareAmpersand: returns a hard error when content contains a bare & that is not a valid XML entity reference (&amp;, &lt;, &gt;, &apos;, &quot;, &#N;, &#xH;). Such bare ampersands cause the v2 XML parser to reject the request. - CheckV2XMLWarnings: returns non-fatal warnings for two silently-wrong constructs — <quote-container> (v2 drops it; use <blockquote>
) and <column width="N"> with an integer value (has no effect; use width-ratio="0.N"). Both checks are integrated into validateCreateV2/validateUpdateV2 (hard error) and executeCreateV2/executeUpdateV2 (warnings to stderr). Only fires when --doc-format is xml (the default). --- shortcuts/doc/docs_create_v2.go | 12 +++ shortcuts/doc/docs_update_check.go | 65 ++++++++++++++++ shortcuts/doc/docs_update_check_test.go | 98 +++++++++++++++++++++++++ shortcuts/doc/docs_update_v2.go | 11 +++ 4 files changed, 186 insertions(+) diff --git a/shortcuts/doc/docs_create_v2.go b/shortcuts/doc/docs_create_v2.go index 68ae824c9..c1ac1d366 100644 --- a/shortcuts/doc/docs_create_v2.go +++ b/shortcuts/doc/docs_create_v2.go @@ -5,6 +5,7 @@ package doc import ( "context" + "fmt" "strings" "github.com/larksuite/cli/shortcuts/common" @@ -27,6 +28,11 @@ func validateCreateV2(_ context.Context, runtime *common.RuntimeContext) error { if runtime.Str("parent-token") != "" && runtime.Str("parent-position") != "" { return common.FlagErrorf("--parent-token and --parent-position are mutually exclusive") } + if runtime.Str("doc-format") != "markdown" { + if msg := CheckV2XMLBareAmpersand(runtime.Str("content")); msg != "" { + return common.FlagErrorf("%s", msg) + } + } return nil } @@ -45,6 +51,12 @@ func dryRunCreateV2(_ context.Context, runtime *common.RuntimeContext) *common.D func executeCreateV2(_ context.Context, runtime *common.RuntimeContext) error { body := buildCreateBody(runtime) + if runtime.Str("doc-format") != "markdown" { + for _, w := range CheckV2XMLWarnings(runtime.Str("content")) { + fmt.Fprintf(runtime.IO().ErrOut, "warning: %s\n", w) + } + } + data, err := doDocAPI(runtime, "POST", "/open-apis/docs_ai/v1/documents", body) if err != nil { return err diff --git a/shortcuts/doc/docs_update_check.go b/shortcuts/doc/docs_update_check.go index cf71c1012..f8b2965f8 100644 --- a/shortcuts/doc/docs_update_check.go +++ b/shortcuts/doc/docs_update_check.go @@ -279,3 +279,68 @@ func leadingRun(s string, c
byte) string { } return s[:i] } + +// ── v2 XML content guards ────────────────────────────────────────────────── + +// xmlEntityRe matches a valid XML entity reference: & < > ' +// " &#N; or &#xH;. Used to skip over valid references when scanning for +// bare ampersands. +var xmlEntityRe = regexp.MustCompile(`&(amp|lt|gt|apos|quot|#\d+|#x[0-9a-fA-F]+);`) + +// CheckV2XMLBareAmpersand returns a non-empty error message when content +// contains a bare & that would cause the v2 XML parser to reject the request. +// Only runs when --doc-format xml (the default). Callers in Validate should +// return this as a hard error. +// +// Go's regexp package does not support lookahead, so we detect bare ampersands +// by replacing all valid entity references with a placeholder and then +// checking whether any & remains. +func CheckV2XMLBareAmpersand(content string) string { + if content == "" || !strings.Contains(content, "&") { + return "" + } + // Replace every valid entity with its same-length placeholder so positional + // byte offsets are preserved (not required here, but avoids false positives). + stripped := xmlEntityRe.ReplaceAllString(content, "ENTITY") + if !strings.Contains(stripped, "&") { + return "" + } + return "content contains a bare & character that is not a valid XML entity reference; " + + "the v2 XML parser will reject the request. " + + "Escape it as & (and < as <, > as > where needed)." +} + +// columnIntWidthRe matches a attribute where N is a +// plain integer. In v2 XML the valid attribute is width-ratio (a float 0–1), +// not width. An integer width silently has no effect on column sizing. +var columnIntWidthRe = regexp.MustCompile(`]*\bwidth="(\d+)"`) + +// CheckV2XMLWarnings returns a list of non-fatal warnings for v2 XML content. +// These describe constructs that are silently dropped or ignored by the v2 API +// but do not cause the request to fail. Callers should print these to stderr +// before executing the API call. 
+// +// Warnings emitted: +// +// 1. <quote-container> is not recognised by the v2 XML parser; the block is +// silently dropped. Use <blockquote>
instead. +// +// 2. with an integer value has no effect in v2. The +// correct attribute is width-ratio="0.N" (e.g. width-ratio="0.5"). +func CheckV2XMLWarnings(content string) []string { + if content == "" { + return nil + } + var warnings []string + if strings.Contains(content, " is not supported in v2 XML and will be silently dropped; "+ + "use
instead.") + } + if columnIntWidthRe.MatchString(content) { + warnings = append(warnings, + " with an integer value has no effect in v2 XML; "+ + "use width-ratio=\"0.5\" (float 0–1) to set column width.") + } + return warnings +} diff --git a/shortcuts/doc/docs_update_check_test.go b/shortcuts/doc/docs_update_check_test.go index 50905873a..2ed4f4e69 100644 --- a/shortcuts/doc/docs_update_check_test.go +++ b/shortcuts/doc/docs_update_check_test.go @@ -373,3 +373,101 @@ func TestDocsUpdateWarningsEmpty(t *testing.T) { t.Fatalf("expected no warnings, got: %v", warnings) } } + +func TestCheckV2XMLBareAmpersand(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + content string + wantErr bool + }{ + {name: "empty is fine", content: "", wantErr: false}, + {name: "no ampersand", content: "hello world", wantErr: false}, + {name: "amp entity is fine", content: "a & b", wantErr: false}, + {name: "lt entity is fine", content: "<tag>", wantErr: false}, + {name: "gt entity is fine", content: "a > b", wantErr: false}, + {name: "apos entity is fine", content: "'", wantErr: false}, + {name: "quot entity is fine", content: """, wantErr: false}, + {name: "decimal numeric ref is fine", content: "A", wantErr: false}, + {name: "hex numeric ref is fine", content: "A", wantErr: false}, + {name: "bare ampersand flagged", content: "a & b", wantErr: true}, + {name: "bare ampersand in tag flagged", content: `R&D`, wantErr: true}, + {name: "unknown entity flagged", content: " ", wantErr: true}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got := CheckV2XMLBareAmpersand(tt.content) + if (got != "") != tt.wantErr { + t.Fatalf("CheckV2XMLBareAmpersand(%q) = %q, wantErr=%v", tt.content, got, tt.wantErr) + } + }) + } +} + +func TestCheckV2XMLWarnings(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + content string + wantContains []string + wantLen int + }{ + {name: "empty returns nil", content: "", wantLen: 0}, + 
{name: "clean XML no warnings", content: "

text

", wantLen: 0}, + { + name: "quote-container triggers warning", + content: `

text

`, + wantContains: []string{"quote-container", "blockquote"}, + wantLen: 1, + }, + { + name: "column integer width triggers warning", + content: `

A

`, + wantContains: []string{"width-ratio"}, + wantLen: 1, + }, + { + name: "column float width-ratio is fine", + content: `

A

`, + wantLen: 0, + }, + { + name: "both issues produce two warnings", + content: ``, + wantLen: 2, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got := CheckV2XMLWarnings(tt.content) + if len(got) != tt.wantLen { + t.Fatalf("CheckV2XMLWarnings(%q) returned %d warnings, want %d: %v", tt.content, len(got), tt.wantLen, got) + } + combined := "" + for _, w := range got { + combined += w + } + for _, sub := range tt.wantContains { + if !containsStr(combined, sub) { + t.Errorf("expected warning to contain %q, got: %s", sub, combined) + } + } + }) + } +} + +func containsStr(s, sub string) bool { + return len(s) >= len(sub) && (s == sub || len(sub) == 0 || + func() bool { + for i := 0; i+len(sub) <= len(s); i++ { + if s[i:i+len(sub)] == sub { + return true + } + } + return false + }()) +} diff --git a/shortcuts/doc/docs_update_v2.go b/shortcuts/doc/docs_update_v2.go index 8501be015..7f626cd52 100644 --- a/shortcuts/doc/docs_update_v2.go +++ b/shortcuts/doc/docs_update_v2.go @@ -103,6 +103,11 @@ func validateUpdateV2(_ context.Context, runtime *common.RuntimeContext) error { return common.FlagErrorf("--command append requires --content") } } + if runtime.Str("doc-format") != "markdown" && content != "" { + if msg := CheckV2XMLBareAmpersand(content); msg != "" { + return common.FlagErrorf("%s", msg) + } + } return nil } @@ -124,6 +129,12 @@ func executeUpdateV2(_ context.Context, runtime *common.RuntimeContext) error { apiPath := fmt.Sprintf("/open-apis/docs_ai/v1/documents/%s", ref.Token) body := buildUpdateBody(runtime) + if runtime.Str("doc-format") != "markdown" { + for _, w := range CheckV2XMLWarnings(runtime.Str("content")) { + fmt.Fprintf(runtime.IO().ErrOut, "warning: %s\n", w) + } + } + data, err := doDocAPI(runtime, "PUT", apiPath, body) if err != nil { return err From 9fa94ab3a26aad5efc77fec1fd210295c812bc6b Mon Sep 17 00:00:00 2001 From: baiqing Date: Tue, 12 May 2026 13:33:01 +0800 Subject: [PATCH 2/3] fix(doc): 
tighten quote-container and column-width regex to avoid false positives --- shortcuts/doc/docs_update_check.go | 17 +++++++++++++---- shortcuts/doc/docs_update_check_test.go | 24 ++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/shortcuts/doc/docs_update_check.go b/shortcuts/doc/docs_update_check.go index f8b2965f8..1ecd6a26b 100644 --- a/shortcuts/doc/docs_update_check.go +++ b/shortcuts/doc/docs_update_check.go @@ -310,10 +310,19 @@ func CheckV2XMLBareAmpersand(content string) string { "Escape it as & (and < as <, > as > where needed)." } +// quoteContainerTagRe matches the opening of a element +// (tag name followed by whitespace, >, or />) to avoid false positives on +// hypothetical attributes or element names that start with "quote-container". +var quoteContainerTagRe = regexp.MustCompile(`|/)`) + // columnIntWidthRe matches a attribute where N is a -// plain integer. In v2 XML the valid attribute is width-ratio (a float 0–1), -// not width. An integer width silently has no effect on column sizing. -var columnIntWidthRe = regexp.MustCompile(`]*\bwidth="(\d+)"`) +// plain integer. The pattern requires whitespace before "width" to avoid +// matching unrelated attributes such as data-width, and accepts optional +// whitespace around "=" and either single or double quotes, so forms like +// width='50' or width = "50" are also detected. Go's RE2 engine does not +// support backreferences, so mismatched quote pairs (e.g. width="50') are +// also matched — that is acceptable for a non-blocking warning. +var columnIntWidthRe = regexp.MustCompile(`]*\swidth\s*=\s*['"]?\d+['"]?`) // CheckV2XMLWarnings returns a list of non-fatal warnings for v2 XML content. 
// These describe constructs that are silently dropped or ignored by the v2 API @@ -332,7 +341,7 @@ func CheckV2XMLWarnings(content string) []string { return nil } var warnings []string - if strings.Contains(content, " is not supported in v2 XML and will be silently dropped; "+ "use
instead.") diff --git a/shortcuts/doc/docs_update_check_test.go b/shortcuts/doc/docs_update_check_test.go index 2ed4f4e69..2ef91b479 100644 --- a/shortcuts/doc/docs_update_check_test.go +++ b/shortcuts/doc/docs_update_check_test.go @@ -439,6 +439,30 @@ func TestCheckV2XMLWarnings(t *testing.T) { content: ``, wantLen: 2, }, + // false-positive guards: names that start with "quote-container" but aren't the tag + { + name: "quote-containerized attribute prefix is not flagged", + content: ``, + wantLen: 0, + }, + // false-positive guard: data-width should not trigger column warning + { + name: "data-width attribute is not flagged", + content: ``, + wantLen: 0, + }, + // single-quoted width should be caught + { + name: "column single-quoted integer width triggers warning", + content: ``, + wantLen: 1, + }, + // width with spaces around = should be caught + { + name: "column width with spaces around equals triggers warning", + content: ``, + wantLen: 1, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { From 259bc0e529f79d7f52921cd0cf90a1def7bce465 Mon Sep 17 00:00:00 2001 From: baiqing Date: Thu, 14 May 2026 10:14:37 +0800 Subject: [PATCH 3/3] fix(doc): fix columnIntWidthRe float false positive, remove redundant containsStr helper, align empty-content guard --- shortcuts/doc/docs_create_v2.go | 2 +- shortcuts/doc/docs_update_check.go | 19 ++++++++++--------- shortcuts/doc/docs_update_check_test.go | 22 +++++++++------------- 3 files changed, 20 insertions(+), 23 deletions(-) diff --git a/shortcuts/doc/docs_create_v2.go b/shortcuts/doc/docs_create_v2.go index c1ac1d366..a380781da 100644 --- a/shortcuts/doc/docs_create_v2.go +++ b/shortcuts/doc/docs_create_v2.go @@ -28,7 +28,7 @@ func validateCreateV2(_ context.Context, runtime *common.RuntimeContext) error { if runtime.Str("parent-token") != "" && runtime.Str("parent-position") != "" { return common.FlagErrorf("--parent-token and --parent-position are mutually exclusive") } - if 
runtime.Str("doc-format") != "markdown" { + if runtime.Str("doc-format") != "markdown" && runtime.Str("content") != "" { if msg := CheckV2XMLBareAmpersand(runtime.Str("content")); msg != "" { return common.FlagErrorf("%s", msg) } diff --git a/shortcuts/doc/docs_update_check.go b/shortcuts/doc/docs_update_check.go index 1ecd6a26b..1aeea92da 100644 --- a/shortcuts/doc/docs_update_check.go +++ b/shortcuts/doc/docs_update_check.go @@ -299,8 +299,10 @@ func CheckV2XMLBareAmpersand(content string) string { if content == "" || !strings.Contains(content, "&") { return "" } - // Replace every valid entity with its same-length placeholder so positional - // byte offsets are preserved (not required here, but avoids false positives). + // Replace every valid entity reference with a fixed placeholder so that + // the subsequent Contains check only fires on truly bare ampersands. + // ("ENTITY" is not the same length as each entity; byte offsets are not + // preserved, but that is fine — we only need a yes/no bare-& answer.) stripped := xmlEntityRe.ReplaceAllString(content, "ENTITY") if !strings.Contains(stripped, "&") { return "" @@ -316,13 +318,12 @@ func CheckV2XMLBareAmpersand(content string) string { var quoteContainerTagRe = regexp.MustCompile(`|/)`) // columnIntWidthRe matches a attribute where N is a -// plain integer. The pattern requires whitespace before "width" to avoid -// matching unrelated attributes such as data-width, and accepts optional -// whitespace around "=" and either single or double quotes, so forms like -// width='50' or width = "50" are also detected. Go's RE2 engine does not -// support backreferences, so mismatched quote pairs (e.g. width="50') are -// also matched — that is acceptable for a non-blocking warning. -var columnIntWidthRe = regexp.MustCompile(`]*\swidth\s*=\s*['"]?\d+['"]?`) +// plain integer (not a float). 
The pattern requires: +// - whitespace before "width" to exclude attributes like data-width +// - the value to be enclosed in quotes with digits immediately before the +// closing quote, so width="0.5" does NOT match (the dot prevents \d+ +// from consuming the full value up to the quote). +var columnIntWidthRe = regexp.MustCompile(`]*\swidth\s*=\s*["']\d+["']`) // CheckV2XMLWarnings returns a list of non-fatal warnings for v2 XML content. // These describe constructs that are silently dropped or ignored by the v2 API diff --git a/shortcuts/doc/docs_update_check_test.go b/shortcuts/doc/docs_update_check_test.go index 2ef91b479..c2ca98dd0 100644 --- a/shortcuts/doc/docs_update_check_test.go +++ b/shortcuts/doc/docs_update_check_test.go @@ -394,6 +394,8 @@ func TestCheckV2XMLBareAmpersand(t *testing.T) { {name: "bare ampersand flagged", content: "a & b", wantErr: true}, {name: "bare ampersand in tag flagged", content: `R&D`, wantErr: true}, {name: "unknown entity flagged", content: " ", wantErr: true}, + // mixed: valid entity alongside a bare & — the bare one must still be caught + {name: "valid entity mixed with bare ampersand flagged", content: "a & b & c", wantErr: true}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -463,6 +465,12 @@ func TestCheckV2XMLWarnings(t *testing.T) { content: ``, wantLen: 1, }, + // float value must NOT trigger warning — width="0.5" is valid width-ratio syntax + { + name: "column float width value is not flagged", + content: `

A

`, + wantLen: 0, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -476,22 +484,10 @@ func TestCheckV2XMLWarnings(t *testing.T) { combined += w } for _, sub := range tt.wantContains { - if !containsStr(combined, sub) { + if !strings.Contains(combined, sub) { t.Errorf("expected warning to contain %q, got: %s", sub, combined) } } }) } } - -func containsStr(s, sub string) bool { - return len(s) >= len(sub) && (s == sub || len(sub) == 0 || - func() bool { - for i := 0; i+len(sub) <= len(s); i++ { - if s[i:i+len(sub)] == sub { - return true - } - } - return false - }()) -}