diff --git a/shortcuts/doc/docs_create_v2.go b/shortcuts/doc/docs_create_v2.go index 68ae824c9..a380781da 100644 --- a/shortcuts/doc/docs_create_v2.go +++ b/shortcuts/doc/docs_create_v2.go @@ -5,6 +5,7 @@ package doc import ( "context" + "fmt" "strings" "github.com/larksuite/cli/shortcuts/common" @@ -27,6 +28,11 @@ func validateCreateV2(_ context.Context, runtime *common.RuntimeContext) error { if runtime.Str("parent-token") != "" && runtime.Str("parent-position") != "" { return common.FlagErrorf("--parent-token and --parent-position are mutually exclusive") } + if runtime.Str("doc-format") != "markdown" && runtime.Str("content") != "" { + if msg := CheckV2XMLBareAmpersand(runtime.Str("content")); msg != "" { + return common.FlagErrorf("%s", msg) + } + } return nil } @@ -45,6 +51,12 @@ func dryRunCreateV2(_ context.Context, runtime *common.RuntimeContext) *common.D func executeCreateV2(_ context.Context, runtime *common.RuntimeContext) error { body := buildCreateBody(runtime) + if runtime.Str("doc-format") != "markdown" { + for _, w := range CheckV2XMLWarnings(runtime.Str("content")) { + fmt.Fprintf(runtime.IO().ErrOut, "warning: %s\n", w) + } + } + data, err := doDocAPI(runtime, "POST", "/open-apis/docs_ai/v1/documents", body) if err != nil { return err diff --git a/shortcuts/doc/docs_update_check.go b/shortcuts/doc/docs_update_check.go index cf71c1012..1aeea92da 100644 --- a/shortcuts/doc/docs_update_check.go +++ b/shortcuts/doc/docs_update_check.go @@ -279,3 +279,78 @@ func leadingRun(s string, c byte) string { } return s[:i] } + +// ── v2 XML content guards ────────────────────────────────────────────────── + +// xmlEntityRe matches a valid XML entity reference: & < > ' +// " &#N; or &#xH;. Used to skip over valid references when scanning for +// bare ampersands. 
var xmlEntityRe = regexp.MustCompile(`&(amp|lt|gt|apos|quot|#\d+|#x[0-9a-fA-F]+);`)

// CheckV2XMLBareAmpersand returns a non-empty error message when content
// contains a bare & — one that does not start a predefined entity
// (&amp; &lt; &gt; &apos; &quot;) or a numeric character reference
// (&#N; / &#xH;). It returns "" when content is empty, has no ampersand, or
// every ampersand begins a valid reference.
//
// It is meant for XML content only (callers skip it for --doc-format
// markdown). Callers in Validate should return the message as a hard error.
//
// Go's regexp package does not support lookahead, so a bare ampersand cannot
// be matched directly. Instead the total number of '&' bytes is compared with
// the number of valid references: every match of xmlEntityRe starts with '&'
// and contains no other '&', so any surplus ampersand is necessarily bare.
func CheckV2XMLBareAmpersand(content string) string {
	total := strings.Count(content, "&")
	if total == 0 {
		return ""
	}
	if len(xmlEntityRe.FindAllStringIndex(content, -1)) == total {
		return ""
	}
	return "content contains a bare & character that is not a valid XML entity reference; " +
		"the v2 XML parser will reject the request. " +
		"Escape it as &amp; (and < as &lt;, > as &gt; where needed)."
}

// quoteContainerTagRe matches the opening of a <quote-container> element:
// the tag name must be followed by whitespace, '>' or '/' (as in "/>"). The
// trailing delimiter avoids false positives on element names that merely
// start with "quote-container", and the leading '<' excludes attributes of
// the same name.
var quoteContainerTagRe = regexp.MustCompile(`<quote-container[\s>/]`)

// columnIntWidthRe matches a <column ... width="N"> attribute where N is a
// plain integer (not a float). The pattern requires:
//   - whitespace before "width" to exclude attributes like data-width
//   - the value to be enclosed in quotes with digits immediately before the
//     closing quote, so width="0.5" does NOT match (the dot prevents \d+
//     from consuming the full value up to the quote).
+var columnIntWidthRe = regexp.MustCompile(`]*\swidth\s*=\s*["']\d+["']`) + +// CheckV2XMLWarnings returns a list of non-fatal warnings for v2 XML content. +// These describe constructs that are silently dropped or ignored by the v2 API +// but do not cause the request to fail. Callers should print these to stderr +// before executing the API call. +// +// Warnings emitted: +// +// 1. is not recognised by the v2 XML parser; the block is +// silently dropped. Use
instead. +// +// 2. with an integer value has no effect in v2. The +// correct attribute is width-ratio="0.N" (e.g. width-ratio="0.5"). +func CheckV2XMLWarnings(content string) []string { + if content == "" { + return nil + } + var warnings []string + if quoteContainerTagRe.MatchString(content) { + warnings = append(warnings, + " is not supported in v2 XML and will be silently dropped; "+ + "use
instead.") + } + if columnIntWidthRe.MatchString(content) { + warnings = append(warnings, + " with an integer value has no effect in v2 XML; "+ + "use width-ratio=\"0.5\" (float 0–1) to set column width.") + } + return warnings +} diff --git a/shortcuts/doc/docs_update_check_test.go b/shortcuts/doc/docs_update_check_test.go index 50905873a..c2ca98dd0 100644 --- a/shortcuts/doc/docs_update_check_test.go +++ b/shortcuts/doc/docs_update_check_test.go @@ -373,3 +373,121 @@ func TestDocsUpdateWarningsEmpty(t *testing.T) { t.Fatalf("expected no warnings, got: %v", warnings) } } + +func TestCheckV2XMLBareAmpersand(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + content string + wantErr bool + }{ + {name: "empty is fine", content: "", wantErr: false}, + {name: "no ampersand", content: "hello world", wantErr: false}, + {name: "amp entity is fine", content: "a & b", wantErr: false}, + {name: "lt entity is fine", content: "<tag>", wantErr: false}, + {name: "gt entity is fine", content: "a > b", wantErr: false}, + {name: "apos entity is fine", content: "'", wantErr: false}, + {name: "quot entity is fine", content: """, wantErr: false}, + {name: "decimal numeric ref is fine", content: "A", wantErr: false}, + {name: "hex numeric ref is fine", content: "A", wantErr: false}, + {name: "bare ampersand flagged", content: "a & b", wantErr: true}, + {name: "bare ampersand in tag flagged", content: `R&D`, wantErr: true}, + {name: "unknown entity flagged", content: " ", wantErr: true}, + // mixed: valid entity alongside a bare & — the bare one must still be caught + {name: "valid entity mixed with bare ampersand flagged", content: "a & b & c", wantErr: true}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got := CheckV2XMLBareAmpersand(tt.content) + if (got != "") != tt.wantErr { + t.Fatalf("CheckV2XMLBareAmpersand(%q) = %q, wantErr=%v", tt.content, got, tt.wantErr) + } + }) + } +} + +func TestCheckV2XMLWarnings(t 
*testing.T) { + t.Parallel() + + tests := []struct { + name string + content string + wantContains []string + wantLen int + }{ + {name: "empty returns nil", content: "", wantLen: 0}, + {name: "clean XML no warnings", content: "

text

", wantLen: 0}, + { + name: "quote-container triggers warning", + content: `

text

`, + wantContains: []string{"quote-container", "blockquote"}, + wantLen: 1, + }, + { + name: "column integer width triggers warning", + content: `

A

`, + wantContains: []string{"width-ratio"}, + wantLen: 1, + }, + { + name: "column float width-ratio is fine", + content: `

A

`, + wantLen: 0, + }, + { + name: "both issues produce two warnings", + content: ``, + wantLen: 2, + }, + // false-positive guards: names that start with "quote-container" but aren't the tag + { + name: "quote-containerized attribute prefix is not flagged", + content: ``, + wantLen: 0, + }, + // false-positive guard: data-width should not trigger column warning + { + name: "data-width attribute is not flagged", + content: ``, + wantLen: 0, + }, + // single-quoted width should be caught + { + name: "column single-quoted integer width triggers warning", + content: ``, + wantLen: 1, + }, + // width with spaces around = should be caught + { + name: "column width with spaces around equals triggers warning", + content: ``, + wantLen: 1, + }, + // float value must NOT trigger warning — width="0.5" is valid width-ratio syntax + { + name: "column float width value is not flagged", + content: `

A

`, + wantLen: 0, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + got := CheckV2XMLWarnings(tt.content) + if len(got) != tt.wantLen { + t.Fatalf("CheckV2XMLWarnings(%q) returned %d warnings, want %d: %v", tt.content, len(got), tt.wantLen, got) + } + combined := "" + for _, w := range got { + combined += w + } + for _, sub := range tt.wantContains { + if !strings.Contains(combined, sub) { + t.Errorf("expected warning to contain %q, got: %s", sub, combined) + } + } + }) + } +} diff --git a/shortcuts/doc/docs_update_v2.go b/shortcuts/doc/docs_update_v2.go index 8501be015..7f626cd52 100644 --- a/shortcuts/doc/docs_update_v2.go +++ b/shortcuts/doc/docs_update_v2.go @@ -103,6 +103,11 @@ func validateUpdateV2(_ context.Context, runtime *common.RuntimeContext) error { return common.FlagErrorf("--command append requires --content") } } + if runtime.Str("doc-format") != "markdown" && content != "" { + if msg := CheckV2XMLBareAmpersand(content); msg != "" { + return common.FlagErrorf("%s", msg) + } + } return nil } @@ -124,6 +129,12 @@ func executeUpdateV2(_ context.Context, runtime *common.RuntimeContext) error { apiPath := fmt.Sprintf("/open-apis/docs_ai/v1/documents/%s", ref.Token) body := buildUpdateBody(runtime) + if runtime.Str("doc-format") != "markdown" { + for _, w := range CheckV2XMLWarnings(runtime.Str("content")) { + fmt.Fprintf(runtime.IO().ErrOut, "warning: %s\n", w) + } + } + data, err := doDocAPI(runtime, "PUT", apiPath, body) if err != nil { return err