Skip to content

Commit 0343194

Browse files
authored
fix(cmd-config-generate): fix config output for Microsoft Windows UTF-8 encoding (python-semantic-release#1400)
Resolves: python-semantic-release#702 * docs(cmd-config-generate): add Windows PowerShell specific `generate-config` usage example * test(cmd-config-generate): adds UTF-8 encoding test for platform specific output
1 parent 81a0f98 commit 0343194

5 files changed

Lines changed: 133 additions & 16 deletions

File tree

docs/api/commands.rst

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -473,16 +473,36 @@ Release corresponding to this version.
473473

474474
Generate default configuration for semantic-release, to help you get started
475475
quickly. You can inspect the defaults, write to a file and then edit according to
476-
your needs.
477-
For example, to append the default configuration to your pyproject.toml
478-
file, you can use the following command::
476+
your needs. For example, to append the default configuration to your ``pyproject.toml``
477+
file, you can use the following command (in POSIX-compliant shells):
479478

480-
$ semantic-release generate-config -f toml --pyproject >> pyproject.toml
479+
.. code-block:: bash
480+
481+
semantic-release generate-config --pyproject >> pyproject.toml
482+
483+
On Windows PowerShell, the redirection operators (``>``/``>>``) default to UTF-16LE,
484+
which can introduce NUL characters. Prefer one of the following to keep UTF-8:
485+
486+
.. code-block:: console
487+
488+
# Two options for piping output to a file in PowerShell (Out-File or Set-Content)
489+
490+
# Example for writing to pyproject.toml using Out-File (overwrites the file; add -Append to append instead):
491+
semantic-release generate-config --pyproject | Out-File -Encoding utf8 pyproject.toml
492+
493+
# Example for writing to a releaserc.toml file using Set-Content:
494+
semantic-release generate-config -f toml | Set-Content -Encoding utf8 releaserc.toml
481495
482496
If your project doesn't already leverage TOML files for configuration, it might better
483-
suit your project to use JSON instead::
497+
suit your project to use JSON instead:
498+
499+
.. code-block:: bash
500+
501+
# POSIX-compliant shell example
502+
semantic-release generate-config -f json | tee releaserc.json
484503
485-
$ semantic-release generate-config -f json
504+
# Windows PowerShell example
505+
semantic-release generate-config -f json | Out-File -Encoding utf8 releaserc.json
486506
487507
If you would like to add JSON configuration to a shared file, e.g. ``package.json``, you
488508
can then simply add the output from this command as a **top-level** key to the file.

src/semantic_release/cli/commands/generate_config.py

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from __future__ import annotations
22

33
import json
4+
import sys
5+
from typing import Literal
46

57
import click
68
import tomlkit
@@ -31,7 +33,9 @@
3133
"'semantic_release'"
3234
),
3335
)
34-
def generate_config(fmt: str = "toml", is_pyproject_toml: bool = False) -> None:
36+
def generate_config(
37+
fmt: Literal["toml", "json"], is_pyproject_toml: bool = False
38+
) -> None:
3539
"""
3640
Generate default configuration for semantic-release, to help you get started
3741
quickly. You can inspect the defaults, write to a file and then edit according to
@@ -42,14 +46,29 @@ def generate_config(fmt: str = "toml", is_pyproject_toml: bool = False) -> None:
4246
"""
4347
# due to possible IntEnum values (which are not supported by tomlkit.dumps, see sdispater/tomlkit#237),
4448
# we must ensure the transformation of the model to a dict uses json serializable values
45-
config = RawConfig().model_dump(mode="json", exclude_none=True)
49+
config_dct = {
50+
"semantic_release": RawConfig().model_dump(mode="json", exclude_none=True)
51+
}
4652

47-
config_dct = {"semantic_release": config}
48-
if is_pyproject_toml and fmt == "toml":
49-
config_dct = {"tool": config_dct}
53+
if is_pyproject_toml:
54+
output = tomlkit.dumps({"tool": config_dct})
5055

51-
if fmt == "toml":
52-
click.echo(tomlkit.dumps(config_dct))
56+
elif fmt == "toml":
57+
output = tomlkit.dumps(config_dct)
5358

5459
elif fmt == "json":
55-
click.echo(json.dumps(config_dct, indent=4))
60+
output = json.dumps(config_dct, indent=4)
61+
62+
else:
63+
raise ValueError(f"Unsupported format: {fmt}")
64+
65+
# Write output directly to stdout buffer as UTF-8 bytes
66+
# This ensures consistent UTF-8 output on all platforms, especially Windows where
67+
# shell redirection (>, >>) defaults to the system encoding (e.g., UTF-16LE or cp1252)
68+
# By writing to sys.stdout.buffer, we bypass the encoding layer and guarantee UTF-8.
69+
try:
70+
sys.stdout.buffer.write(f"{output.strip()}\n".encode("utf-8")) # noqa: UP012; allow explicit encoding declaration
71+
sys.stdout.buffer.flush()
72+
except (AttributeError, TypeError):
73+
# Fallback for environments without buffer (shouldn't happen in standard Python)
74+
click.echo(output)

src/semantic_release/cli/util.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def load_raw_config_file(config_file: Path | str) -> dict[Any, Any]:
7575
while trying to read the specified configuration file
7676
"""
7777
logger.info("Loading configuration from %s", config_file)
78-
raw_text = (Path() / config_file).resolve().read_text(encoding="utf-8")
78+
raw_text = (Path() / config_file).resolve().read_text(encoding="utf-8-sig")
7979
try:
8080
logger.debug("Trying to parse configuration %s in TOML format", config_file)
8181
return parse_toml(raw_text)

tests/const.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ class RepoActionStep(str, Enum):
3939
SUCCESS_EXIT_CODE = 0
4040

4141
CHANGELOG_SUBCMD = Cli.SubCmds.CHANGELOG.name.lower()
42-
GENERATE_CONFIG_SUBCMD = Cli.SubCmds.GENERATE_CONFIG.name.lower()
42+
GENERATE_CONFIG_SUBCMD = Cli.SubCmds.GENERATE_CONFIG.name.lower().replace("_", "-")
4343
PUBLISH_SUBCMD = Cli.SubCmds.PUBLISH.name.lower()
4444
VERSION_SUBCMD = Cli.SubCmds.VERSION.name.lower()
4545

tests/e2e/cmd_config/test_generate_config.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
from __future__ import annotations
22

33
import json
4+
import subprocess
5+
import sys
6+
from sys import executable as python_interpreter
47
from typing import TYPE_CHECKING
58

69
import pytest
710
import tomlkit
811

12+
import semantic_release
913
from semantic_release.cli.config import RawConfig
1014

1115
from tests.const import GENERATE_CONFIG_SUBCMD, MAIN_PROG_NAME, VERSION_SUBCMD
@@ -19,6 +23,9 @@
1923
from tests.conftest import RunCliFn
2024
from tests.fixtures.example_project import ExProjectDir
2125

26+
# Constant
27+
NULL_BYTE = b"\x00"
28+
2229

2330
@pytest.fixture
2431
def raw_config_dict() -> dict[str, Any]:
@@ -157,3 +164,74 @@ def test_generate_config_pyproject_toml(
157164
# Evaluate: Check that the version command in noop mode ran successfully
158165
# which means PSR loaded the configuration successfully
159166
assert_successful_exit_code(result, cli_cmd)
167+
168+
169+
@pytest.mark.skipif(sys.platform != "win32", reason="Windows-specific encoding check")
170+
@pytest.mark.parametrize(
171+
"console_executable",
172+
(
173+
"C:\\Windows\\System32\\WindowsPowerShell\\v1.0\\powershell.exe",
174+
# "C:\\Windows\\System32\\cmd.exe", # CMD.exe does not support specifying encoding for output
175+
),
176+
)
177+
@pytest.mark.usefixtures(repo_w_no_tags_conventional_commits.__name__)
178+
def test_generate_config_toml_utf8_bytes_windows(
179+
console_executable: str,
180+
example_project_dir: ExProjectDir,
181+
run_cli: RunCliFn,
182+
) -> None:
183+
"""
184+
Given an example project directory
185+
When generating a TOML configuration file via Powershell redirection
186+
Then the emitted file contains only UTF-8 bytes and no NUL bytes
187+
"""
188+
if "powershell.exe" not in console_executable.lower():
189+
pytest.skip("Only PowerShell is currently supported for this test")
190+
191+
output_file = example_project_dir / "releaserc.toml"
192+
psr_cmd = [
193+
python_interpreter,
194+
"-m",
195+
semantic_release.__name__,
196+
GENERATE_CONFIG_SUBCMD,
197+
"-f",
198+
"toml",
199+
]
200+
201+
redirection_cmd = (
202+
f"{str.join(' ', psr_cmd)} | Out-File -Encoding utf8 {output_file}"
203+
)
204+
205+
# Act: Generate the config file via subprocess call to PowerShell
206+
proc = subprocess.run( # noqa: S602, not a security concern in testing & required for redirection
207+
redirection_cmd,
208+
executable=console_executable,
209+
shell=True,
210+
stdin=None,
211+
capture_output=True,
212+
check=True,
213+
)
214+
215+
config_as_bytes = output_file.read_bytes()
216+
assert config_as_bytes, "Generated config file is empty!"
217+
assert (
218+
NULL_BYTE not in config_as_bytes
219+
), f"Generated config file '{output_file}' contains NUL bytes!"
220+
assert not proc.stderr
221+
assert not proc.stdout
222+
223+
# Act: Validate that the generated config is a valid configuration for PSR
224+
cli_cmd = [
225+
MAIN_PROG_NAME,
226+
"--noop",
227+
"--strict",
228+
"-c",
229+
str(output_file),
230+
VERSION_SUBCMD,
231+
"--print",
232+
]
233+
result = run_cli(cli_cmd[1:])
234+
235+
# Evaluate: Check that the version command in noop mode ran successfully
236+
# which means PSR loaded the configuration successfully
237+
assert_successful_exit_code(result, cli_cmd)

0 commit comments

Comments
 (0)