Commit dfc80ee

gguf.py updates to support v4
1 parent 6db4788 commit dfc80ee

2 files changed

Lines changed: 28 additions & 12 deletions

README.md

Lines changed: 11 additions & 0 deletions
@@ -108,6 +108,17 @@ Optional CUDA/ROCm backends can be enabled with `-DUSE_CUDA=ON` / `-DUSE_ROCM=ON
 
 `t81-convert`, `t81-gguf`, and `t81-qat` automate quantize→export→train flows with progress reporting and validation hooks. Browse [docs/references/cli-usage.md](docs/references/cli-usage.md), [docs/diagrams/cli-workflows-mermaid.md](docs/diagrams/cli-workflows-mermaid.md), and [examples/cli-examples.md](examples/cli-examples.md) for recipes.
 
+## GGUF v4 compliance
+
+t81’s GGUF exports already mirror the llama.cpp conventions; v4’s mandatory `gguf_header` additions are worth calling out for everybody writing their own converter:
+
+- **Header bump** – write `version = 4` instead of 3 so llama.cpp accepts the file and no longer fails with “unsupported version”.
+- **Global alignment metadata** – after `tensor_count`/`kv_count`, emit `alignment` (default 32, a power of two) and `reserved` (0) before the metadata block, and compute tensor padding with `GGML_PAD(size, alignment)` so every tensor data block ends on that boundary.
+- **Tensor padding & metadata rules** – rely on the new alignment field instead of optional metadata keys, keep `general.alignment` a `uint32_t` power of two, and let missing or invalid values fail fast instead of corrupting mmapped loads.
+- **Implementation note** – `struct gguf_header { char magic[4]; uint32_t version; uint64_t tensor_count, kv_count; uint32_t alignment; uint32_t reserved; };` plus explicit `fwrite(&alignment, sizeof(uint32_t), 1, f); fwrite(&reserved, sizeof(uint32_t), 1, f);` immediately after writing `kv_count` is enough to match the official layout.
+
+The eight extra header bytes are negligible even for huge models, but they unlock ARM64-friendly alignment, predictable metadata parsing, and wide compatibility with upcoming llama.cpp releases.
+
 ## Use cases
 
 - Ternary LLM weight quantization and GGUF exports for Hugging Face + `llama.cpp`.
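
To ground the bullets in the new README section, here is a minimal, self-contained Python sketch of the v4 header layout they describe. It is illustrative only, not the project's exporter: `pack_header` and `ggml_pad` are hypothetical names, while the `"<4sIQQII"` field order, the magic/version/alignment/reserved values, and the `GGML_PAD(size, alignment)` rounding rule come from the section above and the `t81/gguf.py` diff below.

```python
import struct

GGUF_MAGIC = b"GGUF"
GGUF_VERSION = 4          # v4 header bump
DEFAULT_ALIGNMENT = 32    # must be a power of two

# Little-endian v4 layout: magic, version, tensor_count, kv_count,
# alignment, reserved -- the same "<4sIQQII" layout gguf.py now uses.
HEADER_STRUCT = struct.Struct("<4sIQQII")


def ggml_pad(size: int, alignment: int = DEFAULT_ALIGNMENT) -> int:
    """Round size up to the next multiple of alignment, like GGML_PAD."""
    return (size + alignment - 1) & ~(alignment - 1)


def pack_header(tensor_count: int, kv_count: int,
                alignment: int = DEFAULT_ALIGNMENT) -> bytes:
    """Pack a GGUF v4 header; the reserved field is always written as zero."""
    if alignment <= 0 or alignment & (alignment - 1):
        raise ValueError("alignment must be a power of two")
    return HEADER_STRUCT.pack(GGUF_MAGIC, GGUF_VERSION,
                              tensor_count, kv_count, alignment, 0)


if __name__ == "__main__":
    header = pack_header(tensor_count=2, kv_count=5)
    assert HEADER_STRUCT.size == len(header) == 32   # fixed-size v4 header
    assert ggml_pad(1000) == 1024                    # payloads land on 32-byte boundaries
```

On the write path, each tensor payload would then be padded out to `ggml_pad(len(payload))` bytes so the next tensor's data starts on an aligned boundary.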

t81/gguf.py

Lines changed: 17 additions & 12 deletions
@@ -20,10 +20,10 @@
 import t81lib
 from .nn import Linear as TernaryLinear
 
-HEADER_STRUCT = struct.Struct("<4sIIIIQQQQ")
 HEADER_MAGIC = b"GGUF"
-HEADER_VERSION = 0x00000003
+HEADER_VERSION = 4
 HEADER_ALIGNMENT = 32
+HEADER_STRUCT = struct.Struct("<4sIQQII")
 HEADER_SIZE = HEADER_STRUCT.size
 
 GGML_TYPE_TQ1_0 = 250
@@ -236,9 +236,6 @@ def write_gguf(
         len(tensor_payloads),
         metadata_count,
         HEADER_ALIGNMENT,
-        HEADER_ALIGNMENT,
-        tensor_infos_offset,
-        tensor_data_offset,
         0,
     )
 
@@ -257,7 +254,11 @@ def write_gguf(
     handle.write(tensor_data_section)
 
 
-def _parse_metadata(buffer: bytes, offset: int, count: int) -> Mapping[str, Any]:
+def _parse_metadata(
+    buffer: bytes,
+    offset: int,
+    count: int,
+) -> tuple[Mapping[str, Any], int]:
     metadata: dict[str, Any] = {}
     cursor = offset
     for _ in range(count):
@@ -285,7 +286,7 @@ def _parse_metadata(buffer: bytes, offset: int, count: int) -> Mapping[str, Any]
             cursor = value_end + 1
         else:
             raise ValueError(f"unsupported metadata value type {value_type}")
-    return metadata
+    return metadata, cursor
 
 
 def _parse_tensor_infos(buffer: bytes, offset: int, count: int) -> list[_TensorInfo]:
@@ -343,15 +344,19 @@ def read_gguf(path: str | Path, *, dequantize: bool = True) -> Mapping[str, torc
         version,
         num_tensors,
         metadata_kv_count,
-        _metadata_alignment,
-        _tensor_alignment,
-        tensor_infos_offset,
-        tensor_data_offset,
+        alignment,
+        reserved,
     ) = HEADER_STRUCT.unpack_from(buffer, 0)
     if magic != HEADER_MAGIC or version != HEADER_VERSION:
         raise ValueError("GGUF header mismatch")
+    if alignment != HEADER_ALIGNMENT:
+        raise ValueError("unsupported GGUF alignment")
+    if reserved != 0:
+        raise ValueError("reserved header field must be zero")
 
-    metadata = _parse_metadata(buffer, HEADER_SIZE, metadata_kv_count)
+    metadata, metadata_end = _parse_metadata(buffer, HEADER_SIZE, metadata_kv_count)
+    metadata_size = metadata_end - HEADER_SIZE
+    tensor_infos_offset = _align(HEADER_SIZE + metadata_size)
     block_rows = int(metadata.get("quantization.block_size", GGUF_QUANT_BLOCK_ROWS))
     tensor_infos = _parse_tensor_infos(buffer, tensor_infos_offset, num_tensors)
     sorted_infos = sorted(tensor_infos, key=lambda info: info.offset)
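
As a reader-side companion to the hunks above, the following sketch shows the validation the two new `uint32` fields make possible. The names are hypothetical, and the power-of-two check follows the README's general rule, whereas `read_gguf` in the diff pins `alignment` to `HEADER_ALIGNMENT` exactly; the point is the same fail-fast behavior.

```python
import struct

HEADER_MAGIC = b"GGUF"
HEADER_VERSION = 4
HEADER_STRUCT = struct.Struct("<4sIQQII")


def read_header(buffer: bytes) -> tuple[int, int, int]:
    """Unpack a v4 header and return (tensor_count, kv_count, alignment)."""
    magic, version, tensor_count, kv_count, alignment, reserved = (
        HEADER_STRUCT.unpack_from(buffer, 0)
    )
    if magic != HEADER_MAGIC or version != HEADER_VERSION:
        raise ValueError("GGUF header mismatch")
    # v4 rules: alignment is a non-zero power of two, reserved stays zero.
    if alignment == 0 or alignment & (alignment - 1):
        raise ValueError("alignment must be a non-zero power of two")
    if reserved != 0:
        raise ValueError("reserved header field must be zero")
    return tensor_count, kv_count, alignment


if __name__ == "__main__":
    buf = HEADER_STRUCT.pack(HEADER_MAGIC, HEADER_VERSION, 2, 5, 32, 0)
    print(read_header(buf))  # -> (2, 5, 32)
```

After this check, the tensor-info block begins at the first aligned offset past the metadata, which is what `tensor_infos_offset = _align(HEADER_SIZE + metadata_size)` computes in the diff (assuming `_align` rounds up to `HEADER_ALIGNMENT`).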

0 commit comments
