diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 5c53b2ba..4a6bc331 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -414,6 +414,22 @@ jobs: name: jacoco-report path: target/site/jacoco/jacoco.xml if-no-files-found: ignore + - name: Run PIT mutation tests + run: mvn --batch-mode --no-transfer-progress test-compile org.pitest:pitest-maven:mutationCoverage -Dmaven.javadoc.skip=true + - name: Extract PIT survivors + if: always() + run: | + echo "=== PIT Survived Mutations ===" + for html_file in $(find target/pit-reports -name "*.html" -type f 2>/dev/null | sort); do + if grep -q "SURVIVED" "$html_file"; then + echo "Found survivors in $html_file:" + grep -B 2 -A 3 "SURVIVED" "$html_file" + echo "" + fi + done + - uses: actions/upload-artifact@v7 + if: always() + with: { name: pit-reports, path: target/pit-reports/ } - name: Memory after tests if: always() run: free -h diff --git a/.mvn/jvm.config b/.mvn/jvm.config new file mode 100644 index 00000000..504456f9 --- /dev/null +++ b/.mvn/jvm.config @@ -0,0 +1,10 @@ +--add-exports=jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED +--add-exports=jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED +--add-exports=jdk.compiler/com.sun.tools.javac.main=ALL-UNNAMED +--add-exports=jdk.compiler/com.sun.tools.javac.model=ALL-UNNAMED +--add-exports=jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED +--add-exports=jdk.compiler/com.sun.tools.javac.processing=ALL-UNNAMED +--add-exports=jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED +--add-exports=jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED +--add-opens=jdk.compiler/com.sun.tools.javac.code=ALL-UNNAMED +--add-opens=jdk.compiler/com.sun.tools.javac.comp=ALL-UNNAMED diff --git a/.mvn/jvm.config.license b/.mvn/jvm.config.license new file mode 100644 index 00000000..b918686f --- /dev/null +++ b/.mvn/jvm.config.license @@ -0,0 +1,3 @@ +SPDX-FileCopyrightText: 2026 Bernard Ladenthin + +SPDX-License-Identifier: MIT diff 
--git a/CLAUDE.md b/CLAUDE.md index b6bec371..8f48354e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -6,7 +6,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co Java bindings for [llama.cpp](https://github.com/ggerganov/llama.cpp) via JNI, providing a high-level API for LLM inference in Java. The Java layer communicates with a native C++ library through JNI. -Current llama.cpp pinned version: **b9442** +Current llama.cpp pinned version: **b9495** ## Upgrading CUDA Version @@ -702,21 +702,36 @@ interim measure until that work lands. ## Open TODOs -- **[URGENT] Replace jqwik with QuickTheories.** Upstream is openly hostile to the AI-assisted workflow this project uses (jqwik 1.10.0 added a deliberate prompt-injection string to test stdout; jqwik 1.10.1 release notes added: *"This project is not meant to be used by any 'AI' coding agents at all."*). See the "jqwik prompt-injection in test output" section above for context and links. Replace the one jqwik test class in this repo (`LlamaParameterProperties`) with one of (in order of preference): - - **QuickTheories** (`org.quicktheories:quicktheories`, MIT) — preferred. Native JUnit Jupiter (5/6); plain `@Test` methods with `qt().forAll(...).check(...)` bodies. No `@RunWith`, no JUnit Vintage engine. Property-based generation with shrinking preserved; the fluent DSL (`integers().between(...)`, `floats().between(...)`, `strings().basicLatinAlphabet().ofLengthBetween(...)`) covers every constraint the current jqwik tests use. - - **junit-quickcheck** (`com.pholser:junit-quickcheck-core` + `-generators`) — closest annotation match to jqwik but requires the JUnit Vintage engine alongside Jupiter; only use if the QuickTheories DSL turns out to be a poor fit. - - A minimal hand-rolled `@ParameterizedTest` + `@MethodSource`/`@ArgumentsSource` approach using JUnit Jupiter that is already on the classpath. Lower dependency count; loses shrinking and built-in generators. 
- - Remove the jqwik dependency from `pom.xml` (and the `jqwik.version` property), drop the jqwik bullet from any test-frameworks documentation, and verify CI is green with the replacement. Until this lands, the doc-only warning section above is the interim mitigation. +- **DO NOT UPGRADE jqwik past 1.9.3.** jqwik 1.10.0 added a deliberate anti-AI prompt-injection string to test stdout; the 1.10.1 user guide states the library "is not meant to be used by any 'AI' coding agents at all." 1.9.3 is the last pre-disclosure release and is the pinned version for this repo. Any CI / Dependabot / contributor PR that bumps `jqwik.version` past 1.9.3 must be rejected. The library is otherwise actively maintained and the current pin is the equilibrium position; replacement candidates (QuickTheories, junit-quickcheck, hand-rolled `@ParameterizedTest`) were evaluated and rejected because all available alternatives are either dormant since 2019 or strictly worse on the integration / shrinking axis. See the "jqwik prompt-injection in test output" section above for the full incident reference. - **`@VisibleForTesting` audit.** No usages currently. Walk the production tree for package-private/protected methods or fields that exist purely so tests can reach them, and either annotate (`com.google.common.annotations.VisibleForTesting`) or move into the test source tree. -- **Strict null-safety with Maven hard-check.** Nullability annotations today are sporadic and from `org.jetbrains.annotations`. Migrate to JSpecify (`org.jspecify:jspecify`) and add Error Prone + NullAway in the compiler plugin so the build fails on potential NPEs (the BitcoinAddressFinder pom.xml already does this and is a working reference). -- **At least one LogCaptor smoke test.** SLF4J + Logback are wired in (`OSInfo` uses an SLF4J logger; `LlamaLoader` deliberately uses `System.err` for bootstrap). 
Add a `LogCaptor.forClass(OSInfo.class)` test that confirms a known log message actually fires through the configured pipeline, so a future logback misconfiguration is caught at test time rather than silently swallowed. - -- **Expose `common_params::skip_download` via `ModelParameters.setSkipDownload(boolean)`.** Added in b9437 (`--skip-download` CLI flag); when set, `common_params_handle_models()` returns `false` instead of attempting any HF download, and `common_download_file_single()` returns `-2` on missing-file / ETag-mismatch. Useful for air-gapped / pre-staged-model deployments where any outbound network call is a failure mode. Pair with handling `common_skip_download_exception` from the JNI side so the Java caller sees a typed "model unavailable" failure instead of a generic load error. +- **Null-safety refinement.** JSpecify + NullAway are now enforced at compile time in **strict JSpecify mode** with the extra options `CheckOptionalEmptiness`, `AcknowledgeRestrictiveAnnotations`, `AcknowledgeAndroidRecent`, `AssertsEnabled` (see `pom.xml`); `@NullMarked` on the three packages via `package-info.java`; JDK module exports in `.mvn/jvm.config`. The legacy `org.jetbrains.annotations` dep has been removed; all nullability annotations are JSpecify. Public-API methods that may legitimately have no value use `Optional` rather than `@Nullable T` (`ChatResponse.getFirstMessage`, `ChatMessage.getParts`, `ChatRequest.buildToolsJson`). Open follow-up: review remaining unannotated public API surfaces for places where `@Nullable` would be more precise than the implicit non-null default. + +- **Further-strictness open points (cross-repo, not yet done).** Items below are tracked across all four Bernard-Ladenthin Java repos and can be picked up incrementally: + - **SpotBugs `effort=Max` + `threshold=Low`** — currently default effort/threshold. Raising both surfaces more findings (and takes longer per build). 
Worth a one-off experiment to triage what appears before committing. + - ~~**Error Prone bug-pattern promotions to `ERROR`**~~ — **DONE** in 855f447 ("Promote 12 Error Prone bug patterns to ERROR + enable -Xlint:all (no -Werror under release=8)"). Twelve high-confidence patterns are now promoted via `-Xep:<CheckName>:ERROR` args in `pom.xml` (`BoxedPrimitiveEquality`, `EqualsHashCode`, `EqualsIncompatibleType`, `IdentityBinaryExpression`, `SelfAssignment`, `SelfComparison`, `SelfEquals`, `DeadException`, `FormatString`, `InvalidPatternSyntax`, `OptionalEquality`, `ImpossibleNullComparison`). + - ~~**`javac -Werror` + `-Xlint:all,-serial,-options`**~~ — **DONE for this repo** in 3e2efbb ("Turn on javac -Werror"; earlier `-Xlint:all` setup in 855f447) with `-Xlint:all,-serial,-options,-classfile,-processing`. Approximately 20 distinct Error Prone warnings were addressed before flipping the switch: EqualsGetClass on `Pair` (instanceof); MissingOverride on `PoolingType` / `RopeScalingType`; JdkObsolete in `LlamaLoader` (`LinkedList` → `ArrayList`); StringSplitter in `LlamaLoader` (inline suppress — the empty-entry quirk is harmless because we explicitly skip blanks); 3× StringCaseLocaleUsage in `OSInfo` (added `Locale.ROOT`); EmptyCatch in `OSInfo.isAlpineLinux` (rationale comment added); FutureReturnValueIgnored in `LlamaModel.completeAsync` (deliberate fire-and-forget callback, suppressed); Finalize on `LlamaModel.finalize` (intentional finalizer-attack guard, suppressed); MixedMutabilityReturnType in 4 parser methods (`Collections.emptyList()` → `new ArrayList<>()`); EnumOrdinal in `InferenceParameters.setMiroStat` (wire format requires the ordinal, suppressed with rationale); EscapedEntity in `InferenceParameters` javadoc (`&lt;` → `<` inside `@code`); 4× TypeParameterUnusedInFormals on the self-typing builder idiom (suppressed); AnnotateFormatMethod on `Java8CompatibilityHelper.formatted` (callers pass runtime templates, suppressed); SafeVarargs + varargs on 
`Java8CompatibilityHelper.listOf`. Cross-repo: streambuffer + plugin already done; BAF has a separate catalogued warning list. + - ~~**`-parameters` javac arg**~~ — **DONE** in 4350cf2 ("Trivial strictness bundle: -parameters, --release, OnlyNullMarked"). `<parameters>true</parameters>` is set in `maven-compiler-plugin` config; real parameter names are now baked into bytecode. + - ~~**`--release N`** instead of `-source N -target N`~~ — **DONE** in 4350cf2 (same bundle commit). `<release>8</release>` is wired in `maven-compiler-plugin`, forcing the API surface to actually match the target JDK. + - ~~**Mutation-testing threshold enforcement (PIT)**~~ — **DONE** in 62f8a00 ("Wire PIT mutation testing narrowed to Pair") plus bb93a8f (docs) and 3bfa51f (README badge). `streambuffer` enforces 100 % mutation coverage over its whole package. **This repo and `llamacpp-ai-index-maven-plugin` / `BitcoinAddressFinder` use a "single class, full plumbing" pattern**: PIT is wired in `pom.xml` and runs on every CI build (in the `test-java-linux-x86_64` job) with `<mutationThreshold>100</mutationThreshold>`, but `<targetClasses>` is narrowed to `net.ladenthin.llama.Pair`. The intent is to keep the wiring exercised and the gate live without forcing every class up to 100 % mutation coverage at once. Expand `<targetClasses>` incrementally as classes reach parity (README TODO tracks this). + - **Checker Framework as a second static-nullness pass** — **DONE for this repo** in c63870b ("Add Checker Framework Nullness Checker as a 2nd static-nullness pass") (and `streambuffer`, `llamacpp-ai-index-maven-plugin`). The Nullness Checker (4.1.0) is wired in `pom.xml` and runs alongside NullAway. `toJsonString` uses `@PolyNull` (with a NullAway-suppress because NullAway has no PolyNull); native-method constructor calls in `LlamaModel` carry `@SuppressWarnings("method.invocation")`; `Pair.equals` and `Usage.equals` declare `@Nullable Object`; `LlamaSystemProperties` getters return `@Nullable String` to match javadoc; `getPackage()` and resource-stream null derefs are guarded. 
Remaining cross-repo work: `BitcoinAddressFinder`. + - **JPMS `module-info.java` with `@NullMarked` at module level** — **DONE for this repo** in 0fd066a ("Add JPMS module descriptor for the java-llama.cpp JNI bindings"); 9528e79 ("Move @NullMarked to module level + fix Java version badge to 8+") then moved `@NullMarked` from per-package `package-info.java` to the module descriptor (and `streambuffer`, `llamacpp-ai-index-maven-plugin`); remaining cross-repo work covers `BitcoinAddressFinder`. The module `net.ladenthin.llama` exports the three hand-written public packages (`net.ladenthin.llama`, `.args`, `.json`). The native libraries shipped under `/net/ladenthin/llama/{OS}/{ARCH}/` continue to load through `LlamaLoader.class.getResourceAsStream(...)` because that lookup runs against the loader's own module, which is this module, so no `opens` directive is needed. Two-execution `maven-compiler-plugin` pattern (release 8 for sources, release 9 for `module-info.java`); the resulting jar carries `module-info.class` at its root and is backward-compatible with Java 8 classpath consumers. Module-level `@NullMarked` was subsequently adopted in 9528e79 (previously deferred): the annotation now lives on the module descriptor instead of per-package `package-info.java`, mirroring the layout the sister repos converged on. + - ~~**Banned-API enforcement**~~ — **DONE** in 8baae0c ("Add Maven Enforcer with the four standard rules; pin slf4j-api") for `bannedDependencies`/`dependencyConvergence`, and 329d764 ("test(archunit): ban System.exit, new Random, Thread.sleep in production") for the `banned-api-checker`-style runtime bans (implemented as ArchUnit rules rather than the standalone plugin). Maven Enforcer `bannedDependencies` excludes `commons-logging`, `log4j:log4j`, old hamcrest split artifacts, and legacy `junit:junit`/`junit:junit-dep`. e6069da additionally bans `sun.*`/`com.sun.*`/`jdk.internal.*` imports in production. 
+ - **Additional ArchUnit rules to consider** — layered-architecture rules (`layeredArchitecture().consideringAllDependencies()`), per-module banned-imports lists, public-API-surface constraints (no public mutable static state, etc.). Partial progress: 7b6667d ("test(archunit): public non-static fields must be final (LlamaOutput compliant)") covers the "no public field that is not final" sub-rule. +- ~~**At least one LogCaptor smoke test.** SLF4J + Logback are wired in (`OSInfo` uses an SLF4J logger; `LlamaLoader` deliberately uses `System.err` for bootstrap). Add a `LogCaptor.forClass(OSInfo.class)` test that confirms a known log message actually fires through the configured pipeline, so a future logback misconfiguration is caught at test time rather than silently swallowed.~~ **DONE** in `LoggingSmokeTest` (two tests): (1) `slf4jPipelineEmits` directly emits a known INFO event through `LoggerFactory.getLogger(OSInfo.class)` and asserts LogCaptor saw it — catches broken SLF4J binding / misrouted Logback config; (2) `getHardwareNameLogsError_whenProcessRunnerThrows` swaps `OSInfo.processRunner` with a stub that throws `IOException`, then asserts the production `error("Error while running uname -m", e)` line at `OSInfo.java:299` was captured — pins the production log call as part of the contract. + +- ~~**Expose `common_params::skip_download` via `ModelParameters.setSkipDownload(boolean)`.**~~ **DONE**: `ModelFlag.SKIP_DOWNLOAD` + `ModelParameters.setSkipDownload(boolean)` + `ModelParameters.hasFlag(ModelFlag)` ship as a strict-addition Java API. Upstream raises `common_skip_download_exception` inside `common_download_file_single`, but it is caught inside upstream `common_params_parse_ex` (`common/arg.cpp:476`) and surfaces only as a `false` return from `common_params_parse` — so the JNI never sees the exception directly. 
The Java layer therefore uses a heuristic in `SkipDownloadFailureTranslator`: when `SKIP_DOWNLOAD` is set AND the JNI throws `LlamaException("Failed to parse model parameters")`, the failure is translated to a typed public `ModelUnavailableException` (extends the now-public `LlamaException`). 7 unit tests in `LlamaModelSkipDownloadTest` cover the round-trip + every translation edge case (skip-set + parse-failed → typed; skip-set + unrelated message → passthrough; skip-not-set + parse-failed → passthrough; null message → passthrough). No JNI / native rebuild required. - **Expose `--spec-draft-backend-sampling` toggle via `ModelParameters.setSpecDraftBackendSampling(boolean)`.** Added in b9437 (env `LLAMA_ARG_SPEC_DRAFT_BACKEND_SAMPLING`). Backend sampling for the speculative draft is enabled by default upstream but auto-disabled on `LLAMA_SPLIT_MODE_TENSOR` setups; an explicit Java-side setter lets callers force-disable it for benchmarking or for backends with sampler bugs. Add only after a real user request — this is plumbing that mostly matters for speculative-decoding power users. +- **Expose runtime reasoning control via `InferenceParameters.setReasoningControl(boolean)` + `LlamaModel.endReasoning(...)`.** Added in b9444–b9490: new `common_params_sampling::reasoning_control` flag arms the budget sampler so reasoning can be ended at runtime, and new `common_sampler_reasoning_budget_force(common_sampler *)` triggers the end-of-thinking token injection on the next sample. Upstream also adds a `POST /v1/chat/completions/control` server endpoint accepting `{"id": "...", "action": "reasoning_end"}`. Java mapping would be: (a) `InferenceParameters.setReasoningControl(boolean)` arms the sampler on the inference run, (b) a new `LlamaModel.endReasoning(int slotId)` (or per-streaming-task-id) JNI method calls the upstream `common_sampler_reasoning_budget_force` against the slot's sampler. Useful for interactive UIs that want a "skip thinking and answer now" button. 
Add only after a real user request — relevant only for reasoning-trained models (DeepSeek-R1, Qwen3-Thinking, GPT-OSS-Reasoner, etc.). + +- **Expose `llama_context_params::n_outputs_max` via `ModelParameters.setMaxOutputs(int)`.** Added in b9444–b9490 (default `-1` = derived from `n_batch`). Caps the number of output slots allocated per context; relevant for memory-constrained setups that always run with `logits_all=false` and want to prevent over-allocation when `n_batch` is large. Trivial JNI plumbing (one `cparams` field passthrough); add when a user reports OOM on context creation tied to output slot pre-allocation. + +- **Expose Multi-Token Prediction toggle via `ModelParameters.setMtp(boolean)`.** Existed since the Qwen3.5 MTP work; b9444–b9490 extends it to Step-3.5. CLI flags `--mtp`/`--no-mtp` (env `LLAMA_ARG_MTP`) control whether the draft head runs alongside the main model for accelerated decoding. Java setter would route to `common_params_speculative::type = COMMON_SPECULATIVE_TYPE_DRAFT_MTP`. Add only after a real user request — relevant only for MTP-trained models. + +- **Expose `llama_vocab::get_suppress_tokens()` via `LlamaModel.getSuppressTokens()`.** Added in b9490–b9495 alongside the new `tokenizer.ggml.suppress_tokens` GGUF key and the `LLM_KV_TOKENIZER_SUPPRESS_TOKENS` constant. When a GGUF declares this array, upstream stores it on `llama_vocab::impl::suppress_tokens` and exposes it via the new `llama_vocab::get_suppress_tokens()` accessor. The bias is **applied automatically** inside the model forward graph — the Gemma4 Unified graph (`src/models/gemma4.cpp`) reads the list and adds a `-INFINITY` logit bias to those token IDs via a new `llm_graph_input_logits_bias` input so the model cannot emit them (used to block `` / `` placeholders). 
A Java mirror would be `public int[] getSuppressTokens()` on `LlamaModel`: a read-only inspector returning the suppression list for debugging or for callers running their own sampling who want to replicate the same bias. Value is low (the bias is auto-applied, Java callers cannot change it; java-llama.cpp does not expose custom logit-bias hooks at this level); cost is trivial (one JNI passthrough + a `getSuppressTokens()` Java method). Add only after a real user request — same posture as the b9444–b9490 follow-ups (`setReasoningControl`, `setMaxOutputs`, `setMtp`) queued above. + - **`@VisibleForTesting` design-fit review.** Complement to the audit above: for every existing or planned `@VisibleForTesting` usage, ask whether widening access is the cleanest path to testability. Common alternatives that should be preferred when applicable: (a) inject the dependency through the constructor and have the test pass a stub or fake; (b) extract the tested behaviour into a separate testable helper class with public methods; (c) restructure the production API so what the test wants to verify is observable through normal public methods. Only keep the annotation where these alternatives are materially worse. `@VisibleForTesting` should be the last resort, not the first. - **Package hierarchy review.** Walk the full `src/main/java/.../` tree and assess whether the current package layout still expresses the design intent. Look for: classes that have drifted into the wrong package as the codebase grew; flat "kitchen-sink" packages that should be split (high class count, mixed concerns); deeply nested packages that fragment cohesive components; circular dependencies between packages; missing seams where a sub-package boundary would prevent leaking implementation details. Produce a target tree as a separate planning step BEFORE making any moves — large package refactors are expensive to review and easy to do twice if the target isn't clear up front. 
@@ -724,3 +739,18 @@ interim measure until that work lands. - **Class and method naming review (pair with the package hierarchy work).** While the package hierarchy review is in flight, also audit class and method names for the same kinds of drift: stale names that no longer describe what the class actually does after years of growth; over-abbreviated or cryptic identifiers (`Utils`, `Helper`, `Mgr`, `do*`, `process*`) that hide responsibilities; method names whose verbs do not match the actual side effects (named `get*` but writes, named `is*` but mutates, etc.); name collisions across packages that force qualified imports everywhere. Renames are far cheaper to do INSIDE a package-restructure commit than as standalone follow-ups (one IDE refactor pass touches both the move and the rename), so capture name changes in the same target tree as the package plan rather than as a separate later step. - **Abstract the Java and test writing guidelines to a workspace-level shared layer.** The Java code-writing rules and test-writing conventions referenced from this CLAUDE.md (`CODE_WRITING_GUIDE.md`, `TEST_WRITING_GUIDE.md` where present, and the `.claude/skills/java-tdd-guide/SKILL.md` skill) are already nearly identical across all 4 Bernard-Ladenthin Java repos (`BitcoinAddressFinder`, `llamacpp-ai-index-maven-plugin`, `streambuffer`, `java-llama.cpp`) and the duplication will drift over time. Lift them into a single workspace-level location that AI assistants pick up regardless of which repo they were opened in: the canonical Java conventions go into a workspace-wide Claude skill (e.g. `~/.claude/skills/java-tdd-guide/SKILL.md` already exists as the seed); per-repo `CLAUDE.md` only keeps repo-specific supplements (build commands, module layout, project-specific testing notes) and points at the shared skill instead of duplicating the rules. 
Same plan covers any other workspace-level seams (shared editor config, shared `.spotbugs-exclude.xml` fragments for cross-repo idioms, shared GitHub-workflow templates). Capture the canonical version BEFORE deleting the per-repo files; do not delete files in this pass. + +- **Feature backlog from similar projects.** See [`docs/feature-investigation-similar-projects.md`](docs/feature-investigation-similar-projects.md) for the consolidated investigation across the 5 pure-Java sibling runtimes ([llama3.java](https://github.com/mukel/llama3.java), [gemma4.java](https://github.com/mukel/gemma4.java), [gptoss.java](https://github.com/mukel/gptoss.java), [qwen35.java](https://github.com/mukel/qwen35.java), [nemotron3.java](https://github.com/mukel/nemotron3.java)) plus the dormant alternative JNI binding [llamacpp4j](https://github.com/sebicom/llamacpp4j). The doc captures 18 candidate items grouped into cross-cutting themes (UTF-8 streaming boundary safety, thinking-channel router, operator timing line, jbang single-file example, README system-properties table, etc.) and per-repo unique findings (Harmony channel decoder, Qwen empty-`<think>` injection, llama_state_* save/load, llama_adapter_lora_* hot-apply, etc.), each with effort sizing (XS / S / M / L) and a prioritised backlog. **Recommended first batch** (items 1, 3, 4, 5): UTF-8 boundary-safe streaming decoder + per-run timing line + one jbang-runnable example + a README system-properties table; ~1-2 days total, no JNI changes. + +- **Evaluate GraalVM Native Image as an alternative distribution target.** Reference: [GraalVM Native Image](https://www.graalvm.org/latest/reference-manual/native-image/). The pure-Java sibling projects in the README's "Similar Projects" list (mukel's `llama3.java` / `gemma4.java` / `gptoss.java` / `qwen35.java` / `nemotron3.java`) demonstrate that single-jar, no-JNI Java inference is viable for individual model architectures.
Native Image opens an orthogonal direction for THIS project: AOT-compile the Java layer + JNI bridge to a self-contained binary that bundles the libjllama.so (or per-OS equivalent) and starts in milliseconds without a JVM, which would make jllama usable in CLI tools, serverless functions, and short-lived processes where JVM startup is the dominant cost. + + **What to investigate before committing**: + - **JNI-loading shape.** Native Image supports JNI but requires `--enable-native-access=ALL-UNNAMED` + reflection/JNI configuration files (`reflect-config.json`, `jni-config.json`, `resource-config.json`) describing every class/method/field reachable across the JNI boundary. The 17 native methods in `jllama.cpp` plus the JNI-side `FindClass` / `GetFieldID` / `GetMethodID` calls at `JNI_OnLoad` need to be mapped. The GraalVM tracing agent (`-agentlib:native-image-agent=config-output-dir=...`) can auto-generate the config during a representative test run, but the `LlamaLoader` JAR-extraction path needs at least one resource-config rule for `net/ladenthin/llama/{OS}/{ARCH}/lib*.so`. + - **Native-library packaging.** The current `LlamaLoader` extracts the OS-specific `.so`/`.dll`/`.dylib` from the JAR to a tmp dir at first use. Native Image needs the same file at AOT-execution time, so either (a) ship the native lib alongside the produced binary as a sidecar file and adjust `LlamaLoader` to find it on the same directory, or (b) embed the native lib as a resource and keep the existing extract-to-tmpdir flow (which Native Image supports via `resource-config.json`). + - **CUDA / Metal / OpenCL backend selection.** Today the choice between CPU-only / `cuda13-linux-x86-64` / `opencl-android-aarch64` JARs is at Maven-classifier time. Native Image would need either one binary per backend (multiplying the release matrix) or a runtime selector inside `LlamaLoader` that picks among bundled backend libs. The latter is a bigger refactor. 
+ - **Startup-time benchmark to justify the work.** Measure cold-start of a current java-llama.cpp `LlamaModel(new ModelParameters().setModel("...").setNPredict(1))` invocation: how much is JVM startup + class load vs JNI load + model parse + tokenize + 1 token? If JVM startup is < 10 % of cold-start, Native Image yields little. If JVM startup is > 50 %, it's a clear win for CLI / serverless use cases. + - **Maintenance cost.** Native Image adds a second build matrix (per OS × per backend × per JDK) and a new failure surface (Native Image config drift when a llama.cpp version bump adds new JNI-reachable types). Should ship only with a CI job that exercises the Native Image build on at least one OS, otherwise the config files will rot silently. + + **Out of scope until evidence supports it**: actually implementing any of the above. This entry exists so that when someone asks "can I ship java-llama.cpp as a single 30 MB binary?" the answer points to a concrete investigation plan rather than restarting from zero. + +- **Adopt a standard `CLAUDE.md` template/tool for cross-repo consistency.** The four Bernard-Ladenthin Java repos (`BitcoinAddressFinder`, `llamacpp-ai-index-maven-plugin`, `streambuffer`, `java-llama.cpp`) each carry their own hand-grown `CLAUDE.md`; section ordering, headings, and conventions have already drifted between them. Evaluate adopting a standardised template — for example [`centminmod/my-claude-code-setup` `CLAUDE-template-1.md`](https://github.com/centminmod/my-claude-code-setup/blob/master/CLAUDE-template-1.md) — so every repo's `CLAUDE.md` shares the same top-level structure (project overview, build/test commands, conventions, open TODOs, …) and so future edits land in predictable places. Pairs with the "Abstract the Java and test writing guidelines to a workspace-level shared layer" TODO above: the template covers the per-repo structure, the workspace skill covers the shared content. 
Capture the template choice and the migration plan BEFORE rewriting any existing `CLAUDE.md`; do not rewrite files in this pass. diff --git a/CMakeLists.txt b/CMakeLists.txt index d5fe64cf..057c03ee 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -114,7 +114,7 @@ set(LLAMA_BUILD_APP OFF CACHE BOOL "" FORCE) FetchContent_Declare( llama.cpp GIT_REPOSITORY https://github.com/ggerganov/llama.cpp.git - GIT_TAG b9442 + GIT_TAG b9495 ) FetchContent_MakeAvailable(llama.cpp) diff --git a/README.md b/README.md index b3a8b110..8fc69bea 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,13 @@ **Build:** -![Java 11+](https://img.shields.io/badge/Java-11%2B-informational) -![JUnit](https://img.shields.io/badge/tested%20with-JUnit4-yellow) +![Java 8+](https://img.shields.io/badge/Java-8%2B-informational) +![Platform](https://img.shields.io/badge/Platform-Linux%20%7C%20macOS%20%7C%20Windows%20%7C%20Android-lightgrey) +[![JPMS](https://img.shields.io/badge/JPMS-modular%20JAR-25A162)](https://openjdk.org/projects/jigsaw/) +![JUnit](https://img.shields.io/badge/tested%20with-JUnit6-25A162) +[![JSpecify](https://img.shields.io/badge/JSpecify-1.0.0%20%40NullMarked-25A162)](https://jspecify.dev) +[![NullAway](https://img.shields.io/badge/NullAway-strict%20JSpecify-25A162)](https://github.com/uber/NullAway) +[![Checker Framework](https://img.shields.io/badge/Checker%20Framework-Nullness-25A162)](https://checkerframework.org) +[![Error Prone](https://img.shields.io/badge/Error%20Prone-12%20patterns%20at%20ERROR-25A162)](https://errorprone.info) +[![Maven Enforcer](https://img.shields.io/badge/Maven%20Enforcer-strict-25A162)](https://maven.apache.org/enforcer/) [![jqwik](https://img.shields.io/badge/tested%20with-jqwik-1f6feb)](https://jqwik.net) [![ArchUnit](https://img.shields.io/badge/tested%20with-ArchUnit-c71a36)](https://www.archunit.org) [![SpotBugs](https://img.shields.io/badge/analyzed%20with-SpotBugs-3b5998)](https://spotbugs.github.io) @@ -8,7 +15,7 @@ 
[![Lincheck](https://img.shields.io/badge/tested%20with-Lincheck-7F52FF)](https://github.com/JetBrains/lincheck) [![vmlens](https://img.shields.io/badge/tested%20with-vmlens-ff6f00)](https://vmlens.com) [![JMH](https://img.shields.io/badge/benchmarked%20with-JMH-25A162)](https://openjdk.org/projects/code-tools/jmh/) -[![llama.cpp b9442](https://img.shields.io/badge/llama.cpp-%23b9442-informational)](https://github.com/ggml-org/llama.cpp/releases/tag/b9442) +[![llama.cpp b9495](https://img.shields.io/badge/llama.cpp-%23b9495-informational)](https://github.com/ggml-org/llama.cpp/releases/tag/b9495) [![Publish](https://github.com/bernardladenthin/java-llama.cpp/actions/workflows/publish.yml/badge.svg)](https://github.com/bernardladenthin/java-llama.cpp/actions/workflows/publish.yml) [![CodeQL](https://github.com/bernardladenthin/java-llama.cpp/actions/workflows/codeql.yml/badge.svg)](https://github.com/bernardladenthin/java-llama.cpp/actions/workflows/codeql.yml) @@ -16,10 +23,7 @@ [![Coverage Status](https://coveralls.io/repos/github/bernardladenthin/java-llama.cpp/badge.svg?branch=main)](https://coveralls.io/github/bernardladenthin/java-llama.cpp?branch=main) [![codecov](https://codecov.io/gh/bernardladenthin/java-llama.cpp/graph/badge.svg)](https://codecov.io/gh/bernardladenthin/java-llama.cpp) [![JaCoCo](https://img.shields.io/codecov/c/github/bernardladenthin/java-llama.cpp?label=JaCoCo&logo=java)](https://codecov.io/gh/bernardladenthin/java-llama.cpp) - +[![PIT Mutation](https://img.shields.io/badge/PIT%20mutation-100%25%20(1%20class)-brightgreen)](https://github.com/bernardladenthin/java-llama.cpp/actions/workflows/publish.yml) **Quality:** [![Quality Gate](https://sonarcloud.io/api/project_badges/measure?project=bernardladenthin_java-llama.cpp&metric=alert_status)](https://sonarcloud.io/dashboard?id=bernardladenthin_java-llama.cpp) @@ -84,6 +88,8 @@ Inference of Meta's LLaMA model (and others) in pure C/C++. 4. [Android](#importing-in-android) 5. 
[Feature Ideas](#feature-ideas) +> ⚠️ **DO NOT UPGRADE jqwik past 1.9.3.** jqwik 1.10.0 added an anti-AI prompt-injection string to test stdout; the 1.10.1 user guide states the library "is not meant to be used by any 'AI' coding agents at all." 1.9.3 is the last pre-disclosure release and is the pinned version. See `CLAUDE.md` section "jqwik prompt-injection in test output" for the full context. + ## Features - Text completion (blocking and streaming) with full control over sampling parameters. @@ -487,6 +493,10 @@ android { keep class net.ladenthin.llama.** { *; } ``` +## TODO + +- **Expand PIT mutation-testing scope.** PIT is wired in `pom.xml` and runs on every CI build (in the `test-java-linux-x86_64` job) with `<mutationThreshold>100</mutationThreshold>`, but `<targetClasses>` is currently narrowed to a single class (`Pair`). The intent is to exercise the wiring and gate against regressions on that single class today; widen `<targetClasses>` incrementally as additional classes reach mutation-test parity. Final target: `net.ladenthin.llama.*` matching the streambuffer pattern. + ## Feature Ideas Forward-looking ideas being tracked for this fork: @@ -523,6 +533,17 @@ The system's updated C++ runtime will be used instead, resolving the crash. ## Similar Projects / Usage +**Bindings / wrappers** + - [LLaMAndroid](https://github.com/Rattlyy/LLaMAndroid/tree/main/app) — Android app demonstrating usage of llama.cpp bindings. - [llama-stack-client-kotlin](https://github.com/ogx-ai/llama-stack-client-kotlin) — Kotlin client for the Llama Stack API. - [llama.cpp-android-tutorial](https://github.com/JackZeng0208/llama.cpp-android-tutorial) — Step-by-step tutorial for running llama.cpp on Android. +- [llamacpp4j](https://github.com/sebicom/llamacpp4j) — alternative Java/JNI binding to llama.cpp (SWIG-generated facade); pre-GGUF, dormant since 2023 but historically the other Java JNI option.
+ +**Pure-Java single-model inference (no JNI / no llama.cpp)** — Alfonso² Peterssen's `*.java` family of standalone, dependency-free Java inference runtimes, one per model architecture. Useful when JNI is unavailable (e.g. some sandboxes / GraalVM native-image scenarios) or when you want a single jar with no native side at all. Different design point from this project, which prioritises GGUF compatibility and llama.cpp performance via JNI. + +- [llama3.java](https://github.com/mukel/llama3.java) — Llama 3 / 3.1 / 3.2 inference. +- [gemma4.java](https://github.com/mukel/gemma4.java) — Gemma 4 (and earlier Gemma 2/3) inference. +- [gptoss.java](https://github.com/mukel/gptoss.java) — GPT-OSS architecture inference. +- [qwen35.java](https://github.com/mukel/qwen35.java) — Qwen 3.5 inference. +- [nemotron3.java](https://github.com/mukel/nemotron3.java) — NVIDIA Nemotron-3 inference. diff --git a/docs/feature-investigation-similar-projects.md b/docs/feature-investigation-similar-projects.md new file mode 100644 index 00000000..748f747a --- /dev/null +++ b/docs/feature-investigation-similar-projects.md @@ -0,0 +1,199 @@ +# Feature Investigation — ideas from pure-Java sibling runtimes and `llamacpp4j` + +Comparison sources (all surveyed in one pass for this document): + +| Repo | Shape | License | Survey notes | +|------|-------|---------|--------------| +| [mukel/llama3.java](https://github.com/mukel/llama3.java) | Pure Java, single-file (~3.4k LOC), Vector API + GraalVM Native Image | MIT | Llama 3 / 3.1 / 3.2 | +| [mukel/gemma4.java](https://github.com/mukel/gemma4.java) | Pure Java, single-file (~3.9k LOC) | Apache 2.0 | Gemma 4 + earlier Gemma 2/3 | +| [mukel/gptoss.java](https://github.com/mukel/gptoss.java) | Pure Java, single-file | Apache 2.0 | OpenAI GPT-OSS (Harmony chat format) | +| [mukel/qwen35.java](https://github.com/mukel/qwen35.java) | Pure Java, single-file | Apache 2.0 | Qwen 3.5 dense + MoE | +| 
[mukel/nemotron3.java](https://github.com/mukel/nemotron3.java) | Pure Java, single-file | Apache 2.0 | NVIDIA Nemotron-3 (dense + MoE + recurrent SSM) | +| [sebicom/llamacpp4j](https://github.com/sebicom/llamacpp4j) | Alternative JNI binding (SWIG-generated facade over `llama.h`) | unspecified | **Dormant** — 1 commit (2023-07-04), pre-GGUF (llama.cpp build 491), no LICENSE, no tests, no CI | + +The 5 `mukel` projects are written by the same author (Alfonso² Peterssen), share a single-file template, and re-implement GGUF parsing + tensor kernels in pure Java. They are NOT direct competitors to `java-llama.cpp` (which delegates inference to llama.cpp via JNI); they are interesting because they have **better operator-facing ergonomics** at the CLI and example layers. + +`llamacpp4j` is the only other Java-side JNI binding to llama.cpp; the survey looked specifically for API-shape ideas and capabilities not currently exposed here. + +Effort sizing (mirrors [`feature-investigation-llama-stack-client-kotlin.md`](feature-investigation-llama-stack-client-kotlin.md)): + +| Size | Calendar effort (1 engineer) | Description | +|------|------------------------------|-------------| +| XS | < 0.5 day | Trivial Java-side change, no JNI | +| S | 0.5 – 2 days | Java surface + minor JNI/JSON wiring | +| M | 2 – 5 days | New JNI methods, native plumbing, tests | +| L | 1 – 2 weeks | New native subsystem or large API surface | + +--- + +## 1. What this project already covers + +The following are confirmed present in `java-llama.cpp` as of this survey — flagged so we do not re-investigate them: + +| Capability | Status | +|---|---| +| `setSkipDownload(boolean)` + typed `ModelUnavailableException` | ✅ (commit `37754d4`) | +| Reasoning-format toggle, reasoning-budget tokens | ✅ (`InferenceParameters#setReasoningFormat` etc.) 
| +| Tool calls + custom chat templates | ✅ | +| Speculative draft model | ✅ | +| Multimodal vision (mmproj) | ✅ | +| Infill (fill-in-the-middle) | ✅ | +| Streaming via `LlamaIterator` / Reactive Streams `Publisher` | ✅ | +| `CompletableFuture` async + `CancellationToken` | ✅ | +| `LoadProgressCallback` model-load progress | ✅ | + +--- + +## 2. Cross-cutting themes — universal across the 5 `mukel` projects + +These ideas appear in every (or nearly every) `mukel` runtime; portability across reasoning-model families makes them the **highest-leverage** items. + +### 2.1 Streaming UTF-8 decoder for multi-byte boundary safety *(S, medium-high priority)* + +Sources: `qwen35.java` (`StreamingDecoder`, L2929–2987), `nemotron3.java`, `gemma4.java`. + +GGUF byte-fallback tokenisation can split a single Unicode codepoint across two consecutive token pieces. `LlamaIterator` callers today can receive a `LlamaOutput.text` value containing a partial UTF-8 sequence and either render mojibake (CJK, emoji) or hand-roll their own buffering. The `mukel` runtimes wrap the token stream in a small decoder that holds back trailing bytes until a complete codepoint is available, then flushes. + +- **Why**: silent correctness bug for non-ASCII users; ~50-LOC fix. +- **Shape**: `Utf8BoundaryStreamingDecoder` helper in the Java layer (no JNI change); optional `setUtf8BoundarySafe(true)` opt-in on `InferenceParameters`, or always-on inside `LlamaIterator`. +- **Test**: use any of the existing CJK / emoji prompts; assert no partial codepoint ever crosses the iterator boundary. + +### 2.2 Tri-state thinking-channel router for reasoning models *(S, medium priority)* + +Sources: `gemma4.java`, `gptoss.java` (Harmony channels), `qwen35.java`, `nemotron3.java`. + +A `--think off|on|inline` flag with three semantics: **`off`** strips reasoning tokens from the visible stream (and from chat history), **`on`** (default) routes them to a separate sink (e.g. 
stderr in CLI examples), **`inline`** interleaves them in the main output. Pairs cleanly with this project's existing `setReasoningFormat`/`setReasoningBudgetTokens`. + +- **Why**: every reasoning model in this project's test matrix (Qwen3-0.6B, plus any GPT-OSS / Gemma / Nemotron load) exposes thought tokens, but operators currently hand-roll the routing. +- **Shape**: helper class `ThinkingChannelRouter` (or analogous) that consumes a `LlamaIterator` and produces two streams (visible / reasoning), plus an enum knob on `InferenceParameters`. +- **gptoss specifically**: needs a Harmony-channel state machine that recognises `<|start|>`, `<|channel|>`, `<|message|>`, `<|end|>` and exposes `analysis` / `commentary` / `final` channels separately. Worth shipping as a separate `HarmonyChannelDecoder` if GPT-OSS users materialise. *(M for the Harmony variant; S for the generic `<think>` variant.)* + +### 2.3 Interactive chat REPL with slash commands *(XS, low-medium priority)* + +Sources: `llama3.java`, `gemma4.java`, `gptoss.java`, `qwen35.java`, `nemotron3.java`. + +`/quit`, `/exit`, `/context` (the latter prints `used / max / remaining` tokens for the current chat session). Users currently Ctrl-C out of `ChatExample`. + +- **Shape**: a `ChatRepl` example under `src/test/java/examples/`. No new production API surface — it composes existing `LlamaModel` calls. +- **Effort**: 1 new file, ~150 LOC. + +### 2.4 ANSI colour auto-detection honouring `NO_COLOR` + `TERM=dumb` *(XS, low priority)* + +Sources: `gemma4.java`, `gptoss.java`, `qwen35.java`, `nemotron3.java`. + +Tri-state `--color on|off|auto` helper that honours the [`NO_COLOR`](https://no-color.org) informal standard, detects `TERM=dumb`, and falls back to no-colour when `System.console()` is `null`. ~15 LOC; useful in every example CLI that prints reasoning tokens or perf summaries in a different style. + +### 2.5 Operator-grade timing line on stderr *(XS, medium priority)* + +Sources: `qwen35.java`, `nemotron3.java`.
+ +After every generation: a one-line `prompt: X tok/s (P tokens) | generation: Y tok/s (G tokens) | context: U/M` summary to stderr. `LlamaModel.getTimings()` already has all the inputs; no example formats them. + +### 2.6 `AutoCloseable Timer.log("label")` idiom *(XS, low priority)* + +Sources: `gemma4.java` (`Timer` class, L320–333), `qwen35.java`. + +`try (var t = Timer.log("Load tensors")) { ... }` prints `Load tensors: 312 ms` to stderr on close. 12-line helper. The project already times model load + JNI init + first-token latency in ad-hoc places; one helper would unify them. Friendly to `LogCaptor` (already wired in tests). + +### 2.7 `jbang`-runnable single-file example *(XS, medium priority)* + +Sources: all 5 `mukel` runtimes. + +Ship a self-contained `Example.java` with the `///usr/bin/env jbang` shebang and `//DEPS net.ladenthin:llama:5.0.0`. Lowers the "try it once" barrier from `mvn dependency:get + classpath wrangling` to one curl-and-run line. Pairs naturally with publishing on Maven Central. + +### 2.8 Documented system-properties table in the README *(XS, medium priority)* + +Sources: all `mukel` runtimes (each documents its own `-D…` knobs alongside `--flag` parameters). + +Currently the `LlamaSystemProperties` setters (`net.ladenthin.llama.lib.path`, `.tmpdir`, `.osinfo.architecture`, `.test.ngl`, the per-test `.vision.*` and `.nomic.path` properties) are scattered across `CLAUDE.md`, source javadoc, and test setup. A single README table listing every supported property + default + meaning improves discoverability. + +--- + +## 3. Per-repo unique ideas + +### 3.1 `llama3.java` + +- **`--echo` debug mode** *(XS, low)* — dump every token to stderr separately from `--stream`. Useful for teaching / first-time-user debugging. +- **`-Dllama.VectorBitSize=0|128|256|512`** *(XS, low)* — runtime knob to pin SIMD width / benchmark when multiple ISA variants are co-located. 
Equivalent for this project: a system property selecting GGML CPU backend variant when multiple are on the library path. + +### 3.2 `gemma4.java` + +- **README note about `llama-quantize --pure`** *(XS, low)* — mixed-quant GGUF files (e.g. `Q4_0` with embedded `F16` tensors) cause subtle issues that users discover only by trawling the upstream issue tracker. Surface the workaround in the troubleshooting section. + +### 3.3 `gptoss.java` + +- **`Reasoning: low|medium|high` system-message injection** *(S, high if GPT-OSS users present)* — add `InferenceParameters.setReasoningEffort(LOW|MEDIUM|HIGH)` that synthesises the Harmony `Reasoning: X` line. Encodes a contract operators currently discover only by reading the Jinja template. +- See also Harmony channel decoder under §2.2. + +### 3.4 `qwen35.java` + +- **"Empty `<think>` injection" to *disable* thinking on Qwen models** *(S, medium)* — prefill the assistant header with `<think>\n\n</think>\n\n` so the model produces only the visible answer with zero reasoning tokens, regardless of whether llama.cpp's `reasoning_format` understands the family. Complements existing `setReasoningFormat` / `setReasoningBudgetTokens`. Should land as a `ChatRequest` option or a thin Qwen-aware preset. + +### 3.5 `nemotron3.java` + +- All unique-value findings overlap with §2 themes; no Nemotron-specific item warranted its own row beyond what §2.1 / §2.2 already cover. + +### 3.6 `llamacpp4j` + +`llamacpp4j` is dormant (single commit, July 2023, pre-GGUF era) and its design is largely uninteresting (SWIG-generated facade with opaque `SWIGTYPE_p_*` pointers leaking through). The *useful* ideas come from the underlying `llama.h` API surface that SWIG happens to expose, not from anything Sebicom designed: + +- **`llama_state_*` save/load API** *(M, medium)* — `llama_copy_state_data`, `llama_set_state_data`, `llama_save_session_file` / `llama_load_session_file`. Useful for prompt-warm-start, multi-tenant resumption, and benchmarking.
`ModelParameters` doesn't surface KV-cache snapshotting as first-class Java API. +- **`llama_apply_lora_*` hot-apply at runtime** *(M, medium)* — adapter hot-swap without reloading the base model (common multi-tenant pattern). Use the modern `llama_adapter_lora_*` API, not the deprecated file-based one Sebicom exposes. +- **`llama_model_quantize` exposure** *(S, low)* — one-line wrapper that converts FP16 → Q4/Q5/Q8 GGUF in-process. Lets Java apps build a "download FP16 → quantize for this device" path without shelling out. +- **`llama_print_system_info()` wrapper** *(XS, low)* — trivial diagnostic that prints `AVX = 1 | AVX2 = 1 | …` etc. Useful for bug reports. + +**Explicitly skip from `llamacpp4j`**: the SWIG-generated facade itself (brittle, opaque pointer types leak), the `mainn(argv)` shortcut that forwards to `llama.cpp`'s reference CLI, the single-OS prebuilt `.so` checked into git, the README-documented "install JAR into local Maven repo" workflow. `java-llama.cpp`'s JSON-over-JNI + classifier-based packaging is strictly better. + +--- + +## 4. Explicitly out of scope + +Recurring "don't port" themes across all 6 sources: + +- **Pure-Java tensor kernels / GGUF parser / quantization classes** — redundant with llama.cpp; the entire raison d'être of this project is to *delegate* these to the upstream C++. +- **GraalVM Native Image AOT model preloading** — already captured as its own design-investigation TODO in `CLAUDE.md`; not duplicated here. +- **Reimplementations of samplers** (`ToppSampler`, `CategoricalSampler`) — llama.cpp's sampler chain already covers TOP_P, TYP_P, MIN_P, XTC, DRY, etc. +- **Single-file `jbang` distribution of the whole library** — wrong shape for a JNI library that ships per-OS classifier JARs. *(A single-file `jbang` *example* per §2.7 is fine; the library itself stays multi-module.)* +- **Hard-coded per-model chat-template token strings** (e.g. 
Gemma's `<|turn>` / `<|think|>`) — llama.cpp's chat-template engine handles these generically. + +--- + +## 5. Prioritised backlog (top picks across all 6 sources) + +Sorted by `priority × (1 / effort)`. Items in **bold** are the recommended first batch. + +| # | Item | Source(s) | Effort | Priority | +|---|------|-----------|:--:|:--:| +| 1 | **UTF-8 boundary-safe streaming decoder** | §2.1 | S | medium-high | +| 2 | Tri-state thinking-channel router (generic `<think>`) | §2.2 | S | medium | +| 3 | **Operator-grade per-run timing line on stderr** | §2.5 | XS | medium | +| 4 | **`jbang`-runnable single-file example** | §2.7 | XS | medium | +| 5 | **System-properties table in README** | §2.8 | XS | medium | +| 6 | Empty `<think>` injection (Qwen) | §3.4 | S | medium | +| 7 | `llama_state_*` save/load Java API | §3.6 | M | medium | +| 8 | `llama_adapter_lora_*` hot-apply API | §3.6 | M | medium | +| 9 | Chat REPL with `/quit /exit /context` | §2.3 | XS | low-medium | +| 10 | Harmony channel decoder for GPT-OSS | §2.2 | M | conditional (ship when GPT-OSS users ask) | +| 11 | `Reasoning: X` system-message injection | §3.3 | S | conditional | +| 12 | ANSI colour auto-detection helper | §2.4 | XS | low | +| 13 | `AutoCloseable Timer.log()` idiom | §2.6 | XS | low | +| 14 | `llama_print_system_info()` wrapper | §3.6 | XS | low | +| 15 | `llama_model_quantize` Java surface | §3.6 | S | low | +| 16 | README note on `llama-quantize --pure` | §3.2 | XS | low | +| 17 | `--echo` debug knob in example | §3.1 | XS | low | +| 18 | `-Dllama.VectorBitSize`-style ISA knob | §3.1 | XS | low | + +Items 1, 3, 4 and 5 are the recommended first batch — none requires JNI changes and each closes a documented operator pain point. + +--- + +## 6.
Recommended next action + +Implement items 1, 3, 4, 5 in one focused "operator-facing ergonomics" commit: + +- UTF-8 boundary-safe streaming decoder (genuine correctness fix) +- Per-run timing line (cheap operator signal) +- One `jbang`-runnable example file +- README system-properties table + +Estimated total: ~1–2 days of work, zero JNI changes, all backed by Java-only tests. Items 2 and 6–8 are good follow-ups once a real user asks. diff --git a/docs/history/llama-cpp-breaking-changes.md b/docs/history/llama-cpp-breaking-changes.md index 473dbc71..b55636d1 100644 --- a/docs/history/llama-cpp-breaking-changes.md +++ b/docs/history/llama-cpp-breaking-changes.md @@ -283,3 +283,23 @@ Used during `llama.cpp` version bumps: when upgrading, scan this file from the r | ~b9437–b9442 | `src/llama.cpp` | `llama_prepare_model_devices()` iGPU collection now appends only the FIRST `GGML_BACKEND_DEVICE_TYPE_IGPU` device (prevents duplicate iGPU registration on multi-iGPU hosts). Behavioural fix, single-line caller in `jllama.cpp` unchanged, no project source changes required | | ~b9437–b9442 | `tools/ui/embed.cpp` + `tools/ui/src/...` (Svelte) | Webasset embedder tightened printf format specifiers (`%lu` → `%zu` and `PRIx64`); UI settings split `custom` into `customJson` + `customCss`; runtime CSS injection via ``. Project does not ship the upstream UI, no impact | | ~b9437–b9442 | `gguf-py/`, `conversion/` (Python) | New `_set_vocab_whitespace()` helper and `add_normalizer_lowercase()` GGUF writer for the new whitespace tokenizer + lowercase normalizer keys (mirrors the vocab additions above); jina-v2 Roberta-tokenizer path now branches to whitespace when `tokenizer.json` declares a `Whitespace` pre-tokenizer. 
Python-side only, no impact on the Java/JNI build | +| ~b9442–b9444 | `.github/workflows/build-cpu.yml` (upstream CI) | Upstream's CPU-build CI trigger paths narrowed to `**/*.h`, `**/*.hpp`, `**/*.c`, `**/*.cpp` (dropped `**/*.cu`, `**/*.cuh`, `**/*.swift`, `**/*.m`, `**/*.metal`, `**/*.comp`, `**/*.glsl`, `**/*.wgsl`) so GPU/Metal/Vulkan/WebGPU/Swift source edits no longer trigger the CPU build. Upstream-only CI plumbing; this project consumes none of upstream's workflow files and has its own `publish.yml`, no impact | +| ~b9442–b9444 | `tools/server/server-http.cpp` | `If-None-Match` conditional-GET handling now also accepts the weak ETag form `W/"..."` (previously matched only strong ETag bytes-equal); 304 Not Modified returned for either form. This is the standalone `llama-server` HTTP tool, which is not linked into the JNI build (`libllama` + `libcommon` only); no project source changes required and no new Java API surface to expose | +| ~b9444–b9490 | `common/common.cpp` | `common_prompt_batch_decode()` signature changed: new `int n_new` parameter added between `all_tokens` and `n_past`. Callers must pass the count of newly-decoded tokens for the batch. Only called inside upstream `tools/server/server-context.cpp` (compiled directly into jllama); no project source changes required — the new signature flows through transparently | +| ~b9444–b9490 | `include/llama.h` | `llama_set_warmup()` deprecated via `LLAMA_DEPRECATED` macro (warmup is now handled internally during model load + first decode). Not called from `jllama.cpp` or any project source — absorbed inside upstream-compiled code, no project changes required. If a future jllama feature wants to control warmup explicitly, that path is the deprecated one and should pick the new replacement instead | +| ~b9444–b9490 | `include/llama.h` + `src/llama-context.cpp` | New `llama_context_params::n_outputs_max` field (default `-1` = derived from `n_batch`). 
Limits the number of output slots allocated per context; useful for low-memory setups that always request `logits_all=false`. Not exposed by project today — consider adding `ModelParameters.setMaxOutputs(int)` if a user requests fine-grained control. Tracked under Open TODOs | +| ~b9444–b9490 | `common/arg.cpp` + `common/common.cpp` | `common_params_handle_models()` no longer sets `hf_opts.download_mmproj = true` unconditionally; instead uses `opts.download_mmproj = !params.no_mmproj` so the new `--no-mmproj` flag suppresses the multimodal projector download. Not called from project source — arg parsing happens upstream, no project changes required | +| ~b9444–b9490 | `common/sampling.h` + `common/sampling.cpp` | New `common_sampler_reasoning_budget_force(common_sampler *)` API that triggers the budget sampler to inject the end-of-thinking token on the next sample. Paired with new `common_params_sampling::reasoning_control` bool: when set, arms the budget sampler so external code (e.g. a server control endpoint) can end reasoning at runtime. Not used by project today — would pair with a future `InferenceParameters.setReasoningControl(boolean)` setter and a `LlamaModel.endReasoning(...)` helper. Tracked under Open TODOs | +| ~b9444–b9490 | `common/common.h` + `common/arg.cpp` | New `common_params::sse_ping_interval` (int32, env `LLAMA_ARG_SSE_PING_INTERVAL`, CLI `--sse-ping-interval`); server emits SSE keep-alive comments at this interval. Server-only; project does not run the upstream HTTP server (uses a direct in-process API), no Java setter required | +| ~b9444–b9490 | `tools/server/server-http.cpp` | New `POST /v1/chat/completions/control` endpoint accepting `{"id": "...", "action": "reasoning_end"}` — tells a streaming completion to wrap up reasoning early. Server-only; not linked into the JNI build (`libllama` + `libcommon` only), no project source changes required. 
If exposed in Java, would map to a new `LlamaModel.endReasoning(String taskId)` method that calls `common_sampler_reasoning_budget_force` on the slot's sampler. Tracked under Open TODOs | +| ~b9444–b9490 | `src/llama-hparams.h` + `src/llama-model.cpp` | Internal renames: `hparams::recurrent_layer_arr` → `hparams::is_recr_impl`; `hparams::swa_layers` → `hparams::is_swa_impl`. Internal helper fields not part of the public API; not referenced by `jllama.cpp` or any project source, no changes required | +| ~b9444–b9490 | `src/llama-arch.h` + `src/llama-arch.cpp` + `gguf-py/` | New `LLM_KV_HIDDEN_ACT` GGUF key (`%s.hidden_act`) for ModernBert SwiGLU/GeGLU activation selection; new `LLM_KV_ATTENTION_RECURRENT_LAYERS` key for hybrid (recurrent + attention) models. Additive vocabulary keys consumed automatically when loading a GGUF that sets them; no project source or Java API changes required | +| ~b9444–b9490 | `src/llama-arch.h` + `src/models/*.cpp` (new) | New model architectures: `LLM_ARCH_MELLUM` (JetBrains code-completion), `LLM_ARCH_EXAONE4_5` (LG AI multimodal), `LLM_ARCH_STEP3P7` (StepFun Step-3.7 with MTP support); `LLM_ARCH_QWEN3NEXT`/`LLM_ARCH_QWEN35`/`LLM_ARCH_QWEN35MOE` removed from `llama_model_saver_supports_arch()` allowlist. New tokenizer pre-types: `LLAMA_VOCAB_PRE_TYPE_GRANITE_EMB_MULTI = 54`, `LLAMA_VOCAB_PRE_TYPE_MELLUM2 = 55`. All additive at the architecture level — consumed automatically when loading a matching GGUF, no project source or Java API changes required | +| ~b9444–b9490 | `common/arg.cpp` | New `--mtp` / `--no-mtp` flags (env `LLAMA_ARG_MTP`) now apply to Step-3.5 in addition to the existing Qwen3.5 coverage. Multi-Token Prediction is consumed inside upstream-compiled server TUs; project does not expose an MTP setter today (would map to `ModelParameters.setMtp(boolean)`). 
Tracked under Open TODOs if a user requests it | +| ~b9444–b9490 | upstream build / verification | Local build with `GIT_TAG b9490` was verified clean: `cmake -B build` configures cleanly; `cmake --build build --config Release -j$(nproc)` links `libjllama.so` with zero warnings on `jllama.cpp` or any project translation unit. All breaking changes in this range are absorbed inside upstream-compiled translation units (`common.cpp`, `arg.cpp`, `llama.cpp`, `server-*.cpp`, `download.cpp`); no project source edits required for the version bump itself | +| ~b9490–b9495 | `include/llama.h` + `src/llama-ext.h` + `src/llama-context.{h,cpp}` + `src/llama-cparams.h` + `src/llama-graph.{h,cpp}` + `common/speculative.{h,cpp}` + `src/models/{qwen35,qwen35moe,step35}.cpp` | Mass terminology rename: `pre_norm` → `nextn` everywhere the pre-final-norm hidden state is referenced. Affects the public API: `llama_set_embeddings_pre_norm()` → `llama_set_embeddings_nextn()`, `llama_get_embeddings_pre_norm()` → `llama_get_embeddings_nextn()`, `llama_get_embeddings_pre_norm_ith()` → `llama_get_embeddings_nextn_ith()`. Internal: `cparams.embeddings_pre_norm` → `cparams.embeddings_nextn`, `cparams.embeddings_pre_norm_masked` → `cparams.embeddings_nextn_masked`, `llm_graph_result::t_h_pre_norm` → `t_h_nextn`, `common_speculative_need_embd_pre_norm()` → `common_speculative_need_embd_nextn()`. Qwen3.5 / Qwen3.5-MoE / Step-3.5 model graphs moved the final norm before extracting `t_h_nextn` (was after extracting the pre-norm hidden state). Project does not call any of these MTP-specific APIs directly — all references stay inside upstream-compiled translation units (`speculative.cpp`, `llama-context.cpp`, `server-context.cpp`, model TUs). Verified by grep across `src/main/cpp/*.{cpp,hpp}`: zero matches for any `pre_norm` / `nextn` / `embeddings_pre_norm*` / `t_h_pre_norm*` symbol. 
No project source changes required | +| ~b9490–b9495 | `ggml/src/ggml-cuda/common.cuh` + 10 CUDA kernel files | New `GGML_CUDA_RESTRICT` macro replaces `__restrict__` on kernel parameter pointers. PDL (Programmatic Dependent Launch) on Hopper requires `__restrict__` to be disabled per [llama.cpp PR #24030](https://github.com/ggml-org/llama.cpp/pull/24030); the macro expands to nothing under `GGML_CUDA_USE_PDL && __CUDA_ARCH__ >= GGML_CUDA_CC_HOPPER`, otherwise to `__restrict__`. Kernel signatures change from direct `T * __restrict__ x` parameters to `T * x_ptr` parameter + an internal `T * GGML_CUDA_RESTRICT x = x_ptr;` alias line; `GGML_UNUSED_VARS` calls in fallback branches updated to reference the `_ptr` names. Internal CUDA backend change; project does not compile any CUDA kernels in the JNI build (CUDA build uses upstream sources unchanged via FetchContent). No project source changes required | +| ~b9490–b9495 | `src/llama-arch.{h,cpp}` + `src/llama-vocab.{h,cpp}` + `gguf-py/gguf/constants.py` + `gguf-py/gguf/gguf_writer.py` | New `LLM_KV_TOKENIZER_SUPPRESS_TOKENS` GGUF key (`tokenizer.ggml.suppress_tokens`). When a GGUF declares this array, the loader stores it on `llama_vocab::impl::suppress_tokens` and exposes it via new `llama_vocab::get_suppress_tokens()` accessor. The Gemma4 model graph (`src/models/gemma4.cpp`) reads this list and appends a `-INFINITY` logit bias to those token IDs at the end of the forward graph (new `llm_graph_input_logits_bias` class). Additive: existing models without the key produce an empty `suppress_tokens` vector and the bias-add branch is skipped. Mirrors a HuggingFace transformers `suppress_tokens` parameter; specifically used for Gemma4 Unified to prevent the model from emitting `` / `` placeholder tokens. 
No project source or Java API changes required — the bias is applied automatically when a Gemma4U GGUF is loaded | +| ~b9490–b9495 | `gguf-py/gguf/constants.py` + `gguf-py/gguf/tensor_mapping.py` + `tools/mtmd/clip-impl.h` + `tools/mtmd/clip-model.h` + `tools/mtmd/clip.cpp` + new `tools/mtmd/models/gemma4uv.cpp` + new `tools/mtmd/models/gemma4ua.cpp` + `tools/mtmd/mtmd-audio.{h,cpp}` + `tools/mtmd/mtmd.cpp` + `conversion/__init__.py` + `conversion/gemma.py` | New Gemma4 Unified vision + audio variant (`Gemma4UnifiedForConditionalGeneration`). Adds new projector types `PROJECTOR_TYPE_GEMMA4UV` and `PROJECTOR_TYPE_GEMMA4UA` (vision uses bigger patch size with token merging done on the conv layer; audio is encoder-free, raw 16 kHz waveform chunked into 640-sample frames). New `V_ENC_EMBD_PATCH_NORM` tensor enum (`v.patch_norm.{bid}`) and 3 indexed `patch_norm_{1,2,3}_{w,b}` weights on `clip_model` (Gemma4U uses standard PyTorch LayerNorm rather than RMSNorm before/after the patch embedding). New `mtmd_audio_preprocessor_gemma4ua` mel-major waveform packer (40 ms / 16 kHz frames; no FFT, no filterbank). Multimodal additions are routed through upstream `mtmd-cli` / `mtmd-debug` binaries that the project does not link; the JNI build links `libllama` + `libcommon` only. Additive at the GGUF / projector loader level: existing GGUFs without these projector types continue to load through the previous code paths. No project source or Java API changes required | +| ~b9490–b9495 | `tools/ui/` (`package.json`, `src/lib/components/app/content/MarkdownContent/`, new `MermaidPreview.svelte`, new `DialogMermaidPreview.svelte`, new constants / icons / rehype plugins) | Upstream `llama-server` web UI gains Mermaid diagram rendering: new `mermaid@^11.15` dependency, lazy-loaded; new rehype plugin chain (`rehype-mermaid-pre`, `rehype-enhance-mermaid-blocks`) converts ` ```mermaid ` code fences to `
` and wraps them with copy / preview action buttons; the existing single-file `MarkdownContent.svelte` is split into a `.svelte` + sibling `.css` / `markdown-utils.ts` / `markdown-handlers.ts` so the new mermaid renderer can share helpers. Project does not compile or ship the upstream `tools/ui` (server-only feature, classpath-only JNI build); no impact |
+| ~b9490–b9495 | upstream build / verification | Local build with `GIT_TAG b9495` was verified clean: `cmake -B build -DBUILD_TESTING=ON` configures cleanly, `cmake --build build --config Release -j$(nproc)` links `libjllama.so` + `jllama_test` with zero warnings on any project translation unit; `ctest --test-dir build --output-on-failure` reports 435/435 tests passing. All breaking changes in this range are renames within upstream-compiled translation units; no project source edits required for the version bump itself |
diff --git a/pom.xml b/pom.xml
index 28343dbd..274a9412 100644
--- a/pom.xml
+++ b/pom.xml
@@ -50,7 +50,10 @@ SPDX-License-Identifier: MIT
 
 	
 		5.18.1
-		26.1.0
+		1.0.0
+		2.49.0
+		0.13.4
+		4.1.0
 		2.21.3
 		1.0.4
 		2.0.18
@@ -59,9 +62,16 @@ SPDX-License-Identifier: MIT
 		6.1.0
 		1.37
 		0.16
-		2.39
+		3.6
+		2.10.1
 		1.2.28
-		
+		
 		1.9.3
 		1.4.2
 		4.9.8.3
@@ -73,6 +83,22 @@ SPDX-License-Identifier: MIT
 		${git.commit.time}
 	
 
+	
+	
+		
+			
+				org.slf4j
+				slf4j-api
+				${slf4j.version}
+			
+		
+	
+
 	
 		
 			org.junit.jupiter
@@ -93,10 +119,14 @@ SPDX-License-Identifier: MIT
 			test
 		
 		
-			org.jetbrains
-			annotations
-			${jetbrains-annotations.version}
-			compile
+			org.jspecify
+			jspecify
+			${jspecify.version}
+		
+		
+			org.checkerframework
+			checker-qual
+			${checker.version}
 		
 		
 			com.fasterxml.jackson.core
@@ -153,11 +183,18 @@ SPDX-License-Identifier: MIT
 			test
 		
 		
-			org.jetbrains.kotlinx
-			lincheck-jvm
+			org.jetbrains.lincheck
+			lincheck
 			${lincheck.version}
 			test
 		
+		
+		
+			io.github.hakky54
+			logcaptor
+			${logcaptor.version}
+			test
+		
 	
 
 	
@@ -218,6 +255,11 @@ SPDX-License-Identifier: MIT
 					maven-surefire-plugin
 					3.5.5
 				
+				
+					org.apache.maven.plugins
+					maven-enforcer-plugin
+					3.6.3
+				
 				
 					org.codehaus.mojo
 					exec-maven-plugin
@@ -228,6 +270,11 @@ SPDX-License-Identifier: MIT
 					jacoco-maven-plugin
 					0.8.14
 				
+				
+					org.pitest
+					pitest-maven
+					1.25.1
+				
 				
 					org.sonatype.central
 					central-publishing-maven-plugin
@@ -236,6 +283,44 @@ SPDX-License-Identifier: MIT
 			
 		
 		
+			
+				org.apache.maven.plugins
+				maven-enforcer-plugin
+				
+					
+						enforce
+						
+							enforce
+						
+						
+							
+								
+									[3.6.3,)
+								
+								
+									[1.8,)
+								
+								
+								
+									
+										
+										commons-logging:commons-logging
+										
+										log4j:log4j
+										
+										org.hamcrest:hamcrest-core
+										org.hamcrest:hamcrest-library
+										org.hamcrest:hamcrest-all
+										
+										junit:junit
+										junit:junit-dep
+									
+								
+							
+						
+					
+				
+			
 			
 				io.github.git-commit-id
 				git-commit-id-maven-plugin
@@ -259,16 +344,105 @@ SPDX-License-Identifier: MIT
 				org.apache.maven.plugins
 				maven-compiler-plugin
 				
-					1.8
-					1.8
-					21
-					21
+					8
+					21
+					true
+					true
+					
+						
+						-Xlint:all,-serial,-options,-classfile,-processing
+						-Werror
+						
+						-processor
+						org.checkerframework.checker.nullness.NullnessChecker
+						-XDaddTypeAnnotationsToSymbol=true
+						-XDcompilePolicy=simple
+						--should-stop=ifError=FLOW
+						-Xplugin:ErrorProne -Xep:NullAway:ERROR -XepOpt:NullAway:OnlyNullMarked=true -XepOpt:NullAway:JSpecifyMode=true -XepOpt:NullAway:CheckOptionalEmptiness=true -XepOpt:NullAway:AcknowledgeRestrictiveAnnotations=true -XepOpt:NullAway:AcknowledgeAndroidRecent=true -XepOpt:NullAway:AssertsEnabled=true -Xep:BoxedPrimitiveEquality:ERROR -Xep:EqualsHashCode:ERROR -Xep:EqualsIncompatibleType:ERROR -Xep:IdentityBinaryExpression:ERROR -Xep:SelfAssignment:ERROR -Xep:SelfComparison:ERROR -Xep:SelfEquals:ERROR -Xep:DeadException:ERROR -Xep:FormatString:ERROR -Xep:InvalidPatternSyntax:ERROR -Xep:OptionalEquality:ERROR -Xep:ImpossibleNullComparison:ERROR
+					
+					
+						
+							com.google.errorprone
+							error_prone_core
+							${errorprone.version}
+						
+						
+							com.uber.nullaway
+							nullaway
+							${nullaway.version}
+						
+						
+							org.checkerframework
+							checker
+							${checker.version}
+						
+					
 				
 				
+					
+						default-compile
+						
+							
+							
+								module-info.java
+							
+						
+					
+					
+						module-info-compile
+						compile
+						
+							compile
+						
+						
+							
+							9
+							
+								module-info.java
+							
+							
+							
+						
+					
 					
 						default-testCompile
 						
-							
+							
+							false
+							
+								-XDaddTypeAnnotationsToSymbol=true
+								-XDcompilePolicy=simple
+								--should-stop=ifError=FLOW
+								-Xplugin:ErrorProne -Xep:NullAway:OFF -Xep:GuardedBy:OFF
+							
+							
 								
 									org.openjdk.jcstress
 									jcstress-core
@@ -431,6 +605,35 @@ SPDX-License-Identifier: MIT
 					test
 				
 			
+			
+				
+				org.pitest
+				pitest-maven
+				
+					
+						org.pitest
+						pitest-junit5-plugin
+						1.2.3
+					
+				
+				
+					
+						net.ladenthin.llama.Pair
+					
+					
+						net.ladenthin.llama.PairTest
+					
+					100
+					30000
+				
+			
 		
 	
 
@@ -489,6 +692,11 @@ SPDX-License-Identifier: MIT
 								compile
 							
 							
+								
+								
+									module-info.java
+								
 								
 									-h
 									src/main/cpp
@@ -565,6 +773,11 @@ SPDX-License-Identifier: MIT
 								compile
 							
 							
+								
+								
+									module-info.java
+								
 								
 									-h
 									src/main/cpp
diff --git a/src/main/java/module-info.java b/src/main/java/module-info.java
new file mode 100644
index 00000000..b899ea65
--- /dev/null
+++ b/src/main/java/module-info.java
@@ -0,0 +1,47 @@
+// SPDX-FileCopyrightText: 2026 Bernard Ladenthin 
+// SPDX-FileCopyrightText: 2023-2025 Konstantin Herud
+//
+// SPDX-License-Identifier: MIT
+
+/**
+ * JPMS module descriptor for the java-llama.cpp JNI bindings.
+ *
+ * <p>Exports the three hand-written public packages + * ({@code net.ladenthin.llama}, {@code net.ladenthin.llama.args}, + * {@code net.ladenthin.llama.json}). The native libraries shipped under + * {@code /net/ladenthin/llama/{OS}/{ARCH}/} are loaded by + * {@link net.ladenthin.llama.LlamaLoader} via + * {@link Class#getResourceAsStream(String)} on its own class object, so the resources + * are looked up in this module and do not need to be {@code opens}'d.</p>
+ * + * <p>JSpecify {@code @NullMarked} is declared at the module level here so that no source + * file compiled at {@code --release 8} references the JSpecify annotation type directly. + * Otherwise javac would emit an unsuppressible {@code unknown enum constant + * ElementType.MODULE} classfile-read warning for each source compiled at release 8 that + * resolves {@code @NullMarked} ({@code @NullMarked} carries + * {@code @Target({MODULE, PACKAGE, TYPE})} and Java 8 does not know about + * {@code ElementType.MODULE}). Confining the reference to {@code module-info.java} — + * which compiles at {@code --release 9} — keeps that warning out of the build entirely.</p>
+ * + * <p>{@code requires static org.jspecify} is needed only at compile time of this + * descriptor; JSpecify annotations carry {@code RetentionPolicy.CLASS} so module-path + * consumers never need jspecify on their runtime path. Checker Framework qualifiers and + * the Codehaus animal-sniffer annotation are likewise compile-time only. Jackson, SLF4J, + * and Reactive Streams API are referenced from ordinary sources only; javac in the + * separate {@code module-info-compile} execution compiles {@code module-info.java} in + * isolation and therefore does not need their module names. Consumers that put this jar + * on the module path will load these dependencies through their own {@code requires} + * graph; consumers on the classpath are unaffected.</p>
+ * + * <p>This descriptor compiles at {@code --release 9}; the rest of the source compiles + * at {@code --release 8}. Java 8 runtimes silently ignore {@code module-info.class} at + * the JAR root.</p>
+ */ +@org.jspecify.annotations.NullMarked +module net.ladenthin.llama { + requires static org.jspecify; + + exports net.ladenthin.llama; + exports net.ladenthin.llama.args; + exports net.ladenthin.llama.json; +} diff --git a/src/main/java/net/ladenthin/llama/ChatMessage.java b/src/main/java/net/ladenthin/llama/ChatMessage.java index 319e65ce..c581c034 100644 --- a/src/main/java/net/ladenthin/llama/ChatMessage.java +++ b/src/main/java/net/ladenthin/llama/ChatMessage.java @@ -7,6 +7,8 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.Optional; +import org.jspecify.annotations.Nullable; /** * A single message in a chat conversation: a role ({@code "user"}, {@code "assistant"}, @@ -30,9 +32,9 @@ public final class ChatMessage { private final String role; private final String content; - private final String toolCallId; + private final @Nullable String toolCallId; private final List toolCalls; - private final List parts; + private final @Nullable List parts; /** * Plain user/assistant/system message. 
@@ -52,7 +54,7 @@ public ChatMessage(String role, String content) { * @param toolCallId for tool-result turns ({@code role="tool"}), the id of the originating call; {@code null} otherwise * @param toolCalls for assistant tool-call turns, the list of calls; empty otherwise */ - public ChatMessage(String role, String content, String toolCallId, List toolCalls) { + public ChatMessage(String role, String content, @Nullable String toolCallId, List toolCalls) { this(role, content, toolCallId, toolCalls, null); } @@ -75,7 +77,11 @@ public ChatMessage(String role, List parts) { } private ChatMessage( - String role, String content, String toolCallId, List toolCalls, List parts) { + String role, + String content, + @Nullable String toolCallId, + List toolCalls, + @Nullable List parts) { this.role = role; this.content = content; this.toolCallId = toolCallId; @@ -155,7 +161,7 @@ public String getContent() { * Tool-call id for tool-result turns. * @return the originating tool call id, or {@code null} for non-tool messages */ - public String getToolCallId() { + public @Nullable String getToolCallId() { return toolCallId; } @@ -169,17 +175,17 @@ public List getToolCalls() { /** * Multimodal content parts accessor. - * @return an unmodifiable list of text and image parts, or {@code null} for - * legacy text-only messages built via {@link #ChatMessage(String, String)} + * @return an unmodifiable list of text and image parts, or {@link Optional#empty()} + * for legacy text-only messages built via {@link #ChatMessage(String, String)} */ - public List getParts() { - return parts == null ? null : Collections.unmodifiableList(parts); + public Optional> getParts() { + return parts == null ? Optional.empty() : Optional.of(Collections.unmodifiableList(parts)); } /** * Whether this message carries multimodal parts (i.e. was constructed via * {@link #ChatMessage(String, List)} or {@link #userMultimodal(ContentPart...)}). 
- * @return {@code true} when {@link #getParts()} is non-null + * @return {@code true} when {@link #getParts()} is non-empty */ public boolean hasParts() { return parts != null; diff --git a/src/main/java/net/ladenthin/llama/ChatRequest.java b/src/main/java/net/ladenthin/llama/ChatRequest.java index c40779bd..c7e9622e 100644 --- a/src/main/java/net/ladenthin/llama/ChatRequest.java +++ b/src/main/java/net/ladenthin/llama/ChatRequest.java @@ -10,7 +10,9 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Optional; import java.util.function.Consumer; +import org.jspecify.annotations.Nullable; /** * Builder for a typed chat completion call. @@ -28,9 +30,9 @@ public final class ChatRequest { private final List messages = new ArrayList(); private final List tools = new ArrayList(); - private String toolChoice; + private @Nullable String toolChoice; private int maxToolRounds = 8; - private Consumer paramsCustomizer; + private @Nullable Consumer paramsCustomizer; /** Construct an empty request; populate via the setters. */ public ChatRequest() { @@ -75,7 +77,7 @@ public ChatRequest addTool(ToolDefinition tool) { * @param toolChoice the hint string, or {@code null} to clear * @return this builder */ - public ChatRequest setToolChoice(String toolChoice) { + public ChatRequest setToolChoice(@Nullable String toolChoice) { this.toolChoice = toolChoice; return this; } @@ -103,7 +105,7 @@ public ChatRequest setMaxToolRounds(int maxToolRounds) { * @param customizer the customizer; {@code null} clears any prior customizer * @return this builder */ - public ChatRequest setInferenceCustomizer(Consumer customizer) { + public ChatRequest setInferenceCustomizer(@Nullable Consumer customizer) { this.paramsCustomizer = customizer; return this; } @@ -128,7 +130,7 @@ public List getTools() { * Tool choice accessor. 
* @return the {@code tool_choice} hint, or {@code null} when unset */ - public String getToolChoice() { + public @Nullable String getToolChoice() { return toolChoice; } @@ -153,8 +155,9 @@ public String buildMessagesJson() { ObjectNode obj = MAPPER.createObjectNode(); obj.put("role", m.getRole()); obj.put("content", m.getContent() == null ? "" : m.getContent()); - if (m.getToolCallId() != null) { - obj.put("tool_call_id", m.getToolCallId()); + final String toolCallId = m.getToolCallId(); + if (toolCallId != null) { + obj.put("tool_call_id", toolCallId); } if (!m.getToolCalls().isEmpty()) { ArrayNode tc = MAPPER.createArrayNode(); @@ -176,13 +179,12 @@ public String buildMessagesJson() { } /** - * Build the OAI-style {@code tools} array as a JSON string. Returns {@code null} - * when no tools were added. + * Build the OAI-style {@code tools} array as a JSON string. * - * @return the JSON array as a string, or {@code null} when there are no tools + * @return the JSON array as a string, or {@link Optional#empty()} when no tools were added */ - public String buildToolsJson() { - if (tools.isEmpty()) return null; + public Optional buildToolsJson() { + if (tools.isEmpty()) return Optional.empty(); ArrayNode arr = MAPPER.createArrayNode(); for (ToolDefinition t : tools) { ObjectNode entry = MAPPER.createObjectNode(); @@ -198,7 +200,7 @@ public String buildToolsJson() { entry.set("function", fn); arr.add(entry); } - return arr.toString(); + return Optional.of(arr.toString()); } /** diff --git a/src/main/java/net/ladenthin/llama/ChatResponse.java b/src/main/java/net/ladenthin/llama/ChatResponse.java index 65054220..23fe5eab 100644 --- a/src/main/java/net/ladenthin/llama/ChatResponse.java +++ b/src/main/java/net/ladenthin/llama/ChatResponse.java @@ -6,6 +6,7 @@ import java.util.Collections; import java.util.List; +import java.util.Optional; /** * Typed result of {@link LlamaModel#chat(ChatRequest)} and @@ -59,10 +60,10 @@ public List getChoices() { /** * Convenience accessor 
for the first assistant message. - * @return the first choice's message, or {@code null} when there are no choices + * @return the first choice's message, or {@link Optional#empty()} when there are no choices */ - public ChatMessage getFirstMessage() { - return choices.isEmpty() ? null : choices.get(0).getMessage(); + public Optional getFirstMessage() { + return choices.isEmpty() ? Optional.empty() : Optional.of(choices.get(0).getMessage()); } /** @@ -70,8 +71,7 @@ public ChatMessage getFirstMessage() { * @return the first choice's message content, or {@code ""} when there are no choices */ public String getFirstContent() { - ChatMessage m = getFirstMessage(); - return m == null ? "" : m.getContent(); + return getFirstMessage().map(ChatMessage::getContent).orElse(""); } /** diff --git a/src/main/java/net/ladenthin/llama/CliParameters.java b/src/main/java/net/ladenthin/llama/CliParameters.java index c9374a8b..9904848b 100644 --- a/src/main/java/net/ladenthin/llama/CliParameters.java +++ b/src/main/java/net/ladenthin/llama/CliParameters.java @@ -10,7 +10,7 @@ import java.util.List; import java.util.Map; import net.ladenthin.llama.args.CliArg; -import org.jetbrains.annotations.Nullable; +import org.jspecify.annotations.Nullable; abstract class CliParameters { @@ -28,7 +28,9 @@ abstract class CliParameters { * @param the concrete subtype of this builder * @return this builder */ - @SuppressWarnings("unchecked") + // Self-typing builder idiom: caller fixes T to its concrete subtype so chained + // calls return the concrete builder, not CliParameters. + @SuppressWarnings({"unchecked", "TypeParameterUnusedInFormals"}) protected final T putScalar(String key, Object value) { parameters.put(key, String.valueOf(value)); return (T) this; @@ -43,7 +45,8 @@ protected final T putScalar(String key, Object value) * @param the concrete subtype of this builder * @return this builder */ - @SuppressWarnings("unchecked") + // Self-typing builder idiom — see putScalar above. 
+ @SuppressWarnings({"unchecked", "TypeParameterUnusedInFormals"}) protected final T putEnum(String key, CliArg value) { parameters.put(key, value.getArgValue()); return (T) this; diff --git a/src/main/java/net/ladenthin/llama/ContentPart.java b/src/main/java/net/ladenthin/llama/ContentPart.java index ba6ee49a..a7689b35 100644 --- a/src/main/java/net/ladenthin/llama/ContentPart.java +++ b/src/main/java/net/ladenthin/llama/ContentPart.java @@ -10,6 +10,7 @@ import java.util.Base64; import java.util.Locale; import java.util.Objects; +import org.jspecify.annotations.Nullable; /** * One piece of a {@link ChatMessage}'s multimodal content array: either a text @@ -42,10 +43,10 @@ public enum Type { } private final Type type; - private final String text; - private final String imageUrl; + private final @Nullable String text; + private final @Nullable String imageUrl; - private ContentPart(Type type, String text, String imageUrl) { + private ContentPart(Type type, @Nullable String text, @Nullable String imageUrl) { this.type = type; this.text = text; this.imageUrl = imageUrl; @@ -139,7 +140,7 @@ public Type getType() { * Text accessor (only set for {@link Type#TEXT}). * @return the text fragment, or {@code null} for {@link Type#IMAGE_URL} parts */ - public String getText() { + public @Nullable String getText() { return text; } @@ -147,7 +148,7 @@ public String getText() { * Image URL accessor (only set for {@link Type#IMAGE_URL}). 
* @return the URL or data URI, or {@code null} for {@link Type#TEXT} parts */ - public String getImageUrl() { + public @Nullable String getImageUrl() { return imageUrl; } } diff --git a/src/main/java/net/ladenthin/llama/InferenceParameters.java b/src/main/java/net/ladenthin/llama/InferenceParameters.java index e5bdae9e..b73fba76 100644 --- a/src/main/java/net/ladenthin/llama/InferenceParameters.java +++ b/src/main/java/net/ladenthin/llama/InferenceParameters.java @@ -10,6 +10,7 @@ import java.util.Map; import net.ladenthin.llama.args.ContinuationMode; import net.ladenthin.llama.args.MiroStat; +import org.jspecify.annotations.Nullable; import net.ladenthin.llama.args.ReasoningFormat; import net.ladenthin.llama.args.Sampler; @@ -253,6 +254,10 @@ public InferenceParameters setPresencePenalty(float presencePenalty) { * @param mirostat the MiroStat sampling strategy * @return this builder */ + // .ordinal() is intentional here: the llama.cpp server expects the integer + // ordinal of the MiroStat enum (0 = OFF, 1 = V1, 2 = V2) on the wire. The + // declared order of MiroStat.values() matches the upstream contract. + @SuppressWarnings("EnumOrdinal") public InferenceParameters setMiroStat(MiroStat mirostat) { return putScalar(PARAM_MIROSTAT, mirostat.ordinal()); } @@ -559,7 +564,7 @@ public InferenceParameters setChatTemplateKwargs(java.util.Map k * @param messages a list of user/assistant message pairs (role as key, content as value) * @return this builder */ - public InferenceParameters setMessages(String systemMessage, List> messages) { + public InferenceParameters setMessages(@Nullable String systemMessage, List> messages) { parameters.put( PARAM_MESSAGES, serializer.buildMessages(systemMessage, messages).toString()); @@ -571,7 +576,7 @@ public InferenceParameters setMessages(String systemMessage, List} variant when callers prefer the typed * {@link ChatMessage} surface. *

* Image parts require the model to have a multimodal projector loaded via diff --git a/src/main/java/net/ladenthin/llama/Java8CompatibilityHelper.java b/src/main/java/net/ladenthin/llama/Java8CompatibilityHelper.java new file mode 100644 index 00000000..3062d704 --- /dev/null +++ b/src/main/java/net/ladenthin/llama/Java8CompatibilityHelper.java @@ -0,0 +1,140 @@ +// SPDX-FileCopyrightText: 2026 Bernard Ladenthin +// +// SPDX-License-Identifier: MIT +package net.ladenthin.llama; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * Wrapper methods for Java 9+ APIs to provide Java 1.8 compatibility. + * This class centralizes all compatibility layer logic and can be mocked for testing. + * + *

Mirrors the pattern used by the sister repo's + * {@code net.ladenthin.maven.llamacpp.aiindex.Java8CompatibilityHelper}: each consuming + * class declares an instance field + * {@code private final Java8CompatibilityHelper compatibilityHelper = new Java8CompatibilityHelper();} + * and routes Java 9+ idioms through it. The build's {@code --release 8} compiler arg + * (see {@code pom.xml}) prevents accidental direct use of post-8 APIs in production code. + */ +public class Java8CompatibilityHelper { + + /** Creates a new {@link Java8CompatibilityHelper}. */ + public Java8CompatibilityHelper() { + // no-op + } + + /** + * Wrapper for {@code String#isBlank()} (Java 11+). + * Returns {@code true} if the string is empty or contains only whitespace, + * {@code false} otherwise. + * + * @param str the string to check; must not be {@code null} + * @return {@code true} if the string is empty or blank, {@code false} otherwise + * @throws NullPointerException if {@code str} is {@code null} + */ + public boolean isBlank(final String str) { + return str.isEmpty() || str.trim().isEmpty(); + } + + /** + * Wrapper for {@code String#formatted(Object...)} (Java 15+). + * Equivalent to {@link String#format(String, Object...)}. + * + * @param format the format string + * @param args the arguments referenced by the format specifiers in the format string + * @return a formatted string + */ + // Not annotated @FormatMethod because callers may pass a runtime format string; + // marking this @FormatMethod would propagate FormatStringAnnotation to every caller. + @SuppressWarnings("AnnotateFormatMethod") + public String formatted(final String format, final Object... args) { + return String.format(format, args); + } + + /** + * Wrapper for {@code Files#readString(Path)} (Java 11+). + * Reads all bytes from a file and decodes them using UTF-8. 
+ * + * @param path the path to the file to read + * @return the file content as a string + * @throws IOException if an I/O error occurs reading from the file + */ + public String readString(final Path path) throws IOException { + return new String(Files.readAllBytes(path), StandardCharsets.UTF_8); + } + + /** + * Wrapper for {@code Files#writeString(Path, CharSequence, Charset)} (Java 11+). + * Writes a string to a file using the specified charset. + * + * @param path the path to the file to write + * @param content the string content to write + * @param charset the charset to encode the content with; defaults to UTF-8 if {@code null} + * @throws IOException if an I/O error occurs writing to the file + */ + public void writeString(final Path path, final String content, final @org.jspecify.annotations.Nullable Charset charset) + throws IOException { + final Charset targetCharset = charset != null ? charset : StandardCharsets.UTF_8; + Files.write(path, content.getBytes(targetCharset)); + } + + /** + * Wrapper for {@code Stream#toList()} (Java 16+). + * Collects stream elements into a {@link List}. + * + * @param stream the stream to collect + * @param the element type + * @return a list containing the stream elements + */ + public List toList(final Stream stream) { + return stream.collect(Collectors.toList()); + } + + /** + * Wrapper for {@code List#of(Object...)} (Java 9+). + * Creates a list containing the specified elements. + * + * @param elements the elements to include in the list + * @param the element type + * @return a list containing the specified elements + */ + // @SafeVarargs suppresses the warning at the listOf declaration; @SuppressWarnings + // is additionally needed because javac still flags the forwarded Arrays.asList(...) + // call as a possible-heap-pollution site even though Arrays.asList is itself + // @SafeVarargs in the JDK. + @SafeVarargs + @SuppressWarnings({"unchecked", "varargs"}) + public final List listOf(final T... 
elements) { + return Arrays.asList(elements); + } + + // Intentionally NOT wrapped: + // - Optional.isEmpty() (Java 11+) — use !opt.isPresent() inline instead. NullAway's + // CheckOptionalEmptiness recognises Optional.isPresent() / isEmpty() directly as + // null-narrowing for a subsequent .get(); a helper method call breaks that flow + // analysis. The two extra characters of !opt.isPresent() are worth the safety. + // - Optional.orElseThrow() no-arg (Java 10+) — use orElseThrow(() -> new ...) with + // an explicit exception type and message at each call site. A generic wrapper + // would lose the per-site context that makes the failure debuggable. + + /** + * Wrapper for {@code ByteArrayOutputStream#toString(Charset)} (Java 10+). + * Decodes the accumulated bytes with the given charset. + * + * @param baos the buffer; must not be {@code null} + * @param charset the charset to decode with; must not be {@code null} + * @return the decoded string + */ + public String toString(final ByteArrayOutputStream baos, final Charset charset) { + return new String(baos.toByteArray(), charset); + } +} diff --git a/src/main/java/net/ladenthin/llama/JsonParameters.java b/src/main/java/net/ladenthin/llama/JsonParameters.java index 98bc2ebb..a2cf18e4 100644 --- a/src/main/java/net/ladenthin/llama/JsonParameters.java +++ b/src/main/java/net/ladenthin/llama/JsonParameters.java @@ -9,6 +9,7 @@ import java.util.Map; import net.ladenthin.llama.args.CliArg; import net.ladenthin.llama.json.ParameterJsonSerializer; +import org.checkerframework.checker.nullness.qual.PolyNull; /** * The Java library re-uses most of the llama.cpp server code, which mostly works with JSONs. Thus, the complexity and @@ -42,7 +43,11 @@ public String toString() { return builder.toString(); } - String toJsonString(String text) { + // @PolyNull lets the Checker Framework see that null in returns null and non-null + // in returns non-null. 
NullAway has no equivalent qualifier and reads the return as + // @NonNull (under @NullMarked), so we suppress the NullAway-only complaint here. + @SuppressWarnings("NullAway") + @PolyNull String toJsonString(@PolyNull String text) { if (text == null) return null; return serializer.toJsonString(text); } @@ -59,7 +64,11 @@ String toJsonString(String text) { * @param the concrete subtype of this builder * @return this builder */ - @SuppressWarnings("unchecked") + // Self-typing builder idiom: the caller fixes T to its own concrete subtype + // so that chained calls return the concrete builder instead of JsonParameters. + // This deliberately uses T only in the return type and is not the + // "TypeParameterUnusedInFormals" anti-pattern Error Prone warns about. + @SuppressWarnings({"unchecked", "TypeParameterUnusedInFormals"}) protected final T putScalar(String key, Object value) { parameters.put(key, String.valueOf(value)); return (T) this; @@ -74,7 +83,8 @@ protected final T putScalar(String key, Object value) * @param the concrete subtype of this builder * @return this builder */ - @SuppressWarnings("unchecked") + // Self-typing builder idiom — see putScalar above. + @SuppressWarnings({"unchecked", "TypeParameterUnusedInFormals"}) protected final T putEnum(String key, CliArg value) { parameters.put(key, value.getArgValue()); return (T) this; diff --git a/src/main/java/net/ladenthin/llama/LlamaException.java b/src/main/java/net/ladenthin/llama/LlamaException.java index de2e0c05..ebc3c864 100644 --- a/src/main/java/net/ladenthin/llama/LlamaException.java +++ b/src/main/java/net/ladenthin/llama/LlamaException.java @@ -5,12 +5,33 @@ package net.ladenthin.llama; -class LlamaException extends RuntimeException { +/** + * Base unchecked exception raised by the JNI layer when a llama.cpp operation + * fails. Specific failure modes may extend this class with typed subclasses + * (e.g. {@link ModelUnavailableException}). + * + *

This was historically package-private; it was promoted to {@code public} + * to allow external callers to {@code catch} the typed subclasses by their + * common base. Existing callers that caught {@link RuntimeException} continue + * to work unchanged.

+ */ +public class LlamaException extends RuntimeException { + /** + * Creates a new {@link LlamaException} with the given message. + * + * @param message the detail message; may be {@code null} + */ public LlamaException(String message) { super(message); } + /** + * Creates a new {@link LlamaException} with the given message and cause. + * + * @param message the detail message; may be {@code null} + * @param cause the underlying cause; may be {@code null} + */ public LlamaException(String message, Throwable cause) { super(message, cause); } diff --git a/src/main/java/net/ladenthin/llama/LlamaIterable.java b/src/main/java/net/ladenthin/llama/LlamaIterable.java index 88d61769..2e4f1d36 100644 --- a/src/main/java/net/ladenthin/llama/LlamaIterable.java +++ b/src/main/java/net/ladenthin/llama/LlamaIterable.java @@ -5,8 +5,6 @@ package net.ladenthin.llama; -import org.jetbrains.annotations.NotNull; - /** * An {@link Iterable} wrapper around {@link LlamaIterator} returned by * {@link LlamaModel#generate(InferenceParameters)} and {@link LlamaModel#generateChat(InferenceParameters)}. 
@@ -34,7 +32,6 @@ public final class LlamaIterable implements Iterable, AutoCloseable this.iterator = iterator; } - @NotNull @Override public LlamaIterator iterator() { return iterator; diff --git a/src/main/java/net/ladenthin/llama/LlamaLoader.java b/src/main/java/net/ladenthin/llama/LlamaLoader.java index 6df905be..06b29ee8 100644 --- a/src/main/java/net/ladenthin/llama/LlamaLoader.java +++ b/src/main/java/net/ladenthin/llama/LlamaLoader.java @@ -13,10 +13,10 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.StandardCopyOption; -import java.util.LinkedList; +import java.util.ArrayList; import java.util.List; import java.util.stream.Stream; -import org.jetbrains.annotations.Nullable; +import org.jspecify.annotations.Nullable; /** * Set the system properties {@code net.ladenthin.llama.lib.path} / @@ -86,7 +86,7 @@ private static void cleanPath(Path path) { } private static void loadNativeLibrary(String name) { - List triedPaths = new LinkedList<>(); + List triedPaths = new ArrayList<>(); String nativeLibName = System.mapLibraryName(name); String nativeLibPath = systemProperties.getLibPath(); @@ -112,7 +112,11 @@ private static void loadNativeLibrary(String name) { // Try to load the library from java.library.path String javaLibraryPath = System.getProperty("java.library.path", ""); - for (String ldPath : javaLibraryPath.split(File.pathSeparator)) { + // String.split's "trailing empties dropped" quirk is benign here because + // we explicitly skip empty entries with the isEmpty() check below. 
+ @SuppressWarnings("StringSplitter") + final String[] ldPaths = javaLibraryPath.split(File.pathSeparator); + for (String ldPath : ldPaths) { if (ldPath.isEmpty()) { continue; } @@ -164,8 +168,7 @@ public static boolean loadNativeLibrary(Path path) { } } - @Nullable - private static Path extractFile(String sourceDirectory, String fileName, String targetDirectory) { + private static @Nullable Path extractFile(String sourceDirectory, String fileName, String targetDirectory) { String nativeLibraryFilePath = sourceDirectory + "/" + fileName; Path extractedFilePath = Paths.get(targetDirectory, fileName); @@ -188,6 +191,10 @@ private static Path extractFile(String sourceDirectory, String fileName, String // Check whether the contents are properly copied from the resource folder try (InputStream nativeIn = LlamaLoader.class.getResourceAsStream(nativeLibraryFilePath); InputStream extractedLibIn = Files.newInputStream(extractedFilePath)) { + if (nativeIn == null) { + System.err.println(String.format("Native library resource missing at %s", nativeLibraryFilePath)); + return null; + } if (!contentsEquals(nativeIn, extractedLibIn)) { System.err.println(String.format("Failed to write a native library file at %s", extractedFilePath)); return null; @@ -245,7 +252,12 @@ static File getTempDir() { } static String getNativeResourcePath() { - String packagePath = LlamaLoader.class.getPackage().getName().replace('.', '/'); + final Package pkg = LlamaLoader.class.getPackage(); + // LlamaLoader is in a named package, so Class.getPackage() is never null here. 
+ if (pkg == null) { + throw new IllegalStateException("LlamaLoader.class.getPackage() returned null"); + } + String packagePath = pkg.getName().replace('.', '/'); return String.format("/%s/%s", packagePath, OSInfo.getNativeLibFolderPathForCurrentOS()); } diff --git a/src/main/java/net/ladenthin/llama/LlamaModel.java b/src/main/java/net/ladenthin/llama/LlamaModel.java index 5ec3f077..d5e21071 100644 --- a/src/main/java/net/ladenthin/llama/LlamaModel.java +++ b/src/main/java/net/ladenthin/llama/LlamaModel.java @@ -10,12 +10,15 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Objects; +import java.util.Optional; import java.util.concurrent.CompletableFuture; import java.util.function.BiConsumer; import net.ladenthin.llama.args.LogFormat; import net.ladenthin.llama.json.ChatResponseParser; import net.ladenthin.llama.json.CompletionResponseParser; import net.ladenthin.llama.json.RerankResponseParser; +import org.jspecify.annotations.Nullable; /** * This class is a wrapper around the llama.cpp functionality. @@ -52,10 +55,21 @@ public class LlamaModel implements AutoCloseable { * * * @param parameters the set of options - * @throws LlamaException if no model could be loaded from the given file path - */ + * @throws ModelUnavailableException if {@link ModelParameters#setSkipDownload(boolean) + * setSkipDownload(true)} (or + * {@link net.ladenthin.llama.args.ModelFlag#SKIP_DOWNLOAD}) + * is set and the configured model file is missing or invalid + * @throws LlamaException for any other load failure + */ + // loadModel is a native method; it does not call back into Java with this, + // so the @UnderInitialization receiver warning is a CF false positive. 
+ @SuppressWarnings("method.invocation") public LlamaModel(ModelParameters parameters) { - loadModel(parameters.toArray()); + try { + loadModel(parameters.toArray()); + } catch (LlamaException e) { + throw SkipDownloadFailureTranslator.translate(parameters, e); + } } /** @@ -68,11 +82,19 @@ public LlamaModel(ModelParameters parameters) { * @param progress load progress sink; {@code null} disables the callback * @throws LlamaException if loading fails or the callback aborts */ + // loadModel / loadModelWithProgress are native methods; they do not call back + // into Java with this, so the @UnderInitialization receiver warning is a CF + // false positive. + @SuppressWarnings("method.invocation") public LlamaModel(ModelParameters parameters, LoadProgressCallback progress) { - if (progress == null) { - loadModel(parameters.toArray()); - } else { - loadModelWithProgress(parameters.toArray(), progress); + try { + if (progress == null) { + loadModel(parameters.toArray()); + } else { + loadModelWithProgress(parameters.toArray(), progress); + } + } catch (LlamaException e) { + throw SkipDownloadFailureTranslator.translate(parameters, e); } } @@ -232,7 +254,11 @@ public CompletableFuture completeAsync(InferenceParameters parameters) { */ public CompletableFuture completeAsync(InferenceParameters parameters, CancellationToken token) { CompletableFuture future = CompletableFuture.supplyAsync(() -> complete(parameters, token)); - future.whenComplete((result, ex) -> { + // whenComplete returns a new stage that we deliberately discard: this is a + // fire-and-forget cancellation callback attached to `future`, which is what + // the caller observes. + @SuppressWarnings("FutureReturnValueIgnored") + final CompletableFuture cancelHook = future.whenComplete((result, ex) -> { if (ex instanceof java.util.concurrent.CancellationException) { token.cancel(); } @@ -380,7 +406,7 @@ public void close() { * deleted, since the attack vector disappears together with finalization. *

*/ - @SuppressWarnings({"deprecation", "removal"}) + @SuppressWarnings({"deprecation", "removal", "Finalize"}) @Override protected final void finalize() { // no-op @@ -520,14 +546,14 @@ public String chatCompleteText(InferenceParameters parameters) { */ public ChatResponse chat(ChatRequest request) { InferenceParameters params = new InferenceParameters("").setMessagesJson(request.buildMessagesJson()); - String toolsJson = request.buildToolsJson(); - if (toolsJson != null) { + request.buildToolsJson().ifPresent(toolsJson -> { params.setToolsJson(toolsJson); - if (request.getToolChoice() != null) { - params.setToolChoice(request.getToolChoice()); + final String toolChoice = request.getToolChoice(); + if (toolChoice != null) { + params.setToolChoice(toolChoice); } params.setUseChatTemplate(true); - } + }); request.applyCustomizer(params); String raw = chatComplete(params); return chatParser.parseResponse(raw); @@ -551,13 +577,21 @@ public ChatResponse chat(ChatRequest request) { * (or the last response when the round cap is hit) */ public ChatResponse chatWithTools(ChatRequest request, java.util.Map handlers) { - ChatResponse last = null; - for (int round = 0; round < request.getMaxToolRounds(); round++) { - last = chat(request); - ChatMessage assistant = last.getFirstMessage(); - if (assistant == null || assistant.getToolCalls().isEmpty()) { + final int maxRounds = request.getMaxToolRounds(); + if (maxRounds < 1) { + throw new IllegalArgumentException( + "ChatRequest.maxToolRounds must be >= 1 (got " + maxRounds + "); " + + "chatWithTools always issues at least one chat call."); + } + ChatResponse last = chat(request); + for (int round = 1; round < maxRounds; round++) { + Optional assistantOpt = last.getFirstMessage(); + // NOTE: inline !isPresent() here (not compatibilityHelper.isEmpty) so NullAway's + // CheckOptionalEmptiness recognises this as null-narrowing for the .get() below. 
+ if (!assistantOpt.isPresent() || assistantOpt.get().getToolCalls().isEmpty()) { return last; } + ChatMessage assistant = assistantOpt.get(); request.addMessage(assistant); for (ToolCall call : assistant.getToolCalls()) { ToolHandler handler = handlers.get(call.getName()); @@ -576,6 +610,7 @@ public ChatResponse chatWithTools(ChatRequest request, java.util.Map * Note, that you have to configure {@link InferenceParameters#setNProbs(int)} in order for probabilities to be returned. */ - @NotNull public final Map probabilities; /** @@ -38,7 +35,6 @@ public final class LlamaOutput { * Empty when {@link InferenceParameters#setNProbs(int)} is not configured or the native * response did not include {@code completion_probabilities}. */ - @NotNull public final List logprobs; /** Whether this is the final token of the generation. */ @@ -48,7 +44,6 @@ public final class LlamaOutput { * The reason generation stopped. {@link StopReason#NONE} on intermediate streaming tokens. * Only meaningful when {@link #stop} is {@code true}. 
*/ - @NotNull public final StopReason stopReason; /** @@ -60,10 +55,10 @@ public final class LlamaOutput { * @param stopReason the stop reason ({@link StopReason#NONE} on intermediate tokens) */ public LlamaOutput( - @NotNull String text, - @NotNull Map probabilities, + String text, + Map probabilities, boolean stop, - @NotNull StopReason stopReason) { + StopReason stopReason) { this(text, probabilities, Collections.emptyList(), stop, stopReason); } @@ -77,11 +72,11 @@ public LlamaOutput( * @param stopReason the stop reason ({@link StopReason#NONE} on intermediate tokens) */ public LlamaOutput( - @NotNull String text, - @NotNull Map probabilities, - @NotNull List logprobs, + String text, + Map probabilities, + List logprobs, boolean stop, - @NotNull StopReason stopReason) { + StopReason stopReason) { this.text = text; this.probabilities = probabilities; this.logprobs = logprobs; diff --git a/src/main/java/net/ladenthin/llama/LlamaSystemProperties.java b/src/main/java/net/ladenthin/llama/LlamaSystemProperties.java index 62488318..3d30a5f0 100644 --- a/src/main/java/net/ladenthin/llama/LlamaSystemProperties.java +++ b/src/main/java/net/ladenthin/llama/LlamaSystemProperties.java @@ -5,6 +5,8 @@ package net.ladenthin.llama; +import org.jspecify.annotations.Nullable; + /** * Resolves library-specific system properties under the {@link #PREFIX} domain prefix. */ @@ -16,7 +18,7 @@ public LlamaSystemProperties() {} /** Common system-property prefix for all library-specific overrides. 
*/ public static final String PREFIX = "net.ladenthin.llama"; - private String getProperty(String suffix) { + private @Nullable String getProperty(String suffix) { return System.getProperty(PREFIX + suffix); } @@ -25,7 +27,7 @@ private String getProperty(String suffix) { * * @return the configured library directory, or {@code null} if unset */ - public String getLibPath() { + public @Nullable String getLibPath() { return getProperty(".lib.path"); } @@ -34,7 +36,7 @@ public String getLibPath() { * * @return the configured library file name, or {@code null} if unset */ - public String getLibName() { + public @Nullable String getLibName() { return getProperty(".lib.name"); } @@ -44,7 +46,7 @@ public String getLibName() { * * @return the configured temp directory, or {@code null} if unset */ - public String getTmpDir() { + public @Nullable String getTmpDir() { return getProperty(".tmpdir"); } @@ -53,7 +55,7 @@ public String getTmpDir() { * * @return the configured architecture override, or {@code null} if unset */ - public String getOsinfoArchitecture() { + public @Nullable String getOsinfoArchitecture() { return getProperty(".osinfo.architecture"); } @@ -62,7 +64,7 @@ public String getOsinfoArchitecture() { * * @return the configured GPU layer count as a string, or {@code null} if unset */ - public String getTestNgl() { + public @Nullable String getTestNgl() { return getProperty(".test.ngl"); } } diff --git a/src/main/java/net/ladenthin/llama/ModelParameters.java b/src/main/java/net/ladenthin/llama/ModelParameters.java index 65fb026c..d0afb196 100644 --- a/src/main/java/net/ladenthin/llama/ModelParameters.java +++ b/src/main/java/net/ladenthin/llama/ModelParameters.java @@ -1418,13 +1418,49 @@ public ModelParameters clearFlag(ModelFlag flag) { return this; } + /** + * Returns whether the given flag is currently set on this builder. 
+ * + * @param flag the flag to query + * @return {@code true} if {@link #setFlag(ModelFlag)} was called for {@code flag} and + * {@link #clearFlag(ModelFlag)} has not since removed it; {@code false} otherwise + */ + public boolean hasFlag(ModelFlag flag) { + return parameters.containsKey(flag.getCliFlag()); + } + + /** + * Skip any model file download — only validation is performed (default: {@code false}). + * + *

When enabled, the upstream loader will NOT attempt any outbound network call to + * download the configured model. If the model file is missing or invalid (e.g. ETag + * mismatch), {@link LlamaModel#LlamaModel(ModelParameters)} throws a typed + * {@link ModelUnavailableException} so the caller can distinguish an air-gapped miss + * from a genuine misconfiguration.

+ * + *

Useful for air-gapped / pre-staged-model deployments where any outbound network + * call is itself a failure mode.

+ * + * @param skip {@code true} to skip downloads (set {@link ModelFlag#SKIP_DOWNLOAD}), + * {@code false} to clear the flag and allow downloads + * @return this builder + */ + public ModelParameters setSkipDownload(boolean skip) { + if (skip) { + setFlag(ModelFlag.SKIP_DOWNLOAD); + } else { + clearFlag(ModelFlag.SKIP_DOWNLOAD); + } + return this; + } + /** * Returns whether the given parameter key has not been explicitly set. * * @param key the parameter key without the {@code --} prefix * @return {@code true} if the key is absent from the configured parameters */ - public boolean isDefault(String key) { + public boolean isUnset(String key) { return !parameters.containsKey("--" + key); } } diff --git a/src/main/java/net/ladenthin/llama/ModelUnavailableException.java b/src/main/java/net/ladenthin/llama/ModelUnavailableException.java new file mode 100644 index 00000000..dfa57fad --- /dev/null +++ b/src/main/java/net/ladenthin/llama/ModelUnavailableException.java @@ -0,0 +1,43 @@ +// SPDX-FileCopyrightText: 2026 Bernard Ladenthin +// +// SPDX-License-Identifier: MIT + +package net.ladenthin.llama; + +import net.ladenthin.llama.args.ModelFlag; + +/** + * Thrown by {@link LlamaModel#LlamaModel(ModelParameters)} when + * {@link ModelFlag#SKIP_DOWNLOAD} (or {@link ModelParameters#setSkipDownload(boolean) + * setSkipDownload(true)}) is set and the configured model file is missing or + * invalid — i.e. the loader would have had to download a replacement but is + * forbidden to. + * + *

Lets air-gapped / pre-staged-model deployments distinguish "model file + * absent" from generic configuration errors. Upstream raises + * {@code common_skip_download_exception} which is caught inside + * {@code common_params_parse_ex} and surfaces as a {@code false} return; the + * Java layer combines that with the {@code SKIP_DOWNLOAD} flag to recognise the + * skip-download case and translate it to this typed exception.

+ */ +public class ModelUnavailableException extends LlamaException { + + /** + * Creates a new {@link ModelUnavailableException} with the given message. + * + * @param message the detail message; may be {@code null} + */ + public ModelUnavailableException(String message) { + super(message); + } + + /** + * Creates a new {@link ModelUnavailableException} with the given message and cause. + * + * @param message the detail message; may be {@code null} + * @param cause the underlying cause; may be {@code null} + */ + public ModelUnavailableException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/src/main/java/net/ladenthin/llama/OSInfo.java b/src/main/java/net/ladenthin/llama/OSInfo.java index 222574d7..cf40d5f9 100644 --- a/src/main/java/net/ladenthin/llama/OSInfo.java +++ b/src/main/java/net/ladenthin/llama/OSInfo.java @@ -227,7 +227,7 @@ private static boolean isRunningAndroid() { * @return {@code true} if the JVM identifies itself as Android */ public static boolean isAndroidRuntime() { - return System.getProperty("java.runtime.name", "").toLowerCase().contains("android"); + return System.getProperty("java.runtime.name", "").toLowerCase(Locale.ROOT).contains("android"); } /** @@ -237,7 +237,7 @@ public static boolean isAndroidRuntime() { */ public static boolean isAndroidTermux() { try { - return processRunner.runAndWaitFor("uname -o").toLowerCase().contains("android"); + return processRunner.runAndWaitFor("uname -o").toLowerCase(Locale.ROOT).contains("android"); } catch (InterruptedException e) { Thread.currentThread().interrupt(); return false; @@ -257,7 +257,7 @@ public static boolean isAndroidTermux() { public static boolean isMusl() { Path mapFilesDir = Paths.get("/proc/self/map_files"); try (Stream dirStream = Files.list(mapFilesDir)) { - return dirStream.map(OSInfo::toRealPathOrEmpty).anyMatch(s -> s.toLowerCase() + return dirStream.map(OSInfo::toRealPathOrEmpty).anyMatch(s -> s.toLowerCase(Locale.ROOT) .contains("musl")); } 
catch (Exception ignored) { // fall back to checking for alpine linux in the event we're using an older kernel which @@ -282,6 +282,9 @@ private static boolean isAlpineLinux() { try (Stream osLines = Files.lines(Paths.get("/etc/os-release"))) { return osLines.anyMatch(l -> l.startsWith("ID") && l.contains("alpine")); } catch (Exception ignored2) { + // Treat any I/O / parse failure as "not Alpine" — the file is absent on + // non-Linux hosts and unreadable in sandboxed Linux runtimes; either + // way the answer is the same and there is nothing meaningful to log. } return false; } diff --git a/src/main/java/net/ladenthin/llama/Pair.java b/src/main/java/net/ladenthin/llama/Pair.java index 6dbe96e3..ceff22f0 100644 --- a/src/main/java/net/ladenthin/llama/Pair.java +++ b/src/main/java/net/ladenthin/llama/Pair.java @@ -6,6 +6,7 @@ package net.ladenthin.llama; import java.util.Objects; +import org.jspecify.annotations.Nullable; /** * A generic immutable key-value pair. @@ -53,11 +54,10 @@ public int hashCode() { } @Override - public boolean equals(Object obj) { + public boolean equals(@Nullable Object obj) { if (this == obj) return true; - if (obj == null) return false; - if (getClass() != obj.getClass()) return false; - Pair other = (Pair) obj; + if (!(obj instanceof Pair)) return false; + Pair other = (Pair) obj; return Objects.equals(key, other.key) && Objects.equals(value, other.value); } diff --git a/src/main/java/net/ladenthin/llama/ProcessRunner.java b/src/main/java/net/ladenthin/llama/ProcessRunner.java index 61a93f68..0a54c10d 100644 --- a/src/main/java/net/ladenthin/llama/ProcessRunner.java +++ b/src/main/java/net/ladenthin/llama/ProcessRunner.java @@ -12,6 +12,9 @@ import java.util.concurrent.TimeUnit; class ProcessRunner { + + private final Java8CompatibilityHelper compatibilityHelper = new Java8CompatibilityHelper(); + String runAndWaitFor(String command) throws IOException, InterruptedException { Process p = Runtime.getRuntime().exec(splitArgs(command)); 
p.waitFor(); @@ -37,7 +40,7 @@ private static String[] splitArgs(String command) { return command.split(" "); } - private static String getProcessOutput(Process process) throws IOException { + private String getProcessOutput(Process process) throws IOException { try (InputStream in = process.getInputStream()) { int readLen; ByteArrayOutputStream b = new ByteArrayOutputStream(); @@ -45,7 +48,7 @@ private static String getProcessOutput(Process process) throws IOException { while ((readLen = in.read(buf, 0, buf.length)) >= 0) { b.write(buf, 0, readLen); } - return b.toString(StandardCharsets.UTF_8); + return compatibilityHelper.toString(b, StandardCharsets.UTF_8); } } } diff --git a/src/main/java/net/ladenthin/llama/Session.java b/src/main/java/net/ladenthin/llama/Session.java index 41a96200..8d0188ea 100644 --- a/src/main/java/net/ladenthin/llama/Session.java +++ b/src/main/java/net/ladenthin/llama/Session.java @@ -8,6 +8,7 @@ import java.util.Collections; import java.util.List; import java.util.function.Consumer; +import org.jspecify.annotations.Nullable; /** * Thin multi-turn conversation wrapper over a {@link LlamaModel} slot. 
Maintains an @@ -31,9 +32,9 @@ public final class Session implements AutoCloseable { private final LlamaModel model; private final int slotId; - private final String systemMessage; + private final @Nullable String systemMessage; private final List> turns = new ArrayList>(); - private final Consumer paramsCustomizer; + private final @Nullable Consumer paramsCustomizer; private final Object lock = new Object(); private boolean streamingActive; @@ -45,7 +46,7 @@ public final class Session implements AutoCloseable { * @param slotId the slot id used by {@link #save(String)} / {@link #restore(String)} * @param systemMessage optional system prompt (may be {@code null} or empty) */ - public Session(LlamaModel model, int slotId, String systemMessage) { + public Session(LlamaModel model, int slotId, @Nullable String systemMessage) { this(model, slotId, systemMessage, null); } @@ -58,7 +59,11 @@ public Session(LlamaModel model, int slotId, String systemMessage) { * @param systemMessage optional system prompt * @param paramsCustomizer applied to each request's parameters; may be {@code null} */ - public Session(LlamaModel model, int slotId, String systemMessage, Consumer paramsCustomizer) { + public Session( + LlamaModel model, + int slotId, + @Nullable String systemMessage, + @Nullable Consumer paramsCustomizer) { this.model = model; this.slotId = slotId; this.systemMessage = systemMessage; diff --git a/src/main/java/net/ladenthin/llama/SkipDownloadFailureTranslator.java b/src/main/java/net/ladenthin/llama/SkipDownloadFailureTranslator.java new file mode 100644 index 00000000..3c6ec985 --- /dev/null +++ b/src/main/java/net/ladenthin/llama/SkipDownloadFailureTranslator.java @@ -0,0 +1,69 @@ +// SPDX-FileCopyrightText: 2026 Bernard Ladenthin +// +// SPDX-License-Identifier: MIT + +package net.ladenthin.llama; + +import net.ladenthin.llama.args.ModelFlag; + +/** + * Pure-Java translator from the generic {@link LlamaException} raised by the JNI + * loader to the typed {@link 
ModelUnavailableException} when + * {@link ModelFlag#SKIP_DOWNLOAD} is set and the load failed because the + * configured model file was missing or invalid. + * + *

Lives outside {@link LlamaModel} so that unit tests can exercise the + * translation heuristic without triggering {@code LlamaModel}'s + * {@link LlamaLoader} static initializer (which loads the JNI library and is + * not available in CPU-only / non-native test environments).

+ * + *

Why a heuristic and not a direct exception catch

+ * + *

Upstream raises {@code common_skip_download_exception} inside + * {@code common_download_file_single} when {@code --skip-download} is set and + * the file is missing or has a stale ETag. However that exception is caught + * INSIDE upstream's own {@code common_params_parse_ex} (at + * {@code common/arg.cpp:476}) and surfaces only as a {@code false} return + * from {@code common_params_parse}. The JNI layer reports the {@code false} + * return as a generic {@link LlamaException} with the message + * {@value #LOAD_PARSE_FAILED_MESSAGE}. The Java layer therefore cannot catch + * the C++ exception directly and instead recognises the combined signal: + * {@code SKIP_DOWNLOAD} flag set + JNI message matches.

+ */ +final class SkipDownloadFailureTranslator { + + /** + * Substring used by the JNI bridge when {@code common_params_parse} returns + * {@code false}; matched at the Java layer to recognise the + * {@code SKIP_DOWNLOAD} case. + */ + static final String LOAD_PARSE_FAILED_MESSAGE = "Failed to parse model parameters"; + + private SkipDownloadFailureTranslator() { + // utility — not instantiable + } + + /** + * Translates a generic load failure into a typed + * {@link ModelUnavailableException} when the user opted into + * {@link ModelFlag#SKIP_DOWNLOAD} and the JNI surfaced the + * {@value #LOAD_PARSE_FAILED_MESSAGE} message; otherwise returns the + * original exception unchanged so the caller can re-throw it as-is. + * + * @param parameters the parameters passed to the failing constructor + * @param original the original load failure to translate or pass through + * @return a {@link ModelUnavailableException} when the heuristic matches; + * otherwise the original {@code LlamaException} + */ + static LlamaException translate(ModelParameters parameters, LlamaException original) { + if (parameters.hasFlag(ModelFlag.SKIP_DOWNLOAD) + && original.getMessage() != null + && original.getMessage().contains(LOAD_PARSE_FAILED_MESSAGE)) { + return new ModelUnavailableException( + "Model unavailable: --skip-download is set but the configured model file is missing or " + + "invalid (no download attempted).", + original); + } + return original; + } +} diff --git a/src/main/java/net/ladenthin/llama/StopReason.java b/src/main/java/net/ladenthin/llama/StopReason.java index 32f51be6..c31c2809 100644 --- a/src/main/java/net/ladenthin/llama/StopReason.java +++ b/src/main/java/net/ladenthin/llama/StopReason.java @@ -5,6 +5,8 @@ package net.ladenthin.llama; +import org.jspecify.annotations.Nullable; + /** * The reason why token generation stopped for a {@link LlamaOutput}. * @@ -31,9 +33,9 @@ public enum StopReason { /** Token budget exhausted. 
Server {@code "stop_type"} value: {@code "limit"}. */ MAX_TOKENS("limit"); - private final String stopType; + private final @Nullable String stopType; - StopReason(String stopType) { + StopReason(@Nullable String stopType) { this.stopType = stopType; } @@ -43,7 +45,7 @@ public enum StopReason { * * @return the stop-type string, or {@code null} for {@link #NONE} */ - public String getStopType() { + public @Nullable String getStopType() { return stopType; } @@ -55,7 +57,7 @@ public String getStopType() { * @param stopType the raw stop-type string, or {@code null} / empty for absent field * @return the corresponding {@link StopReason}, or {@link #NONE} if unrecognised */ - public static StopReason fromStopType(String stopType) { + public static StopReason fromStopType(@Nullable String stopType) { if (stopType == null) return NONE; switch (stopType) { case "eos": diff --git a/src/main/java/net/ladenthin/llama/Timings.java b/src/main/java/net/ladenthin/llama/Timings.java index 3fe8048d..0910a9fe 100644 --- a/src/main/java/net/ladenthin/llama/Timings.java +++ b/src/main/java/net/ladenthin/llama/Timings.java @@ -5,6 +5,7 @@ package net.ladenthin.llama; import com.fasterxml.jackson.databind.JsonNode; +import org.jspecify.annotations.Nullable; /** * Per-completion timing data parsed from a llama.cpp result {@code timings} block. 
@@ -70,7 +71,7 @@ public Timings( * @param node the {@code timings} object node; may be a missing-node * @return a populated {@link Timings} (all-zero when {@code node} is missing/null) */ - public static Timings fromJson(JsonNode node) { + public static Timings fromJson(@Nullable JsonNode node) { if (node == null || node.isMissingNode() || node.isNull()) { return new Timings(0, 0, 0.0, 0.0, 0, 0.0, 0.0, 0, 0); } diff --git a/src/main/java/net/ladenthin/llama/Usage.java b/src/main/java/net/ladenthin/llama/Usage.java index 771a4d32..9708a5e3 100644 --- a/src/main/java/net/ladenthin/llama/Usage.java +++ b/src/main/java/net/ladenthin/llama/Usage.java @@ -4,6 +4,8 @@ package net.ladenthin.llama; +import org.jspecify.annotations.Nullable; + /** * Token-usage counters, modeled after the OpenAI / Llama Stack {@code usage} block. *

@@ -52,7 +54,7 @@ public long getTotalTokens() { } @Override - public boolean equals(Object o) { + public boolean equals(@Nullable Object o) { if (this == o) return true; if (!(o instanceof Usage)) return false; Usage u = (Usage) o; diff --git a/src/main/java/net/ladenthin/llama/args/ModelFlag.java b/src/main/java/net/ladenthin/llama/args/ModelFlag.java index 7850e867..af5807d5 100644 --- a/src/main/java/net/ladenthin/llama/args/ModelFlag.java +++ b/src/main/java/net/ladenthin/llama/args/ModelFlag.java @@ -107,7 +107,19 @@ public enum ModelFlag { MMPROJ_AUTO("--mmproj-auto"), /** Offload the mmproj vision projection model to the GPU. */ - MMPROJ_OFFLOAD("--mmproj-offload"); + MMPROJ_OFFLOAD("--mmproj-offload"), + + /** + * Skip any model file download — only validation is performed. Useful for air-gapped or + * pre-staged-model deployments where any outbound network call is a failure mode. + * + *

When this flag is set and the configured model file is missing or invalid (e.g. ETag + * mismatch), upstream throws {@code common_skip_download_exception} during arg parsing, + * which is caught inside {@code common_params_parse_ex} and surfaces as a {@code false} + * return; the Java layer translates that combined signal into a typed + * {@link net.ladenthin.llama.ModelUnavailableException}.

+ */ + SKIP_DOWNLOAD("--skip-download"); private final String cliFlag; diff --git a/src/main/java/net/ladenthin/llama/args/PoolingType.java b/src/main/java/net/ladenthin/llama/args/PoolingType.java index 0182c48a..ce948029 100644 --- a/src/main/java/net/ladenthin/llama/args/PoolingType.java +++ b/src/main/java/net/ladenthin/llama/args/PoolingType.java @@ -91,6 +91,7 @@ public enum PoolingType implements CliArg { * * @return the pooling type string (e.g. {@code "mean"}, {@code "cls"}) */ + @Override public String getArgValue() { return argValue; } diff --git a/src/main/java/net/ladenthin/llama/args/RopeScalingType.java b/src/main/java/net/ladenthin/llama/args/RopeScalingType.java index 8d73657d..09e88383 100644 --- a/src/main/java/net/ladenthin/llama/args/RopeScalingType.java +++ b/src/main/java/net/ladenthin/llama/args/RopeScalingType.java @@ -29,6 +29,7 @@ public enum RopeScalingType implements CliArg { this.argValue = value; } + @Override public String getArgValue() { return argValue; } diff --git a/src/main/java/net/ladenthin/llama/args/package-info.java b/src/main/java/net/ladenthin/llama/args/package-info.java new file mode 100644 index 00000000..18542d5e --- /dev/null +++ b/src/main/java/net/ladenthin/llama/args/package-info.java @@ -0,0 +1,11 @@ +// SPDX-FileCopyrightText: 2026 Bernard Ladenthin +// +// SPDX-License-Identifier: MIT + +/** + * Typed enums for CLI-arg-valued options consumed by {@link net.ladenthin.llama.CliParameters}. + * + *

JSpecify {@code @NullMarked} is declared at module level in + * {@code module-info.java} and applies to this package transitively. + */ +package net.ladenthin.llama.args; diff --git a/src/main/java/net/ladenthin/llama/json/ChatResponseParser.java b/src/main/java/net/ladenthin/llama/json/ChatResponseParser.java index 93e7cd55..6cb71e24 100644 --- a/src/main/java/net/ladenthin/llama/json/ChatResponseParser.java +++ b/src/main/java/net/ladenthin/llama/json/ChatResponseParser.java @@ -162,7 +162,9 @@ public ChatResponse parseResponse(String json) { } private List parseChoices(JsonNode arr) { - if (!arr.isArray() || arr.size() == 0) return Collections.emptyList(); + // Mutable ArrayList on both branches keeps the return-type contract consistent + // (Error Prone MixedMutabilityReturnType). + if (!arr.isArray() || arr.size() == 0) return new ArrayList<>(); List out = new ArrayList(arr.size()); for (JsonNode c : arr) { int index = c.path("index").asInt(0); @@ -180,7 +182,7 @@ private List parseChoices(JsonNode arr) { } private List parseToolCalls(JsonNode arr) { - if (!arr.isArray() || arr.size() == 0) return Collections.emptyList(); + if (!arr.isArray() || arr.size() == 0) return new ArrayList<>(); List out = new ArrayList(arr.size()); for (JsonNode tc : arr) { String id = tc.path("id").asText(""); diff --git a/src/main/java/net/ladenthin/llama/json/CompletionResponseParser.java b/src/main/java/net/ladenthin/llama/json/CompletionResponseParser.java index 35ca8f5d..f195eebc 100644 --- a/src/main/java/net/ladenthin/llama/json/CompletionResponseParser.java +++ b/src/main/java/net/ladenthin/llama/json/CompletionResponseParser.java @@ -160,7 +160,9 @@ public Map parseProbabilities(JsonNode root) { public List parseLogprobs(JsonNode root) { JsonNode array = root.path("completion_probabilities"); if (!array.isArray() || array.size() == 0) { - return Collections.emptyList(); + // Return a mutable empty ArrayList to keep the return type consistent + // with the non-empty branch 
below (Error Prone MixedMutabilityReturnType). + return new ArrayList<>(); } List result = new ArrayList(array.size()); for (JsonNode entry : array) { diff --git a/src/main/java/net/ladenthin/llama/json/ParameterJsonSerializer.java b/src/main/java/net/ladenthin/llama/json/ParameterJsonSerializer.java index cd6f949a..e469aa39 100644 --- a/src/main/java/net/ladenthin/llama/json/ParameterJsonSerializer.java +++ b/src/main/java/net/ladenthin/llama/json/ParameterJsonSerializer.java @@ -9,6 +9,7 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ArrayNode; +import org.jspecify.annotations.Nullable; import com.fasterxml.jackson.databind.node.ObjectNode; import java.io.IOException; import java.util.Collection; @@ -79,7 +80,7 @@ public String toJsonString(String value) { * @return a Jackson {@link ArrayNode} of {@code {"role", "content"}} message objects * @throws IllegalArgumentException if any message has an invalid role */ - public ArrayNode buildMessages(String systemMessage, List> messages) { + public ArrayNode buildMessages(@Nullable String systemMessage, List> messages) { ArrayNode arr = OBJECT_MAPPER.createArrayNode(); if (systemMessage != null && !systemMessage.isEmpty()) { ObjectNode sys = OBJECT_MAPPER.createObjectNode(); @@ -118,15 +119,18 @@ public ArrayNode buildMessages(List messages) { msg.put("role", message.getRole()); if (message.hasParts()) { ArrayNode parts = OBJECT_MAPPER.createArrayNode(); - for (ContentPart p : message.getParts()) { + for (ContentPart p : message.getParts().orElseThrow( + () -> new IllegalStateException("hasParts() was true but getParts() was empty"))) { ObjectNode part = OBJECT_MAPPER.createObjectNode(); if (p.getType() == ContentPart.Type.TEXT) { part.put("type", "text"); - part.put("text", p.getText()); + final String text = p.getText(); + part.put("text", text != null ? 
text : ""); } else { part.put("type", "image_url"); ObjectNode imageUrl = OBJECT_MAPPER.createObjectNode(); - imageUrl.put("url", p.getImageUrl()); + final String url = p.getImageUrl(); + imageUrl.put("url", url != null ? url : ""); part.set("image_url", imageUrl); } parts.add(part); diff --git a/src/main/java/net/ladenthin/llama/json/RerankResponseParser.java b/src/main/java/net/ladenthin/llama/json/RerankResponseParser.java index f5d63a1d..346e4c5b 100644 --- a/src/main/java/net/ladenthin/llama/json/RerankResponseParser.java +++ b/src/main/java/net/ladenthin/llama/json/RerankResponseParser.java @@ -61,7 +61,9 @@ public List> parse(String json) { */ public List> parse(JsonNode arr) { if (!arr.isArray() || arr.size() == 0) { - return Collections.emptyList(); + // Mutable empty list keeps the return-type contract consistent + // (Error Prone MixedMutabilityReturnType). + return new ArrayList<>(); } List> results = new ArrayList>(); for (JsonNode entry : arr) { diff --git a/src/main/java/net/ladenthin/llama/json/package-info.java b/src/main/java/net/ladenthin/llama/json/package-info.java new file mode 100644 index 00000000..0d68fa73 --- /dev/null +++ b/src/main/java/net/ladenthin/llama/json/package-info.java @@ -0,0 +1,11 @@ +// SPDX-FileCopyrightText: 2026 Bernard Ladenthin +// +// SPDX-License-Identifier: MIT + +/** + * JSON serialization helpers for {@link net.ladenthin.llama} request / response shapes. + * + *

JSpecify {@code @NullMarked} is declared at module level in + * {@code module-info.java} and applies to this package transitively. + */ +package net.ladenthin.llama.json; diff --git a/src/main/java/net/ladenthin/llama/package-info.java b/src/main/java/net/ladenthin/llama/package-info.java new file mode 100644 index 00000000..ca7cad49 --- /dev/null +++ b/src/main/java/net/ladenthin/llama/package-info.java @@ -0,0 +1,18 @@ +// SPDX-FileCopyrightText: 2026 Bernard Ladenthin +// +// SPDX-License-Identifier: MIT + +/** + * Java bindings for llama.cpp. + * + *

JSpecify {@code @NullMarked} is declared at module level in + * {@code module-info.java} and applies transitively to every package + * in this module: every parameter, return value, and field is non-null + * unless explicitly annotated {@code @Nullable}. NullAway and the + * Checker Framework Nullness Checker both enforce this at compile + * time via the configured Error Prone compiler plugin (see + * {@code pom.xml}). Public-API methods that may legitimately have + * no value prefer {@code java.util.Optional} over + * {@code @Nullable T}. + */ +package net.ladenthin.llama; diff --git a/src/test/java/net/ladenthin/llama/CancellationTokenLincheckTest.java b/src/test/java/net/ladenthin/llama/CancellationTokenLincheckTest.java index ac62e425..4119e832 100644 --- a/src/test/java/net/ladenthin/llama/CancellationTokenLincheckTest.java +++ b/src/test/java/net/ladenthin/llama/CancellationTokenLincheckTest.java @@ -4,8 +4,8 @@ package net.ladenthin.llama; import org.jetbrains.kotlinx.lincheck.LinChecker; -import org.jetbrains.kotlinx.lincheck.annotations.Operation; -import org.jetbrains.kotlinx.lincheck.strategy.managed.modelchecking.ModelCheckingOptions; +import org.jetbrains.lincheck.datastructures.ModelCheckingOptions; +import org.jetbrains.lincheck.datastructures.Operation; import org.junit.jupiter.api.Test; /** diff --git a/src/test/java/net/ladenthin/llama/ChatResponseTest.java b/src/test/java/net/ladenthin/llama/ChatResponseTest.java index 1383d5e3..9769a7e8 100644 --- a/src/test/java/net/ladenthin/llama/ChatResponseTest.java +++ b/src/test/java/net/ladenthin/llama/ChatResponseTest.java @@ -5,7 +5,6 @@ package net.ladenthin.llama; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.List; @@ -61,7 +60,7 @@ public void parsesToolCalls() { + "]},\"finish_reason\":\"tool_calls\"}]," + 
"\"usage\":{\"prompt_tokens\":3,\"completion_tokens\":7}}"; ChatResponse r = parser.parseResponse(json); - ChatMessage m = r.getFirstMessage(); + ChatMessage m = r.getFirstMessage().orElseThrow(); assertEquals("assistant", m.getRole()); List tc = m.getToolCalls(); assertEquals(2, tc.size()); @@ -80,7 +79,7 @@ public void parsesObjectShapedArguments() { + "{\"name\":\"f\",\"arguments\":{\"a\":1,\"b\":2}}}]}," + "\"finish_reason\":\"tool_calls\"}]}"; ChatResponse r = parser.parseResponse(json); - String args = r.getFirstMessage().getToolCalls().get(0).getArgumentsJson(); + String args = r.getFirstMessage().orElseThrow().getToolCalls().get(0).getArgumentsJson(); // exact text isn't guaranteed, but must contain both fields assertTrue(args.contains("\"a\":1"), "expected serialized object, got: " + args); assertTrue(args.contains("\"b\":2")); @@ -112,7 +111,7 @@ public void buildMessagesJsonRoundTripsToolTurns() { @Test public void buildToolsJsonEmptyWhenNoTools() { ChatRequest req = new ChatRequest().addMessage("user", "hi"); - assertNull(req.buildToolsJson()); + assertTrue(req.buildToolsJson().isEmpty()); } @Test @@ -120,7 +119,7 @@ public void buildToolsJsonInlinesParameterSchema() { ChatRequest req = new ChatRequest() .addTool(new ToolDefinition( "echo", "Echo a string", "{\"type\":\"object\",\"properties\":{\"s\":{\"type\":\"string\"}}}")); - String tools = req.buildToolsJson(); + String tools = req.buildToolsJson().orElseThrow(); assertTrue(tools.contains("\"type\":\"function\""), tools); assertTrue(tools.contains("\"name\":\"echo\""), tools); assertTrue(tools.contains("\"properties\""), tools); diff --git a/src/test/java/net/ladenthin/llama/LlamaArchitectureTest.java b/src/test/java/net/ladenthin/llama/LlamaArchitectureTest.java index 01a192b1..711646f9 100644 --- a/src/test/java/net/ladenthin/llama/LlamaArchitectureTest.java +++ b/src/test/java/net/ladenthin/llama/LlamaArchitectureTest.java @@ -3,12 +3,16 @@ // SPDX-License-Identifier: MIT package 
net.ladenthin.llama; +import static com.tngtech.archunit.lang.syntax.ArchRuleDefinition.fields; import static com.tngtech.archunit.lang.syntax.ArchRuleDefinition.noClasses; +import static com.tngtech.archunit.library.dependencies.SlicesRuleDefinition.slices; import com.tngtech.archunit.core.importer.ImportOption; import com.tngtech.archunit.junit.AnalyzeClasses; import com.tngtech.archunit.junit.ArchTest; import com.tngtech.archunit.lang.ArchRule; +import java.util.Random; +import org.slf4j.Logger; @AnalyzeClasses(packages = "net.ladenthin.llama", importOptions = ImportOption.DoNotIncludeTests.class) public class LlamaArchitectureTest { @@ -34,4 +38,98 @@ public class LlamaArchitectureTest { .should() .dependOnClassesThat() .resideInAnyPackage("org.junit..", "net.jqwik..", "com.tngtech.archunit.."); + + /** + * Every SLF4J {@link Logger} field follows the {@code private static final} idiom. + */ + @ArchTest + static final ArchRule loggersArePrivateStaticFinal = fields() + .that() + .haveRawType(Logger.class) + .should() + .bePrivate() + .andShould() + .beStatic() + .andShould() + .beFinal(); + + /** + * No package cycles between sub-packages. Catches design drift where a leaf + * package starts importing from its parent or sibling. + */ + @ArchTest + static final ArchRule noPackageCycles = slices() + .matching("net.ladenthin.llama.(*)..") + .should() + .beFreeOfCycles(); + + /** + * Production code must not import unsupported / internal JDK packages. + * These are not part of the Java SE API and may change or disappear without notice. + * {@code OSInfo} is vendored from xerial/sqlite-jdbc and was already audited; + * if it ever pulls in sun.*, this rule fails and forces a re-audit. 
+ */ + @ArchTest + static final ArchRule noInternalJdkImports = noClasses() + .that() + .resideInAPackage("net.ladenthin.llama..") + .should() + .dependOnClassesThat() + .resideInAnyPackage("sun..", "com.sun..", "jdk.internal.."); + + /** + * Public mutable state forbidden: any non-static field declared + * {@code public} must also be {@code final}. {@link LlamaOutput} is an + * immutable value class with {@code public final} fields — that pattern + * remains allowed because the fields ARE final. + */ + @ArchTest + static final ArchRule noPublicMutableFields = fields() + .that() + .arePublic() + .and() + .areNotStatic() + .should() + .beFinal(); + + /** + * Production code must not call {@link System#exit(int)}; throw an exception instead. + */ + @ArchTest + static final ArchRule noSystemExit = noClasses() + .that() + .resideInAPackage("net.ladenthin.llama..") + .should() + .callMethod(System.class, "exit", int.class) + .allowEmptyShould(true); + + /** + * Production code must not construct {@link java.util.Random}; {@code Random} is a non-cryptographic + * PRNG (CWE-338). Use {@link java.security.SecureRandom} or {@link java.util.concurrent.ThreadLocalRandom} + * depending on whether cryptographic strength or thread-local fast jitter is needed. + */ + @ArchTest + static final ArchRule noNewRandom = noClasses() + .that() + .resideInAPackage("net.ladenthin.llama..") + .should() + .callConstructor(Random.class) + .orShould() + .callConstructor(Random.class, long.class) + .allowEmptyShould(true); + + /** + * Production code must not call {@link Thread#sleep(long)} / {@link Thread#sleep(long, int)}; + * prefer {@link java.util.concurrent.BlockingQueue#poll(long, java.util.concurrent.TimeUnit)} or + * {@link java.util.concurrent.locks.Condition#await(long, java.util.concurrent.TimeUnit)}. 
+ */ + @ArchTest + static final ArchRule noThreadSleep = noClasses() + .that() + .resideInAPackage("net.ladenthin.llama..") + .should() + .callMethod(Thread.class, "sleep", long.class) + .orShould() + .callMethod(Thread.class, "sleep", long.class, int.class) + .allowEmptyShould(true); } diff --git a/src/test/java/net/ladenthin/llama/LlamaModelSkipDownloadTest.java b/src/test/java/net/ladenthin/llama/LlamaModelSkipDownloadTest.java new file mode 100644 index 00000000..dcf4eae5 --- /dev/null +++ b/src/test/java/net/ladenthin/llama/LlamaModelSkipDownloadTest.java @@ -0,0 +1,102 @@ +// SPDX-FileCopyrightText: 2026 Bernard Ladenthin +// +// SPDX-License-Identifier: MIT + +package net.ladenthin.llama; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import net.ladenthin.llama.args.ModelFlag; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +/** + * Unit tests for the {@code SKIP_DOWNLOAD} plumbing on {@link ModelParameters} and the + * paired translation in {@link SkipDownloadFailureTranslator}. + * + *

These tests do NOT load the native library — they exercise pure Java logic: + * the boolean-setter round-trip via {@link ModelParameters#hasFlag(ModelFlag)} and the + * static translation heuristic that promotes a generic {@link LlamaException} to a typed + * {@link ModelUnavailableException} when the {@link ModelFlag#SKIP_DOWNLOAD} flag is + * set.

+ */ +public class LlamaModelSkipDownloadTest { + + /** Default constructor used by JUnit Jupiter. */ + public LlamaModelSkipDownloadTest() { + // no-op + } + + @Test + @DisplayName("setSkipDownload(true) sets the SKIP_DOWNLOAD flag") + public void setSkipDownload_true_setsFlag() { + ModelParameters p = new ModelParameters().setSkipDownload(true); + assertTrue(p.hasFlag(ModelFlag.SKIP_DOWNLOAD)); + } + + @Test + @DisplayName("setSkipDownload(false) clears the SKIP_DOWNLOAD flag") + public void setSkipDownload_false_clearsFlag() { + ModelParameters p = new ModelParameters().setSkipDownload(true).setSkipDownload(false); + assertFalse(p.hasFlag(ModelFlag.SKIP_DOWNLOAD)); + } + + @Test + @DisplayName("hasFlag returns false by default") + public void hasFlag_byDefault_returnsFalse() { + assertFalse(new ModelParameters().hasFlag(ModelFlag.SKIP_DOWNLOAD)); + } + + @Test + @DisplayName("translate: SKIP_DOWNLOAD set + 'Failed to parse' message -> ModelUnavailableException") + public void translate_skipDownloadSetAndParseFailed_returnsTypedException() { + ModelParameters p = new ModelParameters().setSkipDownload(true); + LlamaException original = new LlamaException("Failed to parse model parameters"); + + LlamaException translated = SkipDownloadFailureTranslator.translate(p, original); + + assertInstanceOf(ModelUnavailableException.class, translated); + assertNotNull(translated.getMessage()); + assertTrue( + translated.getMessage().contains("--skip-download"), + "message should mention the --skip-download flag for caller diagnosis"); + assertSame(original, translated.getCause(), "original exception should be preserved as cause"); + } + + @Test + @DisplayName("translate: SKIP_DOWNLOAD set but unrelated message -> original exception passes through") + public void translate_skipDownloadSetButUnrelatedMessage_returnsOriginal() { + ModelParameters p = new ModelParameters().setSkipDownload(true); + LlamaException original = new LlamaException("could not allocate VRAM"); + + 
LlamaException translated = SkipDownloadFailureTranslator.translate(p, original); + + assertSame(original, translated); + } + + @Test + @DisplayName("translate: SKIP_DOWNLOAD NOT set -> original exception passes through even on parse-failed") + public void translate_skipDownloadNotSet_returnsOriginal() { + ModelParameters p = new ModelParameters(); // skip-download not set + LlamaException original = new LlamaException("Failed to parse model parameters"); + + LlamaException translated = SkipDownloadFailureTranslator.translate(p, original); + + assertSame(original, translated); + } + + @Test + @DisplayName("translate: null message -> original exception passes through") + public void translate_nullMessage_returnsOriginal() { + ModelParameters p = new ModelParameters().setSkipDownload(true); + LlamaException original = new LlamaException((String) null); + + LlamaException translated = SkipDownloadFailureTranslator.translate(p, original); + + assertSame(original, translated); + } +} diff --git a/src/test/java/net/ladenthin/llama/LlamaModelTest.java b/src/test/java/net/ladenthin/llama/LlamaModelTest.java index 6b3255b6..2605f627 100644 --- a/src/test/java/net/ladenthin/llama/LlamaModelTest.java +++ b/src/test/java/net/ladenthin/llama/LlamaModelTest.java @@ -362,7 +362,7 @@ public void testTypedChat() { ChatResponse r = model.chat(req); assertNotNull(r); assertFalse(r.getChoices().isEmpty()); - assertNotNull(r.getFirstMessage()); + assertTrue(r.getFirstMessage().isPresent()); assertTrue(r.getUsage().getTotalTokens() > 0); } diff --git a/src/test/java/net/ladenthin/llama/LoggingSmokeTest.java b/src/test/java/net/ladenthin/llama/LoggingSmokeTest.java new file mode 100644 index 00000000..82e884d5 --- /dev/null +++ b/src/test/java/net/ladenthin/llama/LoggingSmokeTest.java @@ -0,0 +1,63 @@ +// SPDX-FileCopyrightText: 2026 Bernard Ladenthin +// +// SPDX-License-Identifier: MIT + +package net.ladenthin.llama; + +import static org.junit.jupiter.api.Assertions.assertEquals; 
+import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.IOException; +import nl.altindag.log.LogCaptor; +import org.junit.jupiter.api.Test; +import org.slf4j.LoggerFactory; + +@ClaudeGenerated( + purpose = "Smoke-test the SLF4J + Logback pipeline so a future binding or " + + "configuration regression is caught at test time rather than silently " + + "swallowing logs in production.") +public class LoggingSmokeTest { + + /** + * Direct binding/routing check: emit a known event through the configured + * pipeline and assert LogCaptor saw it. Fails if SLF4J binds to NOPLogger + * or if Logback is misconfigured to drop INFO from this logger. + */ + @Test + public void slf4jPipelineEmits() { + try (LogCaptor captor = LogCaptor.forClass(OSInfo.class)) { + LoggerFactory.getLogger(OSInfo.class).info("smoke"); + assertTrue( + captor.getInfoLogs().contains("smoke"), + "SLF4J pipeline did not deliver INFO event to LogCaptor; " + + "binding or Logback config is broken"); + } + } + + /** + * Production call-site check: trigger {@link OSInfo#getHardwareName()} on a + * stub {@link ProcessRunner} that throws, and assert the catch-block's + * {@code error} log is captured. Pins the production log line as part of + * the contract — an accidental refactor that drops the logger call fails + * this test. 
+ */ + @Test + public void getHardwareNameLogsError_whenProcessRunnerThrows() { + ProcessRunner original = OSInfo.processRunner; + try (LogCaptor captor = LogCaptor.forClass(OSInfo.class)) { + OSInfo.processRunner = new ProcessRunner() { + @Override + String runAndWaitFor(String command) throws IOException { + throw new IOException("boom"); + } + }; + assertEquals("unknown", OSInfo.getHardwareName()); + assertTrue( + captor.getErrorLogs().stream() + .anyMatch(m -> m.contains("Error while running uname -m")), + "expected error log 'Error while running uname -m' was not captured"); + } finally { + OSInfo.processRunner = original; + } + } +} diff --git a/src/test/java/net/ladenthin/llama/ModelParametersExtendedTest.java b/src/test/java/net/ladenthin/llama/ModelParametersExtendedTest.java index bef4a3d4..1f4dc4f2 100644 --- a/src/test/java/net/ladenthin/llama/ModelParametersExtendedTest.java +++ b/src/test/java/net/ladenthin/llama/ModelParametersExtendedTest.java @@ -1058,22 +1058,22 @@ public void testToArrayComplexCombination() { } // ------------------------------------------------------------------------- - // isDefault — extended + // isUnset — extended // ------------------------------------------------------------------------- @Test public void testIsDefaultForCtxSize() { ModelParameters p = new ModelParameters(); - assertTrue(p.isDefault("ctx-size")); + assertTrue(p.isUnset("ctx-size")); p.setCtxSize(2048); - assertFalse(p.isDefault("ctx-size")); + assertFalse(p.isUnset("ctx-size")); } @Test public void testIsDefaultForFlagOnly() { ModelParameters p = new ModelParameters(); - assertTrue(p.isDefault("flash-attn")); + assertTrue(p.isUnset("flash-attn")); p.enableFlashAttn(); - assertFalse(p.isDefault("flash-attn")); + assertFalse(p.isUnset("flash-attn")); } } diff --git a/src/test/java/net/ladenthin/llama/ModelParametersTest.java b/src/test/java/net/ladenthin/llama/ModelParametersTest.java index 59a295cf..7bd8630e 100644 --- 
a/src/test/java/net/ladenthin/llama/ModelParametersTest.java +++ b/src/test/java/net/ladenthin/llama/ModelParametersTest.java @@ -23,7 +23,7 @@ + "correct CLI argument formatting for enum-based setters (PoolingType, RopeScalingType, " + "CacheType, GpuSplitMode, NumaStrategy, MiroStat) and composite-value setters " + "(loraScaled, controlVectorScaled, controlVectorLayerRange), semicolon-separated " - + "lowercase sampler list, isDefault key-presence check, and the CliParameters base " + + "lowercase sampler list, isUnset key-presence check, and the CliParameters base " + "behaviour: toString omits 'null' for flag-only entries, toArray always prepends an " + "empty argv[0] string and omits values for null-valued flags.") public class ModelParametersTest { @@ -185,25 +185,25 @@ public void testSetControlVectorLayerRangeSameStartEnd() { } // ------------------------------------------------------------------------- - // isDefault + // isUnset // ------------------------------------------------------------------------- @Test public void testIsDefaultTrueWhenNotSet() { ModelParameters p = new ModelParameters(); - assertTrue(p.isDefault("threads")); + assertTrue(p.isUnset("threads")); } @Test public void testIsDefaultFalseWhenSet() { ModelParameters p = new ModelParameters().setThreads(4); - assertFalse(p.isDefault("threads")); + assertFalse(p.isUnset("threads")); } @Test public void testIsDefaultFalseAfterFlagOnly() { ModelParameters p = new ModelParameters().enableEmbedding(); - assertFalse(p.isDefault("embedding")); + assertFalse(p.isUnset("embedding")); } // ------------------------------------------------------------------------- diff --git a/src/test/java/net/ladenthin/llama/MultimodalMessagesTest.java b/src/test/java/net/ladenthin/llama/MultimodalMessagesTest.java index 523f098c..9fb5cafc 100644 --- a/src/test/java/net/ladenthin/llama/MultimodalMessagesTest.java +++ b/src/test/java/net/ladenthin/llama/MultimodalMessagesTest.java @@ -32,7 +32,7 @@ public class 
MultimodalMessagesTest { public void hasPartsIsFalseForLegacyConstructor() { ChatMessage m = new ChatMessage("user", "hello"); assertFalse(m.hasParts()); - assertEquals(null, m.getParts()); + assertTrue(m.getParts().isEmpty()); } @Test @@ -40,7 +40,7 @@ public void hasPartsIsTrueForPartsConstructor() { ChatMessage m = new ChatMessage( "user", Arrays.asList(ContentPart.text("hi"), ContentPart.imageUrl("data:image/png;base64,AAAA"))); assertTrue(m.hasParts()); - assertEquals(2, m.getParts().size()); + assertEquals(2, m.getParts().orElseThrow().size()); } @Test @@ -60,9 +60,10 @@ public void userMultimodalFactoryBuildsUserMessage() { ChatMessage m = ChatMessage.userMultimodal( ContentPart.text("what is this?"), ContentPart.imageUrl("data:image/jpeg;base64,Y")); assertEquals("user", m.getRole()); - assertEquals(2, m.getParts().size()); - assertEquals(ContentPart.Type.TEXT, m.getParts().get(0).getType()); - assertEquals(ContentPart.Type.IMAGE_URL, m.getParts().get(1).getType()); + List parts = m.getParts().orElseThrow(); + assertEquals(2, parts.size()); + assertEquals(ContentPart.Type.TEXT, parts.get(0).getType()); + assertEquals(ContentPart.Type.IMAGE_URL, parts.get(1).getType()); } @Test @@ -80,7 +81,7 @@ public void nullPartsListIsRejected() { public void getPartsListIsUnmodifiable() { ChatMessage m = ChatMessage.userMultimodal(ContentPart.text("x")); try { - m.getParts().add(ContentPart.text("y")); + m.getParts().orElseThrow().add(ContentPart.text("y")); fail("getParts() must return an unmodifiable list"); } catch (UnsupportedOperationException expected) { // ok diff --git a/src/test/java/net/ladenthin/llama/PairTest.java b/src/test/java/net/ladenthin/llama/PairTest.java index 5a591561..d04819d0 100644 --- a/src/test/java/net/ladenthin/llama/PairTest.java +++ b/src/test/java/net/ladenthin/llama/PairTest.java @@ -7,6 +7,7 @@ import static org.junit.jupiter.api.Assertions.*; +import java.util.Objects; import org.junit.jupiter.api.Test; public class PairTest { @@ 
-107,6 +108,16 @@ public void testHashCodeWithNull() { assertNotNull(pair.hashCode()); } + @Test + public void testHashCodeMatchesObjectsHash() { + // Pins hashCode() to Objects.hash(key, value) exactly. + // Without this, PIT's PrimitiveReturnsMutator survives by replacing + // the return with 0 - the existing assertNotNull tests cannot detect + // that because hashCode()'s primitive int autoboxes to a non-null Integer. + Pair pair = new Pair<>("key", 123); + assertEquals(Objects.hash("key", 123), pair.hashCode()); + } + @Test public void testToString() { Pair pair = new Pair<>("testKey", 42); diff --git a/src/test/java/net/ladenthin/llama/args/ModelFlagTest.java b/src/test/java/net/ladenthin/llama/args/ModelFlagTest.java index 4b01e6a7..86b46b04 100644 --- a/src/test/java/net/ladenthin/llama/args/ModelFlagTest.java +++ b/src/test/java/net/ladenthin/llama/args/ModelFlagTest.java @@ -48,6 +48,7 @@ public static Collection data() { {ModelFlag.NO_CLEAR_IDLE, "--no-cache-idle-slots"}, {ModelFlag.MMPROJ_AUTO, "--mmproj-auto"}, {ModelFlag.MMPROJ_OFFLOAD, "--mmproj-offload"}, + {ModelFlag.SKIP_DOWNLOAD, "--skip-download"}, }); } @@ -63,7 +64,7 @@ public void testGetCliFlag(ModelFlag flag, String expectedCliFlag) { @Test public void testEnumCount() { - assertEquals(31, ModelFlag.values().length); + assertEquals(32, ModelFlag.values().length); } @ParameterizedTest(name = "{0} -> {1}")