diff --git a/CLAUDE.md b/CLAUDE.md
index 8f48354e..eef74dd2 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -264,7 +264,11 @@ mvn test -Dtest=LlamaModelTest#testGenerateAnswer
```
**Optional models** referenced by individual tests are gated on a system
-property so CI can skip them cleanly when the GGUF is not downloaded:
+property so CI can skip them cleanly when the GGUF is not downloaded.
+The full property → consumer → default table for every `net.ladenthin.llama.*`
+property the library understands (runtime + test) is the user-facing
+**[System Properties Reference](README.md#system-properties-reference)** in
+the README. The summary below covers only the optional-model bindings:
| Property | Default test that uses it | Model |
|----------|---------------------------|-------|
@@ -640,117 +644,21 @@ EXPECT_FALSE(j.contains("stop_type")); // filtered out
## Javadoc Conventions
-### HTML Entities
-
-In Javadoc comments, never use bare Unicode characters for operators and symbols. Use HTML entities instead:
-
-| Symbol | HTML entity |
-|---|---|
-| `<` | `&lt;` |
-| `>` | `&gt;` |
-| `≤` | `&#x2264;` |
-| `≥` | `&#x2265;` |
-| `→` | `&#x2192;` |
-| `←` | `&#x2190;` |
-| `≠` | `&#x2260;` |
-
-Use numeric hex entities (`&#xNNNN;`) for any Unicode symbol outside ASCII. Named entities (`&lt;`, `&gt;`) are acceptable for `<` and `>`.
+See [`../workspace/policies/javadoc-conventions.md`](../workspace/policies/javadoc-conventions.md).
## SpotBugs Suppressions
-`spotbugs-exclude.xml` at the repo root contains documented suppressions for findings that are by-design or false positives. **When refactoring or renaming code referenced in that file, re-check the affected `
{@code toString} is generated by Lombok over the {@code cancelled} flag. + * {@code equals}/{@code hashCode} are intentionally NOT generated: a token is a + * lifecycle handle managed by identity (the calling thread keeps a reference and + * the inference loop observes that same instance), not a value object.
*/ +@ToString public final class CancellationToken { private volatile boolean cancelled; diff --git a/src/main/java/net/ladenthin/llama/ChatChoice.java b/src/main/java/net/ladenthin/llama/ChatChoice.java index 2583f179..2ab3db5f 100644 --- a/src/main/java/net/ladenthin/llama/ChatChoice.java +++ b/src/main/java/net/ladenthin/llama/ChatChoice.java @@ -4,10 +4,15 @@ package net.ladenthin.llama; +import lombok.EqualsAndHashCode; +import lombok.ToString; + /** * One choice in a chat completion response: the assistant message and the finish reason. * Mirrors the OpenAI {@code choices[i]} object. */ +@ToString +@EqualsAndHashCode public final class ChatChoice { private final int index; diff --git a/src/main/java/net/ladenthin/llama/ChatMessage.java b/src/main/java/net/ladenthin/llama/ChatMessage.java index c581c034..1a86eb43 100644 --- a/src/main/java/net/ladenthin/llama/ChatMessage.java +++ b/src/main/java/net/ladenthin/llama/ChatMessage.java @@ -8,6 +8,7 @@ import java.util.Collections; import java.util.List; import java.util.Optional; +import lombok.EqualsAndHashCode; import org.jspecify.annotations.Nullable; /** @@ -24,10 +25,16 @@ * Multimodal turns carry a non-null {@link #getParts()} list of {@link ContentPart}s * (text and image references). When parts are present they take precedence over * {@link #getContent()} during serialization; the upstream OAI chat path - * (see {@link InferenceParameters#setMessages(java.util.List)}) emits an array-form + * (see {@link InferenceParameters#withMessages(java.util.List)}) emits an array-form * {@code content} field that the compiled-in {@code mtmd} pipeline understands. * + * + *{@code equals}/{@code hashCode} are generated by Lombok over all fields. + * {@code toString} is intentionally handwritten (not Lombok-generated) so that + * conversation traces in logs render as "{@code role: content}" or + * "{@code role (tool_calls=N): content}" instead of a verbose field dump.
*/ +@EqualsAndHashCode public final class ChatMessage { private final String role; @@ -70,12 +77,19 @@ public ChatMessage(String role, String content, @Nullable String toolCallId, Lis public ChatMessage(String role, List- * Bundles the conversation messages, optional tool definitions, an optional - * {@code tool_choice} hint, and an {@link InferenceParameters} customizer that gets - * applied to the underlying request just before invocation. Built with the fluent - * setters; consumed by {@link LlamaModel#chat(ChatRequest)} and + * Immutable typed chat-completion request, populated through a functional + * "wither / appender" API. + * + *
The request carries the conversation messages, optional tool definitions, + * an optional {@code tool_choice} hint, and an {@link InferenceParameters} + * customiser applied to the underlying request just before invocation. Because + * {@link InferenceParameters} is itself immutable, the customiser is a + * {@link UnaryOperator} that takes a parameter set and returns the transformed + * one — callers chain {@code withX(...)} calls on the input and return the + * resulting instance. The type is consumed by + * {@link LlamaModel#chat(ChatRequest)} and * {@link LlamaModel#chatWithTools(ChatRequest, java.util.Map)}. - *
+ * + *All instances are immutable: every field is {@code final} and the + * stored lists are wrapped with {@link Collections#unmodifiableList(List)}. + * Modification methods return a new {@code ChatRequest} instance with + * the requested change applied; the original is untouched. This makes + * {@code ChatRequest} safe to share across threads and gives it meaningful + * value-equality semantics (two requests with the same content compare + * equal regardless of identity). + * + *
Use {@link #empty()} as the entry point, then chain {@code append*} + * (for list fields) and {@code with*} (for scalar fields): + * + *
{@code
+ * ChatRequest req = ChatRequest.empty()
+ * .appendMessage("system", "be terse")
+ * .appendMessage("user", "two plus two?")
+ * .withMaxToolRounds(2)
+ * .withInferenceCustomizer(p -> p.withNPredict(8).withSeed(1));
+ * }
+ *
+ * Each call allocates a new {@code ChatRequest}. The cost is intentional: + * the API is functional, so a caller can hold an intermediate request and + * derive variants without worrying about hidden state changes. + * + *
{@code @EqualsAndHashCode} compares messages, tools, {@code toolChoice},
+ * and {@code maxToolRounds} by value. The {@code paramsCustomizer}
+ * {@link UnaryOperator} is excluded from equality: lambdas have
+ * compiler-synthesised identity equality which is not value-shaped, so
+ * including it would mean two structurally-identical requests with the same
+ * customiser source code rarely compare equal — surprising for the typical
+ * snapshot-testing and caching use cases. The customiser is also excluded
+ * from {@link ToString} for the same reason (the rendered hash is noise).
*/
+@ToString
+@EqualsAndHashCode
public final class ChatRequest {
private static final ObjectMapper MAPPER = new ObjectMapper();
- private final List
The append API only offers atomic turn commits: + * + *
The wire-format the model sees is built by + * {@link #messagesWithPendingUserTurn(String)}, which returns a fresh list + * containing the committed turns plus a pending user turn — without + * mutating the underlying transcript. This is the mechanism by which the + * model receives the prompt before the user turn is committed. + * + *
This class is not internally synchronised. {@link Session} owns + * the single instance and serialises access via its intrinsic lock, so the + * transcript itself does not need additional synchronisation. Callers that + * use {@code ChatTranscript} directly must provide their own synchronisation + * if shared across threads. + * + *
Lombok-generated over the system message and turns list. The turns list
+ * IS included because it is the operationally interesting state for log
+ * traces. {@code equals}/{@code hashCode} are intentionally NOT generated:
+ * a transcript instance is identified by its lifecycle owner ({@link Session}),
+ * not by its accumulated content.
+ */
+@ToString
+final class ChatTranscript {
+
+ private final @Nullable String systemMessage;
+ private final List {@code equals}/{@code hashCode} are generated by Lombok over the parameters map.
+ * {@code toString} is intentionally handwritten (not Lombok-generated): it emits the
+ * accumulated parameters as a space-separated CLI argv-style string that callers can
+ * forward to the native CLI. Replacing it with a Lombok field dump would break that
+ * consumer contract.
+ */
+@EqualsAndHashCode
abstract class CliParameters {
final Map
* Bundles the generated text with parsed {@link Usage}, {@link Timings},
* per-token {@link TokenLogprob} entries (populated only when
- * {@link InferenceParameters#setNProbs(int)} &gt; 0), and the {@link StopReason}.
+ * {@link InferenceParameters#withNProbs(int)} &gt; 0), and the {@link StopReason}.
* The raw native JSON is exposed via {@link #getRawJson()} as an escape hatch.
* {@code equals}/{@code hashCode} are generated by Lombok over all fields.
+ * {@code toString} is intentionally handwritten (not Lombok-generated): it
+ * returns the generated text verbatim so that {@code result + ""} or
+ * {@code String.valueOf(result)} produce the completion text rather than a
+ * verbose field dump. This is a public-API contract preserved from the
+ * pre-Lombok shape. All instances are immutable: the inherited {@code parameters} map is
+ * {@link java.util.Collections#unmodifiableMap(Map) unmodifiable} and every
+ * {@code withX} call routes through the parent's protected helpers to allocate a
+ * new {@code InferenceParameters} with one entry inserted or replaced. The
+ * original instance is never touched.
+ *
+ * The legacy {@code new InferenceParameters(prompt)} constructor remains
+ * available and is exactly equivalent to {@link #of(String)}.
+ *
+ * {@code equals}/{@code hashCode} are generated by Lombok with {@code callSuper=true}
+ * so the parent {@link JsonParameters} parameters map participates in equality.
+ * {@code toString} is inherited from {@link JsonParameters} and emits the accumulated
+ * parameters as a JSON object string consumed by the native server.
*/
@SuppressWarnings("unused")
+@EqualsAndHashCode(callSuper = true)
public final class InferenceParameters extends JsonParameters {
private static final String PARAM_PROMPT = "prompt";
@@ -64,638 +94,613 @@ public final class InferenceParameters extends JsonParameters {
private static final String PARAM_REASONING_FORMAT = "reasoning_format";
private static final String PARAM_REASONING_BUDGET_TOKENS = "reasoning_budget_tokens";
private static final String PARAM_CONTINUE_FINAL_MESSAGE = "continue_final_message";
+ private static final String PARAM_TOOLS = "tools";
+ private static final String PARAM_TOOL_CHOICE = "tool_choice";
+
+ private static final InferenceParameters EMPTY = new InferenceParameters();
+
+ /** Private no-arg: starts from an empty parameter map. */
+ private InferenceParameters() {
+ super();
+ }
+
+ /** Private all-args: wraps a pre-built unmodifiable map verbatim. */
+ private InferenceParameters(Map
- * Per-request equivalent of {@link ModelParameters#setJsonSchema(String)}, which is
- * applied once at model load time.
+ * Returns a new request with a per-request JSON-schema constraint replaced. The
+ * native server converts the schema to a GBNF grammar internally; the schema string
+ * is passed verbatim and must be valid JSON Schema.
*
- * @param schema JSON Schema as a JSON-encoded string (e.g. {@code "{\"type\":\"object\"...}"})
- * @return this builder
+ * @param schema JSON Schema as a JSON-encoded string
+ * @return a new instance; this instance is unchanged
*/
- public InferenceParameters setJsonSchema(String schema) {
- parameters.put(PARAM_JSON_SCHEMA, schema);
- return this;
+ public InferenceParameters withJsonSchema(String schema) {
+ return withRaw(PARAM_JSON_SCHEMA, schema);
}
/**
- * Override which part of the prompt is penalized for repetition.
- * E.g. if original prompt is "Alice: Hello!" and penaltyPrompt is "Hello!", only the latter will be penalized if
- * repeated. See pull request 3727 for more details.
+ * Returns a new request with the repetition-penalty prompt-portion override replaced.
*
- * @param penaltyPrompt the string portion of the prompt to penalize for repetition
- * @return this builder
+ * @param penaltyPrompt the string portion of the prompt to penalize; {@code null} clears
+ * @return a new instance; this instance is unchanged
*/
- public InferenceParameters setPenaltyPrompt(String penaltyPrompt) {
- parameters.put(PARAM_PENALTY_PROMPT, toJsonString(penaltyPrompt));
- return this;
+ public InferenceParameters withPenaltyPrompt(@Nullable String penaltyPrompt) {
+ return withOptionalJson(PARAM_PENALTY_PROMPT, penaltyPrompt);
}
/**
- * Override which tokens to penalize for repetition.
- * E.g. if original prompt is "Alice: Hello!" and penaltyPrompt corresponds to the token ids of "Hello!", only the
- * latter will be penalized if repeated.
- * See pull request 3727 for more details.
+ * Returns a new request with the repetition-penalty prompt-portion override replaced
+ * (token-id form). Empty input is a no-op (returns {@code this}).
*
- * @param tokens the token ids of the prompt portion to penalize for repetition
- * @return this builder
+ * @param tokens token ids of the prompt portion to penalize
+ * @return a new instance with the array set, or {@code this} if {@code tokens} is empty
*/
- public InferenceParameters setPenaltyPrompt(int[] tokens) {
- if (tokens.length > 0) {
- parameters.put(
- PARAM_PENALTY_PROMPT, serializer.buildIntArray(tokens).toString());
+ public InferenceParameters withPenaltyPrompt(int... tokens) {
+ if (tokens.length == 0) {
+ return this;
}
- return this;
+ return withRaw(PARAM_PENALTY_PROMPT, serializer.buildIntArray(tokens).toString());
}
/**
- * Set whether to ignore end of stream token and continue generating (implies --logit-bias 2-inf)
+ * Returns a new request with the EOS-ignore flag replaced.
*
* @param ignoreEos whether to ignore the end-of-stream token
- * @return this builder
+ * @return a new instance; this instance is unchanged
*/
- public InferenceParameters setIgnoreEos(boolean ignoreEos) {
- return putScalar(PARAM_IGNORE_EOS, ignoreEos);
+ public InferenceParameters withIgnoreEos(boolean ignoreEos) {
+ return withScalar(PARAM_IGNORE_EOS, ignoreEos);
}
/**
- * Modify the likelihood of tokens appearing in the completion by their id. E.g.,
- * Example:
- *
- * Image parts require the model to have a multimodal projector loaded via
- * {@link ModelParameters#setMmproj(String)}. The upstream OAI chat parser
- * routes {@code image_url} blocks through the compiled-in {@code mtmd}
- * pipeline; no additional JNI configuration is needed on the Java side.
- * The stateless instance has no fields, so the Lombok-generated {@code toString}
+ * renders as "{@code Java8CompatibilityHelper()}" — informative enough to satisfy the
+ * fb-contrib IMC_IMMATURE_CLASS_NO_TOSTRING contract. Note this class also exposes a
+ * {@code toString(ByteArrayOutputStream, Charset)} method for stream decoding;
+ * that is unrelated to the generated {@link Object#toString()} override.
*/
+@ToString
public class Java8CompatibilityHelper {
/** Creates a new {@link Java8CompatibilityHelper}. */
@@ -81,7 +89,8 @@ public String readString(final Path path) throws IOException {
* @param charset the charset to encode the content with; defaults to UTF-8 if {@code null}
* @throws IOException if an I/O error occurs writing to the file
*/
- public void writeString(final Path path, final String content, final @org.jspecify.annotations.Nullable Charset charset)
+ public void writeString(
+ final Path path, final String content, final @org.jspecify.annotations.Nullable Charset charset)
throws IOException {
final Charset targetCharset = charset != null ? charset : StandardCharsets.UTF_8;
Files.write(path, content.getBytes(targetCharset));
diff --git a/src/main/java/net/ladenthin/llama/JsonParameters.java b/src/main/java/net/ladenthin/llama/JsonParameters.java
index a2cf18e4..cf3415ad 100644
--- a/src/main/java/net/ladenthin/llama/JsonParameters.java
+++ b/src/main/java/net/ladenthin/llama/JsonParameters.java
@@ -5,26 +5,61 @@
package net.ladenthin.llama;
+import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
+import lombok.EqualsAndHashCode;
import net.ladenthin.llama.args.CliArg;
import net.ladenthin.llama.json.ParameterJsonSerializer;
-import org.checkerframework.checker.nullness.qual.PolyNull;
+import org.jspecify.annotations.Nullable;
/**
- * The Java library re-uses most of the llama.cpp server code, which mostly works with JSONs. Thus, the complexity and
- * maintainability is much lower if we work with JSONs. This class provides a simple abstraction to easily create
- * JSON object strings by filling a The native server consumes parameters as a JSON object, so the type holds an
+ * unmodifiable {@code Map {@code equals}/{@code hashCode} are generated by Lombok over the {@code parameters}
+ * map. {@code toString} is intentionally handwritten (not Lombok-generated): it emits an
+ * actual JSON object string of the accumulated parameters and is consumed by callers
+ * that hand the result to the native server. The {@code serializer} field is excluded
+ * from equality because it is a stateless helper instance (all instances of the same
+ * class are functionally equivalent).
*/
+@EqualsAndHashCode
abstract class JsonParameters {
- // We save parameters directly as a String map here, to re-use as much as possible of the (json-based) C++ code.
- // The JNI code for a proper Java-typed data object is comparatively too complex and hard to maintain.
- final Map A plain for-each loop without try-with-resources continues to work; the {@link #close()}
* method just will not be called on early exit in that case.
*/
+@ToString
public final class LlamaIterable implements Iterable {@link LlamaIterator} implements {@link AutoCloseable}. When used via {@link LlamaIterable}
* inside a try-with-resources block, {@link #close()} is called automatically on early exit
* (e.g. {@code break}), preventing the native task slot from leaking.
+ *
+ * {@code toString} is generated by Lombok over the task id, the {@code hasNext}
+ * flag, and the parser collaborator; the {@link LlamaModel} reference is excluded
+ * because it would recursively dump the entire native model state.
+ * {@code equals}/{@code hashCode} are intentionally NOT generated: iterators are
+ * lifecycle handles tied to a single in-progress task, managed by identity. The library files are automatically extracted from this project's package (JAR).
*
+ * Historically the loader also honoured a {@code net.ladenthin.llama.lib.name}
+ * property that overrode the resolved library filename. Upstream removed the
+ * code path that read it in {@code kherud/java-llama.cpp} commit {@code 6bb63e1}
+ * ("add ggml shared library to binding") when the loader was extended to
+ * load multiple shared libraries (ggml + jllama) as separate files — the
+ * single-name-override model is incompatible with that. The Javadoc mention
+ * of the property had been stale ever since, in both upstream and this fork;
+ * it has now been removed here, and the corresponding {@code getLibName()}
+ * getter has been deleted from {@code LlamaSystemProperties}.
+ *
* usage: call {@link #initialize()} before using the library.
*
* @author leo
*/
@SuppressWarnings("UseOfSystemOutOrSystemErr")
+@ToString
class LlamaLoader {
private static boolean extracted = false;
@@ -255,7 +267,9 @@ static String getNativeResourcePath() {
final Package pkg = LlamaLoader.class.getPackage();
// LlamaLoader is in a named package, so Class.getPackage() is never null here.
if (pkg == null) {
- throw new IllegalStateException("LlamaLoader.class.getPackage() returned null");
+ throw new IllegalStateException(
+ "LlamaLoader.class.getPackage() returned null (classLoader="
+ + LlamaLoader.class.getClassLoader() + ")");
}
String packagePath = pkg.getName().replace('.', '/');
return String.format("/%s/%s", packagePath, OSInfo.getNativeLibFolderPathForCurrentOS());
diff --git a/src/main/java/net/ladenthin/llama/LlamaModel.java b/src/main/java/net/ladenthin/llama/LlamaModel.java
index d5e21071..695c2b68 100644
--- a/src/main/java/net/ladenthin/llama/LlamaModel.java
+++ b/src/main/java/net/ladenthin/llama/LlamaModel.java
@@ -10,10 +10,10 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import java.util.Objects;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import java.util.function.BiConsumer;
+import lombok.ToString;
import net.ladenthin.llama.args.LogFormat;
import net.ladenthin.llama.json.ChatResponseParser;
import net.ladenthin.llama.json.CompletionResponseParser;
@@ -32,9 +32,19 @@
* {@code toString} is generated by Lombok over the native context handle ({@code ctx})
+ * plus the parser collaborator references; that gives logs and debuggers a useful
+ * "{@code LlamaModel(ctx=12345..., ...)}" identity dump.
+ * {@code equals}/{@code hashCode} are intentionally NOT generated: model instances own
+ * a native context and are managed by reference identity, not by value.
-     * Logprobs are populated only when {@link InferenceParameters#setNProbs(int)} is &gt; 0.
+     * Logprobs are populated only when {@link InferenceParameters#withNProbs(int)} is &gt; 0.
* The raw native JSON is preserved on {@link CompletionResult#getRawJson()}.
*
* @param parameters the inference configuration
* @return a populated {@link CompletionResult}
*/
public CompletionResult completeWithStats(InferenceParameters parameters) {
- parameters.setStream(false);
- int taskId = requestCompletion(parameters.toString());
+ InferenceParameters nonStreaming = parameters.withStream(false);
+ int taskId = requestCompletion(nonStreaming.toString());
String json = receiveCompletionJson(taskId);
return completionParser.parseCompletionResult(json);
}
@@ -206,29 +216,6 @@ public java.util.List
* Callers are responsible for producing a JSON Schema that matches the target type;
@@ -724,22 +712,21 @@ public String getMetrics() {
* the schema has already been set on {@code parameters}.
*
* @param type the target POJO class for Jackson deserialization
- * @param schema JSON Schema string applied via {@code setJsonSchema}
- * @param parameters inference parameters (will be mutated to include the schema)
+ * @param schema JSON Schema string applied via {@code withJsonSchema}
+ * @param parameters inference parameters (a new derivation with the schema set is used)
* @param {@code equals}/{@code hashCode} are generated by Lombok over all fields.
+ * {@code toString} is intentionally handwritten (not Lombok-generated): it returns
+ * the generated text fragment verbatim so that {@code String.valueOf(output)}
+ * reproduces the streamed text. This is a public-API contract preserved from the
+ * pre-Lombok shape.
*/
+@EqualsAndHashCode
public final class LlamaOutput {
/**
@@ -26,13 +34,13 @@ public final class LlamaOutput {
* raw {@code prob} or {@code logprob} from the native response. For richer per-token
* detail (token id and the {@code top_logprobs} alternatives), use {@link #logprobs}.
*
- * Note, that you have to configure {@link InferenceParameters#setNProbs(int)} in order for probabilities to be returned.
+     * Note that {@link InferenceParameters#withNProbs(int)} must be configured for probabilities to be returned.
*/
public final Map
- * Each {@link #subscribe(Subscriber)} starts a fresh inference task on a dedicated
- * background thread and honours {@code Subscription.request(n)} for backpressure:
- * the emitter thread only calls {@code iterator.next()} while there is outstanding
- * demand. When the iterator's stop token arrives the publisher calls
- * {@code onComplete}; on cancellation it closes the iterator and stops emitting.
- *
- * Construct via {@link LlamaModel#streamPublisher(InferenceParameters)} or
- * {@link LlamaModel#streamChatPublisher(InferenceParameters)}. The publisher is
- * single-subscriber: a second {@link #subscribe(Subscriber)} call signals
- * {@code onError(IllegalStateException)}.
- * {@link #toString()} re-serializes to compact JSON and is suitable for
- * {@code assertEquals} in unit tests. {@code equals}/{@code hashCode} are generated by Lombok with {@code callSuper=true}
+ * so the parent {@link CliParameters} parameters map participates in equality. The
+ * stateless {@code serializer} helper is excluded from equality because all instances
+ * of the same class are functionally equivalent. {@code toString} is inherited from
+ * {@link CliParameters} and emits the accumulated parameters as a CLI argv-style
+ * string consumed by the native binary. {@code equals}/{@code hashCode} are generated by Lombok over the underlying
+ * {@link JsonNode} field, which is the correct value semantics for this wrapper.
+ * {@code toString} is intentionally handwritten (not Lombok-generated) so the
+ * compact-JSON re-serialisation contract is preserved. {@code toString} is generated by Lombok over the slot id, system message, and
+ * accumulated turns. The owning {@link LlamaModel} is excluded because its
+ * {@code toString} would render native state. The {@code paramsCustomizer}
+ * {@link UnaryOperator} is excluded because lambda {@code toString} is the implementation
+ * hash, not useful in logs. The intrinsic {@code lock} is excluded as a noise field.
+ * {@code equals}/{@code hashCode} are intentionally NOT generated: a session is a
+ * mutable lifecycle handle managed by identity. Format: Speculative-decoding runs append a {@code | draft: N (M accepted)} segment.
+ * Empty {@link Timings} (both {@code promptN} and {@code predictedN} zero) are
+ * skipped — logging the all-zero fallback on a parse failure or on early
+ * cancellation is pure noise. The dedicated logger name lets users suppress just this per-run line in
+ * logback without touching the rest of the {@code net.ladenthin.llama} logging
+ * tree, e.g.: No-op when the timings carry no useful data (both prompt and predicted
+ * token counts are zero — typically a parse failure or an early
+ * cancellation) or when the logger is below {@code INFO}.
- * Populated when {@link InferenceParameters#setNProbs(int)} is &gt; 0. The native server
+ * Populated when {@link InferenceParameters#withNProbs(int)} is &gt; 0. The native server
* emits one of two equivalent shapes depending on whether post-sampling probabilities are
* enabled:
* {@code toString} is generated by Lombok over the stored fields, with the size
+ * of the {@code topLogprobs} list (rather than the full list) rendered via
+ * {@link ToString.Include @ToString.Include} on a private accessor to preserve the
+ * handwritten "{@code top=N}" summary form. {@code equals}/{@code hashCode} are generated by Lombok over all fields.
+ * {@code toString} is intentionally handwritten (not Lombok-generated) so that
+ * tool-call traces in logs render in function-call syntax
+ * "{@code name(argsJson)[id]}" instead of a field dump.Design
+ *
+ * Construction patterns
+ *
+ * {@code
+ * InferenceParameters params = InferenceParameters.of("two plus two?")
+ * .withNPredict(8)
+ * .withSeed(1)
+ * .withTemperature(0.2f);
+ * }
+ *
+ * Map.of(15043, 1f)
- * to increase the likelihood of token ' Hello', or a negative value to decrease it.
- * Note, this method overrides any previous calls to
- *
- *
+ * Returns a new request with the logit bias (token-id form) replaced. Empty input is a
+ * no-op (returns {@code this}). This entry overrides any prior logit-bias setter.
*
- * @param logitBias a map from token id to bias value
- * @return this builder
+ * @param logitBias token-id to bias-value
+ * @return a new instance with the bias set, or {@code this} if {@code logitBias} is empty
*/
- public InferenceParameters setTokenIdBias(Map
- *
+ * Returns a new request with the disabled token-id set replaced (logit-bias form with
+ * negative infinity). Empty input is a no-op (returns {@code this}). Overrides prior
+ * logit-bias setters.
*
- * @param tokenIds the collection of token ids to disable
- * @return this builder
+ * @param tokenIds token ids to disable
+ * @return a new instance with the bias set, or {@code this} if {@code tokenIds} is empty
*/
- public InferenceParameters disableTokenIds(CollectionMap.of(" Hello", 1f)
- * to increase the likelihood of token id 15043, or a negative value to decrease it.
- * Note, this method overrides any previous calls to
- *
- *
+ * Returns a new request with the logit bias (token-string form) replaced. Empty input
+ * is a no-op (returns {@code this}). Overrides prior logit-bias setters.
*
- * @param logitBias a map from token string to bias value
- * @return this builder
+ * @param logitBias token string to bias value
+ * @return a new instance with the bias set, or {@code this} if {@code logitBias} is empty
*/
- public InferenceParameters setTokenBias(Map
- *
+ * Returns a new request with the disabled token-string set replaced (logit-bias form
+ * with negative infinity). Empty input is a no-op (returns {@code this}). Overrides
+ * prior logit-bias setters.
*
- * @param tokens the collection of token strings to disable
- * @return this builder
+ * @param tokens token strings to disable
+ * @return a new instance with the bias set, or {@code this} if {@code tokens} is empty
*/
- public InferenceParameters disableTokens(Collection{@code
- * Map
+ * Returns a new request with custom Jinja template kwargs replaced. Values must be
+ * valid JSON.
*
- * @param kwargs map of variable names to JSON-serialized values
- * @return this builder
+ * @param kwargs variable names to JSON-serialized values
+ * @return a new instance; this instance is unchanged
*/
- public InferenceParameters setChatTemplateKwargs(java.util.MapMap<String, String> with key value pairs.
+ * Immutable base for JSON-shaped parameter builders.
+ *
+ *
+ * prompt: 12 tok in 84.3 ms (142.4 tok/s) | gen: 256 tok in 5031.7 ms (50.9 tok/s) | cache: 0
+ *
+ *
+ *
+ * &lt;logger name="net.ladenthin.llama.timings" level="OFF"/&gt;
+ *
+ */
+public final class TimingsLogger {
+
+ /** Dedicated SLF4J logger name for the per-run timing line. */
+ public static final String LOGGER_NAME = "net.ladenthin.llama.timings";
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(LOGGER_NAME);
+
+ private TimingsLogger() {
+ // utility class; not instantiable.
+ }
+
+ /**
+ * Formats a single-line timing summary suitable for the {@value #LOGGER_NAME}
+ * SLF4J logger. Exposed for callers that want to emit the same line through
+ * a different sink (e.g. {@code System.err} in a CLI tool).
+ *
+ * @param t the timings to format
+ * @return a single-line summary (no trailing newline)
+ */
+    public static String format(Timings t) {
+        final StringBuilder line = new StringBuilder();
+
+        // Prompt segment: token count, elapsed milliseconds, tokens per second.
+        line.append("prompt: ").append(t.getPromptN());
+        line.append(" tok in ").append(formatMs(t.getPromptMs()));
+        line.append(" ms (").append(formatRate(t.getPromptPerSecond()));
+        line.append(" tok/s)");
+
+        // Generation segment, laid out exactly like the prompt segment.
+        line.append(" | gen: ").append(t.getPredictedN());
+        line.append(" tok in ").append(formatMs(t.getPredictedMs()));
+        line.append(" ms (").append(formatRate(t.getPredictedPerSecond()));
+        line.append(" tok/s)");
+
+        // The cache counter is appended unconditionally.
+        line.append(" | cache: ").append(t.getCacheN());
+
+        // The draft segment is appended only when the draft count is positive.
+        if (t.getDraftN() > 0) {
+            line.append(" | draft: ").append(t.getDraftN());
+            line.append(" (").append(t.getDraftNAccepted());
+            line.append(" accepted)");
+        }
+
+        return line.toString();
+    }
+
+ /**
+ * Logs the per-run timing summary at {@code INFO} level on the dedicated
+ * {@value #LOGGER_NAME} logger.
+ *
+ *
Value equality / {@code toString} are generated by Lombok over the two stored + * counters. The derived {@link #getTotalTokens()} sum is included in {@code toString} + * via {@link ToString.Include @ToString.Include} so the rendered output retains the + * convenience field that the handwritten version exposed.
*/ +@ToString +@EqualsAndHashCode public final class Usage { private final long promptTokens; @@ -49,27 +57,8 @@ public long getCompletionTokens() { * Convenience sum of the prompt and completion counts. * @return sum of prompt and completion tokens */ + @ToString.Include public long getTotalTokens() { return promptTokens + completionTokens; } - - @Override - public boolean equals(@Nullable Object o) { - if (this == o) return true; - if (!(o instanceof Usage)) return false; - Usage u = (Usage) o; - return promptTokens == u.promptTokens && completionTokens == u.completionTokens; - } - - @Override - public int hashCode() { - return (int) (promptTokens * 31 + completionTokens); - } - - @Override - public String toString() { - return "Usage{promptTokens=" + promptTokens - + ", completionTokens=" + completionTokens - + ", totalTokens=" + getTotalTokens() + "}"; - } } diff --git a/src/main/java/net/ladenthin/llama/args/ContinuationMode.java b/src/main/java/net/ladenthin/llama/args/ContinuationMode.java index 92fa58bd..b01f540f 100644 --- a/src/main/java/net/ladenthin/llama/args/ContinuationMode.java +++ b/src/main/java/net/ladenthin/llama/args/ContinuationMode.java @@ -11,7 +11,7 @@ *Maps to the string-valued branch of llama.cpp's * {@code common_chat_continuation_parse}. The boolean form * ({@code true}/{@code false}) is exposed separately via - * {@code InferenceParameters.setContinueFinalMessage(boolean)}. + * {@code InferenceParameters.withContinueFinalMessage(boolean)}. */ public enum ContinuationMode { diff --git a/src/main/java/net/ladenthin/llama/args/ReasoningFormat.java b/src/main/java/net/ladenthin/llama/args/ReasoningFormat.java index 60f93c85..84d2fba3 100644 --- a/src/main/java/net/ladenthin/llama/args/ReasoningFormat.java +++ b/src/main/java/net/ladenthin/llama/args/ReasoningFormat.java @@ -11,7 +11,7 @@ * *
Passed as {@code "reasoning_format"} in inference requests. Only meaningful when the model
* uses a thinking tag (e.g. {@code
When inference is configured with {@link InferenceParameters#setNProbs(int)} > 0, + *
When inference is configured with {@link InferenceParameters#withNProbs(int)} > 0, * each chunk additionally carries a {@code completion_probabilities} array: *
{@code
* {
@@ -119,7 +120,7 @@ public String extractContent(JsonNode node) {
* and do not interfere with field lookup.
*
* <p>Returns an empty map when the field is absent or the array is empty.
- * Requires {@code InferenceParameters#setNProbs(int)} to be configured before inference.
+ * Requires {@code InferenceParameters#withNProbs(int)} to be configured before inference.
*
* @param root the top-level completion response node
* @return map from token string to probability; empty when no probability data is present
@@ -152,7 +153,7 @@ public Map parseProbabilities(JsonNode root) {
* ({@code top_probs} for post-sampling mode or {@code top_logprobs} for pre-sampling).
*
* <p>Returns an empty list when the field is absent or empty. Requires
- * {@link InferenceParameters#setNProbs(int)} to be configured.
+ * {@link InferenceParameters#withNProbs(int)} to be configured.
*
* @param root the top-level completion response node
* @return list of {@link TokenLogprob}; empty when no probability data is present
@@ -191,6 +192,7 @@ public CompletionResult parseCompletionResult(String json) {
node.path("tokens_evaluated").asLong(0L),
node.path("tokens_predicted").asLong(0L));
Timings timings = Timings.fromJson(node.path("timings"));
+ TimingsLogger.log(timings);
List logprobs = parseLogprobs(node);
StopReason stopReason =
StopReason.fromStopType(node.path("stop_type").asText(""));
diff --git a/src/main/java/net/ladenthin/llama/json/ParameterJsonSerializer.java b/src/main/java/net/ladenthin/llama/json/ParameterJsonSerializer.java
index e469aa39..e6df169d 100644
--- a/src/main/java/net/ladenthin/llama/json/ParameterJsonSerializer.java
+++ b/src/main/java/net/ladenthin/llama/json/ParameterJsonSerializer.java
@@ -9,7 +9,6 @@
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ArrayNode;
-import org.jspecify.annotations.Nullable;
import com.fasterxml.jackson.databind.node.ObjectNode;
import java.io.IOException;
import java.util.Collection;
@@ -19,6 +18,7 @@
import net.ladenthin.llama.ContentPart;
import net.ladenthin.llama.Pair;
import net.ladenthin.llama.args.Sampler;
+import org.jspecify.annotations.Nullable;
/**
* Pure JSON builders for inference request parameters.
@@ -119,8 +119,8 @@ public ArrayNode buildMessages(List messages) {
msg.put("role", message.getRole());
if (message.hasParts()) {
ArrayNode parts = OBJECT_MAPPER.createArrayNode();
- for (ContentPart p : message.getParts().orElseThrow(
- () -> new IllegalStateException("hasParts() was true but getParts() was empty"))) {
+ for (ContentPart p : message.getParts()
+ .orElseThrow(() -> new IllegalStateException("hasParts() was true but getParts() was empty"))) {
ObjectNode part = OBJECT_MAPPER.createObjectNode();
if (p.getType() == ContentPart.Type.TEXT) {
part.put("type", "text");
@@ -183,7 +183,7 @@ public ArrayNode buildSamplers(Sampler... samplers) {
* @param values the token IDs to include
* @return a Jackson {@link ArrayNode} of integer values
*/
- public ArrayNode buildIntArray(int[] values) {
+ public ArrayNode buildIntArray(int... values) {
ArrayNode arr = OBJECT_MAPPER.createArrayNode();
for (int v : values) arr.add(v);
return arr;
diff --git a/src/test/java/examples/ChatExample.java b/src/test/java/examples/ChatExample.java
index e185475c..4a225eea 100644
--- a/src/test/java/examples/ChatExample.java
+++ b/src/test/java/examples/ChatExample.java
@@ -34,8 +34,8 @@ public static void main(String... args) throws Exception {
messages.add(new Pair<>("user", input));
StringBuilder response = new StringBuilder();
InferenceParameters inferParams = new InferenceParameters("")
- .setMessages(system, messages)
- .setUseChatTemplate(true);
+ .withMessages(system, messages)
+ .withUseChatTemplate(true);
System.out.print("Assistant: ");
for (LlamaOutput output : model.generate(inferParams)) {
System.out.print(output);
diff --git a/src/test/java/examples/GrammarExample.java b/src/test/java/examples/GrammarExample.java
index b633f270..02b97134 100644
--- a/src/test/java/examples/GrammarExample.java
+++ b/src/test/java/examples/GrammarExample.java
@@ -16,7 +16,7 @@ public static void main(String... args) {
String grammar =
"root ::= (expr \"=\" term \"\\n\")+\n" + "expr ::= term ([-+*/] term)*\n" + "term ::= [0-9]";
ModelParameters modelParams = new ModelParameters().setModel("models/mistral-7b-instruct-v0.2.Q2_K.gguf");
- InferenceParameters inferParams = new InferenceParameters("").setGrammar(grammar);
+ InferenceParameters inferParams = new InferenceParameters("").withGrammar(grammar);
try (LlamaModel model = new LlamaModel(modelParams)) {
for (LlamaOutput output : model.generate(inferParams)) {
System.out.print(output);
diff --git a/src/test/java/examples/InfillExample.java b/src/test/java/examples/InfillExample.java
index 93d758b9..9ef9e1f5 100644
--- a/src/test/java/examples/InfillExample.java
+++ b/src/test/java/examples/InfillExample.java
@@ -21,7 +21,7 @@ public static void main(String... args) {
try (LlamaModel model = new LlamaModel(modelParams)) {
System.out.print(prefix);
InferenceParameters inferParams =
- new InferenceParameters("").setInputPrefix(prefix).setInputSuffix(suffix);
+ new InferenceParameters("").withInputPrefix(prefix).withInputSuffix(suffix);
for (LlamaOutput output : model.generate(inferParams)) {
System.out.print(output);
}
diff --git a/src/test/java/examples/MainExample.java b/src/test/java/examples/MainExample.java
index 8c6c40e1..c37c2d97 100644
--- a/src/test/java/examples/MainExample.java
+++ b/src/test/java/examples/MainExample.java
@@ -39,10 +39,10 @@ public static void main(String... args) throws IOException {
System.out.print("Llama: ");
prompt += "\nLlama: ";
InferenceParameters inferParams = new InferenceParameters(prompt)
- .setTemperature(0.7f)
- .setPenalizeNl(true)
- .setMiroStat(MiroStat.V2)
- .setStopStrings("User:");
+ .withTemperature(0.7f)
+ .withPenalizeNl(true)
+ .withMiroStat(MiroStat.V2)
+ .withStopStrings("User:");
for (LlamaOutput output : model.generate(inferParams)) {
System.out.print(output);
prompt += output;
diff --git a/src/test/java/net/ladenthin/llama/ChatAdvancedTest.java b/src/test/java/net/ladenthin/llama/ChatAdvancedTest.java
index a57a88b6..6f07530f 100644
--- a/src/test/java/net/ladenthin/llama/ChatAdvancedTest.java
+++ b/src/test/java/net/ladenthin/llama/ChatAdvancedTest.java
@@ -83,10 +83,10 @@ public static void tearDown() {
@Test
public void testCachePromptConsistentOutput() {
InferenceParameters params = new InferenceParameters(SIMPLE_PROMPT)
- .setNPredict(N_PREDICT)
- .setSeed(42)
- .setTemperature(0.0f)
- .setCachePrompt(true);
+ .withNPredict(N_PREDICT)
+ .withSeed(42)
+ .withTemperature(0.0f)
+ .withCachePrompt(true);
String first = model.complete(params);
String second = model.complete(params);
@@ -108,10 +108,10 @@ public void testCachePromptConsistentOutput() {
public void testUnboundedGenerationTerminatesAtStopString() {
// Use a stop string that the model will produce quickly
InferenceParameters params = new InferenceParameters("A B C D E F G")
- .setNPredict(-1)
- .setSeed(42)
- .setTemperature(0.0f)
- .setStopStrings("E");
+ .withNPredict(-1)
+ .withSeed(42)
+ .withTemperature(0.0f)
+ .withStopStrings("E");
String output = model.complete(params);
@@ -132,11 +132,11 @@ public void testUnboundedGenerationTerminatesAtStopString() {
@Test
public void testSetNProbsStreamingJsonHasProbabilities() {
InferenceParameters params = new InferenceParameters(SIMPLE_PROMPT)
- .setNPredict(5)
- .setSeed(42)
- .setTemperature(0.0f)
- .setNProbs(3)
- .setStream(true);
+ .withNPredict(5)
+ .withSeed(42)
+ .withTemperature(0.0f)
+ .withNProbs(3)
+ .withStream(true);
int taskId = model.requestCompletion(params.toString());
@@ -196,7 +196,7 @@ public void testCustomChatTemplateAcceptedWithoutError() {
String customTemplate = "{% for m in messages %}" + "{{ m.role | upper }}: {{ m.content }}" + "{% endfor %}";
InferenceParameters params =
- new InferenceParameters("").setMessages(null, messages).setChatTemplate(customTemplate);
+ new InferenceParameters("").withMessages(null, messages).withChatTemplate(customTemplate);
// Must not throw; parameter is accepted and forwarded to native layer
String result = model.applyTemplate(params);
@@ -224,11 +224,11 @@ public void testUseChatTemplateInGenerate() {
messages.add(new Pair<>("user", "Write one word."));
InferenceParameters params = new InferenceParameters("")
- .setMessages(null, messages)
- .setUseChatTemplate(true)
- .setNPredict(N_PREDICT)
- .setSeed(42)
- .setTemperature(0.0f);
+ .withMessages(null, messages)
+ .withUseChatTemplate(true)
+ .withNPredict(N_PREDICT)
+ .withSeed(42)
+ .withTemperature(0.0f);
StringBuilder output = new StringBuilder();
for (LlamaOutput token : model.generate(params)) {
@@ -250,13 +250,13 @@ public void testUseChatTemplateInGenerate() {
@Test
public void testRepeatAndFrequencyAndPresencePenalty() {
InferenceParameters params = new InferenceParameters(SIMPLE_PROMPT)
- .setNPredict(N_PREDICT)
- .setSeed(42)
- .setTemperature(0.5f)
- .setRepeatPenalty(1.3f)
- .setFrequencyPenalty(0.3f)
- .setPresencePenalty(0.2f)
- .setRepeatLastN(32);
+ .withNPredict(N_PREDICT)
+ .withSeed(42)
+ .withTemperature(0.5f)
+ .withRepeatPenalty(1.3f)
+ .withFrequencyPenalty(0.3f)
+ .withPresencePenalty(0.2f)
+ .withRepeatLastN(32);
String output = model.complete(params);
assertFalse(output.isEmpty(), "Penalty params must not produce empty output");
@@ -274,12 +274,12 @@ public void testRepeatAndFrequencyAndPresencePenalty() {
@Test
public void testCustomSamplerChain() {
InferenceParameters params = new InferenceParameters(SIMPLE_PROMPT)
- .setNPredict(N_PREDICT)
- .setSeed(42)
- .setTemperature(0.7f)
- .setTopK(40)
- .setTopP(0.9f)
- .setSamplers(Sampler.TOP_K, Sampler.TOP_P, Sampler.TEMPERATURE);
+ .withNPredict(N_PREDICT)
+ .withSeed(42)
+ .withTemperature(0.7f)
+ .withTopK(40)
+ .withTopP(0.9f)
+ .withSamplers(Sampler.TOP_K, Sampler.TOP_P, Sampler.TEMPERATURE);
String output = model.complete(params);
assertFalse(output.isEmpty(), "Custom sampler chain must produce non-empty output");
@@ -297,11 +297,11 @@ public void testCustomSamplerChain() {
@Test
public void testMiroStatV2Sampling() {
InferenceParameters params = new InferenceParameters(SIMPLE_PROMPT)
- .setNPredict(N_PREDICT)
- .setSeed(42)
- .setMiroStat(MiroStat.V2)
- .setMiroStatTau(5.0f)
- .setMiroStatEta(0.1f);
+ .withNPredict(N_PREDICT)
+ .withSeed(42)
+ .withMiroStat(MiroStat.V2)
+ .withMiroStatTau(5.0f)
+ .withMiroStatEta(0.1f);
String output = model.complete(params);
assertFalse(output.isEmpty(), "MiroStat V2 must produce non-empty output");
@@ -319,10 +319,10 @@ public void testMiroStatV2Sampling() {
@Test
public void testRequestCompletionDirectStreaming() {
InferenceParameters params = new InferenceParameters(SIMPLE_PROMPT)
- .setNPredict(N_PREDICT)
- .setSeed(42)
- .setTemperature(0.0f)
- .setStream(true);
+ .withNPredict(N_PREDICT)
+ .withSeed(42)
+ .withTemperature(0.0f)
+ .withStream(true);
int taskId = model.requestCompletion(params.toString());
@@ -377,10 +377,10 @@ public void testDisableTokenIdsAccepted() {
int disabledId = eosTokens[eosTokens.length - 1];
InferenceParameters params = new InferenceParameters(SIMPLE_PROMPT)
- .setNPredict(N_PREDICT)
- .setSeed(42)
- .setTemperature(0.0f)
- .disableTokenIds(Collections.singletonList(disabledId));
+ .withNPredict(N_PREDICT)
+ .withSeed(42)
+ .withTemperature(0.0f)
+ .withDisabledTokenIds(Collections.singletonList(disabledId));
String output = model.complete(params);
assertFalse(output.isEmpty(), "disableTokenIds must not produce empty output");
@@ -398,11 +398,11 @@ public void testDisableTokenIdsAccepted() {
@Test
public void testPenaltyPromptStringAccepted() {
InferenceParameters params = new InferenceParameters(SIMPLE_PROMPT)
- .setNPredict(N_PREDICT)
- .setSeed(42)
- .setTemperature(0.0f)
- .setPenaltyPrompt("def ")
- .setRepeatPenalty(1.2f);
+ .withNPredict(N_PREDICT)
+ .withSeed(42)
+ .withTemperature(0.0f)
+ .withPenaltyPrompt("def ")
+ .withRepeatPenalty(1.2f);
assertFalse(model.complete(params).isEmpty(), "setPenaltyPrompt(String) must produce output");
}
@@ -413,11 +413,11 @@ public void testPenaltyPromptTokenArrayAccepted() {
Assumptions.assumeTrue(penaltyTokens.length > 0, "Need at least one penalty token");
InferenceParameters params = new InferenceParameters(SIMPLE_PROMPT)
- .setNPredict(N_PREDICT)
- .setSeed(42)
- .setTemperature(0.0f)
- .setPenaltyPrompt(penaltyTokens)
- .setRepeatPenalty(1.2f);
+ .withNPredict(N_PREDICT)
+ .withSeed(42)
+ .withTemperature(0.0f)
+ .withPenaltyPrompt(penaltyTokens)
+ .withRepeatPenalty(1.2f);
assertFalse(model.complete(params).isEmpty(), "setPenaltyPrompt(int[]) must produce output");
}
@@ -434,10 +434,10 @@ public void testPenaltyPromptTokenArrayAccepted() {
public void testMultipleStopStringsFirstMatchTerminates() {
// Prompt that will produce digits quickly; stop at first of several options
InferenceParameters params = new InferenceParameters("1 2 3 4 5 6 7 8 9")
- .setNPredict(N_PREDICT)
- .setSeed(42)
- .setTemperature(0.0f)
- .setStopStrings("4", "5", "6");
+ .withNPredict(N_PREDICT)
+ .withSeed(42)
+ .withTemperature(0.0f)
+ .withStopStrings("4", "5", "6");
String output = model.complete(params);
@@ -460,10 +460,10 @@ public void testMultipleStopStringsFirstMatchTerminates() {
@Test
public void testMinPSamplerAccepted() {
InferenceParameters params = new InferenceParameters(SIMPLE_PROMPT)
- .setNPredict(N_PREDICT)
- .setSeed(42)
- .setTemperature(0.7f)
- .setMinP(0.05f);
+ .withNPredict(N_PREDICT)
+ .withSeed(42)
+ .withTemperature(0.7f)
+ .withMinP(0.05f);
assertFalse(model.complete(params).isEmpty(), "setMinP must produce output");
}
@@ -471,10 +471,10 @@ public void testMinPSamplerAccepted() {
@Test
public void testTfsZSamplerAccepted() {
InferenceParameters params = new InferenceParameters(SIMPLE_PROMPT)
- .setNPredict(N_PREDICT)
- .setSeed(42)
- .setTemperature(0.7f)
- .setTfsZ(0.95f);
+ .withNPredict(N_PREDICT)
+ .withSeed(42)
+ .withTemperature(0.7f)
+ .withTfsZ(0.95f);
assertFalse(model.complete(params).isEmpty(), "setTfsZ must produce output");
}
@@ -482,10 +482,10 @@ public void testTfsZSamplerAccepted() {
@Test
public void testTypicalPSamplerAccepted() {
InferenceParameters params = new InferenceParameters(SIMPLE_PROMPT)
- .setNPredict(N_PREDICT)
- .setSeed(42)
- .setTemperature(0.7f)
- .setTypicalP(0.9f);
+ .withNPredict(N_PREDICT)
+ .withSeed(42)
+ .withTemperature(0.7f)
+ .withTypicalP(0.9f);
assertFalse(model.complete(params).isEmpty(), "setTypicalP must produce output");
}
@@ -502,10 +502,10 @@ public void testTypicalPSamplerAccepted() {
@Test
public void testNKeepAllTokensAccepted() {
InferenceParameters params = new InferenceParameters(SIMPLE_PROMPT)
- .setNPredict(N_PREDICT)
- .setSeed(42)
- .setTemperature(0.0f)
- .setNKeep(-1);
+ .withNPredict(N_PREDICT)
+ .withSeed(42)
+ .withTemperature(0.0f)
+ .withNKeep(-1);
assertFalse(model.complete(params).isEmpty(), "setNKeep(-1) must produce output");
}
@@ -523,10 +523,10 @@ public void testNKeepAllTokensAccepted() {
public void testDisableTokensStringFormAccepted() {
// Disable a token that is very unlikely to appear in a Python snippet
InferenceParameters params = new InferenceParameters(SIMPLE_PROMPT)
- .setNPredict(N_PREDICT)
- .setSeed(42)
- .setTemperature(0.0f)
- .disableTokens(Arrays.asList("!!!"));
+ .withNPredict(N_PREDICT)
+ .withSeed(42)
+ .withTemperature(0.0f)
+ .withDisabledTokens(Arrays.asList("!!!"));
assertFalse(model.complete(params).isEmpty(), "disableTokens must not produce empty output");
}
@@ -542,11 +542,11 @@ public void testDisableTokensStringFormAccepted() {
@Test
public void testMiroStatV1Sampling() {
InferenceParameters params = new InferenceParameters(SIMPLE_PROMPT)
- .setNPredict(N_PREDICT)
- .setSeed(42)
- .setMiroStat(MiroStat.V1)
- .setMiroStatTau(5.0f)
- .setMiroStatEta(0.1f);
+ .withNPredict(N_PREDICT)
+ .withSeed(42)
+ .withMiroStat(MiroStat.V1)
+ .withMiroStatTau(5.0f)
+ .withMiroStatEta(0.1f);
assertFalse(model.complete(params).isEmpty(), "MiroStat V1 must produce non-empty output");
}
diff --git a/src/test/java/net/ladenthin/llama/ChatRequestTest.java b/src/test/java/net/ladenthin/llama/ChatRequestTest.java
new file mode 100644
index 00000000..cde53682
--- /dev/null
+++ b/src/test/java/net/ladenthin/llama/ChatRequestTest.java
@@ -0,0 +1,182 @@
+// SPDX-FileCopyrightText: 2026 Bernard Ladenthin
+//
+// SPDX-License-Identifier: MIT
+
+package net.ladenthin.llama;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
+import static org.junit.jupiter.api.Assertions.assertNotSame;
+import static org.junit.jupiter.api.Assertions.assertSame;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import org.junit.jupiter.api.DisplayName;
+import org.junit.jupiter.api.Nested;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Running documentation of the {@link ChatRequest} immutability and wither-pattern
+ * contract. Every modifier returns a NEW request and never mutates the original.
+ * Requests with the same content compare equal regardless of identity; the
+ * inference customizer is excluded from that comparison.
+ */
+class ChatRequestTest {
+
+ @Nested
+ @DisplayName("immutability — every modifier returns a fresh instance")
+ class Immutability {
+
+ @Test
+ void appendMessageReturnsNewInstance() {
+ ChatRequest original = ChatRequest.empty();
+ ChatRequest derived = original.appendMessage("user", "hi");
+ assertNotSame(original, derived);
+ assertEquals(0, original.getMessages().size(), "original is untouched");
+ assertEquals(1, derived.getMessages().size(), "derived has the message");
+ }
+
+ @Test
+ void appendToolReturnsNewInstance() {
+ ChatRequest original = ChatRequest.empty();
+ ChatRequest derived = original.appendTool(new ToolDefinition("echo", "Echo", "{}"));
+ assertNotSame(original, derived);
+ assertEquals(0, original.getTools().size());
+ assertEquals(1, derived.getTools().size());
+ }
+
+ @Test
+ void withToolChoiceReturnsNewInstance() {
+ ChatRequest original = ChatRequest.empty();
+ ChatRequest derived = original.withToolChoice("auto");
+ assertNotSame(original, derived);
+ assertFalse(original.getToolChoice().isPresent(), "original toolChoice unset");
+ assertEquals("auto", derived.getToolChoice().orElseThrow());
+ }
+
+ @Test
+ void withMaxToolRoundsReturnsNewInstance() {
+ ChatRequest original = ChatRequest.empty();
+ ChatRequest derived = original.withMaxToolRounds(2);
+ assertNotSame(original, derived);
+ assertEquals(ChatRequest.DEFAULT_MAX_TOOL_ROUNDS, original.getMaxToolRounds());
+ assertEquals(2, derived.getMaxToolRounds());
+ }
+
+ @Test
+ void withInferenceCustomizerReturnsNewInstance() {
+ ChatRequest original = ChatRequest.empty();
+ ChatRequest derived = original.withInferenceCustomizer(p -> p.withSeed(42));
+ assertNotSame(original, derived);
+ }
+
+ @Test
+ @DisplayName("chained derivations leave every intermediate untouched")
+ void chainedDerivationsLeaveIntermediatesUntouched() {
+ ChatRequest a = ChatRequest.empty();
+ ChatRequest b = a.appendMessage("user", "hi");
+ ChatRequest c = b.appendMessage("assistant", "hello");
+ ChatRequest d = c.withMaxToolRounds(3);
+
+ assertEquals(0, a.getMessages().size());
+ assertEquals(1, b.getMessages().size());
+ assertEquals(2, c.getMessages().size());
+ assertEquals(2, d.getMessages().size());
+ assertEquals(ChatRequest.DEFAULT_MAX_TOOL_ROUNDS, c.getMaxToolRounds());
+ assertEquals(3, d.getMaxToolRounds());
+ }
+
+ @Test
+ @DisplayName("the messages accessor returns an unmodifiable view")
+ void messagesAccessorIsUnmodifiable() {
+ ChatRequest req = ChatRequest.empty().appendMessage("user", "hi");
+ assertThrows(UnsupportedOperationException.class, () -> req.getMessages().clear());
+ }
+
+ @Test
+ @DisplayName("the tools accessor returns an unmodifiable view")
+ void toolsAccessorIsUnmodifiable() {
+ ChatRequest req = ChatRequest.empty().appendTool(new ToolDefinition("e", "d", "{}"));
+ assertThrows(UnsupportedOperationException.class, () -> req.getTools().clear());
+ }
+ }
+
+ @Nested
+ @DisplayName("equality — value semantics")
+ class Equality {
+
+ @Test
+ void twoEmptyRequestsAreEqual() {
+ assertEquals(ChatRequest.empty(), ChatRequest.empty());
+ }
+
+ @Test
+ void sameContentSameEquality() {
+ ChatRequest a = ChatRequest.empty().appendMessage("user", "hi").withMaxToolRounds(3);
+ ChatRequest b = ChatRequest.empty().appendMessage("user", "hi").withMaxToolRounds(3);
+ assertEquals(a, b);
+ assertEquals(a.hashCode(), b.hashCode());
+ }
+
+ @Test
+ void differentMessagesNotEqual() {
+ ChatRequest a = ChatRequest.empty().appendMessage("user", "hi");
+ ChatRequest b = ChatRequest.empty().appendMessage("user", "bye");
+ assertNotEquals(a, b);
+ }
+
+ @Test
+ void differentMaxToolRoundsNotEqual() {
+ ChatRequest a = ChatRequest.empty().withMaxToolRounds(2);
+ ChatRequest b = ChatRequest.empty().withMaxToolRounds(3);
+ assertNotEquals(a, b);
+ }
+
+ @Test
+ @DisplayName("the customiser is excluded from equality — two requests with the same content but different lambdas are equal")
+ void customizerExcludedFromEquality() {
+ ChatRequest a = ChatRequest.empty().withInferenceCustomizer(p -> p.withSeed(1));
+ ChatRequest b = ChatRequest.empty().withInferenceCustomizer(p -> p.withSeed(2));
+ assertEquals(a, b, "different lambda identities must NOT make the requests unequal");
+ }
+ }
+
+ @Nested
+ @DisplayName("validation")
+ class Validation {
+
+ @Test
+ void withMaxToolRoundsRejectsZero() {
+ assertThrows(IllegalArgumentException.class, () -> ChatRequest.empty().withMaxToolRounds(0));
+ }
+
+ @Test
+ void withMaxToolRoundsRejectsNegative() {
+ assertThrows(IllegalArgumentException.class, () -> ChatRequest.empty().withMaxToolRounds(-1));
+ }
+
+ @Test
+ void emptyMessageIsTheCanonicalStartingPoint() {
+ assertSame(ChatRequest.empty(), ChatRequest.empty(), "empty() is a cached singleton");
+ }
+ }
+
+ @Nested
+ @DisplayName("JSON-build helpers stay read-only")
+ class JsonHelpers {
+
+ @Test
+ void buildMessagesJsonDoesNotMutate() {
+ ChatRequest req = ChatRequest.empty().appendMessage("user", "hi");
+ String json = req.buildMessagesJson();
+ assertTrue(json.contains("\"user\""), json);
+ assertEquals(1, req.getMessages().size(), "build did not mutate the messages list");
+ }
+
+ @Test
+ void buildToolsJsonEmptyWhenNoTools() {
+ assertFalse(ChatRequest.empty().buildToolsJson().isPresent());
+ }
+ }
+}
diff --git a/src/test/java/net/ladenthin/llama/ChatResponseTest.java b/src/test/java/net/ladenthin/llama/ChatResponseTest.java
index 9769a7e8..b35611c3 100644
--- a/src/test/java/net/ladenthin/llama/ChatResponseTest.java
+++ b/src/test/java/net/ladenthin/llama/ChatResponseTest.java
@@ -95,12 +95,12 @@ public void malformedInputYieldsEmptyResponse() {
@Test
public void buildMessagesJsonRoundTripsToolTurns() {
- ChatRequest req = new ChatRequest()
- .addMessage("system", "be terse")
- .addMessage("user", "two plus two?")
- .addMessage(ChatMessage.assistantToolCalls(
+ ChatRequest req = ChatRequest.empty()
+ .appendMessage("system", "be terse")
+ .appendMessage("user", "two plus two?")
+ .appendMessage(ChatMessage.assistantToolCalls(
"", java.util.Collections.singletonList(new ToolCall("c1", "add", "{\"a\":2,\"b\":2}"))))
- .addMessage(ChatMessage.toolResult("c1", "4"));
+ .appendMessage(ChatMessage.toolResult("c1", "4"));
String msgs = req.buildMessagesJson();
assertTrue(msgs.contains("\"tool_calls\""), msgs);
@@ -110,14 +110,14 @@ public void buildMessagesJsonRoundTripsToolTurns() {
@Test
public void buildToolsJsonEmptyWhenNoTools() {
- ChatRequest req = new ChatRequest().addMessage("user", "hi");
+ ChatRequest req = ChatRequest.empty().appendMessage("user", "hi");
assertTrue(req.buildToolsJson().isEmpty());
}
@Test
public void buildToolsJsonInlinesParameterSchema() {
- ChatRequest req = new ChatRequest()
- .addTool(new ToolDefinition(
+ ChatRequest req = ChatRequest.empty()
+ .appendTool(new ToolDefinition(
"echo", "Echo a string", "{\"type\":\"object\",\"properties\":{\"s\":{\"type\":\"string\"}}}"));
String tools = req.buildToolsJson().orElseThrow();
assertTrue(tools.contains("\"type\":\"function\""), tools);
diff --git a/src/test/java/net/ladenthin/llama/ChatScenarioTest.java b/src/test/java/net/ladenthin/llama/ChatScenarioTest.java
index 4a968b4e..72f82952 100644
--- a/src/test/java/net/ladenthin/llama/ChatScenarioTest.java
+++ b/src/test/java/net/ladenthin/llama/ChatScenarioTest.java
@@ -90,10 +90,10 @@ public void testChatCompleteResponseJsonStructure() {
messages.add(new Pair<>("user", "Say the word OK."));
InferenceParameters params = new InferenceParameters("")
- .setMessages(null, messages)
- .setNPredict(N_PREDICT)
- .setSeed(42)
- .setTemperature(0.0f);
+ .withMessages(null, messages)
+ .withNPredict(N_PREDICT)
+ .withSeed(42)
+ .withTemperature(0.0f);
String response = model.chatComplete(params);
@@ -117,10 +117,10 @@ public void testChatCompleteTextReturnsPlainString() {
messages.add(new Pair<>("user", "Say the word OK."));
InferenceParameters params = new InferenceParameters("")
- .setMessages(null, messages)
- .setNPredict(N_PREDICT)
- .setSeed(42)
- .setTemperature(0.0f);
+ .withMessages(null, messages)
+ .withNPredict(N_PREDICT)
+ .withSeed(42)
+ .withTemperature(0.0f);
String text = model.chatCompleteText(params);
@@ -139,10 +139,10 @@ public void testChatCompleteTextMatchesChatCompleteContent() {
messages.add(new Pair<>("user", "What is 2 plus 2?"));
InferenceParameters params = new InferenceParameters("")
- .setMessages("You are a helpful assistant.", messages)
- .setNPredict(N_PREDICT)
- .setSeed(42)
- .setTemperature(0.0f);
+ .withMessages("You are a helpful assistant.", messages)
+ .withNPredict(N_PREDICT)
+ .withSeed(42)
+ .withTemperature(0.0f);
String rawJson = model.chatComplete(params);
String text = model.chatCompleteText(params);
@@ -182,11 +182,11 @@ public void testRequestChatCompletionDirectStreaming() {
messages.add(new Pair<>("user", "Write a single word."));
InferenceParameters params = new InferenceParameters("")
- .setMessages(null, messages)
- .setNPredict(N_PREDICT)
- .setSeed(42)
- .setTemperature(0.0f)
- .setStream(true);
+ .withMessages(null, messages)
+ .withNPredict(N_PREDICT)
+ .withSeed(42)
+ .withTemperature(0.0f)
+ .withStream(true);
int taskId = model.requestChatCompletion(params.toString());
@@ -232,10 +232,10 @@ public void testStreamingAndBlockingOutputBothNonEmpty() {
// Blocking
InferenceParameters blockingParams = new InferenceParameters("")
- .setMessages(null, messages)
- .setNPredict(N_PREDICT)
- .setSeed(123)
- .setTemperature(0.0f);
+ .withMessages(null, messages)
+ .withNPredict(N_PREDICT)
+ .withSeed(123)
+ .withTemperature(0.0f);
String blockingJson = model.chatComplete(blockingParams);
assertNotNull(blockingJson, "Blocking chat must return non-null JSON");
assertFalse(blockingJson.isEmpty(), "Blocking chat must return non-empty JSON");
@@ -243,10 +243,10 @@ public void testStreamingAndBlockingOutputBothNonEmpty() {
// Streaming
InferenceParameters streamingParams = new InferenceParameters("")
- .setMessages(null, messages)
- .setNPredict(N_PREDICT)
- .setSeed(123)
- .setTemperature(0.0f);
+ .withMessages(null, messages)
+ .withNPredict(N_PREDICT)
+ .withSeed(123)
+ .withTemperature(0.0f);
StringBuilder streamedContent = new StringBuilder();
for (LlamaOutput output : model.generateChat(streamingParams)) {
streamedContent.append(output.text);
@@ -269,20 +269,20 @@ public void testChatCompleteWithStopString() {
// Unconstrained
InferenceParameters unconstrained = new InferenceParameters("")
- .setMessages(null, messages)
- .setNPredict(N_PREDICT)
- .setSeed(42)
- .setTemperature(0.0f);
+ .withMessages(null, messages)
+ .withNPredict(N_PREDICT)
+ .withSeed(42)
+ .withTemperature(0.0f);
String unJson = model.chatComplete(unconstrained);
String unContent = chatParser.extractChoiceContent(unJson);
// Stopped at "3"
InferenceParameters stopped = new InferenceParameters("")
- .setMessages(null, messages)
- .setNPredict(N_PREDICT)
- .setSeed(42)
- .setTemperature(0.0f)
- .setStopStrings("4");
+ .withMessages(null, messages)
+ .withNPredict(N_PREDICT)
+ .withSeed(42)
+ .withTemperature(0.0f)
+ .withStopStrings("4");
String stJson = model.chatComplete(stopped);
String stContent = chatParser.extractChoiceContent(stJson);
@@ -317,11 +317,11 @@ public void testChatCompleteWithGrammarDoesNotThrow() {
messages.add(new Pair<>("user", "Generate output."));
InferenceParameters params = new InferenceParameters("")
- .setMessages(null, messages)
- .setGrammar("root ::= (\"a\" | \"b\")+")
- .setNPredict(N_PREDICT)
- .setSeed(42)
- .setTemperature(0.0f);
+ .withMessages(null, messages)
+ .withGrammar("root ::= (\"a\" | \"b\")+")
+ .withNPredict(N_PREDICT)
+ .withSeed(42)
+ .withTemperature(0.0f);
String responseJson = model.chatComplete(params);
@@ -349,10 +349,10 @@ public void testChatCompleteMultiTurnThreeTurns() {
for (int turn = 0; turn < 3; turn++) {
InferenceParameters params = new InferenceParameters("")
- .setMessages(null, messages)
- .setNPredict(N_PREDICT)
- .setSeed(42)
- .setTemperature(0.0f);
+ .withMessages(null, messages)
+ .withNPredict(N_PREDICT)
+ .withSeed(42)
+ .withTemperature(0.0f);
String json = model.chatComplete(params);
String content = chatParser.extractChoiceContent(json);
@@ -383,10 +383,10 @@ public void testChatCompleteWithUnicodeContent() {
messages.add(new Pair<>("user", "Translate: café résumé naïve"));
InferenceParameters params = new InferenceParameters("")
- .setMessages(null, messages)
- .setNPredict(N_PREDICT)
- .setSeed(42)
- .setTemperature(0.0f);
+ .withMessages(null, messages)
+ .withNPredict(N_PREDICT)
+ .withSeed(42)
+ .withTemperature(0.0f);
// Must not throw
String response = model.chatComplete(params);
@@ -410,10 +410,10 @@ public void testChatCompleteWithSpecialCharactersInContent() {
messages.add(new Pair<>("user", "He said \"hello\", path: C:\\tmp\nNew line."));
InferenceParameters params = new InferenceParameters("")
- .setMessages(null, messages)
- .setNPredict(N_PREDICT)
- .setSeed(42)
- .setTemperature(0.0f);
+ .withMessages(null, messages)
+ .withNPredict(N_PREDICT)
+ .withSeed(42)
+ .withTemperature(0.0f);
// Must not throw a JSON parse error in the native layer
String response = model.chatComplete(params);
@@ -440,10 +440,10 @@ public void testBackToBackChatCalls() {
messages.add(new Pair<>("user", prompts[i]));
InferenceParameters params = new InferenceParameters("")
- .setMessages(null, messages)
- .setNPredict(N_PREDICT)
- .setSeed(42)
- .setTemperature(0.0f);
+ .withMessages(null, messages)
+ .withNPredict(N_PREDICT)
+ .withSeed(42)
+ .withTemperature(0.0f);
responses[i] = model.chatComplete(params);
assertNotNull(responses[i], "Call " + i + " must not return null");
@@ -582,7 +582,7 @@ public void testHandleDetokenizeRoundTrip() {
@Test
public void testSaveAndRestoreSlot() throws IOException {
// Prime the slot with a short generation so there is state to save
- model.complete(new InferenceParameters("Hello").setNPredict(5).setSeed(42));
+ model.complete(new InferenceParameters("Hello").withNPredict(5).withSeed(42));
File tempFile = File.createTempFile("llama_slot_", ".bin");
tempFile.deleteOnExit();
@@ -620,10 +620,10 @@ public void testChatCompleteNPredictOne() {
messages.add(new Pair<>("user", "Say X."));
InferenceParameters params = new InferenceParameters("")
- .setMessages(null, messages)
- .setNPredict(1)
- .setSeed(42)
- .setTemperature(0.0f);
+ .withMessages(null, messages)
+ .withNPredict(1)
+ .withSeed(42)
+ .withTemperature(0.0f);
String response = model.chatComplete(params);
assertNotNull(response);
@@ -648,10 +648,10 @@ public void testGenerateChatStopFlagOnFinalToken() {
messages.add(new Pair<>("user", "Write one word."));
InferenceParameters params = new InferenceParameters("")
- .setMessages(null, messages)
- .setNPredict(N_PREDICT)
- .setSeed(42)
- .setTemperature(0.0f);
+ .withMessages(null, messages)
+ .withNPredict(N_PREDICT)
+ .withSeed(42)
+ .withTemperature(0.0f);
List outputs = new ArrayList<>();
for (LlamaOutput output : model.generateChat(params)) {
diff --git a/src/test/java/net/ladenthin/llama/ChatTranscriptTest.java b/src/test/java/net/ladenthin/llama/ChatTranscriptTest.java
new file mode 100644
index 00000000..b9600bbd
--- /dev/null
+++ b/src/test/java/net/ladenthin/llama/ChatTranscriptTest.java
@@ -0,0 +1,259 @@
+// SPDX-FileCopyrightText: 2026 Bernard Ladenthin
+//
+// SPDX-License-Identifier: MIT
+
+package net.ladenthin.llama;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotSame;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.List;
+import org.junit.jupiter.api.DisplayName;
+import org.junit.jupiter.api.Nested;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Running documentation of the two-phase commit invariant that
+ * {@link Session#send(String)} and {@link Session#stream(String)} rely on.
+ *
+ * The transcript management was extracted from {@code Session} into
+ * {@link ChatTranscript} precisely so this invariant — "transcript is mutated
+ * only on the model-call success path; on failure the pending user turn
+ * evaporates" — could be unit-tested without a GGUF model or the native
+ * {@code libjllama} library.
+ *
+ * <p>The contract is enforced by the API shape itself, not by tests:
+ *
+ *
+ * - The only "commit a full round" method is {@link
+ * ChatTranscript#appendRound(String, String)}, which appends both turns
+ * atomically. There is no way to commit just the user turn through this
+ * API.
+ * - The wire-format the model receives is built by
+ * {@link ChatTranscript#messagesWithPendingUserTurn(String)}, which
+ * returns a fresh list and does NOT mutate the transcript. So the
+ * pending user turn reaches the model without being committed.
+ * - Therefore: if the model call throws after the wire-format is built,
+ * {@code appendRound} is never reached, and the transcript stays
+ * exactly as it was before the call.
+ *
+ *
+ * The tests below pin both the mechanical API behaviour and the higher-level
+ * two-phase commit pattern as it is composed by {@link Session}.
+ */
+class ChatTranscriptTest {
+
+    /** Helper: simulate {@code Session.send} composing a single round through the API. */
+    private static void simulateSend(ChatTranscript t, String userMessage, String assistantReply) {
+        // Phase 1: build wire-format (model would see this).
+        List<Pair<String, String>> wire = t.messagesWithPendingUserTurn(userMessage);
+        // The wire format must contain the pending turn the model is about to answer.
+        assertTrue(
+                wire.stream().anyMatch(p -> "user".equals(p.getKey()) && userMessage.equals(p.getValue())),
+                "wire-format must carry the pending user turn");
+        // Phase 2: model returned successfully — commit both turns atomically.
+        t.appendRound(userMessage, assistantReply);
+    }
+
+    /**
+     * Helper: simulate {@code Session.send} where the model call throws after the
+     * wire-format is built. The {@code appendRound} line is never reached.
+     */
+    private static void simulateSendThatModelRejects(
+            ChatTranscript t, String pendingUserMessage, RuntimeException simulatedModelFailure) {
+        // Phase 1: build wire-format (model would see this).
+        @SuppressWarnings("unused")
+        List<Pair<String, String>> wire = t.messagesWithPendingUserTurn(pendingUserMessage);
+        // Phase 2: model throws — the caller (Session.send) lets the exception
+        // propagate; appendRound is NEVER called.
+        throw simulatedModelFailure;
+    }
+
+    @Nested
+    @DisplayName("mechanical API behaviour")
+    class Api {
+
+        @Test
+        @DisplayName("appendRound commits both turns atomically")
+        void appendRoundCommitsBothTurnsAtomically() {
+            ChatTranscript t = new ChatTranscript(null);
+
+            t.appendRound("hi", "hello back");
+
+            assertEquals(2, t.size());
+            List<ChatMessage> snapshot = t.snapshot();
+            assertEquals(2, snapshot.size());
+            assertEquals("user", snapshot.get(0).getRole());
+            assertEquals("hi", snapshot.get(0).getContent());
+            assertEquals("assistant", snapshot.get(1).getRole());
+            assertEquals("hello back", snapshot.get(1).getContent());
+        }
+
+        @Test
+        @DisplayName("appendUserTurn + appendAssistantTurn together produce the same shape as appendRound")
+        void appendUserAndAssistantSeparatelyMatchAppendRound() {
+            ChatTranscript a = new ChatTranscript(null);
+            ChatTranscript b = new ChatTranscript(null);
+
+            a.appendRound("hi", "hello back");
+            b.appendUserTurn("hi");
+            b.appendAssistantTurn("hello back");
+
+            assertEquals(a.snapshot(), b.snapshot(), "atomic-round and split-commit must converge");
+        }
+
+        @Test
+        @DisplayName("messagesWithPendingUserTurn does NOT mutate the transcript")
+        void messagesWithPendingUserTurnDoesNotMutate() {
+            ChatTranscript t = new ChatTranscript("system");
+            t.appendRound("first", "reply-1");
+            int sizeBefore = t.size();
+            List<ChatMessage> snapshotBefore = t.snapshot();
+
+            List<Pair<String, String>> wire = t.messagesWithPendingUserTurn("pending");
+
+            // Build a wire-format containing committed turns + pending user.
+            assertEquals(3, wire.size(), "1 user + 1 assistant + 1 pending user");
+            assertEquals("user", wire.get(2).getKey());
+            assertEquals("pending", wire.get(2).getValue());
+
+            // The transcript itself MUST be unchanged.
+            assertEquals(sizeBefore, t.size(), "transcript size unchanged");
+            assertEquals(snapshotBefore, t.snapshot(), "transcript snapshot unchanged");
+        }
+
+        @Test
+        @DisplayName("messagesWithPendingUserTurn returns a fresh list each call")
+        void messagesWithPendingUserTurnReturnsFreshList() {
+            ChatTranscript t = new ChatTranscript(null);
+            List<Pair<String, String>> first = t.messagesWithPendingUserTurn("hi");
+            List<Pair<String, String>> second = t.messagesWithPendingUserTurn("hi");
+            assertNotSame(
+                    first,
+                    second,
+                    "each wire-format build returns a fresh list — callers may mutate without affecting peers");
+        }
+
+        @Test
+        @DisplayName("snapshot includes system message when configured")
+        void snapshotIncludesSystemMessage() {
+            ChatTranscript t = new ChatTranscript("you are an assistant");
+            t.appendRound("hi", "hello");
+
+            List<ChatMessage> snap = t.snapshot();
+
+            assertEquals(3, snap.size());
+            assertEquals("system", snap.get(0).getRole());
+            assertEquals("you are an assistant", snap.get(0).getContent());
+        }
+
+        @Test
+        @DisplayName("snapshot omits system message when null or empty")
+        void snapshotOmitsSystemMessageWhenAbsent() {
+            assertEquals(0, new ChatTranscript(null).snapshot().size());
+            assertEquals(0, new ChatTranscript("").snapshot().size());
+        }
+
+        @Test
+        @DisplayName("snapshot is unmodifiable")
+        void snapshotIsUnmodifiable() {
+            ChatTranscript t = new ChatTranscript(null);
+            t.appendRound("hi", "hello");
+            List<ChatMessage> snap = t.snapshot();
+            assertThrows(UnsupportedOperationException.class, () -> snap.clear());
+        }
+
+        @Test
+        @DisplayName("getSystemMessage returns null when absent")
+        void getSystemMessageNullWhenAbsent() {
+            assertNull(new ChatTranscript(null).getSystemMessage());
+        }
+    }
+
+    @Nested
+    @DisplayName("two-phase commit pattern — running documentation")
+    class TwoPhaseCommit {
+
+        @Test
+        @DisplayName("simulated model failure leaves a FRESH transcript untouched")
+        void freshTranscriptUntouchedWhenModelThrows() {
+            ChatTranscript t = new ChatTranscript("system");
+            assertEquals(0, t.size(), "precondition: fresh transcript has no turns");
+            int snapshotSizeBefore = t.snapshot().size();
+
+            // Caller simulates Session.send where the model rejects the request.
+            assertThrows(
+                    LlamaException.class,
+                    () -> simulateSendThatModelRejects(
+                            t, "first attempt", new LlamaException("simulated model failure")));
+
+            // Two-phase commit: the pending user turn never landed in the transcript.
+            // (The system message snapshot entry was there before and is still there.)
+            assertEquals(0, t.size(), "transcript MUST NOT contain the pending user turn after model failure");
+            assertEquals(
+                    snapshotSizeBefore,
+                    t.snapshot().size(),
+                    "snapshot size unchanged by the failed call");
+        }
+
+        @Test
+        @DisplayName("simulated model failure leaves an EXISTING transcript byte-for-byte unchanged")
+        void existingTranscriptUntouchedWhenModelThrows() {
+            ChatTranscript t = new ChatTranscript("system");
+            simulateSend(t, "hi", "hello back");
+            simulateSend(t, "how are you", "i'm fine");
+
+            List<ChatMessage> before = t.snapshot();
+            assertEquals(5, before.size(), "precondition: 1 system + 2 user + 2 assistant");
+
+            // Now the model rejects a third call.
+            assertThrows(
+                    LlamaException.class,
+                    () -> simulateSendThatModelRejects(
+                            t, "third attempt", new LlamaException("simulated model failure")));
+
+            // Two-phase commit: existing transcript is byte-for-byte unchanged.
+            List<ChatMessage> after = t.snapshot();
+            assertEquals(before, after, "failed call must leave the transcript byte-for-byte unchanged");
+        }
+
+        @Test
+        @DisplayName("simulated model success commits user + assistant atomically — never just one half")
+        void successCommitsBothTurnsAtomically() {
+            ChatTranscript t = new ChatTranscript(null);
+
+            simulateSend(t, "hi", "hello");
+
+            assertEquals(2, t.size(), "both turns committed");
+            // The shape is invariant: there is no API to commit only one half via appendRound.
+            // Spot-check that the turn pair is well-formed.
+            List<ChatMessage> snap = t.snapshot();
+            assertEquals("user", snap.get(0).getRole());
+            assertEquals("hi", snap.get(0).getContent());
+            assertEquals("assistant", snap.get(1).getRole());
+            assertEquals("hello", snap.get(1).getContent());
+        }
+
+        @Test
+        @DisplayName("stream() shape — user turn only, assistant follows via commitStreamedReply")
+        void streamShape() {
+            ChatTranscript t = new ChatTranscript(null);
+
+            // Phase 1: build wire format (would be passed to model.generateChat).
+            List<Pair<String, String>> wire = t.messagesWithPendingUserTurn("tell me a joke");
+            assertEquals(1, wire.size(), "wire contains the pending user turn");
+
+            // Phase 2: model returned an iterable successfully — commit only the user turn.
+            t.appendUserTurn("tell me a joke");
+            assertEquals(1, t.size(), "user turn committed; assistant follows later");
+
+            // Later: caller invoked commitStreamedReply with the accumulated text.
+            t.appendAssistantTurn("knock knock");
+            assertEquals(2, t.size(), "round closes with the assistant turn");
+            assertEquals("assistant", t.snapshot().get(1).getRole());
+        }
+    }
+}
diff --git a/src/test/java/net/ladenthin/llama/ConfigureParallelInferenceTest.java b/src/test/java/net/ladenthin/llama/ConfigureParallelInferenceTest.java
index 61b1223e..16facddd 100644
--- a/src/test/java/net/ladenthin/llama/ConfigureParallelInferenceTest.java
+++ b/src/test/java/net/ladenthin/llama/ConfigureParallelInferenceTest.java
@@ -140,7 +140,7 @@ public void testConfigureEmptyJson() {
public void testModelWorksAfterReconfiguration() {
model.configureParallelInference("{\"n_threads\":2}");
InferenceParameters params =
- new InferenceParameters("int main() {").setNPredict(5).setTemperature(0);
+ new InferenceParameters("int main() {").withNPredict(5).withTemperature(0);
String result = model.complete(params);
assertNotNull(result, "Model should produce output after reconfiguration");
assertFalse(result.isEmpty(), "Output should not be empty");
diff --git a/src/test/java/net/ladenthin/llama/InferenceParametersTest.java b/src/test/java/net/ladenthin/llama/InferenceParametersTest.java
index f96b9c6a..add91850 100644
--- a/src/test/java/net/ladenthin/llama/InferenceParametersTest.java
+++ b/src/test/java/net/ladenthin/llama/InferenceParametersTest.java
@@ -19,11 +19,11 @@
import org.junit.jupiter.api.Test;
@ClaudeGenerated(
- purpose = "Verify that every InferenceParameters setter correctly stores its value in the "
+ purpose = "Verify that every InferenceParameters wither correctly stores its value in the "
+ "internal JSON parameter map, that the toJsonString helper properly escapes all "
+ "special characters (backslash, double-quote, newline, tab, CR, '' sequence), "
- + "that collection-based setters (logit bias, disable tokens, stop strings, samplers) "
- + "produce correctly formatted JSON arrays, and that setMessages enforces the "
+ + "that collection-based withers (logit bias, disable tokens, stop strings, samplers) "
+ + "produce correctly formatted JSON arrays, and that withMessages enforces the "
+ "'user'/'assistant'-only role contract.")
public class InferenceParametersTest {
@@ -47,7 +47,7 @@ public void testConstructorWithEmptyPrompt() {
@Test
public void testSetPromptOverrides() {
InferenceParameters params = new InferenceParameters("first");
- params.setPrompt("second");
+ params = params.withPrompt("second");
assertEquals("\"second\"", params.parameters.get("prompt"));
}
@@ -57,121 +57,121 @@ public void testSetPromptOverrides() {
@Test
public void testSetNPredict() {
- InferenceParameters params = new InferenceParameters("").setNPredict(42);
+ InferenceParameters params = new InferenceParameters("").withNPredict(42);
assertEquals("42", params.parameters.get("n_predict"));
}
@Test
public void testSetTemperature() {
- InferenceParameters params = new InferenceParameters("").setTemperature(0.5f);
+ InferenceParameters params = new InferenceParameters("").withTemperature(0.5f);
assertEquals("0.5", params.parameters.get("temperature"));
}
@Test
public void testSetTopK() {
- InferenceParameters params = new InferenceParameters("").setTopK(10);
+ InferenceParameters params = new InferenceParameters("").withTopK(10);
assertEquals("10", params.parameters.get("top_k"));
}
@Test
public void testSetTopP() {
- InferenceParameters params = new InferenceParameters("").setTopP(0.9f);
+ InferenceParameters params = new InferenceParameters("").withTopP(0.9f);
assertEquals("0.9", params.parameters.get("top_p"));
}
@Test
public void testSetMinP() {
- InferenceParameters params = new InferenceParameters("").setMinP(0.1f);
+ InferenceParameters params = new InferenceParameters("").withMinP(0.1f);
assertEquals("0.1", params.parameters.get("min_p"));
}
@Test
public void testSetTfsZ() {
- InferenceParameters params = new InferenceParameters("").setTfsZ(1.0f);
+ InferenceParameters params = new InferenceParameters("").withTfsZ(1.0f);
assertEquals("1.0", params.parameters.get("tfs_z"));
}
@Test
public void testSetTypicalP() {
- InferenceParameters params = new InferenceParameters("").setTypicalP(0.8f);
+ InferenceParameters params = new InferenceParameters("").withTypicalP(0.8f);
assertEquals("0.8", params.parameters.get("typical_p"));
}
@Test
public void testSetRepeatLastN() {
- InferenceParameters params = new InferenceParameters("").setRepeatLastN(64);
+ InferenceParameters params = new InferenceParameters("").withRepeatLastN(64);
assertEquals("64", params.parameters.get("repeat_last_n"));
}
@Test
public void testSetRepeatPenalty() {
- InferenceParameters params = new InferenceParameters("").setRepeatPenalty(1.1f);
+ InferenceParameters params = new InferenceParameters("").withRepeatPenalty(1.1f);
assertEquals("1.1", params.parameters.get("repeat_penalty"));
}
@Test
public void testSetFrequencyPenalty() {
- InferenceParameters params = new InferenceParameters("").setFrequencyPenalty(0.2f);
+ InferenceParameters params = new InferenceParameters("").withFrequencyPenalty(0.2f);
assertEquals("0.2", params.parameters.get("frequency_penalty"));
}
@Test
public void testSetPresencePenalty() {
- InferenceParameters params = new InferenceParameters("").setPresencePenalty(0.3f);
+ InferenceParameters params = new InferenceParameters("").withPresencePenalty(0.3f);
assertEquals("0.3", params.parameters.get("presence_penalty"));
}
@Test
public void testSetSeed() {
- InferenceParameters params = new InferenceParameters("").setSeed(1234);
+ InferenceParameters params = new InferenceParameters("").withSeed(1234);
assertEquals("1234", params.parameters.get("seed"));
}
@Test
public void testSetNProbs() {
- InferenceParameters params = new InferenceParameters("").setNProbs(5);
+ InferenceParameters params = new InferenceParameters("").withNProbs(5);
assertEquals("5", params.parameters.get("n_probs"));
}
@Test
public void testSetMinKeep() {
- InferenceParameters params = new InferenceParameters("").setMinKeep(2);
+ InferenceParameters params = new InferenceParameters("").withMinKeep(2);
assertEquals("2", params.parameters.get("min_keep"));
}
@Test
public void testSetNKeep() {
- InferenceParameters params = new InferenceParameters("").setNKeep(-1);
+ InferenceParameters params = new InferenceParameters("").withNKeep(-1);
assertEquals("-1", params.parameters.get("n_keep"));
}
@Test
public void testSetCachePrompt() {
- InferenceParameters params = new InferenceParameters("").setCachePrompt(true);
+ InferenceParameters params = new InferenceParameters("").withCachePrompt(true);
assertEquals("true", params.parameters.get("cache_prompt"));
}
@Test
public void testSetIgnoreEos() {
- InferenceParameters params = new InferenceParameters("").setIgnoreEos(true);
+ InferenceParameters params = new InferenceParameters("").withIgnoreEos(true);
assertEquals("true", params.parameters.get("ignore_eos"));
}
@Test
public void testSetPenalizeNl() {
- InferenceParameters params = new InferenceParameters("").setPenalizeNl(false);
+ InferenceParameters params = new InferenceParameters("").withPenalizeNl(false);
assertEquals("false", params.parameters.get("penalize_nl"));
}
@Test
public void testSetDynamicTemperatureRange() {
- InferenceParameters params = new InferenceParameters("").setDynamicTemperatureRange(0.5f);
+ InferenceParameters params = new InferenceParameters("").withDynamicTemperatureRange(0.5f);
assertEquals("0.5", params.parameters.get("dynatemp_range"));
}
@Test
public void testSetDynamicTemperatureExponent() {
- InferenceParameters params = new InferenceParameters("").setDynamicTemperatureExponent(2.0f);
+ InferenceParameters params = new InferenceParameters("").withDynamicTemperatureExponent(2.0f);
assertEquals("2.0", params.parameters.get("dynatemp_exponent"));
}
@@ -181,45 +181,45 @@ public void testSetDynamicTemperatureExponent() {
@Test
public void testSetInputPrefix() {
- InferenceParameters params = new InferenceParameters("").setInputPrefix("prefix");
+ InferenceParameters params = new InferenceParameters("").withInputPrefix("prefix");
assertEquals("\"prefix\"", params.parameters.get("input_prefix"));
}
@Test
public void testSetInputSuffix() {
- InferenceParameters params = new InferenceParameters("").setInputSuffix("suffix");
+ InferenceParameters params = new InferenceParameters("").withInputSuffix("suffix");
assertEquals("\"suffix\"", params.parameters.get("input_suffix"));
}
@Test
public void testSetGrammar() {
- InferenceParameters params = new InferenceParameters("").setGrammar("root ::= \"a\"");
+ InferenceParameters params = new InferenceParameters("").withGrammar("root ::= \"a\"");
assertEquals("\"root ::= \\\"a\\\"\"", params.parameters.get("grammar"));
}
@Test
public void testSetJsonSchemaStoresVerbatim() {
String schema = "{\"type\":\"object\",\"properties\":{\"name\":{\"type\":\"string\"}},\"required\":[\"name\"]}";
- InferenceParameters params = new InferenceParameters("").setJsonSchema(schema);
+ InferenceParameters params = new InferenceParameters("").withJsonSchema(schema);
assertEquals(schema, params.parameters.get("json_schema"));
assertTrue(params.toString().contains("\"json_schema\": " + schema));
}
@Test
public void testSetPenaltyPromptString() {
- InferenceParameters params = new InferenceParameters("").setPenaltyPrompt("Hello!");
+ InferenceParameters params = new InferenceParameters("").withPenaltyPrompt("Hello!");
assertEquals("\"Hello!\"", params.parameters.get("penalty_prompt"));
}
@Test
public void testSetUseChatTemplate() {
- InferenceParameters params = new InferenceParameters("").setUseChatTemplate(true);
+ InferenceParameters params = new InferenceParameters("").withUseChatTemplate(true);
assertEquals("true", params.parameters.get("use_jinja"));
}
@Test
public void testSetChatTemplate() {
- InferenceParameters params = new InferenceParameters("").setChatTemplate("{{messages}}");
+ InferenceParameters params = new InferenceParameters("").withChatTemplate("{{messages}}");
assertEquals("\"{{messages}}\"", params.parameters.get("chat_template"));
}
@@ -228,7 +228,7 @@ public void testSetChatTemplateKwargs() {
java.util.Map kwargs = new java.util.LinkedHashMap<>();
kwargs.put("enable_thinking", "true");
kwargs.put("max_tokens", "1024");
- InferenceParameters params = new InferenceParameters("").setChatTemplateKwargs(kwargs);
+ InferenceParameters params = new InferenceParameters("").withChatTemplateKwargs(kwargs);
String value = params.parameters.get("chat_template_kwargs");
assertNotNull(value);
assertTrue(value.contains("\"enable_thinking\":true"));
@@ -238,7 +238,7 @@ public void testSetChatTemplateKwargs() {
@Test
public void testSetChatTemplateKwargsEmpty() {
java.util.Map kwargs = new java.util.LinkedHashMap<>();
- InferenceParameters params = new InferenceParameters("").setChatTemplateKwargs(kwargs);
+ InferenceParameters params = new InferenceParameters("").withChatTemplateKwargs(kwargs);
assertEquals("{}", params.parameters.get("chat_template_kwargs"));
}
@@ -248,13 +248,13 @@ public void testSetChatTemplateKwargsEmpty() {
@Test
public void testSetTopNSigmaEnabled() {
- InferenceParameters params = new InferenceParameters("").setTopNSigma(2.0f);
+ InferenceParameters params = new InferenceParameters("").withTopNSigma(2.0f);
assertEquals("2.0", params.parameters.get("top_n_sigma"));
}
@Test
public void testSetTopNSigmaDisabled() {
- InferenceParameters params = new InferenceParameters("").setTopNSigma(-1.0f);
+ InferenceParameters params = new InferenceParameters("").withTopNSigma(-1.0f);
assertEquals("-1.0", params.parameters.get("top_n_sigma"));
}
@@ -264,68 +264,68 @@ public void testSetTopNSigmaDisabled() {
@Test
public void testSetReasoningFormatNone() {
- InferenceParameters params = new InferenceParameters("").setReasoningFormat(ReasoningFormat.NONE);
+ InferenceParameters params = new InferenceParameters("").withReasoningFormat(ReasoningFormat.NONE);
assertEquals("\"none\"", params.parameters.get("reasoning_format"));
}
@Test
public void testSetReasoningFormatAuto() {
- InferenceParameters params = new InferenceParameters("").setReasoningFormat(ReasoningFormat.AUTO);
+ InferenceParameters params = new InferenceParameters("").withReasoningFormat(ReasoningFormat.AUTO);
assertEquals("\"auto\"", params.parameters.get("reasoning_format"));
}
@Test
public void testSetReasoningFormatDeepseek() {
- InferenceParameters params = new InferenceParameters("").setReasoningFormat(ReasoningFormat.DEEPSEEK);
+ InferenceParameters params = new InferenceParameters("").withReasoningFormat(ReasoningFormat.DEEPSEEK);
assertEquals("\"deepseek\"", params.parameters.get("reasoning_format"));
}
@Test
public void testSetReasoningFormatDeepseekLegacy() {
- InferenceParameters params = new InferenceParameters("").setReasoningFormat(ReasoningFormat.DEEPSEEK_LEGACY);
+ InferenceParameters params = new InferenceParameters("").withReasoningFormat(ReasoningFormat.DEEPSEEK_LEGACY);
assertEquals("\"deepseek-legacy\"", params.parameters.get("reasoning_format"));
}
@Test
public void testSetReasoningBudgetTokensPositive() {
- InferenceParameters params = new InferenceParameters("").setReasoningBudgetTokens(512);
+ InferenceParameters params = new InferenceParameters("").withReasoningBudgetTokens(512);
assertEquals("512", params.parameters.get("reasoning_budget_tokens"));
}
@Test
public void testSetReasoningBudgetTokensZero() {
- InferenceParameters params = new InferenceParameters("").setReasoningBudgetTokens(0);
+ InferenceParameters params = new InferenceParameters("").withReasoningBudgetTokens(0);
assertEquals("0", params.parameters.get("reasoning_budget_tokens"));
}
@Test
public void testSetReasoningBudgetTokensDisabled() {
- InferenceParameters params = new InferenceParameters("").setReasoningBudgetTokens(-1);
+ InferenceParameters params = new InferenceParameters("").withReasoningBudgetTokens(-1);
assertEquals("-1", params.parameters.get("reasoning_budget_tokens"));
}
@Test
public void testSetContinueFinalMessageTrue() {
- InferenceParameters params = new InferenceParameters("").setContinueFinalMessage(true);
+ InferenceParameters params = new InferenceParameters("").withContinueFinalMessage(true);
assertEquals("true", params.parameters.get("continue_final_message"));
}
@Test
public void testSetContinueFinalMessageFalse() {
- InferenceParameters params = new InferenceParameters("").setContinueFinalMessage(false);
+ InferenceParameters params = new InferenceParameters("").withContinueFinalMessage(false);
assertEquals("false", params.parameters.get("continue_final_message"));
}
@Test
public void testSetContinueFinalMessageReasoningContent() {
InferenceParameters params =
- new InferenceParameters("").setContinueFinalMessage(ContinuationMode.REASONING_CONTENT);
+ new InferenceParameters("").withContinueFinalMessage(ContinuationMode.REASONING_CONTENT);
assertEquals("\"reasoning_content\"", params.parameters.get("continue_final_message"));
}
@Test
public void testSetContinueFinalMessageContent() {
- InferenceParameters params = new InferenceParameters("").setContinueFinalMessage(ContinuationMode.CONTENT);
+ InferenceParameters params = new InferenceParameters("").withContinueFinalMessage(ContinuationMode.CONTENT);
assertEquals("\"content\"", params.parameters.get("continue_final_message"));
}
@@ -335,31 +335,31 @@ public void testSetContinueFinalMessageContent() {
@Test
public void testSetMiroStatDisabled() {
- InferenceParameters params = new InferenceParameters("").setMiroStat(MiroStat.DISABLED);
+ InferenceParameters params = new InferenceParameters("").withMiroStat(MiroStat.DISABLED);
assertEquals("0", params.parameters.get("mirostat"));
}
@Test
public void testSetMiroStatV1() {
- InferenceParameters params = new InferenceParameters("").setMiroStat(MiroStat.V1);
+ InferenceParameters params = new InferenceParameters("").withMiroStat(MiroStat.V1);
assertEquals("1", params.parameters.get("mirostat"));
}
@Test
public void testSetMiroStatV2() {
- InferenceParameters params = new InferenceParameters("").setMiroStat(MiroStat.V2);
+ InferenceParameters params = new InferenceParameters("").withMiroStat(MiroStat.V2);
assertEquals("2", params.parameters.get("mirostat"));
}
@Test
public void testSetMiroStatTau() {
- InferenceParameters params = new InferenceParameters("").setMiroStatTau(5.0f);
+ InferenceParameters params = new InferenceParameters("").withMiroStatTau(5.0f);
assertEquals("5.0", params.parameters.get("mirostat_tau"));
}
@Test
public void testSetMiroStatEta() {
- InferenceParameters params = new InferenceParameters("").setMiroStatEta(0.1f);
+ InferenceParameters params = new InferenceParameters("").withMiroStatEta(0.1f);
assertEquals("0.1", params.parameters.get("mirostat_eta"));
}
@@ -369,20 +369,20 @@ public void testSetMiroStatEta() {
@Test
public void testSetStopStringsSingle() {
- InferenceParameters params = new InferenceParameters("").setStopStrings("stop");
+ InferenceParameters params = new InferenceParameters("").withStopStrings("stop");
assertEquals("[\"stop\"]", params.parameters.get("stop"));
}
@Test
public void testSetStopStringsMultiple() {
- InferenceParameters params = new InferenceParameters("").setStopStrings("stop1", "stop2");
+ InferenceParameters params = new InferenceParameters("").withStopStrings("stop1", "stop2");
assertEquals("[\"stop1\",\"stop2\"]", params.parameters.get("stop"));
}
@Test
public void testSetStopStringsEmpty() {
InferenceParameters params = new InferenceParameters("");
- params.setStopStrings();
+ params = params.withStopStrings();
assertFalse(params.parameters.containsKey("stop"));
}
@@ -392,27 +392,27 @@ public void testSetStopStringsEmpty() {
@Test
public void testSetSamplersSingle() {
- InferenceParameters params = new InferenceParameters("").setSamplers(Sampler.TOP_K);
+ InferenceParameters params = new InferenceParameters("").withSamplers(Sampler.TOP_K);
assertEquals("[\"top_k\"]", params.parameters.get("samplers"));
}
@Test
public void testSetSamplersMultiple() {
InferenceParameters params =
- new InferenceParameters("").setSamplers(Sampler.TOP_K, Sampler.TOP_P, Sampler.TEMPERATURE);
+ new InferenceParameters("").withSamplers(Sampler.TOP_K, Sampler.TOP_P, Sampler.TEMPERATURE);
assertEquals("[\"top_k\",\"top_p\",\"temperature\"]", params.parameters.get("samplers"));
}
@Test
public void testSetSamplersMinP() {
- InferenceParameters params = new InferenceParameters("").setSamplers(Sampler.MIN_P);
+ InferenceParameters params = new InferenceParameters("").withSamplers(Sampler.MIN_P);
assertEquals("[\"min_p\"]", params.parameters.get("samplers"));
}
@Test
public void testSetSamplersEmpty() {
InferenceParameters params = new InferenceParameters("");
- params.setSamplers();
+ params = params.withSamplers();
assertFalse(params.parameters.containsKey("samplers"));
}
@@ -423,7 +423,7 @@ public void testSetSamplersEmpty() {
@Test
public void testSetTokenIdBias() {
Map bias = Collections.singletonMap(15043, 1.0f);
- InferenceParameters params = new InferenceParameters("").setTokenIdBias(bias);
+ InferenceParameters params = new InferenceParameters("").withTokenIdBias(bias);
String value = params.parameters.get("logit_bias");
assertNotNull(value);
assertTrue(value.contains("15043"));
@@ -432,7 +432,7 @@ public void testSetTokenIdBias() {
@Test
public void testSetTokenIdBiasEmpty() {
- InferenceParameters params = new InferenceParameters("").setTokenIdBias(Collections.emptyMap());
+ InferenceParameters params = new InferenceParameters("").withTokenIdBias(Collections.emptyMap());
assertFalse(params.parameters.containsKey("logit_bias"));
}
@@ -443,7 +443,7 @@ public void testSetTokenIdBiasEmpty() {
@Test
public void testSetTokenBias() {
Map bias = Collections.singletonMap(" Hello", 1.0f);
- InferenceParameters params = new InferenceParameters("").setTokenBias(bias);
+ InferenceParameters params = new InferenceParameters("").withTokenBias(bias);
String value = params.parameters.get("logit_bias");
assertNotNull(value);
assertTrue(value.contains("Hello"));
@@ -452,7 +452,7 @@ public void testSetTokenBias() {
@Test
public void testSetTokenBiasEmpty() {
- InferenceParameters params = new InferenceParameters("").setTokenBias(Collections.emptyMap());
+ InferenceParameters params = new InferenceParameters("").withTokenBias(Collections.emptyMap());
assertFalse(params.parameters.containsKey("logit_bias"));
}
@@ -462,7 +462,7 @@ public void testSetTokenBiasEmpty() {
@Test
public void testDisableTokenIds() {
- InferenceParameters params = new InferenceParameters("").disableTokenIds(Arrays.asList(1, 2, 3));
+ InferenceParameters params = new InferenceParameters("").withDisabledTokenIds(Arrays.asList(1, 2, 3));
String value = params.parameters.get("logit_bias");
assertNotNull(value);
assertTrue(value.contains("false"));
@@ -471,13 +471,13 @@ public void testDisableTokenIds() {
@Test
public void testDisableTokenIdsEmpty() {
- InferenceParameters params = new InferenceParameters("").disableTokenIds(Collections.emptyList());
+ InferenceParameters params = new InferenceParameters("").withDisabledTokenIds(Collections.emptyList());
assertFalse(params.parameters.containsKey("logit_bias"));
}
@Test
public void testDisableTokens() {
- InferenceParameters params = new InferenceParameters("").disableTokens(Arrays.asList("bad", "word"));
+ InferenceParameters params = new InferenceParameters("").withDisabledTokens(Arrays.asList("bad", "word"));
String value = params.parameters.get("logit_bias");
assertNotNull(value);
assertTrue(value.contains("false"));
@@ -486,7 +486,7 @@ public void testDisableTokens() {
@Test
public void testDisableTokensEmpty() {
- InferenceParameters params = new InferenceParameters("").disableTokens(Collections.emptyList());
+ InferenceParameters params = new InferenceParameters("").withDisabledTokens(Collections.emptyList());
assertFalse(params.parameters.containsKey("logit_bias"));
}
@@ -496,14 +496,14 @@ public void testDisableTokensEmpty() {
@Test
public void testSetPenaltyPromptTokenIds() {
- InferenceParameters params = new InferenceParameters("").setPenaltyPrompt(new int[] {1, 2, 3});
+ InferenceParameters params = new InferenceParameters("").withPenaltyPrompt(new int[] {1, 2, 3});
assertEquals("[1,2,3]", params.parameters.get("penalty_prompt"));
}
@Test
public void testSetPenaltyPromptTokenIdsEmpty() {
InferenceParameters params = new InferenceParameters("");
- params.setPenaltyPrompt(new int[] {});
+ params = params.withPenaltyPrompt(new int[] {});
assertFalse(params.parameters.containsKey("penalty_prompt"));
}
@@ -514,7 +514,7 @@ public void testSetPenaltyPromptTokenIdsEmpty() {
@Test
public void testSetMessagesWithSystemAndUserMessages() {
List> messages = Collections.singletonList(new Pair<>("user", "Hi"));
- InferenceParameters params = new InferenceParameters("").setMessages("System msg", messages);
+ InferenceParameters params = new InferenceParameters("").withMessages("System msg", messages);
String value = params.parameters.get("messages");
assertNotNull(value);
assertTrue(value.contains("system"));
@@ -527,7 +527,7 @@ public void testSetMessagesWithSystemAndUserMessages() {
public void testSetMessagesWithAssistantRole() {
List> messages =
Arrays.asList(new Pair<>("user", "Hello"), new Pair<>("assistant", "Hi there"));
- InferenceParameters params = new InferenceParameters("").setMessages(null, messages);
+ InferenceParameters params = new InferenceParameters("").withMessages(null, messages);
String value = params.parameters.get("messages");
assertNotNull(value);
assertTrue(value.contains("assistant"));
@@ -537,7 +537,7 @@ public void testSetMessagesWithAssistantRole() {
@Test
public void testSetMessagesNoSystemMessage() {
List> messages = Collections.singletonList(new Pair<>("user", "Hello"));
- InferenceParameters params = new InferenceParameters("").setMessages(null, messages);
+ InferenceParameters params = new InferenceParameters("").withMessages(null, messages);
String value = params.parameters.get("messages");
assertNotNull(value);
assertFalse(value.contains("system"));
@@ -547,7 +547,7 @@ public void testSetMessagesNoSystemMessage() {
@Test
public void testSetMessagesEmptySystemMessage() {
List> messages = Collections.singletonList(new Pair<>("user", "Hello"));
- InferenceParameters params = new InferenceParameters("").setMessages("", messages);
+ InferenceParameters params = new InferenceParameters("").withMessages("", messages);
String value = params.parameters.get("messages");
assertFalse(value.contains("system"));
}
@@ -555,13 +555,13 @@ public void testSetMessagesEmptySystemMessage() {
@Test
public void testSetMessagesInvalidRole() {
List> messages = Collections.singletonList(new Pair<>("system", "Bad"));
- assertThrows(IllegalArgumentException.class, () -> new InferenceParameters("").setMessages(null, messages));
+ assertThrows(IllegalArgumentException.class, () -> new InferenceParameters("").withMessages(null, messages));
}
@Test
public void testSetMessagesInvalidRoleOther() {
List> messages = Collections.singletonList(new Pair<>("admin", "Hack"));
- assertThrows(IllegalArgumentException.class, () -> new InferenceParameters("").setMessages(null, messages));
+ assertThrows(IllegalArgumentException.class, () -> new InferenceParameters("").withMessages(null, messages));
}
// -------------------------------------------------------------------------
@@ -581,7 +581,7 @@ public void testToStringContainsPrompt() {
@Test
public void testToStringWithMultipleParams() {
InferenceParameters params =
- new InferenceParameters("p").setTemperature(0.7f).setTopK(20);
+ new InferenceParameters("p").withTemperature(0.7f).withTopK(20);
String json = params.toString();
assertTrue(json.contains("\"temperature\""));
assertTrue(json.contains("\"top_k\""));
@@ -625,7 +625,7 @@ public void testToJsonStringEscapesCarriageReturn() {
public void testToJsonStringNull() {
// toJsonString(null) returns null — only used internally but verify via grammar
InferenceParameters params = new InferenceParameters("");
- params.setGrammar(null);
+ params = params.withGrammar(null);
assertNull(params.parameters.get("grammar"));
}
@@ -639,15 +639,15 @@ public void testToJsonStringSlashNotEscaped() {
}
// -------------------------------------------------------------------------
- // Builder chaining returns same instance
+ // Builder chaining returns a new instance (immutable wither semantics)
// -------------------------------------------------------------------------
@Test
- public void testBuilderChainingReturnsSameInstance() {
+ public void testBuilderChainingReturnsNewInstance() {
InferenceParameters params = new InferenceParameters("");
- assertSame(params.setTemperature(0.5f), params);
- assertSame(params.setTopK(10), params);
- assertSame(params.setNPredict(5), params);
+ assertNotSame(params.withTemperature(0.5f), params);
+ assertNotSame(params.withTopK(10), params);
+ assertNotSame(params.withNPredict(5), params);
}
// -------------------------------------------------------------------------
@@ -656,13 +656,13 @@ public void testBuilderChainingReturnsSameInstance() {
@Test
public void testSetStreamTrue() {
- InferenceParameters params = new InferenceParameters("").setStream(true);
+ InferenceParameters params = new InferenceParameters("").withStream(true);
assertEquals("true", params.parameters.get("stream"));
}
@Test
public void testSetStreamFalse() {
- InferenceParameters params = new InferenceParameters("").setStream(false);
+ InferenceParameters params = new InferenceParameters("").withStream(false);
assertEquals("false", params.parameters.get("stream"));
}
@@ -675,7 +675,7 @@ public void testSetTokenIdBiasMultiple() {
Map bias = new HashMap<>();
bias.put(1, 0.5f);
bias.put(2, -1.0f);
- InferenceParameters params = new InferenceParameters("").setTokenIdBias(bias);
+ InferenceParameters params = new InferenceParameters("").withTokenIdBias(bias);
String value = params.parameters.get("logit_bias");
assertNotNull(value);
assertTrue(value.startsWith("["));
diff --git a/src/test/java/net/ladenthin/llama/JsonParametersTest.java b/src/test/java/net/ladenthin/llama/JsonParametersTest.java
index 303556f0..b5a0a15d 100644
--- a/src/test/java/net/ladenthin/llama/JsonParametersTest.java
+++ b/src/test/java/net/ladenthin/llama/JsonParametersTest.java
@@ -5,132 +5,181 @@
package net.ladenthin.llama;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotSame;
import static org.junit.jupiter.api.Assertions.assertSame;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import java.util.Map;
import net.ladenthin.llama.args.CacheType;
import net.ladenthin.llama.args.CliArg;
import org.junit.jupiter.api.Test;
@ClaudeGenerated(
- purpose = "Verify the putScalar and putEnum helpers on JsonParameters: that they store the "
- + "expected string form for every primitive type used by the ModelParameters / "
- + "InferenceParameters setters (int, long, float, double, boolean), that they "
- + "overwrite a previously-set key, that putEnum uses getArgValue() rather than the "
- + "enum name, and that both helpers return the concrete builder subtype so callers "
- + "can chain in a single statement.")
+ purpose = "Verify the withScalar / withEnum / withOptionalJson / withRaw helpers on the "
+ + "immutable JsonParameters base: that they store the expected string form for every "
+ + "primitive type used by InferenceParameters (int, long, float, double, boolean), "
+ + "that withEnum uses getArgValue() rather than the enum name, that every helper "
+ + "returns a NEW instance whose parameter map carries the entry inserted or replaced "
+ + "without touching the original, and that the inherited parameters map is an "
+ + "unmodifiable view. The CliParameters subclass tests cover the legacy put-style "
+ + "helpers used by ModelParameters (which still extends CliParameters and remains "
+ + "mutable).")
public class JsonParametersTest {
private static final class TestBuilder extends JsonParameters {
- TestBuilder putScalarPublic(String key, Object value) {
- return putScalar(key, value);
+ TestBuilder() {
+ super();
}
- TestBuilder putEnumPublic(String key, CliArg value) {
- return putEnum(key, value);
+ TestBuilder(Map parameters) {
+ super(parameters);
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ protected T withParameters(Map newParameters) {
+ return (T) new TestBuilder(newParameters);
+ }
+
+ TestBuilder withScalarPublic(String key, Object value) {
+ return withScalar(key, value);
+ }
+
+ TestBuilder withEnumPublic(String key, CliArg value) {
+ return withEnum(key, value);
+ }
+
+ TestBuilder withRawPublic(String key, String value) {
+ return withRaw(key, value);
+ }
+
+ TestBuilder withOptionalJsonPublic(String key, String text) {
+ return withOptionalJson(key, text);
}
}
@Test
- public void putScalar_int_storesDecimalString() {
- TestBuilder b = new TestBuilder();
- b.putScalarPublic("--threads", 8);
+ public void withScalar_int_storesDecimalString() {
+ TestBuilder b = new TestBuilder().withScalarPublic("--threads", 8);
assertEquals("8", b.parameters.get("--threads"));
}
@Test
- public void putScalar_negativeInt_storesSignedDecimal() {
- TestBuilder b = new TestBuilder();
- b.putScalarPublic("--predict", -1);
+ public void withScalar_negativeInt_storesSignedDecimal() {
+ TestBuilder b = new TestBuilder().withScalarPublic("--predict", -1);
assertEquals("-1", b.parameters.get("--predict"));
}
@Test
- public void putScalar_zero_storesZero() {
- TestBuilder b = new TestBuilder();
- b.putScalarPublic("--keep", 0);
+ public void withScalar_zero_storesZero() {
+ TestBuilder b = new TestBuilder().withScalarPublic("--keep", 0);
assertEquals("0", b.parameters.get("--keep"));
}
@Test
- public void putScalar_long_storesDecimalString() {
- TestBuilder b = new TestBuilder();
- b.putScalarPublic("--seed", 4242424242L);
+ public void withScalar_long_storesDecimalString() {
+ TestBuilder b = new TestBuilder().withScalarPublic("--seed", 4242424242L);
assertEquals("4242424242", b.parameters.get("--seed"));
}
@Test
- public void putScalar_float_storesDotSeparatedDecimal() {
- TestBuilder b = new TestBuilder();
- b.putScalarPublic("--temp", 0.7f);
+ public void withScalar_float_storesDotSeparatedDecimal() {
+ TestBuilder b = new TestBuilder().withScalarPublic("--temp", 0.7f);
// String.valueOf(float) is locale-independent and uses '.' as the decimal separator.
assertEquals("0.7", b.parameters.get("--temp"));
}
@Test
- public void putScalar_double_storesDotSeparatedDecimal() {
- TestBuilder b = new TestBuilder();
- b.putScalarPublic("--top-p", 0.95d);
+ public void withScalar_double_storesDotSeparatedDecimal() {
+ TestBuilder b = new TestBuilder().withScalarPublic("--top-p", 0.95d);
assertEquals("0.95", b.parameters.get("--top-p"));
}
@Test
- public void putScalar_booleanTrue_storesLowercaseTrue() {
- TestBuilder b = new TestBuilder();
- b.putScalarPublic("--cache", true);
+ public void withScalar_booleanTrue_storesLowercaseTrue() {
+ TestBuilder b = new TestBuilder().withScalarPublic("--cache", true);
assertEquals("true", b.parameters.get("--cache"));
}
@Test
- public void putScalar_booleanFalse_storesLowercaseFalse() {
- TestBuilder b = new TestBuilder();
- b.putScalarPublic("--cache", false);
+ public void withScalar_booleanFalse_storesLowercaseFalse() {
+ TestBuilder b = new TestBuilder().withScalarPublic("--cache", false);
assertEquals("false", b.parameters.get("--cache"));
}
@Test
- public void putScalar_overwritesPreviousValue() {
- TestBuilder b = new TestBuilder();
- b.putScalarPublic("--threads", 4);
- b.putScalarPublic("--threads", 16);
+ public void withScalar_overwritesPreviousValue() {
+ TestBuilder b = new TestBuilder()
+ .withScalarPublic("--threads", 4)
+ .withScalarPublic("--threads", 16);
assertEquals("16", b.parameters.get("--threads"));
assertEquals(1, b.parameters.size());
}
@Test
- public void putScalar_returnsSameBuilderInstance() {
- TestBuilder b = new TestBuilder();
- TestBuilder returned = b.putScalarPublic("--threads", 1);
- assertSame(returned, b);
+ public void withScalar_returnsFreshInstance() {
+ TestBuilder original = new TestBuilder();
+ TestBuilder derived = original.withScalarPublic("--threads", 1);
+ assertNotSame(original, derived, "wither must allocate a new instance");
+ assertTrue(original.parameters.isEmpty(), "original must remain empty");
+ assertEquals("1", derived.parameters.get("--threads"));
}
@Test
- public void putEnum_usesGetArgValueNotEnumName() {
- TestBuilder b = new TestBuilder();
- b.putEnumPublic("--cache-type-k", CacheType.Q8_0);
+ public void withEnum_usesGetArgValueNotEnumName() {
+ TestBuilder b = new TestBuilder().withEnumPublic("--cache-type-k", CacheType.Q8_0);
assertEquals(CacheType.Q8_0.getArgValue(), b.parameters.get("--cache-type-k"));
// Sanity check: the stored string is not the Java enum constant name.
assertEquals("q8_0", b.parameters.get("--cache-type-k"));
}
@Test
- public void putEnum_returnsSameBuilderInstance() {
- TestBuilder b = new TestBuilder();
- TestBuilder returned = b.putEnumPublic("--cache-type-k", CacheType.F16);
- assertSame(returned, b);
+ public void withEnum_returnsFreshInstance() {
+ TestBuilder original = new TestBuilder();
+ TestBuilder derived = original.withEnumPublic("--cache-type-k", CacheType.F16);
+ assertNotSame(original, derived);
}
@Test
- public void putEnum_overwritesPreviousValue() {
- TestBuilder b = new TestBuilder();
- b.putEnumPublic("--cache-type-k", CacheType.F16);
- b.putEnumPublic("--cache-type-k", CacheType.Q8_0);
+ public void withEnum_overwritesPreviousValue() {
+ TestBuilder b = new TestBuilder()
+ .withEnumPublic("--cache-type-k", CacheType.F16)
+ .withEnumPublic("--cache-type-k", CacheType.Q8_0);
assertEquals("q8_0", b.parameters.get("--cache-type-k"));
assertEquals(1, b.parameters.size());
}
- // The CliParameters base class carries the same putScalar / putEnum helpers
- // because ModelParameters does not extend JsonParameters. Verify both
- // helpers work on a CliParameters subclass as well.
+ @Test
+ public void withRaw_storesValueVerbatim() {
+ TestBuilder b = new TestBuilder().withRawPublic("schema", "{\"type\":\"object\"}");
+ assertEquals("{\"type\":\"object\"}", b.parameters.get("schema"));
+ }
+
+ @Test
+ public void withOptionalJson_nullIsNoOpReturnsSameInstance() {
+ TestBuilder original = new TestBuilder();
+ TestBuilder derived = original.withOptionalJsonPublic("grammar", null);
+ assertSame(original, derived, "null input must short-circuit to this");
+ }
+
+ @Test
+ public void withOptionalJson_nonNullEncodesAndAllocates() {
+ TestBuilder original = new TestBuilder();
+ TestBuilder derived = original.withOptionalJsonPublic("grammar", "abc");
+ assertNotSame(original, derived);
+ assertEquals("\"abc\"", derived.parameters.get("grammar"), "value must be JSON-encoded");
+ }
+
+ @Test
+ public void parametersAccessorIsUnmodifiable() {
+ TestBuilder b = new TestBuilder().withScalarPublic("--threads", 1);
+ assertThrows(UnsupportedOperationException.class, () -> b.parameters.put("evil", "x"));
+ }
+
+ // The CliParameters base class still carries the legacy putScalar / putEnum helpers
+ // because ModelParameters does not extend JsonParameters. The CliParameters subclass
+ // remains mutable by design.
private static final class CliTestBuilder extends CliParameters {
CliTestBuilder putScalarPublic(String key, Object value) {
diff --git a/src/test/java/net/ladenthin/llama/LlamaArchitectureTest.java b/src/test/java/net/ladenthin/llama/LlamaArchitectureTest.java
index 711646f9..4c7010d9 100644
--- a/src/test/java/net/ladenthin/llama/LlamaArchitectureTest.java
+++ b/src/test/java/net/ladenthin/llama/LlamaArchitectureTest.java
@@ -43,8 +43,7 @@ public class LlamaArchitectureTest {
* Every SLF4J {@link Logger} field follows the {@code private static final} idiom.
*/
@ArchTest
- static final ArchRule loggersArePrivateStaticFinal = fields()
- .that()
+ static final ArchRule loggersArePrivateStaticFinal = fields().that()
.haveRawType(Logger.class)
.should()
.bePrivate()
@@ -58,10 +57,36 @@ public class LlamaArchitectureTest {
* package starts importing from its parent or sibling.
*/
@ArchTest
- static final ArchRule noPackageCycles = slices()
- .matching("net.ladenthin.llama.(*)..")
+ static final ArchRule noPackageCycles =
+ slices().matching("net.ladenthin.llama.(*)..").should().beFreeOfCycles();
+
+ /**
+ * The {@code args} sub-package is a true leaf: pure enums / constants
+ * ({@code Sampler}, {@code PoolingType}, {@code ModelFlag}, …). It must not
+ * import anything from elsewhere in the project — neither the root API
+ * package nor the {@code json} parser package.
+ *
+ * <p>This pins the only stackable layer relationship in jllama. The
+ * traditional {@code layeredArchitecture()} 3-layer rule (Args → Json → Api)
+ * was attempted and rejected: {@code json} parsers/serializers genuinely
+ * depend on root-package DTOs ({@code Pair}, {@code ChatMessage},
+ * {@code ContentPart}) AND the root API genuinely depends on {@code json}
+ * parsers — they are peers in the public API layer, not a
+ * stackable hierarchy. Splitting the DTOs into a dedicated
+ * {@code net.ladenthin.llama.value} package would enable real layering,
+ * but breaks the published public-API FQNs ({@code net.ladenthin.llama.Pair}
+ * etc.) and is out of scope for an ArchUnit rule.
+ *
+ *
+ * <p>So the only real architectural invariant worth enforcing here is "args
+ * stays a leaf" — and that is what this rule does.
+ */
+ @ArchTest
+ static final ArchRule argsPackageIsALeaf = noClasses()
+ .that()
+ .resideInAPackage("net.ladenthin.llama.args..")
.should()
- .beFreeOfCycles();
+ .dependOnClassesThat()
+ .resideInAnyPackage("net.ladenthin.llama", "net.ladenthin.llama.json..");
/**
* Production code must not import unsupported / internal JDK packages.
@@ -84,13 +109,8 @@ public class LlamaArchitectureTest {
* remains allowed because the fields ARE final.
*/
@ArchTest
- static final ArchRule noPublicMutableFields = fields()
- .that()
- .arePublic()
- .and()
- .areNotStatic()
- .should()
- .beFinal();
+ static final ArchRule noPublicMutableFields =
+ fields().that().arePublic().and().areNotStatic().should().beFinal();
/**
* Production code must not call {@link System#exit(int)}; throw an exception instead.
diff --git a/src/test/java/net/ladenthin/llama/LlamaModelTest.java b/src/test/java/net/ladenthin/llama/LlamaModelTest.java
index 2605f627..daab1dc6 100644
--- a/src/test/java/net/ladenthin/llama/LlamaModelTest.java
+++ b/src/test/java/net/ladenthin/llama/LlamaModelTest.java
@@ -75,10 +75,10 @@ public void testGenerateAnswer() {
Map logitBias = new HashMap<>();
logitBias.put(2, 2.0f);
InferenceParameters params = new InferenceParameters(prefix)
- .setTemperature(0.95f)
- .setStopStrings("\"\"\"")
- .setNPredict(nPredict)
- .setTokenIdBias(logitBias);
+ .withTemperature(0.95f)
+ .withStopStrings("\"\"\"")
+ .withNPredict(nPredict)
+ .withTokenIdBias(logitBias);
int generated = 0;
for (LlamaOutput ignored : model.generate(params)) {
@@ -93,13 +93,13 @@ public void testGenerateInfill() {
Map logitBias = new HashMap<>();
logitBias.put(2, 2.0f);
InferenceParameters params = new InferenceParameters("")
- .setInputPrefix(prefix)
- .setInputSuffix(suffix)
- .setTemperature(0.95f)
- .setStopStrings("\"\"\"")
- .setNPredict(nPredict)
- .setTokenIdBias(logitBias)
- .setSeed(42);
+ .withInputPrefix(prefix)
+ .withInputSuffix(suffix)
+ .withTemperature(0.95f)
+ .withStopStrings("\"\"\"")
+ .withNPredict(nPredict)
+ .withTokenIdBias(logitBias)
+ .withSeed(42);
int generated = 0;
for (LlamaOutput ignored : model.generate(params)) {
@@ -111,8 +111,8 @@ public void testGenerateInfill() {
@Test
public void testGenerateGrammar() {
InferenceParameters params = new InferenceParameters("")
- .setGrammar("root ::= (\"a\" | \"b\")+")
- .setNPredict(nPredict);
+ .withGrammar("root ::= (\"a\" | \"b\")+")
+ .withNPredict(nPredict);
StringBuilder sb = new StringBuilder();
for (LlamaOutput output : model.generate(params)) {
sb.append(output);
@@ -129,11 +129,11 @@ public void testCompleteAnswer() {
Map logitBias = new HashMap<>();
logitBias.put(2, 2.0f);
InferenceParameters params = new InferenceParameters(prefix)
- .setTemperature(0.95f)
- .setStopStrings("\"\"\"")
- .setNPredict(nPredict)
- .setTokenIdBias(logitBias)
- .setSeed(42);
+ .withTemperature(0.95f)
+ .withStopStrings("\"\"\"")
+ .withNPredict(nPredict)
+ .withTokenIdBias(logitBias)
+ .withSeed(42);
String output = model.complete(params);
assertFalse(output.isEmpty());
@@ -144,13 +144,13 @@ public void testCompleteInfillCustom() {
Map logitBias = new HashMap<>();
logitBias.put(2, 2.0f);
InferenceParameters params = new InferenceParameters("")
- .setInputPrefix(prefix)
- .setInputSuffix(suffix)
- .setTemperature(0.95f)
- .setStopStrings("\"\"\"")
- .setNPredict(nPredict)
- .setTokenIdBias(logitBias)
- .setSeed(42);
+ .withInputPrefix(prefix)
+ .withInputSuffix(suffix)
+ .withTemperature(0.95f)
+ .withStopStrings("\"\"\"")
+ .withNPredict(nPredict)
+ .withTokenIdBias(logitBias)
+ .withSeed(42);
String output = model.complete(params);
assertFalse(output.isEmpty());
@@ -159,8 +159,8 @@ public void testCompleteInfillCustom() {
@Test
public void testCompleteGrammar() {
InferenceParameters params = new InferenceParameters("")
- .setGrammar("root ::= (\"a\" | \"b\")+")
- .setNPredict(nPredict);
+ .withGrammar("root ::= (\"a\" | \"b\")+")
+ .withNPredict(nPredict);
String output = model.complete(params);
assertTrue(output.matches("[ab]+"), output + " doesn't match [ab]+");
int generated = model.encode(output).length;
@@ -169,7 +169,7 @@ public void testCompleteGrammar() {
@Test
public void testCancelGenerating() {
- InferenceParameters params = new InferenceParameters(prefix).setNPredict(nPredict);
+ InferenceParameters params = new InferenceParameters(prefix).withNPredict(nPredict);
int generated = 0;
LlamaIterator iterator = model.generate(params).iterator();
@@ -194,7 +194,7 @@ public void testCancelGenerating() {
*/
@Test
public void testGenerateAutoCloseOnEarlyBreak() throws Exception {
- InferenceParameters params = new InferenceParameters(prefix).setNPredict(nPredict);
+ InferenceParameters params = new InferenceParameters(prefix).withNPredict(nPredict);
int collected = 0;
try (LlamaIterable iterable = model.generate(params)) {
@@ -209,7 +209,7 @@ public void testGenerateAutoCloseOnEarlyBreak() throws Exception {
assertTrue(collected >= 1, "Should have collected at least one token before break");
// The model must still be usable after an early-exit close
- String result = model.complete(new InferenceParameters(prefix).setNPredict(5));
+ String result = model.complete(new InferenceParameters(prefix).withNPredict(5));
assertNotNull(result, "Model must be functional after autoclosed iterator");
}
@@ -221,7 +221,7 @@ public void testGenerateAutoCloseOnEarlyBreak() throws Exception {
*/
@Test
public void testIteratorCloseIdempotent() {
- InferenceParameters params = new InferenceParameters(prefix).setNPredict(3);
+ InferenceParameters params = new InferenceParameters(prefix).withNPredict(3);
// Case A: drain to natural stop, then close()
LlamaIterable a = model.generate(params);
@@ -239,7 +239,7 @@ public void testIteratorCloseIdempotent() {
b.close();
// Model must still be usable
- assertNotNull(model.complete(new InferenceParameters(prefix).setNPredict(3)));
+ assertNotNull(model.complete(new InferenceParameters(prefix).withNPredict(3)));
}
/**
@@ -252,7 +252,7 @@ public void testIteratorCloseIdempotent() {
*/
@Test
public void testCompleteWithCancellationToken() throws Exception {
- InferenceParameters params = new InferenceParameters(prefix).setNPredict(512);
+ InferenceParameters params = new InferenceParameters(prefix).withNPredict(512);
CancellationToken token = new CancellationToken();
Thread canceller = new Thread(() -> {
@@ -277,7 +277,7 @@ public void testCompleteWithCancellationToken() throws Exception {
assertFalse(token.isCancelled(), "token should be reset after call returns");
// Model is still usable
- assertNotNull(model.complete(new InferenceParameters(prefix).setNPredict(3)));
+ assertNotNull(model.complete(new InferenceParameters(prefix).withNPredict(3)));
}
/**
@@ -288,9 +288,9 @@ public void testCompleteWithCancellationToken() throws Exception {
@Test
public void testCompleteAsync() throws Exception {
InferenceParameters params =
- new InferenceParameters(prefix).setNPredict(8).setSeed(42);
+ new InferenceParameters(prefix).withNPredict(8).withSeed(42);
String sync =
- model.complete(new InferenceParameters(prefix).setNPredict(8).setSeed(42));
+ model.complete(new InferenceParameters(prefix).withNPredict(8).withSeed(42));
String async = model.completeAsync(params).get(30, java.util.concurrent.TimeUnit.SECONDS);
assertEquals(sync, async);
}
@@ -304,7 +304,7 @@ public void testCompleteAsync() throws Exception {
*/
@Test
public void testCompleteAsyncCancelPropagates() throws Exception {
- InferenceParameters params = new InferenceParameters(prefix).setNPredict(512);
+ InferenceParameters params = new InferenceParameters(prefix).withNPredict(512);
CancellationToken token = new CancellationToken();
java.util.concurrent.CompletableFuture future = model.completeAsync(params, token);
@@ -318,7 +318,7 @@ public void testCompleteAsyncCancelPropagates() throws Exception {
Thread.sleep(5000);
// Model is still usable
- assertNotNull(model.complete(new InferenceParameters(prefix).setNPredict(3)));
+ assertNotNull(model.complete(new InferenceParameters(prefix).withNPredict(3)));
}
/**
@@ -329,8 +329,11 @@ public void testCompleteAsyncCancelPropagates() throws Exception {
*/
@Test
public void testSessionMultiTurn() {
- try (Session session = new Session(model, 0, "You are a terse assistant.", params -> params.setNPredict(8)
- .setSeed(1))) {
+ try (Session session = new Session(
+ model,
+ 0,
+ "You are a terse assistant.",
+ params -> params.withNPredict(8).withSeed(1))) {
String r1 = session.send("Say hi.");
assertNotNull(r1);
String r2 = session.send("Say bye.");
@@ -356,9 +359,9 @@ public void testSessionMultiTurn() {
*/
@Test
public void testTypedChat() {
- ChatRequest req = new ChatRequest()
- .addMessage("user", "Say hi in one word.")
- .setInferenceCustomizer(p -> p.setNPredict(8).setSeed(1));
+ ChatRequest req = ChatRequest.empty()
+ .appendMessage("user", "Say hi in one word.")
+ .withInferenceCustomizer(p -> p.withNPredict(8).withSeed(1));
ChatResponse r = model.chat(req);
assertNotNull(r);
assertFalse(r.getChoices().isEmpty());
@@ -379,11 +382,11 @@ public void testChatWithToolsLoopShortCircuits() {
"echo",
"Echo a string",
"{\"type\":\"object\",\"properties\":{\"s\":{\"type\":\"string\"}},\"required\":[\"s\"]}");
- ChatRequest req = new ChatRequest()
- .addMessage("user", "Hello.")
- .addTool(echo)
- .setMaxToolRounds(2)
- .setInferenceCustomizer(p -> p.setNPredict(8).setSeed(1));
+ ChatRequest req = ChatRequest.empty()
+ .appendMessage("user", "Hello.")
+ .appendTool(echo)
+ .withMaxToolRounds(2)
+ .withInferenceCustomizer(p -> p.withNPredict(8).withSeed(1));
java.util.Map handlers = new java.util.HashMap<>();
handlers.put("echo", args -> args);
ChatResponse r = model.chatWithTools(req, handlers);
@@ -400,9 +403,9 @@ public void testChatWithToolsLoopShortCircuits() {
@Test
public void testCompleteBatch() {
java.util.List requests = java.util.Arrays.asList(
- new InferenceParameters(prefix).setNPredict(3).setSeed(1),
- new InferenceParameters(prefix).setNPredict(3).setSeed(2),
- new InferenceParameters(prefix).setNPredict(3).setSeed(3));
+ new InferenceParameters(prefix).withNPredict(3).withSeed(1),
+ new InferenceParameters(prefix).withNPredict(3).withSeed(2),
+ new InferenceParameters(prefix).withNPredict(3).withSeed(3));
java.util.List results = model.completeBatch(requests);
assertEquals(3, results.size());
for (String r : results) {
@@ -413,8 +416,8 @@ public void testCompleteBatch() {
@Test
public void testCompleteBatchWithStats() {
java.util.List requests = java.util.Arrays.asList(
- new InferenceParameters(prefix).setNPredict(3).setSeed(1),
- new InferenceParameters(prefix).setNPredict(3).setSeed(2));
+ new InferenceParameters(prefix).withNPredict(3).withSeed(1),
+ new InferenceParameters(prefix).withNPredict(3).withSeed(2));
java.util.List results = model.completeBatchWithStats(requests);
assertEquals(2, results.size());
for (CompletionResult r : results) {
@@ -428,10 +431,12 @@ public void testCompleteBatchWithStats() {
@Test
public void testChatBatch() {
java.util.List requests = java.util.Arrays.asList(
- new ChatRequest().addMessage("user", "Say hi.").setInferenceCustomizer(p -> p.setNPredict(4)
- .setSeed(1)),
- new ChatRequest().addMessage("user", "Say bye.").setInferenceCustomizer(p -> p.setNPredict(4)
- .setSeed(2)));
+ ChatRequest.empty()
+ .appendMessage("user", "Say hi.")
+ .withInferenceCustomizer(p -> p.withNPredict(4).withSeed(1)),
+ ChatRequest.empty()
+ .appendMessage("user", "Say bye.")
+ .withInferenceCustomizer(p -> p.withNPredict(4).withSeed(2)));
java.util.List results = model.chatBatch(requests);
assertEquals(2, results.size());
for (ChatResponse r : results) {
@@ -554,7 +559,7 @@ public void testLogText() {
LlamaModel.setLogger(LogFormat.TEXT, (level, msg) -> messages.add(new LogMessage(level, msg)));
InferenceParameters params =
- new InferenceParameters(prefix).setNPredict(nPredict).setSeed(42);
+ new InferenceParameters(prefix).withNPredict(nPredict).withSeed(42);
model.complete(params);
assertFalse(messages.isEmpty());
@@ -572,7 +577,7 @@ public void testLogJSON() {
LlamaModel.setLogger(LogFormat.JSON, (level, msg) -> messages.add(new LogMessage(level, msg)));
InferenceParameters params =
- new InferenceParameters(prefix).setNPredict(nPredict).setSeed(42);
+ new InferenceParameters(prefix).withNPredict(nPredict).withSeed(42);
model.complete(params);
assertFalse(messages.isEmpty());
@@ -589,7 +594,7 @@ public void testLogJSON() {
public void testLogStdout() {
// Unfortunately, `printf` can't be easily re-directed to Java. This test only works manually, thus.
InferenceParameters params =
- new InferenceParameters(prefix).setNPredict(nPredict).setSeed(42);
+ new InferenceParameters(prefix).withNPredict(nPredict).withSeed(42);
System.out.println("########## Log Text ##########");
LlamaModel.setLogger(LogFormat.TEXT, null);
@@ -614,7 +619,7 @@ private String completeAndReadStdOut() {
try {
InferenceParameters params =
- new InferenceParameters(prefix).setNPredict(nPredict).setSeed(42);
+ new InferenceParameters(prefix).withNPredict(nPredict).withSeed(42);
model.complete(params);
} finally {
System.out.flush();
@@ -680,11 +685,11 @@ public void testTemplate() {
userMessages.add(new Pair<>("assistant", "It depends on your interests. Do you like fiction or non-fiction?"));
InferenceParameters params = new InferenceParameters("A book recommendation system.")
- .setMessages("Book", userMessages)
- .setTemperature(0.95f)
- .setStopStrings("\"\"\"")
- .setNPredict(nPredict)
- .setSeed(42);
+ .withMessages("Book", userMessages)
+ .withTemperature(0.95f)
+ .withStopStrings("\"\"\"")
+ .withNPredict(nPredict)
+ .withSeed(42);
assertEquals(
model.applyTemplate(params),
"<|im_start|>system\nBook<|im_end|>\n<|im_start|>user\nWhat is the best book?<|im_end|>\n<|im_start|>assistant\nIt depends on your interests. Do you like fiction or non-fiction?");
@@ -700,10 +705,10 @@ public void testChatComplete() {
messages.add(new Pair<>("user", "Write a single word."));
InferenceParameters params = new InferenceParameters("")
- .setMessages(null, messages)
- .setNPredict(nPredict)
- .setSeed(42)
- .setTemperature(0.0f);
+ .withMessages(null, messages)
+ .withNPredict(nPredict)
+ .withSeed(42)
+ .withTemperature(0.0f);
String response = model.chatComplete(params);
assertNotNull(response, "Chat completion should return a non-null response");
@@ -716,10 +721,10 @@ public void testChatCompleteWithSystemMessage() {
messages.add(new Pair<>("user", "Say hello."));
InferenceParameters params = new InferenceParameters("")
- .setMessages("You are a helpful assistant.", messages)
- .setNPredict(nPredict)
- .setSeed(42)
- .setTemperature(0.0f);
+ .withMessages("You are a helpful assistant.", messages)
+ .withNPredict(nPredict)
+ .withSeed(42)
+ .withTemperature(0.0f);
String response = model.chatComplete(params);
assertNotNull(response);
@@ -732,10 +737,10 @@ public void testGenerateChat() {
messages.add(new Pair<>("user", "Write a single word."));
InferenceParameters params = new InferenceParameters("")
- .setMessages(null, messages)
- .setNPredict(nPredict)
- .setSeed(42)
- .setTemperature(0.0f);
+ .withMessages(null, messages)
+ .withNPredict(nPredict)
+ .withSeed(42)
+ .withTemperature(0.0f);
int generated = 0;
StringBuilder sb = new StringBuilder();
@@ -754,7 +759,7 @@ public void testGenerateChatCancel() {
messages.add(new Pair<>("user", "Count from 1 to 100."));
InferenceParameters params =
- new InferenceParameters("").setMessages(null, messages).setNPredict(nPredict);
+ new InferenceParameters("").withMessages(null, messages).withNPredict(nPredict);
int generated = 0;
LlamaIterator iterator = model.generateChat(params).iterator();
@@ -781,10 +786,10 @@ public void testChatCompleteMultiTurn() {
messages.add(new Pair<>("user", "And 3+3?"));
InferenceParameters params = new InferenceParameters("")
- .setMessages(null, messages)
- .setNPredict(nPredict)
- .setSeed(42)
- .setTemperature(0.0f);
+ .withMessages(null, messages)
+ .withNPredict(nPredict)
+ .withSeed(42)
+ .withTemperature(0.0f);
String response = model.chatComplete(params);
assertNotNull(response);
@@ -800,11 +805,11 @@ public void testChatCompleteWithTemplateKwargs() {
kwargs.put("custom_var", "\"test_value\"");
InferenceParameters params = new InferenceParameters("")
- .setMessages(null, messages)
- .setChatTemplateKwargs(kwargs)
- .setNPredict(nPredict)
- .setSeed(42)
- .setTemperature(0.0f);
+ .withMessages(null, messages)
+ .withChatTemplateKwargs(kwargs)
+ .withNPredict(nPredict)
+ .withSeed(42)
+ .withTemperature(0.0f);
// Template kwargs should pass through without error even if
// the template doesn't use them — they're simply ignored.
@@ -822,7 +827,7 @@ public void testApplyTemplateWithKwargs() {
kwargs.put("custom_var", "\"test_value\"");
InferenceParameters params =
- new InferenceParameters("").setMessages(null, messages).setChatTemplateKwargs(kwargs);
+ new InferenceParameters("").withMessages(null, messages).withChatTemplateKwargs(kwargs);
// Should not throw — kwargs are passed through to the template
String result = model.applyTemplate(params);
@@ -846,7 +851,7 @@ public void testApplyTemplateUserOnly() {
List> messages = new ArrayList<>();
messages.add(new Pair<>("user", "Tell me a joke"));
- InferenceParameters params = new InferenceParameters("").setMessages(null, messages);
+ InferenceParameters params = new InferenceParameters("").withMessages(null, messages);
String result = model.applyTemplate(params);
@@ -870,7 +875,7 @@ public void testApplyTemplateMultipleTurns() {
messages.add(new Pair<>("assistant", "4"));
messages.add(new Pair<>("user", "And 3+3?"));
- InferenceParameters params = new InferenceParameters("").setMessages("Math tutor", messages);
+ InferenceParameters params = new InferenceParameters("").withMessages("Math tutor", messages);
String result = model.applyTemplate(params);
@@ -892,7 +897,7 @@ public void testApplyTemplateEmptySystemSkipped() {
messages.add(new Pair<>("user", "Hello"));
// empty string → setMessages skips the system block
- InferenceParameters params = new InferenceParameters("").setMessages("", messages);
+ InferenceParameters params = new InferenceParameters("").withMessages("", messages);
String result = model.applyTemplate(params);
@@ -911,7 +916,7 @@ public void testApplyTemplateLastMessageAssistantNoContinuationPrompt() {
messages.add(new Pair<>("user", "Capital of France?"));
messages.add(new Pair<>("assistant", "The capital of France is"));
- InferenceParameters params = new InferenceParameters("").setMessages(null, messages);
+ InferenceParameters params = new InferenceParameters("").withMessages(null, messages);
String result = model.applyTemplate(params);
@@ -935,8 +940,8 @@ public void testApplyTemplateLastMessageAssistantNoContinuationPrompt() {
public void testCompleteNonAsciiPrompt() {
// café, naïve, résumé contain multi-byte UTF-8 sequences
InferenceParameters params = new InferenceParameters("Translate to English: café")
- .setNPredict(nPredict)
- .setSeed(42);
+ .withNPredict(nPredict)
+ .withSeed(42);
String output = model.complete(params);
@@ -1083,7 +1088,7 @@ public void testCloseAfterGeneration() {
.setGpuLayers(gpuLayers)
.setFit(false))) {
String output =
- m.complete(new InferenceParameters("Hello").setNPredict(5).setSeed(42));
+ m.complete(new InferenceParameters("Hello").withNPredict(5).withSeed(42));
assertNotNull(output);
}
// Background thread should be fully joined before we reach here
@@ -1135,7 +1140,7 @@ public void testSpeculativeDecoding() {
.setGpuLayers(gpuLayers)
.setGpuLayersDraft(gpuLayers))) {
InferenceParameters params =
- new InferenceParameters(prefix).setNPredict(nPredict).setSeed(42);
+ new InferenceParameters(prefix).withNPredict(nPredict).withSeed(42);
// test streaming generation with speculative decoding
int generated = 0;
@@ -1213,8 +1218,8 @@ public void testGetModelMeta() throws LlamaException {
public void testIteratorTerminatesOnRepetitivePrompt() {
final int iterNPredict = 30;
InferenceParameters infer = new InferenceParameters("Repeat AAA forever: AAA AAA")
- .setNPredict(iterNPredict)
- .setTemperature(0.0f);
+ .withNPredict(iterNPredict)
+ .withTemperature(0.0f);
int count = 0;
try (LlamaIterable iterable = model.generate(infer)) {
diff --git a/src/test/java/net/ladenthin/llama/LlamaParameterProperties.java b/src/test/java/net/ladenthin/llama/LlamaParameterProperties.java
index 87d3c262..8d58a4a8 100644
--- a/src/test/java/net/ladenthin/llama/LlamaParameterProperties.java
+++ b/src/test/java/net/ladenthin/llama/LlamaParameterProperties.java
@@ -11,13 +11,13 @@ public class LlamaParameterProperties {
@Property
boolean setTemperatureNeverThrows(@ForAll @FloatRange(min = 0.0f, max = 2.0f) float temperature) {
- String json = new InferenceParameters("").setTemperature(temperature).toString();
+ String json = new InferenceParameters("").withTemperature(temperature).toString();
return json.contains("temperature");
}
@Property
boolean setTopPNeverThrows(@ForAll @FloatRange(min = 0.0f, max = 1.0f) float topP) {
- String json = new InferenceParameters("").setTopP(topP).toString();
+ String json = new InferenceParameters("").withTopP(topP).toString();
return json.contains("top_p");
}
}
diff --git a/src/test/java/net/ladenthin/llama/LlamaPublisherTest.java b/src/test/java/net/ladenthin/llama/LlamaPublisherTest.java
deleted file mode 100644
index c30aad63..00000000
--- a/src/test/java/net/ladenthin/llama/LlamaPublisherTest.java
+++ /dev/null
@@ -1,202 +0,0 @@
-// SPDX-FileCopyrightText: 2026 Bernard Ladenthin
-//
-// SPDX-License-Identifier: MIT
-
-package net.ladenthin.llama;
-
-import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertNotNull;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-import static org.junit.jupiter.api.Assertions.fail;
-
-import java.util.concurrent.CountDownLatch;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.atomic.AtomicReference;
-import org.junit.jupiter.api.Assumptions;
-import org.junit.jupiter.api.Test;
-import org.reactivestreams.Subscriber;
-import org.reactivestreams.Subscription;
-
-@ClaudeGenerated(
- purpose = "Verify LlamaPublisher honours Reactive Streams contracts: backpressure via request(n), "
- + "stops on cancel, signals onError for invalid demand, and rejects a second subscriber.")
-public class LlamaPublisherTest {
-
- /**
- * Model-gated: subscribe, request a small batch with backpressure, observe tokens, cancel early.
- */
- @Test
- public void backpressureAndCancel() throws Exception {
- Assumptions.assumeTrue(new java.io.File(TestConstants.MODEL_PATH).exists(), "Model file not found");
- int gpuLayers = Integer.getInteger(TestConstants.PROP_TEST_NGL, TestConstants.DEFAULT_TEST_NGL);
-
- try (LlamaModel model = new LlamaModel(new ModelParameters()
- .setCtxSize(128)
- .setModel(TestConstants.MODEL_PATH)
- .setGpuLayers(gpuLayers)
- .setFit(false))) {
-
- LlamaPublisher pub = model.streamPublisher(
- new InferenceParameters("def hello():").setNPredict(20).setSeed(1));
-
- CountDownLatch done = new CountDownLatch(1);
- AtomicReference subRef = new AtomicReference<>();
- AtomicInteger received = new AtomicInteger();
-
- pub.subscribe(new Subscriber() {
- @Override
- public void onSubscribe(Subscription s) {
- subRef.set(s);
- s.request(2); // initial demand
- }
-
- @Override
- public void onNext(LlamaOutput o) {
- int n = received.incrementAndGet();
- if (n == 2) {
- // Verify backpressure: with demand=0 we should pause until next request.
- // Request one more to trigger another emission.
- subRef.get().request(1);
- } else if (n == 3) {
- // Cancel after the third token; subsequent onNext must not occur.
- subRef.get().cancel();
- done.countDown();
- }
- }
-
- @Override
- public void onError(Throwable t) {
- done.countDown();
- }
-
- @Override
- public void onComplete() {
- done.countDown();
- }
- });
-
- assertTrue(done.await(30, TimeUnit.SECONDS), "subscriber did not terminate in 30s");
- // After cancel we may receive 3-4 in-flight tokens; should not be far above the
- // demand actually requested (3 here).
- int got = received.get();
- assertTrue(got >= 3 && got <= 6, "expected ~3 tokens, got " + got);
- }
- }
-
- @Test
- public void singleSubscriberContract() throws Exception {
- Assumptions.assumeTrue(new java.io.File(TestConstants.MODEL_PATH).exists(), "Model file not found");
- int gpuLayers = Integer.getInteger(TestConstants.PROP_TEST_NGL, TestConstants.DEFAULT_TEST_NGL);
-
- try (LlamaModel model = new LlamaModel(new ModelParameters()
- .setCtxSize(128)
- .setModel(TestConstants.MODEL_PATH)
- .setGpuLayers(gpuLayers)
- .setFit(false))) {
-
- LlamaPublisher pub = model.streamPublisher(
- new InferenceParameters("def f():").setNPredict(2).setSeed(1));
-
- CountDownLatch first = new CountDownLatch(1);
- pub.subscribe(new Subscriber() {
- @Override
- public void onSubscribe(Subscription s) {
- s.request(Long.MAX_VALUE);
- }
-
- @Override
- public void onNext(LlamaOutput o) {}
-
- @Override
- public void onError(Throwable t) {
- first.countDown();
- }
-
- @Override
- public void onComplete() {
- first.countDown();
- }
- });
- assertTrue(first.await(30, TimeUnit.SECONDS));
-
- // Second subscribe must signal onError.
- AtomicReference err = new AtomicReference<>();
- CountDownLatch second = new CountDownLatch(1);
- pub.subscribe(new Subscriber() {
- @Override
- public void onSubscribe(Subscription s) {}
-
- @Override
- public void onNext(LlamaOutput o) {}
-
- @Override
- public void onError(Throwable t) {
- err.set(t);
- second.countDown();
- }
-
- @Override
- public void onComplete() {
- second.countDown();
- }
- });
- assertTrue(second.await(5, TimeUnit.SECONDS));
- assertNotNull(err.get(), "expected onError on second subscribe");
- assertTrue(err.get() instanceof IllegalStateException);
- }
- }
-
- @Test
- public void invalidRequestSignalsError() throws Exception {
- Assumptions.assumeTrue(new java.io.File(TestConstants.MODEL_PATH).exists(), "Model file not found");
- int gpuLayers = Integer.getInteger(TestConstants.PROP_TEST_NGL, TestConstants.DEFAULT_TEST_NGL);
-
- try (LlamaModel model = new LlamaModel(new ModelParameters()
- .setCtxSize(128)
- .setModel(TestConstants.MODEL_PATH)
- .setGpuLayers(gpuLayers)
- .setFit(false))) {
-
- LlamaPublisher pub = model.streamPublisher(
- new InferenceParameters("def f():").setNPredict(5).setSeed(1));
-
- AtomicReference err = new AtomicReference<>();
- CountDownLatch done = new CountDownLatch(1);
- pub.subscribe(new Subscriber() {
- @Override
- public void onSubscribe(Subscription s) {
- s.request(0);
- }
-
- @Override
- public void onNext(LlamaOutput o) {}
-
- @Override
- public void onError(Throwable t) {
- err.set(t);
- done.countDown();
- }
-
- @Override
- public void onComplete() {
- done.countDown();
- }
- });
- assertTrue(done.await(10, TimeUnit.SECONDS));
- assertNotNull(err.get(), "expected onError for request(0)");
- assertTrue(err.get() instanceof IllegalArgumentException);
- }
- }
-
- @Test
- public void nullSubscriberThrows() {
- // Construct a publisher without a model — subscribe(null) must NPE before any model use.
- try {
- new LlamaPublisher(null, null, false).subscribe(null);
- fail("expected NPE");
- } catch (NullPointerException expected) {
- assertEquals("subscriber", expected.getMessage());
- }
- }
-}
diff --git a/src/test/java/net/ladenthin/llama/LoggingSmokeTest.java b/src/test/java/net/ladenthin/llama/LoggingSmokeTest.java
index 82e884d5..9fb193ed 100644
--- a/src/test/java/net/ladenthin/llama/LoggingSmokeTest.java
+++ b/src/test/java/net/ladenthin/llama/LoggingSmokeTest.java
@@ -29,8 +29,7 @@ public void slf4jPipelineEmits() {
LoggerFactory.getLogger(OSInfo.class).info("smoke");
assertTrue(
captor.getInfoLogs().contains("smoke"),
- "SLF4J pipeline did not deliver INFO event to LogCaptor; "
- + "binding or Logback config is broken");
+ "SLF4J pipeline did not deliver INFO event to LogCaptor; " + "binding or Logback config is broken");
}
}
@@ -53,8 +52,7 @@ String runAndWaitFor(String command) throws IOException {
};
assertEquals("unknown", OSInfo.getHardwareName());
assertTrue(
- captor.getErrorLogs().stream()
- .anyMatch(m -> m.contains("Error while running uname -m")),
+ captor.getErrorLogs().stream().anyMatch(m -> m.contains("Error while running uname -m")),
"expected error log 'Error while running uname -m' was not captured");
} finally {
OSInfo.processRunner = original;
diff --git a/src/test/java/net/ladenthin/llama/MemoryManagementTest.java b/src/test/java/net/ladenthin/llama/MemoryManagementTest.java
index 52f5f86a..a846065f 100644
--- a/src/test/java/net/ladenthin/llama/MemoryManagementTest.java
+++ b/src/test/java/net/ladenthin/llama/MemoryManagementTest.java
@@ -121,9 +121,9 @@ public static void tearDown() {
@Test
public void testContextShiftingAllowsContinuedGeneration() {
InferenceParameters params = new InferenceParameters(SHORT_PROMPT)
- .setNPredict(25)
- .setIgnoreEos(true) // prevent early stop so the shift is reliably triggered
- .setSeed(42);
+ .withNPredict(25)
+ .withIgnoreEos(true) // prevent early stop so the shift is reliably triggered
+ .withSeed(42);
String output = smallCtxModel.complete(params);
@@ -143,14 +143,14 @@ public void testContextShiftingAllowsContinuedGeneration() {
public void testContextShiftFollowedByFreshGeneration() {
// First call: triggers context shift
InferenceParameters shiftParams = new InferenceParameters(SHORT_PROMPT)
- .setNPredict(25)
- .setIgnoreEos(true)
- .setSeed(1);
+ .withNPredict(25)
+ .withIgnoreEos(true)
+ .withSeed(1);
smallCtxModel.complete(shiftParams);
// Second call: independent generation on the same model after the shift
InferenceParameters freshParams =
- new InferenceParameters("x = ").setNPredict(5).setSeed(2);
+ new InferenceParameters("x = ").withNPredict(5).withSeed(2);
String output = smallCtxModel.complete(freshParams);
assertNotNull(output);
@@ -173,10 +173,10 @@ public void testContextShiftFollowedByFreshGeneration() {
@Test
public void testPromptCacheGivesDeterministicOutput() {
InferenceParameters params = new InferenceParameters(CACHE_PREFIX_PROMPT)
- .setCachePrompt(true)
- .setNPredict(10)
- .setTemperature(0f) // greedy decoding: fully deterministic
- .setSeed(42);
+ .withCachePrompt(true)
+ .withNPredict(10)
+ .withTemperature(0f) // greedy decoding: fully deterministic
+ .withSeed(42);
String first = model.complete(params);
String second = model.complete(params);
@@ -196,10 +196,10 @@ public void testPromptCacheGivesDeterministicOutput() {
@Test
public void testNoCachePromptAlsoDeterministic() {
InferenceParameters params = new InferenceParameters(CACHE_PREFIX_PROMPT)
- .setCachePrompt(false)
- .setNPredict(10)
- .setTemperature(0f)
- .setSeed(42);
+ .withCachePrompt(false)
+ .withNPredict(10)
+ .withTemperature(0f)
+ .withSeed(42);
String first = model.complete(params);
String second = model.complete(params);
@@ -226,16 +226,16 @@ public void testNoCachePromptAlsoDeterministic() {
public void testPromptCachePrefixReuseSucceeds() {
// Warm the cache with the prefix prompt
InferenceParameters warmup = new InferenceParameters(CACHE_PREFIX_PROMPT)
- .setCachePrompt(true)
- .setNPredict(5)
- .setSeed(1);
+ .withCachePrompt(true)
+ .withNPredict(5)
+ .withSeed(1);
model.complete(warmup);
// Extend the prompt; the prefix is now in the KV cache and must be reused
InferenceParameters extended = new InferenceParameters(CACHE_EXTENDED_PROMPT)
- .setCachePrompt(true)
- .setNPredict(10)
- .setSeed(2);
+ .withCachePrompt(true)
+ .withNPredict(10)
+ .withSeed(2);
String output = model.complete(extended);
assertNotNull(output);
@@ -250,10 +250,10 @@ public void testPromptCachePrefixReuseSucceeds() {
@Test
public void testPromptCacheStableAcrossMultipleCalls() {
InferenceParameters params = new InferenceParameters(SHORT_PROMPT)
- .setCachePrompt(true)
- .setNPredict(8)
- .setTemperature(0f)
- .setSeed(77);
+ .withCachePrompt(true)
+ .withNPredict(8)
+ .withTemperature(0f)
+ .withSeed(77);
String first = model.complete(params);
String second = model.complete(params);
@@ -297,10 +297,10 @@ public void testContextShiftWithNKeepPreservesGeneration() {
// With ctxSize=32 and nPredict=25 the window is reliably exceeded, so the shift fires
// with the non-trivial n_keep_eff = 5 + add_bos_token path.
InferenceParameters params = new InferenceParameters(SHORT_PROMPT)
- .setNKeep(5)
- .setNPredict(25)
- .setIgnoreEos(true)
- .setSeed(42);
+ .withNKeep(5)
+ .withNPredict(25)
+ .withIgnoreEos(true)
+ .withSeed(42);
String output = smallCtxModel.complete(params);
@@ -336,9 +336,9 @@ public void testContextShiftWithNKeepPreservesGeneration() {
public void testPromptCacheCompleteMissAfterWarmup() {
// Step 1: warm the cache with a distinct prompt so cache_tokens is fully populated.
InferenceParameters warmup = new InferenceParameters(CACHE_PREFIX_PROMPT)
- .setCachePrompt(true)
- .setNPredict(5)
- .setSeed(1);
+ .withCachePrompt(true)
+ .withNPredict(5)
+ .withSeed(1);
model.complete(warmup);
// Step 2: call with a completely disjoint prompt.
@@ -347,10 +347,10 @@ public void testPromptCacheCompleteMissAfterWarmup() {
// be silently discarded / overwritten.
final String disjointPrompt = "x = ";
InferenceParameters missParams = new InferenceParameters(disjointPrompt)
- .setCachePrompt(true)
- .setNPredict(8)
- .setTemperature(0f)
- .setSeed(99);
+ .withCachePrompt(true)
+ .withNPredict(8)
+ .withTemperature(0f)
+ .withSeed(99);
String afterMiss = model.complete(missParams);
assertNotNull(afterMiss);
@@ -365,10 +365,10 @@ public void testPromptCacheCompleteMissAfterWarmup() {
.setGpuLayers(gpuLayers)
.setFit(false))) {
InferenceParameters freshParams = new InferenceParameters(disjointPrompt)
- .setCachePrompt(true)
- .setNPredict(8)
- .setTemperature(0f)
- .setSeed(99);
+ .withCachePrompt(true)
+ .withNPredict(8)
+ .withTemperature(0f)
+ .withSeed(99);
String fresh = freshModel.complete(freshParams);
assertEquals(
diff --git a/src/test/java/net/ladenthin/llama/ModelParametersTest.java b/src/test/java/net/ladenthin/llama/ModelParametersTest.java
index 7bd8630e..80bccb93 100644
--- a/src/test/java/net/ladenthin/llama/ModelParametersTest.java
+++ b/src/test/java/net/ladenthin/llama/ModelParametersTest.java
@@ -98,7 +98,7 @@ public void testSetRepeatLastNValid64() {
@Test
public void testSetRepeatLastNTooLow() {
- assertThrows(RuntimeException.class, () -> new ModelParameters().setRepeatLastN(-2));
+ assertThrows(IllegalArgumentException.class, () -> new ModelParameters().setRepeatLastN(-2));
}
// -------------------------------------------------------------------------
@@ -119,7 +119,7 @@ public void testSetDryPenaltyLastNValidZero() {
@Test
public void testSetDryPenaltyLastNTooLow() {
- assertThrows(RuntimeException.class, () -> new ModelParameters().setDryPenaltyLastN(-2));
+ assertThrows(IllegalArgumentException.class, () -> new ModelParameters().setDryPenaltyLastN(-2));
}
// -------------------------------------------------------------------------
diff --git a/src/test/java/net/ladenthin/llama/MultimodalIntegrationTest.java b/src/test/java/net/ladenthin/llama/MultimodalIntegrationTest.java
index 5f6e4f9d..8f4d4936 100644
--- a/src/test/java/net/ladenthin/llama/MultimodalIntegrationTest.java
+++ b/src/test/java/net/ladenthin/llama/MultimodalIntegrationTest.java
@@ -116,9 +116,9 @@ public void multimodalRequestProducesNonEmptyReply() throws Exception {
ContentPart.imageFile(Paths.get(imagePath)));
String reply = model.chatCompleteText(new InferenceParameters("")
- .setMessages(Collections.singletonList(userMsg))
- .setNPredict(48)
- .setTemperature(0.0f));
+ .withMessages(Collections.singletonList(userMsg))
+ .withNPredict(48)
+ .withTemperature(0.0f));
assertNotNull(reply, "chatCompleteText must return a string, not null");
assertFalse(reply.trim().isEmpty(), "reply must be non-empty for a multimodal prompt; got: \"" + reply + "\"");
@@ -136,16 +136,16 @@ public void multimodalThenTextOnSameModel() throws Exception {
ChatMessage img = ChatMessage.userMultimodal(
ContentPart.text("What is this?"), ContentPart.imageFile(Paths.get(imagePath)));
String firstReply = model.chatCompleteText(new InferenceParameters("")
- .setMessages(Collections.singletonList(img))
- .setNPredict(24)
- .setTemperature(0.0f));
+ .withMessages(Collections.singletonList(img))
+ .withNPredict(24)
+ .withTemperature(0.0f));
assertNotNull(firstReply);
ChatMessage textOnly = new ChatMessage("user", "Reply with the single word: ok");
String secondReply = model.chatCompleteText(new InferenceParameters("")
- .setMessages(Collections.singletonList(textOnly))
- .setNPredict(8)
- .setTemperature(0.0f));
+ .withMessages(Collections.singletonList(textOnly))
+ .withNPredict(8)
+ .withTemperature(0.0f));
assertNotNull(secondReply);
assertTrue(
secondReply.trim().length() > 0,
diff --git a/src/test/java/net/ladenthin/llama/MultimodalMessagesTest.java b/src/test/java/net/ladenthin/llama/MultimodalMessagesTest.java
index 9fb5cafc..9292f98a 100644
--- a/src/test/java/net/ladenthin/llama/MultimodalMessagesTest.java
+++ b/src/test/java/net/ladenthin/llama/MultimodalMessagesTest.java
@@ -143,9 +143,9 @@ public void serializerHandlesMixedMessages() {
@Test
public void inferenceParametersAcceptsMultimodalMessages() {
- InferenceParameters params = new InferenceParameters("");
- params.setMessages(Collections.singletonList(
- ChatMessage.userMultimodal(ContentPart.text("hi"), ContentPart.imageUrl("data:image/png;base64,QQ"))));
+ InferenceParameters params = new InferenceParameters("")
+ .withMessages(Collections.singletonList(
+ ChatMessage.userMultimodal(ContentPart.text("hi"), ContentPart.imageUrl("data:image/png;base64,QQ"))));
// setMessages encodes into the parameters map under "messages"; verify the
// resulting JSON has the array form, which is what the upstream OAI chat
// parser expects for multimodal routing.
diff --git a/src/test/java/net/ladenthin/llama/PairTest.java b/src/test/java/net/ladenthin/llama/PairTest.java
index d04819d0..fd31efc0 100644
--- a/src/test/java/net/ladenthin/llama/PairTest.java
+++ b/src/test/java/net/ladenthin/llama/PairTest.java
@@ -7,7 +7,6 @@
import static org.junit.jupiter.api.Assertions.*;
-import java.util.Objects;
import org.junit.jupiter.api.Test;
public class PairTest {
@@ -109,13 +108,16 @@ public void testHashCodeWithNull() {
}
@Test
- public void testHashCodeMatchesObjectsHash() {
- // Pins hashCode() to Objects.hash(key, value) exactly.
- // Without this, PIT's PrimitiveReturnsMutator survives by replacing
- // the return with 0 - the existing assertNotNull tests cannot detect
- // that because hashCode()'s primitive int autoboxes to a non-null Integer.
+ public void testHashCodeIsFieldDerived() {
+ // Catches PIT's PrimitiveReturnsMutator (would replace the return with 0)
+ // and AbstractMutator (would constant-fold to a fixed value) without pinning
+ // the exact implementation. Verifies hashCode is non-zero for non-trivial
+ // values and varies when either field changes — expected of a field-derived
+ // hash for these inputs, although the hashCode contract itself permits collisions.
Pair pair = new Pair<>("key", 123);
- assertEquals(Objects.hash("key", 123), pair.hashCode());
+ assertNotEquals(0, pair.hashCode());
+ assertNotEquals(pair.hashCode(), new Pair<>("other", 123).hashCode());
+ assertNotEquals(pair.hashCode(), new Pair<>("key", 456).hashCode());
}
@Test
diff --git a/src/test/java/net/ladenthin/llama/ReactorIntegrationTest.java b/src/test/java/net/ladenthin/llama/ReactorIntegrationTest.java
new file mode 100644
index 00000000..36fe251c
--- /dev/null
+++ b/src/test/java/net/ladenthin/llama/ReactorIntegrationTest.java
@@ -0,0 +1,156 @@
+// SPDX-FileCopyrightText: 2026 Bernard Ladenthin
+//
+// SPDX-License-Identifier: MIT
+
+package net.ladenthin.llama;
+
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.File;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicBoolean;
+import org.junit.jupiter.api.Assumptions;
+import org.junit.jupiter.api.Test;
+import reactor.core.publisher.Flux;
+import reactor.core.scheduler.Schedulers;
+import reactor.test.StepVerifier;
+
+/**
+ * Proves the documented "reactive integration" pattern from the README works
+ * end-to-end without adding {@code org.reactivestreams} as a runtime dependency.
+ *
+ * {@link LlamaIterable} implements {@code Iterable & AutoCloseable},
+ * so Project Reactor, RxJava 3, Kotlin coroutines {@code Flow}, and Akka Streams
+ * all wrap it in a single statement (see README "Reactive integration"). This
+ * test exercises the Reactor path because it is the most demanding contract —
+ * backpressure via {@code request(n)} and AutoCloseable cancel propagation —
+ * and the same contract underpins the other libraries' iterable adapters.
+ *
+ * {@link #mockIterable_requestBackpressureAndCancelClose()} runs without a
+ * GGUF model: it uses a fake iterable that tracks {@code close()} so the
+ * Reactor wiring is verified deterministically on every CI run.
+ *
+ * {@link #realModel_cancelPropagatesToNativeCompletion()} additionally
+ * proves end-to-end native cancel via llama.cpp's {@code cancelCompletion}, but
+ * is gated on a model file being present (same gating pattern as
+ * {@code LlamaModelTest}).
+ */
+class ReactorIntegrationTest {
+
+ /**
+ * Mock-only contract test — runs every build. Asserts:
+ *
+ * - Reactor honours backpressure: {@code request(n)} delivers at most
+ * {@code n} items, never more (no producer overrun).
+ * - Reactor closes the {@link AutoCloseable} iterable on cancel — which
+ * is the wire by which {@code LlamaIterable.close()} → native
+ * {@code cancelCompletion} on real generations.
+ *
+ */
+ @Test
+ void mockIterable_requestBackpressureAndCancelClose() {
+ AtomicBoolean closed = new AtomicBoolean(false);
+ List tokens =
+ Arrays.asList(out("a"), out("b"), out("c"), out("d"), out("e"));
+
+ // Flux.fromIterable(iterable) does NOT auto-close AutoCloseable iterables on cancel —
+ // the canonical Reactor pattern for that is Flux.using(supplier, builder, cleanup).
+ // The cleanup runs on both completion AND cancellation, which is the wire by which
+ // LlamaIterable.close() reaches the native cancelCompletion on real generations.
+ StepVerifier.create(
+ Flux.using(
+ () -> new TrackingIterable(tokens, closed),
+ Flux::fromIterable,
+ TrackingIterable::close)
+ .subscribeOn(Schedulers.boundedElastic()),
+ 2)
+ .expectNext(out("a"), out("b"))
+ .thenRequest(2)
+ .expectNext(out("c"), out("d"))
+ .thenCancel()
+ .verify();
+
+ assertTrue(
+ closed.get(),
+ "Flux.using must call the cleanup function on cancel — this is the wire that propagates"
+ + " cancellation into llama.cpp's cancelCompletion on real generations");
+ }
+
+ /**
+ * Real-model variant. Subscribes via Reactor, takes only a handful of tokens,
+ * then immediately starts a second inference to verify the slot was released.
+ * If cancel hadn't propagated into the native side, the second inference
+ * would either block or get a busy-slot error.
+ */
+ @Test
+ void realModel_cancelPropagatesToNativeCompletion() {
+ Assumptions.assumeTrue(
+ new File(TestConstants.MODEL_PATH).exists(),
+ "real-model test requires " + TestConstants.MODEL_PATH);
+
+ ModelParameters mp = new ModelParameters()
+ .setModel(TestConstants.MODEL_PATH)
+ .setGpuLayers(Integer.getInteger(TestConstants.PROP_TEST_NGL, 0));
+ try (LlamaModel model = new LlamaModel(mp)) {
+ // First: stream via Reactor with Flux.using for proper cleanup, take 3 tokens, cancel.
+ String first = Flux.using(
+ () -> model.generate(
+ new InferenceParameters("Q: 1+1=").withNPredict(20).withTemperature(0.0f)),
+ Flux::fromIterable,
+ LlamaIterable::close)
+ .subscribeOn(Schedulers.boundedElastic())
+ .take(3)
+ .map(o -> o.text)
+ .reduce("", (a, b) -> a + b)
+ .block();
+
+ assertNotNull(first, "Reactor reduce should not produce null after take(3)");
+ assertFalse(first.isEmpty(), "expected at least one token before cancel");
+
+ // Second inference on the same model: must succeed cleanly, proving the
+ // first generation's slot was released by Flux.using's cleanup function
+ // routing through LlamaIterable.close() -> LlamaIterator.close() ->
+ // native cancelCompletion.
+ String second = model.complete(
+ new InferenceParameters("Hi").withNPredict(2).withTemperature(0.0f));
+ assertNotNull(second);
+ }
+ }
+
+ /** Minimal {@link LlamaOutput} for the mock test — empty probability map. */
+ private static LlamaOutput out(String text) {
+ return new LlamaOutput(text, Collections.emptyMap(), false, null);
+ }
+
+ /**
+ * Test-only {@link LlamaIterable}-shaped fake: an {@code Iterable & AutoCloseable}
+ * that tracks {@code close()} so the test can assert Reactor invoked it on cancel.
+ * Mirrors {@link LlamaIterable}'s public contract exactly; the production class is
+ * {@code final} so we can't extend it, but the {@code Iterable + AutoCloseable} pair
+ * IS the contract reactive libs depend on — that is what we exercise here.
+ */
+ private static final class TrackingIterable implements Iterable, AutoCloseable {
+ private final List items;
+ private final AtomicBoolean closed;
+
+ TrackingIterable(List items, AtomicBoolean closed) {
+ this.items = items;
+ this.closed = closed;
+ }
+
+ @Override
+ public Iterator iterator() {
+ return items.iterator();
+ }
+
+ @Override
+ public void close() {
+ closed.set(true);
+ }
+ }
+}
diff --git a/src/test/java/net/ladenthin/llama/ReasoningBudgetTest.java b/src/test/java/net/ladenthin/llama/ReasoningBudgetTest.java
index ac450c2c..2f516147 100644
--- a/src/test/java/net/ladenthin/llama/ReasoningBudgetTest.java
+++ b/src/test/java/net/ladenthin/llama/ReasoningBudgetTest.java
@@ -98,8 +98,8 @@ public static void tearDown() {
@Test
public void testThinkingDefault_reasoningContentAndAnswerPresent() {
InferenceParameters params = new InferenceParameters("")
- .setMessages(null, Collections.singletonList(new Pair<>("user", "What is 2+2?")))
- .setNPredict(N_PREDICT);
+ .withMessages(null, Collections.singletonList(new Pair<>("user", "What is 2+2?")))
+ .withNPredict(N_PREDICT);
String json = model.chatComplete(params);
String reasoningContent = parser.extractChoiceReasoningContent(json);
@@ -133,9 +133,9 @@ public void testThinkingDefault_reasoningContentAndAnswerPresent() {
@Test
public void testReasoningBudgetZero_parameterAccepted_thinkingNotSuppressed() {
InferenceParameters params = new InferenceParameters("")
- .setMessages(null, Collections.singletonList(new Pair<>("user", "What is 2+2?")))
- .setReasoningBudgetTokens(0)
- .setNPredict(N_PREDICT);
+ .withMessages(null, Collections.singletonList(new Pair<>("user", "What is 2+2?")))
+ .withReasoningBudgetTokens(0)
+ .withNPredict(N_PREDICT);
String json = model.chatComplete(params);
@@ -186,9 +186,9 @@ public void testReasoningBudgetZero_parameterAccepted_thinkingNotSuppressed() {
@Test
public void testReasoningBudgetZero_expectedBehavior_suppressesThinking() {
InferenceParameters params = new InferenceParameters("")
- .setMessages(null, Collections.singletonList(new Pair<>("user", "What is 2+2?")))
- .setReasoningBudgetTokens(0)
- .setNPredict(N_PREDICT);
+ .withMessages(null, Collections.singletonList(new Pair<>("user", "What is 2+2?")))
+ .withReasoningBudgetTokens(0)
+ .withNPredict(N_PREDICT);
String json = model.chatComplete(params);
assertNotNull(json, "Response JSON must not be null");
@@ -215,10 +215,10 @@ public void testReasoningBudgetZero_expectedBehavior_suppressesThinking() {
@Test
public void testReasoningBudgetPositive_parameterAccepted() {
InferenceParameters params = new InferenceParameters("")
- .setMessages(
+ .withMessages(
null, Collections.singletonList(new Pair<>("user", "Think step by step: what is 3 times 7?")))
- .setReasoningBudgetTokens(100)
- .setNPredict(N_PREDICT);
+ .withReasoningBudgetTokens(100)
+ .withNPredict(N_PREDICT);
String json = model.chatComplete(params);
assertNotNull(json, "Response JSON must not be null");
diff --git a/src/test/java/net/ladenthin/llama/ResponseJsonStructureTest.java b/src/test/java/net/ladenthin/llama/ResponseJsonStructureTest.java
index 20bbae09..aaaf24e0 100644
--- a/src/test/java/net/ladenthin/llama/ResponseJsonStructureTest.java
+++ b/src/test/java/net/ladenthin/llama/ResponseJsonStructureTest.java
@@ -297,9 +297,9 @@ public void testOaiCompletionFinishReasonLength() {
@Test
public void testOaiChatCompletionHasChoices() {
InferenceParameters params = new InferenceParameters("")
- .setMessages(null, java.util.Collections.singletonList(new Pair<>("user", "Say hello")))
- .setNPredict(N_PREDICT)
- .setTemperature(0);
+ .withMessages(null, java.util.Collections.singletonList(new Pair<>("user", "Say hello")))
+ .withNPredict(N_PREDICT)
+ .withTemperature(0);
String result = model.chatComplete(params);
assertTrue(result.contains("\"choices\""), "Chat response must contain 'choices'");
}
@@ -307,9 +307,9 @@ public void testOaiChatCompletionHasChoices() {
@Test
public void testOaiChatCompletionHasUsage() {
InferenceParameters params = new InferenceParameters("")
- .setMessages(null, java.util.Collections.singletonList(new Pair<>("user", "Say hello")))
- .setNPredict(N_PREDICT)
- .setTemperature(0);
+ .withMessages(null, java.util.Collections.singletonList(new Pair<>("user", "Say hello")))
+ .withNPredict(N_PREDICT)
+ .withTemperature(0);
String result = model.chatComplete(params);
assertTrue(result.contains("\"usage\""), "Chat response must contain 'usage'");
}
@@ -317,9 +317,9 @@ public void testOaiChatCompletionHasUsage() {
@Test
public void testOaiChatCompletionHasMessageObject() {
InferenceParameters params = new InferenceParameters("")
- .setMessages(null, java.util.Collections.singletonList(new Pair<>("user", "Say hello")))
- .setNPredict(N_PREDICT)
- .setTemperature(0);
+ .withMessages(null, java.util.Collections.singletonList(new Pair<>("user", "Say hello")))
+ .withNPredict(N_PREDICT)
+ .withTemperature(0);
String result = model.chatComplete(params);
assertTrue(result.contains("\"message\""), "Chat response must contain 'message'");
}
@@ -327,9 +327,9 @@ public void testOaiChatCompletionHasMessageObject() {
@Test
public void testOaiChatCompletionObjectType() {
InferenceParameters params = new InferenceParameters("")
- .setMessages(null, java.util.Collections.singletonList(new Pair<>("user", "Say hello")))
- .setNPredict(N_PREDICT)
- .setTemperature(0);
+ .withMessages(null, java.util.Collections.singletonList(new Pair<>("user", "Say hello")))
+ .withNPredict(N_PREDICT)
+ .withTemperature(0);
String result = model.chatComplete(params);
assertTrue(
result.contains("\"object\":\"chat.completion\""), "Chat response 'object' must be 'chat.completion'");
@@ -338,9 +338,9 @@ public void testOaiChatCompletionObjectType() {
@Test
public void testOaiChatCompletionMessageHasRole() {
InferenceParameters params = new InferenceParameters("")
- .setMessages(null, java.util.Collections.singletonList(new Pair<>("user", "Say hello")))
- .setNPredict(N_PREDICT)
- .setTemperature(0);
+ .withMessages(null, java.util.Collections.singletonList(new Pair<>("user", "Say hello")))
+ .withNPredict(N_PREDICT)
+ .withTemperature(0);
String result = model.chatComplete(params);
assertTrue(result.contains("\"role\":\"assistant\""), "Message must contain 'role':'assistant'");
}
diff --git a/src/test/java/net/ladenthin/llama/SessionConcurrencyTest.java b/src/test/java/net/ladenthin/llama/SessionConcurrencyTest.java
index 13856df2..edac3777 100644
--- a/src/test/java/net/ladenthin/llama/SessionConcurrencyTest.java
+++ b/src/test/java/net/ladenthin/llama/SessionConcurrencyTest.java
@@ -89,7 +89,7 @@ public void testConcurrentSendProducesAlternatingTranscript() throws Exception {
final int threads = 2;
final int callsPerThread = 2;
try (Session session =
- new Session(model, 0, null, p -> p.setNPredict(N_PREDICT).setTemperature(0.0f))) {
+ new Session(model, 0, null, p -> p.withNPredict(N_PREDICT).withTemperature(0.0f))) {
ExecutorService pool = Executors.newFixedThreadPool(threads);
CountDownLatch start = new CountDownLatch(1);
@@ -141,7 +141,7 @@ public void testConcurrentSendProducesAlternatingTranscript() throws Exception {
@Test
public void testStreamGuardBlocksOtherOperationsUntilCommit() throws Exception {
try (Session session =
- new Session(model, 1, null, p -> p.setNPredict(N_PREDICT).setTemperature(0.0f))) {
+ new Session(model, 1, null, p -> p.withNPredict(N_PREDICT).withTemperature(0.0f))) {
try (LlamaIterable stream = session.stream("hi")) {
int before = session.getMessages().size();
@@ -220,7 +220,7 @@ public void testCommitStreamedReplyWithoutStreamThrows() {
@Test
public void testSequentialSendsAlternateRoles() {
try (Session session =
- new Session(model, 3, null, p -> p.setNPredict(N_PREDICT).setTemperature(0.0f))) {
+ new Session(model, 3, null, p -> p.withNPredict(N_PREDICT).withTemperature(0.0f))) {
session.send("a");
session.send("b");
List messages = session.getMessages();
diff --git a/src/test/java/net/ladenthin/llama/TimingsLoggerTest.java b/src/test/java/net/ladenthin/llama/TimingsLoggerTest.java
new file mode 100644
index 00000000..5f15d259
--- /dev/null
+++ b/src/test/java/net/ladenthin/llama/TimingsLoggerTest.java
@@ -0,0 +1,109 @@
+// SPDX-FileCopyrightText: 2026 Bernard Ladenthin
+//
+// SPDX-License-Identifier: MIT
+
+package net.ladenthin.llama;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import nl.altindag.log.LogCaptor;
+import org.junit.jupiter.api.Test;
+
+@ClaudeGenerated(
+        purpose = "Pin the per-run timing-line format (TimingsLogger#format) byte-for-byte "
+                + "and verify the SLF4J pipeline on the dedicated 'net.ladenthin.llama.timings' "
+                + "logger so a future format regression or accidental log-suppression is caught "
+                + "at test time.")
+public class TimingsLoggerTest {
+
+    /** Exact-match check of the whole line for an ordinary run with zero draft tokens. */
+    @Test
+    public void format_standardGeneration_singleLineWithAllSegments() {
+        String expected =
+                "prompt: 12 tok in 84.3 ms (142.4 tok/s)" + " | gen: 256 tok in 5031.7 ms (50.9 tok/s)" + " | cache: 0";
+        Timings timings = new Timings(
+                /*cacheN*/ 0,
+                /*promptN*/ 12,
+                /*promptMs*/ 84.3,
+                /*promptPerSec*/ 142.4,
+                /*predictedN*/ 256,
+                /*predictedMs*/ 5031.7,
+                /*predictedPerSec*/ 50.9,
+                /*draftN*/ 0,
+                /*draftNAccepted*/ 0);
+
+        String actual = TimingsLogger.format(timings);
+
+        assertEquals(expected, actual);
+    }
+
+    /** A run with non-zero draft counts must carry a {@code | draft: N (M accepted)} segment. */
+    @Test
+    public void format_speculativeDecoding_includesDraftSegment() {
+        Timings withDraft = new Timings(0, 4, 10.0, 400.0, 100, 1000.0, 100.0, 50, 35);
+
+        String rendered = TimingsLogger.format(withDraft);
+        boolean hasDraftSegment = rendered.contains(" | draft: 50 (35 accepted)");
+        assertTrue(hasDraftSegment, rendered);
+    }
+
+    /** A run with both draft counts at zero must not mention {@code draft} anywhere in the line. */
+    @Test
+    public void format_nonSpeculativeRun_omitsDraftSegment() {
+        Timings withoutDraft = new Timings(0, 4, 10.0, 400.0, 100, 1000.0, 100.0, 0, 0);
+
+        String rendered = TimingsLogger.format(withoutDraft);
+        boolean mentionsDraft = rendered.contains("draft");
+        assertFalse(mentionsDraft, rendered);
+    }
+
+    /** The cache count passed to the constructor shows up verbatim in the {@code | cache: N} segment. */
+    @Test
+    public void format_cacheHits_renderedExactly() {
+        Timings withCacheHits = new Timings(64, 12, 84.3, 142.4, 256, 5031.7, 50.9, 0, 0);
+
+        String rendered = TimingsLogger.format(withCacheHits);
+        boolean hasCacheSegment = rendered.contains(" | cache: 64");
+        assertTrue(hasCacheSegment, rendered);
+    }
+
+    /**
+     * End-to-end check: {@code TimingsLogger.log} must deliver exactly one INFO message to the
+     * logger named {@code TimingsLogger.LOGGER_NAME}, equal to {@code TimingsLogger.format} output.
+     */
+    @Test
+    public void log_pipelineDelivery_emitsFormattedLineAtInfo() {
+        Timings timings = new Timings(0, 12, 84.3, 142.4, 256, 5031.7, 50.9, 0, 0);
+
+        try (LogCaptor logs = LogCaptor.forName(TimingsLogger.LOGGER_NAME)) {
+            TimingsLogger.log(timings);
+            assertEquals(1, logs.getInfoLogs().size());
+            String expected = TimingsLogger.format(timings);
+            assertEquals(expected, logs.getInfoLogs().get(0));
+        }
+    }
+
+    /** Timings built by {@code Timings.fromJson(null)} (used here as the all-zero case) yield no INFO line. */
+    @Test
+    public void log_allZeroTimings_skipsEmptyLine() {
+        Timings emptyTimings = Timings.fromJson(null);
+
+        try (LogCaptor logs = LogCaptor.forName(TimingsLogger.LOGGER_NAME)) {
+            TimingsLogger.log(emptyTimings);
+            boolean nothingAtInfo = logs.getInfoLogs().isEmpty();
+            assertTrue(nothingAtInfo, "expected no log lines for all-zero timings");
+        }
+    }
+
+    /** Passing {@code null} must neither throw nor produce an INFO line, so callers can skip the null check. */
+    @Test
+    public void log_nullTimings_isNoOp() {
+        try (LogCaptor logs = LogCaptor.forName(TimingsLogger.LOGGER_NAME)) {
+            TimingsLogger.log(null);
+            boolean nothingAtInfo = logs.getInfoLogs().isEmpty();
+            assertTrue(nothingAtInfo, "expected no log lines when input is null");
+        }
+    }
+}
diff --git a/src/test/java/net/ladenthin/llama/benchmark/InferenceParametersBenchmark.java b/src/test/java/net/ladenthin/llama/benchmark/InferenceParametersBenchmark.java
index 93c6b072..ccce4444 100644
--- a/src/test/java/net/ladenthin/llama/benchmark/InferenceParametersBenchmark.java
+++ b/src/test/java/net/ladenthin/llama/benchmark/InferenceParametersBenchmark.java
@@ -60,10 +60,10 @@ public void serializeDefault(Blackhole bh) {
@Benchmark
public void serializeWithSamplingParams(Blackhole bh) {
bh.consume(new InferenceParameters("")
- .setTemperature(0.7f)
- .setTopP(0.9f)
- .setNPredict(512)
- .setStopStrings("", "<|im_end|>")
+ .withTemperature(0.7f)
+ .withTopP(0.9f)
+ .withNPredict(512)
+ .withStopStrings("", "<|im_end|>")
.toString());
}
}