diff --git a/README.md b/README.md
index 627c6bf2..dae6f504 100644
--- a/README.md
+++ b/README.md
@@ -143,6 +143,11 @@ await pipeline.run();
 |
Project RDF into engine-agnostic search documents (framing + a declarative field spec) |
+
+ | @lde/search-api-graphql |
+  |
+ Engine- and domain-agnostic GraphQL surface for search: builds an executable GraphQL schema from any SearchType at runtime |
+
| @lde/search-typesense |
 |
@@ -229,6 +234,10 @@ graph TD
subgraph Publication
fastify-rdf
docgen
+ search --> text-normalization
+ search-api-graphql --> search
+ search-typesense --> search
+ search-typesense --> text-normalization
end
subgraph Monitoring
diff --git a/docs/decisions/0003-search-api-core-query-model.md b/docs/decisions/0003-search-api-core-query-model.md
index 8189cda5..df74737c 100644
--- a/docs/decisions/0003-search-api-core-query-model.md
+++ b/docs/decisions/0003-search-api-core-query-model.md
@@ -6,10 +6,8 @@ Date: 2026-06-25
Proposed
-Reconciles against the NDE stack platform docs
-(`netwerk-digitaal-erfgoed/docs` → `docs/stack/layers/platform.md`), which are themselves
-a **draft under discussion**, so several decisions below are deliberate deviations from
-the current draft, to be reconciled back into it.
+Aligned with the NDE [stack platform docs](https://docs.nde.nl/stack/layers/platform); the
+decisions below are reflected there.
## Context
@@ -19,10 +17,9 @@ declarative source so the GraphQL surface, a later REST surface, and the index c
from each other, and so a deployment can swap search engines without consumers noticing.
That requires an engine- and protocol-neutral **core** that both API surfaces and any
-engine adapter sit on. The platform draft frames this as Ports & Adapters with a framed
-JSON-LD intermediate representation, generated from SHACL + a `search:` annotation
-vocabulary. We adopt that direction but scope it to what a v1 keyword search needs, and
-diverge on a few concrete points where the draft does not fit DR’s catalog-search case.
+engine adapter sit on. The architecture is Ports & Adapters with a framed JSON-LD
+intermediate representation, generated from SHACL + a `search:` annotation vocabulary,
+scoped here to what a v1 keyword search needs.
## Decision
@@ -32,26 +29,31 @@ Two tiers: `search-*` is backend you compose; `search-api-*` is the surface you
| Tier | Package | Responsibility |
| ----------- | ------------------------- | ----------------------------------------------------------------------------------------------------------------------- |
-| backend | `@lde/search` | field model · `SearchQuery` · filter semantics · adapter port |
+| backend | `@lde/search` | field model · `SearchQuery` · filter semantics · engine port |
| backend | `@lde/search-typesense` | engine adapter: collection schema · query/filter compiler · `search()` |
| API surface | `@lde/search-api-graphql` | field model + `SearchQuery` → GraphQL schema (runtime configuration; see [ADR 4](./0004-search-api-graphql-surface.md)) |
| API surface | `@lde/search-api-rest` | OpenAPI + route handlers (later, thin over the core) |
-This deviates from the draft’s function-mapping table (`@lde/graphql-server`,
-`@lde/rest-server`, no core row); the draft should adopt the `@lde/search*` family.
-
### Contract frozen, storage swappable
The **API contract** (the SDL shape consumers couple to) is breaking to change and must be
right in v1. The **IR / stored document** (framed JSON-LD vs a flat engine doc) lives
behind the adapter and is swappable with no consumer impact. Nothing engine-specific
(companion fields, `int32`, the engine query language) and nothing RDF-specific
-(`@context`, `@id`, IRI-keyed predicates) leaks past the adapter port.
+(`@context`, `@id`, IRI-keyed predicates) leaks past the engine port.
### Field model
The engine-neutral description of a queryable field – the runtime form of one SHACL
-NodeShape + its `search:` annotations:
+NodeShape + its `search:` annotations. **One `SearchField` declaration drives four
+consumers** – projection (RDF→flat document), the engine collection schema, the query
+semantics, and the GraphQL surface – so they cannot drift.
+
+It is a **unified** model: one declaration in place of three otherwise-separate ones – the
+projection-side `FieldSpec`/`FieldKind`, the Typesense `SEARCH_FIELDS` (collection schema +
+weights), and the query model below. `kind` plus capability flags replace the discriminated
+projection kinds, derived fields are first-class, and the Typesense-vocabulary types are
+_derived_ from `kind` rather than re-declared.
```ts
type FieldKind =
@@ -64,31 +66,48 @@ type FieldKind =
| 'reference';
interface SearchField {
- readonly name: string; // logical API name
+ readonly name: string; // logical API name; the physical fanout derives from it
readonly kind: FieldKind;
- readonly array?: boolean;
- readonly localized?: boolean;
+ readonly path?: string; // sh:path to project from; omit for a derivation-populated field
+ readonly array?: boolean; // sh:maxCount
+ readonly required?: boolean; // sh:minCount ≥ 1 — non-null in output, non-optional in the index
+ readonly localized?: boolean; // rdf:langString / sh:languageIn (text only)
+ readonly locales?: readonly string[]; // when localized: which languages to emit
readonly output?: boolean; // appears in the schema output type
- readonly searchable?: { weight: number }; // free-text inclusion + weight
+ readonly searchable?: { weight: number }; // free-text inclusion + weight (per-locale when localized)
readonly filterable?: boolean; // usable in `where`
readonly facetable?: boolean;
readonly sortable?: boolean;
- readonly nestedStrategy?: 'labelOnly' | 'idOnly' | 'inline'; // for `reference`
- readonly group?: { readonly name: string; readonly prefix: string }; // deployment delta
+ readonly ref?: { type: string; strategy: 'labelOnly' | 'idOnly' | 'inline' }; // kind: 'reference'
+ readonly transform?: (value: string) => string; // projection-time value transform
+ readonly facetRanges?: readonly FacetRange[]; // numeric facet: fixed [min, max) range bins (histogram) vs per-value buckets
}
-interface SearchSchema {
+type Derivation = (document: SearchDocument, node: FramedNode) => void;
+
+// One root type (one SHACL NodeShape); a whole deployment’s declaration is the
+// SearchSchema, a map of SearchTypes keyed by type IRI (built with searchSchema()).
+interface SearchType {
+ readonly type: string; // sh:targetClass
readonly fields: readonly SearchField[];
+ readonly derivations?: readonly Derivation[]; // computed fields: status, booleans
}
```
-Maps onto SHACL + `search:` (`kind`←`sh:datatype`, `array`←`sh:maxCount`,
-`localized`←`sh:languageIn`, `facetable`←`search:facetable`, `sortable`←`search:sortable`,
-`nestedStrategy`←`sh:node`/`sh:class` + `search:nestedStrategy`) so an eventual generator
-emits it unchanged. The `group` companion (coarse grouped facets, e.g. `format_group`) and
-the `status_rank` tie-break sort are **deployment-specific deltas**, never in `@lde/search`.
-`relevance` is _not_ a delta: every full-text engine ranks by match score, so it is a
-generic reserved sort the adapter understands.
+Maps onto SHACL + `search:` (`kind`←`sh:datatype`/`sh:nodeKind`, `path`←`sh:path`,
+`array`←`sh:maxCount`, `localized`←`sh:languageIn`, `facetable`←`search:facetable`,
+`sortable`←`search:sortable`, `ref`←`sh:node`/`sh:class` + `search:nestedStrategy`) so an
+eventual generator emits it unchanged. A field with **no `path`** is a derived field –
+populated by a `Derivation` rather than projected from the IR – yet it still carries full
+query/schema/output behavior, which is how the former separate projection `FieldSpec` is
+subsumed. The physical field names a declaration fans out to (`${name}_search_${locale}`,
+`${name}_sort_${locale}`, `${name}_search`) follow one convention owned by
+`@lde/search`, so projection, collection schema and query compiler agree. The `status_rank`
+tie-break sort is a **deployment-specific delta**, never in `@lde/search`. Grouped facets need
+no field-model mechanism at all: a deployment derivation materializes group tokens (e.g.
+`group:rdf`) into the field’s own values – see Consequences. `relevance` is _not_ a delta:
+every full-text engine ranks by match score, so it is a generic reserved sort the adapter
+understands.
### `SearchQuery` – the neutral query IR
@@ -147,25 +166,82 @@ variable-based clients (`$o: DatasetOrderBy`) break, so a future array is a deli
**Inclusive bounds only** – `min`/`max`, no `gt`/`gte`/`lt`/`lte`: self-documenting,
matches Typesense’s native inclusive range, covers every DR case, additively reversible.
-Grouped facets need no special shape – `group:`-prefixed tokens travel as ordinary `in`
-strings and the adapter splits/unions them.
-
-### Adapter port and result
+A numeric facet returns **range buckets** (`[min, max)` bins declared per field); the adapter
+maps them to the engine’s native range faceting.
+
+**Grouped facets need no special engine mechanism; they are denormalized at index time.**
+A coarse category alongside granular values (e.g. `group:rdf` next to media types, `group:person`
+next to class IRIs) is materialized into the field’s own values during projection, so at query
+time a group token is an ordinary value: faceted natively, filtered by plain membership
+(`field.in: ["group:rdf"]` unions with granular values for free), and – where the field is
+`output` – read like any other value. There is no `_group` companion, no `group:`-prefix split,
+no filter rewriting in the adapter; the engine stays dumb and denormalization (the document
+store’s strength) does the work. A cross-source signal that is not a subset of the field (e.g. a
+SPARQL capability derived from `conformsTo`, not a media type) is likewise materialized as a plain
+value by a deployment derivation.
+
+The trade-off this design accepts: **group membership is fixed at index time.** Because the
+group token is baked into each document’s values during projection, redefining a group (which
+granular values map to `group:rdf`) is an index-data change that takes effect only on **reindex** –
+there is no query-time mapping to edit. The constraint is acceptable here because group definitions
+are deployment projection config that already drives indexing, and reindexing is already the
+pipeline’s job; it would not suit a system where grouping is user-defined or changes frequently.
+
+### Engine port and result
+
+The **port** is the interface the core defines; a concrete engine **adapter**
+(`@lde/search-typesense`’s `TypesenseSearchEngine`) implements it. Naming the port for the
+capability (`SearchEngine`), not the pattern piece, keeps `TypesenseSearchEngine implements
+SearchEngine` readable.
```ts
-interface SearchAdapter {
- search(query: SearchQuery, schema: SearchSchema): Promise;
+// FacetField / OutputField default to `string` (ergonomic) and a deployment narrows them
+// to its type’s facetable / output field names for typo-safe facet and document access
+// (helpers FacetFieldsOf / OutputFieldsOf, or the EngineFor alias).
+interface SearchEngine<
+ FacetField extends string = string,
+ OutputField extends string = string,
+> {
+ search(
+ query: SearchQuery,
+ searchType: SearchType,
+ ): Promise<SearchResult<FacetField, OutputField>>;
}
-interface SearchResult {
- readonly hits: readonly { id: string; document: SearchDocument }[];
+interface SearchResult<
+ FacetField extends string = string,
+ OutputField extends string = string,
+> {
+ readonly hits: readonly SearchHit[];
readonly total: number;
+ // Keyed by facet field name; `Partial` because only the queried facets are present.
+ // A bucket’s `label` (a LocalizedValue) is the engine-resolved canonical data label,
+ // present only for reference (IRI-keyed) facets; absent for token/free-string facets,
+ // whose display the consumer owns (its own i18n, or the value itself).
readonly facets: Readonly<
- Record
+ Partial<
+ Record<
+ FacetField,
+ readonly { value: string; count: number; label?: LocalizedValue }[]
+ >
+ >
>;
}
-type SearchDocument = Record;
+// `id` (the stable document key, an IRI) stays out of the document: it is the hit’s
+// identity, always present, a different contract from the optional logical field values,
+// and maps straight onto the GraphQL output’s `id: String!`.
+interface SearchHit {
+ readonly id: string;
+ readonly document: ResultDocument;
+}
+
+// The logical result document. Named distinctly from the flat, fanned-out projection
+// `SearchDocument` that lives index-side: this carries logical fields (language maps,
+// references) ready for a surface to shape.
+type ResultDocument<OutputField extends string = string> = Readonly<
+ Partial<Record<OutputField, SearchValue>>
+>;
type SearchValue =
| string
| number
@@ -192,7 +268,7 @@ per-shape types (e.g. `Organization`, `Term`) with `label` exposed as `name`
- **IR / adapter-return:** JSON-LD language map (`@container: @language`), `@set` arrays,
`und` for untagged. Matches schema-profile #171 (language maps are more usable as a data
- model) and the platform draft’s envelope.
+ model) and the stack platform envelope.
- **GraphQL surface:** a single **best-first** `Accept-Language`-ordered list
(`[LanguageString!]!`, see [ADR 4](./0004-search-api-graphql-surface.md)). `[0]` is the
value to display; **`[0].language` is the language actually served** – the per-field
@@ -208,7 +284,7 @@ argument (deferred): a parallel arg would duplicate the header and need preceden
Chosen over a `{nl,en}` map (silently yields `undefined` for a missing language, no defined
fallback order) and over a separate resolved scalar (the value must be a `LanguageString` to
carry its language anyway, so the scalar saved only the `[0]` index – not worth a second
-field plus a deviation from the draft / Network-of-Terms list shape). Grounded in measured
+field plus diverging from the Network-of-Terms list shape). Grounded in measured
data and all three substrates:
- **A (descriptions, measured):** bilingual `nl`/`en`, ~86% Dutch-only → an English user gets
@@ -222,32 +298,26 @@ have an English title) is distinct from content `dct:language` (already filterab
preference; expressible as a facetable dimension (languages-present-in-a-localized-field),
not enabled for DR v1, more relevant for B/C.
-### Other reconciled decisions
+### Other decisions
- **Numbered pagination** (`offset`/`limit`, presented as page/per-page), not Relay
cursors. DR is a page-numbered faceted browser with totals; Typesense is natively
page/per-page; the ~2,500-doc corpus never paginates deep enough for offset cost to bite;
and the blue/green alias swap removes the mutation-drift that motivates cursors.
- **Sidecar canonical labels**, not inline `labelOnly` as default. Facets need one
- canonical label per entity; the draft’s own two-source model puts canonical labels in a
- separate collection, which is what DR’s `labels` collection is. `nestedStrategy` is
- carried as metadata but inline `labelOnly` is not the default.
-- **Logical typed result document** at the query seam; framed JSON-LD kept index-side. The
- draft treats framed JSON-LD as the universal IR; we scope it to the index/projection
- artifact (its payoff – vector/LDES/UI sinks – is object-search’s, not catalog-search’s),
- gated on the generic framing packages existing rather than on DR.
+ canonical label per entity, kept in a separate collection – DR’s `labels` collection. A
+ reference’s `strategy` is carried as metadata; `labelOnly` (resolved from the sidecar
+ collection rather than inlined into the document) is the v1 default, not `inline`.
+- **Logical typed result document** at the query seam; framed JSON-LD kept index-side as the
+ index/projection artifact (its payoff – vector/LDES/UI sinks – is object-search’s, not
+ catalog-search’s), gated on the generic framing packages existing rather than on DR.
## Consequences
- One declarative source drives GraphQL, later REST, and the index; they cannot drift.
- The engine is a swappable adapter; the contract outlives engine choices.
-- Adopted from the draft unchanged: the Stable API Contract discipline, `nestedStrategy` as
- a concept, the surface `LanguageString` list, folding at the adapter boundary + query
- side via `@lde/text-normalization`, SDL-in-projection vs filter-compiler-in-adapter.
-- Deviations to reconcile into the platform draft: numbered pagination; sidecar labels;
- logical result doc (framed JSON-LD scoped to index-side); `min`/`max` filter ranges; the
- `@lde/search*` naming and a core package row.
+- Carried through: the Stable API Contract discipline, the reference `strategy` concept, the
+ surface `LanguageString` list, folding at the adapter boundary + query side via
+ `@lde/text-normalization`, SDL-in-projection vs filter-compiler-in-adapter.
- Deferred: REST surface; framed-JSON-LD materialised view (nested storage, index-time
label inlining, detail-page-on-index, terms-collection split); semantic/hybrid (vector)
- search; unifying the projection `FieldSpec` (RDF→doc) with this `SearchField`
- (query/output) into one field declaration.
+ search.
diff --git a/docs/decisions/0004-search-api-graphql-surface.md b/docs/decisions/0004-search-api-graphql-surface.md
index d6aff824..678d6d04 100644
--- a/docs/decisions/0004-search-api-graphql-surface.md
+++ b/docs/decisions/0004-search-api-graphql-surface.md
@@ -11,42 +11,34 @@ Builds on [ADR 3 (Search API core query model)](./0003-search-api-core-query-mod
## Context
Given the engine-neutral core of [ADR 3](./0003-search-api-core-query-model.md), the first
-API surface is GraphQL. The platform draft requires the surface to be derived from the same
-source as the index, never hand-written, so it cannot drift. It must also be framework-free:
-resolvers are standard `graphql-js`, not tied to Fastify/Mercurius, so any GraphQL server
-can host the schema (DR mounts it inline; a Fastify wrapper is deferred and, if ever built,
-is a separate package).
+API surface is GraphQL, derived from the same source as the index so it cannot drift. It must
+be framework-free: resolvers are standard `graphql-js`, not tied to Fastify/Mercurius, so any
+GraphQL server can host the schema (DR mounts it inline; a Fastify wrapper is a deferred
+separate package).
## Decision
### Runtime configuration, not code generation
-The platform draft frames this as _generating_ the surface – emitting GraphQL SDL **and**
-resolvers as artifacts. We deviate: nothing is emitted or committed. The schema is
-**constructed at runtime from the field-model configuration** (`buildSearchSchema(config)`),
-once at startup, and the resolvers are **generic functions inside the package** attached to
-that schema. A better name for the draft’s “generation” step, at least for this surface, is
-**runtime configuration**.
-
-This matters because the resolvers are inherently generic – there is essentially one root
-resolver that maps args to a `SearchQuery`, calls the adapter, and maps the result back;
-the field model only parameterises data. Codegen would emit N near-identical resolver stubs
+The surface is **constructed at runtime from the field-model configuration**
+(`buildGraphQLSchema(config)`), once at startup, with generic resolvers shipped in the package
+attached to that schema – nothing is emitted or committed. The resolvers are inherently
+generic (one root resolver maps args to a `SearchQuery`, calls the engine, and maps the result
+back; the field model only parameterises data), so codegen would emit N near-identical stubs
that all delegate to the same logic, plus a build step and staleness risk, for no benefit.
-**No SDL artifact.** A live GraphQL API serves its own schema via introspection, so clients
-need no committed `.graphql` file. The field-model diff is the reviewable change. A
-`printSchema()` helper exists only as an **optional** CI snapshot test for catching
-accidental breaking changes to the frozen contract – not a shipped artifact.
-
-> Deviation from the stack draft: the draft’s “generate SDL + resolvers” becomes
-> _construct the schema at runtime from configuration; resolvers are generic and in-package;
-> SDL is served live via introspection, not emitted._ For the reconciliation list.
+A live GraphQL API serves its own schema via introspection, so clients need no committed
+`.graphql` file; the field-model diff is the reviewable change. `printGraphQLSchema()` exists
+only as an **optional** CI snapshot test guarding the frozen contract against accidental
+breaking changes – not a shipped artifact.
### The schema-building function
```ts
-function buildSearchSchema(
- schema: SearchSchema,
+// Generic over the config *value’s* type (capture it `as const satisfies SearchType`), so
+// one declaration drives both the runtime schema and the static TS types below.
+function buildGraphQLSchema<S extends SearchType>(
+ schema: S,
options: {
typeName: string; // 'Dataset' – drives all derived type names
queryField?: string; // root field; default lowercased plural of typeName
@@ -60,30 +52,69 @@ function buildSearchSchema(
},
): GraphQLSchema; // executable schema: types + generic resolvers attached
+// Static types derived from the SAME config value’s type (compile-time only, erased at
+// runtime); one source, no codegen, no drift. Exported for typed in-process callers/tests.
+type OutputOf<S>; // { id: string; title: LanguageString[]; size: number | null; … }
+type WhereOf<S>; // { format?: StringFilter; size?: FloatRange; … }
+type OrderByOf<S>; // { field: 'RELEVANCE' | 'TITLE' | …; direction: 'ASC' | 'DESC' }
+type FacetOf<S>; // the facetable-field-name union
+
// also exported for manual composition / non-default servers:
function buildSearchTypeDefsAndResolvers(
schema,
options,
): { typeDefs: string; resolvers: object };
// optional CI helper only:
-function printSearchSchema(schema, options): string; // SDL, for a snapshot/breaking-change test
+function printGraphQLSchema(schema, options): string; // SDL, for a snapshot/breaking-change test
```
-`buildSearchSchema` is the standalone, framework-agnostic artifact (depends only on
+`buildGraphQLSchema` is the standalone, framework-agnostic artifact (depends only on
`graphql` + `@graphql-tools/schema`). Deep customisation passes `extendTypeDefs`/
`extendResolvers` (merged before `makeExecutableSchema`, since Mercurius registers once) or
composes the exported typeDefs/resolvers by hand.
+### A typed surface the contract does not depend on
+
+One `as const satisfies SearchType` declaration drives two **independent** projections: the
+**runtime contract** (the `GraphQLSchema`, built at startup by reading the value –
+`field.kind`, `output`, `facetable`, …) and a **static TS mirror** (`OutputOf` /
+`WhereOf` / `OrderByOf` / `FacetOf`, computed from `typeof schema` via mapped types).
+
+The contract **does not depend on the TS types.** `as const`/`satisfies` are compile-time only
+and erased, so the served schema is byte-identical whether or not the mirror exists – it is a
+developer-experience overlay. The two derivations can drift (the runtime kind→GraphQL-type
+mapping lives in `buildGraphQLSchema`; the type-level mapping in `OutputOf` duplicates it),
+so the **contract** is guarded by the optional `printGraphQLSchema()` SDL snapshot (the real
+artifact), while the TS mirror only catches our own coding mistakes against it.
+
+Values are typed at both ends, with the resolver as the typed transform between them:
+
+| layer | localized text | reference | int64 | keyword (array) | boolean |
+| ----------------------- | ------------------------------------ | --------------------------- | ---------------- | ----------------------- | -------------------- |
+| IR (`ResultDocument`) | `LocalizedValue` (lang map) | `Reference` | `number` | `readonly string[]` | `boolean` |
+| GraphQL (`OutputOf`) | `LanguageString[]` (best-first list) | named type (`Organization`) | `Float`/`number` | `[String!]!`/`string[]` | `Boolean!`/`boolean` |
+
+What stays unchecked is only the generic resolver’s **dynamic middle**: it loops over the
+field model with runtime-string names, so TS cannot prove the object it builds matches
+`OutputOf` – it casts at that boundary, and graphql-js’s executor (not TS) enforces the
+output types at runtime (a wrong-typed return raises a field error). Same “typed boundaries,
+dynamic middle” shape as the engine port and the projection: type the edges where it is
+honest, accept a cast where iteration is inherently dynamic.
+
### Construction rules (field model → schema)
-Type names derive from `typeName`; shared types (`LanguageString`, `Facet`, `FacetBucket`,
-`SortDirection`, `StringFilter`, `IntRange`, `FloatRange`, `DateRange`) are emitted once.
+Type names derive from `typeName`; shared types (`LanguageString`, `ValueBucket`, `RangeBucket`,
+`SortDirection`, `StringFilter`, `IntRange`, `FloatRange`, `DateRange`) are emitted once, and the
+per-type keyed facets object is named `<TypeName>Facets` (e.g. `DatasetFacets`).
GraphQL field names are the field model `name` verbatim (declare camelCase).
- **Output type** – one field per `output` field: `text`+`localized` → `[LanguageString!]!` (best-first; `[0].language` = served language, the per-field `Content-Language`);
- `keyword` array → `[String!]!`, scalar → `String`; `integer` → `Int`; `number` → `Float`;
- `date` → `String` (ISO 8601); `boolean` → `Boolean!` (absent = false); `reference` →
- see below. Nullability from `array` / required / optional; `id` is `String!`.
+ `keyword` array → `[String!]!`, scalar → `String`; `integer` → `Int` (signed 32-bit);
+ `number` → `Float` (exact integers to 2^53); `date` → `String` (ISO 8601); `boolean` →
+ `Boolean!` (absent = false); `reference` → see below. Nullability from `array` / required /
+ optional; `id` is `String!`. A magnitude that can exceed 32 bits (a 64-bit count or byte size
+ – e.g. DR’s `size`) is `number` → `Float`, since `Int` would overflow; a `Long`/`BigInt`
+ custom scalar is the deferred alternative.
- **Reference types** – a `reference` field is typed by the **referenced shape**
(`sh:class`/`sh:node`), emitted once and reused by every field referencing the same shape.
Its fields follow `nestedStrategy`:
@@ -95,9 +126,9 @@ GraphQL field names are the field model `name` verbatim (declare camelCase).
| `inline` (later) | the named type plus the referenced shape’s projected fields |
So DR emits `publisher: Organization` (the `foaf:Agent` shape) and
- `terminologySource: [Term!]!`; a shape’s type is emitted once and reused by any field that
- references it. Named, not a generic GraphQL `Reference`: going `labelOnly → inline` then
- only _adds_ fields (non-breaking), whereas generic→named later would break the contract.
+ `terminologySource: [Term!]!`. Named, not a generic GraphQL `Reference`: going
+ `labelOnly → inline` then only _adds_ fields (non-breaking), whereas generic→named later
+ would break the contract.
- **`where` input** – one field per `filterable` field: `keyword`/`reference` →
`StringFilter { in: [String!] }`; `integer` → `IntRange { min, max }`; `number` →
@@ -105,14 +136,32 @@ GraphQL field names are the field model `name` verbatim (declare camelCase).
`is` value); `text` is excluded (it goes through the `query` arg).
- **`orderBy`** – `RELEVANCE` (the sane default when a `query` is present) plus every
`sortable` field, as an enum, in a single `{ field, direction }` input. Only
- publicly-selectable sorts appear here; the resolver expands the client’s one choice into
- the internal `Sort[]`, appending deployment tie-breaks like DR’s `status_rank` via
- `queryDefaults` (never exposed). Single for now because a user picks one dimension.
- Promoting it to a list later is backward-compatible only for inline-literal clients (list
- input coercion wraps a single value); **variable-based clients break** (`$o: DatasetOrderBy`
- is rejected where `[DatasetOrderBy!]` is expected), so a future array is a deliberate,
- potentially breaking change – not a free one.
-- **Facets** – an enum of every `facetable` field; requested per query, returned with counts.
+ publicly-selectable sorts appear; the resolver expands the client’s one choice into the
+ internal `Sort[]`, appending deployment tie-breaks like DR’s `status_rank` via
+ `queryDefaults` (never exposed). Single for now because a user picks one dimension; promoting
+ it to a list later is backward-compatible only for inline-literal clients (list input
+ coercion) – **variable-based clients break** (`$o: DatasetOrderBy` where `[DatasetOrderBy!]`
+ is expected) – so a future array is a deliberate, potentially breaking change.
+- **Facets** – a **keyed object** (`<TypeName>Facets`), one field per `facetable` field, typed by
+ the field’s kind: a numeric range-facet field is `[RangeBucket!]!`, every other facet is
+ `[ValueBucket!]!`. The facet set and each bucket shape are thus encoded **statically in the
+ schema**, not discovered at runtime through an enum + polymorphic bucket (no `__typename`, no
+ fragments). **Selection is the request**: only the facet keys a query selects are computed
+ (the resolver inspects the selection), each with its **own where-filter removed**
+ (skip-own-filter – a multi-select facet still lists its other options; dropping a `status`
+ filter also drops the valid-only default, so the status facet counts across every status).
+ Two bucket types:
+ - `ValueBucket { value, count, label }` – `value` is the selection key (filter via
+ `field.in`); `label` (nullable) is the engine-resolved canonical **data** label, present
+ only for **reference** (IRI-keyed) facets, `null` for token/free-string facets whose
+ display the consumer owns (its i18n for controlled tokens like `valid` → “Geldig”/“Valid”,
+ or the `value` itself). The null is load-bearing.
+ - `RangeBucket { min, max, count }` – a half-open `[min, max)` numeric bin (`max` null on an
+ open-ended top bin), filtered via `field.range`.
+ - A grouped facet (a coarse category alongside granular values, e.g. `group:rdf` next to media
+ types) needs **no special bucket**: its tokens are denormalized into the field at index time,
+ so they are ordinary `ValueBucket` values – faceted, filtered (`field.in: ["group:rdf"]`) and,
+ where output, read like any other value (see ADR 0003).
### Resulting schema (DR example, abridged)
@@ -137,35 +186,23 @@ type Dataset {
publisher: Organization
terminologySource: [Term!]!
format: [String!]!
- class: [String!]!
- size: Int
+ size: Float # int64 magnitude → Float, not Int (32-bit)
datePosted: String
status: String
iiif: Boolean!
# … keyword, language, iiifManifestCount, ndeSchemaAp, linkedData, terms, persistentUris
}
-input StringFilter {
- in: [String!]
-}
-input IntRange {
- min: Int
- max: Int
-}
-input DateRange {
- min: String
- max: String
-}
+# shared inputs are emitted once and reused: DR uses StringFilter + FloatRange +
+# SortDirection (IntRange / DateRange are pruned – no filterable int/date field).
input DatasetWhere {
publisher: StringFilter
format: StringFilter
class: StringFilter
status: StringFilter
- size: IntRange
- datePosted: DateRange
- iiif: Boolean
- # … keyword, language, terminologySource, catalog, ndeSchemaAp, linkedData, terms, persistentUris
+ size: FloatRange
+ # … keyword, language, terminologySource, catalog
}
enum DatasetSortField {
@@ -174,36 +211,31 @@ enum DatasetSortField {
DATE_POSTED
SIZE
}
-enum SortDirection {
- ASC
- DESC
-}
input DatasetOrderBy {
field: DatasetSortField!
direction: SortDirection! = DESC
}
-enum DatasetFacetField {
- PUBLISHER
- KEYWORD
- LANGUAGE
- FORMAT
- CLASS
- TERMINOLOGY_SOURCE
- STATUS
- IIIF
- NDE_SCHEMA_AP
- LINKED_DATA
- TERMS
- PERSISTENT_URIS
+type ValueBucket {
+ value: String! # selection key: a media type, a token (group:rdf), or an IRI for reference facets
+ count: Int!
+ label: [LanguageString!] # nullable; resolved data label for reference facets, else null
}
-type FacetBucket {
- value: String!
+type RangeBucket {
+ min: Float # half-open [min, max); max null = open-ended top bin
+ max: Float
count: Int!
}
-type Facet {
- field: DatasetFacetField!
- buckets: [FacetBucket!]!
+type DatasetFacets {
+ # one field per facetable field, typed by kind; selection = request, skip-own-filter applied
+ publisher: [ValueBucket!]!
+ keyword: [ValueBucket!]!
+ language: [ValueBucket!]!
+ format: [ValueBucket!]!
+ class: [ValueBucket!]!
+ terminologySource: [ValueBucket!]!
+ status: [ValueBucket!]!
+ size: [RangeBucket!]!
}
type DatasetSearchResult {
@@ -211,7 +243,7 @@ type DatasetSearchResult {
total: Int!
page: Int!
perPage: Int!
- facets: [Facet!]!
+ facets: DatasetFacets!
}
type Query {
@@ -220,19 +252,20 @@ type Query {
where: DatasetWhere
orderBy: DatasetOrderBy
page: Int = 1
- perPage: Int = 20
- facets: [DatasetFacetField!]
+ perPage: Int = 20 # no `facets` arg – selecting facet keys IS the request
): DatasetSearchResult!
}
```
Numbered pagination (`page`/`perPage` + `total`), per
[ADR 3](./0003-search-api-core-query-model.md) – no Relay connection. The reference types
-(`Organization`, `Term`) carry `id + name` (labelOnly) from DR’s sidecar labels collection,
-resolved by the adapter. `publisher` is single (`dct:publisher` `maxCount 1`); `creator` is
-search-only – its name feeds full-text `query` but it has no output field of its own,
-mirroring the current card. `catalog` is filter-only, so it appears in `where` but not as an
-output field.
+carry `id + name` (labelOnly) from DR’s sidecar labels collection, resolved by the adapter.
+`publisher` is single (`dct:publisher` `maxCount 1`); `creator` is search-only (its name feeds
+full-text `query` but it has no output field); `catalog` is filter-only (in `where`, not output);
+`class` is facet + filter but not output (its `group:` tokens surface only as facet buckets, never
+as card values); `datePosted` is sortable + output only; and the NDE compatibility booleans
+(`iiif`, `ndeSchemaAp`, `linkedData`, `terms`) are output-only vinkjes – in neither `where` nor the
+facets until “filter by vinkje” ships.
### Resolver behaviour
@@ -241,37 +274,33 @@ The single, generic root resolver (shipped in the package, not emitted):
1. **Args → `SearchQuery`** (pure): `query`→`text`; `where`→`Filter[]`; `orderBy`→`Sort[]`
(`RELEVANCE`→reserved `relevance`); `page`/`perPage`→`offset`/`limit`; `facets`→logical
names; `locale`←`context.acceptLanguage[0]`.
-2. **Apply `options.queryDefaults`** – the generic resolver bakes no deployment defaults;
- DR injects its policy here: default `status:=valid`; default sort `relevance` when a
- `query` is present else `title`; and the `status_rank` tie-break appended to either.
-3. **`context.adapter.search(query, schema)` → `SearchResult`.**
+2. **Apply `options.queryDefaults`** – the generic resolver bakes no deployment defaults; DR
+ injects its policy here: default `status:=valid`; default sort `relevance` when a `query` is
+ present else `title`; and the `status_rank` tie-break appended to either.
+3. **`context.engine.search(query, schema)` → `SearchResult`.**
4. **`SearchResult` → output** – scalars pass through; a `LocalizedValue` map →
- `[LanguageString]` ordered by `options.languageOrder(available, acceptLanguage)`;
- reference values likewise; facets keyed logical→enum. GraphQL field selection prunes.
+ `[LanguageString]` ordered by `options.languageOrder(available, acceptLanguage)`; reference
+ values likewise; facets keyed by logical field name. GraphQL field selection prunes.
-Default `languageOrder`: Accept-Language entries first, then remaining tagged languages,
-then untagged (`und`) last – so `[0]` is always the best available value.
+Default `languageOrder`: Accept-Language entries first, then remaining tagged languages, then
+untagged (`und`) last – so `[0]` is always the best available value.
### Lifecycle and performance
-- **Built once at startup.** The consumer calls `buildSearchSchema` during boot and hands
- the single `GraphQLSchema` to its server; the field model is static per deployment, so it
- is never rebuilt per request.
-- **Held and reused.** That one schema serves every request (Mercurius additionally
- caches/compiles it).
-- **Zero per-request penalty vs codegen.** A runtime-constructed schema is the same
- `GraphQLSchema` object codegen would have produced; the only added cost is the one-time
- build, sub-millisecond to low-single-digit-ms for a schema this size.
+- **Built once at startup, reused for every request.** The field model is static per
+ deployment, so the single `GraphQLSchema` is constructed during boot (sub-millisecond to
+ low-single-digit-ms for a schema this size) and never rebuilt per request – the same object
+ codegen would have produced, with no per-request penalty (Mercurius additionally caches it).
- **Hot path is the engine, not GraphQL.** Per-request cost is dominated by the Typesense
round-trip; parse/validate/resolve of a small query is sub-millisecond.
-- **Introspection serves the contract.** Cheap (a query against the built schema, cached by
- clients). Leave it on, or disable in production and use `printSearchSchema` for tooling.
+- **Introspection serves the contract** (cheap, client-cached). Leave it on, or disable in
+ production and use `printGraphQLSchema` for tooling.
### Context contract
```ts
interface SearchContext {
- adapter: SearchAdapter; // any engine
+ engine: SearchEngine; // the port; any engine adapter
acceptLanguage: readonly string[]; // parsed, ordered; drives locale + output ordering
}
```
@@ -281,21 +310,18 @@ Each transport populates it per request; no framework type appears in the packag
## Consequences
- The GraphQL surface is configured at runtime from the
- [ADR 3](./0003-search-api-core-query-model.md) field model, so it cannot drift from the
- index or a later REST surface, and works under any GraphQL server.
+ [ADR 3](./0003-search-api-core-query-model.md) field model, so it cannot drift from the index
+ or a later REST surface, and works under any GraphQL server.
- **Frozen (public contract):** `LanguageString`, the named reference types (`Organization`,
`Term`, …), output types, `where` operators, `orderBy` enums, numbered-pagination args,
facet types. Breaking to change – right in v1.
-- **Internal:** args→`SearchQuery` mapping, language ordering, how the adapter computes
- facets, the `SearchDocument` shape.
-- **Deviations to reconcile into the platform draft:**
- - “generate SDL + resolvers” → _runtime configuration_ (construct at startup from config;
- generic in-package resolvers; SDL served via introspection, not emitted as an artifact).
- - Named reference types per shape (`Organization`, `Term`) rather than the draft’s uniform
- `labelOnly` `{ @id, @type, name }` reference shape – chosen for ergonomics and
- additive `inline` growth.
-- Deferred: a `dataset(id)` single-resource query (detail-page-on-index direction; DR detail
- stays on SPARQL); cross-collection `@reference` joins beyond inline labels; cursor
- pagination; a `Date` scalar (kept ISO `String`); transport-layer persisted queries / cost
- limits; a root or per-field language argument (Accept-Language is the sole preference
- mechanism); metadata-language-availability filtering (a facetable dimension, not v1).
+- **Internal:** args→`SearchQuery` mapping, language ordering, how the adapter computes facets,
+ the `SearchDocument` shape.
+- **Named reference types** per shape rather than one uniform reference type – chosen for
+ ergonomics and additive `inline` growth (`labelOnly` → `inline` only adds fields).
+- Deferred: a `dataset(id)` single-resource query (DR detail stays on SPARQL); cross-collection
+ `@reference` joins beyond inline labels; cursor pagination; a `Date` scalar (kept ISO
+ `String`) and a `Long`/`BigInt` scalar for 64-bit integers (kept `Float`); transport-layer
+ persisted queries / cost limits; a root or per-field language argument (Accept-Language is the
+ sole preference mechanism); metadata-language-availability filtering (a facetable dimension,
+ not v1).
diff --git a/package-lock.json b/package-lock.json
index e1a4b8d8..6abec10e 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -24953,6 +24953,10 @@
"resolved": "packages/search",
"link": true
},
+ "node_modules/@lde/search-api-graphql": {
+ "resolved": "packages/search-api-graphql",
+ "link": true
+ },
"node_modules/@lde/search-typesense": {
"resolved": "packages/search-typesense",
"link": true
@@ -33133,7 +33137,6 @@
"version": "15.10.2",
"resolved": "https://registry.npmjs.org/graphql/-/graphql-15.10.2.tgz",
"integrity": "sha512-1PRqdDPAmViWr4h1GVBT8RoPZfWSGZa7kDzleTilOfVIslsgf+cia3Nl95v1KDmR4iERPaT7WzQ+tN4MJmbg3w==",
- "dev": true,
"license": "MIT",
"engines": {
"node": ">= 10.x"
@@ -41008,7 +41011,7 @@
},
"packages/dataset": {
"name": "@lde/dataset",
- "version": "0.7.7",
+ "version": "0.7.8",
"license": "MIT",
"dependencies": {
"tslib": "^2.3.0"
@@ -41016,10 +41019,10 @@
},
"packages/dataset-registry-client": {
"name": "@lde/dataset-registry-client",
- "version": "0.8.4",
+ "version": "0.8.5",
"license": "MIT",
"dependencies": {
- "@lde/dataset": "0.7.7",
+ "@lde/dataset": "^0.7.8",
"@traqula/generator-sparql-1-1": "^1.1.6",
"@traqula/parser-sparql-1-1": "^1.1.5",
"@traqula/rules-sparql-1-1": "^1.1.0",
@@ -41032,34 +41035,34 @@
},
"packages/distribution-downloader": {
"name": "@lde/distribution-downloader",
- "version": "0.6.5",
+ "version": "0.6.6",
"license": "MIT",
"dependencies": {
- "@lde/dataset": "0.7.7",
+ "@lde/dataset": "^0.7.8",
"filenamify-url": "4.0.0",
"tslib": "^2.3.0"
}
},
"packages/distribution-health": {
"name": "@lde/distribution-health",
- "version": "0.2.1",
+ "version": "0.2.3",
"license": "MIT",
"dependencies": {
- "@lde/distribution-probe": "0.2.1",
- "@lde/sparql-importer": "0.6.5",
+ "@lde/distribution-probe": "^0.2.2",
+ "@lde/sparql-importer": "^0.6.5",
"tslib": "^2.3.0"
},
"devDependencies": {
- "@lde/dataset": "0.7.7"
+ "@lde/dataset": "^0.7.7"
}
},
"packages/distribution-monitor": {
"name": "@lde/distribution-monitor",
- "version": "0.2.1",
+ "version": "0.2.3",
"license": "MIT",
"dependencies": {
- "@lde/dataset": "0.7.7",
- "@lde/distribution-probe": "0.2.1",
+ "@lde/dataset": "^0.7.8",
+ "@lde/distribution-probe": "^0.2.2",
"c12": "^3.3.4",
"commander": "^15.0.0",
"cron": "^4.1.0",
@@ -41086,10 +41089,10 @@
},
"packages/distribution-probe": {
"name": "@lde/distribution-probe",
- "version": "0.2.1",
+ "version": "0.2.3",
"license": "MIT",
"dependencies": {
- "@lde/dataset": "0.7.7",
+ "@lde/dataset": "^0.7.8",
"rdf-parse": "^5.0.0",
"tslib": "^2.3.0"
}
@@ -41756,7 +41759,7 @@
},
"packages/docgen": {
"name": "@lde/docgen",
- "version": "0.6.18",
+ "version": "0.6.19",
"license": "MIT",
"dependencies": {
"@tpluscode/rdf-ns-builders": "^5.0.0",
@@ -41786,7 +41789,7 @@
},
"packages/fastify-rdf": {
"name": "@lde/fastify-rdf",
- "version": "0.4.6",
+ "version": "0.4.7",
"license": "MIT",
"dependencies": {
"@fastify/accepts": "^5.0.0",
@@ -42483,7 +42486,7 @@
},
"packages/iiif-validator": {
"name": "@lde/iiif-validator",
- "version": "0.1.4",
+ "version": "0.1.5",
"license": "MIT",
"dependencies": {
"@iiif/parser": "^2.2.10",
@@ -42492,7 +42495,7 @@
},
"packages/local-sparql-endpoint": {
"name": "@lde/local-sparql-endpoint",
- "version": "0.2.13",
+ "version": "0.2.14",
"license": "MIT",
"dependencies": {
"jest-dev-server": "11.0.0",
@@ -42505,15 +42508,15 @@
},
"packages/pipeline": {
"name": "@lde/pipeline",
- "version": "0.31.3",
+ "version": "0.31.5",
"license": "MIT",
"dependencies": {
- "@lde/dataset": "0.7.7",
- "@lde/dataset-registry-client": "0.8.4",
- "@lde/distribution-health": "0.2.1",
- "@lde/distribution-probe": "0.2.1",
- "@lde/sparql-importer": "0.6.5",
- "@lde/sparql-server": "0.4.11",
+ "@lde/dataset": "^0.7.7",
+ "@lde/dataset-registry-client": "^0.8.4",
+ "@lde/distribution-health": "^0.2.2",
+ "@lde/distribution-probe": "^0.2.2",
+ "@lde/sparql-importer": "^0.6.5",
+ "@lde/sparql-server": "^0.4.11",
"@rdfjs/namespace": "^2.0.1",
"@rdfjs/types": "^2.0.1",
"@tpluscode/rdf-ns-builders": "^5.0.0",
@@ -42531,7 +42534,7 @@
},
"packages/pipeline-console-reporter": {
"name": "@lde/pipeline-console-reporter",
- "version": "0.22.3",
+ "version": "0.22.5",
"license": "MIT",
"dependencies": {
"chalk": "^5.4.1",
@@ -42541,8 +42544,8 @@
"tslib": "^2.3.0"
},
"peerDependencies": {
- "@lde/dataset": "0.7.7",
- "@lde/pipeline": "0.31.3"
+ "@lde/dataset": "^0.7.8",
+ "@lde/pipeline": "^0.31.4"
}
},
"packages/pipeline-console-reporter/node_modules/ansi-regex": {
@@ -42722,7 +42725,7 @@
},
"packages/pipeline-shacl-sampler": {
"name": "@lde/pipeline-shacl-sampler",
- "version": "0.5.3",
+ "version": "0.5.5",
"license": "MIT",
"dependencies": {
"@rdfjs/types": "^2.0.1",
@@ -42732,8 +42735,8 @@
"tslib": "^2.3.0"
},
"peerDependencies": {
- "@lde/dataset": "0.7.7",
- "@lde/pipeline": "0.31.3"
+ "@lde/dataset": "^0.7.8",
+ "@lde/pipeline": "^0.31.4"
}
},
"packages/pipeline-shacl-sampler/node_modules/n3": {
@@ -42751,7 +42754,7 @@
},
"packages/pipeline-shacl-validator": {
"name": "@lde/pipeline-shacl-validator",
- "version": "0.13.3",
+ "version": "0.13.5",
"license": "MIT",
"dependencies": {
"@rdfjs/types": "^2.0.1",
@@ -42764,8 +42767,8 @@
"n3": "^2.1.0"
},
"peerDependencies": {
- "@lde/dataset": "0.7.7",
- "@lde/pipeline": "0.31.3"
+ "@lde/dataset": "^0.7.8",
+ "@lde/pipeline": "^0.31.4"
}
},
"packages/pipeline-shacl-validator/node_modules/n3": {
@@ -42784,7 +42787,7 @@
},
"packages/pipeline-void": {
"name": "@lde/pipeline-void",
- "version": "0.29.3",
+ "version": "0.29.5",
"license": "MIT",
"dependencies": {
"@rdfjs/types": "^2.0.1",
@@ -42794,8 +42797,8 @@
"tslib": "^2.3.0"
},
"peerDependencies": {
- "@lde/dataset": "0.7.7",
- "@lde/pipeline": "0.31.3"
+ "@lde/dataset": "^0.7.8",
+ "@lde/pipeline": "^0.31.4"
}
},
"packages/pipeline-void/node_modules/n3": {
@@ -42852,10 +42855,10 @@
},
"packages/search": {
"name": "@lde/search",
- "version": "0.1.1",
+ "version": "0.1.2",
"license": "MIT",
"dependencies": {
- "@lde/text-normalization": "0.1.0",
+ "@lde/text-normalization": "^0.1.1",
"@rdfjs/types": "^2.0.1",
"@tpluscode/rdf-ns-builders": "^5.0.0",
"jsonld": "^9.0.0",
@@ -42866,11 +42869,23 @@
"n3": "^2.1.0"
}
},
+ "packages/search-api-graphql": {
+ "name": "@lde/search-api-graphql",
+ "version": "0.1.0",
+ "license": "MIT",
+ "dependencies": {
+ "@lde/search": "^0.1.2",
+ "graphql": "^15.8.0",
+ "tslib": "^2.3.0"
+ }
+ },
"packages/search-typesense": {
"name": "@lde/search-typesense",
- "version": "0.1.0",
+ "version": "0.1.1",
"license": "MIT",
"dependencies": {
+ "@lde/search": "^0.1.2",
+ "@lde/text-normalization": "^0.1.1",
"tslib": "^2.3.0",
"typesense": "^3.0.6"
},
@@ -42894,28 +42909,28 @@
},
"packages/sparql-importer": {
"name": "@lde/sparql-importer",
- "version": "0.6.5",
+ "version": "0.6.6",
"license": "MIT",
"dependencies": {
- "@lde/dataset": "0.7.7",
- "@lde/distribution-downloader": "0.6.5",
- "@lde/task-runner": "0.2.11",
+ "@lde/dataset": "^0.7.8",
+ "@lde/distribution-downloader": "^0.6.5",
+ "@lde/task-runner": "^0.2.11",
"tslib": "^2.3.0"
}
},
"packages/sparql-qlever": {
"name": "@lde/sparql-qlever",
- "version": "0.14.10",
+ "version": "0.14.11",
"license": "MIT",
"dependencies": {
- "@lde/dataset": "0.7.7",
- "@lde/distribution-downloader": "0.6.5",
- "@lde/sparql-importer": "0.6.5",
- "@lde/sparql-server": "0.4.11",
- "@lde/task-runner": "0.2.11",
- "@lde/task-runner-docker": "0.2.13",
- "@lde/task-runner-native": "0.2.14",
- "@lde/wait-for-sparql": "0.2.13",
+ "@lde/dataset": "^0.7.8",
+ "@lde/distribution-downloader": "^0.6.5",
+ "@lde/sparql-importer": "^0.6.5",
+ "@lde/sparql-server": "^0.4.11",
+ "@lde/task-runner": "^0.2.11",
+ "@lde/task-runner-docker": "^0.2.13",
+ "@lde/task-runner-native": "^0.2.14",
+ "@lde/wait-for-sparql": "^0.2.13",
"rdf-parse": "^5.0.0",
"rdf-serialize": "^5.1.0",
"tslib": "^2.3.0",
@@ -43620,7 +43635,7 @@
},
"packages/sparql-server": {
"name": "@lde/sparql-server",
- "version": "0.4.11",
+ "version": "0.4.12",
"license": "MIT",
"dependencies": {
"tslib": "^2.3.0"
@@ -43628,7 +43643,7 @@
},
"packages/task-runner": {
"name": "@lde/task-runner",
- "version": "0.2.11",
+ "version": "0.2.12",
"license": "MIT",
"dependencies": {
"tslib": "^2.3.0"
@@ -43636,10 +43651,10 @@
},
"packages/task-runner-docker": {
"name": "@lde/task-runner-docker",
- "version": "0.2.13",
+ "version": "0.2.14",
"license": "MIT",
"dependencies": {
- "@lde/task-runner": "0.2.11",
+ "@lde/task-runner": "^0.2.12",
"dockerode": "^5.0.1",
"tslib": "^2.3.0"
},
@@ -43649,16 +43664,16 @@
},
"packages/task-runner-native": {
"name": "@lde/task-runner-native",
- "version": "0.2.14",
+ "version": "0.2.15",
"license": "MIT",
"dependencies": {
- "@lde/task-runner": "0.2.11",
+ "@lde/task-runner": "^0.2.12",
"tslib": "^2.3.0"
}
},
"packages/text-normalization": {
"name": "@lde/text-normalization",
- "version": "0.1.0",
+ "version": "0.1.1",
"license": "MIT",
"dependencies": {
"tslib": "^2.3.0"
@@ -43666,7 +43681,7 @@
},
"packages/wait-for-sparql": {
"name": "@lde/wait-for-sparql",
- "version": "0.2.13",
+ "version": "0.2.14",
"license": "MIT",
"dependencies": {
"fetch-sparql-endpoint": "^7.1.1",
diff --git a/packages/search-api-graphql/README.md b/packages/search-api-graphql/README.md
new file mode 100644
index 00000000..d6274a9d
--- /dev/null
+++ b/packages/search-api-graphql/README.md
@@ -0,0 +1,56 @@
+# @lde/search-api-graphql
+
+The GraphQL surface for the [`@lde/search`](../search) core. **Both engine- and
+domain-agnostic:** it builds an executable `GraphQLSchema` from any `SearchType`
+at runtime, and serves it with one generic resolver over any `SearchEngine`. It
+names neither your **domain** (you pass `typeName` — `Dataset`, `Person`,
+`CreativeWork`, …) nor your **engine** (the resolver calls `context.engine`, be it
+[`@lde/search-typesense`](../search-typesense) or another adapter).
+
+## Runtime configuration, not codegen
+
+`buildGraphQLSchema(searchType, { typeName })` constructs the schema once at
+startup from the field model — no SDL artifact, no generated resolver stubs. The
+field model is the single source; the GraphQL contract is whatever it produces.
+Output types, the `where`/`orderBy`/facet inputs, reference types and nullability
+are all derived from each field’s `kind` and capability flags.
+
+```ts
+import { buildGraphQLSchema } from '@lde/search-api-graphql';
+
+const gqlSchema = buildGraphQLSchema(DATASET, {
+ typeName: 'Dataset',
+ queryDefaults: (query) => ({
+ ...query,
+ where: [...query.where, { field: 'status', in: ['valid'] }],
+ }),
+});
+
+// Hand `gqlSchema` to any graphql-js server; populate the per-request context:
+// { engine: SearchEngine, acceptLanguage: string[] }
+```
+
+## What it builds
+
+- **Output type** (`typeName`) — localized text → best-first `[LanguageString!]!`
+ (`[0].language` is the language actually served); references → named per-shape
+ types (`Organization`, `Term`) with a `name`; scalars/booleans per kind; `date`
+ → ISO 8601 string; nullability from `required` / `array` / `kind`.
+- **`where`** — one input per `filterable` field (`StringFilter`, `IntRange` /
+ `FloatRange` / `DateRange`, or `Boolean`).
+- **`orderBy`** — `RELEVANCE` plus every `sortable` field, as an enum.
+- **Facets** — a keyed object with one field per `facetable` field (`[RangeBucket!]!`
+ for range facets, else `[ValueBucket!]!`); a value bucket carries `value` + `count` +
+ a nullable `label` — the resolved data label for **reference** facets, `null` for
+ token/free-string facets whose display the consumer owns (its own i18n, or the value itself).
+
+## Why it can’t drift
+
+The surface reads the same field model the index is built from, and compiles into
+the same neutral `SearchQuery` the engine consumes — so the API, the index and a
+future REST surface stay in lockstep. The contract is **frozen** (breaking to
+change), and because it is generated rather than hand-written, a _consumer_ guards
+it with a `printGraphQLSchema(searchType, options)` SDL snapshot over its **own**
+search type and `typeName` — that snapshot also catches a `buildGraphQLSchema`
+change in a future version of this library silently altering the consumer’s
+contract.
diff --git a/packages/search-api-graphql/eslint.config.mjs b/packages/search-api-graphql/eslint.config.mjs
new file mode 100644
index 00000000..2dcaf60c
--- /dev/null
+++ b/packages/search-api-graphql/eslint.config.mjs
@@ -0,0 +1,22 @@
+import baseConfig from '../../eslint.config.mjs';
+
+export default [
+ ...baseConfig,
+ {
+ files: ['**/*.json'],
+ rules: {
+ '@nx/dependency-checks': [
+ 'error',
+ {
+ ignoredFiles: [
+ '{projectRoot}/eslint.config.{js,cjs,mjs}',
+ '{projectRoot}/vite.config.{js,ts,mjs,mts}',
+ ],
+ },
+ ],
+ },
+ languageOptions: {
+ parser: await import('jsonc-eslint-parser'),
+ },
+ },
+];
diff --git a/packages/search-api-graphql/package.json b/packages/search-api-graphql/package.json
new file mode 100644
index 00000000..70f76450
--- /dev/null
+++ b/packages/search-api-graphql/package.json
@@ -0,0 +1,32 @@
+{
+ "name": "@lde/search-api-graphql",
+ "version": "0.1.0",
+ "description": "Engine- and domain-agnostic GraphQL surface for @lde/search: builds an executable GraphQLSchema from any SearchType at runtime (no codegen), served by one generic resolver over any SearchEngine. You supply the search type and typeName; it names neither your domain nor your engine.",
+ "repository": {
+ "url": "git+https://github.com/ldelements/lde.git",
+ "directory": "packages/search-api-graphql"
+ },
+ "license": "MIT",
+ "type": "module",
+ "exports": {
+ "./package.json": "./package.json",
+ ".": {
+ "types": "./dist/index.d.ts",
+ "import": "./dist/index.js",
+ "development": "./src/index.ts",
+ "default": "./dist/index.js"
+ }
+ },
+ "main": "./dist/index.js",
+ "module": "./dist/index.js",
+ "types": "./dist/index.d.ts",
+ "files": [
+ "dist",
+ "!**/*.tsbuildinfo"
+ ],
+ "dependencies": {
+ "@lde/search": "^0.1.2",
+ "graphql": "^15.8.0",
+ "tslib": "^2.3.0"
+ }
+}
diff --git a/packages/search-api-graphql/src/build-schema.ts b/packages/search-api-graphql/src/build-schema.ts
new file mode 100644
index 00000000..836cc2a6
--- /dev/null
+++ b/packages/search-api-graphql/src/build-schema.ts
@@ -0,0 +1,514 @@
+import {
+ GraphQLBoolean,
+ GraphQLEnumType,
+ GraphQLFloat,
+ GraphQLInputObjectType,
+ GraphQLInt,
+ GraphQLList,
+ GraphQLNonNull,
+ GraphQLObjectType,
+ GraphQLSchema,
+ GraphQLString,
+ printSchema,
+ type GraphQLEnumValueConfigMap,
+ type GraphQLFieldConfig,
+ type GraphQLInputFieldConfig,
+ type GraphQLInputType,
+ type GraphQLOutputType,
+} from 'graphql';
+import {
+ facetableFields,
+ filterableFields,
+ filterOperatorFor,
+ outputFields,
+ sortableFields,
+ type Filter,
+ type LocalizedValue,
+ type Reference,
+ type SearchEngine,
+ type SearchField,
+ type SearchQuery,
+ type SearchType,
+ type Sort,
+} from '@lde/search';
+import {
+ defaultLanguageOrder,
+ toLanguageStrings,
+ type LanguageOrder,
+} from './language.js';
+
+/** Populated per request by the transport; no framework type appears here. */
+export interface SearchContext {
+ readonly engine: SearchEngine;
+ /** Parsed, ordered `Accept-Language`; drives locale selection and output order. */
+ readonly acceptLanguage: readonly string[];
+ /**
+ * Called when a single facet's computation fails. The facet degrades to an
+ * empty list (a supplementary facet must not fail the whole query); supply
+ * this to log the cause. Optional — omit to swallow silently.
+ */
+ readonly onFacetError?: (field: string, error: unknown) => void;
+}
+
+export interface BuildGraphQLSchemaOptions {
+ /** Drives all derived type names, e.g. `Dataset`. */
+ readonly typeName: string;
+ /** Root query field; defaults to `typeName` with its first letter lowercased and `s` appended. */
+ readonly queryField?: string;
+ /** Consumer policy applied to every query (default status, sort, tie-breaks). */
+ readonly queryDefaults?: (
+ query: SearchQuery,
+ context: SearchContext,
+ ) => SearchQuery;
+ /** Output-language ordering; defaults to Accept-Language-first, `und` last. */
+ readonly languageOrder?: LanguageOrder;
+}
+
+type Source = Record;
+
+const nonNullListOf = (type: GraphQLOutputType): GraphQLOutputType =>
+ new GraphQLNonNull(new GraphQLList(new GraphQLNonNull(type)));
+
+const scalarOutput = (
+ scalar: GraphQLOutputType,
+ field: SearchField,
+): GraphQLOutputType =>
+ field.required === true ? new GraphQLNonNull(scalar) : scalar;
+
+/** SCREAMING_SNAKE_CASE for an enum value name, e.g. `datePosted` → `DATE_POSTED`. */
+function screamingSnake(name: string): string {
+ return name.replace(/([a-z0-9])([A-Z])/g, '$1_$2').toUpperCase();
+}
+
+/**
+ * Construct an executable GraphQL schema from the unified {@link SearchField}
+ * model at runtime — no codegen, no SDL artifact. One generic resolver maps the
+ * arguments to a {@link SearchQuery}, calls `context.engine`, and maps the result
+ * back; the field model only parameterises data.
+ */
+export function buildGraphQLSchema(
+ searchType: SearchType,
+ options: BuildGraphQLSchemaOptions,
+): GraphQLSchema {
+ const { typeName } = options;
+ const languageOrder = options.languageOrder ?? defaultLanguageOrder;
+ const queryField =
+ options.queryField ??
+ `${typeName.charAt(0).toLowerCase()}${typeName.slice(1)}s`;
+
+ const languageString = new GraphQLObjectType({
+ name: 'LanguageString',
+ fields: {
+ language: { type: GraphQLString },
+ value: { type: new GraphQLNonNull(GraphQLString) },
+ },
+ });
+ // A plain value facet bucket: a selection key, its count, and (for reference
+ // facets) the engine-resolved data label; null for token/free-string facets
+ // whose display the consumer owns.
+ const valueBucket = new GraphQLObjectType({
+ name: 'ValueBucket',
+ fields: {
+ value: { type: new GraphQLNonNull(GraphQLString) },
+ count: { type: new GraphQLNonNull(GraphQLInt) },
+ label: {
+ type: new GraphQLList(new GraphQLNonNull(languageString)),
+ resolve: (bucket: Source, _args: unknown, context: SearchContext) => {
+ const label = bucket.label as LocalizedValue | undefined;
+ return label
+ ? toLanguageStrings(label, context.acceptLanguage, languageOrder)
+ : null;
+ },
+ },
+ },
+ });
+ // A numeric range-facet bin: half-open `[min, max)` bounds (max null on an
+ // open-ended top bin) and the count of documents in it.
+ const rangeBucket = new GraphQLObjectType({
+ name: 'RangeBucket',
+ fields: {
+ min: {
+ type: GraphQLFloat,
+ resolve: (bucket: Source) => bucket.min ?? null,
+ },
+ max: {
+ type: GraphQLFloat,
+ resolve: (bucket: Source) => bucket.max ?? null,
+ },
+ count: { type: new GraphQLNonNull(GraphQLInt) },
+ },
+ });
+ const sortDirection = new GraphQLEnumType({
+ name: 'SortDirection',
+ values: { ASC: { value: 'asc' }, DESC: { value: 'desc' } },
+ });
+ const stringFilter = new GraphQLInputObjectType({
+ name: 'StringFilter',
+ fields: {
+ in: { type: new GraphQLList(new GraphQLNonNull(GraphQLString)) },
+ },
+ });
+ const intRange = rangeInput('IntRange', GraphQLInt);
+ const floatRange = rangeInput('FloatRange', GraphQLFloat);
+ const dateRange = rangeInput('DateRange', GraphQLString);
+
+ const labelList = (
+ resolveLabel: (source: Source) => LocalizedValue | undefined,
+ ) => ({
+ type: nonNullListOf(languageString),
+ resolve: (source: Source, _args: unknown, context: SearchContext) => {
+ const value = resolveLabel(source);
+ return value
+ ? toLanguageStrings(value, context.acceptLanguage, languageOrder)
+ : [];
+ },
+ });
+
+ // One reference type per referenced shape, reused by every field.
+ const referenceTypes = new Map();
+ for (const field of outputFields(searchType)) {
+ if (
+ field.kind === 'reference' &&
+ field.ref &&
+ !referenceTypes.has(field.ref.type)
+ ) {
+ referenceTypes.set(
+ field.ref.type,
+ new GraphQLObjectType({
+ name: field.ref.type,
+ fields: {
+ id: {
+ type: new GraphQLNonNull(GraphQLString),
+ resolve: (source: Source) => (source as unknown as Reference).id,
+ },
+ name: labelList((source) => (source as unknown as Reference).label),
+ },
+ }),
+ );
+ }
+ }
+
+ const outputType = new GraphQLObjectType({
+ name: typeName,
+ fields: () => {
+ const fields: Record<
+ string,
+ GraphQLFieldConfig
+ > = {
+ id: { type: new GraphQLNonNull(GraphQLString) },
+ };
+ for (const field of outputFields(searchType)) {
+ fields[field.name] = outputFieldConfig(field);
+ }
+ return fields;
+ },
+ });
+
+ function outputFieldConfig(
+ field: SearchField,
+ ): GraphQLFieldConfig {
+ const passthrough = (source: Source) => source[field.name] ?? null;
+ switch (field.kind) {
+ case 'text':
+ return labelList(
+ (source) => source[field.name] as LocalizedValue | undefined,
+ );
+ case 'keyword':
+ return field.array === true
+ ? {
+ type: nonNullListOf(GraphQLString),
+ resolve: (s) => s[field.name] ?? [],
+ }
+ : { type: scalarOutput(GraphQLString, field), resolve: passthrough };
+ case 'reference': {
+ const referenceType = referenceTypes.get(field.ref?.type ?? '')!;
+ return field.array === true
+ ? {
+ type: nonNullListOf(referenceType),
+ resolve: (s) => s[field.name] ?? [],
+ }
+ : {
+ type:
+ field.required === true
+ ? new GraphQLNonNull(referenceType)
+ : referenceType,
+ resolve: passthrough,
+ };
+ }
+ case 'integer':
+ return { type: scalarOutput(GraphQLInt, field), resolve: passthrough };
+ case 'number':
+ return {
+ type: scalarOutput(GraphQLFloat, field),
+ resolve: passthrough,
+ };
+ case 'date':
+ // Stored as Unix seconds (int64); the surface serves ISO 8601 (ADR 4).
+ return {
+ type: scalarOutput(GraphQLString, field),
+ resolve: (source) => {
+ const value = source[field.name];
+ return typeof value === 'number'
+ ? new Date(value * 1000).toISOString()
+ : (value ?? null);
+ },
+ };
+ case 'boolean':
+ return {
+ type: new GraphQLNonNull(GraphQLBoolean),
+ resolve: (source) => source[field.name] === true,
+ };
+ }
+ }
+
+ const whereInput = new GraphQLInputObjectType({
+ name: `${typeName}Where`,
+ fields: () => {
+ const fields: Record = {};
+ for (const field of filterableFields(searchType)) {
+ fields[field.name] = { type: whereFieldType(field) };
+ }
+ return fields;
+ },
+ });
+
+ function whereFieldType(field: SearchField): GraphQLInputType {
+ switch (filterOperatorFor(field.kind)) {
+ case 'in':
+ return stringFilter;
+ case 'range':
+ return field.kind === 'integer'
+ ? intRange
+ : field.kind === 'number'
+ ? floatRange
+ : dateRange;
+ default:
+ return GraphQLBoolean;
+ }
+ }
+
+ const sortValues: GraphQLEnumValueConfigMap = {
+ RELEVANCE: { value: 'relevance' },
+ };
+ for (const field of sortableFields(searchType)) {
+ sortValues[screamingSnake(field.name)] = { value: field.name };
+ }
+ const sortField = new GraphQLEnumType({
+ name: `${typeName}SortField`,
+ values: sortValues,
+ });
+ const orderByInput = new GraphQLInputObjectType({
+ name: `${typeName}OrderBy`,
+ fields: {
+ field: { type: new GraphQLNonNull(sortField) },
+ direction: {
+ type: new GraphQLNonNull(sortDirection),
+ defaultValue: 'desc',
+ },
+ },
+ });
+
+ // Keyed facets object: one field per facetable field, typed by its kind
+ // (range fields → [RangeBucket!], else [ValueBucket!]). Each field's resolver
+ // computes that facet with its OWN where-filter removed (skip-own-filter), so a
+ // multi-select facet still lists its other options; only the selected fields
+ // are resolved (GraphQL prunes the rest), so the selection IS the request.
+ const facetsType = new GraphQLObjectType({
+ name: `${typeName}Facets`,
+ fields: () => {
+ const fields: Record<
+ string,
+ GraphQLFieldConfig
+ > = {};
+ for (const field of facetableFields(searchType)) {
+ const isRange =
+ field.facetRanges !== undefined && field.facetRanges.length > 0;
+ fields[field.name] = {
+ type: nonNullListOf(isRange ? rangeBucket : valueBucket),
+ resolve: async (
+ source: Source,
+ _args: unknown,
+ context: SearchContext,
+ ) => {
+ const query = source.query as SearchQuery;
+ // Drop this facet's own filter so its other options still count
+ // (a removed `status` filter also drops the valid-only default, so
+ // the status facet counts across every status).
+ const facetQuery: SearchQuery = {
+ ...query,
+ where: query.where.filter(
+ (filter) => filter.field !== field.name,
+ ),
+ facets: [field.name],
+ limit: 0,
+ offset: 0,
+ };
+ // A facet is supplementary: degrade a failed facet to an empty list
+ // rather than failing the whole query (which would null the non-null
+ // result and discard the items + every other facet).
+ try {
+ const result = await context.engine.search(
+ facetQuery,
+ searchType,
+ );
+ return result.facets[field.name] ?? [];
+ } catch (error) {
+ context.onFacetError?.(field.name, error);
+ return [];
+ }
+ },
+ };
+ }
+ return fields;
+ },
+ });
+
+ const resultType = new GraphQLObjectType({
+ name: `${typeName}SearchResult`,
+ fields: {
+ items: { type: nonNullListOf(outputType) },
+ total: { type: new GraphQLNonNull(GraphQLInt) },
+ page: { type: new GraphQLNonNull(GraphQLInt) },
+ perPage: { type: new GraphQLNonNull(GraphQLInt) },
+ // Resolved lazily, per selected key (skip-own-filter); the result object
+ // (which carries the resolved `query`) is the facets source.
+ facets: {
+ type: new GraphQLNonNull(facetsType),
+ resolve: (source: Source) => source,
+ },
+ },
+ });
+
+ const query = new GraphQLObjectType({
+ name: 'Query',
+ fields: {
+ [queryField]: {
+ type: new GraphQLNonNull(resultType),
+ args: {
+ query: { type: GraphQLString },
+ where: { type: whereInput },
+ orderBy: { type: orderByInput },
+ page: { type: GraphQLInt, defaultValue: 1 },
+ perPage: { type: GraphQLInt, defaultValue: 20 },
+ },
+ resolve: async (_source, args, context: SearchContext) => {
+ const built = argsToQuery(args as QueryArgs, context, searchType);
+ const finalQuery = options.queryDefaults
+ ? options.queryDefaults(built, context)
+ : built;
+ // Items + total only; facets are resolved lazily per selected key.
+ const result = await context.engine.search(
+ { ...finalQuery, facets: [] },
+ searchType,
+ );
+ return {
+ items: result.hits.map((hit) => ({ id: hit.id, ...hit.document })),
+ total: result.total,
+ // Guard against a `perPage: 0` arg: `Math.floor(0/0)` is NaN, which a
+ // non-null `Int!` cannot serialize and would fail the whole query.
+ page:
+ finalQuery.limit > 0
+ ? Math.floor(finalQuery.offset / finalQuery.limit) + 1
+ : 1,
+ perPage: finalQuery.limit,
+ // Carried for the facets resolver (skip-own-filter per key).
+ query: finalQuery,
+ };
+ },
+ },
+ },
+ });
+
+ return new GraphQLSchema({ query });
+}
+
+/**
+ * Renders the schema built for `searchType` as SDL. Not a shipped artifact: a
+ * consumer snapshots it in an optional CI test, catching accidental breaking
+ * changes to its frozen contract (even via a future `buildGraphQLSchema` here).
+ */
+export function printGraphQLSchema(
+  searchType: SearchType,
+  options: BuildGraphQLSchemaOptions,
+): string {
+  const schema = buildGraphQLSchema(searchType, options);
+  return printSchema(schema);
+}
+
+interface QueryArgs {
+  readonly query?: string; // free-text search input
+  readonly where?: Record<string, unknown>; // filter input, keyed by field name
+  readonly orderBy?: { field: string; direction: 'asc' | 'desc' };
+  readonly page?: number; // 1-based page number
+  readonly perPage?: number; // page size
+}
+
+/** Pure args → {@link SearchQuery} mapping. A null `query` maps to no text; `page`
+ * is clamped to ≥ 1 and `perPage` to ≥ 0, so offset and limit are never negative. */
+function argsToQuery(
+  args: QueryArgs,
+  context: SearchContext,
+  searchType: SearchType,
+): SearchQuery {
+  const perPage = Math.max(args.perPage ?? 20, 0);
+  const page = Math.max(args.page ?? 1, 1);
+  const sort = args.orderBy;
+  return {
+    text: args.query ?? undefined,
+    where: whereToFilters(args.where, searchType),
+    orderBy: sort ? [{ field: sort.field, direction: sort.direction }] : [],
+    limit: perPage,
+    offset: (page - 1) * perPage,
+    // Facets are requested per-key by the facets resolver, not via an arg.
+    facets: [],
+    locale: context.acceptLanguage[0] ?? 'und',
+  };
+}
+
+/** Compiles the `where` input into neutral filters, one per filterable field with
+ * a non-null value. An absent or explicitly-null `where` yields no filters; a
+ * string filter without `in` compiles to an empty membership (`in: []`). */
+function whereToFilters(
+  where: Record<string, unknown> | null | undefined,
+  searchType: SearchType,
+): Filter[] {
+  const filters: Filter[] = [];
+  for (const field of filterableFields(searchType)) {
+    const value = where?.[field.name];
+    if (value === undefined || value === null) {
+      continue;
+    }
+    switch (filterOperatorFor(field.kind)) {
+      case 'in':
+        filters.push({
+          field: field.name,
+          in: (value as { in?: string[] }).in ?? [],
+        });
+        break;
+      case 'range': {
+        const range = value as { min?: number | string; max?: number | string };
+        filters.push({
+          field: field.name,
+          range: { min: range.min, max: range.max },
+        });
+        break;
+      }
+      default:
+        filters.push({ field: field.name, is: value as boolean });
+    }
+  }
+  return filters;
+}
+
+/** Builds the input object `name` whose nullable `min` and `max` fields are both
+ * of the scalar type `bound`. */
+function rangeInput(
+  name: string,
+  bound: typeof GraphQLInt | typeof GraphQLFloat | typeof GraphQLString,
+): GraphQLInputObjectType {
+  const bounds = { min: { type: bound }, max: { type: bound } };
+  return new GraphQLInputObjectType({ name, fields: bounds });
+}
+
+// Re-exported for callers that compose a sort manually.
+export type { Sort };
diff --git a/packages/search-api-graphql/src/index.ts b/packages/search-api-graphql/src/index.ts
new file mode 100644
index 00000000..20c13223
--- /dev/null
+++ b/packages/search-api-graphql/src/index.ts
@@ -0,0 +1,7 @@
+export { buildGraphQLSchema, printGraphQLSchema } from './build-schema.js';
+export type {
+ SearchContext,
+ BuildGraphQLSchemaOptions,
+} from './build-schema.js';
+export { defaultLanguageOrder, toLanguageStrings } from './language.js';
+export type { LanguageString, LanguageOrder } from './language.js';
diff --git a/packages/search-api-graphql/src/language.ts b/packages/search-api-graphql/src/language.ts
new file mode 100644
index 00000000..96826f65
--- /dev/null
+++ b/packages/search-api-graphql/src/language.ts
@@ -0,0 +1,47 @@
+import type { LocalizedValue } from '@lde/search';
+
+/** One element of the best-first `[LanguageString!]!` list: a `value` tagged with
+ * its `language`, which is null when the value is untagged (`und`). `[0]` is the
+ * value to display; `[0].language` is what was served (per-field `Content-Language`). */
+export interface LanguageString {
+ readonly language: string | null;
+ readonly value: string;
+}
+
+/** Ranks a value’s `available` language tags against the request’s `accept` list, best first. */
+export type LanguageOrder = (
+ available: readonly string[],
+ accept: readonly string[],
+) => readonly string[];
+
+/**
+ * Default ordering: requested languages first (in request order, each once), then
+ * the remaining tagged languages, then untagged (`und`) exactly once and always
+ * last, even when the request itself names `und`, so `[0]` is the best value.
+ */
+export const defaultLanguageOrder: LanguageOrder = (available, accept) => {
+  const tagged = available.filter((language) => language !== 'und');
+  const wanted = accept.filter((language) => tagged.includes(language));
+  const requested = wanted.filter((tag, at) => wanted.indexOf(tag) === at);
+  const rest = tagged.filter((language) => !requested.includes(language));
+  const untagged = available.includes('und') ? ['und'] : [];
+  return [...requested, ...rest, ...untagged];
+};
+
+/**
+ * Expands a language map into a flat `LanguageString` list. Languages are
+ * visited in the sequence `order` returns for the map’s keys and the request’s
+ * `accept` list, and each language contributes its values in stored order.
+ */
+export function toLanguageStrings(
+  value: LocalizedValue,
+  accept: readonly string[],
+  order: LanguageOrder,
+): LanguageString[] {
+  const ranked = order(Object.keys(value), accept);
+  return ranked.flatMap((tag) => {
+    // Untagged values are keyed `und` but surface with a null language.
+    const language = tag === 'und' ? null : tag;
+    return (value[tag] ?? []).map((text) => ({ language, value: text }));
+  });
+}
diff --git a/packages/search-api-graphql/test/__snapshots__/generator-stability.test.ts.snap b/packages/search-api-graphql/test/__snapshots__/generator-stability.test.ts.snap
new file mode 100644
index 00000000..63bc19de
--- /dev/null
+++ b/packages/search-api-graphql/test/__snapshots__/generator-stability.test.ts.snap
@@ -0,0 +1,101 @@
+// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
+
+exports[`GraphQL generator stability > emits a stable SDL for a representative schema 1`] = `
+"type Query {
+ things(query: String, where: ThingWhere, orderBy: ThingOrderBy, page: Int = 1, perPage: Int = 20): ThingSearchResult!
+}
+
+type ThingSearchResult {
+ items: [Thing!]!
+ total: Int!
+ page: Int!
+ perPage: Int!
+ facets: ThingFacets!
+}
+
+type Thing {
+ id: String!
+ title: [LanguageString!]!
+ description: [LanguageString!]!
+ keyword: [String!]!
+ creator: [Agent!]!
+ publisher: Agent
+ size: Int
+ score: Float
+ created: String
+ status: String!
+ open: Boolean!
+}
+
+type LanguageString {
+ language: String
+ value: String!
+}
+
+type Agent {
+ id: String!
+ name: [LanguageString!]!
+}
+
+type ThingFacets {
+ keyword: [ValueBucket!]!
+ creator: [ValueBucket!]!
+ publisher: [ValueBucket!]!
+ status: [ValueBucket!]!
+ open: [ValueBucket!]!
+}
+
+type ValueBucket {
+ value: String!
+ count: Int!
+ label: [LanguageString!]
+}
+
+input ThingWhere {
+ keyword: StringFilter
+ creator: StringFilter
+ publisher: StringFilter
+ size: IntRange
+ score: FloatRange
+ created: DateRange
+ status: StringFilter
+ open: Boolean
+}
+
+input StringFilter {
+ in: [String!]
+}
+
+input IntRange {
+ min: Int
+ max: Int
+}
+
+input FloatRange {
+ min: Float
+ max: Float
+}
+
+input DateRange {
+ min: String
+ max: String
+}
+
+input ThingOrderBy {
+ field: ThingSortField!
+ direction: SortDirection! = DESC
+}
+
+enum ThingSortField {
+ RELEVANCE
+ TITLE
+ SIZE
+ CREATED
+}
+
+enum SortDirection {
+ ASC
+ DESC
+}
+"
+`;
diff --git a/packages/search-api-graphql/test/build-schema.test.ts b/packages/search-api-graphql/test/build-schema.test.ts
new file mode 100644
index 00000000..243b0ec9
--- /dev/null
+++ b/packages/search-api-graphql/test/build-schema.test.ts
@@ -0,0 +1,503 @@
+import { describe, expect, it } from 'vitest';
+import { graphql, printSchema } from 'graphql';
+import type {
+ SearchEngine,
+ SearchQuery,
+ SearchResult,
+ SearchType,
+} from '@lde/search';
+import { buildGraphQLSchema, type SearchContext } from '../src/build-schema.js';
+
+const schema: SearchType = {
+ type: 'http://www.w3.org/ns/dcat#Dataset',
+ fields: [
+ {
+ name: 'title',
+ kind: 'text',
+ localized: true,
+ locales: ['nl', 'en'],
+ output: true,
+ searchable: { weight: 5 },
+ sortable: true,
+ },
+ {
+ name: 'keyword',
+ kind: 'keyword',
+ array: true,
+ facetable: true,
+ filterable: true,
+ output: true,
+ },
+ {
+ name: 'publisher',
+ kind: 'reference',
+ facetable: true,
+ filterable: true,
+ output: true,
+ ref: { type: 'Organization', strategy: 'labelOnly' },
+ },
+ {
+ name: 'size',
+ kind: 'integer',
+ filterable: true,
+ sortable: true,
+ facetable: true,
+ output: true,
+ facetRanges: [
+ { key: '0', min: 1, max: 10 },
+ { key: '1', min: 10 },
+ ],
+ },
+ { name: 'datePosted', kind: 'date', sortable: true, output: true },
+ { name: 'score', kind: 'number', output: true },
+ {
+ name: 'terminologySource',
+ kind: 'reference',
+ array: true,
+ facetable: true,
+ output: true,
+ ref: { type: 'Term', strategy: 'labelOnly' },
+ },
+ {
+ name: 'status',
+ kind: 'keyword',
+ facetable: true,
+ filterable: true,
+ required: true,
+ output: true,
+ },
+ {
+ name: 'iiif',
+ kind: 'boolean',
+ facetable: true,
+ filterable: true,
+ output: true,
+ },
+ ],
+};
+
+/** A stub engine that answers every search with `result`; `received()` returns
+ * the query of the most recent call (undefined until the first search). */
+function fakeEngine(result: SearchResult): {
+  engine: SearchEngine;
+  received: () => SearchQuery;
+} {
+  let lastQuery: SearchQuery;
+  const engine: SearchEngine = {
+    async search(query) {
+      lastQuery = query;
+      return result;
+    },
+  };
+  const received = () => lastQuery;
+  return { engine, received };
+}
+
+const canned: SearchResult = {
+ total: 1,
+ hits: [
+ {
+ id: 'https://d/1',
+ document: {
+ title: { nl: ['Titel'], en: ['Title'] },
+ keyword: ['kaarten'],
+ publisher: {
+ id: 'https://org/1',
+ label: { nl: ['Het Utrechts Archief'] },
+ },
+ size: 1234,
+ datePosted: 1_700_000_000,
+ score: 4.5,
+ terminologySource: [
+ { id: 'https://term/1', label: { nl: ['Kaarten'] } },
+ ],
+ status: 'valid',
+ iiif: true,
+ },
+ },
+ ],
+ facets: { keyword: [{ value: 'kaarten', count: 3 }] },
+};
+
+/** Executes `source` (with optional `variables`) against a schema freshly built
+ * from the `schema` fixture under the `Dataset` type name, passing `context` as
+ * the GraphQL context value. */
+async function run(
+  source: string,
+  context: SearchContext,
+  variables?: Record<string, unknown>,
+) {
+  const gqlSchema = buildGraphQLSchema(schema, { typeName: 'Dataset' });
+  const request = { source, contextValue: context, variableValues: variables };
+  return graphql({ schema: gqlSchema, ...request });
+}
+
+describe('buildGraphQLSchema', () => {
+ it('resolves a query, mapping the result to the typed output', async () => {
+ const { engine, received } = fakeEngine(canned);
+ const result = await run(
+ `{
+ datasets(query: "kaart") {
+ total
+ page
+ perPage
+ items {
+ id
+ title { language value }
+ keyword
+ publisher { id name { language value } }
+ terminologySource { id name { language value } }
+ size
+ datePosted
+ score
+ status
+ iiif
+ }
+ facets { keyword { value count } }
+ }
+ }`,
+ { engine, acceptLanguage: ['nl'] },
+ );
+
+ expect(result.errors).toBeUndefined();
+ const data = result.data?.datasets as Record<string, unknown>;
+ expect(data.total).toBe(1);
+ expect(data.page).toBe(1);
+ const item = (data.items as Record<string, unknown>[])[0];
+ expect(item.id).toBe('https://d/1');
+ expect(item.title).toEqual([
+ { language: 'nl', value: 'Titel' },
+ { language: 'en', value: 'Title' },
+ ]);
+ expect(item.keyword).toEqual(['kaarten']);
+ expect(item.publisher).toEqual({
+ id: 'https://org/1',
+ name: [{ language: 'nl', value: 'Het Utrechts Archief' }],
+ });
+ expect(item.size).toBe(1234);
+ expect(item.datePosted).toBe('2023-11-14T22:13:20.000Z');
+ expect(item.score).toBe(4.5);
+ expect(item.terminologySource).toEqual([
+ { id: 'https://term/1', name: [{ language: 'nl', value: 'Kaarten' }] },
+ ]);
+ expect(item.iiif).toBe(true);
+ expect(data.facets).toEqual({
+ keyword: [{ value: 'kaarten', count: 3 }],
+ });
+ // The free-text arg became the query text.
+ expect(received().text).toBe('kaart');
+ });
+
+ it('orders the output list best-first for the requested language', async () => {
+ const { engine } = fakeEngine(canned);
+ const result = await run(
+ `{ datasets { items { title { language value } } } }`,
+ { engine, acceptLanguage: ['en'] },
+ );
+ const item = (
+ (result.data?.datasets as Record<string, unknown>).items as Record<
+ string,
+ unknown
+ >[]
+ )[0];
+ expect(item.title).toEqual([
+ { language: 'en', value: 'Title' },
+ { language: 'nl', value: 'Titel' },
+ ]);
+ });
+
+ it('places untagged (und) values last with a null language', async () => {
+ const { engine } = fakeEngine({
+ total: 1,
+ facets: {},
+ hits: [
+ {
+ id: 'x',
+ document: { title: { nl: ['Titel'], und: ['Naamloos'] } },
+ },
+ ],
+ });
+ const result = await run(
+ `{ datasets { items { title { language value } datePosted } } }`,
+ { engine, acceptLanguage: ['en'] },
+ );
+ const item = (
+ (result.data?.datasets as Record<string, unknown>).items as Record<
+ string,
+ unknown
+ >[]
+ )[0];
+ expect(item.title).toEqual([
+ { language: 'nl', value: 'Titel' },
+ { language: null, value: 'Naamloos' },
+ ]);
+ // An absent date resolves to null (the non-numeric branch).
+ expect(item.datePosted).toBeNull();
+ });
+
+ it('labels reference-facet buckets, leaving plain-facet buckets null', async () => {
+ const { engine } = fakeEngine({
+ total: 0,
+ hits: [],
+ facets: {
+ publisher: [
+ {
+ value: 'https://org/1',
+ count: 2,
+ label: { nl: ['Het Utrechts Archief'] },
+ },
+ ],
+ keyword: [{ value: 'kaarten', count: 3 }],
+ },
+ });
+ const result = await run(
+ `{ datasets { facets {
+ publisher { value count label { language value } }
+ keyword { value count label { language value } }
+ } } }`,
+ { engine, acceptLanguage: ['nl'] },
+ );
+ const facets = (result.data?.datasets as Record<string, unknown>)
+ .facets as {
+ publisher: unknown[];
+ keyword: unknown[];
+ };
+ expect(facets.publisher).toEqual([
+ {
+ value: 'https://org/1',
+ count: 2,
+ label: [{ language: 'nl', value: 'Het Utrechts Archief' }],
+ },
+ ]);
+ expect(facets.keyword).toEqual([
+ { value: 'kaarten', count: 3, label: null },
+ ]);
+ });
+
+ it('exposes range-facet bucket bounds, null for value facets and open ends', async () => {
+ const { engine } = fakeEngine({
+ total: 0,
+ hits: [],
+ facets: {
+ size: [
+ { value: '0', count: 2, min: 1, max: 10 },
+ // Open-ended top bin: lower bound only.
+ { value: '1', count: 5, min: 10 },
+ ],
+ keyword: [{ value: 'kaarten', count: 3 }],
+ },
+ });
+ const result = await run(
+ `{ datasets { facets {
+ size { min max count }
+ keyword { value count }
+ } } }`,
+ { engine, acceptLanguage: ['nl'] },
+ );
+ const facets = (result.data?.datasets as Record<string, unknown>)
+ .facets as {
+ size: unknown[];
+ keyword: unknown[];
+ };
+ // RangeBuckets carry their half-open bounds (max null = open-ended top bin).
+ expect(facets.size).toEqual([
+ { min: 1, max: 10, count: 2 },
+ { min: 10, max: null, count: 5 },
+ ]);
+ // A value facet's ValueBuckets carry no bounds.
+ expect(facets.keyword).toEqual([{ value: 'kaarten', count: 3 }]);
+ });
+
+ it('resolves every selected facet key, returning [] where the engine has none', async () => {
+ const { engine } = fakeEngine({
+ total: 0,
+ hits: [],
+ facets: { keyword: [{ value: 'kaarten', count: 1 }] },
+ });
+ const result = await run(
+ `{ datasets { facets {
+ keyword { value count }
+ publisher { value count }
+ terminologySource { value count }
+ status { value count }
+ iiif { value count }
+ size { min max count }
+ } } }`,
+ { engine, acceptLanguage: ['nl'] },
+ );
+ const facets = (result.data?.datasets as Record<string, unknown>)
+ .facets as Record<string, unknown>;
+ expect(facets.keyword).toEqual([{ value: 'kaarten', count: 1 }]);
+ // Keys the engine returned nothing for resolve to an empty list.
+ for (const key of [
+ 'publisher',
+ 'terminologySource',
+ 'status',
+ 'iiif',
+ 'size',
+ ]) {
+ expect(facets[key]).toEqual([]);
+ }
+ });
+
+ it('computes a facet with its own where-filter removed (skip-own-filter)', async () => {
+ const { engine, received } = fakeEngine({
+ total: 0,
+ hits: [],
+ facets: { keyword: [{ value: 'kaarten', count: 1 }] },
+ });
+ await run(
+ `{ datasets(where: { keyword: { in: ["x"] }, status: { in: ["valid"] } }) {
+ facets { keyword { value count } }
+ } }`,
+ { engine, acceptLanguage: ['nl'] },
+ );
+ // The keyword facet query is run with the keyword filter dropped (so its
+ // other options still count), but other filters (status) retained.
+ const facetQuery = received();
+ expect(facetQuery.facets).toEqual(['keyword']);
+ expect(
+ facetQuery.where.find((filter) => filter.field === 'keyword'),
+ ).toBeUndefined();
+ expect(facetQuery.where).toContainEqual({ field: 'status', in: ['valid'] });
+ });
+
+ it('degrades a failed facet to an empty list without failing the whole query', async () => {
+ // A facet is supplementary: its computation runs a separate search (with
+ // `facets` set). Fail only that, leaving the listing search untouched.
+ const failedFacets: string[] = [];
+ const engine: SearchEngine = {
+ async search(query) {
+ if (query.facets.length > 0) {
+ throw new Error('facet backend unavailable');
+ }
+ return canned;
+ },
+ };
+ const result = await run(
+ `{ datasets {
+ total
+ items { id }
+ facets { keyword { value count } }
+ } }`,
+ {
+ engine,
+ acceptLanguage: ['nl'],
+ onFacetError: (field) => failedFacets.push(field),
+ },
+ );
+
+ // No top-level error: the failed facet degraded rather than nulling the
+ // non-null result and discarding the items.
+ expect(result.errors).toBeUndefined();
+ const data = result.data?.datasets as Record<string, unknown>;
+ expect(data.total).toBe(1);
+ expect((data.items as Record<string, unknown>[])[0].id).toBe('https://d/1');
+ // The failed facet degraded to an empty list, and the cause was reported.
+ expect((data.facets as Record<string, unknown>).keyword).toEqual([]);
+ expect(failedFacets).toEqual(['keyword']);
+ });
+
+ it('guards perPage: 0, resolving page to 1 rather than failing on NaN', async () => {
+ const { engine } = fakeEngine(canned);
+ const result = await run(`{ datasets(perPage: 0) { page total } }`, {
+ engine,
+ acceptLanguage: ['nl'],
+ });
+ expect(result.errors).toBeUndefined();
+ const data = result.data?.datasets as Record<string, unknown>;
+ expect(data.page).toBe(1);
+ });
+
+ it('maps where, orderBy and pagination into the SearchQuery', async () => {
+ const { engine, received } = fakeEngine(canned);
+ await run(
+ `{
+ datasets(
+ where: { status: { in: ["valid"] }, keyword: {}, size: { min: 1, max: 9 }, iiif: true }
+ orderBy: { field: SIZE, direction: ASC }
+ page: 3
+ perPage: 10
+ ) { total }
+ }`,
+ { engine, acceptLanguage: ['nl'] },
+ );
+
+ const query = received();
+ expect(query.where).toContainEqual({ field: 'status', in: ['valid'] });
+ // An empty StringFilter compiles to an empty membership.
+ expect(query.where).toContainEqual({ field: 'keyword', in: [] });
+ expect(query.where).toContainEqual({
+ field: 'size',
+ range: { min: 1, max: 9 },
+ });
+ expect(query.where).toContainEqual({ field: 'iiif', is: true });
+ expect(query.orderBy).toEqual([{ field: 'size', direction: 'asc' }]);
+ // Facets are requested per key via selection, not an arg; the listing query
+ // carries none.
+ expect(query.facets).toEqual([]);
+ expect(query.limit).toBe(10);
+ expect(query.offset).toBe(20);
+ });
+
+ it('falls back to the und locale when no Accept-Language is given', async () => {
+ const { engine, received } = fakeEngine(canned);
+ await run(`{ datasets { total } }`, { engine, acceptLanguage: [] });
+ expect(received().locale).toBe('und');
+ });
+
+ it('applies queryDefaults before calling the engine', async () => {
+ let captured: SearchQuery | undefined;
+ const engine: SearchEngine = {
+ async search(query) {
+ captured = query;
+ return canned;
+ },
+ };
+ const gqlSchema = buildGraphQLSchema(schema, {
+ typeName: 'Dataset',
+ queryDefaults: (query) => ({
+ ...query,
+ where: [...query.where, { field: 'status', in: ['valid'] }],
+ orderBy: [{ field: 'relevance', direction: 'desc' }],
+ }),
+ });
+ await graphql({
+ schema: gqlSchema,
+ source: `{ datasets { total } }`,
+ contextValue: { engine, acceptLanguage: ['nl'] },
+ });
+ expect(captured?.where).toEqual([{ field: 'status', in: ['valid'] }]);
+ expect(captured?.orderBy).toEqual([
+ { field: 'relevance', direction: 'desc' },
+ ]);
+ });
+
+ it('derives nullability: required scalar non-null, optional scalar nullable, arrays/booleans non-null', () => {
+ const sdl = printSchema(
+ buildGraphQLSchema(schema, { typeName: 'Dataset' }),
+ );
+ expect(sdl).toMatch(/status: String!/); // required
+ expect(sdl).toMatch(/size: Int\b(?!!)/); // optional → nullable
+ expect(sdl).toMatch(/title: \[LanguageString!\]!/);
+ expect(sdl).toMatch(/keyword: \[String!\]!/);
+ expect(sdl).toMatch(/iiif: Boolean!/);
+ expect(sdl).toMatch(/publisher: Organization\b(?!!)/); // optional reference
+ });
+
+ it('builds the where, orderBy enum and keyed facets object from the field model', () => {
+ const sdl = printSchema(
+ buildGraphQLSchema(schema, { typeName: 'Dataset' }),
+ );
+ expect(sdl).toMatch(/enum DatasetSortField/);
+ expect(sdl).toMatch(/RELEVANCE/);
+ expect(sdl).toMatch(/SIZE/);
+ // Facets are a keyed object, one field per facetable field, typed by kind.
+ expect(sdl).toMatch(/type DatasetFacets/);
+ expect(sdl).toMatch(/keyword: \[ValueBucket!\]!/);
+ expect(sdl).toMatch(/size: \[RangeBucket!\]!/);
+ expect(sdl).toMatch(/input DatasetWhere/);
+ expect(sdl).toMatch(/status: StringFilter/);
+ expect(sdl).toMatch(/size: IntRange/);
+ });
+});
diff --git a/packages/search-api-graphql/test/generator-stability.test.ts b/packages/search-api-graphql/test/generator-stability.test.ts
new file mode 100644
index 00000000..c78b1535
--- /dev/null
+++ b/packages/search-api-graphql/test/generator-stability.test.ts
@@ -0,0 +1,97 @@
+import { describe, expect, it } from 'vitest';
+import type { SearchType } from '@lde/search';
+import { printGraphQLSchema } from '../src/build-schema.js';
+
+/**
+ * A neutral fixture exercising every kind + capability — NOT a real domain. Its
+ * SDL is snapshotted purely to pin the **generator**: any change to how
+ * `buildGraphQLSchema` maps the field model (nullability, type names, enums,
+ * reference reuse) surfaces as a snapshot diff before this library is published,
+ * so a consumer’s contract can’t shift from under it by accident.
+ */
+const THING: SearchType = {
+ type: 'https://example.org/Thing',
+ fields: [
+ {
+ name: 'title',
+ kind: 'text',
+ localized: true,
+ locales: ['nl', 'en'],
+ output: true,
+ searchable: { weight: 5 },
+ sortable: true,
+ required: true,
+ },
+ {
+ name: 'description',
+ kind: 'text',
+ localized: true,
+ locales: ['nl', 'en'],
+ output: true,
+ searchable: { weight: 2 },
+ },
+ {
+ name: 'keyword',
+ kind: 'keyword',
+ array: true,
+ facetable: true,
+ filterable: true,
+ searchable: { weight: 1 },
+ output: true,
+ },
+ // Two references sharing a shape → the Agent type is emitted once and reused.
+ {
+ name: 'creator',
+ kind: 'reference',
+ array: true,
+ facetable: true,
+ filterable: true,
+ output: true,
+ ref: { type: 'Agent', strategy: 'labelOnly' },
+ },
+ {
+ name: 'publisher',
+ kind: 'reference',
+ facetable: true,
+ filterable: true,
+ output: true,
+ ref: { type: 'Agent', strategy: 'labelOnly' },
+ },
+ {
+ name: 'size',
+ kind: 'integer',
+ filterable: true,
+ sortable: true,
+ output: true,
+ },
+ { name: 'score', kind: 'number', filterable: true, output: true },
+ {
+ name: 'created',
+ kind: 'date',
+ filterable: true,
+ sortable: true,
+ output: true,
+ },
+ {
+ name: 'status',
+ kind: 'keyword',
+ facetable: true,
+ filterable: true,
+ required: true,
+ output: true,
+ },
+ {
+ name: 'open',
+ kind: 'boolean',
+ facetable: true,
+ filterable: true,
+ output: true,
+ },
+ ],
+};
+
+describe('GraphQL generator stability', () => {
+ it('emits a stable SDL for a representative schema', () => {
+ expect(printGraphQLSchema(THING, { typeName: 'Thing' })).toMatchSnapshot();
+ });
+});
diff --git a/packages/search-api-graphql/tsconfig.json b/packages/search-api-graphql/tsconfig.json
new file mode 100644
index 00000000..62ebbd94
--- /dev/null
+++ b/packages/search-api-graphql/tsconfig.json
@@ -0,0 +1,13 @@
+{
+ "extends": "../../tsconfig.base.json",
+ "files": [],
+ "include": [],
+ "references": [
+ {
+ "path": "./tsconfig.lib.json"
+ },
+ {
+ "path": "./tsconfig.spec.json"
+ }
+ ]
+}
diff --git a/packages/search-api-graphql/tsconfig.lib.json b/packages/search-api-graphql/tsconfig.lib.json
new file mode 100644
index 00000000..64610bac
--- /dev/null
+++ b/packages/search-api-graphql/tsconfig.lib.json
@@ -0,0 +1,26 @@
+{
+ "extends": "../../tsconfig.base.json",
+ "compilerOptions": {
+ "rootDir": "src",
+ "outDir": "dist",
+ "tsBuildInfoFile": "dist/tsconfig.lib.tsbuildinfo",
+ "emitDeclarationOnly": false,
+ "types": ["node"]
+ },
+ "include": ["src/**/*.ts"],
+ "references": [{ "path": "../search/tsconfig.lib.json" }],
+ "exclude": [
+ "vite.config.ts",
+ "vite.config.mts",
+ "vitest.config.ts",
+ "vitest.config.mts",
+ "test/**/*.test.ts",
+ "test/**/*.spec.ts",
+ "test/**/*.test.tsx",
+ "test/**/*.spec.tsx",
+ "test/**/*.test.js",
+ "test/**/*.spec.js",
+ "test/**/*.test.jsx",
+ "test/**/*.spec.jsx"
+ ]
+}
diff --git a/packages/search-api-graphql/tsconfig.spec.json b/packages/search-api-graphql/tsconfig.spec.json
new file mode 100644
index 00000000..04480f69
--- /dev/null
+++ b/packages/search-api-graphql/tsconfig.spec.json
@@ -0,0 +1,29 @@
+{
+ "extends": "../../tsconfig.base.json",
+ "compilerOptions": {
+ "outDir": "./out-tsc/vitest",
+ "types": [
+ "vitest/globals",
+ "vitest/importMeta",
+ "vite/client",
+ "node",
+ "vitest"
+ ]
+ },
+ "include": [
+ "test/**/*.test.ts",
+ "test/**/*.spec.ts",
+ "test/**/*.test.tsx",
+ "test/**/*.spec.tsx",
+ "test/**/*.test.js",
+ "test/**/*.spec.js",
+ "test/**/*.test.jsx",
+ "test/**/*.spec.jsx",
+ "test/**/*.d.ts"
+ ],
+ "references": [
+ {
+ "path": "./tsconfig.lib.json"
+ }
+ ]
+}
diff --git a/packages/search-api-graphql/vite.config.ts b/packages/search-api-graphql/vite.config.ts
new file mode 100644
index 00000000..7434ca80
--- /dev/null
+++ b/packages/search-api-graphql/vite.config.ts
@@ -0,0 +1,21 @@
+///
+import { defineConfig, mergeConfig } from 'vite';
+import baseConfig from '../../vite.base.config.js';
+
+export default mergeConfig(
+ baseConfig,
+ defineConfig({
+ root: __dirname,
+ cacheDir: '../../node_modules/.vite/packages/search-api-graphql',
+ test: {
+ coverage: {
+ thresholds: {
+ functions: 100,
+ lines: 100,
+ branches: 88.63,
+ statements: 100,
+ },
+ },
+ },
+ }),
+);
diff --git a/packages/search-typesense/README.md b/packages/search-typesense/README.md
index b5d62bb9..efffc145 100644
--- a/packages/search-typesense/README.md
+++ b/packages/search-typesense/README.md
@@ -1,13 +1,27 @@
# @lde/search-typesense
-[Typesense](https://typesense.org/) engine adapter for RDF-backed search
-pipelines. Engine-specific (Typesense) but domain-agnostic – the caller supplies
-the collection schema and documents.
-
-The engine-agnostic half of the pipeline – framing `CONSTRUCT` quads into a
-JSON-LD IR and projecting that IR into flat documents from a declarative field
-spec – lives in [`@lde/search`](../search). This package consumes those
-documents and writes them to Typesense.
+[Typesense](https://typesense.org/) engine adapter for the engine- and
+domain-agnostic [`@lde/search`](../search) core. **Engine-specific (Typesense) but
+domain-agnostic** – you supply a `SearchType`; this package never names your
+domain. It is the Typesense implementation of the `SearchEngine` port: it derives
+a collection schema from the field model, compiles the neutral `SearchQuery` into
+Typesense search params, runs it, reconstructs the engine-neutral `SearchResult`,
+and manages the index lifecycle (blue/green rebuild).
+
+## Collection schema and engine
+
+`buildCollectionSchema(searchType, { name, defaultSortingField, … })` derives a
+Typesense collection from the unified `SearchField` model — the Typesense field
+type comes from each field’s `kind`, and the physical fanout (per-locale
+search/sort keys) matches what the projection writes, via
+`@lde/search`’s `physicalFields`, so the index and the documents cannot drift.
+
+`createTypesenseSearchEngine(client, { collection, labelsCollection })` is the
+`SearchEngine` implementation: it compiles the query, runs the search, resolves
+reference (and reference-facet) labels from the sidecar `labels` collection in a
+single lookup, and reconstructs the logical `SearchResult` — language maps,
+labelled references, labelled facet buckets. The pure halves `buildSearchParams`
+and `parseSearchResponse` are exported for direct use and testing.
## Indexing
diff --git a/packages/search-typesense/package.json b/packages/search-typesense/package.json
index b1dde852..445624fb 100644
--- a/packages/search-typesense/package.json
+++ b/packages/search-typesense/package.json
@@ -1,7 +1,7 @@
{
"name": "@lde/search-typesense",
"version": "0.1.1",
- "description": "Generic Typesense engine adapter for RDF-backed search pipelines: collection lifecycle, bulk upsert and blue/green alias swap",
+ "description": "Typesense implementation of the @lde/search SearchEngine port: collection-schema builder, query compiler, label-resolving result reconstruction, and blue/green index lifecycle. Engine-specific (Typesense) but domain-agnostic.",
"repository": {
"url": "git+https://github.com/ldelements/lde.git",
"directory": "packages/search-typesense"
@@ -25,6 +25,8 @@
"!**/*.tsbuildinfo"
],
"dependencies": {
+ "@lde/search": "^0.1.2",
+ "@lde/text-normalization": "^0.1.1",
"tslib": "^2.3.0",
"typesense": "^3.0.6"
},
diff --git a/packages/search-typesense/src/collection-schema.ts b/packages/search-typesense/src/collection-schema.ts
new file mode 100644
index 00000000..af133b08
--- /dev/null
+++ b/packages/search-typesense/src/collection-schema.ts
@@ -0,0 +1,132 @@
+import type { CollectionCreateSchema } from 'typesense';
+import type { CollectionFieldSchema } from 'typesense/lib/Typesense/Collection.js';
+import { physicalFields, type SearchField, type SearchType } from '@lde/search';
+
+/** Deployment-specific options the generic field model does not carry. */
+export interface CollectionSchemaOptions {
+ /** The Typesense collection (or alias) name; becomes the schema's `name`. */
+ readonly name: string;
+ /** Snowball stemming locale for non-localized searchable fields (default `nl`).
+ * Localized text search fields stem in their own locale instead. */
+ readonly defaultLocale?: string;
+ /** Sort field when a query imposes no order; if non-localized, it is declared non-optional. */
+ readonly defaultSortingField?: string;
+ /** Synonym sets the collection references (synced separately); copied into `synonym_sets`. */
+ readonly synonymSets?: readonly string[];
+}
+
+/**
+ * Derive the Typesense collection schema for a {@link SearchType}. Index and
+ * projection share this one declarative source: every declared field expands
+ * into the physical fields {@link physicalFields} names for it, and its
+ * Typesense type follows from the field `kind` instead of being restated.
+ *
+ * Search companions are stemmed: localized text per declared locale, any
+ * other searchable field in `defaultLocale` (`nl` unless overridden).
+ * `default_sorting_field` and `synonym_sets` appear only when configured.
+ */
+export function buildCollectionSchema(
+  searchType: SearchType,
+  options: CollectionSchemaOptions,
+): CollectionCreateSchema {
+  const { name, defaultSortingField, synonymSets } = options;
+  const stemLocale = options.defaultLocale ?? 'nl';
+  const fields: CollectionFieldSchema[] = [];
+  for (const spec of searchType.fields) {
+    fields.push(...typesenseFields(spec, stemLocale, defaultSortingField));
+  }
+  return {
+    name,
+    fields,
+    ...(defaultSortingField === undefined
+      ? {}
+      : { default_sorting_field: defaultSortingField }),
+    // Copied so the schema never aliases the caller's (readonly) array.
+    ...(synonymSets === undefined ? {} : { synonym_sets: [...synonymSets] }),
+  };
+}
+
+/** Expands one declaration into the physical Typesense fields it yields. */
+function typesenseFields(
+  field: SearchField,
+  defaultLocale: string,
+  defaultSortingField: string | undefined,
+): CollectionFieldSchema[] {
+  const names = physicalFields(field);
+  if (!(field.kind === 'text' && field.localized === true)) {
+    const type = typesenseValueType(field);
+    const primary: CollectionFieldSchema = {
+      name: field.name,
+      type,
+      facet: field.facetable ?? false,
+      sort: field.sortable ?? false,
+      // Only a `required` field and the `default_sorting_field` (which
+      // Typesense needs present on every document) are non-optional.
+      optional: field.required !== true && field.name !== defaultSortingField,
+    };
+    if (!field.searchable) {
+      return [primary];
+    }
+    // Search companions share the value type and stem in the default locale.
+    const companions = names.search.map(
+      (name): CollectionFieldSchema => ({
+        name,
+        type,
+        optional: true,
+        stem: true,
+        locale: defaultLocale,
+      }),
+    );
+    return [primary, ...companions];
+  }
+
+  // Localized text: display, per-locale search and sort fields.
+  const locales = field.locales ?? [];
+  // Display labels are stored but not indexed (`index: false`); matching goes
+  // through the search companions declared next.
+  const display = names.display.map(
+    (name): CollectionFieldSchema => ({
+      name,
+      type: 'string',
+      index: false,
+      optional: true,
+    }),
+  );
+  // The i-th search field stems in the i-th declared locale.
+  const search = names.search.map(
+    (name, position): CollectionFieldSchema => ({
+      name,
+      type: 'string',
+      optional: true,
+      stem: true,
+      locale: locales[position],
+    }),
+  );
+  const sort = names.sort.map(
+    (name): CollectionFieldSchema => ({
+      name,
+      type: 'string',
+      sort: true,
+      optional: true,
+    }),
+  );
+  return [...display, ...search, ...sort];
+}
+
+/** Typesense type implied by a non-localized field's `kind`. Integers and dates
+ * are 64-bit (`int64`); only string kinds honour `array` — NOTE(review): confirm. */
+function typesenseValueType(field: SearchField): CollectionFieldSchema['type'] {
+  switch (field.kind) {
+    case 'text':
+    case 'keyword':
+    case 'reference':
+      return field.array === true ? 'string[]' : 'string';
+    case 'boolean':
+      return 'bool';
+    case 'number':
+      return 'float';
+    case 'date':
+    case 'integer':
+      return 'int64';
+  }
+}
diff --git a/packages/search-typesense/src/index.ts b/packages/search-typesense/src/index.ts
index 6514638d..66247957 100644
--- a/packages/search-typesense/src/index.ts
+++ b/packages/search-typesense/src/index.ts
@@ -1 +1,11 @@
export { rebuild } from './adapter.js';
+export { buildCollectionSchema } from './collection-schema.js';
+export type { CollectionSchemaOptions } from './collection-schema.js';
+export { buildSearchParams } from './query-compiler.js';
+// The options type of `buildSearchParams`, exported like the other option types.
+export type { CompileOptions } from './query-compiler.js';
+export { createTypesenseSearchEngine, parseSearchResponse } from './search.js';
+export type {
+ TypesenseSearchEngineOptions,
+ TypesenseSearchResponse,
+} from './search.js';
diff --git a/packages/search-typesense/src/query-compiler.ts b/packages/search-typesense/src/query-compiler.ts
new file mode 100644
index 00000000..dfeede8c
--- /dev/null
+++ b/packages/search-typesense/src/query-compiler.ts
@@ -0,0 +1,242 @@
+import type { SearchParams } from 'typesense/lib/Typesense/Documents.js';
+import { fold } from '@lde/text-normalization';
+import {
+ physicalFields,
+ searchableFields,
+ type FacetRange,
+ type Filter,
+ type SearchField,
+ type SearchQuery,
+ type SearchType,
+ type Sort,
+} from '@lde/search';
+
+/**
+ * Options for {@link buildSearchParams}, the query half of the engine adapter:
+ * it compiles the engine-neutral {@link SearchQuery} into Typesense search
+ * parameters. Pure (no client, no env), so the mapping is asserted directly in
+ * unit tests. Field names come from {@link physicalFields}, as in the projection
+ * and collection schema, so a query cannot reference a field the index lacks.
+ */
+export interface CompileOptions {
+ /**
+ * Cap on the number of buckets returned per facet (`max_facet_values`). Left
+ * unset, Typesense defaults to 10 — too few for high-cardinality facets
+ * (publisher, keyword), so a deployment with such facets must raise it. Range
+ * facets return one bucket per declared range regardless, but a value > the
+ * range count is still safe.
+ */
+ readonly maxFacetValues?: number;
+}
+
+export function buildSearchParams(
+ query: SearchQuery,
+ searchType: SearchType,
+ options: CompileOptions = {},
+): SearchParams