Skip to content

Commit 7ffb6f4

Browse files
authored
Merge pull request #843 from krissetto/rag
RAG support
2 parents c75a6a1 + 8afce6a commit 7ffb6f4

52 files changed

Lines changed: 10640 additions & 43 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,11 @@ dist
33
.task
44
.DS_Store
55
evals
6-
*.db
6+
*.db*
77
/cagent
88
.crush
99
.vscode
10+
*.debug
1011

1112
# agents
1213
agent.yaml

README.md

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,8 @@ See [MCP Mode documentation](./docs/MCP-MODE.md) for detailed instructions on ex
145145
- **📝 YAML configuration** - Declarative model and agent configuration.
146146
- **💭 Advanced reasoning** - Built-in "think", "todo" and "memory" tools for
147147
complex problem-solving.
148+
- **🔍 RAG (Retrieval-Augmented Generation)** - Pluggable retrieval strategies
149+
(chunked-embeddings, BM25, more to come...) with hybrid retrieval and fusion support.
148150
- **🌐 Multiple AI providers** - Support for OpenAI, Anthropic, Gemini, xAI,
149151
Mistral, Nebius and [Docker Model
150152
Runner](https://docs.docker.com/ai/model-runner/).
@@ -292,6 +294,89 @@ Linux](https://docs.docker.com/ai/model-runner/get-started/#enable-dmr-in-docker
292294

293295
See the [DMR Provider documentation](docs/USAGE.md#dmr-docker-model-runner-provider-usage) for more details on runtime flags and speculative decoding options.
294296

297+
## RAG (Retrieval-Augmented Generation)
298+
299+
Give your agents access to your documents with cagent's modular RAG system. It supports multiple retrieval strategies that can be used individually or combined for hybrid search.
300+
301+
### Quick RAG Example
302+
303+
```yaml
304+
models:
305+
embedder:
306+
provider: openai
307+
model: text-embedding-3-small
308+
309+
rag:
310+
my_knowledge_base:
311+
docs: [./documents, ./pdfs]
312+
strategies:
313+
- type: chunked-embeddings
314+
model: embedder
315+
threshold: 0.5
316+
chunking:
317+
size: 1000
318+
overlap: 100
319+
results:
320+
limit: 5
321+
322+
agents:
323+
root:
324+
model: openai/gpt-4o
325+
instruction: |
326+
You are an assistant with access to an internal knowledge base.
327+
Use the knowledge base to gather context before answering user questions
328+
rag: [my_knowledge_base]
329+
```
330+
331+
### Hybrid Retrieval (Chunked-Embeddings + BM25)
332+
333+
Combine semantic search (chunked-embeddings) with keyword search (BM25) for best results:
334+
335+
```yaml
336+
rag:
337+
hybrid_search:
338+
docs: [./shared_docs]
339+
340+
strategies:
341+
- type: chunked-embeddings
342+
model: embedder
343+
threshold: 0.5
344+
limit: 20
345+
chunking:
346+
size: 1000
347+
overlap: 100
348+
349+
- type: bm25
350+
k1: 1.5
351+
b: 0.75
352+
threshold: 0.3
353+
limit: 15
354+
chunking:
355+
size: 1000
356+
overlap: 100
357+
358+
results:
359+
fusion:
360+
strategy: rrf # Reciprocal Rank Fusion
361+
k: 60
362+
deduplicate: true
363+
limit: 5
364+
365+
agents:
366+
root:
367+
model: openai/gpt-4o
368+
rag: [hybrid_search]
369+
```
370+
371+
**Features:**
372+
- **Multiple strategies**: Vector (semantic), BM25 (keyword), or both
373+
- **Parallel execution**: Strategies run concurrently for fast results
374+
- **Pluggable fusion**: RRF, weighted, or max score combining
375+
- **Per-strategy configuration**: Different thresholds, limits, and documents
376+
- **Auto file watching**: Reindex automatically on file changes
377+
378+
See the [RAG documentation](docs/RAG.md) for complete details, examples, and debugging guides.
379+
295380
## Quickly generate agents and agent teams with `cagent new`
296381

297382
Using the command `cagent new` you can quickly generate agents or multi-agent

cagent-schema.json

Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,13 @@
3333
"$ref": "#/definitions/ModelConfig"
3434
}
3535
},
36+
"rag": {
37+
"type": "object",
38+
"description": "Map of RAG (Retrieval-Augmented Generation) configurations",
39+
"additionalProperties": {
40+
"$ref": "#/definitions/RAGConfig"
41+
}
42+
},
3643
"metadata": {
3744
"$ref": "#/definitions/Metadata",
3845
"description": "Configuration metadata"
@@ -188,6 +195,13 @@
188195
"schema"
189196
],
190197
"additionalProperties": false
198+
},
199+
"rag": {
200+
"type": "array",
201+
"description": "List of RAG sources to use for this agent",
202+
"items": {
203+
"type": "string"
204+
}
191205
}
192206
},
193207
"additionalProperties": false
@@ -625,6 +639,185 @@
625639
"method"
626640
],
627641
"additionalProperties": false
642+
},
643+
"RAGConfig": {
644+
"type": "object",
645+
"description": "RAG (Retrieval-Augmented Generation) configuration for document search and retrieval with pluggable strategies. Multiple strategies enable hybrid retrieval and reranking.",
646+
"properties": {
647+
"description": {
648+
"type": "string",
649+
"description": "Description of the RAG source"
650+
},
651+
"docs": {
652+
"type": "array",
653+
"description": "Shared document paths or directories indexed by all strategies",
654+
"items": {
655+
"type": "string"
656+
}
657+
},
658+
"strategies": {
659+
"type": "array",
660+
"description": "Array of retrieval strategy configurations. Each strategy can have different parameters based on its type.",
661+
"minItems": 1,
662+
"items": {
663+
"type": "object",
664+
"description": "Retrieval strategy configuration with type-specific parameters. Structured fields are limited; additional parameters are passed through as-is for strategy-specific use.",
665+
"required": ["type"],
666+
"properties": {
667+
"type": {
668+
"type": "string",
669+
"description": "Retrieval strategy type",
670+
"enum": ["chunked-embeddings", "bm25"]
671+
},
672+
"model": {
673+
"type": "string",
674+
"description": "Embedding model reference for chunked-embeddings strategies (looked up in models map, or 'auto' for automatic selection)",
675+
"examples": ["openai/text-embedding-3-small", "dmr/embeddinggemma", "auto"]
676+
},
677+
"docs": {
678+
"type": "array",
679+
"description": "Additional documents for this strategy only (augments shared docs)",
680+
"items": {
681+
"type": "string"
682+
}
683+
},
684+
"database": {
685+
"type": "string",
686+
"description": "Database path or connection string. Currently only simple string values are supported (e.g., './vector.db', './bm25.db')."
687+
},
688+
"similarity_metric": {
689+
"type": "string",
690+
"description": "Similarity metric (chunked-embeddings only). Currently only 'cosine_similarity' is implemented.",
691+
"enum": ["cosine_similarity"]
692+
},
693+
"vector_dimensions": {
694+
"type": "integer",
695+
"description": "Vector dimensions for embeddings (chunked-embeddings only). Must match your embedding model's output dimensions and is required for chunked-embeddings strategies.",
696+
"minimum": 1,
697+
"examples": [1536, 3072, 1024, 768]
698+
},
699+
"k1": {
700+
"type": "number",
701+
"description": "BM25 term frequency saturation (bm25 only, typically 1.2-2.0)",
702+
"minimum": 0
703+
},
704+
"b": {
705+
"type": "number",
706+
"description": "BM25 length normalization (bm25 only, 0-1, typically 0.75)",
707+
"minimum": 0,
708+
"maximum": 1
709+
},
710+
"threshold": {
711+
"type": "number",
712+
"description": "Minimum score threshold (0-1 for chunked-embeddings, unbounded for bm25)",
713+
"minimum": 0
714+
},
715+
"limit": {
716+
"type": "integer",
717+
"description": "Max results from this strategy (candidates for fusion). If unset, defaults to 5 in the implementation.",
718+
"minimum": 1
719+
},
720+
"chunking": {
721+
"type": "object",
722+
"description": "Text chunking configuration",
723+
"properties": {
724+
"size": {
725+
"type": "integer",
726+
"description": "Chunk size in characters. If unset, defaults to 1000 in the implementation.",
727+
"minimum": 1
728+
},
729+
"overlap": {
730+
"type": "integer",
731+
"description": "Overlap between chunks in characters. If unset, defaults to 75 in the implementation.",
732+
"minimum": 0
733+
},
734+
"respect_word_boundaries": {
735+
"type": "boolean",
736+
"description": "When true, chunks will split on the nearest whitespace boundary instead of at the exact character limit, preventing words from being truncated."
737+
}
738+
},
739+
"additionalProperties": false
740+
}
741+
},
742+
"additionalProperties": true
743+
}
744+
},
745+
"results": {
746+
"type": "object",
747+
"description": "Result post-processing configuration (fusion, deduplication, limiting). If omitted, sensible defaults are applied in code.",
748+
"properties": {
749+
"limit": {
750+
"type": "integer",
751+
"description": "Maximum number of results to return (top K)",
752+
"minimum": 1,
753+
"default": 15
754+
},
755+
"fusion": {
756+
"type": "object",
757+
"description": "Configuration for combining results from multiple strategies. If omitted and multiple strategies are configured, Reciprocal Rank Fusion (rrf) with k=60 is used.",
758+
"properties": {
759+
"strategy": {
760+
"type": "string",
761+
"description": "Fusion strategy to use",
762+
"enum": [
763+
"rrf",
764+
"reciprocal_rank_fusion",
765+
"weighted",
766+
"max"
767+
],
768+
"default": "rrf",
769+
"examples": [
770+
"rrf",
771+
"weighted"
772+
]
773+
},
774+
"k": {
775+
"type": "integer",
776+
"description": "RRF smoothing parameter k (only for RRF strategy)",
777+
"minimum": 1,
778+
"default": 60
779+
},
780+
"weights": {
781+
"type": "object",
782+
"description": "Strategy weights for weighted fusion (strategy name -> weight)",
783+
"additionalProperties": {
784+
"type": "number",
785+
"minimum": 0,
786+
"maximum": 1
787+
},
788+
"examples": [
789+
{
790+
"chunked-embeddings": 0.7,
791+
"bm25": 0.3
792+
}
793+
]
794+
}
795+
},
796+
"additionalProperties": false
797+
},
798+
"deduplicate": {
799+
"type": "boolean",
800+
"description": "Remove duplicate documents across strategies",
801+
"default": true
802+
},
803+
"include_score": {
804+
"type": "boolean",
805+
"description": "Include relevance scores in results",
806+
"default": false
807+
},
808+
"return_full_content": {
809+
"type": "boolean",
810+
"description": "Return full document content instead of just the matched chunk. The full document is read directly from the file system.",
811+
"default": false
812+
}
813+
},
814+
"additionalProperties": false
815+
}
816+
},
817+
"required": [
818+
"strategies"
819+
],
820+
"additionalProperties": false
628821
}
629822
}
630823
}

cmd/root/new.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ func (f *newFlags) runNewCommand(cmd *cobra.Command, args []string) error {
6868

6969
sess := session.New(opts...)
7070

71-
a := app.New("", rt, sess, prompt)
71+
a := app.New(ctx, "", rt, sess, prompt)
7272
m := tui.New(a)
7373

7474
progOpts := []tea.ProgramOption{

cmd/root/run.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ func handleRunMode(ctx context.Context, agentFilename string, rt runtime.Runtime
240240
return err
241241
}
242242

243-
a := app.New(agentFilename, rt, sess, firstMessage)
243+
a := app.New(ctx, agentFilename, rt, sess, firstMessage)
244244
m := tui.New(a)
245245

246246
progOpts := []tea.ProgramOption{

0 commit comments

Comments
 (0)