You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
See the [DMR Provider documentation](docs/USAGE.md#dmr-docker-model-runner-provider-usage) for more details on runtime flags and speculative decoding options.
294
296
297
+
## RAG (Retrieval-Augmented Generation)
298
+
299
+
Give your agents access to your documents with cagent's modular RAG system. It supports multiple retrieval strategies that can be used individually or combined for hybrid search.
300
+
301
+
### Quick RAG Example
302
+
303
+
```yaml
304
+
models:
305
+
embedder:
306
+
provider: openai
307
+
model: text-embedding-3-small
308
+
309
+
rag:
310
+
my_knowledge_base:
311
+
docs: [./documents, ./pdfs]
312
+
strategies:
313
+
- type: chunked-embeddings
314
+
model: embedder
315
+
threshold: 0.5
316
+
chunking:
317
+
size: 1000
318
+
overlap: 100
319
+
results:
320
+
limit: 5
321
+
322
+
agents:
323
+
root:
324
+
model: openai/gpt-4o
325
+
instruction: |
326
+
You are an assistant with access to an internal knowledge base.
327
+
Use the knowledge base to gather context before answering user questions
328
+
rag: [my_knowledge_base]
329
+
```
330
+
331
+
### Hybrid Retrieval (Chunked-Embeddings + BM25)
332
+
333
+
Combine semantic search (chunked-embeddings) with keyword search (BM25) for best results:
334
+
335
+
```yaml
336
+
rag:
337
+
hybrid_search:
338
+
docs: [./shared_docs]
339
+
340
+
strategies:
341
+
- type: chunked-embeddings
342
+
model: embedder
343
+
threshold: 0.5
344
+
limit: 20
345
+
chunking:
346
+
size: 1000
347
+
overlap: 100
348
+
349
+
- type: bm25
350
+
k1: 1.5
351
+
b: 0.75
352
+
threshold: 0.3
353
+
limit: 15
354
+
chunking:
355
+
size: 1000
356
+
overlap: 100
357
+
358
+
results:
359
+
fusion:
360
+
strategy: rrf # Reciprocal Rank Fusion
361
+
k: 60
362
+
deduplicate: true
363
+
limit: 5
364
+
365
+
agents:
366
+
root:
367
+
model: openai/gpt-4o
368
+
rag: [hybrid_search]
369
+
```
370
+
371
+
**Features:**
372
+
- **Multiple strategies**: Vector (semantic), BM25 (keyword), or both
373
+
- **Parallel execution**: Strategies run concurrently for fast results
374
+
- **Pluggable fusion**: RRF, weighted, or max score combining
375
+
- **Per-strategy configuration**: Different thresholds, limits, and documents
376
+
- **Auto file watching**: Reindex automatically on file changes
377
+
378
+
See the [RAG documentation](docs/RAG.md) for complete details, examples, and debugging guides.
379
+
295
380
## Quickly generate agents and agent teams with `cagent new`
296
381
297
382
Using the command `cagent new` you can quickly generate agents or multi-agent
Copy file name to clipboard. Expand all lines: cagent-schema.json
+193 Lines changed: 193 additions & 0 deletions
Original file line number
Diff line number
Diff line change
@@ -33,6 +33,13 @@
33
33
"$ref": "#/definitions/ModelConfig"
34
34
}
35
35
},
36
+
"rag": {
37
+
"type": "object",
38
+
"description": "Map of RAG (Retrieval-Augmented Generation) configurations",
39
+
"additionalProperties": {
40
+
"$ref": "#/definitions/RAGConfig"
41
+
}
42
+
},
36
43
"metadata": {
37
44
"$ref": "#/definitions/Metadata",
38
45
"description": "Configuration metadata"
@@ -188,6 +195,13 @@
188
195
"schema"
189
196
],
190
197
"additionalProperties": false
198
+
},
199
+
"rag": {
200
+
"type": "array",
201
+
"description": "List of RAG sources to use for this agent",
202
+
"items": {
203
+
"type": "string"
204
+
}
191
205
}
192
206
},
193
207
"additionalProperties": false
@@ -625,6 +639,185 @@
625
639
"method"
626
640
],
627
641
"additionalProperties": false
642
+
},
643
+
"RAGConfig": {
644
+
"type": "object",
645
+
"description": "RAG (Retrieval-Augmented Generation) configuration for document search and retrieval with pluggable strategies. Multiple strategies enable hybrid retrieval and reranking.",
646
+
"properties": {
647
+
"description": {
648
+
"type": "string",
649
+
"description": "Description of the RAG source"
650
+
},
651
+
"docs": {
652
+
"type": "array",
653
+
"description": "Shared document paths or directories indexed by all strategies",
654
+
"items": {
655
+
"type": "string"
656
+
}
657
+
},
658
+
"strategies": {
659
+
"type": "array",
660
+
"description": "Array of retrieval strategy configurations. Each strategy can have different parameters based on its type.",
661
+
"minItems": 1,
662
+
"items": {
663
+
"type": "object",
664
+
"description": "Retrieval strategy configuration with type-specific parameters. Structured fields are limited; additional parameters are passed through as-is for strategy-specific use.",
665
+
"required": ["type"],
666
+
"properties": {
667
+
"type": {
668
+
"type": "string",
669
+
"description": "Retrieval strategy type",
670
+
"enum": ["chunked-embeddings", "bm25"]
671
+
},
672
+
"model": {
673
+
"type": "string",
674
+
"description": "Embedding model reference for chunked-embeddings strategies (looked up in models map, or 'auto' for automatic selection)",
"description": "Additional documents for this strategy only (augments shared docs)",
680
+
"items": {
681
+
"type": "string"
682
+
}
683
+
},
684
+
"database": {
685
+
"type": "string",
686
+
"description": "Database path or connection string. Currently only simple string values are supported (e.g., './vector.db', './bm25.db')."
687
+
},
688
+
"similarity_metric": {
689
+
"type": "string",
690
+
"description": "Similarity metric (chunked-embeddings only). Currently only 'cosine_similarity' is implemented.",
691
+
"enum": ["cosine_similarity"]
692
+
},
693
+
"vector_dimensions": {
694
+
"type": "integer",
695
+
"description": "Vector dimensions for embeddings (chunked-embeddings only). Must match your embedding model's output dimensions and is required for chunked-embeddings strategies.",
696
+
"minimum": 1,
697
+
"examples": [1536, 3072, 1024, 768]
698
+
},
699
+
"k1": {
700
+
"type": "number",
701
+
"description": "BM25 term frequency saturation (bm25 only, typically 1.2-2.0)",
702
+
"minimum": 0
703
+
},
704
+
"b": {
705
+
"type": "number",
706
+
"description": "BM25 length normalization (bm25 only, 0-1, typically 0.75)",
707
+
"minimum": 0,
708
+
"maximum": 1
709
+
},
710
+
"threshold": {
711
+
"type": "number",
712
+
"description": "Minimum score threshold (0-1 for chunked-embeddings, unbounded for bm25)",
713
+
"minimum": 0
714
+
},
715
+
"limit": {
716
+
"type": "integer",
717
+
"description": "Max results from this strategy (candidates for fusion). If unset, defaults to 5 in the implementation.",
718
+
"minimum": 1
719
+
},
720
+
"chunking": {
721
+
"type": "object",
722
+
"description": "Text chunking configuration",
723
+
"properties": {
724
+
"size": {
725
+
"type": "integer",
726
+
"description": "Chunk size in characters. If unset, defaults to 1000 in the implementation.",
727
+
"minimum": 1
728
+
},
729
+
"overlap": {
730
+
"type": "integer",
731
+
"description": "Overlap between chunks in characters. If unset, defaults to 75 in the implementation.",
732
+
"minimum": 0
733
+
},
734
+
"respect_word_boundaries": {
735
+
"type": "boolean",
736
+
"description": "When true, chunks will split on the nearest whitespace boundary instead of at the exact character limit, preventing words from being truncated."
737
+
}
738
+
},
739
+
"additionalProperties": false
740
+
}
741
+
},
742
+
"additionalProperties": true
743
+
}
744
+
},
745
+
"results": {
746
+
"type": "object",
747
+
"description": "Result post-processing configuration (fusion, deduplication, limiting). If omitted, sensible defaults are applied in code.",
748
+
"properties": {
749
+
"limit": {
750
+
"type": "integer",
751
+
"description": "Maximum number of results to return (top K)",
752
+
"minimum": 1,
753
+
"default": 15
754
+
},
755
+
"fusion": {
756
+
"type": "object",
757
+
"description": "Configuration for combining results from multiple strategies. If omitted and multiple strategies are configured, Reciprocal Rank Fusion (rrf) with k=60 is used.",
758
+
"properties": {
759
+
"strategy": {
760
+
"type": "string",
761
+
"description": "Fusion strategy to use",
762
+
"enum": [
763
+
"rrf",
764
+
"reciprocal_rank_fusion",
765
+
"weighted",
766
+
"max"
767
+
],
768
+
"default": "rrf",
769
+
"examples": [
770
+
"rrf",
771
+
"weighted"
772
+
]
773
+
},
774
+
"k": {
775
+
"type": "integer",
776
+
"description": "RRF smoothing parameter k (only for RRF strategy)",
777
+
"minimum": 1,
778
+
"default": 60
779
+
},
780
+
"weights": {
781
+
"type": "object",
782
+
"description": "Strategy weights for weighted fusion (strategy name -> weight)",
783
+
"additionalProperties": {
784
+
"type": "number",
785
+
"minimum": 0,
786
+
"maximum": 1
787
+
},
788
+
"examples": [
789
+
{
790
+
"chunked-embeddings": 0.7,
791
+
"bm25": 0.3
792
+
}
793
+
]
794
+
}
795
+
},
796
+
"additionalProperties": false
797
+
},
798
+
"deduplicate": {
799
+
"type": "boolean",
800
+
"description": "Remove duplicate documents across strategies",
801
+
"default": true
802
+
},
803
+
"include_score": {
804
+
"type": "boolean",
805
+
"description": "Include relevance scores in results",
806
+
"default": false
807
+
},
808
+
"return_full_content": {
809
+
"type": "boolean",
810
+
"description": "Return full document content instead of just the matched chunk. The full document is read directly from the file system.",
0 commit comments