Skip to content

Commit 113abf5

Browse files
committed
Agentic RAG Pipeline Structured With Various RAG Techniques
0 parents  commit 113abf5

32 files changed

Lines changed: 6374 additions & 0 deletions

.gitignore

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Python-generated files
2+
__pycache__/
3+
*.py[oc]
4+
build/
5+
dist/
6+
wheels/
7+
*.egg-info
8+
9+
# Virtual environments
10+
.venv
11+
.env

.python-version

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
3.11

README.md

Whitespace-only changes.

data/attention-is-all-you-need.pdf

218 KB
Binary file not shown.

data/state_of_the_union.txt

Lines changed: 723 additions & 0 deletions
Large diffs are not rendered by default.

logs/2025_12_01.log

Lines changed: 182 additions & 0 deletions
Large diffs are not rendered by default.

main.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import os
2+
from dotenv import load_dotenv
3+
from project.pipeline.agents import AgentWorkflow
4+
from project.logger.logging import get_logger
5+
6+
load_dotenv()
7+
8+
logger = get_logger(__name__)
9+
10+
11+
def setup_langsmith():
    """Turn on LangSmith tracing when an API key is present in the environment.

    Reads ``LANGSMITH_API_KEY``. When it holds a non-empty value, the
    ``LANGCHAIN_*`` variables below are written to ``os.environ`` and an info
    message is logged. Otherwise the environment is left untouched and a
    warning is logged instead.
    """
    api_key = os.getenv("LANGSMITH_API_KEY")

    # Guard clause: without a key there is nothing to configure.
    if not api_key:
        logger.warning("LANGSMITH_API_KEY not found, tracing disabled")
        return

    os.environ.update(
        {
            "LANGCHAIN_TRACING_V2": "true",
            "LANGCHAIN_ENDPOINT": "https://api.smith.langchain.com",
            "LANGCHAIN_API_KEY": api_key,
            "LANGCHAIN_PROJECT": "rag-corrective-pipeline",
        }
    )
    logger.info("LangSmith tracing enabled")
21+
22+
23+
def main():
    """Run the demo: build the agent workflow and answer a fixed question list.

    Steps, in order: configure LangSmith tracing, create an ``AgentWorkflow``,
    call its ``setup`` with ``use_attention_paper=True``, save the workflow
    graph to ``workflow.png``, then pass each sample question to ``agent.run``
    and print the returned answer between separator rules.
    """
    setup_langsmith()
    logger.info("Starting RAG application...")

    agent = AgentWorkflow()

    logger.info("Setting up pipeline with Attention Is All You Need paper...")
    agent.setup(use_attention_paper=True)

    agent.save_graph("workflow.png")
    logger.info("Workflow graph saved")

    # 80-character separator reused for every banner line below.
    rule = "=" * 80

    questions = [
        "What is the attention mechanism in transformers?",
        "Explain the multi-head attention.",
        "What are the advantages of the transformer architecture?"
    ]

    print("\n" + rule)
    print("RAG PIPELINE WITH CORRECTIVE RAG (CRAG)")
    print(rule + "\n")

    # Questions are numbered from 1 for display.
    for index, question in enumerate(questions, start=1):
        print("\n" + rule)
        print(f"Question {index}: {question}")
        print(rule + "\n")

        answer = agent.run(question)

        print(f"\nAnswer:\n{answer}\n")
        print(rule + "\n")

    logger.info("RAG application completed successfully")
56+
57+
58+
if __name__ == "__main__":
59+
main()

notebooks/vanilla_rag.ipynb

Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"id": "171dc240",
7+
"metadata": {},
8+
"outputs": [],
9+
"source": [
10+
"from dotenv import load_dotenv\n",
11+
"import os\n",
12+
"load_dotenv() \n",
13+
"os.environ['GROQ_API_KEY'] = os.getenv('GROQ_API_KEY')\n",
14+
"os.environ['GOOGLE_API_KEY'] = os.getenv('GOOGLE_API_KEY')"
15+
]
16+
},
17+
{
18+
"cell_type": "code",
19+
"execution_count": 2,
20+
"id": "efbca25c",
21+
"metadata": {},
22+
"outputs": [],
23+
"source": [
24+
"from langchain_community.document_loaders import TextLoader\n",
25+
"\n",
26+
"loader = TextLoader('..\\data\\state_of_the_union.txt', encoding='utf8')\n",
27+
"documents = loader.load()"
28+
]
29+
},
30+
{
31+
"cell_type": "code",
32+
"execution_count": 3,
33+
"id": "203b53b3",
34+
"metadata": {},
35+
"outputs": [
36+
{
37+
"name": "stdout",
38+
"output_type": "stream",
39+
"text": [
40+
"Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \n",
41+
"\n",
42+
"Last year COVID-19 kept us apart. This year we are finally together again. \n",
43+
"\n",
44+
"Tonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \n",
45+
"\n",
46+
"With a duty to one another to the American people to the Constitution. \n",
47+
"\n",
48+
"And with an unwavering resolve that freedom will always triumph over tyranny. \n",
49+
"\n",
50+
"Six day\n"
51+
]
52+
}
53+
],
54+
"source": [
55+
"print(documents[0].page_content[:500])"
56+
]
57+
},
58+
{
59+
"cell_type": "code",
60+
"execution_count": 4,
61+
"id": "76bdd56f",
62+
"metadata": {},
63+
"outputs": [],
64+
"source": [
65+
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
66+
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n",
67+
"chunks = text_splitter.split_documents(documents)"
68+
]
69+
},
70+
{
71+
"cell_type": "code",
72+
"execution_count": 10,
73+
"id": "3fd6b5dd",
74+
"metadata": {},
75+
"outputs": [],
76+
"source": [
77+
"from langchain_community.embeddings import FastEmbedEmbeddings\n",
78+
"embeddings = FastEmbedEmbeddings(model_name=\"BAAI/bge-small-en-v1.5\")"
79+
]
80+
},
81+
{
82+
"cell_type": "code",
83+
"execution_count": 11,
84+
"id": "9d79271e",
85+
"metadata": {},
86+
"outputs": [],
87+
"source": [
88+
"from langchain_community.vectorstores import FAISS\n",
89+
"\n",
90+
"vectorstore = FAISS.from_documents(chunks, embeddings)"
91+
]
92+
},
93+
{
94+
"cell_type": "code",
95+
"execution_count": 13,
96+
"id": "53ec2306",
97+
"metadata": {},
98+
"outputs": [],
99+
"source": [
100+
"retriever = vectorstore.as_retriever(search_type=\"mmr\", search_kwargs={\"k\":3})"
101+
]
102+
},
103+
{
104+
"cell_type": "code",
105+
"execution_count": 14,
106+
"id": "1c9181f3",
107+
"metadata": {},
108+
"outputs": [],
109+
"source": [
110+
"from langchain_groq import ChatGroq\n",
111+
"llm = ChatGroq(model='openai/gpt-oss-120b', temperature=0.1)"
112+
]
113+
},
114+
{
115+
"cell_type": "code",
116+
"execution_count": 15,
117+
"id": "11181278",
118+
"metadata": {},
119+
"outputs": [],
120+
"source": [
121+
"from langchain_core.prompts import ChatPromptTemplate\n",
122+
"\n",
123+
"template = \"\"\"\n",
124+
"You are a helpful AI assistant. Use the following pieces of context to answer the question at the end. \n",
125+
"If you don't know the answer, just say that you don't know, don't try to make up an answer.\n",
126+
"Use the information to provide a concise and accurate answer.\n",
127+
"Question: {question}\n",
128+
"context: {context}\n",
129+
"\"\"\"\n",
130+
"\n",
131+
"prompt = ChatPromptTemplate.from_template(template)"
132+
]
133+
},
134+
{
135+
"cell_type": "code",
136+
"execution_count": 16,
137+
"id": "79752ec8",
138+
"metadata": {},
139+
"outputs": [],
140+
"source": [
141+
"from langchain_core.runnables import RunnablePassthrough\n",
142+
"from langchain_core.output_parsers import StrOutputParser\n",
143+
"rag_chain = (\n",
144+
" {\"context\": retriever, \"question\": RunnablePassthrough()}\n",
145+
" | prompt\n",
146+
" | llm\n",
147+
" | StrOutputParser()\n",
148+
")"
149+
]
150+
},
151+
{
152+
"cell_type": "code",
153+
"execution_count": 18,
154+
"id": "5d88e579",
155+
"metadata": {},
156+
"outputs": [
157+
{
158+
"name": "stdout",
159+
"output_type": "stream",
160+
"text": [
161+
"**Madam Speaker** – the title used for the presiding officer of the U.S. House of Representatives when that officer is a woman (the Speaker of the House at the time of the address).\n",
162+
"\n",
163+
"**What her (the address’s) speech is about** – the President’s opening remarks to the joint session of Congress. In this portion he:\n",
164+
"\n",
165+
"* Acknowledges the recent COVID‑19 pandemic and the fact that the nation is now gathering together again. \n",
166+
"* Calls for bipartisan unity – Democrats, Republicans and Independents – as “Americans” first. \n",
167+
"* Re‑affirms the nation’s commitment to the Constitution and to freedom. \n",
168+
"* Condemns Russia’s invasion of Ukraine, describing Vladimir Putin’s attempt to “shake the foundations of the free world” and praising the courage and determination of the Ukrainian people. \n",
169+
"\n",
170+
"So, “Madam Speaker” is the female Speaker of the House, and the speech she is hearing focuses on national recovery, bipartisan unity, and a strong stance against Russian aggression in Ukraine.\n"
171+
]
172+
}
173+
],
174+
"source": [
175+
"print(rag_chain.invoke(\"Who is Madam Speaker and What is Her Speech About?\"))"
176+
]
177+
},
178+
{
179+
"cell_type": "code",
180+
"execution_count": null,
181+
"id": "a56e9e22",
182+
"metadata": {},
183+
"outputs": [],
184+
"source": []
185+
}
186+
],
187+
"metadata": {
188+
"kernelspec": {
189+
"display_name": "RAG Project",
190+
"language": "python",
191+
"name": "python3"
192+
},
193+
"language_info": {
194+
"codemirror_mode": {
195+
"name": "ipython",
196+
"version": 3
197+
},
198+
"file_extension": ".py",
199+
"mimetype": "text/x-python",
200+
"name": "python",
201+
"nbconvert_exporter": "python",
202+
"pygments_lexer": "ipython3",
203+
"version": "3.11.9"
204+
}
205+
},
206+
"nbformat": 4,
207+
"nbformat_minor": 5
208+
}

project/__init__.py

Whitespace-only changes.

project/config/config.yaml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
embedding_model:
2+
provider: "fastembedding"
3+
model_name : "BAAI/bge-small-en-v1.5"
4+
5+
retriever:
6+
search_type: "mmr"
7+
top_k: 3
8+
9+
llm:
10+
provider: "langchain_groq"
11+
model: "openai/gpt-oss-120b"
12+
temperature: 0.1
13+
max_tokens: 2048
14+
15+
reranker:
16+
model_name: "rank-T5-flan"
17+
top_k: 3
18+
cache_dir: null
19+
20+

0 commit comments

Comments
 (0)