Skip to content

Commit feb17b1

Browse files
added new code
1 parent 58f58d2 commit feb17b1

1 file changed

Lines changed: 37 additions & 22 deletions

File tree

new_architecture_v4.py

Lines changed: 37 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -40,34 +40,34 @@
4040

4141
# -------------------- MODELS & CLIENTS --------------------
4242
logging.info("Loading embedding model...")
43-
embedding_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
44-
EMBED_DIM = embedding_model.get_sentence_embedding_dimension()
43+
# embedding_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
44+
# EMBED_DIM = embedding_model.get_sentence_embedding_dimension()
4545

4646
logging.info("Pinecone init...")
47-
pc = Pinecone(api_key=PINECONE_KEY)
48-
if INDEX_NAME not in pc.list_indexes().names():
49-
logging.info("Creating Pinecone index (if needed)...")
50-
pc.create_index(name=INDEX_NAME, dimension=EMBED_DIM, metric="cosine",
51-
spec=ServerlessSpec(cloud="aws", region="us-east-1"))
52-
index = pc.Index(INDEX_NAME)
53-
logging.info("Pinecone index ready.")
47+
# pc = Pinecone(api_key=PINECONE_KEY)
48+
# if INDEX_NAME not in pc.list_indexes().names():
49+
# logging.info("Creating Pinecone index (if needed)...")
50+
# pc.create_index(name=INDEX_NAME, dimension=EMBED_DIM, metric="cosine",
51+
# spec=ServerlessSpec(cloud="aws", region="us-east-1"))
52+
# index = pc.Index(INDEX_NAME)
53+
# logging.info("Pinecone index ready.")
5454

5555
logging.info("Initializing LLM clients...")
5656
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.0, api_key=OPENAI_API_KEY)
5757
chitchat_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.7, api_key=OPENAI_API_KEY)
5858
summarizer_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.0, api_key=OPENAI_API_KEY)
5959

60-
if CROSS_ENCODER_AVAILABLE:
61-
try:
62-
logging.info("Loading cross-encoder for reranking...")
63-
cross_encoder = CrossEncoder(CROSS_ENCODER_MODEL)
64-
except Exception as e:
65-
logging.warning("Unable to initialize CrossEncoder: %s", e)
66-
CROSS_ENCODER_AVAILABLE = False
67-
cross_encoder = None
68-
else:
69-
cross_encoder = None
70-
logging.info("CrossEncoder not available - falling back to vector+BM25 fusion only.")
60+
# if CROSS_ENCODER_AVAILABLE:
61+
# try:
62+
# logging.info("Loading cross-encoder for reranking...")
63+
# cross_encoder = CrossEncoder(CROSS_ENCODER_MODEL)
64+
# except Exception as e:
65+
# logging.warning("Unable to initialize CrossEncoder: %s", e)
66+
# CROSS_ENCODER_AVAILABLE = False
67+
# cross_encoder = None
68+
# else:
69+
# cross_encoder = None
70+
# logging.info("CrossEncoder not available - falling back to vector+BM25 fusion only.")
7171

7272
# -------------------- CHAT HISTORY MANAGEMENT --------------------
7373
MAX_VERBATIM_PAIRS = 3 # keep last 3 Q-A pairs verbatim
@@ -80,6 +80,19 @@ def init_session():
8080
st.session_state.last_suggested = ""
8181
if "debug" not in st.session_state:
8282
st.session_state.debug = []
83+
if "pinecone_index" not in st.session_state:
84+
pc = Pinecone(api_key=PINECONE_KEY)
85+
index = pc.Index(INDEX_NAME)
86+
st.session_state.pinecone_index = index
87+
88+
if "reranker" not in st.session_state:
89+
cross_encoder = CrossEncoder(CROSS_ENCODER_MODEL)
90+
st.session_state.reranker = cross_encoder
91+
if "embedding_model" not in st.session_state:
92+
embedding_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
93+
EMBED_DIM = embedding_model.get_sentence_embedding_dimension()
94+
st.session_state.embedding_model = embedding_model
95+
st.session_state.EMBED_DIM= EMBED_DIM
8396

8497
def append_debug(msg):
8598
logging.info(msg)
@@ -252,7 +265,7 @@ def get_chat_context():
252265
• Keep under 150 words.
253266
• Summarize meaningfully and with a coherent flow
254267
• Each bullet must be factual and context-bound.
255-
268+
- Answer with the best response that can be framed from the context for the query, and don't mention references to what's not in the context (e.g., "the document doesn't have much info about <topic>" ❌ [sentences referring to the document rather than the source content are not needed])
256269
- Respect chat history for coherence.
257270
- Always include a follow up question if <context_followup> is non-empty in the format(without bullet points) "Would you like to know about <a follow up question from context_followup not overlapping with the answer generated>?"
258271
@@ -287,11 +300,13 @@ def safe_json_parse(text):
287300
except Exception:
288301
return None
289302
# -------------------- HYBRID RETRIEVAL (vector -> bm25 -> cross -> fusion) --------------------
290-
def hybrid_retrieve(query, top_k_vec=6, u_cap=6):
303+
def hybrid_retrieve(query, top_k_vec=10, u_cap=10):
291304
"""
292305
Returns re-ranked candidate chunks using vector + BM25 + cross-encoder.
293306
"""
294307
# 1) Vector search
308+
embedding_model = st.session_state.embedding_model
309+
index = st.session_state.pinecone_index
295310
q_emb = embedding_model.encode(query, convert_to_numpy=True).tolist()
296311
vec_resp = index.query(
297312
vector=q_emb,

0 commit comments

Comments
 (0)