4040
4141# -------------------- MODELS & CLIENTS --------------------
4242logging .info ("Loading embedding model..." )
43- embedding_model = SentenceTransformer ("sentence-transformers/all-mpnet-base-v2" )
44- EMBED_DIM = embedding_model .get_sentence_embedding_dimension ()
43+ # embedding_model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
44+ # EMBED_DIM = embedding_model.get_sentence_embedding_dimension()
4545
4646logging .info ("Pinecone init..." )
47- pc = Pinecone (api_key = PINECONE_KEY )
48- if INDEX_NAME not in pc .list_indexes ().names ():
49- logging .info ("Creating Pinecone index (if needed)..." )
50- pc .create_index (name = INDEX_NAME , dimension = EMBED_DIM , metric = "cosine" ,
51- spec = ServerlessSpec (cloud = "aws" , region = "us-east-1" ))
52- index = pc .Index (INDEX_NAME )
53- logging .info ("Pinecone index ready." )
47+ # pc = Pinecone(api_key=PINECONE_KEY)
48+ # if INDEX_NAME not in pc.list_indexes().names():
49+ # logging.info("Creating Pinecone index (if needed)...")
50+ # pc.create_index(name=INDEX_NAME, dimension=EMBED_DIM, metric="cosine",
51+ # spec=ServerlessSpec(cloud="aws", region="us-east-1"))
52+ # index = pc.Index(INDEX_NAME)
53+ # logging.info("Pinecone index ready.")
5454
5555logging .info ("Initializing LLM clients..." )
5656llm = ChatOpenAI (model = "gpt-4o-mini" , temperature = 0.0 , api_key = OPENAI_API_KEY )
5757chitchat_llm = ChatOpenAI (model = "gpt-4o-mini" , temperature = 0.7 , api_key = OPENAI_API_KEY )
5858summarizer_llm = ChatOpenAI (model = "gpt-4o-mini" , temperature = 0.0 , api_key = OPENAI_API_KEY )
5959
60- if CROSS_ENCODER_AVAILABLE :
61- try :
62- logging .info ("Loading cross-encoder for reranking..." )
63- cross_encoder = CrossEncoder (CROSS_ENCODER_MODEL )
64- except Exception as e :
65- logging .warning ("Unable to initialize CrossEncoder: %s" , e )
66- CROSS_ENCODER_AVAILABLE = False
67- cross_encoder = None
68- else :
69- cross_encoder = None
70- logging .info ("CrossEncoder not available - falling back to vector+BM25 fusion only." )
60+ # if CROSS_ENCODER_AVAILABLE:
61+ # try:
62+ # logging.info("Loading cross-encoder for reranking...")
63+ # cross_encoder = CrossEncoder(CROSS_ENCODER_MODEL)
64+ # except Exception as e:
65+ # logging.warning("Unable to initialize CrossEncoder: %s", e)
66+ # CROSS_ENCODER_AVAILABLE = False
67+ # cross_encoder = None
68+ # else:
69+ # cross_encoder = None
70+ # logging.info("CrossEncoder not available - falling back to vector+BM25 fusion only.")
7171
7272# -------------------- CHAT HISTORY MANAGEMENT --------------------
7373MAX_VERBATIM_PAIRS = 3 # keep last 3 Q-A pairs verbatim
@@ -80,6 +80,19 @@ def init_session():
8080 st .session_state .last_suggested = ""
8181 if "debug" not in st .session_state :
8282 st .session_state .debug = []
83+ if "pinecone_index" not in st .session_state :
84+ pc = Pinecone (api_key = PINECONE_KEY )
85+ index = pc .Index (INDEX_NAME )
86+ st .session_state .pinecone_index = index
87+
88+ if "reranker" not in st .session_state :
89+ cross_encoder = CrossEncoder (CROSS_ENCODER_MODEL )
90+ st .session_state .reranker = cross_encoder
91+ if "embedding_model" not in st .session_state :
92+ embedding_model = SentenceTransformer ("sentence-transformers/all-mpnet-base-v2" )
93+ EMBED_DIM = embedding_model .get_sentence_embedding_dimension ()
94+ st .session_state .embedding_model = embedding_model
95+ st .session_state .EMBED_DIM = EMBED_DIM
8396
8497def append_debug (msg ):
8598 logging .info (msg )
@@ -252,7 +265,7 @@ def get_chat_context():
252265 • Keep under 150 words.
253266 • Summarize meaningfully and in a perfect flow
254267 • Each bullet must be factual and context-bound.
255-
268+ - Provide the best answer that can be framed from the context for the query, and do not reference what is absent from the context (e.g. "the document doesn't have much info about <topic>" ❌ — sentences commenting on the document itself, rather than sourcing the answer, are not needed)
256269 - Respect chat history for coherence.
257270 - Always include a follow up question if <context_followup> is non-empty in the format(without bullet points) "Would you like to know about <a follow up question from context_followup not overlapping with the answer generated>?"
258271
@@ -287,11 +300,13 @@ def safe_json_parse(text):
287300 except Exception :
288301 return None
289302# -------------------- HYBRID RETRIEVAL (vector -> bm25 -> cross -> fusion) --------------------
290- def hybrid_retrieve (query , top_k_vec = 6 , u_cap = 6 ):
303+ def hybrid_retrieve (query , top_k_vec = 10 , u_cap = 10 ):
291304 """
292305 Returns re-ranked candidate chunks using vector + BM25 + cross-encoder.
293306 """
294307 # 1) Vector search
308+ embedding_model = st .session_state .embedding_model
309+ index = st .session_state .pinecone_index
295310 q_emb = embedding_model .encode (query , convert_to_numpy = True ).tolist ()
296311 vec_resp = index .query (
297312 vector = q_emb ,
0 commit comments