22import requests
33import time
44import concurrent.futures
5+ import json
6+ import os
7+ import re
58
69st.set_page_config(page_title="LLM Comparison", layout="wide")
710
1518 line-height: 1 !important;
1619 margin-top: 28px !important;
1720}
21+
22+ div.stButton button[data-testid*="stButton-primary"] {
23+ font-size: 14px !important;
24+ height: 35px !important;
25+ }
26+
1827div[data-testid="stSelectbox"] > div {
1928 margin-right: 0px !important;
2029}
@@ -38,6 +47,44 @@ def get_models():
3847 st.warning("No models found. Ensure Ollama is running and has models pulled.")
3948 st.stop()
4049
HISTORY_FILE = "chat_history.json"

def load_chat_history():
    """Load the persisted chat history from HISTORY_FILE.

    Returns:
        list: conversation entries, or [] when the file is missing,
        unreadable, corrupt, or does not contain a JSON list.

    Errors are surfaced in the Streamlit UI (warning/error) instead of
    being raised, so a damaged history file never blocks app startup.
    """
    if not os.path.exists(HISTORY_FILE):
        return []
    try:
        with open(HISTORY_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except json.JSONDecodeError:
        st.warning("Error decoding chat history file. Starting with empty history.")
        return []
    except Exception as e:
        st.error(f"Could not load chat history: {e}")
        return []
    # Fix: valid JSON is not necessarily a list. Downstream code indexes
    # the history and reads entry["responses"], so anything else (e.g. a
    # hand-edited dict) would crash later — reject it here instead.
    if not isinstance(data, list):
        st.warning("Chat history file has an unexpected format. Starting with empty history.")
        return []
    return data
64+
def save_chat_history(history):
    """Persist *history* to HISTORY_FILE as pretty-printed JSON.

    Failures are reported through the Streamlit UI rather than raised,
    so a transient disk problem never crashes the app.
    """
    try:
        serialized = json.dumps(history, indent=4)
        with open(HISTORY_FILE, "w", encoding="utf-8") as out:
            out.write(serialized)
    except Exception as e:
        st.error(f"Could not save chat history: {e}")
71+
72+ if "chat_history" not in st.session_state:
73+ st.session_state.chat_history = load_chat_history()
74+
def delete_model_response(conversation_index, model_response_idx_in_entry):
    """Remove one model's response from a stored conversation entry.

    *conversation_index* is the position in the reversed (newest-first)
    display order; it is mapped back to the underlying list index here.
    If the entry is left with no responses it is deleted entirely, and
    the updated history is persisted to disk. Out-of-range indices are
    ignored silently.
    """
    history = st.session_state.chat_history
    # The UI iterates history newest-first, so flip the index back.
    real_idx = len(history) - 1 - conversation_index
    if not (0 <= real_idx < len(history)):
        return
    responses = history[real_idx]["responses"]
    if not (0 <= model_response_idx_in_entry < len(responses)):
        return
    responses.pop(model_response_idx_in_entry)
    if not responses:
        history.pop(real_idx)
    save_chat_history(history)
87+
4188prompt = st.text_area("Prompt", "")
4289
4390if "model_count" not in st.session_state:
@@ -53,9 +100,8 @@ def remove_model(index):
53100for i in range(st.session_state.model_count):
54101 col1, col2 = st.columns([0.97, 0.02])
55102 with col1:
56- # Ensure the list is long enough
57103 if i >= len(st.session_state.selected_models):
58- st.session_state.selected_models.append("")
104+ st.session_state.selected_models.append("")
59105
60106 st.session_state.selected_models[i] = st.selectbox(
61107 f"Model {i+1}",
@@ -68,17 +114,16 @@ def remove_model(index):
68114
69115selected_models_filtered = [model for model in st.session_state.selected_models if model]
70116
71- _, _, spacer, col_add, col_run = st.columns([0.5 , 0.2 , 0.1, 0.1, 0.1 ])
117+ _, col_add, col_run = st.columns([0.7 , 0.15 , 0.15 ])
72118with col_add:
73- if st.button("Add new model "):
119+ if st.button("Add New Model "):
74120 st.session_state.model_count += 1
75121 st.session_state.selected_models.append("")
76122 st.rerun()
77123with col_run:
78124 run_clicked = st.button("Run Models", type="primary")
79125
80126def query_ollama_model(model_name, prompt_text):
81- """Function to query a single Ollama model."""
82127 try:
83128 start_time = time.time()
84129 res = requests.post(
@@ -92,15 +137,18 @@ def query_ollama_model(model_name, prompt_text):
92137
93138 duration = round(end_time - start_time, 2)
94139 content = response_data.get("response", "")
95- eval_count = response_data.get("eval_count", len(content.split()))
140+
141+ cleaned_content = re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL)
142+
143+ eval_count = response_data.get("eval_count", len(cleaned_content.split()))
96144 eval_rate = response_data.get("eval_rate", round(eval_count / duration, 2) if duration > 0 else 0)
97145
98146 return {
99147 "model": model_name,
100148 "duration": duration,
101149 "eval_count": eval_count,
102150 "eval_rate": eval_rate,
103- "response": content
151+ "response": cleaned_content
104152 }
105153 except Exception as e:
106154 return {
@@ -114,14 +162,9 @@ def query_ollama_model(model_name, prompt_text):
114162if run_clicked and prompt and selected_models_filtered:
115163 responses = []
116164
117- # Create placeholders for immediate feedback
118- response_placeholders = {model: st.empty() for model in selected_models_filtered}
119-
120165 with concurrent.futures.ThreadPoolExecutor(max_workers=len(selected_models_filtered)) as executor:
121- # Submit tasks to the thread pool
122166 future_to_model = {executor.submit(query_ollama_model, model, prompt): model for model in selected_models_filtered}
123167
124- # Iterate as futures complete
125168 for future in concurrent.futures.as_completed(future_to_model):
126169 model_name = future_to_model[future]
127170 try:
@@ -136,28 +179,81 @@ def query_ollama_model(model_name, prompt_text):
136179 "response": f"Error: {exc}"
137180 })
138181
139- # Sort responses by the order of selected models for consistent display
140182 ordered_responses = []
141183 for model in selected_models_filtered:
142184 for res in responses:
143185 if res["model"] == model:
144186 ordered_responses.append(res)
145187 break
188+
189+ st.session_state.chat_history.append({"prompt": prompt, "responses": ordered_responses})
190+ save_chat_history(st.session_state.chat_history)
191+
192+ st.markdown("---")
193+ st.subheader("Previous Interactions")
194+
def get_truncated_text(text, word_limit=50):
    """Return *text*, shortened to *word_limit* words when it is longer.

    Truncated output joins the kept words with single spaces and appends
    an ellipsis; text at or under the limit is returned unchanged.
    """
    tokens = text.split()
    if len(tokens) <= word_limit:
        return text
    return " ".join(tokens[:word_limit]) + "..."
146200
147- cols = st.columns(len(ordered_responses))
148- for i, res in enumerate(ordered_responses):
149- with cols[i]:
150- st.markdown(
151- f"### <span style='color:#3366cc'>{res['model']}</span>" if i % 2 == 0 else f"### <span style='color:#cc0000'>{res['model']}</span>",
152- unsafe_allow_html=True
153- )
154- st.markdown(
155- f"""
156- <div style="background-color:#e6f0ff; padding:10px; border-radius:8px; margin-bottom:10px;">
157- <b>Duration</b>: <span style="color:#3366cc;">{res['duration']} secs</span>
158- <b>Eval count</b>: <span style="color:green;">{res['eval_count']} tokens</span>
159- <b>Eval rate</b>: <span style="color:green;">{res['eval_rate']} tokens/s</span>
160- </div>
161- """, unsafe_allow_html=True
162- )
163- st.write(res["response"])
201+ if st.session_state.chat_history:
202+ for entry_idx, entry in enumerate(reversed(st.session_state.chat_history)):
203+ st.markdown(f"**Prompt:** {entry['prompt']}")
204+
205+ cols = st.columns(len(entry['responses']))
206+
207+ for i, res in enumerate(entry['responses']):
208+ with cols[i]:
209+ st.markdown(
210+ f"### <span style='color:#3366cc'>{res['model']}</span>" if i % 2 == 0 else f"### <span style='color:#cc0000'>{res['model']}</span>",
211+ unsafe_allow_html=True
212+ )
213+ st.markdown(
214+ f"""
215+ <div style="background-color:#e6f0ff; padding:10px; border-radius:8px; margin-bottom:10px;">
216+ <b>Duration</b>: <span style="color:#3366cc;">{res['duration']} secs</span>
217+ <b>Eval count</b>: <span style="color:green;">{res['eval_count']} tokens</span>
218+ <b>Eval rate</b>: <span style="color:green;">{res['eval_rate']} tokens/s</span>
219+ </div>
220+ """, unsafe_allow_html=True
221+ )
222+
223+ full_response_text = res["response"]
224+ words = full_response_text.split()
225+ content_is_longer_than_50_words = len(words) > 50
226+
227+ read_more_toggle_key = f"read_more_entry_{entry_idx}_model_{i}"
228+
229+ if read_more_toggle_key not in st.session_state:
230+ st.session_state[read_more_toggle_key] = False
231+
232+ if content_is_longer_than_50_words and not st.session_state[read_more_toggle_key]:
233+ st.write(get_truncated_text(full_response_text, word_limit=50))
234+ else:
235+ st.write(full_response_text)
236+
237+ button_cols = st.columns([0.5, 0.5])
238+
239+ with button_cols[0]:
240+ if content_is_longer_than_50_words:
241+ if not st.session_state[read_more_toggle_key]:
242+ if st.button("Read More", key=f"btn_read_{read_more_toggle_key}"):
243+ st.session_state[read_more_toggle_key] = True
244+ st.rerun()
245+ else:
246+ if st.button("Show Less", key=f"btn_less_{read_more_toggle_key}"):
247+ st.session_state[read_more_toggle_key] = False
248+ st.rerun()
249+
250+ with button_cols[1]:
251+ st.button(
252+ "Delete This Response",
253+ key=f"delete_response_{entry_idx}_{i}",
254+ on_click=delete_model_response,
255+ args=(entry_idx, i)
256+ )
257+ st.markdown("---")
258+ else:
259+ st.info("No previous interactions found. Run models to start saving history!")
0 commit comments