CodeForPhilly
diff --git a/server/api/services/llm_services.py b/server/api/services/llm_services.py
Lines changed: 2 additions & 51 deletions
diff --git a/server/api/services/prompt_services.py b/server/api/services/prompt_services.py
Lines changed: 274 additions & 2 deletions
diff --git a/server/api/services/tools/tools.py b/server/api/services/tools/tools.py
Lines changed: 5 additions & 18 deletions
@@ -8,6 +8,7 @@
 from abc import ABC, abstractmethod
 
 from openai import AsyncOpenAI
+from .prompt_services import LLM_EXTRACTION_INSTRUCTIONS
 
 
 class BaseModelHandler(ABC):
@@ -70,57 +71,7 @@ class GPT41NanoHandler(BaseModelHandler):
     # Long context performance can degrade as more items are required to be retrieved,
     # or perform complex reasoning that requires knowledge of the state of the entire context
 
-    #
-
-    INSTRUCTIONS = """
-        
-    # Role and Objective
-    
-    - You are a seasoned physician or medical professional who is developing a bipolar disorder treatment algorithim
-
-    - You are extracting bipolar medication decision points from a research paper that is chunked into multiple parts each labeled with an ID
-
-    # Instructions
-
-    - Identify decision points for bipolar medications
-
-    - For each decision point you find, return a JSON object using the following format:
-
-        {
-            "criterion": "<condition or concern>",
-            "decision": "INCLUDE" or "EXCLUDE",
-            "medications": ["<medication 1>", "<medication 2>", ...],
-            "reason": "<short explanation for why this criterion applies>",
-            "sources": ["<ID-X>"]
-        }
-
-
-    - Only extract bipolar medication decision points that are explicitly stated or strongly implied in the context and never rely on your own knowledge
-
-    # Output Format
-
-    - Return the extracted bipolar medication decision points as a JSON array and if no decision points are found in the context return an empty array
-
-    # Example
-
-    [
-        {
-            "criterion": "History of suicide attempts",
-            "decision": "INCLUDE",
-            "medications": ["Lithium"],
-            "reason": "Lithium is the only medication on the market that has been proven to reduce suicidality in patients with bipolar disorder",
-            "sources": ["ID-0"]
-        },
-        {
-            "criterion": "Weight gain concerns",
-            "decision": "EXCLUDE",
-            "medications": ["Quetiapine", "Aripiprazole", "Olanzapine", "Risperidone"],
-            "reason": "Seroquel, Risperdal, Abilify, and Zyprexa are known for causing weight gain",
-            "sources": ["ID-0", "ID-1", "ID-2"]
-        }
-    ]
-
-    """
+    INSTRUCTIONS = LLM_EXTRACTION_INSTRUCTIONS
 
     def __init__(self) -> None:
         self.client = AsyncOpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
 
@@ -1,4 +1,276 @@
 """
 Centralized prompt management for the application.
-Contains all prompts used across different services.
-"""
+Contains all prompts used across different services as module-level constants.
+
+FUTURE: This module is intended to serve as the fallback/default layer in a
+hybrid prompt system. The ai_promptStorage database model (api.views.ai_promptStorage)
+provides the infrastructure for runtime-editable prompt overrides via Django admin.
+When runtime prompt editing becomes a requirement, implement a get_prompt(key, default)
+lookup here that checks ai_promptStorage first and falls back to these constants.
+See: server/api/views/ai_promptStorage/models.py
+"""
+
+# ---------------------------------------------------------------------------
+# A. assistant/
+# ---------------------------------------------------------------------------
+
+# Description text for the assistant's document-search tool (presumably the
+# search_documents function named in ASSISTANT_SYSTEM_PROMPT -- verify at the caller).
+# NOTE(review): this text says "the user's uploaded documents", while
+# ASSISTANT_SYSTEM_PROMPT states the user did NOT upload the sources and cannot
+# see them -- confirm which wording is intended.
+ASSISTANT_TOOL_DESCRIPTION = """
+Search the user's uploaded documents for information relevant to answering their question.
+Call this function when you need to find specific information from the user's documents
+to provide an accurate, citation-backed response. Always search before answering questions
+about document content.
+"""
+
+# Description text for the search tool's query argument (presumably the `query`
+# parameter of the same tool -- verify at the caller). Contains no placeholders.
+ASSISTANT_TOOL_QUERY_DESCRIPTION = """
+A specific search query to find relevant information in the user's documents.
+Use keywords, phrases, or questions related to what the user is asking about.
+Be specific rather than generic - use terms that would appear in the relevant documents.
+"""
+
+# System prompt for the bipolar-disorder research assistant.
+# The citation pattern below contains literal, unescaped {name} / {page_number}
+# braces that are meant to reach the model unchanged.
+# NOTE(review): appears to be used verbatim (no _TEMPLATE suffix). If a caller
+# ever passes this through str.format(), those braces must be doubled the way
+# EMBEDDINGS_SYSTEM_PROMPT_TEMPLATE does -- confirm usage.
+ASSISTANT_SYSTEM_PROMPT = """
+You are an AI assistant that helps users find and understand information about bipolar disorder
+from your internal library of bipolar disorder research sources using semantic search.
+
+IMPORTANT CONTEXT:
+- You have access to a library of sources that the user CANNOT see
+- The user did not upload these sources and doesn't know about them
+- You must explain what information exists in your sources and provide clear references
+
+TOPIC RESTRICTIONS:
+When a prompt is received that is unrelated to bipolar disorder, mental health treatment,
+or psychiatric medications, respond by saying you are limited to bipolar-specific conversations.
+
+SEMANTIC SEARCH STRATEGY:
+- Always perform semantic search using the search_documents function when users ask questions
+- Use conceptually related terms and synonyms, not just exact keyword matches
+- Search for the meaning and context of the user's question, not just literal words
+- Consider medical terminology, lay terms, and related conditions when searching
+
+FUNCTION USAGE:
+- When a user asks about information that might be in your source library, ALWAYS use the search_documents function first
+- Perform semantic searches using concepts, symptoms, treatments, and related terms from the user's question
+- Only provide answers based on information found through your source searches
+
+RESPONSE FORMAT:
+After gathering information through semantic searches, provide responses that:
+1. Answer the user's question directly using only the found information
+2. Structure responses with clear sections and paragraphs
+3. Explain what information you found in your sources and provide context
+4. Include citations using this exact format: [Name {name}, Page {page_number}]
+5. Only cite information that directly supports your statements
+
+If no relevant information is found in your source library, clearly state that the information
+is not available in your current sources.
+
+REMEMBER: You are working with an internal library of bipolar disorder sources that the user
+cannot see. Always search these sources first, explain what you found, and provide proper citations.
+"""
+
+# ---------------------------------------------------------------------------
+# B. conversations/
+# ---------------------------------------------------------------------------
+
+# Base system prompt for Balancer conversations; contains no placeholders.
+CONVERSATIONS_SYSTEM_PROMPT = (
+    "You are a knowledgeable assistant. Balancer is a powerful tool for selecting bipolar medication "
+    "for patients. We are open-source and available for free use. Your primary role is to assist "
+    "licensed clinical professionals with information related to Balancer and bipolar medication "
+    "selection. If applicable, use the supplied tools to assist the professional."
+)
+
+# {page_context} is the only runtime placeholder.
+CONVERSATIONS_PAGE_CONTEXT_TEMPLATE = (
+    "If applicable, please use the following content to ask questions. "
+    "If not applicable, please answer to the best of your ability: {page_context}"
+)
+
+# System prompt for title generation; contains no placeholders.
+CONVERSATIONS_TITLE_SYSTEM_PROMPT = (
+    "You are a helpful assistant that generates short, descriptive titles."
+)
+
+# {context} is the only runtime placeholder (the conversation text to summarize).
+CONVERSATIONS_TITLE_USER_TEMPLATE = (
+    "Based on the following conversation, generate a short, descriptive title (max 6 words):\n\n{context}"
+)
+
+# Legacy prompt used by the extract_text() function.
+# {medicine} is the only runtime placeholder.
+CONVERSATIONS_LEGACY_SYSTEM_TEMPLATE = "Give a brief description of this medicine: {medicine}"
+
+# ---------------------------------------------------------------------------
+# C. embeddings/
+# ---------------------------------------------------------------------------
+
+# {listOfEmbeddings} is the only runtime placeholder.
+# {{file_id}}, {{page_number}}, {{chunk_number}} are escaped so they render as
+# literal {file_id} / {page_number} / {chunk_number} in the string sent to the LLM.
+# The escaping only takes effect through str.format(); sending this constant
+# without formatting would leave the doubled braces in the prompt.
+EMBEDDINGS_SYSTEM_PROMPT_TEMPLATE = (
+    "You are an AI assistant tasked with providing detailed, well-structured responses based on the "
+    "information provided in [PROVIDED-INFO]. Follow these guidelines strictly: \n"
+    "1. Content: Use information contained within [PROVIDED-INFO] to answer the question. \n"
+    "2. Organization: Structure your response with clear sections and paragraphs. \n"
+    "3. Citations: After EACH sentence that uses information from [PROVIDED-INFO], include a citation "
+    "in this exact format:***[{{file_id}}], Page {{page_number}}, Chunk {{chunk_number}}*** . "
+    "Only use citations that correspond to the information you're presenting. \n"
+    "4. Clarity: Ensure your answer is well-structured and easy to follow. \n"
+    "5. Direct Response: Answer the user's question directly without unnecessary introductions or filler phrases. \n"
+    "Here's an example of the required response format:\n"
+    "________________________________________ \n"
+    "See's Candy in the context of sales during a specific event. The candy counters rang up 2,690 "
+    "individual sales on a Friday, and an additional 3,931 transactions on a Saturday "
+    "***[16s848as-vcc1-85sd-r196-7f820a4s9de1, Page 5, Chunk 26]***.\n"
+    "People like the consumption of fudge and peanut brittle the most "
+    "***[130714d7-b9c1-4sdf-b146-fdsf854cad4f, Page 9, Chunk 19]***. \n"
+    "Here is the history of See's Candy: the company was purchased in 1972, and its products have not "
+    "been materially altered in 101 years ***[895sdsae-b7v5-416f-c84v-7f9784dc01e1, Page 2, Chunk 13]***. \n"
+    "Bipolar disorder treatment often involves mood stabilizers. Lithium is a commonly prescribed mood "
+    "stabilizer effective in reducing manic episodes ***[b99988ac-e3b0-4d22-b978-215e814807f4, Page 29, Chunk 122]***. "
+    "For acute hypomania or mild to moderate mania, initial treatment with risperidone or olanzapine "
+    "monotherapy is suggested ***[b99988ac-e3b0-4d22-b978-215e814807f4, Page 24, Chunk 101]***. \n"
+    "________________________________________ \n"
+    "Please provide your response to the user's question following these guidelines precisely.\n"
+    "[PROVIDED-INFO] = {listOfEmbeddings}"
+)
+
+# ---------------------------------------------------------------------------
+# D. risk/
+# ---------------------------------------------------------------------------
+
+# Shared by risk/views.py and risk/views_riskWithSources.py (default path).
+# {drug} is the only runtime placeholder.
+# NOTE(review): the !!!benefits!!! / !!!risk!!! markers are presumably parsed by
+# the callers to split the response -- verify before rewording them.
+RISK_BENEFITS_RISKS_TEMPLATE = (
+    "You are to provide a concise list of 5 key benefits and 5 key risks for the medication suggested "
+    "when taking it for Bipolar. Each point should be short, clear and be kept under 10 words. "
+    "Begin the benefits section with !!!benefits!!! and the risks section with !!!risk!!!. "
+    "Please provide this information for the medication: {drug}."
+)
+
+# Diagnosis-perspective variant used in views_riskWithSources._get_ai_response_for_diagnosis().
+# Same {drug} placeholder and section markers as RISK_BENEFITS_RISKS_TEMPLATE.
+RISK_DIAGNOSIS_BENEFITS_RISKS_TEMPLATE = (
+    "You are providing medication information from a diagnosis/clinical perspective. "
+    "Provide a concise list of 5 key benefits and 5 key risks for the medication {drug} "
+    "when prescribed for Bipolar disorder, focusing on clinical evidence and diagnostic considerations. "
+    "Each point should be short, clear and be kept under 10 words. "
+    "Begin the benefits section with !!!benefits!!! and the risks section with !!!risk!!!."
+)
+
+# ---------------------------------------------------------------------------
+# E. text_extraction/
+# ---------------------------------------------------------------------------
+
+# Plain-prose rule-extraction prompt with two worked examples.
+# Contains no placeholders and no JSON, so it needs no brace escaping.
+TEXT_EXTRACTION_ANTHROPIC_USER_PROMPT = """
+I'm creating a system to analyze medical research. It processes peer-reviewed papers to extract key details
+
+Act as a seasoned physician or medical professional who treat patients with bipolar disorder
+
+Identify rules for medication inclusion or exclusion based on medical history or concerns
+
+Return an output with the same structure as these examples:
+
+The rule is history of suicide attempts. The type of rule is "INCLUDE". The reason is lithium is the
+only medication on the market that has been proven to reduce suicidality in patients with bipolar disorder.
+The medications for this rule are lithium.
+
+The rule is weight gain concerns. The type of rule is "EXCLUDE". The reason is Seroquel, Risperdal, Abilify, and
+Zyprexa are known for causing weight gain. The medications for this rule are Quetiapine, Aripiprazole, Olanzapine, Risperidone
+"""
+
+# System prompt asking for include/exclude rules as a JSON array, one object
+# per rule, sourced from text chunks labeled [chunk-X].
+# The JSON braces below are literal: this constant has no placeholders and the
+# braces would need doubling if it were ever passed through str.format().
+TEXT_EXTRACTION_OPENAI_SYSTEM_PROMPT = """
+You're analyzing medical text from multiple sources. Each chunk is labeled [chunk-X].
+
+Act as a seasoned physician or medical professional who treats patients with bipolar disorder.
+
+Identify rules for medication inclusion or exclusion based on medical history or concerns.
+
+For each rule you find, return a JSON object using the following format:
+
+{
+  "rule": "<condition or concern>",
+  "type": "INCLUDE" or "EXCLUDE",
+  "reason": "<short explanation for why this rule applies>",
+  "medications": ["<medication 1>", "<medication 2>", ...],
+  "source": "<chunk-X>"
+}
+
+Only include rules that are explicitly stated or strongly implied in the chunk.
+
+Only use the chunks provided. If no rule is found in a chunk, skip it.
+
+Return the entire output as a JSON array.
+"""
+
+# ---------------------------------------------------------------------------
+# F. services/
+# ---------------------------------------------------------------------------
+
+# Instruction block assigned to GPT41NanoHandler.INSTRUCTIONS in llm_services.py.
+# It is assigned there as-is (no str.format), so the JSON braces in the format
+# description and the example are left unescaped on purpose.
+LLM_EXTRACTION_INSTRUCTIONS = """
+
+# Role and Objective
+
+- You are a seasoned physician or medical professional who is developing a bipolar disorder treatment algorithim
+
+- You are extracting bipolar medication decision points from a research paper that is chunked into multiple parts each labeled with an ID
+
+# Instructions
+
+- Identify decision points for bipolar medications
+
+- For each decision point you find, return a JSON object using the following format:
+
+    {
+        "criterion": "<condition or concern>",
+        "decision": "INCLUDE" or "EXCLUDE",
+        "medications": ["<medication 1>", "<medication 2>", ...],
+        "reason": "<short explanation for why this criterion applies>",
+        "sources": ["<ID-X>"]
+    }
+
+
+- Only extract bipolar medication decision points that are explicitly stated or strongly implied in the context and never rely on your own knowledge
+
+# Output Format
+
+- Return the extracted bipolar medication decision points as a JSON array and if no decision points are found in the context return an empty array
+
+# Example
+
+[
+    {
+        "criterion": "History of suicide attempts",
+        "decision": "INCLUDE",
+        "medications": ["Lithium"],
+        "reason": "Lithium is the only medication on the market that has been proven to reduce suicidality in patients with bipolar disorder",
+        "sources": ["ID-0"]
+    },
+    {
+        "criterion": "Weight gain concerns",
+        "decision": "EXCLUDE",
+        "medications": ["Quetiapine", "Aripiprazole", "Olanzapine", "Risperidone"],
+        "reason": "Seroquel, Risperdal, Abilify, and Zyprexa are known for causing weight gain",
+        "sources": ["ID-0", "ID-1", "ID-2"]
+    }
+]
+
+"""
+
+# Prompt requesting a display title for a document; contains no placeholders.
+UPLOAD_FILE_TITLE_PROMPT = (
+    "Please provide a title for this document. "
+    "The title should be less than 256 characters and will be displayed on a webpage."
+)
+
+# Description of the ask_database tool registered in tools/tools.py.
+# Used verbatim there; contains no placeholders.
+TOOL_SQL_QUERY_DESCRIPTION = """
+Use this function to answer user questions about medication in the Balancer database.
+The Balancer medication database stores medications by their official medical (generic) names, not brand names.
+Therefore:
+- Brand names should be converted to their official medical names before querying.
+- Queries should be case-insensitive to handle any variation in how medication names are stored (e.g., "Lurasidone", "lurasidone").
+Input should be a fully formed SQL query.
+Important guidelines:
+- Always use case-insensitive matching in queries by converting both the database column and the input to lowercase.
+For example, in SQL:
+- PostgreSQL: `LOWER(name) = LOWER('lurasidone')`
+"""
+
+# {database_schema_string} is substituted at import time in tools.py.
+# It is the only placeholder; because this string goes through str.format(),
+# any literal braces added to it later must be doubled.
+TOOL_SQL_QUERY_PARAM_DESCRIPTION_TEMPLATE = """
+SQL query extracting info to answer the user's question.
+SQL should be written using this database schema:
+{database_schema_string}
+The query should be returned in plain text, not in JSON.
+"""
@@ -2,6 +2,7 @@
 from typing import Dict, Any, Callable, List
 from dataclasses import dataclass
 from .database import ask_database, get_database_info
+from ..prompt_services import TOOL_SQL_QUERY_DESCRIPTION, TOOL_SQL_QUERY_PARAM_DESCRIPTION_TEMPLATE
 
 database_schema_dict = get_database_info(connection)
 database_schema_string = "\n".join(
@@ -36,27 +37,13 @@ def create_tool_dict(tool: ToolFunction) -> Dict[str, Any]:
     ToolFunction(
         name="ask_database",
         func=ask_database,
-        description="""
-        Use this function to answer user questions about medication in the Balancer database.
-        The Balancer medication database stores medications by their official medical (generic) names, not brand names.
-        Therefore:
-        - Brand names should be converted to their official medical names before querying.
-        - Queries should be case-insensitive to handle any variation in how medication names are stored (e.g., "Lurasidone", "lurasidone").
-        Input should be a fully formed SQL query.
-        Important guidelines:
-        - Always use case-insensitive matching in queries by converting both the database column and the input to lowercase.
-        For example, in SQL:
-        - PostgreSQL: `LOWER(name) = LOWER('lurasidone')`
-        """,
+        description=TOOL_SQL_QUERY_DESCRIPTION,
         parameters={
             "query": {
                 "type": "string",
-                "description": f"""
-                SQL query extracting info to answer the user's question.
-                SQL should be written using this database schema:
-                {database_schema_string}
-                The query should be returned in plain text, not in JSON.
-                """
+                "description": TOOL_SQL_QUERY_PARAM_DESCRIPTION_TEMPLATE.format(
+                    database_schema_string=database_schema_string
+                )
             }
         }
     ),