11import logging
2- from typing import Any , Dict , List
32
43from flask import make_response , request
54from flask_restx import fields , Resource
65
76from application .api .answer .routes .base import answer_ns
8- from application .core .settings import settings
9- from application .storage .db .repositories .agents import AgentsRepository
10- from application .storage .db .session import db_readonly
11- from application .vectorstore .vector_creator import VectorCreator
7+ from application .services .search_service import (
8+ InvalidAPIKey ,
9+ SearchFailed ,
10+ search ,
11+ )
1212
1313logger = logging .getLogger (__name__ )
1414
1515
1616@answer_ns .route ("/api/search" )
1717class SearchResource (Resource ):
18- """Fast search endpoint for retrieving relevant documents"""
18+ """Fast search endpoint for retrieving relevant documents. """
1919
2020 search_model = answer_ns .model (
2121 "SearchModel" ,
@@ -32,135 +32,24 @@ class SearchResource(Resource):
3232 },
3333 )
3434
def _get_sources_from_api_key(self, api_key: str) -> List[str]:
    """Return the source IDs attached to the agent that owns ``api_key``.

    The agent's ``extra_source_ids`` take precedence; the single
    ``source_id`` is used only when no extra IDs are present.

    Args:
        api_key: Key used to look the agent up.

    Returns:
        Source IDs as strings, or an empty list when no agent matches the
        key or the agent has no sources.
    """
    with db_readonly() as conn:
        agent = AgentsRepository(conn).find_by_key(api_key)
        if not agent:
            return []

        # extra_source_ids is a PG ARRAY(UUID) of source UUIDs; falsy
        # entries are dropped and the rest are stringified.
        linked = [
            str(entry)
            for entry in (agent.get("extra_source_ids") or [])
            if entry
        ]
        if linked:
            return linked

        fallback = agent.get("source_id")
        return [str(fallback)] if fallback else []
55-
56- def _search_vectorstores (
57- self , query : str , source_ids : List [str ], chunks : int
58- ) -> List [Dict [str , Any ]]:
59- """Search across vectorstores and return results"""
60- if not source_ids :
61- return []
62-
63- results = []
64- chunks_per_source = max (1 , chunks // len (source_ids ))
65- seen_texts = set ()
66-
67- for source_id in source_ids :
68- if not source_id or not source_id .strip ():
69- continue
70-
71- try :
72- docsearch = VectorCreator .create_vectorstore (
73- settings .VECTOR_STORE , source_id , settings .EMBEDDINGS_KEY
74- )
75- docs = docsearch .search (query , k = chunks_per_source * 2 )
76-
77- for doc in docs :
78- if len (results ) >= chunks :
79- break
80-
81- if hasattr (doc , "page_content" ) and hasattr (doc , "metadata" ):
82- page_content = doc .page_content
83- metadata = doc .metadata
84- else :
85- page_content = doc .get ("text" , doc .get ("page_content" , "" ))
86- metadata = doc .get ("metadata" , {})
87-
88- # Skip duplicates
89- text_hash = hash (page_content [:200 ])
90- if text_hash in seen_texts :
91- continue
92- seen_texts .add (text_hash )
93-
94- title = metadata .get (
95- "title" , metadata .get ("post_title" , "" )
96- )
97- if not isinstance (title , str ):
98- title = str (title ) if title else ""
99-
100- # Clean up title
101- if title :
102- title = title .split ("/" )[- 1 ]
103- else :
104- # Use filename or first part of content as title
105- title = metadata .get ("filename" , page_content [:50 ] + "..." )
106-
107- source = metadata .get ("source" , source_id )
108-
109- results .append ({
110- "text" : page_content ,
111- "title" : title ,
112- "source" : source ,
113- })
114-
115- if len (results ) >= chunks :
116- break
117-
118- except Exception as e :
119- logger .error (
120- f"Error searching vectorstore { source_id } : { e } " ,
121- exc_info = True ,
122- )
123- continue
124-
125- return results [:chunks ]
126-
@answer_ns.expect(search_model)
@answer_ns.doc(description="Search for relevant documents based on query")
def post(self):
    """Handle ``POST /api/search``: validate the body and run the search.

    Reads ``question``, ``api_key`` and the optional ``chunks`` (default 5)
    from the JSON request body and delegates to ``search``.

    Returns:
        A response with status 200 and the value returned by ``search``;
        400 when the body is not a JSON object, ``question`` or ``api_key``
        is missing, or ``chunks`` is not an integer; 401 when ``search``
        raises ``InvalidAPIKey``; 500 when it raises ``SearchFailed``.
    """
    data = request.get_json() or {}
    # A JSON array or scalar has no .get(); reject it instead of crashing.
    if not isinstance(data, dict):
        return make_response(
            {"error": "Request body must be a JSON object"}, 400
        )

    question = data.get("question")
    api_key = data.get("api_key")
    chunks = data.get("chunks", 5)

    if not question:
        return make_response({"error": "question is required"}, 400)
    if not api_key:
        return make_response({"error": "api_key is required"}, 400)
    # chunks is a result count; a non-integer is a client error.
    if not isinstance(chunks, int):
        return make_response({"error": "chunks must be an integer"}, 400)

    try:
        return make_response(search(api_key, question, chunks), 200)
    except InvalidAPIKey:
        return make_response({"error": "Invalid API key"}, 401)
    except SearchFailed:
        logger.exception("/api/search failed")
        return make_response({"error": "Search failed"}, 500)
0 commit comments