33from rest_framework import status
44from rest_framework .parsers import MultiPartParser , FormParser
55from django .conf import settings
6- from questions .models import Question
6+ from questions .models import Question , Course
77from questions .serializers import QuestionDetailSerializer
88import json
99import io
10+ import difflib
1011
1112
1213class GenerateQuestionsView (APIView ):
@@ -19,20 +20,38 @@ def post(self, request):
1920 count = min (int (request .data .get ('count' , 5 )), 20 ) # Max 20 at a time
2021 difficulty = request .data .get ('difficulty' , 'medium' )
2122 examples = request .data .get ('examples' , []) # Example questions for style
23+ course_id = request .data .get ('course_id' ) # For duplicate detection
24+ tag_name = request .data .get ('tag_name' ) # For duplicate detection
2225
2326 if not content :
2427 return Response ({'error' : 'Content is required' }, status = status .HTTP_400_BAD_REQUEST )
2528
29+ # Fetch existing questions for duplicate detection
30+ existing_questions = []
31+ if course_id :
32+ qs = Question .objects .filter (course_id = course_id , deleted_at__isnull = True )
33+ if tag_name :
34+ qs = qs .filter (tags__name = tag_name )
35+ existing_questions = list (qs .values_list ('text' , flat = True )[:100 ]) # Limit to 100 for prompt size
36+
2637 try :
27- print (f"[AI Generate] Provider: { provider } , Type: { question_type } , Count: { count } , Content length: { len (content )} " )
38+ print (f"[AI Generate] Provider: { provider } , Type: { question_type } , Count: { count } , Content length: { len (content )} , Existing: { len ( existing_questions ) } " )
2839
2940 if provider == 'claude' :
30- questions = self ._generate_with_claude (content , question_type , count , difficulty , examples )
41+ questions = self ._generate_with_claude (content , question_type , count , difficulty , examples , existing_questions )
3142 elif provider == 'openai' :
32- questions = self ._generate_with_openai (content , question_type , count , difficulty , examples )
43+ questions = self ._generate_with_openai (content , question_type , count , difficulty , examples , existing_questions )
3344 else :
3445 return Response ({'error' : 'Invalid provider' }, status = status .HTTP_400_BAD_REQUEST )
3546
47+ # Post-generation duplicate filtering
48+ if existing_questions :
49+ original_count = len (questions )
50+ questions = self ._filter_duplicates (questions , existing_questions )
51+ filtered_count = original_count - len (questions )
52+ if filtered_count > 0 :
53+ print (f"[AI Generate] Filtered { filtered_count } duplicate questions" )
54+
3655 print (f"[AI Generate] Success: { len (questions )} questions generated" )
3756 return Response ({'questions' : questions })
3857 except Exception as e :
@@ -41,7 +60,25 @@ def post(self, request):
4160 traceback .print_exc ()
4261 return Response ({'error' : str (e )}, status = status .HTTP_500_INTERNAL_SERVER_ERROR )
4362
44- def _build_prompt (self , content , question_type , count , difficulty , examples ):
63+ def _filter_duplicates (self , new_questions , existing_texts , threshold = 0.85 ):
64+ """Filter out questions that are too similar to existing ones."""
65+ unique_questions = []
66+ for q in new_questions :
67+ new_text = q .get ('text' , '' ).lower ().strip ()
68+ is_duplicate = False
69+ for existing_text in existing_texts :
70+ existing_lower = existing_text .lower ().strip ()
71+ # Use sequence matcher for similarity
72+ similarity = difflib .SequenceMatcher (None , new_text , existing_lower ).ratio ()
73+ if similarity >= threshold :
74+ print (f"[Duplicate] Skipping question (similarity={ similarity :.2f} ): { new_text [:50 ]} ..." )
75+ is_duplicate = True
76+ break
77+ if not is_duplicate :
78+ unique_questions .append (q )
79+ return unique_questions
80+
81+ def _build_prompt (self , content , question_type , count , difficulty , examples , existing_questions = None ):
4582 type_instructions = {
4683 'multipleChoice' : 'Create multiple choice questions with exactly 4 options (1 correct, 3 wrong).' ,
4784 'trueFalse' : 'Create true/false questions.' ,
@@ -55,6 +92,15 @@ def _build_prompt(self, content, question_type, count, difficulty, examples):
5592 for i , ex in enumerate (examples [:3 ], 1 ):
5693 example_text += f"\n Example { i } :\n { ex } \n "
5794
95+ # Add existing questions to avoid duplicates
96+ existing_text = ""
97+ if existing_questions :
98+ existing_text = "\n \n IMPORTANT: The following questions already exist. DO NOT create questions that are similar to these:\n "
99+ for i , eq in enumerate (existing_questions [:30 ], 1 ): # Limit to 30 to save tokens
100+ truncated = eq [:200 ] + "..." if len (eq ) > 200 else eq
101+ existing_text += f"- { truncated } \n "
102+ existing_text += "\n Create DIFFERENT questions that cover other aspects of the material.\n "
103+
58104 # Handle mixed question types
59105 if question_type == 'mixed' :
60106 type_instruction = """Create a MIX of different question types. Include a variety of:
@@ -120,7 +166,7 @@ def _build_prompt(self, content, question_type, count, difficulty, examples):
120166 prompt = f"""Generate { count } { difficulty } difficulty questions based on the following content.
121167
122168{ type_instruction }
123-
169+ { existing_text }
124170Return your response as a JSON array with this structure:
125171{ json_format }
126172
@@ -159,14 +205,14 @@ def _parse_json_response(self, response_text):
159205 preview = response_text [:500 ] if len (response_text ) > 500 else response_text
160206 raise ValueError (f"Failed to parse AI response as JSON: { e } . Response preview: { preview } " )
161207
162- def _generate_with_claude (self , content , question_type , count , difficulty , examples ):
208+ def _generate_with_claude (self , content , question_type , count , difficulty , examples , existing_questions = None ):
163209 import anthropic
164210
165211 if not settings .ANTHROPIC_API_KEY :
166212 raise ValueError ("Anthropic API key not configured" )
167213
168214 client = anthropic .Anthropic (api_key = settings .ANTHROPIC_API_KEY )
169- prompt = self ._build_prompt (content , question_type , count , difficulty , examples )
215+ prompt = self ._build_prompt (content , question_type , count , difficulty , examples , existing_questions )
170216
171217 print (f"[Claude] Sending request, prompt length: { len (prompt )} " )
172218
@@ -195,14 +241,14 @@ def _generate_with_claude(self, content, question_type, count, difficulty, examp
195241
196242 return self ._parse_json_response (response_text )
197243
198- def _generate_with_openai (self , content , question_type , count , difficulty , examples ):
244+ def _generate_with_openai (self , content , question_type , count , difficulty , examples , existing_questions = None ):
199245 from openai import OpenAI
200246
201247 if not settings .OPENAI_API_KEY :
202248 raise ValueError ("OpenAI API key not configured" )
203249
204250 client = OpenAI (api_key = settings .OPENAI_API_KEY )
205- prompt = self ._build_prompt (content , question_type , count , difficulty , examples )
251+ prompt = self ._build_prompt (content , question_type , count , difficulty , examples , existing_questions )
206252
207253 response = client .chat .completions .create (
208254 model = "gpt-4o" ,
0 commit comments