Skip to content

Commit db1f67e

Browse files
AryanBV and claude committed
fix: Wheat classification labels and data quality cleanup
Root cause fixes for wheat and similar HS code classifications:

Database:
- Cleaned 20 dirty descriptions with DGFT notification dates
- Fixed malformed date pattern in code 2942

Backend (llm-navigator.service.ts):
- Rewrote generateUniqueLabel to parse ": --" format correctly
- Now extracts "Seed" vs "Other" qualifiers from descriptions like "Durum wheat :-- Seed"
- Skip generic "Other" base labels to force meaningful qualifiers
- Fixed filterOptionsByKeywordsSimple to add ALL relevant Other options (was only adding first)
- Added seed pattern to generateSmartQuestion for sowing-related products

Results:
- Wheat now shows 4 distinct options: Durum wheat, Durum wheat - Other, Other - Seed, Other - Other
- No more meaningless "(19)" or "(99)" code suffixes in labels
- Durum wheat seeds correctly classify to 1001.11.00

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent de0ddda commit db1f67e

4 files changed

Lines changed: 132 additions & 78 deletions

File tree

backend/src/services/llm-conversational-classifier.service.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ export interface LLMClassifyResponse {
3939
conversationId: string;
4040
responseType: 'questions' | 'classification' | 'error';
4141

42+
// Explicit round number for frontend deduplication
43+
roundNumber?: number;
44+
4245
// If responseType === 'questions' (matches frontend format)
4346
questions?: Array<{
4447
id: string;
@@ -436,6 +439,7 @@ async function processNavigationResult(
436439
success: true,
437440
conversationId,
438441
responseType: 'questions', // Frontend expects plural
442+
roundNumber: turnNumber, // Explicit round number for frontend deduplication
439443
questions: [{
440444
id: result.question!.id,
441445
text: result.question!.text,

backend/src/services/llm-navigator.service.ts

Lines changed: 93 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -274,17 +274,16 @@ function filterOptionsByKeywordsSimple(
274274
const matchedChapters = new Set(matchingOptions.map(o => o.code.substring(0, 2)));
275275

276276
// Only add "Other" if all matched options are from the same chapter
277-
// AND there's an "Other" option from that same chapter
277+
// AND there are "Other" options from that same chapter
278278
if (matchedChapters.size === 1) {
279279
const chapter = [...matchedChapters][0];
280-
const relevantOther = filteredOptions.find(o =>
280+
const relevantOthers = filteredOptions.filter(o =>
281281
o.isOther &&
282282
o.code.substring(0, 2) === chapter &&
283283
!matchingOptions.includes(o)
284284
);
285-
if (relevantOther) {
286-
matchingOptions.push(relevantOther);
287-
}
285+
// Add ALL relevant "Other" options (e.g., "Other - Seed" and "Other - Other" for wheat)
286+
matchingOptions.push(...relevantOthers);
288287
}
289288

290289
logger.info(`[FILTER] Keyword filter: ${filteredOptions.length} -> ${matchingOptions.length} options`);
@@ -437,6 +436,7 @@ function generateSmartQuestion(options: HierarchyOption[]): string {
437436
// Check for common distinguishing patterns - ordered by specificity
438437
const patterns = [
439438
// Most specific patterns first
439+
{ keywords: ['seed', 'sowing', 'for seed'], question: "Is this for sowing (seed) or other purposes?" },
440440
{ keywords: ['decaffeinated', 'not decaffeinated'], question: "Is the product decaffeinated?" },
441441
{ keywords: ['roasted', 'not roasted'], question: "Is the product roasted?" },
442442
{ keywords: ['crushed', 'ground', 'whole', 'neither crushed nor ground'], question: "What is the processing state?" },
@@ -786,84 +786,111 @@ async function getCodeDescription(code: string): Promise<string | null> {
786786
* ROOT CAUSE FIX: Generate UNIQUE labels that distinguish between similar codes
787787
*
788788
* Problem it solves:
789-
* "Coffee, not roasted : --Not decaffeinated" → "Coffee, not roasted"
790-
* "Coffee, not roasted : --Decaffeinated" → "Coffee, not roasted" // DUPLICATE!
789+
* "Durum wheat : -- Seed" → Should extract "Seed"
790+
* "Durum wheat : -- Other" → Should extract "Other"
791+
* "Other : -- Seed" → Should extract "Seed"
791792
*
792793
* Fixed behavior:
793-
* Step 1: Try base label (before first colon)
794-
* Step 2: If duplicate, add distinguishing part (after colon, before ----)
795-
* Step 3: If still duplicate, add code suffix
794+
* Step 1: Clean the description (remove DGFT dates)
795+
* Step 2: Parse the description format (": --" or ":--" or ": ----")
796+
* Step 3: Try base label first
797+
* Step 4: Add qualifier if we have one and it's meaningful
798+
* Step 5: Handle "Other" or empty qualifiers with seed/other distinction
799+
* Step 6: Last resort - add meaningful code part
796800
*/
797-
function generateUniqueLabel(code: string, description: string, existingLabels: Set<string>): string {
798-
// Clean up the description - remove leading dashes and extra spaces
799-
const cleanDesc = description.replace(/^[-:\s]+/, '').trim();
800-
801-
// Split by colon to separate main part from qualifier
802-
const colonParts = cleanDesc.split(/\s*:\s*/);
803-
const mainPart = (colonParts[0] || '').trim();
804-
805-
// Step 1: Try base label (just the part before colon)
806-
let label = mainPart;
807-
808-
// Truncate if too long
809-
if (label.length > 50) {
810-
label = label.substring(0, 47) + '...';
801+
function generateUniqueLabel(
802+
code: string,
803+
description: string,
804+
existingLabels: Set<string>
805+
): string {
806+
// Step 1: Clean the description
807+
let cleanDesc = description
808+
.replace(/\s*\d{2}\/\d{4}-\d{2,4}\s+\d{2}\.\d{2}\.\d{4}\s*/g, '') // Remove DGFT dates
809+
.trim();
810+
811+
// Step 2: Parse the description format
812+
// Common formats:
813+
// "Category : -- Qualifier" (e.g., "Durum wheat : -- Seed")
814+
// "Category :-- Qualifier" (e.g., "Durum wheat :-- Seed")
815+
// "Category: ---- Detail" (e.g., "Coffee, not roasted: ----Not decaffeinated")
816+
817+
let baseLabel = '';
818+
let qualifier = '';
819+
820+
// Try to split by ": --" or ":--" or ": ----"
821+
const colonDashMatch = cleanDesc.match(/^(.+?)\s*:\s*-{1,4}\s*(.+)$/);
822+
if (colonDashMatch) {
823+
baseLabel = colonDashMatch[1]!.trim();
824+
qualifier = colonDashMatch[2]!.trim();
825+
} else if (cleanDesc.includes(':')) {
826+
// Simple colon split
827+
const parts = cleanDesc.split(':');
828+
baseLabel = parts[0]!.trim();
829+
qualifier = parts.slice(1).join(':').replace(/^[\s-]+/, '').trim();
830+
} else {
831+
baseLabel = cleanDesc;
811832
}
812833

813-
if (!existingLabels.has(label)) {
814-
existingLabels.add(label);
815-
return label;
834+
// Step 3: Try base label first (but NOT if it's a generic "Other" - those need qualifiers)
835+
const isGenericBase = baseLabel.toLowerCase() === 'other';
836+
if (!isGenericBase && !existingLabels.has(baseLabel) && baseLabel.length > 0) {
837+
existingLabels.add(baseLabel);
838+
return baseLabel;
816839
}
817840

818-
// Step 2: Add the distinguishing part (after colon, before ----)
819-
// This handles: "Coffee, not roasted : --Not decaffeinated" → extracts "Not decaffeinated"
820-
if (colonParts.length > 1) {
821-
// Get qualifier from the part after colon
822-
let qualifierPart = colonParts.slice(1).join(' : '); // Rejoin in case multiple colons
823-
824-
// Remove leading dashes (e.g., "--Not decaffeinated" → "Not decaffeinated")
825-
let qualifier = qualifierPart.replace(/^[-\s]+/, '').trim();
826-
827-
// If there's a "----" separator, take the part before it
828-
if (qualifier.includes('----')) {
829-
qualifier = qualifier.split('----')[0]?.trim() || qualifier;
841+
// Step 4: Add qualifier if we have one and it's meaningful
842+
if (qualifier && qualifier.length > 0 && qualifier.toLowerCase() !== 'other') {
843+
const labelWithQualifier = `${baseLabel} - ${qualifier}`;
844+
if (!existingLabels.has(labelWithQualifier)) {
845+
existingLabels.add(labelWithQualifier);
846+
return labelWithQualifier;
830847
}
848+
}
831849

832-
// Also handle "--" separator (common in HS descriptions)
833-
if (qualifier.includes('--')) {
834-
qualifier = qualifier.split('--')[0]?.trim() || qualifier;
835-
}
836-
837-
// Clean up the qualifier
838-
qualifier = qualifier.replace(/^[-\s]+/, '').trim();
839-
840-
if (qualifier && qualifier.toLowerCase() !== 'other') {
841-
label = `${mainPart} - ${qualifier}`;
842-
843-
// Truncate if too long
844-
if (label.length > 60) {
845-
label = label.substring(0, 57) + '...';
850+
// Step 5: If qualifier is "Other" or empty, try more specific label
851+
if (qualifier.toLowerCase() === 'other' || !qualifier) {
852+
// Check if this is a seed vs non-seed distinction
853+
if (cleanDesc.toLowerCase().includes('seed')) {
854+
const seedLabel = `${baseLabel} - Seed`;
855+
if (!existingLabels.has(seedLabel)) {
856+
existingLabels.add(seedLabel);
857+
return seedLabel;
846858
}
847-
848-
if (!existingLabels.has(label)) {
849-
existingLabels.add(label);
850-
return label;
859+
} else {
860+
const otherLabel = `${baseLabel} - Other`;
861+
if (!existingLabels.has(otherLabel)) {
862+
existingLabels.add(otherLabel);
863+
return otherLabel;
851864
}
852865
}
853866
}
854867

855-
// Step 3: Add code suffix as last resort
856-
const codeParts = code.split('.');
857-
const codeEnd = codeParts[codeParts.length - 1] || code;
858-
label = `${mainPart} (${codeEnd})`;
868+
// Step 6: Last resort - add meaningful code part
869+
// Extract the last meaningful segment (e.g., "11" from "1001.11")
870+
const codeParts = code.replace(/\.00$/, '').split('.');
871+
const lastPart = codeParts[codeParts.length - 1]!;
872+
873+
// Try to make it meaningful
874+
let suffix = '';
875+
if (lastPart === '11' || lastPart === '91') {
876+
suffix = 'Seed';
877+
} else if (lastPart === '19' || lastPart === '99') {
878+
suffix = 'Other';
879+
} else if (lastPart === '00') {
880+
suffix = 'General';
881+
} else {
882+
suffix = lastPart;
883+
}
859884

860-
// Final truncation check
861-
if (label.length > 60) {
862-
label = label.substring(0, 57) + '...';
885+
const finalLabel = `${baseLabel} - ${suffix}`;
886+
if (!existingLabels.has(finalLabel)) {
887+
existingLabels.add(finalLabel);
888+
return finalLabel;
863889
}
864890

865-
existingLabels.add(label);
866-
return label;
891+
// Absolute last resort - add full code
892+
existingLabels.add(`${baseLabel} (${code})`);
893+
return `${baseLabel} (${code})`;
867894
}
868895

869896
/**

frontend/src/app/classify/page.tsx

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -182,9 +182,19 @@ export default function ClassifyPage() {
182182
// Add AI question message with DATA (not JSX)
183183
setChatMessages(prev => {
184184
// Check if we already added this round's questions
185-
const existingQuestion = prev.find(m =>
186-
m.id.startsWith(`ai-questions-round${conversation.roundNumber}`)
187-
)
185+
const searchId = `ai-questions-round${conversation.roundNumber}`
186+
const existingQuestion = prev.find(m => m.id.startsWith(searchId))
187+
188+
// Debug logging for conversation continuation
189+
if (process.env.NODE_ENV === 'development') {
190+
console.log('[Dedup Check]', {
191+
roundNumber: conversation.roundNumber,
192+
searchingFor: searchId,
193+
found: !!existingQuestion,
194+
existingIds: prev.filter(m => m.id.startsWith('ai-questions')).map(m => m.id)
195+
})
196+
}
197+
188198
if (existingQuestion) return prev
189199

190200
return [

frontend/src/lib/hooks/use-conversation.ts

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
'use client'
22

3-
import { useState, useCallback } from 'react'
3+
import { useState, useCallback, useRef, useEffect } from 'react'
44
import {
55
classifyConversational,
66
skipToClassification,
@@ -56,6 +56,14 @@ const initialState: ConversationState = {
5656
export function useConversation() {
5757
const [state, setState] = useState<ConversationState>(initialState)
5858

59+
// Use ref to prevent stale closure issues with conversationId
60+
const conversationIdRef = useRef<string | null>(null)
61+
62+
// Keep the ref in sync with state
63+
useEffect(() => {
64+
conversationIdRef.current = state.conversationId
65+
}, [state.conversationId])
66+
5967
/**
6068
* Start a new classification conversation
6169
*/
@@ -89,7 +97,7 @@ export function useConversation() {
8997
conversationId: response.conversationId,
9098
currentQuestions: response.questions || null,
9199
questionContext: response.questionContext || null,
92-
roundNumber: response.roundNumber || 1,
100+
roundNumber: response.roundNumber ?? 1,
93101
totalQuestionsAsked: response.totalQuestionsAsked || 0
94102
}))
95103
} else if (response.responseType === 'classification' && response.result) {
@@ -115,7 +123,9 @@ export function useConversation() {
115123
* Submit answers to current questions
116124
*/
117125
const submitAnswers = useCallback(async (answers: Record<string, string>) => {
118-
if (!state.conversationId || !state.currentQuestions) return
126+
// Use ref for conversationId to prevent stale closure issues
127+
const currentConversationId = conversationIdRef.current
128+
if (!currentConversationId || !state.currentQuestions) return
119129

120130
// Add Q&A to history
121131
const newHistoryItems: ConversationHistoryItem[] = []
@@ -144,9 +154,10 @@ export function useConversation() {
144154

145155
try {
146156
const sessionId = getSessionId()
157+
// Use ref value for conversationId to avoid stale closure
147158
const response = await classifyConversational({
148159
productDescription: state.productDescription,
149-
conversationId: state.conversationId,
160+
conversationId: currentConversationId,
150161
sessionId,
151162
answers
152163
})
@@ -166,7 +177,7 @@ export function useConversation() {
166177
status: 'asking',
167178
currentQuestions: response.questions || null,
168179
questionContext: response.questionContext || null,
169-
roundNumber: response.roundNumber || prev.roundNumber + 1,
180+
roundNumber: response.roundNumber ?? (prev.roundNumber + 1),
170181
totalQuestionsAsked: response.totalQuestionsAsked || prev.totalQuestionsAsked
171182
}))
172183
} else if (response.responseType === 'classification' && response.result) {
@@ -185,19 +196,21 @@ export function useConversation() {
185196
error: error instanceof Error ? error.message : 'Unknown error occurred'
186197
}))
187198
}
188-
}, [state.conversationId, state.currentQuestions, state.productDescription])
199+
}, [state.currentQuestions, state.productDescription]) // Removed conversationId - using ref instead
189200

190201
/**
191202
* Skip remaining questions and get best guess
192203
*/
193204
const skip = useCallback(async () => {
194-
if (!state.conversationId) return
205+
// Use ref for conversationId to prevent stale closure issues
206+
const currentConversationId = conversationIdRef.current
207+
if (!currentConversationId) return
195208

196209
setState(prev => ({ ...prev, status: 'loading' }))
197210

198211
try {
199212
const sessionId = getSessionId()
200-
const response = await skipToClassification(state.conversationId, sessionId)
213+
const response = await skipToClassification(currentConversationId, sessionId)
201214

202215
if (!response.success) {
203216
setState(prev => ({
@@ -224,7 +237,7 @@ export function useConversation() {
224237
error: error instanceof Error ? error.message : 'Unknown error occurred'
225238
}))
226239
}
227-
}, [state.conversationId])
240+
}, []) // Removed conversationId - using ref instead
228241

229242
/**
230243
* Reset and start over

0 commit comments

Comments
 (0)