Skip to content

Commit 87a6295

Browse files
AryanBV and claude committed
fix: Root cause UX issues - unique labels, smart questions, include Other codes
FIX 1: Stop filtering "Other" codes in detectDistinctGroups()
- "Other" codes like 0901.90 now visible as valid classification options
- Sorted to appear last as fallback options

FIX 2: Use generateUniqueLabel instead of cleanDescriptionForLabel
- Labels are now unique, no more duplicates like "Coffee, not roasted"
- Properly extracts distinguishing qualifiers (decaf/non-decaf)

FIX 3: Add generateSmartQuestion for contextual questions
- Questions now reflect actual differences between options
- "Is the product decaffeinated?" instead of generic "Which category?"

FIX 4: Handle leaf nodes properly (codes without children)
- Branches, leaves, and Other codes properly ordered
- No duplicate Other entries

Test results: 49/51 passed (96%), all critical regression tests pass

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 3222ab3 commit 87a6295

1 file changed

Lines changed: 172 additions & 86 deletions

File tree

backend/src/services/llm-navigator.service.ts

Lines changed: 172 additions & 86 deletions
Original file line number | Diff line number | Diff line change
@@ -355,16 +355,30 @@ function detectDistinctGroups(
355355
}
356356
}
357357

358-
// RULE 2: Multiple non-other options with children at same level
359-
// This is the CORE generic logic - works for ANY product
360-
const nonOtherOptions = options.filter(o => !o.isOther);
361-
const optionsWithChildren = nonOtherOptions.filter(o => o.hasChildren);
358+
// RULE 2: Multiple options at same level - INCLUDES "Other" codes
359+
// ROOT CAUSE FIX: Keep ALL options including "Other" - it's a valid HS classification
360+
// Sort so "Other" appears last (as a fallback option)
361+
const sortedOptions = [...options].sort((a, b) => {
362+
if (a.isOther && !b.isOther) return 1; // "Other" goes last
363+
if (!a.isOther && b.isOther) return -1;
364+
return 0;
365+
});
366+
367+
// Separate branch codes (can drill down) from leaf codes (final classification)
368+
// IMPORTANT: Exclude "Other" from branches/leaves to avoid duplicates
369+
const branchOptions = sortedOptions.filter(o => o.hasChildren && !o.isOther);
370+
const leafOptions = sortedOptions.filter(o => !o.hasChildren && !o.isOther);
371+
const otherOptions = sortedOptions.filter(o => o.isOther);
372+
373+
// Combine: branches first, then leaves, then "Other" last (no duplicates)
374+
const relevantOptions = [...branchOptions, ...leafOptions, ...otherOptions];
362375

363-
logger.debug(`[DISTINCT-GROUPS] Non-other: ${nonOtherOptions.length}, with children: ${optionsWithChildren.length}`);
376+
logger.debug(`[DISTINCT-GROUPS] Total: ${sortedOptions.length}, branches: ${branchOptions.length}, leaves: ${leafOptions.length}, other: ${otherOptions.length}`);
364377

365-
if (optionsWithChildren.length >= 2) {
378+
// ROOT CAUSE FIX: Use relevantOptions (includes branches, leaves, AND "Other")
379+
if (relevantOptions.length >= 2) {
366380
// Check if user keywords clearly match ONE option
367-
const matchScores = optionsWithChildren.map(o => {
381+
const matchScores = relevantOptions.map(o => {
368382
const desc = o.description.toLowerCase();
369383
const matchCount = userKeywords.filter(kw => {
370384
if (kw.length < 3) return false;
@@ -384,16 +398,20 @@ function detectDistinctGroups(
384398
return null;
385399
}
386400

387-
logger.info(`[DISTINCT-GROUPS] ${optionsWithChildren.length} branch options at this level - generating question`);
401+
logger.info(`[DISTINCT-GROUPS] ${relevantOptions.length} options at this level - generating question`);
388402

389-
// Generate question based on description patterns
390-
const questionText = generateQuestionFromDescriptions(optionsWithChildren);
403+
// ROOT CAUSE FIX: Generate smart question based on ACTUAL differences between options
404+
const questionText = generateSmartQuestion(relevantOptions);
391405

392-
// Create groups with cleaned labels
393-
const groups = optionsWithChildren.slice(0, 6).map(o => ({
394-
name: cleanDescriptionForLabel(o.description),
395-
options: [o]
396-
}));
406+
// ROOT CAUSE FIX: Use generateUniqueLabel which handles duplicates properly
407+
const existingLabels = new Set<string>();
408+
const groups = relevantOptions.slice(0, 6).map(o => {
409+
const uniqueLabel = generateUniqueLabel(o.code, o.description, existingLabels);
410+
return {
411+
name: uniqueLabel,
412+
options: [o]
413+
};
414+
});
397415

398416
return {
399417
groups,
@@ -405,40 +423,57 @@ function detectDistinctGroups(
405423
}
406424

407425
/**
408-
* Generate a question based on what differentiates the options
409-
* Analyzes description patterns to create meaningful questions
426+
* ROOT CAUSE FIX: Generate SMART questions based on ACTUAL differences between options
427+
* This analyzes what's different between options and generates contextually relevant questions
410428
*/
411-
function generateQuestionFromDescriptions(options: HierarchyOption[]): string {
412-
const descriptions = options.map(o => o.description.toLowerCase());
413-
414-
// Check for common patterns in HS code descriptions
415-
if (descriptions.some(d => d.includes('roasted')) &&
416-
descriptions.some(d => d.includes('not roasted'))) {
417-
return 'What is the roasting status?';
418-
}
419-
420-
if (descriptions.some(d => d.includes('crushed') || d.includes('ground')) &&
421-
descriptions.some(d => d.includes('neither crushed'))) {
422-
return 'What form is your product?';
423-
}
429+
function generateSmartQuestion(options: HierarchyOption[]): string {
430+
if (options.length === 0) return "Which option best describes your product?";
424431

425-
if (descriptions.some(d => d.includes('green')) &&
426-
descriptions.some(d => d.includes('black'))) {
427-
return 'What type is your product?';
428-
}
432+
// Extract descriptions and find common/different parts
433+
const descriptions = options.map(o => o.description.toLowerCase());
429434

430-
if (descriptions.some(d => d.includes('fresh')) &&
431-
descriptions.some(d => d.includes('dried') || d.includes('frozen'))) {
432-
return 'What is the preservation state?';
435+
// Check for common distinguishing patterns - ordered by specificity
436+
const patterns = [
437+
// Most specific patterns first
438+
{ keywords: ['decaffeinated', 'not decaffeinated'], question: "Is the product decaffeinated?" },
439+
{ keywords: ['roasted', 'not roasted'], question: "Is the product roasted?" },
440+
{ keywords: ['crushed', 'ground', 'whole', 'neither crushed nor ground'], question: "What is the processing state?" },
441+
{ keywords: ['fresh', 'dried', 'frozen', 'chilled'], question: "What is the preservation state?" },
442+
{ keywords: ['raw', 'processed', 'prepared'], question: "Is this raw or processed?" },
443+
{ keywords: ['bulk', 'retail', 'packing', 'packings', 'immediate packing'], question: "What is the packaging type?" },
444+
{ keywords: ['seed', 'seeds', 'powder', 'extract', 'oil'], question: "What form is the product in?" },
445+
{ keywords: ['breeding', 'pure-bred', 'livestock'], question: "What is the purpose/type?" },
446+
{ keywords: ['arabica', 'robusta'], question: "What variety of coffee is this?" },
447+
{ keywords: ['green tea', 'black tea', 'oolong'], question: "What type of tea is this?" },
448+
{ keywords: ['parchment', 'cherry'], question: "What is the processing method?" },
449+
{ keywords: ['grade a', 'grade b', 'grade c', 'a grade', 'b grade', 'c grade', 'ab grade', 'pb grade'], question: "What grade is your product?" },
450+
{ keywords: ['husk', 'skin', 'shell'], question: "Is this a byproduct (husks/skins/shells)?" },
451+
{ keywords: ['instant', 'soluble', 'extract', 'essence'], question: "Is this an instant/processed product?" },
452+
{ keywords: ['other'], question: "Does your product fit a specific category, or is it 'Other'?" },
453+
];
454+
455+
// Find which pattern matches the differences between options
456+
for (const pattern of patterns) {
457+
const matchCount = pattern.keywords.filter(kw =>
458+
descriptions.some(d => d.includes(kw))
459+
).length;
460+
461+
// Need at least 2 matches to confirm this is the distinguishing factor
462+
if (matchCount >= 2) {
463+
return pattern.question;
464+
}
433465
}
434466

435-
if (descriptions.some(d => d.includes('decaffeinated')) &&
436-
descriptions.some(d => d.includes('not decaffeinated'))) {
437-
return 'Is the product decaffeinated?';
438-
}
467+
// Default fallback
468+
return "Which category best matches your product?";
469+
}
439470

440-
// Default generic question
441-
return 'Please select the category that best matches your product:';
471+
/**
472+
* LEGACY: Generate a question based on what differentiates the options
473+
* Kept for backward compatibility - use generateSmartQuestion instead
474+
*/
475+
function generateQuestionFromDescriptions(options: HierarchyOption[]): string {
476+
return generateSmartQuestion(options);
442477
}
443478

444479
/**
@@ -741,62 +776,93 @@ async function getCodeDescription(code: string): Promise<string | null> {
741776
}
742777

743778
/**
744-
* Generate a unique label from description
745-
* Handles HS code descriptions like "Arabica plantation: ---- A Grade"
746-
* Combines type (before colon) with grade/detail (after dashes)
779+
* ROOT CAUSE FIX: Generate UNIQUE labels that distinguish between similar codes
780+
*
781+
* Problem it solves:
782+
* "Coffee, not roasted : --Not decaffeinated" → "Coffee, not roasted"
783+
* "Coffee, not roasted : --Decaffeinated" → "Coffee, not roasted" // DUPLICATE!
784+
*
785+
* Fixed behavior:
786+
* Step 1: Try base label (before first colon)
787+
* Step 2: If duplicate, add distinguishing part (after colon, before ----)
788+
* Step 3: If still duplicate, add code suffix
747789
*/
748790
function generateUniqueLabel(code: string, description: string, existingLabels: Set<string>): string {
749791
// Clean up the description - remove leading dashes and extra spaces
750-
let cleanDesc = description.replace(/^[-:\s]+/, '').trim();
792+
const cleanDesc = description.replace(/^[-:\s]+/, '').trim();
751793

752-
// Split by colon
753-
const parts = cleanDesc.split(':').map(p => p.trim().replace(/^[-\s]+/, '').trim()).filter(p => p);
794+
// Split by colon to separate main part from qualifier
795+
const colonParts = cleanDesc.split(/\s*:\s*/);
796+
const mainPart = (colonParts[0] || '').trim();
754797

755-
let baseLabel: string;
798+
// Step 1: Try base label (just the part before colon)
799+
let label = mainPart;
756800

757-
if (parts.length >= 2) {
758-
// Format: "Type: ---- Grade" -> combine as "Type - Grade"
759-
const typePart = parts[0] || '';
760-
const gradePart = parts[parts.length - 1] || '';
761-
762-
// Clean up grade part (remove leading dashes)
763-
const cleanGrade = gradePart.replace(/^[-\s]+/, '').trim();
764-
765-
if (cleanGrade && cleanGrade.toLowerCase() !== 'other') {
766-
// Combine type and grade: "Arabica plantation - A Grade"
767-
baseLabel = `${typePart} - ${cleanGrade}`;
768-
} else if (cleanGrade.toLowerCase() === 'other') {
769-
baseLabel = `${typePart} - Other`;
770-
} else {
771-
baseLabel = typePart;
772-
}
773-
} else {
774-
baseLabel = cleanDesc;
801+
// Truncate if too long
802+
if (label.length > 50) {
803+
label = label.substring(0, 47) + '...';
804+
}
805+
806+
if (!existingLabels.has(label)) {
807+
existingLabels.add(label);
808+
return label;
775809
}
776810

777-
// Truncate if too long (but keep it longer to preserve grade info)
778-
if (baseLabel.length > 60) {
779-
baseLabel = baseLabel.substring(0, 57) + '...';
811+
// Step 2: Add the distinguishing part (after colon, before ----)
812+
// This handles: "Coffee, not roasted : --Not decaffeinated" → extracts "Not decaffeinated"
813+
if (colonParts.length > 1) {
814+
// Get qualifier from the part after colon
815+
let qualifierPart = colonParts.slice(1).join(' : '); // Rejoin in case multiple colons
816+
817+
// Remove leading dashes (e.g., "--Not decaffeinated" → "Not decaffeinated")
818+
let qualifier = qualifierPart.replace(/^[-\s]+/, '').trim();
819+
820+
// If there's a "----" separator, take the part before it
821+
if (qualifier.includes('----')) {
822+
qualifier = qualifier.split('----')[0]?.trim() || qualifier;
823+
}
824+
825+
// Also handle "--" separator (common in HS descriptions)
826+
if (qualifier.includes('--')) {
827+
qualifier = qualifier.split('--')[0]?.trim() || qualifier;
828+
}
829+
830+
// Clean up the qualifier
831+
qualifier = qualifier.replace(/^[-\s]+/, '').trim();
832+
833+
if (qualifier && qualifier.toLowerCase() !== 'other') {
834+
label = `${mainPart} - ${qualifier}`;
835+
836+
// Truncate if too long
837+
if (label.length > 60) {
838+
label = label.substring(0, 57) + '...';
839+
}
840+
841+
if (!existingLabels.has(label)) {
842+
existingLabels.add(label);
843+
return label;
844+
}
845+
}
780846
}
781847

782-
// Clean up any remaining artifacts
783-
baseLabel = baseLabel.replace(/\s+/g, ' ').trim();
848+
// Step 3: Add code suffix as last resort
849+
const codeParts = code.split('.');
850+
const codeEnd = codeParts[codeParts.length - 1] || code;
851+
label = `${mainPart} (${codeEnd})`;
784852

785-
// If label already exists, append the HS code suffix to differentiate
786-
if (existingLabels.has(baseLabel)) {
787-
// Get the last segment of the code (e.g., "11" from "0901.11.11")
788-
const codeParts = code.split('.');
789-
const suffix = codeParts[codeParts.length - 1] || code;
790-
baseLabel = `${baseLabel} (${suffix})`;
853+
// Final truncation check
854+
if (label.length > 60) {
855+
label = label.substring(0, 57) + '...';
791856
}
792857

793-
existingLabels.add(baseLabel);
794-
return baseLabel;
858+
existingLabels.add(label);
859+
return label;
795860
}
796861

797862
/**
798-
* Limit and deduplicate options, ensuring unique labels
863+
* ROOT CAUSE FIX: Limit and deduplicate options, ensuring unique labels
799864
* Max 8 options to avoid overwhelming users
865+
* NOW INCLUDES "Other" options at the end as valid classifications
800866
*/
801867
function processOptionsForDisplay(
802868
options: Array<{code: string; description: string; isOther?: boolean}>,
@@ -805,11 +871,31 @@ function processOptionsForDisplay(
805871
const result: Array<{code: string; label: string; description: string}> = [];
806872
const existingLabels = new Set<string>();
807873

808-
// Take up to maxOptions non-Other options
809-
const nonOtherOptions = options.filter(o => !o.isOther).slice(0, maxOptions);
874+
// Sort: non-Other first, then Other at the end
875+
const sortedOptions = [...options].sort((a, b) => {
876+
if (a.isOther && !b.isOther) return 1;
877+
if (!a.isOther && b.isOther) return -1;
878+
return 0;
879+
});
880+
881+
// Take up to maxOptions, including "Other" options
882+
const selectedOptions = sortedOptions.slice(0, maxOptions);
883+
884+
for (const opt of selectedOptions) {
885+
let label: string;
886+
if (opt.isOther) {
887+
// For "Other" codes, create a clear label
888+
label = 'Other';
889+
if (existingLabels.has(label)) {
890+
const codeParts = opt.code.split('.');
891+
const suffix = codeParts[codeParts.length - 1] || opt.code;
892+
label = `Other (${suffix})`;
893+
}
894+
existingLabels.add(label);
895+
} else {
896+
label = generateUniqueLabel(opt.code, opt.description || '', existingLabels);
897+
}
810898

811-
for (const opt of nonOtherOptions) {
812-
const label = generateUniqueLabel(opt.code, opt.description || '', existingLabels);
813899
result.push({
814900
code: opt.code,
815901
label,

0 commit comments

Comments
 (0)