@@ -355,16 +355,30 @@ function detectDistinctGroups(
355355 }
356356 }
357357
358- // RULE 2: Multiple non-other options with children at same level
359- // This is the CORE generic logic - works for ANY product
360- const nonOtherOptions = options . filter ( o => ! o . isOther ) ;
361- const optionsWithChildren = nonOtherOptions . filter ( o => o . hasChildren ) ;
358+ // RULE 2: Multiple options at same level - INCLUDES "Other" codes
359+ // ROOT CAUSE FIX: Keep ALL options including "Other" - it's a valid HS classification
360+ // Sort so "Other" appears last (as a fallback option)
361+ const sortedOptions = [ ...options ] . sort ( ( a , b ) => {
362+ if ( a . isOther && ! b . isOther ) return 1 ; // "Other" goes last
363+ if ( ! a . isOther && b . isOther ) return - 1 ;
364+ return 0 ;
365+ } ) ;
366+
367+ // Separate branch codes (can drill down) from leaf codes (final classification)
368+ // IMPORTANT: Exclude "Other" from branches/leaves to avoid duplicates
369+ const branchOptions = sortedOptions . filter ( o => o . hasChildren && ! o . isOther ) ;
370+ const leafOptions = sortedOptions . filter ( o => ! o . hasChildren && ! o . isOther ) ;
371+ const otherOptions = sortedOptions . filter ( o => o . isOther ) ;
372+
373+ // Combine: branches first, then leaves, then "Other" last (no duplicates)
374+ const relevantOptions = [ ...branchOptions , ...leafOptions , ...otherOptions ] ;
362375
363- logger . debug ( `[DISTINCT-GROUPS] Non-other : ${ nonOtherOptions . length } , with children : ${ optionsWithChildren . length } ` ) ;
376+ logger . debug ( `[DISTINCT-GROUPS] Total : ${ sortedOptions . length } , branches: ${ branchOptions . length } , leaves : ${ leafOptions . length } , other: ${ otherOptions . length } ` ) ;
364377
365- if ( optionsWithChildren . length >= 2 ) {
378+ // ROOT CAUSE FIX: Use relevantOptions (includes branches, leaves, AND "Other")
379+ if ( relevantOptions . length >= 2 ) {
366380 // Check if user keywords clearly match ONE option
367- const matchScores = optionsWithChildren . map ( o => {
381+ const matchScores = relevantOptions . map ( o => {
368382 const desc = o . description . toLowerCase ( ) ;
369383 const matchCount = userKeywords . filter ( kw => {
370384 if ( kw . length < 3 ) return false ;
@@ -384,16 +398,20 @@ function detectDistinctGroups(
384398 return null ;
385399 }
386400
387- logger . info ( `[DISTINCT-GROUPS] ${ optionsWithChildren . length } branch options at this level - generating question` ) ;
401+ logger . info ( `[DISTINCT-GROUPS] ${ relevantOptions . length } options at this level - generating question` ) ;
388402
389- // Generate question based on description patterns
390- const questionText = generateQuestionFromDescriptions ( optionsWithChildren ) ;
403+ // ROOT CAUSE FIX: Generate smart question based on ACTUAL differences between options
404+ const questionText = generateSmartQuestion ( relevantOptions ) ;
391405
392- // Create groups with cleaned labels
393- const groups = optionsWithChildren . slice ( 0 , 6 ) . map ( o => ( {
394- name : cleanDescriptionForLabel ( o . description ) ,
395- options : [ o ]
396- } ) ) ;
406+ // ROOT CAUSE FIX: Use generateUniqueLabel which handles duplicates properly
407+ const existingLabels = new Set < string > ( ) ;
408+ const groups = relevantOptions . slice ( 0 , 6 ) . map ( o => {
409+ const uniqueLabel = generateUniqueLabel ( o . code , o . description , existingLabels ) ;
410+ return {
411+ name : uniqueLabel ,
412+ options : [ o ]
413+ } ;
414+ } ) ;
397415
398416 return {
399417 groups,
@@ -405,40 +423,57 @@ function detectDistinctGroups(
405423}
406424
/**
 * Count how many distinct keywords of one pattern occur in the descriptions,
 * without double-counting a keyword that only appears inside a longer keyword
 * of the same pattern.
 *
 * Keywords are tried longest-first and every hit is masked out of the working
 * copy of the description, so "not roasted" is counted once (as "not roasted")
 * and does not also register as "roasted".
 *
 * @param descriptions Lower-cased option descriptions.
 * @param keywords     Lower-case keywords belonging to a single pattern.
 * @returns Number of distinct keywords found across all descriptions.
 */
function countDistinctKeywordMatches(descriptions: string[], keywords: string[]): number {
  // Most specific (longest) keyword first so it claims its text before any
  // shorter keyword it contains gets a chance to match the same characters.
  const bySpecificity = [...keywords].sort((a, b) => b.length - a.length);
  const found = new Set<string>();

  for (const description of descriptions) {
    let remaining = description;
    for (const keyword of bySpecificity) {
      if (remaining.includes(keyword)) {
        found.add(keyword);
        // Mask every occurrence so contained keywords cannot re-match it.
        remaining = remaining.split(keyword).join('\u0000');
      }
    }
  }

  return found.size;
}

/**
 * Generate the clarifying question for a set of options at the same level,
 * based on which attribute actually differs between their descriptions.
 *
 * The pattern table is scanned in order (most specific first). A pattern is
 * chosen when at least two of its keywords occur in the descriptions; keywords
 * contained in a longer matched keyword are not counted separately, so a group
 * where every option says "not roasted" no longer triggers the roasting
 * question. A pattern with a single keyword (the "other" entry) is chosen when
 * that keyword appears in some, but not all, descriptions.
 *
 * Matching is plain lower-case substring matching, so "husk" also matches
 * "husks".
 *
 * @param options Options to differentiate; a missing description is treated as empty text.
 * @returns The question text, or a generic fallback when no pattern applies.
 */
function generateSmartQuestion(options: HierarchyOption[]): string {
  if (options.length === 0) return "Which option best describes your product?";

  const descriptions = options.map(o => (o.description || '').toLowerCase());

  // Distinguishing patterns - ordered by specificity, first match wins.
  const patterns = [
    { keywords: ['decaffeinated', 'not decaffeinated'], question: "Is the product decaffeinated?" },
    { keywords: ['roasted', 'not roasted'], question: "Is the product roasted?" },
    { keywords: ['crushed', 'ground', 'whole', 'neither crushed nor ground'], question: "What is the processing state?" },
    { keywords: ['fresh', 'dried', 'frozen', 'chilled'], question: "What is the preservation state?" },
    { keywords: ['raw', 'processed', 'prepared'], question: "Is this raw or processed?" },
    { keywords: ['bulk', 'retail', 'packing', 'packings', 'immediate packing'], question: "What is the packaging type?" },
    { keywords: ['seed', 'seeds', 'powder', 'extract', 'oil'], question: "What form is the product in?" },
    { keywords: ['breeding', 'pure-bred', 'livestock'], question: "What is the purpose/type?" },
    { keywords: ['arabica', 'robusta'], question: "What variety of coffee is this?" },
    { keywords: ['green tea', 'black tea', 'oolong'], question: "What type of tea is this?" },
    { keywords: ['parchment', 'cherry'], question: "What is the processing method?" },
    { keywords: ['grade a', 'grade b', 'grade c', 'a grade', 'b grade', 'c grade', 'ab grade', 'pb grade'], question: "What grade is your product?" },
    { keywords: ['husk', 'skin', 'shell'], question: "Is this a byproduct (husks/skins/shells)?" },
    { keywords: ['instant', 'soluble', 'extract', 'essence'], question: "Is this an instant/processed product?" },
    { keywords: ['other'], question: "Does your product fit a specific category, or is it 'Other'?" },
  ];

  for (const pattern of patterns) {
    const [onlyKeyword] = pattern.keywords;

    if (pattern.keywords.length === 1 && onlyKeyword !== undefined) {
      // A lone keyword can never reach two distinct matches; it distinguishes
      // the options only when some descriptions contain it and others do not.
      const hits = descriptions.filter(d => d.includes(onlyKeyword)).length;
      if (hits > 0 && hits < descriptions.length) {
        return pattern.question;
      }
      continue;
    }

    // Need at least 2 distinct keywords to confirm this is the distinguishing factor.
    if (countDistinctKeywordMatches(descriptions, pattern.keywords) >= 2) {
      return pattern.question;
    }
  }

  // Default fallback
  return "Which category best matches your product?";
}
439470
440- // Default generic question
441- return 'Please select the category that best matches your product:' ;
/**
 * Legacy entry point for question generation.
 *
 * Retained so existing callers keep working; it forwards its argument to
 * generateSmartQuestion and returns that result unchanged.
 *
 * @deprecated Call generateSmartQuestion directly.
 */
function generateQuestionFromDescriptions(options: HierarchyOption[]): string {
  const question = generateSmartQuestion(options);
  return question;
}
443478
444479/**
@@ -741,62 +776,93 @@ async function getCodeDescription(code: string): Promise<string | null> {
741776}
742777
/**
 * Generate a display label for an HS code that is unique within `existingLabels`.
 *
 * Problem it solves:
 *   "Coffee, not roasted : --Not decaffeinated" → "Coffee, not roasted"
 *   "Coffee, not roasted : --Decaffeinated"     → "Coffee, not roasted"  // duplicate
 *
 * Strategy:
 *   Step 1: Try the base label (text before the first colon, max 50 chars).
 *   Step 2: If taken, append the qualifier (text after the colon, up to the
 *           first "--" separator), max 60 chars. A qualifier of "other" is skipped.
 *   Step 3: If still taken, append the last dot-separated segment of the code.
 *           The base text is shortened to make room, so the suffix is never
 *           truncated away, and a counter is added until the label is unused.
 *
 * If the description has no usable text, the code itself is used as the base.
 *
 * @param code           HS code, e.g. "0901.11.11".
 * @param description    Raw description; leading dashes, colons and spaces are ignored.
 * @param existingLabels Labels already in use; the returned label is added to this set.
 * @returns A label not previously present in `existingLabels`.
 */
function generateUniqueLabel(code: string, description: string, existingLabels: Set<string>): string {
  const MAX_BASE_LENGTH = 50;
  const MAX_LABEL_LENGTH = 60;

  // Shorten text to `max` characters, ending in "..." when something was cut.
  const clip = (text: string, max: number): string =>
    text.length > max ? text.substring(0, Math.max(0, max - 3)) + '...' : text;

  // Clean up the description - remove leading dashes, colons and extra spaces
  const cleanDesc = description.replace(/^[-:\s]+/, '').trim();

  // Split by colon to separate main part from qualifier
  const colonParts = cleanDesc.split(/\s*:\s*/);
  // Fall back to the code so an empty description never yields an empty label.
  const mainPart = (colonParts[0] || '').trim() || code;

  // Step 1: base label only
  const baseLabel = clip(mainPart, MAX_BASE_LENGTH);
  if (!existingLabels.has(baseLabel)) {
    existingLabels.add(baseLabel);
    return baseLabel;
  }

  // Step 2: base label plus qualifier
  if (colonParts.length > 1) {
    // Rejoin in case the qualifier itself contained colons, then drop leading
    // dashes (e.g. "--Not decaffeinated" → "Not decaffeinated").
    let qualifier = colonParts.slice(1).join(' : ').replace(/^[-\s]+/, '').trim();

    // Keep only the text before the first dash separator; cutting at "--"
    // also covers "----" because the first "--" never comes later than it.
    qualifier = qualifier.split('--')[0]?.trim() || qualifier;

    if (qualifier && qualifier.toLowerCase() !== 'other') {
      const qualifiedLabel = clip(`${mainPart} - ${qualifier}`, MAX_LABEL_LENGTH);
      if (!existingLabels.has(qualifiedLabel)) {
        existingLabels.add(qualifiedLabel);
        return qualifiedLabel;
      }
    }
  }

  // Step 3: code suffix as last resort
  const codeEnd = code.split('.').pop() || code;

  // Shorten the base, not the suffix, so the distinguishing part always survives.
  const withSuffix = (suffix: string): string =>
    clip(mainPart, Math.max(0, MAX_LABEL_LENGTH - suffix.length)) + suffix;

  let label = withSuffix(` (${codeEnd})`);
  // Two codes can share a final segment; keep numbering until the label is free.
  for (let attempt = 2; existingLabels.has(label); attempt++) {
    label = withSuffix(` (${codeEnd} #${attempt})`);
  }

  existingLabels.add(label);
  return label;
}
796861
797862/**
798- * Limit and deduplicate options, ensuring unique labels
863+ * ROOT CAUSE FIX: Limit and deduplicate options, ensuring unique labels
799864 * Max 8 options to avoid overwhelming users
865+ * NOW INCLUDES "Other" options at the end as valid classifications
800866 */
801867function processOptionsForDisplay (
802868 options : Array < { code : string ; description : string ; isOther ?: boolean } > ,
@@ -805,11 +871,31 @@ function processOptionsForDisplay(
805871 const result : Array < { code : string ; label : string ; description : string } > = [ ] ;
806872 const existingLabels = new Set < string > ( ) ;
807873
808- // Take up to maxOptions non-Other options
809- const nonOtherOptions = options . filter ( o => ! o . isOther ) . slice ( 0 , maxOptions ) ;
874+ // Sort: non-Other first, then Other at the end
875+ const sortedOptions = [ ...options ] . sort ( ( a , b ) => {
876+ if ( a . isOther && ! b . isOther ) return 1 ;
877+ if ( ! a . isOther && b . isOther ) return - 1 ;
878+ return 0 ;
879+ } ) ;
880+
881+ // Take up to maxOptions, including "Other" options
882+ const selectedOptions = sortedOptions . slice ( 0 , maxOptions ) ;
883+
884+ for ( const opt of selectedOptions ) {
885+ let label : string ;
886+ if ( opt . isOther ) {
887+ // For "Other" codes, create a clear label
888+ label = 'Other' ;
889+ if ( existingLabels . has ( label ) ) {
890+ const codeParts = opt . code . split ( '.' ) ;
891+ const suffix = codeParts [ codeParts . length - 1 ] || opt . code ;
892+ label = `Other (${ suffix } )` ;
893+ }
894+ existingLabels . add ( label ) ;
895+ } else {
896+ label = generateUniqueLabel ( opt . code , opt . description || '' , existingLabels ) ;
897+ }
810898
811- for ( const opt of nonOtherOptions ) {
812- const label = generateUniqueLabel ( opt . code , opt . description || '' , existingLabels ) ;
813899 result . push ( {
814900 code : opt . code ,
815901 label,
0 commit comments