@@ -274,17 +274,16 @@ function filterOptionsByKeywordsSimple(
274274 const matchedChapters = new Set ( matchingOptions . map ( o => o . code . substring ( 0 , 2 ) ) ) ;
275275
276276 // Only add "Other" if all matched options are from the same chapter
277- // AND there's an "Other" option from that same chapter
277+ // AND there are "Other" options from that same chapter
278278 if ( matchedChapters . size === 1 ) {
279279 const chapter = [ ...matchedChapters ] [ 0 ] ;
280- const relevantOther = filteredOptions . find ( o =>
280+ const relevantOthers = filteredOptions . filter ( o =>
281281 o . isOther &&
282282 o . code . substring ( 0 , 2 ) === chapter &&
283283 ! matchingOptions . includes ( o )
284284 ) ;
285- if ( relevantOther ) {
286- matchingOptions . push ( relevantOther ) ;
287- }
285+ // Add ALL relevant "Other" options (e.g., "Other - Seed" and "Other - Other" for wheat)
286+ matchingOptions . push ( ...relevantOthers ) ;
288287 }
289288
290289 logger . info ( `[FILTER] Keyword filter: ${ filteredOptions . length } -> ${ matchingOptions . length } options` ) ;
@@ -437,6 +436,7 @@ function generateSmartQuestion(options: HierarchyOption[]): string {
437436 // Check for common distinguishing patterns - ordered by specificity
438437 const patterns = [
439438 // Most specific patterns first
439+ { keywords : [ 'seed' , 'sowing' , 'for seed' ] , question : "Is this for sowing (seed) or other purposes?" } ,
440440 { keywords : [ 'decaffeinated' , 'not decaffeinated' ] , question : "Is the product decaffeinated?" } ,
441441 { keywords : [ 'roasted' , 'not roasted' ] , question : "Is the product roasted?" } ,
442442 { keywords : [ 'crushed' , 'ground' , 'whole' , 'neither crushed nor ground' ] , question : "What is the processing state?" } ,
@@ -786,84 +786,111 @@ async function getCodeDescription(code: string): Promise<string | null> {
786786 * ROOT CAUSE FIX: Generate UNIQUE labels that distinguish between similar codes
787787 *
788788 * Problem it solves:
789- * "Coffee, not roasted : --Not decaffeinated" → "Coffee, not roasted"
790- * "Coffee, not roasted : --Decaffeinated" → "Coffee, not roasted" // DUPLICATE!
789+ * "Durum wheat : -- Seed" → Should extract "Seed"
790+ * "Durum wheat : -- Other" → Should extract "Other"
791+ * "Other : -- Seed" → Should extract "Seed"
791792 *
792793 * Fixed behavior:
793- * Step 1: Try base label (before first colon)
794- * Step 2: If duplicate, add distinguishing part (after colon, before ----)
795- * Step 3: If still duplicate, add code suffix
794+ * Step 1: Clean the description (remove DGFT dates)
795+ * Step 2: Parse the description format (": --" or ":--" or ": ----")
796+ * Step 3: Try base label first
797+ * Step 4: Add qualifier if we have one and it's meaningful
798+ * Step 5: Handle "Other" or empty qualifiers with seed/other distinction
799+ * Step 6: Last resort - add meaningful code part
796800 */
/**
 * Builds a human-readable label for a code/description pair that is not yet
 * present in `existingLabels`, and records the chosen label in that set.
 *
 * Candidates are tried in order, and the first unused one wins:
 *   1. the base label (text before the first ":"), unless it is a bare "Other";
 *   2. "<base> - <qualifier>" when the qualifier is something other than "Other";
 *   3. "<base> - Other" when the qualifier is "Other" or missing;
 *   4. "<base> - <suffix>" where the suffix is derived from the last code segment;
 *   5. "<base> (<code>)", with a numeric counter appended if even that is taken.
 *
 * @param code - Classification code (dot-separated segments, e.g. "1001.11").
 * @param description - Raw description, e.g. "Durum wheat : -- Seed".
 * @param existingLabels - Labels already handed out; MUTATED: the returned label is added.
 * @returns A label that was not in `existingLabels` before the call.
 */
function generateUniqueLabel(
  code: string,
  description: string,
  existingLabels: Set<string>
): string {
  // Reserves `candidate` if it is non-empty and unused; reports whether it was reserved.
  const claim = (candidate: string): boolean => {
    if (candidate.length === 0 || existingLabels.has(candidate)) {
      return false;
    }
    existingLabels.add(candidate);
    return true;
  };

  // Step 1: Clean the description.
  // - Date stamps of the form "12/2015-20 01.07.2017" are replaced by a single space
  //   (not the empty string) so that words on either side are not glued together.
  // - Leading dashes/colons/whitespace are dropped so "-- Seed" is read as "Seed".
  const cleanDesc = description
    .replace(/\s*\d{2}\/\d{4}-\d{2,4}\s+\d{2}\.\d{2}\.\d{4}\s*/g, ' ')
    .replace(/^[-:\s]+/, '')
    .trim();

  // Step 2: Split into base label and qualifier.
  // Handled formats:
  //   "Category : -- Qualifier"   "Category :-- Qualifier"   "Category: ---- Detail"
  // and, failing that, a plain "Category: Qualifier" split on the first colon.
  let baseLabel = cleanDesc;
  let qualifier = '';

  const colonDashMatch = /^(.+?)\s*:\s*-{1,4}\s*(.+)$/.exec(cleanDesc);
  if (colonDashMatch) {
    baseLabel = (colonDashMatch[1] ?? '').trim();
    qualifier = (colonDashMatch[2] ?? '').trim();
  } else {
    const colonAt = cleanDesc.indexOf(':');
    if (colonAt !== -1) {
      baseLabel = cleanDesc.slice(0, colonAt).trim();
      qualifier = cleanDesc
        .slice(colonAt + 1)
        .replace(/^[\s-]+/, '')
        .trim();
    }
  }

  // An empty/dash-only description leaves nothing to show; fall back to the code
  // itself rather than emitting labels like " - Other".
  if (baseLabel.length === 0) {
    baseLabel = code;
  }

  // Step 3: Try the base label on its own — but never a bare "Other",
  // which says nothing without its qualifier.
  const isGenericBase = baseLabel.toLowerCase() === 'other';
  if (!isGenericBase && claim(baseLabel)) {
    return baseLabel;
  }

  // Step 4: Add the qualifier when it carries information.
  const qualifierIsOther = qualifier.toLowerCase() === 'other';
  if (qualifier.length > 0 && !qualifierIsOther) {
    const labelWithQualifier = `${baseLabel} - ${qualifier}`;
    if (claim(labelWithQualifier)) {
      return labelWithQualifier;
    }
  }

  // Step 5: "Other" or missing qualifier.
  // The qualifier contributes no "seed" here, so any "seed" in the description
  // comes from the base label (e.g. "Sunflower seeds : -- Other"). Such a row is
  // the residual category and must not be labelled "- Seed".
  if (qualifierIsOther || qualifier.length === 0) {
    const otherLabel = `${baseLabel} - Other`;
    if (claim(otherLabel)) {
      return otherLabel;
    }
  }

  // Step 6: Derive a suffix from the last code segment (e.g. "11" from "1001.11").
  // NOTE(review): the 11/91 -> Seed and 19/99 -> Other mapping is carried over
  // unchanged; presumably it mirrors the tariff numbering — confirm against data.
  const codeParts = code.replace(/\.00$/, '').split('.');
  const lastPart = codeParts[codeParts.length - 1] ?? code;

  let suffix: string;
  switch (lastPart) {
    case '11':
    case '91':
      suffix = 'Seed';
      break;
    case '19':
    case '99':
      suffix = 'Other';
      break;
    case '00':
      suffix = 'General';
      break;
    default:
      suffix = lastPart;
  }

  const codeSuffixLabel = `${baseLabel} - ${suffix}`;
  if (claim(codeSuffixLabel)) {
    return codeSuffixLabel;
  }

  // Absolute last resort: append the full code, and a counter if the same
  // code/description pair has already been labelled this way.
  const fullCodeLabel = `${baseLabel} (${code})`;
  let uniqueLabel = fullCodeLabel;
  for (let attempt = 2; existingLabels.has(uniqueLabel); attempt++) {
    uniqueLabel = `${fullCodeLabel} #${attempt}`;
  }
  existingLabels.add(uniqueLabel);
  return uniqueLabel;
}
868895
869896/**
0 commit comments