@@ -9,7 +9,14 @@ import { assessDataQuality, type QualityScore } from "../quality/quality.ts";
99import { getScale } from "../scale/scale.ts" ;
1010import { parseWithCustomUnits } from "../custom/custom_units.ts" ;
1111import { allowsTimeConversion } from "../normalization/indicator_type_rules.ts" ;
12- import type { Explain , FXTable , Scale , TimeScale } from "../types.ts" ;
12+ import type {
13+ Explain ,
14+ FXTable ,
15+ ReportingFrequency ,
16+ Scale ,
17+ TimeScale ,
18+ UnitType ,
19+ } from "../types.ts" ;
1320
1421/**
1522 * Normalize scale string from database to Scale type
@@ -36,9 +43,20 @@ function normalizeScale(scale?: string | null): Scale | null {
3643/**
3744 * Normalize time scale string from database to TimeScale type
3845 */
46+ /**
47+ * Normalize time scale from database reporting_frequency or periodicity field.
48+ * Handles both database values (annual, quarterly, monthly) and descriptive variants.
49+ *
50+ * Note: "point-in-time" is a special database value indicating snapshot data
51+ * with no regular frequency. We return null for it since it doesn't map to a TimeScale.
52+ */
3953function normalizeTimeScale ( periodicity ?: string | null ) : TimeScale | null {
4054 if ( ! periodicity || periodicity . trim ( ) === "" ) return null ;
4155 const normalized = periodicity . toLowerCase ( ) . trim ( ) ;
56+
57+ // Handle special case: point-in-time data has no time scale
58+ if ( normalized === "point-in-time" ) return null ;
59+
4260 switch ( normalized ) {
4361 case "yearly" :
4462 case "annual" :
@@ -75,29 +93,80 @@ function normalizeTimeScale(periodicity?: string | null): TimeScale | null {
7593 * Normalize currency code from database
7694 * - Only accept known ISO currency codes; otherwise treat as non-monetary (null)
7795 */
/**
 * Normalize a currency code taken from the database `currency_code` field.
 *
 * The raw value is trimmed and upper-cased, then looked up in
 * `CURRENCY_CODES`. Anything that is missing, blank, or not present in that
 * set is reported as `null`, which callers treat as "no currency".
 *
 * @param currency - Raw currency value; may be undefined, null, or blank.
 * @returns The upper-cased code when `CURRENCY_CODES` contains it, otherwise `null`.
 */
function normalizeCurrency(currency?: string | null): string | null {
  // A missing value yields undefined and a blank value yields "" - both are
  // falsy, so one guard covers every "nothing usable" input.
  const code = currency?.trim().toUpperCase();
  if (!code) return null;
  return CURRENCY_CODES.has(code) ? code : null;
}
83105
/**
 * BatchItem represents economic indicator data with metadata from database.
 *
 * Database columns are provided by the classification workflow and should be
 * treated as authoritative over unit string parsing.
 *
 * Example DB row to BatchItem mapping:
 * ```
 * DB: AFGHANISTACONSPE | Consumer Spending | ... | AFN Million | Millions | Yearly |
 *     AFN | flow | period-total | higher-is-positive | currency-amount | annual | true
 *
 * BatchItem: {
 *   id: "AFGHANISTACONSPE",
 *   name: "Consumer Spending",
 *   value: 1500,
 *   unit: "AFN Million",
 *   scale: "Millions",                         // DB: scale column
 *   currency_code: "AFN",                      // DB: currency_code column
 *   indicator_type: "flow",                    // DB: type column
 *   temporal_aggregation: "period-total",      // DB: temporal_aggregation column
 *   heat_map_orientation: "higher-is-positive",// DB: heat_map_orientation column
 *   unit_type: "currency-amount",              // DB: unit_type column
 *   reporting_frequency: "annual",             // DB: reporting_frequency column
 *   is_currency_denominated: true              // DB: is_currency_denominated column (last)
 * }
 * ```
 */
export interface BatchItem {
  id?: string | number;
  /** Indicator name for classification (e.g., "GDP", "Balance of Trade"). */
  name?: string;
  value: number;
  unit: string;

  // ---------------------------------------------------------------------
  // Explicit metadata fields from database.
  // PRIORITIZE these over unit string parsing.
  // ---------------------------------------------------------------------

  /**
   * Time dimension, legacy field. Accepts any string for backward
   * compatibility; use `reporting_frequency` for new code.
   */
  periodicity?: string;

  /**
   * Time dimension from the `reporting_frequency` database column.
   * Database values: "annual" | "quarterly" | "monthly" | "weekly" | "daily" |
   * "point-in-time". Also accepts arbitrary strings for flexibility.
   */
  reporting_frequency?: ReportingFrequency | string;

  /**
   * Magnitude/scale from the `scale` column or parsed from units,
   * e.g. "Millions", "Billions", "Thousands". Accepts any string for flexibility.
   */
  scale?: string;

  /** ISO currency code from the `currency_code` column: "USD", "SAR", "XOF", etc. */
  currency_code?: string;

  /**
   * Unit semantic type from the `unit_type` column; helps avoid incorrect parsing.
   * Database values: "count" | "unknown" | "currency-amount" | "physical" |
   * "percentage" | "index". Also accepts arbitrary strings for flexibility.
   */
  unit_type?: UnitType | string;

  // ---------------------------------------------------------------------
  // Classification from @tellimer/classify batch workflow.
  // AUTHORITATIVE metadata.
  // ---------------------------------------------------------------------

  /**
   * Indicator behavior classification from the "type" DB column:
   * "flow" | "stock" | "percentage" | "ratio" | "rate" | "index" | etc.
   */
  indicator_type?: string;

  /**
   * Time aggregation behavior from the "temporal_aggregation" DB column:
   * "point-in-time" | "period-rate" | "period-cumulative" | "period-average" |
   * "period-total" | "not-applicable".
   */
  temporal_aggregation?: string;

  /**
   * Visual/UI hint from the "heat_map_orientation" DB column (not used in
   * normalization): "higher-is-positive" | "neutral" | "lower-is-positive".
   */
  heat_map_orientation?: string;

  /**
   * Currency conversion control from the "is_currency_denominated" DB column
   * (the last column in the DB row). This is the dominant check, set by the
   * classification workflow based on indicator semantics:
   * true = apply FX conversion, false = skip FX conversion.
   */
  is_currency_denominated?: boolean;

  metadata?: Record<string, unknown>;
}
103172
@@ -376,21 +445,55 @@ function processSequentially<T extends BatchItem>(
376445/**
377446 * Process single item
378447 */
448+ /**
449+ * Process a single batch item through normalization pipeline.
450+ *
451+ * FIELD PRIORITY SYSTEM (Database First):
452+ * ========================================
453+ * This function prioritizes structured metadata from the database over unit string parsing:
454+ *
455+ * 1. TIME DIMENSION:
456+ * - reporting_frequency (DB column) → PRIMARY SOURCE
457+ * - periodicity field → fallback
458+ * - parseUnit(unit).timeScale → last resort
459+ *
460+ * 2. CURRENCY:
461+ * - is_currency_denominated flag → controls whether to look for currency
462+ * - unit_type → helps identify non-currency types (percentage, physical)
463+ * - currency_code (DB column) → PRIMARY SOURCE for currency
464+ * - parseUnit(unit).currency → fallback
465+ *
466+ * 3. MAGNITUDE/SCALE:
467+ * - scale (DB column) → PRIMARY SOURCE
468+ * - parseUnit(unit).scale → fallback
469+ * - Exception: Chinese units (hundred-millions) prefer parsed value
470+ *
471+ * 4. INDICATOR BEHAVIOR:
472+ * - temporal_aggregation (DB) → controls time conversion rules
473+ * - indicator_type (DB) → fallback for time conversion
474+ *
475+ * This approach ensures data from @tellimer/classify is authoritative.
476+ */
379477function processItem < T extends BatchItem > (
380478 item : T ,
381479 options : Omit < BatchOptions , "parallel" | "concurrency" | "progressCallback" > ,
382480) : { normalized : number ; normalizedUnit : string ; explain ?: Explain } | null {
383481 try {
384- // Parse unit to get baseline information
482+ // PRIORITY 1: Use database fields when available (reporting_frequency, currency_code, etc.)
483+ // PRIORITY 2: Parse unit string as fallback for missing metadata
385484 const parsed = parseUnit ( item . unit ) ;
386485
387- // Determine if this is count data (e.g., car registrations) to avoid currency parts in unit
388- // indicator_type must be provided from @tellimer /classify package
486+ // Extract metadata from @tellimer/classify package
389487 const indicatorName = ( item as unknown as { name ?: string } ) . name ;
390488 const indicatorType = ( item as unknown as { indicator_type ?: string } )
391489 . indicator_type ;
392490
393- // Use indicator_type from classify package to determine count data
491+ // UNIT_TYPE from database can help avoid incorrect parsing
492+ // e.g., if unit_type="percentage", we know it's not a currency amount
493+ // Available values: "count" | "unknown" | "currency-amount" | "physical" | "percentage" | "index"
494+ const unitType = item . unit_type ;
495+
496+ // Determine if this is count/volume data (avoid treating as currency)
394497 const isCountData = indicatorType === "count" || indicatorType === "volume" ;
395498
396499 // Check custom units if standard parsing returns unknown
@@ -400,25 +503,73 @@ function processItem<T extends BatchItem>(
400503 return null ;
401504 }
402505
403- // Use explicit fields if provided, otherwise fall back to parsed values
404- // Normalize explicit metadata to match expected types
506+ // PRIORITY SYSTEM: Database fields first, then unit string parsing
507+ // This ensures structured data from @tellimer/classify takes precedence
508+
405509 const isCurrencyDenominated = ( item as unknown as {
406510 is_currency_denominated ?: boolean ;
407511 } ) . is_currency_denominated ;
408512
409- // If is_currency_denominated is explicitly false, skip currency detection
410- // If true or undefined, use currency_code or parsed currency
411- const effectiveCurrency = ( isCurrencyDenominated === false )
513+ // CURRENCY CONVERSION CONTROL
514+ // ============================
515+ // is_currency_denominated (from DB) is the DOMINANT CHECK set by classification workflow
516+ // It authoritatively controls whether FX conversion should be applied
517+ //
518+ // DECISION HIERARCHY:
519+ // 1. is_currency_denominated flag → AUTHORITATIVE (from classification workflow)
520+ // - true: Apply FX conversion (value is in currency units)
521+ // - false: Skip FX conversion (even if unit text contains currency codes)
522+ // - undefined: Fall back to heuristics (unit_type, parsing)
523+ //
524+ // 2. unit_type → Validation hint only
525+ // - "currency-amount": Suggests currency (but defer to is_currency_denominated)
526+ // - "percentage", "physical", "index": Suggests non-currency
527+ //
528+ // 3. currency_code (from DB) → Which currency if conversion is enabled
529+ //
530+ // 4. parseUnit(unit) → Fallback parsing if DB fields missing
531+ //
532+ // KEY EXAMPLES:
533+ // - FX Rate "PKR/USD": is_currency_denominated=false (don't convert ratio)
534+ // - GDP "USD Million": is_currency_denominated=true (convert USD→target currency)
535+ // - Unemployment "%": is_currency_denominated=false (no currency)
536+
537+ const shouldSkipCurrency = isCurrencyDenominated === false ;
538+
539+ // Additional validation: warn if unit_type conflicts with is_currency_denominated
540+ if (
541+ typeof console !== "undefined" && unitType &&
542+ isCurrencyDenominated !== undefined
543+ ) {
544+ const unitTypeImpliesCurrency = unitType === "currency-amount" ;
545+ if ( unitTypeImpliesCurrency && isCurrencyDenominated === false ) {
546+ console . warn (
547+ `⚠️ Data quality issue: unit_type="${ unitType } " but is_currency_denominated=false for item ${
548+ item . id || "unknown"
549+ } . ` +
550+ `Using is_currency_denominated as authoritative.` ,
551+ ) ;
552+ }
553+ }
554+
555+ const effectiveCurrency = shouldSkipCurrency
412556 ? null
413557 : ( normalizeCurrency ( item . currency_code ) || parsed . currency ) ;
414558
415- // SPECIAL CASE: If unit text contains "hundred million", prefer parsed scale over database scale
416- // This handles Chinese accounting units (亿, yi = 100 million) where database may incorrectly say "Millions"
559+ // SCALE/MAGNITUDE: Database scale first, with special case for Chinese units
560+ // SPECIAL CASE: If unit text contains "hundred million" (亿), prefer parsed scale
561+ // This handles Chinese accounting units where database may incorrectly say "Millions"
417562 const effectiveScale = parsed . scale === "hundred-millions"
418563 ? parsed . scale
419564 : ( normalizeScale ( item . scale ) || parsed . scale ) ;
420565
421- const effectiveTimeScale = normalizeTimeScale ( item . periodicity ) ||
566+ // TIME SCALE: Use reporting_frequency from database FIRST (maps to periodicity)
567+ // Fallback to periodicity field, then unit string parsing as last resort
568+ // This ensures the dataset's reporting cadence (from DB) is preferred over guessing from unit text
569+ const reportingFreq = ( item as unknown as { reporting_frequency ?: string } )
570+ . reporting_frequency ;
571+ const effectiveTimeScale = normalizeTimeScale ( reportingFreq ) ||
572+ normalizeTimeScale ( item . periodicity ) ||
422573 parsed . timeScale ;
423574
424575 // Determine an explicit target magnitude for consistent unit strings (compute before normalization)
0 commit comments