Merge pull request #14 from hanjuhn/main

dongyeon1031 · web-flow · commit 9237f0a1a8bd · 2025-07-18T13:31:24.000+09:00
chore: parser 수정
diff --git a/core/shared/router/intent_router.py b/core/shared/router/intent_router.py
@@ -8,21 +8,25 @@ def intent_router(state: CustomsAgentState) -> CustomsAgentState:
     llm = get_llm()
     
     classification_prompt = """
-    다음 사용자 쿼리를 분석하여 정확히 하나의 카테고리로 분류해주세요:
+    다음 사용자 쿼리를 아래 세 카테고리 중 하나로 분류하세요.
 
-    1. customs_tracking: 통관 조회, 운송장 추적, 배송 상태 관련
-    2. tariff_prediction: 관세 계산, 세율 문의, 관세 예측 관련  
-    3. qna: 일반적인 관세/통관 관련 질문, 법령 문의, 절차 안내
+    1. customs_tracking: 운송장, 배송, 통관, 조회, 추적, 배송상태, 위치, 도착, 출고, 통관번호, 운송장번호 등
+    2. tariff_prediction: 관세, 세금, 세율, 관세 계산, 관세 예측, 세금 얼마, 관세 얼마, 관세 계산해줘, 관세 예측해줘, 세금 예측, 예상 관세, 뭘 샀어, 뭐 샀어, 샀어, 구매 등
+    3. qna: 관세청 정보, 전화번호, 법령, 수입/수출 절차, 일반 안내, 기타 FAQ
 
     # 예시
     - "관세 예측해줘" → tariff_prediction
     - "관세 계산해줘" → tariff_prediction
-    - "관세 알려줘" → tariff_prediction
     - "이 물건의 세금이 얼마나 나올까?" → tariff_prediction
+    - "노트북 샀어" → tariff_prediction
+    - "미국에서 뭐 샀어" → tariff_prediction
     - "운송장 번호 123456 어디쯤이야?" → customs_tracking
+    - "배송이 어디까지 왔어?" → customs_tracking
     - "통관 진행 상황 알려줘" → customs_tracking
     - "관세청 전화번호 알려줘" → qna
     - "수입 절차가 궁금해" → qna
+    - "관세청 홈페이지 알려줘" → qna
+    - "관세법 제12조가 뭐야?" → qna
 
     사용자 쿼리: {query}
 
diff --git a/core/tariff_prediction/agent/step_api.py b/core/tariff_prediction/agent/step_api.py
@@ -6,7 +6,7 @@
 from core.shared.utils.llm import get_llm
 
 def tariff_prediction_step_api(req: TariffPredictionRequest) -> TariffPredictionResponse:
-    step = req.stepㄴ
+    step = req.step
     # Step 자동 분류: step이 'auto'이거나 비어 있으면 LLM으로 분류
     if not step or step == 'auto':
         llm = get_llm()
@@ -36,7 +36,7 @@ def tariff_prediction_step_api(req: TariffPredictionRequest) -> TariffPrediction
         return TariffPredictionResponse(
             step="hs10_select",
             hs10_candidates=hs10_candidates,
-            message="해당하는 HS10 코드를 선택해 주세요."
+            message="HS10 코드 후보를 선택해 주세요."
         )
     elif step == "hs10_select":
         # HS10 코드, 국가, 가격 등 입력받아 관세 계산
@@ -63,6 +63,6 @@ def tariff_prediction_step_api(req: TariffPredictionRequest) -> TariffPrediction
             )
     else:
         return TariffPredictionResponse(
-            step="input",
+            step="hs6_select",
             message="잘못된 요청입니다. 상품 설명을 입력해 주세요."
         ) 
diff --git a/core/tariff_prediction/data/HS6.csv b/core/tariff_prediction/data/HS6.csv
diff --git a/core/tariff_prediction/tools/calculate_tariff_amount.py b/core/tariff_prediction/tools/calculate_tariff_amount.py
@@ -130,6 +130,9 @@ def calculate_tariff_amount(product_code: str, value: float, origin_country: str
         if cur_unit is None:
             return f"환율 정보를 찾을 수 없습니다. 국가: {origin_country}"
         usd_rate = get_exchange_rate_api(cur_unit, situation)
+        # 환율이 None이면 더미값(1300.0)으로 대체
+        if usd_rate is None:
+            usd_rate = 1300.0
         
         # 관세 계산
         tax_info = calculate_tax_amount(value, item_count, shipping_cost, float(tariff_info['관세율']), usd_rate, situation)
diff --git a/core/tariff_prediction/tools/parse_hs_results.py b/core/tariff_prediction/tools/parse_hs_results.py
@@ -13,31 +13,20 @@ def parse_hs6_result(hs6_result: str) -> List[Dict]:
     lines = hs6_result.strip().split('\n')
     for line in lines:
         if line.strip() and any(char.isdigit() for char in line):
-            # "1. 8471.30.00 (확률: 85%)" 형태 파싱
-            match = re.search(r'(\d+)\.\s*([0-9]{4}\.[0-9]{2}\.[0-9]{2})\s*\(확률:\s*(\d+)%\)', line)
+            # "1. 851770 (확률: 98.02%)" 형태 파싱
+            match = re.search(r'(\d+)\.\s*([0-9]{6})\s*\(확률:\s*([0-9.]+)%\)', line)
             if match:
                 rank = int(match.group(1))
                 code = match.group(2)
-                confidence = int(match.group(3)) / 100.0
-                
-                # HS6 코드로 변환 (첫 6자리)
-                hs6_code = '.'.join(code.split('.')[:2])
-                
+                confidence = float(match.group(3)) / 100.0
+                hs6_code = code[:4] + '.' + code[4:]
                 candidates.append({
                     'code': hs6_code,
                     'description': f'HS6 코드 {hs6_code}',
                     'confidence': confidence,
                     'full_code': code
                 })
-    
-    # 파싱 실패 시 더미 데이터 반환
-    if not candidates:
-        candidates = [
-            {'code': '8471.30', 'description': '노트북 컴퓨터', 'confidence': 0.95},
-            {'code': '8471.40', 'description': '데스크톱 컴퓨터', 'confidence': 0.85},
-            {'code': '8471.50', 'description': '서버 컴퓨터', 'confidence': 0.75}
-        ]
-    
+    print(f"[DEBUG] parse_hs6_result candidates: {candidates}")
     return candidates
 
 @tool
diff --git a/core/tariff_prediction/tools/parse_user_input.py b/core/tariff_prediction/tools/parse_user_input.py
@@ -1,57 +1,104 @@
 from typing import Dict, Any
 import re
 from langchain_core.tools import tool
-
+from core.shared.utils.llm import get_llm
+import json
 from core.tariff_prediction.constants import SUPPORTED_COUNTRIES, REMOVE_KEYWORDS, PRICE_PATTERNS, QUANTITY_PATTERNS
 
-@tool
-def parse_user_input(user_input: str) -> Dict[str, Any]:
-    """자연어 입력을 파싱하여 상품 정보를 추출합니다."""
+def parse_user_input_rule(user_input: str) -> Dict[str, Any]:
     parsed = {}
-    
-    # 가격 정보 추출 (숫자 + 원/달러/엔/위안 등)
+    # 가격 정보 추출 (만원, 천원, 원, 달러, 엔, 위안 등)
+    price = None
     for pattern in PRICE_PATTERNS:
-        matches = re.findall(pattern, user_input)
-        if matches:
-            price_str = matches[0].replace(',', '')
-            if '만원' in user_input:
-                parsed['price'] = float(price_str) * 10000
-            elif '천원' in user_input:
-                parsed['price'] = float(price_str) * 1000
-            else:
-                parsed['price'] = float(price_str)
+        match = re.search(pattern, user_input)
+        if match:
+            price_str = match.group(1).replace(',', '')
+            unit = match.group(2) if len(match.groups()) > 1 else ''
+            try:
+                price = float(price_str)
+                if '만' in unit:
+                    price *= 10000
+                elif '천' in unit:
+                    price *= 1000
+                parsed['price'] = price
+                break
+            except Exception:
+                continue
+    # 수량 정보 추출 (숫자+개, 한 개, 두 개 등)
+    quantity = None
+    for pattern in QUANTITY_PATTERNS + [r'([한두세네]) ?개']:
+        match = re.search(pattern, user_input)
+        if match:
+            try:
+                if match.group(1).isdigit():
+                    quantity = int(match.group(1))
+                else:
+                    h2n = {'한':1, '두':2, '세':3, '네':4}
+                    quantity = h2n.get(match.group(1), 1)
+                parsed['quantity'] = quantity
+                break
+            except Exception:
+                continue
+    # 국가 정보 추출 (미국에서, 일본에서 등 조사 포함)
+    country = None
+    for c in SUPPORTED_COUNTRIES.keys():
+        if c in user_input:
+            country = c
+            parsed['country'] = c
             break
-    
-    # 수량 정보 추출
-    for pattern in QUANTITY_PATTERNS:
-        matches = re.findall(pattern, user_input)
-        if matches:
-            parsed['quantity'] = int(matches[0])
+        elif c + '에서' in user_input:
+            country = c
+            parsed['country'] = c
             break
-    
-    # 국가 정보 추출
-    countries = list(SUPPORTED_COUNTRIES.keys())
-    for country in countries:
-        if country in user_input:
-            parsed['country'] = country
-            break
-    
-    # 상품 묘사 추출 (가격, 수량, 국가 정보를 제외한 나머지 부분)
-    # 먼저 가격, 수량, 국가 관련 키워드를 제거
-    cleaned_input = user_input
-    for pattern in PRICE_PATTERNS + QUANTITY_PATTERNS:
-        cleaned_input = re.sub(pattern, '', cleaned_input)
-    
-    for country in countries:
-        cleaned_input = cleaned_input.replace(country, '')
-    
-    # 일반적인 키워드 제거
-    for keyword in REMOVE_KEYWORDS:
-        cleaned_input = cleaned_input.replace(keyword, '')
-    
-    # 상품 묘사로 사용할 부분 추출
-    cleaned_input = cleaned_input.strip()
-    if cleaned_input and len(cleaned_input) > 2:  # 의미있는 길이인 경우만
-        parsed['product_name'] = cleaned_input
-    
-    return parsed 
+    # 상품명/묘사 추출
+    cleaned = user_input
+    for pattern in PRICE_PATTERNS + QUANTITY_PATTERNS + [r'([한두세네]) ?개']:
+        cleaned = re.sub(pattern, '', cleaned)
+    if country:
+        cleaned = cleaned.replace(country, '')
+        cleaned = cleaned.replace(country + '에서', '')
+    for keyword in REMOVE_KEYWORDS + ['샀어요', '구매', '예측해줘', '관세', '예측', '해줘']:
+        cleaned = cleaned.replace(keyword, '')
+    cleaned = cleaned.strip()
+    if cleaned and len(cleaned) > 1:
+        parsed['product_name'] = cleaned
+    return parsed
+
+@tool
+def parse_user_input(user_input: str) -> Dict[str, Any]:
+    """자연어 입력을 LLM으로 파싱하여 상품 정보를 추출합니다. 실패 시 rule 기반 파싱을 fallback으로 사용합니다."""
+    prompt = f"""
+아래는 관세 예측을 위한 사용자 입력입니다. 입력에서 다음 정보를 추출해 JSON으로 반환하세요.
+- product_name: 상품명 또는 상품 설명 (예: 노트북, 운동화, 블루투스 이어폰)
+- country: 구매 국가 (예: 미국, 일본, 독일 등)
+- price: 상품 가격(숫자만, 단위는 원)
+- quantity: 수량(숫자, 없으면 1)
+
+입력: "{user_input}"
+
+반환 예시:
+{{
+  "product_name": "노트북",
+  "country": "미국",
+  "price": 1500000,
+  "quantity": 1
+}}
+
+반드시 위와 같은 JSON만 반환하세요.
+"""
+    try:
+        llm = get_llm()
+        response = llm.invoke([{"role": "user", "content": prompt}])
+        json_str = response.content if hasattr(response, 'content') else str(response)
+        if not isinstance(json_str, str):
+            raise ValueError('LLM 응답이 문자열이 아님')
+        json_start = json_str.find('{')
+        json_end = json_str.rfind('}') + 1
+        parsed = json.loads(json_str[json_start:json_end])
+        # 값이 하나라도 있으면 반환
+        if parsed and (parsed.get('product_name') or parsed.get('country') or parsed.get('price')):
+            return parsed
+    except Exception:
+        pass
+    # 실패 시 rule 기반 파싱
+    return parse_user_input_rule(user_input)