Skip to content

Commit 8f42dfe

Browse files
committed
[feat] 제품 스크래핑 및 클러스터 생성 management command 추가
1 parent 378f953 commit 8f42dfe

4 files changed

Lines changed: 357 additions & 0 deletions

File tree

moodico/products/management/__init__.py

Whitespace-only changes.

moodico/products/management/commands/__init__.py

Whitespace-only changes.
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
# moodico/products/management/commands/generate_clusters.py
2+
3+
# How to run:
4+
'''
5+
python manage.py generate_clusters --input data/all_products.json
6+
'''
7+
import os
8+
import json
9+
import numpy as np
10+
from django.core.management.base import BaseCommand, CommandError
11+
from django.conf import settings
12+
from django.contrib.staticfiles import finders
13+
from sklearn.cluster import KMeans
14+
from sklearn.metrics import silhouette_score
15+
from sklearn.preprocessing import StandardScaler
16+
17+
# --- helpers from your script ---
18+
def hex_to_rgb(hex_code):
    """Convert a hex color string to an ``(r, g, b)`` tuple of 0-255 ints.

    Accepts both 6-digit (``#aabbcc``) and 3-digit shorthand (``#abc``)
    forms, with or without the leading ``#``.

    Raises:
        ValueError: if the string is not valid hexadecimal.
    """
    # NOTE(review): renamed the parameter from `hex`, which shadowed the
    # builtin `hex()` function; positional call sites are unaffected.
    hex_code = hex_code.lstrip('#')
    if len(hex_code) == 3:
        # Expand shorthand: each digit doubles ('abc' -> 'aabbcc').
        hex_code = ''.join(c * 2 for c in hex_code)
    r = int(hex_code[:2], 16)
    g = int(hex_code[2:4], 16)
    b = int(hex_code[4:], 16)
    return r, g, b
26+
27+
def rgb_to_hsl(r, g, b):
    """Convert 8-bit RGB to HSL: hue in degrees, saturation/lightness in 0..1."""
    rn, gn, bn = r / 255, g / 255, b / 255
    hi = max(rn, gn, bn)
    lo = min(rn, gn, bn)
    light = (hi + lo) / 2
    if hi == lo:
        # Achromatic pixel: hue and saturation collapse to zero.
        hue = sat = 0
    else:
        delta = hi - lo
        sat = delta / (2 - hi - lo) if light > 0.5 else delta / (hi + lo)
        # Hue sector depends on which channel dominates.
        if hi == rn:
            hue = (gn - bn) / delta + (6 if gn < bn else 0)
        elif hi == gn:
            hue = (bn - rn) / delta + 2
        else:
            hue = (rn - gn) / delta + 4
        hue /= 6
    return hue * 360, sat, light
44+
45+
def calculate_coordinates(h, s, l):
46+
if h >= 330 or h < 60:
47+
if h >= 330:
48+
h -= 360
49+
warm_cool_score = (h + 30) / 90
50+
elif 60 <= h < 180:
51+
warm_cool_score = 1 - ((h - 60) / 120)
52+
elif 180 <= h < 300:
53+
warm_cool_score = -((h - 180) / 120)
54+
else:
55+
warm_cool_score = -1 + ((h - 300) / 30)
56+
57+
if s < 0.05:
58+
warm_cool_score = 0
59+
else:
60+
warm_cool_score *= s**0.8
61+
62+
if l < 0.1 or l > 0.9:
63+
warm_cool_score *= (1 - ((abs(0.5 - l) * 2)**2))
64+
65+
final_warm = (warm_cool_score + 1) * 50
66+
final_warm = max(0, min(100, final_warm))
67+
final_deep = (1 - l) * 100
68+
69+
return round(final_warm, 2), round(final_deep, 2)
70+
71+
class Command(BaseCommand):
    """Cluster product colors with KMeans and write JSON results under MEDIA_ROOT/data."""

    help = "Generate color clusters from product JSON and save to MEDIA_ROOT/data."

    def add_arguments(self, parser):
        """Register CLI options: --input, --clusters, --no-silhouette."""
        parser.add_argument(
            "--input",
            default="data/test_products.json",
            help="Input JSON path relative to a static dir (e.g., data/test_products.json).",
        )
        parser.add_argument(
            "--clusters",
            type=int,
            default=4,
            help="Number of KMeans clusters (default: 4).",
        )
        parser.add_argument(
            "--no-silhouette",
            action="store_true",
            help="Skip silhouette score sweep.",
        )

    def handle(self, *args, **opts):
        """Load product JSON, derive color coordinates, run KMeans, save outputs.

        Raises:
            CommandError: if the input file cannot be located, or if no
                products carry usable color data.
        """
        input_rel = opts["input"]
        n_clusters = opts["clusters"]
        skip_sil = opts["no_silhouette"]

        # 1) Find input JSON via staticfiles finder; fallback to BASE_DIR/static
        src = finders.find(input_rel)
        if not src:
            fallback = os.path.join(settings.BASE_DIR, "static", input_rel)
            if os.path.exists(fallback):
                src = fallback
        if not src:
            raise CommandError(f"Input not found: {input_rel}")

        with open(src, "r", encoding="utf-8") as f:
            products = json.load(f)

        # Build the feature matrix: [warmCool, lightDeep, L, a, b] per product.
        coordinates = []
        valid_products = []
        for p in products:
            hex_color = p.get("hex")
            if not hex_color:
                continue
            try:
                r, g, b = hex_to_rgb(hex_color)
                h, s, l = rgb_to_hsl(r, g, b)
                warm, deep = calculate_coordinates(h, s, l)
                # LAB components default to 0 when absent so the row stays usable.
                lab_l = p.get("lab_l", 0)
                lab_a = p.get("lab_a", 0)
                lab_b = p.get("lab_b", 0)

                p["warmCool"] = warm
                p["lightDeep"] = deep
                coordinates.append([warm, deep, lab_l, lab_a, lab_b])
                valid_products.append(p)
            except Exception:
                # Malformed color data: skip the product rather than abort the run.
                continue

        if not coordinates:
            raise CommandError("No valid products with color data found.")

        # 2) Cluster — standardize first, then fit
        coords_np = np.array(coordinates, dtype=float)
        coords_std = StandardScaler().fit_transform(coords_np)

        # Fixed random_state keeps cluster assignments reproducible across runs.
        kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init='auto')
        labels = kmeans.fit_predict(coords_std)

        for i, label in enumerate(labels):
            # int() so the label is JSON-serializable (numpy ints are not).
            valid_products[i]["cluster"] = int(label)

        # 3) Save outputs under MEDIA_ROOT/data (writable in prod)
        out_dir = os.path.join(settings.MEDIA_ROOT, "data")
        os.makedirs(out_dir, exist_ok=True)

        products_out = os.path.join(out_dir, "products_clustered.json")
        with open(products_out, "w", encoding="utf-8") as f:
            json.dump(valid_products, f, ensure_ascii=False, indent=2)

        centers_out = os.path.join(out_dir, "cluster_centers.json")
        with open(centers_out, "w", encoding="utf-8") as f:
            json.dump(kmeans.cluster_centers_.tolist(), f, ensure_ascii=False, indent=2)

        self.stdout.write(self.style.SUCCESS(f"Wrote:\n {products_out}\n {centers_out}"))

        # 4) Optional: silhouette sweep (k=2..10 or up to len-1)
        # silhouette_score needs 2 <= k <= n_samples - 1, hence the cap below.
        if not skip_sil:
            max_k = min(len(coords_std) - 1, 10)
            for k in range(2, max_k + 1):
                mdl = KMeans(n_clusters=k, random_state=42, n_init='auto')
                lab = mdl.fit_predict(coords_std)
                sc = silhouette_score(coords_std, lab)
                self.stdout.write(f"k={k} Silhouette={sc:.4f}")
Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
# moodico/products/management/commands/scrape_products.py
2+
3+
## How to run:
4+
'''
5+
python manage.py scrape_products --brands romand,3ce --scroll 4 --limit 5
6+
'''
7+
import os
8+
import time
9+
import json
10+
import uuid
11+
import requests
12+
import numpy as np
13+
from io import BytesIO
14+
from PIL import Image
15+
from skimage import color
16+
17+
from django.core.management.base import BaseCommand
18+
from django.conf import settings
19+
20+
# Selenium
21+
from selenium import webdriver
22+
from selenium.webdriver.chrome.service import Service
23+
from selenium.webdriver.common.by import By
24+
from webdriver_manager.chrome import ChromeDriverManager
25+
26+
27+
# Default scrape targets: brand identifier, listing-page URL, and the
# category label stored on each scraped product.
TARGETS_DEFAULT = [
    {"brand": "romand", "url": "https://romand.co.kr/product/maincatedetail.html?cate_code=289", "category": "Lips"},
    {"brand": "3ce", "url": "https://www.3cecosmetics.com/all-products/lips", "category": "Lips"},
    {"brand": "3ce", "url": "https://www.3cecosmetics.com/all-products/cheeks/blush", "category": "blush"},
    {"brand": "3ce", "url": "https://www.3cecosmetics.com/all-products/eyes/eyeshadow", "category": "eyeshadow"},
]
33+
34+
def extract_romand_items(driver, category):
    """Scrape product cards from a Romand listing page already loaded in *driver*.

    Returns a list of dicts with brand/category/name/color_name/image/price/url.
    Cards missing any expected element are logged and skipped.
    """
    results = []
    for card in driver.find_elements(By.CSS_SELECTOR, 'li.list_prd_item'):
        try:
            title = card.find_element(By.CSS_SELECTOR, '.prd_title').text
            img_src = card.find_element(By.CSS_SELECTOR, 'img').get_attribute('src')
            price_text = card.find_element(By.CSS_SELECTOR, '.current_price').text.strip()
            link = card.find_element(By.CSS_SELECTOR, 'a').get_attribute('href')
        except Exception as e:
            print("Romand Error:", e)
            continue
        results.append({
            "brand": "romand",
            "category": category,
            "name": title,
            # Romand encodes the shade after the last '/' in the title.
            "color_name": title.split('/')[-1].strip(),
            "image": img_src,
            "price": price_text,
            "url": link,
        })
    return results
56+
57+
def extract_3ce_items(driver, category):
    """Scrape product cards from a 3CE listing page already loaded in *driver*.

    Relative hrefs/image srcs are absolutized against the 3CE domain.
    Cards missing any expected element are logged and skipped.
    """
    collected = []
    cards = driver.find_elements(By.CSS_SELECTOR, 'li.tce-grid__item')
    for card in cards:
        try:
            title = card.find_element(By.CSS_SELECTOR, 'h2.tce-product-card__name').text.strip()
            href = card.find_element(By.CSS_SELECTOR, 'a.tce-product-card__link').get_attribute("href")
            price_text = card.find_element(By.CSS_SELECTOR, '.tce-product-card__price').text.strip()
            img_src = card.find_element(By.CSS_SELECTOR, 'img.tce-product-card__image').get_attribute("src")

            entry = {
                "brand": "3CE",
                "category": category,
                "name": title,
                # Shade name follows the last '/', when present.
                "color_name": title.split('/')[-1].strip() if '/' in title else title,
                "url": f"https://www.3cecosmetics.com{href}" if href.startswith('/') else href,
                "image": f"https://www.3cecosmetics.com{img_src}" if img_src.startswith('/') else img_src,
                "price": price_text,
            }
            collected.append(entry)
        except Exception as e:
            print("3CE Error:", e)
            continue
    return collected
80+
81+
def extract_average_color(img_url):
    """Remove near-white background, compute average color; return (hex, L, a, b).

    Downloads the image, downsamples to 50x50, averages the non-background
    pixels, and converts the mean to hex plus CIELAB. On any failure the
    sentinel ("#000000", 0.0, 0.0, 0.0) is returned and the error is logged.
    """
    try:
        resp = requests.get(img_url, timeout=8)
        resp.raise_for_status()
        thumb = Image.open(BytesIO(resp.content)).convert('RGB').resize((50, 50))
        pixels = np.array(thumb).reshape(-1, 3)

        # Treat pixels with every channel above 240 as background; if the
        # whole image is that bright, fall back to averaging everything.
        kept = [px for px in pixels if not all(c > 240 for c in px)]
        if not kept:
            kept = pixels

        mean_rgb = np.array(kept).mean(axis=0)
        r, g, b = map(int, mean_rgb)
        hex_code = '#{:02x}{:02x}{:02x}'.format(r, g, b)

        # rgb2lab expects an (H, W, 3) float image in [0, 1].
        lab = color.rgb2lab(np.array([[mean_rgb]]) / 255.0)[0][0]
        lab_l, lab_a, lab_b = lab.round(2)
        return hex_code, float(lab_l), float(lab_a), float(lab_b)
    except Exception as e:
        print(f"[Color Error] {img_url} - {e}")
        return "#000000", 0.0, 0.0, 0.0
107+
108+
109+
class Command(BaseCommand):
    """Scrape cosmetic products (Romand / 3CE) with Selenium and dump JSON."""

    help = "Scrape cosmetic products and dump a JSON file under MEDIA_ROOT/data/test_products.json"

    def add_arguments(self, parser):
        """Register CLI options controlling scrolling, output, sampling, brands."""
        parser.add_argument("--scroll", type=int, default=4, help="Scroll count per page (default: 4)")
        parser.add_argument("--headful", action="store_true", help="Run Chrome with UI (not headless)")
        parser.add_argument("--output", default="data/test_products.json",
                            help="Output path under MEDIA_ROOT (default: data/test_products.json)")
        parser.add_argument("--limit", type=int, default=10,
                            help="Number of examples to keep in test JSON (first N + last N, default: 10)")
        parser.add_argument("--brands", default="romand,3ce",
                            help="Comma-separated brands to scrape (romand,3ce). Default: both")

    def handle(self, *args, **opts):
        """Drive the scrape: load each target page, extract items, enrich with color, dump JSON."""
        scroll_count = opts["scroll"]
        headless = not opts["headful"]
        brands = {b.strip().lower() for b in opts["brands"].split(",") if b.strip()}
        output_rel = opts["output"]
        test_limit = max(1, int(opts["limit"]))

        # Selenium options
        options = webdriver.ChromeOptions()
        if headless:
            options.add_argument("--headless=new")
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")

        # Driver (downloads matching ChromeDriver if needed)
        driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

        # Targets restricted to the requested brands.
        targets = [t for t in TARGETS_DEFAULT if t["brand"].lower() in brands]

        all_products = []
        try:
            for target in targets:
                brand = target["brand"]
                url = target["url"]
                category = target["category"]
                self.stdout.write(f"Scraping {brand} ({category}) ...")

                driver.get(url)
                time.sleep(2)

                # Scroll to trigger lazy-loading of additional products.
                for _ in range(scroll_count):
                    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
                    time.sleep(2)

                if brand.lower() == "romand":
                    raw_items = extract_romand_items(driver, category)
                elif brand.lower() == "3ce":
                    raw_items = extract_3ce_items(driver, category)
                else:
                    self.stdout.write(self.style.WARNING(f"No extractor for: {brand}"))
                    continue

                self.stdout.write(f" → {len(raw_items)} items found")

                # enrich with color
                for item in raw_items:
                    hex_color, lab_l, lab_a, lab_b = extract_average_color(item["image"])
                    product = {
                        "id": str(uuid.uuid4()),
                        **item,
                        "hex": hex_color,
                        "lab_l": lab_l,
                        "lab_a": lab_a,
                        "lab_b": lab_b,
                    }
                    all_products.append(product)
        finally:
            # Always release the browser, even if a page blows up mid-scrape.
            driver.quit()

        # Save under MEDIA_ROOT/data/...
        out_path = os.path.join(settings.MEDIA_ROOT, output_rel)
        os.makedirs(os.path.dirname(out_path), exist_ok=True)

        # Keep a small test set: first N + last N. FIX: the previous
        # unconditional slicing duplicated every product whenever fewer than
        # 2*limit items were scraped (e.g. 5 items with the default limit of
        # 10 were each written twice).
        if len(all_products) <= 2 * test_limit:
            test_products = all_products
        else:
            test_products = all_products[:test_limit] + all_products[-test_limit:]

        with open(out_path, "w", encoding="utf-8") as f:
            json.dump(test_products, f, ensure_ascii=False, indent=2)

        self.stdout.write(self.style.SUCCESS(
            f"Saved {len(test_products)} items to {out_path} (from {len(all_products)} scraped)"))

0 commit comments

Comments
 (0)