feat: add country-specific renewables retrieval and update CEVS computation to use Parquet datasets

hk-dev13 · hk-dev13 · commit 33d03e5787e0 · 2025-08-18T21:43:04.000+07:00
diff --git a/api/clients/eea_client.py b/api/clients/eea_client.py
@@ -94,6 +94,21 @@ def get_countries_renewables(self) -> List[Dict[str, Any]]:
             })
         return normalized_data
 
+    def get_country_renewables(self, country: Optional[str]) -> Optional[Dict[str, Any]]:
+        """Return the renewables record for ``country``, or ``None`` if there is no match.
+
+        Matching ignores case and surrounding whitespace. A missing or blank
+        ``country`` returns ``None`` without fetching the dataset.
+        """
+        wanted = (country or "").strip().lower()
+        if not wanted:  # a whitespace-only name must not match rows with an empty country
+            return None
+        for record in self.get_countries_renewables():
+            # ``or ""`` guards rows whose country is present but None; str() guards non-strings.
+            if str(record.get("country") or "").strip().lower() == wanted:
+                return record
+        return None
+
     def get_industrial_pollution(self) -> List[Dict[str, Any]]:
         """
         Mengambil dan menormalkan data tren polusi industri.
@@ -102,9 +117,46 @@ def get_industrial_pollution(self) -> List[Dict[str, Any]]:
         dataset_id = "industrial-releases-of-pollutants-to-water"
         raw_data = self._get_parquet_data(dataset_id)
         
-        # Logika normalisasi Anda sebelumnya sudah bagus, dapat diterapkan di sini
-        # ...
+        normalized_data = []
+        for record in raw_data:
+            year = record.get("year")
+            if not year:
+                continue
+                
+            def to_float(v):
+                try:
+                    return float(v) if v not in (None, "") else None
+                except Exception:
+                    return None
+                    
+            normalized_data.append({
+                "year": int(year),
+                "cd_hg_ni_pb": to_float(record.get("cd_hg_ni_pb")),
+                "toc": to_float(record.get("toc")),
+                "total_n": to_float(record.get("total_n")),
+                "total_p": to_float(record.get("total_p")),
+                "gva": to_float(record.get("gva")),
+            })
         
-        return raw_data # Kembalikan data yang dinormalisasi
+        # Sort by year
+        normalized_data.sort(key=lambda x: x.get("year", 0))
+        return normalized_data
+
+    def compute_pollution_trend(self, records: List[Dict[str, Any]], window: int = 3) -> Dict[str, Any]:
+        """Return the recent change in ``total_n`` and ``total_p`` across ``records``.
+
+        Per key, "slope" is the last minus the first of the trailing ``window`` numeric
+        (non-bool) values, taken in list order (a plain difference, not a regression);
+        fewer than two numeric values give 0.0. A ``window`` below 2 is treated as 2.
+        """
+        span = max(window, 2)  # a non-positive window would slice from the wrong end
+        def slope_for(key: str) -> Dict[str, Any]:
+            raw = (r.get(key) for r in records)
+            vals = [v for v in raw if isinstance(v, (int, float)) and not isinstance(v, bool)]
+            if len(vals) < 2:
+                return {"slope": 0.0, "increase": False}
+            s = float(vals[-1] - vals[-span:][0])  # slice clamps when fewer than span values exist
+            return {"slope": s, "increase": s > 0.0}
+        return {"total_n": slope_for("total_n"), "total_p": slope_for("total_p")}
 
 __all__ = ["EEAClient"]
diff --git a/api/services/cevs_aggregator.py b/api/services/cevs_aggregator.py
@@ -43,9 +43,8 @@ def compute_cevs_for_company(company_name: str, *, company_country: Optional[str
     iso_norm = iso_client.get_iso14001_certifications(country=company_country, limit=100)
     has_iso = any(_normalize_name(r.get("nama_perusahaan")) and company_key in _normalize_name(r.get("nama_perusahaan")) for r in iso_norm)
 
-    # EEA: use indicator sample plus new CSV-based datasets (renewables and industrial pollution)
+    # EEA: use new Parquet-based datasets (renewables and industrial pollution)
     eea_client = EEAClient()
-    eea_norm = eea_client.get_indicator(country=company_country or None, limit=50)
     # New: country renewables row and EU average row for comparison
     renew_row = eea_client.get_country_renewables(company_country) if company_country else None
     renew_all = eea_client.get_countries_renewables()
@@ -71,7 +70,6 @@ def compute_cevs_for_company(company_name: str, *, company_country: Optional[str
         "base": 50.0,
         "iso_bonus": 0.0,
         "epa_penalty": 0.0,
-        "eea_bonus": 0.0,
         "renewables_bonus": 0.0,
         "pollution_penalty": 0.0,
     "policy_bonus": 0.0,
@@ -86,11 +84,6 @@ def compute_cevs_for_company(company_name: str, *, company_country: Optional[str
     components["epa_penalty"] = -epa_penalty
     score -= epa_penalty
 
-    # EEA bonus placeholder: presence of any indicator entries yields small boost
-    eea_bonus = 5.0 if eea_norm else 0.0
-    components["eea_bonus"] = eea_bonus
-    score += eea_bonus
-
     # Renewables bonus (dynamic): reward exceeding target and EU average
     renew_bonus = 0.0
     renew_details: Dict[str, Any] = {}
@@ -204,17 +197,15 @@ def slope_for(key: str) -> Dict[str, Any]:
         "sources": {
             "epa_matches": len(epa_matches),
             "iso_count": len(iso_norm),
-            "eea_count": len(eea_norm),
-            "renewables_source": os.getenv("EEA_RENEWABLES_SOURCE") or os.getenv("EEA_CSV_URL") or "local:countries-breakdown-actual-res-progress-13.csv",
-            "pollution_source": os.getenv("EEA_POLLUTION_SOURCE") or "local:industrial-releases-of-pollutants-to.csv",
+            "renewables_source": os.getenv("EEA_RENEWABLES_SOURCE") or "EEA Parquet API",
+            "pollution_source": os.getenv("EEA_POLLUTION_SOURCE") or "EEA Parquet API",
             "edgar_source": os.getenv("EDGAR_XLSX_PATH") or "local:EDGAR_emiss_on_UCDB_2024.xlsx",
             "policy_source": os.getenv("POLICY_XLSX_PATH") or "local:Annex III_Best practices and justifications.xlsx",
             "pollution_trend_source": os.getenv("CEVS_POLLUTION_SOURCE") or "auto",
         },
         "details": {
             "epa": epa_matches,
             "iso": iso_norm,
-            "eea": eea_norm,
             "renewables": {"country_row": renew_row, "eu_row": eu_row, "bonus_calc": renew_details},
             "pollution_trend": pol_details or pol_trend,
             "policy": policy_details,