Skip to content

Commit dd154df

Browse files
committed
optimize training pipeline and fix bugs in training code
- Update Comet logger to auto-detect config from .comet.config file
- Add setup guard to prevent duplicate DataModule initialization
- Implement timestamp-based output directory organization
- Fix class weights calculation to use species indices correctly
- Enhance logging with training progress indicators

Performance improvements:
- Eliminate duplicate setup() calls during training initialization
- Reduce redundant dataset setup logging
1 parent 3a16bd5 commit dd154df

8 files changed

Lines changed: 479 additions & 1191 deletions

File tree

.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@ lightning_logs/
1515
results_temp_dir/
1616
.comet.config
1717

18+
# Training outputs
19+
outputs/
20+
test_outputs*/
21+
*.ckpt
22+
1823
# Python packaging
1924
*.egg-info/
2025
build/

examples/train.py

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
"""
1515

1616
import argparse
17+
import os
18+
from datetime import datetime
1719
import lightning as L
1820
from lightning.pytorch.callbacks import (
1921
ModelCheckpoint,
@@ -23,6 +25,10 @@
2325
from lightning.pytorch.loggers import TensorBoardLogger
2426
import torch
2527

28+
# Optimize CUDA performance for Tensor Cores
29+
if torch.cuda.is_available():
30+
torch.set_float32_matmul_precision("medium")
31+
2632
try:
2733
from lightning.pytorch.loggers import CometLogger
2834

@@ -39,6 +45,7 @@
3945

4046

4147
def main():
48+
4249
parser = argparse.ArgumentParser(description="Train NEON tree species classifier")
4350

4451
# Data arguments
@@ -100,29 +107,39 @@ def main():
100107
parser.add_argument(
101108
"--num_workers", type=int, default=4, help="Number of data loader workers"
102109
)
110+
parser.add_argument(
111+
"--distributed", action="store_true", help="Enable distributed training"
112+
)
103113

104114
# Logging arguments
105115
parser.add_argument(
106116
"--logger", type=str, default="tensorboard", choices=["tensorboard", "comet"]
107117
)
108-
parser.add_argument("--project_name", type=str, default="neon-tree-classification")
109118
parser.add_argument(
110-
"--experiment_name",
119+
"--output_dir",
111120
type=str,
112-
help="Experiment name (auto-generated if not provided)",
121+
help="Directory to save logs, checkpoints, and results (auto-generated if not provided)",
113122
)
114123

115124
args = parser.parse_args()
116125

117-
# Set up experiment name
118-
if args.experiment_name is None:
119-
args.experiment_name = (
120-
f"{args.modality}_{args.model_type}_{args.lr}_{args.batch_size}"
121-
)
126+
# Set up experiment name (auto-generate)
127+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
128+
experiment_name = (
129+
f"{args.modality}_{args.model_type}_{args.lr}_{args.batch_size}_{timestamp}"
130+
)
131+
132+
# Set up output directory (organize by modality and timestamp)
133+
if args.output_dir is None:
134+
args.output_dir = f"./outputs/{args.modality}_{timestamp}"
122135

123136
print(f"🌲 Training {args.modality.upper()} classifier: {args.model_type}")
124137
print(f"📁 Data: {args.data_dir}")
125-
print(f"🧪 Experiment: {args.experiment_name}")
138+
print(f"🧪 Experiment: {experiment_name}")
139+
print(f"💾 Output directory: {args.output_dir}")
140+
141+
# Create output directory if it doesn't exist
142+
os.makedirs(args.output_dir, exist_ok=True)
126143

127144
# Create data module
128145
datamodule = NeonCrownDataModule(
@@ -192,12 +209,10 @@ def main():
192209
"CometML not available. Install with: pip install comet-ml"
193210
)
194211
logger = CometLogger(
195-
project_name=args.project_name,
196-
experiment_name=args.experiment_name,
197-
save_dir="lightning_logs",
212+
save_dir=args.output_dir,
198213
)
199214
else:
200-
logger = TensorBoardLogger(save_dir="lightning_logs", name=args.experiment_name)
215+
logger = TensorBoardLogger(save_dir=args.output_dir, name=experiment_name)
201216

202217
# Set up callbacks
203218
callbacks = [
@@ -228,6 +243,7 @@ def main():
228243
# datamodule.setup() # Already called above for class detection
229244

230245
# Get class weights for imbalanced datasets
246+
print("⚖️ Calculating class weights...")
231247
class_weights = datamodule.get_class_weights()
232248
if class_weights is not None:
233249
classifier.class_weights = class_weights

neon_tree_classification/core/datamodule.py

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ def __init__(
142142
self.label_to_idx = None
143143
self.idx_to_label = None
144144
self.num_classes = None
145+
self._setup_done = False # Guard to prevent duplicate setup
145146

146147
def _create_default_transforms(self) -> Dict[str, Callable]:
147148
"""Create default transform functions."""
@@ -158,6 +159,11 @@ def setup(self, stage: Optional[str] = None) -> None:
158159
Args:
159160
stage: 'fit', 'validate', 'test', or 'predict'
160161
"""
162+
# Guard against duplicate setup
163+
if self._setup_done:
164+
print("⚡ DataModule already set up, skipping duplicate setup")
165+
return
166+
161167
if stage is None or stage in ["fit", "validate"]:
162168
# Create full dataset to analyze splits
163169
full_dataset = NeonCrownDataset(
@@ -209,6 +215,9 @@ def setup(self, stage: Optional[str] = None) -> None:
209215
print(f" Test samples: {len(self.test_dataset)}")
210216
print(f" Num classes: {self.num_classes}")
211217

218+
# Mark setup as complete
219+
self._setup_done = True
220+
212221
def _create_label_mapping(self, dataset: NeonCrownDataset) -> None:
213222
"""Create mapping between string labels and integer indices."""
214223
species_list = dataset.get_species_list()
@@ -392,20 +401,31 @@ def get_class_weights(self) -> torch.Tensor:
392401
if self.train_dataset is None:
393402
raise RuntimeError("Must call setup() before getting class weights")
394403

395-
# Count samples per class in training set
404+
print("🔄 Calculating class weights...")
405+
406+
# More efficient: count from the original data instead of loading samples
407+
# Get species from the training split's underlying data
396408
species_counts = {}
397-
for i in range(len(self.train_dataset)):
398-
sample = self.train_dataset[i]
399-
species = sample["species"]
400-
species_counts[species] = species_counts.get(species, 0) + 1
409+
410+
# Access the pandas DataFrame directly from train dataset
411+
train_data = self.train_dataset.data
412+
for _, row in train_data.iterrows():
413+
species = row["species"]
414+
# Convert to index using our mapping
415+
species_idx = self.label_to_idx[species]
416+
species_counts[species_idx] = species_counts.get(species_idx, 0) + 1
417+
418+
print(f"📊 Found {len(species_counts)} classes in training set")
401419

402420
# Calculate weights (inverse frequency)
403421
total_samples = sum(species_counts.values())
404422
weights = []
405-
for species in sorted(species_counts.keys()):
406-
weight = total_samples / (len(species_counts) * species_counts[species])
423+
# Sort by species index to maintain consistent ordering
424+
for species_idx in sorted(species_counts.keys()):
425+
weight = total_samples / (len(species_counts) * species_counts[species_idx])
407426
weights.append(weight)
408427

428+
print(f"✅ Class weights calculated successfully")
409429
return torch.tensor(weights, dtype=torch.float32)
410430

411431
def get_species_mapping(self) -> Dict[str, int]:

neon_tree_classification/models/lightning_modules.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -183,9 +183,7 @@ def configure_optimizers(self):
183183

184184
# Scheduler
185185
if self.hparams.scheduler == "plateau":
186-
scheduler = ReduceLROnPlateau(
187-
optimizer, mode="min", factor=0.5, patience=5, verbose=True
188-
)
186+
scheduler = ReduceLROnPlateau(optimizer, mode="min", factor=0.5, patience=5)
189187
return {
190188
"optimizer": optimizer,
191189
"lr_scheduler": {

0 commit comments

Comments (0)