Skip to content

Commit 3517ff7

Browse files
Merge pull request #2 from Noble-Lab/v0.2_dev
V0.2 dev
2 parents 627a2cb + 9aa79f8 commit 3517ff7

13 files changed

Lines changed: 909 additions & 1496 deletions

.gitignore

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,3 +160,9 @@ cython_debug/
160160
# and can be added to the global gitignore or merged into this file. For a more nuclear
161161
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
162162
#.idea/
163+
164+
# data folder for dev
165+
data/
166+
167+
# notebook for dev
168+
cellcyclenet/dev.ipynb

cellcyclenet/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '0.1.1'
1+
__version__ = '0.2.0'

cellcyclenet/interface.py

Lines changed: 39 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
from glob import glob
3030
from cellcyclenet.unet3d.model import UNet3D
3131
from cellcyclenet import models
32+
from cellcyclenet.unet2d import UNet2D
3233
from torch.utils.data import Dataset, DataLoader
3334
import torchvision.transforms.v2 as transforms
3435
from skimage.transform import downscale_local_mean
@@ -42,12 +43,10 @@
4243

4344
class CCN_Dataset(Dataset):
4445
'''Create a class to hold the PyTorch Dataset, input to PyTorch Dataloader.'''
45-
def __init__(self, X, y, norm_factor, scale_factors, transform, lazy_load):
46+
def __init__(self, X, y, transform, lazy_load):
4647
self.X = X
4748
self.y = y
4849
self.transform = transform
49-
self.norm_factor = norm_factor
50-
self.scale_factors = scale_factors
5150
self.lazy_load = lazy_load
5251

5352
def __len__(self):
@@ -66,8 +65,6 @@ def __getitem__(self, index):
6665
# if initialized with image fns (lazy loading), load, normalize, and scale image #
6766
else:
6867
image = imread(self.X[index])
69-
image = downscale_local_mean(image, self.scale_factors)
70-
image = image / self.norm_factor
7168

7269
# convert image to float 32 #
7370
X = np.asarray(image, dtype=np.float32)
@@ -92,40 +89,47 @@ def __getitem__(self, index):
9289

9390
class CellCycleNet:
9491

95-
def __init__(self, state_dict_path=None, is_3d=True):
    """Set up the compute device, build the 2D or 3D network, and load weights.

    Arguments:
        - state_dict_path [str|None] : path to a weights file; when None, the
          packaged pretrained weights for the chosen dimensionality are used
        - is_3d [bool] : build the 3D UNet when True, the 2D UNet otherwise
    """
    # run on the GPU when one is available, otherwise fall back to CPU #
    self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    self.is_3d = is_3d

    # build the network for the requested dimensionality #
    if self.is_3d:
        self.model = UNet3D(in_channels=1, out_channels=1, is_segmentation=False, f_maps=32)
    else:
        self.model = UNet2D(in_channels=1, init_features=32)
    self.model = torch.nn.DataParallel(self.model)

    # no user-supplied path: use the packaged pretrained weights #
    if state_dict_path is None:
        weight_name = 'pretrained-model_3D.pt' if self.is_3d else 'pretrained-model_2D.pt'
        state_dict_path = pkg_resources.files(models).joinpath(weight_name)

    # load weights to the GPU when available, otherwise remap onto CPU #
    if torch.cuda.is_available():
        state_dict = torch.load(state_dict_path)
    else:
        state_dict = torch.load(state_dict_path, map_location=torch.device('cpu'))
    self.model.load_state_dict(state_dict)
    self.model.to(self.device)

109119
################################################################################################
110120

111-
def create_dataset(self, dataframe, norm_factor, scale_factors, split_data=False, seed=15):
121+
def create_dataset(self, dataframe, split_data=False, seed=15):
112122
'''
113123
Creates training, validation, and testing dataframes containing GT labels and image filenames for each nucleus.
114124
(NOTE: it is assumed that the order of labels in the dataframe is the same as order of images in image_dir.)
115125
Arguments:
116126
- image_dir [str] : path to directory of single nucleus images
117127
- dataframe [pd.DataFrame] : dataframe containing GT labels; if None, it is assumed that user wants to proceed without using labels (use case 1)
118-
- norm_factor [int] : images are divided pixelwise by this value during loading (calculated as median value of the medians of each input image)
119-
- scale_factor [tuple] : Z,Y,X scale factors
120128
- split_data [bool] : flag to determine if input data is split into train/val/test sets (use case 2) or just a single dataset (use case 1)
121129
- seed [int] : random seed to use for train/val/test split
122130
Outputs:
123131
- train, val, test [pd.DataFrame] : dataframe containing GT label (if inputted) + image filename for each nucleus
124132
'''
125-
# set norm / scale factors as attributes (to be called by .train() and .predict() when creating dataloaders)
126-
self.norm_factor = norm_factor
127-
self.scale_factors = scale_factors
128-
129133
### FIXME small DF for debugging ###
130134
# dataframe = dataframe.iloc[::15]
131135

@@ -166,9 +170,10 @@ def run_epoch(self, dataloader, is_train):
166170
images.to(self.device)
167171
labels.to(self.device)
168172

169-
# reshape images to [batch, channel, Z, Y, X] #
170-
images = torch.swapaxes(images, 1, 2)
171-
images = torch.unsqueeze(images, 1)
173+
# for 3D images, reshape images to [batch, channel, Z, Y, X] #
174+
if self.is_3d:
175+
images = torch.swapaxes(images, 1, 2)
176+
images = torch.unsqueeze(images, 1)
172177

173178
### FORWARD PASS ###
174179
outputs = self.model(images)
@@ -234,13 +239,11 @@ def train(self, train_df, val_df, n_epochs, batch_size=4, initial_LR=1e-5, trans
234239
train_X_fn = train_df['filename'].values
235240
train_y = np.where(train_df['label'].values == 'G1', 0, 1)
236241
if lazy_load:
237-
train_dataloader = DataLoader(CCN_Dataset(train_X_fn, train_y, self.norm_factor, self.scale_factors, transform=transform, lazy_load=lazy_load),
242+
train_dataloader = DataLoader(CCN_Dataset(train_X_fn, train_y, transform=transform, lazy_load=lazy_load),
238243
batch_size=batch_size, shuffle=False)
239244
else:
240245
train_X = np.asarray([imread(fn) for fn in train_X_fn])
241-
train_X_ds = np.asarray([downscale_local_mean(image, self.scale_factors) for image in train_X])
242-
train_X_norm = np.asarray([image / self.norm_factor for image in train_X_ds])
243-
train_dataloader = DataLoader(CCN_Dataset(train_X_norm, train_y, self.norm_factor, self.scale_factors, transform=transform, lazy_load=lazy_load),
246+
train_dataloader = DataLoader(CCN_Dataset(train_X, train_y, transform=transform, lazy_load=lazy_load),
244247
batch_size=batch_size, shuffle=False)
245248

246249
# create dataloader for validation data #
@@ -249,13 +252,11 @@ def train(self, train_df, val_df, n_epochs, batch_size=4, initial_LR=1e-5, trans
249252
val_y = np.where(val_df['label'].values == 'G1', 0, 1)
250253

251254
if lazy_load:
252-
val_dataloader = DataLoader(CCN_Dataset(val_X_fn, val_y, self.norm_factor, self.scale_factors, transform=None, lazy_load=lazy_load),
255+
val_dataloader = DataLoader(CCN_Dataset(val_X_fn, val_y, transform=None, lazy_load=lazy_load),
253256
batch_size=batch_size, shuffle=False)
254257
else:
255258
val_X = np.asarray([imread(fn) for fn in val_X_fn])
256-
val_X_ds = np.asarray([downscale_local_mean(image, self.scale_factors) for image in val_X])
257-
val_X_norm = np.asarray([image / self.norm_factor for image in val_X_ds])
258-
val_dataloader = DataLoader(CCN_Dataset(val_X_norm, val_y, self.norm_factor, self.scale_factors, transform=None, lazy_load=lazy_load),
259+
val_dataloader = DataLoader(CCN_Dataset(val_X, val_y, transform=None, lazy_load=lazy_load),
259260
batch_size=batch_size, shuffle=False)
260261

261262
# track val acc for each epoch to check for improvement #
@@ -313,7 +314,7 @@ def predict(self, dataframe, with_labels, decision_threshold=0.5):
313314

314315
X_fn = dataframe['filename'].values
315316
y = np.where(dataframe['label'].values == 'G1', 0, 1) if with_labels else np.zeros(len(X_fn))
316-
dataloader = DataLoader(CCN_Dataset(X_fn, y, self.norm_factor, self.scale_factors, transform=None, lazy_load=True), batch_size=4, shuffle=False)
317+
dataloader = DataLoader(CCN_Dataset(X_fn, y, transform=None, lazy_load=True), batch_size=4, shuffle=False)
317318

318319
# Run through network #
319320
with torch.no_grad():
@@ -322,9 +323,10 @@ def predict(self, dataframe, with_labels, decision_threshold=0.5):
322323
images = images.to(self.device)
323324
labels = labels.to(self.device)
324325

325-
# Reshape to [batch, channels, Z, Y, X] #
326-
images = torch.swapaxes(images, 1, 2)
327-
images = torch.unsqueeze(images, 1)
326+
# for 3D images, reshape to [batch, channels, Z, Y, X] #
327+
if self.is_3d:
328+
images = torch.swapaxes(images, 1, 2)
329+
images = torch.unsqueeze(images, 1)
328330

329331
# Run inference #
330332
outputs = self.model(images)
@@ -422,7 +424,10 @@ def show_image(self, dataframe, index=None, with_preds=True, hide_plot=False, fi
422424
if not hide_plot:
423425
plt.figure(figsize=figsize)
424426
plt.axis('off')
425-
plt.imshow(np.max(image, axis=0))
427+
if self.is_3d:
428+
plt.imshow(np.max(image, axis=0))
429+
else:
430+
plt.imshow(image)
426431
plt.title(f'Index: {index} / Label: {label} / Pred: {pred} / Prob: {prob:.3f}', fontsize=10)
427432
plt.show()
428433

1.49 MB
Binary file not shown.
File renamed without changes.

cellcyclenet/unet2d.py

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
import torch
2+
import torch.nn as nn
3+
4+
class UNet2D(nn.Module):
    """
    2D version of the classification UNet architecture.

    An encoder of four conv levels (GroupNorm -> Conv2d -> ReLU), each level
    after the first preceded by 2x2 max pooling, followed by a global
    max-pool classification head that produces one logit per image.

    Args:
        in_channels (int): Number of input channels (default: 1)
        init_features (int): Number of features in first layer (default: 32).
            NOTE(review): levels 2-4 use GroupNorm with 8 groups, so
            init_features must be a multiple of 8.
        dropout_prob (float): Dropout probability in classification head (default: 0.5)
    """
    def __init__(self, in_channels=1, init_features=32, dropout_prob=0.5):
        super().__init__()

        # Store feature number for the first level
        features = init_features

        # Level 1 (No pooling). GroupNorm requires num_groups to divide the
        # channel count, so use a single group when there is only one channel.
        num_groups = 1 if in_channels == 1 else 8
        self.level1 = nn.Sequential(
            nn.GroupNorm(num_groups, in_channels),
            nn.Conv2d(in_channels, features, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True)
        )

        # Level 2
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.level2 = nn.Sequential(
            nn.GroupNorm(8, features),
            nn.Conv2d(features, features*2, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True)
        )

        # Level 3
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.level3 = nn.Sequential(
            nn.GroupNorm(8, features*2),
            nn.Conv2d(features*2, features*4, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True)
        )

        # Level 4
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.level4 = nn.Sequential(
            nn.GroupNorm(8, features*4),
            nn.Conv2d(features*4, features*8, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True)
        )

        # Classification head: global max pool -> flatten -> dropout -> linear
        self.classifier = nn.Sequential(
            nn.AdaptiveMaxPool2d(output_size=(1, 1)),
            nn.Flatten(),
            nn.Dropout(p=dropout_prob),
            nn.Linear(features*8, 1)
        )

    def _encode(self, x):
        """
        Run the shared encoder path: (batch, C, H, W) -> (batch, 8*features, H/8, W/8).

        Factored out of forward() and get_embedding() so the two paths cannot
        drift apart (they previously duplicated these four levels verbatim).
        """
        x1 = self.level1(x)                  # (batch, F,   H,   W)
        x2 = self.level2(self.pool2(x1))     # (batch, 2F,  H/2, W/2)
        x3 = self.level3(self.pool3(x2))     # (batch, 4F,  H/4, W/4)
        x4 = self.level4(self.pool4(x3))     # (batch, 8F,  H/8, W/8)
        return x4

    def forward(self, x):
        """
        Forward pass of the model.

        Args:
            x (torch.Tensor): Input tensor of shape (batch, channels, H, W)

        Returns:
            torch.Tensor: Output tensor of shape (batch, 1)
        """
        return self.classifier(self._encode(x))

    def get_embedding(self, x):
        """
        Return the pooled, flattened encoder features of shape (batch, 8*features).

        Applies the classification head up to (but not including) dropout and
        the final linear layer.
        """
        x4 = self._encode(x)
        pooled = self.classifier[0](x4)        # global max pool -> (batch, 8F, 1, 1)
        embedding = self.classifier[1](pooled)  # flatten -> (batch, 8F)
        return embedding
111+
112+
if __name__ == "__main__":
    # Smoke-test: run one random batch through the network and report shapes.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net = UNet2D().to(device)

    # Random input batch (batch_size=4, channels=1, height=64, width=64)
    sample = torch.randn(4, 1, 64, 64).to(device)
    preds = net(sample)

    print(f"Input shape: {sample.shape}")
    print(f"Output shape: {preds.shape}")

    # Dump the module tree for a quick architecture sanity check
    print("\nModel Architecture:")
    print(net)

0 commit comments

Comments
 (0)