Keep up with updates: retune BNN prior and output head, add early stopping and new training defaults

Aaron Higuera · Aaron Higuera · commit 6283d2bc7d3e · 2024-03-26T12:11:46.000-05:00
diff --git a/DUNE/RiceBayes/BNN_model.py b/DUNE/RiceBayes/BNN_model.py
@@ -1,13 +1,12 @@
 from functools import partial
 import tensorflow as tf
 from tensorflow.keras import layers, models
-from tensorflow.keras.callbacks import ModelCheckpoint
 from tensorflow.keras.optimizers.legacy import SGD
 import tensorflow_probability as tfp
 tfd = tfp.distributions
 tfpl = tfp.layers
 
-num_samples = 1300667
+num_samples = 1510865
 
 
 def kl_approx(q, p, q_tensor):
@@ -30,7 +29,7 @@ def kl_approx(q, p, q_tensor):
 
 def prior(dtype, shape, name, trainable, add_variable_fn):
     """
-    Creates an Independent multivariate normal distribution as a prior.
+    Creates a customized multivariate normal distribution as a prior.
 
     Args:
         dtype: The data type of the distribution's parameters.
@@ -42,8 +41,8 @@ def prior(dtype, shape, name, trainable, add_variable_fn):
     Returns:
         tfd.Independent: The Independent multivariate normal distribution.
     """
-    dist = tfd.MultivariateNormalDiag(loc=1.2 * tf.ones(shape),
-                                      scale_diag=3.0*tf.ones(shape))
+    dist = tfd.MultivariateNormalDiag(loc = 1.0*tf.ones(shape),
+                                        scale_diag = 1.5*tf.ones(shape))
     
     batch_ndims = tf.size(dist.batch_shape_tensor())
     
@@ -163,7 +162,7 @@ def bayes_model(input_shape=(200,200,3)):
     inputs = layers.Input(shape=input_shape, name='inputs')
     
     x = get_convolution_reparameterization(16, 3, 'swish')(inputs)
-    x = layers.BatchNormalization()(x)
+    x = layers.BatchNormalization(name='batchnorm_0')(x)
     x = layers.ReLU()(x)
     x = layers.MaxPooling2D(3, strides=2, padding='same')(x)
     
@@ -172,48 +171,23 @@ def bayes_model(input_shape=(200,200,3)):
     
     for i in range(num_blocks):
         x = residual_block(x, filters[i], kernel_size = 3,
-                          padding = 'same', activation = tf.nn.silu,
+                          padding = 'same', activation = None,
                           pool_size = (2, 2), strides = (1, 1),
                           name = 'residual_block'+str(i))
 
     x = tf.keras.layers.GlobalMaxPooling2D()(x)
-    '''
-    x = tfpl.DenseReparameterization(
-        units=64,  # This matches the number of units from the Dense layer
-        activation='relu',  # Activation can be directly specified here
-        kernel_posterior_fn=tfpl.default_mean_field_normal_fn(is_singular=False),
-        kernel_prior_fn=tfpl.default_multivariate_normal_fn,
-        bias_posterior_fn=tfpl.default_mean_field_normal_fn(is_singular=False),
-        bias_prior_fn=tfpl.default_multivariate_normal_fn,
-        kernel_divergence_fn=adjusted_divergence_fn,
-        bias_divergence_fn=adjusted_divergence_fn,
-        name='dense_reparam1'
-        )(x)
-        
-    x = tfpl.DenseReparameterization(
-        units=32,  # This matches the number of units from the Dense layer
-        activation='sigmoid',  # Activation can be directly specified here
-        kernel_posterior_fn=tfpl.default_mean_field_normal_fn(is_singular=False),
-        kernel_prior_fn=tfpl.default_multivariate_normal_fn,
-        bias_posterior_fn=tfpl.default_mean_field_normal_fn(is_singular=False),
-        bias_prior_fn=tfpl.default_multivariate_normal_fn,
-        kernel_divergence_fn=adjusted_divergence_fn,
-        bias_divergence_fn=adjusted_divergence_fn,
-        name='dense_reparam2'
-        )(x)
-    '''           
+    
     x = tfpl.DenseReparameterization(
-        units = tfpl.CategoricalMixtureOfOneHotCategorical.params_size(3, 5), activation = None,
+        units = tfpl.CategoricalMixtureOfOneHotCategorical.params_size(3, 1), activation = None,
         kernel_posterior_fn = tfpl.default_mean_field_normal_fn(is_singular=False),
         kernel_prior_fn = tfpl.default_multivariate_normal_fn,  
         bias_prior_fn = tfpl.default_multivariate_normal_fn,
         bias_posterior_fn = tfpl.default_mean_field_normal_fn(is_singular=False),
         kernel_divergence_fn=divergence_fn,
-        bias_divergence_fn=divergence_fn,
-        name = 'dense_reparam3')(x)
-
-    x = tfpl.CategoricalMixtureOfOneHotCategorical(event_size = 3, num_components = 5, name = 'output')(x)   
-
+        name = 'dense_reparam1')(x)
+       
+    x = tfpl.CategoricalMixtureOfOneHotCategorical(event_size = 3, num_components = 1, name = 'output')(x)   
+    
     model = models.Model(inputs, outputs=x, name='Rice_BNN')
     
     return model
diff --git a/DUNE/RiceBayes/train.py b/DUNE/RiceBayes/train.py
@@ -10,8 +10,8 @@
 from generator_class import DataGenerator
 
 import tensorflow as tf
-from tensorflow.keras import datasets, layers, models, optimizers, callbacks
-from tensorflow.keras.callbacks import ModelCheckpoint
+from tensorflow.keras import layers, models, optimizers, callbacks
+from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
 from tensorflow.keras.optimizers.legacy import SGD
 
 #GPU/CPU Selection
@@ -22,7 +22,7 @@ class LearningRateSchedulerPlateau(callbacks.Callback):
     '''
     Learning rate scheduler
     '''
-    def __init__(self, factor=0.5, patience=5, min_lr=1e-6):
+    def __init__(self, factor=0.5, patience=5, min_lr=1e-4):
         super(LearningRateSchedulerPlateau, self).__init__()
         self.factor = factor          # Factor by which the learning rate will be reduced
         self.patience = patience      # Number of epochs with no improvement after which learning rate will be reduced
@@ -81,8 +81,8 @@ def nll(y_true, y_pred):
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument('--num_epochs', type=int, default=1, help='Number of epochs')
-    parser.add_argument('--batch_size', type=int, default=256, help='Batch size')
-    parser.add_argument('--learning_rate', type=float, default=1e-3, help='Learning rate')
+    parser.add_argument('--batch_size', type=int, default=64, help='Batch size')
+    parser.add_argument('--learning_rate', type=float, default=1e-2, help='Learning rate')
     parser.add_argument('--pixel_map_size', type=int, default=200, help='Pixel map size square shape')
     parser.add_argument('--pixel_maps', type=str, help='Pre-selected pixel maps ')
     parser.add_argument('--test_name', type=str, default='test', help='name of model and plots')
@@ -117,12 +117,14 @@ def nll(y_true, y_pred):
     lr_scheduler = LearningRateSchedulerPlateau(factor=0.5, patience=5, min_lr=1e-6)
     history_filename = args.test_name+'_training_history.json'
     history_saver = SaveHistoryToFile(history_filename)
+    early_stopper = EarlyStopping(monitor='val_loss', patience=3, min_delta=0.01, mode='min',
+                                  restore_best_weights=True)
 
     train_generator = DataGenerator(partition['train'], **params)
     validation_generator = DataGenerator(partition['validation'], **params)    
     
     model.fit(train_generator,validation_data=validation_generator,
-              epochs=args.num_epochs, callbacks=[lr_scheduler, history_saver])
+              epochs=args.num_epochs, callbacks=[lr_scheduler, history_saver, early_stopper])
     
     # for inferences need to save weights
     weights = args.test_name+'.h5'