|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | + |
| 3 | +from keras.optimizers import SGD |
| 4 | +from keras.layers import Input, merge, ZeroPadding2D |
| 5 | +from keras.layers.core import Dense, Dropout, Activation |
| 6 | +from keras.layers.convolutional import Convolution2D |
| 7 | +from keras.layers.pooling import AveragePooling2D, GlobalAveragePooling2D, MaxPooling2D |
| 8 | +from keras.layers.normalization import BatchNormalization |
| 9 | +from keras.models import Model |
| 10 | +import keras.backend as K |
| 11 | + |
| 12 | +from sklearn.metrics import log_loss |
| 13 | + |
| 14 | +from custom_layers.scale_layer import Scale |
| 15 | + |
| 16 | +from load_cifar10 import load_cifar10_data |
| 17 | + |
def densenet121_model(img_rows, img_cols, color_type=1, nb_dense_block=4, growth_rate=32, nb_filter=64, reduction=0.5, dropout_rate=0.0, weight_decay=1e-4, num_classes=None):
    '''
    DenseNet 121 Model for Keras

    Model Schema is based on
    https://github.com/flyyufelix/DenseNet-Keras

    ImageNet Pretrained Weights
    Theano: https://drive.google.com/open?id=0Byy2AcGyEVxfMlRYb3YzV210VzQ
    TensorFlow: https://drive.google.com/open?id=0Byy2AcGyEVxfSTA4SHJVOHNuTXc

    # Arguments
        img_rows: input image height
        img_cols: input image width
        color_type: number of input channels (1 = grayscale, 3 = RGB)
        nb_dense_block: number of dense blocks to add to end
        growth_rate: number of filters to add per dense block
        nb_filter: initial number of filters (64 matches the pretrained weights)
        reduction: reduction factor of transition blocks.
        dropout_rate: dropout rate
        weight_decay: weight decay factor
        num_classes: number of classes for the replacement softmax head
    # Returns
        A compiled Keras model instance with ImageNet weights loaded and the
        1000-way classifier replaced by a `num_classes`-way softmax.
    '''
    # Epsilon the pretrained BatchNorm layers were trained with; must match
    # exactly or the loaded statistics are slightly off.
    eps = 1.1e-5

    # compute compression factor
    compression = 1.0 - reduction

    # Handle Dimension Ordering for different backends.
    # concat_axis is global because conv_block/transition_block/dense_block
    # read it when concatenating feature maps.
    global concat_axis
    if K.image_dim_ordering() == 'tf':
        concat_axis = 3
        img_input = Input(shape=(img_rows, img_cols, color_type), name='data')
    else:
        concat_axis = 1
        img_input = Input(shape=(color_type, img_rows, img_cols), name='data')

    # From architecture for ImageNet (Table 1 in the paper).
    # FIX: the original re-assigned nb_filter = 64 here, silently discarding
    # the nb_filter argument; the override is removed so the parameter is
    # honoured (the default of 64 preserves prior behavior and is required
    # for the pretrained weights to fit).
    nb_layers = [6, 12, 24, 16]  # For DenseNet-121

    # Initial convolution
    x = ZeroPadding2D((3, 3), name='conv1_zeropadding')(img_input)
    x = Convolution2D(nb_filter, 7, 7, subsample=(2, 2), name='conv1', bias=False)(x)
    x = BatchNormalization(epsilon=eps, axis=concat_axis, name='conv1_bn')(x)
    x = Scale(axis=concat_axis, name='conv1_scale')(x)
    x = Activation('relu', name='relu1')(x)
    x = ZeroPadding2D((1, 1), name='pool1_zeropadding')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2), name='pool1')(x)

    # Add dense blocks; each (except the last) is followed by a transition
    # block that compresses the channel count by `compression`.
    for block_idx in range(nb_dense_block - 1):
        stage = block_idx + 2
        x, nb_filter = dense_block(x, stage, nb_layers[block_idx], nb_filter, growth_rate, dropout_rate=dropout_rate, weight_decay=weight_decay)

        # Add transition_block
        x = transition_block(x, stage, nb_filter, compression=compression, dropout_rate=dropout_rate, weight_decay=weight_decay)
        nb_filter = int(nb_filter * compression)

    # Final dense block has no trailing transition.
    final_stage = stage + 1
    x, nb_filter = dense_block(x, final_stage, nb_layers[-1], nb_filter, growth_rate, dropout_rate=dropout_rate, weight_decay=weight_decay)

    x = BatchNormalization(epsilon=eps, axis=concat_axis, name='conv'+str(final_stage)+'_blk_bn')(x)
    x = Scale(axis=concat_axis, name='conv'+str(final_stage)+'_blk_scale')(x)
    x = Activation('relu', name='relu'+str(final_stage)+'_blk')(x)

    # Original ImageNet 1000-way head — built only so the pretrained fc6
    # weights have a layer to load into (load_weights uses by_name=True).
    x_fc = GlobalAveragePooling2D(name='pool'+str(final_stage))(x)
    x_fc = Dense(1000, name='fc6')(x_fc)
    x_fc = Activation('softmax', name='prob')(x_fc)

    model = Model(img_input, x_fc, name='densenet')

    if K.image_dim_ordering() == 'th':
        # Use pre-trained weights for Theano backend
        weights_path = 'imagenet_models/densenet121_weights_th.h5'
    else:
        # Use pre-trained weights for Tensorflow backend
        weights_path = 'imagenet_models/densenet121_weights_tf.h5'

    model.load_weights(weights_path, by_name=True)

    # Truncate and replace softmax layer for transfer learning
    # Cannot use model.layers.pop() since model is not of Sequential() type
    # The method below works since pre-trained weights are stored in layers but not in the model
    x_newfc = GlobalAveragePooling2D(name='pool'+str(final_stage))(x)
    x_newfc = Dense(num_classes, name='fc6')(x_newfc)
    x_newfc = Activation('softmax', name='prob')(x_newfc)

    model = Model(img_input, x_newfc)

    # Learning rate is changed to 0.001
    sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

    return model
| 113 | + |
| 114 | + |
def conv_block(x, stage, branch, nb_filter, dropout_rate=None, weight_decay=1e-4):
    '''Apply BatchNorm, Relu, bottleneck 1x1 Conv2D, 3x3 Conv2D, and optional dropout.

    # Arguments
        x: input tensor
        stage: index for dense block
        branch: layer index within each dense block
        nb_filter: number of filters
        dropout_rate: dropout rate
        weight_decay: weight decay factor
    '''
    eps = 1.1e-5
    conv_base = 'conv{0}_{1}'.format(stage, branch)
    relu_base = 'relu{0}_{1}'.format(stage, branch)

    def _bn_scale_relu(tensor, suffix):
        # BatchNorm -> Scale -> ReLU triple shared by both sub-layers.
        tensor = BatchNormalization(epsilon=eps, axis=concat_axis, name=conv_base + suffix + '_bn')(tensor)
        tensor = Scale(axis=concat_axis, name=conv_base + suffix + '_scale')(tensor)
        return Activation('relu', name=relu_base + suffix)(tensor)

    # 1x1 Convolution (bottleneck layer): widens to 4x the growth rate.
    out = _bn_scale_relu(x, '_x1')
    out = Convolution2D(nb_filter * 4, 1, 1, name=conv_base + '_x1', bias=False)(out)
    if dropout_rate:
        out = Dropout(dropout_rate)(out)

    # 3x3 Convolution back down to nb_filter feature maps.
    out = _bn_scale_relu(out, '_x2')
    out = ZeroPadding2D((1, 1), name=conv_base + '_x2_zeropadding')(out)
    out = Convolution2D(nb_filter, 3, 3, name=conv_base + '_x2', bias=False)(out)
    if dropout_rate:
        out = Dropout(dropout_rate)(out)

    return out
| 150 | + |
| 151 | + |
def transition_block(x, stage, nb_filter, compression=1.0, dropout_rate=None, weight_decay=1E-4):
    '''Apply BatchNorm, 1x1 Convolution, averagePooling, optional compression, dropout.

    # Arguments
        x: input tensor
        stage: index for dense block
        nb_filter: number of filters
        compression: calculated as 1 - reduction. Reduces the number of feature maps in the transition block.
        dropout_rate: dropout rate
        weight_decay: weight decay factor
    '''
    eps = 1.1e-5
    stage_tag = str(stage)

    # BN -> Scale -> ReLU -> 1x1 conv that shrinks channels by `compression`.
    out = BatchNormalization(epsilon=eps, axis=concat_axis, name='conv' + stage_tag + '_blk_bn')(x)
    out = Scale(axis=concat_axis, name='conv' + stage_tag + '_blk_scale')(out)
    out = Activation('relu', name='relu' + stage_tag + '_blk')(out)
    out = Convolution2D(int(nb_filter * compression), 1, 1, name='conv' + stage_tag + '_blk', bias=False)(out)

    if dropout_rate:
        out = Dropout(dropout_rate)(out)

    # Halve the spatial resolution before the next dense block.
    return AveragePooling2D((2, 2), strides=(2, 2), name='pool' + stage_tag)(out)
| 179 | + |
| 180 | + |
def dense_block(x, stage, nb_layers, nb_filter, growth_rate, dropout_rate=None, weight_decay=1e-4, grow_nb_filters=True):
    ''' Build a dense_block where the output of each conv_block is fed to subsequent ones
    # Arguments
        x: input tensor
        stage: index for dense block
        nb_layers: the number of layers of conv_block to append to the model.
        nb_filter: number of filters
        growth_rate: growth rate
        dropout_rate: dropout rate
        weight_decay: weight decay factor
        grow_nb_filters: flag to decide to allow number of filters to grow
    # Returns
        A (tensor, int) pair: the concatenation of the input with every
        conv_block output, and the updated filter count.
    '''
    # FIX: removed unused local `eps` — only conv/transition blocks use it.
    concat_feat = x

    for i in range(nb_layers):
        branch = i + 1
        # Each conv_block sees the concatenation of all previous outputs
        # (the defining DenseNet connectivity pattern).
        x = conv_block(concat_feat, stage, branch, growth_rate, dropout_rate, weight_decay)
        concat_feat = merge([concat_feat, x], mode='concat', concat_axis=concat_axis, name='concat_'+str(stage)+'_'+str(branch))

        if grow_nb_filters:
            nb_filter += growth_rate

    return concat_feat, nb_filter
| 206 | + |
if __name__ == '__main__':

    # Example to fine-tune on 3000 samples from Cifar10

    img_rows, img_cols = 224, 224  # Resolution of inputs
    channel = 3
    num_classes = 10
    batch_size = 16
    nb_epoch = 10

    # Load Cifar10 data. Please implement your own load_data() module for your own dataset
    X_train, Y_train, X_valid, Y_valid = load_cifar10_data(img_rows, img_cols)

    # Load our model
    model = densenet121_model(img_rows=img_rows, img_cols=img_cols, color_type=channel, num_classes=num_classes)

    # Start Fine-tuning
    model.fit(X_train, Y_train,
              batch_size=batch_size,
              nb_epoch=nb_epoch,
              shuffle=True,
              verbose=1,
              validation_data=(X_valid, Y_valid),
              )

    # Make predictions
    predictions_valid = model.predict(X_valid, batch_size=batch_size, verbose=1)

    # Cross-entropy loss score.
    # FIX: the score was previously computed but never reported; print it so
    # running the script actually surfaces the evaluation result.
    score = log_loss(Y_valid, predictions_valid)
    print('Validation log-loss: %.6f' % score)
0 commit comments