|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | + |
| 3 | +from keras.optimizers import SGD |
| 4 | +from keras.layers import Input, merge, ZeroPadding2D |
| 5 | +from keras.layers.core import Dense, Dropout, Activation |
| 6 | +from keras.layers.convolutional import Convolution2D |
| 7 | +from keras.layers.pooling import AveragePooling2D, GlobalAveragePooling2D, MaxPooling2D |
| 8 | +from keras.layers.normalization import BatchNormalization |
| 9 | +from keras.models import Model |
| 10 | +import keras.backend as K |
| 11 | + |
| 12 | +from sklearn.metrics import log_loss |
| 13 | + |
| 14 | +from custom_layers.scale_layer import Scale |
| 15 | + |
| 16 | +from load_cifar10 import load_cifar10_data |
| 17 | + |
def densenet121_model(img_rows, img_cols, color_type=1, nb_dense_block=4, growth_rate=32, nb_filter=64, reduction=0.5, dropout_rate=0.0, weight_decay=1e-4, num_classes=None):
    '''
    DenseNet 121 Model for Keras

    Model Schema is based on
    https://github.com/flyyufelix/DenseNet-Keras

    ImageNet Pretrained Weights
    Theano: https://drive.google.com/open?id=0Byy2AcGyEVxfMlRYb3YzV210VzQ
    TensorFlow: https://drive.google.com/open?id=0Byy2AcGyEVxfSTA4SHJVOHNuTXc

    # Arguments
        img_rows: input image height
        img_cols: input image width
        color_type: number of input channels (1 = grayscale, 3 = RGB)
        nb_dense_block: number of dense blocks to add to end
        growth_rate: number of filters to add per dense block
        nb_filter: initial number of filters (64 matches the pretrained weights)
        reduction: reduction factor of transition blocks.
        dropout_rate: dropout rate
        weight_decay: weight decay factor
        num_classes: number of classes for the replacement softmax head
    # Returns
        A compiled Keras model instance with ImageNet weights loaded and the
        1000-way classifier replaced by a `num_classes`-way softmax.
    '''
    # Epsilon the pretrained BatchNorm layers were trained with; must match
    # exactly or the loaded statistics are slightly off.
    eps = 1.1e-5

    # compute compression factor
    compression = 1.0 - reduction

    # Handle Dimension Ordering for different backends.
    # concat_axis is global because conv_block/transition_block/dense_block
    # read it when concatenating feature maps.
    global concat_axis
    if K.image_dim_ordering() == 'tf':
        concat_axis = 3
        img_input = Input(shape=(img_rows, img_cols, color_type), name='data')
    else:
        concat_axis = 1
        img_input = Input(shape=(color_type, img_rows, img_cols), name='data')

    # From architecture for ImageNet (Table 1 in the paper).
    # FIX: the original re-assigned nb_filter = 64 here, silently discarding
    # the nb_filter argument; the override is removed so the parameter is
    # honoured (the default of 64 preserves prior behavior and is required
    # for the pretrained weights to fit).
    nb_layers = [6, 12, 24, 16]  # For DenseNet-121

    # Initial convolution
    x = ZeroPadding2D((3, 3), name='conv1_zeropadding')(img_input)
    x = Convolution2D(nb_filter, 7, 7, subsample=(2, 2), name='conv1', bias=False)(x)
    x = BatchNormalization(epsilon=eps, axis=concat_axis, name='conv1_bn')(x)
    x = Scale(axis=concat_axis, name='conv1_scale')(x)
    x = Activation('relu', name='relu1')(x)
    x = ZeroPadding2D((1, 1), name='pool1_zeropadding')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2), name='pool1')(x)

    # Add dense blocks; each (except the last) is followed by a transition
    # block that compresses the channel count by `compression`.
    for block_idx in range(nb_dense_block - 1):
        stage = block_idx + 2
        x, nb_filter = dense_block(x, stage, nb_layers[block_idx], nb_filter, growth_rate, dropout_rate=dropout_rate, weight_decay=weight_decay)

        # Add transition_block
        x = transition_block(x, stage, nb_filter, compression=compression, dropout_rate=dropout_rate, weight_decay=weight_decay)
        nb_filter = int(nb_filter * compression)

    # Final dense block has no trailing transition.
    final_stage = stage + 1
    x, nb_filter = dense_block(x, final_stage, nb_layers[-1], nb_filter, growth_rate, dropout_rate=dropout_rate, weight_decay=weight_decay)

    x = BatchNormalization(epsilon=eps, axis=concat_axis, name='conv'+str(final_stage)+'_blk_bn')(x)
    x = Scale(axis=concat_axis, name='conv'+str(final_stage)+'_blk_scale')(x)
    x = Activation('relu', name='relu'+str(final_stage)+'_blk')(x)

    # Original ImageNet 1000-way head — built only so the pretrained fc6
    # weights have a layer to load into (load_weights uses by_name=True).
    x_fc = GlobalAveragePooling2D(name='pool'+str(final_stage))(x)
    x_fc = Dense(1000, name='fc6')(x_fc)
    x_fc = Activation('softmax', name='prob')(x_fc)

    model = Model(img_input, x_fc, name='densenet')

    if K.image_dim_ordering() == 'th':
        # Use pre-trained weights for Theano backend
        weights_path = 'imagenet_models/densenet121_weights_th.h5'
    else:
        # Use pre-trained weights for Tensorflow backend
        weights_path = 'imagenet_models/densenet121_weights_tf.h5'

    model.load_weights(weights_path, by_name=True)

    # Truncate and replace softmax layer for transfer learning
    # Cannot use model.layers.pop() since model is not of Sequential() type
    # The method below works since pre-trained weights are stored in layers but not in the model
    x_newfc = GlobalAveragePooling2D(name='pool'+str(final_stage))(x)
    x_newfc = Dense(num_classes, name='fc6')(x_newfc)
    x_newfc = Activation('softmax', name='prob')(x_newfc)

    model = Model(img_input, x_newfc)

    # Learning rate is changed to 0.001
    sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

    return model
| 113 | + |
| 114 | + |
def conv_block(x, stage, branch, nb_filter, dropout_rate=None, weight_decay=1e-4):
    '''Apply BatchNorm, Relu, bottleneck 1x1 Conv2D, 3x3 Conv2D, and optional dropout.

    # Arguments
        x: input tensor
        stage: index for dense block
        branch: layer index within each dense block
        nb_filter: number of filters
        dropout_rate: dropout rate
        weight_decay: weight decay factor
    '''
    eps = 1.1e-5
    conv_base = 'conv{0}_{1}'.format(stage, branch)
    relu_base = 'relu{0}_{1}'.format(stage, branch)

    def _bn_scale_relu(tensor, suffix):
        # BatchNorm -> Scale -> ReLU triple shared by both sub-layers.
        tensor = BatchNormalization(epsilon=eps, axis=concat_axis, name=conv_base + suffix + '_bn')(tensor)
        tensor = Scale(axis=concat_axis, name=conv_base + suffix + '_scale')(tensor)
        return Activation('relu', name=relu_base + suffix)(tensor)

    # 1x1 Convolution (bottleneck layer): widens to 4x the growth rate.
    out = _bn_scale_relu(x, '_x1')
    out = Convolution2D(nb_filter * 4, 1, 1, name=conv_base + '_x1', bias=False)(out)
    if dropout_rate:
        out = Dropout(dropout_rate)(out)

    # 3x3 Convolution back down to nb_filter feature maps.
    out = _bn_scale_relu(out, '_x2')
    out = ZeroPadding2D((1, 1), name=conv_base + '_x2_zeropadding')(out)
    out = Convolution2D(nb_filter, 3, 3, name=conv_base + '_x2', bias=False)(out)
    if dropout_rate:
        out = Dropout(dropout_rate)(out)

    return out
| 150 | + |
| 151 | + |
def transition_block(x, stage, nb_filter, compression=1.0, dropout_rate=None, weight_decay=1E-4):
    '''Apply BatchNorm, 1x1 Convolution, averagePooling, optional compression, dropout.

    # Arguments
        x: input tensor
        stage: index for dense block
        nb_filter: number of filters
        compression: calculated as 1 - reduction. Reduces the number of feature maps in the transition block.
        dropout_rate: dropout rate
        weight_decay: weight decay factor
    '''
    eps = 1.1e-5
    stage_tag = str(stage)

    # BN -> Scale -> ReLU -> 1x1 conv that shrinks channels by `compression`.
    out = BatchNormalization(epsilon=eps, axis=concat_axis, name='conv' + stage_tag + '_blk_bn')(x)
    out = Scale(axis=concat_axis, name='conv' + stage_tag + '_blk_scale')(out)
    out = Activation('relu', name='relu' + stage_tag + '_blk')(out)
    out = Convolution2D(int(nb_filter * compression), 1, 1, name='conv' + stage_tag + '_blk', bias=False)(out)

    if dropout_rate:
        out = Dropout(dropout_rate)(out)

    # Halve the spatial resolution before the next dense block.
    return AveragePooling2D((2, 2), strides=(2, 2), name='pool' + stage_tag)(out)
| 179 | + |
| 180 | + |
def dense_block(x, stage, nb_layers, nb_filter, growth_rate, dropout_rate=None, weight_decay=1e-4, grow_nb_filters=True):
    ''' Build a dense_block where the output of each conv_block is fed to subsequent ones
    # Arguments
        x: input tensor
        stage: index for dense block
        nb_layers: the number of layers of conv_block to append to the model.
        nb_filter: number of filters
        growth_rate: growth rate
        dropout_rate: dropout rate
        weight_decay: weight decay factor
        grow_nb_filters: flag to decide to allow number of filters to grow
    # Returns
        A (tensor, int) pair: the concatenation of the input with every
        conv_block output, and the updated filter count.
    '''
    # FIX: removed unused local `eps` — only conv/transition blocks use it.
    concat_feat = x

    for i in range(nb_layers):
        branch = i + 1
        # Each conv_block sees the concatenation of all previous outputs
        # (the defining DenseNet connectivity pattern).
        x = conv_block(concat_feat, stage, branch, growth_rate, dropout_rate, weight_decay)
        concat_feat = merge([concat_feat, x], mode='concat', concat_axis=concat_axis, name='concat_'+str(stage)+'_'+str(branch))

        if grow_nb_filters:
            nb_filter += growth_rate

    return concat_feat, nb_filter
| 206 | + |
if __name__ == '__main__':

    # Example to fine-tune on 3000 samples from Cifar10

    img_rows, img_cols = 224, 224  # Resolution of inputs
    channel = 3
    num_classes = 10
    batch_size = 16
    nb_epoch = 10

    # Load Cifar10 data. Please implement your own load_data() module for your own dataset
    X_train, Y_train, X_valid, Y_valid = load_cifar10_data(img_rows, img_cols)

    # Load our model
    model = densenet121_model(img_rows=img_rows, img_cols=img_cols, color_type=channel, num_classes=num_classes)

    # Start Fine-tuning
    model.fit(X_train, Y_train,
              batch_size=batch_size,
              nb_epoch=nb_epoch,
              shuffle=True,
              verbose=1,
              validation_data=(X_valid, Y_valid),
              )

    # Make predictions
    predictions_valid = model.predict(X_valid, batch_size=batch_size, verbose=1)

    # Cross-entropy loss score.
    # FIX: the score was previously computed but never reported; print it so
    # running the script actually surfaces the evaluation result.
    score = log_loss(Y_valid, predictions_valid)
    print('Validation log-loss: %.6f' % score)
0 commit comments