Make examples agnostic to image_dim_ordering

2016-09-06 15:53:56 -07:00 · 2016-09-06 15:53:56 -07:00 · cc92025fdc
commit cc92025fdc
parent f05cd95fad
8 changed files with 79 additions and 323 deletions
--- a/examples/cifar10_cnn.py
+++ b/examples/cifar10_cnn.py
@ -43,7 +43,7 @@ Y_test = np_utils.to_categorical(y_test, nb_classes)
 model = Sequential()

 model.add(Convolution2D(32, 3, 3, border_mode='same',
-                        input_shape=(img_channels, img_rows, img_cols)))
+                        input_shape=X_train.shape[1:]))
 model.add(Activation('relu'))
 model.add(Convolution2D(32, 3, 3))
 model.add(Activation('relu'))
--- a/examples/image_ocr.py
+++ b/examples/image_ocr.py
@ -61,6 +61,7 @@ OUTPUT_DIR = "image_ocr"

 np.random.seed(55)

+
 # this creates larger "blotches" of noise which look
 # more realistic than just adding gaussian noise
 # assumes greyscale with pixels ranging from 0 to 1
@ -73,6 +74,7 @@ def speckle(img):
    img_speck[img_speck <= 0] = 0
    return img_speck

+
 # paints the string in a random location within the bounding box
 # also uses a random font, a slight random rotation,
 # and a random amount of speckle noise
@ -114,6 +116,7 @@ def paint_text(text, w, h):

    return a

+
 def shuffle_mats_or_lists(matrix_list, stop_ind=None):
    ret = []
    assert all([len(i) == len(matrix_list[0]) for i in matrix_list])
@ -131,9 +134,11 @@ def shuffle_mats_or_lists(matrix_list, stop_ind=None):
        elif isinstance(mat, list):
            ret.append([mat[i] for i in a])
        else:
-            raise TypeError('shuffle_mats_or_lists only supports numpy.array and list objects')
+            raise TypeError('shuffle_mats_or_lists only supports '
+                            'numpy.array and list objects')
    return ret

+
 def text_to_labels(text, num_classes):
    ret = []
    for char in text:
@ -143,6 +148,7 @@ def text_to_labels(text, num_classes):
            ret.append(26)
    return ret

+
 # only a-z and space..probably not too difficult
 # to expand to uppercase and symbols

@ -150,14 +156,15 @@ def is_valid_str(in_str):
    search = re.compile(r'[^a-z\ ]').search
    return not bool(search(in_str))

+
 # Uses generator functions to supply train/test with
 # data. Image renderings of text are created on the fly
 # each time with random perturbations

 class TextImageGenerator(keras.callbacks.Callback):

-    def __init__(self, monogram_file, bigram_file, minibatch_size, img_w,
-                 img_h, downsample_width, val_split,
+    def __init__(self, monogram_file, bigram_file, minibatch_size,
+                 img_w, img_h, downsample_width, val_split,
                 absolute_max_string_len=16):

        self.minibatch_size = minibatch_size
@ -221,7 +228,10 @@ class TextImageGenerator(keras.callbacks.Callback):
    # each time an image is requested from train/val/test, a new random
    # painting of the text is performed
    def get_batch(self, index, size, train):
-        X_data = np.ones([size, 1, self.img_h, self.img_w])
+        if K.image_dim_ordering() == 'th':
+            X_data = np.ones([size, 1, self.img_h, self.img_w])
+        else:
+            X_data = np.ones([size, self.img_h, self.img_w, 1])
        labels = np.ones([size, self.absolute_max_string_len])
        input_length = np.zeros([size, 1])
        label_length = np.zeros([size, 1])
@ -231,13 +241,19 @@ class TextImageGenerator(keras.callbacks.Callback):
            # Mix in some blank inputs.  This seems to be important for
            # achieving translational invariance
            if train and i > size - 4:
-                X_data[i, 0, :, :] = paint_text('', self.img_w, self.img_h)
+                if K.image_dim_ordering() == 'th':
+                    X_data[i, 0, :, :] = paint_text('', self.img_w, self.img_h)
+                else:
+                    X_data[i, :, :, 0] = paint_text('', self.img_w, self.img_h)
                labels[i, 0] = self.blank_label
                input_length[i] = self.downsample_width
                label_length[i] = 1
                source_str.append('')
            else:
-                X_data[i, 0, :, :] = paint_text(self.X_text[index + i], self.img_w, self.img_h)
+                if K.image_dim_ordering() == 'th':
+                    X_data[i, 0, :, :] = paint_text(self.X_text[index + i], self.img_w, self.img_h)
+                else:
+                    X_data[i, :, :, 0] = paint_text(self.X_text[index + i], self.img_w, self.img_h)
                labels[i, :] = self.Y_data[index + i]
                input_length[i] = self.downsample_width
                label_length[i] = self.Y_len[index + i]
@ -285,6 +301,7 @@ class TextImageGenerator(keras.callbacks.Callback):
        if epoch == 30:
            self.build_word_list(64000, 12, 0.5)

+
 # the actual loss calc occurs here despite it not being
 # an internal Keras loss function

@ -295,6 +312,7 @@ def ctc_lambda_func(args):
    y_pred = y_pred[:, 2:, :]
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

+
 # For a real OCR application, this should be beam search with a dictionary
 # and language model.  For this example, best path is sufficient.

@ -314,9 +332,10 @@ def decode_batch(test_func, word_batch):
        ret.append(outstr)
    return ret

+
 class VizCallback(keras.callbacks.Callback):

-    def __init__(self, test_func, text_img_gen, num_display_words = 6):
+    def __init__(self, test_func, text_img_gen, num_display_words=6):
        self.test_func = test_func
        self.output_dir = os.path.join(
            OUTPUT_DIR, datetime.datetime.now().strftime('%A, %d. %B %Y %I.%M%p'))
@ -350,7 +369,11 @@ class VizCallback(keras.callbacks.Callback):

        for i in range(self.num_display_words):
            pylab.subplot(self.num_display_words, 1, i + 1)
-            pylab.imshow(word_batch['the_input'][i, 0, :, :], cmap='Greys_r')
+            if K.image_dim_ordering() == 'th':
+                the_input = word_batch['the_input'][i, 0, :, :]
+            else:
+                the_input = word_batch['the_input'][i, :, :, 0]
+            pylab.imshow(the_input, cmap='Greys_r')
            pylab.xlabel('Truth = \'%s\' Decoded = \'%s\'' % (word_batch['source_str'][i], res[i]))
        fig = pylab.gcf()
        fig.set_size_inches(10, 12)
@ -375,6 +398,11 @@ time_dense_size = 32
 rnn_size = 512
 time_steps = img_w / (pool_size_1 * pool_size_2)

+if K.image_dim_ordering() == 'th':
+    input_shape = (1, img_h, img_w)
+else:
+    input_shape = (img_h, img_w, 1)
+
 fdir = os.path.dirname(get_file('wordlists.tgz',
                                origin='http://www.isosemi.com/datasets/wordlists.tgz', untar=True))

@ -387,7 +415,7 @@ img_gen = TextImageGenerator(monogram_file=os.path.join(fdir, 'wordlist_mono_cle
                             val_split=words_per_epoch - val_words)

 act = 'relu'
-input_data = Input(name='the_input', shape=(1, img_h, img_w), dtype='float32')
+input_data = Input(name='the_input', shape=input_shape, dtype='float32')
 inner = Convolution2D(conv_num_filters, filter_size, filter_size, border_mode='same',
                      activation=act, name='conv1')(input_data)
 inner = MaxPooling2D(pool_size=(pool_size_1, pool_size_1), name='max1')(inner)
--- a/examples/inception_v3.py
+++ b/examples/inception_v3.py
@ -1,290 +0,0 @@
-'''This script demonstrates how to build the Inception v3 architecture
-using the Keras functional API.
-We are not actually training it here, for lack of appropriate data.
-
-For more information about this architecture, see:
-
-"Rethinking the Inception Architecture for Computer Vision"
-Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens, Zbigniew Wojna
-http://arxiv.org/abs/1512.00567
-'''
-from keras.layers import Convolution2D, MaxPooling2D, AveragePooling2D
-from keras.layers import BatchNormalization, Flatten, Dense, Dropout
-from keras.layers import Input, merge
-from keras.models import Model
-from keras import regularizers
-
-
-# global constants
-NB_CLASS = 1000  # number of classes
-DIM_ORDERING = 'th'  # 'th' (channels, width, height) or 'tf' (width, height, channels)
-WEIGHT_DECAY = 0.  # L2 regularization factor
-USE_BN = False  # whether to use batch normalization
-
-
-def conv2D_bn(x, nb_filter, nb_row, nb_col,
-              border_mode='same', subsample=(1, 1),
-              activation='relu', batch_norm=USE_BN,
-              weight_decay=WEIGHT_DECAY, dim_ordering=DIM_ORDERING):
-    '''Utility function to apply to a tensor a module conv + BN
-    with optional weight decay (L2 weight regularization).
-    '''
-    if weight_decay:
-        W_regularizer = regularizers.l2(weight_decay)
-        b_regularizer = regularizers.l2(weight_decay)
-    else:
-        W_regularizer = None
-        b_regularizer = None
-    x = Convolution2D(nb_filter, nb_row, nb_col,
-                      subsample=subsample,
-                      activation=activation,
-                      border_mode=border_mode,
-                      W_regularizer=W_regularizer,
-                      b_regularizer=b_regularizer,
-                      dim_ordering=dim_ordering)(x)
-    if batch_norm:
-        x = BatchNormalization()(x)
-    return x
-
-# Define image input layer
-
-if DIM_ORDERING == 'th':
-    img_input = Input(shape=(3, 299, 299))
-    CONCAT_AXIS = 1
-elif DIM_ORDERING == 'tf':
-    img_input = Input(shape=(299, 299, 3))
-    CONCAT_AXIS = 3
-else:
-    raise Exception('Invalid dim ordering: ' + str(DIM_ORDERING))
-
-# Entry module
-
-x = conv2D_bn(img_input, 32, 3, 3, subsample=(2, 2), border_mode='valid')
-x = conv2D_bn(x, 32, 3, 3, border_mode='valid')
-x = conv2D_bn(x, 64, 3, 3)
-x = MaxPooling2D((3, 3), strides=(2, 2), dim_ordering=DIM_ORDERING)(x)
-
-x = conv2D_bn(x, 80, 1, 1, border_mode='valid')
-x = conv2D_bn(x, 192, 3, 3, border_mode='valid')
-x = MaxPooling2D((3, 3), strides=(2, 2), dim_ordering=DIM_ORDERING)(x)
-
-# mixed: 35 x 35 x 256
-
-branch1x1 = conv2D_bn(x, 64, 1, 1)
-
-branch5x5 = conv2D_bn(x, 48, 1, 1)
-branch5x5 = conv2D_bn(branch5x5, 64, 5, 5)
-
-branch3x3dbl = conv2D_bn(x, 64, 1, 1)
-branch3x3dbl = conv2D_bn(branch3x3dbl, 96, 3, 3)
-branch3x3dbl = conv2D_bn(branch3x3dbl, 96, 3, 3)
-
-branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same', dim_ordering=DIM_ORDERING)(x)
-branch_pool = conv2D_bn(branch_pool, 32, 1, 1)
-x = merge([branch1x1, branch5x5, branch3x3dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
-
-# mixed_1: 35 x 35 x 288
-
-branch1x1 = conv2D_bn(x, 64, 1, 1)
-
-branch5x5 = conv2D_bn(x, 48, 1, 1)
-branch5x5 = conv2D_bn(branch5x5, 64, 5, 5)
-
-branch3x3dbl = conv2D_bn(x, 64, 1, 1)
-branch3x3dbl = conv2D_bn(branch3x3dbl, 96, 3, 3)
-branch3x3dbl = conv2D_bn(branch3x3dbl, 96, 3, 3)
-
-branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same', dim_ordering=DIM_ORDERING)(x)
-branch_pool = conv2D_bn(branch_pool, 64, 1, 1)
-x = merge([branch1x1, branch5x5, branch3x3dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
-
-# mixed2: 35 x 35 x 288
-
-branch1x1 = conv2D_bn(x, 64, 1, 1)
-
-branch5x5 = conv2D_bn(x, 48, 1, 1)
-branch5x5 = conv2D_bn(branch5x5, 64, 5, 5)
-
-branch3x3dbl = conv2D_bn(x, 64, 1, 1)
-branch3x3dbl = conv2D_bn(branch3x3dbl, 96, 3, 3)
-branch3x3dbl = conv2D_bn(branch3x3dbl, 96, 3, 3)
-
-branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same', dim_ordering=DIM_ORDERING)(x)
-branch_pool = conv2D_bn(branch_pool, 64, 1, 1)
-x = merge([branch1x1, branch5x5, branch3x3dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
-
-# mixed3: 17 x 17 x 768
-
-branch3x3 = conv2D_bn(x, 384, 3, 3, subsample=(2, 2), border_mode='valid')
-
-branch3x3dbl = conv2D_bn(x, 64, 1, 1)
-branch3x3dbl = conv2D_bn(branch3x3dbl, 96, 3, 3)
-branch3x3dbl = conv2D_bn(branch3x3dbl, 96, 3, 3, subsample=(2, 2), border_mode='valid')
-
-branch_pool = MaxPooling2D((3, 3), strides=(2, 2), dim_ordering=DIM_ORDERING)(x)
-x = merge([branch3x3, branch3x3dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
-
-# mixed4: 17 x 17 x 768
-
-branch1x1 = conv2D_bn(x, 192, 1, 1)
-
-branch7x7 = conv2D_bn(x, 128, 1, 1)
-branch7x7 = conv2D_bn(branch7x7, 128, 1, 7)
-branch7x7 = conv2D_bn(branch7x7, 192, 7, 1)
-
-branch7x7dbl = conv2D_bn(x, 128, 1, 1)
-branch7x7dbl = conv2D_bn(branch7x7dbl, 128, 7, 1)
-branch7x7dbl = conv2D_bn(branch7x7dbl, 128, 1, 7)
-branch7x7dbl = conv2D_bn(branch7x7dbl, 128, 7, 1)
-branch7x7dbl = conv2D_bn(branch7x7dbl, 192, 1, 7)
-
-branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same', dim_ordering=DIM_ORDERING)(x)
-branch_pool = conv2D_bn(branch_pool, 192, 1, 1)
-x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
-
-# mixed5: 17 x 17 x 768
-
-branch1x1 = conv2D_bn(x, 192, 1, 1)
-
-branch7x7 = conv2D_bn(x, 160, 1, 1)
-branch7x7 = conv2D_bn(branch7x7, 160, 1, 7)
-branch7x7 = conv2D_bn(branch7x7, 192, 7, 1)
-
-branch7x7dbl = conv2D_bn(x, 160, 1, 1)
-branch7x7dbl = conv2D_bn(branch7x7dbl, 160, 7, 1)
-branch7x7dbl = conv2D_bn(branch7x7dbl, 160, 1, 7)
-branch7x7dbl = conv2D_bn(branch7x7dbl, 160, 7, 1)
-branch7x7dbl = conv2D_bn(branch7x7dbl, 192, 1, 7)
-
-branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same', dim_ordering=DIM_ORDERING)(x)
-branch_pool = conv2D_bn(branch_pool, 192, 1, 1)
-x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
-
-# mixed5: 17 x 17 x 768
-
-branch1x1 = conv2D_bn(x, 192, 1, 1)
-
-branch7x7 = conv2D_bn(x, 160, 1, 1)
-branch7x7 = conv2D_bn(branch7x7, 160, 1, 7)
-branch7x7 = conv2D_bn(branch7x7, 192, 7, 1)
-
-branch7x7dbl = conv2D_bn(x, 160, 1, 1)
-branch7x7dbl = conv2D_bn(branch7x7dbl, 160, 7, 1)
-branch7x7dbl = conv2D_bn(branch7x7dbl, 160, 1, 7)
-branch7x7dbl = conv2D_bn(branch7x7dbl, 160, 7, 1)
-branch7x7dbl = conv2D_bn(branch7x7dbl, 192, 1, 7)
-
-branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same', dim_ordering=DIM_ORDERING)(x)
-branch_pool = conv2D_bn(branch_pool, 192, 1, 1)
-x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
-
-# mixed6: 17 x 17 x 768
-
-branch1x1 = conv2D_bn(x, 192, 1, 1)
-
-branch7x7 = conv2D_bn(x, 160, 1, 1)
-branch7x7 = conv2D_bn(branch7x7, 160, 1, 7)
-branch7x7 = conv2D_bn(branch7x7, 192, 7, 1)
-
-branch7x7dbl = conv2D_bn(x, 160, 1, 1)
-branch7x7dbl = conv2D_bn(branch7x7dbl, 160, 7, 1)
-branch7x7dbl = conv2D_bn(branch7x7dbl, 192, 1, 7)
-branch7x7dbl = conv2D_bn(branch7x7dbl, 160, 7, 1)
-branch7x7dbl = conv2D_bn(branch7x7dbl, 192, 1, 7)
-
-branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same', dim_ordering=DIM_ORDERING)(x)
-branch_pool = conv2D_bn(branch_pool, 192, 1, 1)
-x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
-
-# mixed7: 17 x 17 x 768
-
-branch1x1 = conv2D_bn(x, 192, 1, 1)
-
-branch7x7 = conv2D_bn(x, 192, 1, 1)
-branch7x7 = conv2D_bn(branch7x7, 192, 1, 7)
-branch7x7 = conv2D_bn(branch7x7, 192, 7, 1)
-
-branch7x7dbl = conv2D_bn(x, 160, 1, 1)
-branch7x7dbl = conv2D_bn(branch7x7dbl, 192, 7, 1)
-branch7x7dbl = conv2D_bn(branch7x7dbl, 192, 1, 7)
-branch7x7dbl = conv2D_bn(branch7x7dbl, 192, 7, 1)
-branch7x7dbl = conv2D_bn(branch7x7dbl, 192, 1, 7)
-
-branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same', dim_ordering=DIM_ORDERING)(x)
-branch_pool = conv2D_bn(branch_pool, 192, 1, 1)
-x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
-
-# Auxiliary head
-
-aux_logits = AveragePooling2D((5, 5), strides=(3, 3), dim_ordering=DIM_ORDERING)(x)
-aux_logits = conv2D_bn(aux_logits, 128, 1, 1)
-aux_logits = conv2D_bn(aux_logits, 728, 5, 5, border_mode='valid')
-aux_logits = Flatten()(aux_logits)
-aux_preds = Dense(NB_CLASS, activation='softmax')(aux_logits)
-
-# mixed8: 8 x 8 x 1280
-
-branch3x3 = conv2D_bn(x, 192, 1, 1)
-branch3x3 = conv2D_bn(branch3x3, 320, 3, 3, subsample=(2, 2), border_mode='valid')
-
-branch7x7x3 = conv2D_bn(x, 192, 1, 1)
-branch7x7x3 = conv2D_bn(branch7x7x3, 192, 1, 7)
-branch7x7x3 = conv2D_bn(branch7x7x3, 192, 7, 1)
-branch7x7x3 = conv2D_bn(branch7x7x3, 192, 3, 3, subsample=(2, 2), border_mode='valid')
-
-branch_pool = AveragePooling2D((3, 3), strides=(2, 2), dim_ordering=DIM_ORDERING)(x)
-x = merge([branch3x3, branch7x7x3, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
-
-# mixed9: 8 x 8 x 2048
-
-branch1x1 = conv2D_bn(x, 320, 1, 1)
-
-branch3x3 = conv2D_bn(x, 384, 1, 1)
-branch3x3_1 = conv2D_bn(branch3x3, 384, 1, 3)
-branch3x3_2 = conv2D_bn(branch3x3, 384, 3, 1)
-branch3x3 = merge([branch3x3_1, branch3x3_2], mode='concat', concat_axis=CONCAT_AXIS)
-
-branch3x3dbl = conv2D_bn(x, 448, 1, 1)
-branch3x3dbl = conv2D_bn(branch3x3dbl, 384, 3, 3)
-branch3x3dbl_1 = conv2D_bn(branch3x3dbl, 384, 1, 3)
-branch3x3dbl_2 = conv2D_bn(branch3x3dbl, 384, 3, 1)
-branch3x3dbl = merge([branch3x3dbl_1, branch3x3dbl_2], mode='concat', concat_axis=CONCAT_AXIS)
-
-branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same', dim_ordering=DIM_ORDERING)(x)
-branch_pool = conv2D_bn(branch_pool, 192, 1, 1)
-x = merge([branch1x1, branch3x3, branch3x3dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
-
-# mixed10: 8 x 8 x 2048
-
-branch1x1 = conv2D_bn(x, 320, 1, 1)
-
-branch3x3 = conv2D_bn(x, 384, 1, 1)
-branch3x3_1 = conv2D_bn(branch3x3, 384, 1, 3)
-branch3x3_2 = conv2D_bn(branch3x3, 384, 3, 1)
-branch3x3 = merge([branch3x3_1, branch3x3_2], mode='concat', concat_axis=CONCAT_AXIS)
-
-branch3x3dbl = conv2D_bn(x, 448, 1, 1)
-branch3x3dbl = conv2D_bn(branch3x3dbl, 384, 3, 3)
-branch3x3dbl_1 = conv2D_bn(branch3x3dbl, 384, 1, 3)
-branch3x3dbl_2 = conv2D_bn(branch3x3dbl, 384, 3, 1)
-branch3x3dbl = merge([branch3x3dbl_1, branch3x3dbl_2], mode='concat', concat_axis=CONCAT_AXIS)
-
-branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same', dim_ordering=DIM_ORDERING)(x)
-branch_pool = conv2D_bn(branch_pool, 192, 1, 1)
-x = merge([branch1x1, branch3x3, branch3x3dbl, branch_pool], mode='concat', concat_axis=CONCAT_AXIS)
-
-# Final pooling and prediction
-
-x = AveragePooling2D((8, 8), strides=(1, 1), dim_ordering=DIM_ORDERING)(x)
-x = Dropout(0.5)(x)
-x = Flatten()(x)
-preds = Dense(NB_CLASS, activation='softmax')(x)
-
-# Define model
-
-model = Model(input=img_input, output=[preds, aux_preds])
-model.compile('rmsprop', 'categorical_crossentropy')
-
-# train via e.g. `model.fit(x_train, [y_train] * 2, batch_size=32, nb_epoch=100)`
-# Note that for a large dataset it would be preferable
-# to train using `fit_generator` (see Keras docs).
--- a/examples/mnist_cnn.py
+++ b/examples/mnist_cnn.py
@ -14,6 +14,7 @@ from keras.models import Sequential
 from keras.layers import Dense, Dropout, Activation, Flatten
 from keras.layers import Convolution2D, MaxPooling2D
 from keras.utils import np_utils
+from keras import backend as K

 batch_size = 128
 nb_classes = 10
@ -24,15 +25,22 @@ img_rows, img_cols = 28, 28
 # number of convolutional filters to use
 nb_filters = 32
 # size of pooling area for max pooling
-nb_pool = 2
+pool_size = (2, 2)
 # convolution kernel size
 kernel_size = (3, 3)

 # the data, shuffled and split between train and test sets
 (X_train, y_train), (X_test, y_test) = mnist.load_data()

-X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
-X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
+if K.image_dim_ordering() == 'th':
+    X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
+    X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
+    input_shape = (1, img_rows, img_cols)
+else:
+    X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
+    X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)
+    input_shape = (img_rows, img_cols, 1)
+
 X_train = X_train.astype('float32')
 X_test = X_test.astype('float32')
 X_train /= 255
@ -49,11 +57,11 @@ model = Sequential()

 model.add(Convolution2D(nb_filters, kernel_size[0], kernel_size[1],
                        border_mode='valid',
-                        input_shape=(1, img_rows, img_cols)))
+                        input_shape=input_shape))
 model.add(Activation('relu'))
 model.add(Convolution2D(nb_filters, kernel_size[0], kernel_size[1]))
 model.add(Activation('relu'))
-model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool)))
+model.add(MaxPooling2D(pool_size=pool_size))
 model.add(Dropout(0.25))

 model.add(Flatten())
--- a/examples/mnist_transfer_cnn.py
+++ b/examples/mnist_transfer_cnn.py
@ -22,7 +22,7 @@ from keras.models import Sequential
 from keras.layers import Dense, Dropout, Activation, Flatten
 from keras.layers import Convolution2D, MaxPooling2D
 from keras.utils import np_utils
-
+from keras import backend as K

 now = datetime.datetime.now

@ -35,14 +35,19 @@ img_rows, img_cols = 28, 28
 # number of convolutional filters to use
 nb_filters = 32
 # size of pooling area for max pooling
-nb_pool = 2
+pool_size = 2
 # convolution kernel size
-nb_conv = 3
+kernel_size = 3
+
+if K.image_dim_ordering() == 'th':
+    input_shape = (1, img_rows, img_cols)
+else:
+    input_shape = (img_rows, img_cols, 1)


 def train_model(model, train, test, nb_classes):
-    X_train = train[0].reshape(train[0].shape[0], 1, img_rows, img_cols)
-    X_test = test[0].reshape(test[0].shape[0], 1, img_rows, img_cols)
+    X_train = train[0].reshape((train[0].shape[0],) + input_shape)
+    X_test = test[0].reshape((test[0].shape[0],) + input_shape)
    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    X_train /= 255
@ -86,13 +91,13 @@ y_test_gte5 = y_test[y_test >= 5] - 5

 # define two groups of layers: feature (convolutions) and classification (dense)
 feature_layers = [
-    Convolution2D(nb_filters, nb_conv, nb_conv,
+    Convolution2D(nb_filters, kernel_size, kernel_size,
                  border_mode='valid',
-                  input_shape=(1, img_rows, img_cols)),
+                  input_shape=input_shape),
    Activation('relu'),
-    Convolution2D(nb_filters, nb_conv, nb_conv),
+    Convolution2D(nb_filters, kernel_size, kernel_size),
    Activation('relu'),
-    MaxPooling2D(pool_size=(nb_pool, nb_pool)),
+    MaxPooling2D(pool_size=(pool_size, pool_size)),
    Dropout(0.25),
    Flatten(),
 ]
@ -105,9 +110,7 @@ classification_layers = [
 ]

 # create complete model
-model = Sequential()
-for l in feature_layers + classification_layers:
-    model.add(l)
+model = Sequential(feature_layers + classification_layers)

 # train model for 5-digit classification [0..4]
 train_model(model,
--- a/keras/datasets/cifar10.py
+++ b/keras/datasets/cifar10.py
@ -1,6 +1,7 @@
 from __future__ import absolute_import
 from .cifar import load_batch
 from ..utils.data_utils import get_file
+from .. import backend as K
 import numpy as np
 import os

@ -18,8 +19,8 @@ def load_data():
    for i in range(1, 6):
        fpath = os.path.join(path, 'data_batch_' + str(i))
        data, labels = load_batch(fpath)
-        X_train[(i-1)*10000:i*10000, :, :, :] = data
-        y_train[(i-1)*10000:i*10000] = labels
+        X_train[(i - 1) * 10000: i * 10000, :, :, :] = data
+        y_train[(i - 1) * 10000: i * 10000] = labels

    fpath = os.path.join(path, 'test_batch')
    X_test, y_test = load_batch(fpath)
@ -27,4 +28,8 @@ def load_data():
    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

+    if K.image_dim_ordering() == 'tf':
+        X_train = X_train.transpose(0, 2, 3, 1)
+        X_test = X_test.transpose(0, 2, 3, 1)
+
    return (X_train, y_train), (X_test, y_test)
--- a/keras/datasets/cifar100.py
+++ b/keras/datasets/cifar100.py
@ -1,6 +1,7 @@
 from __future__ import absolute_import
 from .cifar import load_batch
 from ..utils.data_utils import get_file
+from .. import backend as K
 import numpy as np
 import os

@ -13,9 +14,6 @@ def load_data(label_mode='fine'):
    origin = "http://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz"
    path = get_file(dirname, origin=origin, untar=True)

-    nb_test_samples = 10000
-    nb_train_samples = 50000
-
    fpath = os.path.join(path, 'train')
    X_train, y_train = load_batch(fpath, label_key=label_mode+'_labels')

@ -25,4 +23,8 @@ def load_data(label_mode='fine'):
    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

+    if K.image_dim_ordering() == 'tf':
+        X_train = X_train.transpose(0, 2, 3, 1)
+        X_test = X_test.transpose(0, 2, 3, 1)
+
    return (X_train, y_train), (X_test, y_test)