# HelloWo.. [MNIST](https://keras.io/datasets/#mnist-database-of-handwritten-digits)!

In [None]:
from __future__ import print_function
import numpy as np
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K

Load MNIST dataset

In [None]:
num_classes = 10
# input image dimensions
img_rows, img_cols = 28, 28

# Only a random subset of the images is used; the indices are drawn here and
# applied to the arrays in the preprocessing cell.
n_train_samples, n_test_samples = 5000, 100
# Fixed seed so that "Restart Kernel -> Run All" always draws the same subset,
# which keeps the normalization / optimizer comparisons below comparable between runs.
subset_seed = 42

# the data, shuffled and split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# A dedicated RandomState makes the draw reproducible without reseeding NumPy's global RNG.
subset_rng = np.random.RandomState(subset_seed)
train_idx = subset_rng.choice(x_train.shape[0], n_train_samples, replace=False)
test_idx = subset_rng.choice(x_test.shape[0], n_test_samples, replace=False)

# Add an explicit single-channel axis where the backend's image data format expects it.
if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

In [None]:
def instantiateAndFitModel(model, optimizer, x_test, y_test, x_train, y_train,
                           verbose = 1, epochs = 5, batch_size = 128,
                           weights_path = 'weights_toyCNN.h5'):
    """Compile ``model``, reload its weights from file, train it and print test metrics.

    NOTE: the *test* arrays come before the *train* arrays in the signature;
    pass them by keyword if in doubt.

    Parameters
    ----------
    model : object exposing ``compile``, ``load_weights``, ``fit`` and ``evaluate``
        The model to (re)train. It is modified in place.
    optimizer : optimizer passed straight to ``model.compile``.
    x_test, y_test : data used both as ``validation_data`` during fitting and
        for the final evaluation.
    x_train, y_train : data the model is fitted on.
    verbose : verbosity forwarded to ``model.fit`` (the final evaluation is always silent).
    epochs : number of epochs forwarded to ``model.fit``.
    batch_size : batch size forwarded to ``model.fit``.
    weights_path : file the weights are loaded from before training. The default
        is the file name this function previously had hard-coded.

    Returns
    -------
    None. The test loss and accuracy are printed.
    """
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=optimizer,
                  metrics=['accuracy'])
    # Reload the stored weights so training starts from the saved state rather
    # than from whatever a previous call left in the model.
    model.load_weights(weights_path)
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=verbose,
              validation_data=(x_test, y_test))
    # With metrics=['accuracy'], evaluate returns [loss, accuracy].
    score = model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

Do some basic data preprocessing

In [None]:
# Keep only the randomly drawn samples and store the pixels as float32.
x_train = x_train[train_idx].astype('float32')
x_test = x_test[test_idx].astype('float32')
y_train, y_test = y_train[train_idx], y_test[test_idx]

# References to the un-normalized pixels; the normalization experiments below
# all start again from these arrays.
x_train_untainted, x_test_untainted = x_train, x_test

print(len(x_train), 'train samples')
print(len(x_test), 'test samples')

# convert class vectors to binary class matrices
y_train, y_test = (keras.utils.to_categorical(labels, num_classes)
                   for labels in (y_train, y_test))

Let's design our Convolutional Neural Network using Keras' [sequential API](https://keras.io/models/sequential/):

In [None]:
# Two 3x3 convolutions -> 2x2 max-pooling -> dropout -> dense classifier head.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])

model.compile(optimizer=keras.optimizers.Adadelta(),
              loss=keras.losses.categorical_crossentropy,
              metrics=['accuracy'])
# Store the current (untrained) weights; instantiateAndFitModel reloads this
# file before every training run.
model.save_weights('weights_toyCNN.h5')

**Exercise:** Implement the same model using Keras' [functional API](https://keras.io/models/model/).

### How does the model performance change if you normalize the data?

**Raw, unnormalized data**

In [None]:
# Baseline: train directly on the un-normalized pixel values.
x_train, x_test = x_train_untainted, x_test_untainted
instantiateAndFitModel(model, keras.optimizers.Adadelta(),
                       x_test=x_test, y_test=y_test,
                       x_train=x_train, y_train=y_train)

**Maximum scaling**

In [None]:
# Divide every pixel by 255 (presumably the largest value an 8-bit MNIST pixel can take).
x_train, x_test = (pixels / 255
                   for pixels in (x_train_untainted, x_test_untainted))

instantiateAndFitModel(model, keras.optimizers.Adadelta(),
                       x_test=x_test, y_test=y_test,
                       x_train=x_train, y_train=y_train)

**Mean centering & Maximum Scaling**

In [None]:
# Both normalization statistics are estimated on the training data only and then
# applied unchanged to the test data, so train and test go through the exact
# same transformation and nothing about the test set leaks into preprocessing.
train_mean = np.mean(x_train_untainted)
x_train = x_train_untainted - train_mean
x_test = x_test_untainted - train_mean

# Scale by the maximum of the *centered training* data (previously the test set
# was divided by its own maximum).
train_max = np.max(x_train)
x_train = x_train / train_max
x_test = x_test / train_max


instantiateAndFitModel(model, keras.optimizers.Adadelta(), x_test, y_test, x_train, y_train)

**Exercise:** Can you think of another way to normalize our images?

## Does the optimizer affect the overall accuracy?

**Stochastic Gradient Descent**

In [None]:
# Plain SGD: no momentum, no learning-rate decay, no Nesterov update.
optimizer = keras.optimizers.SGD(
    lr=0.001,
    momentum=0.0,
    decay=0.0,
    nesterov=False,
)
instantiateAndFitModel(model, optimizer,
                       x_test=x_test, y_test=y_test,
                       x_train=x_train, y_train=y_train)

**ADAM**

In [None]:
# Adam, with all hyperparameters spelled out explicitly.
optimizer = keras.optimizers.Adam(
    lr=0.001,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-08,
    decay=0.0,
)
instantiateAndFitModel(model, optimizer,
                       x_test=x_test, y_test=y_test,
                       x_train=x_train, y_train=y_train)

**RMSProp**

In [None]:
# RMSprop, with all hyperparameters spelled out explicitly.
optimizer = keras.optimizers.RMSprop(
    lr=0.001,
    rho=0.9,
    epsilon=1e-08,
    decay=0.0,
)
instantiateAndFitModel(model, optimizer,
                       x_test=x_test, y_test=y_test,
                       x_train=x_train, y_train=y_train)

## Hyperparameter optimization

**Grid search** is a simple way to optimize our learning rate, as shown in the following snippet:

In [None]:
# Passed as `verbose` to the training helper below.
noVerbose = 0

# Sweep the learning rate over powers of ten: 10**-5, 10**-4, ..., 10**0.
for exponent in range(-5, 1):
    learning_rate = 10**exponent
    print("Learning rate " + str(learning_rate))
    optimizer = keras.optimizers.RMSprop(
        lr=learning_rate,
        rho=0.9,
        epsilon=1e-08,
        decay=0.0,
    )
    instantiateAndFitModel(model, optimizer,
                           x_test=x_test, y_test=y_test,
                           x_train=x_train, y_train=y_train,
                           verbose=noVerbose)
    print("\n")

**Exercise:** Evaluate random search as an approach to optimize our learning rate.