Abstract
The purpose of this script is to understand the various stages of building a good CNN model. Using transfer learning techniques, one can achieve nearly 100% accuracy for this task. However, in this script I will try to build a model from scratch that achieves a reasonable accuracy. The final result shows close to 95% accuracy on the validation dataset.
Key highlights:
import os
import zipfile
import random
import shutil
from shutil import copyfile
from os import getcwd
import pathlib
import datetime
import cv2
from PIL import Image
import tensorflow_datasets as tfds
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import RMSprop, Adam, SGD
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.models import model_from_json
from sklearn.model_selection import train_test_split
import matplotlib.pylab as plt
import numpy as np
#Write twice, because sometimes it does not work the first time, especially if you are switching back to notebook
%matplotlib notebook
%matplotlib notebook
The images are under two folders, /Cat and /Dog. I do not wish to create separate train and validation folders and copy the images into them. Instead, I will write a custom data feeder that reads the images, performs preprocessing, and feeds them to the model. We will perform on-the-fly augmentation within the model (by adding extra layers at the start).
# Root folder of the dataset and its two per-class sub-folders.
path_to_images = pathlib.Path('/CatsAndDogs/Pet_images_new')
Dogs_dir, Cats_dir = (
    os.path.join(path_to_images, class_folder) for class_folder in ('Dog', 'Cat')
)

# Report how many entries each class folder holds (dogs first, then cats).
for class_dir in (Dogs_dir, Cats_dir):
    print(len(os.listdir(class_dir)))

# Keep the file names of both classes available for later cells.
dog_image_fnames = os.listdir(Dogs_dir)
cat_image_fnames = os.listdir(Cats_dir)
There are roughly 1,800 corrupt images.
# Delete every file in the Cat/ and Dog/ folders whose leading bytes do not
# contain the "JFIF" marker; such files are treated as corrupted images.
num_skipped = 0
for folder_name in ("Cat", "Dog"):
    folder_path = os.path.join(path_to_images, folder_name)
    for fname in os.listdir(folder_path):
        fpath = os.path.join(folder_path, fname)
        # The context manager closes the file even when reading fails, and
        # avoids calling close() on an unbound/stale handle if open() raises.
        with open(fpath, "rb") as fobj:
            is_jfif = b"JFIF" in fobj.peek(10)
        if not is_jfif:
            num_skipped += 1
            # Delete corrupted image
            os.remove(fpath)
print("Deleted %d images" % num_skipped)
#remove 0 size images and non jpg images
import os
# Walk every class folder, delete files that are empty or that do not carry a
# '.jpg' extension, and count the usable images in `j`.
j = 0
for category in os.listdir(path_to_images):
    sub_dir = os.path.join(path_to_images, category)
    if not os.path.isdir(sub_dir):
        # Stray files next to the class folders cannot be listed; skip them.
        continue
    for name in os.listdir(sub_dir):
        fpath = os.path.join(sub_dir, name)
        if not os.path.isfile(fpath):
            # Not a regular file (e.g. a nested folder): report it, leave it alone.
            print(fpath)
            continue
        # splitext copes with names that have no dot or several dots, where
        # name.split('.')[1] would raise IndexError or pick the wrong part.
        extension = os.path.splitext(name)[1]
        if extension != '.jpg':
            print(fpath)
            os.remove(fpath)
        elif os.path.getsize(fpath) <= 0:
            os.remove(fpath)
        else:
            j += 1
print(j)
# Total number of .jpg files found one level below the dataset root.
jpg_paths = list(path_to_images.glob('*/*.jpg'))
image_count = len(jpg_paths)
print(image_count)
After carefully inspecting the images, I found some that are neither cats nor dogs. These images may cause problems during training. If they appear in the validation dataset, they may cause spikes in the validation loss curve. Some examples are plotted below.
# Show up to nine of the manually collected "bad" images in a 3x3 grid.
path_bad_images = pathlib.Path("/CatsAndDogs/bad_images")
bad_images = os.listdir(path_bad_images)
print(len(bad_images))
plt.figure(figsize=(8, 8))
# Slicing (instead of indexing 0..8) avoids an IndexError when the folder
# contains fewer than nine files.
for i, bad_image_name in enumerate(bad_images[:9]):
    img = Image.open(os.path.join(path_bad_images, bad_image_name))
    plt.subplot(3, 3, i + 1)
    plt.imshow(img)
    plt.axis("off")
# Input pipeline settings shared by the training and validation datasets.
batch_size = 64  # max 1000 of these images can fit in gpu memory
new_img_size = (200, 200)
data_split = 0.2  # 0.0005
seed_value = 42

# Build both subsets with identical arguments (training first, then
# validation); only `subset` differs between the two calls.
train_ds, val_ds = (
    tf.keras.preprocessing.image_dataset_from_directory(
        path_to_images,
        validation_split=data_split,
        subset=subset_name,
        seed=seed_value,
        image_size=new_img_size,
        batch_size=batch_size,
    )
    for subset_name in ("training", "validation")
)

class_names = train_ds.class_names
print(class_names)
Take 1 batch from the validation dataset and plot.
# Preview the first nine images of one validation batch with their labels.
plt.figure(figsize=(8, 8))
first_val_batch = val_ds.take(1)
for images, labels in first_val_batch:
    for i in range(9):
        ax = plt.subplot(3, 3, i + 1)
        # Cast to uint8 so imshow renders the pixel values as an RGB image.
        pixels = images[i].numpy().astype("uint8")
        plt.imshow(pixels)
        plt.title(class_names[labels[i]])
        plt.axis("off")
Use at least 3 layers. We will try two models: first a shallow one, then a second one with more layers. For this task you should expect the shallow model to underfit. This shows up in the loss curve: you may see that the training loss and accuracy stop improving after a while.
Note also that we have included on-the-fly image augmentation in the two layers that follow the input rescaling. Furthermore, we use dropout for regularization, to avoid overfitting. We do not want high dropout in the initial layers, so it is set to a relatively higher value only after flattening the convolution layer outputs.
Finally, since we are using the ReLU activation function, using kernel_initializer='he_uniform' is highly recommended.
#A shallow model
# model = tf.keras.models.Sequential([
# tf.keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(new_img_size[0],new_img_size[1],3)),
# tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
# tf.keras.layers.MaxPool2D(2,2),
# tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
# tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
# tf.keras.layers.MaxPool2D(2,2),
# tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
# tf.keras.layers.Conv2D(128, (3,3), activation='relu'),
# tf.keras.layers.MaxPool2D(2,2),
# tf.keras.layers.Flatten(),
# tf.keras.layers.Dropout(0.3),
# tf.keras.layers.Dense(256, activation='relu'),
# tf.keras.layers.Dropout(0.3),
# tf.keras.layers.Dense(256, activation='relu'),
# tf.keras.layers.Dense(1, activation='sigmoid')
# ])
# Second model with more layers: rescaling + augmentation, five convolution
# stages with growing filter counts, then a small dense classifier head.
model = tf.keras.models.Sequential(
    # Scale pixels to [0, 1], then augment with random flips and rotations.
    [
        layers.experimental.preprocessing.Rescaling(
            1./255, input_shape=(new_img_size[0], new_img_size[1], 3)),
        layers.experimental.preprocessing.RandomFlip('horizontal'),
        layers.experimental.preprocessing.RandomRotation(0.3),
    ]
    # Each stage: 3x3 convolution -> batch norm -> 2x2 max-pool -> light dropout.
    + [
        stage_layer
        for n_filters in (16, 32, 32, 64, 256)
        for stage_layer in (
            layers.Conv2D(n_filters, (3, 3), activation='relu',
                          kernel_initializer='he_uniform', padding='same'),
            layers.BatchNormalization(),
            layers.MaxPool2D(2, 2),
            layers.Dropout(0.2),
        )
    ]
    # Classifier head: stronger dropout after the dense layer, one sigmoid
    # output unit for the binary decision.
    + [
        layers.Flatten(),
        layers.Dense(128, activation='relu', kernel_initializer='he_uniform'),
        layers.Dropout(0.35),
        layers.Dense(1, activation='sigmoid'),
    ]
)
# model = tf.keras.models.Sequential([
# layers.experimental.preprocessing.Rescaling(1./255),
# layers.Conv2D(32, 3, activation='relu'),
# layers.MaxPooling2D(),
# layers.Conv2D(32, 3, activation='relu'),
# layers.MaxPooling2D(),
# layers.Conv2D(32, 3, activation='relu'),
# layers.MaxPooling2D(),
# layers.Flatten(),
# layers.Dense(128, activation='relu'),
# layers.Dense(num_classes)
# ])
# Compile for binary classification. `learning_rate` is the supported keyword;
# the `lr` alias is deprecated and rejected by newer Keras releases.
# Alternatives tried: SGD(learning_rate=0.007, momentum=0.9), RMSprop(learning_rate=0.0001)
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['acc'])
#print(model.summary())
class MyThresholdCallback(tf.keras.callbacks.Callback):
    """Stop training once validation accuracy reaches a given threshold.

    Args:
        threshold: value of the ``val_acc`` metric at or above which
            training is stopped.
    """

    def __init__(self, threshold):
        super().__init__()
        self.threshold = threshold

    def on_epoch_end(self, epoch, logs=None):
        """Request a training stop when ``val_acc`` is at or above the threshold.

        Args:
            epoch: index of the epoch that just finished (unused).
            logs: metric dictionary for the epoch; may be ``None`` or lack
                ``val_acc`` (e.g. when no validation data is supplied), in
                which case nothing happens.
        """
        # Tolerate a missing logs dict / missing metric instead of raising.
        val_acc = (logs or {}).get("val_acc")
        if val_acc is not None and val_acc >= self.threshold:
            self.model.stop_training = True
# Callbacks used during training: threshold-based early stop, best-model
# checkpointing and TensorBoard logging.
early_stopping_callback = MyThresholdCallback(threshold=0.951)

# Take the timestamp once so the TensorBoard log folder and the checkpoint
# file of one run always share the same name (two separate now() calls could
# straddle a second boundary).
run_timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
logdir = os.path.join("logs", run_timestamp)
modelfname = run_timestamp + '.h5'
model_folder = pathlib.Path('/CatsAndDogs/model')

# Keep only the weights of the epoch with the lowest validation loss.
mcp_save = tf.keras.callbacks.ModelCheckpoint(
    os.path.join(model_folder, modelfname),
    save_best_only=True,
    monitor='val_loss',
    mode='min',
)
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)
print(logdir)
# # ======= Go to the logs folder within the current project. ===========
# # ======= Open a terminal, activate correct env, run following command to launch tensorboard
# # tensorboard --port=6007 --logdir /CatsAndDogs/logs
# # =====================================================================
# Upper bound on the number of epochs; the threshold callback may stop earlier.
epochs = 300

# Train on the training dataset and validate on the validation dataset after
# every epoch, with TensorBoard logging, checkpointing and early stopping.
history = model.fit(
    x=train_ds,
    epochs=epochs,
    verbose=1,
    callbacks=[tensorboard_callback, mcp_save, early_stopping_callback],
    validation_data=val_ds,
)
#model.save(os.path.join(model_folder, modelfname))
# model.fit(
# train_ds,
# validation_data=val_ds,
# epochs=3
# )
# PLOT LOSS AND ACCURACY
# Pull the per-epoch metric lists recorded by model.fit.
acc, val_acc = history.history['acc'], history.history['val_acc']
loss, val_loss = history.history['loss'], history.history['val_loss']
epochs = np.arange(len(acc))  # x-axis: one point per completed epoch

# One figure per metric: training curve in red, validation curve in blue.
curve_specs = (
    (acc, val_acc, "Training Accuracy", "Validation Accuracy",
     'Training and validation accuracy'),
    (loss, val_loss, "Training Loss", "Validation Loss",
     'Training and validation loss'),
)
for train_curve, val_curve, train_label, val_label, figure_title in curve_specs:
    plt.figure()
    plt.plot(epochs, train_curve, 'r', label=train_label)
    plt.plot(epochs, val_curve, 'b', label=val_label)
    plt.title(figure_title)
    plt.legend()
# Desired output. Charts with training and validation metrics. No crash :)
# Serialize the model architecture to JSON, then rebuild a fresh model from
# that file and load previously saved weights into it.
model_structure_path = pathlib.Path("/CatsAndDogs/model/best_model_v2/model_structure.json")

# serialize model to JSON
model_json = model.to_json()
with open(model_structure_path, "w") as json_file:
    json_file.write(model_json)

# load json and create model; the context manager closes the file even if
# reading fails.
with open(model_structure_path, 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)

# load weights into new model
loaded_model.load_weights(pathlib.Path("/CatsAndDogs/model/best_model_v2/20201022-232355.h5"))
print("Loaded model from disk")

# compile the reloaded model
loaded_model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
# Classify one randomly chosen image from the first validation batch and show
# it together with the prediction.
# NOTE(review): this uses the in-memory `model`, not `loaded_model` — confirm
# that is intended.
for images, labels in val_ds.take(1):
    # Pick among the first 11 images, clamped so a small batch cannot be
    # indexed out of range.
    i = random.randint(0, min(10, int(images.shape[0]) - 1))
    img_array = keras.preprocessing.image.img_to_array(images[i])
    img_array = tf.expand_dims(img_array, 0)  # Create batch axis
    true_class = np.array(labels[i])

    # Run inference once. Thresholding the sigmoid output at 0.5 reproduces
    # what the deprecated Sequential.predict_classes returned.
    predictions = model.predict(img_array)
    score = predictions[0]
    predicted_class = (predictions > 0.5).astype("int32")[0]

    print("Prediction: ",class_names[predicted_class[0]], " | Truth: ", class_names[true_class])
    print("Score: ", np.round(score,2))
    plt.figure()
    plt.imshow(images[i].numpy().astype("uint8"))
    plt.title(class_names[true_class])
    plt.axis("off")
# Classify a single downloaded image with the reloaded model.
# `i` indexes class_names and is used as this image's ground-truth label
# (presumably 0 is the cat class — verify against the printed class_names).
i = 0
img_name = r"Coronavirus-and-Cats-Science-Roundup-Catipilla.jpg"
path_to_Downloaded_images = pathlib.Path("/CatsAndDogs/online_test_images")
img_file_path = os.path.join(path_to_Downloaded_images, img_name)

# Load at the same resolution the network was trained on.
img = keras.preprocessing.image.load_img(img_file_path, target_size=new_img_size)
img_array = keras.preprocessing.image.img_to_array(img)
img_array = tf.expand_dims(img_array, 0)  # Create batch axis

# Run inference once. Thresholding the sigmoid output at 0.5 reproduces what
# the deprecated Sequential.predict_classes returned.
predictions = loaded_model.predict(img_array)
score = predictions[0]
predicted_class = (predictions > 0.5).astype("int32")[0]

print("Prediction: ",class_names[predicted_class[0]], " | Truth: ", class_names[i])
print("Score: ", np.round(score,2))
plt.figure()
plt.imshow(img)
plt.title(class_names[i])
plt.axis("off")
We have developed a Convolutional Neural Network-based model without using any pre-trained models or transfer learning methods. It achieves an accuracy of ~95% on the validation dataset. The original dataset had many corrupted files that needed to be removed. A custom image feeder as well as custom callbacks were created.