In the previous lab, you saw that a high training accuracy does not automatically mean a good predictive model: the model can still perform poorly on new data because it has overfit to the training set. In this lab, you will see how to avoid that using data augmentation. This increases the amount of training data by modifying the properties of the existing training data. For example, with image data, you can apply preprocessing techniques such as rotating, flipping, shearing, or zooming to your existing images to simulate other data that the model should also learn from. This way, the model sees more variety in the images during training, so it will infer better on new, previously unseen data.
import os
import pickle
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import logging
# Keep TensorFlow's Python logger quiet: only ERROR and above are reported.
tf.get_logger().setLevel(logging.ERROR)

# Enable memory growth on every visible GPU.
# `tf.config.list_physical_devices` is the stable spelling of this call; the
# `tf.config.experimental` variant used previously is an alias of it.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Per the TensorFlow docs, memory growth has to be configured before
        # the GPUs are initialized. Report the problem and carry on with the
        # default allocation behaviour instead of aborting the run.
        print(e)
# Root folder of the dataset; the split folders are looked up beneath it.
BASE_DIR = 'cats_and_dogs_filtered'
train_dir, validation_dir = (
    os.path.join(BASE_DIR, split_name) for split_name in ('train', 'validation')
)

# Per-class picture folders of the training split
train_cats_dir, train_dogs_dir = (
    os.path.join(train_dir, class_name) for class_name in ('cats', 'dogs')
)

# Per-class picture folders of the validation split
validation_cats_dir, validation_dogs_dir = (
    os.path.join(validation_dir, class_name) for class_name in ('cats', 'dogs')
)
# Training dataset: 150x150 images in batches of 20 with binary labels,
# read from the folders under `train_dir`
train_dataset = tf.keras.utils.image_dataset_from_directory(
    directory=train_dir,
    label_mode='binary',
    batch_size=20,
    image_size=(150, 150),
)

# Validation dataset: same settings, read from `validation_dir`
validation_dataset = tf.keras.utils.image_dataset_from_directory(
    directory=validation_dir,
    label_mode='binary',
    batch_size=20,
    image_size=(150, 150),
)
# Input-pipeline settings used for both datasets
SHUFFLE_BUFFER_SIZE = 1000
PREFETCH_BUFFER_SIZE = tf.data.AUTOTUNE

# Training pipeline, one stage at a time: cache -> shuffle -> prefetch
train_dataset_final = train_dataset.cache()
train_dataset_final = train_dataset_final.shuffle(SHUFFLE_BUFFER_SIZE)
train_dataset_final = train_dataset_final.prefetch(PREFETCH_BUFFER_SIZE)

# Validation pipeline: cache -> prefetch (no shuffle stage)
validation_dataset_final = validation_dataset.cache()
validation_dataset_final = validation_dataset_final.prefetch(PREFETCH_BUFFER_SIZE)
Found 3000 files belonging to 2 classes. Found 1000 files belonging to 2 classes.
def create_model():
    '''Builds the baseline CNN: four Conv2D/MaxPooling2D stages and a dense head.

    Returns:
      An uncompiled `tf.keras.models.Sequential` model that takes
      150x150x3 inputs and ends in a single sigmoid unit.
    '''
    # Input stage: fixed input shape, pixel values scaled by 1/255
    stack = [
        tf.keras.Input(shape=(150, 150, 3)),
        tf.keras.layers.Rescaling(1./255),
    ]

    # Convolutional stages: a 3x3 ReLU convolution followed by 2x2 max pooling
    for num_filters in (32, 64, 128, 128):
        stack.append(tf.keras.layers.Conv2D(num_filters, (3, 3), activation='relu'))
        stack.append(tf.keras.layers.MaxPooling2D(2, 2))

    # Classifier head
    stack.append(tf.keras.layers.Flatten())
    stack.append(tf.keras.layers.Dense(512, activation='relu'))
    stack.append(tf.keras.layers.Dense(1, activation='sigmoid'))

    return tf.keras.models.Sequential(stack)
# Number of training epochs
EPOCHS = 20

# Fresh, untrained baseline model
model = create_model()

# Training configuration: RMSprop (learning rate 1e-4), binary cross-entropy
# loss, accuracy as the reported metric
model.compile(
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Fit on the training pipeline and evaluate on the validation pipeline
# after every epoch
history = model.fit(
    x=train_dataset_final,
    validation_data=validation_dataset_final,
    epochs=EPOCHS,
    verbose=2,
)
Epoch 1/20 150/150 - 18s - loss: 0.6832 - accuracy: 0.5547 - val_loss: 0.6598 - val_accuracy: 0.5910 - 18s/epoch - 120ms/step Epoch 2/20 150/150 - 5s - loss: 0.6434 - accuracy: 0.6233 - val_loss: 0.6303 - val_accuracy: 0.6300 - 5s/epoch - 32ms/step Epoch 3/20 150/150 - 5s - loss: 0.5976 - accuracy: 0.6770 - val_loss: 0.5726 - val_accuracy: 0.7010 - 5s/epoch - 32ms/step Epoch 4/20 150/150 - 5s - loss: 0.5559 - accuracy: 0.7160 - val_loss: 0.5652 - val_accuracy: 0.7130 - 5s/epoch - 32ms/step Epoch 5/20 150/150 - 5s - loss: 0.5293 - accuracy: 0.7383 - val_loss: 0.5619 - val_accuracy: 0.7140 - 5s/epoch - 32ms/step Epoch 6/20 150/150 - 5s - loss: 0.5017 - accuracy: 0.7627 - val_loss: 0.5195 - val_accuracy: 0.7520 - 5s/epoch - 32ms/step Epoch 7/20 150/150 - 5s - loss: 0.4709 - accuracy: 0.7790 - val_loss: 0.4983 - val_accuracy: 0.7680 - 5s/epoch - 32ms/step Epoch 8/20 150/150 - 5s - loss: 0.4435 - accuracy: 0.7960 - val_loss: 0.5131 - val_accuracy: 0.7380 - 5s/epoch - 32ms/step Epoch 9/20 150/150 - 5s - loss: 0.4145 - accuracy: 0.8087 - val_loss: 0.4830 - val_accuracy: 0.7850 - 5s/epoch - 32ms/step Epoch 10/20 150/150 - 5s - loss: 0.3826 - accuracy: 0.8273 - val_loss: 0.5368 - val_accuracy: 0.7510 - 5s/epoch - 33ms/step Epoch 11/20 150/150 - 5s - loss: 0.3631 - accuracy: 0.8373 - val_loss: 0.5050 - val_accuracy: 0.7860 - 5s/epoch - 33ms/step Epoch 12/20 150/150 - 5s - loss: 0.3344 - accuracy: 0.8563 - val_loss: 0.4959 - val_accuracy: 0.7680 - 5s/epoch - 33ms/step Epoch 13/20 150/150 - 5s - loss: 0.3085 - accuracy: 0.8713 - val_loss: 0.4725 - val_accuracy: 0.7900 - 5s/epoch - 33ms/step Epoch 14/20 150/150 - 5s - loss: 0.2883 - accuracy: 0.8793 - val_loss: 0.5042 - val_accuracy: 0.7700 - 5s/epoch - 33ms/step Epoch 15/20 150/150 - 5s - loss: 0.2660 - accuracy: 0.8893 - val_loss: 0.5248 - val_accuracy: 0.7720 - 5s/epoch - 33ms/step Epoch 16/20 150/150 - 5s - loss: 0.2373 - accuracy: 0.9003 - val_loss: 0.4794 - val_accuracy: 0.7970 - 5s/epoch - 33ms/step Epoch 17/20 150/150 - 
5s - loss: 0.2145 - accuracy: 0.9147 - val_loss: 0.5293 - val_accuracy: 0.7920 - 5s/epoch - 33ms/step Epoch 18/20 150/150 - 5s - loss: 0.1944 - accuracy: 0.9217 - val_loss: 0.5242 - val_accuracy: 0.7970 - 5s/epoch - 33ms/step Epoch 19/20 150/150 - 5s - loss: 0.1718 - accuracy: 0.9390 - val_loss: 0.5598 - val_accuracy: 0.7890 - 5s/epoch - 33ms/step Epoch 20/20 150/150 - 5s - loss: 0.1521 - accuracy: 0.9453 - val_loss: 0.6685 - val_accuracy: 0.7600 - 5s/epoch - 33ms/step
def plot_loss_acc(history):
    '''Draws training/validation accuracy (left) and loss (right) per epoch.

    Args:
      history: object whose `history` attribute maps the keys 'accuracy',
        'val_accuracy', 'loss' and 'val_loss' to per-epoch value sequences
        (for example, the object returned by `model.fit`).
    '''
    # Read all four curves up front so a missing key fails before any drawing
    curves = {
        key: history.history[key]
        for key in ('accuracy', 'val_accuracy', 'loss', 'val_loss')
    }
    epochs = range(len(curves['accuracy']))

    fig, ax = plt.subplots(1, 2, figsize=(12, 6))

    # (panel, metric key, metric name as spelled in the legend)
    panels = (
        (ax[0], 'accuracy', 'accuracy'),
        (ax[1], 'loss', 'Loss'),
    )
    for panel, key, legend_name in panels:
        # Training values as dots, validation values as a solid line
        panel.plot(epochs, curves[key], 'bo', label=f'Training {legend_name}')
        panel.plot(epochs, curves[f'val_{key}'], 'b', label=f'Validation {legend_name}')
        panel.set_title(f'Training and validation {key}')
        panel.set_xlabel('epochs')
        panel.set_ylabel(key)
        panel.legend()

    plt.show()
# Plot the baseline model's training/validation accuracy and loss curves
plot_loss_acc(history)
From the results above, you'll see the training accuracy is more than 90%, and the validation accuracy is in the 70%-80% range. This is a great example of overfitting -- which, in short, means that the model does very well with images it has seen before, but not so well with images it hasn't.
Data augmentation
One simple method to avoid overfitting is to augment the images. If you think about it, most pictures of a cat are very similar -- the ears are at the top, the eyes are below the ears etc. Things like the distance between the eyes and ears will always be quite similar too.
What if you tweak the images a bit -- rotate the image, squash it, etc.? That's what image augmentation is all about.
To do that, you will build a data augmentation model with preprocessing layers for image augmentation. This will transform the data during training to introduce variations of the same image. Let's quickly go over the layers you will use in this exercise.
- RandomFlip is for randomly flipping the images horizontally, vertically, or both.
- RandomRotation rotates the image by an angle within a given range.
- RandomTranslation shifts pictures vertically and horizontally.
- RandomZoom zooms into or out of the images.
In addition, some of these layers have a fill_mode parameter. This is the strategy used for filling in newly created pixels, which can appear after a rotation or a width/height shift.
The code below will create this model with some set parameters. After you complete this lab, feel free to modify these and see the impact on the results.
# Fill strategy handed to every augmentation layer that accepts `fill_mode`
FILL_MODE = 'nearest'

# Augmentation pipeline for 150x150x3 images:
# horizontal flip -> rotation -> translation -> zoom
data_augmentation = tf.keras.models.Sequential([
    tf.keras.Input(shape=(150, 150, 3)),
    tf.keras.layers.RandomFlip(mode="horizontal"),
    tf.keras.layers.RandomRotation(factor=0.2, fill_mode=FILL_MODE),
    tf.keras.layers.RandomTranslation(
        height_factor=0.2,
        width_factor=0.2,
        fill_mode=FILL_MODE,
    ),
    tf.keras.layers.RandomZoom(height_factor=0.2, fill_mode=FILL_MODE),
])
You will define a utility function that lets you preview what the transformed images look like. It takes a sample image and displays it alongside a given number of augmented images generated by the model you defined above.
def demo_augmentation(sample_image, model, num_aug):
    '''Shows a sample image next to randomly augmented versions of it.

    Args:
      sample_image: a single image without a batch axis, in a form accepted
        by `tf.keras.utils.array_to_img` (for example, one element of a batch).
      model: Keras model holding the augmentation layers. It is called on a
        batch containing only `sample_image`.
      num_aug (int): number of augmented variants to generate. With 0, only
        the original image is shown.
    '''
    # The untouched input, converted to a PIL image, is always the first panel
    image_preview = [tf.keras.utils.array_to_img(sample_image)]

    # The model works on batches; adding the batch axis does not depend on the
    # loop, so do it once.
    batched_image = tf.expand_dims(sample_image, axis=0)

    for _ in range(num_aug):
        # training=True requests the random transforms explicitly instead of
        # relying on the default the layers use when called outside of fit().
        sample_image_aug = model(batched_image, training=True)
        # Drop only the batch axis, so a size-1 channel axis is preserved
        image_preview.append(
            tf.keras.utils.array_to_img(tf.squeeze(sample_image_aug, axis=0))
        )

    # squeeze=False makes `axes` a 2-D array even for a single panel, so the
    # loop below also works when num_aug is 0.
    fig, axes = plt.subplots(1, len(image_preview), figsize=(12, 12), squeeze=False)

    for index, (ax, image) in enumerate(zip(axes[0], image_preview)):
        ax.imshow(image)
        ax.set_axis_off()
        ax.set_title('original' if index == 0 else f'augment {index}')
# Take the first batch of the training dataset and keep only its images
# (element 0 of the batch)
sample_batch = next(iter(train_dataset.take(1)))[0]
print(f'images per batch: {len(sample_batch)}')
images per batch: 20
# Number of augmented variants to draw per image
NUM_AUG = 4

# Preview the transformations on the first 4 images of the batch
for image_index in range(4):
    demo_augmentation(sample_batch[image_index], data_augmentation, NUM_AUG)

# Neither the batch nor the loop index is needed past this point
del sample_batch, image_index
Now that you have seen what the preprocessing layers do, you can prepend them to the base model so that it receives transformed images. Do note that these layers are only active while training. They are automatically disabled during prediction and evaluation.
# Fresh, untrained copy of the base CNN
model_without_aug = create_model()

# Full model: the augmentation pipeline first, then the base CNN
model_with_aug = tf.keras.models.Sequential(
    layers=[data_augmentation, model_without_aug],
)

# Same training configuration as the baseline: RMSprop (learning rate 1e-4),
# binary cross-entropy loss, accuracy metric
model_with_aug.compile(
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
# The course suggests 80 epochs for this run; 20 are used here to save
# resources and finish faster.
EPOCHS = 20

# Train the model that includes the augmentation layers
history_with_aug = model_with_aug.fit(
    x=train_dataset_final,
    validation_data=validation_dataset_final,
    epochs=EPOCHS,
    verbose=2,
)
Epoch 1/20 150/150 - 32s - loss: 0.6216 - accuracy: 0.6613 - val_loss: 0.6123 - val_accuracy: 0.6620 - 32s/epoch - 214ms/step Epoch 2/20 150/150 - 36s - loss: 0.6201 - accuracy: 0.6540 - val_loss: 0.6434 - val_accuracy: 0.6100 - 36s/epoch - 239ms/step Epoch 3/20 150/150 - 37s - loss: 0.6079 - accuracy: 0.6707 - val_loss: 0.6102 - val_accuracy: 0.6740 - 37s/epoch - 245ms/step Epoch 4/20 150/150 - 36s - loss: 0.6068 - accuracy: 0.6680 - val_loss: 0.6000 - val_accuracy: 0.6740 - 36s/epoch - 241ms/step Epoch 5/20 150/150 - 36s - loss: 0.6013 - accuracy: 0.6713 - val_loss: 0.6656 - val_accuracy: 0.6430 - 36s/epoch - 238ms/step Epoch 6/20 150/150 - 35s - loss: 0.5925 - accuracy: 0.6747 - val_loss: 0.5672 - val_accuracy: 0.7210 - 35s/epoch - 232ms/step Epoch 7/20 150/150 - 35s - loss: 0.5921 - accuracy: 0.6883 - val_loss: 0.5859 - val_accuracy: 0.7000 - 35s/epoch - 230ms/step Epoch 8/20 150/150 - 34s - loss: 0.5841 - accuracy: 0.6970 - val_loss: 0.6375 - val_accuracy: 0.6590 - 34s/epoch - 229ms/step Epoch 9/20 150/150 - 35s - loss: 0.5866 - accuracy: 0.6870 - val_loss: 0.6376 - val_accuracy: 0.6460 - 35s/epoch - 233ms/step Epoch 10/20 150/150 - 36s - loss: 0.5787 - accuracy: 0.6910 - val_loss: 0.6150 - val_accuracy: 0.6480 - 36s/epoch - 240ms/step Epoch 11/20 150/150 - 35s - loss: 0.5714 - accuracy: 0.7000 - val_loss: 0.7421 - val_accuracy: 0.6210 - 35s/epoch - 232ms/step Epoch 12/20 150/150 - 35s - loss: 0.5686 - accuracy: 0.7053 - val_loss: 0.5311 - val_accuracy: 0.7520 - 35s/epoch - 232ms/step Epoch 13/20 150/150 - 35s - loss: 0.5587 - accuracy: 0.7033 - val_loss: 0.5469 - val_accuracy: 0.7270 - 35s/epoch - 234ms/step Epoch 14/20 150/150 - 35s - loss: 0.5623 - accuracy: 0.7103 - val_loss: 0.6007 - val_accuracy: 0.6920 - 35s/epoch - 232ms/step Epoch 15/20 150/150 - 35s - loss: 0.5497 - accuracy: 0.7187 - val_loss: 0.5373 - val_accuracy: 0.7470 - 35s/epoch - 233ms/step Epoch 16/20 150/150 - 35s - loss: 0.5579 - accuracy: 0.7187 - val_loss: 0.5115 - val_accuracy: 0.7680 - 
35s/epoch - 237ms/step Epoch 17/20 150/150 - 35s - loss: 0.5541 - accuracy: 0.7230 - val_loss: 0.4991 - val_accuracy: 0.7820 - 35s/epoch - 235ms/step Epoch 18/20 150/150 - 35s - loss: 0.5492 - accuracy: 0.7157 - val_loss: 0.5024 - val_accuracy: 0.7760 - 35s/epoch - 230ms/step Epoch 19/20 150/150 - 35s - loss: 0.5441 - accuracy: 0.7243 - val_loss: 0.5497 - val_accuracy: 0.7320 - 35s/epoch - 232ms/step Epoch 20/20 150/150 - 35s - loss: 0.5444 - accuracy: 0.7203 - val_loss: 0.5685 - val_accuracy: 0.7330 - 35s/epoch - 232ms/step
# Plot the training/validation curves of the model trained with augmentation
plot_loss_acc(history_with_aug)
As you can see, the training accuracy has gone down compared to the baseline. (You will observe a sharp difference in the accuracy and loss graphs if you run for 80 epochs, as suggested.) This is expected because, as a result of data augmentation, there is more variety in the images, so the model needs more epochs to learn from them. The good thing is that the validation accuracy is no longer stalling and is more in line with the training results. This means that the model is now performing better on unseen data.