In the previous lab, you saw how data augmentation helped improve the model's performance on unseen data. By tweaking the cat and dog training images, the model was able to learn features that are also representative of the validation data. However, applying data augmentation requires a good understanding of your dataset. Simply transforming it randomly will not always yield good results.
In the next cells, you will apply the same techniques to the Horses or Humans dataset and analyze the results.
# First import the necessary libraries
import os
import random
import matplotlib.pyplot as plt
import tensorflow as tf
import logging
tf.get_logger().setLevel(logging.ERROR)
# Use the GPU if one is available, with memory growth enabled so TensorFlow
# does not allocate all GPU memory up front
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
            print('Using GPU - ', gpu)
    except RuntimeError as e:
        print(e)
Using GPU - PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')
TRAIN_DIR = 'horse-or-human'
VAL_DIR = 'validation-horse-or-human'
# Directory with training horse pictures
train_horse_dir = os.path.join(TRAIN_DIR, 'horses')
# Directory with training human pictures
train_human_dir = os.path.join(TRAIN_DIR, 'humans')
# Directory with validation horse pictures
validation_horse_dir = os.path.join(VAL_DIR, 'horses')
# Directory with validation human pictures
validation_human_dir = os.path.join(VAL_DIR, 'humans')
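Before building the datasets, it can help to confirm that the folders contain what you expect. Below is a minimal sanity check, assuming the dataset has already been downloaded and extracted into the directories defined above:
# Sketch: count the images in each directory (assumes the folders exist)
print(f"training horse images: {len(os.listdir(train_horse_dir))}")
print(f"training human images: {len(os.listdir(train_human_dir))}")
print(f"validation horse images: {len(os.listdir(validation_horse_dir))}")
print(f"validation human images: {len(os.listdir(validation_human_dir))}")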
# Constants
BATCH_SIZE = 32
IMAGE_SIZE = (300, 300)
LABEL_MODE = 'binary'
# Instantiate the training dataset
train_dataset = tf.keras.utils.image_dataset_from_directory(
    TRAIN_DIR,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    label_mode=LABEL_MODE
)
# Instantiate the validation set
validation_dataset = tf.keras.utils.image_dataset_from_directory(
    VAL_DIR,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    label_mode=LABEL_MODE
)
# Optimize the datasets for training
SHUFFLE_BUFFER_SIZE = 1000
PREFETCH_BUFFER_SIZE = tf.data.AUTOTUNE
train_dataset_final = (train_dataset
                       .cache()
                       .shuffle(SHUFFLE_BUFFER_SIZE)
                       .prefetch(PREFETCH_BUFFER_SIZE)
                       )
validation_dataset_final = (validation_dataset
                            .cache()
                            .prefetch(PREFETCH_BUFFER_SIZE)
                            )
Found 1027 files belonging to 2 classes.
Found 256 files belonging to 2 classes.
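If you want to verify what the pipeline produces, you can pull one batch from the dataset and check its shapes. A small sketch; the shapes noted in the comments assume the constants defined earlier:
# Sketch: check the detected class names and the shape of one batch
print(train_dataset.class_names)  # expected: ['horses', 'humans']
for images, labels in train_dataset.take(1):
    print(images.shape)  # (32, 300, 300, 3) given BATCH_SIZE=32 and IMAGE_SIZE=(300, 300)
    print(labels.shape)  # (32, 1) -- label_mode='binary' yields float32 labels of shape (batch, 1)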
def create_model():
    '''Builds a CNN for binary image classification'''
    model = tf.keras.models.Sequential([
        tf.keras.Input(shape=(300, 300, 3)),
        # This will rescale the image to [0,1]
        tf.keras.layers.Rescaling(1./255),
        # This is the first convolution
        tf.keras.layers.Conv2D(16, (3,3), activation='relu'),
        tf.keras.layers.MaxPooling2D(2, 2),
        # The second convolution
        tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
        tf.keras.layers.MaxPooling2D(2, 2),
        # The third convolution
        tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
        tf.keras.layers.MaxPooling2D(2, 2),
        # The fourth convolution
        tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
        tf.keras.layers.MaxPooling2D(2, 2),
        # The fifth convolution
        tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
        tf.keras.layers.MaxPooling2D(2, 2),
        # Flatten the results to feed into a DNN
        tf.keras.layers.Flatten(),
        # 512-neuron hidden layer
        tf.keras.layers.Dense(512, activation='relu'),
        # Only 1 output neuron. It will contain a value from 0 to 1,
        # where 0 is for one class ('horses') and 1 for the other ('humans')
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    return model
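If you are curious how the feature maps shrink through the five convolution and pooling pairs, you can instantiate the model and print a summary. An optional check, not required for the rest of the lab:
# Sketch: preview the layer output shapes of the base model
create_model().summary()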
# Define fill mode.
FILL_MODE = 'nearest'
# Create the augmentation model.
data_augmentation = tf.keras.Sequential([
    # Specify the input shape.
    tf.keras.Input(shape=(300, 300, 3)),
    # Add the augmentation layers
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.2, fill_mode=FILL_MODE),
    tf.keras.layers.RandomTranslation(0.2, 0.2, fill_mode=FILL_MODE),
    tf.keras.layers.RandomZoom(0.2, fill_mode=FILL_MODE),
    tf.keras.layers.RandomFlip("vertical")
])
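All of these augmentation layers are shape-preserving, so the model maps a batch of 300x300 RGB images to a batch of the same shape. A quick sketch to confirm this; the random tensor is just a stand-in for real images:
# Sketch: run a dummy batch through the augmentation model
dummy_batch = tf.random.uniform((4, 300, 300, 3), maxval=255.0)
print(data_augmentation(dummy_batch).shape)  # expected: (4, 300, 300, 3)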
def demo_augmentation(sample_image, model, num_aug):
    '''Takes a single image array, then uses a model to generate num_aug transformations'''
    # Instantiate preview list
    image_preview = []
    # Convert input image to a PIL image instance
    sample_image_pil = tf.keras.utils.array_to_img(sample_image)
    # Append the result to the list
    image_preview.append(sample_image_pil)
    # Apply the image augmentation and append the results to the list
    for _ in range(num_aug):
        sample_image_aug = model(tf.expand_dims(sample_image, axis=0))
        sample_image_aug_pil = tf.keras.utils.array_to_img(tf.squeeze(sample_image_aug))
        image_preview.append(sample_image_aug_pil)
    # Instantiate a subplot
    fig, axes = plt.subplots(1, num_aug + 1, figsize=(12, 12))
    # Preview the images.
    for index, ax in enumerate(axes):
        ax.imshow(image_preview[index])
        ax.set_axis_off()
        if index == 0:
            ax.set_title('original')
        else:
            ax.set_title(f'augment {index}')
NUM_AUG = 4
# Get a batch of images
sample_batch = list(train_dataset.take(1))[0][0]
# Apply the transformations to the first 4 images
demo_augmentation(sample_batch[0], data_augmentation, NUM_AUG)
demo_augmentation(sample_batch[1], data_augmentation, NUM_AUG)
demo_augmentation(sample_batch[2], data_augmentation, NUM_AUG)
demo_augmentation(sample_batch[3], data_augmentation, NUM_AUG)
# Delete the variable to free up some memory
del sample_batch
# Instantiate the base model
model_without_aug = create_model()
# Prepend the data augmentation layers to the base model
model_with_aug = tf.keras.models.Sequential([
    data_augmentation,
    model_without_aug
])
# Compile the model
model_with_aug.compile(loss='binary_crossentropy',
                       optimizer=tf.keras.optimizers.RMSprop(learning_rate=1e-4),
                       metrics=['accuracy'])
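You can confirm that the augmentation layers were prepended correctly by printing a summary of the composite model; each nested Sequential shows up as a single entry. Another optional check:
# Sketch: the composite model should list the augmentation model
# followed by the base CNN as two nested Sequential layers
model_with_aug.summary()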
# Constant for epochs
EPOCHS = 20
# Train the model
history = model_with_aug.fit(
    train_dataset_final,
    epochs=EPOCHS,
    verbose=2,
    validation_data=validation_dataset_final
)
Epoch 1/20
33/33 - 34s - loss: 0.6840 - accuracy: 0.5852 - val_loss: 0.6698 - val_accuracy: 0.5000 - 34s/epoch - 1s/step
Epoch 2/20
33/33 - 12s - loss: 0.6409 - accuracy: 0.6475 - val_loss: 0.5923 - val_accuracy: 0.6680 - 12s/epoch - 372ms/step
Epoch 3/20
33/33 - 14s - loss: 0.5943 - accuracy: 0.6884 - val_loss: 0.9234 - val_accuracy: 0.5000 - 14s/epoch - 413ms/step
Epoch 4/20
33/33 - 15s - loss: 0.5515 - accuracy: 0.7186 - val_loss: 0.5291 - val_accuracy: 0.7031 - 15s/epoch - 470ms/step
Epoch 5/20
33/33 - 14s - loss: 0.5342 - accuracy: 0.7283 - val_loss: 0.6924 - val_accuracy: 0.6211 - 14s/epoch - 430ms/step
Epoch 6/20
33/33 - 14s - loss: 0.4900 - accuracy: 0.7507 - val_loss: 0.6713 - val_accuracy: 0.6680 - 14s/epoch - 423ms/step
Epoch 7/20
33/33 - 14s - loss: 0.4814 - accuracy: 0.7605 - val_loss: 0.6603 - val_accuracy: 0.6914 - 14s/epoch - 433ms/step
Epoch 8/20
33/33 - 14s - loss: 0.4819 - accuracy: 0.7683 - val_loss: 1.3719 - val_accuracy: 0.5508 - 14s/epoch - 431ms/step
Epoch 9/20
33/33 - 14s - loss: 0.4683 - accuracy: 0.7809 - val_loss: 0.9719 - val_accuracy: 0.6055 - 14s/epoch - 429ms/step
Epoch 10/20
33/33 - 14s - loss: 0.4475 - accuracy: 0.7858 - val_loss: 0.9553 - val_accuracy: 0.6211 - 14s/epoch - 432ms/step
Epoch 11/20
33/33 - 14s - loss: 0.4322 - accuracy: 0.7887 - val_loss: 0.8269 - val_accuracy: 0.6289 - 14s/epoch - 431ms/step
Epoch 12/20
33/33 - 14s - loss: 0.4132 - accuracy: 0.8014 - val_loss: 1.1940 - val_accuracy: 0.6016 - 14s/epoch - 432ms/step
Epoch 13/20
33/33 - 15s - loss: 0.4018 - accuracy: 0.8160 - val_loss: 1.9636 - val_accuracy: 0.5312 - 15s/epoch - 442ms/step
Epoch 14/20
33/33 - 15s - loss: 0.3884 - accuracy: 0.8238 - val_loss: 1.5335 - val_accuracy: 0.5664 - 15s/epoch - 440ms/step
Epoch 15/20
33/33 - 15s - loss: 0.3670 - accuracy: 0.8374 - val_loss: 1.5629 - val_accuracy: 0.5664 - 15s/epoch - 453ms/step
Epoch 16/20
33/33 - 15s - loss: 0.3591 - accuracy: 0.8354 - val_loss: 1.0477 - val_accuracy: 0.6406 - 15s/epoch - 448ms/step
Epoch 17/20
33/33 - 14s - loss: 0.3644 - accuracy: 0.8315 - val_loss: 0.5701 - val_accuracy: 0.7656 - 14s/epoch - 436ms/step
Epoch 18/20
33/33 - 15s - loss: 0.3305 - accuracy: 0.8549 - val_loss: 2.3625 - val_accuracy: 0.5156 - 15s/epoch - 440ms/step
Epoch 19/20
33/33 - 14s - loss: 0.3059 - accuracy: 0.8705 - val_loss: 1.5070 - val_accuracy: 0.5898 - 14s/epoch - 438ms/step
Epoch 20/20
33/33 - 14s - loss: 0.2930 - accuracy: 0.8744 - val_loss: 1.4151 - val_accuracy: 0.6289 - 14s/epoch - 437ms/step
def plot_loss_acc(history):
    '''Plots the training and validation loss and accuracy from a history object'''
    acc = history.history['accuracy']
    val_acc = history.history['val_accuracy']
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs = range(len(acc))
    fig, ax = plt.subplots(1, 2, figsize=(12, 6))
    ax[0].plot(epochs, acc, 'bo', label='Training accuracy')
    ax[0].plot(epochs, val_acc, 'b', label='Validation accuracy')
    ax[0].set_title('Training and validation accuracy')
    ax[0].set_xlabel('epochs')
    ax[0].set_ylabel('accuracy')
    ax[0].legend()
    ax[1].plot(epochs, loss, 'bo', label='Training Loss')
    ax[1].plot(epochs, val_loss, 'b', label='Validation Loss')
    ax[1].set_title('Training and validation loss')
    ax[1].set_xlabel('epochs')
    ax[1].set_ylabel('loss')
    ax[1].legend()
    plt.show()
# Plot training results
plot_loss_acc(history)
As you can see in the results, the preprocessing techniques used to augment the data did not help much. The validation accuracy fluctuates instead of trending upward like the training accuracy. This might be because the additional training data still does not represent the features in the validation data. For example, some human or horse poses in the validation set cannot be mimicked by the augmented images. It might also be that the backgrounds of the training images are being learned by the model, so the white background of the validation set is throwing the predictions off.