In the previous lab, you saw how data augmentation helped improve the model's performance on unseen data. By tweaking the cat and dog training images, the model was able to learn features that are also representative of the validation data. However, applying data augmentation requires a good understanding of your dataset. Simply transforming it randomly will not always yield good results.

In the next cells, you will apply the same techniques to the Horses or Humans dataset and analyze the results.

In [1]:
# First, import the necessary libraries
import os
import logging

import matplotlib.pyplot as plt
import tensorflow as tf

# Suppress TensorFlow info and warning messages
tf.get_logger().setLevel(logging.ERROR)
In [2]:
# Enable memory growth if a GPU is available, so TensorFlow
# doesn't reserve all of the GPU memory at once
gpus = tf.config.experimental.list_physical_devices('GPU')

if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
            print('Using GPU - ', gpu)
    except RuntimeError as e:
        # Memory growth must be set before the GPUs are initialized
        print(e)
Using GPU -  PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')
In [3]:
TRAIN_DIR = 'horse-or-human'
VAL_DIR = 'validation-horse-or-human'

# Directory with training horse pictures
train_horse_dir = os.path.join(TRAIN_DIR, 'horses')

# Directory with training human pictures
train_human_dir = os.path.join(TRAIN_DIR, 'humans')

# Directory with validation horse pictures
validation_horse_dir = os.path.join(VAL_DIR, 'horses')

# Directory with validation human pictures
validation_human_dir = os.path.join(VAL_DIR, 'humans')
In [4]:
# Constants
BATCH_SIZE = 32
IMAGE_SIZE = (300, 300)
LABEL_MODE = 'binary'

# Instantiate the training dataset
train_dataset = tf.keras.utils.image_dataset_from_directory(
    TRAIN_DIR,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    label_mode=LABEL_MODE
    )

# Instantiate the validation set
validation_dataset = tf.keras.utils.image_dataset_from_directory(
    VAL_DIR,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    label_mode=LABEL_MODE
    )

# Optimize the datasets for training
SHUFFLE_BUFFER_SIZE = 1000
PREFETCH_BUFFER_SIZE = tf.data.AUTOTUNE

train_dataset_final = (train_dataset
                       .cache()
                       .shuffle(SHUFFLE_BUFFER_SIZE)
                       .prefetch(PREFETCH_BUFFER_SIZE)
                       )

validation_dataset_final = (validation_dataset
                            .cache()
                            .prefetch(PREFETCH_BUFFER_SIZE)
                            )
Found 1027 files belonging to 2 classes.
Found 256 files belonging to 2 classes.
In [5]:
def create_model():
    '''Builds a CNN for binary image classification'''

    model = tf.keras.models.Sequential([
        tf.keras.Input(shape=(300, 300, 3)),
        # Rescale the pixel values to [0,1]
        tf.keras.layers.Rescaling(1./255),
        # The first convolution
        tf.keras.layers.Conv2D(16, (3,3), activation='relu'),
        tf.keras.layers.MaxPooling2D(2, 2),
        # The second convolution
        tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
        tf.keras.layers.MaxPooling2D(2, 2),
        # The third convolution
        tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
        tf.keras.layers.MaxPooling2D(2, 2),
        # The fourth convolution
        tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
        tf.keras.layers.MaxPooling2D(2, 2),
        # The fifth convolution
        tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
        tf.keras.layers.MaxPooling2D(2, 2),
        # Flatten the results to feed into a DNN
        tf.keras.layers.Flatten(),
        # 512-neuron hidden layer
        tf.keras.layers.Dense(512, activation='relu'),
        # Only 1 output neuron. It will contain a value from 0 to 1,
        # where 0 is for one class ('horses') and 1 for the other ('humans')
        tf.keras.layers.Dense(1, activation='sigmoid')
        ])

    return model
In [6]:
# Define fill mode.
FILL_MODE = 'nearest'

# Create the augmentation model.
data_augmentation = tf.keras.Sequential([
        # Specify the input shape.
        tf.keras.Input(shape=(300,300,3)),
        # Add the augmentation layers
        tf.keras.layers.RandomFlip("horizontal"),
        tf.keras.layers.RandomRotation(0.2, fill_mode=FILL_MODE),
        tf.keras.layers.RandomTranslation(0.2, 0.2, fill_mode=FILL_MODE),
        tf.keras.layers.RandomZoom(0.2, fill_mode=FILL_MODE),
        tf.keras.layers.RandomFlip("vertical")
        ])
In [7]:
def demo_augmentation(sample_image, model, num_aug):
    '''Takes a single image array, then uses a model to generate num_aug transformations'''

    # Instantiate preview list
    image_preview = []

    # Convert the input image to a PIL image instance
    sample_image_pil = tf.keras.utils.array_to_img(sample_image)

    # Append the result to the list
    image_preview.append(sample_image_pil)

    # Apply the image augmentation and append the results to the list
    for i in range(num_aug):
        sample_image_aug = model(tf.expand_dims(sample_image, axis=0))
        sample_image_aug_pil = tf.keras.utils.array_to_img(tf.squeeze(sample_image_aug))
        image_preview.append(sample_image_aug_pil)

    # Instantiate a subplot
    fig, axes = plt.subplots(1, num_aug + 1, figsize=(12, 12))

    # Preview the images
    for index, ax in enumerate(axes):
        ax.imshow(image_preview[index])
        ax.set_axis_off()

        if index == 0:
            ax.set_title('original')
        else:
            ax.set_title(f'augment {index}')
In [8]:
NUM_AUG = 4

# Get the images from the first batch
sample_batch = next(iter(train_dataset))[0]

# Apply the transformations to the first 4 images
demo_augmentation(sample_batch[0], data_augmentation, NUM_AUG)
demo_augmentation(sample_batch[1], data_augmentation, NUM_AUG)
demo_augmentation(sample_batch[2], data_augmentation, NUM_AUG)
demo_augmentation(sample_batch[3], data_augmentation, NUM_AUG)

# Delete the variable to free up some memory
del sample_batch
[Image output: for each of the four sample images, the original followed by its four augmented variants]
In [9]:
# Instantiate the base model
model_without_aug = create_model()

# Prepend the data augmentation layers to the base model
model_with_aug = tf.keras.models.Sequential([
    data_augmentation,
    model_without_aug
])

# Compile the model
model_with_aug.compile(loss='binary_crossentropy',
                       optimizer=tf.keras.optimizers.RMSprop(learning_rate=1e-4),
                       metrics=['accuracy'])
In [10]:
# Constant for epochs
EPOCHS = 20

# Train the model
history = model_with_aug.fit(
      train_dataset_final,
      epochs=EPOCHS,
      verbose=2,
      validation_data=validation_dataset_final)
Epoch 1/20
33/33 - 34s - loss: 0.6840 - accuracy: 0.5852 - val_loss: 0.6698 - val_accuracy: 0.5000 - 34s/epoch - 1s/step
Epoch 2/20
33/33 - 12s - loss: 0.6409 - accuracy: 0.6475 - val_loss: 0.5923 - val_accuracy: 0.6680 - 12s/epoch - 372ms/step
Epoch 3/20
33/33 - 14s - loss: 0.5943 - accuracy: 0.6884 - val_loss: 0.9234 - val_accuracy: 0.5000 - 14s/epoch - 413ms/step
Epoch 4/20
33/33 - 15s - loss: 0.5515 - accuracy: 0.7186 - val_loss: 0.5291 - val_accuracy: 0.7031 - 15s/epoch - 470ms/step
Epoch 5/20
33/33 - 14s - loss: 0.5342 - accuracy: 0.7283 - val_loss: 0.6924 - val_accuracy: 0.6211 - 14s/epoch - 430ms/step
Epoch 6/20
33/33 - 14s - loss: 0.4900 - accuracy: 0.7507 - val_loss: 0.6713 - val_accuracy: 0.6680 - 14s/epoch - 423ms/step
Epoch 7/20
33/33 - 14s - loss: 0.4814 - accuracy: 0.7605 - val_loss: 0.6603 - val_accuracy: 0.6914 - 14s/epoch - 433ms/step
Epoch 8/20
33/33 - 14s - loss: 0.4819 - accuracy: 0.7683 - val_loss: 1.3719 - val_accuracy: 0.5508 - 14s/epoch - 431ms/step
Epoch 9/20
33/33 - 14s - loss: 0.4683 - accuracy: 0.7809 - val_loss: 0.9719 - val_accuracy: 0.6055 - 14s/epoch - 429ms/step
Epoch 10/20
33/33 - 14s - loss: 0.4475 - accuracy: 0.7858 - val_loss: 0.9553 - val_accuracy: 0.6211 - 14s/epoch - 432ms/step
Epoch 11/20
33/33 - 14s - loss: 0.4322 - accuracy: 0.7887 - val_loss: 0.8269 - val_accuracy: 0.6289 - 14s/epoch - 431ms/step
Epoch 12/20
33/33 - 14s - loss: 0.4132 - accuracy: 0.8014 - val_loss: 1.1940 - val_accuracy: 0.6016 - 14s/epoch - 432ms/step
Epoch 13/20
33/33 - 15s - loss: 0.4018 - accuracy: 0.8160 - val_loss: 1.9636 - val_accuracy: 0.5312 - 15s/epoch - 442ms/step
Epoch 14/20
33/33 - 15s - loss: 0.3884 - accuracy: 0.8238 - val_loss: 1.5335 - val_accuracy: 0.5664 - 15s/epoch - 440ms/step
Epoch 15/20
33/33 - 15s - loss: 0.3670 - accuracy: 0.8374 - val_loss: 1.5629 - val_accuracy: 0.5664 - 15s/epoch - 453ms/step
Epoch 16/20
33/33 - 15s - loss: 0.3591 - accuracy: 0.8354 - val_loss: 1.0477 - val_accuracy: 0.6406 - 15s/epoch - 448ms/step
Epoch 17/20
33/33 - 14s - loss: 0.3644 - accuracy: 0.8315 - val_loss: 0.5701 - val_accuracy: 0.7656 - 14s/epoch - 436ms/step
Epoch 18/20
33/33 - 15s - loss: 0.3305 - accuracy: 0.8549 - val_loss: 2.3625 - val_accuracy: 0.5156 - 15s/epoch - 440ms/step
Epoch 19/20
33/33 - 14s - loss: 0.3059 - accuracy: 0.8705 - val_loss: 1.5070 - val_accuracy: 0.5898 - 14s/epoch - 438ms/step
Epoch 20/20
33/33 - 14s - loss: 0.2930 - accuracy: 0.8744 - val_loss: 1.4151 - val_accuracy: 0.6289 - 14s/epoch - 437ms/step
In [11]:
def plot_loss_acc(history):
    '''Plots the training and validation loss and accuracy from a history object'''
    acc = history.history['accuracy']
    val_acc = history.history['val_accuracy']
    loss = history.history['loss']
    val_loss = history.history['val_loss']

    epochs = range(len(acc))

    fig, ax = plt.subplots(1,2, figsize=(12, 6))
    ax[0].plot(epochs, acc, 'bo', label='Training accuracy')
    ax[0].plot(epochs, val_acc, 'b', label='Validation accuracy')
    ax[0].set_title('Training and validation accuracy')
    ax[0].set_xlabel('epochs')
    ax[0].set_ylabel('accuracy')
    ax[0].legend()

    ax[1].plot(epochs, loss, 'bo', label='Training Loss')
    ax[1].plot(epochs, val_loss, 'b', label='Validation Loss')
    ax[1].set_title('Training and validation loss')
    ax[1].set_xlabel('epochs')
    ax[1].set_ylabel('loss')
    ax[1].legend()

    plt.show()
In [12]:
# Plot training results
plot_loss_acc(history)
[Image output: training and validation accuracy and loss plots]

As you can see in the results, the preprocessing techniques used to augment the data did not help much. The validation accuracy fluctuates instead of trending upward like the training accuracy. This might be because the augmented training data still does not represent the features in the validation set. For example, some human or horse poses in the validation set cannot be mimicked by the augmented images. It might also be that the model is learning the backgrounds of the training images as well, so the plain white background of the validation set throws its predictions off.
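If you want to experiment, one possible next step (a sketch only, not the lab's prescribed fix) is to drop the vertical flip, since the validation set contains no upside-down horses or humans, and to add contrast jitter so the model relies less on background intensity as a cue. The layers below are standard Keras preprocessing layers, but the 0.2 factors are illustrative guesses, not tuned values:

# A possible revised augmentation model (hypothetical; results not shown here)
data_augmentation_v2 = tf.keras.Sequential([
    tf.keras.Input(shape=(300, 300, 3)),
    # Keep the horizontal flip: mirrored poses are plausible in this dataset
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.2, fill_mode=FILL_MODE),
    tf.keras.layers.RandomTranslation(0.2, 0.2, fill_mode=FILL_MODE),
    tf.keras.layers.RandomZoom(0.2, fill_mode=FILL_MODE),
    # No vertical flip: upside-down subjects never appear in the validation set.
    # Jitter the contrast so the white background is less of a cue.
    tf.keras.layers.RandomContrast(0.2)
    ])

You could then rebuild model_with_aug around data_augmentation_v2 and rerun the training cell to compare the curves.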

In [ ]: