The Horses and Humans dataset is commonly used for learning CNNs because it involves classifying complex real-world images, unlike simpler datasets like Fashion MNIST. This dataset contains color images of horses and humans in various poses, making it a great choice for understanding CNN concepts such as feature extraction, convolutions, and pooling in a more realistic setting.

Compared to Fashion MNIST, which consists of grayscale, low-resolution images of clothing items (shoes, shirts, etc.), the Horses and Humans dataset presents more challenges. It includes variations in lighting, background noise, and object orientation, which makes CNNs work harder to extract meaningful features. This helps learners understand how convolutional layers detect edges, textures, and complex patterns, which is closer to real-world applications like medical imaging or object detection.

Moreover, Fashion MNIST has structured and well-separated categories, making classification easier. However, distinguishing between a horse and a human involves more fine-grained features, making it a better dataset for understanding CNNs in practical scenarios.

Thus, working with Horses and Humans helps learners gain deeper insights into how CNNs handle real-world images, improving their ability to train models beyond simple datasets like Fashion MNIST.

Importing all the necessary libraries

In [1]:
import os
import random
import numpy as np
import tensorflow as tf

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

Fetching data from directory

Creating subdirectories within a base directory helps in organizing data efficiently, especially for image classification tasks. By structuring data into labeled folders (e.g., /train/horses/, /train/humans/, /validation/horses/, /validation/humans/), deep learning frameworks like TensorFlow can automatically load and label images using functions like image_dataset_from_directory(). This approach makes data preprocessing easier, ensures a clear separation between training and validation sets, and enables efficient batch loading and augmentation (which we will see later). It also improves code maintainability by keeping datasets structured, allowing for better scalability and automation in model training.
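
As a quick reference, the layout described above can be inspected with a few lines of standard-library code (a minimal sketch; it assumes the two folder names used throughout this notebook already exist next to it):

# Sketch: print how many images each class subfolder holds.
# Expected layout: <base>/horses/*.png and <base>/humans/*.png
for base_dir in ['horse-or-human', 'validation-horse-or-human']:
    for class_name in sorted(os.listdir(base_dir)):
        class_dir = os.path.join(base_dir, class_name)
        if os.path.isdir(class_dir):
            print(f'{class_dir}: {len(os.listdir(class_dir))} images')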

In [2]:
TRAIN_DIR = 'horse-or-human'  # Base folder which has the images
VALIDATION_DIR = 'validation-horse-or-human'

print('Files in current directory : ', os.listdir())
print(f'Subfolders within {TRAIN_DIR} are : {os.listdir(TRAIN_DIR)}')
Files in current directory :  ['.ipynb_checkpoints', 'DL_PART_1.ipynb', 'DL_PART_2.ipynb', 'DL_PART_3.ipynb', 'DL_PART_4.ipynb', 'horse-or-human', 'validation-horse-or-human']
Subfolders within horse-or-human are : ['horses', 'humans']
In [4]:
TRAIN_HORSE_DIR = os.path.join(TRAIN_DIR, 'horses')
TRAIN_HUMAN_DIR = os.path.join(TRAIN_DIR, 'humans')
VAL_HORSE_DIR = os.path.join(VALIDATION_DIR, 'horses')
VAL_HUMAN_DIR = os.path.join(VALIDATION_DIR, 'humans')

train_horse_names = os.listdir(TRAIN_HORSE_DIR)
train_human_names = os.listdir(TRAIN_HUMAN_DIR)
val_horse_names = os.listdir(VAL_HORSE_DIR)
val_human_names = os.listdir(VAL_HUMAN_DIR)

print('5 files from Horse directory : ', train_horse_names[:5])
print('5 files from Human directory : ', train_human_names[:5])
5 files from Horse directory :  ['horse01-0.png', 'horse01-1.png', 'horse01-2.png', 'horse01-3.png', 'horse01-4.png']
5 files from Human directory :  ['human01-00.png', 'human01-01.png', 'human01-02.png', 'human01-03.png', 'human01-04.png']
In [5]:
print('Total number of images in Horse category : ', len(train_horse_names))
print('Total number of images in Human category : ', len(train_human_names))

print('Total number of images in Horse category in Validation : ', len(val_horse_names))
print('Total number of images in Human category in Validation : ', len(val_human_names))
Total number of images in Horse category :  500
Total number of images in Human category :  527
Total number of images in Horse category in Validation :  128
Total number of images in Human category in Validation :  128
In [6]:
# Let's visualize a few random images
# To output images in 4 by 4 configuration
nrows = 4
ncols = 4

fig = plt.gcf()
fig.set_size_inches(ncols * 3, nrows * 3)

next_horse_pic = [os.path.join(TRAIN_HORSE_DIR, fname) for fname in random.sample(train_horse_names, k=8)]
next_human_pic = [os.path.join(TRAIN_HUMAN_DIR, fname) for fname in random.sample(train_human_names, k=8)]

for i, img_path in enumerate(next_horse_pic + next_human_pic):
    sp = plt.subplot(nrows, ncols, i+1)
    sp.axis('off')
    img = mpimg.imread(img_path)
    plt.imshow(img)

plt.show()
(Output: 4x4 grid of eight random horse images and eight random human images)

Building a small model from scratch

In [7]:
# Building a CNN model using multiple Convolution and Maxpooling layers to better understand features

model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(300,300,3)),
    tf.keras.layers.Conv2D(16, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.summary()
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type)                         ┃ Output Shape                ┃         Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ conv2d (Conv2D)                      │ (None, 298, 298, 16)        │             448 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d (MaxPooling2D)         │ (None, 149, 149, 16)        │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_1 (Conv2D)                    │ (None, 147, 147, 32)        │           4,640 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_1 (MaxPooling2D)       │ (None, 73, 73, 32)          │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_2 (Conv2D)                    │ (None, 71, 71, 64)          │          18,496 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_2 (MaxPooling2D)       │ (None, 35, 35, 64)          │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_3 (Conv2D)                    │ (None, 33, 33, 64)          │          36,928 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_3 (MaxPooling2D)       │ (None, 16, 16, 64)          │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ conv2d_4 (Conv2D)                    │ (None, 14, 14, 64)          │          36,928 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ max_pooling2d_4 (MaxPooling2D)       │ (None, 7, 7, 64)            │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ flatten (Flatten)                    │ (None, 3136)                │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense (Dense)                        │ (None, 512)                 │       1,606,144 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_1 (Dense)                      │ (None, 1)                   │             513 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
 Total params: 1,704,097 (6.50 MB)
 Trainable params: 1,704,097 (6.50 MB)
 Non-trainable params: 0 (0.00 B)
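
As a sanity check on the summary above, the parameter counts can be reproduced by hand (a sketch of the arithmetic, separate from the notebook's cells):

# Conv2D parameters = (kernel_height * kernel_width * input_channels + 1 bias) * filters
print((3 * 3 * 3 + 1) * 16)     # conv2d    -> 448
print((3 * 3 * 16 + 1) * 32)    # conv2d_1  -> 4,640
print((3 * 3 * 32 + 1) * 64)    # conv2d_2  -> 18,496
# Dense parameters = inputs * units + units; the Flatten output is 7 * 7 * 64 = 3136
print(3136 * 512 + 512)         # dense     -> 1,606,144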
In [8]:
model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001),
              loss = 'binary_crossentropy',
              metrics=['accuracy'])

The above code compiles a CNN model with the following settings:

  • Optimizer: RMSprop(learning_rate=0.001) – Helps adjust weights efficiently using adaptive learning rates.
  • Loss Function: 'binary_crossentropy' – Used for binary classification tasks (e.g., classifying images into two categories like Horses vs. Humans); a small numeric check of this loss is sketched just after this list.
  • Metrics: ['accuracy'] – Tracks the model’s performance by measuring the percentage of correct predictions.
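
As a hedged illustration of the binary cross-entropy loss named above (a sketch, not part of the training pipeline): for a true label y and a predicted probability p, the loss is -(y·log(p) + (1 - y)·log(1 - p)), averaged over the batch.

# Numeric check of binary cross-entropy on two hand-picked examples
bce = tf.keras.losses.BinaryCrossentropy()
y_true = [[1.0], [0.0]]   # true labels
y_pred = [[0.9], [0.2]]   # predicted probabilities
print(bce(y_true, y_pred).numpy())   # approximately -(log(0.9) + log(0.8)) / 2 ≈ 0.164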

Preprocessing the dataset before fitting the model

The code below loads images from the directories and prepares them for training and validation of the CNN model.

  • image_dataset_from_directory() automatically loads, labels, and batches images from the given directories (TRAIN_DIR and VALIDATION_DIR).
  • image_size=(300,300) resizes all images to 300x300 pixels for consistency.
  • batch_size=32 processes images in groups of 32 for efficient training.
  • label_mode='binary' assigns labels as 0 or 1, making it suitable for binary classification tasks (e.g., Horses vs. Humans).
In [9]:
train_dataset = tf.keras.utils.image_dataset_from_directory(
    TRAIN_DIR,
    image_size=(300,300),
    batch_size=32,
    label_mode='binary'
)

validation_dataset = tf.keras.utils.image_dataset_from_directory(
    VALIDATION_DIR,
    image_size=(300,300),
    batch_size=32,
    label_mode='binary'
)

dataset_type = type(train_dataset)
print('Train dataset inherits from tf.data.Dataset : ', issubclass(dataset_type, tf.data.Dataset))
Found 1027 files belonging to 2 classes.
Found 256 files belonging to 2 classes.
Train dataset inherits from tf.data.Dataset :  True
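
A quick sanity check (a sketch): the class_names attribute is populated by image_dataset_from_directory from the subfolder names, and its alphabetical order defines the binary labels (here 0 = horses, 1 = humans). Note that this attribute is only available on the dataset as returned above, before any .map() calls:

print('Class names : ', train_dataset.class_names)   # expected: ['horses', 'humans']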
In [10]:
# Get one batch from the dataset to take a look at the shape and structure of the image

# Takes one batch of images and labels from train_dataset
sample_batch = list(train_dataset.take(1))[0]

print('Type of sample batch : ', type(sample_batch))
print('Number of elements in the batch : ', len(sample_batch))


# image_batch contains image tensors, and label_batch contains corresponding binary labels
image_batch = sample_batch[0]
label_batch = sample_batch[1]

print('Image batch shape : ', image_batch.shape)
print('Label batch shape : ', label_batch.shape)
print('Maximum and Minimum Values from batch - ',np.max(image_batch[0].numpy()), np.min(image_batch[0].numpy()))
Type of sample batch :  <class 'tuple'>
Number of elements in the batch :  2
Image batch shape :  (32, 300, 300, 3)
Label batch shape :  (32, 1)
Maximum and Minimum Values from batch -  255.0 1.0
In [11]:
len(image_batch), len(image_batch[0]), len(image_batch[0][0]), len(image_batch[0][0][0])
Out[11]:
(32, 300, 300, 3)

The above cell returns the shape details of an image batch:

  • Batch size
  • Image height
  • Image width
  • Number of channels (3 for RGB images)

The code below creates a Rescaling layer to normalize pixel values from [0,255] → [0,1] for better training stability.

In [12]:
# TensorFlow provides a Rescaling layer that performs the same normalization we previously did by hand
rescale_layer = tf.keras.layers.Rescaling(scale=1.0/255.0)

# performing it on one image
image_scaled = rescale_layer(image_batch[0]).numpy()
print('Maximum and Minimum value after rescale ', np.max(image_scaled), np.min(image_scaled))
Maximum and Minimum value after rescale  1.0 0.003921569
In [14]:
# Using .map() to apply rescaling to each image in train_dataset and validation_dataset
# Ensures all images are normalized before training the model

train_dataset_rescaled = train_dataset.map(lambda image, label : (rescale_layer(image), label))
validation_dataset_rescaled = validation_dataset.map(lambda img, label : (rescale_layer(img), label))
In [15]:
# Let's pick a random batch and check pixel values

sample_batch = list(train_dataset_rescaled.take(1))[0]
image_scaled = sample_batch[0][1].numpy()

np.max(image_scaled), np.min(image_scaled)
Out[15]:
(1.0, 0.015686275)

Explanation of the code below:

  • SHUFFLE_BUFFER_SIZE = 1000
    • Defines the buffer size for shuffling data.
    • A larger value ensures better randomness in the dataset, helping the model generalize better.
    • Here, 1000 means TensorFlow keeps a buffer of 1,000 elements and draws each next element at random from that buffer (a small demonstration follows after this list).


  • PREFETCH_BUFFER_SIZE = tf.data.AUTOTUNE
    • Optimizes dataset loading by prefetching batches during training.
    • tf.data.AUTOTUNE dynamically adjusts the prefetch size for better performance.
    • Loads data in advance while the model is training, reducing bottlenecks.


  • .cache(): Stores the dataset in memory for faster access.
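
To build intuition for the shuffle buffer described above, here is a tiny demonstration on a toy dataset (a sketch, separate from the training pipeline): with a small buffer the output is only locally shuffled, which is why a larger buffer gives better randomness.

toy = tf.data.Dataset.range(10).shuffle(3, seed=42)   # buffer of only 3 elements
print(list(toy.as_numpy_iterator()))                  # elements stay close to their original positions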
In [16]:
# Using .cache() and .prefetch() for efficient batch loading.
    
SHUFFLE_BUFFER_SIZE = 1000
PREFETCH_BUFFER_SIZE = tf.data.AUTOTUNE

train_dataset_final = (train_dataset_rescaled
                       .cache()
                       .shuffle(SHUFFLE_BUFFER_SIZE)
                       .prefetch(PREFETCH_BUFFER_SIZE))

# No .shuffle() because validation data should remain in the same order.
validation_dataset_final = (validation_dataset_rescaled
                            .cache()
                            .prefetch(PREFETCH_BUFFER_SIZE))

Training and evaluating the model

In [18]:
# MODEL TRAINING

history = model.fit(train_dataset_final, validation_data=validation_dataset_final, epochs=15, verbose=2)
Epoch 1/15
33/33 - 8s - 250ms/step - accuracy: 0.7313 - loss: 0.5762 - val_accuracy: 0.9062 - val_loss: 0.2669
Epoch 2/15
33/33 - 8s - 242ms/step - accuracy: 0.8569 - loss: 0.3593 - val_accuracy: 0.8789 - val_loss: 0.3573
Epoch 3/15
33/33 - 8s - 241ms/step - accuracy: 0.9172 - loss: 0.2294 - val_accuracy: 0.5000 - val_loss: 2.6558
Epoch 4/15
33/33 - 8s - 240ms/step - accuracy: 0.9007 - loss: 0.2931 - val_accuracy: 0.8789 - val_loss: 0.5835
Epoch 5/15
33/33 - 8s - 241ms/step - accuracy: 0.9270 - loss: 0.3946 - val_accuracy: 0.8750 - val_loss: 0.9056
Epoch 6/15
33/33 - 8s - 240ms/step - accuracy: 0.9591 - loss: 0.1068 - val_accuracy: 0.8984 - val_loss: 0.5385
Epoch 7/15
33/33 - 8s - 242ms/step - accuracy: 0.9766 - loss: 0.0720 - val_accuracy: 0.8555 - val_loss: 1.4865
Epoch 8/15
33/33 - 8s - 238ms/step - accuracy: 0.9912 - loss: 0.0401 - val_accuracy: 0.8828 - val_loss: 1.1572
Epoch 9/15
33/33 - 8s - 238ms/step - accuracy: 0.9815 - loss: 0.0672 - val_accuracy: 0.8672 - val_loss: 1.1994
Epoch 10/15
33/33 - 8s - 240ms/step - accuracy: 0.9893 - loss: 0.0256 - val_accuracy: 0.8125 - val_loss: 2.3226
Epoch 11/15
33/33 - 8s - 246ms/step - accuracy: 0.9864 - loss: 0.0356 - val_accuracy: 0.8945 - val_loss: 1.5839
Epoch 12/15
33/33 - 8s - 241ms/step - accuracy: 0.9659 - loss: 0.3245 - val_accuracy: 0.8828 - val_loss: 1.6553
Epoch 13/15
33/33 - 8s - 237ms/step - accuracy: 1.0000 - loss: 0.0027 - val_accuracy: 0.8867 - val_loss: 1.4892
Epoch 14/15
33/33 - 8s - 240ms/step - accuracy: 1.0000 - loss: 4.5246e-04 - val_accuracy: 0.8867 - val_loss: 1.8155
Epoch 15/15
33/33 - 8s - 240ms/step - accuracy: 1.0000 - loss: 1.0632e-04 - val_accuracy: 0.8828 - val_loss: 2.2495
In [19]:
# Plotting the training and validation accuracy

acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(acc))

plt.plot(epochs, acc, 'r', label = 'Training accuracy')
plt.plot(epochs, val_acc, 'b', label = 'Validation accuracy')
plt.title('Training and Validation Accuracy')
plt.legend(loc=0)
plt.show()
(Output: plot of training and validation accuracy across epochs)
In [20]:
plt.plot(epochs, loss, 'r', label = 'Training Loss')
plt.plot(epochs, val_loss, 'b', label = 'Validation Loss')
plt.title('Training and Validation Loss')
plt.legend(loc=0)
plt.show()
(Output: plot of training and validation loss across epochs)
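
The curves above show training accuracy reaching 1.0 while the validation loss keeps rising, a typical sign of overfitting. One common mitigation, shown here only as a sketch (it was not used in the run above), is to stop training once the validation loss stops improving:

# Sketch only: early stopping on validation loss
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,                  # stop after 3 epochs without improvement
    restore_best_weights=True    # roll back to the best validation-loss weights
)
# history = model.fit(train_dataset_final,
#                     validation_data=validation_dataset_final,
#                     epochs=15, verbose=2, callbacks=[early_stop])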

Prediction

In [26]:
# predicting a single image

file = os.path.join(VAL_HUMAN_DIR, val_human_names[100])
image = tf.keras.utils.load_img(file, target_size=(300,300))
image = tf.keras.utils.img_to_array(image)
image = rescale_layer(image)
image = np.expand_dims(image, axis = 0)
image.shape
Out[26]:
(1, 300, 300, 3)
In [27]:
prediction = model.predict(image, verbose=False)[0][0]
if prediction > 0.5:
    print('Human')
else:
    print('Horse')
Human
In [40]:
# Let's predict on the complete validation dataset

prediction = model.predict(validation_dataset_final, verbose=False)
labels = np.concatenate([label_batch.numpy() for _, label_batch in validation_dataset_final])
labels = [int(label) for label in labels.flatten()]
prediction = [1 if x[0] > 0.5 else 0 for x in prediction]
In [48]:
from collections import Counter

Counter(prediction)
Out[48]:
Counter({1: 152, 0: 104})
In [49]:
Counter(labels)
Out[49]:
Counter({1: 128, 0: 128})
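
Since the validation pipeline is not shuffled, the predictions and labels above are aligned, so the overall accuracy and a 2x2 confusion matrix can be computed directly (a minimal sketch):

labels_arr = np.array(labels)
preds_arr = np.array(prediction)

print(f'Validation accuracy : {np.mean(labels_arr == preds_arr):.4f}')

# Rows: true class (0 = horse, 1 = human); columns: predicted class
confusion = np.zeros((2, 2), dtype=int)
for true_label, pred_label in zip(labels_arr, preds_arr):
    confusion[true_label, pred_label] += 1
print(confusion)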

Let's visualize the features that the CNN has learned

In [52]:
# This extracts the outputs of the layers in model.layers[1:], which will be used for visualization.
# Note: model.layers does not include the Input, so [1:] actually skips the first Conv2D layer
# and captures outputs from the first MaxPooling2D layer onwards.
successive_outputs = [layer.output for layer in model.layers[1:]]


# Creates a new model called vizualization_model where:
#The input is the same as the original model’s input.
#The outputs are the feature maps from the layers (starting from the second layer) of the original model.
vizualization_model = tf.keras.models.Model(inputs = model.inputs, outputs = successive_outputs)



#Generates file paths for images of horses and humans by combining directory paths (TRAIN_HORSE_DIR, TRAIN_HUMAN_DIR) 
#with filenames (train_horse_names, train_human_names).
horse_image_file = [os.path.join(TRAIN_HORSE_DIR, f) for f in train_horse_names]
human_image_file = [os.path.join(TRAIN_HUMAN_DIR, f) for f in train_human_names]

# Randomly selects an image from either the horse or human images to be visualized.
img_path = random.choice(horse_image_file + human_image_file)


# Loads the image from the randomly selected path (img_path) and resizes it to 300x300 pixels.
img = tf.keras.utils.load_img(img_path, target_size=(300,300))


# Converts the image into a NumPy array and reshapes it to (1, 300, 300, 3) to match the input shape required by the model.
x = tf.keras.utils.img_to_array(img)
x = x.reshape((1,) + x.shape)

# Applies rescaling (dividing pixel values by 255) using the previously defined rescale_layer to normalize the image for the model.
x = rescale_layer(x)


#Passes the image (x) through the visualization model to get the activations (feature maps) from all layers (except the input layer).
successive_feature_maps = vizualization_model.predict(x, verbose=False)


# Retrieves the names of the layers (except the input layer) to label the visualizations.
layer_names = [layer.name for layer in model.layers[1:]]
In [51]:
# Iterates through each layer’s name and its corresponding feature map.
for layer_name, feature_map in zip(layer_names, successive_feature_maps):

    # Checks if the feature map is a 4D array, which typically represents convolutional layers' outputs (batch size, height, width, number of features)
    if len(feature_map.shape) == 4:

        # n_features is the number of filters (feature channels) in the layer, and size is the height/width of the feature map.
        n_features = feature_map.shape[-1]
        size = feature_map.shape[1]

        # Creates an empty grid to display the feature maps. The grid width is n_features * size to accommodate all features.
        display_grid = np.zeros((size, size*n_features))


        # Normalizes and adjusts each feature map channel:
        #   - Centers and scales each channel to a mean of 0 and a standard deviation of 1.
        #   - Multiplies by 64 and adds 128 to adjust contrast and brightness.
        #   - Clips the values to [0, 255], ensuring valid pixel values.
        #   - Fills the grid with the processed channel for visualization.
        for i in range(n_features):
            x = feature_map[0, :, :, i]
            x -= x.mean()
            x /= x.std()
            x *= 64
            x += 128
            x = np.clip(x, 0, 255).astype('uint8')
            display_grid[:, i*size : (i+1)*size] = x

        
        #Adjusts the figure size based on the number of features.
        scale = 20./n_features
        plt.figure(figsize=(scale*n_features, scale))
        plt.title(layer_name)
        plt.grid(False)
        plt.imshow(display_grid, aspect='auto', cmap='BuPu')
C:\Users\nishi\AppData\Local\Temp\ipykernel_2196\1779906593.py:9: RuntimeWarning: invalid value encountered in divide
  x /= x.std()
C:\Users\nishi\AppData\Local\Temp\ipykernel_2196\1779906593.py:12: RuntimeWarning: invalid value encountered in cast
  x = np.clip(x, 0, 255).astype('uint8')
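
These RuntimeWarnings come from feature-map channels that are constant (for example, all zeros after ReLU): their standard deviation is 0, so the division produces NaNs that cannot be cast to uint8. A guarded version of the per-channel normalization could look like this (a sketch; it was not applied in the cell above):

def normalize_channel(channel, eps=1e-8):
    # Scale one feature-map channel into displayable uint8, guarding against zero std.
    channel = channel - channel.mean()
    channel = channel / (channel.std() + eps)   # eps avoids division by zero for dead channels
    return np.clip(channel * 64 + 128, 0, 255).astype('uint8')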
(Output: nine feature-map grids, one for each convolutional and pooling layer that was visualized)