Let's use the most famous dataset of them all - MNIST, the handwritten digits dataset - for model prediction.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
mnist = tf.keras.datasets.mnist
mnist
<module 'keras.api.datasets.mnist' from 'E:\\7. Deep Learning\\venv\\lib\\site-packages\\keras\\api\\datasets\\mnist\\__init__.py'>
(train_image, train_label), (test_image, test_label) = mnist.load_data()
print('Shape of training images - ', train_image.shape)
print('Shape of training labels - ', train_label.shape)
print('Shape of testing images - ', test_image.shape)
print('Shape of testing labels - ', test_label.shape)
Shape of training images -  (60000, 28, 28)
Shape of training labels -  (60000,)
Shape of testing images -  (10000, 28, 28)
Shape of testing labels -  (10000,)
Counter(test_label)
Counter({1: 1135, 2: 1032, 7: 1028, 3: 1010, 9: 1009, 4: 982, 0: 980, 8: 974, 6: 958, 5: 892})
Counter(train_label)
Counter({1: 6742, 7: 6265, 3: 6131, 2: 5958, 9: 5949, 0: 5923, 6: 5918, 8: 5851, 4: 5842, 5: 5421})
np.set_printoptions(linewidth=200)
print(train_label[0])
print(train_image[0])
5
[[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [ 0 0 0 0 0 0 0 0 0 0 0 0 3 18 18 18 126 136 175 26 166 255 247 127 0 0 0 0]
 [ 0 0 0 0 0 0 0 0 30 36 94 154 170 253 253 253 253 253 225 172 253 242 195 64 0 0 0 0]
 [ 0 0 0 0 0 0 0 49 238 253 253 253 253 253 253 253 253 251 93 82 82 56 39 0 0 0 0 0]
 [ 0 0 0 0 0 0 0 18 219 253 253 253 253 253 198 182 247 241 0 0 0 0 0 0 0 0 0 0]
 [ 0 0 0 0 0 0 0 0 80 156 107 253 253 205 11 0 43 154 0 0 0 0 0 0 0 0 0 0]
 [ 0 0 0 0 0 0 0 0 0 14 1 154 253 90 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [ 0 0 0 0 0 0 0 0 0 0 0 139 253 190 2 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [ 0 0 0 0 0 0 0 0 0 0 0 11 190 253 70 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [ 0 0 0 0 0 0 0 0 0 0 0 0 35 241 225 160 108 1 0 0 0 0 0 0 0 0 0 0]
 [ 0 0 0 0 0 0 0 0 0 0 0 0 0 81 240 253 253 119 25 0 0 0 0 0 0 0 0 0]
 [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 45 186 253 253 150 27 0 0 0 0 0 0 0 0]
 [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 16 93 252 253 187 0 0 0 0 0 0 0 0]
 [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 249 253 249 64 0 0 0 0 0 0 0]
 [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 46 130 183 253 253 207 2 0 0 0 0 0 0 0]
 [ 0 0 0 0 0 0 0 0 0 0 0 0 39 148 229 253 253 253 250 182 0 0 0 0 0 0 0 0]
 [ 0 0 0 0 0 0 0 0 0 0 24 114 221 253 253 253 253 201 78 0 0 0 0 0 0 0 0 0]
 [ 0 0 0 0 0 0 0 0 23 66 213 253 253 253 253 198 81 2 0 0 0 0 0 0 0 0 0 0]
 [ 0 0 0 0 0 0 18 171 219 253 253 253 253 195 80 9 0 0 0 0 0 0 0 0 0 0 0 0]
 [ 0 0 0 0 55 172 226 253 253 253 253 244 133 11 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [ 0 0 0 0 136 253 253 253 212 135 132 16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]
# Normalizing the images
train_image = train_image/255.0
test_image = test_image/255.0
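A quick sanity check (optional) that the scaling worked - after dividing by 255, the pixel values should lie between 0 and 1:

# Pixel values should now be in [0, 1]
print(train_image.min(), train_image.max())   # expected: 0.0 1.0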
# Building the model
model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(28,28,1)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
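A quick note on the loss choice: sparse_categorical_crossentropy works directly with integer labels like ours (0-9). If the labels were one-hot encoded, you would use categorical_crossentropy instead. A small illustration of the difference:

# Our labels are plain integers, which is what the 'sparse' loss expects
print(train_label[:5])                                   # e.g. [5 0 4 1 9]
# One-hot encoding them would call for categorical_crossentropy instead
one_hot = tf.keras.utils.to_categorical(train_label[:5], num_classes=10)
print(one_hot.shape)                                     # (5, 10)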
model.summary()
Model: "sequential_1"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ │ flatten_1 (Flatten) │ (None, 784) │ 0 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dense_3 (Dense) │ (None, 512) │ 401,920 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dense_4 (Dense) │ (None, 128) │ 65,664 │ ├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤ │ dense_5 (Dense) │ (None, 10) │ 1,290 │ └──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Total params: 468,874 (1.79 MB)
Trainable params: 468,874 (1.79 MB)
Non-trainable params: 0 (0.00 B)
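As a sanity check on the Param # column, each Dense layer stores inputs × units weights plus units biases:

# Dense layer parameters = (inputs * units) weights + units biases
print(784 * 512 + 512)   # 401,920 -- Flatten output into the first Dense layer
print(512 * 128 + 128)   # 65,664  -- second Dense layer
print(128 * 10 + 10)     # 1,290   -- output layer
# These sum to the 468,874 trainable parameters reported above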
model.fit(train_image, train_label, epochs = 10)
Epoch 1/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 10s 5ms/step - accuracy: 0.9048 - loss: 0.3249
Epoch 2/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 8s 4ms/step - accuracy: 0.9762 - loss: 0.0787
Epoch 3/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 7s 4ms/step - accuracy: 0.9824 - loss: 0.0546
Epoch 4/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 9s 5ms/step - accuracy: 0.9882 - loss: 0.0386
Epoch 5/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 9s 5ms/step - accuracy: 0.9901 - loss: 0.0301
Epoch 6/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 9s 5ms/step - accuracy: 0.9917 - loss: 0.0242
Epoch 7/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 8s 4ms/step - accuracy: 0.9924 - loss: 0.0229
Epoch 8/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 7s 4ms/step - accuracy: 0.9943 - loss: 0.0164
Epoch 9/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 8s 4ms/step - accuracy: 0.9944 - loss: 0.0190
Epoch 10/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 9s 5ms/step - accuracy: 0.9949 - loss: 0.0155
<keras.src.callbacks.history.History at 0x2161b86f790>
# Let's check the model summary after training
model.summary()
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ Layer (type)                         ┃ Output Shape                ┃         Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ flatten (Flatten)                    │ (None, 784)                 │               0 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense (Dense)                        │ (None, 512)                 │         401,920 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_1 (Dense)                      │ (None, 128)                 │          65,664 │
├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤
│ dense_2 (Dense)                      │ (None, 10)                  │           1,290 │
└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘
Total params: 1,406,624 (5.37 MB)
Trainable params: 468,874 (1.79 MB)
Non-trainable params: 0 (0.00 B)
Optimizer params: 937,750 (3.58 MB)
What are all these other parameters and their values? Well, here is a brief overview. Everything might not be clear as of now, but it will surely be clear by the end of this course.
Optimizer Parameters
The "Optimizer params: 937,750" refers to the total number of parameters that the optimizer (e.g., Adam, SGD) maintains for updating the model. These include:
- Weights & Biases updates – The optimizer needs to store past updates for adaptive algorithms like Adam (momentum and adaptive learning rates).
- First & Second Moments (for Adam/RMSprop) – Adam keeps track of moving averages of gradients and squared gradients, which increases parameter count.
- Learning rate schedules (if used) – Some optimizers maintain additional parameters for learning rate adjustments.
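Here is a rough arithmetic sketch of where 937,750 comes from, assuming Adam keeps two moment slots (m and v) per trainable weight plus a couple of optimizer-level scalars such as the iteration counter (the exact bookkeeping may vary by Keras version):

trainable = 468_874
print(2 * trainable)     # 937,748 -- first and second moments for Adam
# The remaining 2 parameters are optimizer-level scalars (e.g., the
# iteration counter), bringing the total to 937,750.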
Why Non-Trainable Params = 0?
Non-trainable parameters usually come from frozen layers (like pre-trained models where some layers are not updated). Since this model has zero non-trainable parameters, it means:
- All layers are fully trainable (no frozen layers).
- No batch normalization layers with fixed moving statistics.
- No pre-trained weights used where some layers remain frozen.
Thus, all learnable parameters contribute to weight updates, and the optimizer manages additional memory for momentum-based updates.
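For contrast, here is a minimal sketch (a hypothetical demo model, not part of our pipeline) of how freezing a layer produces non-trainable parameters:

# Hypothetical demo: freezing a layer moves its weights to 'non-trainable'
frozen = tf.keras.layers.Dense(64, activation='relu')
demo = tf.keras.models.Sequential([
    tf.keras.Input(shape=(784,)),
    frozen,
    tf.keras.layers.Dense(10, activation='softmax')
])
frozen.trainable = False    # freeze the layer before compiling
demo.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
demo.summary()              # now reports non-trainable params > 0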
Now, getting back to our model training. As you can see, the accuracy reached 99% well before the 10th epoch. So we don't need to go through all 10 loops; we can stop the training once the desired accuracy is reached using a callback -
class MyCallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        # logs holds the metrics of the epoch that just finished
        if logs is not None and logs.get('accuracy', 0) >= 0.99:
            print('Reached 99% accuracy. Stopping the training.')
            self.model.stop_training = True
model.fit(train_image, train_label, epochs = 10, callbacks=[MyCallback()])
Epoch 1/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 9s 5ms/step - accuracy: 0.9042 - loss: 0.3191
Epoch 2/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 6s 3ms/step - accuracy: 0.9737 - loss: 0.0818
Epoch 3/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 7s 4ms/step - accuracy: 0.9841 - loss: 0.0511
Epoch 4/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 7s 4ms/step - accuracy: 0.9881 - loss: 0.0370
Epoch 5/10
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 9s 5ms/step - accuracy: 0.9896 - loss: 0.0327
Epoch 6/10
1867/1875 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - accuracy: 0.9916 - loss: 0.0262
Reached 99% accuracy. Stopping the training.
1875/1875 ━━━━━━━━━━━━━━━━━━━━ 9s 5ms/step - accuracy: 0.9916 - loss: 0.0262
<keras.src.callbacks.history.History at 0x2161b3c2da0>
So the training stopped early - in the sixth epoch, as soon as accuracy crossed the 99% threshold.
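As an aside, Keras also ships built-in callbacks. tf.keras.callbacks.EarlyStopping is the usual choice when you want to stop once a monitored metric stops improving - a slightly different criterion than our fixed threshold:

# Built-in alternative: stop when the monitored metric stops improving
early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=2)
# model.fit(train_image, train_label, epochs=10, callbacks=[early_stop])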
# Model evaluation
model.evaluate(test_image, test_label)
313/313 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.9775 - loss: 0.0854
[0.06842312961816788, 0.9818000197410583]
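Note that evaluate returns the loss followed by each metric passed to compile, which is why you see a two-element list; the returned list is the definitive aggregated result (the progress-bar figures can differ slightly). You can unpack it directly:

# evaluate returns [loss, accuracy] given our compile settings
test_loss, test_acc = model.evaluate(test_image, test_label, verbose=0)
print(f'Test accuracy: {test_acc:.4f}, test loss: {test_loss:.4f}')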
# Predictions
classified_images = model.predict(test_image)
313/313 ━━━━━━━━━━━━━━━━━━━━ 0s 1ms/step
predictions = [np.argmax(images) for images in classified_images]
predictions[100], test_label[100]
(6, 6)
predictions[1000], test_label[1000]
(9, 9)
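Since matplotlib was imported at the top, we can close the loop with a quick visual check (index 100 here is arbitrary):

# Show a test image alongside its predicted and actual labels
idx = 100   # arbitrary test image
plt.imshow(test_image[idx], cmap='gray')
plt.title(f'Predicted: {predictions[idx]}, Actual: {test_label[idx]}')
plt.axis('off')
plt.show()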
So what did you do here?
- Used the MNIST handwritten digits dataset
- Looked into the basic data details
- Normalized the data
- Created the model architecture with -
    - an Input layer for the incoming data
    - a Flatten layer
    - 3 dense neural network layers with 512, 128 and 10 neurons, along with activation functions
- Compiled the model with a loss function and an optimizer
- Used a callback in fit and trained the model on the training data
- Evaluated the model on the test data
- Made predictions with the trained model