Linear Discriminant Analysis (LDA): A Deep Dive¶

Linear Discriminant Analysis (LDA) is a supervised learning algorithm used primarily for dimensionality reduction and classification. Unlike Principal Component Analysis (PCA), which focuses solely on capturing the maximum variance in data without considering class labels, LDA uses the class label information to maximize class separability.

The core idea behind LDA is to find a projection (a line, plane, or hyperplane) that:

  • Maximizes the distance between class means after projection,
  • Minimizes the scatter (or spread) within each class.

These two goals are achieved by computing scatter matrices and solving a generalized eigenvalue problem. Given a dataset with n samples, d features, and k classes (labels), LDA finds a new set of axes (called linear discriminants) onto which the data can be projected such that:

  • The projected class means are as far apart as possible,
  • The projected class variances are as small as possible.

The number of new dimensions that LDA can reduce to is at most k - 1, where k is the number of classes.

We will go through LDA in detail, but first we need two building blocks: the overall mean and the class mean. Both are statistical concepts commonly used in data analysis, particularly in classification tasks, clustering, and dimensionality reduction techniques like LDA.


Overall Mean¶

The overall mean refers to the average value of each feature across the entire dataset, regardless of any class or category. If a dataset contains multiple features (variables), the overall mean is computed individually for each feature across all observations.

Definition: Given a dataset with n observations and d features, the overall mean is a vector of length d, where each element is the mean of the corresponding feature across all observations.

Example: Suppose we have a dataset with two features: Height (in cm) and Weight (in kg), and five observations:

Observation   Height (cm)   Weight (kg)
1             160           55
2             165           60
3             170           65
4             175           70
5             180           75

To compute the overall mean:

  • Mean Height = (160 + 165 + 170 + 175 + 180) / 5 = 170
  • Mean Weight = (55 + 60 + 65 + 70 + 75) / 5 = 65

So, the overall mean vector = [170, 65]

This represents the central tendency of the entire dataset without considering any grouping.
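If you want to verify this arithmetic in code, a minimal NumPy sketch (the array below simply restates the table above) could look like this:

import numpy as np

# The five observations from the table above: Height (cm), Weight (kg)
data = np.array([
    [160, 55],
    [165, 60],
    [170, 65],
    [175, 70],
    [180, 75],
])

overall_mean = data.mean(axis=0)   # column-wise mean over all observations
print(overall_mean)                # [170.  65.]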

Class Mean¶

The class mean is the mean of each feature within individual classes or categories. It is computed by grouping the data by a categorical variable (such as gender, species, or label), and then calculating the mean of each feature within each group.

Definition: Given a categorical class label with k unique values, the class mean is a set of k vectors (each of length d), where each vector represents the mean of the features within that specific class.

Example: Consider the same dataset, but now each person is labeled with a gender:

Observation   Height (cm)   Weight (kg)   Gender
1             160           55            Male
2             165           60            Female
3             170           65            Male
4             175           70            Female
5             180           75            Male

To compute the class means:

  • For Male:

    • Height = (160 + 170 + 180) / 3 = 170
    • Weight = (55 + 65 + 75) / 3 = 65 → Class mean for Male = [170, 65]
  • For Female:

    • Height = (165 + 175) / 2 = 170
    • Weight = (60 + 70) / 2 = 65 → Class mean for Female = [170, 65]

In this case, both classes happen to have the same means. However, in general, class means differ between categories, which is useful in analyzing class-specific trends and for classification algorithms.
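The same check for class means, again as a small NumPy sketch (the arrays restate the table above; the grouping is done with a boolean mask):

import numpy as np

# Same five observations, now with a gender label per row
X = np.array([
    [160, 55],
    [165, 60],
    [170, 65],
    [175, 70],
    [180, 75],
])
gender = np.array(["Male", "Female", "Male", "Female", "Male"])

# Class mean = feature-wise mean of the rows belonging to each class
for g in ("Male", "Female"):
    print(g, X[gender == g].mean(axis=0))
# Male   [170.  65.]
# Female [170.  65.]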


The Within-Class Scatter Matrix $S_W$¶

This matrix captures the spread of data points within each class. The smaller the within-class scatter, the more compact each class is.

Definition: For each class $i$, compute the scatter (like covariance) of its data points around its class mean $\mu_i$. Then sum across all classes.

$$ S_W = \sum_{i=1}^k \sum_{x \in X_i} (x - \mu_i)(x - \mu_i)^T $$

This is a $d \times d$ matrix that accumulates the scatter within each class; it is essentially the sum of the (unnormalized) covariance matrices of the individual classes.

  • Small scatter: data points are tightly packed around their class mean → small within-class covariance → classes are easier to separate.
  • Large scatter: data points are widely spread within the class → more overlap between classes → classes are harder to separate.

Let’s say we have 2D data (Math and English scores). A tight cluster of Male students around [80, 65] would imply low scatter, while a wide spread of scores implies high scatter.
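A short sketch of how $S_W$ could be computed with NumPy (the helper name within_class_scatter is ours, not a library function; each class is assumed to be an array with samples as rows):

import numpy as np

def within_class_scatter(class_arrays):
    """Sum the outer products (x - mu_i)(x - mu_i)^T over every class."""
    d = class_arrays[0].shape[1]
    S_W = np.zeros((d, d))
    for X_i in class_arrays:
        diffs = X_i - X_i.mean(axis=0)   # deviations from the class mean
        S_W += diffs.T @ diffs           # equals the sum of outer products
    return S_W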


The Between-Class Scatter Matrix $S_B$¶

This matrix captures how far apart the means of different classes are from the overall mean.

Definition:

$$ S_B = \sum_{i=1}^k n_i (\mu_i - \mu)(\mu_i - \mu)^T $$

Where:

  • $\mu$: The overall data mean (centroid)
  • $\mu_i$: The mean of class $i$
  • $n_i$: Number of samples in class $i$

This also results in a $d \times d$ matrix. Its entries grow as the class means move farther from the overall mean, so maximizing it maximizes class separability.
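And a matching sketch for $S_B$ (again, between_class_scatter is just an illustrative helper, assuming the same list-of-class-arrays layout as above):

import numpy as np

def between_class_scatter(class_arrays):
    """Sum n_i (mu_i - mu)(mu_i - mu)^T over every class."""
    mu = np.vstack(class_arrays).mean(axis=0)   # overall mean of all samples
    d = class_arrays[0].shape[1]
    S_B = np.zeros((d, d))
    for X_i in class_arrays:
        diff = (X_i.mean(axis=0) - mu).reshape(-1, 1)
        S_B += X_i.shape[0] * (diff @ diff.T)   # weighted by class size n_i
    return S_B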


Fisher's Discriminant Ratio¶

When we project high-dimensional data onto a line (1D), we want the projected classes to be as far apart as possible and the points within each class to be as tightly packed as possible. This trade-off is captured by Fisher’s discriminant ratio: LDA maximizes the ratio of between-class variance to within-class variance.

For Two Classes:

Fisher’s criterion function $J(w)$ for projection vector $w$ is defined as:

$$ J(w) = \frac{w^T S_B w}{w^T S_W w} $$

  • Numerator: the squared distance between the projected class means.
  • Denominator: the total scatter of the projected points within the classes.
  • We want the projected class means $\mu_1$ and $\mu_2$ to be as far apart as possible → maximize the numerator.
  • We want the sum of projected within-class scatters $s_1^2 + s_2^2$ to be as small as possible → minimize the denominator.

We seek to find the direction $w$ that maximizes this criterion. This ensures that the classes are far apart (numerator is high) and compact within themselves (denominator is low).
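For concreteness, the criterion itself is a one-liner in NumPy (a sketch; fisher_ratio is an illustrative name, and S_B, S_W are assumed to be the scatter matrices defined above):

import numpy as np

def fisher_ratio(w, S_B, S_W):
    """Fisher's criterion J(w) = (w^T S_B w) / (w^T S_W w) for a 1-D direction w."""
    w = np.asarray(w, dtype=float)
    return (w @ S_B @ w) / (w @ S_W @ w)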

For more than two classes, the Fisher ratio becomes:

$$ J(w) = \frac{d_1^2 + d_2^2 + \dots + d_k^2}{s_1^2 + s_2^2 + \dots + s_k^2} $$

Here:

  • $d_i^2$: Squared distance between class $i$'s mean and the overall mean.
  • $s_i^2$: Within-class scatter for class $i$.

The goal is still the same: maximize the numerator (between-class distances), minimize the denominator (within-class scatter).

Solving the Optimization Problem¶

The above ratio is maximized using a generalized eigenvalue problem:

$$ S_W^{-1} S_B w = \lambda w $$

Here,

  • $w$ is the eigenvector (a linear discriminant)
  • $\lambda$ is the corresponding eigenvalue

We sort the eigenvalues in descending order and pick the top $k - 1$ eigenvectors to form our new feature space.
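One way to do this in code is sketched below. It uses SciPy's generalized eigensolver (assuming SciPy is available), which handles $S_B w = \lambda S_W w$ directly without forming $S_W^{-1}$; the from-scratch implementation later in this notebook uses NumPy's pseudo-inverse instead:

import numpy as np
from scipy.linalg import eig

def lda_directions(S_B, S_W, n_components):
    # Solve S_B w = lambda * S_W w, equivalent to S_W^{-1} S_B w = lambda w
    eigvals, eigvecs = eig(S_B, S_W)
    order = np.argsort(eigvals.real)[::-1]          # largest eigenvalues first
    return eigvecs[:, order[:n_components]].real    # columns are the discriminants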


LDA Projection¶

Once we have the top eigenvectors $w_1, w_2, ..., w_{k-1}$, we project each sample $x$ onto these directions:

$$ y = W^T x $$

Where:

  • $W$ is the matrix of selected eigenvectors (columns)
  • $y$ is the lower-dimensional representation of $x$
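In code this is a single matrix product. The two-line sketch below assumes X holds the samples as rows (shape n × d) and W holds the selected eigenvectors as columns (shape d × (k − 1)):

# Samples are the rows of X; the linear discriminants are the columns of W
Y = X @ W   # shape (n, k-1): row j of Y is the projection W^T x_j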

LDA with Multiple Classes¶

When dealing with more than two classes, LDA generalizes as follows:

  • Compute the overall mean $\mu$ (Overall Mean)
  • Compute $\mu_i$ for each class (Class Mean)
  • Compute (Within Class Scatter Matrix) $S_W$ and (Between Class Scatter Matrix) $S_B$ using the formulas -
    • $$ S_W = \sum_{i=1}^k \sum_{x \in X_i} (x - \mu_i)(x - \mu_i)^T $$
    • $$ S_B = \sum_{i=1}^k n_i (\mu_i - \mu)(\mu_i - \mu)^T $$
  • Solve the eigenvalue equation $S_W^{-1} S_B w = \lambda w$
  • Select the top $k - 1$ eigenvectors to form the transformation matrix $W$

Final optimization for multiclass:

$$ \max_W \frac{|W^T S_B W|}{|W^T S_W W|} $$

Where: $W$ is the matrix formed by eigenvectors
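As a quick sanity check on this objective (a sketch only; S_B, S_W, and W are assumed to come from the steps above), the determinant ratio can be evaluated directly:

import numpy as np

def multiclass_objective(W, S_B, S_W):
    """The determinant ratio |W^T S_B W| / |W^T S_W W| that multiclass LDA maximizes."""
    return np.linalg.det(W.T @ S_B @ W) / np.linalg.det(W.T @ S_W @ W)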


A Worked Example¶

Let us consider two classes (Male and Female) and two features (Math and English scores).

Let’s define:

Male samples:

x1 = [78, 66]
x2 = [82, 64]
x3 = [81, 67]

Female samples:

x4 = [62, 84]
x5 = [58, 86]
x6 = [61, 82]

1. Class Means¶

Male mean ($\mu_1$):

$$ \mu_1 = \frac{1}{3}([78, 66] + [82, 64] + [81, 67]) = [80.33,\ 65.67] $$

Female mean ($\mu_2$):

$$ \mu_2 = \frac{1}{3}([62, 84] + [58, 86] + [61, 82]) = [60.33,\ 84.0] $$

Overall mean ($\mu$):

$$ \mu = \frac{1}{6} \sum_{i=1}^{6} x_i = \frac{[78+82+81+62+58+61,\ 66+64+67+84+86+82]}{6} = [70.33,\ 74.83] $$

2. Between-Class Scatter Matrix $S_b$¶

$$ S_b = (\mu_1 - \mu)(\mu_1 - \mu)^T + (\mu_2 - \mu)(\mu_2 - \mu)^T $$

(Both classes have the same number of samples, $n_1 = n_2 = 3$, so the class-size weights $n_i$ from the general formula are dropped here; rescaling $S_b$ by a constant does not change the LDA direction.)

$$ \mu_1 - \mu = [80.33 - 70.33,\ 65.67 - 74.83] = [10.0,\ -9.17] $$

$$ \mu_2 - \mu = [60.33 - 70.33,\ 84.0 - 74.83] = [-10.0,\ 9.17] $$

Now compute:

$$ S_b = \begin{bmatrix} 10.0 \\ -9.17 \end{bmatrix} \begin{bmatrix} 10.0 & -9.17 \end{bmatrix} + \begin{bmatrix} -10.0 \\ 9.17 \end{bmatrix} \begin{bmatrix} -10.0 & 9.17 \end{bmatrix} $$

$$ = \begin{bmatrix} 100.0 & -91.7 \\ -91.7 & 84.09 \end{bmatrix} + \begin{bmatrix} 100.0 & -91.7 \\ -91.7 & 84.09 \end{bmatrix} = \begin{bmatrix} 200.0 & -183.4 \\ -183.4 & 168.18 \end{bmatrix} $$

3. Within-Class Scatter Matrix $S_w$¶

Each term is $(x_i - \mu_c)(x_i - \mu_c)^T$, where $\mu_c$ is the mean of the class that $x_i$ belongs to, summed over all samples.

For males:¶

$$ x_1 - \mu_1 = [78, 66] - [80.33, 65.67] = [-2.33,\ 0.33] $$

$$ x_2 - \mu_1 = [82, 64] - [80.33, 65.67] = [1.67,\ -1.67] $$

$$ x_3 - \mu_1 = [81, 67] - [80.33, 65.67] = [0.67,\ 1.33] $$

Compute outer products:

$$ S_{w1} = (-2.33, 0.33)^T(-2.33, 0.33) + (1.67, -1.67)^T(1.67, -1.67) + (0.67, 1.33)^T(0.67, 1.33) $$

$$ = \begin{bmatrix} 5.43 & -0.77 \\ -0.77 & 0.11 \end{bmatrix} + \begin{bmatrix} 2.79 & -2.79 \\ -2.79 & 2.79 \end{bmatrix} + \begin{bmatrix} 0.45 & 0.89 \\ 0.89 & 1.77 \end{bmatrix} = \begin{bmatrix} 8.67 & -2.67 \\ -2.67 & 4.67 \end{bmatrix} $$

For females:¶

$$ x_4 - \mu_2 = [62, 84] - [60.33, 84.0] = [1.67,\ 0.0] $$

$$ x_5 - \mu_2 = [58, 86] - [60.33, 84.0] = [-2.33,\ 2.0] $$

$$ x_6 - \mu_2 = [61, 82] - [60.33, 84.0] = [0.67,\ -2.0] $$

Compute:

$$ S_{w2} = (1.67, 0.0)^T(1.67, 0.0) + (-2.33, 2.0)^T(-2.33, 2.0) + (0.67, -2.0)^T(0.67, -2.0) $$

$$ = \begin{bmatrix} 2.79 & 0.0 \\ 0.0 & 0.0 \end{bmatrix} + \begin{bmatrix} 5.43 & -4.67 \\ -4.67 & 4.0 \end{bmatrix} + \begin{bmatrix} 0.45 & -1.33 \\ -1.33 & 4.0 \end{bmatrix} = \begin{bmatrix} 8.67 & -6.0 \\ -6.0 & 8.0 \end{bmatrix} $$

Total within-class scatter:¶

$$ S_w = S_{w1} + S_{w2} = \begin{bmatrix} 8.67 & -2.67 \\ -2.67 & 4.67 \end{bmatrix} + \begin{bmatrix} 8.67 & -6.0 \\ -6.0 & 8.0 \end{bmatrix} = \begin{bmatrix} 17.34 & -8.67 \\ -8.67 & 12.67 \end{bmatrix} $$

Now this matrix is invertible (its determinant is non-zero).

4. Compute the LDA projection direction¶

For two classes, the generalized eigenvalue problem has a single non-trivial discriminant direction, which can be written in closed form:

$$ w = S_w^{-1}(\mu_1 - \mu_2) $$

Given:

  • $\mu_1 = [80.33,\ 65.67]$
  • $\mu_2 = [60.33,\ 84.0]$
  • So $\mu_1 - \mu_2 = [20.0,\ -18.33]$
  • The within-class scatter matrix $S_w$ is:

$$ S_w = \begin{bmatrix} 17.34 & -8.67 \\ -8.67 & 12.67 \end{bmatrix} $$

We now compute the inverse of $S_w$, and then multiply it with the mean difference vector.

Inverse of $S_w$

Let:

$$ A = \begin{bmatrix} a & b \\ c & d \end{bmatrix} = \begin{bmatrix} 17.34 & -8.67 \\ -8.67 & 12.67 \end{bmatrix} $$

The formula for the inverse of a 2×2 matrix is:

$$ A^{-1} = \frac{1}{ad - bc} \begin{bmatrix} d & -b \\ -c & a \end{bmatrix} $$

Computing the determinant:

$$ \det(S_w) = (17.34)(12.67) - (-8.67)(-8.67) = 219.70 - 75.17 = 144.53 $$

Inverse of $S_w$:

$$ S_w^{-1} = \frac{1}{144.53} \begin{bmatrix} 12.67 & 8.67 \\ 8.67 & 17.34 \end{bmatrix} $$

We can now calculate:

$$ w = S_w^{-1}(\mu_1 - \mu_2) = \frac{1}{144.53} \begin{bmatrix} 12.67 & 8.67 \\ 8.67 & 17.34 \end{bmatrix} \begin{bmatrix} 20.0 \\ -18.33 \end{bmatrix} $$

Let’s compute the matrix-vector product:

$$ \text{numerator vector} = \begin{bmatrix} 12.67 \cdot 20.0 + 8.67 \cdot (-18.33) \\ 8.67 \cdot 20.0 + 17.34 \cdot (-18.33) \end{bmatrix} $$

$$ = \begin{bmatrix} 253.4 - 158.92 \\ 173.4 - 317.84 \end{bmatrix} = \begin{bmatrix} 94.48 \\ -144.44 \end{bmatrix} $$

Now multiply by $\frac{1}{144.53}$:

$$ w = \frac{1}{144.53} \begin{bmatrix} 94.48 \\ -144.44 \end{bmatrix} \approx \begin{bmatrix} 0.6537 \\ -0.9994 \end{bmatrix} $$

Final Result:

$$ w \approx \begin{bmatrix} 0.6537 \\ -0.9994 \end{bmatrix} $$

(Carrying exact fractions instead of the rounded intermediate values gives $w = [0.6538,\ -1.0]$; the small difference is purely rounding.)

This is the LDA projection direction: the line onto which the 2D data is projected to obtain a 1D representation that maximally separates the Male and Female classes.
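Before moving on, here is a quick NumPy check of this hand computation. It uses full precision, so it returns the exact direction rather than the rounded one above; note also that the from-scratch implementation below additionally normalizes $w$ to unit length, so its printed numbers differ from these by a constant scale factor:

import numpy as np

male = np.array([[78, 66], [82, 64], [81, 67]], dtype=float)
female = np.array([[62, 84], [58, 86], [61, 82]], dtype=float)

mu1, mu2 = male.mean(axis=0), female.mean(axis=0)

# Within-class scatter: sum of outer products of deviations from each class mean
S_w = (male - mu1).T @ (male - mu1) + (female - mu2).T @ (female - mu2)

w = np.linalg.inv(S_w) @ (mu1 - mu2)
print(w)   # approximately [0.6538, -1.0]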

Let's implement in Python - From Scratch¶

We will implement the two-class case first, replicating the math above.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
In [2]:
# Male samples (Class 0)
male = np.array([
    [78, 66],
    [82, 64],
    [81, 67]
])

# Female samples (Class 1)
female = np.array([
    [62, 84],
    [58, 86],
    [61, 82]
])

# Combine data
X = np.vstack((male, female))
y = np.array([0]*3 + [1]*3)

X
Out[2]:
array([[78, 66],
       [82, 64],
       [81, 67],
       [62, 84],
       [58, 86],
       [61, 82]])
In [3]:
y
Out[3]:
array([0, 0, 0, 1, 1, 1])
In [4]:
# Compute class means
mu_male = male.mean(axis=0)
mu_female = female.mean(axis=0)
mu_diff = mu_male - mu_female
In [6]:
# Compute within-class scatter matrix Sw
Sw = np.zeros((2, 2))
for x in male:
    diff = (x - mu_male).reshape(2, 1)
    Sw += diff @ diff.T  # Performing Matrix Multiplication
    
for x in female:
    diff = (x - mu_female).reshape(2, 1)
    Sw += diff @ diff.T
In [7]:
# Compute LDA direction w
w = np.linalg.inv(Sw) @ mu_diff
w /= np.linalg.norm(w)  # Normalize
In [8]:
# Project data onto w
projected = X @ w
In [9]:
# Plot original and projected data
fig, axs = plt.subplots(1, 2, figsize=(10, 4))

# Original space
axs[0].scatter(male[:, 0], male[:, 1], label="Male", marker='o')
axs[0].scatter(female[:, 0], female[:, 1], label="Female", marker='x')
axs[0].set_title("Original Feature Space")
axs[0].set_xlabel("Math Score")
axs[0].set_ylabel("English Score")
axs[0].legend()

# LDA projected space (1D)
axs[1].scatter(projected[:3], np.zeros(3), label="Male", marker='o')
axs[1].scatter(projected[3:], np.zeros(3), label="Female", marker='x')
axs[1].set_title("LDA Projected Space (1D)")
axs[1].set_yticks([])
axs[1].set_xlabel("LDA Component")
axs[1].legend()

plt.tight_layout()
plt.show()
[Figure: scatter plots of the original 2D feature space and the 1D LDA-projected space]

Now let's extend the implementation to the multiclass case.

In [10]:
# Let's consider 3 classes, 4 features

# Class A
A = np.array([
    [2, 3, 1, 4],
    [3, 2, 1, 5],
    [2, 4, 2, 4]
])

# Class B
B = np.array([
    [7, 5, 6, 8],
    [6, 6, 5, 9],
    [7, 4, 5, 7]
])

# Class C
C = np.array([
    [1, 9, 8, 2],
    [2, 8, 9, 3],
    [1, 10, 9, 1]
])

# Combine data
X = np.vstack([A, B, C])
y = np.array([0]*3 + [1]*3 + [2]*3)
classes = [A, B, C]
class_labels = [0, 1, 2]

# Compute overall mean
mu_overall = X.mean(axis=0)

# Compute class means
class_means = [cls.mean(axis=0) for cls in classes]

# Compute between-class scatter matrix S_b
S_b = np.zeros((4, 4))
for cls_mean in class_means:
    n = 3  # each class has 3 samples
    diff = (cls_mean - mu_overall).reshape(4, 1)
    S_b += n * (diff @ diff.T)

# Compute within-class scatter matrix S_w
S_w = np.zeros((4, 4))
for cls, mu in zip(classes, class_means):
    for x in cls:
        diff = (x - mu).reshape(4, 1)
        S_w += diff @ diff.T

# Solve the generalized eigenvalue problem
eigvals, eigvecs = np.linalg.eig(np.linalg.pinv(S_w) @ S_b)

# Sort eigenvectors by descending eigenvalues
sorted_indices = np.argsort(eigvals)[::-1]
eigvecs_sorted = eigvecs[:, sorted_indices]
eigvals_sorted = eigvals[sorted_indices]

eigvals_sorted
Out[10]:
array([ 2.95913482e+02,  4.07531842e+01,  9.16087434e-15, -1.65159443e-14])
The last two eigenvalues are numerically zero (floating-point noise): $S_B$ has rank at most $k - 1 = 2$, so only two directions carry discriminative information.

In [11]:
# Projection matrix (top 2 components for 3 classes)
W = eigvecs_sorted[:, :2]

# Project data
Z = X @ W

# Plot the projected data
plt.figure(figsize=(6, 5))
colors = ['red', 'green', 'blue']
markers = ['o', 's', 'x']
for i, label in enumerate(class_labels):
    plt.scatter(Z[y == label, 0], Z[y == label, 1],
                label=f'Class {label}', c=colors[i], marker=markers[i])
plt.title("Multiclass LDA Projection (2D)")
plt.xlabel("LD1")
plt.ylabel("LD2")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
[Figure: 2D scatter plot of the multiclass LDA projection (LD1 vs. LD2)]

We don't have to implement LDA from scratch every time, since scikit-learn provides a dedicated implementation¶

In [12]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import matplotlib.pyplot as plt

# Generate synthetic 3 class data
X, y = make_classification(n_samples=300, n_features=4, n_informative=3, n_redundant=0,
                           n_classes=3, n_clusters_per_class=1, random_state=42)

# Apply LDA
lda = LinearDiscriminantAnalysis(n_components=2)
X_lda = lda.fit_transform(X, y)

# Plot the transformed data
plt.figure(figsize=(8, 5))
for label in np.unique(y):
    plt.scatter(X_lda[y == label, 0], X_lda[y == label, 1], label=f'Class {label}')
plt.title('LDA Projection (2D)')
plt.xlabel('LD1')
plt.ylabel('LD2')
plt.legend()
plt.grid(True)
plt.show()
[Figure: 2D LDA projection of the synthetic 3-class dataset (LD1 vs. LD2)]

Conclusion¶

LDA is a powerful linear technique that combines the concepts of class separation and compactness. It works by finding projection directions that:

  • Maximize the distance between different class means (via $S_B$),
  • Minimize the spread within each class (via $S_W$).

The result is a transformation that improves class separability in a lower-dimensional space — essential for classification and visualization, especially when dealing with high-dimensional data. In contrast to PCA, which focuses purely on data variance, LDA’s use of labels gives it a significant edge in classification tasks.