Details on SVM Classification¶
Support Vector Machines (SVMs) are supervised machine learning algorithms used for both classification and regression tasks. Although they are more commonly associated with classification, SVMs are versatile and can be adapted to continuous output prediction as well. The core idea behind SVMs is to find the optimal separating boundary between data points belonging to different classes or to fit a function that approximates the underlying pattern in regression.
Classification with SVMs¶
In a classification setting, the goal of an SVM is to separate objects into distinct categories. The algorithm does this by identifying a decision boundary, known as a hyperplane, which maximizes the margin between different class labels. The margin is the distance between the hyperplane and the closest data points from each class. These closest points are referred to as support vectors and play a key role in defining the position of the hyperplane. A larger margin generally contributes to better generalization and robustness of the model.
Regression using SVMs¶
For regression tasks, the SVM approach changes slightly. Instead of classifying points, the algorithm predicts continuous numerical values. Here, the model does not seek an exact fit to the training data; instead, it introduces a margin of tolerance (often denoted epsilon) around the predicted function, within which errors are considered acceptable. This flexibility allows the model to balance bias and variance, helping avoid overfitting while still capturing the data's underlying trend.
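To make this concrete, the sketch below uses scikit-learn's SVR on a small synthetic sample (made up purely for illustration); the epsilon parameter is the tolerance margin described above, and C penalizes points that fall outside it.
import numpy as np
from sklearn.svm import SVR
# Synthetic 1-D regression data (illustrative only)
rng = np.random.RandomState(0)
X = np.sort(rng.uniform(0, 5, 40)).reshape(-1, 1)
y = np.sin(X).ravel() + 0.1 * rng.randn(40)
# epsilon sets the width of the tolerance tube: errors smaller than epsilon
# are ignored; C penalizes points that fall outside the tube
svr = SVR(kernel='rbf', C=1.0, epsilon=0.1)
svr.fit(X, y)
print(svr.predict([[2.5]]))  # predicted value near sin(2.5)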
Linear and Non-Linear Decision Boundaries¶
While SVMs are inherently linear classifiers, they can also be extended to solve non-linear problems through a technique known as the kernel trick. Kernels are mathematical functions that transform the input data into higher-dimensional spaces, where a linear separator might exist even if the data is not linearly separable in the original space. Commonly used kernels include radial basis function (RBF), polynomial, and sigmoid. This adaptability to non-linear relationships is a key strength of SVMs, making them suitable for a wide variety of classification and regression problems.
Performance and Practical Considerations¶
SVMs are known to perform well in high-dimensional spaces, especially when the number of features exceeds the number of samples. They are also efficient at predicting once trained, which is beneficial in applications where prediction speed matters. However, the training phase can be computationally intensive and time-consuming, especially with large datasets. Furthermore, one of the limitations of SVMs is their lack of interpretability. Due to the mathematical complexity involved in kernel transformations and the model’s internal operations, it is often difficult to explain how a particular prediction was made or what influenced it most.
Understanding the Decision Boundary in SVMs through a Fraud Detection Example¶
Imagine you're analyzing transactional data from an online payment system. Each transaction includes information like the amount of money transferred and the time of day. You want to predict whether a new transaction is fraudulent or not based on historical data.
Let’s assume you plot this historical data on a 2D graph where the x-axis is the amount and the y-axis is the time. Transactions are labeled either as "fraud" or "not fraud". The goal is to classify new transactions based on this plot.
Finding a Separating Hyperplane¶
In two dimensions, the separating hyperplane is a line that divides the plane into two parts - one for each class. The central question is: where should this line be placed? There are infinitely many ways to draw a line that separates the two classes, but not all of them are optimal.
SVMs tackle this by choosing the maximum margin hyperplane. Instead of just separating the classes, SVM looks for the line (or hyperplane in higher dimensions) that maximizes the distance to the closest data points from both classes. These closest points are called support vectors, and they essentially "support" or define the boundary.
What is a Hyperplane?¶
In a general sense, a hyperplane is a subspace whose dimension is one less than that of its ambient space. If you have data with n features (or dimensions), the separating hyperplane will be (n-1)-dimensional. For example:
- In 2D (two features), the hyperplane is a line.
- In 3D, it’s a plane.
- In 4D or higher, it becomes more abstract but still behaves similarly in principle.
General form of a hyperplane in an n-dimensional space:
$$ w_1 x_1 + w_2 x_2 + \cdots + w_n x_n + b = 0 $$
Where:
- $w$ is the vector of weights,
- $x$ is the input feature vector,
- $b$ is the bias term.
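In scikit-learn, a fitted linear SVC exposes these quantities directly: coef_ holds the weight vector w and intercept_ holds the bias b. A minimal sketch, using two tiny made-up clusters, shows that the decision function is exactly w·x + b:
import numpy as np
from sklearn.svm import SVC
# Two tiny, made-up clusters (illustrative only)
X = np.array([[1, 2], [2, 3], [6, 6], [7, 7]])
y = np.array([0, 0, 1, 1])
clf = SVC(kernel='linear', C=1.0).fit(X, y)
w, b = clf.coef_[0], clf.intercept_[0]   # hyperplane: w.x + b = 0
# decision_function returns w.x + b; its sign gives the predicted class
x_new = np.array([[4, 4]])
print(np.dot(x_new, w) + b, clf.decision_function(x_new))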
Hard Margin SVM: Perfect Separation¶
A hard margin SVM tries to find a separating hyperplane without any misclassification. It assumes the data is linearly separable - which means you can draw a clean boundary between classes without any overlap.
Going back to our transaction example, imagine you plotted all transactions and there’s a clear linear boundary - maybe all low-value, late-night transactions are frauds, and high-value, business-hour transactions are not. If that’s the case, a hard margin SVM can perfectly draw a boundary without errors.
However, hard margin SVMs are sensitive to outliers. If just one transaction is mislabeled or is an exception, it can force the model to draw a poor boundary.
Soft Margin SVM: Handling Real-World Imperfections¶
Real-world data is rarely clean. Some genuine transactions might look suspicious (low-value at odd hours) and vice versa. This is where soft margin SVMs come in. They allow some misclassifications while still trying to keep the margin as wide as possible.
In this case, we don’t insist that every transaction be on the correct side of the boundary. Instead, we allow a few points to fall on the wrong side of the margin or even the wrong side of the hyperplane. The SVM balances two objectives:
- Maximize the margin.
- Minimize the number and extent of violations (misclassifications).
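In its standard textbook form, this balance is written as an optimization problem in which slack variables $\xi_i$ measure how far each point violates the margin, and a parameter $C$ (discussed next) weights those violations against the margin width:
$$ \min_{w, b, \xi} \; \frac{1}{2}\|w\|^2 + C \sum_{i=1}^{m} \xi_i \quad \text{subject to} \quad y_i(w^\top x_i + b) \ge 1 - \xi_i, \quad \xi_i \ge 0 $$
Here $y_i \in \{-1, +1\}$ are the class labels; forcing all $\xi_i = 0$ recovers the hard margin case.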
The Role of C: Regularization in SVM¶
The trade-off between maximizing the margin and allowing violations is controlled by a regularization parameter called C.
- A small C value gives you a wider margin but allows more misclassifications. This is useful when your data is noisy, and you don’t want the model to overfit.
- A large C value gives you a narrower margin with fewer misclassifications, effectively behaving like a hard margin. This works well when your data is clean and separable.
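One way to see this in code is to fit the same linear SVC with a small and a large C and compare the resulting margin width, which for a linear kernel equals $2/\|w\|$. The data below is made up and slightly overlapping, purely for illustration:
import numpy as np
from sklearn.svm import SVC
# Made-up, slightly overlapping 2-D data (illustrative only)
rng = np.random.RandomState(0)
X = np.vstack([rng.randn(20, 2) - 1, rng.randn(20, 2) + 1])
y = np.array([0] * 20 + [1] * 20)
for C in (0.01, 100):
    clf = SVC(kernel='linear', C=C).fit(X, y)
    margin = 2 / np.linalg.norm(clf.coef_[0])   # geometric margin width
    print(f"C={C}: margin width = {margin:.2f}, "
          f"support vectors = {len(clf.support_vectors_)}")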
Bias-Variance Trade-off in SVMs¶
The choice of C is directly related to the bias-variance trade-off:
- Low C (wider margin): High bias, low variance. The model is simpler and less sensitive to small fluctuations in training data.
- High C (narrow margin): Low bias, high variance. The model tries to fit training data tightly and may not generalize well.
Let’s now visualize this relationship with a graph of model complexity vs error.
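The snippet below is one way to produce such a graph; the curves are illustrative shapes (not measured from a trained model), chosen only to show how the bias and variance components typically trade off as C grows:
import numpy as np
import matplotlib.pyplot as plt
# Illustrative curves only: bias falls and variance rises with complexity
complexity = np.linspace(0.1, 10, 200)     # stands in for increasing C
bias_sq = 1.0 / complexity                 # decreasing bias component
variance = 0.1 * complexity                # increasing variance component
total_error = bias_sq + variance           # their sum
plt.figure(figsize=(8, 5))
plt.plot(complexity, bias_sq, label='Bias²')
plt.plot(complexity, variance, label='Variance')
plt.plot(complexity, total_error, 'g--', label='Total error')
plt.xlabel('Model complexity (increasing C)')
plt.ylabel('Error')
plt.title('Bias-Variance Trade-off in SVMs')
plt.legend()
plt.grid(True)
plt.show()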
The graph above shows how model complexity (controlled by the SVM regularization parameter C) influences bias and variance:
- Left side (low C, less complex model): High bias and low variance. The model makes simplifying assumptions and may underfit the data.
- Right side (high C, more complex model): Low bias and high variance. The model tries to fit training data tightly, increasing risk of overfitting.
- The green dashed curve represents the total error, which is minimized at a balance between bias and variance — this is where cross-validation helps identify the best C value.
This reflects the practical goal in SVM: not just separating data, but doing so in a way that generalizes well to new, unseen data.
Example of Hard Margin in Python¶
Let's consider the following input data:
- Class 0 (non-fraudulent): [1, 2], [2, 3], [3, 3], [4, 5]
- Class 1 (fraudulent): [6, 6], [7, 7], [8, 8], [9, 10]
Each point has two features:
- Amount of money transferred (x-axis)
- Time of transaction (y-axis)
This dataset is clearly linearly separable - you can draw a straight line that cleanly separates all class 0 from class 1 points.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
# Create simple linearly separable data
X_hard = np.array([
[1, 2], [2, 3], [3, 3], [4, 5], # Class 0
[6, 6], [7, 7], [8, 8], [9, 10] # Class 1
])
y_hard = np.array([0, 0, 0, 0, 1, 1, 1, 1])
# Fit hard-margin SVM (C is very large to prevent misclassification)
clf_hard = SVC(kernel='linear', C=100)
clf_hard.fit(X_hard, y_hard)
# Plotting
plt.figure(figsize=(8, 6))
plt.title("Hard Margin SVM: Linearly Separable Data")
# Plot points
plt.scatter(X_hard[:, 0], X_hard[:, 1], c=y_hard, cmap=plt.cm.coolwarm, s=60)
# Plot support vectors
plt.scatter(clf_hard.support_vectors_[:, 0], clf_hard.support_vectors_[:, 1],
s=100, facecolors='none', edgecolors='k', label='Support Vectors')
# Plot decision boundary
ax = plt.gca()
xlim = ax.get_xlim()
ylim = ax.get_ylim()
xx = np.linspace(xlim[0], xlim[1], 200)
yy = np.linspace(ylim[0], ylim[1], 200)
YY, XX = np.meshgrid(yy, xx)
xy = np.vstack([XX.ravel(), YY.ravel()]).T
Z = clf_hard.decision_function(xy).reshape(XX.shape)
# Plot decision boundary and margins
plt.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], linestyles=['--', '-', '--'])
plt.xlabel("Amount Transferred")
plt.ylabel("Time of Transaction")
plt.legend()
plt.grid(True)
plt.show()
- The black solid line is the decision boundary (hyperplane).
- The dashed lines on either side represent the margins.
- The support vectors (highlighted with black circles) lie exactly on these margin lines.
- No points are misclassified or fall inside the margin, making this a perfect case for a hard margin SVM.
Because the regularization parameter C is set to a very large value (100), the model does not tolerate any misclassification. This forces a hard boundary that separates all points correctly. This setup mirrors an ideal, noiseless situation - useful for understanding the pure concept of margin maximization. In real-world scenarios, though, soft margin SVMs are typically more appropriate due to noise and overlapping class distributions.
Example of Soft Margin in Python¶
# Introduce an overlapping point to create a non-linearly separable scenario
X_soft = np.array([
[1, 2], [2, 3], [3, 3], [4, 5], # Class 0
[6, 6], [7, 7], [8, 8], [4.5, 4.5] # Class 1 (last point added closer to class 0)
])
y_soft = np.array([0, 0, 0, 0, 1, 1, 1, 1]) # Notice one class 1 point overlaps with class 0
# Fit soft-margin SVM (with lower C to allow for margin violations)
clf_soft = SVC(kernel='linear', C=1)
clf_soft.fit(X_soft, y_soft)
# Plotting
plt.figure(figsize=(8, 6))
plt.title("Soft Margin SVM: Non-linearly Separable Data with Slight Overlap")
# Plot points
plt.scatter(X_soft[:, 0], X_soft[:, 1], c=y_soft, cmap=plt.cm.coolwarm, s=60)
# Plot support vectors
plt.scatter(clf_soft.support_vectors_[:, 0], clf_soft.support_vectors_[:, 1],
s=100, facecolors='none', edgecolors='k', label='Support Vectors')
# Plot decision boundary
ax = plt.gca()
xlim = ax.get_xlim()
ylim = ax.get_ylim()
xx = np.linspace(xlim[0], xlim[1], 200)
yy = np.linspace(ylim[0], ylim[1], 200)
YY, XX = np.meshgrid(yy, xx)
xy = np.vstack([XX.ravel(), YY.ravel()]).T
Z = clf_soft.decision_function(xy).reshape(XX.shape)
# Plot decision boundary and margins
plt.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], linestyles=['--', '-', '--'])
plt.xlabel("Amount Transferred")
plt.ylabel("Time of Transaction")
plt.legend()
plt.grid(True)
plt.show()
Understanding Kernels in detail in Support Vector Machines¶
Kernels are a fundamental component of Support Vector Machines, especially when dealing with data that is not linearly separable in its original feature space. In many real-world applications, such as image recognition, bioinformatics, or text classification, the relationship between input features and their corresponding classes is often complex and non-linear. A simple straight line or hyperplane in the original feature space may not suffice to separate the classes effectively.
To address this, kernels enable the SVM to implicitly map the data into a higher-dimensional feature space where linear separation may become possible. This transformation helps uncover hidden structures or patterns in the data that are not evident in the original dimensions. For instance, data points that are entangled in a spiral or circular pattern in two dimensions might become linearly separable when lifted into a third or higher dimension. However, directly computing this transformation for each data point can be computationally expensive and inefficient, particularly when the target space is very high-dimensional or even infinite-dimensional. This is where the kernel trick becomes invaluable.
The kernel trick allows the SVM to perform the required computations in the transformed feature space without ever explicitly computing the transformation itself. Instead of transforming each data point, the SVM relies on a kernel function, which computes the inner product between pairs of points as if they had been mapped to the higher-dimensional space. This makes the entire process significantly more efficient while still retaining the power to model complex, non-linear decision boundaries. Through kernels, SVMs become not just linear classifiers, but highly flexible and powerful tools for classification and regression in non-linear settings. The choice of kernel function—whether linear, polynomial, radial basis function (RBF), sigmoid, or a custom kernel—directly affects the type of decision boundary the model can learn, making it a critical aspect of SVM performance.
Linear Mapping and the Kernel Trick¶
Mathematically, let us say we have two data points $x_1$ and $x_2$. A kernel function $K(x_1, x_2)$ computes the dot product of these two points after a transformation $\phi$, such that:
$$ K(x_1, x_2) = \langle \phi(x_1), \phi(x_2) \rangle $$
Here, $\phi$ is a mapping function that projects the original points into a higher-dimensional space. However, we never need to compute $\phi(x)$ explicitly. The kernel function allows us to compute the dot product in that high-dimensional space directly, which is computationally efficient and memory-friendly. This is called implicit transformation, and it is the core of the kernel trick.
What is a Kernel?¶
A kernel is a function that computes a similarity measure between two data points. Formally, for inputs $x, x' \in \mathbb{R}^n$, a kernel function is defined as:
$$ K(x, x') = \langle \phi(x), \phi(x') \rangle $$
Here, $\phi$ is the feature map that transforms data into a high-dimensional (possibly infinite-dimensional) space. In SVMs, this allows the decision function to operate in a new space where linear separability is possible, even if it isn't in the original space.
Types of Kernel Functions¶
There are several types of kernel functions, each with distinct characteristics. The choice of kernel depends on the data distribution and the problem being solved.
1. Linear Kernel¶
This is the simplest kernel function and corresponds to the standard dot product in the input space:
$$ K(x, x') = x^\top x' $$
Use case: When the data is linearly separable in the original feature space. This kernel is computationally efficient and often used in text classification problems with high-dimensional sparse data.
Example: Classifying emails as spam or not based on term frequency vectors.
2. Polynomial Kernel¶
This kernel represents the similarity of vectors in a feature space over polynomials of the original variables.
$$ K(x, x') = (\gamma x^\top x' + r)^d $$
Where $\gamma$, $r$, and $d$ are kernel parameters:
- $\gamma$: scale of the dot product,
- $r$: a constant coefficient,
- $d$: degree of the polynomial.
Use case: When the relationship between class labels and features is polynomial.
Example: Image data with interactions between pixel intensities that cannot be modeled linearly.
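In scikit-learn's SVC these parameters are exposed as gamma, coef0 ($r$), and degree ($d$), so the kernel above can be requested directly. A minimal sketch on made-up points:
import numpy as np
from sklearn.svm import SVC
X = np.array([[1, 2], [2, 3], [6, 6], [7, 7]])
y = np.array([0, 0, 1, 1])
# K(x, x') = (gamma * x.x' + coef0)^degree  ->  gamma, coef0 = r, degree = d
clf_poly = SVC(kernel='poly', degree=3, gamma=0.5, coef0=1.0, C=1.0)
clf_poly.fit(X, y)
print(clf_poly.predict([[2, 2], [6, 7]]))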
3. Radial Basis Function (RBF) or Gaussian Kernel¶
One of the most popular and powerful kernels. It measures the similarity based on the Euclidean distance between feature vectors:
$$ K(x, x') = \exp\left(-\frac{\|x - x'\|^2}{2\sigma^2}\right) $$
Where $\sigma$ controls the spread or smoothness of the decision boundary.
Use case: When data has non-linear patterns and complex boundaries.
Example: Detecting fraudulent transactions where normal and fraudulent behavior are not linearly separable in the feature space.
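Note that scikit-learn parameterizes this kernel as $K(x, x') = \exp(-\gamma \|x - x'\|^2)$, so $\gamma = 1/(2\sigma^2)$: a small gamma (large $\sigma$) yields a smoother boundary, a large gamma a more wiggly one. A minimal sketch of passing an equivalent gamma, on made-up points:
import numpy as np
from sklearn.svm import SVC
# scikit-learn's RBF kernel is exp(-gamma * ||x - x'||^2), so gamma = 1 / (2 * sigma^2)
sigma = 1.0
X = np.array([[0, 0], [1, 1], [4, 4], [5, 5]])   # tiny made-up sample
y = np.array([0, 0, 1, 1])
clf_rbf = SVC(kernel='rbf', gamma=1.0 / (2 * sigma ** 2), C=1.0).fit(X, y)
print(clf_rbf.predict([[0.5, 0.5], [4.5, 4.5]]))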
4. Sigmoid Kernel¶
Based on the hyperbolic tangent function:
$$ K(x, x') = \tanh(\gamma x^\top x' + r) $$
This kernel behaves like a neural network activation function and mimics a two-layer perceptron.
Use case: Less commonly used but interesting for drawing analogies between SVMs and neural networks.
Example: Biometric authentication problems where the data has saturation effects.
5. Custom or Precomputed Kernels¶
Users can define their own kernels or precompute similarity matrices for special applications where domain knowledge allows better feature design than standard kernels.
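As a sketch of how this looks in scikit-learn, SVC accepts either a Python callable that computes the kernel matrix between two sets of samples, or kernel='precomputed' together with an explicit Gram matrix; the quadratic kernel below is just an illustrative choice:
import numpy as np
from sklearn.svm import SVC
X_train = np.array([[1, 2], [2, 3], [6, 6], [7, 7]], dtype=float)
y_train = np.array([0, 0, 1, 1])
X_test = np.array([[2, 2], [6, 7]], dtype=float)
def quadratic_kernel(A, B):
    """Custom kernel: (A . B^T)^2, computed between two sample sets."""
    return np.dot(A, B.T) ** 2
# Option 1: pass the callable directly
clf_callable = SVC(kernel=quadratic_kernel).fit(X_train, y_train)
print(clf_callable.predict(X_test))
# Option 2: precompute the Gram matrices yourself
clf_pre = SVC(kernel='precomputed').fit(quadratic_kernel(X_train, X_train), y_train)
print(clf_pre.predict(quadratic_kernel(X_test, X_train)))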
Implicit Transformation with Example¶
Let’s consider a simple example to illustrate implicit transformation.
Suppose we have two-dimensional data:
- Class A: [1, 1], [2, 2]
- Class B: [-1, -1], [-2, -2]
Clearly, this data is linearly separable. But what if the data was in the form:
- Class A: [1, 1], [-1, -1]
- Class B: [1, -1], [-1, 1]
In 2D, this data forms the classic XOR-like problem, which is not linearly separable.
Let's apply a mapping:
$$ \phi(x_1, x_2) = (x_1^2, \sqrt{2}x_1x_2, x_2^2) $$
Under this transformation:
- [1, 1] becomes [1, √2, 1]
- [-1, -1] becomes [1, √2, 1]
- [1, -1] becomes [1, -√2, 1]
- [-1, 1] becomes [1, -√2, 1]
Now the data is linearly separable in 3D: the middle coordinate is positive for Class A and negative for Class B, so a plane through the middle coordinate's zero level separates them. Instead of performing the mapping explicitly, a kernel function - here the degree-2 polynomial kernel $K(x, x') = (x^\top x')^2$, whose feature map is exactly this $\phi$ - can do this implicitly. This is computationally advantageous because it avoids the need to compute and store high-dimensional feature vectors.
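A quick numerical check (a sketch, not part of the derivation above) confirms that the degree-2 polynomial kernel reproduces the dot product in the mapped space without ever forming $\phi(x)$:
import numpy as np
def phi(x):
    """Explicit feature map: (x1^2, sqrt(2)*x1*x2, x2^2)."""
    return np.array([x[0] ** 2, np.sqrt(2) * x[0] * x[1], x[1] ** 2])
def poly_kernel(x, z):
    """Degree-2 homogeneous polynomial kernel: (x . z)^2."""
    return np.dot(x, z) ** 2
a, b = np.array([1.0, 2.0]), np.array([3.0, 4.0])
print(np.dot(phi(a), phi(b)))   # dot product in the 3-D mapped space: 121.0
print(poly_kernel(a, b))        # same value, computed in the original 2-D space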
Visual Intuition Behind Kernels¶
Consider again the fraud detection scenario. If transaction amount and time cannot clearly separate fraud and non-fraud in 2D, you might use a Gaussian kernel to project data into a higher-dimensional space where the radial behavior becomes a feature.
In that space:
- Points that are close in time and amount but belong to different classes may now be separated.
- The kernel uses the "distance" or similarity between all pairs of points to help define boundaries.
The decision boundary in the original space will appear non-linear, but it’s actually a linear separation in the transformed space.
Choosing the Right Kernel¶
The selection of a kernel function is not always straightforward. It often depends on:
- The structure of your data
- The expected decision boundary complexity
- Computational efficiency requirements
Linear kernels are best for linearly separable and sparse high-dimensional data. RBF kernels are suitable for general-purpose non-linear decision boundaries. Polynomial kernels may work better when interaction between features is known to be polynomial.
Kernel Trick in Detail¶
The kernel trick is a computational shortcut that allows an algorithm to learn a non-linear decision boundary by implicitly mapping data into a higher-dimensional space, without ever performing the transformation explicitly. To understand it properly, let’s start with a real-world-like scenario - classifying data that forms two nested circles.
Imagine plotting two classes of data on a 2D plane:
- Class A (label = 0): Forms a small circle centered at the origin.
- Class B (label = 1): Forms a larger circle surrounding the small circle.
In this 2D space, it’s impossible to draw a straight line that separates the two classes. No linear boundary can cleanly divide them. If you try to draw one, it will either cut through both circles or misclassify many points. This type of data distribution is non-linearly separable in 2D.
Now imagine you add a third dimension - say, the radial distance of each point from the origin, computed as:
$$ z = x^2 + y^2 $$
This transformation lifts each point into 3D space. Points that were closer to the center (the smaller circle) will now lie on a lower z-level, and points from the larger circle (which are farther from the origin) will rise to higher z-values. What we now have is a 3D "bowl" shape, with the inner circle at the bottom and the outer circle at the top.
In this transformed space, a flat plane (hyperplane in 3D) can now separate the two classes. This plane would be parallel to the x-y plane and lie somewhere between the two z-levels corresponding to the two classes. So, even though the classes weren’t separable in 2D, by lifting them into 3D using a transformation, we made linear separation possible.
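This explicit lift is easy to sketch in code: generate two noisy concentric circles with scikit-learn's make_circles (which, unlike the description above, labels the outer circle 0 and the inner one 1; the argument is unchanged), append the feature $z = x^2 + y^2$, and a plain linear SVC separates the lifted data:
import numpy as np
from sklearn.datasets import make_circles
from sklearn.svm import SVC
# Two noisy concentric circles; factor sets the radius ratio
X, y = make_circles(n_samples=200, factor=0.3, noise=0.05, random_state=42)
# Explicitly add the third feature z = x^2 + y^2
z = (X ** 2).sum(axis=1).reshape(-1, 1)
X_lifted = np.hstack([X, z])
# A linear separator now works in the lifted 3-D space
clf = SVC(kernel='linear', C=1.0).fit(X_lifted, y)
print("Training accuracy in lifted space:", clf.score(X_lifted, y))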
Transforming each data point explicitly into a higher dimension (like computing $x^2 + y^2$) might be feasible for simple 2D to 3D projections, but this quickly becomes computationally expensive in real-world problems:
- Real datasets often have hundreds or thousands of features.
- Higher-dimensional transformations lead to exponential growth in memory and computation.
- Managing the transformed vectors explicitly in high-dimensional space is inefficient or even infeasible.
This is where the kernel trick comes in.
The Kernel Trick: Intuition and Mechanism¶
The kernel trick lets us benefit from working in a higher-dimensional space without actually going there. Instead of transforming data explicitly and then calculating dot products between transformed vectors, we use a kernel function that calculates the dot product directly in the higher-dimensional space.
Formally, for a transformation function $\phi$, and input vectors $x$ and $x'$, the kernel trick says:
$$ K(x, x') = \langle \phi(x), \phi(x') \rangle $$
This means we can compute the inner product in the transformed space without ever computing $\phi(x)$ or $\phi(x')$.
In the nested circles example, if we use a Radial Basis Function (RBF) kernel:
$$ K(x, x') = \exp\left(-\frac{\|x - x'\|^2}{2\sigma^2}\right) $$
This kernel implicitly maps each data point into an infinite-dimensional space and calculates the similarity between points based on their Euclidean distance. Points that are close together (e.g., within the same circle) will have high similarity values. Points that are far apart (e.g., across circles) will have low similarity.
As a result, in this implicit feature space:
- The inner circle points cluster together,
- The outer circle points form another cluster,
- And a linear separator (hyperplane) can be found.
This is done without ever computing the actual transformation from 2D to a higher-dimensional feature space.
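The same outcome can be sketched by handing the raw 2D circles directly to an RBF-kernel SVC, with no manual lifting step; the linear kernel is shown alongside for contrast:
from sklearn.datasets import make_circles
from sklearn.svm import SVC
X, y = make_circles(n_samples=200, factor=0.3, noise=0.05, random_state=42)
# Linear kernel in the original 2-D space: accuracy stays near chance
print("Linear:", SVC(kernel='linear', C=1.0).fit(X, y).score(X, y))
# RBF kernel: the lifting happens implicitly inside the kernel
print("RBF:   ", SVC(kernel='rbf', gamma=1.0, C=1.0).fit(X, y).score(X, y))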
Key Advantages of the Kernel Trick¶
- Efficiency: Avoids the high cost of explicitly transforming data.
- Flexibility: Allows SVM to model very complex relationships between features.
- Scalability: Kernel computations depend only on pairwise similarities, not on the size of the transformed feature space.
Code Section - Classification of Mushroom dataset - Edible vs Poisonous¶
Importing Libraries¶
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler, LabelEncoder, OrdinalEncoder
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn import svm, datasets
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, \
recall_score, f1_score, classification_report, ConfusionMatrixDisplay
Loading dataset¶
df = pd.read_csv('mushrooms-full-dataset.csv')
df.shape
(8124, 22)
df.head()
| poisonous | cap-shape | cap-surface | cap-color | bruises | odor | gill-attachment | gill-spacing | gill-size | gill-color | ... | stalk-surface-below-ring | stalk-color-above-ring | stalk-color-below-ring | veil-type | veil-color | ring-number | ring-type | spore-print-color | population | habitat |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | p | x | s | n | t | p | f | c | n | k | ... | s | w | w | p | w | o | p | k | s | u |
1 | e | x | s | y | t | a | f | c | b | k | ... | s | w | w | p | w | o | p | n | n | g |
2 | e | b | s | w | t | l | f | c | b | n | ... | s | w | w | p | w | o | p | n | n | m |
3 | p | x | y | w | t | p | f | c | n | n | ... | s | w | w | p | w | o | p | k | s | u |
4 | e | x | s | g | f | n | f | w | b | k | ... | s | w | w | p | w | o | e | n | a | g |
5 rows × 22 columns
# Lets see if there is any null
df.isna().sum()
poisonous                   0
cap-shape                   0
cap-surface                 0
cap-color                   0
bruises                     0
odor                        0
gill-attachment             0
gill-spacing                0
gill-size                   0
gill-color                  0
stalk-shape                 0
stalk-surface-above-ring    0
stalk-surface-below-ring    0
stalk-color-above-ring      0
stalk-color-below-ring      0
veil-type                   0
veil-color                  0
ring-number                 0
ring-type                   0
spore-print-color           0
population                  0
habitat                     0
dtype: int64
# Now let's check the target class counts
df['poisonous'].value_counts(normalize=True)
poisonous
e    0.517971
p    0.482029
Name: proportion, dtype: float64
Classes are roughly balanced, which is good for our classifier.
Data Preprocessing¶
Split the data into target and feature set
inputs = df.iloc[:,1:]
targets = df.iloc[:, 0]
inputs.head(2)
| cap-shape | cap-surface | cap-color | bruises | odor | gill-attachment | gill-spacing | gill-size | gill-color | stalk-shape | ... | stalk-surface-below-ring | stalk-color-above-ring | stalk-color-below-ring | veil-type | veil-color | ring-number | ring-type | spore-print-color | population | habitat |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | x | s | n | t | p | f | c | n | k | e | ... | s | w | w | p | w | o | p | k | s | u |
1 | x | s | y | t | a | f | c | b | k | e | ... | s | w | w | p | w | o | p | n | n | g |
2 rows × 21 columns
Splitting into training and testing dataset
xtrain, xtest, ytrain, ytest = train_test_split(inputs, targets,
test_size=0.2, random_state=42,
stratify=targets)
ytrain.value_counts(normalize=True)
poisonous
e    0.517926
p    0.482074
Name: proportion, dtype: float64
ytest.value_counts(normalize=True)
poisonous
e    0.518154
p    0.481846
Name: proportion, dtype: float64
This confirms that stratification preserved the class distribution in both splits.
Now we will define encoders for the features and the target, since SVC requires numeric inputs.
enc_features = OrdinalEncoder()
enc_label = LabelEncoder()
xtrain_transf = enc_features.fit_transform(xtrain)
xtest_transf = enc_features.transform(xtest)
ytrain_transf = enc_label.fit_transform(ytrain)
ytest_transf = enc_label.transform(ytest)
xtrain_transf[:2]
array([[2., 3., 9., 0., 2., 1., 0., 0., 2., 0., 1., 1., 6., 0., 0., 2., 1., 2., 1., 5., 1.],
       [5., 2., 5., 1., 5., 1., 0., 0., 1., 0., 2., 2., 2., 7., 0., 2., 2., 0., 7., 1., 6.]])
ytrain_transf[:2]
array([1, 0])
Rescaling the data: SVC is sensitive to feature scales, so we rescale the inputs to the range of -1 to 1
scaling = MinMaxScaler(feature_range= (-1,1)).fit(xtrain_transf)
xtrain_scaled = scaling.transform(xtrain_transf)
xtest_scaled = scaling.transform(xtest_transf)
Creating Classification models¶
# Starting with linear kernel first
C = 1.0 # regularization parameter controlling the trade-off between margin width and misclassification
svc_linear = svm.SVC(kernel='linear', C=C).fit(xtrain_scaled, ytrain_transf)
ypred_test = svc_linear.predict(xtest_scaled)
print(confusion_matrix(ytest_transf, ypred_test))
[[815  27]
 [ 28 755]]
fig, axes = plt.subplots(figsize=(10,4))
cmd = ConfusionMatrixDisplay(
confusion_matrix(ytest_transf, ypred_test),
display_labels=['Edible', 'Poisonous']
)
cmd.plot(ax=axes);
print(classification_report(
ytest_transf, ypred_test, target_names = ['Edible', 'Poisonous'])
)
              precision    recall  f1-score   support

      Edible       0.97      0.97      0.97       842
   Poisonous       0.97      0.96      0.96       783

    accuracy                           0.97      1625
   macro avg       0.97      0.97      0.97      1625
weighted avg       0.97      0.97      0.97      1625
Let's see if we can improve the classifier
Using Cross Validation approach¶
hyperparameters = [
{'kernel' : ['linear'], 'C' : [1,10]},
{'kernel' : ['poly'], 'C':[0.1,1,10]},
{'kernel' : ['rbf'], 'gamma':[1e-3, 1e-4], 'C':[1,10]}
]
# gamma is used with the rbf kernel; it controls how far the influence of a single training example reaches
scores = ['precision', 'recall']
for score in scores:
    print('Tuning hyperparameter for ', score)
    print()
    clf = GridSearchCV(svm.SVC(), hyperparameters, scoring=score) # cv=5 by default
    clf.fit(xtrain_scaled, ytrain_transf)
    print('Best parameters found : \n', clf.best_params_)
    print()
    print('Grid score on development set:\n')
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
        print(mean, std * 2, params)
    print()
    print('Detailed Classification Report : \n')
    y_true, y_pred = ytest_transf, clf.predict(xtest_scaled)
    print(classification_report(y_true, y_pred))
    print('\n')
Tuning hyperparameter for  precision

Best parameters found : 
 {'C': 0.1, 'kernel': 'poly'}

Grid score on development set:

0.9581685809539241 0.018149967606886655 {'C': 1, 'kernel': 'linear'}
0.9609007016248526 0.014967629211791312 {'C': 10, 'kernel': 'linear'}
1.0 0.0 {'C': 0.1, 'kernel': 'poly'}
0.9996810207336523 0.001275917065390786 {'C': 1, 'kernel': 'poly'}
1.0 0.0 {'C': 10, 'kernel': 'poly'}
0.9472367421129387 0.017886730548394 {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.9425038002986422 0.019197193605451017 {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.9550908071345099 0.019754676048719444 {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.9472367421129387 0.017886730548394 {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}

Detailed Classification Report : 

              precision    recall  f1-score   support

           0       0.99      1.00      0.99       842
           1       1.00      0.99      0.99       783

    accuracy                           0.99      1625
   macro avg       0.99      0.99      0.99      1625
weighted avg       0.99      0.99      0.99      1625


Tuning hyperparameter for  recall

Best parameters found : 
 {'C': 1, 'kernel': 'poly'}

Grid score on development set:

0.9387172549439239 0.035570029622332376 {'C': 1, 'kernel': 'linear'}
0.9543584491289216 0.02122281349979636 {'C': 10, 'kernel': 'linear'}
0.9757417796597215 0.009336022016715193 {'C': 0.1, 'kernel': 'poly'}
1.0 0.0 {'C': 1, 'kernel': 'poly'}
1.0 0.0 {'C': 10, 'kernel': 'poly'}
0.892436216885519 0.006513038319161858 {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.8145599258092953 0.01683093150004418 {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.9336120580277297 0.010930774298062718 {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.892436216885519 0.006513038319161858 {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}

Detailed Classification Report : 

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       842
           1       1.00      1.00      1.00       783

    accuracy                           1.00      1625
   macro avg       1.00      1.00      1.00      1625
weighted avg       1.00      1.00      1.00      1625