In-depth Detail on Gradient Boosting Classifier¶

Knowledge of Loss Functions¶

We need to know a few things before we get into the Gradient Boosting Classifier. The first is the loss function.

A loss function measures how wrong a model's prediction is compared to the true value. It tells the model how far off it is so that it can adjust during training. In regression, we predict continuous values such as prices, temperature, or age. In classification, we predict categories or labels such as spam or not spam, cat or dog, etc.

A common loss function for regression is Mean Squared Error (MSE)

Formula:

$$ \text{MSE} = \frac{1}{n} \sum_{i=1}^{n} (y_i - \hat{y}_i)^2 $$

Where, $y_i$ is the actual value, $\hat{y}_i$ is the predicted value, and $n$ is the number of samples

Suppose you are predicting house prices.

House | Actual Price (y) | Predicted Price (ŷ) | Error (y - ŷ) | Squared Error
------|------------------|---------------------|---------------|--------------
1     | 100              | 90                  | 10            | 100
2     | 150              | 160                 | -10           | 100
3     | 200              | 210                 | -10           | 100

$$ \text{MSE} = \frac{100 + 100 + 100}{3} = 100 $$

In [1]:
import numpy as np
from sklearn.metrics import mean_squared_error

# Actual and predicted values
y_true = np.array([100, 150, 200])
y_pred = np.array([90, 160, 210])

# Calculate MSE manually
errors = y_true - y_pred
squared_errors = errors ** 2
mse_manual = np.mean(squared_errors)

# Or use sklearn
mse_sklearn = mean_squared_error(y_true, y_pred)

print("Manual MSE:", mse_manual)
print("Sklearn MSE:", mse_sklearn)
Manual MSE: 100.0
Sklearn MSE: 100.0

A common loss function for classification is Log Loss (Cross-Entropy Loss)

Binary classification formula:

$$ \text{Log Loss} = -\frac{1}{n} \sum_{i=1}^n \left[ y_i \cdot \log(\hat{p}_i) + (1 - y_i) \cdot \log(1 - \hat{p}_i) \right] $$

Where, $y_i$ is the actual label (0 or 1) and $\hat{p}_i$ is the predicted probability for class 1

Example:

Sample | Actual Class (y) | Predicted Prob of class 1 (p̂) | Log Loss Component
-------|------------------|--------------------------------|--------------------
1      | 1                | 0.9                            | $-\log(0.9) = 0.105$
2      | 0                | 0.2                            | $-\log(1 - 0.2) = -\log(0.8) = 0.223$
3      | 1                | 0.6                            | $-\log(0.6) = 0.511$

$$ \text{Log Loss} = \frac{0.105 + 0.223 + 0.511}{3} = 0.28 $$

A lower log loss means the model's predicted probabilities are closer to the true labels, i.e. the predictions are both confident and correct. In short: lower log loss = better model performance in classification tasks.

In [2]:
from sklearn.metrics import log_loss

# Actual binary labels
y_true = [1, 0, 1]

# Predicted probabilities for class 1
y_prob = [0.9, 0.2, 0.6]

# Compute Log Loss
loss = log_loss(y_true, y_prob)
print("Log Loss:", loss)
Log Loss: 0.2797765635793423

For Multi-Class Classification

Suppose we have 3 classes, and the model predicts a probability distribution for each class.

Example:

Sample | True Class | Predicted Probs (class 0, 1, 2)
-------|------------|---------------------------------
1      | 2          | [0.1, 0.2, 0.7]
2      | 1          | [0.2, 0.6, 0.2]

By default, log_loss infers the set of classes from y_true. If y_true does not include every class present in y_pred_proba, you must specify the full list of classes using the labels argument.

In [6]:
import numpy as np
from sklearn.metrics import log_loss

# True labels (classes 0, 1, 2 exist, but only 1 and 2 appear in y_true)
y_true = [2, 1]

# Predicted probabilities for all 3 classes (0, 1, 2)
y_pred_proba = np.array([
    [0.1, 0.2, 0.7],  # Sample 1
    [0.2, 0.6, 0.2]   # Sample 2
])

# Specify all possible class labels
loss = log_loss(y_true, y_pred_proba, labels=[0, 1, 2])
print("Multi-class Log Loss:", loss)
Multi-class Log Loss: 0.4337502838523616

How Log Loss Differs in Binary vs Multi-Class Classification¶

Binary Classification¶

In binary classification, you predict a single probability for the positive class (class 1). The log loss formula is:

$$ \text{LogLoss} = - \frac{1}{n} \sum_{i=1}^n \left[ y_i \log(p_i) + (1 - y_i) \log(1 - p_i) \right] $$

You only predict one probability per sample (for class 1). The other probability (for class 0) is simply $1 - p_i$

Multi-Class Classification¶

In multi-class classification (say for 3 classes), you predict a probability distribution across all classes:

$$ \text{LogLoss} = - \frac{1}{n} \sum_{i=1}^n \sum_{j=1}^{k} y_{ij} \log(p_{ij}) $$

Where:

  • $y_{ij} = 1$ if sample $i$'s true class is $j$, else 0
  • $p_{ij}$ is the predicted probability of class $j$ for sample $i$
  • $k$ is the number of classes

So, for each sample, the model is penalized based on how far the probability assigned to the true class is from 1. The more confident and correct the prediction, the lower the log loss.
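
To make the double sum concrete, here is a minimal sketch (an illustrative addition, reusing the two-sample multi-class example from the earlier code cell) that one-hot encodes the true classes and evaluates the formula directly. Only the true-class term survives for each sample, and the result should match the value of about 0.434 that sklearn's log_loss returned above.

In [ ]:
import numpy as np
from sklearn.metrics import log_loss

# Same two samples as before: true classes 2 and 1, probabilities over classes 0, 1, 2
y_true = np.array([2, 1])
p = np.array([
    [0.1, 0.2, 0.7],
    [0.2, 0.6, 0.2],
])

# One-hot encode y_true so that y_ij = 1 only for sample i's true class j
y_onehot = np.eye(3)[y_true]

# Double sum from the formula: -(1/n) * sum_i sum_j y_ij * log(p_ij)
manual = -np.mean(np.sum(y_onehot * np.log(p), axis=1))

print("Manual multi-class log loss:", manual)
print("sklearn log_loss           :", log_loss(y_true, p, labels=[0, 1, 2]))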

Knowledge of Gradient Descent¶

Gradient Descent is an optimization algorithm used to minimize a loss function by updating model parameters in the opposite direction of the gradient of the loss with respect to those parameters.

The core idea: start with initial values for the model parameters, compute the gradient (the direction of steepest increase of the loss), and take a step in the opposite direction to reduce the loss. For regression, it minimizes MSE by updating weights based on the difference between predicted and actual values. For classification, it minimizes Log Loss by updating weights based on the difference between predicted probability and actual class.

Mathematical Update Rule

$$ \theta := \theta - \alpha \cdot \frac{\partial L}{\partial \theta} $$

Where:

  • $\theta$: model parameter(s)
  • $L$: loss function
  • $\alpha$: learning rate (step size)
  • $\frac{\partial L}{\partial \theta}$: gradient of the loss with respect to parameter
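
As a minimal illustration of this update rule (the loss, starting point, and learning rate below are arbitrary choices for demonstration, not taken from the examples that follow), consider minimizing $L(\theta) = (\theta - 3)^2$, whose gradient is $2(\theta - 3)$:

In [ ]:
# Minimal sketch of theta := theta - alpha * dL/dtheta
# on an illustrative loss L(theta) = (theta - 3)^2, which is minimized at theta = 3
theta = 0.0   # arbitrary starting value
alpha = 0.1   # learning rate

for step in range(10):
    grad = 2 * (theta - 3)        # dL/dtheta
    theta = theta - alpha * grad  # step against the gradient
    print(f"step {step + 1}: theta = {theta:.4f}, loss = {(theta - 3) ** 2:.4f}")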

Gradient Descent in Regression¶

Loss Function: For regression, we often use Mean Squared Error (MSE):

$$ L = \frac{1}{n} \sum_{i=1}^n (y_i - \hat{y}_i)^2 $$

If we use Linear Regression:

$$ \hat{y}_i = w x_i + b $$

Parameters: $w$ (weight), $b$ (bias)

Compute Gradients

$$ \frac{\partial L}{\partial w} = -\frac{2}{n} \sum x_i (y_i - (w x_i + b)) $$

$$ \frac{\partial L}{\partial b} = -\frac{2}{n} \sum (y_i - (w x_i + b)) $$

Weight Update

$w := w - \alpha \cdot \frac{\partial L}{\partial w}$

Numerical Example for Regression¶

Suppose:

  • Data: $X = [1, 2], Y = [2, 4]$
  • Initial guess: $w = 0, b = 0$
  • Learning rate: $\alpha = 0.1$
Step 1: Predictions¶

$$ \hat{y} = w x + b = 0 $$

Step 2: Gradients¶

$$ \frac{\partial L}{\partial w} = -\frac{2}{2} [1(2-0)+2(4-0)] = -(2+8)= -10 $$

$$ \frac{\partial L}{\partial b} = -\frac{2}{2} [(2-0)+(4-0)] = -(6)= -6 $$

Step 3: Update Parameters¶

$$ w = 0 - 0.1 * (-10) = 1 $$

$$ b = 0 - 0.1 * (-6) = 0.6 $$

Next iteration will use updated $w=1, b=0.6$ and repeat until convergence.

In [7]:
import numpy as np

# Data
X = np.array([1, 2])
Y = np.array([2, 4])

# Initialize parameters
w, b = 0.0, 0.0
alpha = 0.1
epochs = 10

for i in range(epochs):
    # Predictions
    Y_pred = w * X + b
    
    # Compute gradients
    dw = (-2 / len(X)) * np.sum(X * (Y - Y_pred))
    db = (-2 / len(X)) * np.sum(Y - Y_pred)
    
    # Update parameters
    w -= alpha * dw
    b -= alpha * db
    
    loss = np.mean((Y - Y_pred)**2)
    print(f"Epoch {i+1}: w={w:.3f}, b={b:.3f}, Loss={loss:.3f}")
Epoch 1: w=1.000, b=0.600, Loss=10.000
Epoch 2: w=1.320, b=0.780, Loss=1.060
Epoch 3: w=1.426, b=0.828, Loss=0.173
Epoch 4: w=1.465, b=0.835, Loss=0.083
Epoch 5: w=1.482, b=0.828, Loss=0.073
Epoch 6: w=1.492, b=0.818, Loss=0.070
Epoch 7: w=1.501, b=0.807, Loss=0.068
Epoch 8: w=1.508, b=0.795, Loss=0.066
Epoch 9: w=1.516, b=0.784, Loss=0.064
Epoch 10: w=1.523, b=0.772, Loss=0.062

Gradient Descent in Classification¶

For classification, the most common method is Logistic Regression using Log Loss as the loss function.

Model

$$ \hat{p} = \sigma(w x + b) = \frac{1}{1 + e^{-(w x + b)}} $$

Loss Function

Log Loss for binary classification:

$$ L = -\frac{1}{n} \sum [y_i \log(\hat{p}_i) + (1-y_i)\log(1-\hat{p}_i)] $$

Compute Gradients

$$ \frac{\partial L}{\partial w} = \frac{1}{n} \sum ( \hat{p}_i - y_i ) x_i $$

$$ \frac{\partial L}{\partial b} = \frac{1}{n} \sum ( \hat{p}_i - y_i ) $$

Weight Update

$w := w - \alpha \cdot \frac{\partial L}{\partial w}$

Numerical Example for Classification¶

Data:

  • $X = [1, 2], Y = [0, 1]$
  • Initial: $w=0, b=0$
  • Learning rate: $\alpha = 0.1$
Step 1: Predictions¶

$$ \hat{p}_1 = 0.5, \hat{p}_2 = 0.5 $$

Step 2: Gradients¶

$$ \frac{\partial L}{\partial w} = \frac{1}{2}\left[(0.5-0) * 1 + (0.5-1) * 2\right] = \frac{0.5 - 1}{2} = -0.25 $$

$$ \frac{\partial L}{\partial b} = \frac{1}{2}\left[(0.5-0) + (0.5-1)\right] = \frac{0.5 - 0.5}{2} = 0 $$

Update:

$$ w = 0 - 0.1 * (-0.25) = 0.025 $$

$$ b = 0 - 0.1 * 0 = 0 $$

In [9]:
import numpy as np

# Data
X = np.array([1, 2])
Y = np.array([0, 1])

# Initialize
w, b = 0.0, 0.0
alpha = 0.1
epochs = 10

for i in range(epochs):
    # Predictions using sigmoid
    z = w * X + b
    Y_pred = 1 / (1 + np.exp(-z))
    
    # Compute gradients
    dw = np.mean((Y_pred - Y) * X)
    db = np.mean(Y_pred - Y)
    
    # Update
    w -= alpha * dw
    b -= alpha * db
    
    # Compute log loss
    loss = -np.mean(Y * np.log(Y_pred) + (1 - Y) * np.log(1 - Y_pred))
    print(f"Epoch {i+1}: w={w:.3f}, b={b:.3f}, Loss={loss:.4f}")
Epoch 1: w=0.025, b=0.000, Loss=0.6931
Epoch 2: w=0.048, b=-0.001, Loss=0.6871
Epoch 3: w=0.070, b=-0.003, Loss=0.6818
Epoch 4: w=0.091, b=-0.005, Loss=0.6770
Epoch 5: w=0.111, b=-0.009, Loss=0.6728
Epoch 6: w=0.129, b=-0.013, Loss=0.6690
Epoch 7: w=0.147, b=-0.017, Loss=0.6655
Epoch 8: w=0.163, b=-0.022, Loss=0.6623
Epoch 9: w=0.179, b=-0.028, Loss=0.6594
Epoch 10: w=0.194, b=-0.034, Loss=0.6567

Getting into Gradient Boosting¶

Gradient Boosting is a machine learning technique for building predictive models. It works by combining many simple models (often decision trees) to create a more accurate and powerful model. Gradient Boosting builds models in sequence. Each new model is trained to correct the errors (residuals) made by the previous ones. It does this by minimizing a loss function, using a method inspired by gradient descent.

Let’s say you want to predict the selling price of houses based on square footage. Your first model might predict that every house costs 50 lakh, which is the average of all prices in your dataset. For a house that actually costs 60 lakh, your model is off by 10 lakh. For another that costs 40 lakh, it is off by -10 lakh. Now, build a small decision tree that takes square footage into account and tries to predict these errors. Maybe it learns that houses larger than 1000 sq. ft tend to be underpriced and need their predictions raised. Add the corrections from the new tree to the original predictions. So, the house that was predicted at 50 lakh may now be predicted at 54 lakh if the tree says to add 4 lakh. This process is repeated with new trees that try to fix the remaining errors until the model is accurate enough or starts to overfit.

Gradient Boosting is powerful because:

  • It can handle many types of problems, such as predicting numbers (regression), classifying things (classification), or ranking results (used in search engines).
  • It works well with many kinds of data and often gives better results than other models.
  • You can use any differentiable loss function, which means it's flexible for custom needs.

Step-by-Step Explanation of Gradient Boosting¶

Step 1: Define a Loss Function¶
  • In regression, a common choice is the Mean Squared Error

    L(y, F(x)) = (y - F(x))^2

  • In binary classification, the Cross-Entropy or Log Loss is often used

    L(y, F(x)) = -[y log(p) + (1 - y) log(1 - p)], where p = sigmoid(F(x))

Different loss functions can be chosen for different tasks, provided they are differentiable.

Step 2: Initialize the Model¶

The first model is initialized with a constant value that minimizes the loss function over all training data.

  • For regression (mean squared error), the best constant is the average of target values

    F0(x) = mean(y)

  • For classification (log loss), the model starts with the log-odds of the target class distribution

    F0(x) = log(p / (1 - p)), where p is the proportion of class 1

This initial model serves as a base prediction.
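
As a small sketch of this initialization step (the toy targets below are the ones used in the numerical examples later in this notebook), the two constants can be computed directly:

In [ ]:
import numpy as np

# Regression: the constant that minimizes MSE is the mean of the targets
y_reg = np.array([4, 5, 6, 8])           # toy targets from the regression example below
F0_reg = y_reg.mean()

# Classification: the constant that minimizes log loss is the log-odds of class 1
y_clf = np.array([0, 0, 1, 1])           # toy labels from the classification example below
p = y_clf.mean()
F0_clf = np.log(p / (1 - p))

print("F0 for regression (mean of y):", F0_reg)       # 5.75
print("F0 for classification (log-odds):", F0_clf)    # 0.0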

Step 3: Compute the Pseudo-Residuals¶

At every iteration, compute the pseudo-residuals, which are the negative gradients of the loss function with respect to the current model’s predictions.

  • For regression with MSE, residuals are simply

    r_i = y_i - Fm(x_i)

  • For classification with log loss, residuals involve gradients of log loss with respect to prediction scores

The residuals show how much and in what direction the current model is wrong.

Step 4: Fit a New Model to the Residuals¶

Train a new weak learner (usually a shallow decision tree) to predict the pseudo-residuals. This learner identifies the patterns in the remaining errors that the ensemble needs to fix. This is equivalent to learning a function h(x) that approximates the gradient of the loss function.

Step 5: Update the Model¶

Update the model by adding the new learner’s predictions to the current ensemble. The contribution of the new model is scaled by a learning rate to prevent overfitting.

Fm(x) = Fm-1(x) + η * h(x)

where η is the learning rate, typically a small value like 0.1.

Step 6: Repeat the Process¶

Repeat steps 3 to 5 for a fixed number of iterations or until the model performance no longer improves. Each iteration brings the prediction closer to the true value.

Numerical Example (Regression)¶

Assume we are solving a regression problem. The dataset is as follows:

x | y
--|--
1 | 4
2 | 5
3 | 6
4 | 8

Step 1: Initialize the prediction. The initial prediction is the mean of y:

F0 = (4 + 5 + 6 + 8) / 4 = 5.75

Step 2: Compute residuals

x | y | F0   | Residual = y - F0
--|---|------|-------------------
1 | 4 | 5.75 | -1.75
2 | 5 | 5.75 | -0.75
3 | 6 | 5.75 | 0.25
4 | 8 | 5.75 | 2.25

Step 3: Train a decision tree on the residuals. Suppose the tree learns a function h1(x) that approximately predicts these residuals.

Step 4: Update the prediction. Let η = 0.1. The new model becomes

F1(x) = F0(x) + 0.1 * h1(x)

Repeat the process to get better and better predictions.
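
To make these steps concrete before moving to scikit-learn, here is a minimal from-scratch sketch of the loop described above, using depth-1 DecisionTreeRegressor stumps as the weak learners and η = 0.1. It follows the simplified procedure in this section rather than scikit-learn's exact internals, so treat it as illustrative.

In [ ]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor

# Toy data from the numerical example
X = np.array([[1], [2], [3], [4]])
y = np.array([4.0, 5.0, 6.0, 8.0])

eta = 0.1        # learning rate
n_rounds = 3     # number of boosting rounds

# Step 2: initialize with the constant that minimizes MSE (the mean of y)
F = np.full_like(y, y.mean())
print("F0:", F)

for m in range(1, n_rounds + 1):
    # Step 3: pseudo-residuals = negative gradient of MSE = y - F(x)
    residuals = y - F

    # Step 4: fit a shallow tree (weak learner) to the residuals
    tree = DecisionTreeRegressor(max_depth=1)
    tree.fit(X, residuals)

    # Step 5: add the scaled correction to the ensemble
    F = F + eta * tree.predict(X)
    print(f"F{m}: {np.round(F, 4)}, MSE = {np.mean((y - F) ** 2):.4f}")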

Python Code Example with Visualization (Regression)¶

In [10]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import GradientBoostingRegressor
In [11]:
# Sample dataset
X = np.array([[1], [2], [3], [4]])
y = np.array([4, 5, 6, 8])

# Fit Gradient Boosting model
model = GradientBoostingRegressor(n_estimators=3, learning_rate=0.1, max_depth=1)
model.fit(X, y)
Out[11]:
GradientBoostingRegressor(max_depth=1, n_estimators=3)
In [12]:
# Predict
pred = model.predict(X)

# Plot predictions
plt.figure(figsize=(6,4))
plt.scatter(X, y, color='black', label='Actual')
plt.plot(X, pred, color='red', marker='o', label='Prediction')
plt.title('Gradient Boosting Regression (3 estimators)')
plt.xlabel('X')
plt.ylabel('y')
plt.legend()
plt.grid(True)
plt.show()
[Figure: Gradient Boosting Regression (3 estimators), actual points vs. model predictions]

This visualizes how the model fits the data through a series of weak learners.

Numerical Example (Binary Classification)¶

Assume we are solving a binary classification problem. The dataset is as follows:

x | y
--|--
1 | 0
2 | 0
3 | 1
4 | 1

Here y = 0 represents class 0 and y = 1 represents class 1.

We aim to learn a function F(x) such that:

  • P(y=1 | x) = sigmoid(F(x))
  • P(y=0 | x) = 1 - sigmoid(F(x))

We use log loss (cross-entropy) as the loss function:

$$ L(y, F(x)) = -[y \cdot \log(p) + (1 - y) \cdot \log(1 - p)] $$

where $p = \text{sigmoid}(F(x)) = \frac{1}{1 + e^{-F(x)}}$

Step 1: Initialize Predictions (F₀)¶

We initialize predictions with the log-odds of the positive class.

$$ p = \frac{\sum y_i}{n} = \frac{1 + 1}{4} = 0.5 $$

$$ F_0 = \log\left(\frac{p}{1 - p}\right) = \log\left(\frac{0.5}{0.5}\right) = \log(1) = 0 $$

So the initial prediction for all samples is:

$$ F_0(x) = 0 \quad \text{for all } x $$

Step 2: Compute Probabilities and Pseudo-Residuals¶

We calculate the predicted probabilities:

$$ \hat{p}_i = \text{sigmoid}(F_0(x_i)) = \frac{1}{1 + e^{-0}} = 0.5 $$

Now compute the pseudo-residuals, which are the gradients of the log loss with respect to F(x):

$$ r_i = y_i - \hat{p}_i $$

x | y | F₀(x) | p̂ = sigmoid(F₀) | Residual r = y - p̂
--|---|-------|------------------|--------------------
1 | 0 | 0     | 0.5              | -0.5
2 | 0 | 0     | 0.5              | -0.5
3 | 1 | 0     | 0.5              | 0.5
4 | 1 | 0     | 0.5              | 0.5

Step 3: Fit a Decision Tree on Residuals¶

We now train a regression tree to predict these residuals using x as the input.

Suppose the tree learns a function h₁(x) that maps:

  • h₁(x) = -0.5 when x in {1, 2}
  • h₁(x) = 0.5 when x in {3, 4}

This tree has learned the pattern in the gradients.

Step 4: Update Model with Learning Rate η¶

Let us use a learning rate η = 0.1. Update the model:

$$ F_1(x) = F_0(x) + \eta \cdot h_1(x) $$

So:

  • For x = 1 or 2: F₁(x) = 0 + 0.1 × (–0.5) = –0.05
  • For x = 3 or 4: F₁(x) = 0 + 0.1 × (0.5) = 0.05

Now convert these updated scores to predicted probabilities:

$$ \hat{p} = \frac{1}{1 + e^{-F(x)}} $$

x | F₁(x) | p̂ = sigmoid(F₁(x))
--|-------|---------------------
1 | -0.05 | 0.4875
2 | -0.05 | 0.4875
3 | 0.05  | 0.5125
4 | 0.05  | 0.5125

These probabilities are slightly better than the initial 0.5 guess. Now, we compute residuals again using the new predictions and repeat the steps.

Step 5: Iterate to Improve the Model¶

At each new iteration:

  • Compute updated residuals based on current predictions
  • Fit a tree to residuals
  • Update prediction using learning rate and new tree output

This process continues for a fixed number of rounds or until the loss stabilizes.
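
The same loop can be written out for this classification example. The sketch below follows the simplified recipe above (each tree is fit directly to the residuals y - p̂ and added with learning rate η = 0.1); scikit-learn's GradientBoostingClassifier additionally rescales the tree's leaf values, so its probabilities will differ slightly from these hand-computed ones.

In [ ]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Toy data from the numerical example
X = np.array([[1], [2], [3], [4]])
y = np.array([0.0, 0.0, 1.0, 1.0])

eta = 0.1
n_rounds = 3

# Step 1: initialize with the log-odds of the positive class (0 here, since p = 0.5)
p = y.mean()
F = np.full_like(y, np.log(p / (1 - p)))

for m in range(1, n_rounds + 1):
    # Step 2: probabilities and pseudo-residuals (negative gradient of log loss)
    p_hat = sigmoid(F)
    residuals = y - p_hat

    # Step 3: fit a shallow regression tree to the residuals
    tree = DecisionTreeRegressor(max_depth=1)
    tree.fit(X, residuals)

    # Step 4: update the raw scores with the scaled tree output
    F = F + eta * tree.predict(X)
    print(f"Round {m}: F = {np.round(F, 5)}, p_hat = {np.round(sigmoid(F), 4)}")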

Python Code for Classification Version¶

In [13]:
from sklearn.ensemble import GradientBoostingClassifier

# Data
X = np.array([[1], [2], [3], [4]])
y = np.array([0, 0, 1, 1])

# Fit Gradient Boosting Classifier
model = GradientBoostingClassifier(n_estimators=3, learning_rate=0.1, max_depth=1)
model.fit(X, y)

# Predict probabilities
probs = model.predict_proba(X)[:, 1]
preds = model.predict(X)

# Visualize
plt.figure(figsize=(6,4))
plt.scatter(X, y, color='black', label='Actual')
plt.plot(X, probs, color='blue', marker='o', label='Predicted Probability')
plt.title('Gradient Boosting Classification (3 estimators)')
plt.xlabel('X')
plt.ylabel('Predicted probability of class 1')
plt.legend()
plt.grid(True)
plt.show()
[Figure: Gradient Boosting Classification (3 estimators), actual labels and predicted probability of class 1]
In [14]:
# Show results
for i in range(len(X)):
    print(f"x = {X[i][0]}, Actual = {y[i]}, Predicted Prob = {probs[i]:.4f}, Predicted Class = {preds[i]}")
x = 1, Actual = 0, Predicted Prob = 0.3658, Predicted Class = 0
x = 2, Actual = 0, Predicted Prob = 0.3658, Predicted Class = 0
x = 3, Actual = 1, Predicted Prob = 0.6342, Predicted Class = 1
x = 4, Actual = 1, Predicted Prob = 0.6342, Predicted Class = 1

In-depth understanding of sequential model structure¶

Let's go deep into how Gradient Boosting works in a sequential manner taking classification problem, focusing on how each new model learns from the residuals (errors) of the previous model.

In gradient boosting, we build models one after another, not all at once. Each model learns to correct the mistakes made by the previous model. Instead of directly predicting the target values, every new model tries to predict the residuals (gradients): how far off the previous model was, and in which direction. The prediction keeps getting updated at every stage using:

$$ F_{m}(x) = F_{m-1}(x) + \eta \cdot h_m(x) $$

Here,

  • $F_{m}(x)$: prediction after mth model
  • $F_{m-1}(x)$: prediction after previous model
  • $h_m(x)$: prediction from the mth model trained on residuals
  • $\eta$: learning rate

Let’s use a small dataset with four samples:

x | y
--|--
1 | 0
2 | 0
3 | 1
4 | 1

Let’s go through 3 iterations (stages) of gradient boosting with this dataset.

Step 1: Initial Prediction (F₀)¶

We start with a constant model that predicts the same value for all samples. Since the loss is log loss, the best constant value is the log odds of the positive class.

Total positives = 2 and Total samples = 4

So, probability of class 1: $p = 2 / 4 = 0.5$

$$ F_0(x) = \log\left(\frac{p}{1 - p}\right) = \log\left(\frac{0.5}{0.5}\right) = 0 $$

This means, initially the model predicts log-odds = 0, so probability of class 1 is:

$$ p = \frac{1}{1 + e^{-0}} = 0.5 $$

Step 2: Compute Residuals (Gradients of Log Loss)¶

Residuals are calculated using:

$$ r_i = y_i - \hat{p}_i $$

Since all predicted probabilities are 0.5:

x | y | p̂ = sigmoid(F₀) | Residual (y - p̂)
--|---|------------------|-------------------
1 | 0 | 0.5              | -0.5
2 | 0 | 0.5              | -0.5
3 | 1 | 0.5              | 0.5
4 | 1 | 0.5              | 0.5

Step 3: Train First Model h₁(x) on Residuals¶

We now fit a regression tree to predict the residuals.

Let’s say the tree splits the data like this:

  • For x = 1, 2 → predict -0.5
  • For x = 3, 4 → predict +0.5

This means the first tree has learned the residuals from step 2.

Step 4: Update Prediction¶

Let’s use a learning rate $\eta = 0.1$

Now, we update the predictions:

$$ F_1(x) = F_0(x) + 0.1 \cdot h_1(x) $$

x | F₁(x) | p̂ = sigmoid(F₁(x))
--|-------|------------------------------
1 | -0.05 | 1 / (1 + e^{0.05}) ≈ 0.4875
2 | -0.05 | 0.4875
3 | 0.05  | 0.5125
4 | 0.05  | 0.5125

The probabilities have shifted slightly in the correct direction.

Step 5: Compute New Residuals (Stage 2)¶

Again we compute residuals:

$$ r_i = y_i - \hat{p}_i $$

x | y | p̂ (from F₁) | Residual (y - p̂)
--|---|--------------|-------------------
1 | 0 | 0.4875       | -0.4875
2 | 0 | 0.4875       | -0.4875
3 | 1 | 0.5125       | 0.4875
4 | 1 | 0.5125       | 0.4875

Step 6: Fit Second Model h₂(x) on New Residuals¶

Again, we train a regression tree on these new residuals. It may learn:

  • x = 1, 2 → predict -0.4875
  • x = 3, 4 → predict +0.4875
Step 7: Update Predictions Again¶

$$ F_2(x) = F_1(x) + 0.1 \cdot h_2(x) $$

x | F₁(x) | h₂(x)   | F₂(x)    | p̂ = sigmoid(F₂(x))
--|-------|---------|----------|---------------------
1 | -0.05 | -0.4875 | -0.09875 | ≈ 0.4753
2 | -0.05 | -0.4875 | -0.09875 | ≈ 0.4753
3 | 0.05  | 0.4875  | 0.09875  | ≈ 0.5247
4 | 0.05  | 0.4875  | 0.09875  | ≈ 0.5247

We now see the probabilities getting more confident.

Step 8: Repeat Further¶

At each stage:

  1. Compute residuals
  2. Fit a new tree
  3. Update the prediction
  4. Convert to probabilities

Each tree gradually reduces the error and pushes the predicted probability closer to the correct class.

Final Model¶

After many such steps, the final model is:

$$ F_M(x) = F_0(x) + \eta \cdot h_1(x) + \eta \cdot h_2(x) + \dots + \eta \cdot h_M(x) $$

And the final prediction is:

$$ P(y=1|x) = \frac{1}{1 + e^{-F_M(x)}} $$

In [15]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import GradientBoostingClassifier

# Data
X = np.array([[1], [2], [3], [4]])
y = np.array([0, 0, 1, 1])

# Gradient Boosting Classifier with staged prediction
model = GradientBoostingClassifier(n_estimators=3, learning_rate=0.1, max_depth=1)
model.fit(X, y)

# Staged probability predictions
probs = list(model.staged_predict_proba(X))

# Plot how probabilities evolve for sample x = 1 to x = 4
plt.figure(figsize=(8,5))
for i in range(4):
    plt.plot(range(1, 4), [p[i][1] for p in probs], marker='o', label=f'Sample x={X[i][0]}')

plt.title("How predictions evolve with each boosting stage")
plt.xlabel("Stage")
plt.ylabel("Predicted probability for class 1")
plt.grid(True)
plt.legend()
plt.show()
[Figure: How predictions evolve with each boosting stage, predicted probability of class 1 per sample]

Understanding Hyperparameters in Gradient Boosting¶

To effectively use gradient boosting, it is important to understand the key hyperparameters that influence its performance. One of the most central hyperparameters is the number of estimators. This refers to the total number of weak learners or decision trees that are trained one after the other. A small number may not capture the complexity of the data and result in underfitting, while too many trees can overfit the training data, making the model less generalizable.

The learning rate controls how much each additional tree contributes to the overall prediction. A smaller learning rate means each tree has a smaller influence, which generally leads to better generalization, though it requires a higher number of trees to reach optimal performance. In practice, a learning rate between 0.01 and 0.2 is often chosen, but the right value depends on the complexity of the dataset and should be tuned using cross-validation.

Another critical parameter is the depth of each individual tree. Shallow trees with fewer levels are less likely to overfit and are typically used as weak learners in boosting. Deeper trees can model more complex relationships but risk fitting to noise in the data. Usually, depths of three to five are a good starting point.

The subsample parameter introduces randomness by using only a fraction of the data to train each tree. This is similar in spirit to bagging, where only a subset of samples is used for learning. Setting the subsample value below one helps reduce variance and can make the model more robust, though if set too low it may hurt performance.

In addition to the above, other parameters such as minimum samples required to split a node or to be in a leaf node help control the tree’s complexity. These are especially useful in noisy datasets, where deeper splits might capture random fluctuations rather than true patterns.

Gradient boosting also allows for the selection of different loss functions depending on the type of problem. For classification problems, common loss functions include log loss and exponential loss. For regression tasks, squared error, absolute error, Huber loss, and quantile loss are often used. Huber and quantile loss are particularly useful when the data contain outliers or have asymmetric error distributions.

Regularization parameters also play a vital role in controlling overfitting. These include things like the minimum reduction in impurity required for a node to be split and limiting the number of features considered when splitting. When these are set appropriately, they ensure that the trees do not become overly complex and memorize training data.
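
A common way to choose these values in practice is cross-validated search. Below is a minimal sketch using GridSearchCV over a few of the hyperparameters discussed above, on the breast cancer dataset used later in this notebook; the grid values are illustrative starting points, not recommendations.

In [ ]:
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV

X, y = load_breast_cancer(return_X_y=True)

# Illustrative grid over the hyperparameters discussed above
param_grid = {
    "n_estimators": [50, 100],
    "learning_rate": [0.05, 0.1],
    "max_depth": [2, 3],
    "subsample": [0.8, 1.0],
}

search = GridSearchCV(
    GradientBoostingClassifier(random_state=42),
    param_grid,
    cv=3,
    scoring="accuracy",
    n_jobs=-1,
)
search.fit(X, y)

print("Best parameters:", search.best_params_)
print("Best CV accuracy:", round(search.best_score_, 4))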

The Role and Importance of Boosting¶

Boosting techniques emerged as a solution to the problem where individual models are too weak to capture complex relationships. Instead of relying on a single strong model, boosting builds an ensemble of weak learners, where each model focuses on correcting the mistakes made by the previous ones. This method ensures that areas of the data space that were previously misclassified or poorly predicted are given more focus in the following iterations.

Gradient Boosting takes this a step further by framing the boosting process as an optimization problem. Instead of simply reweighting samples like in AdaBoost, gradient boosting trains each model to approximate the gradient of the loss function with respect to the prediction. This allows it to handle a wide variety of problems by simply defining the appropriate loss function and minimizing it iteratively.

Advantages of Gradient Boosting¶

One of the primary strengths of gradient boosting is its high prediction accuracy. When well-tuned, gradient boosting often outperforms other algorithms like logistic regression, support vector machines, and even random forests. The model is flexible enough to work with various types of data and loss functions, which makes it suitable for both classification and regression problems. Another important benefit is its ability to capture complex interactions between features, something many linear models struggle with. Gradient boosting also provides insight into feature importance, which can be helpful for model interpretation and variable selection.

Moreover, gradient boosting can handle missing values to some extent, and with techniques like early stopping and shrinkage, it is possible to make the model robust and prevent overfitting.
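
As a small illustration, the sketch below enables early stopping in scikit-learn (via validation_fraction and n_iter_no_change) and then lists the top impurity-based feature importances mentioned earlier; the specific settings are illustrative choices, not tuned values.

In [ ]:
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import GradientBoostingClassifier

data = load_breast_cancer()

# Early stopping: hold out a validation fraction and stop once the score
# has not improved for n_iter_no_change consecutive iterations
gb = GradientBoostingClassifier(
    n_estimators=500,
    learning_rate=0.1,
    validation_fraction=0.2,
    n_iter_no_change=10,
    random_state=42,
)
gb.fit(data.data, data.target)
print("Trees actually built:", gb.n_estimators_)

# Impurity-based feature importances, largest first
importances = sorted(
    zip(data.feature_names, gb.feature_importances_),
    key=lambda pair: pair[1],
    reverse=True,
)
for name, score in importances[:5]:
    print(f"{name}: {score:.3f}")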

Limitations of Gradient Boosting¶

Despite its many strengths, gradient boosting is not without drawbacks. One of its major limitations is its relatively slow training time. Because the trees are built sequentially and each tree depends on the predictions from the previous one, it is difficult to parallelize the training process, unlike in random forests where trees are trained independently.

Gradient boosting models are also very sensitive to hyperparameters. A small change in learning rate or tree depth can significantly impact performance. Therefore, careful tuning and validation are essential to get the best out of this method. In addition, while the method performs well with clean and structured data, it may overfit noisy datasets unless proper regularization is applied.

Interpretability is another area where gradient boosting may fall short. While it is possible to compute feature importance, the internal structure of hundreds of trees is complex, making it difficult to understand how the final decision is made. Techniques like SHAP values and LIME can help provide some post-hoc explanations, but the model itself remains a black box to a large extent.

Use Cases Where Gradient Boosting Excels¶

Gradient boosting has found applications in a wide range of real-world problems. In the financial sector, it is commonly used for credit scoring, fraud detection, and loan default prediction. The ability to capture nonlinear interactions and focus on hard-to-predict examples makes it highly effective in these areas. In marketing and customer analytics, gradient boosting is used to predict customer churn, lifetime value, and segmentation. Because of its ability to rank features, it also helps businesses identify key drivers behind customer behavior. In healthcare, gradient boosting is applied in disease prediction, risk modeling, and even in genomic data analysis, where it deals with highly structured and imbalanced datasets. Its precision makes it suitable for applications where false positives and false negatives carry different costs.

The model is also popular in competitions such as Kaggle, where structured data dominates. Its performance has consistently made it the preferred choice among data scientists for winning solutions.

Practical Implementation of Gradient Boosting¶

In [16]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score
In [17]:
# Load the dataset
data = load_breast_cancer()
X = data.data
y = data.target

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize and train the model
gb = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    subsample=0.8,
    random_state=42
)
gb.fit(X_train, y_train)
Out[17]:
GradientBoostingClassifier(random_state=42, subsample=0.8)
In [18]:
# Make predictions
y_pred = gb.predict(X_test)

# Evaluate accuracy
print("Accuracy:", accuracy_score(y_test, y_pred))
Accuracy: 0.9590643274853801

Types of Gradient Boosting¶

There are several variants of Gradient Boosting that have emerged to improve its speed, scalability, and robustness. While they all build upon the core idea of sequentially adding weak learners to minimize a loss function, each variant has specific design improvements that address limitations of the original method.

1. Traditional Gradient Boosting (GBM)¶

This is the original gradient boosting algorithm, often referred to as Gradient Boosting Machines (GBMs). It was introduced to boost model accuracy through additive modeling and gradient descent optimization. It trains one weak learner (usually a decision tree) at a time and tries to correct the mistakes made by previous models.

Key Characteristics¶
  • Uses CART (Classification and Regression Trees) as base learners
  • Sensitive to overfitting unless parameters are tuned well
  • Training is sequential, hence slower
  • Cannot handle sparse data or missing values natively
In [19]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target)

model = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3)
model.fit(X_train, y_train)
print("Accuracy:", accuracy_score(y_test, model.predict(X_test)))
Accuracy: 0.951048951048951

2. XGBoost (Extreme Gradient Boosting)¶

XGBoost was designed to optimize speed and performance, especially for large datasets and machine learning competitions like Kaggle. It improves over traditional GBM by offering regularization, parallel computation, and handling of missing data.

Key Characteristics¶

  • Includes L1 and L2 regularization to prevent overfitting
  • Supports parallelized tree construction
  • Efficient for sparse input and missing values
  • Provides built-in cross-validation, early stopping, and GPU acceleration
In [25]:
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(data['data'], data['target'])

model = xgb.XGBClassifier(n_estimators=100, learning_rate=0.1, 
                          max_depth=3, eval_metric='logloss')
model.fit(X_train, y_train)
print("Accuracy:", accuracy_score(y_test, model.predict(X_test)))
Accuracy: 0.9790209790209791
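
The sketch below illustrates two of the features listed above: L1/L2 regularization (reg_alpha, reg_lambda) and early stopping on a held-out evaluation set. Note that the early-stopping API has moved between versions of xgboost: in recent releases early_stopping_rounds is a constructor argument, while older releases expect it in fit(); adjust for your installed version.

In [ ]:
import xgboost as xgb
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, random_state=42
)

# L1/L2 regularization plus early stopping on a held-out evaluation set
model = xgb.XGBClassifier(
    n_estimators=500,
    learning_rate=0.1,
    max_depth=3,
    reg_alpha=0.1,       # L1 regularization
    reg_lambda=1.0,      # L2 regularization
    eval_metric="logloss",
    early_stopping_rounds=10,   # constructor argument in recent xgboost versions
)
model.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=False)

print("Best iteration:", model.best_iteration)
print("Accuracy:", accuracy_score(y_test, model.predict(X_test)))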

3. LightGBM (Light Gradient Boosting Machine)¶

LightGBM was developed by Microsoft to further optimize gradient boosting for large-scale and high-dimensional data. It introduces novel techniques to drastically reduce training time and memory usage.

Key Characteristics¶
  • Uses histogram-based algorithms to bucket continuous features
  • Grows trees leaf-wise rather than level-wise
  • Can handle categorical features directly
  • Excellent for large datasets with many features
In [34]:
import lightgbm as lgb

# Load dataset
data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target)

model = lgb.LGBMClassifier(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    force_col_wise=True,
    verbosity=-1
)

model.fit(X_train, y_train)
print("Accuracy:", accuracy_score(y_test, model.predict(X_test)))
Accuracy: 0.972027972027972
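
To illustrate the native categorical-feature support mentioned above, here is a small sketch on made-up data: when the input is a pandas DataFrame, columns with the 'category' dtype are treated as categorical by LightGBM without any manual encoding. The data and column names are invented purely for illustration.

In [ ]:
import numpy as np
import pandas as pd
import lightgbm as lgb

# Made-up data: one categorical and one numeric feature (illustrative only)
rng = np.random.default_rng(0)
n = 200
city = rng.choice(["north", "south", "east"], size=n)
income = rng.normal(50, 10, size=n)
target = ((city == "north") & (income > 50)).astype(int)

df = pd.DataFrame({"city": pd.Categorical(city), "income": income})

# Columns with pandas 'category' dtype are picked up as categorical features
clf = lgb.LGBMClassifier(n_estimators=50, learning_rate=0.1, verbosity=-1)
clf.fit(df, target)

print("Training accuracy:", (clf.predict(df) == target).mean())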

4. CatBoost (Categorical Boosting)¶

CatBoost was developed by Yandex and is specifically designed to handle categorical variables without preprocessing. It also addresses the prediction shift problem caused by conventional target encoding of categories.

Key Characteristics¶
  • Handles categorical variables natively
  • Uses ordered boosting to reduce overfitting
  • No need for extensive preprocessing
  • Often robust on imbalanced datasets
In [37]:
from catboost import CatBoostClassifier

data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target)

model = CatBoostClassifier(iterations=100, learning_rate=0.1, depth=3, verbose=0)
model.fit(X_train, y_train)
print("Accuracy:", accuracy_score(y_test, model.predict(X_test)))
Accuracy: 0.9370629370629371
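
To show CatBoost's native handling of categorical variables, here is a small sketch on made-up data: the raw string column is passed as-is and identified through cat_features, so no manual encoding is needed. The data and column names are invented for illustration.

In [ ]:
import numpy as np
import pandas as pd
from catboost import CatBoostClassifier

# Made-up data with a raw string column (illustrative only)
rng = np.random.default_rng(0)
n = 200
color = rng.choice(["red", "green", "blue"], size=n)
size = rng.normal(10, 2, size=n)
target = ((color == "red") & (size > 10)).astype(int)

df = pd.DataFrame({"color": color, "size": size})

# cat_features names the columns CatBoost should treat as categorical
model = CatBoostClassifier(iterations=100, learning_rate=0.1, depth=3, verbose=0)
model.fit(df, target, cat_features=["color"])

print("Training accuracy:", (model.predict(df) == target).mean())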

Coding in Gradient Boosting¶

Classifying Digits Dataset¶

In [38]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_digits


X, y = load_digits(return_X_y=True)

train_X, test_X, train_y, test_y = train_test_split(X, y, 
                                                    test_size = 0.25, 
                                                    random_state = 42)

gbc = GradientBoostingClassifier(n_estimators=300,
                                 learning_rate=0.05,
                                 random_state=100,
                                 max_features=5)

gbc.fit(train_X, train_y)

pred_y = gbc.predict(test_X)

acc = accuracy_score(test_y, pred_y)
print("Gradient Boosting Classifier accuracy is : {:.2f}".format(acc))
Gradient Boosting Classifier accuracy is : 0.98

Predicting on Diabetes Dataset¶

In [47]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.datasets import load_diabetes


X, y = load_diabetes(return_X_y=True)

train_X, test_X, train_y, test_y = train_test_split(X, y, 
                                                    test_size = 0.20, 
                                                    random_state = 42)

gbr = GradientBoostingRegressor(loss='absolute_error',
                                learning_rate=0.1,
                                n_estimators=500,
                                max_depth = 4,
                                max_features = 4)

gbr.fit(train_X, train_y)

pred_y = gbr.predict(test_X)

test_rmse = mean_squared_error(test_y, pred_y) ** (1 / 2)

print('Root mean Square error: {:.2f}'.format(test_rmse))
Root mean Square error: 56.55