In [ ]:
# Step 1: Import Necessary Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.impute import SimpleImputer
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report, accuracy_score
from sklearn.metrics import confusion_matrix, roc_curve, auc, precision_recall_curve
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
In [ ]:
# Step 2: Load the Data
df = pd.read_csv('train.csv')
df.head()
Out[ ]:
label pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 pixel8 ... pixel774 pixel775 pixel776 pixel777 pixel778 pixel779 pixel780 pixel781 pixel782 pixel783
0 1 0 0 0 0 0 0 0 0 0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1 0 0 0 0 0 0 0 0 0 0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2 1 0 0 0 0 0 0 0 0 0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
3 4 0 0 0 0 0 0 0 0 0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
4 0 0 0 0 0 0 0 0 0 0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

5 rows × 785 columns

In [ ]:
# Step 3: Sample a Subset of the Data
df_sampled = df.sample(frac=0.1, random_state=42)
df_sampled.head()
Out[ ]:
label pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 pixel8 ... pixel774 pixel775 pixel776 pixel777 pixel778 pixel779 pixel780 pixel781 pixel782 pixel783
5701 9 0 0 0 0 0 0 0 0 0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
5489 6 0 0 0 0 0 0 0 0 0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
7773 0 0 0 0 0 0 0 0 0 0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
8205 0 0 0 0 0 0 0 0 0 0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1650 0 0 0 0 0 0 0 0 0 0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

5 rows × 785 columns

In [ ]:
# Step 4: Pre-processing
# Split sampled dataset into features and labels
X_sampled = df_sampled.drop('label', axis=1)  # features
y_sampled = df_sampled['label']  # labels
# Impute missing values if there are any
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
X_sampled_imputed = imputer.fit_transform(X_sampled)
In [ ]:
# Step 5: Split the Data into Training and Testing Sets
X_train, X_test, y_train, y_test = train_test_split(X_sampled_imputed, y_sampled, test_size=0.2, random_state=42)
In [ ]:
# Step 6: Feature Engineering using PCA
pca = PCA(n_components=0.95)  # Preserve 95% of variance
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)
In [ ]:
# Step 7: Hyperparameter Tuning for SVM using GridSearchCV
from sklearn.model_selection import GridSearchCV
# Define the parameter grid for SVM
param_grid = {
    'C': [0.1, 1, 10, 100, 1000],
    'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
    'kernel': ['rbf', 'linear']  # Assuming you want to try both RBF and linear kernels
}
# Create a GridSearchCV object
grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=4)
# Standardize the features (necessary for SVM)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_pca)
X_test_scaled = scaler.transform(X_test_pca)
# Fit the GridSearchCV object to the training data
grid.fit(X_train_scaled, y_train)
# Print the best parameters and estimator found by GridSearchCV
print(grid.best_params_)
print(grid.best_estimator_)
# Make predictions with the best found hyperparameحters
grid_predictions = grid.predict(X_test_scaled)
# Confusion matrix for the results of GridSearchCV
cmG = confusion_matrix(y_test, grid_predictions)
sns.heatmap(cmG, annot=True, fmt='d')
plt.title('Confusion Matrix for SVM after GridSearchCV')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()
# Classification report for the results of GridSearchCV
print(classification_report(y_test, grid_predictions))
Fitting 5 folds for each of 50 candidates, totalling 250 fits
[CV 1/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.116 total time=   0.1s
[CV 2/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.116 total time=   0.0s
[CV 3/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.116 total time=   0.0s
[CV 4/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.110 total time=   0.0s
[CV 5/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.205 total time=   0.1s
[CV 1/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.776 total time=   0.0s
[CV 2/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.735 total time=   0.0s
[CV 3/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.755 total time=   0.0s
[CV 4/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.719 total time=   0.0s
[CV 5/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.767 total time=   0.0s
[CV 1/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.116 total time=   0.1s
[CV 2/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.116 total time=   0.0s
[CV 3/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.116 total time=   0.0s
[CV 4/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.110 total time=   0.1s
[CV 5/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.123 total time=   0.1s
[CV 1/5] END ...C=0.1, gamma=0.1, kernel=linear;, score=0.776 total time=   0.0s
[CV 2/5] END ...C=0.1, gamma=0.1, kernel=linear;, score=0.735 total time=   0.0s
[CV 3/5] END ...C=0.1, gamma=0.1, kernel=linear;, score=0.755 total time=   0.0s
[CV 4/5] END ...C=0.1, gamma=0.1, kernel=linear;, score=0.719 total time=   0.0s
[CV 5/5] END ...C=0.1, gamma=0.1, kernel=linear;, score=0.767 total time=   0.0s
[CV 1/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=0.136 total time=   0.0s
[CV 2/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=0.143 total time=   0.0s
[CV 3/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=0.122 total time=   0.0s
[CV 4/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=0.137 total time=   0.0s
[CV 5/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=0.192 total time=   0.0s
[CV 1/5] END ..C=0.1, gamma=0.01, kernel=linear;, score=0.776 total time=   0.0s
[CV 2/5] END ..C=0.1, gamma=0.01, kernel=linear;, score=0.735 total time=   0.0s
[CV 3/5] END ..C=0.1, gamma=0.01, kernel=linear;, score=0.755 total time=   0.0s
[CV 4/5] END ..C=0.1, gamma=0.01, kernel=linear;, score=0.719 total time=   0.0s
[CV 5/5] END ..C=0.1, gamma=0.01, kernel=linear;, score=0.767 total time=   0.0s
[CV 1/5] END ....C=0.1, gamma=0.001, kernel=rbf;, score=0.116 total time=   0.0s
[CV 2/5] END ....C=0.1, gamma=0.001, kernel=rbf;, score=0.116 total time=   0.0s
[CV 3/5] END ....C=0.1, gamma=0.001, kernel=rbf;, score=0.116 total time=   0.0s
[CV 4/5] END ....C=0.1, gamma=0.001, kernel=rbf;, score=0.110 total time=   0.0s
[CV 5/5] END ....C=0.1, gamma=0.001, kernel=rbf;, score=0.226 total time=   0.0s
[CV 1/5] END .C=0.1, gamma=0.001, kernel=linear;, score=0.776 total time=   0.0s
[CV 2/5] END .C=0.1, gamma=0.001, kernel=linear;, score=0.735 total time=   0.0s
[CV 3/5] END .C=0.1, gamma=0.001, kernel=linear;, score=0.755 total time=   0.0s
[CV 4/5] END .C=0.1, gamma=0.001, kernel=linear;, score=0.719 total time=   0.0s
[CV 5/5] END .C=0.1, gamma=0.001, kernel=linear;, score=0.767 total time=   0.0s
[CV 1/5] END ...C=0.1, gamma=0.0001, kernel=rbf;, score=0.116 total time=   0.0s
[CV 2/5] END ...C=0.1, gamma=0.0001, kernel=rbf;, score=0.116 total time=   0.0s
[CV 3/5] END ...C=0.1, gamma=0.0001, kernel=rbf;, score=0.116 total time=   0.0s
[CV 4/5] END ...C=0.1, gamma=0.0001, kernel=rbf;, score=0.110 total time=   0.0s
[CV 5/5] END ...C=0.1, gamma=0.0001, kernel=rbf;, score=0.226 total time=   0.0s
[CV 1/5] END C=0.1, gamma=0.0001, kernel=linear;, score=0.776 total time=   0.0s
[CV 2/5] END C=0.1, gamma=0.0001, kernel=linear;, score=0.735 total time=   0.0s
[CV 3/5] END C=0.1, gamma=0.0001, kernel=linear;, score=0.755 total time=   0.0s
[CV 4/5] END C=0.1, gamma=0.0001, kernel=linear;, score=0.719 total time=   0.0s
[CV 5/5] END C=0.1, gamma=0.0001, kernel=linear;, score=0.767 total time=   0.0s
[CV 1/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.116 total time=   0.1s
[CV 2/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.116 total time=   0.1s
[CV 3/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.116 total time=   0.1s
[CV 4/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.110 total time=   0.1s
[CV 5/5] END ..........C=1, gamma=1, kernel=rbf;, score=0.123 total time=   0.1s
[CV 1/5] END .......C=1, gamma=1, kernel=linear;, score=0.776 total time=   0.0s
[CV 2/5] END .......C=1, gamma=1, kernel=linear;, score=0.735 total time=   0.0s
[CV 3/5] END .......C=1, gamma=1, kernel=linear;, score=0.755 total time=   0.0s
[CV 4/5] END .......C=1, gamma=1, kernel=linear;, score=0.719 total time=   0.0s
[CV 5/5] END .......C=1, gamma=1, kernel=linear;, score=0.767 total time=   0.0s
[CV 1/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.129 total time=   0.1s
[CV 2/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.143 total time=   0.1s
[CV 3/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.136 total time=   0.1s
[CV 4/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.151 total time=   0.1s
[CV 5/5] END ........C=1, gamma=0.1, kernel=rbf;, score=0.178 total time=   0.1s
[CV 1/5] END .....C=1, gamma=0.1, kernel=linear;, score=0.776 total time=   0.0s
[CV 2/5] END .....C=1, gamma=0.1, kernel=linear;, score=0.735 total time=   0.0s
[CV 3/5] END .....C=1, gamma=0.1, kernel=linear;, score=0.755 total time=   0.0s
[CV 4/5] END .....C=1, gamma=0.1, kernel=linear;, score=0.719 total time=   0.0s
[CV 5/5] END .....C=1, gamma=0.1, kernel=linear;, score=0.767 total time=   0.0s
[CV 1/5] END .......C=1, gamma=0.01, kernel=rbf;, score=0.755 total time=   0.1s
[CV 2/5] END .......C=1, gamma=0.01, kernel=rbf;, score=0.755 total time=   0.1s
[CV 3/5] END .......C=1, gamma=0.01, kernel=rbf;, score=0.830 total time=   0.1s
[CV 4/5] END .......C=1, gamma=0.01, kernel=rbf;, score=0.726 total time=   0.1s
[CV 5/5] END .......C=1, gamma=0.01, kernel=rbf;, score=0.781 total time=   0.0s
[CV 1/5] END ....C=1, gamma=0.01, kernel=linear;, score=0.776 total time=   0.0s
[CV 2/5] END ....C=1, gamma=0.01, kernel=linear;, score=0.735 total time=   0.0s
[CV 3/5] END ....C=1, gamma=0.01, kernel=linear;, score=0.755 total time=   0.0s
[CV 4/5] END ....C=1, gamma=0.01, kernel=linear;, score=0.719 total time=   0.0s
[CV 5/5] END ....C=1, gamma=0.01, kernel=linear;, score=0.767 total time=   0.0s
[CV 1/5] END ......C=1, gamma=0.001, kernel=rbf;, score=0.612 total time=   0.0s
[CV 2/5] END ......C=1, gamma=0.001, kernel=rbf;, score=0.646 total time=   0.0s
[CV 3/5] END ......C=1, gamma=0.001, kernel=rbf;, score=0.633 total time=   0.0s
[CV 4/5] END ......C=1, gamma=0.001, kernel=rbf;, score=0.548 total time=   0.0s
[CV 5/5] END ......C=1, gamma=0.001, kernel=rbf;, score=0.623 total time=   0.0s
[CV 1/5] END ...C=1, gamma=0.001, kernel=linear;, score=0.776 total time=   0.0s
[CV 2/5] END ...C=1, gamma=0.001, kernel=linear;, score=0.735 total time=   0.0s
[CV 3/5] END ...C=1, gamma=0.001, kernel=linear;, score=0.755 total time=   0.0s
[CV 4/5] END ...C=1, gamma=0.001, kernel=linear;, score=0.719 total time=   0.0s
[CV 5/5] END ...C=1, gamma=0.001, kernel=linear;, score=0.767 total time=   0.0s
[CV 1/5] END .....C=1, gamma=0.0001, kernel=rbf;, score=0.116 total time=   0.0s
[CV 2/5] END .....C=1, gamma=0.0001, kernel=rbf;, score=0.116 total time=   0.0s
[CV 3/5] END .....C=1, gamma=0.0001, kernel=rbf;, score=0.116 total time=   0.0s
[CV 4/5] END .....C=1, gamma=0.0001, kernel=rbf;, score=0.110 total time=   0.1s
[CV 5/5] END .....C=1, gamma=0.0001, kernel=rbf;, score=0.226 total time=   0.0s
[CV 1/5] END ..C=1, gamma=0.0001, kernel=linear;, score=0.776 total time=   0.0s
[CV 2/5] END ..C=1, gamma=0.0001, kernel=linear;, score=0.735 total time=   0.0s
[CV 3/5] END ..C=1, gamma=0.0001, kernel=linear;, score=0.755 total time=   0.0s
[CV 4/5] END ..C=1, gamma=0.0001, kernel=linear;, score=0.719 total time=   0.0s
[CV 5/5] END ..C=1, gamma=0.0001, kernel=linear;, score=0.767 total time=   0.0s
[CV 1/5] END .........C=10, gamma=1, kernel=rbf;, score=0.116 total time=   0.1s
[CV 2/5] END .........C=10, gamma=1, kernel=rbf;, score=0.116 total time=   0.1s
[CV 3/5] END .........C=10, gamma=1, kernel=rbf;, score=0.116 total time=   0.1s
[CV 4/5] END .........C=10, gamma=1, kernel=rbf;, score=0.110 total time=   0.1s
[CV 5/5] END .........C=10, gamma=1, kernel=rbf;, score=0.123 total time=   0.1s
[CV 1/5] END ......C=10, gamma=1, kernel=linear;, score=0.776 total time=   0.0s
[CV 2/5] END ......C=10, gamma=1, kernel=linear;, score=0.735 total time=   0.0s
[CV 3/5] END ......C=10, gamma=1, kernel=linear;, score=0.755 total time=   0.0s
[CV 4/5] END ......C=10, gamma=1, kernel=linear;, score=0.719 total time=   0.0s
[CV 5/5] END ......C=10, gamma=1, kernel=linear;, score=0.767 total time=   0.0s
[CV 1/5] END .......C=10, gamma=0.1, kernel=rbf;, score=0.136 total time=   0.1s
[CV 2/5] END .......C=10, gamma=0.1, kernel=rbf;, score=0.156 total time=   0.1s
[CV 3/5] END .......C=10, gamma=0.1, kernel=rbf;, score=0.136 total time=   0.1s
[CV 4/5] END .......C=10, gamma=0.1, kernel=rbf;, score=0.151 total time=   0.1s
[CV 5/5] END .......C=10, gamma=0.1, kernel=rbf;, score=0.192 total time=   0.1s
[CV 1/5] END ....C=10, gamma=0.1, kernel=linear;, score=0.776 total time=   0.1s
[CV 2/5] END ....C=10, gamma=0.1, kernel=linear;, score=0.735 total time=   0.1s
[CV 3/5] END ....C=10, gamma=0.1, kernel=linear;, score=0.755 total time=   0.1s
[CV 4/5] END ....C=10, gamma=0.1, kernel=linear;, score=0.719 total time=   0.1s
[CV 5/5] END ....C=10, gamma=0.1, kernel=linear;, score=0.767 total time=   0.1s
[CV 1/5] END ......C=10, gamma=0.01, kernel=rbf;, score=0.776 total time=   0.1s
[CV 2/5] END ......C=10, gamma=0.01, kernel=rbf;, score=0.789 total time=   0.1s
[CV 3/5] END ......C=10, gamma=0.01, kernel=rbf;, score=0.857 total time=   0.1s
[CV 4/5] END ......C=10, gamma=0.01, kernel=rbf;, score=0.733 total time=   0.1s
[CV 5/5] END ......C=10, gamma=0.01, kernel=rbf;, score=0.788 total time=   0.1s
[CV 1/5] END ...C=10, gamma=0.01, kernel=linear;, score=0.776 total time=   0.1s
[CV 2/5] END ...C=10, gamma=0.01, kernel=linear;, score=0.735 total time=   0.1s
[CV 3/5] END ...C=10, gamma=0.01, kernel=linear;, score=0.755 total time=   0.1s
[CV 4/5] END ...C=10, gamma=0.01, kernel=linear;, score=0.719 total time=   0.1s
[CV 5/5] END ...C=10, gamma=0.01, kernel=linear;, score=0.767 total time=   0.1s
[CV 1/5] END .....C=10, gamma=0.001, kernel=rbf;, score=0.816 total time=   0.1s
[CV 2/5] END .....C=10, gamma=0.001, kernel=rbf;, score=0.803 total time=   0.1s
[CV 3/5] END .....C=10, gamma=0.001, kernel=rbf;, score=0.769 total time=   0.1s
[CV 4/5] END .....C=10, gamma=0.001, kernel=rbf;, score=0.767 total time=   0.1s
[CV 5/5] END .....C=10, gamma=0.001, kernel=rbf;, score=0.788 total time=   0.1s
[CV 1/5] END ..C=10, gamma=0.001, kernel=linear;, score=0.776 total time=   0.1s
[CV 2/5] END ..C=10, gamma=0.001, kernel=linear;, score=0.735 total time=   0.1s
[CV 3/5] END ..C=10, gamma=0.001, kernel=linear;, score=0.755 total time=   0.1s
[CV 4/5] END ..C=10, gamma=0.001, kernel=linear;, score=0.719 total time=   0.1s
[CV 5/5] END ..C=10, gamma=0.001, kernel=linear;, score=0.767 total time=   0.1s
[CV 1/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=0.714 total time=   0.1s
[CV 2/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=0.687 total time=   0.1s
[CV 3/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=0.707 total time=   0.1s
[CV 4/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=0.610 total time=   0.0s
[CV 5/5] END ....C=10, gamma=0.0001, kernel=rbf;, score=0.705 total time=   0.0s
[CV 1/5] END .C=10, gamma=0.0001, kernel=linear;, score=0.776 total time=   0.0s
[CV 2/5] END .C=10, gamma=0.0001, kernel=linear;, score=0.735 total time=   0.0s
[CV 3/5] END .C=10, gamma=0.0001, kernel=linear;, score=0.755 total time=   0.0s
[CV 4/5] END .C=10, gamma=0.0001, kernel=linear;, score=0.719 total time=   0.0s
[CV 5/5] END .C=10, gamma=0.0001, kernel=linear;, score=0.767 total time=   0.0s
[CV 1/5] END ........C=100, gamma=1, kernel=rbf;, score=0.116 total time=   0.1s
[CV 2/5] END ........C=100, gamma=1, kernel=rbf;, score=0.116 total time=   0.1s
[CV 3/5] END ........C=100, gamma=1, kernel=rbf;, score=0.116 total time=   0.1s
[CV 4/5] END ........C=100, gamma=1, kernel=rbf;, score=0.110 total time=   0.1s
[CV 5/5] END ........C=100, gamma=1, kernel=rbf;, score=0.123 total time=   0.1s
[CV 1/5] END .....C=100, gamma=1, kernel=linear;, score=0.776 total time=   0.0s
[CV 2/5] END .....C=100, gamma=1, kernel=linear;, score=0.735 total time=   0.0s
[CV 3/5] END .....C=100, gamma=1, kernel=linear;, score=0.755 total time=   0.0s
[CV 4/5] END .....C=100, gamma=1, kernel=linear;, score=0.719 total time=   0.0s
[CV 5/5] END .....C=100, gamma=1, kernel=linear;, score=0.767 total time=   0.0s
[CV 1/5] END ......C=100, gamma=0.1, kernel=rbf;, score=0.136 total time=   0.1s
[CV 2/5] END ......C=100, gamma=0.1, kernel=rbf;, score=0.156 total time=   0.1s
[CV 3/5] END ......C=100, gamma=0.1, kernel=rbf;, score=0.136 total time=   0.1s
[CV 4/5] END ......C=100, gamma=0.1, kernel=rbf;, score=0.151 total time=   0.1s
[CV 5/5] END ......C=100, gamma=0.1, kernel=rbf;, score=0.192 total time=   0.1s
[CV 1/5] END ...C=100, gamma=0.1, kernel=linear;, score=0.776 total time=   0.1s
[CV 2/5] END ...C=100, gamma=0.1, kernel=linear;, score=0.735 total time=   0.1s
[CV 3/5] END ...C=100, gamma=0.1, kernel=linear;, score=0.755 total time=   0.1s
[CV 4/5] END ...C=100, gamma=0.1, kernel=linear;, score=0.719 total time=   0.1s
[CV 5/5] END ...C=100, gamma=0.1, kernel=linear;, score=0.767 total time=   0.1s
[CV 1/5] END .....C=100, gamma=0.01, kernel=rbf;, score=0.776 total time=   0.1s
[CV 2/5] END .....C=100, gamma=0.01, kernel=rbf;, score=0.789 total time=   0.1s
[CV 3/5] END .....C=100, gamma=0.01, kernel=rbf;, score=0.857 total time=   0.1s
[CV 4/5] END .....C=100, gamma=0.01, kernel=rbf;, score=0.733 total time=   0.1s
[CV 5/5] END .....C=100, gamma=0.01, kernel=rbf;, score=0.788 total time=   0.1s
[CV 1/5] END ..C=100, gamma=0.01, kernel=linear;, score=0.776 total time=   0.1s
[CV 2/5] END ..C=100, gamma=0.01, kernel=linear;, score=0.735 total time=   0.1s
[CV 3/5] END ..C=100, gamma=0.01, kernel=linear;, score=0.755 total time=   0.1s
[CV 4/5] END ..C=100, gamma=0.01, kernel=linear;, score=0.719 total time=   0.1s
[CV 5/5] END ..C=100, gamma=0.01, kernel=linear;, score=0.767 total time=   0.1s
[CV 1/5] END ....C=100, gamma=0.001, kernel=rbf;, score=0.823 total time=   0.1s
[CV 2/5] END ....C=100, gamma=0.001, kernel=rbf;, score=0.782 total time=   0.1s
[CV 3/5] END ....C=100, gamma=0.001, kernel=rbf;, score=0.776 total time=   0.1s
[CV 4/5] END ....C=100, gamma=0.001, kernel=rbf;, score=0.767 total time=   0.1s
[CV 5/5] END ....C=100, gamma=0.001, kernel=rbf;, score=0.774 total time=   0.1s
[CV 1/5] END .C=100, gamma=0.001, kernel=linear;, score=0.776 total time=   0.1s
[CV 2/5] END .C=100, gamma=0.001, kernel=linear;, score=0.735 total time=   0.1s
[CV 3/5] END .C=100, gamma=0.001, kernel=linear;, score=0.755 total time=   0.1s
[CV 4/5] END .C=100, gamma=0.001, kernel=linear;, score=0.719 total time=   0.1s
[CV 5/5] END .C=100, gamma=0.001, kernel=linear;, score=0.767 total time=   0.1s
[CV 1/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=0.796 total time=   0.1s
[CV 2/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=0.762 total time=   0.1s
[CV 3/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=0.741 total time=   0.1s
[CV 4/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=0.733 total time=   0.1s
[CV 5/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=0.753 total time=   0.1s
[CV 1/5] END C=100, gamma=0.0001, kernel=linear;, score=0.776 total time=   0.1s
[CV 2/5] END C=100, gamma=0.0001, kernel=linear;, score=0.735 total time=   0.0s
[CV 3/5] END C=100, gamma=0.0001, kernel=linear;, score=0.755 total time=   0.0s
[CV 4/5] END C=100, gamma=0.0001, kernel=linear;, score=0.719 total time=   0.0s
[CV 5/5] END C=100, gamma=0.0001, kernel=linear;, score=0.767 total time=   0.0s
[CV 1/5] END .......C=1000, gamma=1, kernel=rbf;, score=0.116 total time=   0.1s
[CV 2/5] END .......C=1000, gamma=1, kernel=rbf;, score=0.116 total time=   0.1s
[CV 3/5] END .......C=1000, gamma=1, kernel=rbf;, score=0.116 total time=   0.1s
[CV 4/5] END .......C=1000, gamma=1, kernel=rbf;, score=0.110 total time=   0.1s
[CV 5/5] END .......C=1000, gamma=1, kernel=rbf;, score=0.123 total time=   0.1s
[CV 1/5] END ....C=1000, gamma=1, kernel=linear;, score=0.776 total time=   0.0s
[CV 2/5] END ....C=1000, gamma=1, kernel=linear;, score=0.735 total time=   0.0s
[CV 3/5] END ....C=1000, gamma=1, kernel=linear;, score=0.755 total time=   0.0s
[CV 4/5] END ....C=1000, gamma=1, kernel=linear;, score=0.719 total time=   0.0s
[CV 5/5] END ....C=1000, gamma=1, kernel=linear;, score=0.767 total time=   0.0s
[CV 1/5] END .....C=1000, gamma=0.1, kernel=rbf;, score=0.136 total time=   0.1s
[CV 2/5] END .....C=1000, gamma=0.1, kernel=rbf;, score=0.156 total time=   0.1s
[CV 3/5] END .....C=1000, gamma=0.1, kernel=rbf;, score=0.136 total time=   0.1s
[CV 4/5] END .....C=1000, gamma=0.1, kernel=rbf;, score=0.151 total time=   0.1s
[CV 5/5] END .....C=1000, gamma=0.1, kernel=rbf;, score=0.192 total time=   0.1s
[CV 1/5] END ..C=1000, gamma=0.1, kernel=linear;, score=0.776 total time=   0.0s
[CV 2/5] END ..C=1000, gamma=0.1, kernel=linear;, score=0.735 total time=   0.0s
[CV 3/5] END ..C=1000, gamma=0.1, kernel=linear;, score=0.755 total time=   0.0s
[CV 4/5] END ..C=1000, gamma=0.1, kernel=linear;, score=0.719 total time=   0.0s
[CV 5/5] END ..C=1000, gamma=0.1, kernel=linear;, score=0.767 total time=   0.0s
[CV 1/5] END ....C=1000, gamma=0.01, kernel=rbf;, score=0.776 total time=   0.0s
[CV 2/5] END ....C=1000, gamma=0.01, kernel=rbf;, score=0.789 total time=   0.0s
[CV 3/5] END ....C=1000, gamma=0.01, kernel=rbf;, score=0.857 total time=   0.1s
[CV 4/5] END ....C=1000, gamma=0.01, kernel=rbf;, score=0.733 total time=   0.1s
[CV 5/5] END ....C=1000, gamma=0.01, kernel=rbf;, score=0.788 total time=   0.1s
[CV 1/5] END .C=1000, gamma=0.01, kernel=linear;, score=0.776 total time=   0.0s
[CV 2/5] END .C=1000, gamma=0.01, kernel=linear;, score=0.735 total time=   0.0s
[CV 3/5] END .C=1000, gamma=0.01, kernel=linear;, score=0.755 total time=   0.0s
[CV 4/5] END .C=1000, gamma=0.01, kernel=linear;, score=0.719 total time=   0.0s
[CV 5/5] END .C=1000, gamma=0.01, kernel=linear;, score=0.767 total time=   0.0s
[CV 1/5] END ...C=1000, gamma=0.001, kernel=rbf;, score=0.823 total time=   0.1s
[CV 2/5] END ...C=1000, gamma=0.001, kernel=rbf;, score=0.782 total time=   0.0s
[CV 3/5] END ...C=1000, gamma=0.001, kernel=rbf;, score=0.776 total time=   0.1s
[CV 4/5] END ...C=1000, gamma=0.001, kernel=rbf;, score=0.767 total time=   0.1s
[CV 5/5] END ...C=1000, gamma=0.001, kernel=rbf;, score=0.774 total time=   0.1s
[CV 1/5] END C=1000, gamma=0.001, kernel=linear;, score=0.776 total time=   0.0s
[CV 2/5] END C=1000, gamma=0.001, kernel=linear;, score=0.735 total time=   0.0s
[CV 3/5] END C=1000, gamma=0.001, kernel=linear;, score=0.755 total time=   0.0s
[CV 4/5] END C=1000, gamma=0.001, kernel=linear;, score=0.719 total time=   0.0s
[CV 5/5] END C=1000, gamma=0.001, kernel=linear;, score=0.767 total time=   0.0s
[CV 1/5] END ..C=1000, gamma=0.0001, kernel=rbf;, score=0.789 total time=   0.1s
[CV 2/5] END ..C=1000, gamma=0.0001, kernel=rbf;, score=0.735 total time=   0.0s
[CV 3/5] END ..C=1000, gamma=0.0001, kernel=rbf;, score=0.748 total time=   0.1s
[CV 4/5] END ..C=1000, gamma=0.0001, kernel=rbf;, score=0.719 total time=   0.0s
[CV 5/5] END ..C=1000, gamma=0.0001, kernel=rbf;, score=0.774 total time=   0.0s
[CV 1/5] END C=1000, gamma=0.0001, kernel=linear;, score=0.776 total time=   0.0s
[CV 2/5] END C=1000, gamma=0.0001, kernel=linear;, score=0.735 total time=   0.0s
[CV 3/5] END C=1000, gamma=0.0001, kernel=linear;, score=0.755 total time=   0.0s
[CV 4/5] END C=1000, gamma=0.0001, kernel=linear;, score=0.719 total time=   0.0s
[CV 5/5] END C=1000, gamma=0.0001, kernel=linear;, score=0.767 total time=   0.0s
{'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
SVC(C=10, gamma=0.001)
No description has been provided for this image
              precision    recall  f1-score   support

           0       0.90      1.00      0.95        18
           1       1.00      0.97      0.98        30
           2       0.94      0.97      0.95        31
           3       0.69      0.85      0.76        13
           4       0.73      0.73      0.73        15
           5       0.53      0.62      0.57        13
           6       0.86      0.80      0.83        15
           7       0.76      0.87      0.81        15
           8       1.00      0.77      0.87        22
           9       0.78      0.58      0.67        12

    accuracy                           0.85       184
   macro avg       0.82      0.82      0.81       184
weighted avg       0.86      0.85      0.85       184

In [ ]:
# Step 7: Model Training
# Decision Tree
dt_classifier = DecisionTreeClassifier(random_state=42)
dt_classifier.fit(X_train_pca, y_train)
# SVM
svm_classifier = SVC(probability=True, random_state=42)
svm_classifier.fit(X_train_pca, y_train)
best_svm_classifier = SVC(**grid.best_params_)
best_svm_classifier.fit(X_train_scaled, y_train)
Out[ ]:
SVC(C=10, gamma=0.001)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
SVC(C=10, gamma=0.001)
In [ ]:
# Step 8: Model Evaluation
# Decision Tree Evaluation
dt_predictions = dt_classifier.predict(X_test_pca)
dt_accuracy = accuracy_score(y_test, dt_predictions)
dt_cv_scores = cross_val_score(dt_classifier, pca.transform(X_sampled_imputed), y_sampled, cv=5)
print("Decision Tree Classifier Report")
print(classification_report(y_test, dt_predictions))
print(f"Decision Tree Accuracy: {dt_accuracy}")
print(f"Decision Tree CV Accuracy: {np.mean(dt_cv_scores)}")
Decision Tree Classifier Report
              precision    recall  f1-score   support

           0       0.64      0.50      0.56        18
           1       0.96      0.87      0.91        30
           2       0.82      0.74      0.78        31
           3       0.70      0.54      0.61        13
           4       0.36      0.33      0.34        15
           5       0.33      0.54      0.41        13
           6       0.64      0.60      0.62        15
           7       0.61      0.73      0.67        15
           8       0.63      0.55      0.59        22
           9       0.42      0.67      0.52        12

    accuracy                           0.64       184
   macro avg       0.61      0.61      0.60       184
weighted avg       0.67      0.64      0.64       184

Decision Tree Accuracy: 0.6358695652173914
Decision Tree CV Accuracy: 0.5746495604656687
In [ ]:
#SVM Evaluation
svm_predictions = svm_classifier.predict(X_test_pca)
svm_accuracy = accuracy_score(y_test, svm_predictions)
svm_cv_scores = cross_val_score(svm_classifier, pca.transform(X_sampled_imputed), y_sampled, cv=5)
print("Support Vector Machine Classifier Report")
print(classification_report(y_test, svm_predictions))
print(f"SVM Accuracy: {svm_accuracy}")
print(f"SVM CV Accuracy: {np.mean(svm_cv_scores)}")
Support Vector Machine Classifier Report
              precision    recall  f1-score   support

           0       1.00      0.94      0.97        18
           1       0.97      0.97      0.97        30
           2       0.94      0.97      0.95        31
           3       0.79      0.85      0.81        13
           4       0.92      0.80      0.86        15
           5       0.85      0.85      0.85        13
           6       0.88      0.93      0.90        15
           7       0.94      1.00      0.97        15
           8       0.90      0.86      0.88        22
           9       0.83      0.83      0.83        12

    accuracy                           0.91       184
   macro avg       0.90      0.90      0.90       184
weighted avg       0.91      0.91      0.91       184

SVM Accuracy: 0.9130434782608695
SVM CV Accuracy: 0.895408648134949
In [ ]:
# Step 9: Visualization
# Confusion Matrix for Decision Tree
dt_conf_matrix = confusion_matrix(y_test, dt_predictions)
plt.figure(figsize=(10, 8))
sns.heatmap(dt_conf_matrix, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix for Decision Tree Classifier')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()
No description has been provided for this image
In [ ]:
# ROC Curve for SVM
svm_probs = svm_classifier.predict_proba(X_test_pca)[:, 1]  # probabilities for the positive outcome
svm_fpr, svm_tpr, _ = roc_curve(y_test, svm_probs, pos_label=1)
svm_roc_auc = auc(svm_fpr, svm_tpr)
plt.figure(figsize=(8, 6))
plt.plot(svm_fpr, svm_tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % svm_roc_auc)
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic for SVM')
plt.legend(loc="lower right")
plt.show()
No description has been provided for this image
In [ ]:
# Precision-Recall Curve for SVM
svm_precision, svm_recall, _ = precision_recall_curve(y_test, svm_probs, pos_label=1)
plt.figure(figsize=(8, 6))
plt.plot(svm_recall, svm_precision, color='blue', lw=2, label='Precision-Recall curve')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve for SVM')
plt.legend(loc="upper right")
plt.show()
No description has been provided for this image