## Machine Learning - Digit Recognition using sklearn Models and keras MNIST data

• 26th Aug, 2021
• 5:14 PM

## Solution - Digit Recognition using sklearn Models and keras MNIST data

{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "mnist.ipynb",
"provenance": [],
"collapsed_sections": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "code",
"id": "GQmGkgsBd1cr",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 90
},
"outputId": "f149a481-68e2-4a87-a9a7-75cc956a7376"
},
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import time\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"import seaborn as sns\n",
"from keras.datasets import mnist"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/dist-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.\n",
"  import pandas.util.testing as tm\n",
"Using TensorFlow backend.\n"
],
"name": "stderr"
}
]
},
{
"cell_type": "code",
"id": "Sdb3bnJeyf99",
"colab_type": "code",
"colab": {}
},
"source": [
"from sklearn.metrics import classification_report,confusion_matrix,accuracy_score\n",
"\n",
"from sklearn.naive_bayes import MultinomialNB\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn import tree ,svm\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"\n",
"\n"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"id": "w4vFxEy-eNa7",
"colab_type": "code",
"colab": {}
},
"source": [
"(X_train, y_train) , (X_test, y_test) = mnist.load_data()"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"id": "L73b2u7uhu3x",
"colab_type": "code",
"colab": {}
},
"source": [
"X_train = X_train.reshape(X_train.shape[0], 28*28)\n",
"\n",
"X_test = X_test.reshape(X_test.shape[0], 28*28)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "RL65oMECea1u",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 87
}
},
"source": [
"print(\"Shape of X_train:\",X_train.shape)\n",
"print(\"Shape of y_train:\",y_train.shape)\n",
"print(\"Shape of X_test:\",X_test.shape)\n",
"print(\"Shape of y_test:\",y_test.shape)"
],
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"text": [
"Shape of X_train: (60000, 784)\n",
"Shape of y_train: (60000,)\n",
"Shape of X_test: (10000, 784)\n",
"Shape of y_test: (10000,)\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"id": "Xf0kZLqvrMz_",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 322
},
"outputId": "59f3bf18-dc7b-4c26-b2ab-f7cc9fa1f83d"
},
"source": [
"# Plot 40 randomly selected digits from X_train\n",
"\n",
"plt.figure(1 , figsize = (20 , 5))\n",
"\n",
"for c in range(1,41):\n",
"    plt.subplot(4, 10,c)\n",
"    i = np.random.randint(X_train.shape[0])\n",
"    im = X_train[i].reshape((28,28))\n",
"    plt.subplots_adjust(hspace = 0.5 , wspace = 0.5)\n",
"    plt.imshow(im, cmap='gray')\n",
"    plt.title(y_train[i])\n",
"    plt.xticks([])\n",
"    plt.yticks([])\n"
],
"execution_count": 6,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 1440x360 with 40 Axes>"
]
},
"metadata": {
"tags": []
}
}
]
},
{
"cell_type": "code",
"id": "hxZBwjeCuQuq",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 335
},
"outputId": "76bef978-e004-4f95-f040-866bcf709c7f"
},
"source": [
"plt.figure(1 , figsize = (25 ,5))\n",
"n = 0 \n",
"for z , j in zip([y_train , y_test] , ['train labels', 'test labels']):\n",
"    n += 1\n",
"    plt.subplot(1 , 3  , n)\n",
"    sns.countplot(x = z , palette=\"Set3\")\n",
"    plt.title(j)\n",
"plt.show()"
],
"execution_count": 7,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 1800x360 with 2 Axes>"
]
},
"metadata": {
"tags": []
}
}
]
},
{
"cell_type": "markdown",
"id": "KfqeOO4GtuRC",
"colab_type": "text"
},
"source": [
"__Multinomial Naive Bayes__"
]
},
{
"cell_type": "code",
"id": "8uh-U_z5oCKx",
"colab_type": "code",
"colab": {}
},
"source": [
"# Create the Multinomial Naive Bayes Classifier\n",
"\n",
"clf = MultinomialNB()"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"id": "-FZ5gkbIo1_k",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 881
},
"outputId": "99a74981-92d4-440a-bbc1-0f1c973574cb"
},
"source": [
"# Train the model\n",
"t0=time.time()\n",
"clf.fit(X_train,y_train)\n",
"print(\"Time taken to tain the model:\", round(time.time()-t0, 2), \"seconds\")\n",
"print(\"==========================================\")\n",
"print()\n",
"\n",
"# Perform the predictions\n",
"t1=time.time()\n",
"y_predicted = clf.predict(X_test)\n",
"print(\"Time taken to predict 10000 test cases:\", round(time.time()-t1, 2), \"seconds\")\n",
"print(\"==========================================\")\n",
"print()\n",
"\n",
"# Calculate the accuracy of the prediction\n",
"print(\"Accuracy = {} %\".format(accuracy_score(y_test, y_predicted)*100))\n",
"print(\"==========================================\")\n",
"print()\n",
"\n",
"# Per-class precision, recall and F1-score on the test set\n",
"print(\"Classification Report: \\n {}\".format(classification_report(y_test, y_predicted, labels=range(0,10))))\n",
"print(\"==========================================\")\n",
"print()\n",
"\n",
"# Confusion matrix\n",
"cm=np.array(confusion_matrix(y_test,y_predicted))\n",
"\n",
"confusion = pd.DataFrame(cm, index=[\"0\", \"1\",\"2\", \"3\", \"4\",\"5\", \"6\", \"7\", \"8\", \"9\"],\n",
"                         columns=[\"0\", \"1\",\"2\", \"3\", \"4\",\"5\", \"6\", \"7\", \"8\", \"9\"])\n",
"\n",
"print(\"Confusion Matrix:\")\n",
"confusion\n"
],
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"text": [
"Time taken to tain the model: 3.92 seconds\n",
"==========================================\n",
"\n",
"Time taken to predict 10000 test cases: 0.05 seconds\n",
"==========================================\n",
"\n",
"Accuracy = 83.65 %\n",
"==========================================\n",
"\n",
"Classification Report: \n",
"               precision    recall  f1-score   support\n",
"\n",
"           0       0.92      0.93      0.93       980\n",
"           1       0.91      0.93      0.92      1135\n",
"           2       0.90      0.83      0.86      1032\n",
"           3       0.80      0.84      0.82      1010\n",
"           4       0.84      0.75      0.79       982\n",
"           5       0.86      0.66      0.75       892\n",
"           6       0.89      0.90      0.89       958\n",
"           7       0.94      0.84      0.88      1028\n",
"           8       0.66      0.80      0.72       974\n",
"           9       0.71      0.86      0.78      1009\n",
"\n",
"    accuracy                           0.84     10000\n",
"   macro avg       0.84      0.83      0.84     10000\n",
"weighted avg       0.84      0.84      0.84     10000\n",
"\n",
"==========================================\n",
"\n",
"Confusion Matrix:\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"text/html": [
"
\n",
"\n",
"\n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
"

0
1
2
3
4
5
6
7
8
9
0
912
0
2
6
1
8
14
1
36
0
1
0
1061
5
9
0
2
6
0
51
1
2
15
11
858
24
10
3
33
11
66
1
3
4
11
34
851
1
21
7
14
40
27
4
2
2
6
0
732
0
25
1
38
176
5
23
11
6
107
18
590
17
6
78
36
6
17
13
17
1
7
25
860
0
18
0
7
1
21
11
5
19
0
1
861
40
69
8
6
26
13
54
14
27
8
9
777
40
9
6
7
3
10
66
10
0
17
27
863
\n",
"
"
],
"text/plain": [
"     0     1    2    3    4    5    6    7    8    9\n",
"0  912     0    2    6    1    8   14    1   36    0\n",
"1    0  1061    5    9    0    2    6    0   51    1\n",
"2   15    11  858   24   10    3   33   11   66    1\n",
"3    4    11   34  851    1   21    7   14   40   27\n",
"4    2     2    6    0  732    0   25    1   38  176\n",
"5   23    11    6  107   18  590   17    6   78   36\n",
"6   17    13   17    1    7   25  860    0   18    0\n",
"7    1    21   11    5   19    0    1  861   40   69\n",
"8    6    26   13   54   14   27    8    9  777   40\n",
"9    6     7    3   10   66   10    0   17   27  863"
]
},
"tags": []
},
"execution_count": 9
}
]
},
{
"cell_type": "markdown",
"id": "H3gFCC9w3wpj",
"colab_type": "text"
},
"source": [
"__Decision Tree Classifier__"
]
},
{
"cell_type": "code",
"id": "dp1BF4_N3lsl",
"colab_type": "code",
"colab": {}
},
"source": [
"# Create the Decision Tree Classifier\n",
"\n",
"clf = tree.DecisionTreeClassifier()"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"id": "Stt-1iNZz_xc",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 881
},
"outputId": "c716b1ce-b941-46d8-9ab0-44e88f3373c4"
},
"source": [
"# Train the model\n",
"t0=time.time()\n",
"clf.fit(X_train,y_train)\n",
"print(\"Time taken to tain the model:\", round(time.time()-t0, 2), \"seconds\")\n",
"print(\"==========================================\")\n",
"print()\n",
"\n",
"# Perform the predictions\n",
"t1=time.time()\n",
"y_predicted = clf.predict(X_test)\n",
"print(\"Time taken to predict 10000 test cases:\", round(time.time()-t1, 2), \"seconds\")\n",
"print(\"==========================================\")\n",
"print()\n",
"\n",
"# Calculate the accuracy of the prediction\n",
"print(\"Accuracy = {} %\".format(accuracy_score(y_test, y_predicted)*100))\n",
"print(\"==========================================\")\n",
"print()\n",
"\n",
"# Per-class precision, recall and F1-score on the test set\n",
"print(\"Classification Report: \\n {}\".format(classification_report(y_test, y_predicted, labels=range(0,10))))\n",
"print(\"==========================================\")\n",
"print()\n",
"\n",
"# Confusion matrix\n",
"cm=np.array(confusion_matrix(y_test,y_predicted))\n",
"\n",
"confusion = pd.DataFrame(cm, index=[\"0\", \"1\",\"2\", \"3\", \"4\",\"5\", \"6\", \"7\", \"8\", \"9\"],\n",
"                         columns=[\"0\", \"1\",\"2\", \"3\", \"4\",\"5\", \"6\", \"7\", \"8\", \"9\"])\n",
"\n",
"print(\"Confusion Matrix:\")\n",
"confusion\n"
],
"execution_count": 11,
"outputs": [
{
"output_type": "stream",
"text": [
"Time taken to tain the model: 19.84 seconds\n",
"==========================================\n",
"\n",
"Time taken to predict 10000 test cases: 0.02 seconds\n",
"==========================================\n",
"\n",
"Accuracy = 87.82 %\n",
"==========================================\n",
"\n",
"Classification Report: \n",
"               precision    recall  f1-score   support\n",
"\n",
"           0       0.91      0.94      0.92       980\n",
"           1       0.94      0.96      0.95      1135\n",
"           2       0.87      0.86      0.86      1032\n",
"           3       0.84      0.86      0.85      1010\n",
"           4       0.88      0.88      0.88       982\n",
"           5       0.83      0.83      0.83       892\n",
"           6       0.90      0.89      0.89       958\n",
"           7       0.92      0.89      0.90      1028\n",
"           8       0.83      0.81      0.82       974\n",
"           9       0.85      0.85      0.85      1009\n",
"\n",
"    accuracy                           0.88     10000\n",
"   macro avg       0.88      0.88      0.88     10000\n",
"weighted avg       0.88      0.88      0.88     10000\n",
"\n",
"==========================================\n",
"\n",
"Confusion Matrix:\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"text/html": [
"
\n",
"\n",
"\n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
"

0
1
2
3
4
5
6
7
8
9
0
920
0
10
5
5
14
9
4
6
7
1
1
1089
8
3
3
5
7
3
15
1
2
13
17
884
27
12
10
9
29
23
8
3
8
5
33
870
5
38
4
7
17
23
4
6
5
7
7
862
12
15
6
20
42
5
12
8
4
47
9
742
22
5
28
15
6
17
4
14
7
18
24
850
1
17
6
7
4
14
25
17
9
3
4
917
9
26
8
17
10
24
35
22
27
17
10
786
26
9
16
3
6
18
39
15
5
19
26
862
\n",
"
"
],
"text/plain": [
"     0     1    2    3    4    5    6    7    8    9\n",
"0  920     0   10    5    5   14    9    4    6    7\n",
"1    1  1089    8    3    3    5    7    3   15    1\n",
"2   13    17  884   27   12   10    9   29   23    8\n",
"3    8     5   33  870    5   38    4    7   17   23\n",
"4    6     5    7    7  862   12   15    6   20   42\n",
"5   12     8    4   47    9  742   22    5   28   15\n",
"6   17     4   14    7   18   24  850    1   17    6\n",
"7    4    14   25   17    9    3    4  917    9   26\n",
"8   17    10   24   35   22   27   17   10  786   26\n",
"9   16     3    6   18   39   15    5   19   26  862"
]
},
"tags": []
},
"execution_count": 11
}
]
},
{
"cell_type": "markdown",
"id": "xeGcD0OK3vYn",
"colab_type": "text"
},
"source": [
"__Random Forest Classifier__"
]
},
{
"cell_type": "code",
"id": "tF4ewydp3vCa",
"colab_type": "code",
"colab": {}
},
"source": [
"# Create the Random Forest Classifier\n",
"\n",
"clf = RandomForestClassifier(max_depth=2, random_state=0)\n"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"id": "X70MJ5eY6P7i",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 881
},
"outputId": "aa397803-42dd-40a2-9434-a5184d38a516"
},
"source": [
"# Train the model\n",
"t0=time.time()\n",
"clf.fit(X_train,y_train)\n",
"print(\"Time taken to tain the model:\", round(time.time()-t0, 2), \"seconds\")\n",
"print(\"==========================================\")\n",
"print()\n",
"\n",
"# Perform the predictions\n",
"t1=time.time()\n",
"y_predicted = clf.predict(X_test)\n",
"print(\"Time taken to predict 10000 test cases:\", round(time.time()-t1, 2), \"seconds\")\n",
"print(\"==========================================\")\n",
"print()\n",
"\n",
"# Calculate the accuracy of the prediction\n",
"print(\"Accuracy = {} %\".format(accuracy_score(y_test, y_predicted)*100))\n",
"print(\"==========================================\")\n",
"print()\n",
"\n",
"# Per-class precision, recall and F1-score on the test set\n",
"print(\"Classification Report: \\n {}\".format(classification_report(y_test, y_predicted, labels=range(0,10))))\n",
"print(\"==========================================\")\n",
"print()\n",
"\n",
"# Confusion matrix\n",
"cm=np.array(confusion_matrix(y_test,y_predicted))\n",
"\n",
"confusion = pd.DataFrame(cm, index=[\"0\", \"1\",\"2\", \"3\", \"4\",\"5\", \"6\", \"7\", \"8\", \"9\"],\n",
"                         columns=[\"0\", \"1\",\"2\", \"3\", \"4\",\"5\", \"6\", \"7\", \"8\", \"9\"])\n",
"\n",
"print(\"Confusion Matrix:\")\n",
"confusion\n"
],
"execution_count": 13,
"outputs": [
{
"output_type": "stream",
"text": [
"Time taken to tain the model: 6.81 seconds\n",
"==========================================\n",
"\n",
"Time taken to predict 10000 test cases: 0.12 seconds\n",
"==========================================\n",
"\n",
"Accuracy = 63.63999999999999 %\n",
"==========================================\n",
"\n",
"Classification Report: \n",
"               precision    recall  f1-score   support\n",
"\n",
"           0       0.62      0.98      0.76       980\n",
"           1       0.52      0.99      0.68      1135\n",
"           2       0.77      0.56      0.65      1032\n",
"           3       0.68      0.63      0.66      1010\n",
"           4       0.56      0.68      0.62       982\n",
"           5       0.94      0.04      0.07       892\n",
"           6       0.84      0.65      0.73       958\n",
"           7       0.66      0.83      0.74      1028\n",
"           8       0.78      0.48      0.59       974\n",
"           9       0.57      0.42      0.48      1009\n",
"\n",
"    accuracy                           0.64     10000\n",
"   macro avg       0.69      0.63      0.60     10000\n",
"weighted avg       0.69      0.64      0.60     10000\n",
"\n",
"==========================================\n",
"\n",
"Confusion Matrix:\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"text/html": [
"
\n",
"\n",
"\n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
"

0
1
2
3
4
5
6
7
8
9
0
964
3
2
1
0
0
2
7
1
0
1
0
1124
6
1
0
0
1
3
0
0
2
81
266
573
15
14
0
19
46
16
2
3
90
172
27
638
11
1
2
37
16
16
4
18
24
9
1
671
0
38
85
1
135
5
206
145
4
219
56
32
27
57
65
81
6
122
70
38
11
37
1
622
43
5
9
7
20
74
34
0
21
0
0
856
13
10
8
25
267
44
45
21
0
26
22
464
60
9
23
36
4
3
358
0
7
144
14
420
\n",
"
"
],
"text/plain": [
"     0     1    2    3    4   5    6    7    8    9\n",
"0  964     3    2    1    0   0    2    7    1    0\n",
"1    0  1124    6    1    0   0    1    3    0    0\n",
"2   81   266  573   15   14   0   19   46   16    2\n",
"3   90   172   27  638   11   1    2   37   16   16\n",
"4   18    24    9    1  671   0   38   85    1  135\n",
"5  206   145    4  219   56  32   27   57   65   81\n",
"6  122    70   38   11   37   1  622   43    5    9\n",
"7   20    74   34    0   21   0    0  856   13   10\n",
"8   25   267   44   45   21   0   26   22  464   60\n",
"9   23    36    4    3  358   0    7  144   14  420"
]
},
"tags": []
},
"execution_count": 13
}
]
},
{
"cell_type": "markdown",
"id": "avGbsv8H6cCH",
"colab_type": "text"
},
"source": [
"__Support Vector Machine__"
]
},
{
"cell_type": "code",
"id": "hASMA6bO6cir",
"colab_type": "code",
"colab": {}
},
"source": [
"# Create the Support Vector Machine\n",
"\n",
"clf = svm.SVC()\n"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"id": "TlixY1S1-c8R",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 881
},
"outputId": "d53f634f-0b01-4e09-e8b6-d8129314da0a"
},
"source": [
"# Train the model\n",
"t0=time.time()\n",
"clf.fit(X_train,y_train)\n",
"print(\"Time taken to tain the model:\", round(time.time()-t0, 2), \"seconds\")\n",
"print(\"==========================================\")\n",
"print()\n",
"\n",
"# Perform the predictions\n",
"t1=time.time()\n",
"y_predicted = clf.predict(X_test)\n",
"print(\"Time taken to predict 10000 test cases:\", round(time.time()-t1, 2), \"seconds\")\n",
"print(\"==========================================\")\n",
"print()\n",
"\n",
"# Calculate the accuracy of the prediction\n",
"print(\"Accuracy = {} %\".format(accuracy_score(y_test, y_predicted)*100))\n",
"print(\"==========================================\")\n",
"print()\n",
"\n",
"# Per-class precision, recall and F1-score on the test set\n",
"print(\"Classification Report: \\n {}\".format(classification_report(y_test, y_predicted, labels=range(0,10))))\n",
"print(\"==========================================\")\n",
"print()\n",
"\n",
"# Confusion matrix\n",
"cm=np.array(confusion_matrix(y_test,y_predicted))\n",
"\n",
"confusion = pd.DataFrame(cm, index=[\"0\", \"1\",\"2\", \"3\", \"4\",\"5\", \"6\", \"7\", \"8\", \"9\"],\n",
"                         columns=[\"0\", \"1\",\"2\", \"3\", \"4\",\"5\", \"6\", \"7\", \"8\", \"9\"])\n",
"\n",
"print(\"Confusion Matrix:\")\n",
"confusion\n"
],
"execution_count": 15,
"outputs": [
{
"output_type": "stream",
"text": [
"Time taken to tain the model: 548.62 seconds\n",
"==========================================\n",
"\n",
"Time taken to predict 10000 test cases: 163.48 seconds\n",
"==========================================\n",
"\n",
"Accuracy = 97.92 %\n",
"==========================================\n",
"\n",
"Classification Report: \n",
"               precision    recall  f1-score   support\n",
"\n",
"           0       0.98      0.99      0.99       980\n",
"           1       0.99      0.99      0.99      1135\n",
"           2       0.98      0.97      0.98      1032\n",
"           3       0.97      0.99      0.98      1010\n",
"           4       0.98      0.98      0.98       982\n",
"           5       0.99      0.98      0.98       892\n",
"           6       0.99      0.99      0.99       958\n",
"           7       0.98      0.97      0.97      1028\n",
"           8       0.97      0.98      0.97       974\n",
"           9       0.97      0.96      0.97      1009\n",
"\n",
"    accuracy                           0.98     10000\n",
"   macro avg       0.98      0.98      0.98     10000\n",
"weighted avg       0.98      0.98      0.98     10000\n",
"\n",
"==========================================\n",
"\n",
"Confusion Matrix:\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"text/html": [
"
\n",
"\n",
"\n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
"

0
1
2
3
4
5
6
7
8
9
0
973
0
1
0
0
2
1
1
2
0
1
0
1126
3
1
0
1
1
1
2
0
2
6
1
1006
2
1
0
2
7
6
1
3
0
0
2
995
0
2
0
5
5
1
4
0
0
5
0
961
0
3
0
2
11
5
2
0
0
9
0
871
4
1
4
1
6
6
2
0
0
2
3
944
0
1
0
7
0
6
11
1
1
0
0
996
2
11
8
3
0
2
6
3
2
2
3
950
3
9
3
4
1
7
10
2
1
7
4
970
\n",
"
"
],
"text/plain": [
"     0     1     2    3    4    5    6    7    8    9\n",
"0  973     0     1    0    0    2    1    1    2    0\n",
"1    0  1126     3    1    0    1    1    1    2    0\n",
"2    6     1  1006    2    1    0    2    7    6    1\n",
"3    0     0     2  995    0    2    0    5    5    1\n",
"4    0     0     5    0  961    0    3    0    2   11\n",
"5    2     0     0    9    0  871    4    1    4    1\n",
"6    6     2     0    0    2    3  944    0    1    0\n",
"7    0     6    11    1    1    0    0  996    2   11\n",
"8    3     0     2    6    3    2    2    3  950    3\n",
"9    3     4     1    7   10    2    1    7    4  970"
]
},
"tags": []
},
"execution_count": 15
}
]
},
{
"cell_type": "markdown",
"id": "DgBV-rka99MC",
"colab_type": "text"
},
"source": [
"__K Nearest Neighbour__"
]
},
{
"cell_type": "code",
"id": "uCQjwZVl98qN",
"colab_type": "code",
"colab": {}
},
"source": [
"# Create the K Nearest Neighbour\n",
"\n",
"clf = KNeighborsClassifier(n_neighbors=3)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"id": "a5-KFdWg-edt",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 881
},
"outputId": "9138288b-df63-4140-be3d-19dce2af9b08"
},
"source": [
"# Train the model\n",
"t0=time.time()\n",
"clf.fit(X_train,y_train)\n",
"print(\"Time taken to tain the model:\", round(time.time()-t0, 2), \"seconds\")\n",
"print(\"==========================================\")\n",
"print()\n",
"\n",
"# Perform the predictions\n",
"t1=time.time()\n",
"y_predicted = clf.predict(X_test)\n",
"print(\"Time taken to predict 10000 test cases:\", round(time.time()-t1, 2), \"seconds\")\n",
"print(\"==========================================\")\n",
"print()\n",
"\n",
"# Calculate the accuracy of the prediction\n",
"print(\"Accuracy = {} %\".format(accuracy_score(y_test, y_predicted)*100))\n",
"print(\"==========================================\")\n",
"print()\n",
"\n",
"# Per-class precision, recall and F1-score on the test set\n",
"print(\"Classification Report: \\n {}\".format(classification_report(y_test, y_predicted, labels=range(0,10))))\n",
"print(\"==========================================\")\n",
"print()\n",
"\n",
"# Confusion matrix\n",
"cm=np.array(confusion_matrix(y_test,y_predicted))\n",
"\n",
"confusion = pd.DataFrame(cm, index=[\"0\", \"1\",\"2\", \"3\", \"4\",\"5\", \"6\", \"7\", \"8\", \"9\"],\n",
"                         columns=[\"0\", \"1\",\"2\", \"3\", \"4\",\"5\", \"6\", \"7\", \"8\", \"9\"])\n",
"\n",
"print(\"Confusion Matrix:\")\n",
"confusion\n"
],
"execution_count": 22,
"outputs": [
{
"output_type": "stream",
"text": [
"Time taken to tain the model: 18.7 seconds\n",
"==========================================\n",
"\n",
"Time taken to predict 10000 test cases: 972.5 seconds\n",
"==========================================\n",
"\n",
"Accuracy = 97.05 %\n",
"==========================================\n",
"\n",
"Classification Report: \n",
"               precision    recall  f1-score   support\n",
"\n",
"           0       0.97      0.99      0.98       980\n",
"           1       0.96      1.00      0.98      1135\n",
"           2       0.98      0.97      0.97      1032\n",
"           3       0.96      0.97      0.96      1010\n",
"           4       0.98      0.97      0.97       982\n",
"           5       0.97      0.96      0.96       892\n",
"           6       0.98      0.99      0.98       958\n",
"           7       0.96      0.96      0.96      1028\n",
"           8       0.99      0.94      0.96       974\n",
"           9       0.96      0.96      0.96      1009\n",
"\n",
"    accuracy                           0.97     10000\n",
"   macro avg       0.97      0.97      0.97     10000\n",
"weighted avg       0.97      0.97      0.97     10000\n",
"\n",
"==========================================\n",
"\n",
"Confusion Matrix:\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"text/html": [
"
\n",
"\n",
"\n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
"

0
1
2
3
4
5
6
7
8
9
0
974
1
1
0
0
1
2
1
0
0
1
0
1133
2
0
0
0
0
0
0
0
2
10
9
996
2
0
0
0
13
2
0
3
0
2
4
976
1
13
1
7
3
3
4
1
6
0
0
950
0
4
2
0
19
5
6
1
0
11
2
859
5
1
3
4
6
5
3
0
0
3
3
944
0
0
0
7
0
21
5
0
1
0
0
991
0
10
8
8
2
4
16
8
11
3
4
914
4
9
4
5
2
8
9
2
1
8
2
968
\n",
"
"
],
"text/plain": [
"     0     1    2    3    4    5    6    7    8    9\n",
"0  974     1    1    0    0    1    2    1    0    0\n",
"1    0  1133    2    0    0    0    0    0    0    0\n",
"2   10     9  996    2    0    0    0   13    2    0\n",
"3    0     2    4  976    1   13    1    7    3    3\n",
"4    1     6    0    0  950    0    4    2    0   19\n",
"5    6     1    0   11    2  859    5    1    3    4\n",
"6    5     3    0    0    3    3  944    0    0    0\n",
"7    0    21    5    0    1    0    0  991    0   10\n",
"8    8     2    4   16    8   11    3    4  914    4\n",
"9    4     5    2    8    9    2    1    8    2  968"
]
},
"tags": []
},
"execution_count": 22
}
]
},
{
"cell_type": "code",
"id": "Zhg2H-JREW1K",
"colab_type": "code",
"colab": {}
},
"source": [
""
],
"execution_count": 0,
"outputs": []
}
]
}