## Python Assignment Solution on Generating Your Own Data Using Numpy and Pandas Libraries Dimension

• 8th Oct, 2021
• 15:57
```{
"nbformat": 4,
"nbformat_minor": 0,
"colab": {
"name": "Tim.ipynb",
"provenance": [],
"collapsed_sections": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "code",
"id": "-S5tt4tWKLjo",
"colab_type": "code",
"outputId": "8b9b43f5-938f-4e57-8353-1022c6a571a0",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 71
}
},
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import random\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"import seaborn as sns\n",
"import time\n"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/dist-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.\n",
"  import pandas.util.testing as tm\n"
],
"name": "stderr"
}
]
},
{
"cell_type": "code",
"id": "o6QsjnrAKbIW",
"colab_type": "code",
"colab": {}
},
"source": [
"# Seed NumPy's global generator so Restart & Run All reproduces the same data\n",
"np.random.seed(42)\n",
"\n",
"N_ROWS = 1000  # total synthetic records; half are Male and half Female\n",
"\n",
"# np.random.randint excludes `high`, so each upper bound is the stated maximum + 1\n",
"df = pd.DataFrame({\n",
"    \"Gender\": np.repeat([\"Male\", \"Female\"], N_ROWS // 2),\n",
"\n",
"    # Age ranges between 18-60 years (inclusive)\n",
"    \"Age\": np.random.randint(low=18, high=61, size=N_ROWS),\n",
"\n",
"    # Height ranges between 165-180 cm (inclusive)\n",
"    \"Height\": np.random.randint(low=165, high=181, size=N_ROWS),\n",
"\n",
"    # Weight ranges between 50-95 kg (inclusive)\n",
"    \"Weight\": np.random.randint(low=50, high=96, size=N_ROWS),\n",
"\n",
"    # Target variable of 0s and 1s: 1 when a standard-normal draw is positive\n",
"    \"Target\": np.where(np.random.normal(0.0, 1.0, size=N_ROWS) <= 0, 0, 1),\n",
"})\n",
"\n",
"# Shuffle the rows: Gender is generated as a block of Male followed by a block of Female\n",
"df = df.sample(frac=1).reset_index(drop=True)\n"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"id": "rZ6tvGMHK4z3",
"colab_type": "code",
"outputId": "d3bf9880-819a-49a0-b4e9-de4160950b68",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 255
}
},
"source": [
"# Report the size of the generated data set, then preview its first rows\n",
"n_rows, n_cols = df.shape\n",
"print(\"Our data have {} rows and {} columns.\".format(n_rows, n_cols))\n",
"print()\n",
"print(\"Data sample is as follows:\")\n",
"# Last expression of the cell, so the frame is rendered as the cell's result\n",
"df.head()"
],
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"text": [
"Our data have 1000 rows and 5 columns.\n",
"\n",
"Data sample is as follows:\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"text/html": [
"```
```\n",
"\n",
"\n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
"```
` `
`Gender`
`Age`
`Height`
`Weight`
`Target`
`0`
`Female`
`27`
`177`
`51`
`0`
`1`
`Female`
`59`
`166`
`65`
`0`
`2`
`Male`
`46`
`171`
`86`
`1`
`3`
`Female`
`34`
`165`
`58`
`0`
`4`
`Female`
`18`
`175`
`69`
`0`
```\n",
"```
```"
],
"text/plain": [
"   Gender  Age  Height  Weight  Target\n",
"0  Female   27     177      51       0\n",
"1  Female   59     166      65       0\n",
"2    Male   46     171      86       1\n",
"3  Female   34     165      58       0\n",
"4  Female   18     175      69       0"
]
},
"tags": []
},
"execution_count": 3
}
]
},
{
"cell_type": "code",
"id": "5ViSI_S0nJ-D",
"colab_type": "code",
"outputId": "03943743-a33b-4004-f5fe-8bb5eabdaa86",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 279
}
},
"source": [
"# Bar chart of the number of rows in each Gender category\n",
"gender_ax = sns.countplot(data=df, x=\"Gender\", palette=\"Set3\")\n",
"plt.show()"
],
"execution_count": 4,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"

"
]
},
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"id": "HMdXi6Wlnbig",
"colab_type": "code",
"outputId": "50220526-57c9-4620-a9b5-66823a45620a",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 296
}
},
"source": [
"# Distribution of Age: density histogram with a KDE curve on top.\n",
"# histplot replaces the deprecated sns.distplot; stat=\"density\" keeps distplot's y-axis scale.\n",
"fig, ax = plt.subplots()\n",
"sns.histplot(data=df, x=\"Age\", kde=True, stat=\"density\", ax=ax)\n",
"ax.set_title(\"Distribution of Age\")\n",
"plt.show()"
],
"execution_count": 5,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
""
]
},
"tags": []
},
"execution_count": 5
},
{
"output_type": "display_data",
"data": {
"text/plain": [
""
]
},
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"id": "Th0uT6VnnQa8",
"colab_type": "code",
"outputId": "bc359c6e-8089-486d-df9d-b083e218977a",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 298
}
},
"source": [
"# Distribution of Height: density histogram with a KDE curve on top.\n",
"# histplot replaces the deprecated sns.distplot; stat=\"density\" keeps distplot's y-axis scale.\n",
"fig, ax = plt.subplots()\n",
"sns.histplot(data=df, x=\"Height\", kde=True, stat=\"density\", ax=ax)\n",
"ax.set_title(\"Distribution of Height\")\n",
"plt.show()"
],
"execution_count": 6,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
""
]
},
"tags": []
},
"execution_count": 6
},
{
"output_type": "display_data",
"data": {
"text/plain": [
""
]
},
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"id": "5zRcS7MznZhJ",
"colab_type": "code",
"outputId": "e217887c-8983-4c5b-c51a-093a96c85e9f",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 296
}
},
"source": [
"# Distribution of Weight: density histogram with a KDE curve on top.\n",
"# histplot replaces the deprecated sns.distplot; stat=\"density\" keeps distplot's y-axis scale.\n",
"fig, ax = plt.subplots()\n",
"sns.histplot(data=df, x=\"Weight\", kde=True, stat=\"density\", ax=ax)\n",
"ax.set_title(\"Distribution of Weight\")\n",
"plt.show()"
],
"execution_count": 7,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
""
]
},
"tags": []
},
"execution_count": 7
},
{
"output_type": "display_data",
"data": {
"text/plain": [
""
]
},
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"id": "W8_zHKgtnl0i",
"colab_type": "code",
"outputId": "55a3418a-c285-4e6a-8276-be8037e46167",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 470
}
},
"source": [
"# Heatmap showing the correlation between the numeric variables.\n",
"# Text columns (such as a not-yet-encoded Gender) are left out before computing correlations.\n",
"corr = df.select_dtypes(include=\"number\").corr()\n",
"\n",
"fig, ax = plt.subplots(figsize=(8, 8))\n",
"ax.set_title(\"Correlation Plot\")\n",
"# All-False mask: every cell of the matrix is drawn. The builtin bool is used because\n",
"# the np.bool alias is no longer available in current NumPy releases.\n",
"sns.heatmap(corr, mask=np.zeros(corr.shape, dtype=bool),\n",
"            cmap=sns.diverging_palette(220, 10, as_cmap=True),\n",
"            square=True, ax=ax, annot=True, linewidths=3)\n",
"plt.show()"
],
"execution_count": 8,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
""
]
},
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "code",
"id": "y8efIyqtXLZ0",
"colab_type": "code",
"colab": {}
},
"source": [
"# Encode the categorical variable (Gender) as integers\n",
"GENDER_CODES = {\"Male\": 1, \"Female\": 2}\n",
"\n",
"# Guard keeps the cell idempotent: mapping an already-encoded column again\n",
"# would find no matching keys and turn every value into NaN.\n",
"if not pd.api.types.is_numeric_dtype(df[\"Gender\"]):\n",
"    df[\"Gender\"] = df[\"Gender\"].map(GENDER_CODES)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"id": "QpGgE5U2o4p7",
"colab_type": "code",
"outputId": "8b40a6ee-c494-4cc8-9c5a-7bc9de48ece0",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
}
},
"source": [
"# Preview the data after encoding Gender\n",
"df.head()"
],
"execution_count": 10,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"```
```\n",
"\n",
"\n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
"```
` `
`Gender`
`Age`
`Height`
`Weight`
`Target`
`0`
`2`
`27`
`177`
`51`
`0`
`1`
`2`
`59`
`166`
`65`
`0`
`2`
`1`
`46`
`171`
`86`
`1`
`3`
`2`
`34`
`165`
`58`
`0`
`4`
`2`
`18`
`175`
`69`
`0`
```\n",
"```
```"
],
"text/plain": [
"   Gender  Age  Height  Weight  Target\n",
"0       2   27     177      51       0\n",
"1       2   59     166      65       0\n",
"2       1   46     171      86       1\n",
"3       2   34     165      58       0\n",
"4       2   18     175      69       0"
]
},
"tags": []
},
"execution_count": 10
}
]
},
{
"cell_type": "code",
"id": "tcRXvj3smuUN",
"colab_type": "code",
"colab": {}
},
"source": [
"# Separate the dependent variable (y) from the independent variables (X)\n",
"\n",
"TARGET_COLUMN = \"Target\"\n",
"y = df[TARGET_COLUMN]\n",
"X = df.drop(columns=[TARGET_COLUMN])\n"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"id": "d1EQHhhiq-RG",
"colab_type": "code",
"outputId": "37e4a51c-7f69-4f93-9acf-a61489bf70c3",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
}
},
"source": [
"# Standardize the independent variables onto the same scale (zero mean, unit variance)\n",
"\n",
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"# NOTE(review): the scaler is fitted on every row before the train/test split that follows,\n",
"# so test-set statistics influence the scaling; consider fitting on the training rows only.\n",
"scaler = StandardScaler().fit(X)\n",
"\n",
"# Rebuild the frame with X's own column names and index instead of a hard-coded list\n",
"X = pd.DataFrame(scaler.transform(X), columns=X.columns, index=X.index)\n",
"\n",
"# Independent variables after standardizing\n",
"X.head()"
],
"execution_count": 12,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"```
```\n",
"\n",
"\n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
"```
` `
`Gender`
`Age`
`Height`
`Weight`
`0`
`1.0`
`-0.914033`
`1.168520`
`-1.675116`
`1`
`1.0`
`1.671862`
`-1.365231`
`-0.597920`
`2`
`-1.0`
`0.621342`
`-0.213526`
`1.017873`
`3`
`1.0`
`-0.348369`
`-1.595572`
`-1.136518`
`4`
`1.0`
`-1.641316`
`0.707838`
`-0.290150`
```\n",
"```
```"
],
"text/plain": [
"   Gender       Age    Height    Weight\n",
"0     1.0 -0.914033  1.168520 -1.675116\n",
"1     1.0  1.671862 -1.365231 -0.597920\n",
"2    -1.0  0.621342 -0.213526  1.017873\n",
"3     1.0 -0.348369 -1.595572 -1.136518\n",
"4     1.0 -1.641316  0.707838 -0.290150"
]
},
"tags": []
},
"execution_count": 12
}
]
},
{
"cell_type": "code",
"id": "MxJ1dQGGrBXq",
"colab_type": "code",
"colab": {}
},
"source": [
"# Split into training and testing data: 75% of the rows train the classifiers, 25% test them\n",
"\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"# stratify=y keeps the 0/1 ratio equal in both parts; random_state makes the split repeatable\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X, y, test_size=0.25, shuffle=True, stratify=y, random_state=42\n",
")\n"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"id": "GJNylh4LnH0D",
"colab_type": "code",
"outputId": "3e64e3b0-c531-41c4-cc80-075d947a556e",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 85
}
},
"source": [
"# Print the dimensions of every part produced by the train/test split\n",
"split_parts = (\n",
"    (\"Shape of X_train:\", X_train),\n",
"    (\"Shape of y_train:\", y_train),\n",
"    (\"Shape of X_test:\", X_test),\n",
"    (\"Shape of y_test:\", y_test),\n",
")\n",
"for caption, part in split_parts:\n",
"    print(caption, part.shape)"
],
"execution_count": 14,
"outputs": [
{
"output_type": "stream",
"text": [
"Shape of X_train: (750, 4)\n",
"Shape of y_train: (750,)\n",
"Shape of X_test: (250, 4)\n",
"Shape of y_test: (250,)\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"id": "X6ZU0ULLnhwI",
"colab_type": "code",
"outputId": "db741f1f-be29-46d4-d718-2fcd58fe1fde",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 350
}
},
"source": [
"# Visualize the class balance of the training and testing labels\n",
"\n",
"label_sets = [(y_train, \"train labels\"), (y_test, \"test labels\")]\n",
"\n",
"# One column per label set; a three-column grid would leave an empty slot on the right\n",
"fig, axes = plt.subplots(1, len(label_sets), figsize=(16, 5))\n",
"for ax, (labels, title) in zip(axes, label_sets):\n",
"    sns.countplot(x=labels, palette=\"Set3\", ax=ax)\n",
"    ax.set_title(title)\n",
"plt.show()"
],
"execution_count": 15,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
""
]
},
"tags": [],
"needs_background": "light"
}
}
]
},
{
"cell_type": "markdown",
"id": "0zAvC39QvLMM",
"colab_type": "text"
},
"source": [
"__Naive Bayes Classifier:__"
]
},
{
"cell_type": "code",
"id": "22vbc9OymmAJ",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 417
},
"outputId": "122c248f-14cb-4053-fc6f-bf0120ac90a0"
},
"source": [
"# Create the Gaussian Naive Bayes classifier\n",
"\n",
"from sklearn.metrics import classification_report,confusion_matrix,accuracy_score\n",
"\n",
"from sklearn.naive_bayes import GaussianNB\n",
"nbc = GaussianNB()\n",
"nbc.fit(X_train,y_train)\n",
"\n",
"# making predictions on the held-out test rows\n",
"y_pred = nbc.predict(X_test)\n",
"\n",
"\n",
"print(\"Accuracy of Naive-Bayes Classifier = {:0.2f} %\".format(accuracy_score(y_test, y_pred)*100))\n",
"print()\n",
"print(\"===================================================================\")\n",
"print()\n",
"\n",
"\n",
"# Per-class precision, recall and F1 on the test set\n",
"print(\"The classification report of Naive-Bayes Classifier is as follows:\")\n",
"print(classification_report(y_test,y_pred))\n",
"print()\n",
"print(\"===================================================================\")\n",
"print()\n",
"\n",
"\n",
"# Rows are the true classes, columns the predicted classes. Passing labels= pins the\n",
"# matrix to the classes learned in training, so its shape always matches the names below.\n",
"cm = confusion_matrix(y_test, y_pred, labels=nbc.classes_)\n",
"class_names = [str(c) for c in nbc.classes_]\n",
"confusion = pd.DataFrame(cm, index=class_names, columns=class_names)\n",
"print(\"Confusion Matrix is as follows:\")\n",
"confusion\n"
],
"execution_count": 16,
"outputs": [
{
"output_type": "stream",
"text": [
"Accuracy of Naive-Bayes Classifier = 50.80 %\n",
"\n",
"===================================================================\n",
"\n",
"The classification report of Naive-Bayes Classifier is as follows:\n",
"              precision    recall  f1-score   support\n",
"\n",
"           0       0.51      0.65      0.57       127\n",
"           1       0.50      0.36      0.42       123\n",
"\n",
"    accuracy                           0.51       250\n",
"   macro avg       0.51      0.51      0.50       250\n",
"weighted avg       0.51      0.51      0.50       250\n",
"\n",
"\n",
"===================================================================\n",
"\n",
"Confusion Matrix is as follows:\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"text/html": [
"```
```\n",
"\n",
"\n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
"```
` `
`0`
`1`
`0`
`83`
`44`
`1`
`79`
`44`
```\n",
"```
```"
],
"text/plain": [
"    0   1\n",
"0  83  44\n",
"1  79  44"
]
},
"tags": []
},
"execution_count": 16
}
]
},
{
"cell_type": "markdown",
"id": "JkLt6dPzvDOV",
"colab_type": "text"
},
"source": [
"__Nearest Neighbors Classifier:__"
]
},
{
"cell_type": "code",
"id": "zI88IBQyqiiQ",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 417
},
"outputId": "4ee92eba-0f0f-4719-ba66-7715753aae3d"
},
"source": [
"from sklearn.neighbors import KNeighborsClassifier\n",
"\n",
"# Create the K Nearest Neighbours classifier (k = 3)\n",
"\n",
"clf = KNeighborsClassifier(n_neighbors=3)\n",
"\n",
"clf.fit(X_train,y_train)\n",
"\n",
"# making predictions on the held-out test rows\n",
"y_predicted = clf.predict(X_test)\n",
"\n",
"# Calculate the accuracy of the prediction\n",
"print(\"Accuracy of Nearest Neighbors Classifier  = {:0.2f} %\".format(accuracy_score(y_test, y_predicted)*100))\n",
"print()\n",
"print(\"===================================================================\")\n",
"print()\n",
"\n",
"# Per-class precision, recall and F1 on the test set (no cross-validation is performed here)\n",
"print(\"Classification Report of Nearest Neighbors Classifier: \\n {}\".format(classification_report(y_test, y_predicted)))\n",
"print()\n",
"print(\"===================================================================\")\n",
"print()\n",
"\n",
"# Confusion matrix: rows are the true classes, columns the predicted classes.\n",
"# labels= pins the matrix to the classes learned in training, so its shape always matches the names.\n",
"cm = confusion_matrix(y_test, y_predicted, labels=clf.classes_)\n",
"class_names = [str(c) for c in clf.classes_]\n",
"\n",
"confusion = pd.DataFrame(cm, index=class_names, columns=class_names)\n",
"\n",
"print(\"Confusion Matrix of Nearest Neighbors Classifier:\")\n",
"confusion"
],
"execution_count": 17,
"outputs": [
{
"output_type": "stream",
"text": [
"Accuracy of Nearest Neighbors Classifier  = 47.60 %\n",
"\n",
"===================================================================\n",
"\n",
"Classification Report of Nearest Neighbors Classifier: \n",
"               precision    recall  f1-score   support\n",
"\n",
"           0       0.48      0.40      0.44       127\n",
"           1       0.47      0.55      0.51       123\n",
"\n",
"    accuracy                           0.48       250\n",
"   macro avg       0.48      0.48      0.47       250\n",
"weighted avg       0.48      0.48      0.47       250\n",
"\n",
"\n",
"===================================================================\n",
"\n",
"Confusion Matrix of Nearest Neighbors Classifier:\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"text/html": [
"```
```\n",
"\n",
"\n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
"```
` `
`0`
`1`
`0`
`51`
`76`
`1`
`55`
`68`
```\n",
"```
```"
],
"text/plain": [
"    0   1\n",
"0  51  76\n",
"1  55  68"
]
},
"tags": []
},
"execution_count": 17
}
]
},
{
"cell_type": "markdown",
"id": "6jvzjHq5wzUM",
"colab_type": "text"
},
"source": [
"__Support Vector Machine:__"
]
},
{
"cell_type": "code",
"id": "EHKIvVM9vrYo",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 383
},
"outputId": "330fc102-1fbb-4d9d-a32b-dedebd20f9da"
},
"source": [
"# Create the Support Vector Machine (SVC with its default settings)\n",
"from sklearn import svm\n",
"\n",
"clf = svm.SVC()\n",
"\n",
"clf.fit(X_train,y_train)\n",
"\n",
"\n",
"# Perform the predictions on the held-out test rows\n",
"\n",
"y_predicted = clf.predict(X_test)\n",
"\n",
"# Calculate the accuracy of the prediction\n",
"print(\"Accuracy of Support Vector Machine = {:0.2f} %\".format(accuracy_score(y_test, y_predicted)*100))\n",
"print(\"==========================================\")\n",
"print()\n",
"\n",
"# Per-class precision, recall and F1 on the test set (no cross-validation is performed here)\n",
"print(\"Classification Report of Support Vector Machine is as follows: \\n {}\".format(classification_report(y_test, y_predicted)))\n",
"print(\"==========================================\")\n",
"print()\n",
"\n",
"# Confusion matrix: rows are the true classes, columns the predicted classes.\n",
"# labels= pins the matrix to the classes learned in training, so its shape always matches the names.\n",
"cm = confusion_matrix(y_test, y_predicted, labels=clf.classes_)\n",
"class_names = [str(c) for c in clf.classes_]\n",
"\n",
"confusion = pd.DataFrame(cm, index=class_names, columns=class_names)\n",
"\n",
"print(\"Confusion Matrix of Support Vector Machine is as follows:\")\n",
"confusion\n"
],
"execution_count": 18,
"outputs": [
{
"output_type": "stream",
"text": [
"Accuracy of Support Vector Machine = 50.00 %\n",
"==========================================\n",
"\n",
"Classification Report of Support Vector Machine is as follows: \n",
"               precision    recall  f1-score   support\n",
"\n",
"           0       0.51      0.51      0.51       127\n",
"           1       0.49      0.49      0.49       123\n",
"\n",
"    accuracy                           0.50       250\n",
"   macro avg       0.50      0.50      0.50       250\n",
"weighted avg       0.50      0.50      0.50       250\n",
"\n",
"==========================================\n",
"\n",
"Confusion Matrix of Support Vector Machine is as follows:\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"text/html": [
"```
```\n",
"\n",
"\n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
"```
` `
`0`
`1`
`0`
`65`
`62`
`1`
`63`
`60`
```\n",
"```
```"
],
"text/plain": [
"    0   1\n",
"0  65  62\n",
"1  63  60"
]
},
"tags": []
},
"execution_count": 18
}
]
},
{
"cell_type": "code",
"id": "6m-moTkexFjG",
"colab_type": "code",
"colab": {}
},
"source": [
""
],
"execution_count": 0,
"outputs": []
}
]
}

```