## Python Assignment Solution on Generating Your Own Data Using Numpy and Pandas Libraries Dimension

• 8th Oct, 2021
• 15:57 PM
"import numpy as np\n",
"import pandas as pd\n",
"import random\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"import seaborn as sns\n",
"import time\n"
/usr/local/lib/python3.6/dist-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.
  import pandas.util.testing as tm
"df = pd.DataFrame({\"Gender\" : np.repeat([\"Male\", \"Female\"],500), \n",
"                   # Age ranges between 18-59 years (randint's upper bound is exclusive)\n",
"                   \"Age\"    : np.random.randint(low=18, high=60, size=1000),       \n",
"                   \n",
"                   # Height ranges between 165-179cm (randint's upper bound is exclusive)\n",
"                   \"Height\" : np.random.randint(low=165, high=180, size=1000),\n",
"                   \n",
"                   # Weight ranges between 50-94kg (randint's upper bound is exclusive)\n",
"                   \"Weight\" : np.random.randint(low=50, high=95, size=1000),\n",
"                   \n",
"                   # Target variable having 1's and 0's\n",
"                   \"Target\" : np.where(np.random.normal(0.0, 1.0, size=1000)<=0,0,1),\n",
"                     })\n",
"\n",
"\n",
"df= df.sample(frac=1).reset_index(drop=True)\n",
"\n"
print("Our data have {} rows and {} columns.".format(df.shape[0], df.shape[1]))
print()
print("Data sample is as follows:")
"print()\n",
"print(\"Data sample is as follows:\")\n",
"Our data have 1000 rows and 5 columns.\n",
```\n",
"\n",
"\n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
`Gender`
`Age`
`Height`
`Weight`
`Target`
`0`
`Female`
`27`
`177`
`51`
`0`
`1`
`Female`
`59`
`166`
`65`
`0`
`2`
`Male`
`46`
`171`
`86`
`1`
`3`
`Female`
`34`
`165`
`58`
`0`
`4`
`Female`
`18`
`175`
`69`
`0`
"   Gender  Age  Height  Weight  Target\n",
{
"cell_type": "code",
"id": "5ViSI_S0nJ-D",
"colab_type": "code",
"outputId": "03943743-a33b-4004-f5fe-8bb5eabdaa86",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 279
}
},
sns.countplot(x="Gender", data=df, palette="Set3")
plt.show()
{
"cell_type": "code",
"id": "HMdXi6Wlnbig",
"colab_type": "code",
"outputId": "50220526-57c9-4620-a9b5-66823a45620a",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 296
}
},
sns.distplot(df["Age"])
{
"cell_type": "code",
"id": "Th0uT6VnnQa8",
"colab_type": "code",
"outputId": "bc359c6e-8089-486d-df9d-b083e218977a",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 298
}
},
sns.distplot(df["Height"])
{
"cell_type": "code",
"id": "5zRcS7MznZhJ",
"colab_type": "code",
"outputId": "e217887c-8983-4c5b-c51a-093a96c85e9f",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 296
}
},
sns.distplot(df["Weight"])
{
"cell_type": "code",
"id": "W8_zHKgtnl0i",
"colab_type": "code",
"outputId": "55a3418a-c285-4e6a-8276-be8037e46167",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 470
}
},
"# Heatmap showing correlation between variables\n",
"execution_count": 8,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
""
]
},
"tags": [],
"needs_background": "light"
}
}
]
"# encoding our categorical variable (Gender)\n",
{
"cell_type": "code",
"id": "QpGgE5U2o4p7",
"colab_type": "code",
"outputId": "8b40a6ee-c494-4cc8-9c5a-7bc9de48ece0",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
}
},
```\n",
"\n",
"\n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
`Gender`
`Age`
`Height`
`Weight`
`Target`
`0`
`2`
`27`
`177`
`51`
`0`
`1`
`2`
`59`
`166`
`65`
`0`
`2`
`1`
`46`
`171`
`86`
`1`
`3`
`2`
`34`
`165`
`58`
`0`
`4`
`2`
`18`
`175`
`69`
`0`
"text/plain": [
"   Gender  Age  Height  Weight  Target\n",
"0       2   27     177      51       0\n",
"1       2   59     166      65       0\n",
"2       1   46     171      86       1\n",
"3       2   34     165      58       0\n",
"4       2   18     175      69       0"
]
},
"tags": []
},
"execution_count": 10
}
]
"# Splitting data into X: independent and y: dependent variable\n",
{
"cell_type": "code",
"id": "d1EQHhhiq-RG",
"colab_type": "code",
"outputId": "37e4a51c-7f69-4f93-9acf-a61489bf70c3",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
}
},
"# Standardizing independent variables into same scale\n",
"execution_count": 12,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"```
```\n",
"\n",
"\n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
`Gender`
`Age`
`Height`
`Weight`
`0`
`1.0`
`-0.914033`
`1.168520`
`-1.675116`
`1`
`1.0`
`1.671862`
`-1.365231`
`-0.597920`
`2`
`-1.0`
`0.621342`
`-0.213526`
`1.017873`
`3`
`1.0`
`-0.348369`
`-1.595572`
`-1.136518`
`4`
`1.0`
`-1.641316`
`0.707838`
`-0.290150`
"text/plain": [
"   Gender       Age    Height    Weight\n",
"0     1.0 -0.914033  1.168520 -1.675116\n",
"1     1.0  1.671862 -1.365231 -0.597920\n",
"2    -1.0  0.621342 -0.213526  1.017873\n",
"3     1.0 -0.348369 -1.595572 -1.136518\n",
"4     1.0 -1.641316  0.707838 -0.290150"
]
},
"tags": []
},
"execution_count": 12
}
]
"# Splitting data into training and testing data, we use 75% to train our classification model\n",
{
"cell_type": "code",
"id": "GJNylh4LnH0D",
"colab_type": "code",
"outputId": "3e64e3b0-c531-41c4-cc80-075d947a556e",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 85
}
},
"print(\"Shape of X_train:\",X_train.shape)\n",
"execution_count": 14,
"outputs": [
{
"output_type": "stream",
"text": [
"Shape of X_train: (750, 4)\n",
"Shape of y_train: (750,)\n",
"Shape of X_test: (250, 4)\n",
"Shape of y_test: (250,)\n"
],
"name": "stdout"
}
]
"source": [
"# visualizing training and testing labels\n",
"\n",
"plt.figure(1 , figsize = (25 ,5))\n",
"n = 0 \n",
"for z , j in zip([y_train , y_test] , ['train labels', 'test labels']):\n",
"    n += 1\n",
"    plt.subplot(1 , 3  , n)\n",
"    sns.countplot(x = z , palette=\"Set3\")\n",
"    plt.title(j)\n",
"plt.show()"
"source": [
__Naive Bayes Classifier:__
]
"source": [
"# Create the Gaussian Naive Bayes Classifier\n",
"\n",
"from sklearn.metrics import classification_report,confusion_matrix,accuracy_score\n",
"\n",
"from sklearn.naive_bayes import GaussianNB\n",
"nbc = GaussianNB()\n",
"nbc.fit(X_train,y_train)\n",
"\n",
"# making predictions\n",
"y_pred = nbc.predict(X_test)\n",
"\n",
"\n",
"print(\"Accuracy of Naive-Bayes Classifier = {:0.2f} %\".format(accuracy_score(y_test, y_pred)*100))\n",
"print()\n",
"print(\"===================================================================\")\n",
"print()\n",
"\n",
"\n",
"print(\"The classification report of Naive-Bayes Classifier is as follows:\")\n",
"print(classification_report(y_test,y_pred))\n",
"print()\n",
"print(\"===================================================================\")\n",
"print()\n",
"\n",
"\n",
"cm=confusion_matrix(y_test,y_pred)\n",
"confusion = pd.DataFrame(cm, index=[\"0\", \"1\"], columns=[\"0\", \"1\"])\n",
"print(\"Confusion Matrix is as follows:\")\n",
"confusion\n"
{
"output_type": "stream",
"text": [
"Accuracy of Naive-Bayes Classifier = 50.80 %\n",
"\n",
"===================================================================\n",
"\n",
"The classification report of Naive-Bayes Classifier is as follows:\n",
"              precision    recall  f1-score   support\n",
"\n",
"           0       0.51      0.65      0.57       127\n",
"           1       0.50      0.36      0.42       123\n",
"\n",
"    accuracy                           0.51       250\n",
"   macro avg       0.51      0.51      0.50       250\n",
"weighted avg       0.51      0.51      0.50       250\n",
"\n",
"\n",
"===================================================================\n",
"\n",
"Confusion Matrix is as follows:\n"
```\n",
"\n",
"\n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
`0`
`1`
`0`
`83`
`44`
`1`
`79`
`44`
"text/plain": [
"    0   1\n",
"0  83  44\n",
"1  79  44"
]
},
"tags": []
},
"execution_count": 16
}
]
__Nearest Neighbors Classifier:__
{
"cell_type": "code",
"id": "zI88IBQyqiiQ",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 417
},
"outputId": "4ee92eba-0f0f-4719-ba66-7715753aae3d"
},
"from sklearn.neighbors import KNeighborsClassifier\n",
"execution_count": 17,
"outputs": [
{
"output_type": "stream",
"text": [
"Accuracy of Nearest Neighbors Classifier  = 47.60 %\n",
"\n",
"===================================================================\n",
"\n",
"Classification Report of Nearest Neighbors Classifier: \n",
"               precision    recall  f1-score   support\n",
"\n",
"           0       0.48      0.40      0.44       127\n",
"           1       0.47      0.55      0.51       123\n",
"\n",
"    accuracy                           0.48       250\n",
"   macro avg       0.48      0.48      0.47       250\n",
"weighted avg       0.48      0.48      0.47       250\n",
"\n",
"\n",
"===================================================================\n",
"\n",
"Confusion Matrix of Nearest Neighbors Classifier:\n"
```\n",
"\n",
"\n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
`0`
`1`
`0`
`51`
`76`
`1`
`55`
`68`
"text/plain": [
"    0   1\n",
"0  51  76\n",
"1  55  68"
]
},
"tags": []
},
"execution_count": 17
}
]
__Support Vector Machine:__
{
"cell_type": "code",
"id": "EHKIvVM9vrYo",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 383
},
"outputId": "330fc102-1fbb-4d9d-a32b-dedebd20f9da"
},
"# Create the Support Vector Machine\n",
"execution_count": 18,
"outputs": [
{
"output_type": "stream",
"text": [
"Accuracy of Support Vector Machine = 50.00 %\n",
"==========================================\n",
"\n",
"Classification Report of Support Vector Machine is as follows: \n",
"               precision    recall  f1-score   support\n",
"\n",
"           0       0.51      0.51      0.51       127\n",
"           1       0.49      0.49      0.49       123\n",
"\n",
"    accuracy                           0.50       250\n",
"   macro avg       0.50      0.50      0.50       250\n",
"weighted avg       0.50      0.50      0.50       250\n",
"\n",
"==========================================\n",
"\n",
"Confusion Matrix of Support Vector Machine is as follows:\n"
```\n",
"\n",
"\n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
"  \n",
"    \n",
"      \n",
"      \n",
"      \n",
"    \n",
"    \n",
"      \n",
"      \n",
"      \n",
"    \n",
"  \n",
`0`
`1`
`0`
`65`
`62`
`1`
`63`
`60`
"text/plain": [
"    0   1\n",
"0  65  62\n",
"1  63  60"
]
},
"tags": []
},
"execution_count": 18
}
]
