Login
Order Now
Support
Python Task on Generating Your Own Data Using Numpy and Pandas Libraries Dimension

Python Task on Generating Your Own Data Using Numpy and Pandas Libraries Dimension

  • 8th Oct, 2021
  • 3:57 PM

{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "Tim.ipynb",
      "provenance": [],
      "collapsed_sections": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "metadata": {
        "id": "-S5tt4tWKLjo",
        "colab_type": "code",
        "outputId": "8b9b43f5-938f-4e57-8353-1022c6a571a0",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 71
        }
      },
      "source": [
        "import numpy as np\n",
        "import pandas as pd\n",
        "import random\n",
        "import matplotlib.pyplot as plt\n",
        "%matplotlib inline\n",
        "import seaborn as sns\n",
        "import time\n"
      ],
      "execution_count": 1,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "/usr/local/lib/python3.6/dist-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.\n",
            "  import pandas.util.testing as tm\n"
          ],
          "name": "stderr"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "o6QsjnrAKbIW",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "df = pd.DataFrame({\"Gender\" : np.repeat([\"Male\", \"Female\"],500), \n",
        "                   # Age ranges between 18-60 years\n",
        "                   \"Age\"    : np.random.randint(low=18, high=60, size=1000),       \n",
        "                   \n",
        "                   # Height ranges between 165-180cm\n",
        "                   \"Height\" : np.random.randint(low=165, high=180, size=1000),\n",
        "                   \n",
        "                   # Weight ranges betweem 50-95kg\n",
        "                   \"Weight\" : np.random.randint(low=50, high=95, size=1000),\n",
        "                   \n",
        "                   # Target varriable having 1's and 0's\n",
        "                   \"Target\" : np.where(np.random.normal(0.0, 1.0, size=1000)<=0,0,1),\n",
        "                     })\n",
        "\n",
        "\n",
        "df= df.sample(frac=1).reset_index(drop=True)\n",
        "\n"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "rZ6tvGMHK4z3",
        "colab_type": "code",
        "outputId": "d3bf9880-819a-49a0-b4e9-de4160950b68",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 255
        }
      },
      "source": [
        "print(\"Our data have {} rows and {} columns.\".format(df.shape[0], df.shape[1]))\n",
        "print()\n",
        "print(\"Data sample is as follows:\")\n",
        "df.head()"
      ],
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Our data have 1000 rows and 5 columns.\n",
            "\n",
            "Data sample is as follows:\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Gender</th>\n",
              "      <th>Age</th>\n",
              "      <th>Height</th>\n",
              "      <th>Weight</th>\n",
              "      <th>Target</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>Female</td>\n",
              "      <td>27</td>\n",
              "      <td>177</td>\n",
              "      <td>51</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>Female</td>\n",
              "      <td>59</td>\n",
              "      <td>166</td>\n",
              "      <td>65</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>Male</td>\n",
              "      <td>46</td>\n",
              "      <td>171</td>\n",
              "      <td>86</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>Female</td>\n",
              "      <td>34</td>\n",
              "      <td>165</td>\n",
              "      <td>58</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>Female</td>\n",
              "      <td>18</td>\n",
              "      <td>175</td>\n",
              "      <td>69</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "   Gender  Age  Height  Weight  Target\n",
              "0  Female   27     177      51       0\n",
              "1  Female   59     166      65       0\n",
              "2    Male   46     171      86       1\n",
              "3  Female   34     165      58       0\n",
              "4  Female   18     175      69       0"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 3
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "5ViSI_S0nJ-D",
        "colab_type": "code",
        "outputId": "03943743-a33b-4004-f5fe-8bb5eabdaa86",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 279
        }
      },
      "source": [
        "sns.countplot(x=\"Gender\", data=df, palette=\"Set3\")\n",
        "plt.show()"
      ],
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAQz0lEQVR4nO3de5CddX3H8fdHgoJWuUikmGBjNa1FBwUjgtpWoVrAVlDRwmiNykwcS62O7VRsO22tttVqS9U6KBU0eLfeSK0j0iA6tSIsF7k7pCglKZgIiHcd8Ns/zm9/HpONWUyePcvu+zVz5jy/y/PsNzObfPLcU1VIkgRwr0kXIEmaPwwFSVJnKEiSOkNBktQZCpKkbsmkC9gZ++23X61YsWLSZUjSPcoll1zyjapaOtPYPToUVqxYwdTU1KTLkKR7lCQ3bm/Mw0eSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVI3aCgk+VqSK5NcnmSq9e2b5Lwk17fvfVp/krwlyYYkVyQ5dMjaJEnbmos9hadU1WOqalVrnwqsr6qVwPrWBjgGWNk+a4DT56A2SdKYSRw+Og5Y25bXAseP9Z9dIxcCeyc5YAL1SdKiNfQdzQV8JkkB76iqM4D9q+rmNn4LsH9bXgbcNLbuxtZ381gfSdYw2pPgIQ95yE4XePrFn9/pbWjheenjfmPSJfD976+fdAmah/bc86hBtz90KDypqjYleRBwXpLrxgerqlpgzFoLljMAVq1a5WvjJGkXGvTwUVVtat+bgY8DhwFfnz4s1L43t+mbgAPHVl/e+iRJc2SwUEhyvyT3n14GngZcBawDVrdpq4Fz2vI64AXtKqTDgTvGDjNJkubAkIeP9gc+nmT657y/qj6d5GLgw0lOBm4Entvmfwo4FtgAfA940YC1SZJmMFgoVNUNwKNn6L8V2OZMSVUVcMpQ9UiSdsw7miVJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqRs8FJLsluSyJJ9s7Ycm+VKSDUk+lOTerf8+rb2hja8YujZJ0k+biz2FlwPXjrXfAJxWVQ8HbgdObv0nA7e3/tPaPEnSHBo0FJIsB54OvLO1AxwJfKRNWQsc35aPa23a+FFtviRpjgy9p/DPwJ8CP27tBwLfrKo7W3sjsKwtLwNuAmjjd7T5PyXJmiRTSaa2bNkyZO2StOgMFgpJfgfYXFWX7MrtVtUZVbWqqlYtXbp0V25akha9JQNu+4nAM5IcC+wBPAB4M7B3kiVtb2A5sKnN3wQcCGxMsgTYC7h1wPokSVsZbE+hql5dVcuragVwInB+VT0P+CxwQpu2GjinLa9rbdr4+VVVQ9UnSdrWJO5TeBXwyiQbGJ0zOLP1nwk8sPW/Ejh1ArVJ0qI25OGjrqouAC5oyzcAh80w5wfAc+aiHknSzLyjWZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSusFCIckeSS5K8uUkVyd5Tet/aJIvJdmQ5ENJ7t3679PaG9r4iqFqkyTNbMg9hR8CR1bVo4HHAEcnORx4A3BaVT0cuB04uc0/Gbi99Z/W5kmS5tBgoVAj32nN3dungCOBj7T+tcDxbfm41qaNH5UkQ9UnSdrWoOcUkuyW5HJgM3Ae8D/AN6vqzjZlI7CsLS8DbgJo43
cADxyyPknSTxs0FKrqrqp6DLAcOAx4xM5uM8maJFNJprZs2bLTNUqSfmJOrj6qqm8CnwWOAPZOsqQNLQc2teVNwIEAbXwv4NYZtnVGVa2qqlVLly4dvHZJWkyGvPpoaZK92/KewFOBaxmFwwlt2mrgnLa8rrVp4+dXVQ1VnyRpW0t2POXndgCwNslujMLnw1X1ySTXAB9M8jrgMuDMNv9M4D1JNgC3AScOWJskaQazCoUk66vqqB31jauqK4BDZui/gdH5ha37fwA8Zzb1SJKG8TNDIckewH2B/ZLsA0xfIvoAfnLVkCRpgdjRnsJLgFcADwYu4Seh8C3gXwasS5I0AT8zFKrqzcCbk7ysqt46RzVJkiZkVucUquqtSZ4ArBhfp6rOHqguSdIEzPZE83uAhwGXA3e17gIMBUlaQGZ7Seoq4CDvG5CkhW22N69dBfzikIVIkiZvtnsK+wHXJLmI0SOxAaiqZwxSlSRpImYbCn89ZBGSpPlhtlcffW7oQiRJkzfbq4++zehqI4B7M3phzner6gFDFSZJmnuz3VO4//RyexvaccDhQxUlSZqMu/3o7PaazU8Avz1APZKkCZrt4aNnjTXvxei+hR8MUpEkaWJme/XR744t3wl8jdEhJEnSAjLbcwovGroQSdLkzeqcQpLlST6eZHP7fDTJ8qGLkyTNrdmeaH4Xo3coP7h9/r31SZIWkNmGwtKqeldV3dk+7waWDliXJGkCZhsKtyZ5fpLd2uf5wK1DFiZJmnuzDYUXA88FbgFuBk4AXjhQTZKkCZntJal/A6yuqtsBkuwLvIlRWEiSFojZ7ikcPB0IAFV1G3DIMCVJkiZltqFwryT7TDfansJs9zIkSfcQs/2H/R+BLyb5t9Z+DvC3w5QkSZqU2d7RfHaSKeDI1vWsqrpmuLIkSZMw60NALQQMAklawO72o7MlSQuXoSBJ6gwFSVJnKEiSOkNBktQZCpKkbrBQSHJgks8muSbJ1Ule3vr3TXJekuvb9z6tP0nekmRDkiuSHDpUbZKkmQ25p3An8MdVdRBwOHBKkoOAU4H1VbUSWN/aAMcAK9tnDXD6gLVJkmYwWChU1c1VdWlb/jZwLbAMOA5Y26atBY5vy8cBZ9fIhcDeSQ4Yqj5J0rbm5JxCkhWMnqr6JWD/qrq5Dd0C7N+WlwE3ja22sfVtva01SaaSTG3ZsmWwmiVpMRo8FJL8AvBR4BVV9a3xsaoqoO7O9qrqjKpaVVWrli71jaCStCsNGgpJdmcUCO+rqo+17q9PHxZq35tb/ybgwLHVl7c+SdIcGfLqowBnAtdW1T+NDa0DVrfl1cA5Y/0vaFchHQ7cMXaYSZI0B4Z8Uc4Tgd8Hrkxyeev7M+D1wIeTnAzcyOjdzwCfAo4FNgDfA140YG2SpBkMFgpV9V9AtjN81AzzCzhlqHokSTvmHc2SpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQNFgpJzkqyOclVY337JjkvyfXte5/WnyRvSbIhyRVJDh2qLknS9g25p/Bu4Oit+k4F1lfVSmB9awMcA6xsnzXA6QPWJUnajsFCoao+D9y2VfdxwNq2vBY4fqz/7Bq5ENg7yQFD1SZJmtlcn1PYv6pubsu3APu35WXATWPzNra+bSRZk2QqydSWLVuGq1SSFqGJnWiuqgLq51jvjKpaVVWrli5dOkBlkrR4zXUofH36sFD73tz6NwEHjs1b3vokSXNorkNhHbC6La8Gzhnrf0G7Culw4I6xw0ySpDmyZKgNJ/kA8GRgvyQbgb8CXg98OMnJwI3Ac9v0TwHHAhuA7wEvGqouSdL2DRYKVXXSdoaOmmFuAacMVYskaX
a8o1mS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUGQqSpM5QkCR1hoIkqTMUJEmdoSBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQJHWGgiSpMxQkSZ2hIEnqDAVJUmcoSJI6Q0GS1BkKkqTOUJAkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLUzatQSHJ0kq8k2ZDk1EnXI0mLzbwJhSS7AW8DjgEOAk5KctBkq5KkxWXehAJwGLChqm6oqh8BHwSOm3BNkrSoLJl0AWOWATeNtTcCj996UpI1wJrW/E6Sr8xBbYvFfsA3Jl3EfPAHky5AW/N3c9f6pe0NzKdQmJWqOgM4Y9J1LERJpqpq1aTrkLbm7+bcmU+HjzYBB461l7c+SdIcmU+hcDGwMslDk9wbOBFYN+GaJGlRmTeHj6rqziR/CJwL7AacVVVXT7isxcbDcpqv/N2cI6mqSdcgSZon5tPhI0nShBkKkqTOUFggktyV5PKxz4oBf9bXkuw31Pa1OCSpJO8day9JsiXJJ3ew3pN3NEc/v3lzolk77ftV9ZhJFyHdDd8FHpVkz6r6PvBUvAx94txTWMCSPDbJ55JckuTcJAe0/guSnJZkKsm1SR6X5GNJrk/yurH1P9HWvbrdST7Tz3h+kova3sk72jOspNn6FPD0tnwS8IHpgSSHJfliksuS/HeSX9165ST3S3JW+x28LImPxtlJhsLCsefYoaOPJ9kdeCtwQlU9FjgL+Nux+T9qd4i+HTgHOAV4FPDCJA9sc17c1l0F/NFYPwBJfg34PeCJbS/lLuB5A/4ZtfB8EDgxyR7AwcCXxsauA369qg4B/hL4uxnW/3Pg/Ko6DHgK8MYk9xu45gXNw0cLx08dPkryKEb/yJ+XBEb3ftw8Nn/6xsArgaur6ua23g2M7iy/lVEQPLPNOxBY2fqnHQU8Fri4/Yw9gc279o+lhayqrmjnv05itNcwbi9gbZKVQAG7z7CJpwHPSPInrb0H8BDg2kEKXgQMhYUrjP6xP2I74z9s3z8eW55uL0nyZOC3gCOq6ntJLmD0F27rn7G2ql69y6rWYrQOeBPwZGB8b/S1wGer6pktOC6YYd0Az64qH4y5i3j4aOH6CrA0yREASXZP8si7sf5ewO0tEB4BHD7DnPXACUke1H7Gvkm2+/RFaTvOAl5TVVdu1b8XPznx/MLtrHsu8LK0XdUkhwxS4SJiKCxQ7Z0UJwBvSPJl4HLgCXdjE59mtMdwLfB64MIZfsY1wF8An0lyBXAecMDO1q7Fpao2VtVbZhj6B+Dvk1zG9o9qvJbRYaUrklzd2toJPuZCktS5pyBJ6gwFSVJnKEiSOkNBktQZCpKkzlCQZpBk/yTvT3JDe/7TF8fu7t6Z7fqET81rhoK0lXYj1CeAz1fVL7fnP50ILJ9ALT51QHPKUJC2dSSjBwa+fbqjqm6sqrcm2S3JG5NcnOSKJC+BvgdwQZKPJLkuyfvG7rI9uvVdCjxrepvbe8JnkhcmWZfkfEZ3jUtzxv+FSNt6JHDpdsZOBu6oqscluQ/whSSfaWOHtHX/D/gC8MQkU8C/MgqaDcCHxrY1/YTPFyfZG7goyX+2sUOBg6vqtl35B5N2xFCQdiDJ24AnAT8CbgQOTnJCG96L0dNjfwRcVFUb2zqXAyuA7wBfrarrW/97gel3U2zvCZ8A5xkImgRDQdrW1cCzpxtVdUp7/egU8L/Ay6rq3PEV2lNlx582exc7/vs14xM+kzye0VvJpDnnOQVpW+cDeyR56Vjffdv3ucBL20uMSPIrO3ipy3XAiiQPa+2TxsZ8wqfmHUNB2kqNnhJ5PPCbSb6a5CJgLfAq4J3ANcClSa4C3sHP2COoqh8wOlz0H+1E8/hLiHzCp+Ydn5IqSercU5AkdYaCJKkzFCRJnaEgSeoMBUlSZyhIkjpDQZLU/T+tuh3ZdOsXewAAAABJRU5ErkJggg==\n",
            "text/plain": [
              "<Figure size 432x288 with 1 Axes>"
            ]
          },
          "metadata": {
            "tags": [],
            "needs_background": "light"
          }
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "HMdXi6Wlnbig",
        "colab_type": "code",
        "outputId": "50220526-57c9-4620-a9b5-66823a45620a",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 296
        }
      },
      "source": [
        "sns.distplot(df[\"Age\"])"
      ],
      "execution_count": 5,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "<matplotlib.axes._subplots.AxesSubplot at 0x7f369dd9cd30>"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 5
        },
        {
          "output_type": "display_data",
          "data": {
            "image/png": "\n",
            "text/plain": [
              "<Figure size 432x288 with 1 Axes>"
            ]
          },
          "metadata": {
            "tags": [],
            "needs_background": "light"
          }
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Th0uT6VnnQa8",
        "colab_type": "code",
        "outputId": "bc359c6e-8089-486d-df9d-b083e218977a",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 298
        }
      },
      "source": [
        "sns.distplot(df[\"Height\"])"
      ],
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "<matplotlib.axes._subplots.AxesSubplot at 0x7f369d81a4a8>"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 6
        },
        {
          "output_type": "display_data",
          "data": {
            "image/png": "\n",
            "text/plain": [
              "<Figure size 432x288 with 1 Axes>"
            ]
          },
          "metadata": {
            "tags": [],
            "needs_background": "light"
          }
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "5zRcS7MznZhJ",
        "colab_type": "code",
        "outputId": "e217887c-8983-4c5b-c51a-093a96c85e9f",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 296
        }
      },
      "source": [
        "sns.distplot(df[\"Weight\"])"
      ],
      "execution_count": 7,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "<matplotlib.axes._subplots.AxesSubplot at 0x7f369d7f2748>"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 7
        },
        {
          "output_type": "display_data",
          "data": {
            "image/png": "\n",
            "text/plain": [
              "<Figure size 432x288 with 1 Axes>"
            ]
          },
          "metadata": {
            "tags": [],
            "needs_background": "light"
          }
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "W8_zHKgtnl0i",
        "colab_type": "code",
        "outputId": "55a3418a-c285-4e6a-8276-be8037e46167",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 470
        }
      },
      "source": [
        "# Heatmapshowing correlation between variables\n",
        "fig, ax =plt.subplots(figsize=(8, 8))\n",
        "plt.title(\"Correlation Plot\")\n",
        "sns.heatmap(df.corr(), mask=np.zeros_like(df.corr(), dtype=np.bool), cmap=sns.diverging_palette(220, 10, as_cmap=True),\n",
        "            square=True, ax=ax, annot=True,linewidths=3)\n",
        "plt.show()"
      ],
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "image/png": "\n",
            "text/plain": [
              "<Figure size 576x576 with 2 Axes>"
            ]
          },
          "metadata": {
            "tags": [],
            "needs_background": "light"
          }
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "y8efIyqtXLZ0",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# encoding our categorical variable (Gender)\n",
        "df[\"Gender\"] = df[\"Gender\"].map({\"Male\":1, \"Female\":2})"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "QpGgE5U2o4p7",
        "colab_type": "code",
        "outputId": "8b40a6ee-c494-4cc8-9c5a-7bc9de48ece0",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 204
        }
      },
      "source": [
        "df.head()"
      ],
      "execution_count": 10,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Gender</th>\n",
              "      <th>Age</th>\n",
              "      <th>Height</th>\n",
              "      <th>Weight</th>\n",
              "      <th>Target</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>2</td>\n",
              "      <td>27</td>\n",
              "      <td>177</td>\n",
              "      <td>51</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>2</td>\n",
              "      <td>59</td>\n",
              "      <td>166</td>\n",
              "      <td>65</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>1</td>\n",
              "      <td>46</td>\n",
              "      <td>171</td>\n",
              "      <td>86</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>2</td>\n",
              "      <td>34</td>\n",
              "      <td>165</td>\n",
              "      <td>58</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>2</td>\n",
              "      <td>18</td>\n",
              "      <td>175</td>\n",
              "      <td>69</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "   Gender  Age  Height  Weight  Target\n",
              "0       2   27     177      51       0\n",
              "1       2   59     166      65       0\n",
              "2       1   46     171      86       1\n",
              "3       2   34     165      58       0\n",
              "4       2   18     175      69       0"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 10
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "tcRXvj3smuUN",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Splitting data into X: independent and y: dependent variable\n",
        "\n",
        "X = df.drop(\"Target\",axis=1)\n",
        "y = df[\"Target\"]\n"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "d1EQHhhiq-RG",
        "colab_type": "code",
        "outputId": "37e4a51c-7f69-4f93-9acf-a61489bf70c3",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 204
        }
      },
      "source": [
        "# Standardizing independent variables into same scale\n",
        "\n",
        "from sklearn.preprocessing import StandardScaler\n",
        "scaler = StandardScaler().fit(X)\n",
        "X = scaler.transform(X)\n",
        "\n",
        "X = pd.DataFrame(X)\n",
        "X.columns = [\"Gender\", \"Age\", \"Height\", \"Weight\"]\n",
        "\n",
        "# Independent variables after standardizing\n",
        "X.head()"
      ],
      "execution_count": 12,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Gender</th>\n",
              "      <th>Age</th>\n",
              "      <th>Height</th>\n",
              "      <th>Weight</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>1.0</td>\n",
              "      <td>-0.914033</td>\n",
              "      <td>1.168520</td>\n",
              "      <td>-1.675116</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>1.0</td>\n",
              "      <td>1.671862</td>\n",
              "      <td>-1.365231</td>\n",
              "      <td>-0.597920</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>-1.0</td>\n",
              "      <td>0.621342</td>\n",
              "      <td>-0.213526</td>\n",
              "      <td>1.017873</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>1.0</td>\n",
              "      <td>-0.348369</td>\n",
              "      <td>-1.595572</td>\n",
              "      <td>-1.136518</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>1.0</td>\n",
              "      <td>-1.641316</td>\n",
              "      <td>0.707838</td>\n",
              "      <td>-0.290150</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "   Gender       Age    Height    Weight\n",
              "0     1.0 -0.914033  1.168520 -1.675116\n",
              "1     1.0  1.671862 -1.365231 -0.597920\n",
              "2    -1.0  0.621342 -0.213526  1.017873\n",
              "3     1.0 -0.348369 -1.595572 -1.136518\n",
              "4     1.0 -1.641316  0.707838 -0.290150"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 12
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "MxJ1dQGGrBXq",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Splitting data into training and testing data, we use 75% to train our classification model\n",
        "\n",
        "from sklearn.model_selection import train_test_split\n",
        "\n",
        "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,shuffle=True, stratify=y)\n"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "GJNylh4LnH0D",
        "colab_type": "code",
        "outputId": "3e64e3b0-c531-41c4-cc80-075d947a556e",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 85
        }
      },
      "source": [
        "print(\"Shape of X_train:\",X_train.shape)\n",
        "print(\"Shape of y_train:\",y_train.shape)\n",
        "print(\"Shape of X_test:\",X_test.shape)\n",
        "print(\"Shape of y_test:\",y_test.shape)"
      ],
      "execution_count": 14,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Shape of X_train: (750, 4)\n",
            "Shape of y_train: (750,)\n",
            "Shape of X_test: (250, 4)\n",
            "Shape of y_test: (250,)\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "X6ZU0ULLnhwI",
        "colab_type": "code",
        "outputId": "db741f1f-be29-46d4-d718-2fcd58fe1fde",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 350
        }
      },
      "source": [
        "# visualizing training and testing labels\n",
        "\n",
        "plt.figure(1 , figsize = (25 ,5))\n",
        "n = 0 \n",
        "for z , j in zip([y_train , y_test] , ['train labels', 'test labels']):\n",
        "    n += 1\n",
        "    plt.subplot(1 , 3  , n)\n",
        "    sns.countplot(x = z , palette=\"Set3\")\n",
        "    plt.title(j)\n",
        "plt.show()"
      ],
      "execution_count": 15,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "image/png": "\n",
            "text/plain": [
              "<Figure size 1800x360 with 2 Axes>"
            ]
          },
          "metadata": {
            "tags": [],
            "needs_background": "light"
          }
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "0zAvC39QvLMM",
        "colab_type": "text"
      },
      "source": [
        "__Naive Bayes Classifier:__"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "22vbc9OymmAJ",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 417
        },
        "outputId": "122c248f-14cb-4053-fc6f-bf0120ac90a0"
      },
      "source": [
        "# Create the Multinomial Naive Bayes Classifier\n",
        "\n",
        "from sklearn.metrics import classification_report,confusion_matrix,accuracy_score\n",
        "\n",
        "from sklearn.naive_bayes import GaussianNB\n",
        "nbc = GaussianNB()\n",
        "nbc.fit(X_train,y_train)\n",
        "\n",
        "# making predictions\n",
        "y_pred = nbc.predict(X_test)\n",
        "\n",
        "\n",
        "print(\"Accuracy of Naive-Bayes Classifier = {:0.2f} %\".format(accuracy_score(y_test, y_pred)*100))\n",
        "print()\n",
        "print(\"===================================================================\")\n",
        "print()\n",
        "\n",
        "\n",
        "print(\"The classification report of Naive-Bayes Classifier is as follows:\")\n",
        "print(classification_report(y_test,y_pred))\n",
        "print()\n",
        "print(\"===================================================================\")\n",
        "print()\n",
        "\n",
        "\n",
        "cm=confusion_matrix(y_test,y_pred)\n",
        "confusion = pd.DataFrame(cm, index=[\"0\", \"1\"], columns=[\"0\", \"1\"])\n",
        "print(\"Confusion Matrix is as follows:\")\n",
        "confusion\n"
      ],
      "execution_count": 16,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Accuracy of Naive-Bayes Classifier = 50.80 %\n",
            "\n",
            "===================================================================\n",
            "\n",
            "The classification report of Naive-Bayes Classifier is as follows:\n",
            "              precision    recall  f1-score   support\n",
            "\n",
            "           0       0.51      0.65      0.57       127\n",
            "           1       0.50      0.36      0.42       123\n",
            "\n",
            "    accuracy                           0.51       250\n",
            "   macro avg       0.51      0.51      0.50       250\n",
            "weighted avg       0.51      0.51      0.50       250\n",
            "\n",
            "\n",
            "===================================================================\n",
            "\n",
            "Confusion Matrix is as follows:\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>0</th>\n",
              "      <th>1</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>83</td>\n",
              "      <td>44</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>79</td>\n",
              "      <td>44</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "    0   1\n",
              "0  83  44\n",
              "1  79  44"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 16
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "JkLt6dPzvDOV",
        "colab_type": "text"
      },
      "source": [
        "__Nearest Neighbors Classifier:__"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "zI88IBQyqiiQ",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 417
        },
        "outputId": "4ee92eba-0f0f-4719-ba66-7715753aae3d"
      },
      "source": [
        "from sklearn.neighbors import KNeighborsClassifier\n",
        "\n",
        "# Create the K Nearest Neighbour\n",
        "\n",
        "clf = KNeighborsClassifier(n_neighbors=3)\n",
        "\n",
        "clf.fit(X_train,y_train)\n",
        "\n",
        "# making predictions\n",
        "y_predicted = clf.predict(X_test)\n",
        "\n",
        "# Calculate the accuracy of the prediction\n",
        "print(\"Accuracy of Nearest Neighbors Classifier  = {:0.2f} %\".format(accuracy_score(y_test, y_predicted)*100))\n",
        "print()\n",
        "print(\"===================================================================\")\n",
        "print()\n",
        "\n",
        "# Cross validate the scores\n",
        "print(\"Classification Report of Nearest Neighbors Classifier: \\n {}\".format(classification_report(y_test, y_predicted)))\n",
        "print()\n",
        "print(\"===================================================================\")\n",
        "print()\n",
        "\n",
        "# Confusion matrix\n",
        "cm=np.array(confusion_matrix(y_test,y_predicted))\n",
        "\n",
        "confusion = pd.DataFrame(cm, index=[\"0\", \"1\"],\n",
        "                         columns=[\"0\", \"1\"])\n",
        "\n",
        "print(\"Confusion Matrix of Nearest Neighbors Classifier:\")\n",
        "confusion"
      ],
      "execution_count": 17,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Accuracy of Nearest Neighbors Classifier  = 47.60 %\n",
            "\n",
            "===================================================================\n",
            "\n",
            "Classification Report of Nearest Neighbors Classifier: \n",
            "               precision    recall  f1-score   support\n",
            "\n",
            "           0       0.48      0.40      0.44       127\n",
            "           1       0.47      0.55      0.51       123\n",
            "\n",
            "    accuracy                           0.48       250\n",
            "   macro avg       0.48      0.48      0.47       250\n",
            "weighted avg       0.48      0.48      0.47       250\n",
            "\n",
            "\n",
            "===================================================================\n",
            "\n",
            "Confusion Matrix of Nearest Neighbors Classifier:\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>0</th>\n",
              "      <th>1</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>51</td>\n",
              "      <td>76</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>55</td>\n",
              "      <td>68</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "    0   1\n",
              "0  51  76\n",
              "1  55  68"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 17
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "6jvzjHq5wzUM",
        "colab_type": "text"
      },
      "source": [
        "__Support Vector Machine:__"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "EHKIvVM9vrYo",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 383
        },
        "outputId": "330fc102-1fbb-4d9d-a32b-dedebd20f9da"
      },
      "source": [
        "# Create the Support Vector Machine\n",
        "from sklearn import svm\n",
        "\n",
        "clf = svm.SVC()\n",
        "\n",
        "clf.fit(X_train,y_train)\n",
        "\n",
        "\n",
        "# Perform the predictions\n",
        "\n",
        "y_predicted = clf.predict(X_test)\n",
        "\n",
        "# Calculate the accuracy of the prediction\n",
        "print(\"Accuracy of Support Vector Machine = {:0.2f} %\".format(accuracy_score(y_test, y_predicted)*100))\n",
        "print(\"==========================================\")\n",
        "print()\n",
        "\n",
        "# Cross validate the scores\n",
        "print(\"Classification Report of Support Vector Machine is as follows: \\n {}\".format(classification_report(y_test, y_predicted)))\n",
        "print(\"==========================================\")\n",
        "print()\n",
        "\n",
        "# Confusion matrix\n",
        "cm=np.array(confusion_matrix(y_test,y_predicted))\n",
        "\n",
        "confusion = pd.DataFrame(cm, index=[\"0\", \"1\"],\n",
        "                         columns=[\"0\", \"1\"])\n",
        "\n",
        "print(\"Confusion Matrix of Support Vector Machine is as follows:\")\n",
        "confusion\n"
      ],
      "execution_count": 18,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Accuracy of Support Vector Machine = 50.00 %\n",
            "==========================================\n",
            "\n",
            "Classification Report of Support Vector Machine is as follows: \n",
            "               precision    recall  f1-score   support\n",
            "\n",
            "           0       0.51      0.51      0.51       127\n",
            "           1       0.49      0.49      0.49       123\n",
            "\n",
            "    accuracy                           0.50       250\n",
            "   macro avg       0.50      0.50      0.50       250\n",
            "weighted avg       0.50      0.50      0.50       250\n",
            "\n",
            "==========================================\n",
            "\n",
            "Confusion Matrix of Support Vector Machine is as follows:\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>0</th>\n",
              "      <th>1</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>65</td>\n",
              "      <td>62</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>63</td>\n",
              "      <td>60</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "    0   1\n",
              "0  65  62\n",
              "1  63  60"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 18
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "6m-moTkexFjG",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        ""
      ],
      "execution_count": 0,
      "outputs": []
    }
  ]
}

Share this post

assignment help, assignment helper, assignment experts, assignment writing services