{
  "cells": [
    {
      "cell_type": "code",
      "source": [
        "import pandas as pd\n",
        "\n",
        "# Lift pandas' display truncation so no rows or columns are elided when printed\n",
        "pd.set_option('display.max_rows', None)\n",
        "pd.set_option('display.max_columns', None)\n",
        "\n",
        "# Read the CSV file into a DataFrame\n",
        "df = pd.read_csv('Mall_Customers.csv')\n",
        "\n",
        "# Display the first 5 rows as a left-aligned markdown table\n",
        "print(df.head().to_markdown(index=False, numalign=\"left\", stralign=\"left\"))\n",
        "\n",
        "# Summarise column names, non-null counts and dtypes.\n",
        "# DataFrame.info() writes its report to stdout itself and returns None, so it is\n",
        "# called directly: wrapping it in print() would append a stray \"None\" line.\n",
        "df.info()"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "| CustomerID   | Gender   | Age   | Annual Income (k$)   | Spending Score (1-100)   |\n",
            "|:-------------|:---------|:------|:---------------------|:-------------------------|\n",
            "| 1            | Male     | 19    | 15                   | 39                       |\n",
            "| 2            | Male     | 21    | 15                   | 81                       |\n",
            "| 3            | Female   | 20    | 16                   | 6                        |\n",
            "| 4            | Female   | 23    | 16                   | 77                       |\n",
            "| 5            | Female   | 31    | 17                   | 40                       |\n",
            "<class 'pandas.core.frame.DataFrame'>\n",
            "RangeIndex: 200 entries, 0 to 199\n",
            "Data columns (total 5 columns):\n",
            " #   Column                  Non-Null Count  Dtype \n",
            "---  ------                  --------------  ----- \n",
            " 0   CustomerID              200 non-null    int64 \n",
            " 1   Gender                  200 non-null    object\n",
            " 2   Age                     200 non-null    int64 \n",
            " 3   Annual Income (k$)      200 non-null    int64 \n",
            " 4   Spending Score (1-100)  200 non-null    int64 \n",
            "dtypes: int64(4), object(1)\n",
            "memory usage: 7.9+ KB\n",
            "None\n"
          ]
        }
      ],
      "execution_count": null,
      "metadata": {
        "id": "v5BJ2oxKXGC4",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "35e2ff0e-473d-4e42-d86b-51f5eb890ef1"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "\n",
        "I'll use the `Annual Income (k$)` and `Spending Score (1-100)` columns to demonstrate how DBSCAN works. But first, I'll plot these two columns to get an overview of the data."
      ],
      "metadata": {
        "id": "UddW_d01XGC5"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import altair as alt\n",
        "\n",
        "# Extract the two clustering features. The explicit .copy() makes X an independent\n",
        "# DataFrame rather than a slice of df, so columns can be added to X later without\n",
        "# pandas raising SettingWithCopyWarning.\n",
        "X = df[['Annual Income (k$)', 'Spending Score (1-100)']].copy()\n",
        "\n",
        "# Create an interactive scatter plot of the raw features\n",
        "chart = alt.Chart(X).mark_point().encode(\n",
        "    x='Annual Income (k$)',\n",
        "    y='Spending Score (1-100)',\n",
        "    tooltip=['Annual Income (k$)', 'Spending Score (1-100)']\n",
        ").properties(title='Scatter Plot of Spending Score vs. Annual Income').interactive()\n",
        "\n",
        "# Save the chart to a JSON file, then show it as the cell's output\n",
        "chart.save('spending_score_vs_annual_income_scatter_plot.json')\n",
        "chart"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "\n",
              "<style>\n",
              "  #altair-viz-c05fcb5a8921441296d091e31056fa6d.vega-embed {\n",
              "    width: 100%;\n",
              "    display: flex;\n",
              "  }\n",
              "\n",
              "  #altair-viz-c05fcb5a8921441296d091e31056fa6d.vega-embed details,\n",
              "  #altair-viz-c05fcb5a8921441296d091e31056fa6d.vega-embed details summary {\n",
              "    position: relative;\n",
              "  }\n",
              "</style>\n",
              "<div id=\"altair-viz-c05fcb5a8921441296d091e31056fa6d\"></div>\n",
              "<script type=\"text/javascript\">\n",
              "  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
              "  (function(spec, embedOpt){\n",
              "    let outputDiv = document.currentScript.previousElementSibling;\n",
              "    if (outputDiv.id !== \"altair-viz-c05fcb5a8921441296d091e31056fa6d\") {\n",
              "      outputDiv = document.getElementById(\"altair-viz-c05fcb5a8921441296d091e31056fa6d\");\n",
              "    }\n",
              "\n",
              "    const paths = {\n",
              "      \"vega\": \"https://cdn.jsdelivr.net/npm/vega@5?noext\",\n",
              "      \"vega-lib\": \"https://cdn.jsdelivr.net/npm/vega-lib?noext\",\n",
              "      \"vega-lite\": \"https://cdn.jsdelivr.net/npm/vega-lite@5.20.1?noext\",\n",
              "      \"vega-embed\": \"https://cdn.jsdelivr.net/npm/vega-embed@6?noext\",\n",
              "    };\n",
              "\n",
              "    function maybeLoadScript(lib, version) {\n",
              "      var key = `${lib.replace(\"-\", \"\")}_version`;\n",
              "      return (VEGA_DEBUG[key] == version) ?\n",
              "        Promise.resolve(paths[lib]) :\n",
              "        new Promise(function(resolve, reject) {\n",
              "          var s = document.createElement('script');\n",
              "          document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
              "          s.async = true;\n",
              "          s.onload = () => {\n",
              "            VEGA_DEBUG[key] = version;\n",
              "            return resolve(paths[lib]);\n",
              "          };\n",
              "          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
              "          s.src = paths[lib];\n",
              "        });\n",
              "    }\n",
              "\n",
              "    function showError(err) {\n",
              "      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
              "      throw err;\n",
              "    }\n",
              "\n",
              "    function displayChart(vegaEmbed) {\n",
              "      vegaEmbed(outputDiv, spec, embedOpt)\n",
              "        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
              "    }\n",
              "\n",
              "    if(typeof define === \"function\" && define.amd) {\n",
              "      requirejs.config({paths});\n",
              "      let deps = [\"vega-embed\"];\n",
              "      require(deps, displayChart, err => showError(`Error loading script: ${err.message}`));\n",
              "    } else {\n",
              "      maybeLoadScript(\"vega\", \"5\")\n",
              "        .then(() => maybeLoadScript(\"vega-lite\", \"5.20.1\"))\n",
              "        .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
              "        .catch(showError)\n",
              "        .then(() => displayChart(vegaEmbed));\n",
              "    }\n",
              "  })({\"config\": {\"view\": {\"continuousWidth\": 300, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-b8ff046c6fd9526a8db9f4185d2c3cae\"}, \"mark\": {\"type\": \"point\"}, \"encoding\": {\"tooltip\": [{\"field\": \"Annual Income (k$)\", \"type\": \"quantitative\"}, {\"field\": \"Spending Score (1-100)\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"Annual Income (k$)\", \"type\": \"quantitative\"}, \"y\": {\"field\": \"Spending Score (1-100)\", \"type\": \"quantitative\"}}, \"params\": [{\"name\": \"param_7\", \"select\": {\"type\": \"interval\", \"encodings\": [\"x\", \"y\"]}, \"bind\": \"scales\"}], \"title\": \"Scatter Plot of Spending Score vs. Annual Income\", \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.20.1.json\", \"datasets\": {\"data-b8ff046c6fd9526a8db9f4185d2c3cae\": [{\"Annual Income (k$)\": 15, \"Spending Score (1-100)\": 39}, {\"Annual Income (k$)\": 15, \"Spending Score (1-100)\": 81}, {\"Annual Income (k$)\": 16, \"Spending Score (1-100)\": 6}, {\"Annual Income (k$)\": 16, \"Spending Score (1-100)\": 77}, {\"Annual Income (k$)\": 17, \"Spending Score (1-100)\": 40}, {\"Annual Income (k$)\": 17, \"Spending Score (1-100)\": 76}, {\"Annual Income (k$)\": 18, \"Spending Score (1-100)\": 6}, {\"Annual Income (k$)\": 18, \"Spending Score (1-100)\": 94}, {\"Annual Income (k$)\": 19, \"Spending Score (1-100)\": 3}, {\"Annual Income (k$)\": 19, \"Spending Score (1-100)\": 72}, {\"Annual Income (k$)\": 19, \"Spending Score (1-100)\": 14}, {\"Annual Income (k$)\": 19, \"Spending Score (1-100)\": 99}, {\"Annual Income (k$)\": 20, \"Spending Score (1-100)\": 15}, {\"Annual Income (k$)\": 20, \"Spending Score (1-100)\": 77}, {\"Annual Income (k$)\": 20, \"Spending Score (1-100)\": 13}, {\"Annual Income (k$)\": 20, \"Spending Score (1-100)\": 79}, {\"Annual Income (k$)\": 21, \"Spending Score (1-100)\": 35}, {\"Annual Income (k$)\": 21, \"Spending Score (1-100)\": 66}, {\"Annual Income (k$)\": 23, \"Spending Score 
(1-100)\": 29}, {\"Annual Income (k$)\": 23, \"Spending Score (1-100)\": 98}, {\"Annual Income (k$)\": 24, \"Spending Score (1-100)\": 35}, {\"Annual Income (k$)\": 24, \"Spending Score (1-100)\": 73}, {\"Annual Income (k$)\": 25, \"Spending Score (1-100)\": 5}, {\"Annual Income (k$)\": 25, \"Spending Score (1-100)\": 73}, {\"Annual Income (k$)\": 28, \"Spending Score (1-100)\": 14}, {\"Annual Income (k$)\": 28, \"Spending Score (1-100)\": 82}, {\"Annual Income (k$)\": 28, \"Spending Score (1-100)\": 32}, {\"Annual Income (k$)\": 28, \"Spending Score (1-100)\": 61}, {\"Annual Income (k$)\": 29, \"Spending Score (1-100)\": 31}, {\"Annual Income (k$)\": 29, \"Spending Score (1-100)\": 87}, {\"Annual Income (k$)\": 30, \"Spending Score (1-100)\": 4}, {\"Annual Income (k$)\": 30, \"Spending Score (1-100)\": 73}, {\"Annual Income (k$)\": 33, \"Spending Score (1-100)\": 4}, {\"Annual Income (k$)\": 33, \"Spending Score (1-100)\": 92}, {\"Annual Income (k$)\": 33, \"Spending Score (1-100)\": 14}, {\"Annual Income (k$)\": 33, \"Spending Score (1-100)\": 81}, {\"Annual Income (k$)\": 34, \"Spending Score (1-100)\": 17}, {\"Annual Income (k$)\": 34, \"Spending Score (1-100)\": 73}, {\"Annual Income (k$)\": 37, \"Spending Score (1-100)\": 26}, {\"Annual Income (k$)\": 37, \"Spending Score (1-100)\": 75}, {\"Annual Income (k$)\": 38, \"Spending Score (1-100)\": 35}, {\"Annual Income (k$)\": 38, \"Spending Score (1-100)\": 92}, {\"Annual Income (k$)\": 39, \"Spending Score (1-100)\": 36}, {\"Annual Income (k$)\": 39, \"Spending Score (1-100)\": 61}, {\"Annual Income (k$)\": 39, \"Spending Score (1-100)\": 28}, {\"Annual Income (k$)\": 39, \"Spending Score (1-100)\": 65}, {\"Annual Income (k$)\": 40, \"Spending Score (1-100)\": 55}, {\"Annual Income (k$)\": 40, \"Spending Score (1-100)\": 47}, {\"Annual Income (k$)\": 40, \"Spending Score (1-100)\": 42}, {\"Annual Income (k$)\": 40, \"Spending Score (1-100)\": 42}, {\"Annual Income (k$)\": 42, \"Spending Score (1-100)\": 52}, 
{\"Annual Income (k$)\": 42, \"Spending Score (1-100)\": 60}, {\"Annual Income (k$)\": 43, \"Spending Score (1-100)\": 54}, {\"Annual Income (k$)\": 43, \"Spending Score (1-100)\": 60}, {\"Annual Income (k$)\": 43, \"Spending Score (1-100)\": 45}, {\"Annual Income (k$)\": 43, \"Spending Score (1-100)\": 41}, {\"Annual Income (k$)\": 44, \"Spending Score (1-100)\": 50}, {\"Annual Income (k$)\": 44, \"Spending Score (1-100)\": 46}, {\"Annual Income (k$)\": 46, \"Spending Score (1-100)\": 51}, {\"Annual Income (k$)\": 46, \"Spending Score (1-100)\": 46}, {\"Annual Income (k$)\": 46, \"Spending Score (1-100)\": 56}, {\"Annual Income (k$)\": 46, \"Spending Score (1-100)\": 55}, {\"Annual Income (k$)\": 47, \"Spending Score (1-100)\": 52}, {\"Annual Income (k$)\": 47, \"Spending Score (1-100)\": 59}, {\"Annual Income (k$)\": 48, \"Spending Score (1-100)\": 51}, {\"Annual Income (k$)\": 48, \"Spending Score (1-100)\": 59}, {\"Annual Income (k$)\": 48, \"Spending Score (1-100)\": 50}, {\"Annual Income (k$)\": 48, \"Spending Score (1-100)\": 48}, {\"Annual Income (k$)\": 48, \"Spending Score (1-100)\": 59}, {\"Annual Income (k$)\": 48, \"Spending Score (1-100)\": 47}, {\"Annual Income (k$)\": 49, \"Spending Score (1-100)\": 55}, {\"Annual Income (k$)\": 49, \"Spending Score (1-100)\": 42}, {\"Annual Income (k$)\": 50, \"Spending Score (1-100)\": 49}, {\"Annual Income (k$)\": 50, \"Spending Score (1-100)\": 56}, {\"Annual Income (k$)\": 54, \"Spending Score (1-100)\": 47}, {\"Annual Income (k$)\": 54, \"Spending Score (1-100)\": 54}, {\"Annual Income (k$)\": 54, \"Spending Score (1-100)\": 53}, {\"Annual Income (k$)\": 54, \"Spending Score (1-100)\": 48}, {\"Annual Income (k$)\": 54, \"Spending Score (1-100)\": 52}, {\"Annual Income (k$)\": 54, \"Spending Score (1-100)\": 42}, {\"Annual Income (k$)\": 54, \"Spending Score (1-100)\": 51}, {\"Annual Income (k$)\": 54, \"Spending Score (1-100)\": 55}, {\"Annual Income (k$)\": 54, \"Spending Score (1-100)\": 41}, {\"Annual 
Income (k$)\": 54, \"Spending Score (1-100)\": 44}, {\"Annual Income (k$)\": 54, \"Spending Score (1-100)\": 57}, {\"Annual Income (k$)\": 54, \"Spending Score (1-100)\": 46}, {\"Annual Income (k$)\": 57, \"Spending Score (1-100)\": 58}, {\"Annual Income (k$)\": 57, \"Spending Score (1-100)\": 55}, {\"Annual Income (k$)\": 58, \"Spending Score (1-100)\": 60}, {\"Annual Income (k$)\": 58, \"Spending Score (1-100)\": 46}, {\"Annual Income (k$)\": 59, \"Spending Score (1-100)\": 55}, {\"Annual Income (k$)\": 59, \"Spending Score (1-100)\": 41}, {\"Annual Income (k$)\": 60, \"Spending Score (1-100)\": 49}, {\"Annual Income (k$)\": 60, \"Spending Score (1-100)\": 40}, {\"Annual Income (k$)\": 60, \"Spending Score (1-100)\": 42}, {\"Annual Income (k$)\": 60, \"Spending Score (1-100)\": 52}, {\"Annual Income (k$)\": 60, \"Spending Score (1-100)\": 47}, {\"Annual Income (k$)\": 60, \"Spending Score (1-100)\": 50}, {\"Annual Income (k$)\": 61, \"Spending Score (1-100)\": 42}, {\"Annual Income (k$)\": 61, \"Spending Score (1-100)\": 49}, {\"Annual Income (k$)\": 62, \"Spending Score (1-100)\": 41}, {\"Annual Income (k$)\": 62, \"Spending Score (1-100)\": 48}, {\"Annual Income (k$)\": 62, \"Spending Score (1-100)\": 59}, {\"Annual Income (k$)\": 62, \"Spending Score (1-100)\": 55}, {\"Annual Income (k$)\": 62, \"Spending Score (1-100)\": 56}, {\"Annual Income (k$)\": 62, \"Spending Score (1-100)\": 42}, {\"Annual Income (k$)\": 63, \"Spending Score (1-100)\": 50}, {\"Annual Income (k$)\": 63, \"Spending Score (1-100)\": 46}, {\"Annual Income (k$)\": 63, \"Spending Score (1-100)\": 43}, {\"Annual Income (k$)\": 63, \"Spending Score (1-100)\": 48}, {\"Annual Income (k$)\": 63, \"Spending Score (1-100)\": 52}, {\"Annual Income (k$)\": 63, \"Spending Score (1-100)\": 54}, {\"Annual Income (k$)\": 64, \"Spending Score (1-100)\": 42}, {\"Annual Income (k$)\": 64, \"Spending Score (1-100)\": 46}, {\"Annual Income (k$)\": 65, \"Spending Score (1-100)\": 48}, {\"Annual Income (k$)\": 
65, \"Spending Score (1-100)\": 50}, {\"Annual Income (k$)\": 65, \"Spending Score (1-100)\": 43}, {\"Annual Income (k$)\": 65, \"Spending Score (1-100)\": 59}, {\"Annual Income (k$)\": 67, \"Spending Score (1-100)\": 43}, {\"Annual Income (k$)\": 67, \"Spending Score (1-100)\": 57}, {\"Annual Income (k$)\": 67, \"Spending Score (1-100)\": 56}, {\"Annual Income (k$)\": 67, \"Spending Score (1-100)\": 40}, {\"Annual Income (k$)\": 69, \"Spending Score (1-100)\": 58}, {\"Annual Income (k$)\": 69, \"Spending Score (1-100)\": 91}, {\"Annual Income (k$)\": 70, \"Spending Score (1-100)\": 29}, {\"Annual Income (k$)\": 70, \"Spending Score (1-100)\": 77}, {\"Annual Income (k$)\": 71, \"Spending Score (1-100)\": 35}, {\"Annual Income (k$)\": 71, \"Spending Score (1-100)\": 95}, {\"Annual Income (k$)\": 71, \"Spending Score (1-100)\": 11}, {\"Annual Income (k$)\": 71, \"Spending Score (1-100)\": 75}, {\"Annual Income (k$)\": 71, \"Spending Score (1-100)\": 9}, {\"Annual Income (k$)\": 71, \"Spending Score (1-100)\": 75}, {\"Annual Income (k$)\": 72, \"Spending Score (1-100)\": 34}, {\"Annual Income (k$)\": 72, \"Spending Score (1-100)\": 71}, {\"Annual Income (k$)\": 73, \"Spending Score (1-100)\": 5}, {\"Annual Income (k$)\": 73, \"Spending Score (1-100)\": 88}, {\"Annual Income (k$)\": 73, \"Spending Score (1-100)\": 7}, {\"Annual Income (k$)\": 73, \"Spending Score (1-100)\": 73}, {\"Annual Income (k$)\": 74, \"Spending Score (1-100)\": 10}, {\"Annual Income (k$)\": 74, \"Spending Score (1-100)\": 72}, {\"Annual Income (k$)\": 75, \"Spending Score (1-100)\": 5}, {\"Annual Income (k$)\": 75, \"Spending Score (1-100)\": 93}, {\"Annual Income (k$)\": 76, \"Spending Score (1-100)\": 40}, {\"Annual Income (k$)\": 76, \"Spending Score (1-100)\": 87}, {\"Annual Income (k$)\": 77, \"Spending Score (1-100)\": 12}, {\"Annual Income (k$)\": 77, \"Spending Score (1-100)\": 97}, {\"Annual Income (k$)\": 77, \"Spending Score (1-100)\": 36}, {\"Annual Income (k$)\": 77, \"Spending 
Score (1-100)\": 74}, {\"Annual Income (k$)\": 78, \"Spending Score (1-100)\": 22}, {\"Annual Income (k$)\": 78, \"Spending Score (1-100)\": 90}, {\"Annual Income (k$)\": 78, \"Spending Score (1-100)\": 17}, {\"Annual Income (k$)\": 78, \"Spending Score (1-100)\": 88}, {\"Annual Income (k$)\": 78, \"Spending Score (1-100)\": 20}, {\"Annual Income (k$)\": 78, \"Spending Score (1-100)\": 76}, {\"Annual Income (k$)\": 78, \"Spending Score (1-100)\": 16}, {\"Annual Income (k$)\": 78, \"Spending Score (1-100)\": 89}, {\"Annual Income (k$)\": 78, \"Spending Score (1-100)\": 1}, {\"Annual Income (k$)\": 78, \"Spending Score (1-100)\": 78}, {\"Annual Income (k$)\": 78, \"Spending Score (1-100)\": 1}, {\"Annual Income (k$)\": 78, \"Spending Score (1-100)\": 73}, {\"Annual Income (k$)\": 79, \"Spending Score (1-100)\": 35}, {\"Annual Income (k$)\": 79, \"Spending Score (1-100)\": 83}, {\"Annual Income (k$)\": 81, \"Spending Score (1-100)\": 5}, {\"Annual Income (k$)\": 81, \"Spending Score (1-100)\": 93}, {\"Annual Income (k$)\": 85, \"Spending Score (1-100)\": 26}, {\"Annual Income (k$)\": 85, \"Spending Score (1-100)\": 75}, {\"Annual Income (k$)\": 86, \"Spending Score (1-100)\": 20}, {\"Annual Income (k$)\": 86, \"Spending Score (1-100)\": 95}, {\"Annual Income (k$)\": 87, \"Spending Score (1-100)\": 27}, {\"Annual Income (k$)\": 87, \"Spending Score (1-100)\": 63}, {\"Annual Income (k$)\": 87, \"Spending Score (1-100)\": 13}, {\"Annual Income (k$)\": 87, \"Spending Score (1-100)\": 75}, {\"Annual Income (k$)\": 87, \"Spending Score (1-100)\": 10}, {\"Annual Income (k$)\": 87, \"Spending Score (1-100)\": 92}, {\"Annual Income (k$)\": 88, \"Spending Score (1-100)\": 13}, {\"Annual Income (k$)\": 88, \"Spending Score (1-100)\": 86}, {\"Annual Income (k$)\": 88, \"Spending Score (1-100)\": 15}, {\"Annual Income (k$)\": 88, \"Spending Score (1-100)\": 69}, {\"Annual Income (k$)\": 93, \"Spending Score (1-100)\": 14}, {\"Annual Income (k$)\": 93, \"Spending Score (1-100)\": 
90}, {\"Annual Income (k$)\": 97, \"Spending Score (1-100)\": 32}, {\"Annual Income (k$)\": 97, \"Spending Score (1-100)\": 86}, {\"Annual Income (k$)\": 98, \"Spending Score (1-100)\": 15}, {\"Annual Income (k$)\": 98, \"Spending Score (1-100)\": 88}, {\"Annual Income (k$)\": 99, \"Spending Score (1-100)\": 39}, {\"Annual Income (k$)\": 99, \"Spending Score (1-100)\": 97}, {\"Annual Income (k$)\": 101, \"Spending Score (1-100)\": 24}, {\"Annual Income (k$)\": 101, \"Spending Score (1-100)\": 68}, {\"Annual Income (k$)\": 103, \"Spending Score (1-100)\": 17}, {\"Annual Income (k$)\": 103, \"Spending Score (1-100)\": 85}, {\"Annual Income (k$)\": 103, \"Spending Score (1-100)\": 23}, {\"Annual Income (k$)\": 103, \"Spending Score (1-100)\": 69}, {\"Annual Income (k$)\": 113, \"Spending Score (1-100)\": 8}, {\"Annual Income (k$)\": 113, \"Spending Score (1-100)\": 91}, {\"Annual Income (k$)\": 120, \"Spending Score (1-100)\": 16}, {\"Annual Income (k$)\": 120, \"Spending Score (1-100)\": 79}, {\"Annual Income (k$)\": 126, \"Spending Score (1-100)\": 28}, {\"Annual Income (k$)\": 126, \"Spending Score (1-100)\": 74}, {\"Annual Income (k$)\": 137, \"Spending Score (1-100)\": 18}, {\"Annual Income (k$)\": 137, \"Spending Score (1-100)\": 83}]}}, {\"mode\": \"vega-lite\"});\n",
              "</script>"
            ],
            "text/plain": [
              "alt.Chart(...)"
            ]
          },
          "metadata": {},
          "execution_count": 11
        }
      ],
      "execution_count": null,
      "metadata": {
        "id": "vr0n7L5eXGC7",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 385
        },
        "outputId": "79d9a218-83ea-4daa-bafa-261fd71edcd1"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "[json-tag: code-generated-json-7-0-1738159089351249376]\n",
        "\n",
        "I'll now demonstrate how to determine the optimal values for the `eps` and `min_samples` hyperparameters in DBSCAN. These hyperparameters are crucial for controlling the density of clusters.\n",
        "\n",
        "I'll use k-nearest-neighbor (KNN) distances to help find a suitable value for `eps`. Computing each point's distance to its k-th nearest neighbor and sorting those distances shows where the data stops being densely packed; the distance at that bend is a reasonable starting value for `eps`."
      ],
      "metadata": {
        "id": "limMjxqEXGC7"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.cluster import DBSCAN\n",
        "\n",
        "# DBSCAN hyperparameters, named once so the model and the chart title cannot drift apart\n",
        "EPS = 9\n",
        "MIN_SAMPLES = 5\n",
        "\n",
        "# Create a DBSCAN model\n",
        "dbscan = DBSCAN(eps=EPS, min_samples=MIN_SAMPLES)\n",
        "\n",
        "# Fit on the two feature columns only, selected by name so that re-running this\n",
        "# cell never feeds a previously added 'Cluster' column back into the model.\n",
        "features = X[['Annual Income (k$)', 'Spending Score (1-100)']]\n",
        "\n",
        "# Attach the predicted labels with assign(), which builds a new DataFrame instead of\n",
        "# writing into what may be a slice of df (this avoids SettingWithCopyWarning).\n",
        "# scikit-learn labels noise points as -1.\n",
        "X = X.assign(Cluster=dbscan.fit_predict(features))\n",
        "\n",
        "# Count the number of data points in each cluster\n",
        "cluster_counts = X['Cluster'].value_counts()\n",
        "print(cluster_counts.to_markdown(numalign=\"left\", stralign=\"left\"))\n",
        "\n",
        "# Create a scatter plot coloured by cluster label (treated as a nominal category)\n",
        "chart = alt.Chart(X).mark_point().encode(\n",
        "    x='Annual Income (k$)',\n",
        "    y='Spending Score (1-100)',\n",
        "    color='Cluster:N',\n",
        "    tooltip=['Annual Income (k$)', 'Spending Score (1-100)', 'Cluster']\n",
        ").properties(title=f'DBSCAN Clustering with eps={EPS} and min_samples={MIN_SAMPLES}').interactive()\n",
        "\n",
        "# Save the chart to a JSON file, then show it as the cell's output\n",
        "chart.save('dbscan_clustering_scatter_plot.json')\n",
        "chart"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "| Cluster   | count   |\n",
            "|:----------|:--------|\n",
            "| 3         | 88      |\n",
            "| 4         | 31      |\n",
            "| -1        | 23      |\n",
            "| 5         | 23      |\n",
            "| 0         | 16      |\n",
            "| 1         | 12      |\n",
            "| 2         | 7       |\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "<ipython-input-12-4601680d29a6>:7: SettingWithCopyWarning: \n",
            "A value is trying to be set on a copy of a slice from a DataFrame.\n",
            "Try using .loc[row_indexer,col_indexer] = value instead\n",
            "\n",
            "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
            "  X['Cluster'] = dbscan.fit_predict(X)\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "\n",
              "<style>\n",
              "  #altair-viz-8ea89506236147e88e987d3561deeccf.vega-embed {\n",
              "    width: 100%;\n",
              "    display: flex;\n",
              "  }\n",
              "\n",
              "  #altair-viz-8ea89506236147e88e987d3561deeccf.vega-embed details,\n",
              "  #altair-viz-8ea89506236147e88e987d3561deeccf.vega-embed details summary {\n",
              "    position: relative;\n",
              "  }\n",
              "</style>\n",
              "<div id=\"altair-viz-8ea89506236147e88e987d3561deeccf\"></div>\n",
              "<script type=\"text/javascript\">\n",
              "  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
              "  (function(spec, embedOpt){\n",
              "    let outputDiv = document.currentScript.previousElementSibling;\n",
              "    if (outputDiv.id !== \"altair-viz-8ea89506236147e88e987d3561deeccf\") {\n",
              "      outputDiv = document.getElementById(\"altair-viz-8ea89506236147e88e987d3561deeccf\");\n",
              "    }\n",
              "\n",
              "    const paths = {\n",
              "      \"vega\": \"https://cdn.jsdelivr.net/npm/vega@5?noext\",\n",
              "      \"vega-lib\": \"https://cdn.jsdelivr.net/npm/vega-lib?noext\",\n",
              "      \"vega-lite\": \"https://cdn.jsdelivr.net/npm/vega-lite@5.20.1?noext\",\n",
              "      \"vega-embed\": \"https://cdn.jsdelivr.net/npm/vega-embed@6?noext\",\n",
              "    };\n",
              "\n",
              "    function maybeLoadScript(lib, version) {\n",
              "      var key = `${lib.replace(\"-\", \"\")}_version`;\n",
              "      return (VEGA_DEBUG[key] == version) ?\n",
              "        Promise.resolve(paths[lib]) :\n",
              "        new Promise(function(resolve, reject) {\n",
              "          var s = document.createElement('script');\n",
              "          document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
              "          s.async = true;\n",
              "          s.onload = () => {\n",
              "            VEGA_DEBUG[key] = version;\n",
              "            return resolve(paths[lib]);\n",
              "          };\n",
              "          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
              "          s.src = paths[lib];\n",
              "        });\n",
              "    }\n",
              "\n",
              "    function showError(err) {\n",
              "      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
              "      throw err;\n",
              "    }\n",
              "\n",
              "    function displayChart(vegaEmbed) {\n",
              "      vegaEmbed(outputDiv, spec, embedOpt)\n",
              "        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
              "    }\n",
              "\n",
              "    if(typeof define === \"function\" && define.amd) {\n",
              "      requirejs.config({paths});\n",
              "      let deps = [\"vega-embed\"];\n",
              "      require(deps, displayChart, err => showError(`Error loading script: ${err.message}`));\n",
              "    } else {\n",
              "      maybeLoadScript(\"vega\", \"5\")\n",
              "        .then(() => maybeLoadScript(\"vega-lite\", \"5.20.1\"))\n",
              "        .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
              "        .catch(showError)\n",
              "        .then(() => displayChart(vegaEmbed));\n",
              "    }\n",
              "  })({\"config\": {\"view\": {\"continuousWidth\": 300, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-fecdf28bbc012d7d45555f7a6c8a5641\"}, \"mark\": {\"type\": \"point\"}, \"encoding\": {\"color\": {\"field\": \"Cluster\", \"type\": \"nominal\"}, \"tooltip\": [{\"field\": \"Annual Income (k$)\", \"type\": \"quantitative\"}, {\"field\": \"Spending Score (1-100)\", \"type\": \"quantitative\"}, {\"field\": \"Cluster\", \"type\": \"quantitative\"}], \"x\": {\"field\": \"Annual Income (k$)\", \"type\": \"quantitative\"}, \"y\": {\"field\": \"Spending Score (1-100)\", \"type\": \"quantitative\"}}, \"params\": [{\"name\": \"param_8\", \"select\": {\"type\": \"interval\", \"encodings\": [\"x\", \"y\"]}, \"bind\": \"scales\"}], \"title\": \"DBSCAN Clustering with eps=9 and min_samples=5\", \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.20.1.json\", \"datasets\": {\"data-fecdf28bbc012d7d45555f7a6c8a5641\": [{\"Annual Income (k$)\": 15, \"Spending Score (1-100)\": 39, \"Cluster\": 2}, {\"Annual Income (k$)\": 15, \"Spending Score (1-100)\": 81, \"Cluster\": 0}, {\"Annual Income (k$)\": 16, \"Spending Score (1-100)\": 6, \"Cluster\": 1}, {\"Annual Income (k$)\": 16, \"Spending Score (1-100)\": 77, \"Cluster\": 0}, {\"Annual Income (k$)\": 17, \"Spending Score (1-100)\": 40, \"Cluster\": 2}, {\"Annual Income (k$)\": 17, \"Spending Score (1-100)\": 76, \"Cluster\": 0}, {\"Annual Income (k$)\": 18, \"Spending Score (1-100)\": 6, \"Cluster\": 1}, {\"Annual Income (k$)\": 18, \"Spending Score (1-100)\": 94, \"Cluster\": -1}, {\"Annual Income (k$)\": 19, \"Spending Score (1-100)\": 3, \"Cluster\": 1}, {\"Annual Income (k$)\": 19, \"Spending Score (1-100)\": 72, \"Cluster\": 0}, {\"Annual Income (k$)\": 19, \"Spending Score (1-100)\": 14, \"Cluster\": 1}, {\"Annual Income (k$)\": 19, \"Spending Score (1-100)\": 99, \"Cluster\": -1}, {\"Annual Income (k$)\": 20, \"Spending Score (1-100)\": 15, \"Cluster\": 1}, {\"Annual Income (k$)\": 20, 
\"Spending Score (1-100)\": 77, \"Cluster\": 0}, {\"Annual Income (k$)\": 20, \"Spending Score (1-100)\": 13, \"Cluster\": 1}, {\"Annual Income (k$)\": 20, \"Spending Score (1-100)\": 79, \"Cluster\": 0}, {\"Annual Income (k$)\": 21, \"Spending Score (1-100)\": 35, \"Cluster\": 2}, {\"Annual Income (k$)\": 21, \"Spending Score (1-100)\": 66, \"Cluster\": 0}, {\"Annual Income (k$)\": 23, \"Spending Score (1-100)\": 29, \"Cluster\": 2}, {\"Annual Income (k$)\": 23, \"Spending Score (1-100)\": 98, \"Cluster\": -1}, {\"Annual Income (k$)\": 24, \"Spending Score (1-100)\": 35, \"Cluster\": 2}, {\"Annual Income (k$)\": 24, \"Spending Score (1-100)\": 73, \"Cluster\": 0}, {\"Annual Income (k$)\": 25, \"Spending Score (1-100)\": 5, \"Cluster\": 1}, {\"Annual Income (k$)\": 25, \"Spending Score (1-100)\": 73, \"Cluster\": 0}, {\"Annual Income (k$)\": 28, \"Spending Score (1-100)\": 14, \"Cluster\": 1}, {\"Annual Income (k$)\": 28, \"Spending Score (1-100)\": 82, \"Cluster\": 0}, {\"Annual Income (k$)\": 28, \"Spending Score (1-100)\": 32, \"Cluster\": 2}, {\"Annual Income (k$)\": 28, \"Spending Score (1-100)\": 61, \"Cluster\": 0}, {\"Annual Income (k$)\": 29, \"Spending Score (1-100)\": 31, \"Cluster\": 2}, {\"Annual Income (k$)\": 29, \"Spending Score (1-100)\": 87, \"Cluster\": 0}, {\"Annual Income (k$)\": 30, \"Spending Score (1-100)\": 4, \"Cluster\": 1}, {\"Annual Income (k$)\": 30, \"Spending Score (1-100)\": 73, \"Cluster\": 0}, {\"Annual Income (k$)\": 33, \"Spending Score (1-100)\": 4, \"Cluster\": 1}, {\"Annual Income (k$)\": 33, \"Spending Score (1-100)\": 92, \"Cluster\": -1}, {\"Annual Income (k$)\": 33, \"Spending Score (1-100)\": 14, \"Cluster\": 1}, {\"Annual Income (k$)\": 33, \"Spending Score (1-100)\": 81, \"Cluster\": 0}, {\"Annual Income (k$)\": 34, \"Spending Score (1-100)\": 17, \"Cluster\": 1}, {\"Annual Income (k$)\": 34, \"Spending Score (1-100)\": 73, \"Cluster\": 0}, {\"Annual Income (k$)\": 37, \"Spending Score (1-100)\": 26, \"Cluster\": -1}, 
{\"Annual Income (k$)\": 37, \"Spending Score (1-100)\": 75, \"Cluster\": 0}, {\"Annual Income (k$)\": 38, \"Spending Score (1-100)\": 35, \"Cluster\": 3}, {\"Annual Income (k$)\": 38, \"Spending Score (1-100)\": 92, \"Cluster\": -1}, {\"Annual Income (k$)\": 39, \"Spending Score (1-100)\": 36, \"Cluster\": 3}, {\"Annual Income (k$)\": 39, \"Spending Score (1-100)\": 61, \"Cluster\": 3}, {\"Annual Income (k$)\": 39, \"Spending Score (1-100)\": 28, \"Cluster\": 3}, {\"Annual Income (k$)\": 39, \"Spending Score (1-100)\": 65, \"Cluster\": 3}, {\"Annual Income (k$)\": 40, \"Spending Score (1-100)\": 55, \"Cluster\": 3}, {\"Annual Income (k$)\": 40, \"Spending Score (1-100)\": 47, \"Cluster\": 3}, {\"Annual Income (k$)\": 40, \"Spending Score (1-100)\": 42, \"Cluster\": 3}, {\"Annual Income (k$)\": 40, \"Spending Score (1-100)\": 42, \"Cluster\": 3}, {\"Annual Income (k$)\": 42, \"Spending Score (1-100)\": 52, \"Cluster\": 3}, {\"Annual Income (k$)\": 42, \"Spending Score (1-100)\": 60, \"Cluster\": 3}, {\"Annual Income (k$)\": 43, \"Spending Score (1-100)\": 54, \"Cluster\": 3}, {\"Annual Income (k$)\": 43, \"Spending Score (1-100)\": 60, \"Cluster\": 3}, {\"Annual Income (k$)\": 43, \"Spending Score (1-100)\": 45, \"Cluster\": 3}, {\"Annual Income (k$)\": 43, \"Spending Score (1-100)\": 41, \"Cluster\": 3}, {\"Annual Income (k$)\": 44, \"Spending Score (1-100)\": 50, \"Cluster\": 3}, {\"Annual Income (k$)\": 44, \"Spending Score (1-100)\": 46, \"Cluster\": 3}, {\"Annual Income (k$)\": 46, \"Spending Score (1-100)\": 51, \"Cluster\": 3}, {\"Annual Income (k$)\": 46, \"Spending Score (1-100)\": 46, \"Cluster\": 3}, {\"Annual Income (k$)\": 46, \"Spending Score (1-100)\": 56, \"Cluster\": 3}, {\"Annual Income (k$)\": 46, \"Spending Score (1-100)\": 55, \"Cluster\": 3}, {\"Annual Income (k$)\": 47, \"Spending Score (1-100)\": 52, \"Cluster\": 3}, {\"Annual Income (k$)\": 47, \"Spending Score (1-100)\": 59, \"Cluster\": 3}, {\"Annual Income (k$)\": 48, \"Spending Score 
(1-100)\": 51, \"Cluster\": 3}, {\"Annual Income (k$)\": 48, \"Spending Score (1-100)\": 59, \"Cluster\": 3}, {\"Annual Income (k$)\": 48, \"Spending Score (1-100)\": 50, \"Cluster\": 3}, {\"Annual Income (k$)\": 48, \"Spending Score (1-100)\": 48, \"Cluster\": 3}, {\"Annual Income (k$)\": 48, \"Spending Score (1-100)\": 59, \"Cluster\": 3}, {\"Annual Income (k$)\": 48, \"Spending Score (1-100)\": 47, \"Cluster\": 3}, {\"Annual Income (k$)\": 49, \"Spending Score (1-100)\": 55, \"Cluster\": 3}, {\"Annual Income (k$)\": 49, \"Spending Score (1-100)\": 42, \"Cluster\": 3}, {\"Annual Income (k$)\": 50, \"Spending Score (1-100)\": 49, \"Cluster\": 3}, {\"Annual Income (k$)\": 50, \"Spending Score (1-100)\": 56, \"Cluster\": 3}, {\"Annual Income (k$)\": 54, \"Spending Score (1-100)\": 47, \"Cluster\": 3}, {\"Annual Income (k$)\": 54, \"Spending Score (1-100)\": 54, \"Cluster\": 3}, {\"Annual Income (k$)\": 54, \"Spending Score (1-100)\": 53, \"Cluster\": 3}, {\"Annual Income (k$)\": 54, \"Spending Score (1-100)\": 48, \"Cluster\": 3}, {\"Annual Income (k$)\": 54, \"Spending Score (1-100)\": 52, \"Cluster\": 3}, {\"Annual Income (k$)\": 54, \"Spending Score (1-100)\": 42, \"Cluster\": 3}, {\"Annual Income (k$)\": 54, \"Spending Score (1-100)\": 51, \"Cluster\": 3}, {\"Annual Income (k$)\": 54, \"Spending Score (1-100)\": 55, \"Cluster\": 3}, {\"Annual Income (k$)\": 54, \"Spending Score (1-100)\": 41, \"Cluster\": 3}, {\"Annual Income (k$)\": 54, \"Spending Score (1-100)\": 44, \"Cluster\": 3}, {\"Annual Income (k$)\": 54, \"Spending Score (1-100)\": 57, \"Cluster\": 3}, {\"Annual Income (k$)\": 54, \"Spending Score (1-100)\": 46, \"Cluster\": 3}, {\"Annual Income (k$)\": 57, \"Spending Score (1-100)\": 58, \"Cluster\": 3}, {\"Annual Income (k$)\": 57, \"Spending Score (1-100)\": 55, \"Cluster\": 3}, {\"Annual Income (k$)\": 58, \"Spending Score (1-100)\": 60, \"Cluster\": 3}, {\"Annual Income (k$)\": 58, \"Spending Score (1-100)\": 46, \"Cluster\": 3}, {\"Annual Income 
(k$)\": 59, \"Spending Score (1-100)\": 55, \"Cluster\": 3}, {\"Annual Income (k$)\": 59, \"Spending Score (1-100)\": 41, \"Cluster\": 3}, {\"Annual Income (k$)\": 60, \"Spending Score (1-100)\": 49, \"Cluster\": 3}, {\"Annual Income (k$)\": 60, \"Spending Score (1-100)\": 40, \"Cluster\": 3}, {\"Annual Income (k$)\": 60, \"Spending Score (1-100)\": 42, \"Cluster\": 3}, {\"Annual Income (k$)\": 60, \"Spending Score (1-100)\": 52, \"Cluster\": 3}, {\"Annual Income (k$)\": 60, \"Spending Score (1-100)\": 47, \"Cluster\": 3}, {\"Annual Income (k$)\": 60, \"Spending Score (1-100)\": 50, \"Cluster\": 3}, {\"Annual Income (k$)\": 61, \"Spending Score (1-100)\": 42, \"Cluster\": 3}, {\"Annual Income (k$)\": 61, \"Spending Score (1-100)\": 49, \"Cluster\": 3}, {\"Annual Income (k$)\": 62, \"Spending Score (1-100)\": 41, \"Cluster\": 3}, {\"Annual Income (k$)\": 62, \"Spending Score (1-100)\": 48, \"Cluster\": 3}, {\"Annual Income (k$)\": 62, \"Spending Score (1-100)\": 59, \"Cluster\": 3}, {\"Annual Income (k$)\": 62, \"Spending Score (1-100)\": 55, \"Cluster\": 3}, {\"Annual Income (k$)\": 62, \"Spending Score (1-100)\": 56, \"Cluster\": 3}, {\"Annual Income (k$)\": 62, \"Spending Score (1-100)\": 42, \"Cluster\": 3}, {\"Annual Income (k$)\": 63, \"Spending Score (1-100)\": 50, \"Cluster\": 3}, {\"Annual Income (k$)\": 63, \"Spending Score (1-100)\": 46, \"Cluster\": 3}, {\"Annual Income (k$)\": 63, \"Spending Score (1-100)\": 43, \"Cluster\": 3}, {\"Annual Income (k$)\": 63, \"Spending Score (1-100)\": 48, \"Cluster\": 3}, {\"Annual Income (k$)\": 63, \"Spending Score (1-100)\": 52, \"Cluster\": 3}, {\"Annual Income (k$)\": 63, \"Spending Score (1-100)\": 54, \"Cluster\": 3}, {\"Annual Income (k$)\": 64, \"Spending Score (1-100)\": 42, \"Cluster\": 3}, {\"Annual Income (k$)\": 64, \"Spending Score (1-100)\": 46, \"Cluster\": 3}, {\"Annual Income (k$)\": 65, \"Spending Score (1-100)\": 48, \"Cluster\": 3}, {\"Annual Income (k$)\": 65, \"Spending Score (1-100)\": 50, 
\"Cluster\": 3}, {\"Annual Income (k$)\": 65, \"Spending Score (1-100)\": 43, \"Cluster\": 3}, {\"Annual Income (k$)\": 65, \"Spending Score (1-100)\": 59, \"Cluster\": 3}, {\"Annual Income (k$)\": 67, \"Spending Score (1-100)\": 43, \"Cluster\": 3}, {\"Annual Income (k$)\": 67, \"Spending Score (1-100)\": 57, \"Cluster\": 3}, {\"Annual Income (k$)\": 67, \"Spending Score (1-100)\": 56, \"Cluster\": 3}, {\"Annual Income (k$)\": 67, \"Spending Score (1-100)\": 40, \"Cluster\": 3}, {\"Annual Income (k$)\": 69, \"Spending Score (1-100)\": 58, \"Cluster\": 3}, {\"Annual Income (k$)\": 69, \"Spending Score (1-100)\": 91, \"Cluster\": 4}, {\"Annual Income (k$)\": 70, \"Spending Score (1-100)\": 29, \"Cluster\": 3}, {\"Annual Income (k$)\": 70, \"Spending Score (1-100)\": 77, \"Cluster\": 4}, {\"Annual Income (k$)\": 71, \"Spending Score (1-100)\": 35, \"Cluster\": 3}, {\"Annual Income (k$)\": 71, \"Spending Score (1-100)\": 95, \"Cluster\": 4}, {\"Annual Income (k$)\": 71, \"Spending Score (1-100)\": 11, \"Cluster\": 5}, {\"Annual Income (k$)\": 71, \"Spending Score (1-100)\": 75, \"Cluster\": 4}, {\"Annual Income (k$)\": 71, \"Spending Score (1-100)\": 9, \"Cluster\": 5}, {\"Annual Income (k$)\": 71, \"Spending Score (1-100)\": 75, \"Cluster\": 4}, {\"Annual Income (k$)\": 72, \"Spending Score (1-100)\": 34, \"Cluster\": 3}, {\"Annual Income (k$)\": 72, \"Spending Score (1-100)\": 71, \"Cluster\": 4}, {\"Annual Income (k$)\": 73, \"Spending Score (1-100)\": 5, \"Cluster\": 5}, {\"Annual Income (k$)\": 73, \"Spending Score (1-100)\": 88, \"Cluster\": 4}, {\"Annual Income (k$)\": 73, \"Spending Score (1-100)\": 7, \"Cluster\": 5}, {\"Annual Income (k$)\": 73, \"Spending Score (1-100)\": 73, \"Cluster\": 4}, {\"Annual Income (k$)\": 74, \"Spending Score (1-100)\": 10, \"Cluster\": 5}, {\"Annual Income (k$)\": 74, \"Spending Score (1-100)\": 72, \"Cluster\": 4}, {\"Annual Income (k$)\": 75, \"Spending Score (1-100)\": 5, \"Cluster\": 5}, {\"Annual Income (k$)\": 75, 
\"Spending Score (1-100)\": 93, \"Cluster\": 4}, {\"Annual Income (k$)\": 76, \"Spending Score (1-100)\": 40, \"Cluster\": 3}, {\"Annual Income (k$)\": 76, \"Spending Score (1-100)\": 87, \"Cluster\": 4}, {\"Annual Income (k$)\": 77, \"Spending Score (1-100)\": 12, \"Cluster\": 5}, {\"Annual Income (k$)\": 77, \"Spending Score (1-100)\": 97, \"Cluster\": 4}, {\"Annual Income (k$)\": 77, \"Spending Score (1-100)\": 36, \"Cluster\": 3}, {\"Annual Income (k$)\": 77, \"Spending Score (1-100)\": 74, \"Cluster\": 4}, {\"Annual Income (k$)\": 78, \"Spending Score (1-100)\": 22, \"Cluster\": 5}, {\"Annual Income (k$)\": 78, \"Spending Score (1-100)\": 90, \"Cluster\": 4}, {\"Annual Income (k$)\": 78, \"Spending Score (1-100)\": 17, \"Cluster\": 5}, {\"Annual Income (k$)\": 78, \"Spending Score (1-100)\": 88, \"Cluster\": 4}, {\"Annual Income (k$)\": 78, \"Spending Score (1-100)\": 20, \"Cluster\": 5}, {\"Annual Income (k$)\": 78, \"Spending Score (1-100)\": 76, \"Cluster\": 4}, {\"Annual Income (k$)\": 78, \"Spending Score (1-100)\": 16, \"Cluster\": 5}, {\"Annual Income (k$)\": 78, \"Spending Score (1-100)\": 89, \"Cluster\": 4}, {\"Annual Income (k$)\": 78, \"Spending Score (1-100)\": 1, \"Cluster\": 5}, {\"Annual Income (k$)\": 78, \"Spending Score (1-100)\": 78, \"Cluster\": 4}, {\"Annual Income (k$)\": 78, \"Spending Score (1-100)\": 1, \"Cluster\": 5}, {\"Annual Income (k$)\": 78, \"Spending Score (1-100)\": 73, \"Cluster\": 4}, {\"Annual Income (k$)\": 79, \"Spending Score (1-100)\": 35, \"Cluster\": 3}, {\"Annual Income (k$)\": 79, \"Spending Score (1-100)\": 83, \"Cluster\": 4}, {\"Annual Income (k$)\": 81, \"Spending Score (1-100)\": 5, \"Cluster\": 5}, {\"Annual Income (k$)\": 81, \"Spending Score (1-100)\": 93, \"Cluster\": 4}, {\"Annual Income (k$)\": 85, \"Spending Score (1-100)\": 26, \"Cluster\": 5}, {\"Annual Income (k$)\": 85, \"Spending Score (1-100)\": 75, \"Cluster\": 4}, {\"Annual Income (k$)\": 86, \"Spending Score (1-100)\": 20, \"Cluster\": 5}, 
{\"Annual Income (k$)\": 86, \"Spending Score (1-100)\": 95, \"Cluster\": 4}, {\"Annual Income (k$)\": 87, \"Spending Score (1-100)\": 27, \"Cluster\": 5}, {\"Annual Income (k$)\": 87, \"Spending Score (1-100)\": 63, \"Cluster\": -1}, {\"Annual Income (k$)\": 87, \"Spending Score (1-100)\": 13, \"Cluster\": 5}, {\"Annual Income (k$)\": 87, \"Spending Score (1-100)\": 75, \"Cluster\": 4}, {\"Annual Income (k$)\": 87, \"Spending Score (1-100)\": 10, \"Cluster\": 5}, {\"Annual Income (k$)\": 87, \"Spending Score (1-100)\": 92, \"Cluster\": 4}, {\"Annual Income (k$)\": 88, \"Spending Score (1-100)\": 13, \"Cluster\": 5}, {\"Annual Income (k$)\": 88, \"Spending Score (1-100)\": 86, \"Cluster\": 4}, {\"Annual Income (k$)\": 88, \"Spending Score (1-100)\": 15, \"Cluster\": 5}, {\"Annual Income (k$)\": 88, \"Spending Score (1-100)\": 69, \"Cluster\": 4}, {\"Annual Income (k$)\": 93, \"Spending Score (1-100)\": 14, \"Cluster\": 5}, {\"Annual Income (k$)\": 93, \"Spending Score (1-100)\": 90, \"Cluster\": 4}, {\"Annual Income (k$)\": 97, \"Spending Score (1-100)\": 32, \"Cluster\": -1}, {\"Annual Income (k$)\": 97, \"Spending Score (1-100)\": 86, \"Cluster\": 4}, {\"Annual Income (k$)\": 98, \"Spending Score (1-100)\": 15, \"Cluster\": 5}, {\"Annual Income (k$)\": 98, \"Spending Score (1-100)\": 88, \"Cluster\": 4}, {\"Annual Income (k$)\": 99, \"Spending Score (1-100)\": 39, \"Cluster\": -1}, {\"Annual Income (k$)\": 99, \"Spending Score (1-100)\": 97, \"Cluster\": -1}, {\"Annual Income (k$)\": 101, \"Spending Score (1-100)\": 24, \"Cluster\": -1}, {\"Annual Income (k$)\": 101, \"Spending Score (1-100)\": 68, \"Cluster\": -1}, {\"Annual Income (k$)\": 103, \"Spending Score (1-100)\": 17, \"Cluster\": -1}, {\"Annual Income (k$)\": 103, \"Spending Score (1-100)\": 85, \"Cluster\": 4}, {\"Annual Income (k$)\": 103, \"Spending Score (1-100)\": 23, \"Cluster\": -1}, {\"Annual Income (k$)\": 103, \"Spending Score (1-100)\": 69, \"Cluster\": -1}, {\"Annual Income (k$)\": 113, 
\"Spending Score (1-100)\": 8, \"Cluster\": -1}, {\"Annual Income (k$)\": 113, \"Spending Score (1-100)\": 91, \"Cluster\": -1}, {\"Annual Income (k$)\": 120, \"Spending Score (1-100)\": 16, \"Cluster\": -1}, {\"Annual Income (k$)\": 120, \"Spending Score (1-100)\": 79, \"Cluster\": -1}, {\"Annual Income (k$)\": 126, \"Spending Score (1-100)\": 28, \"Cluster\": -1}, {\"Annual Income (k$)\": 126, \"Spending Score (1-100)\": 74, \"Cluster\": -1}, {\"Annual Income (k$)\": 137, \"Spending Score (1-100)\": 18, \"Cluster\": -1}, {\"Annual Income (k$)\": 137, \"Spending Score (1-100)\": 83, \"Cluster\": -1}]}}, {\"mode\": \"vega-lite\"});\n",
              "</script>"
            ],
            "text/plain": [
              "alt.Chart(...)"
            ]
          },
          "metadata": {},
          "execution_count": 12
        }
      ],
      "execution_count": null,
      "metadata": {
        "id": "OjEG8dS4XGC8",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 657
        },
        "outputId": "fac9b86d-80b9-403a-d15a-26b5ea4f4cdc"
      }
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "GUQeNtccM20l"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "OB_V5PGzM2pT"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "\n",
        "\n",
        "Here is a step-by-step code for explaining DBSCAN:"
      ],
      "metadata": {
        "id": "uNnLNxTGXGC8"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import pandas as pd\n",
        "import altair as alt\n",
        "from sklearn.neighbors import NearestNeighbors\n",
        "from sklearn.cluster import DBSCAN\n",
        "\n",
        "pd.set_option('display.max_rows', None)\n",
        "pd.set_option('display.max_columns', None)\n",
        "\n",
        "# Read the CSV file into a DataFrame\n",
        "df = pd.read_csv('Mall_Customers.csv')\n",
        "\n",
        "# Display the first 5 rows\n",
        "print(df.head().to_markdown(index=False, numalign=\"left\", stralign=\"left\"))\n",
        "\n",
        "# Print the column names and their data types\n",
        "print(df.info())\n",
        "\n",
        "# Extract the features\n",
        "X = df[['Annual Income (k$)', 'Spending Score (1-100)']]\n",
        "\n",
        "# Create a scatter plot\n",
        "chart = alt.Chart(X).mark_point().encode(\n",
        "    x='Annual Income (k$)',\n",
        "    y='Spending Score (1-100)',\n",
        "    tooltip=['Annual Income (k$)', 'Spending Score (1-100)']\n",
        ").properties(title='Scatter Plot of Spending Score vs. Annual Income').interactive()\n",
        "\n",
        "# Save the chart\n",
        "chart.save('spending_score_vs_annual_income_scatter_plot.json')\n",
        "\n",
        "# Fit a NearestNeighbors model\n",
        "knn = NearestNeighbors(n_neighbors=5)\n",
        "knn.fit(X)\n",
        "\n",
        "# Calculate distances to the 5th nearest neighbor\n",
        "distances, indices = knn.kneighbors(X)\n",
        "distances = np.sort(distances[:, 4])\n",
        "\n",
        "# Create a DataFrame for plotting\n",
        "df_distances = pd.DataFrame({'Distance': distances, 'Point': range(1, len(distances) + 1)})\n",
        "\n",
        "# Create a line chart\n",
        "chart = alt.Chart(df_distances).mark_line(point=True).encode(\n",
        "    x='Point',\n",
        "    y='Distance',\n",
        "    tooltip=['Point', 'Distance']\n",
        ").properties(title='Sorted Distances to the 5th Nearest Neighbor').interactive()\n",
        "\n",
        "# Save the chart\n",
        "chart.save('sorted_distances_5th_nearest_neighbor.json')\n",
        "\n",
        "# Create a DBSCAN model\n",
        "dbscan = DBSCAN(eps=9, min_samples=5)\n",
        "\n",
        "# Fit the model and predict cluster labels\n",
        "X['Cluster'] = dbscan.fit_predict(X)\n",
        "\n",
        "# Count the number of data points in each cluster\n",
        "cluster_counts = X['Cluster'].value_counts()\n",
        "print(cluster_counts.to_markdown(numalign=\"left\", stralign=\"left\"))\n",
        "\n",
        "# Create a scatter plot\n",
        "chart = alt.Chart(X).mark_point().encode(\n",
        "    x='Annual Income (k$)',\n",
        "    y='Spending Score (1-100)',\n",
        "    color='Cluster:N',\n",
        "    tooltip=['Annual Income (k$)', 'Spending Score (1-100)', 'Cluster']\n",
        ").properties(title='DBSCAN Clustering with eps=9 and min_samples=5').interactive()\n",
        "\n",
        "# Save the chart\n",
        "chart.save('dbscan_clustering_scatter_plot.json')"
      ],
      "outputs": [],
      "execution_count": null,
      "metadata": {
        "id": "vmFkEnxgXGC8"
      }
    }
  ],
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}