Started PW-2

2025-09-23 13:18:25 +02:00
commit f0e1453d13
9 changed files with 13088 additions and 0 deletions
--- a/PW-2/ex5-regression-knn/lifesat.csv
+++ b/PW-2/ex5-regression-knn/lifesat.csv
@@ -0,0 +1,28 @@
+Country,GDP per capita (USD),Life satisfaction
+Russia,26456.3879381321,5.8
+Greece,27287.0834009302,5.4
+Turkey,28384.9877846263,5.5
+Latvia,29932.4939100562,5.9
+Hungary,31007.7684065437,5.6
+Portugal,32181.1545372343,5.4
+Poland,32238.157259275,6.1
+Estonia,35638.4213511812,5.7
+Spain,36215.4475907307,6.3
+Slovenia,36547.7389559849,5.9
+Lithuania,36732.034744031,5.9
+Israel,38341.3075704083,7.2
+Italy,38992.1483807498,6.0
+United Kingdom,41627.129269425,6.8
+France,42025.6173730617,6.5
+New Zealand,42404.3937381567,7.3
+Canada,45856.6256264804,7.4
+Finland,47260.800458441,7.6
+Belgium,48210.0331113444,6.9
+Australia,48697.8370282475,7.3
+Sweden,50683.3235097178,7.3
+Germany,50922.3580234484,7.0
+Austria,51935.6038618156,7.1
+Iceland,52279.7288513646,7.5
+Netherlands,54209.5638357302,7.4
+Denmark,55938.2128086032,7.6
+United States,60235.7284916969,6.9
--- a/PW-2/ex5-regression-knn/regression-knn-stud.ipynb
+++ b/PW-2/ex5-regression-knn/regression-knn-stud.ipynb
@@ -0,0 +1,258 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b94b0451",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from sklearn.linear_model import LinearRegression\n",
+    "\n",
+    "# Load and prepare the data\n",
+    "lifesat = pd.read_csv(\"lifesat.csv\")\n",
+    "X = lifesat[[\"GDP per capita (USD)\"]].values\n",
+    "y = lifesat[[\"Life satisfaction\"]].values\n",
+    "\n",
+    "# Visualize the data\n",
+    "lifesat.plot(kind='scatter', grid=True,\n",
+    "             x=\"GDP per capita (USD)\", y=\"Life satisfaction\")\n",
+    "plt.axis([23_500, 62_500, 4, 9])\n",
+    "plt.show()\n",
+    "\n",
+    "# Select a linear model\n",
+    "model = LinearRegression()\n",
+    "\n",
+    "# Train the model\n",
+    "model.fit(X, y)\n",
+    "\n",
+    "# Make a prediction for Cyprus\n",
+    "X_new = [[37_655.2]]  # Cyprus' GDP per capita in 2020\n",
+    "print(model.predict(X_new)) # outputs [[6.30165767]]\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "94fda07f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_test = np.linspace(25000, 60000, 200)\n",
+    "X_test = [[value] for value in X_test]\n",
+    "y_test = model.predict(X_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "838b0242",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Visualize the data\n",
+    "lifesat.plot(kind='scatter', grid=True,\n",
+    "             x=\"GDP per capita (USD)\", y=\"Life satisfaction\")\n",
+    "plt.axis([23_500, 62_500, 4, 9])\n",
+    "plt.plot(X_test, y_test, color='red')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "aa14a4ca",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class KNearestNeighborRegressor(object):\n",
+    "  \"\"\" a kNN regressor with L2 distance \"\"\"\n",
+    "\n",
+    "  def __init__(self):\n",
+    "    pass\n",
+    "\n",
+    "  def train(self, X, y):\n",
+    "    \"\"\"\n",
+    "    Train the regressor. For k-nearest neighbors this is just\n",
+    "    memorizing the training data.\n",
+    "\n",
+    "    Inputs:\n",
+    "    - X: A numpy array of shape (num_train, D) containing the training data\n",
+    "      consisting of num_train samples each of dimension D.\n",
+    "    - y: A numpy array of shape (num_train,) containing the training target\n",
+    "         values, where y[i] is the target value for X[i].\n",
+    "    \"\"\"\n",
+    "    self.X_train = X\n",
+    "    self.y_train = y\n",
+    "    \n",
+    "  def predict(self, X, k=1):\n",
+    "    \"\"\"\n",
+    "    Predict target values for test data using this regressor.\n",
+    "\n",
+    "    Inputs:\n",
+    "    - X: A numpy array of shape (num_test, D) containing test data consisting\n",
+    "         of num_test samples each of dimension D.\n",
+    "    - k: The number of nearest neighbors used for each prediction. Distances\n",
+    "      are always computed with compute_distances, and k is forwarded\n",
+    "      unchanged to predict_values.\n",
+    "\n",
+    "    Returns:\n",
+    "    - y: A numpy array of shape (num_test,) containing predicted values for\n",
+    "      the test data, where y[i] is the predicted value for the test point X[i].\n",
+    "    \"\"\"\n",
+    "    dists = self.compute_distances(X)\n",
+    "    \n",
+    "    return self.predict_values(dists, k=k)\n",
+    "\n",
+    "\n",
+    "  def compute_distances(self, X):\n",
+    "    \"\"\"\n",
+    "    Compute the distance between each test point in X and each training point\n",
+    "    in self.X_train using a single loop over the test data.\n",
+    "\n",
+    "    Inputs:\n",
+    "    - X: A numpy array of shape (num_test, D) containing test data.\n",
+    "\n",
+    "    Returns:\n",
+    "    - dists: A numpy array of shape (num_test, num_train) where dists[i, j]\n",
+    "      is the Euclidean distance between the ith test point and the jth training\n",
+    "      point.\n",
+    "    \"\"\"\n",
+    "    num_test = X.shape[0]\n",
+    "    num_train = self.X_train.shape[0]\n",
+    "    dists = np.zeros((num_test, num_train))\n",
+    "    for i in range(num_test):\n",
+    "        #######################################################################\n",
+    "        # TODO:                                                               #\n",
+    "        # Compute the l2 distance between the ith test point and all training #\n",
+    "        # points, and store the result in dists[i, :].                        #\n",
+    "        #######################################################################\n",
+    "        \n",
+    "        pass\n",
+    "    \n",
+    "        #######################################################################\n",
+    "        #                         END OF YOUR CODE                            #\n",
+    "        #######################################################################\n",
+    "    return dists\n",
+    "\n",
+    "\n",
+    "\n",
+    "  def predict_values(self, dists, k=1):\n",
+    "    \"\"\"\n",
+    "    Given a matrix of distances between test points and training points,\n",
+    "    predict a value for each test point.\n",
+    "\n",
+    "    Inputs:\n",
+    "    - dists: A numpy array of shape (num_test, num_train) where dists[i, j]\n",
+    "      gives the distance between the ith test point and the jth training point.\n",
+    "\n",
+    "    Returns:\n",
+    "    - y: A numpy array of shape (num_test,) containing predicted values for the\n",
+    "      test data, where y[i] is the predicted value for the test point X[i].  \n",
+    "    \"\"\"\n",
+    "    num_test = dists.shape[0]\n",
+    "    y_pred = np.zeros(num_test)\n",
+    "    for i in range(num_test):\n",
+    "        # A list of length k storing the labels of the k nearest neighbors to\n",
+    "        # the ith test point.\n",
+    "        closest_y = []\n",
+    "        \n",
+    "        #########################################################################\n",
+    "        # TODO:                                                                 #\n",
+    "        # Use the distance matrix to find the k nearest neighbors of the ith    #\n",
+    "        # testing point, and use self.y_train to find the labels of these       #\n",
+    "        # neighbors. Store these labels in closest_y.                           #\n",
+    "        # Hint: Look up the function numpy.argsort.                             #\n",
+    "        #########################################################################\n",
+    "        \n",
+    "        pass\n",
+    "    \n",
+    "        #########################################################################\n",
+    "        # TODO:                                                                 #\n",
+    "        # Now that you have found the labels of the k nearest neighbors, you    #\n",
+    "        # need to compute the average of the target values corresponding to the #\n",
+    "        # nearest neighbors.                                                    #\n",
+    "        #########################################################################\n",
+    "        \n",
+    "        pass\n",
+    "        \n",
+    "        #########################################################################\n",
+    "        #                           END OF YOUR CODE                            # \n",
+    "        #########################################################################\n",
+    "\n",
+    "    return y_pred"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "267d1168",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "knn_reg = KNearestNeighborRegressor()\n",
+    "knn_reg.train(np.array(X), y)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fd8203ba",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y_hat_1 = knn_reg.predict(np.array(X_test), k=1)\n",
+    "y_hat_3 = knn_reg.predict(np.array(X_test), k=3)\n",
+    "y_hat_5 = knn_reg.predict(np.array(X_test), k=5)\n",
+    "y_hat_7 = knn_reg.predict(np.array(X_test), k=7)\n",
+    "y_hat_20 = knn_reg.predict(np.array(X_test), k=20)\n",
+    "y_hat_27 = knn_reg.predict(np.array(X_test), k=27)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d3704256",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Visualize the data\n",
+    "lifesat.plot(kind='scatter', grid=True,\n",
+    "             x=\"GDP per capita (USD)\", y=\"Life satisfaction\")\n",
+    "plt.axis([23_500, 62_500, 4, 9])\n",
+    "plt.plot(X_test, y_test, color='red')\n",
+    "plt.plot(X_test, y_hat_1, color='green')\n",
+    "# plt.plot(X_test, y_hat_3, color='blue')\n",
+    "# plt.plot(X_test, y_hat_5, color='magenta')\n",
+    "# plt.plot(X_test, y_hat_7, color='orange')\n",
+    "# plt.plot(X_test, y_hat_20, color='black')\n",
+    "# plt.plot(X_test, y_hat_27, color='grey')\n",
+    "plt.show()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}