Add files via upload

marijusGood · Jan 27, 2020 · 12193d6 · 12193d6
1 parent 2988acd
commit 12193d6
Showing 1 changed file with 292 additions and 0 deletions.
diff --git a/one-vs-all.ipynb b/one-vs-all.ipynb
@@ -0,0 +1,292 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Import the data, convert the pandas DataFrame to a NumPy array and shuffle its rows\n",
+    "iris = pd.read_csv(\"Iris.csv\")\n",
+    "iris_np = iris.to_numpy()\n",
+    "np.random.seed(42)\n",
+    "np.random.shuffle(iris_np)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "location_of_name = len(iris_np[0]) - 1  # index of the last column, where the iris names are located"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This was meant to add higher-order features such as x**2 to make the model more flexible, but when I run the\n",
+    "# calculations with it I get NaN. The cell itself works and does its job, but I'm not using it.\n",
+    "def ComplexVariables(X):\n",
+    "    X = np.array(X)\n",
+    "    temp = np.zeros((len(X), (len(X[0])*2)))\n",
+    "    for i in range(len(X)):\n",
+    "        for j in range(len(X[0])):\n",
+    "            temp[i][j] = X[i][j]\n",
+    "        for j in range(len(X[0])):\n",
+    "            temp[i][j + len(X[0])] = np.power(X[i][j], 2)\n",
+    "    return temp"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Calculate the cost, including the delta regularization term that protects against overfitting\n",
+    "def Cost(X, theta, y, delta):\n",
+    "    prediction = sigmoid(np.dot(X, np.transpose(theta)))\n",
+    "    y = np.array(y)\n",
+    "    cost = sum( -(y * np.log(prediction)) - (1 - y) * np.log(1 - prediction))/len(y) + (sum(np.power(theta, 2))) * delta/(2*len(y))\n",
+    "    return cost"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def sigmoid(prediction):\n",
+    "    prediction = list(-np.array(prediction))#this is to reverse the sign\n",
+    "    sigmoid = 1 / (1 + np.exp(prediction))\n",
+    "    return sigmoid"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Perform one gradient-descent update of theta (in place), including the delta regularization term that protects against overfitting\n",
+    "def Gradient(X, theta, y, alfa, delta):\n",
+    "    prediction = sigmoid(np.dot(X, np.transpose(theta)))\n",
+    "    temp_theta = theta.copy()\n",
+    "    temp_theta[0] = 0\n",
+    "    for i in range(len(theta)):\n",
+    "        theta[i] = theta[i] - (alfa * sum((prediction-y) * X[:,i]/len(y)) + (delta/len(y)) * temp_theta[i])\n",
+    "    return theta"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def CostSum(training_set, theta, y_train, iris_names, delta):\n",
+    "    cost_sum = 0\n",
+    "    for i in range(len(iris_names)):\n",
+    "        classes = (y_train == i).astype(int)\n",
+    "        cost_sum += Cost(training_set, theta[i], classes, delta)\n",
+    "    print(\"the Error of the funcion is \", cost_sum)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def succes_rate(X, theta, y):\n",
+    "    predictions = list(np.dot(X, np.transpose(theta)))\n",
+    "    predictions = np.argmax(predictions, axis=1)\n",
+    "    succes_rate = (y == predictions).astype(int)\n",
+    "    succes_rate = sum(succes_rate)/len(y)\n",
+    "    print(\"succes rate\", succes_rate)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Replace each iris name with an integer label, from 0 up to the number of distinct names minus 1\n",
+    "iris_names = []\n",
+    "\n",
+    "for i in range(len(iris_np)):\n",
+    "    has_value = False\n",
+    "    for j in range(len(iris_names)):\n",
+    "        if(iris_names[j] == iris_np[i][location_of_name]):\n",
+    "            iris_np[i][location_of_name] = j\n",
+    "            has_value = True\n",
+    "    if has_value == False:\n",
+    "        iris_names.append(iris_np[i][location_of_name])\n",
+    "        iris_np[i][location_of_name] = len(iris_names) - 1\n",
+    "        has_value = False"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Extract the answers (labels), delete the label column and the first column, and prepend a column of ones\n",
+    "iris_y = iris_np[:,location_of_name]\n",
+    "iris_np = np.delete(iris_np, location_of_name, 1)\n",
+    "iris_np = np.delete(iris_np, 0, 1)\n",
+    "iris_np = np.append(np.ones((len(iris_np), 1)), iris_np, axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
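+    "# Split the data into training_set (80%) and testing_set (20%); note that training_ratio is the fraction held out for testing.\n",
+    "# delta is the regularization strength (helps to avoid overfitting)\n",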
+    "location_of_name = len(iris_np[0]) - 1\n",
+    "training_ratio = 0.2\n",
+    "delta = 0.1\n",
+    "#iris_np = ComplexVariables(iris_np)\n",
+    "training_set = iris_np[:int((1-training_ratio) * len(iris_np)),:].copy()\n",
+    "testing_set = iris_np[int((1-training_ratio) * len(iris_np)):,:].copy()\n",
+    "y_train = iris_y[:int((1-training_ratio) * len(iris_np))].copy()\n",
+    "y_test = iris_y[int((1-training_ratio) * len(iris_np)):].copy()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Generate random initial theta: one row per iris class, one column per input column (including the column of ones)\n",
+    "theta = np.random.rand(len(iris_names), len(training_set[0]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "the Error of the funcion is  15.72682452555025\n"
+     ]
+    }
+   ],
+   "source": [
+    "#Cost before training\n",
+    "CostSum(training_set, theta, y_train, iris_names, delta)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Train one classifier per iris class (one-vs-all), each for 10000 iterations\n",
+    "for i in range(len(iris_names)):\n",
+    "    classes = (y_train == i).astype(int)\n",
+    "    for j in range(10000):\n",
+    "        Gradient(training_set, theta[i], classes, 0.1, delta)  # 0.1 is the step size used in gradient descent"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "the Error of the funcion is  0.6876999762494191\n"
+     ]
+    }
+   ],
+   "source": [
+    "#Cost after training\n",
+    "CostSum(training_set, theta, y_train, iris_names, delta)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "succes rate 0.9416666666666667\n"
+     ]
+    }
+   ],
+   "source": [
+    "# success rate on the training set\n",
+    "succes_rate(training_set ,theta, y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "succes rate 1.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "# success rate on the testing set\n",
+    "succes_rate(testing_set ,theta, y_test)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}