Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
marijusGood authored Jan 27, 2020
1 parent 2988acd commit 12193d6
Showing 1 changed file with 292 additions and 0 deletions.
292 changes: 292 additions & 0 deletions one-vs-all.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,292 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Load the Iris data set, convert the DataFrame to a NumPy array and shuffle the rows (fixed seed for reproducibility)\n",
"iris = pd.read_csv(\"Iris.csv\")\n",
"iris_np = iris.to_numpy()\n",
"np.random.seed(42)\n",
"np.random.shuffle(iris_np)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"location_of_name = len(iris_np[0]) - 1 #where the iris names are located"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Optional feature expansion (currently unused): appends the square of every column so the\n",
"# model can fit non-linear decision boundaries. The call in the data-splitting cell is\n",
"# commented out because training on the expanded features produced nan costs.\n",
"# NOTE(review): the nan presumably comes from log(0) once the sigmoid saturates on the\n",
"# larger squared values - confirm before re-enabling.\n",
"def ComplexVariables(X):\n",
"    \"\"\"Return X with the element-wise square of every column appended on the right.\n",
"\n",
"    X is a 2-D array-like of numbers with shape (rows, cols). The result is a float array\n",
"    of shape (rows, 2 * cols): the original columns followed by their squares.\n",
"    \"\"\"\n",
"    X = np.asarray(X, dtype=float)\n",
"    # Vectorised replacement for the former nested element-by-element loops\n",
"    return np.hstack((X, np.square(X)))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Regularised logistic-regression cost; delta is the L2 penalty strength that protects against overfitting\n",
"def Cost(X, theta, y, delta):\n",
"    \"\"\"Return the L2-regularised cross-entropy cost of one binary classifier.\n",
"\n",
"    X     : (m, n) feature matrix; column 0 multiplies the bias weight theta[0].\n",
"    theta : (n,) weight vector.\n",
"    y     : (m,) array of 0/1 targets.\n",
"    delta : L2 penalty strength.\n",
"\n",
"    The bias weight theta[0] is left out of the penalty, matching Gradient(), which also\n",
"    leaves the bias weight unpenalised. Predictions are clipped away from exactly 0 and 1\n",
"    so that np.log never produces -inf/nan when the sigmoid saturates.\n",
"    \"\"\"\n",
"    X = np.asarray(X, dtype=float)\n",
"    theta = np.asarray(theta, dtype=float)\n",
"    y = np.asarray(y, dtype=float)\n",
"    m = len(y)\n",
"    eps = np.finfo(float).eps\n",
"    prediction = np.clip(sigmoid(np.dot(X, np.transpose(theta))), eps, 1 - eps)\n",
"    log_loss = -np.sum(y * np.log(prediction) + (1 - y) * np.log(1 - prediction)) / m\n",
"    penalty = delta * np.sum(np.square(theta[1:])) / (2 * m)\n",
"    return log_loss + penalty"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def sigmoid(prediction):\n",
"    \"\"\"Logistic function 1 / (1 + exp(-z)), applied element-wise.\n",
"\n",
"    Accepts a scalar, a list or an array of any shape (including dtype=object arrays of\n",
"    Python numbers) and returns float64 values of the same shape.\n",
"    \"\"\"\n",
"    # Casting to float first lets np.exp work on object arrays without a list round-trip\n",
"    z = np.asarray(prediction, dtype=float)\n",
"    return 1.0 / (1.0 + np.exp(-z))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# One regularised gradient-descent step; delta is the L2 penalty strength that protects against overfitting\n",
"def Gradient(X, theta, y, alfa, delta):\n",
"    \"\"\"Apply one batch gradient-descent step to theta IN PLACE and return it.\n",
"\n",
"    X     : (m, n) feature matrix; column 0 multiplies the bias weight theta[0].\n",
"    theta : (n,) weight vector, overwritten with the updated weights.\n",
"    y     : (m,) array of 0/1 targets.\n",
"    alfa  : learning rate (step size).\n",
"    delta : L2 penalty strength; the bias weight theta[0] is not penalised.\n",
"\n",
"    Update rule: theta <- theta - alfa * (X^T (sigmoid(X theta) - y) + delta * theta_penalised) / m\n",
"    \"\"\"\n",
"    X = np.asarray(X, dtype=float)\n",
"    y = np.asarray(y, dtype=float)\n",
"    m = len(y)\n",
"    # Snapshot of the weights: every component is updated from the same old values\n",
"    weights = np.array(theta, dtype=float)\n",
"    prediction = sigmoid(np.dot(X, weights))\n",
"    penalised = weights.copy()\n",
"    penalised[0] = 0  # the bias weight is not regularised\n",
"    # alfa scales the whole gradient (data term and penalty), so delta keeps the same\n",
"    # meaning regardless of the chosen step size\n",
"    step = alfa * (np.dot(X.T, prediction - y) + delta * penalised) / m\n",
"    # Slice assignment writes through to the caller's array (e.g. a row view of a 2-D theta)\n",
"    theta[:] = weights - step\n",
"    return theta"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def CostSum(training_set, theta, y_train, iris_names, delta):\n",
"    \"\"\"Print the sum of the one-vs-all costs over every class.\n",
"\n",
"    For class number k the targets are 1 where y_train == k and 0 elsewhere, and the\n",
"    weights are row k of theta. iris_names is only used for its length (the class count).\n",
"    Nothing is returned; the total is printed.\n",
"    \"\"\"\n",
"    per_class_costs = (\n",
"        Cost(training_set, theta[label], (y_train == label).astype(int), delta)\n",
"        for label in range(len(iris_names))\n",
"    )\n",
"    total = sum(per_class_costs)\n",
"    print(\"the Error of the funcion is \", total)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def succes_rate(X, theta, y):\n",
"    \"\"\"Print the fraction of rows of X whose predicted class equals the label in y.\n",
"\n",
"    theta holds one row of weights per class; the predicted class of a row is the index\n",
"    of its largest linear score. The sigmoid is not applied because it is monotonic and\n",
"    therefore does not change which score is the largest. Nothing is returned.\n",
"    \"\"\"\n",
"    scores = np.dot(X, np.transpose(theta))\n",
"    predicted = np.argmax(list(scores), axis=1)\n",
"    hits = (y == predicted).astype(int)\n",
"    accuracy = sum(hits) / len(y)\n",
"    print(\"succes rate\", accuracy)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Replace every species name in the label column with an integer id (0, 1, 2, ...) assigned\n",
"# in order of first appearance; iris_names[id] gives back the original name.\n",
"iris_names = []\n",
"\n",
"for row in iris_np:  # each row is a view, so the assignment below writes into iris_np\n",
"    label = row[location_of_name]\n",
"    if label not in iris_names:\n",
"        iris_names.append(label)\n",
"    row[location_of_name] = iris_names.index(label)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Split off the labels, drop the label column and column 0 (presumably the CSV's Id column -\n",
"# confirm), then prepend a column of ones so that theta[0] acts as the bias term.\n",
"# Both arrays are cast to numeric dtypes: the array built from the mixed-type DataFrame\n",
"# presumably has dtype=object, which forces slow per-element Python arithmetic downstream.\n",
"iris_y = iris_np[:, location_of_name].astype(int)\n",
"iris_np = np.delete(iris_np, [0, location_of_name], axis=1).astype(float)\n",
"iris_np = np.append(np.ones((len(iris_np), 1)), iris_np, axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# Train on the first 80% of the shuffled rows and hold out the last 20% for testing\n",
"location_of_name = len(iris_np[0]) - 1\n",
"training_ratio = 0.2  # NOTE: despite its name, this is the fraction held out for TESTING\n",
"delta = 0.1  # L2 regularisation strength (helps to avoid overfitting)\n",
"#iris_np = ComplexVariables(iris_np)\n",
"split_at = int((1-training_ratio) * len(iris_np))  # index of the first testing row\n",
"training_set, testing_set = iris_np[:split_at,:].copy(), iris_np[split_at:,:].copy()\n",
"y_train, y_test = iris_y[:split_at].copy(), iris_y[split_at:].copy()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Randomly initialise theta: one row of weights per class, one column per feature (bias column included)\n",
"theta = np.random.rand(len(iris_names), len(training_set[0]))"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"the Error of the funcion is 15.72682452555025\n"
]
}
],
"source": [
"#Cost before training\n",
"CostSum(training_set, theta, y_train, iris_names, delta)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# Train one binary (one-vs-all) classifier per class with batch gradient descent.\n",
"# Re-running this cell continues training from the current theta.\n",
"N_ITERATIONS = 10000  # gradient-descent steps per class\n",
"LEARNING_RATE = 0.1  # how big the steps in gradient descent are\n",
"for class_id in range(len(iris_names)):\n",
"    classes = (y_train == class_id).astype(int)  # 1 for the current class, 0 for every other\n",
"    for _ in range(N_ITERATIONS):\n",
"        # Gradient updates the row theta[class_id] in place, so its return value is not needed\n",
"        Gradient(training_set, theta[class_id], classes, LEARNING_RATE, delta)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"the Error of the funcion is 0.6876999762494191\n"
]
}
],
"source": [
"#Cost after training\n",
"CostSum(training_set, theta, y_train, iris_names, delta)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"succes rate 0.9416666666666667\n"
]
}
],
"source": [
"# Success rate on the training set\n",
"succes_rate(training_set ,theta, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"succes rate 1.0\n"
]
}
],
"source": [
"# Success rate on the held-out testing set\n",
"succes_rate(testing_set ,theta, y_test)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit 12193d6

Please sign in to comment.