Skip to content

Commit

Permalink
basic files
Browse files Browse the repository at this point in the history
  • Loading branch information
pyther-hub authored Aug 3, 2023
0 parents commit 4ee4f3a
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 0 deletions.
1 change: 1 addition & 0 deletions amlc-knn-exponential-weighted.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# KNN regression on title embeddings: exponentially weighted neighbours\n",
    "\n",
    "Predicts `PRODUCT_LENGTH` as a weighted mean of the log-targets of the `k` nearest training title embeddings, found with a FAISS flat L2 index on GPU. A neighbour at Euclidean distance `d` from the query is weighted by `exp(-alpha * d^2)`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install -q faiss-gpu==1.7.2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import gc\n",
    "import os\n",
    "\n",
    "import faiss\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import torch\n",
    "from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error\n",
    "from sklearn.model_selection import train_test_split\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "k = 10  # number of nearest neighbours averaged per prediction\n",
    "alpha = 5  # decay rate of the exponential neighbour weights\n",
    "N_CHUNKS = 16  # the test set is searched in this many batches\n",
    "\n",
    "TRAIN_CSV = \"/kaggle/input/aml-dataset/train.csv\"\n",
    "TRAIN_EMBEDDINGS = \"/kaggle/input/amlc-title-embeddings-vanilla/title_embeddings.npy\"\n",
    "TEST_EMBEDDINGS = \"/kaggle/input/amlc-test-title-embeddings/title_embeddings_test.npy\"\n",
    "SAMPLE_SUBMISSION_CSV = \"/kaggle/input/aml-dataset/sample_submission.csv\"\n",
    "SUBMISSION_CSV = \"KNN_submission.csv\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "gpu_ids = \"0\"  # can be e.g. \"3,4\" for multiple GPUs\n",
    "os.environ['CUDA_VISIBLE_DEVICES'] = gpu_ids\n",
    "faiss.get_num_gpus()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class FaissKNeighbors:\n",
    "    \"\"\"k-nearest-neighbour regressor on a FAISS flat L2 index with exponential weights.\n",
    "\n",
    "    A prediction is the weighted mean of the targets of the ``k`` nearest\n",
    "    training vectors; a neighbour at Euclidean distance ``d`` gets weight\n",
    "    ``exp(-alpha * d**2)``.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, k=5):\n",
    "        self.cpu_index = None  # faiss.IndexFlatL2 template, created in fit()\n",
    "        self.gpu_index = None  # GPU copy of cpu_index; the training vectors are added here\n",
    "        self.y = None  # training targets, aligned with the indexed vectors\n",
    "        self.k = k  # number of neighbours used per prediction\n",
    "\n",
    "    def fit(self, X, y):\n",
    "        \"\"\"Index the rows of ``X`` (n_samples, dim) and store their targets ``y``.\n",
    "\n",
    "        Raises ValueError if ``X`` is not 2-D or has a different number of rows than ``y``.\n",
    "        \"\"\"\n",
    "        # FAISS needs C-contiguous float32; this copies only when a conversion is required.\n",
    "        X = np.ascontiguousarray(X, dtype=np.float32)\n",
    "        y = np.asarray(y)\n",
    "        if X.ndim != 2 or X.shape[0] != len(y):\n",
    "            raise ValueError(\"X must be 2-D with exactly one row per target in y\")\n",
    "        self.cpu_index = faiss.IndexFlatL2(X.shape[1])\n",
    "        self.gpu_index = faiss.index_cpu_to_all_gpus(self.cpu_index)\n",
    "        self.gpu_index.add(X)\n",
    "        self.y = y\n",
    "\n",
    "    def predict(self, X, alpha):\n",
    "        \"\"\"Return one weighted-mean target per row of ``X``.\n",
    "\n",
    "        ``alpha`` is the decay rate of the neighbour weights ``exp(-alpha * d**2)``.\n",
    "        Raises RuntimeError if called before ``fit``.\n",
    "        \"\"\"\n",
    "        if self.gpu_index is None:\n",
    "            raise RuntimeError(\"fit() must be called before predict()\")\n",
    "        queries = np.ascontiguousarray(X, dtype=np.float32)\n",
    "        # IndexFlatL2 already reports *squared* L2 distances, so they are used as d**2\n",
    "        # directly; squaring them again would weight by exp(-alpha * d**4).\n",
    "        sq_dists, indices = self.gpu_index.search(queries, k=self.k)\n",
    "        # Clamp tiny negative values that float32 rounding can produce.\n",
    "        sq_dists = np.maximum(sq_dists, 0).astype(np.float64)\n",
    "        neighbour_targets = self.y[indices]  # shape (n_queries, k)\n",
    "        # Shifting by the row minimum leaves the weighted mean unchanged but pins the\n",
    "        # largest weight at exp(0) = 1, so a row of weights cannot underflow to all\n",
    "        # zeros (which would give 0/0 = NaN).\n",
    "        shifted = sq_dists - sq_dists.min(axis=1, keepdims=True)\n",
    "        weights = np.exp(-alpha * shifted)\n",
    "        return np.sum(weights * neighbour_targets, axis=1) / np.sum(weights, axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load training data and fit"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_df = pd.read_csv(TRAIN_CSV)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_Y = train_df['PRODUCT_LENGTH'].values\n",
    "train_X = np.load(TRAIN_EMBEDDINGS)\n",
    "train_Y_log = np.log1p(train_Y)  # the model is fitted on log(1 + length)\n",
    "assert len(train_X) == len(train_Y_log), \"embeddings and targets are misaligned\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = FaissKNeighbors(k=k)\n",
    "model.fit(train_X, train_Y_log)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "gc.collect()\n",
    "torch.cuda.empty_cache()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Predict on the test set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_X = np.load(TEST_EMBEDDINGS)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "predictions = []\n",
    "\n",
    "# array_split also accepts a row count that is not a multiple of N_CHUNKS (np.split raises).\n",
    "for test_chunk in tqdm(np.array_split(test_X, N_CHUNKS)):\n",
    "    predictions.append(model.predict(test_chunk, alpha))\n",
    "    gc.collect()\n",
    "    torch.cuda.empty_cache()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Undo the log(1 + length) transform applied to the training targets.\n",
    "y_preds = np.expm1(np.concatenate(predictions))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Write submission"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "submission = pd.read_csv(SAMPLE_SUBMISSION_CSV)\n",
    "assert len(submission) == len(y_preds), \"one prediction per submission row expected\"\n",
    "submission['PRODUCT_LENGTH'] = y_preds\n",
    "# index=False keeps the pandas row index out of the file.\n",
    "submission.to_csv(SUBMISSION_CSV, index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
1 change: 1 addition & 0 deletions amlc-knn-inverse-weighted.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# KNN regression on title embeddings: inverse-square weighted neighbours\n",
    "\n",
    "Predicts `PRODUCT_LENGTH` as a weighted mean of the log-targets of the `k` nearest training title embeddings, found with a FAISS flat L2 index on GPU. A neighbour at Euclidean distance `d` from the query is weighted by `1 / (d^2 + 1e-8)`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install -q faiss-gpu==1.7.2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import gc\n",
    "import os\n",
    "\n",
    "import faiss\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import torch\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.neighbors import KNeighborsRegressor\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "k = 10  # number of nearest neighbours averaged per prediction\n",
    "N_CHUNKS = 16  # the test set is searched in this many batches\n",
    "\n",
    "TRAIN_CSV = \"/kaggle/input/aml-dataset/train.csv\"\n",
    "TRAIN_EMBEDDINGS = \"/kaggle/input/amlc-title-embeddings-vanilla/title_embeddings.npy\"\n",
    "TEST_EMBEDDINGS = \"/kaggle/input/amlc-test-title-embeddings/title_embeddings_test.npy\"\n",
    "SAMPLE_SUBMISSION_CSV = \"/kaggle/input/aml-dataset/sample_submission.csv\"\n",
    "SUBMISSION_CSV = \"KNN_submission.csv\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "gpu_ids = \"0\"  # can be e.g. \"3,4\" for multiple GPUs\n",
    "os.environ['CUDA_VISIBLE_DEVICES'] = gpu_ids\n",
    "faiss.get_num_gpus()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class FaissKNeighbors:\n",
    "    \"\"\"k-nearest-neighbour regressor on a FAISS flat L2 index with inverse-square weights.\n",
    "\n",
    "    A prediction is the weighted mean of the targets of the ``k`` nearest\n",
    "    training vectors; a neighbour at Euclidean distance ``d`` gets weight\n",
    "    ``1 / (d**2 + 1e-8)``.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, k=5):\n",
    "        self.cpu_index = None  # faiss.IndexFlatL2 template, created in fit()\n",
    "        self.gpu_index = None  # GPU copy of cpu_index; the training vectors are added here\n",
    "        self.y = None  # training targets, aligned with the indexed vectors\n",
    "        self.k = k  # number of neighbours used per prediction\n",
    "\n",
    "    def fit(self, X, y):\n",
    "        \"\"\"Index the rows of ``X`` (n_samples, dim) and store their targets ``y``.\n",
    "\n",
    "        Raises ValueError if ``X`` is not 2-D or has a different number of rows than ``y``.\n",
    "        \"\"\"\n",
    "        # FAISS needs C-contiguous float32; this copies only when a conversion is required.\n",
    "        X = np.ascontiguousarray(X, dtype=np.float32)\n",
    "        y = np.asarray(y)\n",
    "        if X.ndim != 2 or X.shape[0] != len(y):\n",
    "            raise ValueError(\"X must be 2-D with exactly one row per target in y\")\n",
    "        self.cpu_index = faiss.IndexFlatL2(X.shape[1])\n",
    "        self.gpu_index = faiss.index_cpu_to_all_gpus(self.cpu_index)\n",
    "        self.gpu_index.add(X)\n",
    "        self.y = y\n",
    "\n",
    "    def predict(self, X):\n",
    "        \"\"\"Return one weighted-mean target per row of ``X``.\n",
    "\n",
    "        Raises RuntimeError if called before ``fit``.\n",
    "        \"\"\"\n",
    "        if self.gpu_index is None:\n",
    "            raise RuntimeError(\"fit() must be called before predict()\")\n",
    "        queries = np.ascontiguousarray(X, dtype=np.float32)\n",
    "        # IndexFlatL2 already reports *squared* L2 distances, so they are used as d**2\n",
    "        # directly; squaring them again would weight by 1 / d**4.\n",
    "        sq_dists, indices = self.gpu_index.search(queries, k=self.k)\n",
    "        # Clamp tiny negative values that float32 rounding can produce, so that the\n",
    "        # epsilon below always keeps the denominator strictly positive.\n",
    "        sq_dists = np.maximum(sq_dists, 0).astype(np.float64)\n",
    "        neighbour_targets = self.y[indices]  # shape (n_queries, k)\n",
    "        weights = 1.0 / (sq_dists + 1e-8)  # epsilon avoids division by zero on exact matches\n",
    "        return np.sum(weights * neighbour_targets, axis=1) / np.sum(weights, axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load training data and fit"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_df = pd.read_csv(TRAIN_CSV)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_Y = train_df['PRODUCT_LENGTH'].values\n",
    "train_X = np.load(TRAIN_EMBEDDINGS)\n",
    "train_Y_log = np.log1p(train_Y)  # the model is fitted on log(1 + length)\n",
    "assert len(train_X) == len(train_Y_log), \"embeddings and targets are misaligned\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = FaissKNeighbors(k=k)\n",
    "model.fit(train_X, train_Y_log)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "gc.collect()\n",
    "torch.cuda.empty_cache()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Predict on the test set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_X = np.load(TEST_EMBEDDINGS)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "predictions = []\n",
    "\n",
    "# array_split also accepts a row count that is not a multiple of N_CHUNKS (np.split raises).\n",
    "for test_chunk in tqdm(np.array_split(test_X, N_CHUNKS)):\n",
    "    predictions.append(model.predict(test_chunk))\n",
    "    gc.collect()\n",
    "    torch.cuda.empty_cache()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Undo the log(1 + length) transform applied to the training targets.\n",
    "y_preds = np.expm1(np.concatenate(predictions))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Write submission"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "submission = pd.read_csv(SAMPLE_SUBMISSION_CSV)\n",
    "assert len(submission) == len(y_preds), \"one prediction per submission row expected\"\n",
    "submission['PRODUCT_LENGTH'] = y_preds\n",
    "# index=False keeps the pandas row index out of the file.\n",
    "submission.to_csv(SUBMISSION_CSV, index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}

0 comments on commit 4ee4f3a

Please sign in to comment.