Skip to content

Commit

Permalink
Merge pull request #63 from boostcampaitech7/feat-034/add-dataset
Browse files: browse the repository at this point in the history
feat: add last deposit variable
  • Loading branch information
yunhyechoi authored Oct 23, 2024
2 parents ef262e5 + c892742 commit 4e084e2
Showing 1 changed file with 44 additions and 6 deletions.
50 changes: 44 additions & 6 deletions merge_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 32,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -19,9 +19,16 @@
"from sklearn.preprocessing import StandardScaler\n",
"from utils.clustering import ClusteringMethods\n",
"\n",
"from tqdm import tqdm\n"
"from tqdm import tqdm"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 2,
Expand Down Expand Up @@ -1314,6 +1321,7 @@
"metadata": {},
"outputs": [],
"source": [
"from haversine import haversine, Unit\n",
"def calculate_subway_distances(train_data, subway_data):\n",
" \"\"\"\n",
" 모든 아파트에 대해 연산하면 시간이 오래 걸리므로 위도 경도에 대해서 계산을 수행한 후 Merge한다.\n",
Expand Down Expand Up @@ -1458,6 +1466,27 @@
"test_data = pd.concat([test_data1, test_data2]).sort_index()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 직전 아파트 거래가"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 지난 아파트 거래가 변수\n",
"train_data = train_data.sort_values(['contract_year_month', 'contract_day'])\n",
"train_data['last_deposit_by_area'] = train_data.groupby(['latitude', 'longitude'])['deposit_by_area'].shift(1)\n",
"\n",
"last_apt_deposit = train_data.groupby(['latitude', 'longitude']).last().reset_index()[['latitude', 'longitude', 'deposit_by_area']].rename(columns = {'deposit_by_area' : 'last_deposit_by_area'})\n",
"test_data = test_data.merge(last_apt_deposit, how= 'left')"
]
},
{
"cell_type": "code",
"execution_count": 38,
Expand All @@ -1467,6 +1496,15 @@
"final_df = pd.concat([train_data, test_data],axis=0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"final_df.drop(columns = 'index', inplace = True)"
]
},
{
"cell_type": "code",
"execution_count": 40,
Expand Down Expand Up @@ -1970,7 +2008,7 @@
"metadata": {},
"outputs": [],
"source": [
"final_df.to_csv(\"final_df.csv\", index=False)"
"final_df.to_csv(\"real_final_df.csv\", index=False)"
]
},
{
Expand All @@ -1979,7 +2017,7 @@
"metadata": {},
"outputs": [],
"source": [
"final_df.to_csv(\"final_df.csv.gz\", index=False, compression='gzip')"
"final_df.to_csv(\"real_final_df.csv.gz\", index=False, compression='gzip')"
]
},
{
Expand All @@ -1992,7 +2030,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "base",
"display_name": ".conda",
"language": "python",
"name": "python3"
},
Expand All @@ -2006,7 +2044,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down

0 comments on commit 4e084e2

Please sign in to comment.