Skip to content

Commit

Permalink
Add raw preprocess functions
Browse files Browse the repository at this point in the history
  • Loading branch information
tonyrbf96 committed Dec 6, 2020
1 parent bddb7ad commit 8bfadbf
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,4 @@ dmypy.json

.vscode/**
requirements_temp.txt
data
Empty file.
52 changes: 52 additions & 0 deletions kickstarter/data_extraction/preprocess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import json
from typing import Dict, List

from kickstarter.models import CategoryModel, CreatorModel, ProjectModel


def process_json(
    filename: str,
    projects: Dict[int, ProjectModel],
    categories: Dict[int, CategoryModel],
    creators: Dict[int, CreatorModel],
):
    """Read project records from ``filename`` and merge them into the maps.

    Two file layouts are recognised, selected by the first character:

    * ``[`` -- a single JSON array of wrapper objects, each carrying a
      ``"projects"`` list of project dicts.
    * ``{`` -- one JSON object per line, each carrying the project dict
      under its ``"data"`` key. Blank lines are ignored.

    Every project dict is handed to ``process_new_project``, which updates
    ``projects``, ``categories`` and ``creators`` in place.

    Args:
        filename: Path of the UTF-8 encoded file to read.
        projects: Map of project id to project, updated in place.
        categories: Map of category id to category, updated in place.
        creators: Map of creator id to creator, updated in place.

    Raises:
        ValueError: If the file starts with neither ``[`` nor ``{``
            (this includes an empty file).
    """
    with open(filename, "r", encoding="utf-8") as file:
        # Peek at the first character to pick the layout, then rewind so the
        # parsers below see the file from the start.
        first_char = file.read(1)
        file.seek(0)

        if first_char == "[":
            wrappers: List[dict] = json.load(file)
            for wrapper in wrappers:
                project_dicts: List[Dict] = wrapper["projects"]
                for project_dict in project_dicts:
                    process_new_project(
                        project_dict, projects, categories, creators
                    )
        elif first_char == "{":
            for line in file:
                # A blank line is not a JSON document; skip it rather than
                # letting json.loads abort the whole load.
                if not line.strip():
                    continue
                project_dict = json.loads(line)["data"]
                process_new_project(
                    project_dict, projects, categories, creators
                )
        else:
            raise ValueError("Invalid file format")


def process_new_project(
    project_dict: Dict,
    projects: Dict[int, ProjectModel],
    categories: Dict[int, CategoryModel],
    creators: Dict[int, CreatorModel],
):
    """Build models from one project dict and record them in the maps.

    A ``ProjectModel`` is built from the whole dict, and a ``CategoryModel``
    and ``CreatorModel`` from its ``"category"`` and ``"creator"`` entries.
    If ``projects`` already holds an entry with the same id whose
    ``state_changed_at`` is strictly greater than the incoming one, nothing
    is stored. Otherwise all three maps are updated, keyed by each model's
    ``id``.

    Args:
        project_dict: Raw project data containing ``"category"`` and
            ``"creator"`` sub-dicts.
        projects: Map of project id to project, updated in place.
        categories: Map of category id to category, updated in place.
        creators: Map of creator id to creator, updated in place.
    """
    incoming = ProjectModel(**project_dict)
    incoming_category = CategoryModel(**project_dict["category"])
    incoming_creator = CreatorModel(**project_dict["creator"])

    # The stored entry wins only when it is strictly more recent; on a tie
    # the incoming record overwrites it.
    superseded = (
        incoming.id in projects
        and projects[incoming.id].state_changed_at > incoming.state_changed_at
    )

    if not superseded:
        projects[incoming.id] = incoming
        categories[incoming_category.id] = incoming_category
        creators[incoming_creator.id] = incoming_creator

0 comments on commit 8bfadbf

Please sign in to comment.