Skip to content

Commit

Permalink
Implement basic statistics
Browse files Browse the repository at this point in the history
  • Loading branch information
leynier committed Dec 6, 2020
1 parent 9cf51d8 commit d591cb1
Show file tree
Hide file tree
Showing 8 changed files with 191 additions and 16 deletions.
89 changes: 87 additions & 2 deletions app.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
from typing import Dict, List, Tuple

import pandas as pd
import plotly.graph_objects as go
import streamlit as st
from PIL import Image

from kickstarter.core import get_favorite_categories as get_favorite_categories_raw
from kickstarter.core import load_json as load_json_raw
from kickstarter.models import CategoryModel, ProjectModel
from kickstarter.processing import GrossingCategoriesModel, SuccessfulCategoriesModel
from kickstarter.processing import (
GrossingCategoriesModel,
MonthlyCategoriesSuccessModel,
MonthlyCategoriesTotalsModel,
SuccessfulCategoriesModel,
)

image = Image.open("images/logo.png")
st.image(image, use_column_width=True)
Expand All @@ -29,6 +35,9 @@ def get_favorite_categories(
List[CategoryModel],
List[CategoryModel],
List[CategoryModel],
MonthlyCategoriesSuccessModel,
MonthlyCategoriesTotalsModel,
Dict[int, Tuple[List[int], List[int]]],
]:
return get_favorite_categories_raw(projects, categories)

Expand Down Expand Up @@ -98,6 +107,9 @@ def get_favorite_categories(
List[CategoryModel],
List[CategoryModel],
List[CategoryModel],
MonthlyCategoriesSuccessModel,
MonthlyCategoriesTotalsModel,
Dict[int, Tuple[List[int], List[int]]],
] = get_favorite_categories(
*load_json() # type: ignore
)
Expand Down Expand Up @@ -167,9 +179,43 @@ def get_favorite_categories(
En las siguientes gráficas mostramos la cantidad de proyectos, así como la
cantidad de estos que fueron exitosos, de estas categorías por mes durante
los años del 2009 al 2018.
los años del 2009 al 2020.
"""

# Timeline chart: for every category in the intersection list (tuple slot 4)
# plot two monthly series, the total number of projects and the number of
# successful ones.
timeline = go.Figure()  # type: ignore

# Slot 5 holds the monthly success model and slot 6 the monthly totals model.
success_model = favorite_categories[5]
totals_model = favorite_categories[6]

for cat in favorite_categories[4]:
    # Both series are indexed by the success model's month count.
    # NOTE(review): this presumes both models cover the same month range.
    month_indexes = range(len(success_model.dates))

    timeline.add_scatter(
        x=[totals_model.dates[i] for i in month_indexes],
        y=[totals_model.categories[cat.id][i] for i in month_indexes],
        name=cat.name,
        opacity=0.9,
    )
    timeline.add_scatter(
        x=[success_model.dates[i] for i in month_indexes],
        y=[success_model.categories[cat.id][i] for i in month_indexes],
        name=cat.name + " Exitosos",
        opacity=0.9,
    )

timeline.update_layout(
    title_text="Cantidad de Proyectos vs Cantidad de Proyectos Exitosos",
    xaxis_rangeslider_visible=True,
)
st.write(timeline)

"""
Teniendo en cuenta la información anterior podemos notar que muchas de las
categorías anteriores tuvieron un auge en **Kickstarter** durante los años
Expand All @@ -193,3 +239,42 @@ def get_favorite_categories(
A continuación analizaremos cómo se comportan las categorías que han sido y son
más exitosas dependiendo del mes del año en que sus proyectos fueron dados a conocer.
"""

# Category picker: the widget's value is the category id, while the label
# shown to the user is the category name looked up in the model at slot 0.
selectable_category_ids = [category.id for category in favorite_categories[4]]
category_selected = st.selectbox(
    "Categorías:",
    options=selectable_category_ids,
    format_func=lambda category_id: (
        favorite_categories[0].categories[category_id].name
    ),
)

# Spanish month names in calendar order (index 0 = January); these are the
# x-axis labels of the per-month bar chart built below.
months = [
    "enero",
    "febrero",
    "marzo",
    "abril",
    "mayo",
    "junio",
    "julio",
    "agosto",
    "septiembre",
    "octubre",  # fixed: was misspelled "obtubre"
    "noviembre",
    "diciembre",
]

# Per-calendar-month counts for the selected category, taken from the
# by-month mapping at slot 7: element 0 is read as the successes and
# element 1 as the totals.
monthly_pair = favorite_categories[7][category_selected]
successfuls = monthly_pair[0]
totals = monthly_pair[1]

# Totals are drawn first and successes second; with the "overlay" bar mode
# set below, both bars share the same x position.
total_bar = go.Bar(name="Total", x=months, y=totals)  # type: ignore
success_bar = go.Bar(name="Éxitos", x=months, y=successfuls)  # type: ignore
fig_cat_months_rel = go.Figure(data=[total_bar, success_bar])  # type: ignore

selected_category_name = favorite_categories[0].categories[category_selected].name
fig_cat_months_rel.update_layout(
    title_text="Categoría: " + f"{selected_category_name}",
    barmode="overlay",
)

st.write(fig_cat_months_rel)
36 changes: 35 additions & 1 deletion kickstarter/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,13 @@
from .models import CategoryModel, ProjectModel
from .processing import top_grossing_categories # noqa: F401
from .processing import top_successful_categories # noqa: F401
from .processing import GrossingCategoriesModel, SuccessfulCategoriesModel, process
from .processing import (
GrossingCategoriesModel,
MonthlyCategoriesSuccessModel,
MonthlyCategoriesTotalsModel,
SuccessfulCategoriesModel,
process,
)


def load_json() -> Tuple[
Expand Down Expand Up @@ -36,12 +42,19 @@ def get_favorite_categories(
List[CategoryModel],
List[CategoryModel],
List[CategoryModel],
MonthlyCategoriesSuccessModel,
MonthlyCategoriesTotalsModel,
Dict[int, Tuple[List[int], List[int]]],
]:
results = process(projects, categories)
grossing_categories: List[CategoryModel] = []
successful_categories: List[CategoryModel] = []
crossing_categories_model: GrossingCategoriesModel = None # type: ignore
successful_categories_model: SuccessfulCategoriesModel = None # type: ignore
monthly_categories_success_model: MonthlyCategoriesSuccessModel = (
None
) # type: ignore
monthly_categories_totals_model: MonthlyCategoriesTotalsModel = None # type: ignore
for item in results:
if isinstance(item, GrossingCategoriesModel):
item = cast(GrossingCategoriesModel, item)
Expand All @@ -51,16 +64,37 @@ def get_favorite_categories(
item = cast(SuccessfulCategoriesModel, item)
successful_categories = item.top[:25]
successful_categories_model = item
elif isinstance(item, MonthlyCategoriesSuccessModel):
item = cast(MonthlyCategoriesSuccessModel, item)
monthly_categories_success_model = item
elif isinstance(item, MonthlyCategoriesTotalsModel):
item = cast(MonthlyCategoriesTotalsModel, item)
monthly_categories_totals_model = item
grossing_categories_set = set(grossing_categories)
successful_categories_set = set(successful_categories)
inter = cast(
List[CategoryModel],
list(set.intersection(grossing_categories_set, successful_categories_set)),
)
by_months: Dict[int, Tuple[List[int], List[int]]] = {}
for item in categories:
by_months[item] = ([0] * 12, [0] * 12)
dates = monthly_categories_success_model.dates
for item in by_months:
for (index, success), (_, total) in zip(
monthly_categories_success_model.categories[item].items(),
monthly_categories_totals_model.categories[item].items(),
):
date = dates[index]
by_months[item][0][date.month - 1] += success
by_months[item][1][date.month - 1] += total
return (
crossing_categories_model,
successful_categories_model,
grossing_categories,
successful_categories,
list(inter),
monthly_categories_success_model,
monthly_categories_totals_model,
by_months,
)
8 changes: 8 additions & 0 deletions kickstarter/processing/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
from .base_process import process, subscribe # noqa: F401
from .monthly_categories_success import ( # noqa: F401
MonthlyCategoriesSuccessModel,
monthly_categories_success,
)
from .monthly_categories_total import ( # noqa: F401
MonthlyCategoriesTotalsModel,
monthly_categories_totals,
)
from .top_grossing_categories import ( # noqa: F401
GrossingCategoriesModel,
top_grossing_categories,
Expand Down
17 changes: 11 additions & 6 deletions kickstarter/processing/monthly_categories_success.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from collections import defaultdict
from datetime import datetime
from typing import Dict, List, Optional, Tuple
from typing import Dict, Optional

from ..models import CategoryModel, ProjectModel
from . import subscribe
Expand All @@ -11,20 +11,25 @@ def __init__(self, start_year=2009, end_year=2020):
counter: int = 0
self.dates: Dict[int, datetime] = dict()
self._index: Dict[datetime, int] = dict()
self.categories: Dict[int, Dict[int, int]] = defaultdict(lambda: defaultdict(lambda: 0))
self.categories: Dict[int, Dict[int, int]] = defaultdict(
lambda: defaultdict(lambda: 0)
)

for year in range(start_year, end_year+1):
for year in range(start_year, end_year + 1):
for month in range(1, 13):
date = datetime(year, month, 1)
self.dates[counter] = date
self._index[date] = counter
counter += 1

def count(self, project: ProjectModel):
to_count_date: datetime = datetime(project.state_changed_at.year, project.state_changed_at.month, 1)
to_count_date: datetime = datetime(
project.state_changed_at.year, project.state_changed_at.month, 1
)
index: int = self._index[to_count_date]
self.categories[project.id][index] += 1 if project.state == "successful" else 0

self.categories[project.category.id][index] += (
1 if project.state == "successful" else 0
)


@subscribe
Expand Down
15 changes: 9 additions & 6 deletions kickstarter/processing/monthly_categories_total.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from collections import defaultdict
from datetime import datetime
from typing import Dict, List, Optional, Tuple
from typing import Dict, Optional

from ..models import CategoryModel, ProjectModel
from . import subscribe
Expand All @@ -11,20 +11,23 @@ def __init__(self, start_year=2009, end_year=2020):
counter: int = 0
self.dates: Dict[int, datetime] = dict()
self._index: Dict[datetime, int] = dict()
self.categories: Dict[int, Dict[int, int]] = defaultdict(lambda: defaultdict(lambda: 0))
self.categories: Dict[int, Dict[int, int]] = defaultdict(
lambda: defaultdict(lambda: 0)
)

for year in range(start_year, end_year+1):
for year in range(start_year, end_year + 1):
for month in range(1, 13):
date = datetime(year, month, 1)
self.dates[counter] = date
self._index[date] = counter
counter += 1

def count(self, project: ProjectModel):
to_count_date: datetime = datetime(project.state_changed_at.year, project.state_changed_at.month, 1)
to_count_date: datetime = datetime(
project.state_changed_at.year, project.state_changed_at.month, 1
)
index: int = self._index[to_count_date]
self.categories[project.id][index] += 1

self.categories[project.category.id][index] += 1


@subscribe
Expand Down
40 changes: 39 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ typer = "^0.3.2"
streamlit = "^0.72.0"
pydantic = "^1.7.3"
plotly = "^4.13.0"
scipy = "^1.5.4"

[tool.poetry.dev-dependencies]
pytest = "^6.1.2"
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ pywinpty==0.5.7; os_name == "nt" and python_version >= "3.6"
pyzmq==20.0.0; python_version >= "3.6"
requests==2.25.0; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.6"
retrying==1.3.3
scipy==1.5.4; python_version >= "3.6"
send2trash==1.5.0; python_version >= "3.6"
six==1.15.0; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.5.0" and python_version >= "3.6"
smmap==3.0.4; python_version >= "3.6" and python_full_version < "3.0.0" or python_full_version >= "3.4.0" and python_version >= "3.6"
Expand Down

0 comments on commit d591cb1

Please sign in to comment.