Skip to content

Commit

Permalink
refactor: 함수명 명료화
Browse files Browse the repository at this point in the history
Fixes #42
  • Loading branch information
canolayoo78 authored and gsgh3016 committed Nov 24, 2024
1 parent b83d25b commit 1a32c40
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 38 deletions.
12 changes: 6 additions & 6 deletions analysis_dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
from streamlit_option_menu import option_menu

from streamlit_utils import (
choices_distribution,
column_length_distribution,
display_data_summary,
display_data_tab,
total_length_distribution,
make_choices_distribution_fig,
make_column_length_distribution_fig,
make_total_length_distribution_fig,
)

if __name__ == "__main__":
Expand Down Expand Up @@ -56,10 +56,10 @@
# 분포 확인
with tab3:
st.subheader("컬럼 별 데이터 길이 분포")
st.pyplot(column_length_distribution(df))
st.pyplot(make_column_length_distribution_fig(df))

st.subheader("전체 유효 컬럼 데이터 길이 분포")
st.pyplot(total_length_distribution(df))
st.pyplot(make_total_length_distribution_fig(df))

# 실험 데이터 확인
with tab4:
Expand All @@ -68,7 +68,7 @@
# 선다 확인
with tab5:
st.subheader("선다 확인")
st.pyplot(choices_distribution(df))
st.pyplot(make_choices_distribution_fig(df))

elif selected == "Compare":
st.title("🆚 Compare Datasets")
70 changes: 38 additions & 32 deletions streamlit_utils/visualization_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@
import numpy as np
import pandas as pd

# Shared styling values for the plotting helpers in this module.
title_fontsize = 16  # font size passed to ax.set_title
label_fontsize = 12  # font size for axis labels and for the percentage text on the choices chart
alpha = 0.7  # opacity used for both the bars and the dashed y-axis grid
fig_size_s = (10, 4)  # figsize of the single-axes figures (total length, choices)
fig_size_m = (10, 8)  # figsize of the 2x2 per-column figure


def set_before_length_plot(df: pd.DataFrame):
# 길이 표출을 위한 결측값 처리
Expand All @@ -17,7 +23,7 @@ def set_before_length_plot(df: pd.DataFrame):
return df, columns, bin_sizes


def plot_length_distribution_percentage(
def plot_length_distribution(
ax: axes.Axes, df: pd.DataFrame, column_name: Union[str, List[str]], bin_size=10, color="skyblue"
):
"""
Expand All @@ -40,57 +46,57 @@ def plot_length_distribution_percentage(
# 각 bin의 비율(%) 계산
percentages = (counts / len(lengths)) * 100

ax.bar(edges[:-1], percentages, width=bin_size, color=color, edgecolor="black", alpha=0.7, align="edge")
ax.set_title(f"{column_name} length distribution", fontsize=16)
ax.set_xlabel("Length of " + column_name, fontsize=12)
ax.set_ylabel("Percentage (%)", fontsize=12)
ax.grid(axis="y", linestyle="--", alpha=0.7)
ax.bar(edges[:-1], percentages, width=bin_size, color=color, edgecolor="black", alpha=alpha, align="edge")
ax.set_title(f"{column_name} length distribution", fontsize=title_fontsize)
ax.set_xlabel("Length of " + column_name, fontsize=label_fontsize)
ax.set_ylabel("Percentage (%)", fontsize=label_fontsize)
ax.grid(axis="y", linestyle="--", alpha=alpha)


def plot_choices_length_distribution(ax: axes.Axes, df: pd.DataFrame, bins=2, color="skyblue"):
    """Draw a bar chart of how many options each row's ``choices`` entry has.

    Every bar is the share, in percent, of rows whose ``choices`` value has
    that many elements. The same percentage is written as text on the bar.

    Args:
        ax: Axes to draw on.
        df: Frame with a ``choices`` column. Each cell is either a Python
            literal in string form (parsed with ``literal_eval``) or an
            already-parsed list/tuple. The frame is left unmodified.
        bins: Not used by this function; kept so existing calls keep working.
        color: Fill colour of the bars.

    Raises:
        ValueError, SyntaxError: Propagated from ``literal_eval`` when a cell
            is not a valid Python literal.
    """
    # Parse into a local Series instead of assigning back to df["choices"].
    # Writing the parsed lists into the caller's frame meant a second call on
    # the same frame handed lists to literal_eval, which raises ValueError.
    parsed_choices = df["choices"].apply(
        lambda cell: cell if isinstance(cell, (list, tuple)) else literal_eval(cell)
    )
    list_lengths = parsed_choices.apply(len)

    # Relative frequency of each length, as a percentage, ordered by length.
    value_counts = list_lengths.value_counts(normalize=True).sort_index() * 100

    # Write the percentage 5 points below the top of each bar.
    # NOTE(review): the pre-refactor version used fontsize=10 for this text;
    # it now follows label_fontsize. Confirm the size change is intended.
    for idx, value in zip(value_counts.index, value_counts.values):
        ax.text(idx, value - 5, f"{value:.1f}%", ha="center", fontsize=label_fontsize, color="black")

    ax.bar(value_counts.index, value_counts.values, color=color, edgecolor="black", alpha=alpha)
    ax.set_title("Choices Length Distribution", fontsize=title_fontsize)
    ax.set_xlabel("Length of Choices", fontsize=label_fontsize)
    ax.set_ylabel("Percentage (%)", fontsize=label_fontsize)
    ax.grid(axis="y", linestyle="--", alpha=alpha)
    # One tick per observed length, so only lengths that occur are labelled.
    ax.set_xticks(value_counts.index)


# NOTE(review): this span is a rendered diff hunk. Where two near-identical
# lines follow each other, the first appears to be the pre-commit line and the
# second its post-commit replacement.
# Old name:
def column_length_distribution(df: pd.DataFrame):
# New name:
def make_column_length_distribution_fig(df: pd.DataFrame):
# Builds and returns one figure holding a length-distribution chart per column.
df, columns, bin_sizes = set_before_length_plot(df)

# NOTE(review): inside this function the local name `axes` hides the `axes`
# module that the `axes.Axes` annotations elsewhere in the file refer to.
fig, axes = plt.subplots(2, 2, figsize=(10, 8))
# fig_size_m is (10, 8), so the figure size is the same as the literal above.
fig, axes = plt.subplots(2, 2, figsize=fig_size_m)
# Flatten the 2x2 grid so it can be zipped with the columns.
axes = axes.flatten()

# zip stops at the shortest input, so at most four columns get a chart.
for ax, column, bin_size in zip(axes, columns, bin_sizes):
plot_length_distribution_percentage(ax=ax, df=df, column_name=column, bin_size=bin_size)
plot_length_distribution(ax=ax, df=df, column_name=column, bin_size=bin_size)

fig.tight_layout()
return fig


# NOTE(review): this span is a rendered diff hunk. Where two near-identical
# lines follow each other, the first appears to be the pre-commit line and the
# second its post-commit replacement.
# Old name:
def total_length_distribution(df: pd.DataFrame):
# New name:
def make_total_length_distribution_fig(df: pd.DataFrame):
# Builds and returns a single-axes figure for all columns taken together.
df, columns, bin_sizes = set_before_length_plot(df)

# NOTE(review): the literal size is (10, 5) but fig_size_s is (10, 4), so the
# replacement line makes this figure one unit shorter. Confirm that is
# intended in a commit described as a rename.
fig, ax = plt.subplots(figsize=(10, 5))
fig, ax = plt.subplots(figsize=fig_size_s)

# The whole `columns` value is passed as column_name, with the last bin size.
plot_length_distribution_percentage(ax=ax, df=df, column_name=columns, bin_size=bin_sizes[-1])
plot_length_distribution(ax=ax, df=df, column_name=columns, bin_size=bin_sizes[-1])

fig.tight_layout()
return fig


# NOTE(review): this appears to be the pre-commit copy of the function; the
# same function is shown earlier in the file with module-level style constants
# in place of the literals below.
# Draws, on `ax`, the percentage of rows per number of entries in `choices`.
# The `bins` parameter is not used in the body.
def plot_choices_length_distribution(ax: axes.Axes, df: pd.DataFrame, bins=2, color="skyblue"):
# Parses each `choices` cell with literal_eval and writes the result back into
# the caller's frame, so the input DataFrame is modified.
df["choices"] = df["choices"].apply(literal_eval)
list_lengths = df["choices"].apply(len)
# Count how often each length occurs and convert to percentages
value_counts = list_lengths.value_counts(normalize=True).sort_index() * 100

# Write the percentage as text on each bar
for idx, value in zip(value_counts.index, value_counts.values):
# The text sits 5 percentage points below the top of the bar. fontsize is 10
# here; the other copy of this function uses label_fontsize (12).
ax.text(idx, value - 5, f"{value:.1f}%", ha="center", fontsize=10, color="black")

ax.bar(value_counts.index, value_counts.values, color=color, edgecolor="black", alpha=0.7)
ax.set_title("Choices Length Distribution", fontsize=16)
ax.set_xlabel("Length of Choices", fontsize=12)
ax.set_ylabel("Percentage (%)", fontsize=12)
ax.grid(axis="y", linestyle="--", alpha=0.7)
# One tick per observed length
ax.set_xticks(value_counts.index)


def choices_distribution(df: pd.DataFrame):
fig, ax = plt.subplots(figsize=(10, 5))
def make_choices_distribution_fig(df: pd.DataFrame):
fig, ax = plt.subplots(figsize=fig_size_s)

plot_choices_length_distribution(ax, df)

Expand Down

0 comments on commit 1a32c40

Please sign in to comment.