refactor: 함수명 명료화

Fixes #42
boostcampaitech7 · Nov 24, 2024 · 1a32c40
1 parent b83d25b
commit 1a32c40
Show file tree

Hide file tree

Showing 2 changed files with 44 additions and 38 deletions.
diff --git a/analysis_dashboard.py b/analysis_dashboard.py
@@ -6,11 +6,11 @@
 from streamlit_option_menu import option_menu
 
 from streamlit_utils import (
-    choices_distribution,
-    column_length_distribution,
     display_data_summary,
     display_data_tab,
-    total_length_distribution,
+    make_choices_distribution_fig,
+    make_column_length_distribution_fig,
+    make_total_length_distribution_fig,
 )
 
 if __name__ == "__main__":
@@ -56,10 +56,10 @@
         # 분포 확인
         with tab3:
             st.subheader("컬럼 별 데이터 길이 분포")
-            st.pyplot(column_length_distribution(df))
+            st.pyplot(make_column_length_distribution_fig(df))
 
             st.subheader("전체 유효 컬럼 데이터 길이 분포")
-            st.pyplot(total_length_distribution(df))
+            st.pyplot(make_total_length_distribution_fig(df))
 
         # 실험 데이터 확인
         with tab4:
@@ -68,7 +68,7 @@
         # 선다 확인
         with tab5:
             st.subheader("선다 확인")
-            st.pyplot(choices_distribution(df))
+            st.pyplot(make_choices_distribution_fig(df))
 
     elif selected == "Compare":
         st.title("🆚 Compare Datasets")
diff --git a/streamlit_utils/visualization_utils.py b/streamlit_utils/visualization_utils.py
@@ -6,6 +6,12 @@
 import numpy as np
 import pandas as pd
 
+title_fontsize = 16
+label_fontsize = 12
+alpha = 0.7
+fig_size_s = (10, 4)
+fig_size_m = (10, 8)
+
 
 def set_before_length_plot(df: pd.DataFrame):
     # 길이 표출을 위한 결측값 처리
@@ -17,7 +23,7 @@ def set_before_length_plot(df: pd.DataFrame):
     return df, columns, bin_sizes
 
 
-def plot_length_distribution_percentage(
+def plot_length_distribution(
     ax: axes.Axes, df: pd.DataFrame, column_name: Union[str, List[str]], bin_size=10, color="skyblue"
 ):
     """
@@ -40,57 +46,57 @@ def plot_length_distribution_percentage(
     # 각 bin의 비율(%) 계산
     percentages = (counts / len(lengths)) * 100
 
-    ax.bar(edges[:-1], percentages, width=bin_size, color=color, edgecolor="black", alpha=0.7, align="edge")
-    ax.set_title(f"{column_name} length distribution", fontsize=16)
-    ax.set_xlabel("Length of " + column_name, fontsize=12)
-    ax.set_ylabel("Percentage (%)", fontsize=12)
-    ax.grid(axis="y", linestyle="--", alpha=0.7)
+    ax.bar(edges[:-1], percentages, width=bin_size, color=color, edgecolor="black", alpha=alpha, align="edge")
+    ax.set_title(f"{column_name} length distribution", fontsize=title_fontsize)
+    ax.set_xlabel("Length of " + column_name, fontsize=label_fontsize)
+    ax.set_ylabel("Percentage (%)", fontsize=label_fontsize)
+    ax.grid(axis="y", linestyle="--", alpha=alpha)
+
+
+def plot_choices_length_distribution(ax: axes.Axes, df: pd.DataFrame, bins=2, color="skyblue"):
+    df["choices"] = df["choices"].apply(literal_eval)
+    list_lengths = df["choices"].apply(len)
+    # 빈도수 계산 및 비율로 변환
+    value_counts = list_lengths.value_counts(normalize=True).sort_index() * 100
+
+    # 비율 text 명시
+    for idx, value in zip(value_counts.index, value_counts.values):
+        ax.text(idx, value - 5, f"{value:.1f}%", ha="center", fontsize=label_fontsize, color="black")
+
+    ax.bar(value_counts.index, value_counts.values, color=color, edgecolor="black", alpha=alpha)
+    ax.set_title("Choices Length Distribution", fontsize=title_fontsize)
+    ax.set_xlabel("Length of Choices", fontsize=label_fontsize)
+    ax.set_ylabel("Percentage (%)", fontsize=label_fontsize)
+    ax.grid(axis="y", linestyle="--", alpha=alpha)
+    ax.set_xticks(value_counts.index)
 
 
-def column_length_distribution(df: pd.DataFrame):
+def make_column_length_distribution_fig(df: pd.DataFrame):
     df, columns, bin_sizes = set_before_length_plot(df)
 
-    fig, axes = plt.subplots(2, 2, figsize=(10, 8))
+    fig, axes = plt.subplots(2, 2, figsize=fig_size_m)
     axes = axes.flatten()
 
     for ax, column, bin_size in zip(axes, columns, bin_sizes):
-        plot_length_distribution_percentage(ax=ax, df=df, column_name=column, bin_size=bin_size)
+        plot_length_distribution(ax=ax, df=df, column_name=column, bin_size=bin_size)
 
     fig.tight_layout()
     return fig
 
 
-def total_length_distribution(df: pd.DataFrame):
+def make_total_length_distribution_fig(df: pd.DataFrame):
     df, columns, bin_sizes = set_before_length_plot(df)
 
-    fig, ax = plt.subplots(figsize=(10, 5))
+    fig, ax = plt.subplots(figsize=fig_size_s)
 
-    plot_length_distribution_percentage(ax=ax, df=df, column_name=columns, bin_size=bin_sizes[-1])
+    plot_length_distribution(ax=ax, df=df, column_name=columns, bin_size=bin_sizes[-1])
 
     fig.tight_layout()
     return fig
 
 
-def plot_choices_length_distribution(ax: axes.Axes, df: pd.DataFrame, bins=2, color="skyblue"):
-    df["choices"] = df["choices"].apply(literal_eval)
-    list_lengths = df["choices"].apply(len)
-    # 빈도수 계산 및 비율로 변환
-    value_counts = list_lengths.value_counts(normalize=True).sort_index() * 100
-
-    # 비율 text 명시
-    for idx, value in zip(value_counts.index, value_counts.values):
-        ax.text(idx, value - 5, f"{value:.1f}%", ha="center", fontsize=10, color="black")
-
-    ax.bar(value_counts.index, value_counts.values, color=color, edgecolor="black", alpha=0.7)
-    ax.set_title("Choices Length Distribution", fontsize=16)
-    ax.set_xlabel("Length of Choices", fontsize=12)
-    ax.set_ylabel("Percentage (%)", fontsize=12)
-    ax.grid(axis="y", linestyle="--", alpha=0.7)
-    ax.set_xticks(value_counts.index)
-
-
-def choices_distribution(df: pd.DataFrame):
-    fig, ax = plt.subplots(figsize=(10, 5))
+def make_choices_distribution_fig(df: pd.DataFrame):
+    fig, ax = plt.subplots(figsize=fig_size_s)
 
     plot_choices_length_distribution(ax, df)