Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feat] Streamlit 앱에 실험 데이터 분석을 위한 기능 추가 #41

Merged
merged 12 commits into from
Nov 23, 2024
Merged
3 changes: 2 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
HF_TOKEN={your_hf_token}
STREAMLIT_DATA_PATH={streamlit_data_path}
STREAMLIT_DATA_PATH={streamlit_data_path}
STREAMLIT_EXPERIMENT_DATA_PATH={streamlit_experiment_data_path}
13 changes: 13 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "streamlit debug",
"type": "debugpy",
"request": "launch",
"module": "streamlit",
"args": ["run", "${file}"],
"justMyCode": true
}
]
}
29 changes: 15 additions & 14 deletions analysis_dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from dotenv import load_dotenv
from streamlit_option_menu import option_menu

from streamlit_utils import access_data_by_index, display_data_summary, display_question_format, filter_data_by_column
from streamlit_utils import display_data_summary, display_data_tab

if __name__ == "__main__":

Expand All @@ -24,35 +24,36 @@
if selected == "Home":
st.title("📊 Data Analysis")
uploaded_file = st.sidebar.file_uploader("Upload a CSV file for analysis", type="csv")
tab1, tab2, tab3 = st.tabs(["📊 데이터 개요", "🔍 데이터 탐색", "📈 데이터 분포"])
experiment_file = st.sidebar.file_uploader("Upload a experiment result CSV file for analysis", type="csv")
tab1, tab2, tab3, tab4 = st.tabs(["📊 데이터 개요", "🔍 데이터 탐색", "📈 데이터 분포", "🔬 실험 데이터"])

if uploaded_file:
df = pd.read_csv(uploaded_file)
else:
# 첨부 파일이 없으면 기본적으로 train.csv에 대한 분석을 출력합니다.
# 첨부 파일이 없으면 기본적으로 설정한 학습 데이터에 대한 분석을 출력합니다.
# .env에서 STREAMLIT_DATA_PATH, STREAMLIT_EXPERIMENT_DATA_PATH에 각각 학습 데이터, 실험 데이터를 설정하세요.
df = pd.read_csv(os.getenv("STREAMLIT_DATA_PATH"))
if experiment_file:
exp_df = pd.read_csv(experiment_file)
else:
exp_df = pd.read_csv(os.getenv("STREAMLIT_EXPERIMENT_DATA_PATH"))

# 데이터 요약
with tab1:
display_data_summary(df)

# 개별 데이터 접근
with tab2:
st.subheader("전체 데이터 확인")
st.dataframe(df)

st.subheader("개별 데이터 확인")
access_method = st.radio("데이터 접근 방식 선택", ("Access by Index", "Filter by Column"))
if access_method == "Access by Index":
access_data_by_index(df)
elif access_method == "Filter by Column":
filter_data_by_column(df)

display_question_format(df)
display_data_tab(df)

# 분포 확인
with tab3:
st.subheader("데이터 분포")
# TODO: Add distribution plotting logic

# 실험 데이터 확인
with tab4:
display_data_tab(exp_df)

elif selected == "Compare":
st.title("🆚 Compare Datasets")
1 change: 1 addition & 0 deletions streamlit_utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .data_util import *
from .key_manager import *
42 changes: 37 additions & 5 deletions streamlit_utils/data_util.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import pandas as pd
import streamlit as st

from .key_manager import key_manager


# 데이터 요약 출력 함수
def display_data_summary(df: pd.DataFrame):
Expand All @@ -27,9 +29,9 @@ def access_data_by_index(df: pd.DataFrame):
min_value=0,
max_value=len(df) - 1,
step=1,
key="unique_key_1",
key=key_manager.generate_key(),
gsgh3016 marked this conversation as resolved.
Show resolved Hide resolved
)
if st.button("Retrieve by Index"):
if st.button("Retrieve by Index", key=key_manager.generate_key()):
if 0 <= index_input < len(df):
row_data = df.iloc[int(index_input)]
st.write(f"Row at index {int(index_input)}:")
Expand All @@ -41,8 +43,8 @@ def access_data_by_index(df: pd.DataFrame):
# 칼럼 필터링 함수
def filter_data_by_column(df: pd.DataFrame):
st.markdown("#### Filter Data by Column")
column = st.selectbox("Select a column to filter by:", df.columns)
search_value = st.text_input(f"Enter the value to search in '{column}':")
column = st.selectbox("Select a column to filter by:", df.columns, key=key_manager.generate_key())
search_value = st.text_input(f"Enter the value to search in '{column}':", key=key_manager.generate_key())
eyeol marked this conversation as resolved.
Show resolved Hide resolved

if st.button("Search"):
filtered_df = df[df[column].astype(str).str.contains(search_value, na=False, case=False, regex=False)]
Expand All @@ -66,7 +68,7 @@ def display_question_format(df: pd.DataFrame):
min_value=0,
max_value=len(df) - 1,
step=1,
key="unique_key_2",
key=key_manager.generate_key(),
)
row = df.iloc[question_idx]
paragraph = row["paragraph"]
Expand All @@ -85,6 +87,14 @@ def display_question_format(df: pd.DataFrame):
st.markdown(body="#### 🔍 <보기>")
st.write(row["question_plus"])

default_columns = [
"id",
"paragraph",
"question",
"question_plus",
"choices",
"answer",
] # 제공된 데이터셋의 기본 열 이름 정보
choices_list = eval(choices) if isinstance(choices, str) else choices
st.markdown("#### 📝 선택지")
for idx, choice in enumerate(choices_list, start=1):
Expand All @@ -101,3 +111,25 @@ def display_question_format(df: pd.DataFrame):
if "answer" in df.columns:
st.markdown("#### ✅ 정답")
st.write(row["answer"])

# 기본 열이 아닌 생성된 열일 경우 추가로 렌더링 하는 기능
for column in df.columns:
if column not in default_columns:
st.markdown(f"#### {column}")
st.write(row[column])


def display_data_tab(df: pd.DataFrame):
    """Render one data tab: the full table followed by per-row inspection tools.

    Shows ``df`` as a table, lets the user pick between index-based access and
    column filtering, and finally renders the question-format view.

    Args:
        df: The dataset to display in this tab.
    """
    # Whole-dataset table.
    st.subheader("전체 데이터 확인")
    table_key = key_manager.generate_key()
    st.dataframe(df, key=table_key)

    # Per-row inspection: map each radio label to the function that renders it.
    st.subheader("개별 데이터 확인")
    handlers = {
        "Access by Index": access_data_by_index,
        "Filter by Column": filter_data_by_column,
    }
    chosen = st.radio(
        "데이터 접근 방식 선택",
        tuple(handlers),
        key=key_manager.generate_key(),
    )
    render_selected = handlers.get(chosen)
    if render_selected is not None:
        render_selected(df)

    display_question_format(df)
17 changes: 17 additions & 0 deletions streamlit_utils/key_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
class KeyManager:
    """Hand out sequential, prefixed element IDs for UI widgets.

    Each call to :meth:`generate_key` increments an internal counter and
    returns ``"{prefix}_{counter}"``. The resulting keys therefore depend on
    how many keys were requested before, over the whole lifetime of the
    object. Call :meth:`reset` to restart the numbering.

    NOTE(review): in a Streamlit app the module-level ``key_manager`` instance
    presumably survives script reruns, so the same widget would receive a
    different key on every rerun unless ``reset()`` is called at the top of
    the script -- confirm against the Streamlit widget-behavior docs.
    """

    def __init__(self, prefix: str = "widget") -> None:
        """Create a manager whose keys start with ``prefix``.

        Args:
            prefix: Text placed before the underscore in every generated key.
        """
        self.prefix = prefix
        self.counter = 0  # number of keys handed out since creation/reset

    def generate_key(self) -> str:
        """Generate the next element ID.

        Returns:
            str: Element ID of the form ``"{prefix}_{number}"``; the number
            starts at 1 and increases by one per call.
        """
        self.counter += 1
        return f"{self.prefix}_{self.counter}"

    def reset(self) -> None:
        """Restart numbering so the next generated key is ``"{prefix}_1"``."""
        self.counter = 0


# Shared manager object used to generate element IDs.
key_manager = KeyManager()