forked from o3etornam/Statbank_Hackathon2.0
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutility.py
93 lines (73 loc) · 3.47 KB
/
utility.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import requests
import json
import streamlit as st
import pandas as pd
import features
from pandasai import SmartDataframe
from pandasai.llm import OpenAI
from dotenv import load_dotenv
import os
# One shared HTTP session, reused by api_reader for every POST.
session = requests.Session()
# Base endpoint of the StatsBank API; query_builder appends a table-specific
# extension to this before calling api_reader.
url = 'https://statsbank.statsghana.gov.gh:443/api/v1/en/PHC 2021 StatsBank/'
# NOTE(review): presumably loads a local .env file into the environment; the
# key below is read from Streamlit secrets instead — confirm this is still needed.
load_dotenv()
# OpenAI key taken from Streamlit's secrets store; used to build the module-level `llm`.
api_key = st.secrets["OPENAI_API_KEY"]
@st.cache_data
def api_reader(url, query):
    """POST *query* to *url* and unpack the JSON table in the response.

    Args:
        url: Full endpoint URL to post to.
        query: JSON-serialisable query body, sent via ``json=``.

    Returns:
        A ``(data, columns)`` tuple: ``columns`` is the list of each
        column's ``'text'`` field; ``data`` holds one row per response
        record, formed by concatenating that record's ``'key'`` and
        ``'values'`` (presumably both lists — verify against the API).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    response = session.post(url, json=query)
    # Surface HTTP failures directly instead of letting an error body turn
    # into a confusing JSON/KeyError further down.
    response.raise_for_status()
    # 'utf-8-sig' strips a leading byte-order mark if the body carries one.
    payload = json.loads(response.content.decode('utf-8-sig'))
    columns = [col['text'] for col in payload['columns']]
    data = [record['key'] + record['values'] for record in payload['data']]
    return data, columns
@st.cache_data
def convert_df(df):
    """Return the result of ``df.to_csv()`` called with no arguments."""
    csv_text = df.to_csv()
    return csv_text
@st.cache_data
def load_query(path, root = 'queries/'):
    """Load a JSON query template from disk.

    Args:
        path: File path relative to *root*. The two are joined by plain
            string concatenation, so *root* must end with a separator.
        root: Directory prefix prepended to *path*.

    Returns:
        The parsed JSON document.
    """
    full_path = root + path
    # Pin the encoding rather than relying on the platform default;
    # 'utf-8-sig' reads plain UTF-8 and also tolerates a leading BOM.
    with open(full_path, encoding='utf-8-sig') as json_file:
        return json.load(json_file)
def _set_selection(query, code, values):
    """Assign *values* to every entry of ``query['query']`` whose code is *code*."""
    for obj in query['query']:
        if obj['code'] == code:
            obj['selection']['values'] = values


def query_builder(warehouse,features,age,query_semi_path, url = url):
    """Render the query widgets, run the API request and return the result.

    Args:
        warehouse: Mapping from a display name to a dict holding at least
            ``'extension'`` (appended to *url*) and ``'query_path'``
            (appended to *query_semi_path*).
        features: Object exposing ``regions``, ``districts`` and
            ``education`` attributes. This parameter shadows the imported
            ``features`` module inside the function.
        age: Sequence of age-group options; its first element is the
            default selection.
        query_semi_path: Prefix for the query template path handed to
            ``load_query``.
        url: Base API URL; defaults to the module-level ``url``.

    Returns:
        A ``pandas.DataFrame`` built from the rows and column names that
        ``api_reader`` returns.
    """
    selected = st.selectbox('Select the data you want to visualize',warehouse.keys(), key='1')
    # NOTE(review): the 'Disctrict' option is misspelled in the UI. It is left
    # unchanged here; any value other than National/Regional takes the
    # district branch below.
    level = st.selectbox('What level of visualization do want?', ['National','Regional','Disctrict'], key = '2')
    url = url + warehouse[selected]['extension']
    query = load_query(path = query_semi_path + warehouse[selected]['query_path'])

    # Pick the geographic values matching the requested level.
    if level == 'National':
        areas = ['Ghana']
    elif level == 'Regional':
        areas = features.regions
    else:
        areas = features.districts
    _set_selection(query, "Geographic_Area", areas)

    age_group = st.multiselect('Which Age group will you like to filter by', age, max_selections= 5,
                               default=[age[0]], key = '3')
    _set_selection(query, "Age", age_group)

    education = st.multiselect('Which Education level will you like to filter by', features.education, max_selections= 5,
                               default=["Never attended","Primary","Secondary","Tertiary - Bachelor's Degree"], key = '4')
    _set_selection(query, "Education", education)

    data, columns = api_reader(url= url,query=query)
    return pd.DataFrame(data,columns=columns)
def data_filter(dataset,w_variable, title):
    """Filter *dataset* on one column and collect further filter choices.

    Only the *w_variable* selection is applied to the returned frame. The
    location, education, gender and age selections are returned as-is for
    the caller to use.

    Args:
        dataset: DataFrame containing *w_variable* plus the
            ``'Geographic_Area'``, ``'Education'``, ``'Sex'`` and ``'Age'``
            columns.
        w_variable: Name of the column whose values are offered in the
            first multiselect.
        title: Text interpolated into the first widget's label.

    Returns:
        ``(filtered_df, location, education, gender, age_group)``.
    """
    filtered = st.multiselect(f'What {title} will you like to visualize',dataset[w_variable].unique())
    filtered_df = dataset[dataset[w_variable].isin(filtered)]
    # The remaining widgets offer only values still present after the first filter.
    location = st.multiselect('Which Region will you like to filter by', filtered_df['Geographic_Area'].unique())
    education = st.multiselect('Which Education level will you like to filter by', filtered_df['Education'].unique())
    gender = st.multiselect('Which gender will you like to filter by', filtered_df['Sex'].unique())
    age_group = st.multiselect('Which Age group will you like to filter by', filtered_df['Age'].unique())
    return filtered_df,location, education, gender, age_group
# Module-level LLM client built from `api_key`; used as the default `llm`
# argument of query_df.
llm = OpenAI(api_token=api_key)
@st.cache_resource
def query_df(df,prompt,llm = llm):
    """Ask the LLM a question about *df* and write the answer to the page.

    Args:
        df: Data to query; wrapped in a ``SmartDataframe``.
        prompt: The question to ask. When falsy, nothing is asked or written.
        llm: LLM passed to the ``SmartDataframe`` config; defaults to the
            module-level ``llm``.

    Returns:
        Whatever ``st.write`` returns when a prompt is given, otherwise
        ``None``.
    """
    # Nothing to ask: skip building the SmartDataframe altogether.
    if not prompt:
        return None
    smart_df = SmartDataframe(df, config={"llm": llm})
    return st.write(smart_df.chat(prompt))