-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPyODAM.py
executable file
·84 lines (69 loc) · 2.83 KB
/
PyODAM.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import requests
import pandas as pd
class Odam:
    """Small client that downloads tab-separated data from an ODAM-style web API.

    Attributes:
        repos: Base URL of the API; request paths are appended to it.
        dataset: Dataset name inserted into every request path.
    """

    # Day-zero used when turning serial day numbers into timestamps.
    # NOTE(review): this looks like the spreadsheet (Excel/OLE) serial-date
    # origin -- confirm against the data producer.
    _SERIAL_ORIGIN = '1899-12-30'

    def __init__(self, repos, dataset):
        """Store the API base URL and the dataset name."""
        self.repos = repos
        self.dataset = dataset

    def getDataFromODAM(self, subset='', query=''):
        """Fetch one table from the API and return it as a DataFrame.

        The request URL is ``<repos>/getdata/tsv/<dataset>`` followed by
        ``/(<subset>)`` when *subset* is non-empty and ``/<query>`` when
        *query* is non-empty.

        Args:
            subset: Subset name(s) placed between parentheses in the URL.
            query: Extra path segment appended after the subset.

        Returns:
            pandas.DataFrame: Parsed table (see ``_parse_tsv`` for the exact
            clean-up rules).

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
        """
        # NOTE(review): the 'accept' header advertises text/csv although the
        # path asks for tsv; left unchanged because the server contract is
        # not visible from here.
        headers = {'authorization': "Basic API Key Ommitted", 'accept': "text/csv"}
        urlapi = self.repos + '/getdata/tsv/' + self.dataset
        if subset:
            urlapi = urlapi + '/(' + subset + ')'
        if query:
            urlapi = urlapi + '/' + query

        response = requests.get(urlapi, headers=headers)
        # Fail loudly on HTTP errors instead of parsing an error page as data.
        response.raise_for_status()
        return self._parse_tsv(response.text)

    @staticmethod
    def _parse_tsv(data):
        """Parse tab-separated text (first line = header) into a DataFrame.

        Rows containing the literal string ``'None'`` or a missing cell
        (including the short row produced by a trailing newline in a
        multi-column table) are dropped. Every column that can be converted
        with ``pandas.to_numeric`` is converted; the others stay as text.

        NOTE(review): ``reset_index()`` is called without ``drop=True``, so
        the result carries an extra leading ``index`` column holding the
        original row positions. Kept as-is because callers may rely on it.
        """
        # Split once and reuse the result for both the header and the rows.
        rows = [line.split('\t') for line in data.split('\n')]
        labels = rows[0]

        df = pd.DataFrame(rows, columns=labels)
        # The first row is the header itself, not data.
        df.drop(df.index[0], inplace=True)
        df = df.mask(df.eq('None')).dropna().reset_index()

        for label in labels:
            try:
                df[label] = pd.to_numeric(df[label])
            except (ValueError, TypeError):
                # Non-numeric column (or duplicated header name): keep as-is.
                pass
        return df

    def getSubsetFromODAM(self, subset='', query=''):
        """Download a data subset together with its metadata tables.

        Issues five requests: the data itself (using *query*) and the
        ``identifier``, ``factor``, ``quantitative`` and ``qualitative``
        tables for *subset*.

        Args:
            subset: Comma-separated subset name(s).
            query: Extra path segment used for the data request only.

        Returns:
            dict: Keys ``'data'``, ``'identifier'``, ``'factor'``,
            ``'quantitative'``, ``'qualitative'`` (DataFrames) and
            ``'numvars'`` (list of quantitative attribute names whose data
            column is numeric).
        """
        data = self.getDataFromODAM(subset, query)
        identifier = self.getDataFromODAM(subset, 'identifier')
        factor = self.getDataFromODAM(subset, 'factor')
        quantitative = self.getDataFromODAM(subset, 'quantitative')
        qualitative = self.getDataFromODAM(subset, 'qualitative')

        # Keep only the quantitative attributes whose column converted to
        # numeric. The numeric column list does not depend on the subset
        # being inspected, so compute it once.
        numeric_columns = self.getVarNum(data)
        numvars = []
        for name in subset.split(','):
            attributes = quantitative[quantitative.Subset == name]['Attribute']
            numvars = numvars + self.intersection(attributes, numeric_columns)

        return {
            'data': data,
            'identifier': identifier,
            'factor': factor,
            'quantitative': quantitative,
            'qualitative': qualitative,
            'numvars': numvars,
        }

    @staticmethod
    def intersection(lst1, lst2):
        """Return the items of *lst1* that are also in *lst2*, in *lst1* order.

        Duplicates in *lst1* are preserved. Membership is tested with
        ``in lst2``, so *lst2* should be a container of values (e.g. a list).
        """
        return [value for value in lst1 if value in lst2]

    @staticmethod
    def getVarNum(dataframe):
        """Return the names of the columns that can be converted to numeric.

        Note:
            Convertible columns are converted **in place** on *dataframe*;
            this side effect is preserved for backward compatibility.

        Args:
            dataframe: DataFrame whose columns are inspected.

        Returns:
            list: Column labels for which ``pandas.to_numeric`` succeeded.
        """
        varnum = []
        for label in dataframe.columns:
            try:
                dataframe[label] = pd.to_numeric(dataframe[label])
            except (ValueError, TypeError):
                # Not numeric: skip the column without touching it.
                continue
            varnum.append(label)
        return varnum

    @staticmethod
    def _serial_to_str(values, fmt):
        """Format serial day numbers (days since ``_SERIAL_ORIGIN``) with *fmt*."""
        # Build the origin timestamp once instead of once per element.
        origin = pd.to_datetime(Odam._SERIAL_ORIGIN)
        return [(origin + pd.to_timedelta(x, 'D')).strftime(fmt) for x in values]

    @staticmethod
    def convertDateToStr(DataNum):
        """Convert serial day numbers to ``MM/DD/YYYY`` strings.

        Args:
            DataNum: Iterable of day counts relative to 1899-12-30.

        Returns:
            list: One formatted date string per input value.
        """
        return Odam._serial_to_str(DataNum, "%m/%d/%Y")

    @staticmethod
    def convertTimeToStr(TimeNum):
        """Convert serial day numbers to ``HH:MM`` strings.

        Args:
            TimeNum: Iterable of day counts (fractions are parts of a day)
                relative to 1899-12-30.

        Returns:
            list: One formatted time string per input value.
        """
        return Odam._serial_to_str(TimeNum, "%H:%M")