-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPrepareData.py
115 lines (75 loc) · 4.21 KB
/
PrepareData.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
class Model_Train():
    """Build a labelled IMU dataset from per-volunteer recordings and split it.

    For every volunteer folder an ``annotations.CSV`` file is combined with the
    sensor files under ``IMUs/``: sensor rows whose ``Time`` falls inside an
    annotated interval are tagged with the annotation label (``ADL``) and the
    name of the sensor file they came from (``module``).
    """

    # Root directory holding the raw data (one sub-folder per volunteer).
    data_prefix = r"/home/zaid/project/data"
    # Directory on disk where the generated output files are stored.
    output_path_prefix = r"/home/zaid/project/output"
    # Names of the per-volunteer data folders.
    volunteers_data_paths = ["volunteer_01", "volunteer_02", "volunteer_03", "volunteer_04", "volunteer_05",
                             "volunteer_06", "volunteer_07",
                             "volunteer_08", "volunteer_09", "volunteer_10"]
    # Dictionary mapping each annotation column to the IMU data files it labels.
    annotation_col_dict = {
        "BothArmsLabel": ["lla", "lua", "rua", "rla"],
        "RightArmLabel": ["rua", "rla"],
        "LeftArmLabel": ["lla", "lua"],
        "Locomotion": ["back", "rt"]
    }
    # Column names assigned to the sensor CSV files when they are read.
    sensor_columns = ["dataType", "Time", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
    # Column names of the prepared (labelled) dataset.
    output_columns = sensor_columns + ["ADL", "module"]

    def read_annotation_data(self, data_path):
        """Read ``annotations.CSV`` from ``data_path`` and return it as a DataFrame."""
        return pd.read_csv(os.path.join(data_path, "annotations.CSV"))

    def _label_intervals(self, annotation_df, anno_col):
        """Return ``(label, start_time, end_time)`` tuples for one annotation column.

        A non-missing value in ``anno_col`` marks the start of an interval; the
        ``Time [msec]`` of the following row is taken as its end, and that
        following row is then skipped.
        """
        intervals = []
        n_rows = annotation_df.shape[0]
        row_no = 0
        while row_no < n_rows:
            label = annotation_df[anno_col].iloc[row_no]
            # pd.isna is used instead of an identity test against np.nan:
            # values taken from a float column are np.float64 NaNs, which are
            # not the np.nan object and would be mistaken for real labels.
            if pd.isna(label):
                row_no += 1
                continue
            if row_no + 1 >= n_rows:
                # A label on the last row has no following row to supply the
                # end time; skip it instead of raising IndexError.
                print(f"Skipping label {label!r} in {anno_col}: no end row after row {row_no}")
                break
            times = annotation_df["Time [msec]"]
            intervals.append((label, times.iloc[row_no], times.iloc[row_no + 1]))
            row_no += 2
        return intervals

    def prepare_data(self, annotation_df, data_path):
        """Label the sensor data of one volunteer using its annotation table.

        Args:
            annotation_df: annotations with a ``Time [msec]`` column and one
                column per key of ``annotation_col_dict``.
            data_path: volunteer folder containing ``IMUs/<module>.csv`` files.

        Returns:
            DataFrame with columns ``output_columns``: every sensor row whose
            ``Time`` lies inside an annotated interval (both ends inclusive),
            tagged with its ``ADL`` label and source ``module``. Empty (but
            with the same columns) when nothing was labelled.
        """
        sensor_cache = {}   # each sensor file is parsed once per call
        labelled_frames = []
        for anno_col, data_files in self.annotation_col_dict.items():
            intervals = self._label_intervals(annotation_df, anno_col)
            for data_file in data_files:
                print(f"Data File {data_file}")
                if data_file not in sensor_cache:
                    sensor_cache[data_file] = pd.read_csv(
                        os.path.join(data_path, "IMUs", f"{data_file}.csv"),
                        names=self.sensor_columns)
                sensorData_df = sensor_cache[data_file]
                for ADL, startTime, endTime in intervals:
                    in_window = (sensorData_df["Time"] >= startTime) & (sensorData_df["Time"] <= endTime)
                    temp_df = sensorData_df[in_window].copy()
                    temp_df["ADL"] = ADL
                    temp_df["module"] = data_file
                    labelled_frames.append(temp_df)
        if not labelled_frames:
            return pd.DataFrame(columns=self.output_columns)
        # One concat at the end replaces repeated DataFrame.append calls
        # (removed in pandas 2.0); original row indices are kept.
        return pd.concat(labelled_frames)

    def split_test_train(self, final_df):
        """Split ``final_df`` 70/30 into train and test, stratified on ``ADL``.

        Returns:
            ``(train, test)`` DataFrames; shuffled with a fixed random state of 0.
        """
        train, test = train_test_split(final_df, test_size=0.3, stratify=final_df["ADL"], shuffle=True,
                                       random_state=0)
        return train, test
# Script entry point: prepare the labelled data of every volunteer, merge it,
# and write stratified train/test CSV files to the output folder.
if __name__ == "__main__":
    obj = Model_Train()
    # Collect one prepared frame per volunteer. Previously each iteration
    # appended to a frame that was never updated, so only the last volunteer's
    # data reached the split.
    volunteer_frames = []
    for volunteer_data_path in obj.volunteers_data_paths:
        print(f"Data preparating for {volunteer_data_path}")
        data_path = os.path.join(obj.data_prefix, volunteer_data_path)
        annotation_df = obj.read_annotation_data(data_path)
        volunteer_frames.append(obj.prepare_data(annotation_df, data_path))
    # Single concat instead of DataFrame.append (removed in pandas 2.0).
    final_df = pd.concat(volunteer_frames)
    # Drop the two columns that are not written to the output files.
    final_df = final_df.drop(labels=["dataType", "Time"], axis=1)
    train, test = obj.split_test_train(final_df)
    # Make sure the output folder exists before writing.
    os.makedirs(obj.output_path_prefix, exist_ok=True)
    train.to_csv(os.path.join(obj.output_path_prefix, "train_data.csv"), index=False)
    # NOTE(review): reset_index() adds an "index" column to the test file that
    # the train file does not have; kept unchanged in case downstream code
    # reads it. Confirm whether reset_index(drop=True) was intended.
    test = test.reset_index()
    test.to_csv(os.path.join(obj.output_path_prefix, "test_data.csv"), index=False)