-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrandom_data.py
66 lines (56 loc) · 2.64 KB
/
random_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
""" Module that allows to generate random data files formatted the same way as the test and training sets from
Kaggle."""
import pandas as pd
import numpy as np
import files_metadata as fmd
from papagei import papagei as ppg
# Lower and upper bounds of the acoustic values; read by _generate_random_acoustic when scaling its random samples.
min_acoustic = -1.0
max_acoustic = 1.0
def generate_random_training(file_name, nb_points):
    """
    Writes a CSV file of dummy data laid out like the train sets from Kaggle.
    :param file_name: Destination file name. It is passed through _format_file_extension, so a name without the
    expected extension gets reformatted.
    :param nb_points: Number of rows of dummy data to write.
    :return: Nothing; the data is saved to disk in CSV format, without the index column.
    """
    # Resolve the output name first so that any extension warning is emitted before data is generated.
    csv_path = _format_file_extension(file_name)
    random_part = _generate_random_acoustic(nb_points)
    ones_column = np.ones((nb_points, 1))
    # Append the constant column of ones to the right of the random values; columns are named by fmd.COLUMN_NAME.
    table = pd.DataFrame(np.concatenate((random_part, ones_column), axis=1), columns=fmd.COLUMN_NAME)
    table.to_csv(csv_path, index=False)
def generate_random_testing(file_name, nb_points):
    """
    Writes a CSV file of dummy data laid out like the test sets from Kaggle.
    :param file_name: Destination file name. It is passed through _format_file_extension, so a name without the
    expected extension gets reformatted.
    :param nb_points: Number of rows of dummy data to write.
    :return: Nothing; the data is saved to disk in CSV format, without the index column.
    """
    # Resolve the output name first so that any extension warning is emitted before data is generated.
    csv_path = _format_file_extension(file_name)
    samples = _generate_random_acoustic(nb_points)
    # Only the first entry of fmd.COLUMN_NAME is used as header for the random values.
    header = [fmd.COLUMN_NAME[0]]
    pd.DataFrame(samples, columns=header).to_csv(csv_path, index=False)
def _format_file_extension(file_name):
    """
    Takes a file_name and, if it does not end with the expected extension, reformats it to get the right extension.
    When reformatting, a warning is emitted, every "." of the final path component is replaced by "dot" and the
    expected extension is appended. Directory components are left untouched, so a path such as "./out/data.txt"
    becomes "./out/datadottxt<extension>" and still points to the same folder.
    :param file_name: file name (optionally preceded by a directory path) to be checked/reformatted.
    :return: reformatted file name, or file_name unchanged when it already ends with the expected extension.
    """
    import os  # local import: only needed to isolate the final path component below

    expected = fmd.EXPECTED_FILE_EXTENSION
    if file_name.endswith(expected):
        return file_name

    ppg.mock_warning("Unexpected file extension.")
    # Split by length rather than re-joining so the directory prefix is preserved character for character.
    base_name = os.path.basename(file_name)
    directory = file_name[:len(file_name) - len(base_name)]
    # Only the file's own dots are neutralised; dots in "./" or "../" must survive for the path to stay valid.
    return directory + base_name.replace(".", "dot") + expected
def _generate_random_acoustic(nb_points):
    """
    Generates an array [nb_points x DATA_DIMENSION-1] and fills it with random numbers drawn uniformly from the
    half-open interval going from min_acoustic (included) to max_acoustic (excluded).
    :param nb_points: number of points (rows) to include in the array.
    :return: an array filled with random points.
    """
    # np.random.rand samples from [0, 1): stretch it by the full width of the range and shift it to the lower bound.
    # Scaling by the half-span and shifting by the mean would only cover the upper half of the range.
    lower_bound = min(min_acoustic, max_acoustic)
    width = abs(max_acoustic - min_acoustic)
    acoustic_data = np.random.rand(nb_points, fmd.DATA_DIMENSION - 1)
    acoustic_data *= width
    acoustic_data += lower_bound
    return acoustic_data