forked from OBOFoundry/OBOFoundry.github.io
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsort-ontologies.py
executable file
·82 lines (66 loc) · 2.6 KB
/
sort-ontologies.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env python3
import csv
import sys
import yaml
from argparse import ArgumentParser
def main(args):
    '''Parse the command line and write a sorted copy of the ontology YAML.

    args is an argument vector laid out like sys.argv: the first element is
    the program name and the remaining elements are, in order, the unsorted
    YAML path, the metadata grid path and the output YAML path. Passing None
    falls back to reading sys.argv.

    The ontology order is read from the grid, applied to the loaded YAML
    data, and the result is written to the output path. argparse exits the
    process (status 2) when the arguments are missing or malformed.
    '''
    parser = ArgumentParser(description='''
Takes a YAML file containing information for various ontologies and a metadata file specifying
the sorting order for ontologies, and then produces a sorted version input YAML''')
    parser.add_argument('unsorted_yaml', type=str,
                        help='Unsorted YAML file containing information for ontologies')
    parser.add_argument('metadata_grid', type=str,
                        help='CSV or TSV file containing metadata information for ontologies')
    parser.add_argument('output_yaml', type=str,
                        help='Name of output YAML file that will contain sorted ontology information')

    # Parse the vector we were given instead of silently re-reading sys.argv,
    # so that main() honours its own parameter. The leading program name is
    # dropped, mirroring what argparse does with sys.argv. None keeps the
    # argparse default of reading sys.argv[1:].
    argv = None if args is None else list(args)[1:]
    options = parser.parse_args(argv)

    sort_order = get_sort_order(options.metadata_grid)
    data = load_data(options.unsorted_yaml)
    data = sort_ontologies(data, sort_order)
    write_data(data, options.output_yaml)
def get_sort_order(grid):
    '''Return the ontology IDs listed in the metadata grid, in file order.

    grid is the path to a delimited text file. A name ending in .csv is read
    as comma-separated; a name ending in .tsv or .txt is read as
    tab-separated (the extension check ignores case). For any other name an
    error naming the file is printed to stderr and the process exits with
    status 1.

    The first row is treated as a header and skipped. The first field of
    every following non-empty row is collected, so the returned list mirrors
    the row order of the file. An empty or header-only file yields [].
    '''
    # Decide on the delimiter from the file extension. Testing the suffix
    # (rather than searching the whole path) keeps directory names such as
    # "exports.csv/" from influencing the result.
    lowered = grid.lower()
    if lowered.endswith('.csv'):
        separator = ','
    elif lowered.endswith(('.tsv', '.txt')):
        separator = '\t'
    else:
        print('%s must be tab- or comma-separated.' % grid, file=sys.stderr)
        sys.exit(1)

    # newline='' leaves line-ending handling to the csv module, as its
    # documentation recommends.
    with open(grid, 'r', newline='') as f:
        reader = csv.reader(f, delimiter=separator)
        # Ignore the header row (tolerating a completely empty file):
        next(reader, None)
        # Ontology IDs are in the first column of the CSV/TSV. Their ordering
        # in the file is the sort ordering we are looking for. Blank lines
        # come back from csv.reader as empty rows and are skipped.
        return [row[0] for row in reader if row]
def load_data(data_file):
    '''Parse the YAML file at data_file and return the resulting data.

    The document is read with PyYAML's SafeLoader, so only plain YAML types
    are constructed. The return value is whatever the top level of the
    document is; the rest of this script indexes it with 'ontologies', so a
    mapping is expected.
    '''
    # The with-block guarantees the file handle is closed once parsing is
    # finished, including when the parser raises.
    with open(data_file, 'r') as stream:
        return yaml.load(stream, Loader=yaml.SafeLoader)
def sort_ontologies(data, sort_order):
    '''Reorder data['ontologies'] to follow sort_order and return data.

    data is a dictionary whose 'ontologies' entry is a list of dictionaries,
    each carrying an 'id' key. sort_order is a list of ontology IDs in the
    desired order.

    The 'ontologies' list is replaced in place on data (the same object is
    returned) with one entry per ID in sort_order. Ontologies whose ID does
    not appear in sort_order are left out of the result.

    Raises IndexError if sort_order names an ID that is not present in data.
    '''
    # Index the ontologies once so each lookup is constant-time instead of a
    # scan over the whole list. We assume that ontology ids are unique; should
    # an id repeat, the last entry with that id is the one used.
    by_id = {ont['id']: ont for ont in data['ontologies']}

    ontologies = []
    for ont_id in sort_order:
        try:
            ontologies.append(by_id[ont_id])
        except KeyError:
            # IndexError is kept as the failure type for an unknown id so
            # existing handlers keep working; the message now names the id.
            raise IndexError(
                'Ontology id %r from the sort order was not found in the data'
                % (ont_id,)) from None

    data['ontologies'] = ontologies
    return data
def write_data(data, output):
    '''Serialise data as YAML and save it to the file named by output.

    The YAML text is produced in full before the output file is opened, and
    the file is then opened for writing and filled with that text.
    '''
    serialized = yaml.dump(data)
    with open(output, 'w') as handle:
        handle.write(serialized)
# Script entry point: hand the process argument vector to main() when this
# file is executed directly rather than imported.
if __name__ == "__main__":
    main(sys.argv)