-
Notifications
You must be signed in to change notification settings - Fork 81
/
Copy pathlabel.py
53 lines (41 loc) · 1.78 KB
/
label.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
"""Entry-point script to label radiology reports."""
import pandas as pd
from args import ArgParser
from loader import Loader
from stages import Extractor, Classifier, Aggregator
from constants import *
def write(reports, labels, output_path, verbose=False):
    """Save the reports alongside their per-category labels as a CSV file.

    Args:
        reports: Report texts; stored under the ``REPORTS`` column.
        labels: Two-dimensional array indexed as ``labels[:, i]``, where
            column ``i`` holds the labels for ``CATEGORIES[i]``
            (presumably a NumPy array -- confirm against the aggregator).
        output_path: Destination handed to ``DataFrame.to_csv``.
        verbose: When true, print a message before writing.
    """
    frame = pd.DataFrame({REPORTS: reports})

    # Attach one label column per category, in CATEGORIES order.
    for column_index, category_name in enumerate(CATEGORIES):
        frame[category_name] = labels[:, column_index]

    if verbose:
        print(f"Writing reports and labels to {output_path}.")

    # Report text first, then the categories; the row index is not written.
    ordered_columns = [REPORTS] + CATEGORIES
    frame[ordered_columns].to_csv(output_path, index=False)
def label(args):
    """Run the full labeling pipeline and write the results to disk.

    The pipeline builds a loader and the three stages (extractor,
    classifier, aggregator), loads the reports, runs each stage over the
    loader's collection, and passes the aggregated labels to ``write``.

    Args:
        args: Parsed command-line arguments. The attributes read here are
            ``reports_path``, ``sections_to_extract``, ``extract_strict``,
            ``mention_phrases_dir``, ``unmention_phrases_dir``,
            ``pre_negation_uncertainty_path``, ``negation_path``,
            ``post_negation_uncertainty_path``, ``output_path`` and
            ``verbose``.
    """
    verbose = args.verbose

    # Build every pipeline component up front, before any report is read.
    report_loader = Loader(
        args.reports_path,
        args.sections_to_extract,
        args.extract_strict,
    )
    mention_extractor = Extractor(
        args.mention_phrases_dir,
        args.unmention_phrases_dir,
        verbose=verbose,
    )
    mention_classifier = Classifier(
        args.pre_negation_uncertainty_path,
        args.negation_path,
        args.post_negation_uncertainty_path,
        verbose=verbose,
    )
    label_aggregator = Aggregator(CATEGORIES, verbose=verbose)

    # Load the reports; the loader keeps them on itself.
    report_loader.load()

    # Extraction and classification both update the collection in place.
    mention_extractor.extract(report_loader.collection)
    mention_classifier.classify(report_loader.collection)

    # Collapse the classified mentions into one set of labels per report.
    aggregated_labels = label_aggregator.aggregate(report_loader.collection)

    write(
        report_loader.reports,
        aggregated_labels,
        args.output_path,
        verbose=verbose,
    )
if __name__ == "__main__":
    # Script entry point: parse the command line, then run the labeler.
    parser = ArgParser()
    parsed_args = parser.parse_args()
    label(parsed_args)