-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain_new.py
71 lines (63 loc) · 2.62 KB
/
main_new.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import ipdb
import pandas as pd
import numpy as np
from absl import app
from collections import defaultdict
from ESA.annotation_loader import AnnotationLoader
from ESA.experiments.clusters_and_ranking import ClustersAndRanking
from ESA.experiments.intra_annotator_agreement import IntraAnnotatorAgreement
from ESA.experiments.overview_segment_count_esa import overview_segment_count_esa
from ESA.experiments.esa_corr_others import esa_corr_others
from ESA.utils import PROTOCOL_DEFINITIONS
def main(args):
    """Load the annotations and run every experiment on them.

    Args:
        args: Positional argument supplied by ``app.run``; it is not read
            anywhere in this function.
    """
    # One loader instance is shared by all experiments below.
    annotations = AnnotationLoader(refresh_cache=False)

    # Protocol names noted by the original author:
    # ["MQM-1", "LLM", "ESA-1", "ESAAI-1", "ESA-2", "ESAAI-2", "WMT-MQM", "WMT-DASQM"]
    #
    # NOTE(review): no live statement consumes this view; it only fed the
    # disabled scratch analysis summarised below. The call is retained
    # because get_view may have effects that are not visible from this
    # file -- confirm before removing it.
    overlap_view = annotations.get_view(only_overlap=True)

    # Disabled scratch analysis (previously kept here as commented-out code,
    # ending in an ipdb.set_trace() breakpoint). On the view above it:
    #   * dropped NaN rows, then for protocol "ESA" printed the Pearson
    #     correlation between the f'{protocol}-1_score' and
    #     f'{protocol}-IAA_score' columns, and again for the
    #     '..._score_mqm' variants of both columns after casting to float;
    #   * printed the Pearson correlation between 'MQM-1_score' and
    #     'WMT-MQM_score';
    #   * (nested, second-level disabled) on a view with only_overlap=False,
    #     counted 'WMT-MQM_error_spans' entries per
    #     systemID/severity/category for systems 'refA' and 'GPT4-5shot',
    #     and aggregated 'WMT-DASQM_score' (mean, count) per
    #     severity/category over rows with at most one error span.

    # Original note: "generate to papers" -- presumably these calls produce
    # the paper's outputs; what each one emits is not visible from here.
    experiments = (
        ClustersAndRanking,
        IntraAnnotatorAgreement,
        overview_segment_count_esa,
        esa_corr_others,
    )
    for run_experiment in experiments:
        run_experiment(annotations)
# Script entry point: only when this file is executed directly (not when it
# is imported) is control handed to absl's runner, which invokes main().
if __name__ == '__main__':
    app.run(main)