mmmu.py
import os
from typing import Any

from eureka_ml_insights.configs.experiment_config import ExperimentConfig
from eureka_ml_insights.core import EvalReporting, Inference, PromptProcessing
from eureka_ml_insights.data_utils import (
    ASTEvalTransform,
    ColumnRename,
    CopyColumn,
    DataReader,
    HFDataReader,
    MapStringsTransform,
    MMDataLoader,
    SequenceTransform,
)
from eureka_ml_insights.data_utils.mmmu_utils import (
    CreateMMMUPrompts,
    MMMUAll,
    MMMUTaskToCategories,
)
from eureka_ml_insights.metrics import CountAggregator, MMMUMetric

from .config import (
    AggregatorConfig,
    DataSetConfig,
    EvalReportingConfig,
    InferenceConfig,
    MetricConfig,
    ModelConfig,
    PipelineConfig,
    PromptProcessingConfig,
)


class MMMU_BASELINE_PIPELINE(ExperimentConfig):
    """Defines an ExperimentConfig pipeline for the MMMU dataset.

    There is no model_config by default; the model config must be passed in via the
    command line (see the usage sketch at the end of this file).
    """

    def configure_pipeline(
        self, model_config: ModelConfig, resume_from: str = None, **kwargs: dict[str, Any]
    ) -> PipelineConfig:
        # Configure the data processing component: load the MMMU validation split,
        # build prompts, and rename columns for downstream evaluation.
        self.data_processing_comp = PromptProcessingConfig(
            component_type=PromptProcessing,
            data_reader_config=DataSetConfig(
                HFDataReader,
                {
                    "path": "MMMU/MMMU",
                    "split": "validation",
                    "tasks": MMMUAll,
                    "transform": SequenceTransform(
                        [
                            ASTEvalTransform(columns=["options"]),
                            CreateMMMUPrompts(),
                            ColumnRename(name_mapping={"answer": "ground_truth", "options": "target_options"}),
                        ]
                    ),
                },
            ),
            output_dir=os.path.join(self.log_dir, "data_processing_output"),
            ignore_failure=False,
        )

        # Configure the inference component.
        self.inference_comp = InferenceConfig(
            component_type=Inference,
            model_config=model_config,
            data_loader_config=DataSetConfig(
                MMDataLoader,
                {"path": os.path.join(self.data_processing_comp.output_dir, "transformed_data.jsonl")},
            ),
            output_dir=os.path.join(self.log_dir, "inference_result"),
            resume_from=resume_from,
        )

        # Configure the evaluation and reporting component.
        self.evalreporting_comp = EvalReportingConfig(
            component_type=EvalReporting,
            data_reader_config=DataSetConfig(
                DataReader,
                {
                    "path": os.path.join(self.inference_comp.output_dir, "inference_result.jsonl"),
                    "format": ".jsonl",
                    "transform": SequenceTransform(
                        [
                            CopyColumn(column_name_src="__hf_task", column_name_dst="category"),
                            MapStringsTransform(
                                columns=["category"],
                                mapping=MMMUTaskToCategories,
                            ),
                        ]
                    ),
                },
            ),
            metric_config=MetricConfig(MMMUMetric),
            aggregator_configs=[
                AggregatorConfig(CountAggregator, {"column_names": ["MMMUMetric_result"], "normalize": True}),
                AggregatorConfig(
                    CountAggregator,
                    {"column_names": ["MMMUMetric_result"], "group_by": "category", "normalize": True},
                ),
            ],
            output_dir=os.path.join(self.log_dir, "eval_report"),
        )

        # Configure the pipeline by chaining the three components in order.
        return PipelineConfig([self.data_processing_comp, self.inference_comp, self.evalreporting_comp], self.log_dir)
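

# Usage sketch (not part of the original file): this illustrates how the docstring's
# "model config passed in via the command line" requirement is typically satisfied.
# The entry-point script name and flag names below are assumptions for illustration,
# not a confirmed CLI of this repository; check the project README for the exact command.
#
#   python main.py \
#       --exp_config MMMU_BASELINE_PIPELINE \
#       --model_config <name of a ModelConfig defined in your model configs>
#
# Internally, the framework is expected to instantiate MMMU_BASELINE_PIPELINE and call
# configure_pipeline(model_config=...), which returns the PipelineConfig built above.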