Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Set English language by default for all the LLM models #1686

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion tools/who_what_benchmark/tests/test_cli_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def test_text_verbose():
assert "## Diff:" in result.stderr


def test_text_language_autodetect():
def test_text_language():
with tempfile.TemporaryDirectory() as temp_dir:
temp_file_name = os.path.join(temp_dir, "gt.csv")
result = run_wwb(
Expand All @@ -172,6 +172,8 @@ def test_text_language_autodetect():
"2",
"--device",
"CPU",
"--language",
"cn",
]
)
assert result.returncode == 0
Expand Down
25 changes: 1 addition & 24 deletions tools/who_what_benchmark/whowhatbench/text_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,21 +73,6 @@
}


def autodetect_language(model):
    """Guess the default prompt language for ``model`` from its config.

    The lookup is keyed on ``model.config.model_type``. Model types listed in
    the table below map to ``"cn"``; every other case maps to ``"en"``.

    Args:
        model: Any object. It is expected (but not required) to expose a
            ``config`` attribute that carries a ``model_type`` string.

    Returns:
        ``"cn"`` for the model types listed below, otherwise ``"en"``. The
        ``"en"`` fallback also applies when ``config`` is missing, is
        ``None``, or has no ``model_type`` attribute.
    """
    model2language = {
        "chatglm": "cn",
        "qwen2": "cn",
        "qwen": "cn",
        "baichuan": "cn",
        "minicpmv": "cn",
        "internlm": "cn",
    }

    # Resolve both attributes defensively: a model whose ``config`` is None or
    # lacks ``model_type`` should fall back to English rather than raise
    # AttributeError, matching the existing "no config" fallback.
    config = getattr(model, "config", None)
    model_type = getattr(config, "model_type", None)
    return model2language.get(model_type, "en")


@register_evaluator(
"text"
)
Expand All @@ -103,7 +88,7 @@ def __init__(
max_new_tokens=128,
crop_question=True,
num_samples=None,
language=None,
language="en",
gen_answer_fn=None,
generation_config=None,
generation_config_base=None,
Expand All @@ -130,9 +115,6 @@ def __init__(

# Take language from the base model if provided
self.language = language
if self.language is None:
if base_model is not None:
self.language = autodetect_language(base_model)

if base_model:
self.gt_data = self._generate_data(
Expand Down Expand Up @@ -233,11 +215,6 @@ def default_gen_answer(model, tokenizer, prompt, max_new_tokens, crop_question,
data = {"prompts": list(self.test_data)}
data = pd.DataFrame.from_dict(data)
else:
if self.language is None:
print(
"No language detecting in the base model or ground truth data. Taking language from target model."
)
self.language = autodetect_language(model)
data = pd.DataFrame.from_dict(default_data[self.language])

prompt_data = data["prompts"]
Expand Down
4 changes: 2 additions & 2 deletions tools/who_what_benchmark/whowhatbench/wwb.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,8 @@ def parse_args():
"--language",
type=str,
choices=["en", "cn"],
default=None,
help="Used to select default prompts based on the primary model language, e.g. 'en', 'ch'.",
default="en",
help="Used to select default prompts based on the primary model language, e.g. 'en', 'cn'.",
)
parser.add_argument(
"--hf",
Expand Down
Loading