compile OH #23

Open
yiliu30 opened this issue Dec 24, 2024 · 0 comments

yiliu30 (Owner) commented Dec 24, 2024

import torch


def get_torch_compiled_model(model, logger):
    from functools import partial

    from torchao.quantization import (
        float8_dynamic_activation_float8_weight,
        float8_weight_only,
        quantize_,
    )
    from torchao.quantization.granularity import (
        PerRow,
        PerTensor,
    )

    # FP8 quantization: dynamic per-tensor activation scaling with per-tensor
    # weight scaling. PerRow and float8_weight_only are imported as alternative modes.
    mode_map = {}
    mode = "dynamic"
    granularity = PerTensor()
    mode_map[mode] = partial(
        float8_dynamic_activation_float8_weight, granularity=granularity
    )
    factory = mode_map[mode]()
    quantize_(model, factory)
    print(f"Quantized model: {model}")

    # Sample generation with the quantized model:
    # input 1: ('Explain the history of AI',)
    # output 1.1: ('Explain the history of AI and its evolution over time.\n\nArtificial intelligence (AI) has a rich and varied history that spans several decades. The term "Artificial Intelligence" was coined in 1956 by John McCarthy, a computer scientist who organized the first AI conference at Dartmouth College. Here is a brief overview of the history of AI and its evolution over time:\n\n1. Early Years (1950s-1960s): The first AI program was developed in 1951 by Alan Turing, a British mathematic',)

    # Wrap only the transformer stack (not the lm_head) with torch.compile on the
    # HPU backend; the attribute name depends on the model architecture.
    # for gpt_bigcode, mpt, bloom, gpt2 model types
    if hasattr(model, "transformer"):
        model.transformer = torch.compile(
            model.transformer, backend="hpu_backend", options={"keep_input_mutations": True}
        )
    # for gpt_neox
    elif hasattr(model, "gpt_neox"):
        model.gpt_neox = torch.compile(
            model.gpt_neox, backend="hpu_backend", options={"keep_input_mutations": True}
        )
    # for llama, mistral, mixtral, qwen2
    elif hasattr(model, "model"):
        model.model = torch.compile(
            model.model, backend="hpu_backend", options={"keep_input_mutations": True}
        )
    else:
        logger.warning(
            "If performance is low, explicitly specify the module you want to wrap with `torch.compile`"
        )
        model = torch.compile(model, backend="hpu_backend", options={"keep_input_mutations": True})
    return model
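
For context, a minimal usage sketch of the function above. This assumes a Hugging Face causal-LM checkpoint and a Gaudi/HPU environment; the model name, device handling, and generation settings below are placeholders, not part of the original snippet.

import logging

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

logger = logging.getLogger(__name__)

# Placeholder checkpoint; any causal LM supported on HPU should work similarly.
model_name = "meta-llama/Llama-2-7b-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
model = model.to("hpu").eval()

# Quantize to FP8 and wrap the transformer stack with torch.compile (hpu_backend).
model = get_torch_compiled_model(model, logger)

# Generate with the quantized, compiled model (matches the sample prompt in the comments).
inputs = tokenizer("Explain the history of AI", return_tensors="pt").to("hpu")
with torch.no_grad():
    out = model.generate(**inputs, max_new_tokens=100)
print(tokenizer.decode(out[0], skip_special_tokens=True))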