forked from pytorch/ao
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path__init__.py
57 lines (54 loc) · 1.77 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
from .smoothquant import * # noqa: F403
from .quant_api import * # noqa: F403
from .subclass import * # noqa: F403
from .quant_primitives import * # noqa: F403
from .utils import * # noqa: F403
from .weight_only import * # noqa: F403
from .unified import *
from .autoquant import *
from .granularity import *
from .linear_activation_quantized_tensor import (
LinearActivationQuantizedTensor,
to_linear_activation_quantized,
)
from .linear_activation_scale import (
to_weight_tensor_with_linear_activation_scale_metadata,
)
# Public names re-exported by this package; `from <package> import *` binds
# exactly the names listed here.
# Every entry must be followed by a comma: Python concatenates adjacent string
# literals, so a missing comma silently merges two entries into one bogus name
# and drops both real names from the export list.
__all__ = [
    "swap_conv2d_1x1_to_linear",
    "safe_int_mm",
    "autoquant",
    "DEFAULT_AUTOQUANT_CLASS_LIST",
    "DEFAULT_INT4_AUTOQUANT_CLASS_LIST",
    "OTHER_AUTOQUANT_CLASS_LIST",
    "get_scale",
    "SmoothFakeDynQuantMixin",
    "SmoothFakeDynamicallyQuantizedLinear",
    "swap_linear_with_smooth_fq_linear",
    "smooth_fq_linear_to_inference",
    "set_smooth_fq_attribute",
    "compute_error",
    "Int4WeightOnlyGPTQQuantizer",
    "Int4WeightOnlyQuantizer",
    "quantize_affine",
    "dequantize_affine",
    # NOTE(review): previously spelled "choose_qprams_affine"; corrected on the
    # assumption that .quant_primitives exports `choose_qparams_affine` -- confirm.
    "choose_qparams_affine",
    "quantize_",
    "int8_dynamic_activation_int4_weight",
    "int8_dynamic_activation_int8_weight",
    "int8_dynamic_activation_int8_semi_sparse_weight",
    "int4_weight_only",
    "int8_weight_only",
    "uintx_weight_only",
    "fpx_weight_only",
    "LinearActivationQuantizedTensor",
    "to_linear_activation_quantized",
    "to_weight_tensor_with_linear_activation_scale_metadata",
    "float8_weight_only",
    "float8_dynamic_activation_float8_weight",
    "float8_static_activation_float8_weight",
]