diff --git a/.gitignore b/.gitignore
index 37f911684..ef9015660 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,6 +15,7 @@ pnpm-lock.yaml
 package-lock.json
 venv
 .history
+cache
 
 # all models and temp files
 *.log
diff --git a/README.md b/README.md
index 10283289a..66e49a917 100644
--- a/README.md
+++ b/README.md
@@ -68,7 +68,8 @@ Additional models will be added as they become available and there is public int
   Support will be extended to *Windows* once AMD releases ROCm for Windows
 - Any GPU compatibile with *DirectX* on *Windows* using **DirectML** libraries.
   This includes support for AMD GPUs that are not supported by native ROCm libraries
-- *Intel Arc* GPUs using *Intel OneAPI* **Ipex/XPU** libraries
+- *Intel Arc* GPUs using *Intel OneAPI* **Ipex/XPU** libraries
+- *Intel* iGPUs using *Intel OneAPI* **OpenVINO** libraries
 - *Apple M1/M2* on *OSX* using built-in support in Torch with **MPS** optimizations
 
 ## Install & Run
@@ -94,8 +95,8 @@ Once SD.Next is installed, simply run `webui.bat` (*Windows*) or `webui.sh` (*Li
 Below is partial list of all available parameters, run `webui --help` for the full list:
 
   Setup options:
-  --use-ipex                       Use Intel OneAPI XPU backend, default: False
   --use-directml                   Use DirectML if no compatible GPU is detected, default: False
+  --use-ipex                       Force use Intel OneAPI XPU backend, default: False
   --use-cuda                       Force use nVidia CUDA backend, default: False
   --use-rocm                       Force use AMD ROCm backend, default: False
   --skip-update                    Skip update of extensions and submodules, default: False
diff --git a/installer.py b/installer.py
index 192760214..a6e50ca59 100644
--- a/installer.py
+++ b/installer.py
@@ -458,6 +458,7 @@ def check_torch():
         install('hidet', 'hidet')
     if opts.get('cuda_compile_backend', '') == 'openvino_fx':
         install('openvino==2023.1.0.dev20230811', 'openvino')
+        os.environ.setdefault('PYTORCH_TRACING_MODE', 'TORCHFX')
     if args.profile:
         print_profile(pr, 'Torch')
 
@@ -829,8 +830,8 @@ def add_args(parser):
     group.add_argument('--upgrade', default = False, action='store_true', help = "Upgrade main repository to latest version, default: %(default)s")
     group.add_argument('--requirements', default = False, action='store_true', help = "Force re-check of requirements, default: %(default)s")
     group.add_argument('--quick', default = False, action='store_true', help = "Run with startup sequence only, default: %(default)s")
-    group.add_argument("--use-ipex", default = False, action='store_true', help="Use Intel OneAPI XPU backend, default: %(default)s")
     group.add_argument('--use-directml', default = False, action='store_true', help = "Use DirectML if no compatible GPU is detected, default: %(default)s")
+    group.add_argument("--use-ipex", default = False, action='store_true', help="Force use Intel OneAPI XPU backend, default: %(default)s")
     group.add_argument("--use-cuda", default=False, action='store_true', help="Force use nVidia CUDA backend, default: %(default)s")
     group.add_argument("--use-rocm", default=False, action='store_true', help="Force use AMD ROCm backend, default: %(default)s")
     group.add_argument('--skip-requirements', default = False, action='store_true', help = "Skips checking and installing requirements, default: %(default)s")
diff --git a/modules/cmd_args.py b/modules/cmd_args.py
index 92b3c1bc8..e8d8ef889 100644
--- a/modules/cmd_args.py
+++ b/modules/cmd_args.py
@@ -40,8 +40,8 @@
 group.add_argument("--profile", action='store_true', help="Run profiler, default: %(default)s")
 group.add_argument("--disable-queue", action='store_true', help="Disable queues, default: %(default)s")
 group.add_argument('--debug', default = False, action='store_true', help = "Run installer with debug logging, default: %(default)s")
-group.add_argument("--use-ipex", default = False, action='store_true', help="Use Intel OneAPI XPU backend, default: %(default)s")
 group.add_argument('--use-directml', default = False, action='store_true', help = "Use DirectML if no compatible GPU is detected, default: %(default)s")
+group.add_argument("--use-ipex", default = False, action='store_true', help="Force use Intel OneAPI XPU backend, default: %(default)s")
 group.add_argument("--use-cuda", default=False, action='store_true', help="Force use nVidia CUDA backend, default: %(default)s")
 group.add_argument("--use-rocm", default=False, action='store_true', help="Force use AMD ROCm backend, default: %(default)s")
 group.add_argument('--subpath', type=str, help='Customize the URL subpath for usage with reverse proxy')
diff --git a/modules/intel/openvino/__init__.py b/modules/intel/openvino/__init__.py
index 84df9d596..d0835726f 100644
--- a/modules/intel/openvino/__init__.py
+++ b/modules/intel/openvino/__init__.py
@@ -2,23 +2,114 @@
 import torch
 from openvino.frontend.pytorch.torchdynamo.execute import execute
 from openvino.frontend.pytorch.torchdynamo.partition import Partitioner
+from openvino.runtime import Core, Type, PartialShape
 from torch._dynamo.backends.common import fake_tensor_unsupported
 from torch._dynamo.backends.registry import register_backend
 from torch.fx.experimental.proxy_tensor import make_fx
+from torch._inductor.compile_fx import compile_fx
+from hashlib import sha256
+import modules.shared
+
+class ModelState:
+    def __init__(self):
+        self.recompile = 1
+        self.device = "CPU"
+        self.height = 512
+        self.width = 512
+        self.batch_size = 1
+        self.mode = 0
+        self.partition_id = 0
+        self.model_hash = ""
+
+model_state = ModelState()
 
 @register_backend
 @fake_tensor_unsupported
 def openvino_fx(subgraph, example_inputs):
-    if os.getenv("OPENVINO_TORCH_BACKEND_DEVICE") is None:
-        os.environ.setdefault("OPENVINO_TORCH_BACKEND_DEVICE", "GPU")
-
-    model = make_fx(subgraph)(*example_inputs)
-    with torch.no_grad():
-        model.eval()
-    partitioner = Partitioner()
-    compiled_model = partitioner.make_partitions(model)
-
-    def _call(*args):
-        res = execute(compiled_model, *args, executor="openvino")
-        return res
-    return _call
+    try:
+        executor_parameters = None
+        core = Core()
+        if os.getenv("OPENVINO_TORCH_MODEL_CACHING") != "0":
+            os.environ.setdefault('OPENVINO_TORCH_MODEL_CACHING', "1")
+            model_hash_str = sha256(subgraph.code.encode('utf-8')).hexdigest()
+            model_hash_str_file = model_hash_str + str(model_state.partition_id)
+            model_state.partition_id = model_state.partition_id + 1
+            executor_parameters = {"model_hash_str": model_hash_str}
+
+        example_inputs.reverse()
+        cache_root = "./cache/"
+        if os.getenv("OPENVINO_TORCH_CACHE_DIR") is not None:
+            cache_root = os.getenv("OPENVINO_TORCH_CACHE_DIR")
+
+        device = "GPU"
+
+        if os.getenv("OPENVINO_TORCH_BACKEND_DEVICE") is not None:
+            device = os.getenv("OPENVINO_TORCH_BACKEND_DEVICE")
+            assert device in core.available_devices, "Specified device " + device + " is not in the list of OpenVINO Available Devices"
+        else:
+            os.environ.setdefault('OPENVINO_TORCH_BACKEND_DEVICE', device)
+
+        file_name = get_cached_file_name(*example_inputs, model_hash_str=model_hash_str_file, device=device, cache_root=cache_root)
+
+        if file_name is not None and os.path.isfile(file_name + ".xml") and os.path.isfile(file_name + ".bin"):
+            om = core.read_model(file_name + ".xml")
+
+            dtype_mapping = {
+                torch.float32: Type.f32,
+                torch.float64: Type.f64,
+                torch.float16: Type.f16,
+                torch.int64: Type.i64,
+                torch.int32: Type.i32,
+                torch.uint8: Type.u8,
+                torch.int8: Type.i8,
+                torch.bool: Type.boolean
+            }
+
+            for idx, input_data in enumerate(example_inputs):
+                om.inputs[idx].get_node().set_element_type(dtype_mapping[input_data.dtype])
+                om.inputs[idx].get_node().set_partial_shape(PartialShape(list(input_data.shape)))
+            om.validate_nodes_and_infer_types()
+
+            if model_hash_str is not None:
+                core.set_property({'CACHE_DIR': cache_root + '/blob'})
+
+            compiled_model = core.compile_model(om, device)
+            def _call(*args):
+                ov_inputs = [a.detach().cpu().numpy() for a in args]
+                ov_inputs.reverse()
+                res = compiled_model(ov_inputs)
+                result = [torch.from_numpy(res[out]) for out in compiled_model.outputs]
+                return result
+            return _call
+        else:
+            example_inputs.reverse()
+            model = make_fx(subgraph)(*example_inputs)
+            with torch.no_grad():
+                model.eval()
+            partitioner = Partitioner()
+            compiled_model = partitioner.make_partitions(model)
+
+            def _call(*args):
+                res = execute(compiled_model, *args, executor="openvino",
+                              executor_parameters=executor_parameters)
+                return res
+            return _call
+    except Exception:
+        return compile_fx(subgraph, example_inputs)
+
+
+def get_cached_file_name(*args, model_hash_str, device, cache_root):
+    file_name = None
+    if model_hash_str is not None:
+        model_cache_dir = cache_root + "/model/"
+        try:
+            os.makedirs(model_cache_dir, exist_ok=True)
+            file_name = model_cache_dir + model_hash_str + "_" + device
+            for input_data in args:
+                if file_name is not None:
+                    file_name += "_" + str(input_data.type()) + str(input_data.size())[11:-1].replace(" ", "")
+        except OSError as error:
+            print("Cache directory ", cache_root, " cannot be created. Model caching is disabled. Error: ", error)
+            file_name = None
+            model_hash_str = None
+    return file_name