This repository was archived by the owner on Jan 28, 2026. It is now read-only.

Commit caf15cc

[NPU] Add IPEX_LLM_NPU_MTL to enable support on mtl (#12543)

1 parent: c090d16

3 files changed: +11 -0 lines changed

docs/mddocs/Quickstart/npu_quickstart.md (3 additions, 0 deletions)

````diff
@@ -90,6 +90,9 @@ For `ipex-llm` NPU support, set the following environment variable with active `
 
 ```cmd
 set BIGDL_USE_NPU=1
+
+:: [optional] for MTL support
+set IPEX_LLM_NPU_MTL=1
 ```
 
 ## Python API
````
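For readers who prefer to set these variables from Python rather than a cmd session, a minimal sketch follows. The variable names are taken from this commit's docs; setting them via `os.environ` before importing `ipex_llm` is an assumption about when the library reads them, not documented behavior.

```python
import os

# Enable ipex-llm NPU support (variable names taken from the docs above).
os.environ["BIGDL_USE_NPU"] = "1"

# [optional] opt in to the MTL path added by this commit.
os.environ["IPEX_LLM_NPU_MTL"] = "1"

# Import ipex-llm only after the variables are set, since optimize_llm_pre
# (see convert_mp.py below) reads IPEX_LLM_NPU_MTL from the environment.
```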

python/llm/example/NPU/HF-Transformers-AutoModels/LLM/README.md (3 additions, 0 deletions)

````diff
@@ -60,6 +60,9 @@ For optimal performance, it is recommended to set several environment variables.
 
 ```cmd
 set BIGDL_USE_NPU=1
+
+:: [optional] for running models on MTL
+set IPEX_LLM_NPU_MTL=1
 ```
 
 ## 3. Run Models
````

python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py (5 additions, 0 deletions)

````diff
@@ -32,6 +32,11 @@ def convert_forward(m, target_m, new_forward):
 
 def optimize_llm_pre(model: torch.nn.Module, qtype, mixed_precision,
                      quantization_group_size=0, load=False, max_prompt_len=512):
+    if os.environ.get("IPEX_LLM_NPU_MTL", "0") == "1":
+        # For MTL support
+        os.environ["IPEX_LLM_NPU_USE_LEVEL0"] = "0"
+        os.environ["IPEX_LLM_NPU_DISABLE_COMPILE_OPT"] = "1"
+
     if model.config.model_type == "baichuan":
         # process NormHead module in Baichuan2 7B
         if hasattr(model, 'lm_head') and model.lm_head is not None:
````
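The effect of the new flag is a one-time translation at optimization time: `IPEX_LLM_NPU_MTL=1` is rewritten into two internal switches, presumably disabling the Level Zero path and a compile-time optimization that do not work on MTL. Below is a minimal, self-contained sketch of that gating pattern; the helper name is hypothetical, and only the three environment variable names come from the commit.

```python
import os

def apply_mtl_workarounds():
    # Hypothetical helper mirroring the gate added to optimize_llm_pre:
    # when the user opts in via IPEX_LLM_NPU_MTL, flip the two internal
    # switches exactly as this commit does.
    if os.environ.get("IPEX_LLM_NPU_MTL", "0") == "1":
        os.environ["IPEX_LLM_NPU_USE_LEVEL0"] = "0"
        os.environ["IPEX_LLM_NPU_DISABLE_COMPILE_OPT"] = "1"

if __name__ == "__main__":
    os.environ["IPEX_LLM_NPU_MTL"] = "1"
    apply_mtl_workarounds()
    # Prints "0 1": the single MTL opt-in has been expanded into both switches.
    print(os.environ["IPEX_LLM_NPU_USE_LEVEL0"],
          os.environ["IPEX_LLM_NPU_DISABLE_COMPILE_OPT"])
```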

0 commit comments