openvinotoolkit · GuoliangShiIntel · Jan 14, 2026
@@ -1094,7 +1094,8 @@ void reshape_to_static(std::shared_ptr<ov::Model> model,
                        const uint32_t kvcache_size,
                        const KVAxesPosition& kv_axes_position,
                        const uint32_t lora_rank,
-                       const uint32_t lhs_seq_size = 0) {
+                       const uint32_t lhs_seq_size = 0,
+                       const bool is_prefill = false) {
     std::map<std::string, ov::PartialShape> new_shapes;
     for (const auto& input : model->inputs()) {
         const auto& input_name = input.get_any_name();
@@ -1129,8 +1130,9 @@ void reshape_to_static(std::shared_ptr<ov::Model> model,
             const auto& partial_shape = input.get_partial_shape();
             new_shape = partial_shape;
             new_shape[0] = 1;  // batch_dim
-        } else if (ov::npuw::matchEagle3HiddenStatesString(input_name)) {
-            new_shape = ov::npuw::Eagle3Extension::get_static_input(model, input, input_size);
+        } else if (ov::npuw::matchEagle3HiddenStatesString(input_name) ||
+                   ov::npuw::matchEagle3TreeMaskString(input_name)) {
+            new_shape = ov::npuw::Eagle3Extension::get_static_input(model, input, input_size, kvcache_size, is_prefill);
         } else if (ov::npuw::util::matchLoRAMatMulAString(input_name)) {
             new_shape = ov::PartialShape({lora_rank, input.get_partial_shape()[1]});
         } else if (ov::npuw::util::matchLoRAMatMulAlphaString(input_name)) {
@@ -1643,22 +1645,31 @@ class ReshapeToStatic : public ov::pass::ModelPass {
     KVAxesPosition m_kv_axes_position;
     uint32_t m_lora_rank;
     uint32_t m_lhs_seq_size;
+    bool m_is_prefill;
 
 public:
     OPENVINO_MODEL_PASS_RTTI("ov::npuw::ReshapeToStatic");
     explicit ReshapeToStatic(const uint32_t input_size,
                              const uint32_t kvcache_size,
                              const KVAxesPosition& kv_axes_position,
                              const uint32_t lora_rank,
-                             const uint32_t lhs_seq_size = 0)
+                             const uint32_t lhs_seq_size = 0,
+                             const bool is_prefill = false)
         : m_input_size(input_size),
           m_kvcache_size(kvcache_size),
           m_kv_axes_position(kv_axes_position),
           m_lora_rank(lora_rank),
-          m_lhs_seq_size(lhs_seq_size) {}
+          m_lhs_seq_size(lhs_seq_size),
+          m_is_prefill(is_prefill) {}
 
     bool run_on_model(const std::shared_ptr<ov::Model>& model) override {
-        reshape_to_static(model, m_input_size, m_kvcache_size, m_kv_axes_position, m_lora_rank, m_lhs_seq_size);
+        reshape_to_static(model,
+                          m_input_size,
+                          m_kvcache_size,
+                          m_kv_axes_position,
+                          m_lora_rank,
+                          m_lhs_seq_size,
+                          m_is_prefill);
 
         return true;
     }
@@ -2021,14 +2032,17 @@ ov::npuw::LLMCompiledModel::LLMCompiledModel(const std::shared_ptr<ov::Model>& m
         ReshapeToStatic(static_cast<uint32_t>(m_prefill_chunk_size),
                         m_kvcache_desc.max_prompt_size,
                         axes,
-                        m_max_lora_rank)
+                        m_max_lora_rank,
+                        0,
+                        true)
             .run_on_model(prefill_model);
     } else {
         ReshapeToStatic(m_kvcache_desc.max_prompt_size,
                         m_kvcache_desc.max_prompt_size,
                         axes,
                         m_max_lora_rank,
-                        whisper_lhs_seq_size)
+                        whisper_lhs_seq_size,
+                        true)
             .run_on_model(prefill_model);
     }
     LOG_DEBUG("Make kvcache model with static shapes");