-
Notifications
You must be signed in to change notification settings - Fork 267
Add support for QNN stateful models #2012
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,6 +3,7 @@ | |
|
|
||
| #include "../generators.h" | ||
| #include "../search.h" | ||
| #include "../models/model.h" | ||
| #include "interface.h" | ||
|
|
||
| namespace Generators { | ||
|
|
@@ -78,4 +79,31 @@ DeviceInterface* GetQNNInterface() { | |
| return g_device.get(); | ||
| } | ||
|
|
||
| bool IsQNNStatefulModel(const Model& model) { | |
| // Returns true when the decoder's "QNN" provider options contain 'genie_model' set to "true" (case-insensitive); checked for both QNN and CPU device types | |
| // When using QNN EP with genie_model=true, the model is stateful regardless of device type (QNN/CPU) | |
| // For QNN models with enable_htp_shared_memory_allocator=1, p_device_ will be QNN type | |
| // For QNN models without shared memory allocator, p_device_ will be CPU type | |
| // Both cases need to be handled the same way for stateful models where KV cache is managed internally | |
| if (model.p_device_->GetType() == DeviceType::QNN || model.p_device_->GetType() == DeviceType::CPU) { | |
| const auto& provider_options = model.config_->model.decoder.session_options.provider_options; | |
| for (const auto& po : provider_options) { | |
| if (po.name == "QNN") { | |
baijumeswani marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| for (const auto& option : po.options) { | |
| // For QNN, if provider option 'genie_model' is set to true (compared case-insensitively), the session will encapsulate | |
| // a stateful model, so KVCache will be managed internally. The first 'genie_model' entry found decides the result. | |
| if (option.first == "genie_model") { | |
| std::string lower_value(option.second); | |
| std::transform(lower_value.begin(), lower_value.end(), lower_value.begin(), | |
| [](unsigned char c) { return static_cast<unsigned char>(std::tolower(c)); }); | |
| return lower_value == "true"; | |
| } | |
| } | |
| } | |
| } | |
| } | |
|
Comment on lines
+88
to
+104
|
||
|
|
||
| return false; | |
| } | |
|
|
||
| } // namespace Generators | ||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -5,4 +5,7 @@ namespace Generators { | |||||
|
|
||||||
| DeviceInterface* GetQNNInterface(); | ||||||
|
|
||||||
| struct Model; | ||||||
|
||||||
| struct Model; | |
| class Model; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is declared as struct Model in model.h
Uh oh!
There was an error while loading. Please reload this page.