@@ -81,7 +81,7 @@ DeviceInterface* GetQNNInterface() {
8181
8282bool IsQNNStatefulModel (const Model& model) {
8383 // Check for both QNN and CPU device types
84- // When using QNN EP with genai_model=True, the model is stateful regardless of device type
84+ // When using QNN EP with genie_model set to true, the model is stateful regardless of device type (QNN/CPU)
8585 // For QNN models with enable_htp_shared_memory_allocator=1, p_device_ will be QNN type
8686 // For QNN models without shared memory allocator, p_device_ will be CPU type
8787 // Both cases need to be handled the same way for stateful models where KV cache is managed internally
@@ -90,10 +90,13 @@ bool IsQNNStatefulModel(const Model& model) {
9090 for (const auto & po : provider_options) {
9191 if (po.name == " QNN" ) {
9292 for (const auto & option : po.options ) {
93- // For QNN, if session option 'genai_model ' is set, the session will encapsulate
93+ // For QNN, if session option 'genie_model' is set to true, the session will encapsulate
9494 // a stateful model, so KVCache will be managed internally.
95- if (option.first == " genai_model" && option.second == " True" ) {
96- return true ;
95+ if (option.first == " genie_model" ) {
96+ std::string lower_value (option.second );
97+ std::transform (lower_value.begin (), lower_value.end (), lower_value.begin (),
98+ [](unsigned char c) { return static_cast <unsigned char >(std::tolower (c)); });
99+ return lower_value == " true" ;
97100 }
98101 }
99102 }
0 commit comments