Make the Phi model on the WebGPU EP always use long RoPE to improve TPS performance in long-context scenarios #6067
Annotations
2 errors and 1 warning
|
lint-cpp
Clang-format check failed. The following files need formatting: benchmark/c/main.cpp, benchmark/c/options.cpp, benchmark/c/options.h, benchmark/c/posix/resource_utils.cpp, benchmark/c/resource_utils.h, benchmark/c/windows/resource_utils.cpp, examples/c/src/common.cpp, examples/c/src/common.h, examples/c/src/model_chat.cpp, examples/c/src/model_qa.cpp, examples/c/src/model_vision.cpp, examples/c/src/phi4-mm.cpp, examples/c/src/whisper.cpp, examples/slm_engine/src/cpp/gtest_main.cpp, examples/slm_engine/src/cpp/input_decoder.cpp, examples/slm_engine/src/cpp/input_decoder.h, examples/slm_engine/src/cpp/input_decoder_test.cpp, examples/slm_engine/src/cpp/slm_engine.cpp, examples/slm_engine/src/cpp/slm_engine.h, examples/slm_engine/src/cpp/slm_engine_test.cpp, examples/slm_engine/src/cpp/slm_runner.cpp, examples/slm_engine/src/cpp/slm_server.cpp, src/beam_search_scorer.cpp, src/beam_search_scorer.h, src/beam_search_topk.h, src/config.cpp, src/config.h, src/constrained_logits_processor.cpp, src/constrained_logits_processor.h, src/cpu/interface.cpp, src/cpu/interface.h, src/cuda/beam_search_scorer_cuda.cpp, src/cuda/beam_search_scorer_cuda.h, src/cuda/cuda_common.h, src/cuda/cuda_sampling.h, src/cuda/cuda_topk.h, src/cuda/cuda_topk_benchmark_cache.h, src/cuda/cuda_topk_sort_benchmark_cache.h, src/cuda/interface.cpp, src/cuda/interface.h, src/cuda/kernels.h, src/cuda/search_cuda.cpp, src/cuda/search_cuda.h, src/dll_load_error.cpp, src/dll_load_error.h, src/dml/dml_adapter_info.cpp, src/dml/dml_adapter_info.h, src/dml/dml_adapter_selection.cpp, src/dml/dml_adapter_selection.h, src/dml/dml_command_allocator_ring.h, src/dml/dml_command_queue.cpp, src/dml/dml_command_queue.h, src/dml/dml_command_recorder.cpp, src/dml/dml_command_recorder.h, src/dml/dml_descriptor_pool.cpp, src/dml/dml_descriptor_pool.h, src/dml/dml_execution_context.cpp, src/dml/dml_execution_context.h, src/dml/dml_gpu_event.h, src/dml/dml_helpers.cpp, src/dml/dml_helpers.h, 
src/dml/dml_increment_values_kernel.cpp, src/dml/dml_increment_values_kernel.h, src/dml/dml_pooled_upload_heap.cpp, src/dml/dml_pooled_upload_heap.h, src/dml/dml_readback_heap.cpp, src/dml/dml_readback_heap.h, src/dml/dml_update_mask_kernel.cpp, src/dml/dml_update_mask_kernel.h, src/dml/generated_dml_shaders/increment_values_int32.h, src/dml/generated_dml_shaders/increment_values_int64.h, src/dml/generated_dml_shaders/update_mask_int32.h, src/dml/generated_dml_shaders/update_mask_int64.h, src/dml/interface.cpp, src/dml/interface.h, src/engine/block.cpp, src/engine/block.h, src/engine/cache_manager.cpp, src/engine/cache_manager.h, src/engine/decoders/decoder.h, src/engine/decoders/simple_decoder.cpp, src/engine/decoders/simple_decoder.h, src/engine/decoders/static_batch_decoder_io.cpp, src/engine/decoders/static_batch_decoder_io.h, src/engine/decoders/varlen_decoder_io.cpp, src/engine/decoders/varlen_decoder_io.h, src/engine/engine.cpp, src/engine/engine.h, src/engine/model_executor.cpp, src/engine/model_executor.h, src/engine/model_io.h, src/engine/paged_key_value_cache.cpp, src/engine/paged_key_value_cache.h, src/engine/request.cpp, src/engine/request.h, src/engine/scheduled_requests.cpp, src/engine/scheduled_requests.h, src/engine/scheduler.cpp, src/engine/scheduler.h, src/filesystem.h, src/generators.cpp, src/generators.h, src/java/src/main/native/ai_onnxruntime_genai_Adapters.cpp, src/java/src/main/native/ai_onnxruntime_genai_Audios.cpp, src/java/src/main/native/ai_onnxruntime_genai_Config.cpp, src/java/src/main/native/ai_onnxruntime_genai_GenAI.cpp, src/java/src/main/native/ai_onnxruntime_genai_Generator.cpp, src/java/src/main/native/ai_onnxruntime_genai_GeneratorParams.cpp, src/java/src/main/native/ai_onnxruntime_genai_Images.cpp, src/java/src/main/native/ai_onnxruntime_genai_Model.cpp, src/java/src/main/native/ai_onnxruntime_genai_MultiModalProcessor.cpp, src/java/src/main/native/ai_onnxruntime_genai_NamedTensors.cpp, 
src/java/src/main/native/ai_onnxruntime_genai_Sequences.cpp, src/java/src/main/native/ai_onnxruntime_genai_Tensor.cpp, src/java/src/main/native/ai_onnxruntime_genai_Tokenize
|
|
lint-cpp
clang-format check failed (exit code: 1). Some files need formatting.
|
|
lint-cpp
clang-format output (potentially noisy):
Clang-formatting 233 files
/mnt/vss/_work/onnxruntime-genai/onnxruntime-genai/src/generators.cpp:502:62: error: code should be clang-formatted [-Wclang-format-violations]
if (model_->p_device_->GetType() != DeviceType::WEBGPU &&
^
|