Skip to content

Make phi model of webgpu ep always use long RoPE to improve tps performance for long context scenario #6067

Make phi model of webgpu ep always use long RoPE to improve tps performance for long context scenario

Make phi model of webgpu ep always use long RoPE to improve tps performance for long context scenario #6067

Triggered via pull request December 22, 2025 09:58
Status Failure
Total duration 7m 35s
Artifacts

clang-format-lint.yml

on: pull_request
Fit to window
Zoom out
Zoom in

Annotations

2 errors and 1 warning
lint-cpp
Clang-format check failed. The following files need formatting: benchmark/c/main.cpp, benchmark/c/options.cpp, benchmark/c/options.h, benchmark/c/posix/resource_utils.cpp, benchmark/c/resource_utils.h, benchmark/c/windows/resource_utils.cpp, examples/c/src/common.cpp, examples/c/src/common.h, examples/c/src/model_chat.cpp, examples/c/src/model_qa.cpp, examples/c/src/model_vision.cpp, examples/c/src/phi4-mm.cpp, examples/c/src/whisper.cpp, examples/slm_engine/src/cpp/gtest_main.cpp, examples/slm_engine/src/cpp/input_decoder.cpp, examples/slm_engine/src/cpp/input_decoder.h, examples/slm_engine/src/cpp/input_decoder_test.cpp, examples/slm_engine/src/cpp/slm_engine.cpp, examples/slm_engine/src/cpp/slm_engine.h, examples/slm_engine/src/cpp/slm_engine_test.cpp, examples/slm_engine/src/cpp/slm_runner.cpp, examples/slm_engine/src/cpp/slm_server.cpp, src/beam_search_scorer.cpp, src/beam_search_scorer.h, src/beam_search_topk.h, src/config.cpp, src/config.h, src/constrained_logits_processor.cpp, src/constrained_logits_processor.h, src/cpu/interface.cpp, src/cpu/interface.h, src/cuda/beam_search_scorer_cuda.cpp, src/cuda/beam_search_scorer_cuda.h, src/cuda/cuda_common.h, src/cuda/cuda_sampling.h, src/cuda/cuda_topk.h, src/cuda/cuda_topk_benchmark_cache.h, src/cuda/cuda_topk_sort_benchmark_cache.h, src/cuda/interface.cpp, src/cuda/interface.h, src/cuda/kernels.h, src/cuda/search_cuda.cpp, src/cuda/search_cuda.h, src/dll_load_error.cpp, src/dll_load_error.h, src/dml/dml_adapter_info.cpp, src/dml/dml_adapter_info.h, src/dml/dml_adapter_selection.cpp, src/dml/dml_adapter_selection.h, src/dml/dml_command_allocator_ring.h, src/dml/dml_command_queue.cpp, src/dml/dml_command_queue.h, src/dml/dml_command_recorder.cpp, src/dml/dml_command_recorder.h, src/dml/dml_descriptor_pool.cpp, src/dml/dml_descriptor_pool.h, src/dml/dml_execution_context.cpp, src/dml/dml_execution_context.h, src/dml/dml_gpu_event.h, src/dml/dml_helpers.cpp, src/dml/dml_helpers.h, 
src/dml/dml_increment_values_kernel.cpp, src/dml/dml_increment_values_kernel.h, src/dml/dml_pooled_upload_heap.cpp, src/dml/dml_pooled_upload_heap.h, src/dml/dml_readback_heap.cpp, src/dml/dml_readback_heap.h, src/dml/dml_update_mask_kernel.cpp, src/dml/dml_update_mask_kernel.h, src/dml/generated_dml_shaders/increment_values_int32.h, src/dml/generated_dml_shaders/increment_values_int64.h, src/dml/generated_dml_shaders/update_mask_int32.h, src/dml/generated_dml_shaders/update_mask_int64.h, src/dml/interface.cpp, src/dml/interface.h, src/engine/block.cpp, src/engine/block.h, src/engine/cache_manager.cpp, src/engine/cache_manager.h, src/engine/decoders/decoder.h, src/engine/decoders/simple_decoder.cpp, src/engine/decoders/simple_decoder.h, src/engine/decoders/static_batch_decoder_io.cpp, src/engine/decoders/static_batch_decoder_io.h, src/engine/decoders/varlen_decoder_io.cpp, src/engine/decoders/varlen_decoder_io.h, src/engine/engine.cpp, src/engine/engine.h, src/engine/model_executor.cpp, src/engine/model_executor.h, src/engine/model_io.h, src/engine/paged_key_value_cache.cpp, src/engine/paged_key_value_cache.h, src/engine/request.cpp, src/engine/request.h, src/engine/scheduled_requests.cpp, src/engine/scheduled_requests.h, src/engine/scheduler.cpp, src/engine/scheduler.h, src/filesystem.h, src/generators.cpp, src/generators.h, src/java/src/main/native/ai_onnxruntime_genai_Adapters.cpp, src/java/src/main/native/ai_onnxruntime_genai_Audios.cpp, src/java/src/main/native/ai_onnxruntime_genai_Config.cpp, src/java/src/main/native/ai_onnxruntime_genai_GenAI.cpp, src/java/src/main/native/ai_onnxruntime_genai_Generator.cpp, src/java/src/main/native/ai_onnxruntime_genai_GeneratorParams.cpp, src/java/src/main/native/ai_onnxruntime_genai_Images.cpp, src/java/src/main/native/ai_onnxruntime_genai_Model.cpp, src/java/src/main/native/ai_onnxruntime_genai_MultiModalProcessor.cpp, src/java/src/main/native/ai_onnxruntime_genai_NamedTensors.cpp, 
src/java/src/main/native/ai_onnxruntime_genai_Sequences.cpp, src/java/src/main/native/ai_onnxruntime_genai_Tensor.cpp, src/java/src/main/native/ai_onnxruntime_genai_Tokenize
lint-cpp
clang-format check failed (exit code: 1). Some files need formatting.
lint-cpp
clang-format output (potentially noisy):
Clang-formatting 233 files
/mnt/vss/_work/onnxruntime-genai/onnxruntime-genai/src/generators.cpp:502:62: error: code should be clang-formatted [-Wclang-format-violations]
if (model_->p_device_->GetType() != DeviceType::WEBGPU && ^