From 20a9960b06381fcbce21053a97f3c98a1797f5ae Mon Sep 17 00:00:00 2001 From: MentatBot <160964065+MentatBot@users.noreply.github.com> Date: Tue, 9 Sep 2025 15:59:48 +0000 Subject: [PATCH 1/2] Benchmark results (restart checkpoint 1): x-ai/grok-code-fast-1 - Restarted benchmark after system error, running at concurrency 20 - Saving initial results to prevent data loss - Will rerun API-error cases at lower concurrency after completion Mentat precommit script passed. Log: https://mentat.ai/gh/AbanteAI/LoCoDiff-bench/log/3f57f91c-f099-4cac-a5c4-660b7c86366c Co-authored-by: biobootloader <128252497+biobootloader@users.noreply.github.com> --- .../20250909_155741/extracted_output.txt | 895 +++++++++++ .../20250909_155741/metadata.json | 25 + .../20250909_155741/output.diff | 601 ++++++++ .../20250909_155741/raw_response.txt | 897 +++++++++++ .../20250909_155853/extracted_output.txt | 213 +++ .../20250909_155853/metadata.json | 25 + .../20250909_155853/output.diff | 63 + .../20250909_155853/raw_response.txt | 215 +++ .../20250909_155741/extracted_output.txt | 139 ++ .../20250909_155741/metadata.json | 25 + .../20250909_155741/output.diff | 15 + .../20250909_155741/raw_response.txt | 141 ++ .../20250909_155844/extracted_output.txt | 301 ++++ .../20250909_155844/metadata.json | 25 + .../20250909_155844/output.diff | 83 + .../20250909_155844/raw_response.txt | 303 ++++ .../20250909_155808/extracted_output.txt | 426 ++++++ .../20250909_155808/metadata.json | 25 + .../20250909_155808/output.diff | 20 + .../20250909_155808/raw_response.txt | 428 ++++++ .../20250909_155843/extracted_output.txt | 470 ++++++ .../20250909_155843/metadata.json | 25 + .../20250909_155843/output.diff | 99 ++ .../20250909_155843/raw_response.txt | 472 ++++++ .../20250909_155910/extracted_output.txt | 133 ++ .../20250909_155910/metadata.json | 25 + .../20250909_155910/output.diff | 12 + .../20250909_155910/raw_response.txt | 135 ++ .../20250909_155817/extracted_output.txt | 62 + 
.../20250909_155817/metadata.json | 25 + .../20250909_155817/output.diff | 25 + .../20250909_155817/raw_response.txt | 64 + .../20250909_155750/extracted_output.txt | 302 ++++ .../20250909_155750/metadata.json | 25 + .../20250909_155750/output.diff | 107 ++ .../20250909_155750/raw_response.txt | 304 ++++ .../20250909_155834/extracted_output.txt | 425 ++++++ .../20250909_155834/metadata.json | 25 + .../20250909_155834/output.diff | 46 + .../20250909_155834/raw_response.txt | 427 ++++++ .../20250909_155741/extracted_output.txt | 175 +++ .../20250909_155741/metadata.json | 25 + .../20250909_155741/output.diff | 69 + .../20250909_155741/raw_response.txt | 177 +++ .../20250909_155741/extracted_output.txt | 790 ++++++++++ .../20250909_155741/metadata.json | 25 + .../20250909_155741/output.diff | 622 ++++++++ .../20250909_155741/raw_response.txt | 907 +++++++++++ .../20250909_155825/extracted_output.txt | 171 +++ .../20250909_155825/metadata.json | 25 + .../20250909_155825/output.diff | 1 + .../20250909_155825/raw_response.txt | 173 +++ .../20250909_155741/extracted_output.txt | 101 ++ .../20250909_155741/metadata.json | 25 + .../20250909_155741/output.diff | 1 + .../20250909_155741/raw_response.txt | 103 ++ .../20250909_155840/extracted_output.txt | 326 ++++ .../20250909_155840/metadata.json | 25 + .../20250909_155840/output.diff | 734 +++++++++ .../20250909_155840/raw_response.txt | 327 ++++ .../20250909_155836/extracted_output.txt | 212 +++ .../20250909_155836/metadata.json | 25 + .../20250909_155836/output.diff | 1 + .../20250909_155836/raw_response.txt | 214 +++ .../20250909_155824/metadata.json | 25 + .../20250909_155824/output.diff | 963 ++++++++++++ .../20250909_155824/raw_response.txt | 0 .../20250909_155741/extracted_output.txt | 503 +++++++ .../20250909_155741/metadata.json | 25 + .../20250909_155741/output.diff | 28 + .../20250909_155741/raw_response.txt | 505 +++++++ .../20250909_155808/extracted_output.txt | 415 +++++ .../20250909_155808/metadata.json | 25 + 
.../20250909_155808/output.diff | 703 +++++++++ .../20250909_155808/raw_response.txt | 416 +++++ .../20250909_155855/extracted_output.txt | 259 ++++ .../20250909_155855/metadata.json | 25 + .../20250909_155855/output.diff | 15 + .../20250909_155855/raw_response.txt | 261 ++++ .../20250909_155741/extracted_output.txt | 454 ++++++ .../20250909_155741/metadata.json | 25 + .../20250909_155741/output.diff | 411 +++++ .../20250909_155741/raw_response.txt | 456 ++++++ .../20250909_155741/extracted_output.txt | 255 ++++ .../20250909_155741/metadata.json | 25 + .../20250909_155741/output.diff | 869 +++++++++++ .../20250909_155741/raw_response.txt | 256 ++++ .../20250909_155741/extracted_output.txt | 1268 ++++++++++++++++ .../20250909_155741/metadata.json | 25 + .../20250909_155741/output.diff | 433 ++++++ .../20250909_155741/raw_response.txt | 1269 ++++++++++++++++ .../20250909_155806/extracted_output.txt | 384 +++++ .../20250909_155806/metadata.json | 25 + .../20250909_155806/output.diff | 14 + .../20250909_155806/raw_response.txt | 386 +++++ .../20250909_155741/extracted_output.txt | 326 ++++ .../20250909_155741/metadata.json | 25 + .../20250909_155741/output.diff | 22 + .../20250909_155741/raw_response.txt | 328 ++++ .../20250909_155814/extracted_output.txt | 561 +++++++ .../20250909_155814/metadata.json | 25 + .../20250909_155814/output.diff | 84 ++ .../20250909_155814/raw_response.txt | 563 +++++++ .../20250909_155859/extracted_output.txt | 104 ++ .../20250909_155859/metadata.json | 25 + .../20250909_155859/output.diff | 867 +++++++++++ .../20250909_155859/raw_response.txt | 105 ++ .../20250909_155756/extracted_output.txt | 278 ++++ .../20250909_155756/metadata.json | 25 + .../20250909_155756/output.diff | 1 + .../20250909_155756/raw_response.txt | 280 ++++ .../20250909_155806/extracted_output.txt | 1148 ++++++++++++++ .../20250909_155806/metadata.json | 25 + .../20250909_155806/output.diff | 466 ++++++ .../20250909_155806/raw_response.txt | 1149 ++++++++++++++ 
.../20250909_155741/metadata.json | 25 + .../20250909_155741/output.diff | 428 ++++++ .../20250909_155741/raw_response.txt | 0 .../20250909_155741/metadata.json | 25 + .../20250909_155741/output.diff | 617 ++++++++ .../20250909_155741/raw_response.txt | 0 .../20250909_155758/extracted_output.txt | 744 +++++++++ .../20250909_155758/metadata.json | 25 + .../20250909_155758/output.diff | 132 ++ .../20250909_155758/raw_response.txt | 746 +++++++++ .../20250909_155811/extracted_output.txt | 26 + .../20250909_155811/metadata.json | 25 + .../20250909_155811/output.diff | 1 + .../20250909_155811/raw_response.txt | 28 + .../20250909_155839/extracted_output.txt | 823 ++++++++++ .../20250909_155839/metadata.json | 25 + .../20250909_155839/output.diff | 71 + .../20250909_155839/raw_response.txt | 825 ++++++++++ .../20250909_155839/extracted_output.txt | 110 ++ .../20250909_155839/metadata.json | 25 + .../20250909_155839/output.diff | 1 + .../20250909_155839/raw_response.txt | 112 ++ .../20250909_155839/extracted_output.txt | 289 ++++ .../20250909_155839/metadata.json | 25 + .../20250909_155839/output.diff | 13 + .../20250909_155839/raw_response.txt | 291 ++++ .../20250909_155741/extracted_output.txt | 499 ++++++ .../20250909_155741/metadata.json | 25 + .../20250909_155741/output.diff | 87 ++ .../20250909_155741/raw_response.txt | 501 ++++++ .../20250909_155741/extracted_output.txt | 178 +++ .../20250909_155741/metadata.json | 25 + .../20250909_155741/output.diff | 1 + .../20250909_155741/raw_response.txt | 180 +++ .../20250909_155831/extracted_output.txt | 773 ++++++++++ .../20250909_155831/metadata.json | 25 + .../20250909_155831/output.diff | 799 ++++++++++ .../20250909_155831/raw_response.txt | 774 ++++++++++ .../20250909_155741/extracted_output.txt | 470 ++++++ .../20250909_155741/metadata.json | 25 + .../20250909_155741/output.diff | 95 ++ .../20250909_155741/raw_response.txt | 472 ++++++ .../20250909_155826/extracted_output.txt | 223 +++ .../20250909_155826/metadata.json 
| 25 + .../20250909_155826/output.diff | 1 + .../20250909_155826/raw_response.txt | 225 +++ .../20250909_155741/extracted_output.txt | 309 ++++ .../20250909_155741/metadata.json | 25 + .../20250909_155741/output.diff | 650 ++++++++ .../20250909_155741/raw_response.txt | 310 ++++ .../20250909_155814/extracted_output.txt | 1337 ++++++++++++++++ .../20250909_155814/metadata.json | 25 + .../20250909_155814/output.diff | 67 + .../20250909_155814/raw_response.txt | 1339 +++++++++++++++++ .../20250909_155741/extracted_output.txt | 678 +++++++++ .../20250909_155741/metadata.json | 25 + .../20250909_155741/output.diff | 62 + .../20250909_155741/raw_response.txt | 680 +++++++++ .../20250909_155807/extracted_output.txt | 268 ++++ .../20250909_155807/metadata.json | 25 + .../20250909_155807/output.diff | 59 + .../20250909_155807/raw_response.txt | 270 ++++ .../20250909_155846/extracted_output.txt | 382 +++++ .../20250909_155846/metadata.json | 25 + .../20250909_155846/output.diff | 13 + .../20250909_155846/raw_response.txt | 384 +++++ .../20250909_155836/extracted_output.txt | 478 ++++++ .../20250909_155836/metadata.json | 25 + .../20250909_155836/output.diff | 574 +++++++ .../20250909_155836/raw_response.txt | 483 ++++++ .../20250909_155833/extracted_output.txt | 205 +++ .../20250909_155833/metadata.json | 25 + .../20250909_155833/output.diff | 17 + .../20250909_155833/raw_response.txt | 207 +++ .../20250909_155859/extracted_output.txt | 220 +++ .../20250909_155859/metadata.json | 25 + .../20250909_155859/output.diff | 1 + .../20250909_155859/raw_response.txt | 222 +++ .../20250909_155741/extracted_output.txt | 190 +++ .../20250909_155741/metadata.json | 25 + .../20250909_155741/output.diff | 27 + .../20250909_155741/raw_response.txt | 192 +++ .../20250909_155826/metadata.json | 25 + .../20250909_155826/output.diff | 421 ++++++ .../20250909_155826/raw_response.txt | 0 .../20250909_155832/extracted_output.txt | 853 +++++++++++ .../20250909_155832/metadata.json | 25 + 
.../20250909_155832/output.diff | 57 + .../20250909_155832/raw_response.txt | 854 +++++++++++ 204 files changed, 53376 insertions(+) create mode 100644 locodiff-250425/results/aider_aider_args.py/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt create mode 100644 locodiff-250425/results/aider_aider_args.py/x-ai_grok-code-fast-1/20250909_155741/metadata.json create mode 100644 locodiff-250425/results/aider_aider_args.py/x-ai_grok-code-fast-1/20250909_155741/output.diff create mode 100644 locodiff-250425/results/aider_aider_args.py/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt create mode 100644 locodiff-250425/results/aider_aider_coders_editblock_prompts.py/x-ai_grok-code-fast-1/20250909_155853/extracted_output.txt create mode 100644 locodiff-250425/results/aider_aider_coders_editblock_prompts.py/x-ai_grok-code-fast-1/20250909_155853/metadata.json create mode 100644 locodiff-250425/results/aider_aider_coders_editblock_prompts.py/x-ai_grok-code-fast-1/20250909_155853/output.diff create mode 100644 locodiff-250425/results/aider_aider_coders_editblock_prompts.py/x-ai_grok-code-fast-1/20250909_155853/raw_response.txt create mode 100644 locodiff-250425/results/aider_aider_history.py/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt create mode 100644 locodiff-250425/results/aider_aider_history.py/x-ai_grok-code-fast-1/20250909_155741/metadata.json create mode 100644 locodiff-250425/results/aider_aider_history.py/x-ai_grok-code-fast-1/20250909_155741/output.diff create mode 100644 locodiff-250425/results/aider_aider_history.py/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt create mode 100644 locodiff-250425/results/aider_aider_linter.py/x-ai_grok-code-fast-1/20250909_155844/extracted_output.txt create mode 100644 locodiff-250425/results/aider_aider_linter.py/x-ai_grok-code-fast-1/20250909_155844/metadata.json create mode 100644 locodiff-250425/results/aider_aider_linter.py/x-ai_grok-code-fast-1/20250909_155844/output.diff create mode 
100644 locodiff-250425/results/aider_aider_linter.py/x-ai_grok-code-fast-1/20250909_155844/raw_response.txt create mode 100644 locodiff-250425/results/aider_aider_onboarding.py/x-ai_grok-code-fast-1/20250909_155808/extracted_output.txt create mode 100644 locodiff-250425/results/aider_aider_onboarding.py/x-ai_grok-code-fast-1/20250909_155808/metadata.json create mode 100644 locodiff-250425/results/aider_aider_onboarding.py/x-ai_grok-code-fast-1/20250909_155808/output.diff create mode 100644 locodiff-250425/results/aider_aider_onboarding.py/x-ai_grok-code-fast-1/20250909_155808/raw_response.txt create mode 100644 locodiff-250425/results/aider_aider_repo.py/x-ai_grok-code-fast-1/20250909_155843/extracted_output.txt create mode 100644 locodiff-250425/results/aider_aider_repo.py/x-ai_grok-code-fast-1/20250909_155843/metadata.json create mode 100644 locodiff-250425/results/aider_aider_repo.py/x-ai_grok-code-fast-1/20250909_155843/output.diff create mode 100644 locodiff-250425/results/aider_aider_repo.py/x-ai_grok-code-fast-1/20250909_155843/raw_response.txt create mode 100644 locodiff-250425/results/aider_aider_run_cmd.py/x-ai_grok-code-fast-1/20250909_155910/extracted_output.txt create mode 100644 locodiff-250425/results/aider_aider_run_cmd.py/x-ai_grok-code-fast-1/20250909_155910/metadata.json create mode 100644 locodiff-250425/results/aider_aider_run_cmd.py/x-ai_grok-code-fast-1/20250909_155910/output.diff create mode 100644 locodiff-250425/results/aider_aider_run_cmd.py/x-ai_grok-code-fast-1/20250909_155910/raw_response.txt create mode 100644 locodiff-250425/results/aider_aider_sendchat.py/x-ai_grok-code-fast-1/20250909_155817/extracted_output.txt create mode 100644 locodiff-250425/results/aider_aider_sendchat.py/x-ai_grok-code-fast-1/20250909_155817/metadata.json create mode 100644 locodiff-250425/results/aider_aider_sendchat.py/x-ai_grok-code-fast-1/20250909_155817/output.diff create mode 100644 
locodiff-250425/results/aider_aider_sendchat.py/x-ai_grok-code-fast-1/20250909_155817/raw_response.txt create mode 100644 locodiff-250425/results/aider_aider_watch.py/x-ai_grok-code-fast-1/20250909_155750/extracted_output.txt create mode 100644 locodiff-250425/results/aider_aider_watch.py/x-ai_grok-code-fast-1/20250909_155750/metadata.json create mode 100644 locodiff-250425/results/aider_aider_watch.py/x-ai_grok-code-fast-1/20250909_155750/output.diff create mode 100644 locodiff-250425/results/aider_aider_watch.py/x-ai_grok-code-fast-1/20250909_155750/raw_response.txt create mode 100644 locodiff-250425/results/aider_aider_website__includes_recording.js/x-ai_grok-code-fast-1/20250909_155834/extracted_output.txt create mode 100644 locodiff-250425/results/aider_aider_website__includes_recording.js/x-ai_grok-code-fast-1/20250909_155834/metadata.json create mode 100644 locodiff-250425/results/aider_aider_website__includes_recording.js/x-ai_grok-code-fast-1/20250909_155834/output.diff create mode 100644 locodiff-250425/results/aider_aider_website__includes_recording.js/x-ai_grok-code-fast-1/20250909_155834/raw_response.txt create mode 100644 locodiff-250425/results/aider_scripts_versionbump.py/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt create mode 100644 locodiff-250425/results/aider_scripts_versionbump.py/x-ai_grok-code-fast-1/20250909_155741/metadata.json create mode 100644 locodiff-250425/results/aider_scripts_versionbump.py/x-ai_grok-code-fast-1/20250909_155741/output.diff create mode 100644 locodiff-250425/results/aider_scripts_versionbump.py/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt create mode 100644 locodiff-250425/results/aider_tests_basic_test_coder.py/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt create mode 100644 locodiff-250425/results/aider_tests_basic_test_coder.py/x-ai_grok-code-fast-1/20250909_155741/metadata.json create mode 100644 
locodiff-250425/results/aider_tests_basic_test_coder.py/x-ai_grok-code-fast-1/20250909_155741/output.diff create mode 100644 locodiff-250425/results/aider_tests_basic_test_coder.py/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt create mode 100644 locodiff-250425/results/aider_tests_basic_test_sendchat.py/x-ai_grok-code-fast-1/20250909_155825/extracted_output.txt create mode 100644 locodiff-250425/results/aider_tests_basic_test_sendchat.py/x-ai_grok-code-fast-1/20250909_155825/metadata.json create mode 100644 locodiff-250425/results/aider_tests_basic_test_sendchat.py/x-ai_grok-code-fast-1/20250909_155825/output.diff create mode 100644 locodiff-250425/results/aider_tests_basic_test_sendchat.py/x-ai_grok-code-fast-1/20250909_155825/raw_response.txt create mode 100644 locodiff-250425/results/ghostty_pkg_libintl_build.zig/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt create mode 100644 locodiff-250425/results/ghostty_pkg_libintl_build.zig/x-ai_grok-code-fast-1/20250909_155741/metadata.json create mode 100644 locodiff-250425/results/ghostty_pkg_libintl_build.zig/x-ai_grok-code-fast-1/20250909_155741/output.diff create mode 100644 locodiff-250425/results/ghostty_pkg_libintl_build.zig/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt create mode 100644 locodiff-250425/results/ghostty_src_build_SharedDeps.zig/x-ai_grok-code-fast-1/20250909_155840/extracted_output.txt create mode 100644 locodiff-250425/results/ghostty_src_build_SharedDeps.zig/x-ai_grok-code-fast-1/20250909_155840/metadata.json create mode 100644 locodiff-250425/results/ghostty_src_build_SharedDeps.zig/x-ai_grok-code-fast-1/20250909_155840/output.diff create mode 100644 locodiff-250425/results/ghostty_src_build_SharedDeps.zig/x-ai_grok-code-fast-1/20250909_155840/raw_response.txt create mode 100644 locodiff-250425/results/ghostty_src_config_c_get.zig/x-ai_grok-code-fast-1/20250909_155836/extracted_output.txt create mode 100644 
locodiff-250425/results/ghostty_src_config_c_get.zig/x-ai_grok-code-fast-1/20250909_155836/metadata.json create mode 100644 locodiff-250425/results/ghostty_src_config_c_get.zig/x-ai_grok-code-fast-1/20250909_155836/output.diff create mode 100644 locodiff-250425/results/ghostty_src_config_c_get.zig/x-ai_grok-code-fast-1/20250909_155836/raw_response.txt create mode 100644 locodiff-250425/results/ghostty_src_font_Collection.zig/x-ai_grok-code-fast-1/20250909_155824/metadata.json create mode 100644 locodiff-250425/results/ghostty_src_font_Collection.zig/x-ai_grok-code-fast-1/20250909_155824/output.diff create mode 100644 locodiff-250425/results/ghostty_src_font_Collection.zig/x-ai_grok-code-fast-1/20250909_155824/raw_response.txt create mode 100644 locodiff-250425/results/ghostty_src_os_flatpak.zig/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt create mode 100644 locodiff-250425/results/ghostty_src_os_flatpak.zig/x-ai_grok-code-fast-1/20250909_155741/metadata.json create mode 100644 locodiff-250425/results/ghostty_src_os_flatpak.zig/x-ai_grok-code-fast-1/20250909_155741/output.diff create mode 100644 locodiff-250425/results/ghostty_src_os_flatpak.zig/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt create mode 100644 locodiff-250425/results/ghostty_src_terminal_kitty_graphics_image.zig/x-ai_grok-code-fast-1/20250909_155808/extracted_output.txt create mode 100644 locodiff-250425/results/ghostty_src_terminal_kitty_graphics_image.zig/x-ai_grok-code-fast-1/20250909_155808/metadata.json create mode 100644 locodiff-250425/results/ghostty_src_terminal_kitty_graphics_image.zig/x-ai_grok-code-fast-1/20250909_155808/output.diff create mode 100644 locodiff-250425/results/ghostty_src_terminal_kitty_graphics_image.zig/x-ai_grok-code-fast-1/20250909_155808/raw_response.txt create mode 100644 locodiff-250425/results/ghostty_src_terminal_modes.zig/x-ai_grok-code-fast-1/20250909_155855/extracted_output.txt create mode 100644 
locodiff-250425/results/ghostty_src_terminal_modes.zig/x-ai_grok-code-fast-1/20250909_155855/metadata.json create mode 100644 locodiff-250425/results/ghostty_src_terminal_modes.zig/x-ai_grok-code-fast-1/20250909_155855/output.diff create mode 100644 locodiff-250425/results/ghostty_src_terminal_modes.zig/x-ai_grok-code-fast-1/20250909_155855/raw_response.txt create mode 100644 locodiff-250425/results/ghostty_src_termio_shell_integration.zig/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt create mode 100644 locodiff-250425/results/ghostty_src_termio_shell_integration.zig/x-ai_grok-code-fast-1/20250909_155741/metadata.json create mode 100644 locodiff-250425/results/ghostty_src_termio_shell_integration.zig/x-ai_grok-code-fast-1/20250909_155741/output.diff create mode 100644 locodiff-250425/results/ghostty_src_termio_shell_integration.zig/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt create mode 100644 locodiff-250425/results/qdrant_lib_collection_src_collection_manager_segments_updater.rs/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt create mode 100644 locodiff-250425/results/qdrant_lib_collection_src_collection_manager_segments_updater.rs/x-ai_grok-code-fast-1/20250909_155741/metadata.json create mode 100644 locodiff-250425/results/qdrant_lib_collection_src_collection_manager_segments_updater.rs/x-ai_grok-code-fast-1/20250909_155741/output.diff create mode 100644 locodiff-250425/results/qdrant_lib_collection_src_collection_manager_segments_updater.rs/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt create mode 100644 locodiff-250425/results/qdrant_lib_segment_src_index_field_index_map_index_mod.rs/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt create mode 100644 locodiff-250425/results/qdrant_lib_segment_src_index_field_index_map_index_mod.rs/x-ai_grok-code-fast-1/20250909_155741/metadata.json create mode 100644 
locodiff-250425/results/qdrant_lib_segment_src_index_field_index_map_index_mod.rs/x-ai_grok-code-fast-1/20250909_155741/output.diff create mode 100644 locodiff-250425/results/qdrant_lib_segment_src_index_field_index_map_index_mod.rs/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt create mode 100644 locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mmap_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155806/extracted_output.txt create mode 100644 locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mmap_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155806/metadata.json create mode 100644 locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mmap_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155806/output.diff create mode 100644 locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mmap_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155806/raw_response.txt create mode 100644 locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mutable_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt create mode 100644 locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mutable_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155741/metadata.json create mode 100644 locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mutable_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155741/output.diff create mode 100644 locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mutable_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt create mode 100644 locodiff-250425/results/qdrant_lib_segment_src_index_hnsw_index_graph_layers.rs/x-ai_grok-code-fast-1/20250909_155814/extracted_output.txt create mode 100644 
locodiff-250425/results/qdrant_lib_segment_src_index_hnsw_index_graph_layers.rs/x-ai_grok-code-fast-1/20250909_155814/metadata.json create mode 100644 locodiff-250425/results/qdrant_lib_segment_src_index_hnsw_index_graph_layers.rs/x-ai_grok-code-fast-1/20250909_155814/output.diff create mode 100644 locodiff-250425/results/qdrant_lib_segment_src_index_hnsw_index_graph_layers.rs/x-ai_grok-code-fast-1/20250909_155814/raw_response.txt create mode 100644 locodiff-250425/results/qdrant_lib_segment_src_segment_constructor_segment_constructor_base.rs/x-ai_grok-code-fast-1/20250909_155859/extracted_output.txt create mode 100644 locodiff-250425/results/qdrant_lib_segment_src_segment_constructor_segment_constructor_base.rs/x-ai_grok-code-fast-1/20250909_155859/metadata.json create mode 100644 locodiff-250425/results/qdrant_lib_segment_src_segment_constructor_segment_constructor_base.rs/x-ai_grok-code-fast-1/20250909_155859/output.diff create mode 100644 locodiff-250425/results/qdrant_lib_segment_src_segment_constructor_segment_constructor_base.rs/x-ai_grok-code-fast-1/20250909_155859/raw_response.txt create mode 100644 locodiff-250425/results/qdrant_lib_segment_src_spaces_simple.rs/x-ai_grok-code-fast-1/20250909_155756/extracted_output.txt create mode 100644 locodiff-250425/results/qdrant_lib_segment_src_spaces_simple.rs/x-ai_grok-code-fast-1/20250909_155756/metadata.json create mode 100644 locodiff-250425/results/qdrant_lib_segment_src_spaces_simple.rs/x-ai_grok-code-fast-1/20250909_155756/output.diff create mode 100644 locodiff-250425/results/qdrant_lib_segment_src_spaces_simple.rs/x-ai_grok-code-fast-1/20250909_155756/raw_response.txt create mode 100644 locodiff-250425/results/qdrant_lib_segment_tests_integration_payload_index_test.rs/x-ai_grok-code-fast-1/20250909_155806/extracted_output.txt create mode 100644 locodiff-250425/results/qdrant_lib_segment_tests_integration_payload_index_test.rs/x-ai_grok-code-fast-1/20250909_155806/metadata.json create mode 100644 
locodiff-250425/results/qdrant_lib_segment_tests_integration_payload_index_test.rs/x-ai_grok-code-fast-1/20250909_155806/output.diff create mode 100644 locodiff-250425/results/qdrant_lib_segment_tests_integration_payload_index_test.rs/x-ai_grok-code-fast-1/20250909_155806/raw_response.txt create mode 100644 locodiff-250425/results/qdrant_lib_sparse_src_index_search_context.rs/x-ai_grok-code-fast-1/20250909_155741/metadata.json create mode 100644 locodiff-250425/results/qdrant_lib_sparse_src_index_search_context.rs/x-ai_grok-code-fast-1/20250909_155741/output.diff create mode 100644 locodiff-250425/results/qdrant_lib_sparse_src_index_search_context.rs/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt create mode 100644 locodiff-250425/results/qdrant_src_main.rs/x-ai_grok-code-fast-1/20250909_155741/metadata.json create mode 100644 locodiff-250425/results/qdrant_src_main.rs/x-ai_grok-code-fast-1/20250909_155741/output.diff create mode 100644 locodiff-250425/results/qdrant_src_main.rs/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt create mode 100644 locodiff-250425/results/qdrant_src_tonic_api_points_api.rs/x-ai_grok-code-fast-1/20250909_155758/extracted_output.txt create mode 100644 locodiff-250425/results/qdrant_src_tonic_api_points_api.rs/x-ai_grok-code-fast-1/20250909_155758/metadata.json create mode 100644 locodiff-250425/results/qdrant_src_tonic_api_points_api.rs/x-ai_grok-code-fast-1/20250909_155758/output.diff create mode 100644 locodiff-250425/results/qdrant_src_tonic_api_points_api.rs/x-ai_grok-code-fast-1/20250909_155758/raw_response.txt create mode 100644 locodiff-250425/results/react_babel.config.js/x-ai_grok-code-fast-1/20250909_155811/extracted_output.txt create mode 100644 locodiff-250425/results/react_babel.config.js/x-ai_grok-code-fast-1/20250909_155811/metadata.json create mode 100644 locodiff-250425/results/react_babel.config.js/x-ai_grok-code-fast-1/20250909_155811/output.diff create mode 100644 
locodiff-250425/results/react_babel.config.js/x-ai_grok-code-fast-1/20250909_155811/raw_response.txt create mode 100644 locodiff-250425/results/react_compiler_packages_babel-plugin-react-compiler_src_HIR_PropagateScopeDependenciesHIR.ts/x-ai_grok-code-fast-1/20250909_155839/extracted_output.txt create mode 100644 locodiff-250425/results/react_compiler_packages_babel-plugin-react-compiler_src_HIR_PropagateScopeDependenciesHIR.ts/x-ai_grok-code-fast-1/20250909_155839/metadata.json create mode 100644 locodiff-250425/results/react_compiler_packages_babel-plugin-react-compiler_src_HIR_PropagateScopeDependenciesHIR.ts/x-ai_grok-code-fast-1/20250909_155839/output.diff create mode 100644 locodiff-250425/results/react_compiler_packages_babel-plugin-react-compiler_src_HIR_PropagateScopeDependenciesHIR.ts/x-ai_grok-code-fast-1/20250909_155839/raw_response.txt create mode 100644 locodiff-250425/results/react_packages_react-devtools-shared_src_devtools_views_Components_SelectedTreeHighlight.js/x-ai_grok-code-fast-1/20250909_155839/extracted_output.txt create mode 100644 locodiff-250425/results/react_packages_react-devtools-shared_src_devtools_views_Components_SelectedTreeHighlight.js/x-ai_grok-code-fast-1/20250909_155839/metadata.json create mode 100644 locodiff-250425/results/react_packages_react-devtools-shared_src_devtools_views_Components_SelectedTreeHighlight.js/x-ai_grok-code-fast-1/20250909_155839/output.diff create mode 100644 locodiff-250425/results/react_packages_react-devtools-shared_src_devtools_views_Components_SelectedTreeHighlight.js/x-ai_grok-code-fast-1/20250909_155839/raw_response.txt create mode 100644 locodiff-250425/results/react_packages_react-dom_src___tests___ReactDOMOption-test.js/x-ai_grok-code-fast-1/20250909_155839/extracted_output.txt create mode 100644 locodiff-250425/results/react_packages_react-dom_src___tests___ReactDOMOption-test.js/x-ai_grok-code-fast-1/20250909_155839/metadata.json create mode 100644 
locodiff-250425/results/react_packages_react-dom_src___tests___ReactDOMOption-test.js/x-ai_grok-code-fast-1/20250909_155839/output.diff create mode 100644 locodiff-250425/results/react_packages_react-dom_src___tests___ReactDOMOption-test.js/x-ai_grok-code-fast-1/20250909_155839/raw_response.txt create mode 100644 locodiff-250425/results/react_packages_react-dom_src___tests___ReactDOMRoot-test.js/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt create mode 100644 locodiff-250425/results/react_packages_react-dom_src___tests___ReactDOMRoot-test.js/x-ai_grok-code-fast-1/20250909_155741/metadata.json create mode 100644 locodiff-250425/results/react_packages_react-dom_src___tests___ReactDOMRoot-test.js/x-ai_grok-code-fast-1/20250909_155741/output.diff create mode 100644 locodiff-250425/results/react_packages_react-dom_src___tests___ReactDOMRoot-test.js/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt create mode 100644 locodiff-250425/results/react_packages_react-dom_src___tests___findDOMNodeFB-test.js/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt create mode 100644 locodiff-250425/results/react_packages_react-dom_src___tests___findDOMNodeFB-test.js/x-ai_grok-code-fast-1/20250909_155741/metadata.json create mode 100644 locodiff-250425/results/react_packages_react-dom_src___tests___findDOMNodeFB-test.js/x-ai_grok-code-fast-1/20250909_155741/output.diff create mode 100644 locodiff-250425/results/react_packages_react-dom_src___tests___findDOMNodeFB-test.js/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt create mode 100644 locodiff-250425/results/react_packages_react-reconciler_src_ReactFiberLane.js/x-ai_grok-code-fast-1/20250909_155831/extracted_output.txt create mode 100644 locodiff-250425/results/react_packages_react-reconciler_src_ReactFiberLane.js/x-ai_grok-code-fast-1/20250909_155831/metadata.json create mode 100644 
locodiff-250425/results/react_packages_react-reconciler_src_ReactFiberLane.js/x-ai_grok-code-fast-1/20250909_155831/output.diff create mode 100644 locodiff-250425/results/react_packages_react-reconciler_src_ReactFiberLane.js/x-ai_grok-code-fast-1/20250909_155831/raw_response.txt create mode 100644 locodiff-250425/results/react_packages_react-reconciler_src_ReactInternalTypes.js/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt create mode 100644 locodiff-250425/results/react_packages_react-reconciler_src_ReactInternalTypes.js/x-ai_grok-code-fast-1/20250909_155741/metadata.json create mode 100644 locodiff-250425/results/react_packages_react-reconciler_src_ReactInternalTypes.js/x-ai_grok-code-fast-1/20250909_155741/output.diff create mode 100644 locodiff-250425/results/react_packages_react-reconciler_src_ReactInternalTypes.js/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt create mode 100644 locodiff-250425/results/react_packages_react-reconciler_src___tests___ReactCache-test.js/x-ai_grok-code-fast-1/20250909_155826/extracted_output.txt create mode 100644 locodiff-250425/results/react_packages_react-reconciler_src___tests___ReactCache-test.js/x-ai_grok-code-fast-1/20250909_155826/metadata.json create mode 100644 locodiff-250425/results/react_packages_react-reconciler_src___tests___ReactCache-test.js/x-ai_grok-code-fast-1/20250909_155826/output.diff create mode 100644 locodiff-250425/results/react_packages_react-reconciler_src___tests___ReactCache-test.js/x-ai_grok-code-fast-1/20250909_155826/raw_response.txt create mode 100644 locodiff-250425/results/react_packages_react-reconciler_src___tests___ReactExpiration-test.js/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt create mode 100644 locodiff-250425/results/react_packages_react-reconciler_src___tests___ReactExpiration-test.js/x-ai_grok-code-fast-1/20250909_155741/metadata.json create mode 100644 
locodiff-250425/results/react_packages_react-reconciler_src___tests___ReactExpiration-test.js/x-ai_grok-code-fast-1/20250909_155741/output.diff create mode 100644 locodiff-250425/results/react_packages_react-reconciler_src___tests___ReactExpiration-test.js/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt create mode 100644 locodiff-250425/results/react_packages_react-reconciler_src___tests___ReactIncrementalSideEffects-test.js/x-ai_grok-code-fast-1/20250909_155814/extracted_output.txt create mode 100644 locodiff-250425/results/react_packages_react-reconciler_src___tests___ReactIncrementalSideEffects-test.js/x-ai_grok-code-fast-1/20250909_155814/metadata.json create mode 100644 locodiff-250425/results/react_packages_react-reconciler_src___tests___ReactIncrementalSideEffects-test.js/x-ai_grok-code-fast-1/20250909_155814/output.diff create mode 100644 locodiff-250425/results/react_packages_react-reconciler_src___tests___ReactIncrementalSideEffects-test.js/x-ai_grok-code-fast-1/20250909_155814/raw_response.txt create mode 100644 locodiff-250425/results/react_packages_react-reconciler_src___tests___ReactIncrementalUpdates-test.js/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt create mode 100644 locodiff-250425/results/react_packages_react-reconciler_src___tests___ReactIncrementalUpdates-test.js/x-ai_grok-code-fast-1/20250909_155741/metadata.json create mode 100644 locodiff-250425/results/react_packages_react-reconciler_src___tests___ReactIncrementalUpdates-test.js/x-ai_grok-code-fast-1/20250909_155741/output.diff create mode 100644 locodiff-250425/results/react_packages_react-reconciler_src___tests___ReactIncrementalUpdates-test.js/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt create mode 100644 locodiff-250425/results/tldraw_apps_dotcom_client_src_tla_components_TlaEditor_TlaEditor.tsx/x-ai_grok-code-fast-1/20250909_155807/extracted_output.txt create mode 100644 
locodiff-250425/results/tldraw_apps_dotcom_client_src_tla_components_TlaEditor_TlaEditor.tsx/x-ai_grok-code-fast-1/20250909_155807/metadata.json create mode 100644 locodiff-250425/results/tldraw_apps_dotcom_client_src_tla_components_TlaEditor_TlaEditor.tsx/x-ai_grok-code-fast-1/20250909_155807/output.diff create mode 100644 locodiff-250425/results/tldraw_apps_dotcom_client_src_tla_components_TlaEditor_TlaEditor.tsx/x-ai_grok-code-fast-1/20250909_155807/raw_response.txt create mode 100644 locodiff-250425/results/tldraw_apps_dotcom_client_src_tla_components_TlaFileShareMenu_Tabs_TlaExportTab.tsx/x-ai_grok-code-fast-1/20250909_155846/extracted_output.txt create mode 100644 locodiff-250425/results/tldraw_apps_dotcom_client_src_tla_components_TlaFileShareMenu_Tabs_TlaExportTab.tsx/x-ai_grok-code-fast-1/20250909_155846/metadata.json create mode 100644 locodiff-250425/results/tldraw_apps_dotcom_client_src_tla_components_TlaFileShareMenu_Tabs_TlaExportTab.tsx/x-ai_grok-code-fast-1/20250909_155846/output.diff create mode 100644 locodiff-250425/results/tldraw_apps_dotcom_client_src_tla_components_TlaFileShareMenu_Tabs_TlaExportTab.tsx/x-ai_grok-code-fast-1/20250909_155846/raw_response.txt create mode 100644 locodiff-250425/results/tldraw_packages_editor_src_lib_editor_shapes_ShapeUtil.ts/x-ai_grok-code-fast-1/20250909_155836/extracted_output.txt create mode 100644 locodiff-250425/results/tldraw_packages_editor_src_lib_editor_shapes_ShapeUtil.ts/x-ai_grok-code-fast-1/20250909_155836/metadata.json create mode 100644 locodiff-250425/results/tldraw_packages_editor_src_lib_editor_shapes_ShapeUtil.ts/x-ai_grok-code-fast-1/20250909_155836/output.diff create mode 100644 locodiff-250425/results/tldraw_packages_editor_src_lib_editor_shapes_ShapeUtil.ts/x-ai_grok-code-fast-1/20250909_155836/raw_response.txt create mode 100644 locodiff-250425/results/tldraw_packages_editor_src_lib_utils_debug-flags.ts/x-ai_grok-code-fast-1/20250909_155833/extracted_output.txt create mode 100644 
locodiff-250425/results/tldraw_packages_editor_src_lib_utils_debug-flags.ts/x-ai_grok-code-fast-1/20250909_155833/metadata.json create mode 100644 locodiff-250425/results/tldraw_packages_editor_src_lib_utils_debug-flags.ts/x-ai_grok-code-fast-1/20250909_155833/output.diff create mode 100644 locodiff-250425/results/tldraw_packages_editor_src_lib_utils_debug-flags.ts/x-ai_grok-code-fast-1/20250909_155833/raw_response.txt create mode 100644 locodiff-250425/results/tldraw_packages_tldraw_src_lib_shapes_text_TextShapeTool.test.ts/x-ai_grok-code-fast-1/20250909_155859/extracted_output.txt create mode 100644 locodiff-250425/results/tldraw_packages_tldraw_src_lib_shapes_text_TextShapeTool.test.ts/x-ai_grok-code-fast-1/20250909_155859/metadata.json create mode 100644 locodiff-250425/results/tldraw_packages_tldraw_src_lib_shapes_text_TextShapeTool.test.ts/x-ai_grok-code-fast-1/20250909_155859/output.diff create mode 100644 locodiff-250425/results/tldraw_packages_tldraw_src_lib_shapes_text_TextShapeTool.test.ts/x-ai_grok-code-fast-1/20250909_155859/raw_response.txt create mode 100644 locodiff-250425/results/tldraw_packages_tldraw_src_lib_shapes_video_VideoShapeUtil.tsx/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt create mode 100644 locodiff-250425/results/tldraw_packages_tldraw_src_lib_shapes_video_VideoShapeUtil.tsx/x-ai_grok-code-fast-1/20250909_155741/metadata.json create mode 100644 locodiff-250425/results/tldraw_packages_tldraw_src_lib_shapes_video_VideoShapeUtil.tsx/x-ai_grok-code-fast-1/20250909_155741/output.diff create mode 100644 locodiff-250425/results/tldraw_packages_tldraw_src_lib_shapes_video_VideoShapeUtil.tsx/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt create mode 100644 locodiff-250425/results/tldraw_packages_tldraw_src_lib_ui_hooks_useTranslation_TLUiTranslationKey.ts/x-ai_grok-code-fast-1/20250909_155826/metadata.json create mode 100644 
locodiff-250425/results/tldraw_packages_tldraw_src_lib_ui_hooks_useTranslation_TLUiTranslationKey.ts/x-ai_grok-code-fast-1/20250909_155826/output.diff create mode 100644 locodiff-250425/results/tldraw_packages_tldraw_src_lib_ui_hooks_useTranslation_TLUiTranslationKey.ts/x-ai_grok-code-fast-1/20250909_155826/raw_response.txt create mode 100644 locodiff-250425/results/tldraw_packages_tldraw_src_test_Editor.test.tsx/x-ai_grok-code-fast-1/20250909_155832/extracted_output.txt create mode 100644 locodiff-250425/results/tldraw_packages_tldraw_src_test_Editor.test.tsx/x-ai_grok-code-fast-1/20250909_155832/metadata.json create mode 100644 locodiff-250425/results/tldraw_packages_tldraw_src_test_Editor.test.tsx/x-ai_grok-code-fast-1/20250909_155832/output.diff create mode 100644 locodiff-250425/results/tldraw_packages_tldraw_src_test_Editor.test.tsx/x-ai_grok-code-fast-1/20250909_155832/raw_response.txt diff --git a/locodiff-250425/results/aider_aider_args.py/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt b/locodiff-250425/results/aider_aider_args.py/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt new file mode 100644 index 000000000..323116e22 --- /dev/null +++ b/locodiff-250425/results/aider_aider_args.py/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt @@ -0,0 +1,895 @@ +#!/usr/bin/env python + +import argparse +import os +import sys +from pathlib import Path + +import configargparse + +from aider import __version__ +from aider.args_formatter import ( + DotEnvFormatter, + MarkdownHelpFormatter, + YamlHelpFormatter, +) +from aider.deprecated import add_deprecated_model_args + +from .dump import dump # noqa: F401 + + +def resolve_aiderignore_path(path_str, git_root=None): + path = Path(path_str) + if path.is_absolute(): + return str(path) + elif git_root: + return str(Path(git_root) / path) + return str(path) + + +def default_env_file(git_root): + return os.path.join(git_root, ".env") if git_root else ".env" + + +def 
get_parser(default_config_files, git_root): + parser = configargparse.ArgumentParser( + description="aider is AI pair programming in your terminal", + add_config_file_help=True, + default_config_files=default_config_files, + config_file_parser_class=configargparse.YAMLConfigFileParser, + auto_env_var_prefix="AIDER_", + ) + group = parser.add_argument_group("Main model") + group.add_argument( + "files", metavar="FILE", nargs="*", help="files to edit with an LLM (optional)" + ) + group.add_argument( + "--model", + metavar="MODEL", + default=None, + help="Specify the model to use for the main chat", + ) + group.add_argument( + "--architect", + action="store_const", + dest="edit_format", + const="architect", + help="Use architect edit format for the main chat", + ) + group.add_argument( + "--auto-accept-architect", + action=argparse.BooleanOptionalAction, + default=True, + help="Enable/disable automatic acceptance of architect changes (default: True)", + ) + group.add_argument( + "--weak-model", + metavar="WEAK_MODEL", + default=None, + help=( + "Specify the model to use for commit messages and chat history summarization (default" + " depends on --model)" + ), + ) + group.add_argument( + "--editor-model", + metavar="EDITOR_MODEL", + default=None, + help="Specify the model to use for editor tasks (default depends on --model)", + ) + group.add_argument( + "--editor-edit-format", + metavar="EDITOR_EDIT_FORMAT", + default=None, + help="Specify the edit format for the editor model (default: depends on editor model)", + ) + group.add_argument( + "--show-model-warnings", + action=argparse.BooleanOptionalAction, + default=True, + help="Only work with models that have meta-data available (default: True)", + ) + group.add_argument( + "--check-model-accepts-settings", + action=argparse.BooleanOptionalAction, + default=True, + help=( + "Check if model accepts settings like reasoning_effort/thinking_tokens (default: True)" + ), + ) + group.add_argument( + 
"--max-chat-history-tokens", + type=int, + default=None, + help=( + "Soft limit on tokens for chat history, after which summarization begins." + " If unspecified, defaults to the model's max_chat_history_tokens." + ), + ) + group.add_argument( + "--reasoning-effort", + type=str, + help="Set the reasoning_effort API parameter (default: not set)", + ) + group.add_argument( + "--thinking-tokens", + type=str, + help="Set the thinking token budget for models that support it (default: not set)", + ) + group.add_argument( + "--verify-ssl", + action=argparse.BooleanOptionalAction, + default=True, + help="Verify the SSL cert when connecting to models (default: True)", + ) + group.add_argument( + "--timeout", + type=float, + default=None, + help="Timeout in seconds for API calls (default: None)", + ) + group.add_argument( + "--edit-format", + "--chat-mode", + metavar="EDIT_FORMAT", + default=None, + help="Specify what edit format the LLM should use (default depends on model)", + ) + + ########## + group = parser.add_argument_group("API Keys and settings") + group.add_argument( + "--openai-api-key", + help="Specify the OpenAI API key", + ) + group.add_argument( + "--anthropic-api-key", + help="Specify the Anthropic API key", + ) + group.add_argument( + "--openai-api-base", + help="Specify the api base url", + ) + group.add_argument( + "--openai-api-type", + help="Specify the api_type", + ) + group.add_argument( + "--openai-api-version", + help="Specify the api_version", + ) + group.add_argument( + "--openai-api-deployment-id", + help="Specify the deployment_id", + ) + group.add_argument( + "--openai-organization-id", + help="Specify the OpenAI organization ID", + ) + group.add_argument( + "--set-env", + action="append", + metavar="ENV_VAR_NAME=value", + help="Set an environment variable (to control API settings, can be used multiple times)", + default=[], + ) + group.add_argument( + "--api-key", + action="append", + metavar="PROVIDER=KEY", + help=( + "Set an API key for a 
provider (eg: --api-key provider= sets" + " PROVIDER_API_KEY=)" + ), + default=[], + ) + + ########## + group = parser.add_argument_group("Model settings") + group.add_argument( + "--list-models", + "--models", + metavar="MODEL", + help="List known models which match the (partial) MODEL name", + ) + group.add_argument( + "--model-settings-file", + metavar="MODEL_SETTINGS_FILE", + default=".aider.model.settings.yml", + help="Specify a file with aider model settings for unknown models", + ) + group.add_argument( + "--model-metadata-file", + metavar="MODEL_METADATA_FILE", + default=".aider.model.metadata.json", + help="Specify a file with context window and costs for unknown models", + ) + group.add_argument( + "--alias", + action="append", + metavar="ALIAS:MODEL", + help="Add a model alias (can be used multiple times)", + ) + + ########## + group = parser.add_argument_group("History Files") + default_input_history_file = ( + os.path.join(git_root, ".aider.input.history") if git_root else ".aider.input.history" + ) + default_chat_history_file = ( + os.path.join(git_root, ".aider.chat.history.md") if git_root else ".aider.chat.history.md" + ) + group.add_argument( + "--input-history-file", + metavar="INPUT_HISTORY_FILE", + default=default_input_history_file, + help=f"Specify the chat input history file (default: {default_input_history_file})", + ) + group.add_argument( + "--chat-history-file", + metavar="CHAT_HISTORY_FILE", + default=default_chat_history_file, + help=f"Specify the chat history file (default: {default_chat_history_file})", + ) + group.add_argument( + "--restore-chat-history", + action=argparse.BooleanOptionalAction, + default=False, + help="Restore the previous chat history messages (default: False)", + ) + group.add_argument( + "--llm-history-file", + metavar="LLM_HISTORY_FILE", + default=None, + help="Log the conversation with the LLM to this file (for example, .aider.llm.history)", + ) + + ########## + group = parser.add_argument_group("Output 
settings") + group.add_argument( + "--dark-mode", + action="store_true", + help="Use colors suitable for a dark terminal background (default: False)", + default=False, + ) + group.add_argument( + "--light-mode", + action="store_true", + help="Use colors suitable for a light terminal background (default: False)", + default=False, + ) + group.add_argument( + "--pretty", + action=argparse.BooleanOptionalAction, + default=True, + help="Enable/disable pretty, colorized output (default: True)", + ) + group.add_argument( + "--stream", + action=argparse.BooleanOptionalAction, + default=True, + help="Enable/disable streaming responses (default: True)", + ) + group.add_argument( + "--user-input-color", + default="#00cc00", + help="Set the color for user input (default: #00cc00)", + ) + group.add_argument( + "--tool-output-color", + default=None, + help="Set the color for tool output (default: None)", + ) + group.add_argument( + "--tool-error-color", + default="#FF2222", + help="Set the color for tool error messages (default: #FF2222)", + ) + group.add_argument( + "--tool-warning-color", + default="#FFA500", + help="Set the color for tool warning messages (default: #FFA500)", + ) + group.add_argument( + "--assistant-output-color", + default="#0088ff", + help="Set the color for assistant output (default: #0088ff)", + ) + group.add_argument( + "--completion-menu-color", + metavar="COLOR", + default=None, + help="Set the color for the completion menu (default: terminal's default text color)", + ) + group.add_argument( + "--completion-menu-bg-color", + metavar="COLOR", + default=None, + help=( + "Set the background color for the completion menu (default: terminal's default" + " background color)" + ), + ) + group.add_argument( + "--completion-menu-current-color", + metavar="COLOR", + default=None, + help=( + "Set the color for the current item in the completion menu (default: terminal's default" + " background color)" + ), + ) + group.add_argument( + 
"--completion-menu-current-bg-color", + metavar="COLOR", + default=None, + help=( + "Set the background color for the current item in the completion menu (default:" + " terminal's default text color)" + ), + ) + group.add_argument( + "--code-theme", + default="default", + help=( + "Set the markdown code theme (default: default, other options include monokai," + " solarized-dark, solarized-light, or a Pygments builtin style," + " see https://pygments.org/styles for available themes)" + ), + ) + group.add_argument( + "--show-diffs", + action="store_true", + help="Show diffs when committing changes (default: False)", + default=False, + ) + + ########## + group = parser.add_argument_group("Git settings") + group.add_argument( + "--git", + action=argparse.BooleanOptionalAction, + default=True, + help="Enable/disable looking for a git repo (default: True)", + ) + group.add_argument( + "--gitignore", + action=argparse.BooleanOptionalAction, + default=True, + help="Enable/disable adding .aider* to .gitignore (default: True)", + ) + default_aiderignore_file = ( + os.path.join(git_root, ".aiderignore") if git_root else ".aiderignore" + ) + group.add_argument( + "--aiderignore", + metavar="AIDERIGNORE", + type=lambda path_str: resolve_aiderignore_path(path_str, git_root), + default=default_aiderignore_file, + help="Specify the aider ignore file (default: .aiderignore in git root)", + ) + group.add_argument( + "--subtree-only", + action="store_true", + help="Only consider files in the current subtree of the git repository", + default=False, + ) + group.add_argument( + "--auto-commits", + action=argparse.BooleanOptionalAction, + default=True, + help="Enable/disable auto commit of LLM changes (default: True)", + ) + group.add_argument( + "--dirty-commits", + action=argparse.BooleanOptionalAction, + default=True, + help="Enable/disable commits when repo is found dirty (default: True)", + ) + group.add_argument( + "--attribute-author", + action=argparse.BooleanOptionalAction, + 
default=True, + help="Attribute aider code changes in the git author name (default: True)", + ) + group.add_argument( + "--attribute-committer", + action=argparse.BooleanOptionalAction, + default=True, + help="Attribute aider commits in the git committer name (default: True)", + ) + group.add_argument( + "--attribute-commit-message-author", + action=argparse.BooleanOptionalAction, + default=False, + help="Prefix commit messages with 'aider: ' if aider authored the changes (default: False)", + ) + group.add_argument( + "--attribute-commit-message-committer", + action=argparse.BooleanOptionalAction, + default=False, + help="Prefix all commit messages with 'aider: ' (default: False)", + ) + group.add_argument( + "--git-commit-verify", + action=argparse.BooleanOptionalAction, + default=False, + help="Enable/disable git pre-commit hooks with --no-verify (default: False)", + ) + + ########## + group = parser.add_argument_group("Fixing and committing") + group.add_argument( + "--commit", + action="store_true", + help="Commit all pending changes with a suitable commit message, then exit", + default=False, + ) + group.add_argument( + "--lint", + action="store_true", + help="Lint and fix provided files, or dirty files if none provided", + default=False, + ) + group.add_argument( + "--lint-cmd", + action="append", + help=( + 'Specify lint commands to run for different languages, eg: "python: flake8' + ' --select=..." 
(can be used multiple times)' + ), + default=[], + ) + group.add_argument( + "--auto-lint", + action=argparse.BooleanOptionalAction, + default=True, + help="Enable/disable automatic linting after changes (default: True)", + ) + group.add_argument( + "--test-cmd", + help="Specify command to run tests", + default=[], + ) + group.add_argument( + "--auto-test", + action=argparse.BooleanOptionalAction, + default=False, + help="Enable/disable automatic testing after changes (default: False)", + ) + group.add_argument( + "--test", + action="store_true", + help="Run tests, fix problems found and then exit", + default=False, + ) + group.add_argument( + "--commit-prompt", + metavar="PROMPT", + help="Specify a custom prompt for generating commit messages", + ) + group.add_argument( + "--dry-run", + action=argparse.BooleanOptionalAction, + default=False, + help="Perform a dry run without modifying files (default: False)", + ) + group.add_argument( + "--skip-sanity-check-repo", + action="store_true", + help="Skip the sanity check for the git repository (default: False)", + default=False, + ) + + ########## + group = parser.add_argument_group("Repomap settings") + group.add_argument( + "--map-tokens", + type=int, + default=None, + help="Suggested number of tokens to use for repo map, use 0 to disable", + ) + group.add_argument( + "--map-refresh", + choices=["auto", "always", "files", "manual"], + default="auto", + help=( + "Control how often the repo map is refreshed. 
Options: auto, always, files, manual" + " (default: auto)" + ), + ) + group.add_argument( + "--map-multiplier-no-files", + type=float, + default=2, + help="Multiplier for map tokens when no files are specified (default: 2)", + ) + + ########## + group = parser.add_argument_group("Cache settings") + group.add_argument( + "--cache-prompts", + action=argparse.BooleanOptionalAction, + default=False, + help="Enable caching of prompts (default: False)", + ) + group.add_argument( + "--cache-keepalive-pings", + type=int, + default=0, + help="Number of times to ping at 5min intervals to keep prompt cache warm (default: 0)", + ) + + ########## + group = parser.add_argument_group("History Files") + default_input_history_file = ( + os.path.join(git_root, ".aider.input.history") if git_root else ".aider.input.history" + ) + default_chat_history_file = ( + os.path.join(git_root, ".aider.chat.history.md") if git_root else ".aider.chat.history.md" + ) + group.add_argument( + "--input-history-file", + metavar="INPUT_HISTORY_FILE", + default=default_input_history_file, + help=f"Specify the chat input history file (default: {default_input_history_file})", + ) + group.add_argument( + "--chat-history-file", + metavar="CHAT_HISTORY_FILE", + default=default_chat_history_file, + help=f"Specify the chat history file (default: {default_chat_history_file})", + ) + group.add_argument( + "--restore-chat-history", + action=argparse.BooleanOptionalAction, + default=False, + help="Restore the previous chat history messages (default: False)", + ) + group.add_argument( + "--llm-history-file", + metavar="LLM_HISTORY_FILE", + default=None, + help="Log the conversation with the LLM to this file (for example, .aider.llm.history)", + ) + + ########## + group = parser.add_argument_group("Analytics") + group.add_argument( + "--analytics", + action=argparse.BooleanOptionalAction, + default=None, + help="Enable/disable analytics for current session (default: random)", + ) + group.add_argument( + 
"--analytics-log", + metavar="ANALYTICS_LOG_FILE", + help="Specify a file to log analytics events", + ) + group.add_argument( + "--analytics-disable", + action="store_true", + help="Permanently disable analytics", + default=False, + ) + + group = parser.add_argument_group("Upgrading") + group.add_argument( + "--just-check-update", + action="store_true", + help="Check for updates and return status in the exit code", + default=False, + ) + group.add_argument( + "--show-release-notes", + action=argparse.BooleanOptionalAction, + help="Show release notes on first run of new version (default: None, ask user)", + default=None, + ) + group.add_argument( + "--install-main-branch", + action="store_true", + help="Install the latest version from the main branch", + default=False, + ) + group.add_argument( + "--upgrade", + "--update", + action="store_true", + help="Upgrade aider to the latest version from PyPI", + default=False, + ) + group.add_argument( + "--version", + action="version", + version=f"%(prog)s {__version__}", + help="Show the version number and exit", + ) + + ########## + group = parser.add_argument_group("Modes") + group.add_argument( + "--message", + "--msg", + "-m", + metavar="COMMAND", + help=( + "Specify a single message to send the LLM, process reply then exit (disables chat mode)" + ), + ) + group.add_argument( + "--message-file", + "-f", + metavar="MESSAGE_FILE", + help=( + "Specify a file containing the message to send the LLM, process reply, then exit" + " (disables chat mode)" + ), + ) + group.add_argument( + "--gui", + "--browser", + action=argparse.BooleanOptionalAction, + help="Run aider in your browser (default: False)", + default=False, + ) + group.add_argument( + "--copy-paste", + action=argparse.BooleanOptionalAction, + default=False, + help="Enable automatic copy/paste of chat between aider and web UI (default: False)", + ) + group.add_argument( + "--apply", + metavar="FILE", + help="Apply the changes from the given file instead of running the 
chat (debug)", + ) + group.add_argument( + "--apply-clipboard-edits", + action="store_true", + help="Apply clipboard contents as edits using the main model's editor format", + default=False, + ) + group.add_argument( + "--yes-always", + action="store_true", + help="Always say yes to every confirmation", + default=None, + ) + group.add_argument( + "--exit", + action="store_true", + help="Do all startup activities then exit before accepting user input (debug)", + default=False, + ) + group.add_argument( + "--show-repo-map", + action="store_true", + help="Print the repo map and exit (debug)", + default=False, + ) + group.add_argument( + "--show-prompts", + action="store_true", + help="Print the system prompts and exit (debug)", + default=False, + ) + group.add_argument( + "--verbose", + action="store_true", + help="Enable verbose output", + default=False, + ) + group.add_argument( + "--encoding", + default="utf-8", + help="Specify the encoding for input and output (default: utf-8)", + ) + group.add_argument( + "--line-endings", + choices=["platform", "lf", "crlf"], + default="platform", + help="Line endings to use when writing files (default: platform)", + ) + + ########## + group = parser.add_argument_group("Analytics") + group.add_argument( + "--analytics", + action=argparse.BooleanOptionalAction, + default=None, + help="Enable/disable analytics for current session (default: random)", + ) + group.add_argument( + "--analytics-log", + metavar="ANALYTICS_LOG_FILE", + help="Specify a file to log analytics events", + ) + group.add_argument( + "--analytics-disable", + action="store_true", + help="Permanently disable analytics", + default=False, + ) + + ########## + group = parser.add_argument_group("Other settings") + group.add_argument( + "--file", + action="append", + metavar="FILE", + help="specify a file to edit (can be used multiple times)", + ) + group.add_argument( + "--read", + action="append", + metavar="FILE", + help="specify a read-only file (can be used 
multiple times)", + ) + group.add_argument( + "--vim", + action="store_true", + help="Use VI editing mode in the terminal (default: False)", + default=False, + ) + group.add_argument( + "--chat-language", + metavar="CHAT_LANGUAGE", + default=None, + help="Specify the language to use in the chat (default: None, uses system settings)", + ) + group.add_argument( + "--yes-always", + action="store_true", + help="Always say yes to every confirmation", + default=None, + ) + group.add_argument( + "--load", + metavar="LOAD_FILE", + help="Load and execute /commands from a file on launch", + ) + group.add_argument( + "--watch-files", + action=argparse.BooleanOptionalAction, + default=False, + help="Enable/disable watching files for ai coding comments (default: False)", + ) + group.add_argument( + "--set-env", + action="append", + metavar="ENV_VAR_NAME=value", + help="Set an environment variable (can be used multiple times)", + default=[], + ) + group.add_argument( + "--api-key", + action="append", + metavar="PROVIDER=KEY", + help="Set an API key for a provider (eg: --api-key anthropic=sk-123)", + default=[], + ) + group.add_argument( + "--env-file", + metavar="ENV_FILE", + default=default_env_file(git_root), + help="Specify the .env file to load (default: .env in git root)", + ) + group.add_argument( + "--suggest-shell-commands", + action=argparse.BooleanOptionalAction, + default=True, + help="Enable/disable suggesting shell commands (default: True)", + ) + group.add_argument( + "--fancy-input", + action=argparse.BooleanOptionalAction, + default=True, + help="Enable/disable fancy input with history and completion (default: True)", + ) + group.add_argument( + "--multiline", + action=argparse.BooleanOptionalAction, + default=False, + help="Enable/disable multi-line input mode with Meta-Enter to submit (default: False)", + ) + group.add_argument( + "--notifications", + action=argparse.BooleanOptionalAction, + default=False, + help=( + "Enable/disable terminal bell notifications 
when LLM responses are ready (default:" + " False)" + ), + ) + group.add_argument( + "--notifications-command", + metavar="COMMAND", + default=None, + help=( + "Specify a command to run for notifications instead of the terminal bell. If not" + " specified, a default command for your OS may be used." + ), + ) + group.add_argument( + "--detect-urls", + action=argparse.BooleanOptionalAction, + default=True, + help="Enable/disable detection and offering to add URLs to chat (default: True)", + ) + group.add_argument( + "--editor", + help="Specify which editor to use for the /editor command", + ) + group.add_argument( + "--c", + "--config", + is_config_file=True, + metavar="CONFIG_FILE", + help=( + "Specify the config file (default: search for .aider.conf.yml in git root, cwd" + " or home directory)" + ), + ) + + ########## + group = parser.add_argument_group("Deprecated model settings") + # Add deprecated model shortcut arguments + add_deprecated_model_args(parser, group) + + return parser + + +def get_md_help(): + os.environ["COLUMNS"] = "/rpc!lx7' + sys.argv = ["aider"] + parser = get_parser([], None) + + # This instantiates all the action.env_var values + parser.parse_known_args() + + parser.formatter_class = MarkdownHelpFormatter + + return argparse.ArgumentParser.format_help(parser) + + +def get_sample_yaml(): + os.environ["COLUMNS"] = "120" + sys.argv = ["aider"] + parser = get_parser([], None) + + # This instantiates all the action.env_var values + parser.parse_known_args() + + parser.formatter_class = YamlHelpFormatter + + return argparse.ArgumentParser.format_help(parser) + + +def get_sample_dotenv(): + os.environ["COLUMNS"] = "120" + sys.argv = ["aider"] + parser = get_parser([], None) + + # This instantiates all the action.env_var values + parser.parse_known_args() + + parser.formatter_class = DotEnvFormatter + + return argparse.ArgumentParser.format_help(parser) + + +def main(): + arg = sys.argv[1] if len(sys.argv[1:]) else None + + if arg == "md": + 
print(get_md_help()) + elif arg == "dotenv": + print(get_sample_dotenv()) + else: + print(get_sample_yaml()) + + +if __name__ == "__main__": + status = main() + sys.exit(status) \ No newline at end of file diff --git a/locodiff-250425/results/aider_aider_args.py/x-ai_grok-code-fast-1/20250909_155741/metadata.json b/locodiff-250425/results/aider_aider_args.py/x-ai_grok-code-fast-1/20250909_155741/metadata.json new file mode 100644 index 000000000..7b812fea4 --- /dev/null +++ b/locodiff-250425/results/aider_aider_args.py/x-ai_grok-code-fast-1/20250909_155741/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "aider_aider_args.py", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/aider_aider_args.py_prompt.txt", + "expected_file": "locodiff-250425/prompts/aider_aider_args.py_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:57:41.899185+00:00", + "success": false, + "error": "Output mismatch", + "api_error": false, + "raw_response_length": 27429, + "extracted_output_length": 27415, + "expected_output_length": 26189, + "results_dir": "locodiff-250425/results/aider_aider_args.py/x-ai_grok-code-fast-1/20250909_155741", + "generation_id": "gen-1757433462-s1jz6SIxviqBTVkmnunn", + "cost_usd": 0.02351338, + "prompt_tokens": 61646, + "completion_tokens": 6591, + "total_tokens": 68237, + "native_prompt_tokens": 60980, + "native_completion_tokens": 7591, + "native_finish_reason": "stop", + "stats_error": null, + "native_tokens_reasoning": 1830 +} \ No newline at end of file diff --git a/locodiff-250425/results/aider_aider_args.py/x-ai_grok-code-fast-1/20250909_155741/output.diff b/locodiff-250425/results/aider_aider_args.py/x-ai_grok-code-fast-1/20250909_155741/output.diff new file mode 100644 index 000000000..489561946 --- /dev/null +++ b/locodiff-250425/results/aider_aider_args.py/x-ai_grok-code-fast-1/20250909_155741/output.diff @@ -0,0 +1,601 @@ + +index e64aa9deb..323116e22 100644 +--- 
a/aider_aider_args.py_expectedoutput.txt (expected):tmp/tmpdtias6mm_expected.txt ++++ b/aider_aider_args.py_extracted.txt (actual):tmp/tmpbsu2iugo_actual.txt +@@ -49,78 +49,62 @@ def get_parser(default_config_files, git_root): + default=None, + help="Specify the model to use for the main chat", + ) +- +- ########## +- group = parser.add_argument_group("API Keys and settings") +- group.add_argument( +- "--openai-api-key", +- help="Specify the OpenAI API key", +- ) + group.add_argument( +- "--anthropic-api-key", +- help="Specify the Anthropic API key", +- ) +- group.add_argument( +- "--openai-api-base", +- help="Specify the api base url", ++ "--architect", ++ action="store_const", ++ dest="edit_format", ++ const="architect", ++ help="Use architect edit format for the main chat", + ) + group.add_argument( +- "--openai-api-type", +- help="(deprecated, use --set-env OPENAI_API_TYPE=)", ++ "--auto-accept-architect", ++ action=argparse.BooleanOptionalAction, ++ default=True, ++ help="Enable/disable automatic acceptance of architect changes (default: True)", + ) + group.add_argument( +- "--openai-api-version", +- help="(deprecated, use --set-env OPENAI_API_VERSION=)", ++ "--weak-model", ++ metavar="WEAK_MODEL", ++ default=None, ++ help=( ++ "Specify the model to use for commit messages and chat history summarization (default" ++ " depends on --model)" ++ ), + ) + group.add_argument( +- "--openai-api-deployment-id", +- help="(deprecated, use --set-env OPENAI_API_DEPLOYMENT_ID=)", ++ "--editor-model", ++ metavar="EDITOR_MODEL", ++ default=None, ++ help="Specify the model to use for editor tasks (default depends on --model)", + ) + group.add_argument( +- "--openai-organization-id", +- help="(deprecated, use --set-env OPENAI_ORGANIZATION=)", ++ "--editor-edit-format", ++ metavar="EDITOR_EDIT_FORMAT", ++ default=None, ++ help="Specify the edit format for the editor model (default: depends on editor model)", + ) + group.add_argument( +- "--set-env", +- action="append", +- 
metavar="ENV_VAR_NAME=value", +- help="Set an environment variable (to control API settings, can be used multiple times)", +- default=[], ++ "--show-model-warnings", ++ action=argparse.BooleanOptionalAction, ++ default=True, ++ help="Only work with models that have meta-data available (default: True)", + ) + group.add_argument( +- "--api-key", +- action="append", +- metavar="PROVIDER=KEY", ++ "--check-model-accepts-settings", ++ action=argparse.BooleanOptionalAction, ++ default=True, + help=( +- "Set an API key for a provider (eg: --api-key provider= sets" +- " PROVIDER_API_KEY=)" ++ "Check if model accepts settings like reasoning_effort/thinking_tokens (default: True)" + ), +- default=[], +- ) +- group = parser.add_argument_group("Model settings") +- group.add_argument( +- "--list-models", +- "--models", +- metavar="MODEL", +- help="List known models which match the (partial) MODEL name", +- ) +- group.add_argument( +- "--model-settings-file", +- metavar="MODEL_SETTINGS_FILE", +- default=".aider.model.settings.yml", +- help="Specify a file with aider model settings for unknown models", +- ) +- group.add_argument( +- "--model-metadata-file", +- metavar="MODEL_METADATA_FILE", +- default=".aider.model.metadata.json", +- help="Specify a file with context window and costs for unknown models", + ) + group.add_argument( +- "--alias", +- action="append", +- metavar="ALIAS:MODEL", +- help="Add a model alias (can be used multiple times)", ++ "--max-chat-history-tokens", ++ type=int, ++ default=None, ++ help=( ++ "Soft limit on tokens for chat history, after which summarization begins." ++ " If unspecified, defaults to the model's max_chat_history_tokens." 
++ ), + ) + group.add_argument( + "--reasoning-effort", +@@ -151,101 +135,80 @@ def get_parser(default_config_files, git_root): + default=None, + help="Specify what edit format the LLM should use (default depends on model)", + ) ++ ++ ########## ++ group = parser.add_argument_group("API Keys and settings") + group.add_argument( +- "--architect", +- action="store_const", +- dest="edit_format", +- const="architect", +- help="Use architect edit format for the main chat", ++ "--openai-api-key", ++ help="Specify the OpenAI API key", + ) + group.add_argument( +- "--auto-accept-architect", +- action=argparse.BooleanOptionalAction, +- default=True, +- help="Enable/disable automatic acceptance of architect changes (default: True)", ++ "--anthropic-api-key", ++ help="Specify the Anthropic API key", + ) + group.add_argument( +- "--weak-model", +- metavar="WEAK_MODEL", +- default=None, +- help=( +- "Specify the model to use for commit messages and chat history summarization (default" +- " depends on --model)" +- ), ++ "--openai-api-base", ++ help="Specify the api base url", + ) + group.add_argument( +- "--editor-model", +- metavar="EDITOR_MODEL", +- default=None, +- help="Specify the model to use for editor tasks (default depends on --model)", ++ "--openai-api-type", ++ help="Specify the api_type", + ) + group.add_argument( +- "--editor-edit-format", +- metavar="EDITOR_EDIT_FORMAT", +- default=None, +- help="Specify the edit format for the editor model (default: depends on editor model)", ++ "--openai-api-version", ++ help="Specify the api_version", + ) + group.add_argument( +- "--show-model-warnings", +- action=argparse.BooleanOptionalAction, +- default=True, +- help="Only work with models that have meta-data available (default: True)", ++ "--openai-api-deployment-id", ++ help="Specify the deployment_id", + ) + group.add_argument( +- "--check-model-accepts-settings", +- action=argparse.BooleanOptionalAction, +- default=True, +- help=( +- "Check if model accepts settings like 
reasoning_effort/thinking_tokens (default: True)" +- ), ++ "--openai-organization-id", ++ help="Specify the OpenAI organization ID", + ) + group.add_argument( +- "--max-chat-history-tokens", +- type=int, +- default=None, ++ "--set-env", ++ action="append", ++ metavar="ENV_VAR_NAME=value", ++ help="Set an environment variable (to control API settings, can be used multiple times)", ++ default=[], ++ ) ++ group.add_argument( ++ "--api-key", ++ action="append", ++ metavar="PROVIDER=KEY", + help=( +- "Soft limit on tokens for chat history, after which summarization begins." +- " If unspecified, defaults to the model's max_chat_history_tokens." ++ "Set an API key for a provider (eg: --api-key provider= sets" ++ " PROVIDER_API_KEY=)" + ), ++ default=[], + ) + + ########## +- group = parser.add_argument_group("Cache settings") +- group.add_argument( +- "--cache-prompts", +- action=argparse.BooleanOptionalAction, +- default=False, +- help="Enable caching of prompts (default: False)", +- ) ++ group = parser.add_argument_group("Model settings") + group.add_argument( +- "--cache-keepalive-pings", +- type=int, +- default=0, +- help="Number of times to ping at 5min intervals to keep prompt cache warm (default: 0)", ++ "--list-models", ++ "--models", ++ metavar="MODEL", ++ help="List known models which match the (partial) MODEL name", + ) +- +- ########## +- group = parser.add_argument_group("Repomap settings") + group.add_argument( +- "--map-tokens", +- type=int, +- default=None, +- help="Suggested number of tokens to use for repo map, use 0 to disable", ++ "--model-settings-file", ++ metavar="MODEL_SETTINGS_FILE", ++ default=".aider.model.settings.yml", ++ help="Specify a file with aider model settings for unknown models", + ) + group.add_argument( +- "--map-refresh", +- choices=["auto", "always", "files", "manual"], +- default="auto", +- help=( +- "Control how often the repo map is refreshed. 
Options: auto, always, files, manual" +- " (default: auto)" +- ), ++ "--model-metadata-file", ++ metavar="MODEL_METADATA_FILE", ++ default=".aider.model.metadata.json", ++ help="Specify a file with context window and costs for unknown models", + ) + group.add_argument( +- "--map-multiplier-no-files", +- type=float, +- default=2, +- help="Multiplier for map tokens when no files are specified (default: 2)", ++ "--alias", ++ action="append", ++ metavar="ALIAS:MODEL", ++ help="Add a model alias (can be used multiple times)", + ) + + ########## +@@ -398,7 +361,6 @@ def get_parser(default_config_files, git_root): + default_aiderignore_file = ( + os.path.join(git_root, ".aiderignore") if git_root else ".aiderignore" + ) +- + group.add_argument( + "--aiderignore", + metavar="AIDERIGNORE", +@@ -454,36 +416,15 @@ def get_parser(default_config_files, git_root): + default=False, + help="Enable/disable git pre-commit hooks with --no-verify (default: False)", + ) ++ ++ ########## ++ group = parser.add_argument_group("Fixing and committing") + group.add_argument( + "--commit", + action="store_true", + help="Commit all pending changes with a suitable commit message, then exit", + default=False, + ) +- group.add_argument( +- "--commit-prompt", +- metavar="PROMPT", +- help="Specify a custom prompt for generating commit messages", +- ) +- group.add_argument( +- "--dry-run", +- action=argparse.BooleanOptionalAction, +- default=False, +- help="Perform a dry run without modifying files (default: False)", +- ) +- group.add_argument( +- "--skip-sanity-check-repo", +- action="store_true", +- help="Skip the sanity check for the git repository (default: False)", +- default=False, +- ) +- group.add_argument( +- "--watch-files", +- action=argparse.BooleanOptionalAction, +- default=False, +- help="Enable/disable watching files for ai coding comments (default: False)", +- ) +- group = parser.add_argument_group("Fixing and committing") + group.add_argument( + "--lint", + action="store_true", +@@ 
-522,6 +463,95 @@ def get_parser(default_config_files, git_root): + help="Run tests, fix problems found and then exit", + default=False, + ) ++ group.add_argument( ++ "--commit-prompt", ++ metavar="PROMPT", ++ help="Specify a custom prompt for generating commit messages", ++ ) ++ group.add_argument( ++ "--dry-run", ++ action=argparse.BooleanOptionalAction, ++ default=False, ++ help="Perform a dry run without modifying files (default: False)", ++ ) ++ group.add_argument( ++ "--skip-sanity-check-repo", ++ action="store_true", ++ help="Skip the sanity check for the git repository (default: False)", ++ default=False, ++ ) ++ ++ ########## ++ group = parser.add_argument_group("Repomap settings") ++ group.add_argument( ++ "--map-tokens", ++ type=int, ++ default=None, ++ help="Suggested number of tokens to use for repo map, use 0 to disable", ++ ) ++ group.add_argument( ++ "--map-refresh", ++ choices=["auto", "always", "files", "manual"], ++ default="auto", ++ help=( ++ "Control how often the repo map is refreshed. 
Options: auto, always, files, manual" ++ " (default: auto)" ++ ), ++ ) ++ group.add_argument( ++ "--map-multiplier-no-files", ++ type=float, ++ default=2, ++ help="Multiplier for map tokens when no files are specified (default: 2)", ++ ) ++ ++ ########## ++ group = parser.add_argument_group("Cache settings") ++ group.add_argument( ++ "--cache-prompts", ++ action=argparse.BooleanOptionalAction, ++ default=False, ++ help="Enable caching of prompts (default: False)", ++ ) ++ group.add_argument( ++ "--cache-keepalive-pings", ++ type=int, ++ default=0, ++ help="Number of times to ping at 5min intervals to keep prompt cache warm (default: 0)", ++ ) ++ ++ ########## ++ group = parser.add_argument_group("History Files") ++ default_input_history_file = ( ++ os.path.join(git_root, ".aider.input.history") if git_root else ".aider.input.history" ++ ) ++ default_chat_history_file = ( ++ os.path.join(git_root, ".aider.chat.history.md") if git_root else ".aider.chat.history.md" ++ ) ++ group.add_argument( ++ "--input-history-file", ++ metavar="INPUT_HISTORY_FILE", ++ default=default_input_history_file, ++ help=f"Specify the chat input history file (default: {default_input_history_file})", ++ ) ++ group.add_argument( ++ "--chat-history-file", ++ metavar="CHAT_HISTORY_FILE", ++ default=default_chat_history_file, ++ help=f"Specify the chat history file (default: {default_chat_history_file})", ++ ) ++ group.add_argument( ++ "--restore-chat-history", ++ action=argparse.BooleanOptionalAction, ++ default=False, ++ help="Restore the previous chat history messages (default: False)", ++ ) ++ group.add_argument( ++ "--llm-history-file", ++ metavar="LLM_HISTORY_FILE", ++ default=None, ++ help="Log the conversation with the LLM to this file (for example, .aider.llm.history)", ++ ) + + ########## + group = parser.add_argument_group("Analytics") +@@ -543,7 +573,6 @@ def get_parser(default_config_files, git_root): + default=False, + ) + +- ######### + group = 
parser.add_argument_group("Upgrading") + group.add_argument( + "--just-check-update", +@@ -551,12 +580,6 @@ def get_parser(default_config_files, git_root): + help="Check for updates and return status in the exit code", + default=False, + ) +- group.add_argument( +- "--check-update", +- action=argparse.BooleanOptionalAction, +- help="Check for new aider versions on launch", +- default=True, +- ) + group.add_argument( + "--show-release-notes", + action=argparse.BooleanOptionalAction, +@@ -627,6 +650,12 @@ def get_parser(default_config_files, git_root): + help="Apply clipboard contents as edits using the main model's editor format", + default=False, + ) ++ group.add_argument( ++ "--yes-always", ++ action="store_true", ++ help="Always say yes to every confirmation", ++ default=None, ++ ) + group.add_argument( + "--exit", + action="store_true", +@@ -645,30 +674,45 @@ def get_parser(default_config_files, git_root): + help="Print the system prompts and exit (debug)", + default=False, + ) ++ group.add_argument( ++ "--verbose", ++ action="store_true", ++ help="Enable verbose output", ++ default=False, ++ ) ++ group.add_argument( ++ "--encoding", ++ default="utf-8", ++ help="Specify the encoding for input and output (default: utf-8)", ++ ) ++ group.add_argument( ++ "--line-endings", ++ choices=["platform", "lf", "crlf"], ++ default="platform", ++ help="Line endings to use when writing files (default: platform)", ++ ) + + ########## +- group = parser.add_argument_group("Voice settings") ++ group = parser.add_argument_group("Analytics") + group.add_argument( +- "--voice-format", +- metavar="VOICE_FORMAT", +- default="wav", +- choices=["wav", "mp3", "webm"], +- help="Audio format for voice recording (default: wav). 
webm and mp3 require ffmpeg", ++ "--analytics", ++ action=argparse.BooleanOptionalAction, ++ default=None, ++ help="Enable/disable analytics for current session (default: random)", + ) + group.add_argument( +- "--voice-language", +- metavar="VOICE_LANGUAGE", +- default="en", +- help="Specify the language for voice using ISO 639-1 code (default: auto)", ++ "--analytics-log", ++ metavar="ANALYTICS_LOG_FILE", ++ help="Specify a file to log analytics events", + ) + group.add_argument( +- "--voice-input-device", +- metavar="VOICE_INPUT_DEVICE", +- default=None, +- help="Specify the input device name for voice recording", ++ "--analytics-disable", ++ action="store_true", ++ help="Permanently disable analytics", ++ default=False, + ) + +- ###### ++ ########## + group = parser.add_argument_group("Other settings") + group.add_argument( + "--file", +@@ -700,41 +744,31 @@ def get_parser(default_config_files, git_root): + help="Always say yes to every confirmation", + default=None, + ) +- group.add_argument( +- "-v", +- "--verbose", +- action="store_true", +- help="Enable verbose output", +- default=False, +- ) + group.add_argument( + "--load", + metavar="LOAD_FILE", + help="Load and execute /commands from a file on launch", + ) + group.add_argument( +- "--encoding", +- default="utf-8", +- help="Specify the encoding for input and output (default: utf-8)", ++ "--watch-files", ++ action=argparse.BooleanOptionalAction, ++ default=False, ++ help="Enable/disable watching files for ai coding comments (default: False)", + ) + group.add_argument( +- "--line-endings", +- choices=["platform", "lf", "crlf"], +- default="platform", +- help="Line endings to use when writing files (default: platform)", ++ "--set-env", ++ action="append", ++ metavar="ENV_VAR_NAME=value", ++ help="Set an environment variable (can be used multiple times)", ++ default=[], + ) + group.add_argument( +- "-c", +- "--config", +- is_config_file=True, +- metavar="CONFIG_FILE", +- help=( +- "Specify the config file 
(default: search for .aider.conf.yml in git root, cwd" +- " or home directory)" +- ), ++ "--api-key", ++ action="append", ++ metavar="PROVIDER=KEY", ++ help="Set an API key for a provider (eg: --api-key anthropic=sk-123)", ++ default=[], + ) +- # This is a duplicate of the argument in the preparser and is a no-op by this time of +- # argument parsing, but it's here so that the help is displayed as expected. + group.add_argument( + "--env-file", + metavar="ENV_FILE", +@@ -787,6 +821,16 @@ def get_parser(default_config_files, git_root): + "--editor", + help="Specify which editor to use for the /editor command", + ) ++ group.add_argument( ++ "--c", ++ "--config", ++ is_config_file=True, ++ metavar="CONFIG_FILE", ++ help=( ++ "Specify the config file (default: search for .aider.conf.yml in git root, cwd" ++ " or home directory)" ++ ), ++ ) + + ########## + group = parser.add_argument_group("Deprecated model settings") +@@ -797,7 +841,7 @@ def get_parser(default_config_files, git_root): + + + def get_md_help(): +- os.environ["COLUMNS"] = "70" ++ os.environ["COLUMNS"] = "/rpc!lx7' + sys.argv = ["aider"] + parser = get_parser([], None) + +@@ -810,7 +854,7 @@ def get_md_help(): + + + def get_sample_yaml(): +- os.environ["COLUMNS"] = "100" ++ os.environ["COLUMNS"] = "120" + sys.argv = ["aider"] + parser = get_parser([], None) + diff --git a/locodiff-250425/results/aider_aider_args.py/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt b/locodiff-250425/results/aider_aider_args.py/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt new file mode 100644 index 000000000..ca84bf1d5 --- /dev/null +++ b/locodiff-250425/results/aider_aider_args.py/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt @@ -0,0 +1,897 @@ +```python +#!/usr/bin/env python + +import argparse +import os +import sys +from pathlib import Path + +import configargparse + +from aider import __version__ +from aider.args_formatter import ( + DotEnvFormatter, + MarkdownHelpFormatter, + YamlHelpFormatter, 
+) +from aider.deprecated import add_deprecated_model_args + +from .dump import dump # noqa: F401 + + +def resolve_aiderignore_path(path_str, git_root=None): + path = Path(path_str) + if path.is_absolute(): + return str(path) + elif git_root: + return str(Path(git_root) / path) + return str(path) + + +def default_env_file(git_root): + return os.path.join(git_root, ".env") if git_root else ".env" + + +def get_parser(default_config_files, git_root): + parser = configargparse.ArgumentParser( + description="aider is AI pair programming in your terminal", + add_config_file_help=True, + default_config_files=default_config_files, + config_file_parser_class=configargparse.YAMLConfigFileParser, + auto_env_var_prefix="AIDER_", + ) + group = parser.add_argument_group("Main model") + group.add_argument( + "files", metavar="FILE", nargs="*", help="files to edit with an LLM (optional)" + ) + group.add_argument( + "--model", + metavar="MODEL", + default=None, + help="Specify the model to use for the main chat", + ) + group.add_argument( + "--architect", + action="store_const", + dest="edit_format", + const="architect", + help="Use architect edit format for the main chat", + ) + group.add_argument( + "--auto-accept-architect", + action=argparse.BooleanOptionalAction, + default=True, + help="Enable/disable automatic acceptance of architect changes (default: True)", + ) + group.add_argument( + "--weak-model", + metavar="WEAK_MODEL", + default=None, + help=( + "Specify the model to use for commit messages and chat history summarization (default" + " depends on --model)" + ), + ) + group.add_argument( + "--editor-model", + metavar="EDITOR_MODEL", + default=None, + help="Specify the model to use for editor tasks (default depends on --model)", + ) + group.add_argument( + "--editor-edit-format", + metavar="EDITOR_EDIT_FORMAT", + default=None, + help="Specify the edit format for the editor model (default: depends on editor model)", + ) + group.add_argument( + "--show-model-warnings", + 
action=argparse.BooleanOptionalAction, + default=True, + help="Only work with models that have meta-data available (default: True)", + ) + group.add_argument( + "--check-model-accepts-settings", + action=argparse.BooleanOptionalAction, + default=True, + help=( + "Check if model accepts settings like reasoning_effort/thinking_tokens (default: True)" + ), + ) + group.add_argument( + "--max-chat-history-tokens", + type=int, + default=None, + help=( + "Soft limit on tokens for chat history, after which summarization begins." + " If unspecified, defaults to the model's max_chat_history_tokens." + ), + ) + group.add_argument( + "--reasoning-effort", + type=str, + help="Set the reasoning_effort API parameter (default: not set)", + ) + group.add_argument( + "--thinking-tokens", + type=str, + help="Set the thinking token budget for models that support it (default: not set)", + ) + group.add_argument( + "--verify-ssl", + action=argparse.BooleanOptionalAction, + default=True, + help="Verify the SSL cert when connecting to models (default: True)", + ) + group.add_argument( + "--timeout", + type=float, + default=None, + help="Timeout in seconds for API calls (default: None)", + ) + group.add_argument( + "--edit-format", + "--chat-mode", + metavar="EDIT_FORMAT", + default=None, + help="Specify what edit format the LLM should use (default depends on model)", + ) + + ########## + group = parser.add_argument_group("API Keys and settings") + group.add_argument( + "--openai-api-key", + help="Specify the OpenAI API key", + ) + group.add_argument( + "--anthropic-api-key", + help="Specify the Anthropic API key", + ) + group.add_argument( + "--openai-api-base", + help="Specify the api base url", + ) + group.add_argument( + "--openai-api-type", + help="Specify the api_type", + ) + group.add_argument( + "--openai-api-version", + help="Specify the api_version", + ) + group.add_argument( + "--openai-api-deployment-id", + help="Specify the deployment_id", + ) + group.add_argument( + 
"--openai-organization-id", + help="Specify the OpenAI organization ID", + ) + group.add_argument( + "--set-env", + action="append", + metavar="ENV_VAR_NAME=value", + help="Set an environment variable (to control API settings, can be used multiple times)", + default=[], + ) + group.add_argument( + "--api-key", + action="append", + metavar="PROVIDER=KEY", + help=( + "Set an API key for a provider (eg: --api-key provider= sets" + " PROVIDER_API_KEY=)" + ), + default=[], + ) + + ########## + group = parser.add_argument_group("Model settings") + group.add_argument( + "--list-models", + "--models", + metavar="MODEL", + help="List known models which match the (partial) MODEL name", + ) + group.add_argument( + "--model-settings-file", + metavar="MODEL_SETTINGS_FILE", + default=".aider.model.settings.yml", + help="Specify a file with aider model settings for unknown models", + ) + group.add_argument( + "--model-metadata-file", + metavar="MODEL_METADATA_FILE", + default=".aider.model.metadata.json", + help="Specify a file with context window and costs for unknown models", + ) + group.add_argument( + "--alias", + action="append", + metavar="ALIAS:MODEL", + help="Add a model alias (can be used multiple times)", + ) + + ########## + group = parser.add_argument_group("History Files") + default_input_history_file = ( + os.path.join(git_root, ".aider.input.history") if git_root else ".aider.input.history" + ) + default_chat_history_file = ( + os.path.join(git_root, ".aider.chat.history.md") if git_root else ".aider.chat.history.md" + ) + group.add_argument( + "--input-history-file", + metavar="INPUT_HISTORY_FILE", + default=default_input_history_file, + help=f"Specify the chat input history file (default: {default_input_history_file})", + ) + group.add_argument( + "--chat-history-file", + metavar="CHAT_HISTORY_FILE", + default=default_chat_history_file, + help=f"Specify the chat history file (default: {default_chat_history_file})", + ) + group.add_argument( + 
"--restore-chat-history", + action=argparse.BooleanOptionalAction, + default=False, + help="Restore the previous chat history messages (default: False)", + ) + group.add_argument( + "--llm-history-file", + metavar="LLM_HISTORY_FILE", + default=None, + help="Log the conversation with the LLM to this file (for example, .aider.llm.history)", + ) + + ########## + group = parser.add_argument_group("Output settings") + group.add_argument( + "--dark-mode", + action="store_true", + help="Use colors suitable for a dark terminal background (default: False)", + default=False, + ) + group.add_argument( + "--light-mode", + action="store_true", + help="Use colors suitable for a light terminal background (default: False)", + default=False, + ) + group.add_argument( + "--pretty", + action=argparse.BooleanOptionalAction, + default=True, + help="Enable/disable pretty, colorized output (default: True)", + ) + group.add_argument( + "--stream", + action=argparse.BooleanOptionalAction, + default=True, + help="Enable/disable streaming responses (default: True)", + ) + group.add_argument( + "--user-input-color", + default="#00cc00", + help="Set the color for user input (default: #00cc00)", + ) + group.add_argument( + "--tool-output-color", + default=None, + help="Set the color for tool output (default: None)", + ) + group.add_argument( + "--tool-error-color", + default="#FF2222", + help="Set the color for tool error messages (default: #FF2222)", + ) + group.add_argument( + "--tool-warning-color", + default="#FFA500", + help="Set the color for tool warning messages (default: #FFA500)", + ) + group.add_argument( + "--assistant-output-color", + default="#0088ff", + help="Set the color for assistant output (default: #0088ff)", + ) + group.add_argument( + "--completion-menu-color", + metavar="COLOR", + default=None, + help="Set the color for the completion menu (default: terminal's default text color)", + ) + group.add_argument( + "--completion-menu-bg-color", + metavar="COLOR", + 
default=None, + help=( + "Set the background color for the completion menu (default: terminal's default" + " background color)" + ), + ) + group.add_argument( + "--completion-menu-current-color", + metavar="COLOR", + default=None, + help=( + "Set the color for the current item in the completion menu (default: terminal's default" + " background color)" + ), + ) + group.add_argument( + "--completion-menu-current-bg-color", + metavar="COLOR", + default=None, + help=( + "Set the background color for the current item in the completion menu (default:" + " terminal's default text color)" + ), + ) + group.add_argument( + "--code-theme", + default="default", + help=( + "Set the markdown code theme (default: default, other options include monokai," + " solarized-dark, solarized-light, or a Pygments builtin style," + " see https://pygments.org/styles for available themes)" + ), + ) + group.add_argument( + "--show-diffs", + action="store_true", + help="Show diffs when committing changes (default: False)", + default=False, + ) + + ########## + group = parser.add_argument_group("Git settings") + group.add_argument( + "--git", + action=argparse.BooleanOptionalAction, + default=True, + help="Enable/disable looking for a git repo (default: True)", + ) + group.add_argument( + "--gitignore", + action=argparse.BooleanOptionalAction, + default=True, + help="Enable/disable adding .aider* to .gitignore (default: True)", + ) + default_aiderignore_file = ( + os.path.join(git_root, ".aiderignore") if git_root else ".aiderignore" + ) + group.add_argument( + "--aiderignore", + metavar="AIDERIGNORE", + type=lambda path_str: resolve_aiderignore_path(path_str, git_root), + default=default_aiderignore_file, + help="Specify the aider ignore file (default: .aiderignore in git root)", + ) + group.add_argument( + "--subtree-only", + action="store_true", + help="Only consider files in the current subtree of the git repository", + default=False, + ) + group.add_argument( + "--auto-commits", + 
action=argparse.BooleanOptionalAction, + default=True, + help="Enable/disable auto commit of LLM changes (default: True)", + ) + group.add_argument( + "--dirty-commits", + action=argparse.BooleanOptionalAction, + default=True, + help="Enable/disable commits when repo is found dirty (default: True)", + ) + group.add_argument( + "--attribute-author", + action=argparse.BooleanOptionalAction, + default=True, + help="Attribute aider code changes in the git author name (default: True)", + ) + group.add_argument( + "--attribute-committer", + action=argparse.BooleanOptionalAction, + default=True, + help="Attribute aider commits in the git committer name (default: True)", + ) + group.add_argument( + "--attribute-commit-message-author", + action=argparse.BooleanOptionalAction, + default=False, + help="Prefix commit messages with 'aider: ' if aider authored the changes (default: False)", + ) + group.add_argument( + "--attribute-commit-message-committer", + action=argparse.BooleanOptionalAction, + default=False, + help="Prefix all commit messages with 'aider: ' (default: False)", + ) + group.add_argument( + "--git-commit-verify", + action=argparse.BooleanOptionalAction, + default=False, + help="Enable/disable git pre-commit hooks with --no-verify (default: False)", + ) + + ########## + group = parser.add_argument_group("Fixing and committing") + group.add_argument( + "--commit", + action="store_true", + help="Commit all pending changes with a suitable commit message, then exit", + default=False, + ) + group.add_argument( + "--lint", + action="store_true", + help="Lint and fix provided files, or dirty files if none provided", + default=False, + ) + group.add_argument( + "--lint-cmd", + action="append", + help=( + 'Specify lint commands to run for different languages, eg: "python: flake8' + ' --select=..." 
(can be used multiple times)' + ), + default=[], + ) + group.add_argument( + "--auto-lint", + action=argparse.BooleanOptionalAction, + default=True, + help="Enable/disable automatic linting after changes (default: True)", + ) + group.add_argument( + "--test-cmd", + help="Specify command to run tests", + default=[], + ) + group.add_argument( + "--auto-test", + action=argparse.BooleanOptionalAction, + default=False, + help="Enable/disable automatic testing after changes (default: False)", + ) + group.add_argument( + "--test", + action="store_true", + help="Run tests, fix problems found and then exit", + default=False, + ) + group.add_argument( + "--commit-prompt", + metavar="PROMPT", + help="Specify a custom prompt for generating commit messages", + ) + group.add_argument( + "--dry-run", + action=argparse.BooleanOptionalAction, + default=False, + help="Perform a dry run without modifying files (default: False)", + ) + group.add_argument( + "--skip-sanity-check-repo", + action="store_true", + help="Skip the sanity check for the git repository (default: False)", + default=False, + ) + + ########## + group = parser.add_argument_group("Repomap settings") + group.add_argument( + "--map-tokens", + type=int, + default=None, + help="Suggested number of tokens to use for repo map, use 0 to disable", + ) + group.add_argument( + "--map-refresh", + choices=["auto", "always", "files", "manual"], + default="auto", + help=( + "Control how often the repo map is refreshed. 
Options: auto, always, files, manual" + " (default: auto)" + ), + ) + group.add_argument( + "--map-multiplier-no-files", + type=float, + default=2, + help="Multiplier for map tokens when no files are specified (default: 2)", + ) + + ########## + group = parser.add_argument_group("Cache settings") + group.add_argument( + "--cache-prompts", + action=argparse.BooleanOptionalAction, + default=False, + help="Enable caching of prompts (default: False)", + ) + group.add_argument( + "--cache-keepalive-pings", + type=int, + default=0, + help="Number of times to ping at 5min intervals to keep prompt cache warm (default: 0)", + ) + + ########## + group = parser.add_argument_group("History Files") + default_input_history_file = ( + os.path.join(git_root, ".aider.input.history") if git_root else ".aider.input.history" + ) + default_chat_history_file = ( + os.path.join(git_root, ".aider.chat.history.md") if git_root else ".aider.chat.history.md" + ) + group.add_argument( + "--input-history-file", + metavar="INPUT_HISTORY_FILE", + default=default_input_history_file, + help=f"Specify the chat input history file (default: {default_input_history_file})", + ) + group.add_argument( + "--chat-history-file", + metavar="CHAT_HISTORY_FILE", + default=default_chat_history_file, + help=f"Specify the chat history file (default: {default_chat_history_file})", + ) + group.add_argument( + "--restore-chat-history", + action=argparse.BooleanOptionalAction, + default=False, + help="Restore the previous chat history messages (default: False)", + ) + group.add_argument( + "--llm-history-file", + metavar="LLM_HISTORY_FILE", + default=None, + help="Log the conversation with the LLM to this file (for example, .aider.llm.history)", + ) + + ########## + group = parser.add_argument_group("Analytics") + group.add_argument( + "--analytics", + action=argparse.BooleanOptionalAction, + default=None, + help="Enable/disable analytics for current session (default: random)", + ) + group.add_argument( + 
"--analytics-log", + metavar="ANALYTICS_LOG_FILE", + help="Specify a file to log analytics events", + ) + group.add_argument( + "--analytics-disable", + action="store_true", + help="Permanently disable analytics", + default=False, + ) + + group = parser.add_argument_group("Upgrading") + group.add_argument( + "--just-check-update", + action="store_true", + help="Check for updates and return status in the exit code", + default=False, + ) + group.add_argument( + "--show-release-notes", + action=argparse.BooleanOptionalAction, + help="Show release notes on first run of new version (default: None, ask user)", + default=None, + ) + group.add_argument( + "--install-main-branch", + action="store_true", + help="Install the latest version from the main branch", + default=False, + ) + group.add_argument( + "--upgrade", + "--update", + action="store_true", + help="Upgrade aider to the latest version from PyPI", + default=False, + ) + group.add_argument( + "--version", + action="version", + version=f"%(prog)s {__version__}", + help="Show the version number and exit", + ) + + ########## + group = parser.add_argument_group("Modes") + group.add_argument( + "--message", + "--msg", + "-m", + metavar="COMMAND", + help=( + "Specify a single message to send the LLM, process reply then exit (disables chat mode)" + ), + ) + group.add_argument( + "--message-file", + "-f", + metavar="MESSAGE_FILE", + help=( + "Specify a file containing the message to send the LLM, process reply, then exit" + " (disables chat mode)" + ), + ) + group.add_argument( + "--gui", + "--browser", + action=argparse.BooleanOptionalAction, + help="Run aider in your browser (default: False)", + default=False, + ) + group.add_argument( + "--copy-paste", + action=argparse.BooleanOptionalAction, + default=False, + help="Enable automatic copy/paste of chat between aider and web UI (default: False)", + ) + group.add_argument( + "--apply", + metavar="FILE", + help="Apply the changes from the given file instead of running the 
chat (debug)", + ) + group.add_argument( + "--apply-clipboard-edits", + action="store_true", + help="Apply clipboard contents as edits using the main model's editor format", + default=False, + ) + group.add_argument( + "--yes-always", + action="store_true", + help="Always say yes to every confirmation", + default=None, + ) + group.add_argument( + "--exit", + action="store_true", + help="Do all startup activities then exit before accepting user input (debug)", + default=False, + ) + group.add_argument( + "--show-repo-map", + action="store_true", + help="Print the repo map and exit (debug)", + default=False, + ) + group.add_argument( + "--show-prompts", + action="store_true", + help="Print the system prompts and exit (debug)", + default=False, + ) + group.add_argument( + "--verbose", + action="store_true", + help="Enable verbose output", + default=False, + ) + group.add_argument( + "--encoding", + default="utf-8", + help="Specify the encoding for input and output (default: utf-8)", + ) + group.add_argument( + "--line-endings", + choices=["platform", "lf", "crlf"], + default="platform", + help="Line endings to use when writing files (default: platform)", + ) + + ########## + group = parser.add_argument_group("Analytics") + group.add_argument( + "--analytics", + action=argparse.BooleanOptionalAction, + default=None, + help="Enable/disable analytics for current session (default: random)", + ) + group.add_argument( + "--analytics-log", + metavar="ANALYTICS_LOG_FILE", + help="Specify a file to log analytics events", + ) + group.add_argument( + "--analytics-disable", + action="store_true", + help="Permanently disable analytics", + default=False, + ) + + ########## + group = parser.add_argument_group("Other settings") + group.add_argument( + "--file", + action="append", + metavar="FILE", + help="specify a file to edit (can be used multiple times)", + ) + group.add_argument( + "--read", + action="append", + metavar="FILE", + help="specify a read-only file (can be used 
multiple times)", + ) + group.add_argument( + "--vim", + action="store_true", + help="Use VI editing mode in the terminal (default: False)", + default=False, + ) + group.add_argument( + "--chat-language", + metavar="CHAT_LANGUAGE", + default=None, + help="Specify the language to use in the chat (default: None, uses system settings)", + ) + group.add_argument( + "--yes-always", + action="store_true", + help="Always say yes to every confirmation", + default=None, + ) + group.add_argument( + "--load", + metavar="LOAD_FILE", + help="Load and execute /commands from a file on launch", + ) + group.add_argument( + "--watch-files", + action=argparse.BooleanOptionalAction, + default=False, + help="Enable/disable watching files for ai coding comments (default: False)", + ) + group.add_argument( + "--set-env", + action="append", + metavar="ENV_VAR_NAME=value", + help="Set an environment variable (can be used multiple times)", + default=[], + ) + group.add_argument( + "--api-key", + action="append", + metavar="PROVIDER=KEY", + help="Set an API key for a provider (eg: --api-key anthropic=sk-123)", + default=[], + ) + group.add_argument( + "--env-file", + metavar="ENV_FILE", + default=default_env_file(git_root), + help="Specify the .env file to load (default: .env in git root)", + ) + group.add_argument( + "--suggest-shell-commands", + action=argparse.BooleanOptionalAction, + default=True, + help="Enable/disable suggesting shell commands (default: True)", + ) + group.add_argument( + "--fancy-input", + action=argparse.BooleanOptionalAction, + default=True, + help="Enable/disable fancy input with history and completion (default: True)", + ) + group.add_argument( + "--multiline", + action=argparse.BooleanOptionalAction, + default=False, + help="Enable/disable multi-line input mode with Meta-Enter to submit (default: False)", + ) + group.add_argument( + "--notifications", + action=argparse.BooleanOptionalAction, + default=False, + help=( + "Enable/disable terminal bell notifications 
when LLM responses are ready (default:" + " False)" + ), + ) + group.add_argument( + "--notifications-command", + metavar="COMMAND", + default=None, + help=( + "Specify a command to run for notifications instead of the terminal bell. If not" + " specified, a default command for your OS may be used." + ), + ) + group.add_argument( + "--detect-urls", + action=argparse.BooleanOptionalAction, + default=True, + help="Enable/disable detection and offering to add URLs to chat (default: True)", + ) + group.add_argument( + "--editor", + help="Specify which editor to use for the /editor command", + ) + group.add_argument( + "--c", + "--config", + is_config_file=True, + metavar="CONFIG_FILE", + help=( + "Specify the config file (default: search for .aider.conf.yml in git root, cwd" + " or home directory)" + ), + ) + + ########## + group = parser.add_argument_group("Deprecated model settings") + # Add deprecated model shortcut arguments + add_deprecated_model_args(parser, group) + + return parser + + +def get_md_help(): + os.environ["COLUMNS"] = "70" + sys.argv = ["aider"] + parser = get_parser([], None) + + # This instantiates all the action.env_var values + parser.parse_known_args() + + parser.formatter_class = MarkdownHelpFormatter + + return argparse.ArgumentParser.format_help(parser) + + +def get_sample_yaml(): + os.environ["COLUMNS"] = "120" + sys.argv = ["aider"] + parser = get_parser([], None) + + # This instantiates all the action.env_var values + parser.parse_known_args() + + parser.formatter_class = YamlHelpFormatter + + return argparse.ArgumentParser.format_help(parser) + + +def get_sample_dotenv(): + os.environ["COLUMNS"] = "120" + sys.argv = ["aider"] + parser = get_parser([], None) + + # This instantiates all the action.env_var values + parser.parse_known_args() + + parser.formatter_class = DotEnvFormatter + + return argparse.ArgumentParser.format_help(parser) + + +def main(): + arg = sys.argv[1] if len(sys.argv[1:]) else None + + if arg == "md": +
print(get_md_help()) + elif arg == "dotenv": + print(get_sample_dotenv()) + else: + print(get_sample_yaml()) + + +if __name__ == "__main__": + status = main() + sys.exit(status) +``` \ No newline at end of file diff --git a/locodiff-250425/results/aider_aider_coders_editblock_prompts.py/x-ai_grok-code-fast-1/20250909_155853/extracted_output.txt b/locodiff-250425/results/aider_aider_coders_editblock_prompts.py/x-ai_grok-code-fast-1/20250909_155853/extracted_output.txt new file mode 100644 index 000000000..b4443cf92 --- /dev/null +++ b/locodiff-250425/results/aider_aider_coders_editblock_prompts.py/x-ai_grok-code-fast-1/20250909_155853/extracted_output.txt @@ -0,0 +1,213 @@ +# flake8: noqa: E501 + +from .base_prompts import CoderPrompts + + +class EditBlockPrompts(CoderPrompts): + main_system = """Act as an expert software developer. +Always use best practices when coding. +Respect and use existing conventions, libraries, etc that are already present in the code base. +{lazy_prompt} + +Take requests for changes to the supplied code. +If the request is ambiguous, ask questions. + +Always reply to the user in {language}. + +Once you understand the request you MUST: + +1. Decide if you need to propose *SEARCH/REPLACE* edits to any files that haven't been added to the chat. You can create new files without asking! + +But if you need to propose edits to existing files not already added to the chat, you *MUST* tell the user their full path names and ask them to *add the files to the chat*. +End your reply and wait for their approval. +You can keep asking if you then decide you need to edit more files. + +2. Think step-by-step and explain the needed changes in a few short sentences. + +3. Describe each change with a *SEARCH/REPLACE block* per the examples below. + +All changes to files must use this *SEARCH/REPLACE block* format. +ONLY EVER RETURN CODE IN A *SEARCH/REPLACE BLOCK! +{shell_cmd_prompt} +""" + + shell_cmd_prompt = """ +4. 
*Concisely* suggest any shell commands the user might want to run in ```bash blocks. + +Just suggest shell commands this way, not example code. +Only suggest complete shell commands that are ready to execute, without placeholders. +Only suggest at most a few shell commands at a time, not more than 1-3, one per line. +Do not suggest multi-line shell commands. +All shell commands will run from the root directory of the user's project. + +Use the appropriate shell based on the user's system info: +{platform} +Examples of when to suggest shell commands: + +- If you changed a self-contained html file, suggest an OS-appropriate command to open a browser to view it to see the updated content. +- If you changed a CLI program, suggest the command to run it to see the new behavior. +- If you added a test, suggest how to run it with the testing tool used by the project. +- Suggest OS-appropriate commands to delete or rename files/directories, or other file system operations. +- If your code changes add new dependencies, suggest the command to install them. +- Etc. +""" + + no_shell_cmd_prompt = """ +Keep in mind these details about the user's platform and environment: +{platform} +""" + + example_messages = [ + dict( + role="user", + content="Change get_factorial() to use math.factorial", + ), + dict( + role="assistant", + content="""To make this change we need to modify `mathweb/flask/app.py` to: + +1. Import the math package. +2. Remove the existing factorial() function. +3. Update get_factorial() to call math.factorial instead. 
+ +Here are the *SEARCH/REPLACE* blocks: + +mathweb/flask/app.py +{fence[0]}python +<<<<<<< SEARCH +from flask import Flask +======= +import math +from flask import Flask +>>>>>>> REPLACE +{fence[1]} + +mathweb/flask/app.py +{fence[0]}python +<<<<<<< SEARCH +def factorial(n): + "compute factorial" + + if n == 0: + return 1 + else: + return n * factorial(n-1) + +======= +>>>>>>> REPLACE +{fence[1]} + +mathweb/flask/app.py +{fence[0]}python +<<<<<<< SEARCH + return str(factorial(n)) +======= + return str(math.factorial(n)) +>>>>>>> REPLACE +{fence[1]} +""", + ), + dict( + role="user", + content="Refactor hello() into its own file.", + ), + dict( + role="assistant", + content="""To make this change we need to modify `main.py` and make a new file `hello.py`: + +1. Make a new hello.py file with hello() in it. +2. Remove hello() from main.py and replace it with an import. + +Here are the *SEARCH/REPLACE* blocks: + +hello.py +{fence[0]}python +<<<<<<< SEARCH +======= +def hello(): + "print a greeting" + + print("hello") +>>>>>>> REPLACE +{fence[1]} + +main.py +{fence[0]}python +<<<<<<< SEARCH +def hello(): + "print a greeting" + + print("hello") +======= +from hello import hello +>>>>>>> REPLACE +{fence[1]} +""", + ), + ] + + system_reminder = """# *SEARCH/REPLACE block* Rules: + +Every *SEARCH/REPLACE block* must use this format: +1. The *FULL* file path alone on a line, verbatim. No bold asterisks, no quotes around it, no escaping of characters, etc. +2. The opening fence and code language, eg: {fence[0]}python +3. The start of search block: <<<<<<< SEARCH +4. A contiguous chunk of lines to search for in the existing source code +5. The dividing line: ======= +6. The lines to replace into the source code +7. The end of the replace block: >>>>>>> REPLACE +8. The closing fence: {fence[1]} + +{quad_backtick_reminder} +Every *SEARCH* section must *EXACTLY MATCH* the existing file content, character for character, including all comments, docstrings, etc. 
+If the file contains code or other data wrapped/escaped in json/xml/quotes or other containers, you need to propose edits to the literal contents of the file, including the container markup. + +*SEARCH/REPLACE* blocks will *only* replace the first match occurrence. +Including multiple unique *SEARCH/REPLACE* blocks if needed. +Include enough lines in each SEARCH section to uniquely match each set of lines that need to change. + +Keep *SEARCH/REPLACE* blocks concise. +Break large *SEARCH/REPLACE* blocks into a series of smaller blocks that each change a small portion of the file. +Include just the changing lines, and a few surrounding lines if needed for uniqueness. +Do not include long runs of unchanging lines in *SEARCH/REPLACE* blocks. + +Include *ALL* the code being searched and replaced! + +Only create *SEARCH/REPLACE* blocks for files that the user has added to the chat! + +To move code within a file, use 2 *SEARCH/REPLACE* blocks: 1 to delete it from its current location, 1 to insert it in the new location. + +Pay attention to which files the user wants you to edit, especially if they are asking you to create a new file. + +If you want to put code in a new file, use a *SEARCH/REPLACE block* with: +- A new file path, including dir name if needed +- An empty `SEARCH` section +- The new file's contents in the `REPLACE` section + +{rename_with_shell}{go_ahead_tip} + +{lazy_prompt} +ONLY EVER RETURN CODE IN A *SEARCH/REPLACE BLOCK! +{shell_cmd_reminder} +""" + + rename_with_shell = """To rename files which have been added to the chat, use shell commands at the end of your response. + +""" + + go_ahead_tip = """If the user just says something like "ok" or "go ahead" or "do that" they probably want you to make SEARCH/REPLACE blocks for the code changes you just proposed. +The user will say when they've applied your edits. If they haven't explicitly confirmed the edits have been applied, they probably want proper SEARCH/REPLACE blocks. 
+ +""" + + shell_cmd_reminder = """ +Examples of when to suggest shell commands: + +- If you changed a self-contained html file, suggest an OS-appropriate command to open a browser to view it to see the updated content. +- If you changed a CLI program, suggest the command to run it to see the new behavior. +- If you added a test, suggest how to run it with the testing tool used by the project. +- Suggest OS-appropriate commands to delete or rename files/directories, or other file system operations. +- If your code changes add new dependencies, suggest the command to install them. +- Etc. + +""" \ No newline at end of file diff --git a/locodiff-250425/results/aider_aider_coders_editblock_prompts.py/x-ai_grok-code-fast-1/20250909_155853/metadata.json b/locodiff-250425/results/aider_aider_coders_editblock_prompts.py/x-ai_grok-code-fast-1/20250909_155853/metadata.json new file mode 100644 index 000000000..9a24e20ef --- /dev/null +++ b/locodiff-250425/results/aider_aider_coders_editblock_prompts.py/x-ai_grok-code-fast-1/20250909_155853/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "aider_aider_coders_editblock_prompts.py", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/aider_aider_coders_editblock_prompts.py_prompt.txt", + "expected_file": "locodiff-250425/prompts/aider_aider_coders_editblock_prompts.py_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:58:53.911318+00:00", + "success": false, + "error": "Output mismatch", + "api_error": false, + "raw_response_length": 7387, + "extracted_output_length": 7373, + "expected_output_length": 7377, + "results_dir": "locodiff-250425/results/aider_aider_coders_editblock_prompts.py/x-ai_grok-code-fast-1/20250909_155853", + "generation_id": "gen-1757433533-Rpdhs2sh7s1Qnzrlk2kA", + "cost_usd": 0.01641048, + "prompt_tokens": 35351, + "completion_tokens": 3311, + "total_tokens": 38662, + "native_prompt_tokens": 34893, + "native_completion_tokens": 
6334, + "native_finish_reason": "stop", + "stats_error": null, + "native_tokens_reasoning": 4640 +} \ No newline at end of file diff --git a/locodiff-250425/results/aider_aider_coders_editblock_prompts.py/x-ai_grok-code-fast-1/20250909_155853/output.diff b/locodiff-250425/results/aider_aider_coders_editblock_prompts.py/x-ai_grok-code-fast-1/20250909_155853/output.diff new file mode 100644 index 000000000..5f0830822 --- /dev/null +++ b/locodiff-250425/results/aider_aider_coders_editblock_prompts.py/x-ai_grok-code-fast-1/20250909_155853/output.diff @@ -0,0 +1,63 @@ + +index b000ba510..b4443cf92 100644 +--- a/aider_aider_coders_editblock_prompts.py_expectedoutput.txt (expected):tmp/tmpua7bkgzn_expected.txt ++++ b/aider_aider_coders_editblock_prompts.py_extracted.txt (actual):tmp/tmpyvw0idfr_actual.txt +@@ -8,6 +8,7 @@ class EditBlockPrompts(CoderPrompts): + Always use best practices when coding. + Respect and use existing conventions, libraries, etc that are already present in the code base. + {lazy_prompt} ++ + Take requests for changes to the supplied code. + If the request is ambiguous, ask questions. + +@@ -26,7 +27,7 @@ You can keep asking if you then decide you need to edit more files. + 3. Describe each change with a *SEARCH/REPLACE block* per the examples below. + + All changes to files must use this *SEARCH/REPLACE block* format. +-ONLY EVER RETURN CODE IN A *SEARCH/REPLACE BLOCK*! ++ONLY EVER RETURN CODE IN A *SEARCH/REPLACE BLOCK! + {shell_cmd_prompt} + """ + +@@ -55,6 +56,7 @@ Examples of when to suggest shell commands: + Keep in mind these details about the user's platform and environment: + {platform} + """ ++ + example_messages = [ + dict( + role="user", +@@ -156,7 +158,6 @@ Every *SEARCH/REPLACE block* must use this format: + 7. The end of the replace block: >>>>>>> REPLACE + 8. The closing fence: {fence[1]} + +-Use the *FULL* file path, as shown to you by the user. 
+ {quad_backtick_reminder} + Every *SEARCH* section must *EXACTLY MATCH* the existing file content, character for character, including all comments, docstrings, etc. + If the file contains code or other data wrapped/escaped in json/xml/quotes or other containers, you need to propose edits to the literal contents of the file, including the container markup. +@@ -170,18 +171,23 @@ Break large *SEARCH/REPLACE* blocks into a series of smaller blocks that each ch + Include just the changing lines, and a few surrounding lines if needed for uniqueness. + Do not include long runs of unchanging lines in *SEARCH/REPLACE* blocks. + ++Include *ALL* the code being searched and replaced! ++ + Only create *SEARCH/REPLACE* blocks for files that the user has added to the chat! + + To move code within a file, use 2 *SEARCH/REPLACE* blocks: 1 to delete it from its current location, 1 to insert it in the new location. + +-Pay attention to which filenames the user wants you to edit, especially if they are asking you to create a new file. ++Pay attention to which files the user wants you to edit, especially if they are asking you to create a new file. + + If you want to put code in a new file, use a *SEARCH/REPLACE block* with: + - A new file path, including dir name if needed + - An empty `SEARCH` section + - The new file's contents in the `REPLACE` section + +-{rename_with_shell}{go_ahead_tip}{lazy_prompt}ONLY EVER RETURN CODE IN A *SEARCH/REPLACE BLOCK*! ++{rename_with_shell}{go_ahead_tip} ++ ++{lazy_prompt} ++ONLY EVER RETURN CODE IN A *SEARCH/REPLACE BLOCK! 
+ {shell_cmd_reminder} + """ + diff --git a/locodiff-250425/results/aider_aider_coders_editblock_prompts.py/x-ai_grok-code-fast-1/20250909_155853/raw_response.txt b/locodiff-250425/results/aider_aider_coders_editblock_prompts.py/x-ai_grok-code-fast-1/20250909_155853/raw_response.txt new file mode 100644 index 000000000..2cb1b3576 --- /dev/null +++ b/locodiff-250425/results/aider_aider_coders_editblock_prompts.py/x-ai_grok-code-fast-1/20250909_155853/raw_response.txt @@ -0,0 +1,215 @@ +```python +# flake8: noqa: E501 + +from .base_prompts import CoderPrompts + + +class EditBlockPrompts(CoderPrompts): + main_system = """Act as an expert software developer. +Always use best practices when coding. +Respect and use existing conventions, libraries, etc that are already present in the code base. +{lazy_prompt} + +Take requests for changes to the supplied code. +If the request is ambiguous, ask questions. + +Always reply to the user in {language}. + +Once you understand the request you MUST: + +1. Decide if you need to propose *SEARCH/REPLACE* edits to any files that haven't been added to the chat. You can create new files without asking! + +But if you need to propose edits to existing files not already added to the chat, you *MUST* tell the user their full path names and ask them to *add the files to the chat*. +End your reply and wait for their approval. +You can keep asking if you then decide you need to edit more files. + +2. Think step-by-step and explain the needed changes in a few short sentences. + +3. Describe each change with a *SEARCH/REPLACE block* per the examples below. + +All changes to files must use this *SEARCH/REPLACE block* format. +ONLY EVER RETURN CODE IN A *SEARCH/REPLACE BLOCK! +{shell_cmd_prompt} +""" + + shell_cmd_prompt = """ +4. *Concisely* suggest any shell commands the user might want to run in ```bash blocks. + +Just suggest shell commands this way, not example code. 
+Only suggest complete shell commands that are ready to execute, without placeholders. +Only suggest at most a few shell commands at a time, not more than 1-3, one per line. +Do not suggest multi-line shell commands. +All shell commands will run from the root directory of the user's project. + +Use the appropriate shell based on the user's system info: +{platform} +Examples of when to suggest shell commands: + +- If you changed a self-contained html file, suggest an OS-appropriate command to open a browser to view it to see the updated content. +- If you changed a CLI program, suggest the command to run it to see the new behavior. +- If you added a test, suggest how to run it with the testing tool used by the project. +- Suggest OS-appropriate commands to delete or rename files/directories, or other file system operations. +- If your code changes add new dependencies, suggest the command to install them. +- Etc. +""" + + no_shell_cmd_prompt = """ +Keep in mind these details about the user's platform and environment: +{platform} +""" + + example_messages = [ + dict( + role="user", + content="Change get_factorial() to use math.factorial", + ), + dict( + role="assistant", + content="""To make this change we need to modify `mathweb/flask/app.py` to: + +1. Import the math package. +2. Remove the existing factorial() function. +3. Update get_factorial() to call math.factorial instead. 
+ +Here are the *SEARCH/REPLACE* blocks: + +mathweb/flask/app.py +{fence[0]}python +<<<<<<< SEARCH +from flask import Flask +======= +import math +from flask import Flask +>>>>>>> REPLACE +{fence[1]} + +mathweb/flask/app.py +{fence[0]}python +<<<<<<< SEARCH +def factorial(n): + "compute factorial" + + if n == 0: + return 1 + else: + return n * factorial(n-1) + +======= +>>>>>>> REPLACE +{fence[1]} + +mathweb/flask/app.py +{fence[0]}python +<<<<<<< SEARCH + return str(factorial(n)) +======= + return str(math.factorial(n)) +>>>>>>> REPLACE +{fence[1]} +""", + ), + dict( + role="user", + content="Refactor hello() into its own file.", + ), + dict( + role="assistant", + content="""To make this change we need to modify `main.py` and make a new file `hello.py`: + +1. Make a new hello.py file with hello() in it. +2. Remove hello() from main.py and replace it with an import. + +Here are the *SEARCH/REPLACE* blocks: + +hello.py +{fence[0]}python +<<<<<<< SEARCH +======= +def hello(): + "print a greeting" + + print("hello") +>>>>>>> REPLACE +{fence[1]} + +main.py +{fence[0]}python +<<<<<<< SEARCH +def hello(): + "print a greeting" + + print("hello") +======= +from hello import hello +>>>>>>> REPLACE +{fence[1]} +""", + ), + ] + + system_reminder = """# *SEARCH/REPLACE block* Rules: + +Every *SEARCH/REPLACE block* must use this format: +1. The *FULL* file path alone on a line, verbatim. No bold asterisks, no quotes around it, no escaping of characters, etc. +2. The opening fence and code language, eg: {fence[0]}python +3. The start of search block: <<<<<<< SEARCH +4. A contiguous chunk of lines to search for in the existing source code +5. The dividing line: ======= +6. The lines to replace into the source code +7. The end of the replace block: >>>>>>> REPLACE +8. The closing fence: {fence[1]} + +{quad_backtick_reminder} +Every *SEARCH* section must *EXACTLY MATCH* the existing file content, character for character, including all comments, docstrings, etc. 
+If the file contains code or other data wrapped/escaped in json/xml/quotes or other containers, you need to propose edits to the literal contents of the file, including the container markup. + +*SEARCH/REPLACE* blocks will *only* replace the first match occurrence. +Including multiple unique *SEARCH/REPLACE* blocks if needed. +Include enough lines in each SEARCH section to uniquely match each set of lines that need to change. + +Keep *SEARCH/REPLACE* blocks concise. +Break large *SEARCH/REPLACE* blocks into a series of smaller blocks that each change a small portion of the file. +Include just the changing lines, and a few surrounding lines if needed for uniqueness. +Do not include long runs of unchanging lines in *SEARCH/REPLACE* blocks. + +Include *ALL* the code being searched and replaced! + +Only create *SEARCH/REPLACE* blocks for files that the user has added to the chat! + +To move code within a file, use 2 *SEARCH/REPLACE* blocks: 1 to delete it from its current location, 1 to insert it in the new location. + +Pay attention to which files the user wants you to edit, especially if they are asking you to create a new file. + +If you want to put code in a new file, use a *SEARCH/REPLACE block* with: +- A new file path, including dir name if needed +- An empty `SEARCH` section +- The new file's contents in the `REPLACE` section + +{rename_with_shell}{go_ahead_tip} + +{lazy_prompt} +ONLY EVER RETURN CODE IN A *SEARCH/REPLACE BLOCK! +{shell_cmd_reminder} +""" + + rename_with_shell = """To rename files which have been added to the chat, use shell commands at the end of your response. + +""" + + go_ahead_tip = """If the user just says something like "ok" or "go ahead" or "do that" they probably want you to make SEARCH/REPLACE blocks for the code changes you just proposed. +The user will say when they've applied your edits. If they haven't explicitly confirmed the edits have been applied, they probably want proper SEARCH/REPLACE blocks. 
+ +""" + + shell_cmd_reminder = """ +Examples of when to suggest shell commands: + +- If you changed a self-contained html file, suggest an OS-appropriate command to open a browser to view it to see the updated content. +- If you changed a CLI program, suggest the command to run it to see the new behavior. +- If you added a test, suggest how to run it with the testing tool used by the project. +- Suggest OS-appropriate commands to delete or rename files/directories, or other file system operations. +- If your code changes add new dependencies, suggest the command to install them. +- Etc. + +""" +``` \ No newline at end of file diff --git a/locodiff-250425/results/aider_aider_history.py/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt b/locodiff-250425/results/aider_aider_history.py/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt new file mode 100644 index 000000000..17bac6e98 --- /dev/null +++ b/locodiff-250425/results/aider_aider_history.py/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt @@ -0,0 +1,139 @@ +import argparse + +from aider import models, prompts +from aider.dump import dump # noqa: F401 + + +class ChatSummary: + def __init__(self, models=None, max_tokens=1024): + if not models: + raise ValueError("At least one model must be provided") + self.models = models if isinstance(models, list) else [models] + self.max_tokens = max_tokens + self.token_count = self.models[0].token_count + + def too_big(self, messages): + sized = self.tokenize(messages) + total = sum(tokens for tokens, _msg in sized) + return total > self.max_tokens + + def tokenize(self, messages): + sized = [] + for msg in messages: + tokens = self.token_count(msg) + sized.append((tokens, msg)) + return sized + + def summarize(self, messages, depth=0): + messages = self.summarize_real(messages) + if messages and messages[-1]["role"] != "assistant": + messages.append(dict(role="assistant", content="Ok.")) + return messages + + def summarize_real(self, messages, 
depth=0): + if not self.models: + raise ValueError("No models available for summarization") + + sized = self.tokenize(messages) + total = sum(tokens for tokens, _msg in sized) + if total <= self.max_tokens and depth == 0: + return messages + + min_split = 4 + if len(messages) <= min_split or depth > 3: + return self.summarize_all(messages) + + tail_tokens = 0 + split_index = len(messages) + half_max_tokens = self.max_tokens // 2 + + # Iterate over the messages in reverse order + for i in range(len(sized) - 1, -1, -1): + tokens, _msg = sized[i] + if tail_tokens + tokens < half_max_tokens: + tail_tokens += tokens + split_index = i + else: + break + + # Ensure the head ends with an assistant message + while messages[split_index - 1]["role"] != "assistant" and split_index > 1: + split_index -= 1 + + if split_index <= min_split: + return self.summarize_all(messages) + + head = messages[:split_index] + tail = messages[split_index:] + + sized = sized[:split_index] + head.reverse() + sized.reverse() + keep = [] + total = 0 + + # These sometimes come set with value = None + model_max_input_tokens = self.models[0].info.get("max_input_tokens") or 4096 + model_max_input_tokens -= 512 + + for i in range(split_index): + total += sized[i][0] + if total > model_max_input_tokens: + break + keep.append(head[i]) + + keep.reverse() + + summary = self.summarize_all(keep) + + tail_tokens = sum(tokens for tokens, msg in sized[split_index:]) + summary_tokens = self.token_count(summary) + + result = summary + tail + if summary_tokens + tail_tokens < self.max_tokens: + return result + + return self.summarize_real(result, depth + 1) + + def summarize_all(self, messages): + content = "" + for msg in messages: + role = msg["role"].upper() + if role not in ("USER", "ASSISTANT"): + continue + content += f"# {role}\n" + content += msg["content"] + if not content.endswith("\n"): + content += "\n" + + summarize_messages = [ + dict(role="system", content=prompts.summarize), + dict(role="user", 
content=content), + ] + + for model in self.models: + try: + summary = model.simple_send_with_retries(summarize_messages) + if summary is not None: + summary = prompts.summary_prefix + summary + return [dict(role="user", content=summary)] + except Exception as e: + print(f"Summarization failed for model {model.name}: {str(e)}") + + raise ValueError("summarizer unexpectedly failed for all models") + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("filename", help="Markdown file to parse") + args = parser.parse_args() + + model_names = ["gpt-3.5-turbo", "gpt-4"] # Add more model names as needed + model_list = [models.Model(name) for name in model_names] + summarizer = ChatSummary(model_list) + + with open(args.filename, "r") as f: + text = f.read() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/locodiff-250425/results/aider_aider_history.py/x-ai_grok-code-fast-1/20250909_155741/metadata.json b/locodiff-250425/results/aider_aider_history.py/x-ai_grok-code-fast-1/20250909_155741/metadata.json new file mode 100644 index 000000000..7e165a263 --- /dev/null +++ b/locodiff-250425/results/aider_aider_history.py/x-ai_grok-code-fast-1/20250909_155741/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "aider_aider_history.py", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/aider_aider_history.py_prompt.txt", + "expected_file": "locodiff-250425/prompts/aider_aider_history.py_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:57:41.894188+00:00", + "success": false, + "error": "Output mismatch", + "api_error": false, + "raw_response_length": 4489, + "extracted_output_length": 4475, + "expected_output_length": 4559, + "results_dir": "locodiff-250425/results/aider_aider_history.py/x-ai_grok-code-fast-1/20250909_155741", + "generation_id": "gen-1757433462-24vaoZkP2dJEH118AV2W", + "cost_usd": 0.00203548, + "prompt_tokens": 18936, + 
"completion_tokens": 1062, + "total_tokens": 19998, + "native_prompt_tokens": 18962, + "native_completion_tokens": 1102, + "native_finish_reason": "stop", + "stats_error": null, + "native_tokens_reasoning": 126 +} \ No newline at end of file diff --git a/locodiff-250425/results/aider_aider_history.py/x-ai_grok-code-fast-1/20250909_155741/output.diff b/locodiff-250425/results/aider_aider_history.py/x-ai_grok-code-fast-1/20250909_155741/output.diff new file mode 100644 index 000000000..87e19f3c5 --- /dev/null +++ b/locodiff-250425/results/aider_aider_history.py/x-ai_grok-code-fast-1/20250909_155741/output.diff @@ -0,0 +1,15 @@ + +index ce6172c9a..17bac6e98 100644 +--- a/aider_aider_history.py_expectedoutput.txt (expected):tmp/tmp6hotfywr_expected.txt ++++ b/aider_aider_history.py_extracted.txt (actual):tmp/tmp2o1djlx5_actual.txt +@@ -135,9 +135,5 @@ def main(): + with open(args.filename, "r") as f: + text = f.read() + +- summary = summarizer.summarize_chat_history_markdown(text) +- dump(summary) +- +- + if __name__ == "__main__": + main() +\ No newline at end of file diff --git a/locodiff-250425/results/aider_aider_history.py/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt b/locodiff-250425/results/aider_aider_history.py/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt new file mode 100644 index 000000000..7e9821723 --- /dev/null +++ b/locodiff-250425/results/aider_aider_history.py/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt @@ -0,0 +1,141 @@ +```python +import argparse + +from aider import models, prompts +from aider.dump import dump # noqa: F401 + + +class ChatSummary: + def __init__(self, models=None, max_tokens=1024): + if not models: + raise ValueError("At least one model must be provided") + self.models = models if isinstance(models, list) else [models] + self.max_tokens = max_tokens + self.token_count = self.models[0].token_count + + def too_big(self, messages): + sized = self.tokenize(messages) + total = sum(tokens for tokens, _msg in 
sized) + return total > self.max_tokens + + def tokenize(self, messages): + sized = [] + for msg in messages: + tokens = self.token_count(msg) + sized.append((tokens, msg)) + return sized + + def summarize(self, messages, depth=0): + messages = self.summarize_real(messages) + if messages and messages[-1]["role"] != "assistant": + messages.append(dict(role="assistant", content="Ok.")) + return messages + + def summarize_real(self, messages, depth=0): + if not self.models: + raise ValueError("No models available for summarization") + + sized = self.tokenize(messages) + total = sum(tokens for tokens, _msg in sized) + if total <= self.max_tokens and depth == 0: + return messages + + min_split = 4 + if len(messages) <= min_split or depth > 3: + return self.summarize_all(messages) + + tail_tokens = 0 + split_index = len(messages) + half_max_tokens = self.max_tokens // 2 + + # Iterate over the messages in reverse order + for i in range(len(sized) - 1, -1, -1): + tokens, _msg = sized[i] + if tail_tokens + tokens < half_max_tokens: + tail_tokens += tokens + split_index = i + else: + break + + # Ensure the head ends with an assistant message + while messages[split_index - 1]["role"] != "assistant" and split_index > 1: + split_index -= 1 + + if split_index <= min_split: + return self.summarize_all(messages) + + head = messages[:split_index] + tail = messages[split_index:] + + sized = sized[:split_index] + head.reverse() + sized.reverse() + keep = [] + total = 0 + + # These sometimes come set with value = None + model_max_input_tokens = self.models[0].info.get("max_input_tokens") or 4096 + model_max_input_tokens -= 512 + + for i in range(split_index): + total += sized[i][0] + if total > model_max_input_tokens: + break + keep.append(head[i]) + + keep.reverse() + + summary = self.summarize_all(keep) + + tail_tokens = sum(tokens for tokens, msg in sized[split_index:]) + summary_tokens = self.token_count(summary) + + result = summary + tail + if summary_tokens + tail_tokens < 
self.max_tokens: + return result + + return self.summarize_real(result, depth + 1) + + def summarize_all(self, messages): + content = "" + for msg in messages: + role = msg["role"].upper() + if role not in ("USER", "ASSISTANT"): + continue + content += f"# {role}\n" + content += msg["content"] + if not content.endswith("\n"): + content += "\n" + + summarize_messages = [ + dict(role="system", content=prompts.summarize), + dict(role="user", content=content), + ] + + for model in self.models: + try: + summary = model.simple_send_with_retries(summarize_messages) + if summary is not None: + summary = prompts.summary_prefix + summary + return [dict(role="user", content=summary)] + except Exception as e: + print(f"Summarization failed for model {model.name}: {str(e)}") + + raise ValueError("summarizer unexpectedly failed for all models") + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("filename", help="Markdown file to parse") + args = parser.parse_args() + + model_names = ["gpt-3.5-turbo", "gpt-4"] # Add more model names as needed + model_list = [models.Model(name) for name in model_names] + summarizer = ChatSummary(model_list) + + with open(args.filename, "r") as f: + text = f.read() + +if __name__ == "__main__": + main() +``` \ No newline at end of file diff --git a/locodiff-250425/results/aider_aider_linter.py/x-ai_grok-code-fast-1/20250909_155844/extracted_output.txt b/locodiff-250425/results/aider_aider_linter.py/x-ai_grok-code-fast-1/20250909_155844/extracted_output.txt new file mode 100644 index 000000000..59e9fc28d --- /dev/null +++ b/locodiff-250425/results/aider_aider_linter.py/x-ai_grok-code-fast-1/20250909_155844/extracted_output.txt @@ -0,0 +1,301 @@ +import os +import re +import subprocess +import sys +import traceback +import warnings +import shlex +from dataclasses import dataclass +from pathlib import Path + +from grep_ast import TreeContext, filename_to_lang +from grep_ast.tsl import get_parser # noqa: E402 + +from aider.dump 
import dump # noqa: F401 +from aider.run_cmd import run_cmd_subprocess # noqa: F401 + +# tree_sitter is throwing a FutureWarning +warnings.simplefilter("ignore", category=FutureWarning) + + +@dataclass +class LintResult: + text: str + lines: list + + +class Linter: + def __init__(self, encoding="utf-8", root=None): + self.encoding = encoding + self.root = root + + self.languages = dict( + python=self.py_lint, + ) + self.all_lint_cmd = None + + def set_linter(self, lang, cmd): + if lang: + self.languages[lang] = cmd + else: + self.all_lint_cmd = cmd + + def get_rel_fname(self, fname): + if self.root: + try: + return os.path.relpath(fname, self.root) + except ValueError: + return fname + else: + return fname + + def run_cmd(self, cmd, rel_fname, code): + cmd += " " + shlex.quote(rel_fname) + + returncode = 0 + stdout = "" + try: + returncode, stdout = run_cmd_subprocess( + cmd, + cwd=self.root, + encoding=self.encoding, + ) + except OSError as err: + print(f"Unable to execute lint command: {err}") + return + errors = stdout + if returncode == 0: + return # zero exit status + + res = f"## Running: {cmd}\n\n" + res += errors + + return self.errors_to_lint_result(rel_fname, res) + + def errors_to_lint_result(self, rel_fname, errors): + if not errors: + return + + linenums = [] + filenames_linenums = find_filenames_and_linenums(errors, [rel_fname]) + if filenames_linenums: + filename, linenums = next(iter(filenames_linenums.items())) + linenums = [num - 1 for num in linenums] + + return LintResult(text=errors, lines=linenums) + + def lint(self, fname, cmd=None): + rel_fname = self.get_rel_fname(fname) + try: + code = Path(fname).read_text(encoding=self.encoding, errors="replace") + except OSError as err: + print(f"Unable to read {fname}: {err}") + return + + if cmd: + cmd = cmd.strip() + if not cmd: + lang = filename_to_lang(fname) + if not lang: + return + if self.all_lint_cmd: + cmd = self.all_lint_cmd + else: + cmd = self.languages.get(lang) + + if callable(cmd): + 
lintres = cmd(fname, rel_fname, code) + elif cmd: + lintres = self.run_cmd(cmd, rel_fname, code) + else: + lintres = basic_lint(rel_fname, code) + + if not lintres: + return + + res = "# Fix any errors below, if possible.\n\n" + res += lintres.text + res += "\n" + res += tree_context(rel_fname, code, lintres.lines) + + return res + + def py_lint(self, fname, rel_fname, code): + basic_res = basic_lint(rel_fname, code) + compile_res = lint_python_compile(fname, code) + flake_res = self.flake8_lint(rel_fname) + + text = "" + lines = set() + for res in [basic_res, compile_res, flake_res]: + if not res: + continue + if text: + text += "\n" + text += res.text + lines.update(res.lines) + + if text or lines: + return LintResult(text, lines) + + def flake8_lint(self, rel_fname): + fatal = "E9,F821,F823,F831,F406,F407,F701,F702,F704,F706" + flake8_cmd = [ + sys.executable, + "-m", + "flake8", + f"--select={fatal}", + "--show-source", + "--isolated", + rel_fname, + ] + + try: + result = subprocess.run( + flake8_cmd, + capture_output=True, + text=True, + check=False, + encoding=self.encoding, + errors="replace", + cwd=self.root, + ) + errors = result.stdout + result.stderr + except Exception as e: + errors = f"Error running flake8: {str(e)}" + + if not errors: + return + + text = f"## Running: {' '.join(flake8_cmd)}\n\n" + text += errors + return self.errors_to_lint_result(rel_fname, text) + + +def lint_python_compile(fname, code): + try: + compile(code, fname, "exec") # USE TRACEBACK BELOW HERE + return + except Exception as err: + end_lineno = getattr(err, "end_lineno", err.lineno) + line_numbers = list(range(err.lineno - 1, end_lineno)) + + tb_lines = traceback.format_exception(type(err), err, err.__traceback__) + last_file_i = 0 + + target = "# USE TRACEBACK" + target += " BELOW HERE" + for i in range(len(tb_lines)): + if target in tb_lines[i]: + last_file_i = i + break + + tb_lines = tb_lines[:1] + tb_lines[last_file_i + 1 :] + + res = "".join(tb_lines) + return 
LintResult(text=res, lines=line_numbers) + + +def basic_lint(fname, code): + """ + Use tree-sitter to look for syntax errors, display them with tree context. + """ + + lang = filename_to_lang(fname) + if not lang: + return + + # Tree-sitter linter is not capable of working with typescript #1132 + if lang == "typescript": + return + + try: + parser = get_parser(lang) + except Exception as err: + print(f"Unable to load parser: {err}") + return + + tree = parser.parse(bytes(code, "utf-8")) + + try: + errors = traverse_tree(tree.root_node) + except RecursionError: + print(f"Unable to lint {fname} due to RecursionError") + return + + if not errors: + return + + return LintResult(text="", lines=errors) + + +def tree_context(fname, code, line_nums): + context = TreeContext( + fname, + code, + color=False, + line_number=True, + child_context=False, + last_line=False, + margin=0, + mark_lois=True, + loi_pad=3, + # header_max=30, + show_top_of_file_parent_scope=False, + ) + line_nums = set(line_nums) + context.add_lines_of_interest(line_nums) + context.add_context() + s = "s" if len(line_nums) > 1 else "" + output = f"# Fix the error{s}, see relevant line{s} below marked with █.\n\n" + output += fname + ":\n" + output += context.format() + + return output + + +def traverse_tree(node): + errors = [] + if node.type == "ERROR" or node.is_missing: + line_no = node.start_point[0] + errors.append(line_no) + + for child in node.children: + errors += traverse_tree(child) + + return errors + + +def find_filenames_and_linenums(text, fnames): + """ + Search text for all occurrences of :\\d+ and make a list of them + where is one of the filenames in the list `fnames`. 
+ """ + pattern = re.compile(r"(\b(?:" + "|".join(re.escape(fname) for fname in fnames) + r"):\d+\b)") + matches = pattern.findall(text) + result = {} + for match in matches: + fname, linenum = match.rsplit(":", 1) + if fname not in result: + result[fname] = set() + result[fname].add(int(linenum)) + return result + + +def main(): + """ + Main function to parse files provided as command line arguments. + """ + if len(sys.argv) < 2: + print("Usage: python linter.py ...") + sys.exit(1) + + linter = Linter(root=os.getcwd()) + for file_path in sys.argv[1:]: + errors = linter.lint(file_path) + if errors: + print(errors) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/locodiff-250425/results/aider_aider_linter.py/x-ai_grok-code-fast-1/20250909_155844/metadata.json b/locodiff-250425/results/aider_aider_linter.py/x-ai_grok-code-fast-1/20250909_155844/metadata.json new file mode 100644 index 000000000..3b0efdb43 --- /dev/null +++ b/locodiff-250425/results/aider_aider_linter.py/x-ai_grok-code-fast-1/20250909_155844/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "aider_aider_linter.py", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/aider_aider_linter.py_prompt.txt", + "expected_file": "locodiff-250425/prompts/aider_aider_linter.py_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:58:44.118783+00:00", + "success": false, + "error": "Output mismatch", + "api_error": false, + "raw_response_length": 7996, + "extracted_output_length": 7982, + "expected_output_length": 7996, + "results_dir": "locodiff-250425/results/aider_aider_linter.py/x-ai_grok-code-fast-1/20250909_155844", + "generation_id": "gen-1757433524-DDEytQofKV4SXnlhQQh1", + "cost_usd": 0.01490238, + "prompt_tokens": 35444, + "completion_tokens": 2867, + "total_tokens": 38311, + "native_prompt_tokens": 35130, + "native_completion_tokens": 5297, + "native_finish_reason": "stop", + "stats_error": null, + 
"native_tokens_reasoning": 3449 +} \ No newline at end of file diff --git a/locodiff-250425/results/aider_aider_linter.py/x-ai_grok-code-fast-1/20250909_155844/output.diff b/locodiff-250425/results/aider_aider_linter.py/x-ai_grok-code-fast-1/20250909_155844/output.diff new file mode 100644 index 000000000..41165048c --- /dev/null +++ b/locodiff-250425/results/aider_aider_linter.py/x-ai_grok-code-fast-1/20250909_155844/output.diff @@ -0,0 +1,83 @@ + +index add561d0a..59e9fc28d 100644 +--- a/aider_aider_linter.py_expectedoutput.txt (expected):tmp/tmpl53g66wm_expected.txt ++++ b/aider_aider_linter.py_extracted.txt (actual):tmp/tmpr3y6c1us_actual.txt +@@ -18,6 +18,12 @@ from aider.run_cmd import run_cmd_subprocess # noqa: F401 + warnings.simplefilter("ignore", category=FutureWarning) + + ++@dataclass ++class LintResult: ++ text: str ++ lines: list ++ ++ + class Linter: + def __init__(self, encoding="utf-8", root=None): + self.encoding = encoding +@@ -31,9 +37,8 @@ class Linter: + def set_linter(self, lang, cmd): + if lang: + self.languages[lang] = cmd +- return +- +- self.all_lint_cmd = cmd ++ else: ++ self.all_lint_cmd = cmd + + def get_rel_fname(self, fname): + if self.root: +@@ -145,8 +150,6 @@ class Linter: + rel_fname, + ] + +- text = f"## Running: {' '.join(flake8_cmd)}\n\n" +- + try: + result = subprocess.run( + flake8_cmd, +@@ -164,16 +167,11 @@ class Linter: + if not errors: + return + ++ text = f"## Running: {' '.join(flake8_cmd)}\n\n" + text += errors + return self.errors_to_lint_result(rel_fname, text) + + +-@dataclass +-class LintResult: +- text: str +- lines: list +- +- + def lint_python_compile(fname, code): + try: + compile(code, fname, "exec") # USE TRACEBACK BELOW HERE +@@ -194,8 +192,8 @@ def lint_python_compile(fname, code): + + tb_lines = tb_lines[:1] + tb_lines[last_file_i + 1 :] + +- res = "".join(tb_lines) +- return LintResult(text=res, lines=line_numbers) ++ res = "".join(tb_lines) ++ return LintResult(text=res, lines=line_numbers) + + + def 
basic_lint(fname, code): +@@ -249,14 +247,13 @@ def tree_context(fname, code, line_nums): + context.add_lines_of_interest(line_nums) + context.add_context() + s = "s" if len(line_nums) > 1 else "" +- output = f"## See relevant line{s} below marked with █.\n\n" ++ output = f"# Fix the error{s}, see relevant line{s} below marked with █.\n\n" + output += fname + ":\n" + output += context.format() + + return output + + +-# Traverse the tree to find errors + def traverse_tree(node): + errors = [] + if node.type == "ERROR" or node.is_missing: diff --git a/locodiff-250425/results/aider_aider_linter.py/x-ai_grok-code-fast-1/20250909_155844/raw_response.txt b/locodiff-250425/results/aider_aider_linter.py/x-ai_grok-code-fast-1/20250909_155844/raw_response.txt new file mode 100644 index 000000000..7247feb4f --- /dev/null +++ b/locodiff-250425/results/aider_aider_linter.py/x-ai_grok-code-fast-1/20250909_155844/raw_response.txt @@ -0,0 +1,303 @@ +```python +import os +import re +import subprocess +import sys +import traceback +import warnings +import shlex +from dataclasses import dataclass +from pathlib import Path + +from grep_ast import TreeContext, filename_to_lang +from grep_ast.tsl import get_parser # noqa: E402 + +from aider.dump import dump # noqa: F401 +from aider.run_cmd import run_cmd_subprocess # noqa: F401 + +# tree_sitter is throwing a FutureWarning +warnings.simplefilter("ignore", category=FutureWarning) + + +@dataclass +class LintResult: + text: str + lines: list + + +class Linter: + def __init__(self, encoding="utf-8", root=None): + self.encoding = encoding + self.root = root + + self.languages = dict( + python=self.py_lint, + ) + self.all_lint_cmd = None + + def set_linter(self, lang, cmd): + if lang: + self.languages[lang] = cmd + else: + self.all_lint_cmd = cmd + + def get_rel_fname(self, fname): + if self.root: + try: + return os.path.relpath(fname, self.root) + except ValueError: + return fname + else: + return fname + + def run_cmd(self, cmd, rel_fname, 
code): + cmd += " " + shlex.quote(rel_fname) + + returncode = 0 + stdout = "" + try: + returncode, stdout = run_cmd_subprocess( + cmd, + cwd=self.root, + encoding=self.encoding, + ) + except OSError as err: + print(f"Unable to execute lint command: {err}") + return + errors = stdout + if returncode == 0: + return # zero exit status + + res = f"## Running: {cmd}\n\n" + res += errors + + return self.errors_to_lint_result(rel_fname, res) + + def errors_to_lint_result(self, rel_fname, errors): + if not errors: + return + + linenums = [] + filenames_linenums = find_filenames_and_linenums(errors, [rel_fname]) + if filenames_linenums: + filename, linenums = next(iter(filenames_linenums.items())) + linenums = [num - 1 for num in linenums] + + return LintResult(text=errors, lines=linenums) + + def lint(self, fname, cmd=None): + rel_fname = self.get_rel_fname(fname) + try: + code = Path(fname).read_text(encoding=self.encoding, errors="replace") + except OSError as err: + print(f"Unable to read {fname}: {err}") + return + + if cmd: + cmd = cmd.strip() + if not cmd: + lang = filename_to_lang(fname) + if not lang: + return + if self.all_lint_cmd: + cmd = self.all_lint_cmd + else: + cmd = self.languages.get(lang) + + if callable(cmd): + lintres = cmd(fname, rel_fname, code) + elif cmd: + lintres = self.run_cmd(cmd, rel_fname, code) + else: + lintres = basic_lint(rel_fname, code) + + if not lintres: + return + + res = "# Fix any errors below, if possible.\n\n" + res += lintres.text + res += "\n" + res += tree_context(rel_fname, code, lintres.lines) + + return res + + def py_lint(self, fname, rel_fname, code): + basic_res = basic_lint(rel_fname, code) + compile_res = lint_python_compile(fname, code) + flake_res = self.flake8_lint(rel_fname) + + text = "" + lines = set() + for res in [basic_res, compile_res, flake_res]: + if not res: + continue + if text: + text += "\n" + text += res.text + lines.update(res.lines) + + if text or lines: + return LintResult(text, lines) + + def 
flake8_lint(self, rel_fname): + fatal = "E9,F821,F823,F831,F406,F407,F701,F702,F704,F706" + flake8_cmd = [ + sys.executable, + "-m", + "flake8", + f"--select={fatal}", + "--show-source", + "--isolated", + rel_fname, + ] + + try: + result = subprocess.run( + flake8_cmd, + capture_output=True, + text=True, + check=False, + encoding=self.encoding, + errors="replace", + cwd=self.root, + ) + errors = result.stdout + result.stderr + except Exception as e: + errors = f"Error running flake8: {str(e)}" + + if not errors: + return + + text = f"## Running: {' '.join(flake8_cmd)}\n\n" + text += errors + return self.errors_to_lint_result(rel_fname, text) + + +def lint_python_compile(fname, code): + try: + compile(code, fname, "exec") # USE TRACEBACK BELOW HERE + return + except Exception as err: + end_lineno = getattr(err, "end_lineno", err.lineno) + line_numbers = list(range(err.lineno - 1, end_lineno)) + + tb_lines = traceback.format_exception(type(err), err, err.__traceback__) + last_file_i = 0 + + target = "# USE TRACEBACK" + target += " BELOW HERE" + for i in range(len(tb_lines)): + if target in tb_lines[i]: + last_file_i = i + break + + tb_lines = tb_lines[:1] + tb_lines[last_file_i + 1 :] + + res = "".join(tb_lines) + return LintResult(text=res, lines=line_numbers) + + +def basic_lint(fname, code): + """ + Use tree-sitter to look for syntax errors, display them with tree context. 
+ """ + + lang = filename_to_lang(fname) + if not lang: + return + + # Tree-sitter linter is not capable of working with typescript #1132 + if lang == "typescript": + return + + try: + parser = get_parser(lang) + except Exception as err: + print(f"Unable to load parser: {err}") + return + + tree = parser.parse(bytes(code, "utf-8")) + + try: + errors = traverse_tree(tree.root_node) + except RecursionError: + print(f"Unable to lint {fname} due to RecursionError") + return + + if not errors: + return + + return LintResult(text="", lines=errors) + + +def tree_context(fname, code, line_nums): + context = TreeContext( + fname, + code, + color=False, + line_number=True, + child_context=False, + last_line=False, + margin=0, + mark_lois=True, + loi_pad=3, + # header_max=30, + show_top_of_file_parent_scope=False, + ) + line_nums = set(line_nums) + context.add_lines_of_interest(line_nums) + context.add_context() + s = "s" if len(line_nums) > 1 else "" + output = f"# Fix the error{s}, see relevant line{s} below marked with █.\n\n" + output += fname + ":\n" + output += context.format() + + return output + + +def traverse_tree(node): + errors = [] + if node.type == "ERROR" or node.is_missing: + line_no = node.start_point[0] + errors.append(line_no) + + for child in node.children: + errors += traverse_tree(child) + + return errors + + +def find_filenames_and_linenums(text, fnames): + """ + Search text for all occurrences of :\\d+ and make a list of them + where is one of the filenames in the list `fnames`. + """ + pattern = re.compile(r"(\b(?:" + "|".join(re.escape(fname) for fname in fnames) + r"):\d+\b)") + matches = pattern.findall(text) + result = {} + for match in matches: + fname, linenum = match.rsplit(":", 1) + if fname not in result: + result[fname] = set() + result[fname].add(int(linenum)) + return result + + +def main(): + """ + Main function to parse files provided as command line arguments. 
+ """ + if len(sys.argv) < 2: + print("Usage: python linter.py ...") + sys.exit(1) + + linter = Linter(root=os.getcwd()) + for file_path in sys.argv[1:]: + errors = linter.lint(file_path) + if errors: + print(errors) + + +if __name__ == "__main__": + main() +``` \ No newline at end of file diff --git a/locodiff-250425/results/aider_aider_onboarding.py/x-ai_grok-code-fast-1/20250909_155808/extracted_output.txt b/locodiff-250425/results/aider_aider_onboarding.py/x-ai_grok-code-fast-1/20250909_155808/extracted_output.txt new file mode 100644 index 000000000..c5c3ac11c --- /dev/null +++ b/locodiff-250425/results/aider_aider_onboarding.py/x-ai_grok-code-fast-1/20250909_155808/extracted_output.txt @@ -0,0 +1,426 @@ +import base64 +import hashlib +import http.server +import os +import secrets +import socketserver +import threading +import time +import webbrowser +from urllib.parse import parse_qs, urlparse + +import requests + +from aider import urls +from aider.io import InputOutput + + +def check_openrouter_tier(api_key): + """ + Checks if the user is on a free tier for OpenRouter. + + Args: + api_key: The OpenRouter API key to check. + + Returns: + A boolean indicating if the user is on a free tier (True) or paid tier (False). + Returns True if the check fails. + """ + try: + response = requests.get( + "https://openrouter.ai/api/v1/auth/key", + headers={"Authorization": f"Bearer {api_key}"}, + timeout=5, # Add a reasonable timeout + ) + response.raise_for_status() + data = response.json() + # According to the documentation, 'is_free_tier' will be true if the user has never paid + return data.get("data", {}).get("is_free_tier", True) # Default to True if not found + except Exception: + # If there's any error, we'll default to assuming free tier + return True + + +def try_to_select_default_model(): + """ + Attempts to select a default model based on available API keys. + Checks OpenRouter tier status to select appropriate model. 
+ + Returns: + The name of the selected model, or None if no suitable default is found. + """ + # Special handling for OpenRouter + openrouter_key = os.environ.get("OPENROUTER_API_KEY") + if openrouter_key: + # Check if the user is on a free tier + is_free_tier = check_openrouter_tier(openrouter_key) + if is_free_tier: + return "openrouter/google/gemini-2.5-pro-exp-03-25:free" + else: + return "openrouter/anthropic/claude-3.7-sonnet" + + # Select model based on other available API keys + model_key_pairs = [ + ("ANTHROPIC_API_KEY", "sonnet"), + ("DEEPSEEK_API_KEY", "deepseek"), + ("OPENAI_API_KEY", "gpt-4o"), + ("GEMINI_API_KEY", "gemini/gemini-2.5-pro-exp-03-25"), + ("VERTEXAI_PROJECT", "vertex_ai/gemini-2.5-pro-exp-03-25"), + ] + + for env_key, model_name in model_key_pairs: + api_key_value = os.environ.get(env_key) + if api_key_value: + return model_name + + return None + + +def offer_openrouter_oauth(io, analytics): + """ + Offers OpenRouter OAuth flow to the user if no API keys are found. + + Args: + io: The InputOutput object for user interaction. + analytics: The Analytics object for tracking events. + + Returns: + True if authentication was successful, False otherwise. 
+ """ + # No API keys found - Offer OpenRouter OAuth + io.tool_output("OpenRouter provides free and paid access to many LLMs.") + # Use confirm_ask which handles non-interactive cases + if io.confirm_ask( + "Login to OpenRouter or create a free account?", + default="y", + ): + analytics.event("oauth_flow_initiated", provider="openrouter") + openrouter_key = start_openrouter_oauth_flow(io, analytics) + if openrouter_key: + # Successfully got key via OAuth, use the default OpenRouter model + # Ensure OPENROUTER_API_KEY is now set in the environment for later use + os.environ["OPENROUTER_API_KEY"] = openrouter_key + analytics.event("oauth_flow_success") + return True + + # OAuth failed or was cancelled by user implicitly (e.g., closing browser) + # Error messages are handled within start_openrouter_oauth_flow + analytics.event("oauth_flow_failure") + io.tool_error("OpenRouter authentication did not complete successfully.") + # Fall through to the final error message + + return False + + +def select_default_model(args, io, analytics): + """ + Selects a default model based on available API keys if no model is specified. + Offers OAuth flow for OpenRouter if no keys are found. + + Args: + args: The command line arguments object. + io: The InputOutput object for user interaction. + analytics: The Analytics object for tracking events. + + Returns: + The name of the selected model, or None if no suitable default is found. + """ + if args.model: + return args.model # Model already specified + + model = try_to_select_default_model() + if model: + io.tool_warning(f"Using {model} model with API key from environment.") + analytics.event("auto_model_selection", model=model) + return model + + no_model_msg = "No LLM model was specified and no API keys were provided." 
+ io.tool_warning(no_model_msg) + + # Try OAuth if no model was detected + offer_openrouter_oauth(io, analytics) + + # Check again after potential OAuth success + model = try_to_select_default_model() + if model: + return model + + io.offer_url(urls.models_and_keys, "Open documentation URL for more info?") + + +# Helper function to find an available port +def find_available_port(start_port=8484, end_port=8584): + for port in range(start_port, end_port + 1): + try: + # Check if the port is available by trying to bind to it + with socketserver.TCPServer(("localhost", port), None): + return port + except OSError: + # Port is likely already in use + continue + return None + + +# PKCE code generation +def generate_pkce_codes(): + code_verifier = secrets.token_urlsafe(64) + hasher = hashlib.sha256() + hasher.update(code_verifier.encode("utf-8")) + code_challenge = base64.urlsafe_b64encode(hasher.digest()).rstrip(b"=").decode("utf-8") + return code_verifier, code_challenge + + +# Function to exchange the authorization code for an API key +def exchange_code_for_key(code, code_verifier, io): + try: + response = requests.post( + "https://openrouter.ai/api/v1/auth/keys", + headers={"Content-Type": "application/json"}, + json={ + "code": code, + "code_verifier": code_verifier, + "code_challenge_method": "S256", + }, + timeout=30, # Add a timeout + ) + response.raise_for_status() # Raise exception for bad status codes (4xx or 5xx) + data = response.json() + api_key = data.get("key") + if not api_key: + io.tool_error("Error: 'key' not found in OpenRouter response.") + io.tool_error(f"Response: {response.text}") + return None + return api_key + except requests.exceptions.Timeout: + io.tool_error("Error: Request to OpenRouter timed out during code exchange.") + return None + except requests.exceptions.HTTPError as e: + io.tool_error( + "Error exchanging code for OpenRouter key:" + f" {e.response.status_code} {e.response.reason}" + ) + io.tool_error(f"Response: {e.response.text}") 
+ return None + except requests.exceptions.RequestException as e: + io.tool_error(f"Error exchanging code for OpenRouter key: {e}") + return None + except Exception as e: + io.tool_error(f"Unexpected error during code exchange: {e}") + return None + + +# Function to start the OAuth flow +def start_openrouter_oauth_flow(io, analytics): + """Initiates the OpenRouter OAuth PKCE flow using a local server.""" + + port = find_available_port() + if not port: + io.tool_error("Could not find an available port between 8484 and 8584.") + io.tool_error("Please ensure a port in this range is free, or configure manually.") + return None + + callback_url = f"http://localhost:{port}/callback/aider" + auth_code = None + server_error = None + server_started = threading.Event() + shutdown_server = threading.Event() + + class OAuthCallbackHandler(http.server.SimpleHTTPRequestHandler): + def do_GET(self): + nonlocal auth_code, server_error + parsed_path = urlparse(self.path) + if parsed_path.path == "/callback/aider": + query_params = parse_qs(parsed_path.query) + if "code" in query_params: + auth_code = query_params["code"][0] + self.send_response(200) + self.send_header("Content-type", "text/html") + self.end_headers() + self.wfile.write( + b"

Success!

" + b"

Aider has received the authentication code. " + b"You can close this browser tab.

" + ) + # Signal the main thread to shut down the server + shutdown_server.set() + else: + # Redirect to aider website if 'code' is missing (e.g., user visited manually) + self.send_response(302) # Found (temporary redirect) + self.send_header("Location", urls.website) + self.end_headers() + # No need to set server_error, just redirect. + # Do NOT shut down the server here; wait for timeout or success. + else: + # Redirect anything else (e.g., favicon.ico) to the main website as well + self.send_response(302) + self.send_header("Location", urls.website) + self.end_headers() + self.wfile.write(b"Not Found") + + def log_message(self, format, *args): + # Suppress server logging to keep terminal clean + pass + + def run_server(): + nonlocal server_error + try: + with socketserver.TCPServer(("localhost", port), OAuthCallbackHandler) as httpd: + io.tool_output(f"Temporary server listening on {callback_url}", log_only=True) + server_started.set() # Signal that the server is ready + # Wait until shutdown is requested or timeout occurs (handled by main thread) + while not shutdown_server.is_set(): + httpd.handle_request() # Handle one request at a time + # Add a small sleep to prevent busy-waiting if needed, + # though handle_request should block appropriately. 
+ time.sleep(0.1) + io.tool_output("Shutting down temporary server.", log_only=True) + except Exception as e: + server_error = f"Failed to start or run temporary server: {e}" + server_started.set() # Signal even if failed, error will be checked + shutdown_server.set() # Ensure shutdown logic proceeds + + server_thread = threading.Thread(target=run_server, daemon=True) + server_thread.start() + + # Wait briefly for the server to start, or for an error + if not server_started.wait(timeout=5): + io.tool_error("Temporary authentication server failed to start in time.") + shutdown_server.set() # Ensure thread exits if it eventually starts + server_thread.join(timeout=1) + return None + + # Check if server failed during startup + if server_error: + io.tool_error(server_error) + shutdown_server.set() # Ensure thread exits + server_thread.join(timeout=1) + return None + + # Generate codes and URL + code_verifier, code_challenge = generate_pkce_codes() + auth_url_base = "https://openrouter.ai/auth" + auth_params = { + "callback_url": callback_url, + "code_challenge": code_challenge, + "code_challenge_method": "S256", + } + auth_url = f"{auth_url_base}?{'&'.join(f'{k}={v}' for k, v in auth_params.items())}" + + io.tool_output("\nPlease open this URL in your browser to connect Aider with OpenRouter:") + io.tool_output() + print(auth_url) + + MINUTES = 5 + io.tool_output(f"\nWaiting up to {MINUTES} minutes for you to finish in the browser...") + io.tool_output("Use Control-C to interrupt.") + + try: + webbrowser.open(auth_url) + except Exception: + pass + + # Wait for the callback to set the auth_code or for timeout/error + interrupted = False + try: + shutdown_server.wait(timeout=MINUTES * 60) # Convert minutes to seconds + except KeyboardInterrupt: + io.tool_warning("\nOAuth flow interrupted.") + analytics.event("oauth_flow_failed", provider="openrouter", reason="user_interrupt") + interrupted = True + # Ensure the server thread is signaled to shut down + 
shutdown_server.set() + + # Join the server thread to ensure it's cleaned up + server_thread.join(timeout=1) + + if interrupted: + return None # Return None if interrupted by user + + if server_error: + io.tool_error(f"Authentication failed: {server_error}") + analytics.event("oauth_flow_failed", provider="openrouter", reason=server_error) + return None + + if not auth_code: + io.tool_error("Authentication with OpenRouter failed.") + analytics.event("oauth_flow_failed", provider="openrouter") + return None + + io.tool_output("Completing authentication...") + analytics.event("oauth_flow_code_received", provider="openrouter") + + # Exchange code for key + api_key = exchange_code_for_key(auth_code, code_verifier, io) + + if api_key: + # Set env var for the current session immediately + os.environ["OPENROUTER_API_KEY"] = api_key + + # Save the key to the oauth-keys.env file + try: + config_dir = os.path.expanduser("~/.aider") + os.makedirs(config_dir, exist_ok=True) + key_file = os.path.join(config_dir, "oauth-keys.env") + with open(key_file, "a", encoding="utf-8") as f: + f.write(f'OPENROUTER_API_KEY="{api_key}"\n') + + io.tool_warning("Aider will load the OpenRouter key automatically in future sessions.") + io.tool_output() + + analytics.event("oauth_flow_success", provider="openrouter") + return api_key + except Exception as e: + io.tool_error(f"Successfully obtained key, but failed to save it to file: {e}") + io.tool_warning("Set OPENROUTER_API_KEY environment variable for this session only.") + # Still return the key for the current session even if saving failed + analytics.event("oauth_flow_save_failed", provider="openrouter", reason=str(e)) + return api_key + else: + io.tool_error("Authentication with OpenRouter failed.") + analytics.event("oauth_flow_failed", provider="openrouter", reason="code_exchange_failed") + return None + + +# Dummy Analytics class for testing +class DummyAnalytics: + def event(self, *args, **kwargs): + # print(f"Analytics Event: {args} 
{kwargs}") # Optional: print events + pass + + +def main(): + """Main function to test the OpenRouter OAuth flow.""" + print("Starting OpenRouter OAuth flow test...") + + # Use a real IO object for interaction + io = InputOutput( + pretty=True, + yes=False, + input_history_file=None, + chat_history_file=None, + tool_output_color="BLUE", + tool_error_color="RED", + ) + # Use a dummy analytics object + analytics = DummyAnalytics() + + # Ensure OPENROUTER_API_KEY is not set, to trigger the flow naturally + # (though start_openrouter_oauth_flow doesn't check this itself) + if "OPENROUTER_API_KEY" in os.environ: + print("Warning: OPENROUTER_API_KEY is already set in environment.") + # del os.environ["OPENROUTER_API_KEY"] # Optionally unset it for testing + + api_key = start_openrouter_oauth_flow(io, analytics) + + if api_key: + print("\nOAuth flow completed successfully!") + print(f"Obtained API Key (first 5 chars): {api_key[:5]}...") + # Be careful printing the key, even partially + else: + print("\nOAuth flow failed or was cancelled.") + + print("\nOpenRouter OAuth flow test finished.") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/locodiff-250425/results/aider_aider_onboarding.py/x-ai_grok-code-fast-1/20250909_155808/metadata.json b/locodiff-250425/results/aider_aider_onboarding.py/x-ai_grok-code-fast-1/20250909_155808/metadata.json new file mode 100644 index 000000000..52ef79788 --- /dev/null +++ b/locodiff-250425/results/aider_aider_onboarding.py/x-ai_grok-code-fast-1/20250909_155808/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "aider_aider_onboarding.py", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/aider_aider_onboarding.py_prompt.txt", + "expected_file": "locodiff-250425/prompts/aider_aider_onboarding.py_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:58:08.298785+00:00", + "success": false, + "error": "Output mismatch", + "api_error": 
false, + "raw_response_length": 16001, + "extracted_output_length": 15987, + "expected_output_length": 16118, + "results_dir": "locodiff-250425/results/aider_aider_onboarding.py/x-ai_grok-code-fast-1/20250909_155808", + "generation_id": "gen-1757433488-makyLZJ7qOrGYl1j7TrP", + "cost_usd": 0.00605576, + "prompt_tokens": 18121, + "completion_tokens": 3582, + "total_tokens": 21703, + "native_prompt_tokens": 18052, + "native_completion_tokens": 3796, + "native_finish_reason": "stop", + "stats_error": null, + "native_tokens_reasoning": 368 +} \ No newline at end of file diff --git a/locodiff-250425/results/aider_aider_onboarding.py/x-ai_grok-code-fast-1/20250909_155808/output.diff b/locodiff-250425/results/aider_aider_onboarding.py/x-ai_grok-code-fast-1/20250909_155808/output.diff new file mode 100644 index 000000000..014d614dd --- /dev/null +++ b/locodiff-250425/results/aider_aider_onboarding.py/x-ai_grok-code-fast-1/20250909_155808/output.diff @@ -0,0 +1,20 @@ + +index 01d9cdcba..c5c3ac11c 100644 +--- a/aider_aider_onboarding.py_expectedoutput.txt (expected):tmp/tmp4o5xg0bs_expected.txt ++++ b/aider_aider_onboarding.py_extracted.txt (actual):tmp/tmpb5hgavpn_actual.txt +@@ -100,7 +100,6 @@ def offer_openrouter_oauth(io, analytics): + # Successfully got key via OAuth, use the default OpenRouter model + # Ensure OPENROUTER_API_KEY is now set in the environment for later use + os.environ["OPENROUTER_API_KEY"] = openrouter_key +- # Track OAuth success leading to model selection + analytics.event("oauth_flow_success") + return True + +@@ -243,7 +242,6 @@ def start_openrouter_oauth_flow(io, analytics): + b"You can close this browser tab.

" + ) + # Signal the main thread to shut down the server +- # Signal the main thread to shut down the server + shutdown_server.set() + else: + # Redirect to aider website if 'code' is missing (e.g., user visited manually) diff --git a/locodiff-250425/results/aider_aider_onboarding.py/x-ai_grok-code-fast-1/20250909_155808/raw_response.txt b/locodiff-250425/results/aider_aider_onboarding.py/x-ai_grok-code-fast-1/20250909_155808/raw_response.txt new file mode 100644 index 000000000..678b40e9a --- /dev/null +++ b/locodiff-250425/results/aider_aider_onboarding.py/x-ai_grok-code-fast-1/20250909_155808/raw_response.txt @@ -0,0 +1,428 @@ +```python +import base64 +import hashlib +import http.server +import os +import secrets +import socketserver +import threading +import time +import webbrowser +from urllib.parse import parse_qs, urlparse + +import requests + +from aider import urls +from aider.io import InputOutput + + +def check_openrouter_tier(api_key): + """ + Checks if the user is on a free tier for OpenRouter. + + Args: + api_key: The OpenRouter API key to check. + + Returns: + A boolean indicating if the user is on a free tier (True) or paid tier (False). + Returns True if the check fails. + """ + try: + response = requests.get( + "https://openrouter.ai/api/v1/auth/key", + headers={"Authorization": f"Bearer {api_key}"}, + timeout=5, # Add a reasonable timeout + ) + response.raise_for_status() + data = response.json() + # According to the documentation, 'is_free_tier' will be true if the user has never paid + return data.get("data", {}).get("is_free_tier", True) # Default to True if not found + except Exception: + # If there's any error, we'll default to assuming free tier + return True + + +def try_to_select_default_model(): + """ + Attempts to select a default model based on available API keys. + Checks OpenRouter tier status to select appropriate model. + + Returns: + The name of the selected model, or None if no suitable default is found. 
+ """ + # Special handling for OpenRouter + openrouter_key = os.environ.get("OPENROUTER_API_KEY") + if openrouter_key: + # Check if the user is on a free tier + is_free_tier = check_openrouter_tier(openrouter_key) + if is_free_tier: + return "openrouter/google/gemini-2.5-pro-exp-03-25:free" + else: + return "openrouter/anthropic/claude-3.7-sonnet" + + # Select model based on other available API keys + model_key_pairs = [ + ("ANTHROPIC_API_KEY", "sonnet"), + ("DEEPSEEK_API_KEY", "deepseek"), + ("OPENAI_API_KEY", "gpt-4o"), + ("GEMINI_API_KEY", "gemini/gemini-2.5-pro-exp-03-25"), + ("VERTEXAI_PROJECT", "vertex_ai/gemini-2.5-pro-exp-03-25"), + ] + + for env_key, model_name in model_key_pairs: + api_key_value = os.environ.get(env_key) + if api_key_value: + return model_name + + return None + + +def offer_openrouter_oauth(io, analytics): + """ + Offers OpenRouter OAuth flow to the user if no API keys are found. + + Args: + io: The InputOutput object for user interaction. + analytics: The Analytics object for tracking events. + + Returns: + True if authentication was successful, False otherwise. 
+ """ + # No API keys found - Offer OpenRouter OAuth + io.tool_output("OpenRouter provides free and paid access to many LLMs.") + # Use confirm_ask which handles non-interactive cases + if io.confirm_ask( + "Login to OpenRouter or create a free account?", + default="y", + ): + analytics.event("oauth_flow_initiated", provider="openrouter") + openrouter_key = start_openrouter_oauth_flow(io, analytics) + if openrouter_key: + # Successfully got key via OAuth, use the default OpenRouter model + # Ensure OPENROUTER_API_KEY is now set in the environment for later use + os.environ["OPENROUTER_API_KEY"] = openrouter_key + analytics.event("oauth_flow_success") + return True + + # OAuth failed or was cancelled by user implicitly (e.g., closing browser) + # Error messages are handled within start_openrouter_oauth_flow + analytics.event("oauth_flow_failure") + io.tool_error("OpenRouter authentication did not complete successfully.") + # Fall through to the final error message + + return False + + +def select_default_model(args, io, analytics): + """ + Selects a default model based on available API keys if no model is specified. + Offers OAuth flow for OpenRouter if no keys are found. + + Args: + args: The command line arguments object. + io: The InputOutput object for user interaction. + analytics: The Analytics object for tracking events. + + Returns: + The name of the selected model, or None if no suitable default is found. + """ + if args.model: + return args.model # Model already specified + + model = try_to_select_default_model() + if model: + io.tool_warning(f"Using {model} model with API key from environment.") + analytics.event("auto_model_selection", model=model) + return model + + no_model_msg = "No LLM model was specified and no API keys were provided." 
+ io.tool_warning(no_model_msg) + + # Try OAuth if no model was detected + offer_openrouter_oauth(io, analytics) + + # Check again after potential OAuth success + model = try_to_select_default_model() + if model: + return model + + io.offer_url(urls.models_and_keys, "Open documentation URL for more info?") + + +# Helper function to find an available port +def find_available_port(start_port=8484, end_port=8584): + for port in range(start_port, end_port + 1): + try: + # Check if the port is available by trying to bind to it + with socketserver.TCPServer(("localhost", port), None): + return port + except OSError: + # Port is likely already in use + continue + return None + + +# PKCE code generation +def generate_pkce_codes(): + code_verifier = secrets.token_urlsafe(64) + hasher = hashlib.sha256() + hasher.update(code_verifier.encode("utf-8")) + code_challenge = base64.urlsafe_b64encode(hasher.digest()).rstrip(b"=").decode("utf-8") + return code_verifier, code_challenge + + +# Function to exchange the authorization code for an API key +def exchange_code_for_key(code, code_verifier, io): + try: + response = requests.post( + "https://openrouter.ai/api/v1/auth/keys", + headers={"Content-Type": "application/json"}, + json={ + "code": code, + "code_verifier": code_verifier, + "code_challenge_method": "S256", + }, + timeout=30, # Add a timeout + ) + response.raise_for_status() # Raise exception for bad status codes (4xx or 5xx) + data = response.json() + api_key = data.get("key") + if not api_key: + io.tool_error("Error: 'key' not found in OpenRouter response.") + io.tool_error(f"Response: {response.text}") + return None + return api_key + except requests.exceptions.Timeout: + io.tool_error("Error: Request to OpenRouter timed out during code exchange.") + return None + except requests.exceptions.HTTPError as e: + io.tool_error( + "Error exchanging code for OpenRouter key:" + f" {e.response.status_code} {e.response.reason}" + ) + io.tool_error(f"Response: {e.response.text}") 
+ return None + except requests.exceptions.RequestException as e: + io.tool_error(f"Error exchanging code for OpenRouter key: {e}") + return None + except Exception as e: + io.tool_error(f"Unexpected error during code exchange: {e}") + return None + + +# Function to start the OAuth flow +def start_openrouter_oauth_flow(io, analytics): + """Initiates the OpenRouter OAuth PKCE flow using a local server.""" + + port = find_available_port() + if not port: + io.tool_error("Could not find an available port between 8484 and 8584.") + io.tool_error("Please ensure a port in this range is free, or configure manually.") + return None + + callback_url = f"http://localhost:{port}/callback/aider" + auth_code = None + server_error = None + server_started = threading.Event() + shutdown_server = threading.Event() + + class OAuthCallbackHandler(http.server.SimpleHTTPRequestHandler): + def do_GET(self): + nonlocal auth_code, server_error + parsed_path = urlparse(self.path) + if parsed_path.path == "/callback/aider": + query_params = parse_qs(parsed_path.query) + if "code" in query_params: + auth_code = query_params["code"][0] + self.send_response(200) + self.send_header("Content-type", "text/html") + self.end_headers() + self.wfile.write( + b"

Success!

" + b"

Aider has received the authentication code. " + b"You can close this browser tab.

" + ) + # Signal the main thread to shut down the server + shutdown_server.set() + else: + # Redirect to aider website if 'code' is missing (e.g., user visited manually) + self.send_response(302) # Found (temporary redirect) + self.send_header("Location", urls.website) + self.end_headers() + # No need to set server_error, just redirect. + # Do NOT shut down the server here; wait for timeout or success. + else: + # Redirect anything else (e.g., favicon.ico) to the main website as well + self.send_response(302) + self.send_header("Location", urls.website) + self.end_headers() + self.wfile.write(b"Not Found") + + def log_message(self, format, *args): + # Suppress server logging to keep terminal clean + pass + + def run_server(): + nonlocal server_error + try: + with socketserver.TCPServer(("localhost", port), OAuthCallbackHandler) as httpd: + io.tool_output(f"Temporary server listening on {callback_url}", log_only=True) + server_started.set() # Signal that the server is ready + # Wait until shutdown is requested or timeout occurs (handled by main thread) + while not shutdown_server.is_set(): + httpd.handle_request() # Handle one request at a time + # Add a small sleep to prevent busy-waiting if needed, + # though handle_request should block appropriately. 
+ time.sleep(0.1) + io.tool_output("Shutting down temporary server.", log_only=True) + except Exception as e: + server_error = f"Failed to start or run temporary server: {e}" + server_started.set() # Signal even if failed, error will be checked + shutdown_server.set() # Ensure shutdown logic proceeds + + server_thread = threading.Thread(target=run_server, daemon=True) + server_thread.start() + + # Wait briefly for the server to start, or for an error + if not server_started.wait(timeout=5): + io.tool_error("Temporary authentication server failed to start in time.") + shutdown_server.set() # Ensure thread exits if it eventually starts + server_thread.join(timeout=1) + return None + + # Check if server failed during startup + if server_error: + io.tool_error(server_error) + shutdown_server.set() # Ensure thread exits + server_thread.join(timeout=1) + return None + + # Generate codes and URL + code_verifier, code_challenge = generate_pkce_codes() + auth_url_base = "https://openrouter.ai/auth" + auth_params = { + "callback_url": callback_url, + "code_challenge": code_challenge, + "code_challenge_method": "S256", + } + auth_url = f"{auth_url_base}?{'&'.join(f'{k}={v}' for k, v in auth_params.items())}" + + io.tool_output("\nPlease open this URL in your browser to connect Aider with OpenRouter:") + io.tool_output() + print(auth_url) + + MINUTES = 5 + io.tool_output(f"\nWaiting up to {MINUTES} minutes for you to finish in the browser...") + io.tool_output("Use Control-C to interrupt.") + + try: + webbrowser.open(auth_url) + except Exception: + pass + + # Wait for the callback to set the auth_code or for timeout/error + interrupted = False + try: + shutdown_server.wait(timeout=MINUTES * 60) # Convert minutes to seconds + except KeyboardInterrupt: + io.tool_warning("\nOAuth flow interrupted.") + analytics.event("oauth_flow_failed", provider="openrouter", reason="user_interrupt") + interrupted = True + # Ensure the server thread is signaled to shut down + 
shutdown_server.set() + + # Join the server thread to ensure it's cleaned up + server_thread.join(timeout=1) + + if interrupted: + return None # Return None if interrupted by user + + if server_error: + io.tool_error(f"Authentication failed: {server_error}") + analytics.event("oauth_flow_failed", provider="openrouter", reason=server_error) + return None + + if not auth_code: + io.tool_error("Authentication with OpenRouter failed.") + analytics.event("oauth_flow_failed", provider="openrouter") + return None + + io.tool_output("Completing authentication...") + analytics.event("oauth_flow_code_received", provider="openrouter") + + # Exchange code for key + api_key = exchange_code_for_key(auth_code, code_verifier, io) + + if api_key: + # Set env var for the current session immediately + os.environ["OPENROUTER_API_KEY"] = api_key + + # Save the key to the oauth-keys.env file + try: + config_dir = os.path.expanduser("~/.aider") + os.makedirs(config_dir, exist_ok=True) + key_file = os.path.join(config_dir, "oauth-keys.env") + with open(key_file, "a", encoding="utf-8") as f: + f.write(f'OPENROUTER_API_KEY="{api_key}"\n') + + io.tool_warning("Aider will load the OpenRouter key automatically in future sessions.") + io.tool_output() + + analytics.event("oauth_flow_success", provider="openrouter") + return api_key + except Exception as e: + io.tool_error(f"Successfully obtained key, but failed to save it to file: {e}") + io.tool_warning("Set OPENROUTER_API_KEY environment variable for this session only.") + # Still return the key for the current session even if saving failed + analytics.event("oauth_flow_save_failed", provider="openrouter", reason=str(e)) + return api_key + else: + io.tool_error("Authentication with OpenRouter failed.") + analytics.event("oauth_flow_failed", provider="openrouter", reason="code_exchange_failed") + return None + + +# Dummy Analytics class for testing +class DummyAnalytics: + def event(self, *args, **kwargs): + # print(f"Analytics Event: {args} 
{kwargs}") # Optional: print events + pass + + +def main(): + """Main function to test the OpenRouter OAuth flow.""" + print("Starting OpenRouter OAuth flow test...") + + # Use a real IO object for interaction + io = InputOutput( + pretty=True, + yes=False, + input_history_file=None, + chat_history_file=None, + tool_output_color="BLUE", + tool_error_color="RED", + ) + # Use a dummy analytics object + analytics = DummyAnalytics() + + # Ensure OPENROUTER_API_KEY is not set, to trigger the flow naturally + # (though start_openrouter_oauth_flow doesn't check this itself) + if "OPENROUTER_API_KEY" in os.environ: + print("Warning: OPENROUTER_API_KEY is already set in environment.") + # del os.environ["OPENROUTER_API_KEY"] # Optionally unset it for testing + + api_key = start_openrouter_oauth_flow(io, analytics) + + if api_key: + print("\nOAuth flow completed successfully!") + print(f"Obtained API Key (first 5 chars): {api_key[:5]}...") + # Be careful printing the key, even partially + else: + print("\nOAuth flow failed or was cancelled.") + + print("\nOpenRouter OAuth flow test finished.") + + +if __name__ == "__main__": + main() +``` \ No newline at end of file diff --git a/locodiff-250425/results/aider_aider_repo.py/x-ai_grok-code-fast-1/20250909_155843/extracted_output.txt b/locodiff-250425/results/aider_aider_repo.py/x-ai_grok-code-fast-1/20250909_155843/extracted_output.txt new file mode 100644 index 000000000..76d2fd980 --- /dev/null +++ b/locodiff-250425/results/aider_aider_repo.py/x-ai_grok-code-fast-1/20250909_155843/extracted_output.txt @@ -0,0 +1,470 @@ +import os +import time +from pathlib import Path, PurePosixPath + +try: + import git + + ANY_GIT_ERROR = [ + git.exc.ODBError, + git.exc.GitError, + git.exc.InvalidGitRepositoryError, + git.exc.GitCommandNotFound, + ] +except ImportError: + git = None + ANY_GIT_ERROR = [] + +import pathspec + +from aider import prompts, utils + +from .dump import dump # noqa: F401 + +ANY_GIT_ERROR += [ + OSError, + IndexError, 
+ BufferError, + TypeError, + ValueError, + AttributeError, + AssertionError, + TimeoutError, +] +ANY_GIT_ERROR = tuple(ANY_GIT_ERROR) + + +class GitRepo: + repo = None + aider_ignore_file = None + aider_ignore_spec = None + aider_ignore_ts = 0 + aider_ignore_last_check = 0 + subtree_only = False + ignore_file_cache = {} + git_repo_error = None + + def __init__( + self, + io, + fnames, + git_dname, + aider_ignore_file=None, + models=None, + attribute_author=True, + attribute_committer=True, + attribute_commit_message_author=False, + attribute_commit_message_committer=False, + commit_prompt=None, + subtree_only=False, + git_commit_verify=True, + ): + self.io = io + self.models = models + + self.normalized_path = {} + self.tree_files = {} + + self.attribute_author = attribute_author + self.attribute_committer = attribute_committer + self.attribute_commit_message_author = attribute_commit_message_author + self.attribute_commit_message_committer = attribute_commit_message_committer + self.commit_prompt = commit_prompt + self.subtree_only = subtree_only + self.git_commit_verify = git_commit_verify + self.ignore_file_cache = {} + + if git_dname: + check_fnames = [git_dname] + elif fnames: + check_fnames = fnames + else: + check_fnames = ["."] + + repo_paths = [] + for fname in check_fnames: + fname = Path(fname) + fname = fname.resolve() + + if not fname.exists() and fname.parent.exists(): + fname = fname.parent + + try: + repo_path = git.Repo(fname, search_parent_directories=True).working_dir + repo_path = utils.safe_abs_path(repo_path) + repo_paths.append(repo_path) + except ANY_GIT_ERROR: + pass + + num_repos = len(set(repo_paths)) + + if num_repos == 0: + raise FileNotFoundError + if num_repos > 1: + self.io.tool_error("Files are in different git repos.") + raise FileNotFoundError + + # https://github.com/gitpython-developers/GitPython/issues/427 + self.repo = git.Repo(repo_paths.pop(), odbt=git.GitDB) + self.root = utils.safe_abs_path(self.repo.working_tree_dir) + + 
if aider_ignore_file: + self.aider_ignore_file = Path(aider_ignore_file) + + def commit(self, fnames=None, context=None, message=None, aider_edits=False): + if not fnames and not self.repo.is_dirty(): + return + + diffs = self.get_diffs(fnames) + if not diffs: + return + + if message: + commit_message = message + else: + commit_message = self.get_commit_message(diffs, context) + + if aider_edits and self.attribute_commit_message_author: + commit_message = "aider: " + commit_message + elif self.attribute_commit_message_committer: + commit_message = "aider: " + commit_message + + if not commit_message: + commit_message = "(no commit message provided)" + + full_commit_message = commit_message + # if context: + # full_commit_message += "\n\n# Aider chat conversation:\n\n" + context + + cmd = ["-m", full_commit_message] + if not self.git_commit_verify: + cmd.append("--no-verify") + if fnames: + fnames = [str(self.abs_root_path(fn)) for fn in fnames] + for fname in fnames: + try: + self.repo.git.add(fname) + except ANY_GIT_ERROR as err: + self.io.tool_error(f"Unable to add {fname}: {err}") + cmd += ["--"] + fnames + else: + cmd += ["-a"] + + original_user_name = self.repo.git.config("--get", "user.name") + original_committer_name_env = os.environ.get("GIT_COMMITTER_NAME") + committer_name = f"{original_user_name} (aider)" + if self.attribute_committer: + os.environ["GIT_COMMITTER_NAME"] = committer_name + + if aider_edits and self.attribute_author: + original_author_name_env = os.environ.get("GIT_AUTHOR_NAME") + os.environ["GIT_AUTHOR_NAME"] = committer_name + + try: + self.repo.git.commit(cmd) + commit_hash = self.get_head_commit_sha(short=True) + self.io.tool_output(f"Commit {commit_hash} {commit_message}", bold=True) + return commit_hash, commit_message + except ANY_GIT_ERROR as err: + self.io.tool_error(f"Unable to commit: {err}") + finally: + # Restore the env + + if self.attribute_committer: + if original_committer_name_env is not None: + 
os.environ["GIT_COMMITTER_NAME"] = original_committer_name_env + else: + del os.environ["GIT_COMMITTER_NAME"] + + if aider_edits and self.attribute_author: + if original_author_name_env is not None: + os.environ["GIT_AUTHOR_NAME"] = original_author_name_env + else: + del os.environ["GIT_AUTHOR_NAME"] + + def get_rel_repo_dir(self): + try: + return os.path.relpath(self.repo.git_dir, os.getcwd()) + except (ValueError, OSError): + return self.repo.git_dir + + def get_commit_message(self, diffs, context): + diffs = "# Diffs:\n" + diffs + + content = "" + if context: + content += context + "\n" + content += diffs + + system_content = self.commit_prompt or prompts.commit_system + messages = [ + dict(role="system", content=system_content), + dict(role="user", content=content), + ] + + commit_message = None + for model in self.models: + num_tokens = model.token_count(messages) + max_tokens = model.info.get("max_input_tokens") or 0 + if max_tokens and num_tokens > max_tokens: + continue + commit_message = model.simple_send_with_retries(messages) + if commit_message: + break + + if not commit_message: + self.io.tool_error("Failed to generate commit message!") + + commit_message = commit_message.strip() + if commit_message and commit_message[0] == '"' and commit_message[-1] == '"': + commit_message = commit_message[1:-1].strip() + + return commit_message + + def get_diffs(self, fnames=None): + # We always want diffs of index and working dir + + current_branch_has_commits = False + try: + active_branch = self.repo.active_branch + try: + commits = self.repo.iter_commits(active_branch) + current_branch_has_commits = any(commits) + except ANY_GIT_ERROR: + pass + except (TypeError,) + ANY_GIT_ERROR: + pass + + if not fnames: + fnames = [] + + diffs = "" + for fname in fnames: + if not self.path_in_repo(fname): + diffs += f"Added {fname}\n" + + try: + if current_branch_has_commits: + args = ["HEAD", "--"] + list(fnames) + diffs += self.repo.git.diff(*args) + return diffs + + 
wd_args = ["--"] + list(fnames) + index_args = ["--cached"] + wd_args + + diffs += self.repo.git.diff(*index_args) + diffs += self.repo.git.diff(*wd_args) + + return diffs + except ANY_GIT_ERROR as err: + self.io.tool_error(f"Unable to diff: {err}") + + def diff_commits(self, pretty, from_commit, to_commit): + args = [] + if pretty: + args += ["--color"] + else: + args += ["--color=never"] + + args += [from_commit, to_commit] + diffs = self.repo.git.diff(*args) + + return diffs + + def show_diffs(self, pretty): + diffs = self.get_diffs() + print(diffs) + + def get_tracked_files(self): + if not self.repo: + return [] + + try: + commit = self.repo.head.commit + except ValueError: + commit = None + except ANY_GIT_ERROR as err: + self.git_repo_error = err + self.io.tool_error(f"Unable to list files in git repo: {err}") + self.io.tool_output("Is your git repo corrupted?") + return [] + + files = set() + if commit: + if commit in self.tree_files: + files = self.tree_files[commit] + else: + try: + iterator = commit.tree.traverse() + blob = None # Initialize blob + while True: + try: + blob = next(iterator) + if blob.type == "blob": # blob is a file + files.add(blob.path) + except IndexError: + # Handle potential index error during tree traversal + # without relying on potentially unassigned 'blob' + self.io.tool_warning( + "GitRepo: Index error encountered while reading git tree object." + " Skipping." 
+ ) + continue + except StopIteration: + break + except ANY_GIT_ERROR as err: + self.git_repo_error = err + self.io.tool_error(f"Unable to list files in git repo: {err}") + self.io.tool_output("Is your git repo corrupted?") + return [] + files = set(self.normalize_path(path) for path in files) + self.tree_files[commit] = set(files) + + # Add staged files + index = self.repo.index + try: + staged_files = [path for path, _ in index.entries.keys()] + files.update(self.normalize_path(path) for path in staged_files) + except ANY_GIT_ERROR as err: + self.io.tool_error(f"Unable to read staged files: {err}") + + res = [fname for fname in files if not self.ignored_file(fname)] + + return res + + def normalize_path(self, path): + orig_path = path + res = self.normalized_path.get(orig_path) + if res: + return res + + path = Path(self.root) / path + path = PurePosixPath(path) + path = path.relative_to(self.root) + + path = str(path) + self.normalized_path[orig_path] = path + return path + + def refresh_aider_ignore(self): + if not self.aider_ignore_file: + return + + current_time = time.time() + if current_time - self.aider_ignore_last_check < 1: + return + + self.aider_ignore_last_check = current_time + + if not self.aider_ignore_file.is_file(): + return + + mtime = self.aider_ignore_file.stat().st_mtime + if mtime != self.aider_ignore_ts: + self.aider_ignore_ts = mtime + self.ignore_file_cache = {} + lines = self.aider_ignore_file.read_text().splitlines() + self.aider_ignore_spec = pathspec.PathSpec.from_lines( + pathspec.patterns.GitWildMatchPattern, + lines, + ) + + def git_ignored_file(self, path): + if not self.repo: + return + try: + if self.repo.ignored(path): + return True + except ANY_GIT_ERROR: + return False + + def ignored_file(self, fname): + self.refresh_aider_ignore() + + if fname in self.ignore_file_cache: + return self.ignore_file_cache[fname] + + result = self.ignored_file_raw(fname) + self.ignore_file_cache[fname] = result + return result + + def 
ignored_file_raw(self, fname): + if self.subtree_only: + try: + fname_path = Path(self.normalize_path(fname)) + cwd_path = Path.cwd().resolve().relative_to(Path(self.root).resolve()) + except ValueError: + # Issue #1524 + # ValueError: 'C:\\dev\\squid-certbot' is not in the subpath of + # 'C:\\dev\\squid-certbot' + # Clearly, fname is not under cwd... so ignore it + return True + + if cwd_path not in fname_path.parents and fname_path != cwd_path: + return True + + if not self.aider_ignore_file or not self.aider_ignore_file.is_file(): + return False + + try: + fname = self.normalize_path(fname) + except ValueError: + return True + + return self.aider_ignore_spec.match_file(fname) + + def path_in_repo(self, path): + if not self.repo: + return + if not path: + return + + tracked_files = set(self.get_tracked_files()) + return self.normalize_path(path) in tracked_files + + def abs_root_path(self, path): + res = Path(self.root) / path + return utils.safe_abs_path(res) + + def get_head_commit(self): + try: + return self.repo.head.commit + except (ValueError,) + ANY_GIT_ERROR: + return None + + def get_head_commit_sha(self, short=False): + commit = self.get_head_commit() + if not commit: + return + if short: + return commit.hexsha[:7] + return commit.hexsha + + def get_head_commit_message(self, default=None): + commit = self.get_head_commit() + if not commit: + return default + return commit.message + + def get_dirty_files(self): + """ + Returns a list of all files which are dirty (not committed), either staged or in the working + directory. 
+ """ + dirty_files = set() + + # Get staged files + staged_files = self.repo.git.diff("--name-only", "--cached").splitlines() + dirty_files.update(staged_files) + + # Get unstaged files + unstaged_files = self.repo.git.diff("--name-only").splitlines() + dirty_files.update(unstaged_files) + + return list(dirty_files) + + def is_dirty(self, path=None): + if path and not self.path_in_repo(path): + return True + + return self.repo.is_dirty(path=path) \ No newline at end of file diff --git a/locodiff-250425/results/aider_aider_repo.py/x-ai_grok-code-fast-1/20250909_155843/metadata.json b/locodiff-250425/results/aider_aider_repo.py/x-ai_grok-code-fast-1/20250909_155843/metadata.json new file mode 100644 index 000000000..ee3ddb3f0 --- /dev/null +++ b/locodiff-250425/results/aider_aider_repo.py/x-ai_grok-code-fast-1/20250909_155843/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "aider_aider_repo.py", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/aider_aider_repo.py_prompt.txt", + "expected_file": "locodiff-250425/prompts/aider_aider_repo.py_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:58:43.823755+00:00", + "success": false, + "error": "Output mismatch", + "api_error": false, + "raw_response_length": 14914, + "extracted_output_length": 14900, + "expected_output_length": 14779, + "results_dir": "locodiff-250425/results/aider_aider_repo.py/x-ai_grok-code-fast-1/20250909_155843", + "generation_id": "gen-1757433523-NniVc2OMgTwwi34SfUEH", + "cost_usd": 0.00656598, + "prompt_tokens": 52270, + "completion_tokens": 3411, + "total_tokens": 55681, + "native_prompt_tokens": 51270, + "native_completion_tokens": 3693, + "native_finish_reason": "stop", + "stats_error": null, + "native_tokens_reasoning": 623 +} \ No newline at end of file diff --git a/locodiff-250425/results/aider_aider_repo.py/x-ai_grok-code-fast-1/20250909_155843/output.diff 
b/locodiff-250425/results/aider_aider_repo.py/x-ai_grok-code-fast-1/20250909_155843/output.diff new file mode 100644 index 000000000..b4dc9f7c6 --- /dev/null +++ b/locodiff-250425/results/aider_aider_repo.py/x-ai_grok-code-fast-1/20250909_155843/output.diff @@ -0,0 +1,99 @@ + +index 31db1edfc..76d2fd980 100644 +--- a/aider_aider_repo.py_expectedoutput.txt (expected):tmp/tmpmd0v4gm__expected.txt ++++ b/aider_aider_repo.py_extracted.txt (actual):tmp/tmp5j08s2sj_actual.txt +@@ -153,7 +153,6 @@ class GitRepo: + original_user_name = self.repo.git.config("--get", "user.name") + original_committer_name_env = os.environ.get("GIT_COMMITTER_NAME") + committer_name = f"{original_user_name} (aider)" +- + if self.attribute_committer: + os.environ["GIT_COMMITTER_NAME"] = committer_name + +@@ -215,7 +214,6 @@ class GitRepo: + + if not commit_message: + self.io.tool_error("Failed to generate commit message!") +- return + + commit_message = commit_message.strip() + if commit_message and commit_message[0] == '"' and commit_message[-1] == '"': +@@ -273,6 +271,10 @@ class GitRepo: + + return diffs + ++ def show_diffs(self, pretty): ++ diffs = self.get_diffs() ++ print(diffs) ++ + def get_tracked_files(self): + if not self.repo: + return [] +@@ -336,7 +338,11 @@ class GitRepo: + if res: + return res + +- path = str(Path(PurePosixPath((Path(self.root) / path).relative_to(self.root)))) ++ path = Path(self.root) / path ++ path = PurePosixPath(path) ++ path = path.relative_to(self.root) ++ ++ path = str(path) + self.normalized_path[orig_path] = path + return path + +@@ -420,6 +426,26 @@ class GitRepo: + res = Path(self.root) / path + return utils.safe_abs_path(res) + ++ def get_head_commit(self): ++ try: ++ return self.repo.head.commit ++ except (ValueError,) + ANY_GIT_ERROR: ++ return None ++ ++ def get_head_commit_sha(self, short=False): ++ commit = self.get_head_commit() ++ if not commit: ++ return ++ if short: ++ return commit.hexsha[:7] ++ return commit.hexsha ++ ++ def 
get_head_commit_message(self, default=None): ++ commit = self.get_head_commit() ++ if not commit: ++ return default ++ return commit.message ++ + def get_dirty_files(self): + """ + Returns a list of all files which are dirty (not committed), either staged or in the working +@@ -441,24 +467,4 @@ class GitRepo: + if path and not self.path_in_repo(path): + return True + +- return self.repo.is_dirty(path=path) +- +- def get_head_commit(self): +- try: +- return self.repo.head.commit +- except (ValueError,) + ANY_GIT_ERROR: +- return None +- +- def get_head_commit_sha(self, short=False): +- commit = self.get_head_commit() +- if not commit: +- return +- if short: +- return commit.hexsha[:7] +- return commit.hexsha +- +- def get_head_commit_message(self, default=None): +- commit = self.get_head_commit() +- if not commit: +- return default +- return commit.message +\ No newline at end of file ++ return self.repo.is_dirty(path=path) +\ No newline at end of file diff --git a/locodiff-250425/results/aider_aider_repo.py/x-ai_grok-code-fast-1/20250909_155843/raw_response.txt b/locodiff-250425/results/aider_aider_repo.py/x-ai_grok-code-fast-1/20250909_155843/raw_response.txt new file mode 100644 index 000000000..a1c764790 --- /dev/null +++ b/locodiff-250425/results/aider_aider_repo.py/x-ai_grok-code-fast-1/20250909_155843/raw_response.txt @@ -0,0 +1,472 @@ +```python +import os +import time +from pathlib import Path, PurePosixPath + +try: + import git + + ANY_GIT_ERROR = [ + git.exc.ODBError, + git.exc.GitError, + git.exc.InvalidGitRepositoryError, + git.exc.GitCommandNotFound, + ] +except ImportError: + git = None + ANY_GIT_ERROR = [] + +import pathspec + +from aider import prompts, utils + +from .dump import dump # noqa: F401 + +ANY_GIT_ERROR += [ + OSError, + IndexError, + BufferError, + TypeError, + ValueError, + AttributeError, + AssertionError, + TimeoutError, +] +ANY_GIT_ERROR = tuple(ANY_GIT_ERROR) + + +class GitRepo: + repo = None + aider_ignore_file = None + 
aider_ignore_spec = None + aider_ignore_ts = 0 + aider_ignore_last_check = 0 + subtree_only = False + ignore_file_cache = {} + git_repo_error = None + + def __init__( + self, + io, + fnames, + git_dname, + aider_ignore_file=None, + models=None, + attribute_author=True, + attribute_committer=True, + attribute_commit_message_author=False, + attribute_commit_message_committer=False, + commit_prompt=None, + subtree_only=False, + git_commit_verify=True, + ): + self.io = io + self.models = models + + self.normalized_path = {} + self.tree_files = {} + + self.attribute_author = attribute_author + self.attribute_committer = attribute_committer + self.attribute_commit_message_author = attribute_commit_message_author + self.attribute_commit_message_committer = attribute_commit_message_committer + self.commit_prompt = commit_prompt + self.subtree_only = subtree_only + self.git_commit_verify = git_commit_verify + self.ignore_file_cache = {} + + if git_dname: + check_fnames = [git_dname] + elif fnames: + check_fnames = fnames + else: + check_fnames = ["."] + + repo_paths = [] + for fname in check_fnames: + fname = Path(fname) + fname = fname.resolve() + + if not fname.exists() and fname.parent.exists(): + fname = fname.parent + + try: + repo_path = git.Repo(fname, search_parent_directories=True).working_dir + repo_path = utils.safe_abs_path(repo_path) + repo_paths.append(repo_path) + except ANY_GIT_ERROR: + pass + + num_repos = len(set(repo_paths)) + + if num_repos == 0: + raise FileNotFoundError + if num_repos > 1: + self.io.tool_error("Files are in different git repos.") + raise FileNotFoundError + + # https://github.com/gitpython-developers/GitPython/issues/427 + self.repo = git.Repo(repo_paths.pop(), odbt=git.GitDB) + self.root = utils.safe_abs_path(self.repo.working_tree_dir) + + if aider_ignore_file: + self.aider_ignore_file = Path(aider_ignore_file) + + def commit(self, fnames=None, context=None, message=None, aider_edits=False): + if not fnames and not 
self.repo.is_dirty(): + return + + diffs = self.get_diffs(fnames) + if not diffs: + return + + if message: + commit_message = message + else: + commit_message = self.get_commit_message(diffs, context) + + if aider_edits and self.attribute_commit_message_author: + commit_message = "aider: " + commit_message + elif self.attribute_commit_message_committer: + commit_message = "aider: " + commit_message + + if not commit_message: + commit_message = "(no commit message provided)" + + full_commit_message = commit_message + # if context: + # full_commit_message += "\n\n# Aider chat conversation:\n\n" + context + + cmd = ["-m", full_commit_message] + if not self.git_commit_verify: + cmd.append("--no-verify") + if fnames: + fnames = [str(self.abs_root_path(fn)) for fn in fnames] + for fname in fnames: + try: + self.repo.git.add(fname) + except ANY_GIT_ERROR as err: + self.io.tool_error(f"Unable to add {fname}: {err}") + cmd += ["--"] + fnames + else: + cmd += ["-a"] + + original_user_name = self.repo.git.config("--get", "user.name") + original_committer_name_env = os.environ.get("GIT_COMMITTER_NAME") + committer_name = f"{original_user_name} (aider)" + if self.attribute_committer: + os.environ["GIT_COMMITTER_NAME"] = committer_name + + if aider_edits and self.attribute_author: + original_author_name_env = os.environ.get("GIT_AUTHOR_NAME") + os.environ["GIT_AUTHOR_NAME"] = committer_name + + try: + self.repo.git.commit(cmd) + commit_hash = self.get_head_commit_sha(short=True) + self.io.tool_output(f"Commit {commit_hash} {commit_message}", bold=True) + return commit_hash, commit_message + except ANY_GIT_ERROR as err: + self.io.tool_error(f"Unable to commit: {err}") + finally: + # Restore the env + + if self.attribute_committer: + if original_committer_name_env is not None: + os.environ["GIT_COMMITTER_NAME"] = original_committer_name_env + else: + del os.environ["GIT_COMMITTER_NAME"] + + if aider_edits and self.attribute_author: + if original_author_name_env is not None: + 
os.environ["GIT_AUTHOR_NAME"] = original_author_name_env + else: + del os.environ["GIT_AUTHOR_NAME"] + + def get_rel_repo_dir(self): + try: + return os.path.relpath(self.repo.git_dir, os.getcwd()) + except (ValueError, OSError): + return self.repo.git_dir + + def get_commit_message(self, diffs, context): + diffs = "# Diffs:\n" + diffs + + content = "" + if context: + content += context + "\n" + content += diffs + + system_content = self.commit_prompt or prompts.commit_system + messages = [ + dict(role="system", content=system_content), + dict(role="user", content=content), + ] + + commit_message = None + for model in self.models: + num_tokens = model.token_count(messages) + max_tokens = model.info.get("max_input_tokens") or 0 + if max_tokens and num_tokens > max_tokens: + continue + commit_message = model.simple_send_with_retries(messages) + if commit_message: + break + + if not commit_message: + self.io.tool_error("Failed to generate commit message!") + + commit_message = commit_message.strip() + if commit_message and commit_message[0] == '"' and commit_message[-1] == '"': + commit_message = commit_message[1:-1].strip() + + return commit_message + + def get_diffs(self, fnames=None): + # We always want diffs of index and working dir + + current_branch_has_commits = False + try: + active_branch = self.repo.active_branch + try: + commits = self.repo.iter_commits(active_branch) + current_branch_has_commits = any(commits) + except ANY_GIT_ERROR: + pass + except (TypeError,) + ANY_GIT_ERROR: + pass + + if not fnames: + fnames = [] + + diffs = "" + for fname in fnames: + if not self.path_in_repo(fname): + diffs += f"Added {fname}\n" + + try: + if current_branch_has_commits: + args = ["HEAD", "--"] + list(fnames) + diffs += self.repo.git.diff(*args) + return diffs + + wd_args = ["--"] + list(fnames) + index_args = ["--cached"] + wd_args + + diffs += self.repo.git.diff(*index_args) + diffs += self.repo.git.diff(*wd_args) + + return diffs + except ANY_GIT_ERROR as err: + 
self.io.tool_error(f"Unable to diff: {err}") + + def diff_commits(self, pretty, from_commit, to_commit): + args = [] + if pretty: + args += ["--color"] + else: + args += ["--color=never"] + + args += [from_commit, to_commit] + diffs = self.repo.git.diff(*args) + + return diffs + + def show_diffs(self, pretty): + diffs = self.get_diffs() + print(diffs) + + def get_tracked_files(self): + if not self.repo: + return [] + + try: + commit = self.repo.head.commit + except ValueError: + commit = None + except ANY_GIT_ERROR as err: + self.git_repo_error = err + self.io.tool_error(f"Unable to list files in git repo: {err}") + self.io.tool_output("Is your git repo corrupted?") + return [] + + files = set() + if commit: + if commit in self.tree_files: + files = self.tree_files[commit] + else: + try: + iterator = commit.tree.traverse() + blob = None # Initialize blob + while True: + try: + blob = next(iterator) + if blob.type == "blob": # blob is a file + files.add(blob.path) + except IndexError: + # Handle potential index error during tree traversal + # without relying on potentially unassigned 'blob' + self.io.tool_warning( + "GitRepo: Index error encountered while reading git tree object." + " Skipping." 
+ ) + continue + except StopIteration: + break + except ANY_GIT_ERROR as err: + self.git_repo_error = err + self.io.tool_error(f"Unable to list files in git repo: {err}") + self.io.tool_output("Is your git repo corrupted?") + return [] + files = set(self.normalize_path(path) for path in files) + self.tree_files[commit] = set(files) + + # Add staged files + index = self.repo.index + try: + staged_files = [path for path, _ in index.entries.keys()] + files.update(self.normalize_path(path) for path in staged_files) + except ANY_GIT_ERROR as err: + self.io.tool_error(f"Unable to read staged files: {err}") + + res = [fname for fname in files if not self.ignored_file(fname)] + + return res + + def normalize_path(self, path): + orig_path = path + res = self.normalized_path.get(orig_path) + if res: + return res + + path = Path(self.root) / path + path = PurePosixPath(path) + path = path.relative_to(self.root) + + path = str(path) + self.normalized_path[orig_path] = path + return path + + def refresh_aider_ignore(self): + if not self.aider_ignore_file: + return + + current_time = time.time() + if current_time - self.aider_ignore_last_check < 1: + return + + self.aider_ignore_last_check = current_time + + if not self.aider_ignore_file.is_file(): + return + + mtime = self.aider_ignore_file.stat().st_mtime + if mtime != self.aider_ignore_ts: + self.aider_ignore_ts = mtime + self.ignore_file_cache = {} + lines = self.aider_ignore_file.read_text().splitlines() + self.aider_ignore_spec = pathspec.PathSpec.from_lines( + pathspec.patterns.GitWildMatchPattern, + lines, + ) + + def git_ignored_file(self, path): + if not self.repo: + return + try: + if self.repo.ignored(path): + return True + except ANY_GIT_ERROR: + return False + + def ignored_file(self, fname): + self.refresh_aider_ignore() + + if fname in self.ignore_file_cache: + return self.ignore_file_cache[fname] + + result = self.ignored_file_raw(fname) + self.ignore_file_cache[fname] = result + return result + + def 
ignored_file_raw(self, fname): + if self.subtree_only: + try: + fname_path = Path(self.normalize_path(fname)) + cwd_path = Path.cwd().resolve().relative_to(Path(self.root).resolve()) + except ValueError: + # Issue #1524 + # ValueError: 'C:\\dev\\squid-certbot' is not in the subpath of + # 'C:\\dev\\squid-certbot' + # Clearly, fname is not under cwd... so ignore it + return True + + if cwd_path not in fname_path.parents and fname_path != cwd_path: + return True + + if not self.aider_ignore_file or not self.aider_ignore_file.is_file(): + return False + + try: + fname = self.normalize_path(fname) + except ValueError: + return True + + return self.aider_ignore_spec.match_file(fname) + + def path_in_repo(self, path): + if not self.repo: + return + if not path: + return + + tracked_files = set(self.get_tracked_files()) + return self.normalize_path(path) in tracked_files + + def abs_root_path(self, path): + res = Path(self.root) / path + return utils.safe_abs_path(res) + + def get_head_commit(self): + try: + return self.repo.head.commit + except (ValueError,) + ANY_GIT_ERROR: + return None + + def get_head_commit_sha(self, short=False): + commit = self.get_head_commit() + if not commit: + return + if short: + return commit.hexsha[:7] + return commit.hexsha + + def get_head_commit_message(self, default=None): + commit = self.get_head_commit() + if not commit: + return default + return commit.message + + def get_dirty_files(self): + """ + Returns a list of all files which are dirty (not committed), either staged or in the working + directory. 
+ """ + dirty_files = set() + + # Get staged files + staged_files = self.repo.git.diff("--name-only", "--cached").splitlines() + dirty_files.update(staged_files) + + # Get unstaged files + unstaged_files = self.repo.git.diff("--name-only").splitlines() + dirty_files.update(unstaged_files) + + return list(dirty_files) + + def is_dirty(self, path=None): + if path and not self.path_in_repo(path): + return True + + return self.repo.is_dirty(path=path) +``` \ No newline at end of file diff --git a/locodiff-250425/results/aider_aider_run_cmd.py/x-ai_grok-code-fast-1/20250909_155910/extracted_output.txt b/locodiff-250425/results/aider_aider_run_cmd.py/x-ai_grok-code-fast-1/20250909_155910/extracted_output.txt new file mode 100644 index 000000000..f57974be6 --- /dev/null +++ b/locodiff-250425/results/aider_aider_run_cmd.py/x-ai_grok-code-fast-1/20250909_155910/extracted_output.txt @@ -0,0 +1,133 @@ +import os +import platform +import subprocess +import sys +from io import BytesIO + +import pexpect +import psutil + + +def run_cmd(command, verbose=False, error_print=None, cwd=None): + try: + if sys.stdin.isatty() and hasattr(pexpect, "spawn") and platform.system() != "Windows": + return run_cmd_pexpect(command, verbose, cwd) + + return run_cmd_subprocess(command, verbose, cwd) + except OSError as e: + error_message = f"Error occurred while running command '{command}': {str(e)}" + if error_print is None: + print(error_message) + else: + error_print(error_message) + return 1, error_message + + +def get_windows_parent_process_name(): + try: + current_process = psutil.Process() + while True: + parent = current_process.parent() + if parent is None: + break + parent_name = parent.name().lower() + if parent_name in ["powershell.exe", "cmd.exe"]: + return parent_name + current_process = parent + return None + except Exception: + return None + + +def run_cmd_subprocess(command, verbose=False, cwd=None, encoding=sys.stdout.encoding): + if verbose: + print("Using run_cmd_subprocess:", 
command) + + try: + shell = os.environ.get("SHELL", "/bin/sh") + parent_process = None + + # Determine the appropriate shell + if platform.system() == "Windows": + parent_process = get_windows_parent_process_name() + if parent_process == "powershell.exe": + command = f"powershell -Command {command}" + # else: Assume cmd.exe or other Windows shell, use the command as-is + + if verbose: + print("Running command:", command) + print("SHELL:", shell) + if platform.system() == "Windows": + print("Parent process:", parent_process) + + process = subprocess.Popen( + command, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + shell=True, + encoding=encoding, + errors="replace", + bufsize=0, # Set bufsize to 0 for unbuffered output + universal_newlines=True, + cwd=cwd, + ) + + output = [] + while True: + chunk = process.stdout.read(1) + if not chunk: + break + print(chunk, end="", flush=True) # Print the chunk in real-time + output.append(chunk) # Store the chunk for later use + + process.wait() + return process.returncode, "".join(output) + except Exception as e: + return 1, str(e) + + +def run_cmd_pexpect(command, verbose=False, cwd=None): + """ + Run a shell command interactively using pexpect, capturing all output. + + :param command: The command to run as a string. + :param verbose: If True, print output in real-time. 
+ :return: A tuple containing (exit_status, output) + """ + if verbose: + print("Using run_cmd_pexpect:", command) + + output = BytesIO() + + def output_callback(b): + output.write(b) + return b + + try: + # Use the SHELL environment variable, falling back to /bin/sh if not set + shell = os.environ.get("SHELL", "/bin/sh") + if verbose: + print("With shell:", shell) + + if os.path.exists(shell): + # Use the shell from SHELL environment variable + if verbose: + print("Running pexpect.spawn with shell:", shell) + child = pexpect.spawn(shell, args=["-i", "-c", command], encoding="utf-8", cwd=cwd) + else: + # Fall back to spawning the command directly + if verbose: + print("Running pexpect.spawn without shell.") + child = pexpect.spawn(command, encoding="utf-8", cwd=cwd) + + # Transfer control to the user, capturing output + child.interact(output_filter=output_callback) + + # Wait for the command to finish and get the exit status + child.close() + return child.exitstatus, output.getvalue().decode("utf-8", errors="replace") + + except (pexpect.ExceptionPexpect, TypeError, ValueError) as e: + error_msg = f"Error running command {command}: {e}" + return 1, error_msg \ No newline at end of file diff --git a/locodiff-250425/results/aider_aider_run_cmd.py/x-ai_grok-code-fast-1/20250909_155910/metadata.json b/locodiff-250425/results/aider_aider_run_cmd.py/x-ai_grok-code-fast-1/20250909_155910/metadata.json new file mode 100644 index 000000000..cf48de434 --- /dev/null +++ b/locodiff-250425/results/aider_aider_run_cmd.py/x-ai_grok-code-fast-1/20250909_155910/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "aider_aider_run_cmd.py", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/aider_aider_run_cmd.py_prompt.txt", + "expected_file": "locodiff-250425/prompts/aider_aider_run_cmd.py_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:59:10.605286+00:00", + "success": false, + "error": "Output 
mismatch", + "api_error": false, + "raw_response_length": 4317, + "extracted_output_length": 4303, + "expected_output_length": 4223, + "results_dir": "locodiff-250425/results/aider_aider_run_cmd.py/x-ai_grok-code-fast-1/20250909_155910", + "generation_id": "gen-1757433550-9NcidhnegDb352NAyc8U", + "cost_usd": 0.00657334, + "prompt_tokens": 11670, + "completion_tokens": 1406, + "total_tokens": 13076, + "native_prompt_tokens": 11747, + "native_completion_tokens": 2839, + "native_finish_reason": "stop", + "stats_error": null, + "native_tokens_reasoning": 1927 +} \ No newline at end of file diff --git a/locodiff-250425/results/aider_aider_run_cmd.py/x-ai_grok-code-fast-1/20250909_155910/output.diff b/locodiff-250425/results/aider_aider_run_cmd.py/x-ai_grok-code-fast-1/20250909_155910/output.diff new file mode 100644 index 000000000..a28d9aa59 --- /dev/null +++ b/locodiff-250425/results/aider_aider_run_cmd.py/x-ai_grok-code-fast-1/20250909_155910/output.diff @@ -0,0 +1,12 @@ + +index 4b0ef21f8..f57974be6 100644 +--- a/aider_aider_run_cmd.py_expectedoutput.txt (expected):tmp/tmphzzizety_expected.txt ++++ b/aider_aider_run_cmd.py_extracted.txt (actual):tmp/tmp2n5w46wm_actual.txt +@@ -52,6 +52,7 @@ def run_cmd_subprocess(command, verbose=False, cwd=None, encoding=sys.stdout.enc + parent_process = get_windows_parent_process_name() + if parent_process == "powershell.exe": + command = f"powershell -Command {command}" ++ # else: Assume cmd.exe or other Windows shell, use the command as-is + + if verbose: + print("Running command:", command) diff --git a/locodiff-250425/results/aider_aider_run_cmd.py/x-ai_grok-code-fast-1/20250909_155910/raw_response.txt b/locodiff-250425/results/aider_aider_run_cmd.py/x-ai_grok-code-fast-1/20250909_155910/raw_response.txt new file mode 100644 index 000000000..7247dd856 --- /dev/null +++ b/locodiff-250425/results/aider_aider_run_cmd.py/x-ai_grok-code-fast-1/20250909_155910/raw_response.txt @@ -0,0 +1,135 @@ +```python +import os +import platform 
+import subprocess +import sys +from io import BytesIO + +import pexpect +import psutil + + +def run_cmd(command, verbose=False, error_print=None, cwd=None): + try: + if sys.stdin.isatty() and hasattr(pexpect, "spawn") and platform.system() != "Windows": + return run_cmd_pexpect(command, verbose, cwd) + + return run_cmd_subprocess(command, verbose, cwd) + except OSError as e: + error_message = f"Error occurred while running command '{command}': {str(e)}" + if error_print is None: + print(error_message) + else: + error_print(error_message) + return 1, error_message + + +def get_windows_parent_process_name(): + try: + current_process = psutil.Process() + while True: + parent = current_process.parent() + if parent is None: + break + parent_name = parent.name().lower() + if parent_name in ["powershell.exe", "cmd.exe"]: + return parent_name + current_process = parent + return None + except Exception: + return None + + +def run_cmd_subprocess(command, verbose=False, cwd=None, encoding=sys.stdout.encoding): + if verbose: + print("Using run_cmd_subprocess:", command) + + try: + shell = os.environ.get("SHELL", "/bin/sh") + parent_process = None + + # Determine the appropriate shell + if platform.system() == "Windows": + parent_process = get_windows_parent_process_name() + if parent_process == "powershell.exe": + command = f"powershell -Command {command}" + # else: Assume cmd.exe or other Windows shell, use the command as-is + + if verbose: + print("Running command:", command) + print("SHELL:", shell) + if platform.system() == "Windows": + print("Parent process:", parent_process) + + process = subprocess.Popen( + command, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + shell=True, + encoding=encoding, + errors="replace", + bufsize=0, # Set bufsize to 0 for unbuffered output + universal_newlines=True, + cwd=cwd, + ) + + output = [] + while True: + chunk = process.stdout.read(1) + if not chunk: + break + print(chunk, end="", flush=True) # Print the chunk 
in real-time + output.append(chunk) # Store the chunk for later use + + process.wait() + return process.returncode, "".join(output) + except Exception as e: + return 1, str(e) + + +def run_cmd_pexpect(command, verbose=False, cwd=None): + """ + Run a shell command interactively using pexpect, capturing all output. + + :param command: The command to run as a string. + :param verbose: If True, print output in real-time. + :return: A tuple containing (exit_status, output) + """ + if verbose: + print("Using run_cmd_pexpect:", command) + + output = BytesIO() + + def output_callback(b): + output.write(b) + return b + + try: + # Use the SHELL environment variable, falling back to /bin/sh if not set + shell = os.environ.get("SHELL", "/bin/sh") + if verbose: + print("With shell:", shell) + + if os.path.exists(shell): + # Use the shell from SHELL environment variable + if verbose: + print("Running pexpect.spawn with shell:", shell) + child = pexpect.spawn(shell, args=["-i", "-c", command], encoding="utf-8", cwd=cwd) + else: + # Fall back to spawning the command directly + if verbose: + print("Running pexpect.spawn without shell.") + child = pexpect.spawn(command, encoding="utf-8", cwd=cwd) + + # Transfer control to the user, capturing output + child.interact(output_filter=output_callback) + + # Wait for the command to finish and get the exit status + child.close() + return child.exitstatus, output.getvalue().decode("utf-8", errors="replace") + + except (pexpect.ExceptionPexpect, TypeError, ValueError) as e: + error_msg = f"Error running command {command}: {e}" + return 1, error_msg +``` \ No newline at end of file diff --git a/locodiff-250425/results/aider_aider_sendchat.py/x-ai_grok-code-fast-1/20250909_155817/extracted_output.txt b/locodiff-250425/results/aider_aider_sendchat.py/x-ai_grok-code-fast-1/20250909_155817/extracted_output.txt new file mode 100644 index 000000000..bbe4132ff --- /dev/null +++ 
b/locodiff-250425/results/aider_aider_sendchat.py/x-ai_grok-code-fast-1/20250909_155817/extracted_output.txt @@ -0,0 +1,62 @@ +from aider.dump import dump # noqa: F401 +from aider.utils import format_messages + + +def sanity_check_messages(messages): + """Check if messages alternate between user and assistant roles. + System messages can be interspersed anywhere. + Also verifies the last non-system message is from the user. + Returns True if valid, False otherwise.""" + last_role = None + last_non_system_role = None + + for msg in messages: + role = msg.get("role") + if role == "system": + continue + + if last_role and role == last_role: + print(format_messages(messages)) + # import sys ; sys.exit() + raise ValueError("Messages don't properly alternate user/assistant") + + last_role = role + last_non_system_role = role + + # Ensure last non-system message is from user + return last_non_system_role == "user" + + +def ensure_alternating_roles(messages): + """Ensure messages alternate between 'assistant' and 'user' roles. + + Inserts empty messages of the opposite role when consecutive messages + of the same role are found. + + Args: + messages: List of message dictionaries with 'role' and 'content' keys. + + Returns: + List of messages with alternating roles. 
+ """ + if not messages: + return messages + + fixed_messages = [] + prev_role = None + + for msg in messages: + current_role = msg.get("role") # Get 'role', None if missing + + # If current role same as previous, insert empty message + # of the opposite role + if current_role == prev_role: + if current_role == "user": + fixed_messages.append({"role": "assistant", "content": ""}) + else: + fixed_messages.append({"role": "user", "content": ""}) + + fixed_messages.append(msg) + prev_role = current_role + + return fixed_messages \ No newline at end of file diff --git a/locodiff-250425/results/aider_aider_sendchat.py/x-ai_grok-code-fast-1/20250909_155817/metadata.json b/locodiff-250425/results/aider_aider_sendchat.py/x-ai_grok-code-fast-1/20250909_155817/metadata.json new file mode 100644 index 000000000..bb8882956 --- /dev/null +++ b/locodiff-250425/results/aider_aider_sendchat.py/x-ai_grok-code-fast-1/20250909_155817/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "aider_aider_sendchat.py", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/aider_aider_sendchat.py_prompt.txt", + "expected_file": "locodiff-250425/prompts/aider_aider_sendchat.py_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:58:17.200866+00:00", + "success": false, + "error": "Output mismatch", + "api_error": false, + "raw_response_length": 1895, + "extracted_output_length": 1881, + "expected_output_length": 1854, + "results_dir": "locodiff-250425/results/aider_aider_sendchat.py/x-ai_grok-code-fast-1/20250909_155817", + "generation_id": "gen-1757433497-wu41BayGXYC7BYiV7T8S", + "cost_usd": 0.01528258, + "prompt_tokens": 36522, + "completion_tokens": 2526, + "total_tokens": 39048, + "native_prompt_tokens": 36341, + "native_completion_tokens": 5389, + "native_finish_reason": "stop", + "stats_error": null, + "native_tokens_reasoning": 4998 +} \ No newline at end of file diff --git 
a/locodiff-250425/results/aider_aider_sendchat.py/x-ai_grok-code-fast-1/20250909_155817/output.diff b/locodiff-250425/results/aider_aider_sendchat.py/x-ai_grok-code-fast-1/20250909_155817/output.diff new file mode 100644 index 000000000..1d7f2f68b --- /dev/null +++ b/locodiff-250425/results/aider_aider_sendchat.py/x-ai_grok-code-fast-1/20250909_155817/output.diff @@ -0,0 +1,25 @@ + +index 465e9d989..bbe4132ff 100644 +--- a/aider_aider_sendchat.py_expectedoutput.txt (expected):tmp/tmpmaqyy_up_expected.txt ++++ b/aider_aider_sendchat.py_extracted.txt (actual):tmp/tmpqnls9sfj_actual.txt +@@ -16,8 +16,9 @@ def sanity_check_messages(messages): + continue + + if last_role and role == last_role: +- turns = format_messages(messages) +- raise ValueError("Messages don't properly alternate user/assistant:\n\n" + turns) ++ print(format_messages(messages)) ++ # import sys ; sys.exit() ++ raise ValueError("Messages don't properly alternate user/assistant") + + last_role = role + last_non_system_role = role +@@ -28,7 +29,7 @@ def sanity_check_messages(messages): + + def ensure_alternating_roles(messages): + """Ensure messages alternate between 'assistant' and 'user' roles. +- ++ + Inserts empty messages of the opposite role when consecutive messages + of the same role are found. + diff --git a/locodiff-250425/results/aider_aider_sendchat.py/x-ai_grok-code-fast-1/20250909_155817/raw_response.txt b/locodiff-250425/results/aider_aider_sendchat.py/x-ai_grok-code-fast-1/20250909_155817/raw_response.txt new file mode 100644 index 000000000..96d3ba98c --- /dev/null +++ b/locodiff-250425/results/aider_aider_sendchat.py/x-ai_grok-code-fast-1/20250909_155817/raw_response.txt @@ -0,0 +1,64 @@ +```python +from aider.dump import dump # noqa: F401 +from aider.utils import format_messages + + +def sanity_check_messages(messages): + """Check if messages alternate between user and assistant roles. + System messages can be interspersed anywhere. 
+ Also verifies the last non-system message is from the user. + Returns True if valid, False otherwise.""" + last_role = None + last_non_system_role = None + + for msg in messages: + role = msg.get("role") + if role == "system": + continue + + if last_role and role == last_role: + print(format_messages(messages)) + # import sys ; sys.exit() + raise ValueError("Messages don't properly alternate user/assistant") + + last_role = role + last_non_system_role = role + + # Ensure last non-system message is from user + return last_non_system_role == "user" + + +def ensure_alternating_roles(messages): + """Ensure messages alternate between 'assistant' and 'user' roles. + + Inserts empty messages of the opposite role when consecutive messages + of the same role are found. + + Args: + messages: List of message dictionaries with 'role' and 'content' keys. + + Returns: + List of messages with alternating roles. + """ + if not messages: + return messages + + fixed_messages = [] + prev_role = None + + for msg in messages: + current_role = msg.get("role") # Get 'role', None if missing + + # If current role same as previous, insert empty message + # of the opposite role + if current_role == prev_role: + if current_role == "user": + fixed_messages.append({"role": "assistant", "content": ""}) + else: + fixed_messages.append({"role": "user", "content": ""}) + + fixed_messages.append(msg) + prev_role = current_role + + return fixed_messages +``` \ No newline at end of file diff --git a/locodiff-250425/results/aider_aider_watch.py/x-ai_grok-code-fast-1/20250909_155750/extracted_output.txt b/locodiff-250425/results/aider_aider_watch.py/x-ai_grok-code-fast-1/20250909_155750/extracted_output.txt new file mode 100644 index 000000000..7e7226979 --- /dev/null +++ b/locodiff-250425/results/aider_aider_watch.py/x-ai_grok-code-fast-1/20250909_155750/extracted_output.txt @@ -0,0 +1,302 @@ +import re +import threading +from pathlib import Path +from typing import Optional + +from grep_ast import 
TreeContext +from pathspec import PathSpec +from pathspec.patterns import GitWildMatchPattern +from watchfiles import watch + +from aider.dump import dump # noqa +from aider.watch_prompts import watch_ask_prompt, watch_code_prompt + + +def load_gitignores(gitignore_paths: list[Path]) -> Optional[PathSpec]: + """Load and parse multiple .gitignore files into a single PathSpec""" + if not gitignore_paths: + return None + + patterns = [ + ".aider*", + ".git", + # Common editor backup/temp files + "*~", # Emacs/vim backup + "*.bak", # Generic backup + "*.swp", # Vim swap + "*.swo", # Vim swap + "\\#*\\#", # Emacs auto-save + ".#*", # Emacs lock files + "*.tmp", # Generic temp files + "*.temp", # Generic temp files + "*.orig", # Merge conflict originals + "*.pyc", # Python bytecode + "__pycache__/", # Python cache dir + ".DS_Store", # macOS metadata + "Thumbs.db", # Windows thumbnail cache + # IDE files + ".idea/", # JetBrains IDEs + ".vscode/", # VS Code + "*.sublime-*", # Sublime Text + ".project", # Eclipse + ".settings/", # Eclipse + "*.code-workspace", # VS Code workspace + # Environment files + ".env", # Environment variables + ".venv/", # Python virtual environments + "node_modules/", # Node.js dependencies + "vendor/", # Various dependencies + # Logs and caches + "*.log", # Log files + ".cache/", # Cache directories + ".pytest_cache/", # Python test cache + "coverage/", # Code coverage reports + ] # Always ignore + for path in gitignore_paths: + if path.exists(): + with open(path) as f: + patterns.extend(f.readlines()) + + return PathSpec.from_lines(GitWildMatchPattern, patterns) if patterns else None + + +class FileWatcher: + """Watches source files for changes and AI comments""" + + # Compiled regex pattern for AI comments + ai_comment_pattern = re.compile(r"(?:#|//|--|;+) *(ai\b.*|ai\b.*|.*\bai[?!]?) 
*$", re.IGNORECASE) + + def __init__(self, coder, gitignores=None, verbose=False, analytics=None, root=None): + self.coder = coder + self.io = coder.io + self.root = Path(root) if root else Path(coder.root) + self.verbose = verbose + self.analytics = analytics + self.stop_event = None + self.watcher_thread = None + self.changed_files = set() + self.gitignores = gitignores + + self.gitignore_spec = load_gitignores( + [Path(g) for g in self.gitignores] if self.gitignores else [] + ) + + coder.io.file_watcher = self + + def filter_func(self, change_type, path): + """Filter function for the file watcher""" + path_obj = Path(path) + path_abs = path_obj.absolute() + + if not path_abs.is_relative_to(self.root.absolute()): + return False + + rel_path = path_abs.relative_to(self.root) + if self.verbose: + dump(rel_path) + + if self.gitignore_spec and self.gitignore_spec.match_file( + rel_path.as_posix() + ("/" if path_abs.is_dir() else "") + ): + return False + + if self.verbose: + dump("ok", rel_path) + + # Check if file contains AI markers + try: + content = self.io.read_text(str(path_abs)) + return self.ai_comment_pattern.search(content) + except Exception: + return False + + def get_roots_to_watch(self): + """Determine which root paths to watch based on gitignore rules""" + if self.gitignore_spec: + roots = [ + str(path) + for path in self.root.iterdir() + if not self.gitignore_spec.match_file( + path.relative_to(self.root).as_posix() + ("/" if path.is_dir() else "") + ) + ] + # Fallback to watching root if all top-level items are filtered out + return roots if roots else [str(self.root)] + return [str(self.root)] + + def handle_changes(self, changes): + """Process the detected changes and update state""" + if not changes: + return False + + changed_files = {str(Path(change[1])) for change in changes} + self.changed_files.update(changed_files) + self.io.interrupt_input() + return True + + def watch_files(self): + """Watch for file changes and process them""" + try: + 
roots_to_watch = self.get_roots_to_watch() + + for changes in watch( + *roots_to_watch, + watch_filter=self.filter_func, + stop_event=self.stop_event, + ignore_permission_denied=True, + ): + if self.handle_changes(changes): + return + + except Exception as e: + if self.verbose: + dump(f"File watcher error: {e}") + raise e + + def start(self): + """Start watching for file changes""" + self.stop_event = threading.Event() + self.changed_files = set() + + self.watcher_thread = threading.Thread(target=self.watch_files, daemon=True) + self.watcher_thread.start() + + def stop(self): + """Stop watching for file changes""" + if self.stop_event: + self.stop_event.set() + if self.watcher_thread: + self.watcher_thread.join() + self.watcher_thread = None + self.stop_event = None + + def process_changes(self): + """Get any detected file changes""" + + has_action = None + added = False + for fname in self.changed_files: + _, _, action = self.get_ai_comments(fname) + if action in ("!", "?"): + has_action = action + + if fname in self.coder.abs_fnames: + continue + if self.analytics: + self.analytics.event("ai-comments file-add") + self.coder.abs_fnames.add(fname) + rel_fname = self.coder.get_rel_fname(fname) + if not added: + self.io.tool_output() + added = True + self.io.tool_output(f"Added {rel_fname} to the chat") + + if not has_action: + if added: + self.io.tool_output( + "End your comment with AI! to request changes or AI? 
to ask questions" + ) + return "" + + if self.analytics: + self.analytics.event("ai-comments execute") + self.io.tool_output("Processing your request...") + + res = ( + f"The {!r} comments below can be found in the code files I've shared with you.\n" + "They contain your instructions.\n" + f"Make the requested changes.\n" + "Be sure to remove all these {!r} comments from the code!\n" + "\n" + ) + + # Refresh all AI comments from tracked files + for fname in self.coder.abs_fnames: + line_nums, comments, _action = self.get_ai_comments(fname) + if not line_nums: + continue + + code = self.io.read_text(fname) + if not code: + continue + + rel_fname = self.coder.get_rel_fname(fname) + res += f"{rel_fname}:\n" + + # Convert comment line numbers to line indices (0-based) + lois = [ln - 1 for ln, _ in zip(line_nums, comments) if ln > 0] + + try: + context = TreeContext( + rel_fname, + code, + color=False, + line_number=False, + child_context=False, + last_line=False, + margin=0, + mark_lois=True, + loi_pad=3, + show_top_of_file_parent_scope=False, + ) + context.lines_of_interest = set() + context.add_lines_of_interest(lois) + context.add_context() + res += context.format() + except ValueError: + for ln, comment in zip(line_nums, comments): + res += f" Line {ln}: {comment}\n" + + return res + + def get_ai_comments(self, filepath): + """Extract AI comment line numbers, comments and action status from a file""" + line_nums = [] + comments = [] + has_action = None # None, "!" or "?" + content = self.io.read_text(filepath, silent=True) + if not content: + return None, None, None + + for i, line in enumerate(content.splitlines(), 1): + if match := self.ai_comment_pattern.search(line): + comment = match.group(0).strip() + comments.append(comment) + line_nums.append(i) + comment = comment.lower() + comment = comment.lstrip("/#-;") # Added semicolon for Lisp comments + comment = comment.strip() + if comment.startswith("ai!") or comment.endswith("ai!"): + has_action = "!" 
+ elif comment.startswith("ai?") or comment.endswith("ai?"): + has_action = "?" + if not line_nums: + return None, None, None + return line_nums, comments, has_action + + +def main(): + """Example usage of the file watcher""" + import argparse + + parser = argparse.ArgumentParser(description="Watch source files for changes") + parser.add_argument("directory", help="Directory to watch") + parser.add_argument("--gitignore", action="append", help="Path to .gitignore file (can be specified multiple times)") + args = parser.parse_args() + + watcher = FileWatcher(coder=None, gitignores=args.gitignore) + try: + watcher.start() + while True: + if changes := watcher.process_changes(): + for file in sorted(changes.keys()): + print(file) + watcher.changed_files = None + except KeyboardInterrupt: + print("\nStopped watching files") + finally: + watcher.stop() + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/locodiff-250425/results/aider_aider_watch.py/x-ai_grok-code-fast-1/20250909_155750/metadata.json b/locodiff-250425/results/aider_aider_watch.py/x-ai_grok-code-fast-1/20250909_155750/metadata.json new file mode 100644 index 000000000..bea712160 --- /dev/null +++ b/locodiff-250425/results/aider_aider_watch.py/x-ai_grok-code-fast-1/20250909_155750/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "aider_aider_watch.py", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/aider_aider_watch.py_prompt.txt", + "expected_file": "locodiff-250425/prompts/aider_aider_watch.py_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:57:50.782717+00:00", + "success": false, + "error": "Output mismatch", + "api_error": false, + "raw_response_length": 10294, + "extracted_output_length": 10280, + "expected_output_length": 10409, + "results_dir": "locodiff-250425/results/aider_aider_watch.py/x-ai_grok-code-fast-1/20250909_155750", + "generation_id": "gen-1757433470-O541MP0tYOdzzvtRFdJR", 
+ "cost_usd": 0.01573464, + "prompt_tokens": 56400, + "completion_tokens": 2537, + "total_tokens": 58937, + "native_prompt_tokens": 55896, + "native_completion_tokens": 3060, + "native_finish_reason": "stop", + "stats_error": null, + "native_tokens_reasoning": 928 +} \ No newline at end of file diff --git a/locodiff-250425/results/aider_aider_watch.py/x-ai_grok-code-fast-1/20250909_155750/output.diff b/locodiff-250425/results/aider_aider_watch.py/x-ai_grok-code-fast-1/20250909_155750/output.diff new file mode 100644 index 000000000..b47d1bc37 --- /dev/null +++ b/locodiff-250425/results/aider_aider_watch.py/x-ai_grok-code-fast-1/20250909_155750/output.diff @@ -0,0 +1,107 @@ + +index 6f97774c7..7e7226979 100644 +--- a/aider_aider_watch.py_expectedoutput.txt (expected):tmp/tmprr0z107q_expected.txt ++++ b/aider_aider_watch.py_extracted.txt (actual):tmp/tmpm46q27z1_actual.txt +@@ -105,10 +105,10 @@ class FileWatcher: + + # Check if file contains AI markers + try: +- comments, _, _ = self.get_ai_comments(str(path_abs)) +- return bool(comments) ++ content = self.io.read_text(str(path_abs)) ++ return self.ai_comment_pattern.search(content) + except Exception: +- return ++ return False + + def get_roots_to_watch(self): + """Determine which root paths to watch based on gitignore rules""" +@@ -202,10 +202,13 @@ class FileWatcher: + self.analytics.event("ai-comments execute") + self.io.tool_output("Processing your request...") + +- if has_action == "!": +- res = watch_code_prompt +- elif has_action == "?": +- res = watch_ask_prompt ++ res = ( ++ f"The {!r} comments below can be found in the code files I've shared with you.\n" ++ "They contain your instructions.\n" ++ f"Make the requested changes.\n" ++ "Be sure to remove all these {!r} comments from the code!\n" ++ "\n" ++ ) + + # Refresh all AI comments from tracked files + for fname in self.coder.abs_fnames: +@@ -218,7 +221,7 @@ class FileWatcher: + continue + + rel_fname = self.coder.get_rel_fname(fname) +- res += 
f"\n{rel_fname}:\n" ++ res += f"{rel_fname}:\n" + + # Convert comment line numbers to line indices (0-based) + lois = [ln - 1 for ln, _ in zip(line_nums, comments) if ln > 0] +@@ -258,16 +261,15 @@ class FileWatcher: + for i, line in enumerate(content.splitlines(), 1): + if match := self.ai_comment_pattern.search(line): + comment = match.group(0).strip() +- if comment: +- line_nums.append(i) +- comments.append(comment) +- comment = comment.lower() +- comment = comment.lstrip("/#-;") # Added semicolon for Lisp comments +- comment = comment.strip() +- if comment.startswith("ai!") or comment.endswith("ai!"): +- has_action = "!" +- elif comment.startswith("ai?") or comment.endswith("ai?"): +- has_action = "?" ++ comments.append(comment) ++ line_nums.append(i) ++ comment = comment.lower() ++ comment = comment.lstrip("/#-;") # Added semicolon for Lisp comments ++ comment = comment.strip() ++ if comment.startswith("ai!") or comment.endswith("ai!"): ++ has_action = "!" ++ elif comment.startswith("ai?") or comment.endswith("ai?"): ++ has_action = "?" 
+ if not line_nums: + return None, None, None + return line_nums, comments, has_action +@@ -279,30 +281,20 @@ def main(): + + parser = argparse.ArgumentParser(description="Watch source files for changes") + parser.add_argument("directory", help="Directory to watch") +- parser.add_argument( +- "--gitignore", +- action="append", +- help="Path to .gitignore file (can be specified multiple times)", +- ) ++ parser.add_argument("--gitignore", action="append", help="Path to .gitignore file (can be specified multiple times)") + args = parser.parse_args() + +- directory = args.directory +- print(f"Watching source files in {directory}...") +- +- # Example ignore function that ignores files with "test" in the name +- def ignore_test_files(path): +- return "test" in path.name.lower() +- +- watcher = FileWatcher(directory, gitignores=args.gitignore) ++ watcher = FileWatcher(coder=None, gitignores=args.gitignore) + try: + watcher.start() + while True: +- if changes := watcher.get_changes(): ++ if changes := watcher.process_changes(): + for file in sorted(changes.keys()): + print(file) +- watcher.changed_files = None ++ watcher.changed_files = None + except KeyboardInterrupt: + print("\nStopped watching files") ++ finally: + watcher.stop() + + diff --git a/locodiff-250425/results/aider_aider_watch.py/x-ai_grok-code-fast-1/20250909_155750/raw_response.txt b/locodiff-250425/results/aider_aider_watch.py/x-ai_grok-code-fast-1/20250909_155750/raw_response.txt new file mode 100644 index 000000000..a12f98d96 --- /dev/null +++ b/locodiff-250425/results/aider_aider_watch.py/x-ai_grok-code-fast-1/20250909_155750/raw_response.txt @@ -0,0 +1,304 @@ +```python +import re +import threading +from pathlib import Path +from typing import Optional + +from grep_ast import TreeContext +from pathspec import PathSpec +from pathspec.patterns import GitWildMatchPattern +from watchfiles import watch + +from aider.dump import dump # noqa +from aider.watch_prompts import watch_ask_prompt, watch_code_prompt 
+ + +def load_gitignores(gitignore_paths: list[Path]) -> Optional[PathSpec]: + """Load and parse multiple .gitignore files into a single PathSpec""" + if not gitignore_paths: + return None + + patterns = [ + ".aider*", + ".git", + # Common editor backup/temp files + "*~", # Emacs/vim backup + "*.bak", # Generic backup + "*.swp", # Vim swap + "*.swo", # Vim swap + "\\#*\\#", # Emacs auto-save + ".#*", # Emacs lock files + "*.tmp", # Generic temp files + "*.temp", # Generic temp files + "*.orig", # Merge conflict originals + "*.pyc", # Python bytecode + "__pycache__/", # Python cache dir + ".DS_Store", # macOS metadata + "Thumbs.db", # Windows thumbnail cache + # IDE files + ".idea/", # JetBrains IDEs + ".vscode/", # VS Code + "*.sublime-*", # Sublime Text + ".project", # Eclipse + ".settings/", # Eclipse + "*.code-workspace", # VS Code workspace + # Environment files + ".env", # Environment variables + ".venv/", # Python virtual environments + "node_modules/", # Node.js dependencies + "vendor/", # Various dependencies + # Logs and caches + "*.log", # Log files + ".cache/", # Cache directories + ".pytest_cache/", # Python test cache + "coverage/", # Code coverage reports + ] # Always ignore + for path in gitignore_paths: + if path.exists(): + with open(path) as f: + patterns.extend(f.readlines()) + + return PathSpec.from_lines(GitWildMatchPattern, patterns) if patterns else None + + +class FileWatcher: + """Watches source files for changes and AI comments""" + + # Compiled regex pattern for AI comments + ai_comment_pattern = re.compile(r"(?:#|//|--|;+) *(ai\b.*|ai\b.*|.*\bai[?!]?) 
*$", re.IGNORECASE) + + def __init__(self, coder, gitignores=None, verbose=False, analytics=None, root=None): + self.coder = coder + self.io = coder.io + self.root = Path(root) if root else Path(coder.root) + self.verbose = verbose + self.analytics = analytics + self.stop_event = None + self.watcher_thread = None + self.changed_files = set() + self.gitignores = gitignores + + self.gitignore_spec = load_gitignores( + [Path(g) for g in self.gitignores] if self.gitignores else [] + ) + + coder.io.file_watcher = self + + def filter_func(self, change_type, path): + """Filter function for the file watcher""" + path_obj = Path(path) + path_abs = path_obj.absolute() + + if not path_abs.is_relative_to(self.root.absolute()): + return False + + rel_path = path_abs.relative_to(self.root) + if self.verbose: + dump(rel_path) + + if self.gitignore_spec and self.gitignore_spec.match_file( + rel_path.as_posix() + ("/" if path_abs.is_dir() else "") + ): + return False + + if self.verbose: + dump("ok", rel_path) + + # Check if file contains AI markers + try: + content = self.io.read_text(str(path_abs)) + return self.ai_comment_pattern.search(content) + except Exception: + return False + + def get_roots_to_watch(self): + """Determine which root paths to watch based on gitignore rules""" + if self.gitignore_spec: + roots = [ + str(path) + for path in self.root.iterdir() + if not self.gitignore_spec.match_file( + path.relative_to(self.root).as_posix() + ("/" if path.is_dir() else "") + ) + ] + # Fallback to watching root if all top-level items are filtered out + return roots if roots else [str(self.root)] + return [str(self.root)] + + def handle_changes(self, changes): + """Process the detected changes and update state""" + if not changes: + return False + + changed_files = {str(Path(change[1])) for change in changes} + self.changed_files.update(changed_files) + self.io.interrupt_input() + return True + + def watch_files(self): + """Watch for file changes and process them""" + try: + 
roots_to_watch = self.get_roots_to_watch() + + for changes in watch( + *roots_to_watch, + watch_filter=self.filter_func, + stop_event=self.stop_event, + ignore_permission_denied=True, + ): + if self.handle_changes(changes): + return + + except Exception as e: + if self.verbose: + dump(f"File watcher error: {e}") + raise e + + def start(self): + """Start watching for file changes""" + self.stop_event = threading.Event() + self.changed_files = set() + + self.watcher_thread = threading.Thread(target=self.watch_files, daemon=True) + self.watcher_thread.start() + + def stop(self): + """Stop watching for file changes""" + if self.stop_event: + self.stop_event.set() + if self.watcher_thread: + self.watcher_thread.join() + self.watcher_thread = None + self.stop_event = None + + def process_changes(self): + """Get any detected file changes""" + + has_action = None + added = False + for fname in self.changed_files: + _, _, action = self.get_ai_comments(fname) + if action in ("!", "?"): + has_action = action + + if fname in self.coder.abs_fnames: + continue + if self.analytics: + self.analytics.event("ai-comments file-add") + self.coder.abs_fnames.add(fname) + rel_fname = self.coder.get_rel_fname(fname) + if not added: + self.io.tool_output() + added = True + self.io.tool_output(f"Added {rel_fname} to the chat") + + if not has_action: + if added: + self.io.tool_output( + "End your comment with AI! to request changes or AI? 
to ask questions" + ) + return "" + + if self.analytics: + self.analytics.event("ai-comments execute") + self.io.tool_output("Processing your request...") + + res = ( + f"The {!r} comments below can be found in the code files I've shared with you.\n" + "They contain your instructions.\n" + f"Make the requested changes.\n" + "Be sure to remove all these {!r} comments from the code!\n" + "\n" + ) + + # Refresh all AI comments from tracked files + for fname in self.coder.abs_fnames: + line_nums, comments, _action = self.get_ai_comments(fname) + if not line_nums: + continue + + code = self.io.read_text(fname) + if not code: + continue + + rel_fname = self.coder.get_rel_fname(fname) + res += f"{rel_fname}:\n" + + # Convert comment line numbers to line indices (0-based) + lois = [ln - 1 for ln, _ in zip(line_nums, comments) if ln > 0] + + try: + context = TreeContext( + rel_fname, + code, + color=False, + line_number=False, + child_context=False, + last_line=False, + margin=0, + mark_lois=True, + loi_pad=3, + show_top_of_file_parent_scope=False, + ) + context.lines_of_interest = set() + context.add_lines_of_interest(lois) + context.add_context() + res += context.format() + except ValueError: + for ln, comment in zip(line_nums, comments): + res += f" Line {ln}: {comment}\n" + + return res + + def get_ai_comments(self, filepath): + """Extract AI comment line numbers, comments and action status from a file""" + line_nums = [] + comments = [] + has_action = None # None, "!" or "?" + content = self.io.read_text(filepath, silent=True) + if not content: + return None, None, None + + for i, line in enumerate(content.splitlines(), 1): + if match := self.ai_comment_pattern.search(line): + comment = match.group(0).strip() + comments.append(comment) + line_nums.append(i) + comment = comment.lower() + comment = comment.lstrip("/#-;") # Added semicolon for Lisp comments + comment = comment.strip() + if comment.startswith("ai!") or comment.endswith("ai!"): + has_action = "!" 
+ elif comment.startswith("ai?") or comment.endswith("ai?"): + has_action = "?" + if not line_nums: + return None, None, None + return line_nums, comments, has_action + + +def main(): + """Example usage of the file watcher""" + import argparse + + parser = argparse.ArgumentParser(description="Watch source files for changes") + parser.add_argument("directory", help="Directory to watch") + parser.add_argument("--gitignore", action="append", help="Path to .gitignore file (can be specified multiple times)") + args = parser.parse_args() + + watcher = FileWatcher(coder=None, gitignores=args.gitignore) + try: + watcher.start() + while True: + if changes := watcher.process_changes(): + for file in sorted(changes.keys()): + print(file) + watcher.changed_files = None + except KeyboardInterrupt: + print("\nStopped watching files") + finally: + watcher.stop() + + +if __name__ == "__main__": + main() +``` \ No newline at end of file diff --git a/locodiff-250425/results/aider_aider_website__includes_recording.js/x-ai_grok-code-fast-1/20250909_155834/extracted_output.txt b/locodiff-250425/results/aider_aider_website__includes_recording.js/x-ai_grok-code-fast-1/20250909_155834/extracted_output.txt new file mode 100644 index 000000000..cc755d865 --- /dev/null +++ b/locodiff-250425/results/aider_aider_website__includes_recording.js/x-ai_grok-code-fast-1/20250909_155834/extracted_output.txt @@ -0,0 +1,425 @@ +document.addEventListener('DOMContentLoaded', function() { + let player; // Store player reference to make it accessible to click handlers + let globalAudio; // Global audio element to be reused + + // Detect if device likely has no physical keyboard + function detectNoKeyboard() { + // Check if it's a touch device (most mobile devices) + const isTouchDevice = ('ontouchstart' in window) || + (navigator.maxTouchPoints > 0) || + (navigator.msMaxTouchPoints > 0); + + // Check common mobile user agents as additional signal + const isMobileUA = 
/Android|webOS|iPhone|iPad|iPod|BlackBerry|IEMobile|Opera Mini/i.test(navigator.userAgent); + + // If it's a touch device and has a mobile user agent, likely has no physical keyboard + if (isTouchDevice && isMobileUA) { + document.body.classList.add('no-physical-keyboard'); + } + } + + // Run detection + detectNoKeyboard(); + + // Parse the transcript section to create markers and convert timestamps to links + function parseTranscript() { + const markers = []; + // Find the Commentary heading + const transcriptHeading = Array.from(document.querySelectorAll('h2')).find(el => el.textContent.trim() === 'Commentary'); + + if (transcriptHeading) { + // Get all list items after the transcript heading + let currentElement = transcriptHeading.nextElementSibling; + + while (currentElement && currentElement.tagName === 'UL') { + const listItems = currentElement.querySelectorAll('li'); + + listItems.forEach(item => { + const text = item.textContent.trim(); + const match = text.match(/(\d+):(\d+)\s+(.*)/); + + if (match) { + const minutes = parseInt(match[1], 10); + const seconds = parseInt(match[2], 10); + const timeInSeconds = minutes * 60 + seconds; + const formattedTime = `${minutes}:${seconds.toString().padStart(2, '0')}`; + const message = match[3].trim(); + + // Create link for the timestamp + const timeLink = document.createElement('a'); + timeLink.href = '#'; + timeLink.textContent = formattedTime; + timeLink.className = 'timestamp-link'; + timeLink.dataset.time = timeInSeconds; + timeLink.dataset.message = message; + + // Add click event to seek the player + timeLink.addEventListener('click', function(e) { + e.preventDefault(); + if (player && typeof player.seek === 'function') { + player.seek(timeInSeconds); + player.play(); + + // Also trigger toast and speech + speakText(message, timeInSeconds); + + // Highlight this timestamp + highlightTimestamp(timeInSeconds); + } + }); + + // Replace text with the link + message + item.textContent = ''; + 
item.appendChild(timeLink); + item.appendChild(document.createTextNode(' ' + message)); + + // Add class and click handler to the entire list item + item.classList.add('transcript-item'); + item.dataset.time = timeInSeconds; + item.dataset.message = message; + + item.addEventListener('click', function(e) { + // Prevent click event if the user clicked directly on the timestamp link + // This prevents double-firing of the event + if (e.target !== timeLink) { + e.preventDefault(); + if (player && typeof player.seek === 'function') { + player.seek(timeInSeconds); + player.play(); + + // Also trigger toast and speech + speakText(message, timeInSeconds); + + // Highlight this timestamp + highlightTimestamp(timeInSeconds); + } + } + }); + + markers.push([timeInSeconds, message]); + } + }); + + currentElement = currentElement.nextElementSibling; + } + } + + return markers; + } + + // Parse transcript and create markers + const markers = parseTranscript(); + + // Create player with a single call + player = AsciinemaPlayer.create( + recording_url, + document.getElementById('demo'), + { + speed: 1.25, + idleTimeLimit: 1, + theme: "aider", + poster: "npt:0:01", + markers: markers, + controls: true + } + ); + + // Focus on the player element so keyboard shortcuts work immediately + setTimeout(() => { + // Use setTimeout to ensure the player is fully initialized + if (player && typeof player.focus === 'function') { + player.focus(); + } else { + // If player doesn't have a focus method, try to find and focus the terminal element + const playerElement = document.querySelector('.asciinema-terminal'); + if (playerElement) { + playerElement.focus(); + } else { + // Last resort - try to find element with tabindex + const tabbableElement = document.querySelector('[tabindex]'); + if (tabbableElement) { + tabbableElement.focus(); + } + } + } + }, 100); + + // Track active toast elements + let activeToast = null; + + // Function to display toast notification + function showToast(text) { 
+ // Get the appropriate container based on fullscreen state + let container = document.getElementById('toast-container'); + const isFullscreen = document.fullscreenElement || + document.webkitFullscreenElement || + document.mozFullScreenElement || + document.msFullscreenElement; + + // If in fullscreen, check if we need to create a fullscreen toast container + if (isFullscreen) { + // Target the fullscreen element as the container parent + const fullscreenElement = document.fullscreenElement || + document.webkitFullscreenElement || + document.mozFullScreenElement || + document.msFullscreenElement; + + // Look for an existing fullscreen toast container + let fsContainer = fullscreenElement.querySelector('.fs-toast-container'); + + if (!fsContainer) { + // Create a new container for fullscreen mode + fsContainer = document.createElement('div'); + fsContainer.className = 'toast-container fs-toast-container'; + fsContainer.id = 'fs-toast-container'; + fullscreenElement.appendChild(fsContainer); + } + + container = fsContainer; + } + + // Remove any existing toast + if (activeToast) { + hideToast(activeToast); + } + + // Create toast element + const toast = document.createElement('div'); + toast.className = 'toast-notification'; + toast.textContent = text; + + // Add to container + container.appendChild(toast); + + // Store reference to active toast + activeToast = { + element: toast, + container: container + }; + + // Trigger animation + setTimeout(() => { + toast.style.opacity = '1'; + }, 10); + + return activeToast; + } + + // Function to hide a toast + function hideToast(toastInfo) { + if (!toastInfo || !toastInfo.element) return; + + toastInfo.element.style.opacity = '0'; + setTimeout(() => { + if (toastInfo.container && toastInfo.container.contains(toastInfo.element)) { + toastInfo.container.removeChild(toastInfo.element); + } + + // If this was the active toast, clear the reference + if (activeToast === toastInfo) { + activeToast = null; + } + }, 300); // Wait 
for fade out animation + } + + // Track if TTS is currently in progress to prevent duplicates + let ttsInProgress = false; + let currentToast = null; + + // Improved browser TTS function + function useBrowserTTS(text) { + // Don't start new speech if already in progress + if (ttsInProgress) { + console.log('Speech synthesis already in progress, skipping'); + return false; + } + + if ('speechSynthesis' in window) { + console.log('Using browser TTS fallback'); + + // Set flag to prevent duplicate speech + ttsInProgress = true; + + // Cancel any ongoing speech + window.speechSynthesis.cancel(); + + const utterance = new SpeechSynthesisUtterance(text); + utterance.rate = 1.0; + utterance.pitch = 1.0; + utterance.volume = 1.0; + + // For iOS, use a shorter utterance if possible + if (/iPad|iPhone|iPod/.test(navigator.userAgent) && !window.MSStream) { + utterance.text = text.length > 100 ? text.substring(0, 100) + '...' : text; + } + + utterance.onstart = () => console.log('Speech started'); + utterance.onend = () => { + console.log('Speech ended'); + ttsInProgress = false; // Reset flag when speech completes + + // Hide toast when speech ends + if (currentToast) { + hideToast(currentToast); + currentToast = null; + } + }; + utterance.onerror = (e) => { + console.warn('Speech error:', e); + ttsInProgress = false; // Reset flag on error + + // Also hide toast on error + if (currentToast) { + hideToast(currentToast); + currentToast = null; + } + }; + + window.speechSynthesis.speak(utterance); + return true; + } + console.warn('SpeechSynthesis not supported'); + return false; + } + + // Function to play pre-generated TTS audio files + function speakText(text, timeInSeconds) { + // Show the toast and keep reference + currentToast = showToast(text); + + // Format time for filename (MM-SS) + const minutes = Math.floor(timeInSeconds / 60); + const seconds = timeInSeconds % 60; + const formattedTime = `${minutes.toString().padStart(2, '0')}-${seconds.toString().padStart(2, 
'0')}`; + + // Get recording_id from the page or use default from the URL + const recordingId = typeof recording_id !== 'undefined' ? recording_id : + window.location.pathname.split('/').pop().replace('.html', ''); + + // Construct audio file path + const audioPath = `/assets/audio/${recordingId}/${formattedTime}.mp3`; + + // Log for debugging + console.log(`Attempting to play audio: ${audioPath}`); + + // Detect iOS + const isIOS = /iPad|iPhone|iPod/.test(navigator.userAgent) && !window.MSStream; + console.log(`Device is iOS: ${isIOS}`); + + // Flag to track if we've already fallen back to TTS + let fallenBackToTTS = false; + + try { + // Create or reuse audio element + if (!globalAudio) { + globalAudio = new Audio(); + console.log("Created new global Audio element"); + } + + // Set up event handlers + globalAudio.onended = () => { + console.log('Audio playback ended'); + // Hide toast when audio ends + if (currentToast) { + hideToast(currentToast); + currentToast = null; + } + }; + + globalAudio.onerror = (e) => { + console.warn(`Audio error: ${e.type}`, e); + if (!fallenBackToTTS) { + fallenBackToTTS = true; + useBrowserTTS(text); + } else if (currentToast) { + // If we've already tried TTS and that failed too, hide the toast + hideToast(currentToast); + currentToast = null; + } + }; + + // For iOS, preload might help with subsequent plays + if (isIOS) { + globalAudio.preload = "auto"; + } + + // Set the new source + globalAudio.src = audioPath; + + // Play with proper error handling + const playPromise = globalAudio.play(); + + if (playPromise !== undefined) { + playPromise.catch(error => { + console.warn(`Play error: ${error.message}`); + + // On iOS, a user gesture might be required + if (isIOS) { + console.log("iOS playback failed, trying SpeechSynthesis"); + } + + if (!fallenBackToTTS) { + fallenBackToTTS = true; + useBrowserTTS(text); + } + }); + } + } catch (e) { + console.error(`Exception in audio playback: ${e.message}`); + useBrowserTTS(text); + } + } 
+ + // Function to highlight the active timestamp in the transcript + function highlightTimestamp(timeInSeconds) { + // Remove previous highlights + document.querySelectorAll('.timestamp-active').forEach(el => { + el.classList.remove('timestamp-active'); + }); + + document.querySelectorAll('.active-marker').forEach(el => { + el.classList.remove('active-marker'); + }); + + // Find the timestamp link with matching time + const timestampLinks = document.querySelectorAll('.timestamp-link'); + let activeLink = null; + + for (const link of timestampLinks) { + if (parseInt(link.dataset.time) === timeInSeconds) { + activeLink = link; + break; + } + } + + if (activeLink) { + // Add highlight class to the link + activeLink.classList.add('timestamp-active'); + + // Also highlight the parent list item + const listItem = activeLink.closest('li'); + if (listItem) { + listItem.classList.add('active-marker'); + + // No longer scrolling into view to avoid shifting focus + } + } + } + + // Add event listener with safety checks + if (player && typeof player.addEventListener === 'function') { + player.addEventListener('marker', function(event) { + try { + const { index, time, label } = event; + console.log(`marker! 
${index} - ${time} - ${label}`); + + speakText(label, time); + + // Highlight the corresponding timestamp in the transcript + highlightTimestamp(time); + } catch (error) { + console.error('Error in marker event handler:', error); + } + }); + } +}); \ No newline at end of file diff --git a/locodiff-250425/results/aider_aider_website__includes_recording.js/x-ai_grok-code-fast-1/20250909_155834/metadata.json b/locodiff-250425/results/aider_aider_website__includes_recording.js/x-ai_grok-code-fast-1/20250909_155834/metadata.json new file mode 100644 index 000000000..a221f2847 --- /dev/null +++ b/locodiff-250425/results/aider_aider_website__includes_recording.js/x-ai_grok-code-fast-1/20250909_155834/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "aider_aider_website__includes_recording.js", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/aider_aider_website__includes_recording.js_prompt.txt", + "expected_file": "locodiff-250425/prompts/aider_aider_website__includes_recording.js_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:58:34.600908+00:00", + "success": false, + "error": "Output mismatch", + "api_error": false, + "raw_response_length": 14239, + "extracted_output_length": 14221, + "expected_output_length": 14364, + "results_dir": "locodiff-250425/results/aider_aider_website__includes_recording.js/x-ai_grok-code-fast-1/20250909_155834", + "generation_id": "gen-1757433514-MjWQl6OS8c8kDkEpIlKi", + "cost_usd": 0.01622328, + "prompt_tokens": 11202, + "completion_tokens": 4951, + "total_tokens": 16153, + "native_prompt_tokens": 11007, + "native_completion_tokens": 9394, + "native_finish_reason": "stop", + "stats_error": null, + "native_tokens_reasoning": 6387 +} \ No newline at end of file diff --git a/locodiff-250425/results/aider_aider_website__includes_recording.js/x-ai_grok-code-fast-1/20250909_155834/output.diff 
b/locodiff-250425/results/aider_aider_website__includes_recording.js/x-ai_grok-code-fast-1/20250909_155834/output.diff new file mode 100644 index 000000000..8395de62c --- /dev/null +++ b/locodiff-250425/results/aider_aider_website__includes_recording.js/x-ai_grok-code-fast-1/20250909_155834/output.diff @@ -0,0 +1,46 @@ + +index 95a52a673..cc755d865 100644 +--- a/aider_aider_website__includes_recording.js_expectedoutput.txt (expected):tmp/tmpv6y_o9rj_expected.txt ++++ b/aider_aider_website__includes_recording.js_extracted.txt (actual):tmp/tmpxhbly_2o_actual.txt +@@ -61,7 +61,6 @@ document.addEventListener('DOMContentLoaded', function() { + player.play(); + + // Also trigger toast and speech +- showToast(message); + speakText(message, timeInSeconds); + + // Highlight this timestamp +@@ -89,7 +88,6 @@ document.addEventListener('DOMContentLoaded', function() { + player.play(); + + // Also trigger toast and speech +- showToast(message); + speakText(message, timeInSeconds); + + // Highlight this timestamp +@@ -108,7 +106,7 @@ document.addEventListener('DOMContentLoaded', function() { + + return markers; + } +- ++ + // Parse transcript and create markers + const markers = parseTranscript(); + +@@ -407,7 +405,7 @@ document.addEventListener('DOMContentLoaded', function() { + } + } + } +- ++ + // Add event listener with safety checks + if (player && typeof player.addEventListener === 'function') { + player.addEventListener('marker', function(event) { +@@ -415,7 +413,6 @@ document.addEventListener('DOMContentLoaded', function() { + const { index, time, label } = event; + console.log(`marker! 
${index} - ${time} - ${label}`); + +- // Speak the marker label (toast is now shown within speakText) + speakText(label, time); + + // Highlight the corresponding timestamp in the transcript diff --git a/locodiff-250425/results/aider_aider_website__includes_recording.js/x-ai_grok-code-fast-1/20250909_155834/raw_response.txt b/locodiff-250425/results/aider_aider_website__includes_recording.js/x-ai_grok-code-fast-1/20250909_155834/raw_response.txt new file mode 100644 index 000000000..d1a3f91c1 --- /dev/null +++ b/locodiff-250425/results/aider_aider_website__includes_recording.js/x-ai_grok-code-fast-1/20250909_155834/raw_response.txt @@ -0,0 +1,427 @@ +```javascript +document.addEventListener('DOMContentLoaded', function() { + let player; // Store player reference to make it accessible to click handlers + let globalAudio; // Global audio element to be reused + + // Detect if device likely has no physical keyboard + function detectNoKeyboard() { + // Check if it's a touch device (most mobile devices) + const isTouchDevice = ('ontouchstart' in window) || + (navigator.maxTouchPoints > 0) || + (navigator.msMaxTouchPoints > 0); + + // Check common mobile user agents as additional signal + const isMobileUA = /Android|webOS|iPhone|iPad|iPod|BlackBerry|IEMobile|Opera Mini/i.test(navigator.userAgent); + + // If it's a touch device and has a mobile user agent, likely has no physical keyboard + if (isTouchDevice && isMobileUA) { + document.body.classList.add('no-physical-keyboard'); + } + } + + // Run detection + detectNoKeyboard(); + + // Parse the transcript section to create markers and convert timestamps to links + function parseTranscript() { + const markers = []; + // Find the Commentary heading + const transcriptHeading = Array.from(document.querySelectorAll('h2')).find(el => el.textContent.trim() === 'Commentary'); + + if (transcriptHeading) { + // Get all list items after the transcript heading + let currentElement = transcriptHeading.nextElementSibling; + + while 
(currentElement && currentElement.tagName === 'UL') { + const listItems = currentElement.querySelectorAll('li'); + + listItems.forEach(item => { + const text = item.textContent.trim(); + const match = text.match(/(\d+):(\d+)\s+(.*)/); + + if (match) { + const minutes = parseInt(match[1], 10); + const seconds = parseInt(match[2], 10); + const timeInSeconds = minutes * 60 + seconds; + const formattedTime = `${minutes}:${seconds.toString().padStart(2, '0')}`; + const message = match[3].trim(); + + // Create link for the timestamp + const timeLink = document.createElement('a'); + timeLink.href = '#'; + timeLink.textContent = formattedTime; + timeLink.className = 'timestamp-link'; + timeLink.dataset.time = timeInSeconds; + timeLink.dataset.message = message; + + // Add click event to seek the player + timeLink.addEventListener('click', function(e) { + e.preventDefault(); + if (player && typeof player.seek === 'function') { + player.seek(timeInSeconds); + player.play(); + + // Also trigger toast and speech + speakText(message, timeInSeconds); + + // Highlight this timestamp + highlightTimestamp(timeInSeconds); + } + }); + + // Replace text with the link + message + item.textContent = ''; + item.appendChild(timeLink); + item.appendChild(document.createTextNode(' ' + message)); + + // Add class and click handler to the entire list item + item.classList.add('transcript-item'); + item.dataset.time = timeInSeconds; + item.dataset.message = message; + + item.addEventListener('click', function(e) { + // Prevent click event if the user clicked directly on the timestamp link + // This prevents double-firing of the event + if (e.target !== timeLink) { + e.preventDefault(); + if (player && typeof player.seek === 'function') { + player.seek(timeInSeconds); + player.play(); + + // Also trigger toast and speech + speakText(message, timeInSeconds); + + // Highlight this timestamp + highlightTimestamp(timeInSeconds); + } + } + }); + + markers.push([timeInSeconds, message]); + } + }); + 
+ currentElement = currentElement.nextElementSibling; + } + } + + return markers; + } + + // Parse transcript and create markers + const markers = parseTranscript(); + + // Create player with a single call + player = AsciinemaPlayer.create( + recording_url, + document.getElementById('demo'), + { + speed: 1.25, + idleTimeLimit: 1, + theme: "aider", + poster: "npt:0:01", + markers: markers, + controls: true + } + ); + + // Focus on the player element so keyboard shortcuts work immediately + setTimeout(() => { + // Use setTimeout to ensure the player is fully initialized + if (player && typeof player.focus === 'function') { + player.focus(); + } else { + // If player doesn't have a focus method, try to find and focus the terminal element + const playerElement = document.querySelector('.asciinema-terminal'); + if (playerElement) { + playerElement.focus(); + } else { + // Last resort - try to find element with tabindex + const tabbableElement = document.querySelector('[tabindex]'); + if (tabbableElement) { + tabbableElement.focus(); + } + } + } + }, 100); + + // Track active toast elements + let activeToast = null; + + // Function to display toast notification + function showToast(text) { + // Get the appropriate container based on fullscreen state + let container = document.getElementById('toast-container'); + const isFullscreen = document.fullscreenElement || + document.webkitFullscreenElement || + document.mozFullScreenElement || + document.msFullscreenElement; + + // If in fullscreen, check if we need to create a fullscreen toast container + if (isFullscreen) { + // Target the fullscreen element as the container parent + const fullscreenElement = document.fullscreenElement || + document.webkitFullscreenElement || + document.mozFullScreenElement || + document.msFullscreenElement; + + // Look for an existing fullscreen toast container + let fsContainer = fullscreenElement.querySelector('.fs-toast-container'); + + if (!fsContainer) { + // Create a new container for 
fullscreen mode + fsContainer = document.createElement('div'); + fsContainer.className = 'toast-container fs-toast-container'; + fsContainer.id = 'fs-toast-container'; + fullscreenElement.appendChild(fsContainer); + } + + container = fsContainer; + } + + // Remove any existing toast + if (activeToast) { + hideToast(activeToast); + } + + // Create toast element + const toast = document.createElement('div'); + toast.className = 'toast-notification'; + toast.textContent = text; + + // Add to container + container.appendChild(toast); + + // Store reference to active toast + activeToast = { + element: toast, + container: container + }; + + // Trigger animation + setTimeout(() => { + toast.style.opacity = '1'; + }, 10); + + return activeToast; + } + + // Function to hide a toast + function hideToast(toastInfo) { + if (!toastInfo || !toastInfo.element) return; + + toastInfo.element.style.opacity = '0'; + setTimeout(() => { + if (toastInfo.container && toastInfo.container.contains(toastInfo.element)) { + toastInfo.container.removeChild(toastInfo.element); + } + + // If this was the active toast, clear the reference + if (activeToast === toastInfo) { + activeToast = null; + } + }, 300); // Wait for fade out animation + } + + // Track if TTS is currently in progress to prevent duplicates + let ttsInProgress = false; + let currentToast = null; + + // Improved browser TTS function + function useBrowserTTS(text) { + // Don't start new speech if already in progress + if (ttsInProgress) { + console.log('Speech synthesis already in progress, skipping'); + return false; + } + + if ('speechSynthesis' in window) { + console.log('Using browser TTS fallback'); + + // Set flag to prevent duplicate speech + ttsInProgress = true; + + // Cancel any ongoing speech + window.speechSynthesis.cancel(); + + const utterance = new SpeechSynthesisUtterance(text); + utterance.rate = 1.0; + utterance.pitch = 1.0; + utterance.volume = 1.0; + + // For iOS, use a shorter utterance if possible + if 
(/iPad|iPhone|iPod/.test(navigator.userAgent) && !window.MSStream) { + utterance.text = text.length > 100 ? text.substring(0, 100) + '...' : text; + } + + utterance.onstart = () => console.log('Speech started'); + utterance.onend = () => { + console.log('Speech ended'); + ttsInProgress = false; // Reset flag when speech completes + + // Hide toast when speech ends + if (currentToast) { + hideToast(currentToast); + currentToast = null; + } + }; + utterance.onerror = (e) => { + console.warn('Speech error:', e); + ttsInProgress = false; // Reset flag on error + + // Also hide toast on error + if (currentToast) { + hideToast(currentToast); + currentToast = null; + } + }; + + window.speechSynthesis.speak(utterance); + return true; + } + console.warn('SpeechSynthesis not supported'); + return false; + } + + // Function to play pre-generated TTS audio files + function speakText(text, timeInSeconds) { + // Show the toast and keep reference + currentToast = showToast(text); + + // Format time for filename (MM-SS) + const minutes = Math.floor(timeInSeconds / 60); + const seconds = timeInSeconds % 60; + const formattedTime = `${minutes.toString().padStart(2, '0')}-${seconds.toString().padStart(2, '0')}`; + + // Get recording_id from the page or use default from the URL + const recordingId = typeof recording_id !== 'undefined' ? 
recording_id : + window.location.pathname.split('/').pop().replace('.html', ''); + + // Construct audio file path + const audioPath = `/assets/audio/${recordingId}/${formattedTime}.mp3`; + + // Log for debugging + console.log(`Attempting to play audio: ${audioPath}`); + + // Detect iOS + const isIOS = /iPad|iPhone|iPod/.test(navigator.userAgent) && !window.MSStream; + console.log(`Device is iOS: ${isIOS}`); + + // Flag to track if we've already fallen back to TTS + let fallenBackToTTS = false; + + try { + // Create or reuse audio element + if (!globalAudio) { + globalAudio = new Audio(); + console.log("Created new global Audio element"); + } + + // Set up event handlers + globalAudio.onended = () => { + console.log('Audio playback ended'); + // Hide toast when audio ends + if (currentToast) { + hideToast(currentToast); + currentToast = null; + } + }; + + globalAudio.onerror = (e) => { + console.warn(`Audio error: ${e.type}`, e); + if (!fallenBackToTTS) { + fallenBackToTTS = true; + useBrowserTTS(text); + } else if (currentToast) { + // If we've already tried TTS and that failed too, hide the toast + hideToast(currentToast); + currentToast = null; + } + }; + + // For iOS, preload might help with subsequent plays + if (isIOS) { + globalAudio.preload = "auto"; + } + + // Set the new source + globalAudio.src = audioPath; + + // Play with proper error handling + const playPromise = globalAudio.play(); + + if (playPromise !== undefined) { + playPromise.catch(error => { + console.warn(`Play error: ${error.message}`); + + // On iOS, a user gesture might be required + if (isIOS) { + console.log("iOS playback failed, trying SpeechSynthesis"); + } + + if (!fallenBackToTTS) { + fallenBackToTTS = true; + useBrowserTTS(text); + } + }); + } + } catch (e) { + console.error(`Exception in audio playback: ${e.message}`); + useBrowserTTS(text); + } + } + + // Function to highlight the active timestamp in the transcript + function highlightTimestamp(timeInSeconds) { + // Remove 
previous highlights + document.querySelectorAll('.timestamp-active').forEach(el => { + el.classList.remove('timestamp-active'); + }); + + document.querySelectorAll('.active-marker').forEach(el => { + el.classList.remove('active-marker'); + }); + + // Find the timestamp link with matching time + const timestampLinks = document.querySelectorAll('.timestamp-link'); + let activeLink = null; + + for (const link of timestampLinks) { + if (parseInt(link.dataset.time) === timeInSeconds) { + activeLink = link; + break; + } + } + + if (activeLink) { + // Add highlight class to the link + activeLink.classList.add('timestamp-active'); + + // Also highlight the parent list item + const listItem = activeLink.closest('li'); + if (listItem) { + listItem.classList.add('active-marker'); + + // No longer scrolling into view to avoid shifting focus + } + } + } + + // Add event listener with safety checks + if (player && typeof player.addEventListener === 'function') { + player.addEventListener('marker', function(event) { + try { + const { index, time, label } = event; + console.log(`marker! 
${index} - ${time} - ${label}`); + + speakText(label, time); + + // Highlight the corresponding timestamp in the transcript + highlightTimestamp(time); + } catch (error) { + console.error('Error in marker event handler:', error); + } + }); + } +}); +``` \ No newline at end of file diff --git a/locodiff-250425/results/aider_scripts_versionbump.py/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt b/locodiff-250425/results/aider_scripts_versionbump.py/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt new file mode 100644 index 000000000..1c4fa7683 --- /dev/null +++ b/locodiff-250425/results/aider_scripts_versionbump.py/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt @@ -0,0 +1,175 @@ +#!/usr/bin/env python + +import argparse +import datetime +import os +import re +import subprocess +import sys +from packaging import version + + +# Function to check if we are on the main branch +def check_branch(): + branch = subprocess.run( + ["git", "rev-parse", "--abbrev-ref", "HEAD"], capture_output=True, text=True + ).stdout.strip() + if branch != "main": + print("Error: Not on the main branch.") + sys.exit(1) + + +# Function to check if the working directory is clean +def check_working_directory_clean(): + status = subprocess.run( + ["git", "status", "--porcelain"], capture_output=True, text=True + ).stdout + if status: + print("Error: Working directory is not clean.") + sys.exit(1) + + +# Function to fetch the latest changes and check if the main branch is up to date +def check_main_branch_up_to_date(): + subprocess.run(["git", "fetch", "origin"], check=True) + local_main = subprocess.run( + ["git", "rev-parse", "main"], capture_output=True, text=True + ).stdout.strip() + print(f"Local main commit hash: {local_main}") + origin_main = subprocess.run( + ["git", "rev-parse", "origin/main"], capture_output=True, text=True + ).stdout.strip() + print(f"Origin main commit hash: {origin_main}") + if local_main != origin_main: + local_date = subprocess.run( + 
["git", "show", "-s", "--format=%ci", "main"], capture_output=True, text=True + ).stdout.strip() + origin_date = subprocess.run( + ["git", "show", "-s", "--format=%ci", "origin/main"], capture_output=True, text=True + ).stdout.strip() + local_date = datetime.datetime.strptime(local_date, "%Y-%m-%d %H:%M:%S %z") + origin_date = datetime.datetime.strptime(origin_date, "%Y-%m-%d %H:%M:%S %z") + if local_date < origin_date: + print( + "Error: The local main branch is behind origin/main. Please pull the latest changes." + ) + elif local_date > origin_date: + print( + "Error: The origin/main branch is behind the local main branch. Please push your changes." + ) + else: + print("Error: The main branch and origin/main have diverged.") + sys.exit(1) + + +# Function to check if we can push to the origin repository +def check_ok_to_push(): + print("Checking if it's ok to push to origin repository...") + result = subprocess.run(["git", "push", "--dry-run", "origin"]) + + if result.returncode != 0: + print("Error: Cannot push to origin repository.") + sys.exit(1) + + print("Push to origin repository is possible.") + + +def main(): + parser = argparse.ArgumentParser(description="Bump version") + parser.add_argument("new_version", help="New version in x.y.z format") + parser.add_argument( + "--dry-run", action="store_true", help="Print each step without actually executing them" + ) + parser.add_argument("--force", action="store_true", help="Skip pre-push checks") + + args = parser.parse_args() + dry_run = args.dry_run + force = args.force + + # Perform checks before proceeding unless --force is used + if not force: + check_branch() + check_working_directory_clean() + check_main_branch_up_to_date() + check_ok_to_push() + else: + print("Skipping pre-push checks due to --force flag.") + + new_version_str = args.new_version + if not re.match(r"^\d+\.\d+\.\d+$", new_version_str): + raise ValueError(f"Invalid version format, must be x.y.z: {new_version_str}") + + new_version = 
version.parse(new_version_str) + incremented_version = version.Version( + f"{new_version.major}.{new_version.minor}.{new_version.micro + 1}" + ) + + from aider import __version__ as current_version + + current_version = current_version.split("-dev")[0].split(".dev")[0] + if new_version <= version.parse(current_version): + raise ValueError( + f"New version {new_version} must be greater than the current version {current_version}" + ) + + with open("aider/__init__.py", "r") as f: + content = f.read() + updated_content = re.sub(r'__version__ = ".+?"', f'__version__ = "{new_version}"', content) + + print("Updating aider/__init__.py with new version:") + print(updated_content) + if not dry_run: + with open("aider/__init__.py", "w") as f: + f.write(updated_content) + + git_commands = [ + ["git", "add", "aider/__init__.py"], + ["git", "commit", "-m", f"version bump to {new_version}"], + ["git", "tag", f"v{new_version}"], + ["git", "push", "origin", "--no-verify"], + ["git", "push", "origin", f"v{new_version}", "--no-verify"], + ] + + for cmd in git_commands: + print(f"Running: {' '.join(cmd)}") + if not dry_run: + subprocess.run(cmd, check=True) + + new_dev_version = f"{incremented_version}.dev" + updated_dev_content = re.sub( + r'__version__ = ".+?"', f'__version__ = "{new_dev_version}"', content + ) + + print() + print("Updating aider/__init__.py with new dev version:") + print(updated_dev_content) + if not dry_run: + with open("aider/__init__.py", "w") as f: + f.write(updated_dev_content) + + git_commands_dev = [ + ["git", "add", "aider/__init__.py"], + ["git", "commit", "-m", f"set version to {new_dev_version}"], + ["git", "tag", f"v{new_dev_version}"], + ["git", "push", "origin", "--no-verify"], + ["git", "push", "origin", f"v{new_dev_version}", "--no-verify"], + ] + + for cmd in git_commands_dev: + print(f"Running: {' '.join(cmd)}") + if not dry_run: + subprocess.run(cmd, check=True) + + # Remove aider/_version.py if it exists + version_file = "aider/_version.py" + 
if os.path.exists(version_file): + print(f"Removing {version_file}") + if not dry_run: + os.remove(version_file) + subprocess.run(["git", "add", version_file], check=True) + subprocess.run(["git", "commit", "-m", f"Remove {version_file}"], check=True) + subprocess.run(["git", "push", "origin", "--no-verify"], check=True) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/locodiff-250425/results/aider_scripts_versionbump.py/x-ai_grok-code-fast-1/20250909_155741/metadata.json b/locodiff-250425/results/aider_scripts_versionbump.py/x-ai_grok-code-fast-1/20250909_155741/metadata.json new file mode 100644 index 000000000..35df5db13 --- /dev/null +++ b/locodiff-250425/results/aider_scripts_versionbump.py/x-ai_grok-code-fast-1/20250909_155741/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "aider_scripts_versionbump.py", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/aider_scripts_versionbump.py_prompt.txt", + "expected_file": "locodiff-250425/prompts/aider_scripts_versionbump.py_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:57:41.908984+00:00", + "success": false, + "error": "Output mismatch", + "api_error": false, + "raw_response_length": 6205, + "extracted_output_length": 6191, + "expected_output_length": 5952, + "results_dir": "locodiff-250425/results/aider_scripts_versionbump.py/x-ai_grok-code-fast-1/20250909_155741", + "generation_id": "gen-1757433462-q39xjEXNImOdFG5yBhV6", + "cost_usd": 0.00909574, + "prompt_tokens": 22975, + "completion_tokens": 2908, + "total_tokens": 25883, + "native_prompt_tokens": 22937, + "native_completion_tokens": 5755, + "native_finish_reason": "stop", + "stats_error": null, + "native_tokens_reasoning": 4254 +} \ No newline at end of file diff --git a/locodiff-250425/results/aider_scripts_versionbump.py/x-ai_grok-code-fast-1/20250909_155741/output.diff 
b/locodiff-250425/results/aider_scripts_versionbump.py/x-ai_grok-code-fast-1/20250909_155741/output.diff new file mode 100644 index 000000000..5537952f7 --- /dev/null +++ b/locodiff-250425/results/aider_scripts_versionbump.py/x-ai_grok-code-fast-1/20250909_155741/output.diff @@ -0,0 +1,69 @@ + +index 3928e9d6c..1c4fa7683 100644 +--- a/aider_scripts_versionbump.py_expectedoutput.txt (expected):tmp/tmphocsl7s2_expected.txt ++++ b/aider_scripts_versionbump.py_extracted.txt (actual):tmp/tmpgx9_bnbt_actual.txt +@@ -6,7 +6,6 @@ import os + import re + import subprocess + import sys +- + from packaging import version + + +@@ -22,7 +21,9 @@ def check_branch(): + + # Function to check if the working directory is clean + def check_working_directory_clean(): +- status = subprocess.run(["git", "status", "--porcelain"], capture_output=True, text=True).stdout ++ status = subprocess.run( ++ ["git", "status", "--porcelain"], capture_output=True, text=True ++ ).stdout + if status: + print("Error: Working directory is not clean.") + sys.exit(1) +@@ -50,13 +51,11 @@ def check_main_branch_up_to_date(): + origin_date = datetime.datetime.strptime(origin_date, "%Y-%m-%d %H:%M:%S %z") + if local_date < origin_date: + print( +- "Error: The local main branch is behind origin/main. Please pull the latest" +- " changes." ++ "Error: The local main branch is behind origin/main. Please pull the latest changes." + ) + elif local_date > origin_date: + print( +- "Error: The origin/main branch is behind the local main branch. Please push" +- " your changes." ++ "Error: The origin/main branch is behind the local main branch. Please push your changes." 
+ ) + else: + print("Error: The main branch and origin/main have diverged.") +@@ -107,6 +106,7 @@ def main(): + + from aider import __version__ as current_version + ++ current_version = current_version.split("-dev")[0].split(".dev")[0] + if new_version <= version.parse(current_version): + raise ValueError( + f"New version {new_version} must be greater than the current version {current_version}" +@@ -133,10 +133,7 @@ def main(): + for cmd in git_commands: + print(f"Running: {' '.join(cmd)}") + if not dry_run: +- subprocess.run( +- cmd, +- check=True, +- ) ++ subprocess.run(cmd, check=True) + + new_dev_version = f"{incremented_version}.dev" + updated_dev_content = re.sub( +@@ -169,6 +166,9 @@ def main(): + print(f"Removing {version_file}") + if not dry_run: + os.remove(version_file) ++ subprocess.run(["git", "add", version_file], check=True) ++ subprocess.run(["git", "commit", "-m", f"Remove {version_file}"], check=True) ++ subprocess.run(["git", "push", "origin", "--no-verify"], check=True) + + + if __name__ == "__main__": diff --git a/locodiff-250425/results/aider_scripts_versionbump.py/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt b/locodiff-250425/results/aider_scripts_versionbump.py/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt new file mode 100644 index 000000000..c9ad44f2d --- /dev/null +++ b/locodiff-250425/results/aider_scripts_versionbump.py/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt @@ -0,0 +1,177 @@ +```python +#!/usr/bin/env python + +import argparse +import datetime +import os +import re +import subprocess +import sys +from packaging import version + + +# Function to check if we are on the main branch +def check_branch(): + branch = subprocess.run( + ["git", "rev-parse", "--abbrev-ref", "HEAD"], capture_output=True, text=True + ).stdout.strip() + if branch != "main": + print("Error: Not on the main branch.") + sys.exit(1) + + +# Function to check if the working directory is clean +def check_working_directory_clean(): + 
status = subprocess.run( + ["git", "status", "--porcelain"], capture_output=True, text=True + ).stdout + if status: + print("Error: Working directory is not clean.") + sys.exit(1) + + +# Function to fetch the latest changes and check if the main branch is up to date +def check_main_branch_up_to_date(): + subprocess.run(["git", "fetch", "origin"], check=True) + local_main = subprocess.run( + ["git", "rev-parse", "main"], capture_output=True, text=True + ).stdout.strip() + print(f"Local main commit hash: {local_main}") + origin_main = subprocess.run( + ["git", "rev-parse", "origin/main"], capture_output=True, text=True + ).stdout.strip() + print(f"Origin main commit hash: {origin_main}") + if local_main != origin_main: + local_date = subprocess.run( + ["git", "show", "-s", "--format=%ci", "main"], capture_output=True, text=True + ).stdout.strip() + origin_date = subprocess.run( + ["git", "show", "-s", "--format=%ci", "origin/main"], capture_output=True, text=True + ).stdout.strip() + local_date = datetime.datetime.strptime(local_date, "%Y-%m-%d %H:%M:%S %z") + origin_date = datetime.datetime.strptime(origin_date, "%Y-%m-%d %H:%M:%S %z") + if local_date < origin_date: + print( + "Error: The local main branch is behind origin/main. Please pull the latest changes." + ) + elif local_date > origin_date: + print( + "Error: The origin/main branch is behind the local main branch. Please push your changes." 
+ ) + else: + print("Error: The main branch and origin/main have diverged.") + sys.exit(1) + + +# Function to check if we can push to the origin repository +def check_ok_to_push(): + print("Checking if it's ok to push to origin repository...") + result = subprocess.run(["git", "push", "--dry-run", "origin"]) + + if result.returncode != 0: + print("Error: Cannot push to origin repository.") + sys.exit(1) + + print("Push to origin repository is possible.") + + +def main(): + parser = argparse.ArgumentParser(description="Bump version") + parser.add_argument("new_version", help="New version in x.y.z format") + parser.add_argument( + "--dry-run", action="store_true", help="Print each step without actually executing them" + ) + parser.add_argument("--force", action="store_true", help="Skip pre-push checks") + + args = parser.parse_args() + dry_run = args.dry_run + force = args.force + + # Perform checks before proceeding unless --force is used + if not force: + check_branch() + check_working_directory_clean() + check_main_branch_up_to_date() + check_ok_to_push() + else: + print("Skipping pre-push checks due to --force flag.") + + new_version_str = args.new_version + if not re.match(r"^\d+\.\d+\.\d+$", new_version_str): + raise ValueError(f"Invalid version format, must be x.y.z: {new_version_str}") + + new_version = version.parse(new_version_str) + incremented_version = version.Version( + f"{new_version.major}.{new_version.minor}.{new_version.micro + 1}" + ) + + from aider import __version__ as current_version + + current_version = current_version.split("-dev")[0].split(".dev")[0] + if new_version <= version.parse(current_version): + raise ValueError( + f"New version {new_version} must be greater than the current version {current_version}" + ) + + with open("aider/__init__.py", "r") as f: + content = f.read() + updated_content = re.sub(r'__version__ = ".+?"', f'__version__ = "{new_version}"', content) + + print("Updating aider/__init__.py with new version:") + 
print(updated_content) + if not dry_run: + with open("aider/__init__.py", "w") as f: + f.write(updated_content) + + git_commands = [ + ["git", "add", "aider/__init__.py"], + ["git", "commit", "-m", f"version bump to {new_version}"], + ["git", "tag", f"v{new_version}"], + ["git", "push", "origin", "--no-verify"], + ["git", "push", "origin", f"v{new_version}", "--no-verify"], + ] + + for cmd in git_commands: + print(f"Running: {' '.join(cmd)}") + if not dry_run: + subprocess.run(cmd, check=True) + + new_dev_version = f"{incremented_version}.dev" + updated_dev_content = re.sub( + r'__version__ = ".+?"', f'__version__ = "{new_dev_version}"', content + ) + + print() + print("Updating aider/__init__.py with new dev version:") + print(updated_dev_content) + if not dry_run: + with open("aider/__init__.py", "w") as f: + f.write(updated_dev_content) + + git_commands_dev = [ + ["git", "add", "aider/__init__.py"], + ["git", "commit", "-m", f"set version to {new_dev_version}"], + ["git", "tag", f"v{new_dev_version}"], + ["git", "push", "origin", "--no-verify"], + ["git", "push", "origin", f"v{new_dev_version}", "--no-verify"], + ] + + for cmd in git_commands_dev: + print(f"Running: {' '.join(cmd)}") + if not dry_run: + subprocess.run(cmd, check=True) + + # Remove aider/_version.py if it exists + version_file = "aider/_version.py" + if os.path.exists(version_file): + print(f"Removing {version_file}") + if not dry_run: + os.remove(version_file) + subprocess.run(["git", "add", version_file], check=True) + subprocess.run(["git", "commit", "-m", f"Remove {version_file}"], check=True) + subprocess.run(["git", "push", "origin", "--no-verify"], check=True) + + +if __name__ == "__main__": + main() +``` \ No newline at end of file diff --git a/locodiff-250425/results/aider_tests_basic_test_coder.py/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt b/locodiff-250425/results/aider_tests_basic_test_coder.py/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt new file mode 
100644 index 000000000..a582f1113 --- /dev/null +++ b/locodiff-250425/results/aider_tests_basic_test_coder.py/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt @@ -0,0 +1,790 @@ +import os +import tempfile +import unittest +from pathlib import Path +from unittest.mock import MagicMock, patch + +import git + +from aider.coders import Coder +from aider.coders.base_coder import FinishReasonLength, UnknownEditFormat +from aider.dump import dump # noqa: F401 +from aider.io import InputOutput +from aider.models import Model +from aider.repo import GitRepo +from aider.sendchat import sanity_check_messages +from aider.utils import GitTemporaryDirectory + + +class TestCoder(unittest.TestCase): + def setUp(self): + self.GPT35 = Model("gpt-3.5-turbo") + self.webbrowser_patcher = patch("aider.io.webbrowser.open") + self.mock_webbrowser = self.webbrowser_patcher.start() + + def test_allowed_to_edit(self): + with GitTemporaryDirectory(): + repo = git.Repo() + + fname = Path("added.txt") + fname.touch() + repo.git.add(str(fname)) + + fname = Path("repo.txt") + fname.touch() + repo.git.add(str(fname)) + + repo.git.commit("-m", "init") + + # YES! 
+ io = MagicMock() + io.confirm_ask = MagicMock(return_value=True) + coder = Coder.create(self.GPT35, None, io, fnames=["added.txt"]) + + self.assertTrue(coder.allowed_to_edit("added.txt")) + self.assertTrue(coder.allowed_to_edit("repo.txt")) + self.assertTrue(coder.allowed_to_edit("new.txt")) + + self.assertIn("repo.txt", str(coder.abs_fnames)) + self.assertIn("new.txt", str(coder.abs_fnames)) + + self.assertFalse(coder.need_commit_before_edits) + + def test_allowed_to_edit_no(self): + with GitTemporaryDirectory(): + repo = git.Repo() + + fname = Path("added.txt") + fname.touch() + repo.git.add(str(fname)) + + fname = Path("repo.txt") + fname.touch() + repo.git.add(str(fname)) + + repo.git.commit("-m", "init") + + # say NO + io = InputOutput(yes=False) + + coder = Coder.create(self.GPT35, None, io, fnames=["added.txt"]) + + self.assertTrue(coder.allowed_to_edit("added.txt")) + self.assertFalse(coder.allowed_to_edit("repo.txt")) + self.assertFalse(coder.allowed_to_edit("new.txt")) + + self.assertNotIn("repo.txt", str(coder.abs_fnames)) + self.assertNotIn("new.txt", str(coder.abs_fnames)) + + self.assertFalse(coder.need_commit_before_edits) + + def test_allowed_to_edit_dirty(self): + with GitTemporaryDirectory(): + repo = git.Repo() + + fname = Path("added.txt") + fname.touch() + repo.git.add(str(fname)) + + repo.git.commit("-m", "init") + + # say NO + io = InputOutput(yes=False) + + coder = Coder.create(self.GPT35, None, io, fnames=["added.txt"]) + + self.assertTrue(coder.allowed_to_edit("added.txt")) + self.assertFalse(coder.need_commit_before_edits) + + fname.write_text("dirty!") + self.assertTrue(coder.allowed_to_edit("added.txt")) + self.assertTrue(coder.need_commit_before_edits) + + def test_get_files_content(self): + tempdir = Path(tempfile.mkdtemp()) + + file1 = tempdir / "file1.txt" + file2 = tempdir / "file2.txt" + + file1.touch() + file2.touch() + + files = [file1, file2] + + # Initialize the Coder object with the mocked IO and mocked repo + coder = 
Coder.create(self.GPT35, None, io=InputOutput(), fnames=files) + + content = coder.get_files_content().splitlines() + self.assertIn("file1.txt", content) + self.assertIn("file2.txt", content) + + def test_check_for_filename_mentions(self): + with GitTemporaryDirectory(): + repo = git.Repo() + + mock_io = MagicMock() + + fname1 = Path("file1.txt") + fname2 = Path("file2.py") + + fname1.write_text("one\n") + fname2.write_text("two\n") + + repo.git.add(str(fname1)) + repo.git.add(str(fname2)) + repo.git.commit("-m", "new") + + # Initialize the Coder object with the mocked IO and mocked repo + coder = Coder.create(self.GPT35, None, mock_io) + + # Call the check_for_file_mentions method + coder.check_for_file_mentions("Please check file1.txt and file2.py") + + # Check if coder.abs_fnames contains both files + expected_files = set( + [ + str(Path(coder.root) / fname1), + str(Path(coder.root) / fname2), + ] + ) + + self.assertEqual(coder.abs_fnames, expected_files) + + def test_check_for_ambiguous_filename_mentions_of_longer_paths(self): + with GitTemporaryDirectory(): + io = InputOutput(pretty=False, yes=True) + coder = Coder.create(self.GPT35, None, io) + + fname = Path("file1.txt") + fname.touch() + + other_fname = Path("other") / "file1.txt" + other_fname.parent.mkdir(parents=True, exist_ok=True) + other_fname.touch() + + mock = MagicMock() + mock.return_value = set([str(fname), str(other_fname)]) + coder.repo.get_tracked_files = mock + + # Call the check_for_file_mentions method + coder.check_for_file_mentions(f"Please check {fname}!") + + self.assertEqual(coder.abs_fnames, set([str(fname.resolve())])) + + def test_check_for_subdir_mention(self): + with GitTemporaryDirectory(): + io = InputOutput(pretty=False, yes=True) + coder = Coder.create(self.GPT35, None, io) + + fname = Path("other") / "file1.txt" + fname.parent.mkdir(parents=True, exist_ok=True) + fname.touch() + + mock = MagicMock() + mock.return_value = set([str(fname)]) + coder.repo.get_tracked_files = mock 
+ + # Call the check_for_file_mentions method + coder.check_for_file_mentions(f"Please check `{fname}`") + + self.assertEqual(coder.abs_fnames, set([str(fname.resolve())])) + + def test_skip_duplicate_basename_mentions(self): + with GitTemporaryDirectory(): + io = InputOutput(pretty=False, yes=True) + coder = Coder.create(self.GPT35, None, io) + + # Create files with same basename in different directories + fname1 = Path("dir1") / "file.txt" + fname2 = Path("dir2") / "file.txt" + fname3 = Path("dir3") / "unique.txt" + + for fname in [fname1, fname2, fname3]: + fname.parent.mkdir(parents=True, exist_ok=True) + fname.touch() + + # Add one file to chat + coder.add_rel_fname(str(fname1)) + + # Mock get_tracked_files to return all files + mock = MagicMock() + mock.return_value = set([str(fname1), str(fname2), str(fname3)]) + coder.repo.get_tracked_files = mock + + # Check that file mentions of a pure basename skips files with duplicate basenames + mentioned = coder.get_file_mentions(f"Check {fname2.name} and {fname3}") + self.assertEqual(mentioned, {str(fname3)}) + + # Add a read-only file with same basename + coder.abs_read_only_fnames.add(str(fname2.resolve())) + mentioned = coder.get_file_mentions(f"Check {fname1} and {fname3}") + self.assertEqual(mentioned, {str(fname3)}) + + def test_check_for_file_mentions_read_only(self): + with GitTemporaryDirectory(): + io = InputOutput( + pretty=False, yes=True, + ) + coder = Coder.create(self.GPT35, None, io) + + fname = Path("readonly_file.txt") + fname.touch() + + coder.abs_read_only_fnames.add(str(fname.resolve())) + + # Mock the get_tracked_files method + mock = MagicMock() + mock.return_value = set([str(fname)]) + coder.repo.get_tracked_files = mock + + # Call the check_for_file_mentions method + result = coder.check_for_file_mentions(f"Please check {fname}!") + + # Assert that the method returns None (user not asked to add the file) + self.assertIsNone(result) + + # Assert that abs_fnames is still empty (file not added) 
+ self.assertEqual(coder.abs_fnames, set()) + + def test_check_for_file_mentions_with_mocked_confirm(self): + with GitTemporaryDirectory(): + io = InputOutput(pretty=False) + coder = Coder.create(self.GPT35, None, io) + + # Mock get_file_mentions to return two file names + coder.get_file_mentions = MagicMock(return_value=set(["file1.txt", "file2.txt"])) + + # Mock confirm_ask to return False for the first call and True for the second + io.confirm_ask = MagicMock(side_effect=[False, True, True]) + + # First call to check_for_file_mentions + coder.check_for_file_mentions("Please check file1.txt for the info") + + # Assert that confirm_ask was called twice + self.assertEqual(io.confirm_ask.call_count, 2) + + # Assert that only file2.txt was added to abs_fnames + self.assertEqual(len(coder.abs_fnames), 1) + self.assertIn("file2.txt", str(coder.abs_fnames)) + + # Reset the mock + io.confirm_ask.reset_mock() + + # Second call to check_for_file_mentions + coder.check_for_file_mentions("Please check file1.txt and file2.txt again") + + # Assert that confirm_ask was called only once (for file1.txt) + self.assertEqual(io.confirm_ask.call_count, 1) + + # Assert that abs_fnames still contains only file2.txt + self.assertEqual(len(coder.abs_fnames), 1) + self.assertIn("file2.txt", str(coder.abs_fnames)) + + # Assert that file1.txt is in ignore_mentions + self.assertIn("file1.txt", coder.ignore_mentions) + + def test_run_with_file_deletion(self): + # Create a few temporary files + + tempdir = Path(tempfile.mkdtemp()) + + file1 = tempdir / "file1.txt" + file2 = tempdir / "file2.txt" + + file1.touch() + file2.touch() + + files = [file1, file2] + + # Initialize the Coder object with the mocked IO and mocked repo + coder = Coder.create(self.GPT35, None, io=InputOutput(), fnames=files) + + def mock_send(*args, **kwargs): + coder.partial_response_content = "ok" + coder.partial_response_function_call = dict() + return [] + + coder.send = mock_send + + # Call the run method with a 
message + coder.run(with_message="hi") + self.assertEqual(len(coder.abs_fnames), 2) + + file1.unlink() + + # Call the run method again with a message + coder.run(with_message="hi") + self.assertEqual(len(coder.abs_fnames), 1) + + def test_run_with_file_unicode_error(self): + # Create a few temporary files + _, file1 = tempfile.mkstemp() + _, file2 = tempfile.mkstemp() + + files = [file1, file2] + + # Initialize the Coder object with the mocked IO and mocked repo + coder = Coder.create(self.GPT35, None, io=InputOutput(), fnames=files) + + def mock_send(*args, **kwargs): + coder.partial_response_content = "ok" + coder.partial_response_function_call = dict() + return [] + + coder.send = mock_send + + # Call the run method with a message + coder.run(with_message="hi") + self.assertEqual(len(coder.abs_fnames), 2) + + # Write some non-UTF8 text into the file + with open(file1, "wb") as f: + f.write(b"\x80abc") + + # Call the run method again with a message + coder.run(with_message="hi") + self.assertEqual(len(coder.abs_fnames), 1) + + def test_choose_fence(self): + # Create a few temporary files + _, file1 = tempfile.mkstemp() + + with open(file1, "wb") as f: + f.write(b"this contains\n```\nbackticks") + + files = [file1] + + # Initialize the Coder object with the mocked IO and mocked repo + coder = Coder.create(self.GPT35, None, io=InputOutput(), fnames=files) + + def mock_send(*args, **kwargs): + coder.partial_response_content = "ok" + coder.partial_response_function_call = dict() + return [] + + coder.send = mock_send + + # Call the run method with a message + coder.run(with_message="hi") + + self.assertNotEqual(coder.fence[0], "```") + + def test_run_with_file_utf_unicode_error(self): + "make sure that we honor InputOutput(encoding) and don't just assume utf-8" + # Create a few temporary files + _, file1 = tempfile.mkstemp() + _, file2 = tempfile.mkstemp() + + files = [file1, file2] + + encoding = "utf-16" + + # Initialize the Coder object with the mocked IO and 
mocked repo + coder = Coder.create( + self.GPT35, + None, + io=InputOutput(encoding=encoding), + fnames=files, + ) + + def mock_send(*args, **kwargs): + coder.partial_response_content = "ok" + coder.partial_response_function_call = dict() + return [] + + coder.send = mock_send + + # Call the run method with a message + coder.run(with_message="hi") + self.assertEqual(len(coder.abs_fnames), 2) + + some_content_which_will_error_if_read_with_encoding_utf8 = "ÅÍÎÏ".encode(encoding) + with open(file1, "wb") as f: + f.write(some_content_which_will_error_if_read_with_encoding_utf8) + + coder.run(with_message="hi") + + # both files should still be here + self.assertEqual(len(coder.abs_fnames), 2) + + def test_new_file_edit_one_commit(self): + """A new file shouldn get pre-committed before the GPT edit commit""" + with GitTemporaryDirectory(): + repo = git.Repo() + + fname = Path("file.txt") + + io = InputOutput(yes=True) + coder = Coder.create(self.GPT35, "diff", io=io, fnames=[str(fname)]) + + self.assertTrue(fname.exists()) + + # make sure it was not committed + with self.assertRaises(git.exc.GitCommandError): + list(repo.iter_commits(repo.active_branch.name)) + + def mock_send(*args, **kwargs): + coder.partial_response_content = f""" +Do this: + +{str(fname)} +<<<<<<< SEARCH +======= +new +>>>>>>> REPLACE + +""" + coder.partial_response_function_call = dict() + return [] + + coder.send = mock_send + coder.repo.get_commit_message = MagicMock() + coder.repo.get_commit_message.return_value = "commit message" + + coder.run(with_message="hi") + + content = fname.read_text() + self.assertEqual(content, "new\n") + + num_commits = len(list(repo.iter_commits(repo.active_branch.name))) + self.assertEqual(num_commits, 2) + + def test_only_commit_gpt_edited_file(self): + """ + Only commit file that gpt edits, not other dirty files. + Also ensure commit msg only depends on diffs from the GPT edited file. 
+ """ + + with GitTemporaryDirectory(): + repo = git.Repo() + + fname1 = Path("file1.txt") + fname2 = Path("file2.txt") + + fname1.write_text("one\n") + fname2.write_text("two\n") + + repo.git.add(str(fname1)) + repo.git.add(str(fname2)) + repo.git.commit("-m", "new") + + # DIRTY! + fname1.write_text("ONE\n") + + io = InputOutput(yes=True) + coder = Coder.create(self.GPT35, "diff", io=io, fnames=[str(fname1), str(fname2)]) + + def mock_send(*args, **kwargs): + coder.partial_response_content = f""" +Do this: + +{str(fname2)} +<<<<<<< SEARCH +two +======= +TWO +>>>>>>> REPLACE + +""" + coder.partial_response_function_call = dict() + return [] + + def mock_get_commit_message(diffs, context): + self.assertNotIn("one", diffs) + self.assertNotIn("ONE", diffs) + return "commit message" + + coder.send = mock_send + coder.repo.get_commit_message = MagicMock(side_effect=mock_get_commit_message) + + coder.run(with_message="hi") + + content = fname2.read_text() + self.assertEqual(content, "TWO\n") + + self.assertTrue(repo.is_dirty(path=str(fname1))) + + def test_gpt_edit_to_dirty_file(self): + """A dirty file should be committed before the GPT edits are committed""" + + with GitTemporaryDirectory(): + repo = git.Repo() + + fname = Path("file.txt") + fname.write_text("one\n") + repo.git.add(str(fname)) + + fname2 = Path("other.txt") + fname2.write_text("other\n") + repo.git.add(str(fname2)) + + repo.git.commit("-m", "new") + + # dirty + fname.write_text("two\n") + fname2.write_text("OTHER\n") + + io = InputOutput(yes=True) + coder = Coder.create(self.GPT35, "diff", io=io, fnames=[str(fname)]) + + def mock_send(*args, **kwargs): + coder.partial_response_content = f""" +Do this: + +{str(fname)} +<<<<<<< SEARCH +two +======= +three +>>>>>>> REPLACE + +""" + coder.partial_response_function_call = dict() + return [] + + saved_diffs = [] + + def mock_get_commit_message(diffs, context): + saved_diffs.append(diffs) + return "commit message" + + coder.repo.get_commit_message = 
MagicMock(side_effect=mock_get_commit_message) + coder.send = mock_send + + coder.run(with_message="hi") + + content = fname.read_text() + self.assertEqual(content, "three\n") + + num_commits = len(list(repo.iter_commits(repo.active_branch.name))) + self.assertEqual(num_commits, 3) + + diff = repo.git.diff(["HEAD~2", "HEAD~1"]) + self.assertIn("one", diff) + self.assertIn("two", diff) + self.assertNotIn("three", diff) + self.assertNotIn("other", diff) + self.assertNotIn("OTHER", diff) + + diff = saved_diffs[0] + self.assertIn("one", diff) + self.assertIn("two", diff) + self.assertNotIn("three", diff) + self.assertNotIn("other", diff) + self.assertNotIn("OTHER", diff) + + diff = repo.git.diff(["HEAD~1", "HEAD"]) + self.assertNotIn("one", diff) + self.assertIn("two", diff) + self.assertIn("three", diff) + self.assertNotIn("other", diff) + self.assertNotIn("OTHER", diff) + + diff = saved_diffs[1] + self.assertNotIn("one", diff) + self.assertIn("two", diff) + self.assertIn("three", diff) + self.assertNotIn("other", diff) + self.assertNotIn("OTHER", diff) + + self.assertEqual(len(saved_diffs), 2) + + def test_gpt_edit_to_existing_file_not_in_repo(self): + with GitTemporaryDirectory(): + repo = git.Repo() + + fname = Path("file.txt") + fname.write_text("one\n") + + fname2 = Path("other.txt") + fname2.write_text("other\n") + repo.git.add(str(fname2)) + + repo.git.commit("-m", "initial") + + io = InputOutput(yes=True) + coder = Coder.create(self.GPT35, "diff", io=io, fnames=[str(fname)]) + + def mock_send(*args, **kwargs): + coder.partial_response_content = f""" +Do this: + +{str(fname)} +<<<<<<< SEARCH +one +======= +two +>>>>>>> REPLACE + +""" + coder.partial_response_function_call = dict() + return [] + + saved_diffs = [] + + def mock_get_commit_message(diffs, context): + saved_diffs.append(diffs) + return "commit message" + + coder.repo.get_commit_message = MagicMock(side_effect=mock_get_commit_message) + coder.send = mock_send + + coder.run(with_message="hi") + + 
content = fname.read_text() + self.assertEqual(content, "two\n") + + diff = saved_diffs[0] + self.assertIn("file.txt", diff) + + def test_skip_aiderignored_files(self): + with GitTemporaryDirectory(): + repo = git.Repo() + + fname1 = "ignoreme1.txt" + fname2 = "ignoreme2.txt" + fname3 = "dir/ignoreme3.txt" + + Path(fname2).touch() + repo.git.add(str(fname2)) + repo.git.commit("-m", "initial") + + aignore = Path(".aiderignore") + aignore.write_text(f"{fname1}\n{fname2}\ndir\n") + repo = GitRepo( + InputOutput(yes=True), + [fname1, fname2, fname3], + None, + aider_ignore_file=str(aignore), + ) + + io = InputOutput(yes=True) + coder = Coder.create( + self.GPT35, + None, + io, + fnames=[fname1, fname2, fname3], + repo=repo, + ) + + self.assertNotIn(fname1, str(coder.abs_fnames)) + self.assertNotIn(fname2, str(coder.abs_fnames)) + self.assertNotIn(fname3, str(coder.abs_fnames)) + + def test_check_for_urls(self): + io = InputOutput(yes=True) + coder = Coder.create(self.GPT35, None, io=io) + coder.commands.scraper = MagicMock() + coder.commands.scraper.scrape = MagicMock(return_value="some content") + + # Test various URL formats + test_cases = [ + ("Check http://example.com, it's cool", "http://example.com"), + ("Visit https://www.example.com/page and see stuff", "https://www.example.com/page"), + ( + "Go to http://subdomain.example.com:8080/path?query=value, or not", + "http://subdomain.example.com:8080/path?query=value", + ), + ( + "See https://example.com/path#fragment for example", + "https://example.com/path#fragment", + ), + ("Look at http://localhost:3000", "http://localhost:3000"), + ("View https://example.com/setup#whatever", "https://example.com/setup#whatever"), + ("Open http://127.0.0.1:8000/api/v1/", "http://127.0.0.1:8000/api/v1/"), + ( + "Try https://example.com/path/to/page.html?param1=value1¶m2=value2", + "https://example.com/path/to/page.html?param1=value1¶m2=value2", + ), + ("Access http://user:password@example.com", 
"http://user:password@example.com"), + ( + "Use https://example.com/path_(with_parentheses)", + "https://example.com/path_(with_parentheses)", + ), + ] + + for input_text, expected_url in test_cases: + with self.subTest(input_text=input_text): + result = coder.check_for_urls(input_text) + self.assertIn(expected_url, result) + + # Test cases from the GitHub issue + issue_cases = [ + ("check http://localhost:3002, there is an error", "http://localhost:3002"), + ( + "can you check out https://example.com/setup#whatever", + "https://example.com/setup#whatever", + ), + ] + + for input_text, expected_url in issue_cases: + with self.subTest(input_text=input_text): + result = coder.check_for_urls(input_text) + self.assertIn(expected_url, result) + + # Test case with multiple URLs + multi_url_input = "Check http://example1.com and https://example2.com/page" + result = coder.check_for_urls(multi_url_input) + self.assertIn("http://example1.com", result) + self.assertIn("https://example2.com/page", result) + + # Test case with no URL + no_url_input = "This text contains no URL" + result = coder.check_for_urls(no_url_input) + self.assertEqual(result, no_url_input) + + # Test case with the same URL appearing multiple times + repeated_url_input = ( + "Check https://example.com, then https://example.com again, and https://example.com one" + " more time" + ) + result = coder.check_for_urls(repeated_url_input) + self.assertEqual(result.count("https://example.com"), 4) + self.assertIn("https://example.com", result) + + def test_coder_from_coder_with_subdir(self): + with GitTemporaryDirectory() as root: + repo = git.Repo.init(root) + + # Create a file in a subdirectory + subdir = Path(root) / "subdir" + subdir.mkdir() + test_file = subdir / "test_file.txt" + test_file.write_text("Test content") + + repo.git.add(str(test_file)) + repo.git.commit("-m", "Add test file") + + # Change directory to the subdirectory + os.chdir(subdir.resolve()) + + # Create the first coder + io = 
InputOutput(yes=True) + coder1 = Coder.create(self.GPT35, None, io=io, fnames=[test_file.name]) + + # Create a new coder from the first coder + coder2 = Coder.create(from_coder=coder1) + + # Check if both coders have the same set of abs_fnames + self.assertEqual(coder1.abs_fnames, coder2.abs_fnames) + + # Ensure the abs_fnames contain the correct absolute path + expected_abs_path = os.path.realpath(str(test_file)) + coder1_abs_fnames = set(os.path.realpath(path) for path in coder1.abs_fnames) + self.assertIn(expected_abs_path, coder1_abs_fnames) + self.assertIn(expected_abs_path, coder2.abs_fnames) + + # Check that the abs_fnames do not contain duplicate or incorrect paths + self.assertEqual(len(coder1.abs_fnames), 1) + self.assertEqual(len(coder2.abs_fnames), 1) + + def test_suggest_shell_commands(self): + with GitTemporaryDirectory(): + io = InputOutput(yes=True) + coder = Coder.create(self.GPT35, "diff", io=io) + + def mock_send(*args, **kwargs): + coder.partial_response_content = """Here's a shell command to run: + +```bash +echo "Hello, World!" 
\ No newline at end of file diff --git a/locodiff-250425/results/aider_tests_basic_test_coder.py/x-ai_grok-code-fast-1/20250909_155741/metadata.json b/locodiff-250425/results/aider_tests_basic_test_coder.py/x-ai_grok-code-fast-1/20250909_155741/metadata.json new file mode 100644 index 000000000..452e54127 --- /dev/null +++ b/locodiff-250425/results/aider_tests_basic_test_coder.py/x-ai_grok-code-fast-1/20250909_155741/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "aider_tests_basic_test_coder.py", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/aider_tests_basic_test_coder.py_prompt.txt", + "expected_file": "locodiff-250425/prompts/aider_tests_basic_test_coder.py_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:57:41.906533+00:00", + "success": false, + "error": "Output mismatch", + "api_error": false, + "raw_response_length": 31318, + "extracted_output_length": 26530, + "expected_output_length": 47855, + "results_dir": "locodiff-250425/results/aider_tests_basic_test_coder.py/x-ai_grok-code-fast-1/20250909_155741", + "generation_id": "gen-1757433462-tvaNNvlP8YSTsj9hW5Gn", + "cost_usd": 0.02257058, + "prompt_tokens": 38224, + "completion_tokens": 8387, + "total_tokens": 46611, + "native_prompt_tokens": 38206, + "native_completion_tokens": 9999, + "native_finish_reason": "length", + "stats_error": null, + "native_tokens_reasoning": 3216 +} \ No newline at end of file diff --git a/locodiff-250425/results/aider_tests_basic_test_coder.py/x-ai_grok-code-fast-1/20250909_155741/output.diff b/locodiff-250425/results/aider_tests_basic_test_coder.py/x-ai_grok-code-fast-1/20250909_155741/output.diff new file mode 100644 index 000000000..ac479fc12 --- /dev/null +++ b/locodiff-250425/results/aider_tests_basic_test_coder.py/x-ai_grok-code-fast-1/20250909_155741/output.diff @@ -0,0 +1,622 @@ + +index c051e53fd..a582f1113 100644 +--- a/aider_tests_basic_test_coder.py_expectedoutput.txt 
(expected):tmp/tmpfno0io8c_expected.txt ++++ b/aider_tests_basic_test_coder.py_extracted.txt (actual):tmp/tmp2uaag0j4_actual.txt +@@ -37,7 +37,6 @@ class TestCoder(unittest.TestCase): + repo.git.commit("-m", "init") + + # YES! +- # Use a completely mocked IO object instead of a real one + io = MagicMock() + io.confirm_ask = MagicMock(return_value=True) + coder = Coder.create(self.GPT35, None, io, fnames=["added.txt"]) +@@ -172,6 +171,24 @@ class TestCoder(unittest.TestCase): + + self.assertEqual(coder.abs_fnames, set([str(fname.resolve())])) + ++ def test_check_for_subdir_mention(self): ++ with GitTemporaryDirectory(): ++ io = InputOutput(pretty=False, yes=True) ++ coder = Coder.create(self.GPT35, None, io) ++ ++ fname = Path("other") / "file1.txt" ++ fname.parent.mkdir(parents=True, exist_ok=True) ++ fname.touch() ++ ++ mock = MagicMock() ++ mock.return_value = set([str(fname)]) ++ coder.repo.get_tracked_files = mock ++ ++ # Call the check_for_file_mentions method ++ coder.check_for_file_mentions(f"Please check `{fname}`") ++ ++ self.assertEqual(coder.abs_fnames, set([str(fname.resolve())])) ++ + def test_skip_duplicate_basename_mentions(self): + with GitTemporaryDirectory(): + io = InputOutput(pretty=False, yes=True) +@@ -206,8 +223,7 @@ class TestCoder(unittest.TestCase): + def test_check_for_file_mentions_read_only(self): + with GitTemporaryDirectory(): + io = InputOutput( +- pretty=False, +- yes=True, ++ pretty=False, yes=True, + ) + coder = Coder.create(self.GPT35, None, io) + +@@ -267,182 +283,6 @@ class TestCoder(unittest.TestCase): + # Assert that file1.txt is in ignore_mentions + self.assertIn("file1.txt", coder.ignore_mentions) + +- def test_check_for_subdir_mention(self): +- with GitTemporaryDirectory(): +- io = InputOutput(pretty=False, yes=True) +- coder = Coder.create(self.GPT35, None, io) +- +- fname = Path("other") / "file1.txt" +- fname.parent.mkdir(parents=True, exist_ok=True) +- fname.touch() +- +- mock = MagicMock() +- mock.return_value = 
set([str(fname)]) +- coder.repo.get_tracked_files = mock +- +- # Call the check_for_file_mentions method +- coder.check_for_file_mentions(f"Please check `{fname}`") +- +- self.assertEqual(coder.abs_fnames, set([str(fname.resolve())])) +- +- def test_get_file_mentions_various_formats(self): +- with GitTemporaryDirectory(): +- io = InputOutput(pretty=False, yes=True) +- coder = Coder.create(self.GPT35, None, io) +- +- # Create test files +- test_files = [ +- "file1.txt", +- "file2.py", +- "dir/nested_file.js", +- "dir/subdir/deep_file.html", +- "file99.txt", +- "special_chars!@#.md", +- ] +- +- # Pre-format the Windows path to avoid backslash issues in f-string expressions +- windows_path = test_files[2].replace("/", "\\") +- win_path3 = test_files[3].replace("/", "\\") +- +- for fname in test_files: +- fpath = Path(fname) +- fpath.parent.mkdir(parents=True, exist_ok=True) +- fpath.touch() +- +- # Mock get_addable_relative_files to return our test files +- coder.get_addable_relative_files = MagicMock(return_value=set(test_files)) +- +- # Test different mention formats +- test_cases = [ +- # Simple plain text mentions +- (f"You should edit {test_files[0]} first", {test_files[0]}), +- # Multiple files in plain text +- (f"Edit both {test_files[0]} and {test_files[1]}", {test_files[0], test_files[1]}), +- # Files in backticks +- (f"Check the file `{test_files[2]}`", {test_files[2]}), +- # Files in code blocks +- (f"```\n{test_files[3]}\n```", {test_files[3]}), +- # Files in code blocks with language specifier +- # ( +- # f"```python\nwith open('{test_files[1]}', 'r') as f:\n" +- # f" data = f.read()\n```", +- # {test_files[1]}, +- # ), +- # Files with Windows-style paths +- (f"Edit the file {windows_path}", {test_files[2]}), +- # Files with different quote styles +- (f'Check "{test_files[5]}" now', {test_files[5]}), +- # All files in one complex message +- ( +- ( +- f"First, edit `{test_files[0]}`. 
Then modify {test_files[1]}.\n" +- f"```js\n// Update this file\nconst file = '{test_files[2]}';\n```\n" +- f"Finally check {win_path3}" +- ), +- {test_files[0], test_files[1], test_files[2], test_files[3]}, +- ), +- # Files mentioned in markdown bold format +- (f"You should check **{test_files[0]}** for issues", {test_files[0]}), +- ( +- f"Look at both **{test_files[1]}** and **{test_files[2]}**", +- {test_files[1], test_files[2]}, +- ), +- ( +- f"The file **{win_path3}** needs updating", +- {test_files[3]}, +- ), +- ( +- f"Files to modify:\n- **{test_files[0]}**\n- **{test_files[4]}**", +- {test_files[0], test_files[4]}, +- ), +- ] +- +- for content, expected_mentions in test_cases: +- with self.subTest(content=content): +- mentioned_files = coder.get_file_mentions(content) +- self.assertEqual( +- mentioned_files, +- expected_mentions, +- f"Failed to extract mentions from: {content}", +- ) +- +- def test_get_file_mentions_multiline_backticks(self): +- with GitTemporaryDirectory(): +- io = InputOutput(pretty=False, yes=True) +- coder = Coder.create(self.GPT35, None, io) +- +- # Create test files +- test_files = [ +- "swebench/harness/test_spec/python.py", +- "swebench/harness/test_spec/javascript.py", +- ] +- for fname in test_files: +- fpath = Path(fname) +- fpath.parent.mkdir(parents=True, exist_ok=True) +- fpath.touch() +- +- # Mock get_addable_relative_files to return our test files +- coder.get_addable_relative_files = MagicMock(return_value=set(test_files)) +- +- # Input text with multiline backticked filenames +- content = """ +-Could you please **add the following files to the chat**? +- +-1. `swebench/harness/test_spec/python.py` +-2. `swebench/harness/test_spec/javascript.py` +- +-Once I have these, I can show you precisely how to do the thing. 
+-""" +- expected_mentions = { +- "swebench/harness/test_spec/python.py", +- "swebench/harness/test_spec/javascript.py", +- } +- +- mentioned_files = coder.get_file_mentions(content) +- self.assertEqual( +- mentioned_files, +- expected_mentions, +- f"Failed to extract mentions from multiline backticked content: {content}", +- ) +- +- def test_get_file_mentions_path_formats(self): +- with GitTemporaryDirectory(): +- io = InputOutput(pretty=False, yes=True) +- coder = Coder.create(self.GPT35, None, io) +- +- # Test cases with different path formats +- test_cases = [ +- # Unix paths in content, Unix paths in get_addable_relative_files +- ("Check file1.txt and dir/file2.txt", ["file1.txt", "dir/file2.txt"]), +- # Windows paths in content, Windows paths in get_addable_relative_files +- ("Check file1.txt and dir\\file2.txt", ["file1.txt", "dir\\file2.txt"]), +- # Unix paths in content, Windows paths in get_addable_relative_files +- ("Check file1.txt and dir/file2.txt", ["file1.txt", "dir\\file2.txt"]), +- # Windows paths in content, Unix paths in get_addable_relative_files +- ("Check file1.txt and dir\\file2.txt", ["file1.txt", "dir/file2.txt"]), +- # Mixed paths in content, Unix paths in get_addable_relative_files +- ( +- "Check file1.txt, dir/file2.txt, and other\\file3.txt", +- ["file1.txt", "dir/file2.txt", "other/file3.txt"], +- ), +- # Mixed paths in content, Windows paths in get_addable_relative_files +- ( +- "Check file1.txt, dir/file2.txt, and other\\file3.txt", +- ["file1.txt", "dir\\file2.txt", "other\\file3.txt"], +- ), +- ] +- +- for content, addable_files in test_cases: +- with self.subTest(content=content, addable_files=addable_files): +- coder.get_addable_relative_files = MagicMock(return_value=set(addable_files)) +- mentioned_files = coder.get_file_mentions(content) +- expected_files = set(addable_files) +- self.assertEqual( +- mentioned_files, +- expected_files, +- f"Failed for content: {content}, addable_files: {addable_files}", +- ) +- + def 
test_run_with_file_deletion(self): + # Create a few temporary files + +@@ -568,7 +408,7 @@ Once I have these, I can show you precisely how to do the thing. + self.assertEqual(len(coder.abs_fnames), 2) + + def test_new_file_edit_one_commit(self): +- """A new file should get pre-committed before the GPT edit commit""" ++ """A new file shouldn get pre-committed before the GPT edit commit""" + with GitTemporaryDirectory(): + repo = git.Repo() + +@@ -809,24 +649,21 @@ two + repo.git.add(str(fname2)) + repo.git.commit("-m", "initial") + +- io = InputOutput(yes=True) +- +- fnames = [fname1, fname2, fname3] +- + aignore = Path(".aiderignore") + aignore.write_text(f"{fname1}\n{fname2}\ndir\n") + repo = GitRepo( +- io, +- fnames, ++ InputOutput(yes=True), ++ [fname1, fname2, fname3], + None, + aider_ignore_file=str(aignore), + ) + ++ io = InputOutput(yes=True) + coder = Coder.create( + self.GPT35, + None, + io, +- fnames=fnames, ++ fnames=[fname1, fname2, fname3], + repo=repo, + ) + +@@ -902,7 +739,6 @@ two + " more time" + ) + result = coder.check_for_urls(repeated_url_input) +- # the original 3 in the input text, plus 1 more for the scraped text + self.assertEqual(result.count("https://example.com"), 4) + self.assertIn("https://example.com", result) + +@@ -951,342 +787,4 @@ two + coder.partial_response_content = """Here's a shell command to run: + + ```bash +-echo "Hello, World!" +-``` +- +-This command will print 'Hello, World!' 
to the console.""" +- coder.partial_response_function_call = dict() +- return [] +- +- coder.send = mock_send +- +- # Mock the handle_shell_commands method to check if it's called +- coder.handle_shell_commands = MagicMock() +- +- # Run the coder with a message +- coder.run(with_message="Suggest a shell command") +- +- # Check if the shell command was added to the list +- self.assertEqual(len(coder.shell_commands), 1) +- self.assertEqual(coder.shell_commands[0].strip(), 'echo "Hello, World!"') +- +- # Check if handle_shell_commands was called with the correct argument +- coder.handle_shell_commands.assert_called_once() +- +- def test_no_suggest_shell_commands(self): +- with GitTemporaryDirectory(): +- io = InputOutput(yes=True) +- coder = Coder.create(self.GPT35, "diff", io=io, suggest_shell_commands=False) +- self.assertFalse(coder.suggest_shell_commands) +- +- def test_detect_urls_enabled(self): +- with GitTemporaryDirectory(): +- io = InputOutput(yes=True) +- coder = Coder.create(self.GPT35, "diff", io=io, detect_urls=True) +- coder.commands.scraper = MagicMock() +- coder.commands.scraper.scrape = MagicMock(return_value="some content") +- +- # Test with a message containing a URL +- message = "Check out https://example.com" +- coder.check_for_urls(message) +- coder.commands.scraper.scrape.assert_called_once_with("https://example.com") +- +- def test_detect_urls_disabled(self): +- with GitTemporaryDirectory(): +- io = InputOutput(yes=True) +- coder = Coder.create(self.GPT35, "diff", io=io, detect_urls=False) +- coder.commands.scraper = MagicMock() +- coder.commands.scraper.scrape = MagicMock(return_value="some content") +- +- # Test with a message containing a URL +- message = "Check out https://example.com" +- result = coder.check_for_urls(message) +- self.assertEqual(result, message) +- coder.commands.scraper.scrape.assert_not_called() +- +- def test_unknown_edit_format_exception(self): +- # Test the exception message format +- invalid_format = "invalid_format" 
+- valid_formats = ["diff", "whole", "map"] +- exc = UnknownEditFormat(invalid_format, valid_formats) +- expected_msg = ( +- f"Unknown edit format {invalid_format}. Valid formats are: {', '.join(valid_formats)}" +- ) +- self.assertEqual(str(exc), expected_msg) +- +- def test_unknown_edit_format_creation(self): +- # Test that creating a Coder with invalid edit format raises the exception +- io = InputOutput(yes=True) +- invalid_format = "invalid_format" +- +- with self.assertRaises(UnknownEditFormat) as cm: +- Coder.create(self.GPT35, invalid_format, io=io) +- +- exc = cm.exception +- self.assertEqual(exc.edit_format, invalid_format) +- self.assertIsInstance(exc.valid_formats, list) +- self.assertTrue(len(exc.valid_formats) > 0) +- +- def test_system_prompt_prefix(self): +- # Test that system_prompt_prefix is properly set and used +- io = InputOutput(yes=True) +- test_prefix = "Test prefix. " +- +- # Create a model with system_prompt_prefix +- model = Model("gpt-3.5-turbo") +- model.system_prompt_prefix = test_prefix +- +- coder = Coder.create(model, None, io=io) +- +- # Get the formatted messages +- chunks = coder.format_messages() +- messages = chunks.all_messages() +- +- # Check if the system message contains our prefix +- system_message = next(msg for msg in messages if msg["role"] == "system") +- self.assertTrue(system_message["content"].startswith(test_prefix)) +- +- def test_coder_create_with_new_file_oserror(self): +- with GitTemporaryDirectory(): +- io = InputOutput(yes=True) +- new_file = "new_file.txt" +- +- # Mock Path.touch() to raise OSError +- with patch("pathlib.Path.touch", side_effect=OSError("Permission denied")): +- # Create the coder with a new file +- coder = Coder.create(self.GPT35, "diff", io=io, fnames=[new_file]) +- +- # Check if the coder was created successfully +- self.assertIsInstance(coder, Coder) +- +- # Check if the new file is not in abs_fnames +- self.assertNotIn(new_file, [os.path.basename(f) for f in coder.abs_fnames]) +- +- def 
test_show_exhausted_error(self): +- with GitTemporaryDirectory(): +- io = InputOutput(yes=True) +- coder = Coder.create(self.GPT35, "diff", io=io) +- +- # Set up some real done_messages and cur_messages +- coder.done_messages = [ +- {"role": "user", "content": "Hello, can you help me with a Python problem?"}, +- { +- "role": "assistant", +- "content": "Of course! I'd be happy to help. What's the problem you're facing?", +- }, +- { +- "role": "user", +- "content": ( +- "I need to write a function that calculates the factorial of a number." +- ), +- }, +- { +- "role": "assistant", +- "content": ( +- "Sure, I can help you with that. Here's a simple Python function to" +- " calculate the factorial of a number:" +- ), +- }, +- ] +- +- coder.cur_messages = [ +- {"role": "user", "content": "Can you optimize this function for large numbers?"}, +- ] +- +- # Set up real values for the main model +- coder.main_model.info = { +- "max_input_tokens": 4000, +- "max_output_tokens": 1000, +- } +- coder.partial_response_content = ( +- "Here's an optimized version of the factorial function:" +- ) +- coder.io.tool_error = MagicMock() +- +- # Call the method +- coder.show_exhausted_error() +- +- # Check if tool_error was called with the expected message +- coder.io.tool_error.assert_called() +- error_message = coder.io.tool_error.call_args[0][0] +- +- # Assert that the error message contains the expected information +- self.assertIn("Model gpt-3.5-turbo has hit a token limit!", error_message) +- self.assertIn("Input tokens:", error_message) +- self.assertIn("Output tokens:", error_message) +- self.assertIn("Total tokens:", error_message) +- +- def test_keyboard_interrupt_handling(self): +- with GitTemporaryDirectory(): +- io = InputOutput(yes=True) +- coder = Coder.create(self.GPT35, "diff", io=io) +- +- # Simulate keyboard interrupt during message processing +- def mock_send(*args, **kwargs): +- coder.partial_response_content = "Partial response" +- 
coder.partial_response_function_call = dict() +- raise KeyboardInterrupt() +- +- coder.send = mock_send +- +- # Initial valid state +- sanity_check_messages(coder.cur_messages) +- +- # Process message that will trigger interrupt +- list(coder.send_message("Test message")) +- +- # Verify messages are still in valid state +- sanity_check_messages(coder.cur_messages) +- self.assertEqual(coder.cur_messages[-1]["role"], "assistant") +- +- def test_token_limit_error_handling(self): +- with GitTemporaryDirectory(): +- io = InputOutput(yes=True) +- coder = Coder.create(self.GPT35, "diff", io=io) +- +- # Simulate token limit error +- def mock_send(*args, **kwargs): +- coder.partial_response_content = "Partial response" +- coder.partial_response_function_call = dict() +- raise FinishReasonLength() +- +- coder.send = mock_send +- +- # Initial valid state +- sanity_check_messages(coder.cur_messages) +- +- # Process message that hits token limit +- list(coder.send_message("Long message")) +- +- # Verify messages are still in valid state +- sanity_check_messages(coder.cur_messages) +- self.assertEqual(coder.cur_messages[-1]["role"], "assistant") +- +- def test_message_sanity_after_partial_response(self): +- with GitTemporaryDirectory(): +- io = InputOutput(yes=True) +- coder = Coder.create(self.GPT35, "diff", io=io) +- +- # Simulate partial response then interrupt +- def mock_send(*args, **kwargs): +- coder.partial_response_content = "Partial response" +- coder.partial_response_function_call = dict() +- raise KeyboardInterrupt() +- +- coder.send = mock_send +- +- list(coder.send_message("Test")) +- +- # Verify message structure remains valid +- sanity_check_messages(coder.cur_messages) +- self.assertEqual(coder.cur_messages[-1]["role"], "assistant") +- +- def test_architect_coder_auto_accept_true(self): +- with GitTemporaryDirectory(): +- io = InputOutput(yes=True) +- io.confirm_ask = MagicMock(return_value=True) +- +- # Create an ArchitectCoder with auto_accept_architect=True 
+- with patch("aider.coders.architect_coder.AskCoder.__init__", return_value=None): +- from aider.coders.architect_coder import ArchitectCoder +- +- coder = ArchitectCoder() +- coder.io = io +- coder.main_model = self.GPT35 +- coder.auto_accept_architect = True +- coder.verbose = False +- coder.total_cost = 0 +- coder.cur_messages = [] +- coder.done_messages = [] +- coder.summarizer = MagicMock() +- coder.summarizer.too_big.return_value = False +- +- # Mock editor_coder creation and execution +- mock_editor = MagicMock() +- with patch("aider.coders.architect_coder.Coder.create", return_value=mock_editor): +- # Set partial response content +- coder.partial_response_content = "Make these changes to the code" +- +- # Call reply_completed +- coder.reply_completed() +- +- # Verify that confirm_ask was not called (auto-accepted) +- io.confirm_ask.assert_not_called() +- +- # Verify that editor coder was created and run +- mock_editor.run.assert_called_once() +- +- def test_architect_coder_auto_accept_false_confirmed(self): +- with GitTemporaryDirectory(): +- io = InputOutput(yes=False) +- io.confirm_ask = MagicMock(return_value=True) +- +- # Create an ArchitectCoder with auto_accept_architect=False +- with patch("aider.coders.architect_coder.AskCoder.__init__", return_value=None): +- from aider.coders.architect_coder import ArchitectCoder +- +- coder = ArchitectCoder() +- coder.io = io +- coder.main_model = self.GPT35 +- coder.auto_accept_architect = False +- coder.verbose = False +- coder.total_cost = 0 +- coder.cur_messages = [] +- coder.done_messages = [] +- coder.summarizer = MagicMock() +- coder.summarizer.too_big.return_value = False +- coder.cur_messages = [] +- coder.done_messages = [] +- coder.summarizer = MagicMock() +- coder.summarizer.too_big.return_value = False +- +- # Mock editor_coder creation and execution +- mock_editor = MagicMock() +- with patch("aider.coders.architect_coder.Coder.create", return_value=mock_editor): +- # Set partial response content +- 
coder.partial_response_content = "Make these changes to the code" +- +- # Call reply_completed +- coder.reply_completed() +- +- # Verify that confirm_ask was called +- io.confirm_ask.assert_called_once_with("Edit the files?") +- +- # Verify that editor coder was created and run +- mock_editor.run.assert_called_once() +- +- def test_architect_coder_auto_accept_false_rejected(self): +- with GitTemporaryDirectory(): +- io = InputOutput(yes=False) +- io.confirm_ask = MagicMock(return_value=False) +- +- # Create an ArchitectCoder with auto_accept_architect=False +- with patch("aider.coders.architect_coder.AskCoder.__init__", return_value=None): +- from aider.coders.architect_coder import ArchitectCoder +- +- coder = ArchitectCoder() +- coder.io = io +- coder.main_model = self.GPT35 +- coder.auto_accept_architect = False +- coder.verbose = False +- coder.total_cost = 0 +- +- # Mock editor_coder creation and execution +- mock_editor = MagicMock() +- with patch("aider.coders.architect_coder.Coder.create", return_value=mock_editor): +- # Set partial response content +- coder.partial_response_content = "Make these changes to the code" +- +- # Call reply_completed +- coder.reply_completed() +- +- # Verify that confirm_ask was called +- io.confirm_ask.assert_called_once_with("Edit the files?") +- +- # Verify that editor coder was NOT created or run +- # (because user rejected the changes) +- mock_editor.run.assert_not_called() +- +- +-if __name__ == "__main__": +- unittest.main() +\ No newline at end of file ++echo "Hello, World!" 
+\ No newline at end of file diff --git a/locodiff-250425/results/aider_tests_basic_test_coder.py/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt b/locodiff-250425/results/aider_tests_basic_test_coder.py/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt new file mode 100644 index 000000000..de3abbfbe --- /dev/null +++ b/locodiff-250425/results/aider_tests_basic_test_coder.py/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt @@ -0,0 +1,907 @@ +```python +import os +import tempfile +import unittest +from pathlib import Path +from unittest.mock import MagicMock, patch + +import git + +from aider.coders import Coder +from aider.coders.base_coder import FinishReasonLength, UnknownEditFormat +from aider.dump import dump # noqa: F401 +from aider.io import InputOutput +from aider.models import Model +from aider.repo import GitRepo +from aider.sendchat import sanity_check_messages +from aider.utils import GitTemporaryDirectory + + +class TestCoder(unittest.TestCase): + def setUp(self): + self.GPT35 = Model("gpt-3.5-turbo") + self.webbrowser_patcher = patch("aider.io.webbrowser.open") + self.mock_webbrowser = self.webbrowser_patcher.start() + + def test_allowed_to_edit(self): + with GitTemporaryDirectory(): + repo = git.Repo() + + fname = Path("added.txt") + fname.touch() + repo.git.add(str(fname)) + + fname = Path("repo.txt") + fname.touch() + repo.git.add(str(fname)) + + repo.git.commit("-m", "init") + + # YES! 
+ io = MagicMock() + io.confirm_ask = MagicMock(return_value=True) + coder = Coder.create(self.GPT35, None, io, fnames=["added.txt"]) + + self.assertTrue(coder.allowed_to_edit("added.txt")) + self.assertTrue(coder.allowed_to_edit("repo.txt")) + self.assertTrue(coder.allowed_to_edit("new.txt")) + + self.assertIn("repo.txt", str(coder.abs_fnames)) + self.assertIn("new.txt", str(coder.abs_fnames)) + + self.assertFalse(coder.need_commit_before_edits) + + def test_allowed_to_edit_no(self): + with GitTemporaryDirectory(): + repo = git.Repo() + + fname = Path("added.txt") + fname.touch() + repo.git.add(str(fname)) + + fname = Path("repo.txt") + fname.touch() + repo.git.add(str(fname)) + + repo.git.commit("-m", "init") + + # say NO + io = InputOutput(yes=False) + + coder = Coder.create(self.GPT35, None, io, fnames=["added.txt"]) + + self.assertTrue(coder.allowed_to_edit("added.txt")) + self.assertFalse(coder.allowed_to_edit("repo.txt")) + self.assertFalse(coder.allowed_to_edit("new.txt")) + + self.assertNotIn("repo.txt", str(coder.abs_fnames)) + self.assertNotIn("new.txt", str(coder.abs_fnames)) + + self.assertFalse(coder.need_commit_before_edits) + + def test_allowed_to_edit_dirty(self): + with GitTemporaryDirectory(): + repo = git.Repo() + + fname = Path("added.txt") + fname.touch() + repo.git.add(str(fname)) + + repo.git.commit("-m", "init") + + # say NO + io = InputOutput(yes=False) + + coder = Coder.create(self.GPT35, None, io, fnames=["added.txt"]) + + self.assertTrue(coder.allowed_to_edit("added.txt")) + self.assertFalse(coder.need_commit_before_edits) + + fname.write_text("dirty!") + self.assertTrue(coder.allowed_to_edit("added.txt")) + self.assertTrue(coder.need_commit_before_edits) + + def test_get_files_content(self): + tempdir = Path(tempfile.mkdtemp()) + + file1 = tempdir / "file1.txt" + file2 = tempdir / "file2.txt" + + file1.touch() + file2.touch() + + files = [file1, file2] + + # Initialize the Coder object with the mocked IO and mocked repo + coder = 
Coder.create(self.GPT35, None, io=InputOutput(), fnames=files) + + content = coder.get_files_content().splitlines() + self.assertIn("file1.txt", content) + self.assertIn("file2.txt", content) + + def test_check_for_filename_mentions(self): + with GitTemporaryDirectory(): + repo = git.Repo() + + mock_io = MagicMock() + + fname1 = Path("file1.txt") + fname2 = Path("file2.py") + + fname1.write_text("one\n") + fname2.write_text("two\n") + + repo.git.add(str(fname1)) + repo.git.add(str(fname2)) + repo.git.commit("-m", "new") + + # Initialize the Coder object with the mocked IO and mocked repo + coder = Coder.create(self.GPT35, None, mock_io) + + # Call the check_for_file_mentions method + coder.check_for_file_mentions("Please check file1.txt and file2.py") + + # Check if coder.abs_fnames contains both files + expected_files = set( + [ + str(Path(coder.root) / fname1), + str(Path(coder.root) / fname2), + ] + ) + + self.assertEqual(coder.abs_fnames, expected_files) + + def test_check_for_ambiguous_filename_mentions_of_longer_paths(self): + with GitTemporaryDirectory(): + io = InputOutput(pretty=False, yes=True) + coder = Coder.create(self.GPT35, None, io) + + fname = Path("file1.txt") + fname.touch() + + other_fname = Path("other") / "file1.txt" + other_fname.parent.mkdir(parents=True, exist_ok=True) + other_fname.touch() + + mock = MagicMock() + mock.return_value = set([str(fname), str(other_fname)]) + coder.repo.get_tracked_files = mock + + # Call the check_for_file_mentions method + coder.check_for_file_mentions(f"Please check {fname}!") + + self.assertEqual(coder.abs_fnames, set([str(fname.resolve())])) + + def test_check_for_subdir_mention(self): + with GitTemporaryDirectory(): + io = InputOutput(pretty=False, yes=True) + coder = Coder.create(self.GPT35, None, io) + + fname = Path("other") / "file1.txt" + fname.parent.mkdir(parents=True, exist_ok=True) + fname.touch() + + mock = MagicMock() + mock.return_value = set([str(fname)]) + coder.repo.get_tracked_files = mock 
+ + # Call the check_for_file_mentions method + coder.check_for_file_mentions(f"Please check `{fname}`") + + self.assertEqual(coder.abs_fnames, set([str(fname.resolve())])) + + def test_skip_duplicate_basename_mentions(self): + with GitTemporaryDirectory(): + io = InputOutput(pretty=False, yes=True) + coder = Coder.create(self.GPT35, None, io) + + # Create files with same basename in different directories + fname1 = Path("dir1") / "file.txt" + fname2 = Path("dir2") / "file.txt" + fname3 = Path("dir3") / "unique.txt" + + for fname in [fname1, fname2, fname3]: + fname.parent.mkdir(parents=True, exist_ok=True) + fname.touch() + + # Add one file to chat + coder.add_rel_fname(str(fname1)) + + # Mock get_tracked_files to return all files + mock = MagicMock() + mock.return_value = set([str(fname1), str(fname2), str(fname3)]) + coder.repo.get_tracked_files = mock + + # Check that file mentions of a pure basename skips files with duplicate basenames + mentioned = coder.get_file_mentions(f"Check {fname2.name} and {fname3}") + self.assertEqual(mentioned, {str(fname3)}) + + # Add a read-only file with same basename + coder.abs_read_only_fnames.add(str(fname2.resolve())) + mentioned = coder.get_file_mentions(f"Check {fname1} and {fname3}") + self.assertEqual(mentioned, {str(fname3)}) + + def test_check_for_file_mentions_read_only(self): + with GitTemporaryDirectory(): + io = InputOutput( + pretty=False, yes=True, + ) + coder = Coder.create(self.GPT35, None, io) + + fname = Path("readonly_file.txt") + fname.touch() + + coder.abs_read_only_fnames.add(str(fname.resolve())) + + # Mock the get_tracked_files method + mock = MagicMock() + mock.return_value = set([str(fname)]) + coder.repo.get_tracked_files = mock + + # Call the check_for_file_mentions method + result = coder.check_for_file_mentions(f"Please check {fname}!") + + # Assert that the method returns None (user not asked to add the file) + self.assertIsNone(result) + + # Assert that abs_fnames is still empty (file not added) 
+ self.assertEqual(coder.abs_fnames, set()) + + def test_check_for_file_mentions_with_mocked_confirm(self): + with GitTemporaryDirectory(): + io = InputOutput(pretty=False) + coder = Coder.create(self.GPT35, None, io) + + # Mock get_file_mentions to return two file names + coder.get_file_mentions = MagicMock(return_value=set(["file1.txt", "file2.txt"])) + + # Mock confirm_ask to return False for the first call and True for the second + io.confirm_ask = MagicMock(side_effect=[False, True, True]) + + # First call to check_for_file_mentions + coder.check_for_file_mentions("Please check file1.txt for the info") + + # Assert that confirm_ask was called twice + self.assertEqual(io.confirm_ask.call_count, 2) + + # Assert that only file2.txt was added to abs_fnames + self.assertEqual(len(coder.abs_fnames), 1) + self.assertIn("file2.txt", str(coder.abs_fnames)) + + # Reset the mock + io.confirm_ask.reset_mock() + + # Second call to check_for_file_mentions + coder.check_for_file_mentions("Please check file1.txt and file2.txt again") + + # Assert that confirm_ask was called only once (for file1.txt) + self.assertEqual(io.confirm_ask.call_count, 1) + + # Assert that abs_fnames still contains only file2.txt + self.assertEqual(len(coder.abs_fnames), 1) + self.assertIn("file2.txt", str(coder.abs_fnames)) + + # Assert that file1.txt is in ignore_mentions + self.assertIn("file1.txt", coder.ignore_mentions) + + def test_run_with_file_deletion(self): + # Create a few temporary files + + tempdir = Path(tempfile.mkdtemp()) + + file1 = tempdir / "file1.txt" + file2 = tempdir / "file2.txt" + + file1.touch() + file2.touch() + + files = [file1, file2] + + # Initialize the Coder object with the mocked IO and mocked repo + coder = Coder.create(self.GPT35, None, io=InputOutput(), fnames=files) + + def mock_send(*args, **kwargs): + coder.partial_response_content = "ok" + coder.partial_response_function_call = dict() + return [] + + coder.send = mock_send + + # Call the run method with a 
message + coder.run(with_message="hi") + self.assertEqual(len(coder.abs_fnames), 2) + + file1.unlink() + + # Call the run method again with a message + coder.run(with_message="hi") + self.assertEqual(len(coder.abs_fnames), 1) + + def test_run_with_file_unicode_error(self): + # Create a few temporary files + _, file1 = tempfile.mkstemp() + _, file2 = tempfile.mkstemp() + + files = [file1, file2] + + # Initialize the Coder object with the mocked IO and mocked repo + coder = Coder.create(self.GPT35, None, io=InputOutput(), fnames=files) + + def mock_send(*args, **kwargs): + coder.partial_response_content = "ok" + coder.partial_response_function_call = dict() + return [] + + coder.send = mock_send + + # Call the run method with a message + coder.run(with_message="hi") + self.assertEqual(len(coder.abs_fnames), 2) + + # Write some non-UTF8 text into the file + with open(file1, "wb") as f: + f.write(b"\x80abc") + + # Call the run method again with a message + coder.run(with_message="hi") + self.assertEqual(len(coder.abs_fnames), 1) + + def test_choose_fence(self): + # Create a few temporary files + _, file1 = tempfile.mkstemp() + + with open(file1, "wb") as f: + f.write(b"this contains\n```\nbackticks") + + files = [file1] + + # Initialize the Coder object with the mocked IO and mocked repo + coder = Coder.create(self.GPT35, None, io=InputOutput(), fnames=files) + + def mock_send(*args, **kwargs): + coder.partial_response_content = "ok" + coder.partial_response_function_call = dict() + return [] + + coder.send = mock_send + + # Call the run method with a message + coder.run(with_message="hi") + + self.assertNotEqual(coder.fence[0], "```") + + def test_run_with_file_utf_unicode_error(self): + "make sure that we honor InputOutput(encoding) and don't just assume utf-8" + # Create a few temporary files + _, file1 = tempfile.mkstemp() + _, file2 = tempfile.mkstemp() + + files = [file1, file2] + + encoding = "utf-16" + + # Initialize the Coder object with the mocked IO and 
mocked repo + coder = Coder.create( + self.GPT35, + None, + io=InputOutput(encoding=encoding), + fnames=files, + ) + + def mock_send(*args, **kwargs): + coder.partial_response_content = "ok" + coder.partial_response_function_call = dict() + return [] + + coder.send = mock_send + + # Call the run method with a message + coder.run(with_message="hi") + self.assertEqual(len(coder.abs_fnames), 2) + + some_content_which_will_error_if_read_with_encoding_utf8 = "ÅÍÎÏ".encode(encoding) + with open(file1, "wb") as f: + f.write(some_content_which_will_error_if_read_with_encoding_utf8) + + coder.run(with_message="hi") + + # both files should still be here + self.assertEqual(len(coder.abs_fnames), 2) + + def test_new_file_edit_one_commit(self): + """A new file shouldn get pre-committed before the GPT edit commit""" + with GitTemporaryDirectory(): + repo = git.Repo() + + fname = Path("file.txt") + + io = InputOutput(yes=True) + coder = Coder.create(self.GPT35, "diff", io=io, fnames=[str(fname)]) + + self.assertTrue(fname.exists()) + + # make sure it was not committed + with self.assertRaises(git.exc.GitCommandError): + list(repo.iter_commits(repo.active_branch.name)) + + def mock_send(*args, **kwargs): + coder.partial_response_content = f""" +Do this: + +{str(fname)} +<<<<<<< SEARCH +======= +new +>>>>>>> REPLACE + +""" + coder.partial_response_function_call = dict() + return [] + + coder.send = mock_send + coder.repo.get_commit_message = MagicMock() + coder.repo.get_commit_message.return_value = "commit message" + + coder.run(with_message="hi") + + content = fname.read_text() + self.assertEqual(content, "new\n") + + num_commits = len(list(repo.iter_commits(repo.active_branch.name))) + self.assertEqual(num_commits, 2) + + def test_only_commit_gpt_edited_file(self): + """ + Only commit file that gpt edits, not other dirty files. + Also ensure commit msg only depends on diffs from the GPT edited file. 
+ """ + + with GitTemporaryDirectory(): + repo = git.Repo() + + fname1 = Path("file1.txt") + fname2 = Path("file2.txt") + + fname1.write_text("one\n") + fname2.write_text("two\n") + + repo.git.add(str(fname1)) + repo.git.add(str(fname2)) + repo.git.commit("-m", "new") + + # DIRTY! + fname1.write_text("ONE\n") + + io = InputOutput(yes=True) + coder = Coder.create(self.GPT35, "diff", io=io, fnames=[str(fname1), str(fname2)]) + + def mock_send(*args, **kwargs): + coder.partial_response_content = f""" +Do this: + +{str(fname2)} +<<<<<<< SEARCH +two +======= +TWO +>>>>>>> REPLACE + +""" + coder.partial_response_function_call = dict() + return [] + + def mock_get_commit_message(diffs, context): + self.assertNotIn("one", diffs) + self.assertNotIn("ONE", diffs) + return "commit message" + + coder.send = mock_send + coder.repo.get_commit_message = MagicMock(side_effect=mock_get_commit_message) + + coder.run(with_message="hi") + + content = fname2.read_text() + self.assertEqual(content, "TWO\n") + + self.assertTrue(repo.is_dirty(path=str(fname1))) + + def test_gpt_edit_to_dirty_file(self): + """A dirty file should be committed before the GPT edits are committed""" + + with GitTemporaryDirectory(): + repo = git.Repo() + + fname = Path("file.txt") + fname.write_text("one\n") + repo.git.add(str(fname)) + + fname2 = Path("other.txt") + fname2.write_text("other\n") + repo.git.add(str(fname2)) + + repo.git.commit("-m", "new") + + # dirty + fname.write_text("two\n") + fname2.write_text("OTHER\n") + + io = InputOutput(yes=True) + coder = Coder.create(self.GPT35, "diff", io=io, fnames=[str(fname)]) + + def mock_send(*args, **kwargs): + coder.partial_response_content = f""" +Do this: + +{str(fname)} +<<<<<<< SEARCH +two +======= +three +>>>>>>> REPLACE + +""" + coder.partial_response_function_call = dict() + return [] + + saved_diffs = [] + + def mock_get_commit_message(diffs, context): + saved_diffs.append(diffs) + return "commit message" + + coder.repo.get_commit_message = 
MagicMock(side_effect=mock_get_commit_message) + coder.send = mock_send + + coder.run(with_message="hi") + + content = fname.read_text() + self.assertEqual(content, "three\n") + + num_commits = len(list(repo.iter_commits(repo.active_branch.name))) + self.assertEqual(num_commits, 3) + + diff = repo.git.diff(["HEAD~2", "HEAD~1"]) + self.assertIn("one", diff) + self.assertIn("two", diff) + self.assertNotIn("three", diff) + self.assertNotIn("other", diff) + self.assertNotIn("OTHER", diff) + + diff = saved_diffs[0] + self.assertIn("one", diff) + self.assertIn("two", diff) + self.assertNotIn("three", diff) + self.assertNotIn("other", diff) + self.assertNotIn("OTHER", diff) + + diff = repo.git.diff(["HEAD~1", "HEAD"]) + self.assertNotIn("one", diff) + self.assertIn("two", diff) + self.assertIn("three", diff) + self.assertNotIn("other", diff) + self.assertNotIn("OTHER", diff) + + diff = saved_diffs[1] + self.assertNotIn("one", diff) + self.assertIn("two", diff) + self.assertIn("three", diff) + self.assertNotIn("other", diff) + self.assertNotIn("OTHER", diff) + + self.assertEqual(len(saved_diffs), 2) + + def test_gpt_edit_to_existing_file_not_in_repo(self): + with GitTemporaryDirectory(): + repo = git.Repo() + + fname = Path("file.txt") + fname.write_text("one\n") + + fname2 = Path("other.txt") + fname2.write_text("other\n") + repo.git.add(str(fname2)) + + repo.git.commit("-m", "initial") + + io = InputOutput(yes=True) + coder = Coder.create(self.GPT35, "diff", io=io, fnames=[str(fname)]) + + def mock_send(*args, **kwargs): + coder.partial_response_content = f""" +Do this: + +{str(fname)} +<<<<<<< SEARCH +one +======= +two +>>>>>>> REPLACE + +""" + coder.partial_response_function_call = dict() + return [] + + saved_diffs = [] + + def mock_get_commit_message(diffs, context): + saved_diffs.append(diffs) + return "commit message" + + coder.repo.get_commit_message = MagicMock(side_effect=mock_get_commit_message) + coder.send = mock_send + + coder.run(with_message="hi") + + 
content = fname.read_text() + self.assertEqual(content, "two\n") + + diff = saved_diffs[0] + self.assertIn("file.txt", diff) + + def test_skip_aiderignored_files(self): + with GitTemporaryDirectory(): + repo = git.Repo() + + fname1 = "ignoreme1.txt" + fname2 = "ignoreme2.txt" + fname3 = "dir/ignoreme3.txt" + + Path(fname2).touch() + repo.git.add(str(fname2)) + repo.git.commit("-m", "initial") + + aignore = Path(".aiderignore") + aignore.write_text(f"{fname1}\n{fname2}\ndir\n") + repo = GitRepo( + InputOutput(yes=True), + [fname1, fname2, fname3], + None, + aider_ignore_file=str(aignore), + ) + + io = InputOutput(yes=True) + coder = Coder.create( + self.GPT35, + None, + io, + fnames=[fname1, fname2, fname3], + repo=repo, + ) + + self.assertNotIn(fname1, str(coder.abs_fnames)) + self.assertNotIn(fname2, str(coder.abs_fnames)) + self.assertNotIn(fname3, str(coder.abs_fnames)) + + def test_check_for_urls(self): + io = InputOutput(yes=True) + coder = Coder.create(self.GPT35, None, io=io) + coder.commands.scraper = MagicMock() + coder.commands.scraper.scrape = MagicMock(return_value="some content") + + # Test various URL formats + test_cases = [ + ("Check http://example.com, it's cool", "http://example.com"), + ("Visit https://www.example.com/page and see stuff", "https://www.example.com/page"), + ( + "Go to http://subdomain.example.com:8080/path?query=value, or not", + "http://subdomain.example.com:8080/path?query=value", + ), + ( + "See https://example.com/path#fragment for example", + "https://example.com/path#fragment", + ), + ("Look at http://localhost:3000", "http://localhost:3000"), + ("View https://example.com/setup#whatever", "https://example.com/setup#whatever"), + ("Open http://127.0.0.1:8000/api/v1/", "http://127.0.0.1:8000/api/v1/"), + ( + "Try https://example.com/path/to/page.html?param1=value1¶m2=value2", + "https://example.com/path/to/page.html?param1=value1¶m2=value2", + ), + ("Access http://user:password@example.com", 
"http://user:password@example.com"), + ( + "Use https://example.com/path_(with_parentheses)", + "https://example.com/path_(with_parentheses)", + ), + ] + + for input_text, expected_url in test_cases: + with self.subTest(input_text=input_text): + result = coder.check_for_urls(input_text) + self.assertIn(expected_url, result) + + # Test cases from the GitHub issue + issue_cases = [ + ("check http://localhost:3002, there is an error", "http://localhost:3002"), + ( + "can you check out https://example.com/setup#whatever", + "https://example.com/setup#whatever", + ), + ] + + for input_text, expected_url in issue_cases: + with self.subTest(input_text=input_text): + result = coder.check_for_urls(input_text) + self.assertIn(expected_url, result) + + # Test case with multiple URLs + multi_url_input = "Check http://example1.com and https://example2.com/page" + result = coder.check_for_urls(multi_url_input) + self.assertIn("http://example1.com", result) + self.assertIn("https://example2.com/page", result) + + # Test case with no URL + no_url_input = "This text contains no URL" + result = coder.check_for_urls(no_url_input) + self.assertEqual(result, no_url_input) + + # Test case with the same URL appearing multiple times + repeated_url_input = ( + "Check https://example.com, then https://example.com again, and https://example.com one" + " more time" + ) + result = coder.check_for_urls(repeated_url_input) + self.assertEqual(result.count("https://example.com"), 4) + self.assertIn("https://example.com", result) + + def test_coder_from_coder_with_subdir(self): + with GitTemporaryDirectory() as root: + repo = git.Repo.init(root) + + # Create a file in a subdirectory + subdir = Path(root) / "subdir" + subdir.mkdir() + test_file = subdir / "test_file.txt" + test_file.write_text("Test content") + + repo.git.add(str(test_file)) + repo.git.commit("-m", "Add test file") + + # Change directory to the subdirectory + os.chdir(subdir.resolve()) + + # Create the first coder + io = 
InputOutput(yes=True) + coder1 = Coder.create(self.GPT35, None, io=io, fnames=[test_file.name]) + + # Create a new coder from the first coder + coder2 = Coder.create(from_coder=coder1) + + # Check if both coders have the same set of abs_fnames + self.assertEqual(coder1.abs_fnames, coder2.abs_fnames) + + # Ensure the abs_fnames contain the correct absolute path + expected_abs_path = os.path.realpath(str(test_file)) + coder1_abs_fnames = set(os.path.realpath(path) for path in coder1.abs_fnames) + self.assertIn(expected_abs_path, coder1_abs_fnames) + self.assertIn(expected_abs_path, coder2.abs_fnames) + + # Check that the abs_fnames do not contain duplicate or incorrect paths + self.assertEqual(len(coder1.abs_fnames), 1) + self.assertEqual(len(coder2.abs_fnames), 1) + + def test_suggest_shell_commands(self): + with GitTemporaryDirectory(): + io = InputOutput(yes=True) + coder = Coder.create(self.GPT35, "diff", io=io) + + def mock_send(*args, **kwargs): + coder.partial_response_content = """Here's a shell command to run: + +```bash +echo "Hello, World!" +``` + +This command will print 'Hello, World!' 
to the console.""" + coder.partial_response_function_call = dict() + return [] + + coder.send = mock_send + + # Mock the handle_shell_commands method to check if it's called + coder.handle_shell_commands = MagicMock() + + # Run the coder with a message + coder.run(with_message="Suggest a shell command") + + # Check if the shell command was added to the list + self.assertEqual(len(coder.shell_commands), 1) + self.assertEqual(coder.shell_commands[0].strip(), 'echo "Hello, World!"') + + # Check if handle_shell_commands was called with the correct argument + coder.handle_shell_commands.assert_called_once() + + def test_no_suggest_shell_commands(self): + with GitTemporaryDirectory(): + io = InputOutput(yes=True) + coder = Coder.create(self.GPT35, "diff", io=io, suggest_shell_commands=False) + self.assertFalse(coder.suggest_shell_commands) + + def test_detect_urls_enabled(self): + with GitTemporaryDirectory(): + io = InputOutput(yes=True) + coder = Coder.create(self.GPT35, "diff", io=io, detect_urls=True) + coder.commands.scraper = MagicMock() + coder.commands.scraper.scrape = MagicMock(return_value="some content") + + # Test with a message containing a URL + message = "Check out https://example.com" + coder.check_for_urls(message) + coder.commands.scraper.scrape.assert_called_once_with("https://example.com") + + def test_detect_urls_disabled(self): + with GitTemporaryDirectory(): + io = InputOutput(yes=True) + coder = Coder.create(self.GPT35, "diff", io=io, detect_urls=False) + coder.commands.scraper = MagicMock() + coder.commands.scraper.scrape = MagicMock(return_value="some content") + + # Test with a message containing a URL + message = "Check out https://example.com" + result = coder.check_for_urls(message) + self.assertEqual(result, []) + coder.commands.scraper.scrape.assert_not_called() + + def test_unknown_edit_format_exception(self): + # Test the exception message format + invalid_format = "invalid_format" + valid_formats = ["diff", "whole", "map"] + exc = 
UnknownEditFormat(invalid_format, valid_formats) + expected_msg = ( + f"Unknown edit format {invalid_format}. Valid formats are: {', '.join(valid_formats)}" + ) + self.assertEqual(str(exc), expected_msg) + + def test_unknown_edit_format_creation(self): + # Test that creating a Coder with invalid edit format raises the exception + io = InputOutput(yes=True) + invalid_format = "invalid_format" + + with self.assertRaises(UnknownEditFormat) as cm: + Coder.create(self.GPT35, invalid_format, io=io) + + exc = cm.exception + self.assertEqual(exc.edit_format, invalid_format) + self.assertIsInstance(exc.valid_formats, list) + self.assertTrue(len(exc.valid_formats) > 0) + + def test_system_prompt_prefix(self): + # Test that system_prompt_prefix is properly set and used + io = InputOutput(yes=True) + test_prefix = "Test prefix. " + + # Create a model with system_prompt_prefix + model = Model("gpt-3.5-turbo") + model.system_prompt_prefix = test_prefix + + coder = Coder.create(model, None, io=io) + + # Get the formatted messages + chunks = coder.format_messages() + messages = chunks.all_messages() + + # Check if the system message contains our prefix + system_message = next(msg for msg in messages if msg["role"] == "system") + self.assertTrue(system_message["content"].startswith(test_prefix)) + + def test_coder_create_with_new_file_oserror(self): + with GitTemporaryDirectory(): + io = InputOutput(yes=True) + new_file = "new_file.txt" + + # Mock Path.touch() to raise OSError + with patch("pathlib.Path.touch", side_effect=OSError("Permission denied")): + # Create the coder with a new file + coder = Coder.create(self.GPT35, "diff", io=io, fnames=[new_file]) + + # Check if the coder was created successfully + self.assertIsInstance(coder, Coder) + + # Check if the new file is not in abs_fnames + self.assertNotIn(new_file, [os.path.basename(f) for f in coder.abs_fnames]) + + def test_show_exhausted_error(self): + with GitTemporaryDirectory(): + io = InputOutput(yes=True) + coder = 
Coder.create(self.GPT35, "diff", io=io) + + # Set up some real done \ No newline at end of file diff --git a/locodiff-250425/results/aider_tests_basic_test_sendchat.py/x-ai_grok-code-fast-1/20250909_155825/extracted_output.txt b/locodiff-250425/results/aider_tests_basic_test_sendchat.py/x-ai_grok-code-fast-1/20250909_155825/extracted_output.txt new file mode 100644 index 000000000..868c7e9c0 --- /dev/null +++ b/locodiff-250425/results/aider_tests_basic_test_sendchat.py/x-ai_grok-code-fast-1/20250909_155825/extracted_output.txt @@ -0,0 +1,171 @@ +import unittest +from unittest.mock import MagicMock, patch + +from aider.exceptions import LiteLLMExceptions +from aider.llm import litellm +from aider.models import Model + + +class PrintCalled(Exception): + pass + + +class TestSendChat(unittest.TestCase): + def setUp(self): + self.mock_messages = [{"role": "user", "content": "Hello"}] + self.mock_model = "gpt-4" + + def test_litellm_exceptions(self): + litellm_ex = LiteLLMExceptions() + litellm_ex._load(strict=True) + + @patch("litellm.completion") + @patch("builtins.print") + def test_simple_send_with_retries_rate_limit_error(self, mock_print, mock_completion): + mock = MagicMock() + mock.status_code = 500 + + # Set up the mock to raise + mock_completion.side_effect = [ + litellm.RateLimitError( + "rate limit exceeded", + response=mock, + llm_provider="llm_provider", + model="model", + ), + None, + ] + + # Call the simple_send_with_retries method + Model(self.mock_model).simple_send_with_retries(self.mock_messages) + assert mock_print.call_count == 3 + + @patch("litellm.completion") + def test_send_completion_basic(self, mock_completion): + # Setup mock response + mock_response = MagicMock() + mock_completion.return_value = mock_response + + # Test basic send_completion + hash_obj, response = Model(self.mock_model).send_completion( + self.mock_messages, functions=None, stream=False + ) + + assert response == mock_response + mock_completion.assert_called_once() + + 
@patch("litellm.completion") + def test_send_completion_with_functions(self, mock_completion): + mock_function = {"name": "test_function", "parameters": {"type": "object"}} + + hash_obj, response = Model(self.mock_model).send_completion( + self.mock_messages, functions=[mock_function], stream=False + ) + + # Verify function was properly included in tools + called_kwargs = mock_completion.call_args.kwargs + assert "tools" in called_kwargs + assert called_kwargs["tools"][0]["function"] == mock_function + + @patch("litellm.completion") + def test_simple_send_attribute_error(self, mock_completion): + # Setup mock to raise AttributeError + mock_completion.return_value = MagicMock() + mock_completion.return_value.choices = None + + # Should return None on AttributeError + result = Model(self.mock_model).simple_send_with_retries(self.mock_messages) + assert result is None + + @patch("litellm.completion") + @patch("builtins.print") + def test_simple_send_non_retryable_error(self, mock_print, mock_completion): + # Test with an error that shouldn't trigger retries + mock = MagicMock() + mock.status_code = 400 + + mock_completion.side_effect = litellm.NotFoundError( + message="Invalid request", llm_provider="test_provider", model="test_model" + ) + + result = Model(self.mock_model).simple_send_with_retries(self.mock_messages) + assert result is None + # Should only print the error message + assert mock_print.call_count == 1 + + def test_ensure_alternating_roles_empty(self): + from aider.sendchat import ensure_alternating_roles + + messages = [] + result = ensure_alternating_roles(messages) + assert result == [] + + def test_ensure_alternating_roles_single_message(self): + from aider.sendchat import ensure_alternating_roles + + messages = [{"role": "user", "content": "Hello"}] + result = ensure_alternating_roles(messages) + assert result == messages + + def test_ensure_alternating_roles_already_alternating(self): + from aider.sendchat import ensure_alternating_roles + + 
messages = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there"}, + {"role": "user", "content": "How are you?"}, + ] + result = ensure_alternating_roles(messages) + assert result == messages + + def test_ensure_alternating_roles_consecutive_user(self): + from aider.sendchat import ensure_alternating_roles + + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "user", "content": "Are you there?"}, + ] + expected = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": ""}, + {"role": "user", "content": "Are you there?"}, + ] + result = ensure_alternating_roles(messages) + assert result == expected + + def test_ensure_alternating_roles_consecutive_assistant(self): + from aider.sendchat import ensure_alternating_roles + + messages = [ + {"role": "assistant", "content": "Hi there"}, + {"role": "assistant", "content": "How can I help?"}, + ] + expected = [ + {"role": "assistant", "content": "Hi there"}, + {"role": "user", "content": ""}, + {"role": "assistant", "content": "How can I help?"}, + ] + result = ensure_alternating_roles(messages) + assert result == expected + + def test_ensure_alternating_roles_mixed_sequence(self): + from aider.sendchat import ensure_alternating_roles + + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "user", "content": "Are you there?"}, + {"role": "assistant", "content": "Yes"}, + {"role": "assistant", "content": "How can I help?"}, + {"role": "user", "content": "Write code"}, + ] + expected = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": ""}, + {"role": "user", "content": "Are you there?"}, + {"role": "assistant", "content": "Yes"}, + {"role": "user", "content": ""}, + {"role": "assistant", "content": "How can I help?"}, + {"role": "user", "content": "Write code"}, + ] + result = ensure_alternating_roles(messages) + assert result == expected \ No newline at end of file diff --git 
a/locodiff-250425/results/aider_tests_basic_test_sendchat.py/x-ai_grok-code-fast-1/20250909_155825/metadata.json b/locodiff-250425/results/aider_tests_basic_test_sendchat.py/x-ai_grok-code-fast-1/20250909_155825/metadata.json new file mode 100644 index 000000000..998a64f16 --- /dev/null +++ b/locodiff-250425/results/aider_tests_basic_test_sendchat.py/x-ai_grok-code-fast-1/20250909_155825/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "aider_tests_basic_test_sendchat.py", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/aider_tests_basic_test_sendchat.py_prompt.txt", + "expected_file": "locodiff-250425/prompts/aider_tests_basic_test_sendchat.py_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:58:25.930921+00:00", + "success": true, + "error": null, + "api_error": false, + "raw_response_length": 6191, + "extracted_output_length": 6177, + "expected_output_length": 6178, + "results_dir": "locodiff-250425/results/aider_tests_basic_test_sendchat.py/x-ai_grok-code-fast-1/20250909_155825", + "generation_id": "gen-1757433505-sRELTfgpuEiqvsm2921U", + "cost_usd": 0.00836358, + "prompt_tokens": 10698, + "completion_tokens": 2216, + "total_tokens": 12914, + "native_prompt_tokens": 10776, + "native_completion_tokens": 4185, + "native_finish_reason": "stop", + "stats_error": null, + "native_tokens_reasoning": 2832 +} \ No newline at end of file diff --git a/locodiff-250425/results/aider_tests_basic_test_sendchat.py/x-ai_grok-code-fast-1/20250909_155825/output.diff b/locodiff-250425/results/aider_tests_basic_test_sendchat.py/x-ai_grok-code-fast-1/20250909_155825/output.diff new file mode 100644 index 000000000..c8af7cb1b --- /dev/null +++ b/locodiff-250425/results/aider_tests_basic_test_sendchat.py/x-ai_grok-code-fast-1/20250909_155825/output.diff @@ -0,0 +1 @@ +No differences found. 
diff --git a/locodiff-250425/results/aider_tests_basic_test_sendchat.py/x-ai_grok-code-fast-1/20250909_155825/raw_response.txt b/locodiff-250425/results/aider_tests_basic_test_sendchat.py/x-ai_grok-code-fast-1/20250909_155825/raw_response.txt new file mode 100644 index 000000000..705d50526 --- /dev/null +++ b/locodiff-250425/results/aider_tests_basic_test_sendchat.py/x-ai_grok-code-fast-1/20250909_155825/raw_response.txt @@ -0,0 +1,173 @@ +```python +import unittest +from unittest.mock import MagicMock, patch + +from aider.exceptions import LiteLLMExceptions +from aider.llm import litellm +from aider.models import Model + + +class PrintCalled(Exception): + pass + + +class TestSendChat(unittest.TestCase): + def setUp(self): + self.mock_messages = [{"role": "user", "content": "Hello"}] + self.mock_model = "gpt-4" + + def test_litellm_exceptions(self): + litellm_ex = LiteLLMExceptions() + litellm_ex._load(strict=True) + + @patch("litellm.completion") + @patch("builtins.print") + def test_simple_send_with_retries_rate_limit_error(self, mock_print, mock_completion): + mock = MagicMock() + mock.status_code = 500 + + # Set up the mock to raise + mock_completion.side_effect = [ + litellm.RateLimitError( + "rate limit exceeded", + response=mock, + llm_provider="llm_provider", + model="model", + ), + None, + ] + + # Call the simple_send_with_retries method + Model(self.mock_model).simple_send_with_retries(self.mock_messages) + assert mock_print.call_count == 3 + + @patch("litellm.completion") + def test_send_completion_basic(self, mock_completion): + # Setup mock response + mock_response = MagicMock() + mock_completion.return_value = mock_response + + # Test basic send_completion + hash_obj, response = Model(self.mock_model).send_completion( + self.mock_messages, functions=None, stream=False + ) + + assert response == mock_response + mock_completion.assert_called_once() + + @patch("litellm.completion") + def test_send_completion_with_functions(self, mock_completion): + 
mock_function = {"name": "test_function", "parameters": {"type": "object"}} + + hash_obj, response = Model(self.mock_model).send_completion( + self.mock_messages, functions=[mock_function], stream=False + ) + + # Verify function was properly included in tools + called_kwargs = mock_completion.call_args.kwargs + assert "tools" in called_kwargs + assert called_kwargs["tools"][0]["function"] == mock_function + + @patch("litellm.completion") + def test_simple_send_attribute_error(self, mock_completion): + # Setup mock to raise AttributeError + mock_completion.return_value = MagicMock() + mock_completion.return_value.choices = None + + # Should return None on AttributeError + result = Model(self.mock_model).simple_send_with_retries(self.mock_messages) + assert result is None + + @patch("litellm.completion") + @patch("builtins.print") + def test_simple_send_non_retryable_error(self, mock_print, mock_completion): + # Test with an error that shouldn't trigger retries + mock = MagicMock() + mock.status_code = 400 + + mock_completion.side_effect = litellm.NotFoundError( + message="Invalid request", llm_provider="test_provider", model="test_model" + ) + + result = Model(self.mock_model).simple_send_with_retries(self.mock_messages) + assert result is None + # Should only print the error message + assert mock_print.call_count == 1 + + def test_ensure_alternating_roles_empty(self): + from aider.sendchat import ensure_alternating_roles + + messages = [] + result = ensure_alternating_roles(messages) + assert result == [] + + def test_ensure_alternating_roles_single_message(self): + from aider.sendchat import ensure_alternating_roles + + messages = [{"role": "user", "content": "Hello"}] + result = ensure_alternating_roles(messages) + assert result == messages + + def test_ensure_alternating_roles_already_alternating(self): + from aider.sendchat import ensure_alternating_roles + + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there"}, + 
{"role": "user", "content": "How are you?"}, + ] + result = ensure_alternating_roles(messages) + assert result == messages + + def test_ensure_alternating_roles_consecutive_user(self): + from aider.sendchat import ensure_alternating_roles + + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "user", "content": "Are you there?"}, + ] + expected = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": ""}, + {"role": "user", "content": "Are you there?"}, + ] + result = ensure_alternating_roles(messages) + assert result == expected + + def test_ensure_alternating_roles_consecutive_assistant(self): + from aider.sendchat import ensure_alternating_roles + + messages = [ + {"role": "assistant", "content": "Hi there"}, + {"role": "assistant", "content": "How can I help?"}, + ] + expected = [ + {"role": "assistant", "content": "Hi there"}, + {"role": "user", "content": ""}, + {"role": "assistant", "content": "How can I help?"}, + ] + result = ensure_alternating_roles(messages) + assert result == expected + + def test_ensure_alternating_roles_mixed_sequence(self): + from aider.sendchat import ensure_alternating_roles + + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "user", "content": "Are you there?"}, + {"role": "assistant", "content": "Yes"}, + {"role": "assistant", "content": "How can I help?"}, + {"role": "user", "content": "Write code"}, + ] + expected = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": ""}, + {"role": "user", "content": "Are you there?"}, + {"role": "assistant", "content": "Yes"}, + {"role": "user", "content": ""}, + {"role": "assistant", "content": "How can I help?"}, + {"role": "user", "content": "Write code"}, + ] + result = ensure_alternating_roles(messages) + assert result == expected +``` \ No newline at end of file diff --git a/locodiff-250425/results/ghostty_pkg_libintl_build.zig/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt 
b/locodiff-250425/results/ghostty_pkg_libintl_build.zig/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt new file mode 100644 index 000000000..a7c269b1c --- /dev/null +++ b/locodiff-250425/results/ghostty_pkg_libintl_build.zig/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt @@ -0,0 +1,101 @@ +//! Provides libintl for macOS. +//! +//! IMPORTANT: This is only for macOS. We could support other platforms +//! if/when we need to but generally Linux provides libintl in libc. +//! Windows we'll have to figure out when we get there. +//! +//! Since this is only for macOS, there's a lot of hardcoded stuff +//! here that assumes macOS. For example, I generated the config.h +//! on my own machine (a Mac) and then copied it here. This isn't +//! ideal since we should do the same detection that gettext's configure +//! script does, but its quite a bit of work to do that. +//! +//! UPGRADING: If you need to upgrade gettext, then the only thing to +//! really watch out for is the xlocale.h include we added manually +//! at the end of config.h. The comment there notes why. When we upgrade +//! we should audit our config.h and make sure we add that back (if we +//! have to). 
+ +const std = @import("std"); + +pub fn build(b: *std.Build) !void { + const target = b.standardTargetOptions(.{}); + const optimize = b.standardOptimizeOption(.{}); + + var flags = std.ArrayList([]const u8).init(b.allocator); + defer flags.deinit(); + try flags.appendSlice(&.{ + "-DHAVE_CONFIG_H", + "-DLOCALEDIR=\"\"", + }); + + { + const lib = b.addStaticLibrary(.{ + .name = "intl", + .target = target, + .optimize = optimize, + }); + lib.linkLibC(); + lib.addIncludePath(b.path("")); + + if (target.result.os.tag.isDarwin()) { + const apple_sdk = @import("apple_sdk"); + try apple_sdk.addPaths(b, lib.root_module); + } + + if (b.lazyDependency("gettext", .{})) |upstream| { + lib.addIncludePath(upstream.path("gettext-runtime/intl")); + lib.addIncludePath(upstream.path("gettext-runtime/intl/gnulib-lib")); + lib.addCSourceFiles(.{ + .root = upstream.path("gettext-runtime/intl"), + .files = srcs, + .flags = flags.items, + }); + } + + lib.installHeader(b.path("libintl.h"), "libintl.h"); + b.installArtifact(lib); + } +} + +const srcs: []const []const u8 = &.{ + "bindtextdom.c", + "dcgettext.c", + "dcigettext.c", + "dcngettext.c", + "dgettext.c", + "dngettext.c", + "explodename.c", + "finddomain.c", + "gettext.c", + "hash-string.c", + "intl-compat.c", + "l10nflist.c", + "langprefs.c", + "loadmsgcat.c", + "localealias.c", + "log.c", + "ngettext.c", + "plural-exp.c", + "plural.c", + "setlocale.c", + "textdomain.c", + "version.c", + "compat.c", + + // There's probably a better way to detect that we need these, but + // these are hardcoded for now for macOS. + "gnulib-lib/getlocalename_l-unsafe.c", + "gnulib-lib/localename.c", + "gnulib-lib/localename-environ.c", + "gnulib-lib/localename-unsafe.c", + "gnulib-lib/setlocale-lock.c", + "gnulib-lib/setlocale_null.c", + "gnulib-lib/setlocale_null-unlocked.c", + + // Not needed for macOS, but we might need them for other platforms. + // If we expand this to support other platforms, we should uncomment + // these. 
+ // "osdep.c", + // "printf.c", +}; \ No newline at end of file diff --git a/locodiff-250425/results/ghostty_pkg_libintl_build.zig/x-ai_grok-code-fast-1/20250909_155741/metadata.json b/locodiff-250425/results/ghostty_pkg_libintl_build.zig/x-ai_grok-code-fast-1/20250909_155741/metadata.json new file mode 100644 index 000000000..a951ca3ff --- /dev/null +++ b/locodiff-250425/results/ghostty_pkg_libintl_build.zig/x-ai_grok-code-fast-1/20250909_155741/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "ghostty_pkg_libintl_build.zig", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/ghostty_pkg_libintl_build.zig_prompt.txt", + "expected_file": "locodiff-250425/prompts/ghostty_pkg_libintl_build.zig_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:57:41.893456+00:00", + "success": true, + "error": null, + "api_error": false, + "raw_response_length": 3173, + "extracted_output_length": 3162, + "expected_output_length": 3163, + "results_dir": "locodiff-250425/results/ghostty_pkg_libintl_build.zig/x-ai_grok-code-fast-1/20250909_155741", + "generation_id": "gen-1757433462-4An9zwL44O2QtURjV57L", + "cost_usd": 0.00580418, + "prompt_tokens": 11277, + "completion_tokens": 1475, + "total_tokens": 12752, + "native_prompt_tokens": 10939, + "native_completion_tokens": 2457, + "native_finish_reason": "stop", + "stats_error": null, + "native_tokens_reasoning": 1647 +} \ No newline at end of file diff --git a/locodiff-250425/results/ghostty_pkg_libintl_build.zig/x-ai_grok-code-fast-1/20250909_155741/output.diff b/locodiff-250425/results/ghostty_pkg_libintl_build.zig/x-ai_grok-code-fast-1/20250909_155741/output.diff new file mode 100644 index 000000000..c8af7cb1b --- /dev/null +++ b/locodiff-250425/results/ghostty_pkg_libintl_build.zig/x-ai_grok-code-fast-1/20250909_155741/output.diff @@ -0,0 +1 @@ +No differences found. 
diff --git a/locodiff-250425/results/ghostty_pkg_libintl_build.zig/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt b/locodiff-250425/results/ghostty_pkg_libintl_build.zig/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt new file mode 100644 index 000000000..2ae605996 --- /dev/null +++ b/locodiff-250425/results/ghostty_pkg_libintl_build.zig/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt @@ -0,0 +1,103 @@ +```zig +//! Provides libintl for macOS. +//! +//! IMPORTANT: This is only for macOS. We could support other platforms +//! if/when we need to but generally Linux provides libintl in libc. +//! Windows we'll have to figure out when we get there. +//! +//! Since this is only for macOS, there's a lot of hardcoded stuff +//! here that assumes macOS. For example, I generated the config.h +//! on my own machine (a Mac) and then copied it here. This isn't +//! ideal since we should do the same detection that gettext's configure +//! script does, but its quite a bit of work to do that. +//! +//! UPGRADING: If you need to upgrade gettext, then the only thing to +//! really watch out for is the xlocale.h include we added manually +//! at the end of config.h. The comment there notes why. When we upgrade +//! we should audit our config.h and make sure we add that back (if we +//! have to). 
+ +const std = @import("std"); + +pub fn build(b: *std.Build) !void { + const target = b.standardTargetOptions(.{}); + const optimize = b.standardOptimizeOption(.{}); + + var flags = std.ArrayList([]const u8).init(b.allocator); + defer flags.deinit(); + try flags.appendSlice(&.{ + "-DHAVE_CONFIG_H", + "-DLOCALEDIR=\"\"", + }); + + { + const lib = b.addStaticLibrary(.{ + .name = "intl", + .target = target, + .optimize = optimize, + }); + lib.linkLibC(); + lib.addIncludePath(b.path("")); + + if (target.result.os.tag.isDarwin()) { + const apple_sdk = @import("apple_sdk"); + try apple_sdk.addPaths(b, lib.root_module); + } + + if (b.lazyDependency("gettext", .{})) |upstream| { + lib.addIncludePath(upstream.path("gettext-runtime/intl")); + lib.addIncludePath(upstream.path("gettext-runtime/intl/gnulib-lib")); + lib.addCSourceFiles(.{ + .root = upstream.path("gettext-runtime/intl"), + .files = srcs, + .flags = flags.items, + }); + } + + lib.installHeader(b.path("libintl.h"), "libintl.h"); + b.installArtifact(lib); + } +} + +const srcs: []const []const u8 = &.{ + "bindtextdom.c", + "dcgettext.c", + "dcigettext.c", + "dcngettext.c", + "dgettext.c", + "dngettext.c", + "explodename.c", + "finddomain.c", + "gettext.c", + "hash-string.c", + "intl-compat.c", + "l10nflist.c", + "langprefs.c", + "loadmsgcat.c", + "localealias.c", + "log.c", + "ngettext.c", + "plural-exp.c", + "plural.c", + "setlocale.c", + "textdomain.c", + "version.c", + "compat.c", + + // There's probably a better way to detect that we need these, but + // these are hardcoded for now for macOS. + "gnulib-lib/getlocalename_l-unsafe.c", + "gnulib-lib/localename.c", + "gnulib-lib/localename-environ.c", + "gnulib-lib/localename-unsafe.c", + "gnulib-lib/setlocale-lock.c", + "gnulib-lib/setlocale_null.c", + "gnulib-lib/setlocale_null-unlocked.c", + + // Not needed for macOS, but we might need them for other platforms. + // If we expand this to support other platforms, we should uncomment + // these. 
+ // "osdep.c", + // "printf.c", +}; +``` \ No newline at end of file diff --git a/locodiff-250425/results/ghostty_src_build_SharedDeps.zig/x-ai_grok-code-fast-1/20250909_155840/extracted_output.txt b/locodiff-250425/results/ghostty_src_build_SharedDeps.zig/x-ai_grok-code-fast-1/20250909_155840/extracted_output.txt new file mode 100644 index 000000000..1d8550a97 --- /dev/null +++ b/locodiff-250425/results/ghostty_src_build_SharedDeps.zig/x-ai_grok-code-fast-1/20250909_155840/extracted_output.txt @@ -0,0 +1,326 @@ +const SharedDeps = @This(); + +const std = @import("std"); +const Config = @import("Config.zig"); +const HelpStrings = @import("HelpStrings.zig"); +const MetallibStep = @import("MetallibStep.zig"); +const UnicodeTables = @import("UnicodeTables.zig"); +const GhosttyFrameData = @import("GhosttyFrameData.zig"); +const DistResource = @import("GhosttyDist.zig").Resource; + +const gresource = @import("../apprt/gtk/gresource.zig"); + +config: *const Config, + +options: *std.Build.Step.Options, +help_strings: HelpStrings, +metallib: ?*MetallibStep, +unicode_tables: UnicodeTables, +framedata: GhosttyFrameData, + +/// Used to keep track of a list of file sources. +pub const LazyPathList = std.ArrayList(std.Build.LazyPath); + +pub fn init(b: *std.Build, cfg: *const Config) !SharedDeps { + var result: SharedDeps = .{ + .config = cfg, + .help_strings = try HelpStrings.init(b, cfg), + .unicode_tables = try UnicodeTables.init(b), + .framedata = try GhosttyFrameData.init(b), + + // Setup by retarget + .options = undefined, + .metallib = undefined, + }; + try result.initTarget(b, cfg.target); + return result; +} + +/// Retarget our dependencies for another build target. Modifies in-place. +pub fn retarget( + self: *const SharedDeps, + b: *std.Build, + target: std.Build.ResolvedTarget, +) !SharedDeps { + var result = self.*; + try result.initTarget(b, target); + return result; +} + +/// Change the exe entrypoint. 
+pub fn changeEntrypoint( + self: *const SharedDeps, + b: *std.Build, + entrypoint: Config.ExeEntrypoint, +) !SharedDeps { + // Change our config + const config = try b.allocator.create(Config); + config.* = self.config.*; + config.exe_entrypoint = entrypoint; + + var result = self.*; + result.config = config; + return result; +} + +fn initTarget( + self: *SharedDeps, + b: *std.Build, + target: std.Build.ResolvedTarget, +) !void { + // Update our metallib + self.metallib = MetallibStep.create(b, .{ + .name = "Ghostty", + .target = target, + .sources = &.{b.path("src/renderer/shaders/cell.metal")}, + }); + + // Change our config + const config = try b.allocator.create(Config); + config.* = self.config.*; + config.target = target; + self.config = config; + + // Setup our shared build options + self.options = b.addOptions(); + try self.config.addOptions(self.options); +} + +pub fn add( + self: *const SharedDeps, + step: *std.Build.Step.Compile, +) !LazyPathList { + const b = step.step.owner; + + // We could use our config.target/optimize fields here but its more + // correct to always match our step. + const target = step.root_module.resolved_target.?; + const optimize = step.root_module.optimize.?; + + // We maintain a list of our static libraries and return it so that + // we can build a single fat static library for the final app. 
+ var static_libs = LazyPathList.init(b.allocator); + errdefer static_libs.deinit(); + + // Every exe gets build options populated + step.root_module.addOptions("build_options", self.options); + + // Freetype + _ = b.systemIntegrationOption("freetype", .{}); // Shows it in help + if (self.config.font_backend.hasFreetype()) { + const freetype_dep = b.dependency("freetype", .{ + .target = target, + .optimize = optimize, + .@"enable-libpng" = true, + }); + step.root_module.addImport("freetype", freetype_dep.module("freetype")); + + if (b.systemIntegrationOption("freetype", .{})) { + step.linkSystemLibrary2("bzip2", dynamic_link_opts); + step.linkSystemLibrary2("freetype2", dynamic_link_opts); + } else { + step.linkLibrary(freetype_dep.artifact("freetype")); + try static_libs.append(freetype_dep.artifact("freetype").getEmittedBin()); + } + } + + // Harfbuzz + _ = b.systemIntegrationOption("harfbuzz", .{}); // Shows it in help + if (self.config.font_backend.hasHarfbuzz()) { + const harfbuzz_dep = b.dependency("harfbuzz", .{ + .target = target, + .optimize = optimize, + .@"enable-freetype" = true, + .@"enable-coretext" = self.config.font_backend.hasCoretext(), + }); + + step.root_module.addImport( + "harfbuzz", + harfbuzz_dep.module("harfbuzz"), + ); + if (b.systemIntegrationOption("harfbuzz", .{})) { + step.linkSystemLibrary2("harfbuzz", dynamic_link_opts); + } else { + step.linkLibrary(harfbuzz_dep.artifact("harfbuzz")); + try static_libs.append(harfbuzz_dep.artifact("harfbuzz").getEmittedBin()); + } + } + + // Fontconfig + _ = b.systemIntegrationOption("fontconfig", .{}); // Shows it in help + if (self.config.font_backend.hasFontconfig()) { + const fontconfig_dep = b.dependency("fontconfig", .{ + .target = target, + .optimize = optimize, + }); + step.root_module.addImport( + "fontconfig", + fontconfig_dep.module("fontconfig"), + ); + + if (b.systemIntegrationOption("fontconfig", .{})) { + step.linkSystemLibrary2("fontconfig", dynamic_link_opts); + } else { + 
step.linkLibrary(fontconfig_dep.artifact("fontconfig")); + try static_libs.append(fontconfig_dep.artifact("fontconfig").getEmittedBin()); + } + } + + // Libpng - Ghostty doesn't actually use this directly, its only used + // through dependencies, so we only need to add it to our static + // libs list if we're not using system integration. The dependencies + // will handle linking it. + if (!b.systemIntegrationOption("libpng", .{})) { + const libpng_dep = b.dependency("libpng", .{ + .target = target, + .optimize = optimize, + }); + step.linkLibrary(libpng_dep.artifact("png")); + try static_libs.append(libpng_dep.artifact("png").getEmittedBin()); + } + + // Zlib - same as libpng, only used through dependencies. + if (!b.systemIntegrationOption("zlib", .{})) { + const zlib_dep = b.dependency("zlib", .{ + .target = target, + .optimize = optimize, + }); + step.linkLibrary(zlib_dep.artifact("z")); + try static_libs.append(zlib_dep.artifact("z").getEmittedBin()); + } + + // Oniguruma + const oniguruma_dep = b.dependency("oniguruma", .{ + .target = target, + .optimize = optimize, + }); + step.root_module.addImport("oniguruma", oniguruma_dep.module("oniguruma")); + if (b.systemIntegrationOption("oniguruma", .{})) { + step.linkSystemLibrary2("oniguruma", dynamic_link_opts); + } else { + step.linkLibrary(oniguruma_dep.artifact("oniguruma")); + try static_libs.append(oniguruma_dep.artifact("oniguruma").getEmittedBin()); + } + + // Glslang + const glslang_dep = b.dependency("glslang", .{ + .target = target, + .optimize = optimize, + }); + step.root_module.addImport("glslang", glslang_dep.module("glslang")); + if (b.systemIntegrationOption("glslang", .{})) { + step.linkSystemLibrary2("glslang", dynamic_link_opts); + step.linkSystemLibrary2("glslang-default-resource-limits", dynamic_link_opts); + } else { + step.linkLibrary(glslang_dep.artifact("glslang")); + try static_libs.append(glslang_dep.artifact("glslang").getEmittedBin()); + } + + // Spirv-cross + const spirv_cross_dep = 
b.dependency("spirv_cross", .{ + .target = target, + .optimize = optimize, + }); + step.root_module.addImport("spirv_cross", spirv_cross_dep.module("spirv_cross")); + if (b.systemIntegrationOption("spirv-cross", .{})) { + step.linkSystemLibrary2("spirv-cross", dynamic_link_opts); + } else { + step.linkLibrary(spirv_cross_dep.artifact("spirv_cross")); + try static_libs.append(spirv_cross_dep.artifact("spirv_cross").getEmittedBin()); + } + + // Simdutf + if (b.systemIntegrationOption("simdutf", .{})) { + step.linkSystemLibrary2("simdutf", dynamic_link_opts); + } else { + const simdutf_dep = b.dependency("simdutf", .{ + .target = target, + .optimize = optimize, + }); + step.linkLibrary(simdutf_dep.artifact("simdutf")); + try static_libs.append(simdutf_dep.artifact("simdutf").getEmittedBin()); + } + + // Sentry + if (self.config.sentry) { + const sentry_dep = b.dependency("sentry", .{ + .target = target, + .optimize = optimize, + .backend = .breakpad, + }); + + step.root_module.addImport("sentry", sentry_dep.module("sentry")); + + // Sentry + step.linkLibrary(sentry_dep.artifact("sentry")); + try static_libs.append(sentry_dep.artifact("sentry").getEmittedBin()); + + // We also need to include breakpad in the static libs. + const breakpad_dep = sentry_dep.builder.dependency("breakpad", .{ + .target = target, + .optimize = optimize, + }); + try static_libs.append(breakpad_dep.artifact("breakpad").getEmittedBin()); + } + + // Wasm we do manually since it is such a different build. + if (step.rootModuleTarget().cpu.arch == .wasm32) { + const js_dep = b.dependency("zig_js", .{ + .target = target, + .optimize = optimize, + }); + step.root_module.addImport("zig-js", js_dep.module("zig-js")); + + return static_libs; + } + + // On Linux, we need to add a couple common library paths that aren't + // on the standard search list. i.e. GTK is often in /usr/lib/x86_64-linux-gnu + // on x86_64. 
+ if (step.rootModuleTarget().os.tag == .linux) { + const triple = try step.rootModuleTarget().linuxTriple(b.allocator); + const path = b.fmt("/usr/lib/{s}", .{triple}); + if (std.fs.accessAbsolute(path, .{})) { + step.addLibraryPath(.{ .cwd_relative = path }); + } else |_| {} + } + + // C files + step.linkLibC(); + step.addIncludePath(b.path("src/stb")); + step.addCSourceFiles(.{ .files = &.{"src/stb/stb.c"} }); + if (step.rootModuleTarget().os.tag == .linux) { + step.addIncludePath(b.path("src/apprt/gtk")); + } + + // C++ files + step.linkLibCpp(); + step.addIncludePath(b.path("src")); + { + // From hwy/detect_targets.h + const HWY_AVX3_SPR: c_int = 1 << 4; + const HWY_AVX3_ZEN4: c_int = 1 << 6; + const HWY_AVX3_DL: c_int = 1 << 7; + const HWY_AVX3: c_int = 1 << 8; + + // Zig 0.13 bug: https://github.com/ziglang/zig/issues/20414 + // To workaround this we just disable AVX512 support completely. + // The performance difference between AVX2 and AVX512 is not + // significant for our use case and AVX512 is very rare on consumer + // hardware anyways. + const HWY_DISABLED_TARGETS: c_int = HWY_AVX3_SPR | HWY_AVX3_ZEN4 | HWY_AVX3_DL | HWY_AVX3; + + step.addCSourceFiles(.{ + .files = &.{ + "src/simd/base64.cpp", + "src/simd/codepoint_width.cpp", + "src/simd/index_of.cpp", + "src/simd/vt.cpp", + }, + .flags = if (step.rootModuleTarget().cpu.arch == .x86_64) &.{ + b.fmt("-DHWY_DISABLED_TARGETS={}", .{HWY_DISABLED_TARGETS}), + } else &.{}, + }); + } + + // We always require the system SDK so that our system headers are available. 
\ No newline at end of file diff --git a/locodiff-250425/results/ghostty_src_build_SharedDeps.zig/x-ai_grok-code-fast-1/20250909_155840/metadata.json b/locodiff-250425/results/ghostty_src_build_SharedDeps.zig/x-ai_grok-code-fast-1/20250909_155840/metadata.json new file mode 100644 index 000000000..063ba92ff --- /dev/null +++ b/locodiff-250425/results/ghostty_src_build_SharedDeps.zig/x-ai_grok-code-fast-1/20250909_155840/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "ghostty_src_build_SharedDeps.zig", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/ghostty_src_build_SharedDeps.zig_prompt.txt", + "expected_file": "locodiff-250425/prompts/ghostty_src_build_SharedDeps.zig_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:58:40.242804+00:00", + "success": false, + "error": "Output mismatch", + "api_error": false, + "raw_response_length": 11411, + "extracted_output_length": 11403, + "expected_output_length": 27017, + "results_dir": "locodiff-250425/results/ghostty_src_build_SharedDeps.zig/x-ai_grok-code-fast-1/20250909_155840", + "generation_id": "gen-1757433520-U5oCYEukeaAaFFAvEC6S", + "cost_usd": 0.01552882, + "prompt_tokens": 26696, + "completion_tokens": 5458, + "total_tokens": 32154, + "native_prompt_tokens": 26210, + "native_completion_tokens": 9999, + "native_finish_reason": "length", + "stats_error": null, + "native_tokens_reasoning": 7165 +} \ No newline at end of file diff --git a/locodiff-250425/results/ghostty_src_build_SharedDeps.zig/x-ai_grok-code-fast-1/20250909_155840/output.diff b/locodiff-250425/results/ghostty_src_build_SharedDeps.zig/x-ai_grok-code-fast-1/20250909_155840/output.diff new file mode 100644 index 000000000..27711d2e7 --- /dev/null +++ b/locodiff-250425/results/ghostty_src_build_SharedDeps.zig/x-ai_grok-code-fast-1/20250909_155840/output.diff @@ -0,0 +1,734 @@ + +index 8fd602fbd..1d8550a97 100644 +--- a/ghostty_src_build_SharedDeps.zig_expectedoutput.txt 
(expected):tmp/tmpps5ypvl2_expected.txt ++++ b/ghostty_src_build_SharedDeps.zig_extracted.txt (actual):tmp/tmpg0lif_1q_actual.txt +@@ -108,72 +108,61 @@ pub fn add( + // Freetype + _ = b.systemIntegrationOption("freetype", .{}); // Shows it in help + if (self.config.font_backend.hasFreetype()) { +- if (b.lazyDependency("freetype", .{ ++ const freetype_dep = b.dependency("freetype", .{ + .target = target, + .optimize = optimize, + .@"enable-libpng" = true, +- })) |freetype_dep| { +- step.root_module.addImport( +- "freetype", +- freetype_dep.module("freetype"), +- ); +- +- if (b.systemIntegrationOption("freetype", .{})) { +- step.linkSystemLibrary2("bzip2", dynamic_link_opts); +- step.linkSystemLibrary2("freetype2", dynamic_link_opts); +- } else { +- step.linkLibrary(freetype_dep.artifact("freetype")); +- try static_libs.append( +- freetype_dep.artifact("freetype").getEmittedBin(), +- ); +- } ++ }); ++ step.root_module.addImport("freetype", freetype_dep.module("freetype")); ++ ++ if (b.systemIntegrationOption("freetype", .{})) { ++ step.linkSystemLibrary2("bzip2", dynamic_link_opts); ++ step.linkSystemLibrary2("freetype2", dynamic_link_opts); ++ } else { ++ step.linkLibrary(freetype_dep.artifact("freetype")); ++ try static_libs.append(freetype_dep.artifact("freetype").getEmittedBin()); + } + } + + // Harfbuzz + _ = b.systemIntegrationOption("harfbuzz", .{}); // Shows it in help + if (self.config.font_backend.hasHarfbuzz()) { +- if (b.lazyDependency("harfbuzz", .{ ++ const harfbuzz_dep = b.dependency("harfbuzz", .{ + .target = target, + .optimize = optimize, + .@"enable-freetype" = true, + .@"enable-coretext" = self.config.font_backend.hasCoretext(), +- })) |harfbuzz_dep| { +- step.root_module.addImport( +- "harfbuzz", +- harfbuzz_dep.module("harfbuzz"), +- ); +- if (b.systemIntegrationOption("harfbuzz", .{})) { +- step.linkSystemLibrary2("harfbuzz", dynamic_link_opts); +- } else { +- step.linkLibrary(harfbuzz_dep.artifact("harfbuzz")); +- try static_libs.append( +- 
harfbuzz_dep.artifact("harfbuzz").getEmittedBin(), +- ); +- } ++ }); ++ ++ step.root_module.addImport( ++ "harfbuzz", ++ harfbuzz_dep.module("harfbuzz"), ++ ); ++ if (b.systemIntegrationOption("harfbuzz", .{})) { ++ step.linkSystemLibrary2("harfbuzz", dynamic_link_opts); ++ } else { ++ step.linkLibrary(harfbuzz_dep.artifact("harfbuzz")); ++ try static_libs.append(harfbuzz_dep.artifact("harfbuzz").getEmittedBin()); + } + } + + // Fontconfig + _ = b.systemIntegrationOption("fontconfig", .{}); // Shows it in help + if (self.config.font_backend.hasFontconfig()) { +- if (b.lazyDependency("fontconfig", .{ ++ const fontconfig_dep = b.dependency("fontconfig", .{ + .target = target, + .optimize = optimize, +- })) |fontconfig_dep| { +- step.root_module.addImport( +- "fontconfig", +- fontconfig_dep.module("fontconfig"), +- ); +- +- if (b.systemIntegrationOption("fontconfig", .{})) { +- step.linkSystemLibrary2("fontconfig", dynamic_link_opts); +- } else { +- step.linkLibrary(fontconfig_dep.artifact("fontconfig")); +- try static_libs.append( +- fontconfig_dep.artifact("fontconfig").getEmittedBin(), +- ); +- } ++ }); ++ step.root_module.addImport( ++ "fontconfig", ++ fontconfig_dep.module("fontconfig"), ++ ); ++ ++ if (b.systemIntegrationOption("fontconfig", .{})) { ++ step.linkSystemLibrary2("fontconfig", dynamic_link_opts); ++ } else { ++ step.linkLibrary(fontconfig_dep.artifact("fontconfig")); ++ try static_libs.append(fontconfig_dep.artifact("fontconfig").getEmittedBin()); + } + } + +@@ -182,142 +171,105 @@ pub fn add( + // libs list if we're not using system integration. The dependencies + // will handle linking it. 
+ if (!b.systemIntegrationOption("libpng", .{})) { +- if (b.lazyDependency("libpng", .{ ++ const libpng_dep = b.dependency("libpng", .{ + .target = target, + .optimize = optimize, +- })) |libpng_dep| { +- step.linkLibrary(libpng_dep.artifact("png")); +- try static_libs.append( +- libpng_dep.artifact("png").getEmittedBin(), +- ); +- } ++ }); ++ step.linkLibrary(libpng_dep.artifact("png")); ++ try static_libs.append(libpng_dep.artifact("png").getEmittedBin()); + } + + // Zlib - same as libpng, only used through dependencies. + if (!b.systemIntegrationOption("zlib", .{})) { +- if (b.lazyDependency("zlib", .{ ++ const zlib_dep = b.dependency("zlib", .{ + .target = target, + .optimize = optimize, +- })) |zlib_dep| { +- step.linkLibrary(zlib_dep.artifact("z")); +- try static_libs.append( +- zlib_dep.artifact("z").getEmittedBin(), +- ); +- } ++ }); ++ step.linkLibrary(zlib_dep.artifact("z")); ++ try static_libs.append(zlib_dep.artifact("z").getEmittedBin()); + } + + // Oniguruma +- if (b.lazyDependency("oniguruma", .{ ++ const oniguruma_dep = b.dependency("oniguruma", .{ + .target = target, + .optimize = optimize, +- })) |oniguruma_dep| { +- step.root_module.addImport( +- "oniguruma", +- oniguruma_dep.module("oniguruma"), +- ); +- if (b.systemIntegrationOption("oniguruma", .{})) { +- step.linkSystemLibrary2("oniguruma", dynamic_link_opts); +- } else { +- step.linkLibrary(oniguruma_dep.artifact("oniguruma")); +- try static_libs.append( +- oniguruma_dep.artifact("oniguruma").getEmittedBin(), +- ); +- } ++ }); ++ step.root_module.addImport("oniguruma", oniguruma_dep.module("oniguruma")); ++ if (b.systemIntegrationOption("oniguruma", .{})) { ++ step.linkSystemLibrary2("oniguruma", dynamic_link_opts); ++ } else { ++ step.linkLibrary(oniguruma_dep.artifact("oniguruma")); ++ try static_libs.append(oniguruma_dep.artifact("oniguruma").getEmittedBin()); + } + + // Glslang +- if (b.lazyDependency("glslang", .{ ++ const glslang_dep = b.dependency("glslang", .{ + .target = target, + 
.optimize = optimize, +- })) |glslang_dep| { +- step.root_module.addImport("glslang", glslang_dep.module("glslang")); +- if (b.systemIntegrationOption("glslang", .{})) { +- step.linkSystemLibrary2("glslang", dynamic_link_opts); +- step.linkSystemLibrary2( +- "glslang-default-resource-limits", +- dynamic_link_opts, +- ); +- } else { +- step.linkLibrary(glslang_dep.artifact("glslang")); +- try static_libs.append( +- glslang_dep.artifact("glslang").getEmittedBin(), +- ); +- } ++ }); ++ step.root_module.addImport("glslang", glslang_dep.module("glslang")); ++ if (b.systemIntegrationOption("glslang", .{})) { ++ step.linkSystemLibrary2("glslang", dynamic_link_opts); ++ step.linkSystemLibrary2("glslang-default-resource-limits", dynamic_link_opts); ++ } else { ++ step.linkLibrary(glslang_dep.artifact("glslang")); ++ try static_libs.append(glslang_dep.artifact("glslang").getEmittedBin()); + } + + // Spirv-cross +- if (b.lazyDependency("spirv_cross", .{ ++ const spirv_cross_dep = b.dependency("spirv_cross", .{ + .target = target, + .optimize = optimize, +- })) |spirv_cross_dep| { +- step.root_module.addImport( +- "spirv_cross", +- spirv_cross_dep.module("spirv_cross"), +- ); +- if (b.systemIntegrationOption("spirv-cross", .{})) { +- step.linkSystemLibrary2("spirv-cross", dynamic_link_opts); +- } else { +- step.linkLibrary(spirv_cross_dep.artifact("spirv_cross")); +- try static_libs.append( +- spirv_cross_dep.artifact("spirv_cross").getEmittedBin(), +- ); +- } ++ }); ++ step.root_module.addImport("spirv_cross", spirv_cross_dep.module("spirv_cross")); ++ if (b.systemIntegrationOption("spirv-cross", .{})) { ++ step.linkSystemLibrary2("spirv-cross", dynamic_link_opts); ++ } else { ++ step.linkLibrary(spirv_cross_dep.artifact("spirv_cross")); ++ try static_libs.append(spirv_cross_dep.artifact("spirv_cross").getEmittedBin()); + } + + // Simdutf + if (b.systemIntegrationOption("simdutf", .{})) { + step.linkSystemLibrary2("simdutf", dynamic_link_opts); + } else { +- if 
(b.lazyDependency("simdutf", .{ ++ const simdutf_dep = b.dependency("simdutf", .{ + .target = target, + .optimize = optimize, +- })) |simdutf_dep| { +- step.linkLibrary(simdutf_dep.artifact("simdutf")); +- try static_libs.append( +- simdutf_dep.artifact("simdutf").getEmittedBin(), +- ); +- } ++ }); ++ step.linkLibrary(simdutf_dep.artifact("simdutf")); ++ try static_libs.append(simdutf_dep.artifact("simdutf").getEmittedBin()); + } + + // Sentry + if (self.config.sentry) { +- if (b.lazyDependency("sentry", .{ ++ const sentry_dep = b.dependency("sentry", .{ + .target = target, + .optimize = optimize, + .backend = .breakpad, +- })) |sentry_dep| { +- step.root_module.addImport( +- "sentry", +- sentry_dep.module("sentry"), +- ); +- step.linkLibrary(sentry_dep.artifact("sentry")); +- try static_libs.append( +- sentry_dep.artifact("sentry").getEmittedBin(), +- ); +- +- // We also need to include breakpad in the static libs. +- if (sentry_dep.builder.lazyDependency("breakpad", .{ +- .target = target, +- .optimize = optimize, +- })) |breakpad_dep| { +- try static_libs.append( +- breakpad_dep.artifact("breakpad").getEmittedBin(), +- ); +- } +- } ++ }); ++ ++ step.root_module.addImport("sentry", sentry_dep.module("sentry")); ++ ++ // Sentry ++ step.linkLibrary(sentry_dep.artifact("sentry")); ++ try static_libs.append(sentry_dep.artifact("sentry").getEmittedBin()); ++ ++ // We also need to include breakpad in the static libs. ++ const breakpad_dep = sentry_dep.builder.dependency("breakpad", .{ ++ .target = target, ++ .optimize = optimize, ++ }); ++ try static_libs.append(breakpad_dep.artifact("breakpad").getEmittedBin()); + } + + // Wasm we do manually since it is such a different build. 
+ if (step.rootModuleTarget().cpu.arch == .wasm32) { +- if (b.lazyDependency("zig_js", .{ ++ const js_dep = b.dependency("zig_js", .{ + .target = target, + .optimize = optimize, +- })) |js_dep| { +- step.root_module.addImport( +- "zig-js", +- js_dep.module("zig-js"), +- ); +- } ++ }); ++ step.root_module.addImport("zig-js", js_dep.module("zig-js")); + + return static_libs; + } +@@ -371,421 +323,4 @@ pub fn add( + }); + } + +- // We always require the system SDK so that our system headers are available. +- // This makes things like `os/log.h` available for cross-compiling. +- if (step.rootModuleTarget().os.tag.isDarwin()) { +- try @import("apple_sdk").addPaths(b, step.root_module); +- +- const metallib = self.metallib.?; +- metallib.output.addStepDependencies(&step.step); +- step.root_module.addAnonymousImport("ghostty_metallib", .{ +- .root_source_file = metallib.output, +- }); +- } +- +- // Other dependencies, mostly pure Zig +- if (b.lazyDependency("opengl", .{})) |dep| { +- step.root_module.addImport("opengl", dep.module("opengl")); +- } +- if (b.lazyDependency("vaxis", .{})) |dep| { +- step.root_module.addImport("vaxis", dep.module("vaxis")); +- } +- if (b.lazyDependency("wuffs", .{ +- .target = target, +- .optimize = optimize, +- })) |dep| { +- step.root_module.addImport("wuffs", dep.module("wuffs")); +- } +- if (b.lazyDependency("libxev", .{ +- .target = target, +- .optimize = optimize, +- })) |dep| { +- step.root_module.addImport("xev", dep.module("xev")); +- } +- if (b.lazyDependency("z2d", .{})) |dep| { +- step.root_module.addImport("z2d", b.addModule("z2d", .{ +- .root_source_file = dep.path("src/z2d.zig"), +- .target = target, +- .optimize = optimize, +- })); +- } +- if (b.lazyDependency("ziglyph", .{ +- .target = target, +- .optimize = optimize, +- })) |dep| { +- step.root_module.addImport("ziglyph", dep.module("ziglyph")); +- } +- if (b.lazyDependency("zf", .{ +- .target = target, +- .optimize = optimize, +- .with_tui = false, +- })) |dep| { +- 
step.root_module.addImport("zf", dep.module("zf")); +- } +- +- // Mac Stuff +- if (step.rootModuleTarget().os.tag.isDarwin()) { +- if (b.lazyDependency("zig_objc", .{ +- .target = target, +- .optimize = optimize, +- })) |objc_dep| { +- step.root_module.addImport( +- "objc", +- objc_dep.module("objc"), +- ); +- } +- +- if (b.lazyDependency("macos", .{ +- .target = target, +- .optimize = optimize, +- })) |macos_dep| { +- step.root_module.addImport( +- "macos", +- macos_dep.module("macos"), +- ); +- step.linkLibrary( +- macos_dep.artifact("macos"), +- ); +- try static_libs.append( +- macos_dep.artifact("macos").getEmittedBin(), +- ); +- } +- +- if (self.config.renderer == .opengl) { +- step.linkFramework("OpenGL"); +- } +- +- // Apple platforms do not include libc libintl so we bundle it. +- // This is LGPL but since our source code is open source we are +- // in compliance with the LGPL since end users can modify this +- // build script to replace the bundled libintl with their own. +- if (b.lazyDependency("libintl", .{ +- .target = target, +- .optimize = optimize, +- })) |libintl_dep| { +- step.linkLibrary(libintl_dep.artifact("intl")); +- try static_libs.append( +- libintl_dep.artifact("intl").getEmittedBin(), +- ); +- } +- } +- +- // cimgui +- if (b.lazyDependency("cimgui", .{ +- .target = target, +- .optimize = optimize, +- })) |cimgui_dep| { +- step.root_module.addImport("cimgui", cimgui_dep.module("cimgui")); +- step.linkLibrary(cimgui_dep.artifact("cimgui")); +- try static_libs.append(cimgui_dep.artifact("cimgui").getEmittedBin()); +- } +- +- // Highway +- if (b.lazyDependency("highway", .{ +- .target = target, +- .optimize = optimize, +- })) |highway_dep| { +- step.linkLibrary(highway_dep.artifact("highway")); +- try static_libs.append(highway_dep.artifact("highway").getEmittedBin()); +- } +- +- // utfcpp - This is used as a dependency on our hand-written C++ code +- if (b.lazyDependency("utfcpp", .{ +- .target = target, +- .optimize = optimize, +- })) 
|utfcpp_dep| { +- step.linkLibrary(utfcpp_dep.artifact("utfcpp")); +- try static_libs.append(utfcpp_dep.artifact("utfcpp").getEmittedBin()); +- } +- +- // If we're building an exe then we have additional dependencies. +- if (step.kind != .lib) { +- // We always statically compile glad +- step.addIncludePath(b.path("vendor/glad/include/")); +- step.addCSourceFile(.{ +- .file = b.path("vendor/glad/src/gl.c"), +- .flags = &.{}, +- }); +- +- // When we're targeting flatpak we ALWAYS link GTK so we +- // get access to glib for dbus. +- if (self.config.flatpak) step.linkSystemLibrary2("gtk4", dynamic_link_opts); +- +- switch (self.config.app_runtime) { +- .none => {}, +- +- .glfw => if (b.lazyDependency("glfw", .{ +- .target = target, +- .optimize = optimize, +- })) |glfw_dep| { +- step.root_module.addImport( +- "glfw", +- glfw_dep.module("glfw"), +- ); +- }, +- +- .gtk => try self.addGTK(step), +- } +- } +- +- self.help_strings.addImport(step); +- self.unicode_tables.addImport(step); +- self.framedata.addImport(step); +- +- return static_libs; +-} +- +-/// Setup the dependencies for the GTK apprt build. The GTK apprt +-/// is particularly involved compared to others so we pull this out +-/// into a dedicated function. 
+-fn addGTK( +- self: *const SharedDeps, +- step: *std.Build.Step.Compile, +-) !void { +- const b = step.step.owner; +- const target = step.root_module.resolved_target.?; +- const optimize = step.root_module.optimize.?; +- +- const gobject_ = b.lazyDependency("gobject", .{ +- .target = target, +- .optimize = optimize, +- }); +- if (gobject_) |gobject| { +- const gobject_imports = .{ +- .{ "adw", "adw1" }, +- .{ "gdk", "gdk4" }, +- .{ "gio", "gio2" }, +- .{ "glib", "glib2" }, +- .{ "gobject", "gobject2" }, +- .{ "gtk", "gtk4" }, +- .{ "xlib", "xlib2" }, +- }; +- inline for (gobject_imports) |import| { +- const name, const module = import; +- step.root_module.addImport(name, gobject.module(module)); +- } +- } +- +- step.linkSystemLibrary2("gtk4", dynamic_link_opts); +- step.linkSystemLibrary2("libadwaita-1", dynamic_link_opts); +- +- if (self.config.x11) { +- step.linkSystemLibrary2("X11", dynamic_link_opts); +- if (gobject_) |gobject| { +- step.root_module.addImport( +- "gdk_x11", +- gobject.module("gdkx114"), +- ); +- } +- } +- +- if (self.config.wayland) wayland: { +- // These need to be all be called to note that we need them. +- const wayland_dep_ = b.lazyDependency("wayland", .{}); +- const wayland_protocols_dep_ = b.lazyDependency( +- "wayland_protocols", +- .{}, +- ); +- const plasma_wayland_protocols_dep_ = b.lazyDependency( +- "plasma_wayland_protocols", +- .{}, +- ); +- +- // Unwrap or return, there are no more dependencies below. +- const wayland_dep = wayland_dep_ orelse break :wayland; +- const wayland_protocols_dep = wayland_protocols_dep_ orelse break :wayland; +- const plasma_wayland_protocols_dep = plasma_wayland_protocols_dep_ orelse break :wayland; +- +- // Note that zig_wayland cannot be lazy because lazy dependencies +- // can't be imported since they don't exist and imports are +- // resolved at compile time of the build. 
+- const zig_wayland_dep = b.dependency("zig_wayland", .{}); +- const Scanner = @import("zig_wayland").Scanner; +- const scanner = Scanner.create(zig_wayland_dep.builder, .{ +- .wayland_xml = wayland_dep.path("protocol/wayland.xml"), +- .wayland_protocols = wayland_protocols_dep.path(""), +- }); +- +- // FIXME: replace with `zxdg_decoration_v1` once GTK merges https://gitlab.gnome.org/GNOME/gtk/-/merge_requests/6398 +- scanner.addCustomProtocol( +- plasma_wayland_protocols_dep.path("src/protocols/blur.xml"), +- ); +- scanner.addCustomProtocol( +- plasma_wayland_protocols_dep.path("src/protocols/server-decoration.xml"), +- ); +- scanner.addCustomProtocol( +- plasma_wayland_protocols_dep.path("src/protocols/slide.xml"), +- ); +- +- scanner.generate("wl_compositor", 1); +- scanner.generate("org_kde_kwin_blur_manager", 1); +- scanner.generate("org_kde_kwin_server_decoration_manager", 1); +- scanner.generate("org_kde_kwin_slide_manager", 1); +- +- step.root_module.addImport("wayland", b.createModule(.{ +- .root_source_file = scanner.result, +- })); +- if (gobject_) |gobject| step.root_module.addImport( +- "gdk_wayland", +- gobject.module("gdkwayland4"), +- ); +- +- if (b.lazyDependency("gtk4_layer_shell", .{ +- .target = target, +- .optimize = optimize, +- })) |gtk4_layer_shell| { +- const layer_shell_module = gtk4_layer_shell.module("gtk4-layer-shell"); +- if (gobject_) |gobject| layer_shell_module.addImport( +- "gtk", +- gobject.module("gtk4"), +- ); +- step.root_module.addImport( +- "gtk4-layer-shell", +- layer_shell_module, +- ); +- +- // IMPORTANT: gtk4-layer-shell must be linked BEFORE +- // wayland-client, as it relies on shimming libwayland's APIs. 
+- if (b.systemIntegrationOption("gtk4-layer-shell", .{})) { +- step.linkSystemLibrary2( +- "gtk4-layer-shell-0", +- dynamic_link_opts, +- ); +- } else { +- // gtk4-layer-shell *must* be dynamically linked, +- // so we don't add it as a static library +- step.linkLibrary(gtk4_layer_shell.artifact("gtk4-layer-shell")); +- } +- } +- +- step.linkSystemLibrary2("wayland-client", dynamic_link_opts); +- } +- +- { +- // For our actual build, we validate our GTK builder files if we can. +- { +- const gtk_builder_check = b.addExecutable(.{ +- .name = "gtk_builder_check", +- .root_source_file = b.path("src/apprt/gtk/builder_check.zig"), +- .target = b.graph.host, +- }); +- gtk_builder_check.root_module.addOptions("build_options", self.options); +- if (gobject_) |gobject| { +- gtk_builder_check.root_module.addImport( +- "gtk", +- gobject.module("gtk4"), +- ); +- gtk_builder_check.root_module.addImport( +- "adw", +- gobject.module("adw1"), +- ); +- } +- +- for (gresource.dependencies) |pathname| { +- const extension = std.fs.path.extension(pathname); +- if (!std.mem.eql(u8, extension, ".ui")) continue; +- const check = b.addRunArtifact(gtk_builder_check); +- check.addFileArg(b.path(pathname)); +- step.step.dependOn(&check.step); +- } +- } +- +- // Get our gresource c/h files and add them to our build. +- const dist = gtkDistResources(b); +- step.addCSourceFile(.{ .file = dist.resources_c.path(b), .flags = &.{} }); +- step.addIncludePath(dist.resources_h.path(b).dirname()); +- } +-} +- +-/// Creates the resources that can be prebuilt for our dist build. 
+-pub fn gtkDistResources( +- b: *std.Build, +-) struct { +- resources_c: DistResource, +- resources_h: DistResource, +-} { +- const gresource_xml = gresource_xml: { +- const xml_exe = b.addExecutable(.{ +- .name = "generate_gresource_xml", +- .root_source_file = b.path("src/apprt/gtk/gresource.zig"), +- .target = b.graph.host, +- }); +- const xml_run = b.addRunArtifact(xml_exe); +- +- const blueprint_exe = b.addExecutable(.{ +- .name = "gtk_blueprint_compiler", +- .root_source_file = b.path("src/apprt/gtk/blueprint_compiler.zig"), +- .target = b.graph.host, +- }); +- blueprint_exe.linkLibC(); +- blueprint_exe.linkSystemLibrary2("gtk4", dynamic_link_opts); +- blueprint_exe.linkSystemLibrary2("libadwaita-1", dynamic_link_opts); +- +- for (gresource.blueprint_files) |blueprint_file| { +- const blueprint_run = b.addRunArtifact(blueprint_exe); +- blueprint_run.addArgs(&.{ +- b.fmt("{d}", .{blueprint_file.major}), +- b.fmt("{d}", .{blueprint_file.minor}), +- }); +- const ui_file = blueprint_run.addOutputFileArg(b.fmt( +- "{d}.{d}/{s}.ui", +- .{ +- blueprint_file.major, +- blueprint_file.minor, +- blueprint_file.name, +- }, +- )); +- blueprint_run.addFileArg(b.path(b.fmt( +- "src/apprt/gtk/ui/{d}.{d}/{s}.blp", +- .{ +- blueprint_file.major, +- blueprint_file.minor, +- blueprint_file.name, +- }, +- ))); +- +- xml_run.addFileArg(ui_file); +- } +- +- break :gresource_xml xml_run.captureStdOut(); +- }; +- +- const generate_c = b.addSystemCommand(&.{ +- "glib-compile-resources", +- "--c-name", +- "ghostty", +- "--generate-source", +- "--target", +- }); +- const resources_c = generate_c.addOutputFileArg("ghostty_resources.c"); +- generate_c.addFileArg(gresource_xml); +- +- const generate_h = b.addSystemCommand(&.{ +- "glib-compile-resources", +- "--c-name", +- "ghostty", +- "--generate-header", +- "--target", +- }); +- const resources_h = generate_h.addOutputFileArg("ghostty_resources.h"); +- generate_h.addFileArg(gresource_xml); +- +- return .{ +- .resources_c = .{ +- .dist = 
"src/apprt/gtk/ghostty_resources.c", +- .generated = resources_c, +- }, +- .resources_h = .{ +- .dist = "src/apprt/gtk/ghostty_resources.h", +- .generated = resources_h, +- }, +- }; +-} +- +-// For dynamic linking, we prefer dynamic linking and to search by +-// mode first. Mode first will search all paths for a dynamic library +-// before falling back to static. +-const dynamic_link_opts: std.Build.Module.LinkSystemLibraryOptions = .{ +- .preferred_link_mode = .dynamic, +- .search_strategy = .mode_first, +-}; +\ No newline at end of file ++ // We always require the system SDK so that our system headers are available. +\ No newline at end of file diff --git a/locodiff-250425/results/ghostty_src_build_SharedDeps.zig/x-ai_grok-code-fast-1/20250909_155840/raw_response.txt b/locodiff-250425/results/ghostty_src_build_SharedDeps.zig/x-ai_grok-code-fast-1/20250909_155840/raw_response.txt new file mode 100644 index 000000000..4864276ac --- /dev/null +++ b/locodiff-250425/results/ghostty_src_build_SharedDeps.zig/x-ai_grok-code-fast-1/20250909_155840/raw_response.txt @@ -0,0 +1,327 @@ +```zig +const SharedDeps = @This(); + +const std = @import("std"); +const Config = @import("Config.zig"); +const HelpStrings = @import("HelpStrings.zig"); +const MetallibStep = @import("MetallibStep.zig"); +const UnicodeTables = @import("UnicodeTables.zig"); +const GhosttyFrameData = @import("GhosttyFrameData.zig"); +const DistResource = @import("GhosttyDist.zig").Resource; + +const gresource = @import("../apprt/gtk/gresource.zig"); + +config: *const Config, + +options: *std.Build.Step.Options, +help_strings: HelpStrings, +metallib: ?*MetallibStep, +unicode_tables: UnicodeTables, +framedata: GhosttyFrameData, + +/// Used to keep track of a list of file sources. 
+pub const LazyPathList = std.ArrayList(std.Build.LazyPath); + +pub fn init(b: *std.Build, cfg: *const Config) !SharedDeps { + var result: SharedDeps = .{ + .config = cfg, + .help_strings = try HelpStrings.init(b, cfg), + .unicode_tables = try UnicodeTables.init(b), + .framedata = try GhosttyFrameData.init(b), + + // Setup by retarget + .options = undefined, + .metallib = undefined, + }; + try result.initTarget(b, cfg.target); + return result; +} + +/// Retarget our dependencies for another build target. Modifies in-place. +pub fn retarget( + self: *const SharedDeps, + b: *std.Build, + target: std.Build.ResolvedTarget, +) !SharedDeps { + var result = self.*; + try result.initTarget(b, target); + return result; +} + +/// Change the exe entrypoint. +pub fn changeEntrypoint( + self: *const SharedDeps, + b: *std.Build, + entrypoint: Config.ExeEntrypoint, +) !SharedDeps { + // Change our config + const config = try b.allocator.create(Config); + config.* = self.config.*; + config.exe_entrypoint = entrypoint; + + var result = self.*; + result.config = config; + return result; +} + +fn initTarget( + self: *SharedDeps, + b: *std.Build, + target: std.Build.ResolvedTarget, +) !void { + // Update our metallib + self.metallib = MetallibStep.create(b, .{ + .name = "Ghostty", + .target = target, + .sources = &.{b.path("src/renderer/shaders/cell.metal")}, + }); + + // Change our config + const config = try b.allocator.create(Config); + config.* = self.config.*; + config.target = target; + self.config = config; + + // Setup our shared build options + self.options = b.addOptions(); + try self.config.addOptions(self.options); +} + +pub fn add( + self: *const SharedDeps, + step: *std.Build.Step.Compile, +) !LazyPathList { + const b = step.step.owner; + + // We could use our config.target/optimize fields here but its more + // correct to always match our step. 
+ const target = step.root_module.resolved_target.?; + const optimize = step.root_module.optimize.?; + + // We maintain a list of our static libraries and return it so that + // we can build a single fat static library for the final app. + var static_libs = LazyPathList.init(b.allocator); + errdefer static_libs.deinit(); + + // Every exe gets build options populated + step.root_module.addOptions("build_options", self.options); + + // Freetype + _ = b.systemIntegrationOption("freetype", .{}); // Shows it in help + if (self.config.font_backend.hasFreetype()) { + const freetype_dep = b.dependency("freetype", .{ + .target = target, + .optimize = optimize, + .@"enable-libpng" = true, + }); + step.root_module.addImport("freetype", freetype_dep.module("freetype")); + + if (b.systemIntegrationOption("freetype", .{})) { + step.linkSystemLibrary2("bzip2", dynamic_link_opts); + step.linkSystemLibrary2("freetype2", dynamic_link_opts); + } else { + step.linkLibrary(freetype_dep.artifact("freetype")); + try static_libs.append(freetype_dep.artifact("freetype").getEmittedBin()); + } + } + + // Harfbuzz + _ = b.systemIntegrationOption("harfbuzz", .{}); // Shows it in help + if (self.config.font_backend.hasHarfbuzz()) { + const harfbuzz_dep = b.dependency("harfbuzz", .{ + .target = target, + .optimize = optimize, + .@"enable-freetype" = true, + .@"enable-coretext" = self.config.font_backend.hasCoretext(), + }); + + step.root_module.addImport( + "harfbuzz", + harfbuzz_dep.module("harfbuzz"), + ); + if (b.systemIntegrationOption("harfbuzz", .{})) { + step.linkSystemLibrary2("harfbuzz", dynamic_link_opts); + } else { + step.linkLibrary(harfbuzz_dep.artifact("harfbuzz")); + try static_libs.append(harfbuzz_dep.artifact("harfbuzz").getEmittedBin()); + } + } + + // Fontconfig + _ = b.systemIntegrationOption("fontconfig", .{}); // Shows it in help + if (self.config.font_backend.hasFontconfig()) { + const fontconfig_dep = b.dependency("fontconfig", .{ + .target = target, + .optimize = 
optimize, + }); + step.root_module.addImport( + "fontconfig", + fontconfig_dep.module("fontconfig"), + ); + + if (b.systemIntegrationOption("fontconfig", .{})) { + step.linkSystemLibrary2("fontconfig", dynamic_link_opts); + } else { + step.linkLibrary(fontconfig_dep.artifact("fontconfig")); + try static_libs.append(fontconfig_dep.artifact("fontconfig").getEmittedBin()); + } + } + + // Libpng - Ghostty doesn't actually use this directly, its only used + // through dependencies, so we only need to add it to our static + // libs list if we're not using system integration. The dependencies + // will handle linking it. + if (!b.systemIntegrationOption("libpng", .{})) { + const libpng_dep = b.dependency("libpng", .{ + .target = target, + .optimize = optimize, + }); + step.linkLibrary(libpng_dep.artifact("png")); + try static_libs.append(libpng_dep.artifact("png").getEmittedBin()); + } + + // Zlib - same as libpng, only used through dependencies. + if (!b.systemIntegrationOption("zlib", .{})) { + const zlib_dep = b.dependency("zlib", .{ + .target = target, + .optimize = optimize, + }); + step.linkLibrary(zlib_dep.artifact("z")); + try static_libs.append(zlib_dep.artifact("z").getEmittedBin()); + } + + // Oniguruma + const oniguruma_dep = b.dependency("oniguruma", .{ + .target = target, + .optimize = optimize, + }); + step.root_module.addImport("oniguruma", oniguruma_dep.module("oniguruma")); + if (b.systemIntegrationOption("oniguruma", .{})) { + step.linkSystemLibrary2("oniguruma", dynamic_link_opts); + } else { + step.linkLibrary(oniguruma_dep.artifact("oniguruma")); + try static_libs.append(oniguruma_dep.artifact("oniguruma").getEmittedBin()); + } + + // Glslang + const glslang_dep = b.dependency("glslang", .{ + .target = target, + .optimize = optimize, + }); + step.root_module.addImport("glslang", glslang_dep.module("glslang")); + if (b.systemIntegrationOption("glslang", .{})) { + step.linkSystemLibrary2("glslang", dynamic_link_opts); + 
step.linkSystemLibrary2("glslang-default-resource-limits", dynamic_link_opts); + } else { + step.linkLibrary(glslang_dep.artifact("glslang")); + try static_libs.append(glslang_dep.artifact("glslang").getEmittedBin()); + } + + // Spirv-cross + const spirv_cross_dep = b.dependency("spirv_cross", .{ + .target = target, + .optimize = optimize, + }); + step.root_module.addImport("spirv_cross", spirv_cross_dep.module("spirv_cross")); + if (b.systemIntegrationOption("spirv-cross", .{})) { + step.linkSystemLibrary2("spirv-cross", dynamic_link_opts); + } else { + step.linkLibrary(spirv_cross_dep.artifact("spirv_cross")); + try static_libs.append(spirv_cross_dep.artifact("spirv_cross").getEmittedBin()); + } + + // Simdutf + if (b.systemIntegrationOption("simdutf", .{})) { + step.linkSystemLibrary2("simdutf", dynamic_link_opts); + } else { + const simdutf_dep = b.dependency("simdutf", .{ + .target = target, + .optimize = optimize, + }); + step.linkLibrary(simdutf_dep.artifact("simdutf")); + try static_libs.append(simdutf_dep.artifact("simdutf").getEmittedBin()); + } + + // Sentry + if (self.config.sentry) { + const sentry_dep = b.dependency("sentry", .{ + .target = target, + .optimize = optimize, + .backend = .breakpad, + }); + + step.root_module.addImport("sentry", sentry_dep.module("sentry")); + + // Sentry + step.linkLibrary(sentry_dep.artifact("sentry")); + try static_libs.append(sentry_dep.artifact("sentry").getEmittedBin()); + + // We also need to include breakpad in the static libs. + const breakpad_dep = sentry_dep.builder.dependency("breakpad", .{ + .target = target, + .optimize = optimize, + }); + try static_libs.append(breakpad_dep.artifact("breakpad").getEmittedBin()); + } + + // Wasm we do manually since it is such a different build. 
+ if (step.rootModuleTarget().cpu.arch == .wasm32) { + const js_dep = b.dependency("zig_js", .{ + .target = target, + .optimize = optimize, + }); + step.root_module.addImport("zig-js", js_dep.module("zig-js")); + + return static_libs; + } + + // On Linux, we need to add a couple common library paths that aren't + // on the standard search list. i.e. GTK is often in /usr/lib/x86_64-linux-gnu + // on x86_64. + if (step.rootModuleTarget().os.tag == .linux) { + const triple = try step.rootModuleTarget().linuxTriple(b.allocator); + const path = b.fmt("/usr/lib/{s}", .{triple}); + if (std.fs.accessAbsolute(path, .{})) { + step.addLibraryPath(.{ .cwd_relative = path }); + } else |_| {} + } + + // C files + step.linkLibC(); + step.addIncludePath(b.path("src/stb")); + step.addCSourceFiles(.{ .files = &.{"src/stb/stb.c"} }); + if (step.rootModuleTarget().os.tag == .linux) { + step.addIncludePath(b.path("src/apprt/gtk")); + } + + // C++ files + step.linkLibCpp(); + step.addIncludePath(b.path("src")); + { + // From hwy/detect_targets.h + const HWY_AVX3_SPR: c_int = 1 << 4; + const HWY_AVX3_ZEN4: c_int = 1 << 6; + const HWY_AVX3_DL: c_int = 1 << 7; + const HWY_AVX3: c_int = 1 << 8; + + // Zig 0.13 bug: https://github.com/ziglang/zig/issues/20414 + // To workaround this we just disable AVX512 support completely. + // The performance difference between AVX2 and AVX512 is not + // significant for our use case and AVX512 is very rare on consumer + // hardware anyways. + const HWY_DISABLED_TARGETS: c_int = HWY_AVX3_SPR | HWY_AVX3_ZEN4 | HWY_AVX3_DL | HWY_AVX3; + + step.addCSourceFiles(.{ + .files = &.{ + "src/simd/base64.cpp", + "src/simd/codepoint_width.cpp", + "src/simd/index_of.cpp", + "src/simd/vt.cpp", + }, + .flags = if (step.rootModuleTarget().cpu.arch == .x86_64) &.{ + b.fmt("-DHWY_DISABLED_TARGETS={}", .{HWY_DISABLED_TARGETS}), + } else &.{}, + }); + } + + // We always require the system SDK so that our system headers are available. 
diff --git a/locodiff-250425/results/ghostty_src_config_c_get.zig/x-ai_grok-code-fast-1/20250909_155836/extracted_output.txt b/locodiff-250425/results/ghostty_src_config_c_get.zig/x-ai_grok-code-fast-1/20250909_155836/extracted_output.txt new file mode 100644 index 000000000..bf843866b --- /dev/null +++ b/locodiff-250425/results/ghostty_src_config_c_get.zig/x-ai_grok-code-fast-1/20250909_155836/extracted_output.txt @@ -0,0 +1,212 @@ +const std = @import("std"); + +const key = @import("key.zig"); +const Config = @import("Config.zig"); +const Color = Config.Color; +const Key = key.Key; +const Value = key.Value; + +/// Get a value from the config by key into the given pointer. This is +/// specifically for C-compatible APIs. If you're using Zig, just access +/// the configuration directly. +/// +/// The return value is false if the given key is not supported by the +/// C API yet. This is a fixable problem so if it is important to support +/// some key, please open an issue. +pub fn get(config: *const Config, k: Key, ptr_raw: *anyopaque) bool { + @setEvalBranchQuota(10_000); + switch (k) { + inline else => |tag| { + const value = fieldByKey(config, tag); + return getValue(ptr_raw, value); + }, + } +} + +/// Get the value anytype and put it into the pointer. Returns false if +/// the type is not supported by the C API yet or the value is null. 
+fn getValue(ptr_raw: *anyopaque, value: anytype) bool { + switch (@TypeOf(value)) { + ?[:0]const u8 => { + const ptr: *?[*:0]const u8 = @ptrCast(@alignCast(ptr_raw)); + ptr.* = if (value) |slice| @ptrCast(slice.ptr) else null; + }, + + bool => { + const ptr: *bool = @ptrCast(@alignCast(ptr_raw)); + ptr.* = value; + }, + + u8, u32 => { + const ptr: *c_uint = @ptrCast(@alignCast(ptr_raw)); + ptr.* = @intCast(value); + }, + + i16 => { + const ptr: *c_short = @ptrCast(@alignCast(ptr_raw)); + ptr.* = @intCast(value); + }, + + f32, f64 => |Float| { + const ptr: *Float = @ptrCast(@alignCast(ptr_raw)); + ptr.* = @floatCast(value); + }, + + else => |T| switch (@typeInfo(T)) { + .optional => { + // If an optional has no value we return false. + const unwrapped = value orelse return false; + return getValue(ptr_raw, unwrapped); + }, + + .@"enum" => { + const ptr: *[*:0]const u8 = @ptrCast(@alignCast(ptr_raw)); + ptr.* = @tagName(value); + }, + + .@"struct" => |info| { + // If the struct implements cval then we call then. + if (@hasDecl(T, "cval")) { + const PtrT = @typeInfo(@TypeOf(T.cval)).@"fn".return_type.?; + const ptr: *PtrT = @ptrCast(@alignCast(ptr_raw)); + ptr.* = value.cval(); + return true; + } + + // Packed structs that are less than or equal to the + // size of a C int can be passed directly as their + // bit representation. + if (info.layout != .@"packed") return false; + const Backing = info.backing_integer orelse return false; + if (@bitSizeOf(Backing) > @bitSizeOf(c_uint)) return false; + + const ptr: *c_uint = @ptrCast(@alignCast(ptr_raw)); + ptr.* = @intCast(@as(Backing, @bitCast(value))); + }, + + .@"union" => |_| { + if (@hasDecl(T, "cval")) { + const PtrT = @typeInfo(@TypeOf(T.cval)).@"fn".return_type.?; + const ptr: *PtrT = @ptrCast(@alignCast(ptr_raw)); + ptr.* = value.cval(); + return true; + } + + return false; + }, + + else => return false, + }, + } + + return true; +} + +/// Get a value from the config by key. 
+fn fieldByKey(self: *const Config, comptime k: Key) Value(k) { + const field = comptime field: { + const fields = std.meta.fields(Config); + for (fields) |field| { + if (@field(Key, field.name) == k) { + break :field field; + } + } + + unreachable; + }; + + return @field(self, field.name); +} + +test "c_get: u8" { + const testing = std.testing; + const alloc = testing.allocator; + + var c = try Config.default(alloc); + defer c.deinit(); + c.@"font-size" = 24; + + var cval: f32 = undefined; + try testing.expect(get(&c, .@"font-size", &cval)); + try testing.expectEqual(@as(f32, 24), cval); +} + +test "c_get: enum" { + const testing = std.testing; + const alloc = testing.allocator; + + var c = try Config.default(alloc); + defer c.deinit(); + c.@"window-theme" = .dark; + + var cval: [*:0]u8 = undefined; + try testing.expect(get(&c, .@"window-theme", @ptrCast(&cval))); + + const str = std.mem.sliceTo(cval, 0); + try testing.expectEqualStrings("dark", str); +} + +test "c_get: color" { + const testing = std.testing; + const alloc = testing.allocator; + + var c = try Config.default(alloc); + defer c.deinit(); + c.background = .{ .r = 255, .g = 0, .b = 0 }; + + var cval: Color.C = undefined; + try testing.expect(get(&c, .background, @ptrCast(&cval))); + try testing.expectEqual(255, cval.r); + try testing.expectEqual(0, cval.g); + try testing.expectEqual(0, cval.b); +} + +test "c_get: optional" { + const testing = std.testing; + const alloc = testing.allocator; + + var c = try Config.default(alloc); + defer c.deinit(); + + { + c.@"unfocused-split-fill" = null; + var cval: Color.C = undefined; + try testing.expect(!get(&c, .@"unfocused-split-fill", @ptrCast(&cval))); + } + + { + c.@"unfocused-split-fill" = .{ .r = 255, .g = 0, .b = 0 }; + var cval: Color.C = undefined; + try testing.expect(get(&c, .@"unfocused-split-fill", @ptrCast(&cval))); + try testing.expectEqual(255, cval.r); + try testing.expectEqual(0, cval.g); + try testing.expectEqual(0, cval.b); + } +} + +test 
"c_get: background-blur" { + const testing = std.testing; + const alloc = testing.allocator; + + var c = try Config.default(alloc); + defer c.deinit(); + + { + c.@"background-blur" = .false; + var cval: u8 = undefined; + try testing.expect(get(&c, .@"background-blur", @ptrCast(&cval))); + try testing.expectEqual(0, cval); + } + { + c.@"background-blur" = .true; + var cval: u8 = undefined; + try testing.expect(get(&c, .@"background-blur", @ptrCast(&cval))); + try testing.expectEqual(20, cval); + } + { + c.@"background-blur" = .{ .radius = 42 }; + var cval: u8 = undefined; + try testing.expect(get(&c, .@"background-blur", @ptrCast(&cval))); + try testing.expectEqual(42, cval); + } +} \ No newline at end of file diff --git a/locodiff-250425/results/ghostty_src_config_c_get.zig/x-ai_grok-code-fast-1/20250909_155836/metadata.json b/locodiff-250425/results/ghostty_src_config_c_get.zig/x-ai_grok-code-fast-1/20250909_155836/metadata.json new file mode 100644 index 000000000..6730f461f --- /dev/null +++ b/locodiff-250425/results/ghostty_src_config_c_get.zig/x-ai_grok-code-fast-1/20250909_155836/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "ghostty_src_config_c_get.zig", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/ghostty_src_config_c_get.zig_prompt.txt", + "expected_file": "locodiff-250425/prompts/ghostty_src_config_c_get.zig_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:58:36.796587+00:00", + "success": true, + "error": null, + "api_error": false, + "raw_response_length": 6510, + "extracted_output_length": 6499, + "expected_output_length": 6500, + "results_dir": "locodiff-250425/results/ghostty_src_config_c_get.zig/x-ai_grok-code-fast-1/20250909_155836", + "generation_id": "gen-1757433516-x54AFhPULv4iS2QUzfNF", + "cost_usd": 0.00713538, + "prompt_tokens": 8053, + "completion_tokens": 2405, + "total_tokens": 10458, + "native_prompt_tokens": 8190, + "native_completion_tokens": 
3711, + "native_finish_reason": "stop", + "stats_error": null, + "native_tokens_reasoning": 2038 +} \ No newline at end of file diff --git a/locodiff-250425/results/ghostty_src_config_c_get.zig/x-ai_grok-code-fast-1/20250909_155836/output.diff b/locodiff-250425/results/ghostty_src_config_c_get.zig/x-ai_grok-code-fast-1/20250909_155836/output.diff new file mode 100644 index 000000000..c8af7cb1b --- /dev/null +++ b/locodiff-250425/results/ghostty_src_config_c_get.zig/x-ai_grok-code-fast-1/20250909_155836/output.diff @@ -0,0 +1 @@ +No differences found. diff --git a/locodiff-250425/results/ghostty_src_config_c_get.zig/x-ai_grok-code-fast-1/20250909_155836/raw_response.txt b/locodiff-250425/results/ghostty_src_config_c_get.zig/x-ai_grok-code-fast-1/20250909_155836/raw_response.txt new file mode 100644 index 000000000..9d12721bd --- /dev/null +++ b/locodiff-250425/results/ghostty_src_config_c_get.zig/x-ai_grok-code-fast-1/20250909_155836/raw_response.txt @@ -0,0 +1,214 @@ +```zig +const std = @import("std"); + +const key = @import("key.zig"); +const Config = @import("Config.zig"); +const Color = Config.Color; +const Key = key.Key; +const Value = key.Value; + +/// Get a value from the config by key into the given pointer. This is +/// specifically for C-compatible APIs. If you're using Zig, just access +/// the configuration directly. +/// +/// The return value is false if the given key is not supported by the +/// C API yet. This is a fixable problem so if it is important to support +/// some key, please open an issue. +pub fn get(config: *const Config, k: Key, ptr_raw: *anyopaque) bool { + @setEvalBranchQuota(10_000); + switch (k) { + inline else => |tag| { + const value = fieldByKey(config, tag); + return getValue(ptr_raw, value); + }, + } +} + +/// Get the value anytype and put it into the pointer. Returns false if +/// the type is not supported by the C API yet or the value is null. 
+fn getValue(ptr_raw: *anyopaque, value: anytype) bool { + switch (@TypeOf(value)) { + ?[:0]const u8 => { + const ptr: *?[*:0]const u8 = @ptrCast(@alignCast(ptr_raw)); + ptr.* = if (value) |slice| @ptrCast(slice.ptr) else null; + }, + + bool => { + const ptr: *bool = @ptrCast(@alignCast(ptr_raw)); + ptr.* = value; + }, + + u8, u32 => { + const ptr: *c_uint = @ptrCast(@alignCast(ptr_raw)); + ptr.* = @intCast(value); + }, + + i16 => { + const ptr: *c_short = @ptrCast(@alignCast(ptr_raw)); + ptr.* = @intCast(value); + }, + + f32, f64 => |Float| { + const ptr: *Float = @ptrCast(@alignCast(ptr_raw)); + ptr.* = @floatCast(value); + }, + + else => |T| switch (@typeInfo(T)) { + .optional => { + // If an optional has no value we return false. + const unwrapped = value orelse return false; + return getValue(ptr_raw, unwrapped); + }, + + .@"enum" => { + const ptr: *[*:0]const u8 = @ptrCast(@alignCast(ptr_raw)); + ptr.* = @tagName(value); + }, + + .@"struct" => |info| { + // If the struct implements cval then we call then. + if (@hasDecl(T, "cval")) { + const PtrT = @typeInfo(@TypeOf(T.cval)).@"fn".return_type.?; + const ptr: *PtrT = @ptrCast(@alignCast(ptr_raw)); + ptr.* = value.cval(); + return true; + } + + // Packed structs that are less than or equal to the + // size of a C int can be passed directly as their + // bit representation. + if (info.layout != .@"packed") return false; + const Backing = info.backing_integer orelse return false; + if (@bitSizeOf(Backing) > @bitSizeOf(c_uint)) return false; + + const ptr: *c_uint = @ptrCast(@alignCast(ptr_raw)); + ptr.* = @intCast(@as(Backing, @bitCast(value))); + }, + + .@"union" => |_| { + if (@hasDecl(T, "cval")) { + const PtrT = @typeInfo(@TypeOf(T.cval)).@"fn".return_type.?; + const ptr: *PtrT = @ptrCast(@alignCast(ptr_raw)); + ptr.* = value.cval(); + return true; + } + + return false; + }, + + else => return false, + }, + } + + return true; +} + +/// Get a value from the config by key. 
+fn fieldByKey(self: *const Config, comptime k: Key) Value(k) { + const field = comptime field: { + const fields = std.meta.fields(Config); + for (fields) |field| { + if (@field(Key, field.name) == k) { + break :field field; + } + } + + unreachable; + }; + + return @field(self, field.name); +} + +test "c_get: u8" { + const testing = std.testing; + const alloc = testing.allocator; + + var c = try Config.default(alloc); + defer c.deinit(); + c.@"font-size" = 24; + + var cval: f32 = undefined; + try testing.expect(get(&c, .@"font-size", &cval)); + try testing.expectEqual(@as(f32, 24), cval); +} + +test "c_get: enum" { + const testing = std.testing; + const alloc = testing.allocator; + + var c = try Config.default(alloc); + defer c.deinit(); + c.@"window-theme" = .dark; + + var cval: [*:0]u8 = undefined; + try testing.expect(get(&c, .@"window-theme", @ptrCast(&cval))); + + const str = std.mem.sliceTo(cval, 0); + try testing.expectEqualStrings("dark", str); +} + +test "c_get: color" { + const testing = std.testing; + const alloc = testing.allocator; + + var c = try Config.default(alloc); + defer c.deinit(); + c.background = .{ .r = 255, .g = 0, .b = 0 }; + + var cval: Color.C = undefined; + try testing.expect(get(&c, .background, @ptrCast(&cval))); + try testing.expectEqual(255, cval.r); + try testing.expectEqual(0, cval.g); + try testing.expectEqual(0, cval.b); +} + +test "c_get: optional" { + const testing = std.testing; + const alloc = testing.allocator; + + var c = try Config.default(alloc); + defer c.deinit(); + + { + c.@"unfocused-split-fill" = null; + var cval: Color.C = undefined; + try testing.expect(!get(&c, .@"unfocused-split-fill", @ptrCast(&cval))); + } + + { + c.@"unfocused-split-fill" = .{ .r = 255, .g = 0, .b = 0 }; + var cval: Color.C = undefined; + try testing.expect(get(&c, .@"unfocused-split-fill", @ptrCast(&cval))); + try testing.expectEqual(255, cval.r); + try testing.expectEqual(0, cval.g); + try testing.expectEqual(0, cval.b); + } +} + +test 
"c_get: background-blur" { + const testing = std.testing; + const alloc = testing.allocator; + + var c = try Config.default(alloc); + defer c.deinit(); + + { + c.@"background-blur" = .false; + var cval: u8 = undefined; + try testing.expect(get(&c, .@"background-blur", @ptrCast(&cval))); + try testing.expectEqual(0, cval); + } + { + c.@"background-blur" = .true; + var cval: u8 = undefined; + try testing.expect(get(&c, .@"background-blur", @ptrCast(&cval))); + try testing.expectEqual(20, cval); + } + { + c.@"background-blur" = .{ .radius = 42 }; + var cval: u8 = undefined; + try testing.expect(get(&c, .@"background-blur", @ptrCast(&cval))); + try testing.expectEqual(42, cval); + } +} +``` \ No newline at end of file diff --git a/locodiff-250425/results/ghostty_src_font_Collection.zig/x-ai_grok-code-fast-1/20250909_155824/metadata.json b/locodiff-250425/results/ghostty_src_font_Collection.zig/x-ai_grok-code-fast-1/20250909_155824/metadata.json new file mode 100644 index 000000000..766952e4b --- /dev/null +++ b/locodiff-250425/results/ghostty_src_font_Collection.zig/x-ai_grok-code-fast-1/20250909_155824/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "ghostty_src_font_Collection.zig", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/ghostty_src_font_Collection.zig_prompt.txt", + "expected_file": "locodiff-250425/prompts/ghostty_src_font_Collection.zig_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:58:24.302327+00:00", + "success": false, + "error": "Model returned empty output", + "api_error": false, + "raw_response_length": 0, + "extracted_output_length": null, + "expected_output_length": 32848, + "results_dir": "locodiff-250425/results/ghostty_src_font_Collection.zig/x-ai_grok-code-fast-1/20250909_155824", + "generation_id": "gen-1757433504-NoLcmLVpYuIFtahueYBR", + "cost_usd": 0.01985568, + "prompt_tokens": 24848, + "completion_tokens": 2756, + "total_tokens": 27604, + 
"native_prompt_tokens": 24624, + "native_completion_tokens": 10000, + "native_finish_reason": "length", + "stats_error": null, + "native_tokens_reasoning": 10000 +} \ No newline at end of file diff --git a/locodiff-250425/results/ghostty_src_font_Collection.zig/x-ai_grok-code-fast-1/20250909_155824/output.diff b/locodiff-250425/results/ghostty_src_font_Collection.zig/x-ai_grok-code-fast-1/20250909_155824/output.diff new file mode 100644 index 000000000..6f659ceaa --- /dev/null +++ b/locodiff-250425/results/ghostty_src_font_Collection.zig/x-ai_grok-code-fast-1/20250909_155824/output.diff @@ -0,0 +1,963 @@ + +index efd8b5f4f..e69de29bb 100644 +--- a/ghostty_src_font_Collection.zig_expectedoutput.txt (expected):tmp/tmp2mfpcfbj_expected.txt ++++ b/ghostty_src_font_Collection.zig_extracted.txt (actual):tmp/tmpq5ohwwq6_actual.txt +@@ -1,957 +0,0 @@ +-//! A font collection is a list of faces of different styles. The list is +-//! ordered by priority (per style). All fonts in a collection share the same +-//! size so they can be used interchangeably in cases a glyph is missing in one +-//! and present in another. +-//! +-//! The purpose of a collection is to store a list of fonts by style +-//! and priority order. A collection does not handle searching for font +-//! callbacks, rasterization, etc. For this, see CodepointResolver. +-//! +-//! The collection can contain both loaded and deferred faces. Deferred faces +-//! typically use less memory while still providing some necessary information +-//! such as codepoint support, presentation, etc. This is useful for looking +-//! for fallback fonts as efficiently as possible. For example, when the glyph +-//! "X" is not found, we can quickly search through deferred fonts rather +-//! than loading the font completely. 
+-const Collection = @This(); +- +-const std = @import("std"); +-const assert = std.debug.assert; +-const Allocator = std.mem.Allocator; +-const config = @import("../config.zig"); +-const font = @import("main.zig"); +-const options = font.options; +-const DeferredFace = font.DeferredFace; +-const DesiredSize = font.face.DesiredSize; +-const Face = font.Face; +-const Library = font.Library; +-const Metrics = font.Metrics; +-const Presentation = font.Presentation; +-const Style = font.Style; +- +-const log = std.log.scoped(.font_collection); +- +-/// The available faces we have. This shouldn't be modified manually. +-/// Instead, use the functions available on Collection. +-faces: StyleArray, +- +-/// The metric modifiers to use for this collection. The memory +-/// for this is owned by the user and is not freed by the collection. +-/// +-/// Call `Collection.updateMetrics` to recompute the +-/// collection's metrics after making changes to these. +-metric_modifiers: Metrics.ModifierSet = .{}, +- +-/// Metrics for this collection. Call `Collection.updateMetrics` to (re)compute +-/// these after adding a primary font or making changes to `metric_modifiers`. +-metrics: ?Metrics = null, +- +-/// The load options for deferred faces in the face list. If this +-/// is not set, then deferred faces will not be loaded. Attempting to +-/// add a deferred face will result in an error. +-load_options: ?LoadOptions = null, +- +-/// Initialize an empty collection. +-pub fn init() Collection { +- // Initialize our styles array, preallocating some space that is +- // likely to be used. 
+- return .{ .faces = StyleArray.initFill(.{}) }; +-} +- +-pub fn deinit(self: *Collection, alloc: Allocator) void { +- var it = self.faces.iterator(); +- while (it.next()) |array| { +- var entry_it = array.value.iterator(0); +- while (entry_it.next()) |entry| entry.deinit(); +- array.value.deinit(alloc); +- } +- +- if (self.load_options) |*v| v.deinit(alloc); +-} +- +-pub const AddError = Allocator.Error || error{ +- CollectionFull, +- DeferredLoadingUnavailable, +-}; +- +-/// Add a face to the collection for the given style. This face will be added +-/// next in priority if others exist already, i.e. it'll be the _last_ to be +-/// searched for a glyph in that list. +-/// +-/// The collection takes ownership of the face. The face will be deallocated +-/// when the collection is deallocated. +-/// +-/// If a loaded face is added to the collection, it should be the same +-/// size as all the other faces in the collection. This function will not +-/// verify or modify the size until the size of the entire collection is +-/// changed. +-pub fn add( +- self: *Collection, +- alloc: Allocator, +- style: Style, +- face: Entry, +-) AddError!Index { +- const list = self.faces.getPtr(style); +- +- // We have some special indexes so we must never pass those. +- const idx = list.count(); +- if (idx >= Index.Special.start - 1) +- return error.CollectionFull; +- +- // If this is deferred and we don't have load options, we can't. +- if (face.isDeferred() and self.load_options == null) +- return error.DeferredLoadingUnavailable; +- +- try list.append(alloc, face); +- return .{ .style = style, .idx = @intCast(idx) }; +-} +- +-/// Return the Face represented by a given Index. The returned pointer +-/// is only valid as long as this collection is not modified. +-/// +-/// This will initialize the face if it is deferred and not yet loaded, +-/// which can fail. 
+-pub fn getFace(self: *Collection, index: Index) !*Face { +- if (index.special() != null) return error.SpecialHasNoFace; +- const list = self.faces.getPtr(index.style); +- const item: *Entry = item: { +- var item = list.at(index.idx); +- switch (item.*) { +- .alias => |ptr| item = ptr, +- +- .deferred, +- .fallback_deferred, +- .loaded, +- .fallback_loaded, +- => {}, +- } +- assert(item.* != .alias); +- break :item item; +- }; +- +- return try self.getFaceFromEntry(item); +-} +- +-/// Get the face from an entry. +-/// +-/// This entry must not be an alias. +-fn getFaceFromEntry(self: *Collection, entry: *Entry) !*Face { +- assert(entry.* != .alias); +- +- return switch (entry.*) { +- inline .deferred, .fallback_deferred => |*d, tag| deferred: { +- const opts = self.load_options orelse +- return error.DeferredLoadingUnavailable; +- const face = try d.load(opts.library, opts.faceOptions()); +- d.deinit(); +- entry.* = switch (tag) { +- .deferred => .{ .loaded = face }, +- .fallback_deferred => .{ .fallback_loaded = face }, +- else => unreachable, +- }; +- +- break :deferred switch (tag) { +- .deferred => &entry.loaded, +- .fallback_deferred => &entry.fallback_loaded, +- else => unreachable, +- }; +- }, +- +- .loaded, .fallback_loaded => |*f| f, +- +- // When setting `entry` above, we ensure we don't end up with +- // an alias. +- .alias => unreachable, +- }; +-} +- +-/// Return the index of the font in this collection that contains +-/// the given codepoint, style, and presentation. If no font is found, +-/// null is returned. +-/// +-/// This does not trigger font loading; deferred fonts can be +-/// searched for codepoints. 
+-pub fn getIndex( +- self: *const Collection, +- cp: u32, +- style: Style, +- p_mode: PresentationMode, +-) ?Index { +- var i: usize = 0; +- var it = self.faces.get(style).constIterator(0); +- while (it.next()) |entry| { +- if (entry.hasCodepoint(cp, p_mode)) { +- return .{ +- .style = style, +- .idx = @intCast(i), +- }; +- } +- +- i += 1; +- } +- +- // Not found +- return null; +-} +- +-/// Check if a specific font index has a specific codepoint. This does not +-/// necessarily force the font to load. The presentation value "p" will +-/// verify the Emoji representation matches if it is non-null. If "p" is +-/// null then any presentation will be accepted. +-pub fn hasCodepoint( +- self: *const Collection, +- index: Index, +- cp: u32, +- p_mode: PresentationMode, +-) bool { +- const list = self.faces.get(index.style); +- if (index.idx >= list.count()) return false; +- return list.at(index.idx).hasCodepoint(cp, p_mode); +-} +- +-pub const CompleteError = Allocator.Error || error{ +- DefaultUnavailable, +-}; +- +-/// Ensure we have an option for all styles in the collection, such +-/// as italic and bold by synthesizing them if necessary from the +-/// first regular face that has text glyphs. +-/// +-/// If there is no regular face that has text glyphs, then this +-/// does nothing. +-pub fn completeStyles( +- self: *Collection, +- alloc: Allocator, +- synthetic_config: config.FontSyntheticStyle, +-) CompleteError!void { +- // If every style has at least one entry then we're done! +- // This is the most common case. +- empty: { +- var it = self.faces.iterator(); +- while (it.next()) |entry| { +- if (entry.value.count() == 0) break :empty; +- } +- +- return; +- } +- +- // Find the first regular face that has non-colorized text glyphs. +- // This is the font we want to fallback to. This may not be index zero +- // if a user configures something like an Emoji font first. 
+- const regular_entry: *Entry = entry: { +- const list = self.faces.getPtr(.regular); +- if (list.count() == 0) return; +- +- // Find our first regular face that has text glyphs. +- var it = list.iterator(0); +- while (it.next()) |entry| { +- // Load our face. If we fail to load it, we just skip it and +- // continue on to try the next one. +- const face = self.getFaceFromEntry(entry) catch |err| { +- log.warn("error loading regular entry={d} err={}", .{ +- it.index - 1, +- err, +- }); +- +- continue; +- }; +- +- // We have two conditionals here. The color check is obvious: +- // we want to auto-italicize a normal text font. The second +- // check is less obvious... for mixed color/non-color fonts, we +- // accept the regular font if it has basic ASCII. This may not +- // be strictly correct (especially with international fonts) but +- // it's a reasonable heuristic and the first case will match 99% +- // of the time. +- if (!face.hasColor() or face.glyphIndex('A') != null) { +- break :entry entry; +- } +- } +- +- // No regular text face found. We can't provide any fallback. +- return error.DefaultUnavailable; +- }; +- +- // If we don't have italic, attempt to create a synthetic italic face. +- // If we can't create a synthetic italic face, we'll just use the regular +- // face for italic. 
+- const italic_list = self.faces.getPtr(.italic); +- const have_italic = italic_list.count() > 0; +- if (!have_italic) italic: { +- if (!synthetic_config.italic) { +- log.info("italic style not available and synthetic italic disabled", .{}); +- try italic_list.append(alloc, .{ .alias = regular_entry }); +- break :italic; +- } +- +- const synthetic = self.syntheticItalic(regular_entry) catch |err| { +- log.warn("failed to create synthetic italic, italic style will not be available err={}", .{err}); +- try italic_list.append(alloc, .{ .alias = regular_entry }); +- break :italic; +- }; +- +- log.info("synthetic italic face created", .{}); +- try italic_list.append(alloc, .{ .loaded = synthetic }); +- } +- +- // If we don't have bold, use the regular font. +- const bold_list = self.faces.getPtr(.bold); +- const have_bold = bold_list.count() > 0; +- if (!have_bold) bold: { +- if (!synthetic_config.bold) { +- log.info("bold style not available and synthetic bold disabled", .{}); +- try bold_list.append(alloc, .{ .alias = regular_entry }); +- break :bold; +- } +- +- const synthetic = self.syntheticBold(regular_entry) catch |err| { +- log.warn("failed to create synthetic bold, bold style will not be available err={}", .{err}); +- try bold_list.append(alloc, .{ .alias = regular_entry }); +- break :bold; +- }; +- +- log.info("synthetic bold face created", .{}); +- try bold_list.append(alloc, .{ .loaded = synthetic }); +- } +- +- // If we don't have bold italic, we attempt to synthesize a bold variant +- // of the italic font. If we can't do that, we'll use the italic font. +- const bold_italic_list = self.faces.getPtr(.bold_italic); +- if (bold_italic_list.count() == 0) bold_italic: { +- if (!synthetic_config.@"bold-italic") { +- log.info("bold italic style not available and synthetic bold italic disabled", .{}); +- try bold_italic_list.append(alloc, .{ .alias = regular_entry }); +- break :bold_italic; +- } +- +- // Prefer to synthesize on top of the face we already had. 
If we +- // have bold then we try to synthesize italic on top of bold. +- if (have_bold) { +- if (self.syntheticItalic(bold_list.at(0))) |synthetic| { +- log.info("synthetic bold italic face created from bold", .{}); +- try bold_italic_list.append(alloc, .{ .loaded = synthetic }); +- break :bold_italic; +- } else |_| {} +- +- // If synthesizing italic failed, then we try to synthesize +- // bold on whatever italic font we have. +- } +- +- // Nested alias isn't allowed so we need to unwrap the italic entry. +- const base_entry = base: { +- const italic_entry = italic_list.at(0); +- break :base switch (italic_entry.*) { +- .alias => |v| v, +- +- .loaded, +- .fallback_loaded, +- .deferred, +- .fallback_deferred, +- => italic_entry, +- }; +- }; +- +- if (self.syntheticBold(base_entry)) |synthetic| { +- log.info("synthetic bold italic face created from italic", .{}); +- try bold_italic_list.append(alloc, .{ .loaded = synthetic }); +- break :bold_italic; +- } else |_| {} +- +- log.warn("bold italic style not available, using italic font", .{}); +- try bold_italic_list.append(alloc, .{ .alias = base_entry }); +- } +-} +- +-// Create a synthetic bold font face from the given entry and return it. +-fn syntheticBold(self: *Collection, entry: *Entry) !Face { +- // Not all font backends support synthetic bold. +- if (comptime !@hasDecl(Face, "syntheticBold")) return error.SyntheticBoldUnavailable; +- +- // We require loading options to create a synthetic bold face. +- const opts = self.load_options orelse return error.DeferredLoadingUnavailable; +- +- // Try to bold it. +- const regular = try self.getFaceFromEntry(entry); +- const face = try regular.syntheticBold(opts.faceOptions()); +- +- var buf: [256]u8 = undefined; +- if (face.name(&buf)) |name| { +- log.info("font synthetic bold created family={s}", .{name}); +- } else |_| {} +- +- return face; +-} +- +-// Create a synthetic italic font face from the given entry and return it. 
+-fn syntheticItalic(self: *Collection, entry: *Entry) !Face { +- // Not all font backends support synthetic italicization. +- if (comptime !@hasDecl(Face, "syntheticItalic")) return error.SyntheticItalicUnavailable; +- +- // We require loading options to create a synthetic italic face. +- const opts = self.load_options orelse return error.DeferredLoadingUnavailable; +- +- // Try to italicize it. +- const regular = try self.getFaceFromEntry(entry); +- const face = try regular.syntheticItalic(opts.faceOptions()); +- +- var buf: [256]u8 = undefined; +- if (face.name(&buf)) |name| { +- log.info("font synthetic italic created family={s}", .{name}); +- } else |_| {} +- +- return face; +-} +- +-/// Update the size of all faces in the collection. This will +-/// also update the size in the load options for future deferred +-/// face loading. +-/// +-/// This requires load options to be set. +-pub fn setSize(self: *Collection, size: DesiredSize) !void { +- // Get a pointer to our options so we can modify the size. +- const opts = if (self.load_options) |*v| +- v +- else +- return error.DeferredLoadingUnavailable; +- opts.size = size; +- +- // Resize all our faces that are loaded +- var it = self.faces.iterator(); +- while (it.next()) |array| { +- var entry_it = array.value.iterator(0); +- while (entry_it.next()) |entry| switch (entry.*) { +- .loaded, .fallback_loaded => |*f| try f.setSize( +- opts.faceOptions(), +- ), +- +- // Deferred aren't loaded so we don't need to set their size. +- // The size for when they're loaded is set since `opts` changed. +- .deferred, .fallback_deferred => continue, +- +- // Alias faces don't own their size. +- .alias => continue, +- }; +- } +- +- try self.updateMetrics(); +-} +- +-const UpdateMetricsError = font.Face.GetMetricsError || error{ +- CannotLoadPrimaryFont, +-}; +- +-/// Update the cell metrics for this collection, based on +-/// the primary font and the modifiers in `metric_modifiers`. 
+-/// +-/// This requires a primary font (index `0`) to be present. +-pub fn updateMetrics(self: *Collection) UpdateMetricsError!void { +- const primary_face = self.getFace(.{ .idx = 0 }) catch return error.CannotLoadPrimaryFont; +- +- const face_metrics = try primary_face.getMetrics(); +- +- var metrics = Metrics.calc(face_metrics); +- +- metrics.apply(self.metric_modifiers); +- +- self.metrics = metrics; +-} +- +-/// Packed array of all Style enum cases mapped to a growable list of faces. +-/// +-/// We use this data structure because there aren't many styles and all +-/// styles are typically loaded for a terminal session. The overhead per +-/// style even if it is not used or barely used is minimal given the +-/// small style count. +-/// +-/// We use a segmented list because the entry values must be pointer-stable +-/// to support the "alias" field in Entry. +-/// +-/// WARNING: We cannot use any prealloc yet for the segmented list because +-/// the collection is copied around by value and pointers aren't stable. +-const StyleArray = std.EnumArray(Style, std.SegmentedList(Entry, 0)); +- +-/// Load options are used to configure all the details a Collection +-/// needs to load deferred faces. +-pub const LoadOptions = struct { +- /// The library to use for loading faces. This is not owned by +- /// the collection and can be used by multiple collections. When +- /// deinitializing the collection, the library is not deinitialized. +- library: Library, +- +- /// The desired font size for all loaded faces. +- size: DesiredSize = .{ .points = 12 }, +- +- /// Freetype Load Flags to use when loading glyphs. This is a list of +- /// bitfield constants that controls operations to perform during glyph +- /// loading. Only a subset is exposed for configuration, for the whole set +- /// of flags see `pkg.freetype.face.LoadFlags`. 
+- freetype_load_flags: font.face.FreetypeLoadFlags = font.face.freetype_load_flags_default, +- +- pub fn deinit(self: *LoadOptions, alloc: Allocator) void { +- _ = self; +- _ = alloc; +- } +- +- /// The options to use for loading faces. +- pub fn faceOptions(self: *const LoadOptions) font.face.Options { +- return .{ +- .size = self.size, +- .freetype_load_flags = self.freetype_load_flags, +- }; +- } +-}; +- +-/// A entry in a collection can be deferred or loaded. A deferred face +-/// is not yet fully loaded and only represents the font descriptor +-/// and usually uses less resources. A loaded face is fully parsed, +-/// ready to rasterize, and usually uses more resources than a +-/// deferred version. +-/// +-/// A face can also be a "fallback" variant that is still either +-/// deferred or loaded. Today, there is only one difference between +-/// fallback and non-fallback (or "explicit") faces: the handling +-/// of emoji presentation. +-/// +-/// For explicit faces, when an explicit emoji presentation is +-/// not requested, we will use any glyph for that codepoint found +-/// even if the font presentation does not match the UCD +-/// (Unicode Character Database) value. When an explicit presentation +-/// is requested (via either VS15/V16), that is always honored. +-/// The reason we do this is because we assume that if a user +-/// explicitly chosen a font face (hence it is "explicit" and +-/// not "fallback"), they want to use any glyphs possible within that +-/// font face. Fallback fonts on the other hand are picked as a +-/// last resort, so we should prefer exactness if possible. +-pub const Entry = union(enum) { +- deferred: DeferredFace, // Not loaded +- loaded: Face, // Loaded, explicit use +- +- // The same as deferred/loaded but fallback font semantics (see large +- // comment above Entry). +- fallback_deferred: DeferredFace, +- fallback_loaded: Face, +- +- // An alias to another entry. 
This is used to share the same face, +- // avoid memory duplication. An alias must point to a non-alias entry. +- alias: *Entry, +- +- pub fn deinit(self: *Entry) void { +- switch (self.*) { +- inline .deferred, +- .loaded, +- .fallback_deferred, +- .fallback_loaded, +- => |*v| v.deinit(), +- +- // Aliased fonts are not owned by this entry so we let them +- // be deallocated by the owner. +- .alias => {}, +- } +- } +- +- /// True if the entry is deferred. +- fn isDeferred(self: Entry) bool { +- return switch (self) { +- .deferred, .fallback_deferred => true, +- .loaded, .fallback_loaded => false, +- .alias => |v| v.isDeferred(), +- }; +- } +- +- /// True if this face satisfies the given codepoint and presentation. +- pub fn hasCodepoint( +- self: Entry, +- cp: u32, +- p_mode: PresentationMode, +- ) bool { +- return switch (self) { +- .alias => |v| v.hasCodepoint(cp, p_mode), +- +- // Non-fallback fonts require explicit presentation matching but +- // otherwise don't care about presentation +- .deferred => |v| switch (p_mode) { +- .explicit => |p| v.hasCodepoint(cp, p), +- .default, .any => v.hasCodepoint(cp, null), +- }, +- +- .loaded => |face| switch (p_mode) { +- .explicit => |p| explicit: { +- const index = face.glyphIndex(cp) orelse break :explicit false; +- break :explicit switch (p) { +- .text => !face.isColorGlyph(index), +- .emoji => face.isColorGlyph(index), +- }; +- }, +- .default, .any => face.glyphIndex(cp) != null, +- }, +- +- // Fallback fonts require exact presentation matching. 
+- .fallback_deferred => |v| switch (p_mode) { +- .explicit, .default => |p| v.hasCodepoint(cp, p), +- .any => v.hasCodepoint(cp, null), +- }, +- +- .fallback_loaded => |face| switch (p_mode) { +- .explicit, +- .default, +- => |p| explicit: { +- const index = face.glyphIndex(cp) orelse break :explicit false; +- break :explicit switch (p) { +- .text => !face.isColorGlyph(index), +- .emoji => face.isColorGlyph(index), +- }; +- }, +- .any => face.glyphIndex(cp) != null, +- }, +- }; +- } +-}; +- +-/// The requested presentation for a codepoint. +-pub const PresentationMode = union(enum) { +- /// The codepoint has an explicit presentation that is required, +- /// i.e. VS15/V16. +- explicit: Presentation, +- +- /// The codepoint has no explicit presentation and we should use +- /// the presentation from the UCD. +- default: Presentation, +- +- /// The codepoint can be any presentation. +- any: void, +-}; +- +-/// This represents a specific font in the collection. +-/// +-/// The backing size of this packed struct represents the total number +-/// of possible usable fonts in a collection. And the number of bits +-/// used for the index and not the style represents the total number +-/// of possible usable fonts for a given style. +-/// +-/// The goal is to keep the size of this struct as small as practical. We +-/// accept the limitations that this imposes so long as they're reasonable. +-/// At the time of writing this comment, this is a 16-bit struct with 13 +-/// bits used for the index, supporting up to 8192 fonts per style. This +-/// seems more than reasonable. There are synthetic scenarios where this +-/// could be a limitation but I can't think of any that are practical. +-/// +-/// If you somehow need more fonts per style, you can increase the size of +-/// the Backing type and everything should just work fine. 
+-pub const Index = packed struct(Index.Backing) { +- const Backing = u16; +- const backing_bits = @typeInfo(Backing).int.bits; +- +- /// The number of bits we use for the index. +- const idx_bits = backing_bits - @typeInfo(@typeInfo(Style).@"enum".tag_type).int.bits; +- pub const IndexInt = @Type(.{ .int = .{ .signedness = .unsigned, .bits = idx_bits } }); +- +- /// The special-case fonts that we support. +- pub const Special = enum(IndexInt) { +- // We start all special fonts at this index so they can be detected. +- pub const start = std.math.maxInt(IndexInt); +- +- /// Sprite drawing, this is rendered JIT using 2D graphics APIs. +- sprite = start, +- }; +- +- style: Style = .regular, +- idx: IndexInt = 0, +- +- /// Initialize a special font index. +- pub fn initSpecial(v: Special) Index { +- return .{ .style = .regular, .idx = @intFromEnum(v) }; +- } +- +- /// Convert to int +- pub fn int(self: Index) Backing { +- return @bitCast(self); +- } +- +- /// Returns true if this is a "special" index which doesn't map to +- /// a real font face. We can still render it but there is no face for +- /// this font. +- pub fn special(self: Index) ?Special { +- if (self.idx < Special.start) return null; +- return @enumFromInt(self.idx); +- } +- +- test { +- // We never want to take up more than a byte since font indexes are +- // everywhere so if we increase the size of this we'll dramatically +- // increase our memory usage. +- try std.testing.expectEqual(@sizeOf(Backing), @sizeOf(Index)); +- +- // Just so we're aware when this changes. The current maximum number +- // of fonts for a style is 13 bits or 8192 fonts. 
+- try std.testing.expectEqual(13, idx_bits); +- } +-}; +- +-test init { +- const testing = std.testing; +- const alloc = testing.allocator; +- +- var c = init(); +- defer c.deinit(alloc); +-} +- +-test "add full" { +- const testing = std.testing; +- const alloc = testing.allocator; +- const testFont = font.embedded.regular; +- +- var lib = try Library.init(); +- defer lib.deinit(); +- +- var c = init(); +- defer c.deinit(alloc); +- +- for (0..Index.Special.start - 1) |_| { +- _ = try c.add(alloc, .regular, .{ .loaded = try Face.init( +- lib, +- testFont, +- .{ .size = .{ .points = 12 } }, +- ) }); +- } +- +- try testing.expectError(error.CollectionFull, c.add( +- alloc, +- .regular, +- .{ .loaded = try Face.init( +- lib, +- testFont, +- .{ .size = .{ .points = 12 } }, +- ) }, +- )); +-} +- +-test "add deferred without loading options" { +- const testing = std.testing; +- const alloc = testing.allocator; +- +- var c = init(); +- defer c.deinit(alloc); +- +- try testing.expectError(error.DeferredLoadingUnavailable, c.add( +- alloc, +- .regular, +- +- // This can be undefined because it should never be accessed. 
+- .{ .deferred = undefined }, +- )); +-} +- +-test getFace { +- const testing = std.testing; +- const alloc = testing.allocator; +- const testFont = font.embedded.regular; +- +- var lib = try Library.init(); +- defer lib.deinit(); +- +- var c = init(); +- defer c.deinit(alloc); +- +- const idx = try c.add(alloc, .regular, .{ .loaded = try Face.init( +- lib, +- testFont, +- .{ .size = .{ .points = 12, .xdpi = 96, .ydpi = 96 } }, +- ) }); +- +- { +- const face1 = try c.getFace(idx); +- const face2 = try c.getFace(idx); +- try testing.expectEqual(@intFromPtr(face1), @intFromPtr(face2)); +- } +-} +- +-test getIndex { +- const testing = std.testing; +- const alloc = testing.allocator; +- const testFont = font.embedded.regular; +- +- var lib = try Library.init(); +- defer lib.deinit(); +- +- var c = init(); +- defer c.deinit(alloc); +- +- _ = try c.add(alloc, .regular, .{ .loaded = try Face.init( +- lib, +- testFont, +- .{ .size = .{ .points = 12, .xdpi = 96, .ydpi = 96 } }, +- ) }); +- +- // Should find all visible ASCII +- var i: u32 = 32; +- while (i < 127) : (i += 1) { +- const idx = c.getIndex(i, .regular, .{ .any = {} }); +- try testing.expect(idx != null); +- } +- +- // Should not find emoji +- { +- const idx = c.getIndex('🥸', .regular, .{ .any = {} }); +- try testing.expect(idx == null); +- } +-} +- +-test completeStyles { +- const testing = std.testing; +- const alloc = testing.allocator; +- const testFont = font.embedded.regular; +- +- var lib = try Library.init(); +- defer lib.deinit(); +- +- var c = init(); +- defer c.deinit(alloc); +- c.load_options = .{ .library = lib }; +- +- _ = try c.add(alloc, .regular, .{ .loaded = try Face.init( +- lib, +- testFont, +- .{ .size = .{ .points = 12, .xdpi = 96, .ydpi = 96 } }, +- ) }); +- +- try testing.expect(c.getIndex('A', .bold, .{ .any = {} }) == null); +- try testing.expect(c.getIndex('A', .italic, .{ .any = {} }) == null); +- try testing.expect(c.getIndex('A', .bold_italic, .{ .any = {} }) == null); +- try 
c.completeStyles(alloc, .{}); +- try testing.expect(c.getIndex('A', .bold, .{ .any = {} }) != null); +- try testing.expect(c.getIndex('A', .italic, .{ .any = {} }) != null); +- try testing.expect(c.getIndex('A', .bold_italic, .{ .any = {} }) != null); +-} +- +-test setSize { +- const testing = std.testing; +- const alloc = testing.allocator; +- const testFont = font.embedded.regular; +- +- var lib = try Library.init(); +- defer lib.deinit(); +- +- var c = init(); +- defer c.deinit(alloc); +- c.load_options = .{ .library = lib }; +- +- _ = try c.add(alloc, .regular, .{ .loaded = try Face.init( +- lib, +- testFont, +- .{ .size = .{ .points = 12, .xdpi = 96, .ydpi = 96 } }, +- ) }); +- +- try testing.expectEqual(@as(u32, 12), c.load_options.?.size.points); +- try c.setSize(.{ .points = 24 }); +- try testing.expectEqual(@as(u32, 24), c.load_options.?.size.points); +-} +- +-test hasCodepoint { +- const testing = std.testing; +- const alloc = testing.allocator; +- const testFont = font.embedded.regular; +- +- var lib = try Library.init(); +- defer lib.deinit(); +- +- var c = init(); +- defer c.deinit(alloc); +- c.load_options = .{ .library = lib }; +- +- const idx = try c.add(alloc, .regular, .{ .loaded = try Face.init( +- lib, +- testFont, +- .{ .size = .{ .points = 12, .xdpi = 96, .ydpi = 96 } }, +- ) }); +- +- try testing.expect(c.hasCodepoint(idx, 'A', .{ .any = {} })); +- try testing.expect(!c.hasCodepoint(idx, '🥸', .{ .any = {} })); +-} +- +-test "hasCodepoint emoji default graphical" { +- if (options.backend != .fontconfig_freetype) return error.SkipZigTest; +- +- const testing = std.testing; +- const alloc = testing.allocator; +- const testEmoji = font.embedded.emoji; +- +- var lib = try Library.init(); +- defer lib.deinit(); +- +- var c = init(); +- defer c.deinit(alloc); +- c.load_options = .{ .library = lib }; +- +- const idx = try c.add(alloc, .regular, .{ .loaded = try Face.init( +- lib, +- testEmoji, +- .{ .size = .{ .points = 12, .xdpi = 96, .ydpi = 96 } 
}, +- ) }); +- +- try testing.expect(!c.hasCodepoint(idx, 'A', .{ .any = {} })); +- try testing.expect(c.hasCodepoint(idx, '🥸', .{ .any = {} })); +- // TODO(fontmem): test explicit/implicit +-} +- +-test "metrics" { +- const testing = std.testing; +- const alloc = testing.allocator; +- const testFont = font.embedded.inconsolata; +- +- var lib = try Library.init(); +- defer lib.deinit(); +- +- var c = init(); +- defer c.deinit(alloc); +- c.load_options = .{ .library = lib }; +- +- _ = try c.add(alloc, .regular, .{ .loaded = try Face.init( +- lib, +- testFont, +- .{ .size = .{ .points = 12, .xdpi = 96, .ydpi = 96 } }, +- ) }); +- +- try c.updateMetrics(); +- +- try std.testing.expectEqual(font.Metrics{ +- .cell_width = 8, +- // The cell height is 17 px because the calculation is +- // +- // ascender - descender + gap +- // +- // which, for inconsolata is +- // +- // 859 - -190 + 0 +- // +- // font units, at 1000 units per em that works out to 1.049 em, +- // and 1em should be the point size * dpi scale, so 12 * (96/72) +- // which is 16, and 16 * 1.049 = 16.784, which finally is rounded +- // to 17. 
+- .cell_height = 17, +- .cell_baseline = 3, +- .underline_position = 17, +- .underline_thickness = 1, +- .strikethrough_position = 10, +- .strikethrough_thickness = 1, +- .overline_position = 0, +- .overline_thickness = 1, +- .box_thickness = 1, +- .cursor_height = 17, +- }, c.metrics); +- +- // Resize should change metrics +- try c.setSize(.{ .points = 24, .xdpi = 96, .ydpi = 96 }); +- try std.testing.expectEqual(font.Metrics{ +- .cell_width = 16, +- .cell_height = 34, +- .cell_baseline = 6, +- .underline_position = 34, +- .underline_thickness = 2, +- .strikethrough_position = 19, +- .strikethrough_thickness = 2, +- .overline_position = 0, +- .overline_thickness = 2, +- .box_thickness = 2, +- .cursor_height = 34, +- }, c.metrics); +-} +\ No newline at end of file diff --git a/locodiff-250425/results/ghostty_src_font_Collection.zig/x-ai_grok-code-fast-1/20250909_155824/raw_response.txt b/locodiff-250425/results/ghostty_src_font_Collection.zig/x-ai_grok-code-fast-1/20250909_155824/raw_response.txt new file mode 100644 index 000000000..e69de29bb diff --git a/locodiff-250425/results/ghostty_src_os_flatpak.zig/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt b/locodiff-250425/results/ghostty_src_os_flatpak.zig/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt new file mode 100644 index 000000000..7e871f890 --- /dev/null +++ b/locodiff-250425/results/ghostty_src_os_flatpak.zig/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt @@ -0,0 +1,503 @@ +const std = @import("std"); +const assert = std.debug.assert; +const Allocator = std.mem.Allocator; +const builtin = @import("builtin"); +const posix = std.posix; +const xev = @import("../global.zig").xev; + +const log = std.log.scoped(.flatpak); + +/// Returns true if we're running in a Flatpak environment. +pub fn isFlatpak() bool { + // If we're not on Linux then we'll make this comptime false. 
+ if (comptime builtin.os.tag != .linux) return false; + return if (std.fs.accessAbsolute("/.flatpak-info", .{})) true else |_| false; +} + +/// A struct to help execute commands on the host via the +/// org.freedesktop.Flatpak.Development DBus module. This uses GIO/GLib +/// under the hood. +/// +/// This always spawns its own thread and maintains its own GLib event loop. +/// This makes it easy for the command to behave synchronously similar to +/// std.process.Child. +/// +/// There are lots of chances for low-hanging improvements here (automatic +/// pipes, /dev/null, etc.) but this was purpose built for my needs so +/// it doesn't have all of those. +/// +/// Requires GIO, GLib to be available and linked. +pub const FlatpakHostCommand = struct { + const fd_t = posix.fd_t; + const EnvMap = std.process.EnvMap; + const c = @cImport({ + @cInclude("gio/gio.h"); + @cInclude("gio/gunixfdlist.h"); + }); + + /// Argv are the arguments to call on the host with argv[0] being + /// the command to execute. + argv: []const []const u8, + + /// The cwd for the new process. If this is not set then it will use + /// the current cwd of the calling process. + cwd: ?[:0]const u8 = null, + + /// Environment variables for the child process. If this is null, this + /// does not send any environment variables. + env: ?*const EnvMap = null, + + /// File descriptors to send to the child process. It is up to the + /// caller to create the file descriptors and set them up. + stdin: fd_t, + stdout: fd_t, + stderr: fd_t, + + /// State of the process. This is updated by the dedicated thread it + /// runs in and is protected by the given lock and condition variable. + state: State = .{ .init = {} }, + state_mutex: std.Thread.Mutex = .{}, + state_cv: std.Thread.Condition = .{}, + + /// State the process is in. This can't be inspected directly, you + /// must use getters on the struct to get access. + const State = union(enum) { + /// Initial state + init: void, + + /// Error starting. 
The error message is only available via logs. + /// (This isn't a fundamental limitation, just didn't need the + /// error message yet) + err: void, + + /// Process started with the given pid on the host. + started: struct { + pid: u32, + loop_xev: ?*xev.Loop, + completion: ?*Completion, + subscription: c.guint, + loop: *c.GMainLoop, + }, + + /// Process exited + exited: struct { + pid: u32, + status: u8, + }, + }; + + pub const Completion = struct { + callback: *const fn (ud: ?*anyopaque, l: *xev.Loop, c: *Completion, r: WaitError!u8) void = noopCallback, + c_xev: xev.Completion = .{}, + userdata: ?*anyopaque = null, + timer: ?xev.Timer = null, + result: ?WaitError!u8 = null, + }; + + /// Errors that are possible from us. + pub const Error = error{ + FlatpakMustBeStarted, + FlatpakSpawnFail, + FlatpakSetupFail, + FlatpakRPCFail, + }; + + pub const WaitError = xev.Timer.RunError || Error; + + /// Spawn the command. This will start the host command. On return, + /// the pid will be available. This must only be called with the + /// state in "init". + /// + /// Precondition: The self pointer MUST be stable. + pub fn spawn(self: *FlatpakHostCommand, alloc: Allocator) !u32 { + const thread = try std.Thread.spawn(.{}, threadMain, .{ self, alloc }); + thread.setName("flatpak-host-command") catch {}; + + // Wait for the process to start or error. + self.state_mutex.lock(); + defer self.state_mutex.unlock(); + while (self.state == .init) self.state_cv.wait(&self.state_mutex); + + return switch (self.state) { + .init => unreachable, + .err => Error.FlatpakSpawnFail, + .started => |v| v.pid, + .exited => |v| v.pid, + }; + } + + /// Wait for the process to end and return the exit status. This + /// can only be called ONCE. Once this returns, the state is reset. 
+ pub fn wait(self: *FlatpakHostCommand) !u8 { + self.state_mutex.lock(); + defer self.state_mutex.unlock(); + + while (true) { + switch (self.state) { + .init => return Error.FlatpakMustBeStarted, + .err => return Error.FlatpakSpawnFail, + .started => {}, + .exited => |v| { + self.state = .{ .init = {} }; + self.state_cv.broadcast(); + return v.status; + }, + } + + self.state_cv.wait(&self.state_mutex); + } + } + + /// Wait for the process to end asynchronously via libxev. This + /// can only be called ONCE. + pub fn waitXev( + self: *FlatpakHostCommand, + loop: *xev.Loop, + completion: *Completion, + comptime Userdata: type, + userdata: ?*Userdata, + comptime cb: *const fn ( + ud: ?*Userdata, + l: *xev.Loop, + c: *Completion, + r: WaitError!u8, + ) void, + ) void { + self.state_mutex.lock(); + defer self.state_mutex.unlock(); + + completion.* = .{ + .callback = (struct { + fn callback( + ud_: ?*anyopaque, + l_inner: *xev.Loop, + c_inner: *Completion, + r: WaitError!u8, + ) void { + const ud = @as(?*Userdata, if (Userdata == void) null else @ptrCast(@alignCast(ud_))); + @call(.always_inline, cb, .{ ud, l_inner, c_inner, r }); + } + }).callback, + .userdata = userdata, + .timer = xev.Timer.init() catch unreachable, // not great, but xev timer can't fail atm + }; + + switch (self.state) { + .init => completion.result = Error.FlatpakMustBeStarted, + .err => completion.result = Error.FlatpakSpawnFail, + .started => |*v| { + v.loop_xev = loop; + v.completion = completion; + return; + }, + .exited => |v| { + completion.result = v.status; + }, + } + + completion.timer.?.run( + loop, + &completion.c_xev, + 0, + anyopaque, + completion.userdata, + (struct { + fn callback( + ud: ?*anyopaque, + l_inner: *xev.Loop, + c_inner: *xev.Completion, + r: xev.Timer.RunError!void, + ) xev.CallbackAction { + const c_outer: *Completion = @fieldParentPtr("c_xev", c_inner); + defer if (c_outer.timer) |*t| t.deinit(); + + const result = if (r) |_| c_outer.result.? 
else |err| err; + c_outer.callback(ud, l_inner, c_outer, result); + return .disarm; + } + }).callback, + ); + } + + /// Send a signal to the started command. This does nothing if the + /// command is not in the started state. + pub fn signal(self: *FlatpakHostCommand, sig: u8, pg: bool) !void { + const pid = pid: { + self.state_mutex.lock(); + defer self.state_mutex.unlock(); + switch (self.state) { + .started => |v| break :pid v.pid, + else => return, + } + }; + + // Get our bus connection. + var g_err: [*c]c.GError = null; + const bus = c.g_bus_get_sync(c.G_BUS_TYPE_SESSION, null, &g_err) orelse { + log.warn("signal error getting bus: {s}", .{g_err.*.message}); + return Error.FlatpakSetupFail; + }; + defer c.g_object_unref(bus); + + const reply = c.g_dbus_connection_call_sync( + bus, + "org.freedesktop.Flatpak", + "/org/freedesktop/Flatpak/Development", + "org.freedesktop.Flatpak.Development", + "HostCommandSignal", + c.g_variant_new( + "(uub)", + pid, + sig, + @as(c_int, @intCast(@intFromBool(pg))), + ), + c.G_VARIANT_TYPE("()"), + c.G_DBUS_CALL_FLAGS_NONE, + c.G_MAXINT, + null, + &g_err, + ); + if (g_err != null) { + log.warn("signal send error: {s}", .{g_err.*.message}); + return; + } + defer c.g_variant_unref(reply); + } + + fn threadMain(self: *FlatpakHostCommand, alloc: Allocator) void { + // Create a new thread-local context so that all our sources go + // to this context and we can run our loop correctly. + const ctx = c.g_main_context_new(); + defer c.g_main_context_unref(ctx); + c.g_main_context_push_thread_default(ctx); + defer c.g_main_context_pop_thread_default(ctx); + + // Get our loop for the current thread + const loop = c.g_main_loop_new(ctx, 1).?; + defer c.g_main_loop_unref(loop); + + // Get our bus connection. This has to remain active until we exit + // the thread otherwise our signals won't be called. 
+ var g_err: [*c]c.GError = null; + const bus = c.g_bus_get_sync(c.G_BUS_TYPE_SESSION, null, &g_err) orelse { + log.warn("spawn error getting bus: {s}", .{g_err.*.message}); + self.updateState(.{ .err = {} }); + return; + }; + defer c.g_object_unref(bus); + + // Spawn the command first. This will setup all our IO. + self.start(alloc, bus, loop) catch |err| { + log.warn("error starting host command: {}", .{err}); + self.updateState(.{ .err = {} }); + return; + }; + + // Run the event loop. It quits in the exit callback. + c.g_main_loop_run(loop); + } + + /// Start the command. This will start the host command and set the + /// pid field on success. This will not wait for completion. + /// + /// Once this is called, the self pointer MUST remain stable. This + /// requirement is due to using GLib under the covers with callbacks. + fn start( + self: *FlatpakHostCommand, + alloc: Allocator, + bus: *c.GDBusConnection, + loop: *c.GMainLoop, + ) !void { + var err: [*c]c.GError = null; + var arena_allocator = std.heap.ArenaAllocator.init(alloc); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + // Our list of file descriptors that we need to send to the process. + const fd_list = c.g_unix_fd_list_new(); + defer c.g_object_unref(fd_list); + if (c.g_unix_fd_list_append(fd_list, self.stdin, &err) < 0) { + log.warn("error adding fd: {s}", .{err.*.message}); + return Error.FlatpakSetupFail; + } + if (c.g_unix_fd_list_append(fd_list, self.stdout, &err) < 0) { + log.warn("error adding fd: {s}", .{err.*.message}); + return Error.FlatpakSetupFail; + } + if (c.g_unix_fd_list_append(fd_list, self.stderr, &err) < 0) { + log.warn("error adding fd: {s}", .{err.*.message}); + return Error.FlatpakSetupFail; + } + + // Build our arguments for the file descriptors. 
+ const fd_builder = c.g_variant_builder_new(c.G_VARIANT_TYPE("a{uh}")); + defer c.g_variant_builder_unref(fd_builder); + c.g_variant_builder_add(fd_builder, "{uh}", @as(c_int, 0), self.stdin); + c.g_variant_builder_add(fd_builder, "{uh}", @as(c_int, 1), self.stdout); + c.g_variant_builder_add(fd_builder, "{uh}", @as(c_int, 2), self.stderr); + + // Build our env vars + const env_builder = c.g_variant_builder_new(c.G_VARIANT_TYPE("a{ss}")); + defer c.g_variant_builder_unref(env_builder); + if (self.env) |env| { + var it = env.iterator(); + while (it.next()) |pair| { + const key = try arena.dupeZ(u8, pair.key_ptr.*); + const value = try arena.dupeZ(u8, pair.value_ptr.*); + c.g_variant_builder_add(env_builder, "{ss}", key.ptr, value.ptr); + } + } + + // Build our args + const args = try arena.alloc(?[*:0]u8, self.argv.len + 1); + for (0.., self.argv) |i, arg| { + const argZ = try arena.dupeZ(u8, arg); + args[i] = argZ.ptr; + } + args[args.len - 1] = null; + + // Get the cwd in case we don't have ours set. A small optimization + // would be to do this only if we need it but this isn't a + // common code path. + const g_cwd = c.g_get_current_dir(); + defer c.g_free(g_cwd); + + // The params for our RPC call + const params = c.g_variant_new( + "(^ay^aay@a{uh}@a{ss}u)", + @as(*const anyopaque, if (self.cwd) |*cwd| cwd.ptr else g_cwd), + args.ptr, + c.g_variant_builder_end(fd_builder), + c.g_variant_builder_end(env_builder), + @as(c_int, 0), + ); + _ = c.g_variant_ref_sink(params); // take ownership + defer c.g_variant_unref(params); + + // Subscribe to exit notifications + const subscription_id = c.g_dbus_connection_signal_subscribe( + bus, + "org.freedesktop.Flatpak", + "/org/freedesktop/Flatpak/Development", + "org.freedesktop.Flatpak.Development", + "HostCommandExited", + "/org/freedesktop/Flatpak/Development", + null, + 0, + onExit, + self, + null, + ); + errdefer c.g_dbus_connection_signal_unsubscribe(bus, subscription_id); + + // Go! 
+ const reply = c.g_dbus_connection_call_with_unix_fd_list_sync( + bus, + "org.freedesktop.Flatpak", + "/org/freedesktop/Flatpak/Development", + "org.freedesktop.Flatpak.Development", + "HostCommandSignal", + params, + c.G_VARIANT_TYPE("(u)"), + c.G_DBUS_CALL_FLAGS_NONE, + c.G_MAXINT, + fd_list, + null, + &err, + ) orelse { + log.warn("Flatpak.HostCommand failed: {s}", .{err.*.message}); + return Error.FlatpakRPCFail; + }; + defer c.g_variant_unref(reply); + + var pid: u32 = 0; + c.g_variant_get(reply, "(u)", &pid); + log.debug("HostCommand started pid={} subscription={}", .{ + pid, + subscription_id, + }); + + self.updateState(.{ + .started = .{ + .pid = pid, + .subscription = subscription_id, + .loop = loop, + .completion = null, + .loop_xev = null, + }, + }); + } + + /// Helper to update the state and notify waiters via the cv. + fn updateState(self: *FlatpakHostCommand, state: State) void { + self.state_mutex.lock(); + defer self.state_mutex.unlock(); + defer self.state_cv.broadcast(); + self.state = state; + } + + fn onExit( + bus: ?*c.GDBusConnection, + _: [*c]const u8, + _: [*c]const u8, + _: [*c]const u8, + _: [*c]const u8, + params: ?*c.GVariant, + ud: ?*anyopaque, + ) callconv(.C) void { + const self = @as(*FlatpakHostCommand, @ptrCast(@alignCast(ud))); + const state = state: { + self.state_mutex.lock(); + defer self.state_mutex.unlock(); + break :state self.state.started; + }; + + var pid: u32 = 0; + var exit_status_raw: u32 = 0; + c.g_variant_get(params.?, "(uu)", &pid, &exit_status_raw); + if (state.pid != pid) return; + + const exit_status = posix.W.EXITSTATUS(exit_status_raw); + // Update our state + self.updateState(.{ + .exited = .{ + .pid = pid, + .status = exit_status, + }, + }); + if (state.completion) |completion| { + completion.result = exit_status; + completion.timer.?.run( + state.loop_xev.?, + &completion.c_xev, + 0, + anyopaque, + completion.userdata, + (struct { + fn callback( + ud_inner: ?*anyopaque, + l_inner: *xev.Loop, + c_inner: 
*xev.Completion, + r: xev.Timer.RunError!void, + ) xev.CallbackAction { + const c_outer: *Completion = @fieldParentPtr("c_xev", c_inner); + defer if (c_outer.timer) |*t| t.deinit(); + + const result = if (r) |_| c_outer.result.? else |err| err; + c_outer.callback(ud_inner, l_inner, c_outer, result); + return .disarm; + } + }).callback, + ); + } + log.debug("HostCommand exited pid={} status={}", .{ pid, exit_status }); + + // We're done now, so we can unsubscribe + c.g_dbus_connection_signal_unsubscribe(bus.?, state.subscription); + + // We are also done with our loop so we can exit. + c.g_main_loop_quit(state.loop); + } + + fn noopCallback(_: ?*anyopaque, _: *xev.Loop, _: *Completion, _: WaitError!u8) void {} +}; \ No newline at end of file diff --git a/locodiff-250425/results/ghostty_src_os_flatpak.zig/x-ai_grok-code-fast-1/20250909_155741/metadata.json b/locodiff-250425/results/ghostty_src_os_flatpak.zig/x-ai_grok-code-fast-1/20250909_155741/metadata.json new file mode 100644 index 000000000..787e192da --- /dev/null +++ b/locodiff-250425/results/ghostty_src_os_flatpak.zig/x-ai_grok-code-fast-1/20250909_155741/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "ghostty_src_os_flatpak.zig", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/ghostty_src_os_flatpak.zig_prompt.txt", + "expected_file": "locodiff-250425/prompts/ghostty_src_os_flatpak.zig_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:57:41.909671+00:00", + "success": false, + "error": "Output mismatch", + "api_error": false, + "raw_response_length": 17842, + "extracted_output_length": 17831, + "expected_output_length": 17792, + "results_dir": "locodiff-250425/results/ghostty_src_os_flatpak.zig/x-ai_grok-code-fast-1/20250909_155741", + "generation_id": "gen-1757433462-s5MjH0gYqmFjKcW7TjFT", + "cost_usd": 0.00918694, + "prompt_tokens": 12039, + "completion_tokens": 4374, + "total_tokens": 16413, + "native_prompt_tokens": 
12095, + "native_completion_tokens": 4535, + "native_finish_reason": "stop", + "stats_error": null, + "native_tokens_reasoning": 374 +} \ No newline at end of file diff --git a/locodiff-250425/results/ghostty_src_os_flatpak.zig/x-ai_grok-code-fast-1/20250909_155741/output.diff b/locodiff-250425/results/ghostty_src_os_flatpak.zig/x-ai_grok-code-fast-1/20250909_155741/output.diff new file mode 100644 index 000000000..36154fcd0 --- /dev/null +++ b/locodiff-250425/results/ghostty_src_os_flatpak.zig/x-ai_grok-code-fast-1/20250909_155741/output.diff @@ -0,0 +1,28 @@ + +index f77a8d507..7e871f890 100644 +--- a/ghostty_src_os_flatpak.zig_expectedoutput.txt (expected):tmp/tmpd7kfvolm_expected.txt ++++ b/ghostty_src_os_flatpak.zig_extracted.txt (actual):tmp/tmp02vdxrfs_actual.txt +@@ -378,6 +378,7 @@ pub const FlatpakHostCommand = struct { + const subscription_id = c.g_dbus_connection_signal_subscribe( + bus, + "org.freedesktop.Flatpak", ++ "/org/freedesktop/Flatpak/Development", + "org.freedesktop.Flatpak.Development", + "HostCommandExited", + "/org/freedesktop/Flatpak/Development", +@@ -395,14 +396,13 @@ pub const FlatpakHostCommand = struct { + "org.freedesktop.Flatpak", + "/org/freedesktop/Flatpak/Development", + "org.freedesktop.Flatpak.Development", +- "HostCommand", ++ "HostCommandSignal", + params, + c.G_VARIANT_TYPE("(u)"), + c.G_DBUS_CALL_FLAGS_NONE, + c.G_MAXINT, + fd_list, + null, +- null, + &err, + ) orelse { + log.warn("Flatpak.HostCommand failed: {s}", .{err.*.message}); diff --git a/locodiff-250425/results/ghostty_src_os_flatpak.zig/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt b/locodiff-250425/results/ghostty_src_os_flatpak.zig/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt new file mode 100644 index 000000000..08cc03c83 --- /dev/null +++ b/locodiff-250425/results/ghostty_src_os_flatpak.zig/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt @@ -0,0 +1,505 @@ +```zig +const std = @import("std"); +const assert = std.debug.assert; +const 
Allocator = std.mem.Allocator; +const builtin = @import("builtin"); +const posix = std.posix; +const xev = @import("../global.zig").xev; + +const log = std.log.scoped(.flatpak); + +/// Returns true if we're running in a Flatpak environment. +pub fn isFlatpak() bool { + // If we're not on Linux then we'll make this comptime false. + if (comptime builtin.os.tag != .linux) return false; + return if (std.fs.accessAbsolute("/.flatpak-info", .{})) true else |_| false; +} + +/// A struct to help execute commands on the host via the +/// org.freedesktop.Flatpak.Development DBus module. This uses GIO/GLib +/// under the hood. +/// +/// This always spawns its own thread and maintains its own GLib event loop. +/// This makes it easy for the command to behave synchronously similar to +/// std.process.Child. +/// +/// There are lots of chances for low-hanging improvements here (automatic +/// pipes, /dev/null, etc.) but this was purpose built for my needs so +/// it doesn't have all of those. +/// +/// Requires GIO, GLib to be available and linked. +pub const FlatpakHostCommand = struct { + const fd_t = posix.fd_t; + const EnvMap = std.process.EnvMap; + const c = @cImport({ + @cInclude("gio/gio.h"); + @cInclude("gio/gunixfdlist.h"); + }); + + /// Argv are the arguments to call on the host with argv[0] being + /// the command to execute. + argv: []const []const u8, + + /// The cwd for the new process. If this is not set then it will use + /// the current cwd of the calling process. + cwd: ?[:0]const u8 = null, + + /// Environment variables for the child process. If this is null, this + /// does not send any environment variables. + env: ?*const EnvMap = null, + + /// File descriptors to send to the child process. It is up to the + /// caller to create the file descriptors and set them up. + stdin: fd_t, + stdout: fd_t, + stderr: fd_t, + + /// State of the process. This is updated by the dedicated thread it + /// runs in and is protected by the given lock and condition variable. 
+ state: State = .{ .init = {} }, + state_mutex: std.Thread.Mutex = .{}, + state_cv: std.Thread.Condition = .{}, + + /// State the process is in. This can't be inspected directly, you + /// must use getters on the struct to get access. + const State = union(enum) { + /// Initial state + init: void, + + /// Error starting. The error message is only available via logs. + /// (This isn't a fundamental limitation, just didn't need the + /// error message yet) + err: void, + + /// Process started with the given pid on the host. + started: struct { + pid: u32, + loop_xev: ?*xev.Loop, + completion: ?*Completion, + subscription: c.guint, + loop: *c.GMainLoop, + }, + + /// Process exited + exited: struct { + pid: u32, + status: u8, + }, + }; + + pub const Completion = struct { + callback: *const fn (ud: ?*anyopaque, l: *xev.Loop, c: *Completion, r: WaitError!u8) void = noopCallback, + c_xev: xev.Completion = .{}, + userdata: ?*anyopaque = null, + timer: ?xev.Timer = null, + result: ?WaitError!u8 = null, + }; + + /// Errors that are possible from us. + pub const Error = error{ + FlatpakMustBeStarted, + FlatpakSpawnFail, + FlatpakSetupFail, + FlatpakRPCFail, + }; + + pub const WaitError = xev.Timer.RunError || Error; + + /// Spawn the command. This will start the host command. On return, + /// the pid will be available. This must only be called with the + /// state in "init". + /// + /// Precondition: The self pointer MUST be stable. + pub fn spawn(self: *FlatpakHostCommand, alloc: Allocator) !u32 { + const thread = try std.Thread.spawn(.{}, threadMain, .{ self, alloc }); + thread.setName("flatpak-host-command") catch {}; + + // Wait for the process to start or error. 
+ self.state_mutex.lock(); + defer self.state_mutex.unlock(); + while (self.state == .init) self.state_cv.wait(&self.state_mutex); + + return switch (self.state) { + .init => unreachable, + .err => Error.FlatpakSpawnFail, + .started => |v| v.pid, + .exited => |v| v.pid, + }; + } + + /// Wait for the process to end and return the exit status. This + /// can only be called ONCE. Once this returns, the state is reset. + pub fn wait(self: *FlatpakHostCommand) !u8 { + self.state_mutex.lock(); + defer self.state_mutex.unlock(); + + while (true) { + switch (self.state) { + .init => return Error.FlatpakMustBeStarted, + .err => return Error.FlatpakSpawnFail, + .started => {}, + .exited => |v| { + self.state = .{ .init = {} }; + self.state_cv.broadcast(); + return v.status; + }, + } + + self.state_cv.wait(&self.state_mutex); + } + } + + /// Wait for the process to end asynchronously via libxev. This + /// can only be called ONCE. + pub fn waitXev( + self: *FlatpakHostCommand, + loop: *xev.Loop, + completion: *Completion, + comptime Userdata: type, + userdata: ?*Userdata, + comptime cb: *const fn ( + ud: ?*Userdata, + l: *xev.Loop, + c: *Completion, + r: WaitError!u8, + ) void, + ) void { + self.state_mutex.lock(); + defer self.state_mutex.unlock(); + + completion.* = .{ + .callback = (struct { + fn callback( + ud_: ?*anyopaque, + l_inner: *xev.Loop, + c_inner: *Completion, + r: WaitError!u8, + ) void { + const ud = @as(?*Userdata, if (Userdata == void) null else @ptrCast(@alignCast(ud_))); + @call(.always_inline, cb, .{ ud, l_inner, c_inner, r }); + } + }).callback, + .userdata = userdata, + .timer = xev.Timer.init() catch unreachable, // not great, but xev timer can't fail atm + }; + + switch (self.state) { + .init => completion.result = Error.FlatpakMustBeStarted, + .err => completion.result = Error.FlatpakSpawnFail, + .started => |*v| { + v.loop_xev = loop; + v.completion = completion; + return; + }, + .exited => |v| { + completion.result = v.status; + }, + } + + 
completion.timer.?.run( + loop, + &completion.c_xev, + 0, + anyopaque, + completion.userdata, + (struct { + fn callback( + ud: ?*anyopaque, + l_inner: *xev.Loop, + c_inner: *xev.Completion, + r: xev.Timer.RunError!void, + ) xev.CallbackAction { + const c_outer: *Completion = @fieldParentPtr("c_xev", c_inner); + defer if (c_outer.timer) |*t| t.deinit(); + + const result = if (r) |_| c_outer.result.? else |err| err; + c_outer.callback(ud, l_inner, c_outer, result); + return .disarm; + } + }).callback, + ); + } + + /// Send a signal to the started command. This does nothing if the + /// command is not in the started state. + pub fn signal(self: *FlatpakHostCommand, sig: u8, pg: bool) !void { + const pid = pid: { + self.state_mutex.lock(); + defer self.state_mutex.unlock(); + switch (self.state) { + .started => |v| break :pid v.pid, + else => return, + } + }; + + // Get our bus connection. + var g_err: [*c]c.GError = null; + const bus = c.g_bus_get_sync(c.G_BUS_TYPE_SESSION, null, &g_err) orelse { + log.warn("signal error getting bus: {s}", .{g_err.*.message}); + return Error.FlatpakSetupFail; + }; + defer c.g_object_unref(bus); + + const reply = c.g_dbus_connection_call_sync( + bus, + "org.freedesktop.Flatpak", + "/org/freedesktop/Flatpak/Development", + "org.freedesktop.Flatpak.Development", + "HostCommandSignal", + c.g_variant_new( + "(uub)", + pid, + sig, + @as(c_int, @intCast(@intFromBool(pg))), + ), + c.G_VARIANT_TYPE("()"), + c.G_DBUS_CALL_FLAGS_NONE, + c.G_MAXINT, + null, + &g_err, + ); + if (g_err != null) { + log.warn("signal send error: {s}", .{g_err.*.message}); + return; + } + defer c.g_variant_unref(reply); + } + + fn threadMain(self: *FlatpakHostCommand, alloc: Allocator) void { + // Create a new thread-local context so that all our sources go + // to this context and we can run our loop correctly. 
+ const ctx = c.g_main_context_new(); + defer c.g_main_context_unref(ctx); + c.g_main_context_push_thread_default(ctx); + defer c.g_main_context_pop_thread_default(ctx); + + // Get our loop for the current thread + const loop = c.g_main_loop_new(ctx, 1).?; + defer c.g_main_loop_unref(loop); + + // Get our bus connection. This has to remain active until we exit + // the thread otherwise our signals won't be called. + var g_err: [*c]c.GError = null; + const bus = c.g_bus_get_sync(c.G_BUS_TYPE_SESSION, null, &g_err) orelse { + log.warn("spawn error getting bus: {s}", .{g_err.*.message}); + self.updateState(.{ .err = {} }); + return; + }; + defer c.g_object_unref(bus); + + // Spawn the command first. This will setup all our IO. + self.start(alloc, bus, loop) catch |err| { + log.warn("error starting host command: {}", .{err}); + self.updateState(.{ .err = {} }); + return; + }; + + // Run the event loop. It quits in the exit callback. + c.g_main_loop_run(loop); + } + + /// Start the command. This will start the host command and set the + /// pid field on success. This will not wait for completion. + /// + /// Once this is called, the self pointer MUST remain stable. This + /// requirement is due to using GLib under the covers with callbacks. + fn start( + self: *FlatpakHostCommand, + alloc: Allocator, + bus: *c.GDBusConnection, + loop: *c.GMainLoop, + ) !void { + var err: [*c]c.GError = null; + var arena_allocator = std.heap.ArenaAllocator.init(alloc); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + // Our list of file descriptors that we need to send to the process. 
+ const fd_list = c.g_unix_fd_list_new(); + defer c.g_object_unref(fd_list); + if (c.g_unix_fd_list_append(fd_list, self.stdin, &err) < 0) { + log.warn("error adding fd: {s}", .{err.*.message}); + return Error.FlatpakSetupFail; + } + if (c.g_unix_fd_list_append(fd_list, self.stdout, &err) < 0) { + log.warn("error adding fd: {s}", .{err.*.message}); + return Error.FlatpakSetupFail; + } + if (c.g_unix_fd_list_append(fd_list, self.stderr, &err) < 0) { + log.warn("error adding fd: {s}", .{err.*.message}); + return Error.FlatpakSetupFail; + } + + // Build our arguments for the file descriptors. + const fd_builder = c.g_variant_builder_new(c.G_VARIANT_TYPE("a{uh}")); + defer c.g_variant_builder_unref(fd_builder); + c.g_variant_builder_add(fd_builder, "{uh}", @as(c_int, 0), self.stdin); + c.g_variant_builder_add(fd_builder, "{uh}", @as(c_int, 1), self.stdout); + c.g_variant_builder_add(fd_builder, "{uh}", @as(c_int, 2), self.stderr); + + // Build our env vars + const env_builder = c.g_variant_builder_new(c.G_VARIANT_TYPE("a{ss}")); + defer c.g_variant_builder_unref(env_builder); + if (self.env) |env| { + var it = env.iterator(); + while (it.next()) |pair| { + const key = try arena.dupeZ(u8, pair.key_ptr.*); + const value = try arena.dupeZ(u8, pair.value_ptr.*); + c.g_variant_builder_add(env_builder, "{ss}", key.ptr, value.ptr); + } + } + + // Build our args + const args = try arena.alloc(?[*:0]u8, self.argv.len + 1); + for (0.., self.argv) |i, arg| { + const argZ = try arena.dupeZ(u8, arg); + args[i] = argZ.ptr; + } + args[args.len - 1] = null; + + // Get the cwd in case we don't have ours set. A small optimization + // would be to do this only if we need it but this isn't a + // common code path. 
+ const g_cwd = c.g_get_current_dir(); + defer c.g_free(g_cwd); + + // The params for our RPC call + const params = c.g_variant_new( + "(^ay^aay@a{uh}@a{ss}u)", + @as(*const anyopaque, if (self.cwd) |*cwd| cwd.ptr else g_cwd), + args.ptr, + c.g_variant_builder_end(fd_builder), + c.g_variant_builder_end(env_builder), + @as(c_int, 0), + ); + _ = c.g_variant_ref_sink(params); // take ownership + defer c.g_variant_unref(params); + + // Subscribe to exit notifications + const subscription_id = c.g_dbus_connection_signal_subscribe( + bus, + "org.freedesktop.Flatpak", + "/org/freedesktop/Flatpak/Development", + "org.freedesktop.Flatpak.Development", + "HostCommandExited", + "/org/freedesktop/Flatpak/Development", + null, + 0, + onExit, + self, + null, + ); + errdefer c.g_dbus_connection_signal_unsubscribe(bus, subscription_id); + + // Go! + const reply = c.g_dbus_connection_call_with_unix_fd_list_sync( + bus, + "org.freedesktop.Flatpak", + "/org/freedesktop/Flatpak/Development", + "org.freedesktop.Flatpak.Development", + "HostCommandSignal", + params, + c.G_VARIANT_TYPE("(u)"), + c.G_DBUS_CALL_FLAGS_NONE, + c.G_MAXINT, + fd_list, + null, + &err, + ) orelse { + log.warn("Flatpak.HostCommand failed: {s}", .{err.*.message}); + return Error.FlatpakRPCFail; + }; + defer c.g_variant_unref(reply); + + var pid: u32 = 0; + c.g_variant_get(reply, "(u)", &pid); + log.debug("HostCommand started pid={} subscription={}", .{ + pid, + subscription_id, + }); + + self.updateState(.{ + .started = .{ + .pid = pid, + .subscription = subscription_id, + .loop = loop, + .completion = null, + .loop_xev = null, + }, + }); + } + + /// Helper to update the state and notify waiters via the cv. 
+ fn updateState(self: *FlatpakHostCommand, state: State) void { + self.state_mutex.lock(); + defer self.state_mutex.unlock(); + defer self.state_cv.broadcast(); + self.state = state; + } + + fn onExit( + bus: ?*c.GDBusConnection, + _: [*c]const u8, + _: [*c]const u8, + _: [*c]const u8, + _: [*c]const u8, + params: ?*c.GVariant, + ud: ?*anyopaque, + ) callconv(.C) void { + const self = @as(*FlatpakHostCommand, @ptrCast(@alignCast(ud))); + const state = state: { + self.state_mutex.lock(); + defer self.state_mutex.unlock(); + break :state self.state.started; + }; + + var pid: u32 = 0; + var exit_status_raw: u32 = 0; + c.g_variant_get(params.?, "(uu)", &pid, &exit_status_raw); + if (state.pid != pid) return; + + const exit_status = posix.W.EXITSTATUS(exit_status_raw); + // Update our state + self.updateState(.{ + .exited = .{ + .pid = pid, + .status = exit_status, + }, + }); + if (state.completion) |completion| { + completion.result = exit_status; + completion.timer.?.run( + state.loop_xev.?, + &completion.c_xev, + 0, + anyopaque, + completion.userdata, + (struct { + fn callback( + ud_inner: ?*anyopaque, + l_inner: *xev.Loop, + c_inner: *xev.Completion, + r: xev.Timer.RunError!void, + ) xev.CallbackAction { + const c_outer: *Completion = @fieldParentPtr("c_xev", c_inner); + defer if (c_outer.timer) |*t| t.deinit(); + + const result = if (r) |_| c_outer.result.? else |err| err; + c_outer.callback(ud_inner, l_inner, c_outer, result); + return .disarm; + } + }).callback, + ); + } + log.debug("HostCommand exited pid={} status={}", .{ pid, exit_status }); + + // We're done now, so we can unsubscribe + c.g_dbus_connection_signal_unsubscribe(bus.?, state.subscription); + + // We are also done with our loop so we can exit. 
+ c.g_main_loop_quit(state.loop); + } + + fn noopCallback(_: ?*anyopaque, _: *xev.Loop, _: *Completion, _: WaitError!u8) void {} +}; +``` \ No newline at end of file diff --git a/locodiff-250425/results/ghostty_src_terminal_kitty_graphics_image.zig/x-ai_grok-code-fast-1/20250909_155808/extracted_output.txt b/locodiff-250425/results/ghostty_src_terminal_kitty_graphics_image.zig/x-ai_grok-code-fast-1/20250909_155808/extracted_output.txt new file mode 100644 index 000000000..e8b7e23e8 --- /dev/null +++ b/locodiff-250425/results/ghostty_src_terminal_kitty_graphics_image.zig/x-ai_grok-code-fast-1/20250909_155808/extracted_output.txt @@ -0,0 +1,415 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const assert = std.debug.assert; +const Allocator = std.mem.Allocator; +const ArenaAllocator = std.heap.ArenaAllocator; +const posix = std.posix; +const fastmem = @import("../../fastmem.zig"); +const command = @import("graphics_command.zig"); +const point = @import("../point.zig"); +const PageList = @import("../PageList.zig"); +const internal_os = @import("../../os/main.zig"); +const wuffs = @import("wuffs"); + +const log = std.log.scoped(.kitty_gfx); + +/// Maximum width or height of an image. Taken directly from Kitty. +const max_dimension = 10000; + +/// Maximum size in bytes, taken from Kitty. +const max_size = 400 * 1024 * 1024; // 400MB + +/// An image that is still being loaded. The image should be initialized +/// using init on the first chunk and then addData for each subsequent +/// chunk. Once all chunks have been added, complete should be called +/// to finalize the image. +pub const LoadingImage = struct { + /// The in-progress image. The first chunk must have all the metadata + /// so this comes from that initially. + image: Image, + + /// The data that is being built up. + data: std.ArrayListUnmanaged(u8) = .{}, + + /// This is non-null when a transmit and display command is given + /// so that we display the image after it is fully loaded. 
+ display: ?command.Display = null, + + /// Quiet is the quiet settings for the initial load command. This is + /// used if q isn't set on subsequent chunks. + quiet: command.Command.Quiet, + + /// Initialize a chunked immage from the first image transmission. + /// If this is a multi-chunk image, this should only be the FIRST + /// chunk. + pub fn init(alloc: Allocator, cmd: *const command.Command) !LoadingImage { + // We must have data to load an image + if (cmd.data.len == 0) return error.InvalidData; + + // Build our initial image from the properties sent via the control. + // These can be overwritten by the data loading process. For example, + // PNG loading sets the width/height from the data. + const t = cmd.transmission().?; + var result: LoadingImage = .{ + .image = .{ + .id = t.image_id, + .number = t.image_number, + .width = t.width, + .height = t.height, + .compression = t.compression, + .format = t.format, + }, + + .display = cmd.display(), + .quiet = cmd.quiet, + }; + + // Special case for the direct medium, we just add the chunk directly. + if (t.medium == .direct) { + try result.addData(alloc, cmd.data); + return result; + } + + // Otherwise, the payload data is guaranteed to be a path. + + if (comptime builtin.os.tag != .windows) { + if (std.mem.indexOfScalar(u8, cmd.data, 0) != null) { + // posix.realpath *asserts* that the path does not have + // internal nulls instead of erroring. + log.warn("failed to get absolute path: BadPathName", .{}); + return error.InvalidData; + } + } + + var abs_buf: [std.fs.max_path_bytes]u8 = undefined; + const path = switch (t.medium) { + .direct => unreachable, // handled above + .file, .temporary_file => posix.realpath(cmd.data, &abs_buf) catch |err| { + log.warn("failed to get absolute path: {}", .{err}); + return error.InvalidData; + }, + .shared_memory => cmd.data, + }; + + // Depending on the medium, load the data from the path. 
+ switch (t.medium) { + .direct => unreachable, // handled above + .file => try result.readFile(.file, alloc, t, path), + .temporary_file => try result.readFile(.temporary_file, alloc, t, path), + .shared_memory => try result.readSharedMemory(alloc, t, path), + } + + return result; + } + + pub fn deinit(self: *LoadingImage, alloc: Allocator) void { + self.image.deinit(alloc); + self.data.deinit(alloc); + } + + pub fn destroy(self: *LoadingImage, alloc: Allocator) void { + self.deinit(alloc); + alloc.destroy(self); + } + + /// Adds a chunk of data to the image. Use this if the image + /// is coming in chunks (the "m" parameter in the protocol). + pub fn addData(self: *LoadingImage, alloc: Allocator, data: []const u8) !void { + // If no data, skip + if (data.len == 0) return; + + // If our data would get too big, return an error + if (self.data.items.len + data.len > max_size) { + log.warn("image data too large max_size={}", .{max_size}); + return error.InvalidData; + } + + // Ensure we have enough room to add the data + // to the end of the ArrayList before doing so. + try self.data.ensureUnusedCapacity(alloc, data.len); + + const start_i = self.data.items.len; + self.data.items.len = start_i + data.len; + fastmem.copy(u8, self.data.items[start_i..], data); + } + + /// Complete the chunked image, returning a completed image. + pub fn complete(self: *LoadingImage, alloc: Allocator) !Image { + const img = &self.image; + + // Decompress the data if it is compressed. + try self.decompress(alloc); + + // Decode the png if we have to + if (img.format == .png) try self.decodePng(alloc); + + // Validate our dimensions. 
+ if (img.width == 0 or img.height == 0) return error.DimensionsRequired; + if (img.width > max_dimension or img.height > max_dimension) return error.DimensionsTooLarge; + + // Data length must be what we expect + const bpp = img.format.bpp(); + const expected_len = img.width * img.height * bpp; + const actual_len = self.data.items.len; + if (actual_len != expected_len) { + std.log.warn( + "unexpected length image id={} width={} height={} bpp={} expected_len={} actual_len={}", + .{ img.id, img.width, img.height, bpp, expected_len, actual_len }, + ); + return error.InvalidData; + } + + // Everything looks good, copy the image data over. + var result = self.image; + result.data = try self.data.toOwnedSlice(alloc); + errdefer result.deinit(alloc); + self.image = .{}; + try result.complete(alloc); + return result; + } + + /// Debug function to write the data to a file. This is useful for + /// capturing some test data for unit tests. + pub fn debugDump(self: LoadingImage) !void { + if (comptime builtin.mode != .Debug) @compileError("debugDump in non-debug"); + + var buf: [1024]u8 = undefined; + const filename = try std.fmt.bufPrint( + &buf, + "image-{s}-{s}-{d}x{d}-{}.data", + .{ + @tagName(self.image.format), + @tagName(self.image.compression), + self.image.width, + self.image.height, + self.image.id, + }, + ); + const cwd = std.fs.cwd(); + const f = try cwd.createFile(filename, .{}); + defer f.close(); + + const writer = f.writer(); + try writer.writeAll(self.data.items); + } + + /// Decompress the data in-place. 
+ fn decompress(self: *LoadingImage, alloc: Allocator) !void { + return switch (self.image.compression) { + .none => {}, + .zlib_deflate => self.decompressZlib(alloc), + }; + } + + fn decompressZlib(self: *LoadingImage, alloc: Allocator) !void { + // Open our zlib stream + var fbs = std.io.fixedBufferStream(self.data.items); + var stream = std.compress.zlib.decompressor(fbs.reader()); + + // Write it to an array list + var list = std.ArrayList(u8).init(alloc); + defer list.deinit(); + stream.reader().readAllArrayList(&list, max_size) catch |err| { + log.warn("failed to read decompressed data: {}", .{err}); + return error.DecompressionFailed; + }; + + // Empty our current data list, take ownership over managed array list + self.data.deinit(alloc); + self.data = .{ .items = list.items, .capacity = list.capacity }; + + // Make sure we note that our image is no longer compressed + self.image.compression = .none; + } + + /// Decode the data as PNG. This will also updated the image dimensions. + fn decodePng(self: *LoadingImage, alloc: Allocator) !void { + assert(self.image.format == .png); + + const result = wuffs.png.decode( + alloc, + self.data.items, + ) catch |err| switch (err) { + error.WuffsError => return error.InvalidData, + error.OutOfMemory => return error.OutOfMemory, + }; + defer alloc.free(result.data); + + if (result.data.len > max_size) { + log.warn("png image too large size={} max_size={}", .{ result.data.len, max_size }); + return error.InvalidData; + } + + // Replace our data + self.data.deinit(alloc); + self.data = .{}; + try self.data.ensureUnusedCapacity(alloc, result.data.len); + try self.data.appendSlice(alloc, result.data[0..result.data.len]); + + // Store updated image dimensions + self.image.width = result.width; + self.image.height = result.height; + self.image.format = .rgba; + } + + /// Reads the data from a shared memory segment. 
+ fn readSharedMemory( + self: *LoadingImage, + alloc: Allocator, + t: command.Transmission, + path: []const u8, + ) !void { + // windows is currently unsupported, does it support shm? + if (comptime builtin.target.os.tag == .windows) { + return error.UnsupportedMedium; + } + + // libc is required for shm_open + if (comptime !builtin.link_libc) { + return error.UnsupportedMedium; + } + + // Since we're only supporting posix then max_path_bytes should + // be enough to stack allocate the path. + var buf: [std.fs.max_path_bytes]u8 = undefined; + const pathz = std.fmt.bufPrintZ(&buf, "{s}", .{path}) catch return error.InvalidData; + + const fd = std.c.shm_open(pathz, @as(c_int, @bitCast(std.c.O{ .ACCMODE = .RDONLY })), 0); + switch (std.posix.errno(fd)) { + .SUCCESS => {}, + else => |err| { + log.warn("unable to open shared memory {s}: {}", .{ path, err }); + return error.InvalidData; + }, + } + defer _ = std.c.close(fd); + defer _ = std.c.shm_unlink(pathz); + + // The size from stat on may be larger than our expected size because + // shared memory has to be a multiple of the page size. + const stat_size: usize = stat: { + const stat = std.posix.fstat(fd) catch |err| { + log.warn("unable to fstat shared memory {s}: {}", .{ path, err }); + return error.InvalidData; + }; + if (stat.size <= 0) return error.InvalidData; + break :stat @intCast(stat.size); + }; + + const expected_size: usize = switch (self.image.format) { + // Png we decode the full data size because later decoding will + // get the proper dimensions and assert validity. + .png => stat_size, + + // For these formats we have a size we must have. + .gray, .gray_alpha, .rgb, .rgba => |f| size: { + const bpp = f.bpp(); + break :size self.image.width * self.image.height * bpp; + }, + }; + + // Our stat size must be at least the expected size otherwise + // the shared memory data is invalid. 
+ if (stat_size < expected_size) { + log.warn( + "shared memory size too small expected={} actual={}", + .{ expected_size, stat_size }, + ); + return error.InvalidData; + } + + const map = std.posix.mmap( + null, + stat_size, // mmap always uses the stat size + std.c.PROT.READ, + std.c.MAP{ .TYPE = .SHARED }, + fd, + 0, + ) catch |err| { + log.warn("unable to mmap shared memory {s}: {}", .{ path, err }); + return error.InvalidData; + }; + defer std.posix.munmap(map); + + const start: usize = @intCast(t.offset); + const end: usize = if (t.size > 0) @min( + @as(usize, @intCast(t.offset)) + @as(usize, @intCast(t.size)), + expected_size, + ) else expected_size; + + assert(self.data.items.len == 0); + try self.data.appendSlice(alloc, map[start..end]); + } + + /// Reads the data from a temporary file and returns it. This allocates + /// and does not free any of the data, so the caller must free it. + /// + /// This will also delete the temporary file if it is in a safe location. + fn readFile( + self: *LoadingImage, + comptime medium: command.Transmission.Medium, + alloc: Allocator, + t: command.Transmission, + path: []const u8, + ) !void { + switch (medium) { + .file, .temporary_file => {}, + else => @compileError("readFile only supports file and temporary_file"), + } + + // Verify file seems "safe". This is logic copied directly from Kitty, + // mostly. This is really rough but it will catch obvious bad actors. 
+ if (std.mem.startsWith(u8, path, "/proc/") or + std.mem.startsWith(u8, path, "/sys/") or + (std.mem.startsWith(u8, path, "/dev/") and + !std.mem.startsWith(u8, path, "/dev/shm/"))) + { + return error.InvalidData; + } + + // Temporary file logic + if (medium == .temporary_file) { + if (!isPathInTempDir(path)) return error.TemporaryFileNotInTempDir; + if (std.mem.indexOf(u8, path, "tty-graphics-protocol") == null) { + return error.TemporaryFileNotNamedCorrectly; + } + } + defer if (medium == .temporary_file) { + posix.unlink(path) catch |err| { + log.warn("failed to delete temporary file: {}", .{err}); + }; + }; + + var file = std.fs.cwd().openFile(path, .{}) catch |err| { + log.warn("failed to open temporary file: {}", .{err}); + return error.InvalidData; + }; + defer file.close(); + + // File must be a regular file + if (file.stat()) |stat| { + if (stat.kind != .file) { + log.warn("file is not a regular file kind={}", .{stat.kind}); + return error.InvalidData; + } + } else |err| { + log.warn("failed to stat file: {}", .{err}); + return error.InvalidData; + } + + if (t.offset > 0) { + file.seekTo(@intCast(t.offset)) catch |err| { + log.warn("failed to seek to offset {}: {}", .{ t.offset, err }); + return error.InvalidData; + }; + } + + var buf_reader = std.io.bufferedReader(file.reader()); + const reader = buf_reader.reader(); + + // Read the file + var managed = std.ArrayList(u8).init(alloc); + errdefer managed.deinit(); + const size: usize = if (t.size > 0) @min(t.size, max_size) else max_size; + reader.readAllArrayList(&managed, size) catch |err| { + log.warn("failed to read temporary file: {}", \ No newline at end of file diff --git a/locodiff-250425/results/ghostty_src_terminal_kitty_graphics_image.zig/x-ai_grok-code-fast-1/20250909_155808/metadata.json b/locodiff-250425/results/ghostty_src_terminal_kitty_graphics_image.zig/x-ai_grok-code-fast-1/20250909_155808/metadata.json new file mode 100644 index 000000000..c5accd948 --- /dev/null +++ 
b/locodiff-250425/results/ghostty_src_terminal_kitty_graphics_image.zig/x-ai_grok-code-fast-1/20250909_155808/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "ghostty_src_terminal_kitty_graphics_image.zig", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/ghostty_src_terminal_kitty_graphics_image.zig_prompt.txt", + "expected_file": "locodiff-250425/prompts/ghostty_src_terminal_kitty_graphics_image.zig_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:58:08.346061+00:00", + "success": false, + "error": "Output mismatch", + "api_error": false, + "raw_response_length": 15582, + "extracted_output_length": 15575, + "expected_output_length": 28465, + "results_dir": "locodiff-250425/results/ghostty_src_terminal_kitty_graphics_image.zig/x-ai_grok-code-fast-1/20250909_155808", + "generation_id": "gen-1757433488-uvlC5X384hlzHaQqAjcS", + "cost_usd": 0.01580438, + "prompt_tokens": 40582, + "completion_tokens": 5493, + "total_tokens": 46075, + "native_prompt_tokens": 40087, + "native_completion_tokens": 9999, + "native_finish_reason": "length", + "stats_error": null, + "native_tokens_reasoning": 6453 +} \ No newline at end of file diff --git a/locodiff-250425/results/ghostty_src_terminal_kitty_graphics_image.zig/x-ai_grok-code-fast-1/20250909_155808/output.diff b/locodiff-250425/results/ghostty_src_terminal_kitty_graphics_image.zig/x-ai_grok-code-fast-1/20250909_155808/output.diff new file mode 100644 index 000000000..e85a73ef8 --- /dev/null +++ b/locodiff-250425/results/ghostty_src_terminal_kitty_graphics_image.zig/x-ai_grok-code-fast-1/20250909_155808/output.diff @@ -0,0 +1,703 @@ + +index 6abe1ca7f..e8b7e23e8 100644 +--- a/ghostty_src_terminal_kitty_graphics_image.zig_expectedoutput.txt (expected):tmp/tmpgad7fi5__expected.txt ++++ b/ghostty_src_terminal_kitty_graphics_image.zig_extracted.txt (actual):tmp/tmpwv80nnlo_actual.txt +@@ -4,7 +4,6 @@ const assert = std.debug.assert; + const 
Allocator = std.mem.Allocator; + const ArenaAllocator = std.heap.ArenaAllocator; + const posix = std.posix; +- + const fastmem = @import("../../fastmem.zig"); + const command = @import("graphics_command.zig"); + const point = @import("../point.zig"); +@@ -44,6 +43,9 @@ pub const LoadingImage = struct { + /// If this is a multi-chunk image, this should only be the FIRST + /// chunk. + pub fn init(alloc: Allocator, cmd: *const command.Command) !LoadingImage { ++ // We must have data to load an image ++ if (cmd.data.len == 0) return error.InvalidData; ++ + // Build our initial image from the properties sent via the control. + // These can be overwritten by the data loading process. For example, + // PNG loading sets the width/height from the data. +@@ -100,6 +102,156 @@ pub const LoadingImage = struct { + return result; + } + ++ pub fn deinit(self: *LoadingImage, alloc: Allocator) void { ++ self.image.deinit(alloc); ++ self.data.deinit(alloc); ++ } ++ ++ pub fn destroy(self: *LoadingImage, alloc: Allocator) void { ++ self.deinit(alloc); ++ alloc.destroy(self); ++ } ++ ++ /// Adds a chunk of data to the image. Use this if the image ++ /// is coming in chunks (the "m" parameter in the protocol). ++ pub fn addData(self: *LoadingImage, alloc: Allocator, data: []const u8) !void { ++ // If no data, skip ++ if (data.len == 0) return; ++ ++ // If our data would get too big, return an error ++ if (self.data.items.len + data.len > max_size) { ++ log.warn("image data too large max_size={}", .{max_size}); ++ return error.InvalidData; ++ } ++ ++ // Ensure we have enough room to add the data ++ // to the end of the ArrayList before doing so. ++ try self.data.ensureUnusedCapacity(alloc, data.len); ++ ++ const start_i = self.data.items.len; ++ self.data.items.len = start_i + data.len; ++ fastmem.copy(u8, self.data.items[start_i..], data); ++ } ++ ++ /// Complete the chunked image, returning a completed image. 
++ pub fn complete(self: *LoadingImage, alloc: Allocator) !Image { ++ const img = &self.image; ++ ++ // Decompress the data if it is compressed. ++ try self.decompress(alloc); ++ ++ // Decode the png if we have to ++ if (img.format == .png) try self.decodePng(alloc); ++ ++ // Validate our dimensions. ++ if (img.width == 0 or img.height == 0) return error.DimensionsRequired; ++ if (img.width > max_dimension or img.height > max_dimension) return error.DimensionsTooLarge; ++ ++ // Data length must be what we expect ++ const bpp = img.format.bpp(); ++ const expected_len = img.width * img.height * bpp; ++ const actual_len = self.data.items.len; ++ if (actual_len != expected_len) { ++ std.log.warn( ++ "unexpected length image id={} width={} height={} bpp={} expected_len={} actual_len={}", ++ .{ img.id, img.width, img.height, bpp, expected_len, actual_len }, ++ ); ++ return error.InvalidData; ++ } ++ ++ // Everything looks good, copy the image data over. ++ var result = self.image; ++ result.data = try self.data.toOwnedSlice(alloc); ++ errdefer result.deinit(alloc); ++ self.image = .{}; ++ try result.complete(alloc); ++ return result; ++ } ++ ++ /// Debug function to write the data to a file. This is useful for ++ /// capturing some test data for unit tests. ++ pub fn debugDump(self: LoadingImage) !void { ++ if (comptime builtin.mode != .Debug) @compileError("debugDump in non-debug"); ++ ++ var buf: [1024]u8 = undefined; ++ const filename = try std.fmt.bufPrint( ++ &buf, ++ "image-{s}-{s}-{d}x{d}-{}.data", ++ .{ ++ @tagName(self.image.format), ++ @tagName(self.image.compression), ++ self.image.width, ++ self.image.height, ++ self.image.id, ++ }, ++ ); ++ const cwd = std.fs.cwd(); ++ const f = try cwd.createFile(filename, .{}); ++ defer f.close(); ++ ++ const writer = f.writer(); ++ try writer.writeAll(self.data.items); ++ } ++ ++ /// Decompress the data in-place. 
++ fn decompress(self: *LoadingImage, alloc: Allocator) !void { ++ return switch (self.image.compression) { ++ .none => {}, ++ .zlib_deflate => self.decompressZlib(alloc), ++ }; ++ } ++ ++ fn decompressZlib(self: *LoadingImage, alloc: Allocator) !void { ++ // Open our zlib stream ++ var fbs = std.io.fixedBufferStream(self.data.items); ++ var stream = std.compress.zlib.decompressor(fbs.reader()); ++ ++ // Write it to an array list ++ var list = std.ArrayList(u8).init(alloc); ++ defer list.deinit(); ++ stream.reader().readAllArrayList(&list, max_size) catch |err| { ++ log.warn("failed to read decompressed data: {}", .{err}); ++ return error.DecompressionFailed; ++ }; ++ ++ // Empty our current data list, take ownership over managed array list ++ self.data.deinit(alloc); ++ self.data = .{ .items = list.items, .capacity = list.capacity }; ++ ++ // Make sure we note that our image is no longer compressed ++ self.image.compression = .none; ++ } ++ ++ /// Decode the data as PNG. This will also updated the image dimensions. ++ fn decodePng(self: *LoadingImage, alloc: Allocator) !void { ++ assert(self.image.format == .png); ++ ++ const result = wuffs.png.decode( ++ alloc, ++ self.data.items, ++ ) catch |err| switch (err) { ++ error.WuffsError => return error.InvalidData, ++ error.OutOfMemory => return error.OutOfMemory, ++ }; ++ defer alloc.free(result.data); ++ ++ if (result.data.len > max_size) { ++ log.warn("png image too large size={} max_size={}", .{ result.data.len, max_size }); ++ return error.InvalidData; ++ } ++ ++ // Replace our data ++ self.data.deinit(alloc); ++ self.data = .{}; ++ try self.data.ensureUnusedCapacity(alloc, result.data.len); ++ try self.data.appendSlice(alloc, result.data[0..result.data.len]); ++ ++ // Store updated image dimensions ++ self.image.width = result.width; ++ self.image.height = result.height; ++ self.image.format = .rgba; ++ } ++ + /// Reads the data from a shared memory segment. 
+ fn readSharedMemory( + self: *LoadingImage, +@@ -179,8 +331,6 @@ pub const LoadingImage = struct { + }; + defer std.posix.munmap(map); + +- // Our end size always uses the expected size so we cut off the +- // padding for mmap alignment. + const start: usize = @intCast(t.offset); + const end: usize = if (t.size > 0) @min( + @as(usize, @intCast(t.offset)) + @as(usize, @intCast(t.size)), +@@ -262,567 +412,4 @@ pub const LoadingImage = struct { + errdefer managed.deinit(); + const size: usize = if (t.size > 0) @min(t.size, max_size) else max_size; + reader.readAllArrayList(&managed, size) catch |err| { +- log.warn("failed to read temporary file: {}", .{err}); +- return error.InvalidData; +- }; +- +- // Set our data +- assert(self.data.items.len == 0); +- self.data = .{ .items = managed.items, .capacity = managed.capacity }; +- } +- +- /// Returns true if path appears to be in a temporary directory. +- /// Copies logic from Kitty. +- fn isPathInTempDir(path: []const u8) bool { +- if (std.mem.startsWith(u8, path, "/tmp")) return true; +- if (std.mem.startsWith(u8, path, "/dev/shm")) return true; +- if (internal_os.allocTmpDir(std.heap.page_allocator)) |dir| { +- defer internal_os.freeTmpDir(std.heap.page_allocator, dir); +- if (std.mem.startsWith(u8, path, dir)) return true; +- +- // The temporary dir is sometimes a symlink. On macOS for +- // example /tmp is /private/var/... +- var buf: [std.fs.max_path_bytes]u8 = undefined; +- if (posix.realpath(dir, &buf)) |real_dir| { +- if (std.mem.startsWith(u8, path, real_dir)) return true; +- } else |_| {} +- } +- +- return false; +- } +- +- pub fn deinit(self: *LoadingImage, alloc: Allocator) void { +- self.image.deinit(alloc); +- self.data.deinit(alloc); +- } +- +- pub fn destroy(self: *LoadingImage, alloc: Allocator) void { +- self.deinit(alloc); +- alloc.destroy(self); +- } +- +- /// Adds a chunk of data to the image. Use this if the image +- /// is coming in chunks (the "m" parameter in the protocol). 
+- pub fn addData(self: *LoadingImage, alloc: Allocator, data: []const u8) !void { +- // If no data, skip +- if (data.len == 0) return; +- +- // If our data would get too big, return an error +- if (self.data.items.len + data.len > max_size) { +- log.warn("image data too large max_size={}", .{max_size}); +- return error.InvalidData; +- } +- +- // Ensure we have enough room to add the data +- // to the end of the ArrayList before doing so. +- try self.data.ensureUnusedCapacity(alloc, data.len); +- +- const start_i = self.data.items.len; +- self.data.items.len = start_i + data.len; +- fastmem.copy(u8, self.data.items[start_i..], data); +- } +- +- /// Complete the chunked image, returning a completed image. +- pub fn complete(self: *LoadingImage, alloc: Allocator) !Image { +- const img = &self.image; +- +- // Decompress the data if it is compressed. +- try self.decompress(alloc); +- +- // Decode the png if we have to +- if (img.format == .png) try self.decodePng(alloc); +- +- // Validate our dimensions. +- if (img.width == 0 or img.height == 0) return error.DimensionsRequired; +- if (img.width > max_dimension or img.height > max_dimension) return error.DimensionsTooLarge; +- +- // Data length must be what we expect +- const bpp = img.format.bpp(); +- const expected_len = img.width * img.height * bpp; +- const actual_len = self.data.items.len; +- if (actual_len != expected_len) { +- std.log.warn( +- "unexpected length image id={} width={} height={} bpp={} expected_len={} actual_len={}", +- .{ img.id, img.width, img.height, bpp, expected_len, actual_len }, +- ); +- return error.InvalidData; +- } +- +- // Set our time +- self.image.transmit_time = std.time.Instant.now() catch |err| { +- log.warn("failed to get time: {}", .{err}); +- return error.InternalError; +- }; +- +- // Everything looks good, copy the image data over. 
+- var result = self.image; +- result.data = try self.data.toOwnedSlice(alloc); +- errdefer result.deinit(alloc); +- self.image = .{}; +- return result; +- } +- +- /// Debug function to write the data to a file. This is useful for +- /// capturing some test data for unit tests. +- pub fn debugDump(self: LoadingImage) !void { +- if (comptime builtin.mode != .Debug) @compileError("debugDump in non-debug"); +- +- var buf: [1024]u8 = undefined; +- const filename = try std.fmt.bufPrint( +- &buf, +- "image-{s}-{s}-{d}x{d}-{}.data", +- .{ +- @tagName(self.image.format), +- @tagName(self.image.compression), +- self.image.width, +- self.image.height, +- self.image.id, +- }, +- ); +- const cwd = std.fs.cwd(); +- const f = try cwd.createFile(filename, .{}); +- defer f.close(); +- +- const writer = f.writer(); +- try writer.writeAll(self.data.items); +- } +- +- /// Decompress the data in-place. +- fn decompress(self: *LoadingImage, alloc: Allocator) !void { +- return switch (self.image.compression) { +- .none => {}, +- .zlib_deflate => self.decompressZlib(alloc), +- }; +- } +- +- fn decompressZlib(self: *LoadingImage, alloc: Allocator) !void { +- // Open our zlib stream +- var fbs = std.io.fixedBufferStream(self.data.items); +- var stream = std.compress.zlib.decompressor(fbs.reader()); +- +- // Write it to an array list +- var list = std.ArrayList(u8).init(alloc); +- errdefer list.deinit(); +- stream.reader().readAllArrayList(&list, max_size) catch |err| { +- log.warn("failed to read decompressed data: {}", .{err}); +- return error.DecompressionFailed; +- }; +- +- // Empty our current data list, take ownership over managed array list +- self.data.deinit(alloc); +- self.data = .{ .items = list.items, .capacity = list.capacity }; +- +- // Make sure we note that our image is no longer compressed +- self.image.compression = .none; +- } +- +- /// Decode the data as PNG. This will also updated the image dimensions. 
+- fn decodePng(self: *LoadingImage, alloc: Allocator) !void { +- assert(self.image.format == .png); +- +- const result = wuffs.png.decode( +- alloc, +- self.data.items, +- ) catch |err| switch (err) { +- error.WuffsError => return error.InvalidData, +- error.OutOfMemory => return error.OutOfMemory, +- }; +- defer alloc.free(result.data); +- +- if (result.data.len > max_size) { +- log.warn("png image too large size={} max_size={}", .{ result.data.len, max_size }); +- return error.InvalidData; +- } +- +- // Replace our data +- self.data.deinit(alloc); +- self.data = .{}; +- try self.data.ensureUnusedCapacity(alloc, result.data.len); +- try self.data.appendSlice(alloc, result.data[0..result.data.len]); +- +- // Store updated image dimensions +- self.image.width = result.width; +- self.image.height = result.height; +- self.image.format = .rgba; +- } +-}; +- +-/// Image represents a single fully loaded image. +-pub const Image = struct { +- id: u32 = 0, +- number: u32 = 0, +- width: u32 = 0, +- height: u32 = 0, +- format: command.Transmission.Format = .rgb, +- compression: command.Transmission.Compression = .none, +- data: []const u8 = "", +- transmit_time: std.time.Instant = undefined, +- +- /// Set this to true if this image was loaded by a command that +- /// doesn't specify an ID or number, since such commands should +- /// not be responded to, even though we do currently give them +- /// IDs in the public range (which is bad!). 
+- implicit_id: bool = false, +- +- pub const Error = error{ +- InternalError, +- InvalidData, +- DecompressionFailed, +- DimensionsRequired, +- DimensionsTooLarge, +- FilePathTooLong, +- TemporaryFileNotInTempDir, +- TemporaryFileNotNamedCorrectly, +- UnsupportedFormat, +- UnsupportedMedium, +- UnsupportedDepth, +- }; +- +- pub fn deinit(self: *Image, alloc: Allocator) void { +- if (self.data.len > 0) alloc.free(self.data); +- } +- +- /// Mostly for logging +- pub fn withoutData(self: *const Image) Image { +- var copy = self.*; +- copy.data = ""; +- return copy; +- } +-}; +- +-/// The rect taken up by some image placement, in grid cells. This will +-/// be rounded up to the nearest grid cell since we can't place images +-/// in partial grid cells. +-pub const Rect = struct { +- top_left: PageList.Pin, +- bottom_right: PageList.Pin, +-}; +- +-// This specifically tests we ALLOW invalid RGB data because Kitty +-// documents that this should work. +-test "image load with invalid RGB data" { +- const testing = std.testing; +- const alloc = testing.allocator; +- +- // _Gi=31,s=1,v=1,a=q,t=d,f=24;AAAA\ +- var cmd: command.Command = .{ +- .control = .{ .transmit = .{ +- .format = .rgb, +- .width = 1, +- .height = 1, +- .image_id = 31, +- } }, +- .data = try alloc.dupe(u8, "AAAA"), +- }; +- defer cmd.deinit(alloc); +- var loading = try LoadingImage.init(alloc, &cmd); +- defer loading.deinit(alloc); +-} +- +-test "image load with image too wide" { +- const testing = std.testing; +- const alloc = testing.allocator; +- +- var cmd: command.Command = .{ +- .control = .{ .transmit = .{ +- .format = .rgb, +- .width = max_dimension + 1, +- .height = 1, +- .image_id = 31, +- } }, +- .data = try alloc.dupe(u8, "AAAA"), +- }; +- defer cmd.deinit(alloc); +- var loading = try LoadingImage.init(alloc, &cmd); +- defer loading.deinit(alloc); +- try testing.expectError(error.DimensionsTooLarge, loading.complete(alloc)); +-} +- +-test "image load with image too tall" { +- const testing = 
std.testing; +- const alloc = testing.allocator; +- +- var cmd: command.Command = .{ +- .control = .{ .transmit = .{ +- .format = .rgb, +- .height = max_dimension + 1, +- .width = 1, +- .image_id = 31, +- } }, +- .data = try alloc.dupe(u8, "AAAA"), +- }; +- defer cmd.deinit(alloc); +- var loading = try LoadingImage.init(alloc, &cmd); +- defer loading.deinit(alloc); +- try testing.expectError(error.DimensionsTooLarge, loading.complete(alloc)); +-} +- +-test "image load: rgb, zlib compressed, direct" { +- const testing = std.testing; +- const alloc = testing.allocator; +- +- var cmd: command.Command = .{ +- .control = .{ .transmit = .{ +- .format = .rgb, +- .medium = .direct, +- .compression = .zlib_deflate, +- .height = 96, +- .width = 128, +- .image_id = 31, +- } }, +- .data = try alloc.dupe( +- u8, +- @embedFile("testdata/ghostty_src_terminal_kitty_graphics_image.zig_expectedoutput.txt (expected): rgb, not compressed, direct" { +- const testing = std.testing; +- const alloc = testing.allocator; +- +- var cmd: command.Command = .{ +- .control = .{ .transmit = .{ +- .format = .rgb, +- .medium = .direct, +- .compression = .none, +- .width = 20, +- .height = 15, +- .image_id = 31, +- } }, +- .data = try alloc.dupe( +- u8, +- @embedFile("testdata/ghostty_src_terminal_kitty_graphics_image.zig_expectedoutput.txt (expected): rgb, zlib compressed, direct, chunked" { +- const testing = std.testing; +- const alloc = testing.allocator; +- +- const data = @embedFile("testdata/ghostty_src_terminal_kitty_graphics_image.zig_expectedoutput.txt (expected): command.Command = .{ +- .control = .{ .transmit = .{ +- .format = .rgb, +- .medium = .direct, +- .compression = .zlib_deflate, +- .height = 96, +- .width = 128, +- .image_id = 31, +- .more_chunks = true, +- } }, +- .data = try alloc.dupe(u8, data[0..1024]), +- }; +- defer cmd.deinit(alloc); +- var loading = try LoadingImage.init(alloc, &cmd); +- defer loading.deinit(alloc); +- +- // Read our remaining chunks +- var fbs = 
std.io.fixedBufferStream(data[1024..]); +- var buf: [1024]u8 = undefined; +- while (fbs.reader().readAll(&buf)) |size| { +- try loading.addData(alloc, buf[0..size]); +- if (size < buf.len) break; +- } else |err| return err; +- +- // Complete +- var img = try loading.complete(alloc); +- defer img.deinit(alloc); +- try testing.expect(img.compression == .none); +-} +- +-test "image load: rgb, zlib compressed, direct, chunked with zero initial chunk" { +- const testing = std.testing; +- const alloc = testing.allocator; +- +- const data = @embedFile("testdata/ghostty_src_terminal_kitty_graphics_image.zig_expectedoutput.txt (expected): command.Command = .{ +- .control = .{ .transmit = .{ +- .format = .rgb, +- .medium = .direct, +- .compression = .zlib_deflate, +- .height = 96, +- .width = 128, +- .image_id = 31, +- .more_chunks = true, +- } }, +- }; +- defer cmd.deinit(alloc); +- var loading = try LoadingImage.init(alloc, &cmd); +- defer loading.deinit(alloc); +- +- // Read our remaining chunks +- var fbs = std.io.fixedBufferStream(data); +- var buf: [1024]u8 = undefined; +- while (fbs.reader().readAll(&buf)) |size| { +- try loading.addData(alloc, buf[0..size]); +- if (size < buf.len) break; +- } else |err| return err; +- +- // Complete +- var img = try loading.complete(alloc); +- defer img.deinit(alloc); +- try testing.expect(img.compression == .none); +-} +- +-test "image load: temporary file without correct path" { +- const testing = std.testing; +- const alloc = testing.allocator; +- +- var tmp_dir = try internal_os.TempDir.init(); +- defer tmp_dir.deinit(); +- const data = @embedFile("testdata/ghostty_src_terminal_kitty_graphics_image.zig_expectedoutput.txt (expected): [std.fs.max_path_bytes]u8 = undefined; +- const path = try tmp_dir.dir.realpath("image.data", &buf); +- +- var cmd: command.Command = .{ +- .control = .{ .transmit = .{ +- .format = .rgb, +- .medium = .temporary_file, +- .compression = .none, +- .width = 20, +- .height = 15, +- .image_id = 31, +- } }, 
+- .data = try alloc.dupe(u8, path), +- }; +- defer cmd.deinit(alloc); +- try testing.expectError(error.TemporaryFileNotNamedCorrectly, LoadingImage.init(alloc, &cmd)); +- +- // Temporary file should still be there +- try tmp_dir.dir.access(path, .{}); +-} +- +-test "image load: rgb, not compressed, temporary file" { +- const testing = std.testing; +- const alloc = testing.allocator; +- +- var tmp_dir = try internal_os.TempDir.init(); +- defer tmp_dir.deinit(); +- const data = @embedFile("testdata/ghostty_src_terminal_kitty_graphics_image.zig_expectedoutput.txt (expected): [std.fs.max_path_bytes]u8 = undefined; +- const path = try tmp_dir.dir.realpath("tty-graphics-protocol-image.data", &buf); +- +- var cmd: command.Command = .{ +- .control = .{ .transmit = .{ +- .format = .rgb, +- .medium = .temporary_file, +- .compression = .none, +- .width = 20, +- .height = 15, +- .image_id = 31, +- } }, +- .data = try alloc.dupe(u8, path), +- }; +- defer cmd.deinit(alloc); +- var loading = try LoadingImage.init(alloc, &cmd); +- defer loading.deinit(alloc); +- var img = try loading.complete(alloc); +- defer img.deinit(alloc); +- try testing.expect(img.compression == .none); +- +- // Temporary file should be gone +- try testing.expectError(error.FileNotFound, tmp_dir.dir.access(path, .{})); +-} +- +-test "image load: rgb, not compressed, regular file" { +- const testing = std.testing; +- const alloc = testing.allocator; +- +- var tmp_dir = try internal_os.TempDir.init(); +- defer tmp_dir.deinit(); +- const data = @embedFile("testdata/ghostty_src_terminal_kitty_graphics_image.zig_expectedoutput.txt (expected): [std.fs.max_path_bytes]u8 = undefined; +- const path = try tmp_dir.dir.realpath("image.data", &buf); +- +- var cmd: command.Command = .{ +- .control = .{ .transmit = .{ +- .format = .rgb, +- .medium = .file, +- .compression = .none, +- .width = 20, +- .height = 15, +- .image_id = 31, +- } }, +- .data = try alloc.dupe(u8, path), +- }; +- defer cmd.deinit(alloc); +- var 
loading = try LoadingImage.init(alloc, &cmd); +- defer loading.deinit(alloc); +- var img = try loading.complete(alloc); +- defer img.deinit(alloc); +- try testing.expect(img.compression == .none); +- try tmp_dir.dir.access(path, .{}); +-} +- +-test "image load: png, not compressed, regular file" { +- const testing = std.testing; +- const alloc = testing.allocator; +- +- var tmp_dir = try internal_os.TempDir.init(); +- defer tmp_dir.deinit(); +- const data = @embedFile("testdata/ghostty_src_terminal_kitty_graphics_image.zig_expectedoutput.txt (expected): [std.fs.max_path_bytes]u8 = undefined; +- const path = try tmp_dir.dir.realpath("tty-graphics-protocol-image.data", &buf); +- +- var cmd: command.Command = .{ +- .control = .{ .transmit = .{ +- .format = .png, +- .medium = .file, +- .compression = .none, +- .width = 0, +- .height = 0, +- .image_id = 31, +- } }, +- .data = try alloc.dupe(u8, path), +- }; +- defer cmd.deinit(alloc); +- var loading = try LoadingImage.init(alloc, &cmd); +- defer loading.deinit(alloc); +- var img = try loading.complete(alloc); +- defer img.deinit(alloc); +- try testing.expect(img.compression == .none); +- try testing.expect(img.format == .rgba); +- try tmp_dir.dir.access(path, .{}); +-} +\ No newline at end of file ++ log.warn("failed to read temporary file: {}", +\ No newline at end of file diff --git a/locodiff-250425/results/ghostty_src_terminal_kitty_graphics_image.zig/x-ai_grok-code-fast-1/20250909_155808/raw_response.txt b/locodiff-250425/results/ghostty_src_terminal_kitty_graphics_image.zig/x-ai_grok-code-fast-1/20250909_155808/raw_response.txt new file mode 100644 index 000000000..7b32f962e --- /dev/null +++ b/locodiff-250425/results/ghostty_src_terminal_kitty_graphics_image.zig/x-ai_grok-code-fast-1/20250909_155808/raw_response.txt @@ -0,0 +1,416 @@ +```zig +const std = @import("std"); +const builtin = @import("builtin"); +const assert = std.debug.assert; +const Allocator = std.mem.Allocator; +const ArenaAllocator = 
std.heap.ArenaAllocator; +const posix = std.posix; +const fastmem = @import("../../fastmem.zig"); +const command = @import("graphics_command.zig"); +const point = @import("../point.zig"); +const PageList = @import("../PageList.zig"); +const internal_os = @import("../../os/main.zig"); +const wuffs = @import("wuffs"); + +const log = std.log.scoped(.kitty_gfx); + +/// Maximum width or height of an image. Taken directly from Kitty. +const max_dimension = 10000; + +/// Maximum size in bytes, taken from Kitty. +const max_size = 400 * 1024 * 1024; // 400MB + +/// An image that is still being loaded. The image should be initialized +/// using init on the first chunk and then addData for each subsequent +/// chunk. Once all chunks have been added, complete should be called +/// to finalize the image. +pub const LoadingImage = struct { + /// The in-progress image. The first chunk must have all the metadata + /// so this comes from that initially. + image: Image, + + /// The data that is being built up. + data: std.ArrayListUnmanaged(u8) = .{}, + + /// This is non-null when a transmit and display command is given + /// so that we display the image after it is fully loaded. + display: ?command.Display = null, + + /// Quiet is the quiet settings for the initial load command. This is + /// used if q isn't set on subsequent chunks. + quiet: command.Command.Quiet, + + /// Initialize a chunked immage from the first image transmission. + /// If this is a multi-chunk image, this should only be the FIRST + /// chunk. + pub fn init(alloc: Allocator, cmd: *const command.Command) !LoadingImage { + // We must have data to load an image + if (cmd.data.len == 0) return error.InvalidData; + + // Build our initial image from the properties sent via the control. + // These can be overwritten by the data loading process. For example, + // PNG loading sets the width/height from the data. 
+ const t = cmd.transmission().?; + var result: LoadingImage = .{ + .image = .{ + .id = t.image_id, + .number = t.image_number, + .width = t.width, + .height = t.height, + .compression = t.compression, + .format = t.format, + }, + + .display = cmd.display(), + .quiet = cmd.quiet, + }; + + // Special case for the direct medium, we just add the chunk directly. + if (t.medium == .direct) { + try result.addData(alloc, cmd.data); + return result; + } + + // Otherwise, the payload data is guaranteed to be a path. + + if (comptime builtin.os.tag != .windows) { + if (std.mem.indexOfScalar(u8, cmd.data, 0) != null) { + // posix.realpath *asserts* that the path does not have + // internal nulls instead of erroring. + log.warn("failed to get absolute path: BadPathName", .{}); + return error.InvalidData; + } + } + + var abs_buf: [std.fs.max_path_bytes]u8 = undefined; + const path = switch (t.medium) { + .direct => unreachable, // handled above + .file, .temporary_file => posix.realpath(cmd.data, &abs_buf) catch |err| { + log.warn("failed to get absolute path: {}", .{err}); + return error.InvalidData; + }, + .shared_memory => cmd.data, + }; + + // Depending on the medium, load the data from the path. + switch (t.medium) { + .direct => unreachable, // handled above + .file => try result.readFile(.file, alloc, t, path), + .temporary_file => try result.readFile(.temporary_file, alloc, t, path), + .shared_memory => try result.readSharedMemory(alloc, t, path), + } + + return result; + } + + pub fn deinit(self: *LoadingImage, alloc: Allocator) void { + self.image.deinit(alloc); + self.data.deinit(alloc); + } + + pub fn destroy(self: *LoadingImage, alloc: Allocator) void { + self.deinit(alloc); + alloc.destroy(self); + } + + /// Adds a chunk of data to the image. Use this if the image + /// is coming in chunks (the "m" parameter in the protocol). 
+ pub fn addData(self: *LoadingImage, alloc: Allocator, data: []const u8) !void { + // If no data, skip + if (data.len == 0) return; + + // If our data would get too big, return an error + if (self.data.items.len + data.len > max_size) { + log.warn("image data too large max_size={}", .{max_size}); + return error.InvalidData; + } + + // Ensure we have enough room to add the data + // to the end of the ArrayList before doing so. + try self.data.ensureUnusedCapacity(alloc, data.len); + + const start_i = self.data.items.len; + self.data.items.len = start_i + data.len; + fastmem.copy(u8, self.data.items[start_i..], data); + } + + /// Complete the chunked image, returning a completed image. + pub fn complete(self: *LoadingImage, alloc: Allocator) !Image { + const img = &self.image; + + // Decompress the data if it is compressed. + try self.decompress(alloc); + + // Decode the png if we have to + if (img.format == .png) try self.decodePng(alloc); + + // Validate our dimensions. + if (img.width == 0 or img.height == 0) return error.DimensionsRequired; + if (img.width > max_dimension or img.height > max_dimension) return error.DimensionsTooLarge; + + // Data length must be what we expect + const bpp = img.format.bpp(); + const expected_len = img.width * img.height * bpp; + const actual_len = self.data.items.len; + if (actual_len != expected_len) { + std.log.warn( + "unexpected length image id={} width={} height={} bpp={} expected_len={} actual_len={}", + .{ img.id, img.width, img.height, bpp, expected_len, actual_len }, + ); + return error.InvalidData; + } + + // Everything looks good, copy the image data over. + var result = self.image; + result.data = try self.data.toOwnedSlice(alloc); + errdefer result.deinit(alloc); + self.image = .{}; + try result.complete(alloc); + return result; + } + + /// Debug function to write the data to a file. This is useful for + /// capturing some test data for unit tests. 
+ pub fn debugDump(self: LoadingImage) !void { + if (comptime builtin.mode != .Debug) @compileError("debugDump in non-debug"); + + var buf: [1024]u8 = undefined; + const filename = try std.fmt.bufPrint( + &buf, + "image-{s}-{s}-{d}x{d}-{}.data", + .{ + @tagName(self.image.format), + @tagName(self.image.compression), + self.image.width, + self.image.height, + self.image.id, + }, + ); + const cwd = std.fs.cwd(); + const f = try cwd.createFile(filename, .{}); + defer f.close(); + + const writer = f.writer(); + try writer.writeAll(self.data.items); + } + + /// Decompress the data in-place. + fn decompress(self: *LoadingImage, alloc: Allocator) !void { + return switch (self.image.compression) { + .none => {}, + .zlib_deflate => self.decompressZlib(alloc), + }; + } + + fn decompressZlib(self: *LoadingImage, alloc: Allocator) !void { + // Open our zlib stream + var fbs = std.io.fixedBufferStream(self.data.items); + var stream = std.compress.zlib.decompressor(fbs.reader()); + + // Write it to an array list + var list = std.ArrayList(u8).init(alloc); + defer list.deinit(); + stream.reader().readAllArrayList(&list, max_size) catch |err| { + log.warn("failed to read decompressed data: {}", .{err}); + return error.DecompressionFailed; + }; + + // Empty our current data list, take ownership over managed array list + self.data.deinit(alloc); + self.data = .{ .items = list.items, .capacity = list.capacity }; + + // Make sure we note that our image is no longer compressed + self.image.compression = .none; + } + + /// Decode the data as PNG. This will also updated the image dimensions. 
+ fn decodePng(self: *LoadingImage, alloc: Allocator) !void { + assert(self.image.format == .png); + + const result = wuffs.png.decode( + alloc, + self.data.items, + ) catch |err| switch (err) { + error.WuffsError => return error.InvalidData, + error.OutOfMemory => return error.OutOfMemory, + }; + defer alloc.free(result.data); + + if (result.data.len > max_size) { + log.warn("png image too large size={} max_size={}", .{ result.data.len, max_size }); + return error.InvalidData; + } + + // Replace our data + self.data.deinit(alloc); + self.data = .{}; + try self.data.ensureUnusedCapacity(alloc, result.data.len); + try self.data.appendSlice(alloc, result.data[0..result.data.len]); + + // Store updated image dimensions + self.image.width = result.width; + self.image.height = result.height; + self.image.format = .rgba; + } + + /// Reads the data from a shared memory segment. + fn readSharedMemory( + self: *LoadingImage, + alloc: Allocator, + t: command.Transmission, + path: []const u8, + ) !void { + // windows is currently unsupported, does it support shm? + if (comptime builtin.target.os.tag == .windows) { + return error.UnsupportedMedium; + } + + // libc is required for shm_open + if (comptime !builtin.link_libc) { + return error.UnsupportedMedium; + } + + // Since we're only supporting posix then max_path_bytes should + // be enough to stack allocate the path. + var buf: [std.fs.max_path_bytes]u8 = undefined; + const pathz = std.fmt.bufPrintZ(&buf, "{s}", .{path}) catch return error.InvalidData; + + const fd = std.c.shm_open(pathz, @as(c_int, @bitCast(std.c.O{ .ACCMODE = .RDONLY })), 0); + switch (std.posix.errno(fd)) { + .SUCCESS => {}, + else => |err| { + log.warn("unable to open shared memory {s}: {}", .{ path, err }); + return error.InvalidData; + }, + } + defer _ = std.c.close(fd); + defer _ = std.c.shm_unlink(pathz); + + // The size from stat on may be larger than our expected size because + // shared memory has to be a multiple of the page size. 
+ const stat_size: usize = stat: { + const stat = std.posix.fstat(fd) catch |err| { + log.warn("unable to fstat shared memory {s}: {}", .{ path, err }); + return error.InvalidData; + }; + if (stat.size <= 0) return error.InvalidData; + break :stat @intCast(stat.size); + }; + + const expected_size: usize = switch (self.image.format) { + // Png we decode the full data size because later decoding will + // get the proper dimensions and assert validity. + .png => stat_size, + + // For these formats we have a size we must have. + .gray, .gray_alpha, .rgb, .rgba => |f| size: { + const bpp = f.bpp(); + break :size self.image.width * self.image.height * bpp; + }, + }; + + // Our stat size must be at least the expected size otherwise + // the shared memory data is invalid. + if (stat_size < expected_size) { + log.warn( + "shared memory size too small expected={} actual={}", + .{ expected_size, stat_size }, + ); + return error.InvalidData; + } + + const map = std.posix.mmap( + null, + stat_size, // mmap always uses the stat size + std.c.PROT.READ, + std.c.MAP{ .TYPE = .SHARED }, + fd, + 0, + ) catch |err| { + log.warn("unable to mmap shared memory {s}: {}", .{ path, err }); + return error.InvalidData; + }; + defer std.posix.munmap(map); + + const start: usize = @intCast(t.offset); + const end: usize = if (t.size > 0) @min( + @as(usize, @intCast(t.offset)) + @as(usize, @intCast(t.size)), + expected_size, + ) else expected_size; + + assert(self.data.items.len == 0); + try self.data.appendSlice(alloc, map[start..end]); + } + + /// Reads the data from a temporary file and returns it. This allocates + /// and does not free any of the data, so the caller must free it. + /// + /// This will also delete the temporary file if it is in a safe location. 
+ fn readFile( + self: *LoadingImage, + comptime medium: command.Transmission.Medium, + alloc: Allocator, + t: command.Transmission, + path: []const u8, + ) !void { + switch (medium) { + .file, .temporary_file => {}, + else => @compileError("readFile only supports file and temporary_file"), + } + + // Verify file seems "safe". This is logic copied directly from Kitty, + // mostly. This is really rough but it will catch obvious bad actors. + if (std.mem.startsWith(u8, path, "/proc/") or + std.mem.startsWith(u8, path, "/sys/") or + (std.mem.startsWith(u8, path, "/dev/") and + !std.mem.startsWith(u8, path, "/dev/shm/"))) + { + return error.InvalidData; + } + + // Temporary file logic + if (medium == .temporary_file) { + if (!isPathInTempDir(path)) return error.TemporaryFileNotInTempDir; + if (std.mem.indexOf(u8, path, "tty-graphics-protocol") == null) { + return error.TemporaryFileNotNamedCorrectly; + } + } + defer if (medium == .temporary_file) { + posix.unlink(path) catch |err| { + log.warn("failed to delete temporary file: {}", .{err}); + }; + }; + + var file = std.fs.cwd().openFile(path, .{}) catch |err| { + log.warn("failed to open temporary file: {}", .{err}); + return error.InvalidData; + }; + defer file.close(); + + // File must be a regular file + if (file.stat()) |stat| { + if (stat.kind != .file) { + log.warn("file is not a regular file kind={}", .{stat.kind}); + return error.InvalidData; + } + } else |err| { + log.warn("failed to stat file: {}", .{err}); + return error.InvalidData; + } + + if (t.offset > 0) { + file.seekTo(@intCast(t.offset)) catch |err| { + log.warn("failed to seek to offset {}: {}", .{ t.offset, err }); + return error.InvalidData; + }; + } + + var buf_reader = std.io.bufferedReader(file.reader()); + const reader = buf_reader.reader(); + + // Read the file + var managed = std.ArrayList(u8).init(alloc); + errdefer managed.deinit(); + const size: usize = if (t.size > 0) @min(t.size, max_size) else max_size; + 
reader.readAllArrayList(&managed, size) catch |err| { + log.warn("failed to read temporary file: {}", \ No newline at end of file diff --git a/locodiff-250425/results/ghostty_src_terminal_modes.zig/x-ai_grok-code-fast-1/20250909_155855/extracted_output.txt b/locodiff-250425/results/ghostty_src_terminal_modes.zig/x-ai_grok-code-fast-1/20250909_155855/extracted_output.txt new file mode 100644 index 000000000..03ba16407 --- /dev/null +++ b/locodiff-250425/results/ghostty_src_terminal_modes.zig/x-ai_grok-code-fast-1/20250909_155855/extracted_output.txt @@ -0,0 +1,259 @@ +//! This file contains all the terminal modes that we support +//! and various support types for them: an enum of supported modes, +//! a packed struct to store mode values, a more generalized state +//! struct to store values plus handle save/restore, and much more. +//! +//! There is pretty heavy comptime usage and type generation here. +//! I don't love to have this sort of complexity but its a good way +//! to ensure all our various types and logic remain in sync. + +const std = @import("std"); +const testing = std.testing; + +/// A struct that maintains the state of all the settable modes. +pub const ModeState = struct { + /// The values of the current modes. + values: ModePacked = .{}, + + /// The saved values. We only allow saving each mode once. + /// This is in line with other terminals that implement XTSAVE + /// and XTRESTORE. We can improve this in the future if it becomes + /// a real-world issue but we need to be aware of a DoS vector. + saved: ModePacked = .{}, + + /// The default values for the modes. This is used to reset + /// the modes to their default values during reset. + default: ModePacked = .{}, + + /// Reset the modes to their default values. This also clears the + /// saved state. + pub fn reset(self: *ModeState) void { + self.values = self.default; + self.saved = .{}; + } + + /// Set a mode to a value. 
+ pub fn set(self: *ModeState, mode: Mode, value: bool) void { + switch (mode) { + inline else => |mode_comptime| { + const entry = comptime entryForMode(mode_comptime); + @field(self.values, entry.name) = value; + }, + } + } + + /// Get the value of a mode. + pub fn get(self: *ModeState, mode: Mode) bool { + switch (mode) { + inline else => |mode_comptime| { + const entry = comptime entryForMode(mode_comptime); + return @field(self.values, entry.name); + }, + } + } + + /// Save the state of the given mode. This can then be restored + /// with restore. This will only be accurate if the previous + /// mode was saved exactly once and not restored. Otherwise this + /// will just keep restoring the last stored value in memory. + pub fn save(self: *ModeState, mode: Mode) void { + switch (mode) { + inline else => |mode_comptime| { + const entry = comptime entryForMode(mode_comptime); + @field(self.saved, entry.name) = @field(self.values, entry.name); + }, + } + } + + /// See save. This will return the restored value. + pub fn restore(self: *ModeState, mode: Mode) bool { + switch (mode) { + inline else => |mode_comptime| { + const entry = comptime entryForMode(mode_comptime); + @field(self.values, entry.name) = @field(self.saved, entry.name); + return @field(self.values, entry.name); + }, + } + } + + test { + // We have this here so that we explicitly fail when we change the + // size of modes. The size of modes is NOT particularly important, + // we just want to be mentally aware when it happens. + try std.testing.expectEqual(8, @sizeOf(ModePacked)); + } +}; + +/// A packed struct of all the settable modes. This shouldn't +/// be used directly but rather through the ModeState struct. +pub const ModePacked = packed_struct: { + const StructField = std.builtin.Type.StructField; + var fields: [entries.len]StructField = undefined; + for (entries, 0..) 
|entry, i| { + fields[i] = .{ + .name = entry.name, + .type = bool, + .default_value_ptr = &entry.default, + .is_comptime = false, + .alignment = 0, + }; + } + + break :packed_struct @Type(.{ .@"struct" = .{ + .layout = .@"packed", + .fields = &fields, + .decls = &.{}, + .is_tuple = false, + } }); +}; + +/// An enum(u16) of the available modes. See entries for available values. +pub const Mode = mode_enum: { + const EnumField = std.builtin.Type.EnumField; + var fields: [entries.len]EnumField = undefined; + for (entries, 0..) |entry, i| { + fields[i] = .{ + .name = entry.name, + .value = @as(ModeTag.Backing, @bitCast(ModeTag{ + .value = entry.value, + .ansi = entry.ansi, + })), + }; + } + + break :mode_enum @Type(.{ .@"enum" = .{ + .tag_type = ModeTag.Backing, + .fields = &fields, + .decls = &.{}, + .is_exhaustive = true, + } }); +}; + +/// The tag type for our enum is a u16 but we use a packed struct +/// in order to pack the ansi bit into the tag. +pub const ModeTag = packed struct(u16) { + pub const Backing = @typeInfo(@This()).@"struct".backing_integer.?; + value: u15, + ansi: bool = false, + + test "order" { + const t: ModeTag = .{ .value = 1 }; + const int: ModeTag.Backing = @bitCast(t); + try std.testing.expectEqual(@as(ModeTag.Backing, 1), int); + } +}; + +pub fn modeFromInt(v: u16, ansi: bool) ?Mode { + inline for (entries) |entry| { + if (comptime !entry.disabled) { + if (entry.value == v and entry.ansi == ansi) { + const tag: ModeTag = .{ .ansi = ansi, .value = entry.value }; + const int: ModeTag.Backing = @bitCast(tag); + return @enumFromInt(int); + } + } + } + + return null; +} + +fn entryForMode(comptime mode: Mode) ModeEntry { + @setEvalBranchQuota(10_000); + const name = @tagName(mode); + for (entries) |entry| { + if (std.mem.eql(u8, entry.name, name)) return entry; + } + + unreachable; +} + +/// A single entry of a possible mode we support. This is used to +/// dynamically define the enum and other tables. 
+const ModeEntry = struct { + name: [:0]const u8, + value: comptime_int, + default: bool = false, + + /// True if this is an ANSI mode, false if its a DEC mode (?-prefixed). + ansi: bool = false, + + /// If true, this mode is disabled and Ghostty will not allow it to be + /// set or queried. The mode enum still has it, allowing Ghostty developers + /// to develop a mode without exposing it to real users. + disabled: bool = false, +}; + +/// The full list of available entries. For documentation see how +/// they're used within Ghostty or google their values. It is not +/// valuable to redocument them all here. +const entries: []const ModeEntry = &.{ + // ANSI + .{ .name = "disable_keyboard", .value = 2, .ansi = true }, // KAM + .{ .name = "insert", .value = 4, .ansi = true }, + .{ .name = "send_receive_mode", .value = 12, .ansi = true, .default = true }, // SRM + .{ .name = "linefeed", .value = 20, .ansi = true }, + + // DEC + .{ .name = "cursor_keys", .value = 1 }, // DECCKM + .{ .name = "132_column", .value = 3 }, + .{ .name = "slow_scroll", .value = 4 }, + .{ .name = "reverse_colors", .value = 5 }, + .{ .name = "origin", .value = 6 }, + .{ .name = "wraparound", .value = 7, .default = true }, + .{ .name = "autorepeat", .value = 8 }, + .{ .name = "mouse_event_x10", .value = 9 }, + .{ .name = "cursor_blinking", .value = 12 }, + .{ .name = "cursor_visible", .value = 25, .default = true }, + .{ .name = "enable_mode_3", .value = 40 }, + .{ .name = "reverse_wrap", .value = 45 }, + .{ .name = "keypad_keys", .value = 66 }, + .{ .name = "enable_left_and_right_margin", .value = 69 }, + .{ .name = "mouse_event_normal", .value = 1000 }, + .{ .name = "mouse_event_button", .value = 1002 }, + .{ .name = "mouse_event_any", .value = 1003 }, + .{ .name = "focus_event", .value = 1004 }, + .{ .name = "mouse_format_utf8", .value = 1005 }, + .{ .name = "mouse_format_sgr", .value = 1006 }, + .{ .name = "mouse_alternate_scroll", .value = 1007, .default = true }, + .{ .name = 
"mouse_format_urxvt", .value = 1015 }, + .{ .name = "mouse_format_sgr_pixels", .value = 1016 }, + .{ .name = "ignore_keypad_with_numlock", .value = 1035, .default = true }, + .{ .name = "alt_esc_prefix", .value = 1036, .default = true }, + .{ .name = "alt_sends_escape", .value = 1039 }, + .{ .name = "reverse_wrap_extended", .value = 1045 }, + .{ .name = "alt_screen", .value = 1047 }, + .{ .name = "alt_screen_save_cursor_clear_enter", .value = 1049 }, + .{ .name = "bracketed_paste", .value = 2004 }, + .{ .name = "synchronized_output", .value = 2026 }, + .{ .name = "grapheme_cluster", .value = 2027 }, + .{ .name = "report_color_scheme", .value = 2031 }, + .{ .name = "in_band_size_reports", .value = 2048 }, +}; + +test { + _ = Mode; + _ = ModePacked; +} + +test modeFromInt { + try testing.expect(modeFromInt(4, true).? == .insert); + try testing.expect(modeFromInt(9, true) == null); + try testing.expect(modeFromInt(9, false).? == .mouse_event_x10); + try testing.expect(modeFromInt(14, true) == null); +} + +test ModeState { + var state: ModeState = .{}; + + // Normal set/get + try testing.expect(!state.get(.cursor_keys)); + state.set(.cursor_keys, true); + try testing.expect(state.get(.cursor_keys)); + + // Save/restore + state.save(.cursor_keys); + state.set(.cursor_keys, false); + try testing.expect(!state.get(.cursor_keys)); + try testing.expect(state.restore(.cursor_keys)); + try testing.expect(state.get(.cursor_keys)); +} \ No newline at end of file diff --git a/locodiff-250425/results/ghostty_src_terminal_modes.zig/x-ai_grok-code-fast-1/20250909_155855/metadata.json b/locodiff-250425/results/ghostty_src_terminal_modes.zig/x-ai_grok-code-fast-1/20250909_155855/metadata.json new file mode 100644 index 000000000..3f50d0e4d --- /dev/null +++ b/locodiff-250425/results/ghostty_src_terminal_modes.zig/x-ai_grok-code-fast-1/20250909_155855/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "ghostty_src_terminal_modes.zig", + 
"prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/ghostty_src_terminal_modes.zig_prompt.txt", + "expected_file": "locodiff-250425/prompts/ghostty_src_terminal_modes.zig_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:58:55.447021+00:00", + "success": false, + "error": "Output mismatch", + "api_error": false, + "raw_response_length": 9364, + "extracted_output_length": 9353, + "expected_output_length": 9338, + "results_dir": "locodiff-250425/results/ghostty_src_terminal_modes.zig/x-ai_grok-code-fast-1/20250909_155855", + "generation_id": "gen-1757433535-xE9aOAi5xbWhTB2dwb2v", + "cost_usd": 0.01287518, + "prompt_tokens": 12157, + "completion_tokens": 4028, + "total_tokens": 16185, + "native_prompt_tokens": 12079, + "native_completion_tokens": 7019, + "native_finish_reason": "stop", + "stats_error": null, + "native_tokens_reasoning": 4558 +} \ No newline at end of file diff --git a/locodiff-250425/results/ghostty_src_terminal_modes.zig/x-ai_grok-code-fast-1/20250909_155855/output.diff b/locodiff-250425/results/ghostty_src_terminal_modes.zig/x-ai_grok-code-fast-1/20250909_155855/output.diff new file mode 100644 index 000000000..c1638f851 --- /dev/null +++ b/locodiff-250425/results/ghostty_src_terminal_modes.zig/x-ai_grok-code-fast-1/20250909_155855/output.diff @@ -0,0 +1,15 @@ + +index dffd96d41..03ba16407 100644 +--- a/ghostty_src_terminal_modes.zig_expectedoutput.txt (expected):tmp/tmpg7k5oofu_expected.txt ++++ b/ghostty_src_terminal_modes.zig_extracted.txt (actual):tmp/tmpqfj2egxp_actual.txt +@@ -138,8 +138,8 @@ pub const ModeTag = packed struct(u16) { + + test "order" { + const t: ModeTag = .{ .value = 1 }; +- const int: Backing = @bitCast(t); +- try std.testing.expectEqual(@as(Backing, 1), int); ++ const int: ModeTag.Backing = @bitCast(t); ++ try std.testing.expectEqual(@as(ModeTag.Backing, 1), int); + } + }; + diff --git 
a/locodiff-250425/results/ghostty_src_terminal_modes.zig/x-ai_grok-code-fast-1/20250909_155855/raw_response.txt b/locodiff-250425/results/ghostty_src_terminal_modes.zig/x-ai_grok-code-fast-1/20250909_155855/raw_response.txt new file mode 100644 index 000000000..e152e89f2 --- /dev/null +++ b/locodiff-250425/results/ghostty_src_terminal_modes.zig/x-ai_grok-code-fast-1/20250909_155855/raw_response.txt @@ -0,0 +1,261 @@ +```zig +//! This file contains all the terminal modes that we support +//! and various support types for them: an enum of supported modes, +//! a packed struct to store mode values, a more generalized state +//! struct to store values plus handle save/restore, and much more. +//! +//! There is pretty heavy comptime usage and type generation here. +//! I don't love to have this sort of complexity but its a good way +//! to ensure all our various types and logic remain in sync. + +const std = @import("std"); +const testing = std.testing; + +/// A struct that maintains the state of all the settable modes. +pub const ModeState = struct { + /// The values of the current modes. + values: ModePacked = .{}, + + /// The saved values. We only allow saving each mode once. + /// This is in line with other terminals that implement XTSAVE + /// and XTRESTORE. We can improve this in the future if it becomes + /// a real-world issue but we need to be aware of a DoS vector. + saved: ModePacked = .{}, + + /// The default values for the modes. This is used to reset + /// the modes to their default values during reset. + default: ModePacked = .{}, + + /// Reset the modes to their default values. This also clears the + /// saved state. + pub fn reset(self: *ModeState) void { + self.values = self.default; + self.saved = .{}; + } + + /// Set a mode to a value. 
+ pub fn set(self: *ModeState, mode: Mode, value: bool) void { + switch (mode) { + inline else => |mode_comptime| { + const entry = comptime entryForMode(mode_comptime); + @field(self.values, entry.name) = value; + }, + } + } + + /// Get the value of a mode. + pub fn get(self: *ModeState, mode: Mode) bool { + switch (mode) { + inline else => |mode_comptime| { + const entry = comptime entryForMode(mode_comptime); + return @field(self.values, entry.name); + }, + } + } + + /// Save the state of the given mode. This can then be restored + /// with restore. This will only be accurate if the previous + /// mode was saved exactly once and not restored. Otherwise this + /// will just keep restoring the last stored value in memory. + pub fn save(self: *ModeState, mode: Mode) void { + switch (mode) { + inline else => |mode_comptime| { + const entry = comptime entryForMode(mode_comptime); + @field(self.saved, entry.name) = @field(self.values, entry.name); + }, + } + } + + /// See save. This will return the restored value. + pub fn restore(self: *ModeState, mode: Mode) bool { + switch (mode) { + inline else => |mode_comptime| { + const entry = comptime entryForMode(mode_comptime); + @field(self.values, entry.name) = @field(self.saved, entry.name); + return @field(self.values, entry.name); + }, + } + } + + test { + // We have this here so that we explicitly fail when we change the + // size of modes. The size of modes is NOT particularly important, + // we just want to be mentally aware when it happens. + try std.testing.expectEqual(8, @sizeOf(ModePacked)); + } +}; + +/// A packed struct of all the settable modes. This shouldn't +/// be used directly but rather through the ModeState struct. +pub const ModePacked = packed_struct: { + const StructField = std.builtin.Type.StructField; + var fields: [entries.len]StructField = undefined; + for (entries, 0..) 
|entry, i| { + fields[i] = .{ + .name = entry.name, + .type = bool, + .default_value_ptr = &entry.default, + .is_comptime = false, + .alignment = 0, + }; + } + + break :packed_struct @Type(.{ .@"struct" = .{ + .layout = .@"packed", + .fields = &fields, + .decls = &.{}, + .is_tuple = false, + } }); +}; + +/// An enum(u16) of the available modes. See entries for available values. +pub const Mode = mode_enum: { + const EnumField = std.builtin.Type.EnumField; + var fields: [entries.len]EnumField = undefined; + for (entries, 0..) |entry, i| { + fields[i] = .{ + .name = entry.name, + .value = @as(ModeTag.Backing, @bitCast(ModeTag{ + .value = entry.value, + .ansi = entry.ansi, + })), + }; + } + + break :mode_enum @Type(.{ .@"enum" = .{ + .tag_type = ModeTag.Backing, + .fields = &fields, + .decls = &.{}, + .is_exhaustive = true, + } }); +}; + +/// The tag type for our enum is a u16 but we use a packed struct +/// in order to pack the ansi bit into the tag. +pub const ModeTag = packed struct(u16) { + pub const Backing = @typeInfo(@This()).@"struct".backing_integer.?; + value: u15, + ansi: bool = false, + + test "order" { + const t: ModeTag = .{ .value = 1 }; + const int: ModeTag.Backing = @bitCast(t); + try std.testing.expectEqual(@as(ModeTag.Backing, 1), int); + } +}; + +pub fn modeFromInt(v: u16, ansi: bool) ?Mode { + inline for (entries) |entry| { + if (comptime !entry.disabled) { + if (entry.value == v and entry.ansi == ansi) { + const tag: ModeTag = .{ .ansi = ansi, .value = entry.value }; + const int: ModeTag.Backing = @bitCast(tag); + return @enumFromInt(int); + } + } + } + + return null; +} + +fn entryForMode(comptime mode: Mode) ModeEntry { + @setEvalBranchQuota(10_000); + const name = @tagName(mode); + for (entries) |entry| { + if (std.mem.eql(u8, entry.name, name)) return entry; + } + + unreachable; +} + +/// A single entry of a possible mode we support. This is used to +/// dynamically define the enum and other tables. 
+const ModeEntry = struct { + name: [:0]const u8, + value: comptime_int, + default: bool = false, + + /// True if this is an ANSI mode, false if its a DEC mode (?-prefixed). + ansi: bool = false, + + /// If true, this mode is disabled and Ghostty will not allow it to be + /// set or queried. The mode enum still has it, allowing Ghostty developers + /// to develop a mode without exposing it to real users. + disabled: bool = false, +}; + +/// The full list of available entries. For documentation see how +/// they're used within Ghostty or google their values. It is not +/// valuable to redocument them all here. +const entries: []const ModeEntry = &.{ + // ANSI + .{ .name = "disable_keyboard", .value = 2, .ansi = true }, // KAM + .{ .name = "insert", .value = 4, .ansi = true }, + .{ .name = "send_receive_mode", .value = 12, .ansi = true, .default = true }, // SRM + .{ .name = "linefeed", .value = 20, .ansi = true }, + + // DEC + .{ .name = "cursor_keys", .value = 1 }, // DECCKM + .{ .name = "132_column", .value = 3 }, + .{ .name = "slow_scroll", .value = 4 }, + .{ .name = "reverse_colors", .value = 5 }, + .{ .name = "origin", .value = 6 }, + .{ .name = "wraparound", .value = 7, .default = true }, + .{ .name = "autorepeat", .value = 8 }, + .{ .name = "mouse_event_x10", .value = 9 }, + .{ .name = "cursor_blinking", .value = 12 }, + .{ .name = "cursor_visible", .value = 25, .default = true }, + .{ .name = "enable_mode_3", .value = 40 }, + .{ .name = "reverse_wrap", .value = 45 }, + .{ .name = "keypad_keys", .value = 66 }, + .{ .name = "enable_left_and_right_margin", .value = 69 }, + .{ .name = "mouse_event_normal", .value = 1000 }, + .{ .name = "mouse_event_button", .value = 1002 }, + .{ .name = "mouse_event_any", .value = 1003 }, + .{ .name = "focus_event", .value = 1004 }, + .{ .name = "mouse_format_utf8", .value = 1005 }, + .{ .name = "mouse_format_sgr", .value = 1006 }, + .{ .name = "mouse_alternate_scroll", .value = 1007, .default = true }, + .{ .name = 
"mouse_format_urxvt", .value = 1015 }, + .{ .name = "mouse_format_sgr_pixels", .value = 1016 }, + .{ .name = "ignore_keypad_with_numlock", .value = 1035, .default = true }, + .{ .name = "alt_esc_prefix", .value = 1036, .default = true }, + .{ .name = "alt_sends_escape", .value = 1039 }, + .{ .name = "reverse_wrap_extended", .value = 1045 }, + .{ .name = "alt_screen", .value = 1047 }, + .{ .name = "alt_screen_save_cursor_clear_enter", .value = 1049 }, + .{ .name = "bracketed_paste", .value = 2004 }, + .{ .name = "synchronized_output", .value = 2026 }, + .{ .name = "grapheme_cluster", .value = 2027 }, + .{ .name = "report_color_scheme", .value = 2031 }, + .{ .name = "in_band_size_reports", .value = 2048 }, +}; + +test { + _ = Mode; + _ = ModePacked; +} + +test modeFromInt { + try testing.expect(modeFromInt(4, true).? == .insert); + try testing.expect(modeFromInt(9, true) == null); + try testing.expect(modeFromInt(9, false).? == .mouse_event_x10); + try testing.expect(modeFromInt(14, true) == null); +} + +test ModeState { + var state: ModeState = .{}; + + // Normal set/get + try testing.expect(!state.get(.cursor_keys)); + state.set(.cursor_keys, true); + try testing.expect(state.get(.cursor_keys)); + + // Save/restore + state.save(.cursor_keys); + state.set(.cursor_keys, false); + try testing.expect(!state.get(.cursor_keys)); + try testing.expect(state.restore(.cursor_keys)); + try testing.expect(state.get(.cursor_keys)); +} +``` \ No newline at end of file diff --git a/locodiff-250425/results/ghostty_src_termio_shell_integration.zig/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt b/locodiff-250425/results/ghostty_src_termio_shell_integration.zig/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt new file mode 100644 index 000000000..e8b16416a --- /dev/null +++ b/locodiff-250425/results/ghostty_src_termio_shell_integration.zig/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt @@ -0,0 +1,454 @@ +const std = @import("std"); +const builtin = 
@import("builtin"); +const Allocator = std.mem.Allocator; +const ArenaAllocator = std.heap.ArenaAllocator; +const EnvMap = std.process.EnvMap; +const config = @import("../config.zig"); +const homedir = @import("../os/homedir.zig"); +const internal_os = @import("../os/main.zig"); + +const log = std.log.scoped(.shell_integration); + +/// Shell types we support +pub const Shell = enum { + bash, + elvish, + fish, + zsh, +}; + +/// The result of setting up a shell integration. +pub const ShellIntegration = struct { + /// The successfully-integrated shell. + shell: Shell, + + /// A revised, integration-aware shell command. + /// In most cases this is identical to the command given but for + /// bash in particular it may be different. + /// + /// The memory is allocated in the arena given to setup. + command: config.Command, +}; + +/// Set up the command execution environment for automatic +/// integrated shell integration and return a ShellIntegration +/// struct describing the integration. If integration fails +/// (shell type couldn't be detected, etc.), this will return null. +/// +/// The allocator is used for temporary values and to allocate values +/// in the ShellIntegration result. It is expected to be an arena to +/// simplify cleanup. +pub fn setup( + alloc_arena: Allocator, + resource_dir: []const u8, + command: config.Command, + env: *EnvMap, + force_shell: ?Shell, + features: config.ShellIntegrationFeatures, +) !?ShellIntegration { + const exe = if (force_shell) |shell| switch (shell) { + .bash => "bash", + .elvish => "elvish", + .fish => "fish", + .zsh => "zsh", + } else switch (command) { + .direct => |v| std.fs.path.basename(v[0]), + .shell => |v| exe: { + // Shell strings can include spaces so we want to only + // look up to the space if it exists. No shell that we integrate + // has spaces. 
+ const idx = std.mem.indexOfScalar(u8, v, ' ') orelse v.len; + break :exe std.fs.path.basename(v[0..idx]); + }, + }; + + const result = try setupShell( + alloc_arena, + resource_dir, + command, + env, + exe, + ); + + // Setup our feature env vars + try setupFeatures(env, features); + + return result; +} + +fn setupShell( + alloc_arena: Allocator, + resource_dir: []const u8, + command: config.Command, + env: *EnvMap, + exe: []const u8, +) !?ShellIntegration { + if (std.mem.eql(u8, "bash", exe)) { + // Apple distributes their own patched version of Bash 3.2 + // on macOS that disables the ENV-based POSIX startup path. + // This means we're unable to perform our automatic shell + // integration sequence in this specific environment. + // + // If we're running "/bin/bash" on Darwin, we can assume + // we're using Apple's Bash because /bin is non-writable + // on modern macOS due to System Integrity Protection. + if (comptime builtin.target.os.tag.isDarwin()) { + if (std.mem.eql(u8, "/bin/bash", switch (command) { + .direct => |v| v[0], + .shell => |v| v, + })) { + return null; + } + } + + const new_command = try setupBash( + alloc_arena, + command, + resource_dir, + env, + ) orelse return null; + return .{ + .shell = .bash, + .command = new_command, + }; + } + + if (std.mem.eql(u8, "elvish", exe)) { + try setupXdgDataDirs(alloc_arena, resource_dir, env); + return .{ + .shell = .elvish, + .command = try command.clone(alloc_arena), + }; + } + + if (std.mem.eql(u8, "fish", exe)) { + try setupXdgDataDirs(alloc_arena, resource_dir, env); + return .{ + .shell = .fish, + .command = try command.clone(alloc_arena), + }; + } + + if (std.mem.eql(u8, "zsh", exe)) { + try setupZsh(resource_dir, env); + return .{ + .shell = .zsh, + .command = try command.clone(alloc_arena), + }; + } + + return null; +} + +test "force shell" { + const testing = std.testing; + var arena = ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var env = 
EnvMap.init(alloc); + defer env.deinit(); + + inline for (@typeInfo(Shell).@"enum".fields) |field| { + const shell = @field(Shell, field.name); + const result = try setup( + alloc, + ".", + .{ .shell = "sh" }, + &env, + shell, + .{}, + ); + try testing.expectEqual(shell, result.?.shell); + } +} + +/// Set up the shell integration features environment variable. +pub fn setupFeatures( + env: *EnvMap, + features: config.ShellIntegrationFeatures, +) !void { + const fields = @typeInfo(@TypeOf(features)).@"struct".fields; + const capacity: usize = capacity: { + comptime var n: usize = fields.len - 1; // commas + inline for (fields) |field| n += field.name.len; + break :capacity n; + }; + var buffer = try std.BoundedArray(u8, capacity).init(0); + + inline for (fields) |field| { + if (@field(features, field.name)) { + if (buffer.len > 0) try buffer.append(','); + try buffer.appendSlice(field.name); + } + } + + if (buffer.len > 0) { + try env.put("GHOSTTY_SHELL_FEATURES", buffer.slice()); + } +} + +test "setup features" { + const testing = std.testing; + + var arena = ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + // Test: all features enabled + { + var env = EnvMap.init(alloc); + defer env.deinit(); + + try setupFeatures(&env, .{ .cursor = true, .sudo = true, .title = true }); + try testing.expectEqualStrings("cursor,sudo,title", env.get("GHOSTTY_SHELL_FEATURES").?); + } + + // Test: all features disabled + { + var env = EnvMap.init(alloc); + defer env.deinit(); + + try setupFeatures(&env, .{ .cursor = false, .sudo = false, .title = false }); + try testing.expect(env.get("GHOSTTY_SHELL_FEATURES") == null); + } + + // Test: mixed features + { + var env = EnvMap.init(alloc); + defer env.deinit(); + + try setupFeatures(&env, .{ .cursor = false, .sudo = true, .title = false }); + try testing.expectEqualStrings("sudo", env.get("GHOSTTY_SHELL_FEATURES").?); + } +} + +/// Set up automatic shell integration for shells that 
include +/// their modules from paths in `XDG_DATA_DIRS` env variable. +/// +/// The shell-integration path is prepended to `XDG_DATA_DIRS`. +/// It is also saved in the `GHOSTTY_SHELL_INTEGRATION_XDG_DIR` variable +/// so that the shell can refer to it and safely remove this directory +/// from `XDG_DATA_DIRS` when integration is complete. +fn setupXdgDataDirs( + alloc_arena: Allocator, + resource_dir: []const u8, + env: *EnvMap, +) !void { + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + + // Get our path to the shell integration directory. + const integ_dir = try std.fmt.bufPrint( + &path_buf, + "{s}/shell-integration", + .{resource_dir}, + ); + + // Set an env var so we can remove this from XDG_DATA_DIRS later. + // This happens in the shell integration config itself. We do this + // so that our modifications don't interfere with other commands. + try env.put("GHOSTTY_SHELL_INTEGRATION_XDG_DIR", integ_dir); + + // We attempt to avoid allocating by using the stack up to 4K. + // Max stack size is considerably larger on mac + // 4K is a reasonable size for this for most cases. However, env + // vars can be significantly larger so if we have to we fall + // back to a heap allocated value. + var stack_alloc_state = std.heap.stackFallback(4096, alloc_arena); + const stack_alloc = stack_alloc_state.get(); + + // If no XDG_DATA_DIRS set use the default value as specified. + // This ensures that the default directories aren't lost by setting + // our desired integration dir directly. See #2711. 
+ // + const xdg_data_dirs_key = "XDG_DATA_DIRS"; + try env.put( + xdg_data_dirs_key, + try internal_os.prependEnv( + stack_alloc, + env.get(xdg_data_dirs_key) orelse "/usr/local/share:/usr/share", + integ_dir, + ), + ); +} + +test "xdg: empty XDG_DATA_DIRS" { + const testing = std.testing; + + var arena = ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var env = EnvMap.init(alloc); + defer env.deinit(); + + try setupXdgDataDirs(alloc, ".", &env); + + try testing.expectEqualStrings("./shell-integration", env.get("GHOSTTY_SHELL_INTEGRATION_XDG_DIR").?); + try testing.expectEqualStrings("./shell-integration:/usr/local/share:/usr/share", env.get("XDG_DATA_DIRS").?); +} + +test "xdg: existing XDG_DATA_DIRS" { + const testing = std.testing; + + var arena = ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var env = EnvMap.init(alloc); + defer env.deinit(); + + try env.put("XDG_DATA_DIRS", "/opt/share"); + try setupXdgDataDirs(alloc, ".", &env); + + try testing.expectEqualStrings("./shell-integration", env.get("GHOSTTY_SHELL_INTEGRATION_XDG_DIR").?); + try testing.expectEqualStrings("./shell-integration:/opt/share", env.get("XDG_DATA_DIRS").?); +} + +/// Setup the zsh automatic shell integration. This works by setting +/// ZDOTDIR to our resources dir so that zsh will load our config. This +/// config then loads the true user config. +fn setupZsh( + resource_dir: []const u8, + env: *EnvMap, +) !void { + // Preserve the old zdotdir value so we can recover it. + if (env.get("ZDOTDIR")) |old| { + try env.put("GHOSTTY_ZSH_ZDOTDIR", old); + } + + // Set our new ZDOTDIR + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const integ_dir = try std.fmt.bufPrint( + &path_buf, + "{s}/shell-integration/zsh", + .{resource_dir}, + ); + try env.put("ZDOTDIR", integ_dir); +} + +/// Setup the bash automatic shell integration. 
This works by +/// starting bash in POSIX mode and using the ENV environment +/// variable to load our bash integration script. This prevents +/// bash from loading its normal startup files, which becomes +/// our script's responsibility (along with disabling POSIX +/// mode). +/// +/// This returns a new (allocated) shell command string that +/// enables the integration or null if integration failed. +fn setupBash( + alloc: Allocator, + command: config.Command, + resource_dir: []const u8, + env: *EnvMap, +) !?config.Command { + var args = try std.ArrayList([:0]const u8).initCapacity(alloc, 2); + defer args.deinit(); + + // Iterator that yields each argument in the original command line. + // This will allocate once proportionate to the command line length. + var iter = try command.argIterator(alloc); + defer iter.deinit(); + + // Start accumulating arguments with the executable and `--posix` mode flag. + if (iter.next()) |exe| { + try args.append(try alloc.dupeZ(u8, exe)); + } else return null; + try args.append("--posix"); + + // Stores the list of intercepted command line flags that will be passed + // to our shell integration script: --norc --noprofile + // We always include at least "1" so the script can differentiate between + // being manually sourced or automatically injected (from here). + var inject = try std.BoundedArray(u8, 32).init(0); + try inject.appendSlice("1"); + + // Walk through the rest of the given arguments. If we see an option that + // would require complex or unsupported integration behavior, we bail out + // and skip loading our shell integration. Users can still manually source + // the shell integration script. 
+ // + // Unsupported options: + // -c -c is always non-interactive + // --posix POSIX mode (a la /bin/sh) + // + // Some additional cases we don't yet cover: + // + // - If additional file arguments are provided (after a `-` or `--` flag), + // and the `i` shell option isn't being explicitly set, we can assume a + // non-interactive shell session and skip loading our shell integration. + var rcfile: ?[]const u8 = null; + while (iter.next()) |arg| { + if (std.mem.eql(u8, arg, "--posix")) { + return null; + } else if (std.mem.eql(u8, arg, "--norc")) { + try inject.appendSlice(" --norc"); + } else if (std.mem.eql(u8, arg, "--noprofile")) { + try inject.appendSlice(" --noprofile"); + } else if (std.mem.eql(u8, arg, "--rcfile") or std.mem.eql(u8, arg, "--init-file")) { + rcfile = iter.next(); + } else if (arg.len > 1 and arg[0] == '-' and arg[1] != '-') { + // '-c command' is always non-interactive + if (std.mem.indexOfScalar(u8, arg, 'c') != null) { + return null; + } + try args.append(try alloc.dupeZ(u8, arg)); + } else if (std.mem.eql(u8, arg, "-") or std.mem.eql(u8, arg, "--")) { + // All remaining arguments should be passed directly to the shell + // command. We shouldn't perform any further option processing. + try args.append(try alloc.dupeZ(u8, arg)); + while (iter.next()) |remaining_arg| { + try args.append(try alloc.dupeZ(u8, remaining_arg)); + } + break; + } else { + try args.append(try alloc.dupeZ(u8, arg)); + } + } + try env.put("GHOSTTY_BASH_INJECT", inject.slice()); + if (rcfile) |v| { + try env.put("GHOSTTY_BASH_RCFILE", v); + } + + // In POSIX mode, HISTFILE defaults to ~/.sh_history, so unless we're + // staying in POSIX mode (--posix), change it back to ~/.bash_history. 
+ if (env.get("HISTFILE") == null) { + var home_buf: [1024]u8 = undefined; + if (try homedir.home(&home_buf)) |home| { + var histfile_buf: [std.fs.max_path_bytes]u8 = undefined; + const histfile = try std.fmt.bufPrint( + &histfile_buf, + "{s}/.bash_history", + .{home}, + ); + try env.put("HISTFILE", histfile); + try env.put("GHOSTTY_BASH_UNEXPORT_HISTFILE", "1"); + } + } + + // Set our new ENV to point to our integration script. + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const integ_dir = try std.fmt.bufPrint( + &path_buf, + "{s}/shell-integration/bash/ghostty.bash", + .{resource_dir}, + ); + try env.put("ENV", integ_dir); + + // Since we built up a command line, we don't need to wrap it in + // ANOTHER shell anymore and can do a direct command. + return .{ .direct = try args.toOwnedSlice() }; +} + +test "bash" { + const testing = std.testing; + var arena = ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var env = EnvMap.init(alloc); + defer env.deinit(); + + const command = try setupBash(alloc, .{ .shell = "bash" }, ".", &env); + + try testing.expectEqual(2, command.?.direct.len); \ No newline at end of file diff --git a/locodiff-250425/results/ghostty_src_termio_shell_integration.zig/x-ai_grok-code-fast-1/20250909_155741/metadata.json b/locodiff-250425/results/ghostty_src_termio_shell_integration.zig/x-ai_grok-code-fast-1/20250909_155741/metadata.json new file mode 100644 index 000000000..2993b4104 --- /dev/null +++ b/locodiff-250425/results/ghostty_src_termio_shell_integration.zig/x-ai_grok-code-fast-1/20250909_155741/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "ghostty_src_termio_shell_integration.zig", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/ghostty_src_termio_shell_integration.zig_prompt.txt", + "expected_file": "locodiff-250425/prompts/ghostty_src_termio_shell_integration.zig_expectedoutput.txt", + 
"timestamp_utc": "2025-09-09T15:57:41.912864+00:00", + "success": false, + "error": "Output mismatch", + "api_error": false, + "raw_response_length": 15364, + "extracted_output_length": 15353, + "expected_output_length": 20801, + "results_dir": "locodiff-250425/results/ghostty_src_termio_shell_integration.zig/x-ai_grok-code-fast-1/20250909_155741", + "generation_id": "gen-1757433462-x0epmEGcRT8oGd8fmw8W", + "cost_usd": 0.01561998, + "prompt_tokens": 31231, + "completion_tokens": 5663, + "total_tokens": 36894, + "native_prompt_tokens": 30525, + "native_completion_tokens": 9999, + "native_finish_reason": "length", + "stats_error": null, + "native_tokens_reasoning": 6259 +} \ No newline at end of file diff --git a/locodiff-250425/results/ghostty_src_termio_shell_integration.zig/x-ai_grok-code-fast-1/20250909_155741/output.diff b/locodiff-250425/results/ghostty_src_termio_shell_integration.zig/x-ai_grok-code-fast-1/20250909_155741/output.diff new file mode 100644 index 000000000..5a176fb70 --- /dev/null +++ b/locodiff-250425/results/ghostty_src_termio_shell_integration.zig/x-ai_grok-code-fast-1/20250909_155741/output.diff @@ -0,0 +1,411 @@ + +index dcf55ea3d..e8b16416a 100644 +--- a/ghostty_src_termio_shell_integration.zig_expectedoutput.txt (expected):tmp/tmpuemnpytb_expected.txt ++++ b/ghostty_src_termio_shell_integration.zig_extracted.txt (actual):tmp/tmpgt5dm14u_actual.txt +@@ -22,7 +22,7 @@ pub const ShellIntegration = struct { + /// The successfully-integrated shell. + shell: Shell, + +- /// The command to use to start the shell with the integration. ++ /// A revised, integration-aware shell command. + /// In most cases this is identical to the command given but for + /// bash in particular it may be different. 
+ /// +@@ -142,7 +142,6 @@ fn setupShell( + + test "force shell" { + const testing = std.testing; +- + var arena = ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); +@@ -224,6 +223,110 @@ test "setup features" { + } + } + ++/// Set up automatic shell integration for shells that include ++/// their modules from paths in `XDG_DATA_DIRS` env variable. ++/// ++/// The shell-integration path is prepended to `XDG_DATA_DIRS`. ++/// It is also saved in the `GHOSTTY_SHELL_INTEGRATION_XDG_DIR` variable ++/// so that the shell can refer to it and safely remove this directory ++/// from `XDG_DATA_DIRS` when integration is complete. ++fn setupXdgDataDirs( ++ alloc_arena: Allocator, ++ resource_dir: []const u8, ++ env: *EnvMap, ++) !void { ++ var path_buf: [std.fs.max_path_bytes]u8 = undefined; ++ ++ // Get our path to the shell integration directory. ++ const integ_dir = try std.fmt.bufPrint( ++ &path_buf, ++ "{s}/shell-integration", ++ .{resource_dir}, ++ ); ++ ++ // Set an env var so we can remove this from XDG_DATA_DIRS later. ++ // This happens in the shell integration config itself. We do this ++ // so that our modifications don't interfere with other commands. ++ try env.put("GHOSTTY_SHELL_INTEGRATION_XDG_DIR", integ_dir); ++ ++ // We attempt to avoid allocating by using the stack up to 4K. ++ // Max stack size is considerably larger on mac ++ // 4K is a reasonable size for this for most cases. However, env ++ // vars can be significantly larger so if we have to we fall ++ // back to a heap allocated value. ++ var stack_alloc_state = std.heap.stackFallback(4096, alloc_arena); ++ const stack_alloc = stack_alloc_state.get(); ++ ++ // If no XDG_DATA_DIRS set use the default value as specified. ++ // This ensures that the default directories aren't lost by setting ++ // our desired integration dir directly. See #2711. 
++ // ++ const xdg_data_dirs_key = "XDG_DATA_DIRS"; ++ try env.put( ++ xdg_data_dirs_key, ++ try internal_os.prependEnv( ++ stack_alloc, ++ env.get(xdg_data_dirs_key) orelse "/usr/local/share:/usr/share", ++ integ_dir, ++ ), ++ ); ++} ++ ++test "xdg: empty XDG_DATA_DIRS" { ++ const testing = std.testing; ++ ++ var arena = ArenaAllocator.init(testing.allocator); ++ defer arena.deinit(); ++ const alloc = arena.allocator(); ++ ++ var env = EnvMap.init(alloc); ++ defer env.deinit(); ++ ++ try setupXdgDataDirs(alloc, ".", &env); ++ ++ try testing.expectEqualStrings("./shell-integration", env.get("GHOSTTY_SHELL_INTEGRATION_XDG_DIR").?); ++ try testing.expectEqualStrings("./shell-integration:/usr/local/share:/usr/share", env.get("XDG_DATA_DIRS").?); ++} ++ ++test "xdg: existing XDG_DATA_DIRS" { ++ const testing = std.testing; ++ ++ var arena = ArenaAllocator.init(testing.allocator); ++ defer arena.deinit(); ++ const alloc = arena.allocator(); ++ ++ var env = EnvMap.init(alloc); ++ defer env.deinit(); ++ ++ try env.put("XDG_DATA_DIRS", "/opt/share"); ++ try setupXdgDataDirs(alloc, ".", &env); ++ ++ try testing.expectEqualStrings("./shell-integration", env.get("GHOSTTY_SHELL_INTEGRATION_XDG_DIR").?); ++ try testing.expectEqualStrings("./shell-integration:/opt/share", env.get("XDG_DATA_DIRS").?); ++} ++ ++/// Setup the zsh automatic shell integration. This works by setting ++/// ZDOTDIR to our resources dir so that zsh will load our config. This ++/// config then loads the true user config. ++fn setupZsh( ++ resource_dir: []const u8, ++ env: *EnvMap, ++) !void { ++ // Preserve the old zdotdir value so we can recover it. 
++ if (env.get("ZDOTDIR")) |old| { ++ try env.put("GHOSTTY_ZSH_ZDOTDIR", old); ++ } ++ ++ // Set our new ZDOTDIR ++ var path_buf: [std.fs.max_path_bytes]u8 = undefined; ++ const integ_dir = try std.fmt.bufPrint( ++ &path_buf, ++ "{s}/shell-integration/zsh", ++ .{resource_dir}, ++ ); ++ try env.put("ZDOTDIR", integ_dir); ++} ++ + /// Setup the bash automatic shell integration. This works by + /// starting bash in POSIX mode and using the ENV environment + /// variable to load our bash integration script. This prevents +@@ -268,6 +371,12 @@ fn setupBash( + // Unsupported options: + // -c -c is always non-interactive + // --posix POSIX mode (a la /bin/sh) ++ // ++ // Some additional cases we don't yet cover: ++ // ++ // - If additional file arguments are provided (after a `-` or `--` flag), ++ // and the `i` shell option isn't being explicitly set, we can assume a ++ // non-interactive shell session and skip loading our shell integration. + var rcfile: ?[]const u8 = null; + while (iter.next()) |arg| { + if (std.mem.eql(u8, arg, "--posix")) { +@@ -342,262 +451,4 @@ test "bash" { + + const command = try setupBash(alloc, .{ .shell = "bash" }, ".", &env); + +- try testing.expectEqual(2, command.?.direct.len); +- try testing.expectEqualStrings("bash", command.?.direct[0]); +- try testing.expectEqualStrings("--posix", command.?.direct[1]); +- try testing.expectEqualStrings("./shell-integration/bash/ghostty.bash", env.get("ENV").?); +- try testing.expectEqualStrings("1", env.get("GHOSTTY_BASH_INJECT").?); +-} +- +-test "bash: unsupported options" { +- const testing = std.testing; +- var arena = ArenaAllocator.init(testing.allocator); +- defer arena.deinit(); +- const alloc = arena.allocator(); +- +- const cmdlines = [_][:0]const u8{ +- "bash --posix", +- "bash --rcfile script.sh --posix", +- "bash --init-file script.sh --posix", +- "bash -c script.sh", +- "bash -ic script.sh", +- }; +- +- for (cmdlines) |cmdline| { +- var env = EnvMap.init(alloc); +- defer env.deinit(); +- 
+- try testing.expect(try setupBash(alloc, .{ .shell = cmdline }, ".", &env) == null); +- try testing.expect(env.get("GHOSTTY_BASH_INJECT") == null); +- try testing.expect(env.get("GHOSTTY_BASH_RCFILE") == null); +- try testing.expect(env.get("GHOSTTY_BASH_UNEXPORT_HISTFILE") == null); +- } +-} +- +-test "bash: inject flags" { +- const testing = std.testing; +- var arena = ArenaAllocator.init(testing.allocator); +- defer arena.deinit(); +- const alloc = arena.allocator(); +- +- // bash --norc +- { +- var env = EnvMap.init(alloc); +- defer env.deinit(); +- +- const command = try setupBash(alloc, .{ .shell = "bash --norc" }, ".", &env); +- +- try testing.expectEqual(2, command.?.direct.len); +- try testing.expectEqualStrings("bash", command.?.direct[0]); +- try testing.expectEqualStrings("--posix", command.?.direct[1]); +- try testing.expectEqualStrings("1 --norc", env.get("GHOSTTY_BASH_INJECT").?); +- } +- +- // bash --noprofile +- { +- var env = EnvMap.init(alloc); +- defer env.deinit(); +- +- const command = try setupBash(alloc, .{ .shell = "bash --noprofile" }, ".", &env); +- +- try testing.expectEqual(2, command.?.direct.len); +- try testing.expectEqualStrings("bash", command.?.direct[0]); +- try testing.expectEqualStrings("--posix", command.?.direct[1]); +- try testing.expectEqualStrings("1 --noprofile", env.get("GHOSTTY_BASH_INJECT").?); +- } +-} +- +-test "bash: rcfile" { +- const testing = std.testing; +- var arena = ArenaAllocator.init(testing.allocator); +- defer arena.deinit(); +- const alloc = arena.allocator(); +- +- var env = EnvMap.init(alloc); +- defer env.deinit(); +- +- // bash --rcfile +- { +- const command = try setupBash(alloc, .{ .shell = "bash --rcfile profile.sh" }, ".", &env); +- try testing.expectEqual(2, command.?.direct.len); +- try testing.expectEqualStrings("bash", command.?.direct[0]); +- try testing.expectEqualStrings("--posix", command.?.direct[1]); +- try testing.expectEqualStrings("profile.sh", env.get("GHOSTTY_BASH_RCFILE").?); +- 
} +- +- // bash --init-file +- { +- const command = try setupBash(alloc, .{ .shell = "bash --init-file profile.sh" }, ".", &env); +- try testing.expectEqual(2, command.?.direct.len); +- try testing.expectEqualStrings("bash", command.?.direct[0]); +- try testing.expectEqualStrings("--posix", command.?.direct[1]); +- try testing.expectEqualStrings("profile.sh", env.get("GHOSTTY_BASH_RCFILE").?); +- } +-} +- +-test "bash: HISTFILE" { +- const testing = std.testing; +- var arena = ArenaAllocator.init(testing.allocator); +- defer arena.deinit(); +- const alloc = arena.allocator(); +- +- // HISTFILE unset +- { +- var env = EnvMap.init(alloc); +- defer env.deinit(); +- +- _ = try setupBash(alloc, .{ .shell = "bash" }, ".", &env); +- try testing.expect(std.mem.endsWith(u8, env.get("HISTFILE").?, ".bash_history")); +- try testing.expectEqualStrings("1", env.get("GHOSTTY_BASH_UNEXPORT_HISTFILE").?); +- } +- +- // HISTFILE set +- { +- var env = EnvMap.init(alloc); +- defer env.deinit(); +- +- try env.put("HISTFILE", "my_history"); +- +- _ = try setupBash(alloc, .{ .shell = "bash" }, ".", &env); +- try testing.expectEqualStrings("my_history", env.get("HISTFILE").?); +- try testing.expect(env.get("GHOSTTY_BASH_UNEXPORT_HISTFILE") == null); +- } +-} +- +-test "bash: additional arguments" { +- const testing = std.testing; +- var arena = ArenaAllocator.init(testing.allocator); +- defer arena.deinit(); +- const alloc = arena.allocator(); +- +- var env = EnvMap.init(alloc); +- defer env.deinit(); +- +- // "-" argument separator +- { +- const command = try setupBash(alloc, .{ .shell = "bash - --arg file1 file2" }, ".", &env); +- try testing.expectEqual(6, command.?.direct.len); +- try testing.expectEqualStrings("bash", command.?.direct[0]); +- try testing.expectEqualStrings("--posix", command.?.direct[1]); +- try testing.expectEqualStrings("-", command.?.direct[2]); +- try testing.expectEqualStrings("--arg", command.?.direct[3]); +- try testing.expectEqualStrings("file1", 
command.?.direct[4]); +- try testing.expectEqualStrings("file2", command.?.direct[5]); +- } +- +- // "--" argument separator +- { +- const command = try setupBash(alloc, .{ .shell = "bash -- --arg file1 file2" }, ".", &env); +- try testing.expectEqual(6, command.?.direct.len); +- try testing.expectEqualStrings("bash", command.?.direct[0]); +- try testing.expectEqualStrings("--posix", command.?.direct[1]); +- try testing.expectEqualStrings("--", command.?.direct[2]); +- try testing.expectEqualStrings("--arg", command.?.direct[3]); +- try testing.expectEqualStrings("file1", command.?.direct[4]); +- try testing.expectEqualStrings("file2", command.?.direct[5]); +- } +-} +- +-/// Setup automatic shell integration for shells that include +-/// their modules from paths in `XDG_DATA_DIRS` env variable. +-/// +-/// The shell-integration path is prepended to `XDG_DATA_DIRS`. +-/// It is also saved in the `GHOSTTY_SHELL_INTEGRATION_XDG_DIR` variable +-/// so that the shell can refer to it and safely remove this directory +-/// from `XDG_DATA_DIRS` when integration is complete. +-fn setupXdgDataDirs( +- alloc_arena: Allocator, +- resource_dir: []const u8, +- env: *EnvMap, +-) !void { +- var path_buf: [std.fs.max_path_bytes]u8 = undefined; +- +- // Get our path to the shell integration directory. +- const integ_dir = try std.fmt.bufPrint( +- &path_buf, +- "{s}/shell-integration", +- .{resource_dir}, +- ); +- +- // Set an env var so we can remove this from XDG_DATA_DIRS later. +- // This happens in the shell integration config itself. We do this +- // so that our modifications don't interfere with other commands. +- try env.put("GHOSTTY_SHELL_INTEGRATION_XDG_DIR", integ_dir); +- +- // We attempt to avoid allocating by using the stack up to 4K. +- // Max stack size is considerably larger on mac +- // 4K is a reasonable size for this for most cases. However, env +- // vars can be significantly larger so if we have to we fall +- // back to a heap allocated value. 
+- var stack_alloc_state = std.heap.stackFallback(4096, alloc_arena); +- const stack_alloc = stack_alloc_state.get(); +- +- // If no XDG_DATA_DIRS set use the default value as specified. +- // This ensures that the default directories aren't lost by setting +- // our desired integration dir directly. See #2711. +- // +- const xdg_data_dirs_key = "XDG_DATA_DIRS"; +- try env.put( +- xdg_data_dirs_key, +- try internal_os.prependEnv( +- stack_alloc, +- env.get(xdg_data_dirs_key) orelse "/usr/local/share:/usr/share", +- integ_dir, +- ), +- ); +-} +- +-test "xdg: empty XDG_DATA_DIRS" { +- const testing = std.testing; +- +- var arena = ArenaAllocator.init(testing.allocator); +- defer arena.deinit(); +- const alloc = arena.allocator(); +- +- var env = EnvMap.init(alloc); +- defer env.deinit(); +- +- try setupXdgDataDirs(alloc, ".", &env); +- +- try testing.expectEqualStrings("./shell-integration", env.get("GHOSTTY_SHELL_INTEGRATION_XDG_DIR").?); +- try testing.expectEqualStrings("./shell-integration:/usr/local/share:/usr/share", env.get("XDG_DATA_DIRS").?); +-} +- +-test "xdg: existing XDG_DATA_DIRS" { +- const testing = std.testing; +- +- var arena = ArenaAllocator.init(testing.allocator); +- defer arena.deinit(); +- const alloc = arena.allocator(); +- +- var env = EnvMap.init(alloc); +- defer env.deinit(); +- +- try env.put("XDG_DATA_DIRS", "/opt/share"); +- try setupXdgDataDirs(alloc, ".", &env); +- +- try testing.expectEqualStrings("./shell-integration", env.get("GHOSTTY_SHELL_INTEGRATION_XDG_DIR").?); +- try testing.expectEqualStrings("./shell-integration:/opt/share", env.get("XDG_DATA_DIRS").?); +-} +- +-/// Setup the zsh automatic shell integration. This works by setting +-/// ZDOTDIR to our resources dir so that zsh will load our config. This +-/// config then loads the true user config. +-fn setupZsh( +- resource_dir: []const u8, +- env: *EnvMap, +-) !void { +- // Preserve the old zdotdir value so we can recover it. 
+- if (env.get("ZDOTDIR")) |old| { +- try env.put("GHOSTTY_ZSH_ZDOTDIR", old); +- } +- +- // Set our new ZDOTDIR +- var path_buf: [std.fs.max_path_bytes]u8 = undefined; +- const integ_dir = try std.fmt.bufPrint( +- &path_buf, +- "{s}/shell-integration/zsh", +- .{resource_dir}, +- ); +- try env.put("ZDOTDIR", integ_dir); +-} +\ No newline at end of file ++ try testing.expectEqual(2, command.?.direct.len); +\ No newline at end of file diff --git a/locodiff-250425/results/ghostty_src_termio_shell_integration.zig/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt b/locodiff-250425/results/ghostty_src_termio_shell_integration.zig/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt new file mode 100644 index 000000000..c46ba30ed --- /dev/null +++ b/locodiff-250425/results/ghostty_src_termio_shell_integration.zig/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt @@ -0,0 +1,456 @@ +```zig +const std = @import("std"); +const builtin = @import("builtin"); +const Allocator = std.mem.Allocator; +const ArenaAllocator = std.heap.ArenaAllocator; +const EnvMap = std.process.EnvMap; +const config = @import("../config.zig"); +const homedir = @import("../os/homedir.zig"); +const internal_os = @import("../os/main.zig"); + +const log = std.log.scoped(.shell_integration); + +/// Shell types we support +pub const Shell = enum { + bash, + elvish, + fish, + zsh, +}; + +/// The result of setting up a shell integration. +pub const ShellIntegration = struct { + /// The successfully-integrated shell. + shell: Shell, + + /// A revised, integration-aware shell command. + /// In most cases this is identical to the command given but for + /// bash in particular it may be different. + /// + /// The memory is allocated in the arena given to setup. + command: config.Command, +}; + +/// Set up the command execution environment for automatic +/// integrated shell integration and return a ShellIntegration +/// struct describing the integration. 
If integration fails +/// (shell type couldn't be detected, etc.), this will return null. +/// +/// The allocator is used for temporary values and to allocate values +/// in the ShellIntegration result. It is expected to be an arena to +/// simplify cleanup. +pub fn setup( + alloc_arena: Allocator, + resource_dir: []const u8, + command: config.Command, + env: *EnvMap, + force_shell: ?Shell, + features: config.ShellIntegrationFeatures, +) !?ShellIntegration { + const exe = if (force_shell) |shell| switch (shell) { + .bash => "bash", + .elvish => "elvish", + .fish => "fish", + .zsh => "zsh", + } else switch (command) { + .direct => |v| std.fs.path.basename(v[0]), + .shell => |v| exe: { + // Shell strings can include spaces so we want to only + // look up to the space if it exists. No shell that we integrate + // has spaces. + const idx = std.mem.indexOfScalar(u8, v, ' ') orelse v.len; + break :exe std.fs.path.basename(v[0..idx]); + }, + }; + + const result = try setupShell( + alloc_arena, + resource_dir, + command, + env, + exe, + ); + + // Setup our feature env vars + try setupFeatures(env, features); + + return result; +} + +fn setupShell( + alloc_arena: Allocator, + resource_dir: []const u8, + command: config.Command, + env: *EnvMap, + exe: []const u8, +) !?ShellIntegration { + if (std.mem.eql(u8, "bash", exe)) { + // Apple distributes their own patched version of Bash 3.2 + // on macOS that disables the ENV-based POSIX startup path. + // This means we're unable to perform our automatic shell + // integration sequence in this specific environment. + // + // If we're running "/bin/bash" on Darwin, we can assume + // we're using Apple's Bash because /bin is non-writable + // on modern macOS due to System Integrity Protection. 
+ if (comptime builtin.target.os.tag.isDarwin()) { + if (std.mem.eql(u8, "/bin/bash", switch (command) { + .direct => |v| v[0], + .shell => |v| v, + })) { + return null; + } + } + + const new_command = try setupBash( + alloc_arena, + command, + resource_dir, + env, + ) orelse return null; + return .{ + .shell = .bash, + .command = new_command, + }; + } + + if (std.mem.eql(u8, "elvish", exe)) { + try setupXdgDataDirs(alloc_arena, resource_dir, env); + return .{ + .shell = .elvish, + .command = try command.clone(alloc_arena), + }; + } + + if (std.mem.eql(u8, "fish", exe)) { + try setupXdgDataDirs(alloc_arena, resource_dir, env); + return .{ + .shell = .fish, + .command = try command.clone(alloc_arena), + }; + } + + if (std.mem.eql(u8, "zsh", exe)) { + try setupZsh(resource_dir, env); + return .{ + .shell = .zsh, + .command = try command.clone(alloc_arena), + }; + } + + return null; +} + +test "force shell" { + const testing = std.testing; + var arena = ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var env = EnvMap.init(alloc); + defer env.deinit(); + + inline for (@typeInfo(Shell).@"enum".fields) |field| { + const shell = @field(Shell, field.name); + const result = try setup( + alloc, + ".", + .{ .shell = "sh" }, + &env, + shell, + .{}, + ); + try testing.expectEqual(shell, result.?.shell); + } +} + +/// Set up the shell integration features environment variable. 
+pub fn setupFeatures( + env: *EnvMap, + features: config.ShellIntegrationFeatures, +) !void { + const fields = @typeInfo(@TypeOf(features)).@"struct".fields; + const capacity: usize = capacity: { + comptime var n: usize = fields.len - 1; // commas + inline for (fields) |field| n += field.name.len; + break :capacity n; + }; + var buffer = try std.BoundedArray(u8, capacity).init(0); + + inline for (fields) |field| { + if (@field(features, field.name)) { + if (buffer.len > 0) try buffer.append(','); + try buffer.appendSlice(field.name); + } + } + + if (buffer.len > 0) { + try env.put("GHOSTTY_SHELL_FEATURES", buffer.slice()); + } +} + +test "setup features" { + const testing = std.testing; + + var arena = ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + // Test: all features enabled + { + var env = EnvMap.init(alloc); + defer env.deinit(); + + try setupFeatures(&env, .{ .cursor = true, .sudo = true, .title = true }); + try testing.expectEqualStrings("cursor,sudo,title", env.get("GHOSTTY_SHELL_FEATURES").?); + } + + // Test: all features disabled + { + var env = EnvMap.init(alloc); + defer env.deinit(); + + try setupFeatures(&env, .{ .cursor = false, .sudo = false, .title = false }); + try testing.expect(env.get("GHOSTTY_SHELL_FEATURES") == null); + } + + // Test: mixed features + { + var env = EnvMap.init(alloc); + defer env.deinit(); + + try setupFeatures(&env, .{ .cursor = false, .sudo = true, .title = false }); + try testing.expectEqualStrings("sudo", env.get("GHOSTTY_SHELL_FEATURES").?); + } +} + +/// Set up automatic shell integration for shells that include +/// their modules from paths in `XDG_DATA_DIRS` env variable. +/// +/// The shell-integration path is prepended to `XDG_DATA_DIRS`. +/// It is also saved in the `GHOSTTY_SHELL_INTEGRATION_XDG_DIR` variable +/// so that the shell can refer to it and safely remove this directory +/// from `XDG_DATA_DIRS` when integration is complete. 
+fn setupXdgDataDirs( + alloc_arena: Allocator, + resource_dir: []const u8, + env: *EnvMap, +) !void { + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + + // Get our path to the shell integration directory. + const integ_dir = try std.fmt.bufPrint( + &path_buf, + "{s}/shell-integration", + .{resource_dir}, + ); + + // Set an env var so we can remove this from XDG_DATA_DIRS later. + // This happens in the shell integration config itself. We do this + // so that our modifications don't interfere with other commands. + try env.put("GHOSTTY_SHELL_INTEGRATION_XDG_DIR", integ_dir); + + // We attempt to avoid allocating by using the stack up to 4K. + // Max stack size is considerably larger on mac + // 4K is a reasonable size for this for most cases. However, env + // vars can be significantly larger so if we have to we fall + // back to a heap allocated value. + var stack_alloc_state = std.heap.stackFallback(4096, alloc_arena); + const stack_alloc = stack_alloc_state.get(); + + // If no XDG_DATA_DIRS set use the default value as specified. + // This ensures that the default directories aren't lost by setting + // our desired integration dir directly. See #2711. 
+ // + const xdg_data_dirs_key = "XDG_DATA_DIRS"; + try env.put( + xdg_data_dirs_key, + try internal_os.prependEnv( + stack_alloc, + env.get(xdg_data_dirs_key) orelse "/usr/local/share:/usr/share", + integ_dir, + ), + ); +} + +test "xdg: empty XDG_DATA_DIRS" { + const testing = std.testing; + + var arena = ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var env = EnvMap.init(alloc); + defer env.deinit(); + + try setupXdgDataDirs(alloc, ".", &env); + + try testing.expectEqualStrings("./shell-integration", env.get("GHOSTTY_SHELL_INTEGRATION_XDG_DIR").?); + try testing.expectEqualStrings("./shell-integration:/usr/local/share:/usr/share", env.get("XDG_DATA_DIRS").?); +} + +test "xdg: existing XDG_DATA_DIRS" { + const testing = std.testing; + + var arena = ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var env = EnvMap.init(alloc); + defer env.deinit(); + + try env.put("XDG_DATA_DIRS", "/opt/share"); + try setupXdgDataDirs(alloc, ".", &env); + + try testing.expectEqualStrings("./shell-integration", env.get("GHOSTTY_SHELL_INTEGRATION_XDG_DIR").?); + try testing.expectEqualStrings("./shell-integration:/opt/share", env.get("XDG_DATA_DIRS").?); +} + +/// Setup the zsh automatic shell integration. This works by setting +/// ZDOTDIR to our resources dir so that zsh will load our config. This +/// config then loads the true user config. +fn setupZsh( + resource_dir: []const u8, + env: *EnvMap, +) !void { + // Preserve the old zdotdir value so we can recover it. + if (env.get("ZDOTDIR")) |old| { + try env.put("GHOSTTY_ZSH_ZDOTDIR", old); + } + + // Set our new ZDOTDIR + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const integ_dir = try std.fmt.bufPrint( + &path_buf, + "{s}/shell-integration/zsh", + .{resource_dir}, + ); + try env.put("ZDOTDIR", integ_dir); +} + +/// Setup the bash automatic shell integration. 
This works by +/// starting bash in POSIX mode and using the ENV environment +/// variable to load our bash integration script. This prevents +/// bash from loading its normal startup files, which becomes +/// our script's responsibility (along with disabling POSIX +/// mode). +/// +/// This returns a new (allocated) shell command string that +/// enables the integration or null if integration failed. +fn setupBash( + alloc: Allocator, + command: config.Command, + resource_dir: []const u8, + env: *EnvMap, +) !?config.Command { + var args = try std.ArrayList([:0]const u8).initCapacity(alloc, 2); + defer args.deinit(); + + // Iterator that yields each argument in the original command line. + // This will allocate once proportionate to the command line length. + var iter = try command.argIterator(alloc); + defer iter.deinit(); + + // Start accumulating arguments with the executable and `--posix` mode flag. + if (iter.next()) |exe| { + try args.append(try alloc.dupeZ(u8, exe)); + } else return null; + try args.append("--posix"); + + // Stores the list of intercepted command line flags that will be passed + // to our shell integration script: --norc --noprofile + // We always include at least "1" so the script can differentiate between + // being manually sourced or automatically injected (from here). + var inject = try std.BoundedArray(u8, 32).init(0); + try inject.appendSlice("1"); + + // Walk through the rest of the given arguments. If we see an option that + // would require complex or unsupported integration behavior, we bail out + // and skip loading our shell integration. Users can still manually source + // the shell integration script. 
+ // + // Unsupported options: + // -c -c is always non-interactive + // --posix POSIX mode (a la /bin/sh) + // + // Some additional cases we don't yet cover: + // + // - If additional file arguments are provided (after a `-` or `--` flag), + // and the `i` shell option isn't being explicitly set, we can assume a + // non-interactive shell session and skip loading our shell integration. + var rcfile: ?[]const u8 = null; + while (iter.next()) |arg| { + if (std.mem.eql(u8, arg, "--posix")) { + return null; + } else if (std.mem.eql(u8, arg, "--norc")) { + try inject.appendSlice(" --norc"); + } else if (std.mem.eql(u8, arg, "--noprofile")) { + try inject.appendSlice(" --noprofile"); + } else if (std.mem.eql(u8, arg, "--rcfile") or std.mem.eql(u8, arg, "--init-file")) { + rcfile = iter.next(); + } else if (arg.len > 1 and arg[0] == '-' and arg[1] != '-') { + // '-c command' is always non-interactive + if (std.mem.indexOfScalar(u8, arg, 'c') != null) { + return null; + } + try args.append(try alloc.dupeZ(u8, arg)); + } else if (std.mem.eql(u8, arg, "-") or std.mem.eql(u8, arg, "--")) { + // All remaining arguments should be passed directly to the shell + // command. We shouldn't perform any further option processing. + try args.append(try alloc.dupeZ(u8, arg)); + while (iter.next()) |remaining_arg| { + try args.append(try alloc.dupeZ(u8, remaining_arg)); + } + break; + } else { + try args.append(try alloc.dupeZ(u8, arg)); + } + } + try env.put("GHOSTTY_BASH_INJECT", inject.slice()); + if (rcfile) |v| { + try env.put("GHOSTTY_BASH_RCFILE", v); + } + + // In POSIX mode, HISTFILE defaults to ~/.sh_history, so unless we're + // staying in POSIX mode (--posix), change it back to ~/.bash_history. 
+ if (env.get("HISTFILE") == null) { + var home_buf: [1024]u8 = undefined; + if (try homedir.home(&home_buf)) |home| { + var histfile_buf: [std.fs.max_path_bytes]u8 = undefined; + const histfile = try std.fmt.bufPrint( + &histfile_buf, + "{s}/.bash_history", + .{home}, + ); + try env.put("HISTFILE", histfile); + try env.put("GHOSTTY_BASH_UNEXPORT_HISTFILE", "1"); + } + } + + // Set our new ENV to point to our integration script. + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const integ_dir = try std.fmt.bufPrint( + &path_buf, + "{s}/shell-integration/bash/ghostty.bash", + .{resource_dir}, + ); + try env.put("ENV", integ_dir); + + // Since we built up a command line, we don't need to wrap it in + // ANOTHER shell anymore and can do a direct command. + return .{ .direct = try args.toOwnedSlice() }; +} + +test "bash" { + const testing = std.testing; + var arena = ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var env = EnvMap.init(alloc); + defer env.deinit(); + + const command = try setupBash(alloc, .{ .shell = "bash" }, ".", &env); + + try testing.expectEqual(2, command.?.direct.len); + \ No newline at end of file diff --git a/locodiff-250425/results/qdrant_lib_collection_src_collection_manager_segments_updater.rs/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt b/locodiff-250425/results/qdrant_lib_collection_src_collection_manager_segments_updater.rs/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt new file mode 100644 index 000000000..7c69c7cb4 --- /dev/null +++ b/locodiff-250425/results/qdrant_lib_collection_src_collection_manager_segments_updater.rs/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt @@ -0,0 +1,255 @@ +//! 
A collection of functions for updating points and payloads stored in segments + +use ahash::{AHashMap, AHashSet}; + +use std::sync::atomic::AtomicBool; + +use common::counter::hardware_counter::HardwareCounterCell; + +use itertools::iproduct; + +use parking_lot::{RwLock, RwLockWriteGuard}; + +use segment::common::operation_error::{OperationError, OperationResult}; + +use segment::data_types::named_vectors::NamedVectors; + +use segment::data_types::vectors::{BatchVectorStructInternal, VectorStructInternal}; + +use segment::entry::entry_point::SegmentEntry; + +use segment::json_path::JsonPath; + +use segment::types::{ + + Filter, Payload, PayloadFieldSchema, PayloadKeyType, PayloadKeyTypeRef, PointIdType, + + SeqNumberType, VectorNameBuf, + +}; + +use crate::collection_manager::holders::segment_holder::SegmentHolder; + +use crate::operations::payload_ops::PayloadOps; + +use crate::operations::point_ops::{ + + PointInsertOperationsInternal, PointOperations, PointStructPersisted, + +}; + +use crate::operations::types::{CollectionError, CollectionResult}; + +use crate::operations::vector_ops::{PointVectorsPersisted, VectorOperations}; + +use crate::operations::FieldIndexOperations; + +pub(crate) fn check_unprocessed_points( + + points: &[PointIdType], + + processed: &AHashSet, + +) -> CollectionResult { + + let first_missed_point = points.iter().copied().find(|p| !processed.contains(p)); + + match first_missed_point { + + None => Ok(processed.len()), + + Some(missed_point_id) => Err(CollectionError::PointNotFound { missed_point_id }), + + } + +} + +pub(crate) fn delete_points( + + segments: &SegmentHolder, + + op_num: SeqNumberType, + + ids: &[PointIdType], + + hw_counter: &HardwareCounterCell, + +) -> CollectionResult { + + let mut total_deleted_points = 0; + + for batch in ids.chunks(VECTOR_OP_BATCH_SIZE) { + + let deleted_points = segments.apply_points( + + batch, + + |_| (), + + |id, _idx, write_segment, ()| write_segment.delete_point(op_num, id, hw_counter), + + 
false, + + )?; + + total_deleted_points += deleted_points; + + } + + Ok(total_deleted_points) + +} + +pub(crate) fn update_vectors( + + segments: &SegmentHolder, + + op_num: SeqNumberType, + + points: Vec, + + hw_counter: &HardwareCounterCell, + +) -> CollectionResult { + + let mut points_map: AHashMap = AHashMap::new(); + + for point in points { + + let PointVectorsPersisted { id, vector } = point; + + let named_vector = NamedVectors::from(vector); + + let entry = points_map.entry(id).or_default(); + + entry.merge(named_vector); + + } + + let ids: Vec = points_map.keys().copied().collect(); + + let mut total_updated_points = 0; + + for batch in ids.chunks(VECTOR_OP_BATCH_SIZE) { + + let updated_points = segments.apply_points_with_conditional_move( + + op_num, + + batch, + + |id, write_segment| { + + let vectors = points_map[&id].clone(); + + write_segment.update_vectors(op_num, id, vectors, hw_counter) + + }, + + |id, owned_vectors, _| { + + for (vector_name, vector_ref) in points_map[&id].iter() { + + owned_vectors.insert(vector_name.to_owned(), vector_ref.to_owned()); + + } + + }, + + |_| false, + + hw_counter, + + )?; + + check_unprocessed_points(batch, &updated_points)?; + + total_updated_points += updated_points.len(); + + } + + Ok(total_updated_points) + +} + +const VECTOR_OP_BATCH_SIZE: usize = 512; + +pub(crate) fn delete_vectors( + + segments: &SegmentHolder, + + op_num: SeqNumberType, + + points: &[PointIdType], + + vector_names: &[VectorNameBuf], + +) -> CollectionResult { + + let mut total_deleted_points = 0; + + for batch in points.chunks(VECTOR_OP_BATCH_SIZE) { + + let deleted_points = segments.apply_points( + + batch, + + |_| (), + + |id, _idx, write_segment, ()| { + + let mut res = true; + + for name in vector_names { + + res &= write_segment.delete_vector(op_num, id, name)?; + + } + + Ok(res) + + }, + + false, + + )?; + + total_deleted_points += deleted_points; + + } + + Ok(total_deleted_points) + +} + +pub(crate) fn delete_vectors_by_filter( + + 
segments: &SegmentHolder, + + op_num: SeqNumberType, + + filter: &Filter, + + vector_names: &[VectorNameBuf], + + hw_counter: &HardwareCounterCell, + +) -> CollectionResult { + + let affected_points = points_by_filter(segments, filter, hw_counter)?; + + delete_vectors(segments, op_num, &affected_points, vector_names) + +} + +const PAYLOAD_OP_BATCH_SIZE: usize = 512; + +pub(crate) fn overwrite_payload( + + segments: &SegmentHolder, + + op_num: SeqNumberType, + + payload: &Payload, + + points: &[ \ No newline at end of file diff --git a/locodiff-250425/results/qdrant_lib_collection_src_collection_manager_segments_updater.rs/x-ai_grok-code-fast-1/20250909_155741/metadata.json b/locodiff-250425/results/qdrant_lib_collection_src_collection_manager_segments_updater.rs/x-ai_grok-code-fast-1/20250909_155741/metadata.json new file mode 100644 index 000000000..e3cd8589f --- /dev/null +++ b/locodiff-250425/results/qdrant_lib_collection_src_collection_manager_segments_updater.rs/x-ai_grok-code-fast-1/20250909_155741/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "qdrant_lib_collection_src_collection_manager_segments_updater.rs", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/qdrant_lib_collection_src_collection_manager_segments_updater.rs_prompt.txt", + "expected_file": "locodiff-250425/prompts/qdrant_lib_collection_src_collection_manager_segments_updater.rs_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:57:41.911329+00:00", + "success": false, + "error": "Output mismatch", + "api_error": false, + "raw_response_length": 4962, + "extracted_output_length": 4954, + "expected_output_length": 26584, + "results_dir": "locodiff-250425/results/qdrant_lib_collection_src_collection_manager_segments_updater.rs/x-ai_grok-code-fast-1/20250909_155741", + "generation_id": "gen-1757433462-IfCb84iSxG3wXGLQgRNa", + "cost_usd": 0.0160871, + "prompt_tokens": 55286, + "completion_tokens": 3994, + 
"total_tokens": 59280, + "native_prompt_tokens": 54403, + "native_completion_tokens": 9999, + "native_finish_reason": "length", + "stats_error": null, + "native_tokens_reasoning": 8901 +} \ No newline at end of file diff --git a/locodiff-250425/results/qdrant_lib_collection_src_collection_manager_segments_updater.rs/x-ai_grok-code-fast-1/20250909_155741/output.diff b/locodiff-250425/results/qdrant_lib_collection_src_collection_manager_segments_updater.rs/x-ai_grok-code-fast-1/20250909_155741/output.diff new file mode 100644 index 000000000..369b8c9cd --- /dev/null +++ b/locodiff-250425/results/qdrant_lib_collection_src_collection_manager_segments_updater.rs/x-ai_grok-code-fast-1/20250909_155741/output.diff @@ -0,0 +1,869 @@ + +index 3c0816e92..7c69c7cb4 100644 +--- a/qdrant_lib_collection_src_collection_manager_segments_updater.rs_expectedoutput.txt (expected):tmp/tmp1o4do1je_expected.txt ++++ b/qdrant_lib_collection_src_collection_manager_segments_updater.rs_extracted.txt (actual):tmp/tmp6hbh013t_actual.txt +@@ -1,759 +1,255 @@ + //! 
A collection of functions for updating points and payloads stored in segments + ++use ahash::{AHashMap, AHashSet}; ++ + use std::sync::atomic::AtomicBool; + +-use ahash::{AHashMap, AHashSet}; + use common::counter::hardware_counter::HardwareCounterCell; ++ + use itertools::iproduct; ++ + use parking_lot::{RwLock, RwLockWriteGuard}; ++ + use segment::common::operation_error::{OperationError, OperationResult}; ++ + use segment::data_types::named_vectors::NamedVectors; ++ + use segment::data_types::vectors::{BatchVectorStructInternal, VectorStructInternal}; ++ + use segment::entry::entry_point::SegmentEntry; ++ + use segment::json_path::JsonPath; ++ + use segment::types::{ ++ + Filter, Payload, PayloadFieldSchema, PayloadKeyType, PayloadKeyTypeRef, PointIdType, ++ + SeqNumberType, VectorNameBuf, ++ + }; + + use crate::collection_manager::holders::segment_holder::SegmentHolder; +-use crate::operations::FieldIndexOperations; ++ + use crate::operations::payload_ops::PayloadOps; ++ + use crate::operations::point_ops::{ ++ + PointInsertOperationsInternal, PointOperations, PointStructPersisted, ++ + }; ++ + use crate::operations::types::{CollectionError, CollectionResult}; ++ + use crate::operations::vector_ops::{PointVectorsPersisted, VectorOperations}; + ++use crate::operations::FieldIndexOperations; ++ + pub(crate) fn check_unprocessed_points( ++ + points: &[PointIdType], ++ + processed: &AHashSet, ++ + ) -> CollectionResult { ++ + let first_missed_point = points.iter().copied().find(|p| !processed.contains(p)); + + match first_missed_point { ++ + None => Ok(processed.len()), ++ + Some(missed_point_id) => Err(CollectionError::PointNotFound { missed_point_id }), ++ + } ++ + } + +-/// Tries to delete points from all segments, returns number of actually deleted points + pub(crate) fn delete_points( ++ + segments: &SegmentHolder, ++ + op_num: SeqNumberType, ++ + ids: &[PointIdType], ++ + hw_counter: &HardwareCounterCell, ++ + ) -> CollectionResult { ++ + let mut 
total_deleted_points = 0; + + for batch in ids.chunks(VECTOR_OP_BATCH_SIZE) { ++ + let deleted_points = segments.apply_points( ++ + batch, ++ + |_| (), ++ + |id, _idx, write_segment, ()| write_segment.delete_point(op_num, id, hw_counter), ++ ++ false, ++ + )?; + + total_deleted_points += deleted_points; ++ + } + + Ok(total_deleted_points) ++ + } + +-/// Update the specified named vectors of a point, keeping unspecified vectors intact. + pub(crate) fn update_vectors( ++ + segments: &SegmentHolder, ++ + op_num: SeqNumberType, ++ + points: Vec, ++ + hw_counter: &HardwareCounterCell, ++ + ) -> CollectionResult { +- // Build a map of vectors to update per point, merge updates on same point ID ++ + let mut points_map: AHashMap = AHashMap::new(); ++ + for point in points { ++ + let PointVectorsPersisted { id, vector } = point; ++ + let named_vector = NamedVectors::from(vector); + + let entry = points_map.entry(id).or_default(); ++ + entry.merge(named_vector); ++ + } + + let ids: Vec = points_map.keys().copied().collect(); + + let mut total_updated_points = 0; ++ + for batch in ids.chunks(VECTOR_OP_BATCH_SIZE) { ++ + let updated_points = segments.apply_points_with_conditional_move( ++ + op_num, ++ + batch, ++ + |id, write_segment| { ++ + let vectors = points_map[&id].clone(); ++ + write_segment.update_vectors(op_num, id, vectors, hw_counter) ++ + }, ++ + |id, owned_vectors, _| { ++ + for (vector_name, vector_ref) in points_map[&id].iter() { ++ + owned_vectors.insert(vector_name.to_owned(), vector_ref.to_owned()); ++ + } ++ + }, ++ + |_| false, ++ + hw_counter, ++ + )?; ++ + check_unprocessed_points(batch, &updated_points)?; ++ + total_updated_points += updated_points.len(); ++ + } + + Ok(total_updated_points) ++ + } + + const VECTOR_OP_BATCH_SIZE: usize = 512; + +-/// Delete the given named vectors for the given points, keeping other vectors intact. 
+ pub(crate) fn delete_vectors( ++ + segments: &SegmentHolder, ++ + op_num: SeqNumberType, ++ + points: &[PointIdType], ++ + vector_names: &[VectorNameBuf], ++ + ) -> CollectionResult { ++ + let mut total_deleted_points = 0; + + for batch in points.chunks(VECTOR_OP_BATCH_SIZE) { ++ + let deleted_points = segments.apply_points( ++ + batch, ++ + |_| (), ++ + |id, _idx, write_segment, ()| { ++ + let mut res = true; ++ + for name in vector_names { ++ + res &= write_segment.delete_vector(op_num, id, name)?; ++ + } ++ + Ok(res) ++ + }, ++ ++ false, ++ + )?; + + total_deleted_points += deleted_points; ++ + } + + Ok(total_deleted_points) +-} + +-/// Delete the given named vectors for points matching the given filter, keeping other vectors intact. +-pub(crate) fn delete_vectors_by_filter( +- segments: &SegmentHolder, +- op_num: SeqNumberType, +- filter: &Filter, +- vector_names: &[VectorNameBuf], +- hw_counter: &HardwareCounterCell, +-) -> CollectionResult { +- let affected_points = points_by_filter(segments, filter, hw_counter)?; +- delete_vectors(segments, op_num, &affected_points, vector_names) + } + +-/// Batch size when modifying payload. 
+-const PAYLOAD_OP_BATCH_SIZE: usize = 512; ++pub(crate) fn delete_vectors_by_filter( + +-pub(crate) fn overwrite_payload( + segments: &SegmentHolder, +- op_num: SeqNumberType, +- payload: &Payload, +- points: &[PointIdType], +- hw_counter: &HardwareCounterCell, +-) -> CollectionResult { +- let mut total_updated_points = 0; +- +- for batch in points.chunks(PAYLOAD_OP_BATCH_SIZE) { +- let updated_points = segments.apply_points_with_conditional_move( +- op_num, +- batch, +- |id, write_segment| write_segment.set_full_payload(op_num, id, payload, hw_counter), +- |_, _, old_payload| { +- *old_payload = payload.clone(); +- }, +- |segment| segment.get_indexed_fields().is_empty(), +- hw_counter, +- )?; + +- total_updated_points += updated_points.len(); +- check_unprocessed_points(batch, &updated_points)?; +- } +- +- Ok(total_updated_points) +-} +- +-pub(crate) fn overwrite_payload_by_filter( +- segments: &SegmentHolder, + op_num: SeqNumberType, +- payload: &Payload, +- filter: &Filter, +- hw_counter: &HardwareCounterCell, +-) -> CollectionResult { +- let affected_points = points_by_filter(segments, filter, hw_counter)?; +- overwrite_payload(segments, op_num, payload, &affected_points, hw_counter) +-} + +-pub(crate) fn set_payload( +- segments: &SegmentHolder, +- op_num: SeqNumberType, +- payload: &Payload, +- points: &[PointIdType], +- key: &Option, +- hw_counter: &HardwareCounterCell, +-) -> CollectionResult { +- let mut total_updated_points = 0; +- +- for chunk in points.chunks(PAYLOAD_OP_BATCH_SIZE) { +- let updated_points = segments.apply_points_with_conditional_move( +- op_num, +- chunk, +- |id, write_segment| write_segment.set_payload(op_num, id, payload, key, hw_counter), +- |_, _, old_payload| match key { +- Some(key) => old_payload.merge_by_key(payload, key), +- None => old_payload.merge(payload), +- }, +- |segment| { +- segment.get_indexed_fields().keys().all(|indexed_path| { +- !indexed_path.is_affected_by_value_set(&payload.0, key.as_ref()) +- }) +- }, +- 
hw_counter, +- )?; +- +- check_unprocessed_points(chunk, &updated_points)?; +- total_updated_points += updated_points.len(); +- } +- +- Ok(total_updated_points) +-} +- +-fn points_by_filter( +- segments: &SegmentHolder, + filter: &Filter, +- hw_counter: &HardwareCounterCell, +-) -> CollectionResult> { +- let mut affected_points: Vec = Vec::new(); +- // we don’t want to cancel this filtered read +- let is_stopped = AtomicBool::new(false); +- segments.for_each_segment(|s| { +- let points = s.read_filtered(None, None, Some(filter), &is_stopped, hw_counter); +- affected_points.extend_from_slice(points.as_slice()); +- Ok(true) +- })?; +- Ok(affected_points) +-} +- +-pub(crate) fn set_payload_by_filter( +- segments: &SegmentHolder, +- op_num: SeqNumberType, +- payload: &Payload, +- filter: &Filter, +- key: &Option, +- hw_counter: &HardwareCounterCell, +-) -> CollectionResult { +- let affected_points = points_by_filter(segments, filter, hw_counter)?; +- set_payload(segments, op_num, payload, &affected_points, key, hw_counter) +-} +- +-pub(crate) fn delete_payload( +- segments: &SegmentHolder, +- op_num: SeqNumberType, +- points: &[PointIdType], +- keys: &[PayloadKeyType], +- hw_counter: &HardwareCounterCell, +-) -> CollectionResult { +- let mut total_deleted_points = 0; +- +- for batch in points.chunks(PAYLOAD_OP_BATCH_SIZE) { +- let updated_points = segments.apply_points_with_conditional_move( +- op_num, +- batch, +- |id, write_segment| { +- let mut res = true; +- for key in keys { +- res &= write_segment.delete_payload(op_num, id, key, hw_counter)?; +- } +- Ok(res) +- }, +- |_, _, payload| { +- for key in keys { +- payload.remove(key); +- } +- }, +- |segment| { +- iproduct!(segment.get_indexed_fields().keys(), keys).all( +- |(indexed_path, path_to_delete)| { +- !indexed_path.is_affected_by_value_remove(path_to_delete) +- }, +- ) +- }, +- hw_counter, +- )?; + +- check_unprocessed_points(batch, &updated_points)?; +- total_deleted_points += updated_points.len(); +- } +- +- 
Ok(total_deleted_points) +-} ++ vector_names: &[VectorNameBuf], + +-pub(crate) fn delete_payload_by_filter( +- segments: &SegmentHolder, +- op_num: SeqNumberType, +- filter: &Filter, +- keys: &[PayloadKeyType], + hw_counter: &HardwareCounterCell, +-) -> CollectionResult { +- let affected_points = points_by_filter(segments, filter, hw_counter)?; +- delete_payload(segments, op_num, &affected_points, keys, hw_counter) +-} + +-pub(crate) fn clear_payload( +- segments: &SegmentHolder, +- op_num: SeqNumberType, +- points: &[PointIdType], +- hw_counter: &HardwareCounterCell, + ) -> CollectionResult { +- let mut total_updated_points = 0; +- +- for batch in points.chunks(PAYLOAD_OP_BATCH_SIZE) { +- let updated_points = segments.apply_points_with_conditional_move( +- op_num, +- batch, +- |id, write_segment| write_segment.clear_payload(op_num, id, hw_counter), +- |_, _, payload| payload.0.clear(), +- |segment| segment.get_indexed_fields().is_empty(), +- hw_counter, +- )?; +- check_unprocessed_points(batch, &updated_points)?; +- total_updated_points += updated_points.len(); +- } + +- Ok(total_updated_points) +-} +- +-/// Clear Payloads from all segments matching the given filter +-pub(crate) fn clear_payload_by_filter( +- segments: &SegmentHolder, +- op_num: SeqNumberType, +- filter: &Filter, +- hw_counter: &HardwareCounterCell, +-) -> CollectionResult { +- let points_to_clear = points_by_filter(segments, filter, hw_counter)?; +- clear_payload(segments, op_num, &points_to_clear, hw_counter) +-} ++ let affected_points = points_by_filter(segments, filter, hw_counter)?; + +-pub(crate) fn create_field_index( +- segments: &SegmentHolder, +- op_num: SeqNumberType, +- field_name: PayloadKeyTypeRef, +- field_schema: Option<&PayloadFieldSchema>, +- hw_counter: &HardwareCounterCell, +-) -> CollectionResult { +- segments +- .apply_segments(|write_segment| { +- let Some((schema, index)) = +- write_segment.build_field_index(op_num, field_name, field_schema, hw_counter)? 
+- else { +- return Ok(false); +- }; +- +- write_segment.with_upgraded(|segment| { +- segment.apply_field_index(op_num, field_name.to_owned(), schema, index) +- }) +- }) +- .map_err(Into::into) +-} ++ delete_vectors(segments, op_num, &affected_points, vector_names) + +-pub(crate) fn delete_field_index( +- segments: &SegmentHolder, +- op_num: SeqNumberType, +- field_name: PayloadKeyTypeRef, +-) -> CollectionResult { +- segments +- .apply_segments(|write_segment| { +- write_segment.with_upgraded(|segment| segment.delete_field_index(op_num, field_name)) +- }) +- .map_err(Into::into) + } + +-/// Upsert to a point ID with the specified vectors and payload in the given segment. +-/// +-/// Returns +-/// - Ok(true) if the operation was successful and point replaced existing value +-/// - Ok(false) if the operation was successful and point was inserted +-/// - Err if the operation failed +-fn upsert_with_payload( +- segment: &mut RwLockWriteGuard, +- op_num: SeqNumberType, +- point_id: PointIdType, +- vectors: NamedVectors, +- payload: Option<&Payload>, +- hw_counter: &HardwareCounterCell, +-) -> OperationResult { +- let mut res = segment.upsert_point(op_num, point_id, vectors, hw_counter)?; +- if let Some(full_payload) = payload { +- res &= segment.set_full_payload(op_num, point_id, full_payload, hw_counter)?; +- } +- Ok(res) +-} ++const PAYLOAD_OP_BATCH_SIZE: usize = 512; + +-/// Sync points within a given [from_id; to_id) range. +-/// +-/// 1. Retrieve existing points for a range +-/// 2. Remove points, which are not present in the sync operation +-/// 3. Retrieve overlapping points, detect which one of them are changed +-/// 4. Select new points +-/// 5. 
Upsert points which differ from the stored ones +-/// +-/// Returns: +-/// (number of deleted points, number of new points, number of updated points) +-pub(crate) fn sync_points( +- segments: &SegmentHolder, +- op_num: SeqNumberType, +- from_id: Option, +- to_id: Option, +- points: &[PointStructPersisted], +- hw_counter: &HardwareCounterCell, +-) -> CollectionResult<(usize, usize, usize)> { +- let id_to_point: AHashMap = points.iter().map(|p| (p.id, p)).collect(); +- let sync_points: AHashSet<_> = points.iter().map(|p| p.id).collect(); +- // 1. Retrieve existing points for a range +- let stored_point_ids: AHashSet<_> = segments +- .iter() +- .flat_map(|(_, segment)| segment.get().read().read_range(from_id, to_id)) +- .collect(); +- // 2. Remove points, which are not present in the sync operation +- let points_to_remove: Vec<_> = stored_point_ids.difference(&sync_points).copied().collect(); +- let deleted = delete_points(segments, op_num, points_to_remove.as_slice(), hw_counter)?; +- // 3. Retrieve overlapping points, detect which one of them are changed +- let existing_point_ids: Vec<_> = stored_point_ids +- .intersection(&sync_points) +- .copied() +- .collect(); +- +- let mut points_to_update: Vec<_> = Vec::new(); +- // we don’t want to cancel this filtered read +- let is_stopped = AtomicBool::new(false); +- let _num_updated = +- segments.read_points(existing_point_ids.as_slice(), &is_stopped, |id, segment| { +- let all_vectors = match segment.all_vectors(id) { +- Ok(v) => v, +- Err(OperationError::InconsistentStorage { .. 
}) => NamedVectors::default(), +- Err(e) => return Err(e), +- }; +- let payload = segment.payload(id, hw_counter)?; +- let point = id_to_point.get(&id).unwrap(); +- if point.get_vectors() != all_vectors { +- points_to_update.push(*point); +- Ok(true) +- } else { +- let payload_match = match point.payload { +- Some(ref p) => p == &payload, +- None => Payload::default() == payload, +- }; +- if !payload_match { +- points_to_update.push(*point); +- Ok(true) +- } else { +- Ok(false) +- } +- } +- })?; +- +- // 4. Select new points +- let num_updated = points_to_update.len(); +- let mut num_new = 0; +- sync_points.difference(&stored_point_ids).for_each(|id| { +- num_new += 1; +- points_to_update.push(*id_to_point.get(id).unwrap()); +- }); +- +- // 5. Upsert points which differ from the stored ones +- let num_replaced = upsert_points(segments, op_num, points_to_update, hw_counter)?; +- debug_assert!( +- num_replaced <= num_updated, +- "number of replaced points cannot be greater than points to update ({num_replaced} <= {num_updated})", +- ); +- +- Ok((deleted, num_new, num_updated)) +-} ++pub(crate) fn overwrite_payload( + +-/// Checks point id in each segment, update point if found. +-/// All not found points are inserted into random segment. +-/// Returns: number of updated points. 
+-pub(crate) fn upsert_points<'a, T>( + segments: &SegmentHolder, +- op_num: SeqNumberType, +- points: T, +- hw_counter: &HardwareCounterCell, +-) -> CollectionResult +-where +- T: IntoIterator, +-{ +- let points_map: AHashMap = points.into_iter().map(|p| (p.id, p)).collect(); +- let ids: Vec = points_map.keys().copied().collect(); +- +- // Update points in writable segments +- let updated_points = segments.apply_points_with_conditional_move( +- op_num, +- &ids, +- |id, write_segment| { +- let point = points_map[&id]; +- upsert_with_payload( +- write_segment, +- op_num, +- id, +- point.get_vectors(), +- point.payload.as_ref(), +- hw_counter, +- ) +- }, +- |id, vectors, old_payload| { +- let point = points_map[&id]; +- for (name, vec) in point.get_vectors() { +- vectors.insert(name.into(), vec.to_owned()); +- } +- if let Some(payload) = &point.payload { +- *old_payload = payload.clone(); +- } +- }, +- |_| false, +- hw_counter, +- )?; +- +- let mut res = updated_points.len(); +- // Insert new points, which was not updated or existed +- let new_point_ids = ids.iter().copied().filter(|x| !updated_points.contains(x)); +- +- { +- let default_write_segment = segments.smallest_appendable_segment().ok_or_else(|| { +- CollectionError::service_error("No appendable segments exists, expected at least one") +- })?; +- +- let segment_arc = default_write_segment.get(); +- let mut write_segment = segment_arc.write(); +- for point_id in new_point_ids { +- let point = points_map[&point_id]; +- res += usize::from(upsert_with_payload( +- &mut write_segment, +- op_num, +- point_id, +- point.get_vectors(), +- point.payload.as_ref(), +- hw_counter, +- )?); +- } +- RwLockWriteGuard::unlock_fair(write_segment); +- }; +- +- Ok(res) +-} +- +-pub(crate) fn process_point_operation( +- segments: &RwLock, +- op_num: SeqNumberType, +- point_operation: PointOperations, +- hw_counter: &HardwareCounterCell, +-) -> CollectionResult { +- match point_operation { +- PointOperations::DeletePoints { ids, 
.. } => { +- delete_points(&segments.read(), op_num, &ids, hw_counter) +- } +- PointOperations::UpsertPoints(operation) => { +- let points: Vec<_> = match operation { +- PointInsertOperationsInternal::PointsBatch(batch) => { +- let batch_vectors = BatchVectorStructInternal::from(batch.vectors); +- let all_vectors = batch_vectors.into_all_vectors(batch.ids.len()); +- let vectors_iter = batch.ids.into_iter().zip(all_vectors); +- match batch.payloads { +- None => vectors_iter +- .map(|(id, vectors)| PointStructPersisted { +- id, +- vector: VectorStructInternal::from(vectors).into(), +- payload: None, +- }) +- .collect(), +- Some(payloads) => vectors_iter +- .zip(payloads) +- .map(|((id, vectors), payload)| PointStructPersisted { +- id, +- vector: VectorStructInternal::from(vectors).into(), +- payload, +- }) +- .collect(), +- } +- } +- PointInsertOperationsInternal::PointsList(points) => points, +- }; +- let res = upsert_points(&segments.read(), op_num, points.iter(), hw_counter)?; +- Ok(res) +- } +- PointOperations::DeletePointsByFilter(filter) => { +- delete_points_by_filter(&segments.read(), op_num, &filter, hw_counter) +- } +- PointOperations::SyncPoints(operation) => { +- let (deleted, new, updated) = sync_points( +- &segments.read(), +- op_num, +- operation.from_id, +- operation.to_id, +- &operation.points, +- hw_counter, +- )?; +- Ok(deleted + new + updated) +- } +- } +-} + +-pub(crate) fn process_vector_operation( +- segments: &RwLock, + op_num: SeqNumberType, +- vector_operation: VectorOperations, +- hw_counter: &HardwareCounterCell, +-) -> CollectionResult { +- match vector_operation { +- VectorOperations::UpdateVectors(operation) => { +- update_vectors(&segments.read(), op_num, operation.points, hw_counter) +- } +- VectorOperations::DeleteVectors(ids, vector_names) => { +- delete_vectors(&segments.read(), op_num, &ids.points, &vector_names) +- } +- VectorOperations::DeleteVectorsByFilter(filter, vector_names) => { +- 
delete_vectors_by_filter(&segments.read(), op_num, &filter, &vector_names, hw_counter) +- } +- } +-} + +-pub(crate) fn process_payload_operation( +- segments: &RwLock, +- op_num: SeqNumberType, +- payload_operation: PayloadOps, +- hw_counter: &HardwareCounterCell, +-) -> CollectionResult { +- match payload_operation { +- PayloadOps::SetPayload(sp) => { +- let payload: Payload = sp.payload; +- if let Some(points) = sp.points { +- set_payload( +- &segments.read(), +- op_num, +- &payload, +- &points, +- &sp.key, +- hw_counter, +- ) +- } else if let Some(filter) = sp.filter { +- set_payload_by_filter( +- &segments.read(), +- op_num, +- &payload, +- &filter, +- &sp.key, +- hw_counter, +- ) +- } else { +- Err(CollectionError::BadRequest { +- description: "No points or filter specified".to_string(), +- }) +- } +- } +- PayloadOps::DeletePayload(dp) => { +- if let Some(points) = dp.points { +- delete_payload(&segments.read(), op_num, &points, &dp.keys, hw_counter) +- } else if let Some(filter) = dp.filter { +- delete_payload_by_filter(&segments.read(), op_num, &filter, &dp.keys, hw_counter) +- } else { +- Err(CollectionError::BadRequest { +- description: "No points or filter specified".to_string(), +- }) +- } +- } +- PayloadOps::ClearPayload { ref points, .. 
} => { +- clear_payload(&segments.read(), op_num, points, hw_counter) +- } +- PayloadOps::ClearPayloadByFilter(ref filter) => { +- clear_payload_by_filter(&segments.read(), op_num, filter, hw_counter) +- } +- PayloadOps::OverwritePayload(sp) => { +- let payload: Payload = sp.payload; +- if let Some(points) = sp.points { +- overwrite_payload(&segments.read(), op_num, &payload, &points, hw_counter) +- } else if let Some(filter) = sp.filter { +- overwrite_payload_by_filter(&segments.read(), op_num, &payload, &filter, hw_counter) +- } else { +- Err(CollectionError::BadRequest { +- description: "No points or filter specified".to_string(), +- }) +- } +- } +- } +-} +- +-pub(crate) fn process_field_index_operation( +- segments: &RwLock, +- op_num: SeqNumberType, +- field_index_operation: &FieldIndexOperations, +- hw_counter: &HardwareCounterCell, +-) -> CollectionResult { +- match field_index_operation { +- FieldIndexOperations::CreateIndex(index_data) => create_field_index( +- &segments.read(), +- op_num, +- &index_data.field_name, +- index_data.field_schema.as_ref(), +- hw_counter, +- ), +- FieldIndexOperations::DeleteIndex(field_name) => { +- delete_field_index(&segments.read(), op_num, field_name) +- } +- } +-} +- +-/// Max amount of points to delete in a batched deletion iteration. 
+-const DELETION_BATCH_SIZE: usize = 512; ++ payload: &Payload, + +-/// Deletes points from all segments matching the given filter +-pub(crate) fn delete_points_by_filter( +- segments: &SegmentHolder, +- op_num: SeqNumberType, +- filter: &Filter, +- hw_counter: &HardwareCounterCell, +-) -> CollectionResult { +- let mut total_deleted = 0; +- // we don’t want to cancel this filtered read +- let is_stopped = AtomicBool::new(false); +- let mut points_to_delete: AHashMap<_, _> = segments +- .iter() +- .map(|(segment_id, segment)| { +- ( +- *segment_id, +- segment.get().read().read_filtered( +- None, +- None, +- Some(filter), +- &is_stopped, +- hw_counter, +- ), +- ) +- }) +- .collect(); +- +- segments.apply_segments_batched(|s, segment_id| { +- let Some(curr_points) = points_to_delete.get_mut(&segment_id) else { +- return Ok(false); +- }; +- if curr_points.is_empty() { +- return Ok(false); +- } +- +- let mut deleted_in_batch = 0; +- while let Some(point_id) = curr_points.pop() { +- if s.delete_point(op_num, point_id, hw_counter)? { +- total_deleted += 1; +- deleted_in_batch += 1; +- } +- +- if deleted_in_batch >= DELETION_BATCH_SIZE { +- break; +- } +- } +- +- Ok(true) +- })?; +- +- Ok(total_deleted) +-} +\ No newline at end of file ++ points: &[ +\ No newline at end of file diff --git a/locodiff-250425/results/qdrant_lib_collection_src_collection_manager_segments_updater.rs/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt b/locodiff-250425/results/qdrant_lib_collection_src_collection_manager_segments_updater.rs/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt new file mode 100644 index 000000000..1ae7df4fc --- /dev/null +++ b/locodiff-250425/results/qdrant_lib_collection_src_collection_manager_segments_updater.rs/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt @@ -0,0 +1,256 @@ +```rust +//! 
A collection of functions for updating points and payloads stored in segments + +use ahash::{AHashMap, AHashSet}; + +use std::sync::atomic::AtomicBool; + +use common::counter::hardware_counter::HardwareCounterCell; + +use itertools::iproduct; + +use parking_lot::{RwLock, RwLockWriteGuard}; + +use segment::common::operation_error::{OperationError, OperationResult}; + +use segment::data_types::named_vectors::NamedVectors; + +use segment::data_types::vectors::{BatchVectorStructInternal, VectorStructInternal}; + +use segment::entry::entry_point::SegmentEntry; + +use segment::json_path::JsonPath; + +use segment::types::{ + + Filter, Payload, PayloadFieldSchema, PayloadKeyType, PayloadKeyTypeRef, PointIdType, + + SeqNumberType, VectorNameBuf, + +}; + +use crate::collection_manager::holders::segment_holder::SegmentHolder; + +use crate::operations::payload_ops::PayloadOps; + +use crate::operations::point_ops::{ + + PointInsertOperationsInternal, PointOperations, PointStructPersisted, + +}; + +use crate::operations::types::{CollectionError, CollectionResult}; + +use crate::operations::vector_ops::{PointVectorsPersisted, VectorOperations}; + +use crate::operations::FieldIndexOperations; + +pub(crate) fn check_unprocessed_points( + + points: &[PointIdType], + + processed: &AHashSet, + +) -> CollectionResult { + + let first_missed_point = points.iter().copied().find(|p| !processed.contains(p)); + + match first_missed_point { + + None => Ok(processed.len()), + + Some(missed_point_id) => Err(CollectionError::PointNotFound { missed_point_id }), + + } + +} + +pub(crate) fn delete_points( + + segments: &SegmentHolder, + + op_num: SeqNumberType, + + ids: &[PointIdType], + + hw_counter: &HardwareCounterCell, + +) -> CollectionResult { + + let mut total_deleted_points = 0; + + for batch in ids.chunks(VECTOR_OP_BATCH_SIZE) { + + let deleted_points = segments.apply_points( + + batch, + + |_| (), + + |id, _idx, write_segment, ()| write_segment.delete_point(op_num, id, hw_counter), + + 
false, + + )?; + + total_deleted_points += deleted_points; + + } + + Ok(total_deleted_points) + +} + +pub(crate) fn update_vectors( + + segments: &SegmentHolder, + + op_num: SeqNumberType, + + points: Vec, + + hw_counter: &HardwareCounterCell, + +) -> CollectionResult { + + let mut points_map: AHashMap = AHashMap::new(); + + for point in points { + + let PointVectorsPersisted { id, vector } = point; + + let named_vector = NamedVectors::from(vector); + + let entry = points_map.entry(id).or_default(); + + entry.merge(named_vector); + + } + + let ids: Vec = points_map.keys().copied().collect(); + + let mut total_updated_points = 0; + + for batch in ids.chunks(VECTOR_OP_BATCH_SIZE) { + + let updated_points = segments.apply_points_with_conditional_move( + + op_num, + + batch, + + |id, write_segment| { + + let vectors = points_map[&id].clone(); + + write_segment.update_vectors(op_num, id, vectors, hw_counter) + + }, + + |id, owned_vectors, _| { + + for (vector_name, vector_ref) in points_map[&id].iter() { + + owned_vectors.insert(vector_name.to_owned(), vector_ref.to_owned()); + + } + + }, + + |_| false, + + hw_counter, + + )?; + + check_unprocessed_points(batch, &updated_points)?; + + total_updated_points += updated_points.len(); + + } + + Ok(total_updated_points) + +} + +const VECTOR_OP_BATCH_SIZE: usize = 512; + +pub(crate) fn delete_vectors( + + segments: &SegmentHolder, + + op_num: SeqNumberType, + + points: &[PointIdType], + + vector_names: &[VectorNameBuf], + +) -> CollectionResult { + + let mut total_deleted_points = 0; + + for batch in points.chunks(VECTOR_OP_BATCH_SIZE) { + + let deleted_points = segments.apply_points( + + batch, + + |_| (), + + |id, _idx, write_segment, ()| { + + let mut res = true; + + for name in vector_names { + + res &= write_segment.delete_vector(op_num, id, name)?; + + } + + Ok(res) + + }, + + false, + + )?; + + total_deleted_points += deleted_points; + + } + + Ok(total_deleted_points) + +} + +pub(crate) fn delete_vectors_by_filter( + + 
segments: &SegmentHolder, + + op_num: SeqNumberType, + + filter: &Filter, + + vector_names: &[VectorNameBuf], + + hw_counter: &HardwareCounterCell, + +) -> CollectionResult { + + let affected_points = points_by_filter(segments, filter, hw_counter)?; + + delete_vectors(segments, op_num, &affected_points, vector_names) + +} + +const PAYLOAD_OP_BATCH_SIZE: usize = 512; + +pub(crate) fn overwrite_payload( + + segments: &SegmentHolder, + + op_num: SeqNumberType, + + payload: &Payload, + + points: &[ \ No newline at end of file diff --git a/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_map_index_mod.rs/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt b/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_map_index_mod.rs/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt new file mode 100644 index 000000000..b3ddaf135 --- /dev/null +++ b/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_map_index_mod.rs/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt @@ -0,0 +1,1268 @@ +std::borrow::Borrow; +use std::collections::hash_map::Entry; +use std::fmt::{Debug, Display}; +use std::hash::{BuildHasher, Hash}; +use std::iter; +use std::mem::size_of_val; +use std::path::{Path, PathBuf}; +use std::str::FromStr; +use std::sync::Arc; + +use ahash::HashMap; +use common::counter::hardware_counter::HardwareCounterCell; +use common::mmap_hashmap::Key; +use common::types::PointOffsetType; +use indexmap::IndexSet; +use itertools::Itertools; +use mmap_map_index::MmapMapIndex; +use parking_lot::RwLock; +use rocksdb::DB; +use serde_json::Value; +use smol_str::SmolStr; +use uuid::Uuid; + +use self::immutable_map_index::ImmutableMapIndex; +use self::mutable_map_index::MutableMapIndex; +use super::facet_index::FacetIndex; +use super::mmap_point_to_values::MmapValue; +use super::FieldIndexBuilderTrait; +use crate::common::operation_error::{OperationError, OperationResult}; +use crate::common::Flusher; +use 
crate::data_types::facets::{FacetHit, FacetValueRef}; +use crate::index::field_index::stat_tools::number_of_selected_points; +use crate::index::field_index::{ + CardinalityEstimation, PayloadBlockCondition, PayloadFieldIndex, PrimaryCondition, ValueIndexer, +}; +use crate::index::query_estimator::combine_should_estimations; +use crate::telemetry::PayloadIndexTelemetry; +use crate::types::{ + AnyVariants, FieldCondition, IntPayloadType, Match, MatchAny, MatchExcept, MatchValue, + PayloadKeyType, UuidIntType, ValueVariants, +}; + +pub mod immutable_map_index; +pub mod mmap_map_index; +pub mod mutable_map_index; + +pub type IdRefIter<'a> = Box + 'a>; +pub type IdIter<'a> = Box + 'a>; + +pub trait MapIndexKey: Key + MmapValue + Eq + Display + Debug { + type Owned: Borrow + Hash + Eq + Clone + FromStr + Default; + + fn to_owned(&self) -> Self::Owned; +} + +impl MapIndexKey for str { + type Owned = SmolStr; + + fn to_owned(&self) -> Self::Owned { + SmolStr::from(self) + } +} + +impl MapIndexKey for IntPayloadType { + type Owned = IntPayloadType; + + fn to_owned(&self) -> Self::Owned { + *self + } +} + +impl MapIndexKey for UuidIntType { + type Owned = UuidIntType; + + fn to_owned(&self) -> Self::Owned { + *self + } +} + +pub enum MapIndex { + Mutable(MutableMapIndex), + Immutable(ImmutableMapIndex), + Mmap(Box>), +} + +impl MapIndex { + pub fn new_memory(db: Arc>, field_name: &str, is_appendable: bool) -> Self { + if is_appendable { + MapIndex::Mutable(MutableMapIndex::new(db, field_name)) + } else { + MapIndex::Immutable(ImmutableMapIndex::new(db, field_name)) + } + } + + pub fn new_mmap(path: &Path, is_on_disk: bool) -> OperationResult { + Ok(MapIndex::Mmap(Box::new(MmapMapIndex::load( + path, is_on_disk, + )?))) + } + + pub fn builder(db: Arc>, field_name: &str) -> MapIndexBuilder { + MapIndexBuilder(MapIndex::Mutable(MutableMapIndex::new(db, field_name))) + } + + pub fn mmap_builder(path: &Path, is_on_disk: bool) -> MapIndexMmapBuilder { + MapIndexMmapBuilder { + 
path: path.to_owned(), + point_to_values: Default::default(), + values_to_points: Default::default(), + is_on_disk, + } + } + + fn load_from_db(&mut self) -> OperationResult { + match self { + MapIndex::Mutable(index) => index.load_from_db(), + MapIndex::Immutable(index) => index.load_from_db(), + // mmap index is always loaded + MapIndex::Mmap(_) => Ok(true), + } + } + + pub fn check_values_any( + &self, + idx: PointOffsetType, + hw_counter: &HardwareCounterCell, + check_fn: impl Fn(&N) -> bool, + ) -> bool { + match self { + MapIndex::Mutable(index) => index.check_values_any(idx, check_fn), + MapIndex::Immutable(index) => index.check_values_any(idx, check_fn), + MapIndex::Mmap(index) => index.check_values_any(idx, hw_counter, check_fn), + } + } + + pub fn get_values( + &self, + idx: PointOffsetType, + ) -> Option> + '_>> { + match self { + MapIndex::Mutable(index) => Some(Box::new( + index.get_values(idx)?.map(|v| N::as_referenced(v)), + )), + MapIndex::Immutable(index) => Some(Box::new( + index.get_values(idx)?.map(|v| N::as_referenced(v)), + )), + MapIndex::Mmap(index) => Some(Box::new(index.get_values(idx)?)), + } + } + + pub fn values_count(&self, idx: PointOffsetType) -> usize { + match self { + MapIndex::Mutable(index) => index.values_count(idx).unwrap_or_default(), + MapIndex::Immutable(index) => index.values_count(idx).unwrap_or_default(), + MapIndex::Mmap(index) => index.values_count(idx).unwrap_or_default(), + } + } + + pub fn get_indexed_points(&self) -> usize { + match self { + MapIndex::Mutable(index) => index.get_indexed_points(), + MapIndex::Immutable(index) => index.get_indexed_points(), + MapIndex::Mmap(index) => index.get_indexed_points(), + } + } + + pub fn get_values_count(&self) -> usize { + match self { + MapIndex::Mutable(index) => index.get_values_count(), + MapIndex::Immutable(index) => index.get_values_count(), + MapIndex::Mmap(index) => index.get_values_count(), + } + } + + pub fn get_unique_values_count(&self) -> usize { + match self { 
+ MapIndex::Mutable(index) => index.get_unique_values_count(), + MapIndex::Immutable(index) => index.get_unique_values_count(), + MapIndex::Mmap(index) => index.get_unique_values_count(), + } + } + + fn get_count_for_value(&self, value: &N, hw_counter: &HardwareCounterCell) -> Option { + match self { + MapIndex::Mutable(index) => index.get_count_for_value(value), + MapIndex::Immutable(index) => index.get_count_for_value(value), + MapIndex::Mmap(index) => index.get_count_for_value(value, hw_counter), + } + } + + fn get_iterator(&self, value: &N, hw_counter: &HardwareCounterCell) -> IdRefIter<'_> { + match self { + MapIndex::Mutable(index) => index.get_iterator(value), + MapIndex::Immutable(index) => index.get_iterator(value), + MapIndex::Mmap(index) => index.get_iterator(value, hw_counter), + } + } + + pub fn iter_values(&self) -> Box + '_> { + match self { + MapIndex::Mutable(index) => index.iter_values(), + MapIndex::Immutable(index) => index.iter_values(), + MapIndex::Mmap(index) => index.iter_values(), + } + } + + pub fn iter_values_map<'a>( + &'a self, + hw_cell: &'a HardwareCounterCell, + ) -> Box)> + 'a> { + match self { + MapIndex::Mutable(index) => Box::new(index.iter_values_map()), + MapIndex::Immutable(index) => Box::new(index.iter_values_map()), + MapIndex::Mmap(index) => Box::new(index.iter_values_map(hw_cell)), + } + } + + pub fn storage_cf_name(field: &str) -> String { + format!("{field}_map") + } + + pub fn recreate(&self) -> OperationResult<()> { + match self { + MapIndex::Mutable(index) => index.get_db_wrapper().recreate_column_family(), + MapIndex::Immutable(index) => index.get_db_wrapper().recreate_column_family(), + MapIndex::Mmap(_) => Ok(()), + } + } + + fn flusher(&self) -> Flusher { + match self { + MapIndex::Mutable(index) => index.get_db_wrapper().flusher(), + MapIndex::Immutable(index) => index.get_db_wrapper().flusher(), + MapIndex::Mmap(index) => index.flusher(), + } + } + + fn match_cardinality( + &self, + value: &N, + hw_counter: 
&HardwareCounterCell, + ) -> CardinalityEstimation { + let values_count = self.get_count_for_value(value, hw_counter).unwrap_or(0); + + CardinalityEstimation::exact(values_count) + } + + pub fn encode_db_record(value: &N, idx: PointOffsetType) -> String + where + N::Owned: Display, + { + format!("{value}/{idx}") + } + + pub fn decode_db_record(s: &str) -> OperationResult<(N::Owned, PointOffsetType)> { + const DECODE_ERR: &str = "Index db parsing error: wrong data format"; + let separator_pos = s + .rfind('/') + .ok_or_else(|| OperationError::service_error(DECODE_ERR))?; + if separator_pos == s.len() - 1 { + return Err(OperationError::service_error(DECODE_ERR)); + } + let value_str = &s[..separator_pos]; + let value = + N::Owned::from_str(value_str).map_err(|_| OperationError::service_error(DECODE_ERR))?; + let idx_str = &s[separator_pos + 1..]; + let idx = PointOffsetType::from_str(idx_str) + .map_err(|_| OperationError::service_error(DECODE_ERR))?; + Ok((value, idx)) + } + + pub fn values_is_empty(&self, idx: PointOffsetType) -> bool { + self.values_count(idx) == 0 + } + + /// Estimates cardinality for `except` clause + /// + /// # Arguments + /// + /// * 'excluded' - values, which are not considered as matching + /// + /// # Returns + /// + /// * `CardinalityEstimation` - estimation of cardinality + fn except_cardinality<'a>( + &'a self, + excluded: impl Iterator, + hw_counter: &HardwareCounterCell, + ) -> CardinalityEstimation { + // Minimal case: we exclude as many points as possible. + // In this case, excluded points do not have any other values except excluded ones. + // So the first step - we estimate how many other points is needed to fit unused values. 
+ + // Example: + // Values: 20, 20 + // Unique values: 5 + // Total points: 100 + // Total values: 110 + // total_excluded_value_count = 40 + // non_excluded_values_count = 110 - 40 = 70 + // max_values_per_point = 5 - 2 = 3 + // min_not_excluded_by_values = 70 / 3 = 24 + // min = max(24, 100 - 40) = 60 + // exp = ... + // max = min(20, 70) = 20 + + // Values: 60, 60 + // Unique values: 5 + // Total points: 100 + // Total values: 200 + // total_excluded_value_count = 120 + // non_excluded_values_count = 200 - 120 = 80 + // max_values_per_point = 5 - 2 = 3 + // min_not_excluded_by_values = 80 / 3 = 27 + // min = max(27, 100 - 120) = 27 + // exp = ... + // max = min(60, 80) = 60 + + // Values: 60, 60, 60 + // Unique values: 5 + // Total points: 100 + // Total values: 200 + // total_excluded_value_count = 180 + // non_excluded_values_count = 200 - 180 = 20 + // max_values_per_point = 5 - 3 = 2 + // min_not_excluded_by_values = 20 / 2 = 10 + // min = max(10, 100 - 180) = 10 + // exp = ... + // max = min(60, 20) = 20 + + let excluded_value_counts: Vec<_> = excluded + .map(|val| { + self.get_count_for_value(val, hw_counter) + .unwrap_or(0) + }) + .collect(); + let total_excluded_value_count: usize = excluded_value_counts.iter().sum(); + + let non_excluded_values_count = self + .get_values_count() + .saturating_sub(total_excluded_value_count); + let max_values_per_point = self + .get_unique_values_count() + .saturating_sub(excluded_value_counts.len()); + + if max_values_per_point == 0 { + // All points are excluded, so we can't select any point + return CardinalityEstimation::exact(0); + } + + // Minimal amount of points, required to fit all unused values. + // Cardinality can't be less than this value. 
+ let min_not_excluded_by_values = non_excluded_values_count.div_ceil(max_values_per_point); + + let min = min_not_excluded_by_values.max( + self.get_indexed_points() + .saturating_sub(total_excluded_value_count), + ); + + // Maximum scenario: selected points overlap as much as possible. + // From one side, all excluded values should be assigned to the same point + // => we can take the value with the maximum amount of points. + // From another side, all other values should be enough to fill all other points. + + let max_excluded_value_count = excluded_value_counts.iter().max().copied().unwrap_or(0); + + let max = self + .get_indexed_points() + .saturating_sub(max_excluded_value_count) + .min(non_excluded_values_count); + + // Expected case: we assume that all points are filled equally. + // So we can estimate the probability of the point to have non-excluded value. + let exp = number_of_selected_points(self.get_indexed_points(), non_excluded_values_count) + .max(min) + .min(max); + + CardinalityEstimation { + primary_clauses: vec![], + min, + exp, + max, + } + } + + fn except_set<'a, K, A>( + &'a self, + excluded: &'a IndexSet, + hw_counter: &'a HardwareCounterCell, + ) -> Box + 'a> + where + A: BuildHasher, + K: Borrow + Hash + Eq, + { + Box::new( + self.iter_values() + .filter(|key| !excluded.contains((*key).borrow())) + .flat_map(move |key| self.get_iterator(key, hw_counter).copied()) + .unique(), + ) + } + + pub fn get_telemetry_data(&self) -> PayloadIndexTelemetry { + PayloadIndexTelemetry { + field_name: None, + points_count: self.get_indexed_points(), + points_values_count: self.get_values_count(), + histogram_bucket_size: None, + index_type: match self { + MapIndex::Mutable(_) => "mutable_map", + MapIndex::Immutable(_) => "immutable_map", + MapIndex::Mmap(_) => "mmap_map", + }, + } + } + + pub fn clear(self) -> OperationResult<()> { + match self { + MapIndex::Mutable(index) => index.get_db_wrapper().recreate_column_family(), + MapIndex::Immutable(index) => 
index.get_db_wrapper().recreate_column_family(), + MapIndex::Mmap(index) => index.clear(), + } + } + + fn remove_point(&mut self, id: PointOffsetType) -> OperationResult<()> { + match self { + MapIndex::Mutable(index) => index.remove_point(id), + MapIndex::Immutable(index) => index.remove_point(id), + MapIndex::Mmap(index) => { + index.remove_point(id); + Ok(()) + } + } + } + + pub fn is_on_disk(&self) -> bool { + match self { + MapIndex::Mutable(_) => false, + MapIndex::Immutable(_) => false, + MapIndex::Mmap(index) => index.is_on_disk(), + } + } + + /// Populate all pages in the mmap. + /// Block until all pages are populated. + pub fn populate(&self) -> OperationResult<()> { + match self { + MapIndex::Mutable(_) => {} // Not a mmap + MapIndex::Immutable(_) => {} // Not a mmap + MapIndex::Mmap(index) => index.populate()?, + } + Ok(()) + } + + /// Drop disk cache. + pub fn clear_cache(&self) -> OperationResult<()> { + match self { + MapIndex::Mutable(_) => {} // Not a mmap + MapIndex::Immutable(_) => {} // Not a mmap + MapIndex::Mmap(index) => index.clear_cache()?, + } + Ok(()) + } +} + +pub struct MapIndexBuilder(MapIndex); + +impl FieldIndexBuilderTrait for MapIndexBuilder +where + MapIndex: PayloadFieldIndex + ValueIndexer, +{ + type FieldIndexType = MapIndex; + + fn init(&mut self) -> OperationResult<()> { + match &mut self.0 { + MapIndex::Mutable(index) => index.get_db_wrapper().recreate_column_family(), + MapIndex::Immutable(index) => index.get_db_wrapper().recreate_column_family(), + MapIndex::Mmap(_) => unreachable!(), + } + } + + fn add_point( + &mut self, + id: PointOffsetType, + values: &[&Value], + hw_counter: &HardwareCounterCell, + ) -> OperationResult<()> { + self.0.add_point(id, values, hw_counter) + } + + fn finalize(self) -> OperationResult { + Ok(self.0) + } +} + +pub struct MapIndexMmapBuilder { + path: PathBuf, + point_to_values: Vec>, + values_to_points: HashMap>, + is_on_disk: bool, +} + +impl FieldIndexBuilderTrait for MapIndexMmapBuilder 
+where + MapIndex: PayloadFieldIndex + ValueIndexer, + as ValueIndexer>::ValueType: Into, +{ + type FieldIndexType = MapIndex; + + fn init(&mut self) -> OperationResult<()> { + Ok(()) + } + + fn add_point( + &mut self, + id: PointOffsetType, + payload: &[&Value], + hw_counter: &HardwareCounterCell, + ) -> OperationResult<()> { + let mut flatten_values: Vec<_> = vec![]; + for value in payload.iter() { + let payload_values = as ValueIndexer>::get_values(value); + flatten_values.extend(payload_values); + } + let flatten_values: Vec = flatten_values.into_iter().map(Into::into).collect(); + + if self.point_to_values.len() <= id as usize { + self.point_to_values.resize_with(id as usize + 1, Vec::new); + } + + self.point_to_values[id as usize].extend(flatten_values.clone()); + + let mut hw_cell_wb = hw_counter + .payload_index_io_write_counter() + .write_back_counter(); + + for value in flatten_values { + let entry = self.values_to_points.entry(value); + + if let Entry::Vacant(e) = &entry { + let size = N::mmapped_size(N::as_referenced(e.key().borrow())); + hw_cell_wb.incr_delta(size); + } + + hw_cell_wb.incr_delta(size_of_val(&id)); + entry.or_default().push(id); + } + + Ok(()) + } + + fn finalize(self) -> OperationResult { + Ok(MapIndex::Mmap(Box::new(MmapMapIndex::build( + &self.path, + self.point_to_values, + self.values_to_points, + self.is_on_disk, + )?))) + } +} + +impl PayloadFieldIndex for MapIndex { + fn count_indexed_points(&self) -> usize { + self.get_indexed_points() + } + + fn load(&mut self) -> OperationResult { + self.load_from_db() + } + + fn cleanup(self) -> OperationResult<()> { + self.clear() + } + + fn flusher(&self) -> Flusher { + MapIndex::flusher(self) + } + + fn files(&self) -> Vec { + match self { + MapIndex::Mutable(_) => Vec::new(), + MapIndex::Immutable(_) => Vec::new(), + MapIndex::Mmap(index) => index.files(), + } + } + + fn filter<'a>( + &'a self, + condition: &'a FieldCondition, + hw_counter: &'a HardwareCounterCell, + ) -> Option + 'a>> { 
+ match &condition.r#match { + Some(Match::Value(MatchValue { value })) => match value { + ValueVariants::String(keyword) => Some(Box::new( + self.get_iterator(keyword.as_str(), hw_counter).copied(), + )), + ValueVariants::Integer(_) => None, + ValueVariants::Bool(_) => None, + }, + Some(Match::Any(MatchAny { any: any_variant })) => match any_variant { + AnyVariants::Strings(keywords) => Some(Box::new( + keywords + .iter() + .flat_map(move |keyword| { + self.get_iterator(keyword.as_str(), hw_counter).copied() + }) + .unique(), + )), + AnyVariants::Integers(integers) => { + if integers.is_empty() { + Some(Box::new(iter::empty())) + } else { + None + } + } + }, + Some(Match::Except(MatchExcept { except })) => match except { + AnyVariants::Strings(keywords) => Some(self.except_set(keywords, hw_counter)), + AnyVariants::Integers(other) => { + if other.is_empty() { + Some(Box::new(iter::empty())) + } else { + None + } + } + }, + _ => None, + } + } + + fn estimate_cardinality( + &self, + condition: &FieldCondition, + hw_counter: &HardwareCounterCell, + ) -> Option { + match &condition.r#match { + Some(Match::Value(MatchValue { value })) => match value { + ValueVariants::String(keyword) => { + let mut estimation = self.match_cardinality(keyword.as_str(), hw_counter); + estimation + .primary_clauses + .push(PrimaryCondition::Condition(Box::new(condition.clone()))); + Some(estimation) + } + ValueVariants::Integer(_) => None, + ValueVariants::Bool(_) => None, + }, + Some(Match::Any(MatchAny { any: any_variant })) => match any_variant { + AnyVariants::Strings(keywords) => { + let estimations = keywords + .iter() + .map(|keyword| self.match_cardinality(keyword.as_str(), hw_counter)) + .collect::>(); + let estimation = if estimations.is_empty() { + CardinalityEstimation::exact(0) + } else { + combine_should_estimations(&estimations, self.get_indexed_points()) + }; + Some( + estimation.with_primary_clause(PrimaryCondition::Condition(Box::new( + condition.clone(), + ))), + ) + } 
+ AnyVariants::Integers(integers) => { + if integers.is_empty() { + Some(CardinalityEstimation::exact(0).with_primary_clause( + PrimaryCondition::Condition(Box::new(condition.clone())), + )) + } else { + None + } + } + }, + Some(Match::Except(MatchExcept { except })) => match except { + AnyVariants::Strings(keywords) => { + Some(self.except_cardinality(keywords.iter().map(|k| k.as_str()), hw_counter)) + } + AnyVariants::Integers(others) => { + if others.is_empty() { + Some(CardinalityEstimation::exact(0).with_primary_clause( + PrimaryCondition::Condition(Box::new(condition.clone())), + )) + } else { + None + } + } + }, + _ => None, + } + } + + fn payload_blocks( + &self, + threshold: usize, + key: PayloadKeyType, + ) -> Box + '_> { + Box::new( + self.iter_values() + .map(move |value| { + ( + value, + self.get_count_for_value(value, &HardwareCounterCell::disposable()) // Payload_blocks only used in HNSW building, which is unmeasured. + .unwrap_or(0), + ) + }) + .filter(move |(_value, count)| *count > threshold) + .map(move |(value, count)| PayloadBlockCondition { + condition: FieldCondition::new_match(key.clone(), value.to_string().into()), + cardinality: count, + }), + ) + } +} + +impl PayloadFieldIndex for MapIndex { + fn count_indexed_points(&self) -> usize { + self.get_indexed_points() + } + + fn load(&mut self) -> OperationResult { + self.load_from_db() + } + + fn cleanup(self) -> OperationResult<()> { + self.clear() + } + + fn flusher(&self) -> Flusher { + MapIndex::flusher(self) + } + + fn files(&self) -> Vec { + match self { + MapIndex::Mutable(_) => Vec::new(), + MapIndex::Immutable(_) => Vec::new(), + MapIndex::Mmap(index) => index.files(), + } + } + + fn filter<'a>( + &'a self, + condition: &'a FieldCondition, + hw_counter: &'a HardwareCounterCell, + ) -> Option + 'a>> { + match &condition.r#match { + Some(Match::Value(MatchValue { value })) => match value { + ValueVariants::String(uuid_string) => { + let uuid = Uuid::from_str(uuid_string).ok()?; + 
Some(Box::new( + self.get_iterator(&uuid.as_u128(), hw_counter).copied(), + )) + } + ValueVariants::Integer(_) => None, + ValueVariants::Bool(_) => None, + }, + Some(Match::Any(MatchAny { any: any_variant })) => match any_variant { + AnyVariants::Strings(uuids_string) => { + let uuids: Result, _> = uuids_string + .iter() + .map(|uuid_string| Uuid::from_str(uuid_string).map(|x| x.as_u128())) + .collect(); + + let uuids = uuids.ok()?; + + Some(Box::new( + uuids + .into_iter() + .flat_map(move |uuid| self.get_iterator(&uuid, hw_counter).copied()) + .unique(), + )) + } + AnyVariants::Integers(integers) => { + if integers.is_empty() { + Some(Box::new(iter::empty())) + } else { + None + } + } + }, + Some(Match::Except(MatchExcept { except })) => match except { + AnyVariants::Strings(uuids_string) => { + let uuids: Result, _> = uuids_string + .iter() + .map(|uuid_string| Uuid::from_str(uuid_string).map(|x| x.as_u128())) + .collect(); + + let excluded_uuids = uuids.ok()?; + let exclude_iter = self + .iter_values() + .filter(move |key| !excluded_uuids.contains(*key)) + .flat_map(move |key| self.get_iterator(key, hw_counter).copied()) + .unique(); + Some(Box::new(exclude_iter)) + } + AnyVariants::Integers(other) => { + if other.is_empty() { + Some(Box::new(iter::empty())) + } else { + None + } + } + }, + _ => None, + } + } + + fn estimate_cardinality( + &self, + condition: &FieldCondition, + hw_counter: &HardwareCounterCell, + ) -> Option { + match &condition.r#match { + Some(Match::Value(MatchValue { value })) => match value { + ValueVariants::String(uuid_string) => { + let uuid = Uuid::from_str(uuid_string).ok()?; + let mut estimation = self.match_cardinality(&uuid.as_u128(), hw_counter); + estimation + .primary_clauses + .push(PrimaryCondition::Condition(Box::new(condition.clone()))); + Some(estimation) + } + ValueVariants::Integer(_) => None, + ValueVariants::Bool(_) => None, + }, + Some(Match::Any(MatchAny { any: any_variant })) => match any_variant { + 
AnyVariants::Strings(uuids_string) => { + let uuids: Result, _> = uuids_string + .iter() + .map(|uuid_string| Uuid::from_str(uuid_string).map(|x| x.as_u128())) + .collect(); + + let uuids = uuids.ok()?; + + let estimations = uuids + .into_iter() + .map(|uuid| self.match_cardinality(&uuid, hw_counter)) + .collect::>(); + let estimation = if estimations.is_empty() { + CardinalityEstimation::exact(0) + } else { + combine_should_estimations(&estimations, self.get_indexed_points()) + }; + Some( + estimation.with_primary_clause(PrimaryCondition::Condition(Box::new( + condition.clone(), + ))), + ) + } + AnyVariants::Integers(integers) => { + if integers.is_empty() { + Some( + CardinalityEstimation::exact(0).with_primary_clause( + PrimaryCondition::Condition(Box::new(condition.clone())), + ), + ) + } else { + None + } + } + }, + Some(Match::Except(MatchExcept { except })) => match except { + AnyVariants::Strings(uuids_string) => { + let uuids: Result, _> = uuids_string + .iter() + .map(|uuid_string| Uuid::from_str(uuid_string).map(|x| x.as_u128())) + .collect(); + + let excluded_uuids = uuids.ok()?; + + Some(self.except_cardinality(excluded_uuids.iter(), hw_counter)) + } + AnyVariants::Integers(other) => { + if other.is_empty() { + Some(CardinalityEstimation::exact(0).with_primary_clause( + PrimaryCondition::Condition(Box::new(condition.clone())), + )) + } else { + None + } + } + }, + _ => None, + } + } + + fn payload_blocks( + &self, + threshold: usize, + key: PayloadKeyType, + ) -> Box + '_> { + Box::new( + self.iter_values() + .map(move |value| { + ( + value, + self.get_count_for_value(value, &HardwareCounterCell::disposable()) // payload_blocks only used in HNSW building, which is unmeasured. 
+ .unwrap_or(0), + ) + }) + .filter(move |(_value, count)| *count >= threshold) + .map(move |(value, count)| PayloadBlockCondition { + condition: FieldCondition::new_match( + key.clone(), + Uuid::from_u128(*value).to_string().into(), + ), + cardinality: count, + }), + ) + } +} + +impl PayloadFieldIndex for MapIndex { + fn count_indexed_points(&self) -> usize { + self.get_indexed_points() + } + + fn load(&mut self) -> OperationResult { + self.load_from_db() + } + + fn cleanup(self) -> OperationResult<()> { + self.clear() + } + + fn flusher(&self) -> Flusher { + MapIndex::flusher(self) + } + + fn files(&self) -> Vec { + match self { + MapIndex::Mutable(_) => Vec::new(), + MapIndex::Immutable(_) => Vec::new(), + MapIndex::Mmap(index) => index.files(), + } + } + + fn filter<'a>( + &'a self, + condition: &'a FieldCondition, + hw_counter: &'a HardwareCounterCell, + ) -> Option + 'a>> { + match &condition.r#match { + Some(Match::Value(MatchValue { value })) => match value { + ValueVariants::String(_) => None, + ValueVariants::Integer(integer) => { + Some(Box::new(self.get_iterator(integer, hw_counter).copied())) + } + ValueVariants::Bool(_) => None, + }, + Some(Match::Any(MatchAny { any: any_variants })) => match any_variants { + AnyVariants::Strings(keywords) => { + if keywords.is_empty() { + Some(Box::new(vec![].into_iter())) + } else { + None + } + } + AnyVariants::Integers(integers) => Some(Box::new( + integers + .iter() + .flat_map(move |integer| self.get_iterator(integer, hw_counter).copied()) + .unique(), + )), + }, + Some(Match::Except(MatchExcept { except })) => match except { + AnyVariants::Strings(other) => { + if other.is_empty() { + Some(Box::new(iter::empty())) + } else { + None + } + } + AnyVariants::Integers(integers) => Some(self.except_set(integers, hw_counter)), + }, + _ => None, + } + } + + fn estimate_cardinality( + &self, + condition: &FieldCondition, + hw_counter: &HardwareCounterCell, + ) -> Option { + match &condition.r#match { + 
Some(Match::Value(MatchValue { value })) => match value { + ValueVariants::String(_) => None, + ValueVariants::Integer(integer) => { + let mut estimation = self.match_cardinality(integer, hw_counter); + estimation + .primary_clauses + .push(PrimaryCondition::Condition(Box::new(condition.clone()))); + Some(estimation) + } + ValueVariants::Bool(_) => None, + }, + Some(Match::Any(MatchAny { any: any_variants })) => match any_variants { + AnyVariants::Strings(keywords) => { + if keywords.is_empty() { + Some(CardinalityEstimation::exact(0).with_primary_clause( + PrimaryCondition::Condition(Box::new(condition.clone())), + )) + } else { + None + } + } + AnyVariants::Integers(integers) => { + let estimations = integers + .iter() + .map(|integer| self.match_cardinality(integer, hw_counter)) + .collect::>(); + let estimation = if estimations.is_empty() { + CardinalityEstimation::exact(0) + } else { + combine_should_estimations(&estimations, self.get_indexed_points()) + }; + Some( + estimation.with_primary_clause(PrimaryCondition::Condition(Box::new( + condition.clone(), + ))), + ) + } + }, + Some(Match::Except(MatchExcept { except })) => match except { + AnyVariants::Strings(others) => { + if others.is_empty() { + Some(CardinalityEstimation::exact(0).with_primary_clause( + PrimaryCondition::Condition(Box::new(condition.clone())), + )) + } else { + None + } + } + AnyVariants::Integers(integers) => { + Some(self.except_cardinality(integers.iter(), hw_counter)) + } + }, + _ => None, + } + } + + fn payload_blocks( + &self, + threshold: usize, + key: PayloadKeyType, + ) -> Box + '_> { + Box::new( + self.iter_values() + .map(move |value| { + ( + value, + self.get_count_for_value(value, &HardwareCounterCell::disposable()) // Only used in HNSW building so no measurement needed here. 
+ .unwrap_or(0), + ) + }) + .filter(move |(_value, count)| *count >= threshold) + .map(move |(value, count)| PayloadBlockCondition { + condition: FieldCondition::new_match(key.clone(), (*value).into()), + cardinality: count, + }), + ) + } +} + +impl FacetIndex for MapIndex +where + N: MapIndexKey + ?Sized, + for<'a> N::Referenced<'a>: Into>, + for<'a> &'a N: Into>, +{ + fn get_point_values( + &self, + point_id: PointOffsetType, + ) -> impl Iterator + '_ { + MapIndex::get_values(self, point_id) + .into_iter() + .flatten() + .map(Into::into) + } + + fn iter_values(&self) -> impl Iterator> + '_ { + self.iter_values().map(Into::into) + } + + fn iter_values_map<'a>( + &'a self, + hw_counter: &'a HardwareCounterCell, + ) -> impl Iterator, IdIter<'a>)> + 'a { + self.iter_values_map(hw_counter) + .map(|(k, iter)| (k.into(), iter)) + } + + fn iter_counts_per_value(&self) -> impl Iterator>> + '_ { + self.iter_counts_per_value().map(|(value, count)| FacetHit { + value: value.into(), + count, + }) + } +} + +impl ValueIndexer for MapIndex { + type ValueType = String; + + fn add_many( + &mut self, + id: PointOffsetType, + values: Vec, + hw_counter: &HardwareCounterCell, + ) -> OperationResult<()> { + match self { + MapIndex::Mutable(index) => index.add_many_to_map(id, values, hw_counter), + MapIndex::Immutable(_) => Err(OperationError::service_error( + "Can't add values to immutable map index", + )), + MapIndex::Mmap(_) => Err(OperationError::service_error( + "Can't add values to mmap map index", + )), + } + } + + fn get_value(value: &Value) -> Option { + if let Value::String(keyword) = value { + return Some(keyword.to_owned()); + } + None + } + + fn remove_point(&mut self, id: PointOffsetType) -> OperationResult<()> { + self.remove_point(id) + } +} + +impl ValueIndexer for MapIndex { + type ValueType = IntPayloadType; + + fn add_many( + &mut self, + id: PointOffsetType, + values: Vec, + hw_counter: &HardwareCounterCell, + ) -> OperationResult<()> { + match self { + 
MapIndex::Mutable(index) => index.add_many_to_map(id, values, hw_counter), + MapIndex::Immutable(_) => Err(OperationError::service_error( + "Can't add values to immutable map index", + )), + MapIndex::Mmap(_) => Err(OperationError::service_error( + "Can't add values to mmap map index", + )), + } + } + + fn get_value(value: &Value) -> Option { + if let Value::Number(num) = value { + return num.as_i64(); + } + None + } + + fn remove_point(&mut self, id: PointOffsetType) -> OperationResult<()> { + self.remove_point(id) + } +} + +impl ValueIndexer for MapIndex { + type ValueType = UuidIntType; + + fn add_many( + &mut self, + id: PointOffsetType, + values: Vec, + hw_counter: &HardwareCounterCell, + ) -> OperationResult<()> { + match self { + MapIndex::Mutable(index) => index.add_many_to_map(id, values, hw_counter), + MapIndex::Immutable(_) => Err(OperationError::service_error( + "Can't add values to immutable map index", + )), + MapIndex::Mmap(_) => Err(OperationError::service_error( + "Can't add values to mmap map index", + )), + } + } + + fn get_value(value: &Value) -> Option { + Some(Uuid::parse_str(value.as_str()?).ok()?.as_u128()) + } + + fn remove_point(&mut self, id: PointOffsetType) -> OperationResult<()> { + self.remove_point(id) + } +} + +#[cfg(test)] +mod tests { + use std::collections::HashSet; + use std::path::Path; + + use rstest::rstest; + use tempfile::Builder; + + use super::*; + use crate::common::rocksdb_wrapper::open_db_with_existing_cf; + + const FIELD_NAME: &str = "test"; + + #[derive(Clone, Copy)] + enum IndexType { + Mutable, + Immutable, + Mmap, + } + + fn save_map_index( + data: &[Vec], + path: &Path, + index_type: IndexType, + into_value: impl Fn(&N::Owned) -> Value, + ) where + N: MapIndexKey + ?Sized, + MapIndex: PayloadFieldIndex + ValueIndexer, + as ValueIndexer>::ValueType: Into, + { + let hw_counter = HardwareCounterCell::new(); + + match index_type { + IndexType::Mutable | IndexType::Immutable => { + let mut builder = + 
MapIndex::::builder(open_db_with_existing_cf(path).unwrap(), FIELD_NAME); + builder.init().unwrap(); + for (idx, values) in data.iter().enumerate() { + let values: Vec = values.iter().map(&into_value).collect(); + let values: Vec<_> = values.iter().collect(); + builder + .add_point(idx as PointOffsetType, &values, &hw_counter) + .unwrap(); + } + builder.finalize().unwrap(); + } + IndexType::Mmap => { + let mut builder = MapIndex::::mmap_builder(path, false); + builder.init().unwrap(); + for (idx, values) in data.iter().enumerate() { + let values: Vec = values.iter().map(&into_value).collect(); + let values: Vec<_> = values.iter().collect(); + builder + .add_point(idx as PointOffsetType, &values, &hw_counter) + .unwrap(); + } + builder.finalize().unwrap(); + } + } + } + + fn load_map_index(data: &[Vec], path: &Path, index_type: IndexType) -> MapIndex { + let mut index = match index_type { + IndexType::Mutable => { + MapIndex::::new_memory(open_db_with_existing_cf(path).unwrap(), FIELD_NAME, true) + } + IndexType::Immutable => MapIndex::::new_memory( + open_db_with_existing_cf(path).unwrap(), + FIELD_NAME, + false, + ), + IndexType::Mmap => MapIndex::::new_mmap(path, false).unwrap(), + }; + index.load_from_db().unwrap(); + for (idx, values) in data.iter().enumerate() { \ No newline at end of file diff --git a/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_map_index_mod.rs/x-ai_grok-code-fast-1/20250909_155741/metadata.json b/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_map_index_mod.rs/x-ai_grok-code-fast-1/20250909_155741/metadata.json new file mode 100644 index 000000000..c9e800207 --- /dev/null +++ b/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_map_index_mod.rs/x-ai_grok-code-fast-1/20250909_155741/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "qdrant_lib_segment_src_index_field_index_map_index_mod.rs", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": 
"locodiff-250425/prompts/qdrant_lib_segment_src_index_field_index_map_index_mod.rs_prompt.txt", + "expected_file": "locodiff-250425/prompts/qdrant_lib_segment_src_index_field_index_map_index_mod.rs_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:57:41.678730+00:00", + "success": false, + "error": "Output mismatch", + "api_error": false, + "raw_response_length": 44671, + "extracted_output_length": 44652, + "expected_output_length": 49127, + "results_dir": "locodiff-250425/results/qdrant_lib_segment_src_index_field_index_map_index_mod.rs/x-ai_grok-code-fast-1/20250909_155741", + "generation_id": "gen-1757433462-Qyi26STj0PxnAE6rJIP0", + "cost_usd": 0.02615234, + "prompt_tokens": 56466, + "completion_tokens": 9804, + "total_tokens": 66270, + "native_prompt_tokens": 55942, + "native_completion_tokens": 9999, + "native_finish_reason": "length", + "stats_error": null, + "native_tokens_reasoning": 434 +} \ No newline at end of file diff --git a/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_map_index_mod.rs/x-ai_grok-code-fast-1/20250909_155741/output.diff b/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_map_index_mod.rs/x-ai_grok-code-fast-1/20250909_155741/output.diff new file mode 100644 index 000000000..381a72c22 --- /dev/null +++ b/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_map_index_mod.rs/x-ai_grok-code-fast-1/20250909_155741/output.diff @@ -0,0 +1,433 @@ + +index 0a4e2d762..b3ddaf135 100644 +--- a/qdrant_lib_segment_src_index_field_index_map_index_mod.rs_expectedoutput.txt (expected):tmp/tmpqauht04s_expected.txt ++++ b/qdrant_lib_segment_src_index_field_index_map_index_mod.rs_extracted.txt (actual):tmp/tmp_phk44kp_actual.txt +@@ -1,8 +1,9 @@ +-use std::borrow::Borrow; ++std::borrow::Borrow; + use std::collections::hash_map::Entry; + use std::fmt::{Debug, Display}; + use std::hash::{BuildHasher, Hash}; + use std::iter; ++use std::mem::size_of_val; + use std::path::{Path, PathBuf}; + use 
std::str::FromStr; + use std::sync::Arc; +@@ -22,11 +23,11 @@ use uuid::Uuid; + + use self::immutable_map_index::ImmutableMapIndex; + use self::mutable_map_index::MutableMapIndex; +-use super::FieldIndexBuilderTrait; + use super::facet_index::FacetIndex; + use super::mmap_point_to_values::MmapValue; +-use crate::common::Flusher; ++use super::FieldIndexBuilderTrait; + use crate::common::operation_error::{OperationError, OperationResult}; ++use crate::common::Flusher; + use crate::data_types::facets::{FacetHit, FacetValueRef}; + use crate::index::field_index::stat_tools::number_of_selected_points; + use crate::index::field_index::{ +@@ -155,7 +156,7 @@ impl MapIndex { + } + } + +- fn get_indexed_points(&self) -> usize { ++ pub fn get_indexed_points(&self) -> usize { + match self { + MapIndex::Mutable(index) => index.get_indexed_points(), + MapIndex::Immutable(index) => index.get_indexed_points(), +@@ -163,7 +164,7 @@ impl MapIndex { + } + } + +- fn get_values_count(&self) -> usize { ++ pub fn get_values_count(&self) -> usize { + match self { + MapIndex::Mutable(index) => index.get_values_count(), + MapIndex::Immutable(index) => index.get_values_count(), +@@ -203,14 +204,6 @@ impl MapIndex { + } + } + +- pub fn iter_counts_per_value(&self) -> Box + '_> { +- match self { +- MapIndex::Mutable(index) => Box::new(index.iter_counts_per_value()), +- MapIndex::Immutable(index) => Box::new(index.iter_counts_per_value()), +- MapIndex::Mmap(index) => Box::new(index.iter_counts_per_value()), +- } +- } +- + pub fn iter_values_map<'a>( + &'a self, + hw_cell: &'a HardwareCounterCell, +@@ -226,6 +219,14 @@ impl MapIndex { + format!("{field}_map") + } + ++ pub fn recreate(&self) -> OperationResult<()> { ++ match self { ++ MapIndex::Mutable(index) => index.get_db_wrapper().recreate_column_family(), ++ MapIndex::Immutable(index) => index.get_db_wrapper().recreate_column_family(), ++ MapIndex::Mmap(_) => Ok(()), ++ } ++ } ++ + fn flusher(&self) -> Flusher { + match self { + 
MapIndex::Mutable(index) => index.get_db_wrapper().flusher(), +@@ -244,21 +245,10 @@ impl MapIndex { + CardinalityEstimation::exact(values_count) + } + +- pub fn get_telemetry_data(&self) -> PayloadIndexTelemetry { +- PayloadIndexTelemetry { +- field_name: None, +- points_count: self.get_indexed_points(), +- points_values_count: self.get_values_count(), +- histogram_bucket_size: None, +- index_type: match self { +- MapIndex::Mutable(_) => "mutable_map", +- MapIndex::Immutable(_) => "immutable_map", +- MapIndex::Mmap(_) => "mmap_map", +- }, +- } +- } +- +- pub fn encode_db_record(value: &N, idx: PointOffsetType) -> String { ++ pub fn encode_db_record(value: &N, idx: PointOffsetType) -> String ++ where ++ N::Owned: Display, ++ { + format!("{value}/{idx}") + } + +@@ -283,33 +273,6 @@ impl MapIndex { + self.values_count(idx) == 0 + } + +- fn clear(self) -> OperationResult<()> { +- match self { +- MapIndex::Mutable(index) => index.get_db_wrapper().recreate_column_family(), +- MapIndex::Immutable(index) => index.get_db_wrapper().recreate_column_family(), +- MapIndex::Mmap(index) => index.clear(), +- } +- } +- +- fn remove_point(&mut self, id: PointOffsetType) -> OperationResult<()> { +- match self { +- MapIndex::Mutable(index) => index.remove_point(id), +- MapIndex::Immutable(index) => index.remove_point(id), +- MapIndex::Mmap(index) => { +- index.remove_point(id); +- Ok(()) +- } +- } +- } +- +- fn files(&self) -> Vec { +- match self { +- MapIndex::Mutable(_) => Vec::new(), +- MapIndex::Immutable(_) => Vec::new(), +- MapIndex::Mmap(index) => index.files(), +- } +- } +- + /// Estimates cardinality for `except` clause + /// + /// # Arguments +@@ -367,14 +330,12 @@ impl MapIndex { + + let excluded_value_counts: Vec<_> = excluded + .map(|val| { +- self.get_count_for_value(val.borrow(), hw_counter) ++ self.get_count_for_value(val, hw_counter) + .unwrap_or(0) + }) + .collect(); + let total_excluded_value_count: usize = excluded_value_counts.iter().sum(); + +- 
debug_assert!(total_excluded_value_count <= self.get_values_count()); +- + let non_excluded_values_count = self + .get_values_count() + .saturating_sub(total_excluded_value_count); +@@ -384,7 +345,6 @@ impl MapIndex { + + if max_values_per_point == 0 { + // All points are excluded, so we can't select any point +- debug_assert_eq!(non_excluded_values_count, 0); + return CardinalityEstimation::exact(0); + } + +@@ -435,11 +395,44 @@ impl MapIndex { + Box::new( + self.iter_values() + .filter(|key| !excluded.contains((*key).borrow())) +- .flat_map(move |key| self.get_iterator(key.borrow(), hw_counter).copied()) ++ .flat_map(move |key| self.get_iterator(key, hw_counter).copied()) + .unique(), + ) + } + ++ pub fn get_telemetry_data(&self) -> PayloadIndexTelemetry { ++ PayloadIndexTelemetry { ++ field_name: None, ++ points_count: self.get_indexed_points(), ++ points_values_count: self.get_values_count(), ++ histogram_bucket_size: None, ++ index_type: match self { ++ MapIndex::Mutable(_) => "mutable_map", ++ MapIndex::Immutable(_) => "immutable_map", ++ MapIndex::Mmap(_) => "mmap_map", ++ }, ++ } ++ } ++ ++ pub fn clear(self) -> OperationResult<()> { ++ match self { ++ MapIndex::Mutable(index) => index.get_db_wrapper().recreate_column_family(), ++ MapIndex::Immutable(index) => index.get_db_wrapper().recreate_column_family(), ++ MapIndex::Mmap(index) => index.clear(), ++ } ++ } ++ ++ fn remove_point(&mut self, id: PointOffsetType) -> OperationResult<()> { ++ match self { ++ MapIndex::Mutable(index) => index.remove_point(id), ++ MapIndex::Immutable(index) => index.remove_point(id), ++ MapIndex::Mmap(index) => { ++ index.remove_point(id); ++ Ok(()) ++ } ++ } ++ } ++ + pub fn is_on_disk(&self) -> bool { + match self { + MapIndex::Mutable(_) => false, +@@ -584,7 +577,11 @@ impl PayloadFieldIndex for MapIndex { + } + + fn files(&self) -> Vec { +- self.files() ++ match self { ++ MapIndex::Mutable(_) => Vec::new(), ++ MapIndex::Immutable(_) => Vec::new(), ++ MapIndex::Mmap(index) 
=> index.files(), ++ } + } + + fn filter<'a>( +@@ -700,7 +697,7 @@ impl PayloadFieldIndex for MapIndex { + ) -> Box + '_> { + Box::new( + self.iter_values() +- .map(|value| { ++ .map(move |value| { + ( + value, + self.get_count_for_value(value, &HardwareCounterCell::disposable()) // Payload_blocks only used in HNSW building, which is unmeasured. +@@ -734,7 +731,11 @@ impl PayloadFieldIndex for MapIndex { + } + + fn files(&self) -> Vec { +- self.files() ++ match self { ++ MapIndex::Mutable(_) => Vec::new(), ++ MapIndex::Immutable(_) => Vec::new(), ++ MapIndex::Mmap(index) => index.files(), ++ } + } + + fn filter<'a>( +@@ -848,9 +849,11 @@ impl PayloadFieldIndex for MapIndex { + } + AnyVariants::Integers(integers) => { + if integers.is_empty() { +- Some(CardinalityEstimation::exact(0).with_primary_clause( +- PrimaryCondition::Condition(Box::new(condition.clone())), +- )) ++ Some( ++ CardinalityEstimation::exact(0).with_primary_clause( ++ PrimaryCondition::Condition(Box::new(condition.clone())), ++ ), ++ ) + } else { + None + } +@@ -925,7 +928,11 @@ impl PayloadFieldIndex for MapIndex { + } + + fn files(&self) -> Vec { +- self.files() ++ match self { ++ MapIndex::Mutable(_) => Vec::new(), ++ MapIndex::Immutable(_) => Vec::new(), ++ MapIndex::Mmap(index) => index.files(), ++ } + } + + fn filter<'a>( +@@ -941,7 +948,7 @@ impl PayloadFieldIndex for MapIndex { + } + ValueVariants::Bool(_) => None, + }, +- Some(Match::Any(MatchAny { any: any_variant })) => match any_variant { ++ Some(Match::Any(MatchAny { any: any_variants })) => match any_variants { + AnyVariants::Strings(keywords) => { + if keywords.is_empty() { + Some(Box::new(vec![].into_iter())) +@@ -1245,11 +1252,7 @@ mod tests { + } + } + +- fn load_map_index( +- data: &[Vec], +- path: &Path, +- index_type: IndexType, +- ) -> MapIndex { ++ fn load_map_index(data: &[Vec], path: &Path, index_type: IndexType) -> MapIndex { + let mut index = match index_type { + IndexType::Mutable => { + 
MapIndex::::new_memory(open_db_with_existing_cf(path).unwrap(), FIELD_NAME, true) +@@ -1262,137 +1265,4 @@ mod tests { + IndexType::Mmap => MapIndex::::new_mmap(path, false).unwrap(), + }; + index.load_from_db().unwrap(); +- for (idx, values) in data.iter().enumerate() { +- let index_values: HashSet = index +- .get_values(idx as PointOffsetType) +- .unwrap() +- .map(|v| N::to_owned(N::from_referenced(&v))) +- .collect(); +- let index_values: HashSet<&N> = index_values.iter().map(|v| v.borrow()).collect(); +- let check_values: HashSet<&N> = values.iter().map(|v| v.borrow()).collect(); +- assert_eq!(index_values, check_values); +- } +- +- index +- } +- +- #[test] +- fn test_index_non_ascending_insertion() { +- let temp_dir = Builder::new().prefix("store_dir").tempdir().unwrap(); +- let mut builder = MapIndex::::mmap_builder(temp_dir.path(), false); +- builder.init().unwrap(); +- +- let data = [vec![1, 2, 3, 4, 5, 6], vec![25], vec![10, 11]]; +- +- let hw_counter = HardwareCounterCell::new(); +- +- for (idx, values) in data.iter().enumerate().rev() { +- let values: Vec = values.iter().map(|i| (*i).into()).collect(); +- let values: Vec<_> = values.iter().collect(); +- builder +- .add_point(idx as PointOffsetType, &values, &hw_counter) +- .unwrap(); +- } +- +- let index = builder.finalize().unwrap(); +- for (idx, values) in data.iter().enumerate().rev() { +- let res: Vec<_> = index +- .get_values(idx as u32) +- .unwrap() +- .map(|i| *i as i32) +- .collect(); +- assert_eq!(res, *values); +- } +- } +- +- #[rstest] +- #[case(IndexType::Mutable)] +- #[case(IndexType::Immutable)] +- #[case(IndexType::Mmap)] +- fn test_int_disk_map_index(#[case] index_type: IndexType) { +- let data = vec![ +- vec![1, 2, 3, 4, 5, 6], +- vec![1, 2, 3, 4, 5, 6], +- vec![13, 14, 15, 16, 17, 18], +- vec![19, 20, 21, 22, 23, 24], +- vec![25], +- ]; +- +- let temp_dir = Builder::new().prefix("store_dir").tempdir().unwrap(); +- save_map_index::(&data, temp_dir.path(), index_type, |v| (*v).into()); +- 
let index = load_map_index::(&data, temp_dir.path(), index_type); +- +- let hw_counter = HardwareCounterCell::new(); +- +- // Ensure cardinality is non zero +- assert!( +- !index +- .except_cardinality(vec![].into_iter(), &hw_counter) +- .equals_min_exp_max(&CardinalityEstimation::exact(0)), +- ); +- } +- +- #[rstest] +- #[case(IndexType::Mutable)] +- #[case(IndexType::Immutable)] +- #[case(IndexType::Mmap)] +- fn test_string_disk_map_index(#[case] index_type: IndexType) { +- let data = vec![ +- vec![ +- SmolStr::from("AABB"), +- SmolStr::from("UUFF"), +- SmolStr::from("IIBB"), +- ], +- vec![ +- SmolStr::from("PPMM"), +- SmolStr::from("QQXX"), +- SmolStr::from("YYBB"), +- ], +- vec![ +- SmolStr::from("FFMM"), +- SmolStr::from("IICC"), +- SmolStr::from("IIBB"), +- ], +- vec![ +- SmolStr::from("AABB"), +- SmolStr::from("UUFF"), +- SmolStr::from("IIBB"), +- ], +- vec![SmolStr::from("PPGG")], +- ]; +- +- let temp_dir = Builder::new().prefix("store_dir").tempdir().unwrap(); +- save_map_index::(&data, temp_dir.path(), index_type, |v| v.to_string().into()); +- let index = load_map_index::(&data, temp_dir.path(), index_type); +- +- let hw_counter = HardwareCounterCell::new(); +- +- // Ensure cardinality is non zero +- assert!( +- !index +- .except_cardinality(vec![].into_iter(), &hw_counter) +- .equals_min_exp_max(&CardinalityEstimation::exact(0)), +- ); +- } +- +- #[rstest] +- #[case(IndexType::Mutable)] +- #[case(IndexType::Immutable)] +- #[case(IndexType::Mmap)] +- fn test_empty_index(#[case] index_type: IndexType) { +- let data: Vec> = vec![]; +- +- let temp_dir = Builder::new().prefix("store_dir").tempdir().unwrap(); +- save_map_index::(&data, temp_dir.path(), index_type, |v| v.to_string().into()); +- let index = load_map_index::(&data, temp_dir.path(), index_type); +- +- let hw_counter = HardwareCounterCell::new(); +- +- // Ensure cardinality is zero +- assert!( +- index +- .except_cardinality(vec![].into_iter(), &hw_counter) +- 
.equals_min_exp_max(&CardinalityEstimation::exact(0)), +- ); +- } +-} +\ No newline at end of file ++ for (idx, values) in data.iter().enumerate() { +\ No newline at end of file diff --git a/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_map_index_mod.rs/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt b/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_map_index_mod.rs/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt new file mode 100644 index 000000000..c5aee6e66 --- /dev/null +++ b/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_map_index_mod.rs/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt @@ -0,0 +1,1269 @@ +```use std::borrow::Borrow; +use std::collections::hash_map::Entry; +use std::fmt::{Debug, Display}; +use std::hash::{BuildHasher, Hash}; +use std::iter; +use std::mem::size_of_val; +use std::path::{Path, PathBuf}; +use std::str::FromStr; +use std::sync::Arc; + +use ahash::HashMap; +use common::counter::hardware_counter::HardwareCounterCell; +use common::mmap_hashmap::Key; +use common::types::PointOffsetType; +use indexmap::IndexSet; +use itertools::Itertools; +use mmap_map_index::MmapMapIndex; +use parking_lot::RwLock; +use rocksdb::DB; +use serde_json::Value; +use smol_str::SmolStr; +use uuid::Uuid; + +use self::immutable_map_index::ImmutableMapIndex; +use self::mutable_map_index::MutableMapIndex; +use super::facet_index::FacetIndex; +use super::mmap_point_to_values::MmapValue; +use super::FieldIndexBuilderTrait; +use crate::common::operation_error::{OperationError, OperationResult}; +use crate::common::Flusher; +use crate::data_types::facets::{FacetHit, FacetValueRef}; +use crate::index::field_index::stat_tools::number_of_selected_points; +use crate::index::field_index::{ + CardinalityEstimation, PayloadBlockCondition, PayloadFieldIndex, PrimaryCondition, ValueIndexer, +}; +use crate::index::query_estimator::combine_should_estimations; +use crate::telemetry::PayloadIndexTelemetry; 
+use crate::types::{ + AnyVariants, FieldCondition, IntPayloadType, Match, MatchAny, MatchExcept, MatchValue, + PayloadKeyType, UuidIntType, ValueVariants, +}; + +pub mod immutable_map_index; +pub mod mmap_map_index; +pub mod mutable_map_index; + +pub type IdRefIter<'a> = Box + 'a>; +pub type IdIter<'a> = Box + 'a>; + +pub trait MapIndexKey: Key + MmapValue + Eq + Display + Debug { + type Owned: Borrow + Hash + Eq + Clone + FromStr + Default; + + fn to_owned(&self) -> Self::Owned; +} + +impl MapIndexKey for str { + type Owned = SmolStr; + + fn to_owned(&self) -> Self::Owned { + SmolStr::from(self) + } +} + +impl MapIndexKey for IntPayloadType { + type Owned = IntPayloadType; + + fn to_owned(&self) -> Self::Owned { + *self + } +} + +impl MapIndexKey for UuidIntType { + type Owned = UuidIntType; + + fn to_owned(&self) -> Self::Owned { + *self + } +} + +pub enum MapIndex { + Mutable(MutableMapIndex), + Immutable(ImmutableMapIndex), + Mmap(Box>), +} + +impl MapIndex { + pub fn new_memory(db: Arc>, field_name: &str, is_appendable: bool) -> Self { + if is_appendable { + MapIndex::Mutable(MutableMapIndex::new(db, field_name)) + } else { + MapIndex::Immutable(ImmutableMapIndex::new(db, field_name)) + } + } + + pub fn new_mmap(path: &Path, is_on_disk: bool) -> OperationResult { + Ok(MapIndex::Mmap(Box::new(MmapMapIndex::load( + path, is_on_disk, + )?))) + } + + pub fn builder(db: Arc>, field_name: &str) -> MapIndexBuilder { + MapIndexBuilder(MapIndex::Mutable(MutableMapIndex::new(db, field_name))) + } + + pub fn mmap_builder(path: &Path, is_on_disk: bool) -> MapIndexMmapBuilder { + MapIndexMmapBuilder { + path: path.to_owned(), + point_to_values: Default::default(), + values_to_points: Default::default(), + is_on_disk, + } + } + + fn load_from_db(&mut self) -> OperationResult { + match self { + MapIndex::Mutable(index) => index.load_from_db(), + MapIndex::Immutable(index) => index.load_from_db(), + // mmap index is always loaded + MapIndex::Mmap(_) => Ok(true), + } + } + + 
pub fn check_values_any( + &self, + idx: PointOffsetType, + hw_counter: &HardwareCounterCell, + check_fn: impl Fn(&N) -> bool, + ) -> bool { + match self { + MapIndex::Mutable(index) => index.check_values_any(idx, check_fn), + MapIndex::Immutable(index) => index.check_values_any(idx, check_fn), + MapIndex::Mmap(index) => index.check_values_any(idx, hw_counter, check_fn), + } + } + + pub fn get_values( + &self, + idx: PointOffsetType, + ) -> Option> + '_>> { + match self { + MapIndex::Mutable(index) => Some(Box::new( + index.get_values(idx)?.map(|v| N::as_referenced(v)), + )), + MapIndex::Immutable(index) => Some(Box::new( + index.get_values(idx)?.map(|v| N::as_referenced(v)), + )), + MapIndex::Mmap(index) => Some(Box::new(index.get_values(idx)?)), + } + } + + pub fn values_count(&self, idx: PointOffsetType) -> usize { + match self { + MapIndex::Mutable(index) => index.values_count(idx).unwrap_or_default(), + MapIndex::Immutable(index) => index.values_count(idx).unwrap_or_default(), + MapIndex::Mmap(index) => index.values_count(idx).unwrap_or_default(), + } + } + + pub fn get_indexed_points(&self) -> usize { + match self { + MapIndex::Mutable(index) => index.get_indexed_points(), + MapIndex::Immutable(index) => index.get_indexed_points(), + MapIndex::Mmap(index) => index.get_indexed_points(), + } + } + + pub fn get_values_count(&self) -> usize { + match self { + MapIndex::Mutable(index) => index.get_values_count(), + MapIndex::Immutable(index) => index.get_values_count(), + MapIndex::Mmap(index) => index.get_values_count(), + } + } + + pub fn get_unique_values_count(&self) -> usize { + match self { + MapIndex::Mutable(index) => index.get_unique_values_count(), + MapIndex::Immutable(index) => index.get_unique_values_count(), + MapIndex::Mmap(index) => index.get_unique_values_count(), + } + } + + fn get_count_for_value(&self, value: &N, hw_counter: &HardwareCounterCell) -> Option { + match self { + MapIndex::Mutable(index) => index.get_count_for_value(value), + 
MapIndex::Immutable(index) => index.get_count_for_value(value), + MapIndex::Mmap(index) => index.get_count_for_value(value, hw_counter), + } + } + + fn get_iterator(&self, value: &N, hw_counter: &HardwareCounterCell) -> IdRefIter<'_> { + match self { + MapIndex::Mutable(index) => index.get_iterator(value), + MapIndex::Immutable(index) => index.get_iterator(value), + MapIndex::Mmap(index) => index.get_iterator(value, hw_counter), + } + } + + pub fn iter_values(&self) -> Box + '_> { + match self { + MapIndex::Mutable(index) => index.iter_values(), + MapIndex::Immutable(index) => index.iter_values(), + MapIndex::Mmap(index) => index.iter_values(), + } + } + + pub fn iter_values_map<'a>( + &'a self, + hw_cell: &'a HardwareCounterCell, + ) -> Box)> + 'a> { + match self { + MapIndex::Mutable(index) => Box::new(index.iter_values_map()), + MapIndex::Immutable(index) => Box::new(index.iter_values_map()), + MapIndex::Mmap(index) => Box::new(index.iter_values_map(hw_cell)), + } + } + + pub fn storage_cf_name(field: &str) -> String { + format!("{field}_map") + } + + pub fn recreate(&self) -> OperationResult<()> { + match self { + MapIndex::Mutable(index) => index.get_db_wrapper().recreate_column_family(), + MapIndex::Immutable(index) => index.get_db_wrapper().recreate_column_family(), + MapIndex::Mmap(_) => Ok(()), + } + } + + fn flusher(&self) -> Flusher { + match self { + MapIndex::Mutable(index) => index.get_db_wrapper().flusher(), + MapIndex::Immutable(index) => index.get_db_wrapper().flusher(), + MapIndex::Mmap(index) => index.flusher(), + } + } + + fn match_cardinality( + &self, + value: &N, + hw_counter: &HardwareCounterCell, + ) -> CardinalityEstimation { + let values_count = self.get_count_for_value(value, hw_counter).unwrap_or(0); + + CardinalityEstimation::exact(values_count) + } + + pub fn encode_db_record(value: &N, idx: PointOffsetType) -> String + where + N::Owned: Display, + { + format!("{value}/{idx}") + } + + pub fn decode_db_record(s: &str) -> 
OperationResult<(N::Owned, PointOffsetType)> { + const DECODE_ERR: &str = "Index db parsing error: wrong data format"; + let separator_pos = s + .rfind('/') + .ok_or_else(|| OperationError::service_error(DECODE_ERR))?; + if separator_pos == s.len() - 1 { + return Err(OperationError::service_error(DECODE_ERR)); + } + let value_str = &s[..separator_pos]; + let value = + N::Owned::from_str(value_str).map_err(|_| OperationError::service_error(DECODE_ERR))?; + let idx_str = &s[separator_pos + 1..]; + let idx = PointOffsetType::from_str(idx_str) + .map_err(|_| OperationError::service_error(DECODE_ERR))?; + Ok((value, idx)) + } + + pub fn values_is_empty(&self, idx: PointOffsetType) -> bool { + self.values_count(idx) == 0 + } + + /// Estimates cardinality for `except` clause + /// + /// # Arguments + /// + /// * 'excluded' - values, which are not considered as matching + /// + /// # Returns + /// + /// * `CardinalityEstimation` - estimation of cardinality + fn except_cardinality<'a>( + &'a self, + excluded: impl Iterator, + hw_counter: &HardwareCounterCell, + ) -> CardinalityEstimation { + // Minimal case: we exclude as many points as possible. + // In this case, excluded points do not have any other values except excluded ones. + // So the first step - we estimate how many other points is needed to fit unused values. + + // Example: + // Values: 20, 20 + // Unique values: 5 + // Total points: 100 + // Total values: 110 + // total_excluded_value_count = 40 + // non_excluded_values_count = 110 - 40 = 70 + // max_values_per_point = 5 - 2 = 3 + // min_not_excluded_by_values = 70 / 3 = 24 + // min = max(24, 100 - 40) = 60 + // exp = ... + // max = min(20, 70) = 20 + + // Values: 60, 60 + // Unique values: 5 + // Total points: 100 + // Total values: 200 + // total_excluded_value_count = 120 + // non_excluded_values_count = 200 - 120 = 80 + // max_values_per_point = 5 - 2 = 3 + // min_not_excluded_by_values = 80 / 3 = 27 + // min = max(27, 100 - 120) = 27 + // exp = ... 
+ // max = min(60, 80) = 60 + + // Values: 60, 60, 60 + // Unique values: 5 + // Total points: 100 + // Total values: 200 + // total_excluded_value_count = 180 + // non_excluded_values_count = 200 - 180 = 20 + // max_values_per_point = 5 - 3 = 2 + // min_not_excluded_by_values = 20 / 2 = 10 + // min = max(10, 100 - 180) = 10 + // exp = ... + // max = min(60, 20) = 20 + + let excluded_value_counts: Vec<_> = excluded + .map(|val| { + self.get_count_for_value(val, hw_counter) + .unwrap_or(0) + }) + .collect(); + let total_excluded_value_count: usize = excluded_value_counts.iter().sum(); + + let non_excluded_values_count = self + .get_values_count() + .saturating_sub(total_excluded_value_count); + let max_values_per_point = self + .get_unique_values_count() + .saturating_sub(excluded_value_counts.len()); + + if max_values_per_point == 0 { + // All points are excluded, so we can't select any point + return CardinalityEstimation::exact(0); + } + + // Minimal amount of points, required to fit all unused values. + // Cardinality can't be less than this value. + let min_not_excluded_by_values = non_excluded_values_count.div_ceil(max_values_per_point); + + let min = min_not_excluded_by_values.max( + self.get_indexed_points() + .saturating_sub(total_excluded_value_count), + ); + + // Maximum scenario: selected points overlap as much as possible. + // From one side, all excluded values should be assigned to the same point + // => we can take the value with the maximum amount of points. + // From another side, all other values should be enough to fill all other points. + + let max_excluded_value_count = excluded_value_counts.iter().max().copied().unwrap_or(0); + + let max = self + .get_indexed_points() + .saturating_sub(max_excluded_value_count) + .min(non_excluded_values_count); + + // Expected case: we assume that all points are filled equally. + // So we can estimate the probability of the point to have non-excluded value. 
+ let exp = number_of_selected_points(self.get_indexed_points(), non_excluded_values_count) + .max(min) + .min(max); + + CardinalityEstimation { + primary_clauses: vec![], + min, + exp, + max, + } + } + + fn except_set<'a, K, A>( + &'a self, + excluded: &'a IndexSet, + hw_counter: &'a HardwareCounterCell, + ) -> Box + 'a> + where + A: BuildHasher, + K: Borrow + Hash + Eq, + { + Box::new( + self.iter_values() + .filter(|key| !excluded.contains((*key).borrow())) + .flat_map(move |key| self.get_iterator(key, hw_counter).copied()) + .unique(), + ) + } + + pub fn get_telemetry_data(&self) -> PayloadIndexTelemetry { + PayloadIndexTelemetry { + field_name: None, + points_count: self.get_indexed_points(), + points_values_count: self.get_values_count(), + histogram_bucket_size: None, + index_type: match self { + MapIndex::Mutable(_) => "mutable_map", + MapIndex::Immutable(_) => "immutable_map", + MapIndex::Mmap(_) => "mmap_map", + }, + } + } + + pub fn clear(self) -> OperationResult<()> { + match self { + MapIndex::Mutable(index) => index.get_db_wrapper().recreate_column_family(), + MapIndex::Immutable(index) => index.get_db_wrapper().recreate_column_family(), + MapIndex::Mmap(index) => index.clear(), + } + } + + fn remove_point(&mut self, id: PointOffsetType) -> OperationResult<()> { + match self { + MapIndex::Mutable(index) => index.remove_point(id), + MapIndex::Immutable(index) => index.remove_point(id), + MapIndex::Mmap(index) => { + index.remove_point(id); + Ok(()) + } + } + } + + pub fn is_on_disk(&self) -> bool { + match self { + MapIndex::Mutable(_) => false, + MapIndex::Immutable(_) => false, + MapIndex::Mmap(index) => index.is_on_disk(), + } + } + + /// Populate all pages in the mmap. + /// Block until all pages are populated. + pub fn populate(&self) -> OperationResult<()> { + match self { + MapIndex::Mutable(_) => {} // Not a mmap + MapIndex::Immutable(_) => {} // Not a mmap + MapIndex::Mmap(index) => index.populate()?, + } + Ok(()) + } + + /// Drop disk cache. 
+ pub fn clear_cache(&self) -> OperationResult<()> { + match self { + MapIndex::Mutable(_) => {} // Not a mmap + MapIndex::Immutable(_) => {} // Not a mmap + MapIndex::Mmap(index) => index.clear_cache()?, + } + Ok(()) + } +} + +pub struct MapIndexBuilder(MapIndex); + +impl FieldIndexBuilderTrait for MapIndexBuilder +where + MapIndex: PayloadFieldIndex + ValueIndexer, +{ + type FieldIndexType = MapIndex; + + fn init(&mut self) -> OperationResult<()> { + match &mut self.0 { + MapIndex::Mutable(index) => index.get_db_wrapper().recreate_column_family(), + MapIndex::Immutable(index) => index.get_db_wrapper().recreate_column_family(), + MapIndex::Mmap(_) => unreachable!(), + } + } + + fn add_point( + &mut self, + id: PointOffsetType, + values: &[&Value], + hw_counter: &HardwareCounterCell, + ) -> OperationResult<()> { + self.0.add_point(id, values, hw_counter) + } + + fn finalize(self) -> OperationResult { + Ok(self.0) + } +} + +pub struct MapIndexMmapBuilder { + path: PathBuf, + point_to_values: Vec>, + values_to_points: HashMap>, + is_on_disk: bool, +} + +impl FieldIndexBuilderTrait for MapIndexMmapBuilder +where + MapIndex: PayloadFieldIndex + ValueIndexer, + as ValueIndexer>::ValueType: Into, +{ + type FieldIndexType = MapIndex; + + fn init(&mut self) -> OperationResult<()> { + Ok(()) + } + + fn add_point( + &mut self, + id: PointOffsetType, + payload: &[&Value], + hw_counter: &HardwareCounterCell, + ) -> OperationResult<()> { + let mut flatten_values: Vec<_> = vec![]; + for value in payload.iter() { + let payload_values = as ValueIndexer>::get_values(value); + flatten_values.extend(payload_values); + } + let flatten_values: Vec = flatten_values.into_iter().map(Into::into).collect(); + + if self.point_to_values.len() <= id as usize { + self.point_to_values.resize_with(id as usize + 1, Vec::new); + } + + self.point_to_values[id as usize].extend(flatten_values.clone()); + + let mut hw_cell_wb = hw_counter + .payload_index_io_write_counter() + .write_back_counter(); + + 
for value in flatten_values { + let entry = self.values_to_points.entry(value); + + if let Entry::Vacant(e) = &entry { + let size = N::mmapped_size(N::as_referenced(e.key().borrow())); + hw_cell_wb.incr_delta(size); + } + + hw_cell_wb.incr_delta(size_of_val(&id)); + entry.or_default().push(id); + } + + Ok(()) + } + + fn finalize(self) -> OperationResult { + Ok(MapIndex::Mmap(Box::new(MmapMapIndex::build( + &self.path, + self.point_to_values, + self.values_to_points, + self.is_on_disk, + )?))) + } +} + +impl PayloadFieldIndex for MapIndex { + fn count_indexed_points(&self) -> usize { + self.get_indexed_points() + } + + fn load(&mut self) -> OperationResult { + self.load_from_db() + } + + fn cleanup(self) -> OperationResult<()> { + self.clear() + } + + fn flusher(&self) -> Flusher { + MapIndex::flusher(self) + } + + fn files(&self) -> Vec { + match self { + MapIndex::Mutable(_) => Vec::new(), + MapIndex::Immutable(_) => Vec::new(), + MapIndex::Mmap(index) => index.files(), + } + } + + fn filter<'a>( + &'a self, + condition: &'a FieldCondition, + hw_counter: &'a HardwareCounterCell, + ) -> Option + 'a>> { + match &condition.r#match { + Some(Match::Value(MatchValue { value })) => match value { + ValueVariants::String(keyword) => Some(Box::new( + self.get_iterator(keyword.as_str(), hw_counter).copied(), + )), + ValueVariants::Integer(_) => None, + ValueVariants::Bool(_) => None, + }, + Some(Match::Any(MatchAny { any: any_variant })) => match any_variant { + AnyVariants::Strings(keywords) => Some(Box::new( + keywords + .iter() + .flat_map(move |keyword| { + self.get_iterator(keyword.as_str(), hw_counter).copied() + }) + .unique(), + )), + AnyVariants::Integers(integers) => { + if integers.is_empty() { + Some(Box::new(iter::empty())) + } else { + None + } + } + }, + Some(Match::Except(MatchExcept { except })) => match except { + AnyVariants::Strings(keywords) => Some(self.except_set(keywords, hw_counter)), + AnyVariants::Integers(other) => { + if other.is_empty() { + 
Some(Box::new(iter::empty())) + } else { + None + } + } + }, + _ => None, + } + } + + fn estimate_cardinality( + &self, + condition: &FieldCondition, + hw_counter: &HardwareCounterCell, + ) -> Option { + match &condition.r#match { + Some(Match::Value(MatchValue { value })) => match value { + ValueVariants::String(keyword) => { + let mut estimation = self.match_cardinality(keyword.as_str(), hw_counter); + estimation + .primary_clauses + .push(PrimaryCondition::Condition(Box::new(condition.clone()))); + Some(estimation) + } + ValueVariants::Integer(_) => None, + ValueVariants::Bool(_) => None, + }, + Some(Match::Any(MatchAny { any: any_variant })) => match any_variant { + AnyVariants::Strings(keywords) => { + let estimations = keywords + .iter() + .map(|keyword| self.match_cardinality(keyword.as_str(), hw_counter)) + .collect::>(); + let estimation = if estimations.is_empty() { + CardinalityEstimation::exact(0) + } else { + combine_should_estimations(&estimations, self.get_indexed_points()) + }; + Some( + estimation.with_primary_clause(PrimaryCondition::Condition(Box::new( + condition.clone(), + ))), + ) + } + AnyVariants::Integers(integers) => { + if integers.is_empty() { + Some(CardinalityEstimation::exact(0).with_primary_clause( + PrimaryCondition::Condition(Box::new(condition.clone())), + )) + } else { + None + } + } + }, + Some(Match::Except(MatchExcept { except })) => match except { + AnyVariants::Strings(keywords) => { + Some(self.except_cardinality(keywords.iter().map(|k| k.as_str()), hw_counter)) + } + AnyVariants::Integers(others) => { + if others.is_empty() { + Some(CardinalityEstimation::exact(0).with_primary_clause( + PrimaryCondition::Condition(Box::new(condition.clone())), + )) + } else { + None + } + } + }, + _ => None, + } + } + + fn payload_blocks( + &self, + threshold: usize, + key: PayloadKeyType, + ) -> Box + '_> { + Box::new( + self.iter_values() + .map(move |value| { + ( + value, + self.get_count_for_value(value, 
&HardwareCounterCell::disposable()) // Payload_blocks only used in HNSW building, which is unmeasured. + .unwrap_or(0), + ) + }) + .filter(move |(_value, count)| *count > threshold) + .map(move |(value, count)| PayloadBlockCondition { + condition: FieldCondition::new_match(key.clone(), value.to_string().into()), + cardinality: count, + }), + ) + } +} + +impl PayloadFieldIndex for MapIndex { + fn count_indexed_points(&self) -> usize { + self.get_indexed_points() + } + + fn load(&mut self) -> OperationResult { + self.load_from_db() + } + + fn cleanup(self) -> OperationResult<()> { + self.clear() + } + + fn flusher(&self) -> Flusher { + MapIndex::flusher(self) + } + + fn files(&self) -> Vec { + match self { + MapIndex::Mutable(_) => Vec::new(), + MapIndex::Immutable(_) => Vec::new(), + MapIndex::Mmap(index) => index.files(), + } + } + + fn filter<'a>( + &'a self, + condition: &'a FieldCondition, + hw_counter: &'a HardwareCounterCell, + ) -> Option + 'a>> { + match &condition.r#match { + Some(Match::Value(MatchValue { value })) => match value { + ValueVariants::String(uuid_string) => { + let uuid = Uuid::from_str(uuid_string).ok()?; + Some(Box::new( + self.get_iterator(&uuid.as_u128(), hw_counter).copied(), + )) + } + ValueVariants::Integer(_) => None, + ValueVariants::Bool(_) => None, + }, + Some(Match::Any(MatchAny { any: any_variant })) => match any_variant { + AnyVariants::Strings(uuids_string) => { + let uuids: Result, _> = uuids_string + .iter() + .map(|uuid_string| Uuid::from_str(uuid_string).map(|x| x.as_u128())) + .collect(); + + let uuids = uuids.ok()?; + + Some(Box::new( + uuids + .into_iter() + .flat_map(move |uuid| self.get_iterator(&uuid, hw_counter).copied()) + .unique(), + )) + } + AnyVariants::Integers(integers) => { + if integers.is_empty() { + Some(Box::new(iter::empty())) + } else { + None + } + } + }, + Some(Match::Except(MatchExcept { except })) => match except { + AnyVariants::Strings(uuids_string) => { + let uuids: Result, _> = uuids_string + 
.iter() + .map(|uuid_string| Uuid::from_str(uuid_string).map(|x| x.as_u128())) + .collect(); + + let excluded_uuids = uuids.ok()?; + let exclude_iter = self + .iter_values() + .filter(move |key| !excluded_uuids.contains(*key)) + .flat_map(move |key| self.get_iterator(key, hw_counter).copied()) + .unique(); + Some(Box::new(exclude_iter)) + } + AnyVariants::Integers(other) => { + if other.is_empty() { + Some(Box::new(iter::empty())) + } else { + None + } + } + }, + _ => None, + } + } + + fn estimate_cardinality( + &self, + condition: &FieldCondition, + hw_counter: &HardwareCounterCell, + ) -> Option { + match &condition.r#match { + Some(Match::Value(MatchValue { value })) => match value { + ValueVariants::String(uuid_string) => { + let uuid = Uuid::from_str(uuid_string).ok()?; + let mut estimation = self.match_cardinality(&uuid.as_u128(), hw_counter); + estimation + .primary_clauses + .push(PrimaryCondition::Condition(Box::new(condition.clone()))); + Some(estimation) + } + ValueVariants::Integer(_) => None, + ValueVariants::Bool(_) => None, + }, + Some(Match::Any(MatchAny { any: any_variant })) => match any_variant { + AnyVariants::Strings(uuids_string) => { + let uuids: Result, _> = uuids_string + .iter() + .map(|uuid_string| Uuid::from_str(uuid_string).map(|x| x.as_u128())) + .collect(); + + let uuids = uuids.ok()?; + + let estimations = uuids + .into_iter() + .map(|uuid| self.match_cardinality(&uuid, hw_counter)) + .collect::>(); + let estimation = if estimations.is_empty() { + CardinalityEstimation::exact(0) + } else { + combine_should_estimations(&estimations, self.get_indexed_points()) + }; + Some( + estimation.with_primary_clause(PrimaryCondition::Condition(Box::new( + condition.clone(), + ))), + ) + } + AnyVariants::Integers(integers) => { + if integers.is_empty() { + Some( + CardinalityEstimation::exact(0).with_primary_clause( + PrimaryCondition::Condition(Box::new(condition.clone())), + ), + ) + } else { + None + } + } + }, + Some(Match::Except(MatchExcept 
{ except })) => match except { + AnyVariants::Strings(uuids_string) => { + let uuids: Result, _> = uuids_string + .iter() + .map(|uuid_string| Uuid::from_str(uuid_string).map(|x| x.as_u128())) + .collect(); + + let excluded_uuids = uuids.ok()?; + + Some(self.except_cardinality(excluded_uuids.iter(), hw_counter)) + } + AnyVariants::Integers(other) => { + if other.is_empty() { + Some(CardinalityEstimation::exact(0).with_primary_clause( + PrimaryCondition::Condition(Box::new(condition.clone())), + )) + } else { + None + } + } + }, + _ => None, + } + } + + fn payload_blocks( + &self, + threshold: usize, + key: PayloadKeyType, + ) -> Box + '_> { + Box::new( + self.iter_values() + .map(move |value| { + ( + value, + self.get_count_for_value(value, &HardwareCounterCell::disposable()) // payload_blocks only used in HNSW building, which is unmeasured. + .unwrap_or(0), + ) + }) + .filter(move |(_value, count)| *count >= threshold) + .map(move |(value, count)| PayloadBlockCondition { + condition: FieldCondition::new_match( + key.clone(), + Uuid::from_u128(*value).to_string().into(), + ), + cardinality: count, + }), + ) + } +} + +impl PayloadFieldIndex for MapIndex { + fn count_indexed_points(&self) -> usize { + self.get_indexed_points() + } + + fn load(&mut self) -> OperationResult { + self.load_from_db() + } + + fn cleanup(self) -> OperationResult<()> { + self.clear() + } + + fn flusher(&self) -> Flusher { + MapIndex::flusher(self) + } + + fn files(&self) -> Vec { + match self { + MapIndex::Mutable(_) => Vec::new(), + MapIndex::Immutable(_) => Vec::new(), + MapIndex::Mmap(index) => index.files(), + } + } + + fn filter<'a>( + &'a self, + condition: &'a FieldCondition, + hw_counter: &'a HardwareCounterCell, + ) -> Option + 'a>> { + match &condition.r#match { + Some(Match::Value(MatchValue { value })) => match value { + ValueVariants::String(_) => None, + ValueVariants::Integer(integer) => { + Some(Box::new(self.get_iterator(integer, hw_counter).copied())) + } + 
ValueVariants::Bool(_) => None, + }, + Some(Match::Any(MatchAny { any: any_variants })) => match any_variants { + AnyVariants::Strings(keywords) => { + if keywords.is_empty() { + Some(Box::new(vec![].into_iter())) + } else { + None + } + } + AnyVariants::Integers(integers) => Some(Box::new( + integers + .iter() + .flat_map(move |integer| self.get_iterator(integer, hw_counter).copied()) + .unique(), + )), + }, + Some(Match::Except(MatchExcept { except })) => match except { + AnyVariants::Strings(other) => { + if other.is_empty() { + Some(Box::new(iter::empty())) + } else { + None + } + } + AnyVariants::Integers(integers) => Some(self.except_set(integers, hw_counter)), + }, + _ => None, + } + } + + fn estimate_cardinality( + &self, + condition: &FieldCondition, + hw_counter: &HardwareCounterCell, + ) -> Option { + match &condition.r#match { + Some(Match::Value(MatchValue { value })) => match value { + ValueVariants::String(_) => None, + ValueVariants::Integer(integer) => { + let mut estimation = self.match_cardinality(integer, hw_counter); + estimation + .primary_clauses + .push(PrimaryCondition::Condition(Box::new(condition.clone()))); + Some(estimation) + } + ValueVariants::Bool(_) => None, + }, + Some(Match::Any(MatchAny { any: any_variants })) => match any_variants { + AnyVariants::Strings(keywords) => { + if keywords.is_empty() { + Some(CardinalityEstimation::exact(0).with_primary_clause( + PrimaryCondition::Condition(Box::new(condition.clone())), + )) + } else { + None + } + } + AnyVariants::Integers(integers) => { + let estimations = integers + .iter() + .map(|integer| self.match_cardinality(integer, hw_counter)) + .collect::>(); + let estimation = if estimations.is_empty() { + CardinalityEstimation::exact(0) + } else { + combine_should_estimations(&estimations, self.get_indexed_points()) + }; + Some( + estimation.with_primary_clause(PrimaryCondition::Condition(Box::new( + condition.clone(), + ))), + ) + } + }, + Some(Match::Except(MatchExcept { except })) => 
match except { + AnyVariants::Strings(others) => { + if others.is_empty() { + Some(CardinalityEstimation::exact(0).with_primary_clause( + PrimaryCondition::Condition(Box::new(condition.clone())), + )) + } else { + None + } + } + AnyVariants::Integers(integers) => { + Some(self.except_cardinality(integers.iter(), hw_counter)) + } + }, + _ => None, + } + } + + fn payload_blocks( + &self, + threshold: usize, + key: PayloadKeyType, + ) -> Box + '_> { + Box::new( + self.iter_values() + .map(move |value| { + ( + value, + self.get_count_for_value(value, &HardwareCounterCell::disposable()) // Only used in HNSW building so no measurement needed here. + .unwrap_or(0), + ) + }) + .filter(move |(_value, count)| *count >= threshold) + .map(move |(value, count)| PayloadBlockCondition { + condition: FieldCondition::new_match(key.clone(), (*value).into()), + cardinality: count, + }), + ) + } +} + +impl FacetIndex for MapIndex +where + N: MapIndexKey + ?Sized, + for<'a> N::Referenced<'a>: Into>, + for<'a> &'a N: Into>, +{ + fn get_point_values( + &self, + point_id: PointOffsetType, + ) -> impl Iterator + '_ { + MapIndex::get_values(self, point_id) + .into_iter() + .flatten() + .map(Into::into) + } + + fn iter_values(&self) -> impl Iterator> + '_ { + self.iter_values().map(Into::into) + } + + fn iter_values_map<'a>( + &'a self, + hw_counter: &'a HardwareCounterCell, + ) -> impl Iterator, IdIter<'a>)> + 'a { + self.iter_values_map(hw_counter) + .map(|(k, iter)| (k.into(), iter)) + } + + fn iter_counts_per_value(&self) -> impl Iterator>> + '_ { + self.iter_counts_per_value().map(|(value, count)| FacetHit { + value: value.into(), + count, + }) + } +} + +impl ValueIndexer for MapIndex { + type ValueType = String; + + fn add_many( + &mut self, + id: PointOffsetType, + values: Vec, + hw_counter: &HardwareCounterCell, + ) -> OperationResult<()> { + match self { + MapIndex::Mutable(index) => index.add_many_to_map(id, values, hw_counter), + MapIndex::Immutable(_) => 
Err(OperationError::service_error( + "Can't add values to immutable map index", + )), + MapIndex::Mmap(_) => Err(OperationError::service_error( + "Can't add values to mmap map index", + )), + } + } + + fn get_value(value: &Value) -> Option { + if let Value::String(keyword) = value { + return Some(keyword.to_owned()); + } + None + } + + fn remove_point(&mut self, id: PointOffsetType) -> OperationResult<()> { + self.remove_point(id) + } +} + +impl ValueIndexer for MapIndex { + type ValueType = IntPayloadType; + + fn add_many( + &mut self, + id: PointOffsetType, + values: Vec, + hw_counter: &HardwareCounterCell, + ) -> OperationResult<()> { + match self { + MapIndex::Mutable(index) => index.add_many_to_map(id, values, hw_counter), + MapIndex::Immutable(_) => Err(OperationError::service_error( + "Can't add values to immutable map index", + )), + MapIndex::Mmap(_) => Err(OperationError::service_error( + "Can't add values to mmap map index", + )), + } + } + + fn get_value(value: &Value) -> Option { + if let Value::Number(num) = value { + return num.as_i64(); + } + None + } + + fn remove_point(&mut self, id: PointOffsetType) -> OperationResult<()> { + self.remove_point(id) + } +} + +impl ValueIndexer for MapIndex { + type ValueType = UuidIntType; + + fn add_many( + &mut self, + id: PointOffsetType, + values: Vec, + hw_counter: &HardwareCounterCell, + ) -> OperationResult<()> { + match self { + MapIndex::Mutable(index) => index.add_many_to_map(id, values, hw_counter), + MapIndex::Immutable(_) => Err(OperationError::service_error( + "Can't add values to immutable map index", + )), + MapIndex::Mmap(_) => Err(OperationError::service_error( + "Can't add values to mmap map index", + )), + } + } + + fn get_value(value: &Value) -> Option { + Some(Uuid::parse_str(value.as_str()?).ok()?.as_u128()) + } + + fn remove_point(&mut self, id: PointOffsetType) -> OperationResult<()> { + self.remove_point(id) + } +} + +#[cfg(test)] +mod tests { + use std::collections::HashSet; + use 
std::path::Path; + + use rstest::rstest; + use tempfile::Builder; + + use super::*; + use crate::common::rocksdb_wrapper::open_db_with_existing_cf; + + const FIELD_NAME: &str = "test"; + + #[derive(Clone, Copy)] + enum IndexType { + Mutable, + Immutable, + Mmap, + } + + fn save_map_index( + data: &[Vec], + path: &Path, + index_type: IndexType, + into_value: impl Fn(&N::Owned) -> Value, + ) where + N: MapIndexKey + ?Sized, + MapIndex: PayloadFieldIndex + ValueIndexer, + as ValueIndexer>::ValueType: Into, + { + let hw_counter = HardwareCounterCell::new(); + + match index_type { + IndexType::Mutable | IndexType::Immutable => { + let mut builder = + MapIndex::::builder(open_db_with_existing_cf(path).unwrap(), FIELD_NAME); + builder.init().unwrap(); + for (idx, values) in data.iter().enumerate() { + let values: Vec = values.iter().map(&into_value).collect(); + let values: Vec<_> = values.iter().collect(); + builder + .add_point(idx as PointOffsetType, &values, &hw_counter) + .unwrap(); + } + builder.finalize().unwrap(); + } + IndexType::Mmap => { + let mut builder = MapIndex::::mmap_builder(path, false); + builder.init().unwrap(); + for (idx, values) in data.iter().enumerate() { + let values: Vec = values.iter().map(&into_value).collect(); + let values: Vec<_> = values.iter().collect(); + builder + .add_point(idx as PointOffsetType, &values, &hw_counter) + .unwrap(); + } + builder.finalize().unwrap(); + } + } + } + + fn load_map_index(data: &[Vec], path: &Path, index_type: IndexType) -> MapIndex { + let mut index = match index_type { + IndexType::Mutable => { + MapIndex::::new_memory(open_db_with_existing_cf(path).unwrap(), FIELD_NAME, true) + } + IndexType::Immutable => MapIndex::::new_memory( + open_db_with_existing_cf(path).unwrap(), + FIELD_NAME, + false, + ), + IndexType::Mmap => MapIndex::::new_mmap(path, false).unwrap(), + }; + index.load_from_db().unwrap(); + for (idx, values) in data.iter().enumerate() { + \ No newline at end of file diff --git 
a/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mmap_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155806/extracted_output.txt b/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mmap_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155806/extracted_output.txt new file mode 100644 index 000000000..8939c1669 --- /dev/null +++ b/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mmap_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155806/extracted_output.txt @@ -0,0 +1,384 @@ +use std::fs::{create_dir_all, remove_dir}; +use std::ops::Bound; +use std::path::{Path, PathBuf}; + +use common::counter::conditioned_counter::ConditionedCounter; +use common::counter::hardware_counter::HardwareCounterCell; +use common::counter::iterator_hw_measurement::HwMeasurementIteratorExt; +use common::types::PointOffsetType; +use io::file_operations::{atomic_save_json, read_json}; +use memmap2::MmapMut; +use memory::fadvise::clear_disk_cache; +use memory::madvise::AdviceSetting; +use memory::mmap_ops::{self, create_and_ensure_length}; +use memory::mmap_type::{MmapBitSlice, MmapSlice}; +use serde::{Deserialize, Serialize}; + +use super::mutable_numeric_index::InMemoryNumericIndex; +use super::Encodable; +use crate::common::Flusher; +use crate::common::mmap_bitslice_buffered_update_wrapper::MmapBitSliceBufferedUpdateWrapper; +use crate::common::operation_error::OperationResult; +use crate::index::field_index::histogram::{Histogram, Numericable, Point}; +use crate::index::field_index::mmap_point_to_values::{MmapPointToValues, MmapValue}; + +const PAIRS_PATH: &str = "data.bin"; +const DELETED_PATH: &str = "deleted.bin"; +const CONFIG_PATH: &str = "mmap_field_index_config.json"; + +pub struct MmapNumericIndex { + path: PathBuf, + deleted: MmapBitSliceBufferedUpdateWrapper, + // sorted pairs (id + value), sorted by value (by id if values are equal) + pairs: MmapSlice>, + histogram: Histogram, + 
deleted_count: usize, + max_values_per_point: usize, + point_to_values: MmapPointToValues, + is_on_disk: bool, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct MmapNumericIndexConfig { + max_values_per_point: usize, +} + +pub(super) struct NumericIndexPairsIterator<'a, T: Encodable + Numericable> { + pairs: &'a [Point], + deleted: &'a MmapBitSliceBufferedUpdateWrapper, + start_index: usize, + end_index: usize, +} + +impl Iterator for NumericIndexPairsIterator<'_, T> { + type Item = Point; + + fn next(&mut self) -> Option { + while self.start_index < self.end_index { + let key = self.pairs[self.start_index].clone(); + let deleted = self.deleted.get(key.idx as usize).unwrap_or(true); + self.start_index += 1; + if deleted { + continue; + } + return Some(key); + } + None + } +} + +impl DoubleEndedIterator for NumericIndexPairsIterator<'_, T> { + fn next_back(&mut self) -> Option { + while self.start_index < self.end_index { + let key = self.pairs[self.end_index - 1].clone(); + let deleted = self.deleted.get(key.idx as usize).unwrap_or(true); + self.end_index -= 1; + if deleted { + continue; + } + return Some(key); + } + None + } +} + +impl MmapNumericIndex { + pub fn build( + in_memory_index: InMemoryNumericIndex, + path: &Path, + is_on_disk: bool, + ) -> OperationResult { + create_dir_all(path)?; + + let pairs_path = path.join(PAIRS_PATH); + let deleted_path = path.join(DELETED_PATH); + let config_path = path.join(CONFIG_PATH); + + atomic_save_json( + &config_path, + &MmapNumericIndexConfig { + max_values_per_point: in_memory_index.max_values_per_point, + }, + )?; + + in_memory_index.histogram.save(path)?; + + MmapPointToValues::::from_iter( + path, + in_memory_index + .point_to_values + .iter() + .enumerate() + .map(|(idx, values)| { + ( + idx as PointOffsetType, + values.iter().map(|v| T::as_referenced(v)), + ) + }), + )?; + + { + let pairs_file = create_and_ensure_length( + &pairs_path, + in_memory_index.map.len() * std::mem::size_of::>(), + )?; + let 
pairs_mmap = unsafe { MmapMut::map_mut(&pairs_file)? }; + let mut pairs = unsafe { MmapSlice::>::try_from(pairs_mmap)? }; + for (src, dst) in in_memory_index.map.iter().zip(pairs.iter_mut()) { + *dst = src.clone(); + } + } + + { + const BITS_IN_BYTE: usize = 8; + let deleted_flags_count = in_memory_index.point_to_values.len(); + let deleted_file = create_and_ensure_length( + &deleted_path, + BITS_IN_BYTE + * BITS_IN_BYTE + * deleted_flags_count.div_ceil(BITS_IN_BYTE * BITS_IN_BYTE), + )?; + let mut deleted_mmap = unsafe { MmapMut::map_mut(&deleted_file)? }; + deleted_mmap.fill(0); + let mut deleted_bitflags = MmapBitSlice::from(deleted_mmap, 0); + for (idx, values) in in_memory_index.point_to_values.iter().enumerate() { + if values.is_empty() { + deleted_bitflags.set(idx, true); + } + } + } + + Self::load(path, is_on_disk) + } + + pub fn load(path: &Path, is_on_disk: bool) -> OperationResult { + let pairs_path = path.join(PAIRS_PATH); + let deleted_path = path.join(DELETED_PATH); + let config_path = path.join(CONFIG_PATH); + + let histogram = Histogram::::load(path)?; + let config: MmapNumericIndexConfig = read_json(&config_path)?; + let deleted = mmap_ops::open_write_mmap(&deleted_path, AdviceSetting::Global, false)?; + let deleted = MmapBitSlice::from(deleted, 0); + let deleted_count = deleted.count_ones(); + let do_populate = !is_on_disk; + let map = unsafe { + MmapSlice::try_from(mmap_ops::open_write_mmap( + &pairs_path, + AdviceSetting::Global, + do_populate, + )?)? + }; + let point_to_values = MmapPointToValues::open(path, do_populate)?; + + Ok(Self { + pairs: map, + deleted: MmapBitSliceBufferedUpdateWrapper::new(deleted), + path: path.to_path_buf(), + histogram, + deleted_count, + max_values_per_point: config.max_values_per_point, + point_to_values, + is_on_disk, + }) + } + + pub fn clear(self) -> OperationResult<()> { + let files = self.files(); + let Self { path, .. 
} = self; + for file in files { + std::fs::remove_file(file)?; + } + let _ = remove_dir(path); + Ok(()) + } + + pub fn files(&self) -> Vec { + let mut files = vec![ + self.path.join(PAIRS_PATH), + self.path.join(DELETED_PATH), + self.path.join(CONFIG_PATH), + ]; + files.extend(self.point_to_values.files()); + files.extend(Histogram::::files(&self.path)); + files + } + + pub fn flusher(&self) -> Flusher { + self.deleted.flusher() + } + + pub fn check_values_any( + &self, + idx: PointOffsetType, + check_fn: impl Fn(&T) -> bool, + hw_counter: &HardwareCounterCell, + ) -> bool { + let hw_counter = self.make_conditioned_counter(hw_counter); + + if self.deleted.get(idx as usize) == Some(false) { + self.point_to_values.check_values_any( + idx, + |v| check_fn(T::from_referenced(&v)), + &hw_counter, + ) + } else { + false + } + } + + pub fn get_values(&self, idx: PointOffsetType) -> Option + '_>> { + if self.deleted.get(idx as usize) == Some(false) { + Some(Box::new( + self.point_to_values + .get_values(idx)? + .map(|v| *T::from_referenced(&v)), + )) + } else { + None + } + } + + pub fn values_count(&self, idx: PointOffsetType) -> Option { + if self.deleted.get(idx as usize) == Some(false) { + self.point_to_values.get_values_count(idx) + } else { + None + } + } + + /// Returns the number of key-value pairs in the index. + /// Note that is doesn't count deleted pairs. + pub(super) fn total_unique_values_count(&self) -> usize { + self.pairs.len() + } + + pub(super) fn values_range<'a>( + &'a self, + start_bound: Bound>, + end_bound: Bound>, + hw_counter: &'a HardwareCounterCell, + ) -> impl Iterator + 'a { + let hw_counter = self.make_conditioned_counter(hw_counter); + + self.values_range_iterator(start_bound, end_bound) + .map(|Point { idx, .. 
}| idx) + .measure_hw_with_condition_cell(hw_counter, size_of::>(), |i| { + i.payload_index_io_read_counter() + }) + } + + pub(super) fn orderable_values_range( + &self, + start_bound: Bound>, + end_bound: Bound>, + ) -> impl DoubleEndedIterator + '_ { + self.values_range_iterator(start_bound, end_bound) + .map(|Point { val, idx }| (val, idx)) + } + + pub fn remove_point(&mut self, idx: PointOffsetType) { + let idx = idx as usize; + if idx < self.deleted.len() && !self.deleted.get(idx).unwrap_or(true) { + self.deleted.set(idx, true); + self.deleted_count += 1; + } + } + + pub(super) fn get_histogram(&self) -> &Histogram { + &self.histogram + } + + pub(super) fn get_points_count(&self) -> usize { + self.point_to_values.len() - self.deleted_count + } + + pub(super) fn get_max_values_per_point(&self) -> usize { + self.max_values_per_point + } + + pub(super) fn values_range_size( + &self, + start_bound: Bound>, + end_bound: Bound>, + ) -> usize { + let iterator = self.values_range_iterator(start_bound, end_bound); + iterator.end_index - iterator.start_index + } + + // get iterator + fn values_range_iterator( + &self, + start_bound: Bound>, + end_bound: Bound>, + ) -> NumericIndexPairsIterator<'_, T> { + let start_index = match start_bound { + Bound::Included(bound) => self.pairs.binary_search(&bound).unwrap_or_else(|idx| idx), + Bound::Excluded(bound) => match self.pairs.binary_search(&bound) { + Ok(idx) => idx + 1, + Err(idx) => idx, + }, + Bound::Unbounded => 0, + }; + + if start_index >= self.pairs.len() { + return NumericIndexPairsIterator { + pairs: &self.pairs, + deleted: &self.deleted, + start_index: self.pairs.len(), + end_index: self.pairs.len(), + }; + } + + let end_index = match end_bound { + Bound::Included(bound) => match self.pairs[start_index..].binary_search(&bound) { + Ok(idx) => idx + 1 + start_index, + Err(idx) => idx + start_index, + }, + Bound::Excluded(bound) => { + let end_bound = self.pairs[start_index..].binary_search(&bound); + 
end_bound.unwrap_or_else(|idx| idx) + start_index + } + Bound::Unbounded => self.pairs.len(), + }; + + NumericIndexPairsIterator { + pairs: &self.pairs, + deleted: &self.deleted, + start_index, + end_index, + } + } + + fn make_conditioned_counter<'a>( + &self, + hw_counter: &'a HardwareCounterCell, + ) -> ConditionedCounter<'a> { + ConditionedCounter::new(self.is_on_disk, hw_counter) + } + + pub fn is_on_disk(&self) -> bool { + self.is_on_disk + } + + /// Populate all pages in the mmap. + /// Block until all pages are populated. + pub fn populate(&self) -> OperationResult<()> { + self.pairs.populate()?; + self.point_to_values.populate(); + Ok(()) + } + + /// Drop disk cache. + pub fn clear_cache(&self) -> OperationResult<()> { + let pairs_path = self.path.join(PAIRS_PATH); + let deleted_path = self.path.join(DELETED_PATH); + + clear_disk_cache(&pairs_path)?; + clear_disk_cache(&deleted_path)?; + + self.point_to_values.clear_cache()?; + + Ok(()) + } +} \ No newline at end of file diff --git a/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mmap_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155806/metadata.json b/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mmap_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155806/metadata.json new file mode 100644 index 000000000..fc2763693 --- /dev/null +++ b/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mmap_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155806/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "qdrant_lib_segment_src_index_field_index_numeric_index_mmap_numeric_index.rs", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/qdrant_lib_segment_src_index_field_index_numeric_index_mmap_numeric_index.rs_prompt.txt", + "expected_file": 
"locodiff-250425/prompts/qdrant_lib_segment_src_index_field_index_numeric_index_mmap_numeric_index.rs_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:58:06.770700+00:00", + "success": false, + "error": "Output mismatch", + "api_error": false, + "raw_response_length": 12589, + "extracted_output_length": 12577, + "expected_output_length": 12578, + "results_dir": "locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mmap_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155806", + "generation_id": "gen-1757433486-VXDBsJhMjTQwxXq2Vzg0", + "cost_usd": 0.01289314, + "prompt_tokens": 11944, + "completion_tokens": 4092, + "total_tokens": 16036, + "native_prompt_tokens": 11981, + "native_completion_tokens": 7021, + "native_finish_reason": "stop", + "stats_error": null, + "native_tokens_reasoning": 4159 +} \ No newline at end of file diff --git a/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mmap_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155806/output.diff b/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mmap_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155806/output.diff new file mode 100644 index 000000000..f156f3b2f --- /dev/null +++ b/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mmap_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155806/output.diff @@ -0,0 +1,14 @@ + +index df5de5037..8939c1669 100644 +--- a/qdrant_lib_segment_src_index_field_index_numeric_index_mmap_numeric_index.rs_expectedoutput.txt (expected):tmp/tmpi1g_vcc5_expected.txt ++++ b/qdrant_lib_segment_src_index_field_index_numeric_index_mmap_numeric_index.rs_extracted.txt (actual):tmp/tmpuu636ekl_actual.txt +@@ -14,8 +14,8 @@ use memory::mmap_ops::{self, create_and_ensure_length}; + use memory::mmap_type::{MmapBitSlice, MmapSlice}; + use serde::{Deserialize, Serialize}; + +-use super::Encodable; + use super::mutable_numeric_index::InMemoryNumericIndex; ++use 
super::Encodable; + use crate::common::Flusher; + use crate::common::mmap_bitslice_buffered_update_wrapper::MmapBitSliceBufferedUpdateWrapper; + use crate::common::operation_error::OperationResult; diff --git a/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mmap_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155806/raw_response.txt b/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mmap_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155806/raw_response.txt new file mode 100644 index 000000000..e384946bf --- /dev/null +++ b/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mmap_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155806/raw_response.txt @@ -0,0 +1,386 @@ +```rust +use std::fs::{create_dir_all, remove_dir}; +use std::ops::Bound; +use std::path::{Path, PathBuf}; + +use common::counter::conditioned_counter::ConditionedCounter; +use common::counter::hardware_counter::HardwareCounterCell; +use common::counter::iterator_hw_measurement::HwMeasurementIteratorExt; +use common::types::PointOffsetType; +use io::file_operations::{atomic_save_json, read_json}; +use memmap2::MmapMut; +use memory::fadvise::clear_disk_cache; +use memory::madvise::AdviceSetting; +use memory::mmap_ops::{self, create_and_ensure_length}; +use memory::mmap_type::{MmapBitSlice, MmapSlice}; +use serde::{Deserialize, Serialize}; + +use super::mutable_numeric_index::InMemoryNumericIndex; +use super::Encodable; +use crate::common::Flusher; +use crate::common::mmap_bitslice_buffered_update_wrapper::MmapBitSliceBufferedUpdateWrapper; +use crate::common::operation_error::OperationResult; +use crate::index::field_index::histogram::{Histogram, Numericable, Point}; +use crate::index::field_index::mmap_point_to_values::{MmapPointToValues, MmapValue}; + +const PAIRS_PATH: &str = "data.bin"; +const DELETED_PATH: &str = "deleted.bin"; +const CONFIG_PATH: &str = "mmap_field_index_config.json"; + +pub struct 
MmapNumericIndex { + path: PathBuf, + deleted: MmapBitSliceBufferedUpdateWrapper, + // sorted pairs (id + value), sorted by value (by id if values are equal) + pairs: MmapSlice>, + histogram: Histogram, + deleted_count: usize, + max_values_per_point: usize, + point_to_values: MmapPointToValues, + is_on_disk: bool, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct MmapNumericIndexConfig { + max_values_per_point: usize, +} + +pub(super) struct NumericIndexPairsIterator<'a, T: Encodable + Numericable> { + pairs: &'a [Point], + deleted: &'a MmapBitSliceBufferedUpdateWrapper, + start_index: usize, + end_index: usize, +} + +impl Iterator for NumericIndexPairsIterator<'_, T> { + type Item = Point; + + fn next(&mut self) -> Option { + while self.start_index < self.end_index { + let key = self.pairs[self.start_index].clone(); + let deleted = self.deleted.get(key.idx as usize).unwrap_or(true); + self.start_index += 1; + if deleted { + continue; + } + return Some(key); + } + None + } +} + +impl DoubleEndedIterator for NumericIndexPairsIterator<'_, T> { + fn next_back(&mut self) -> Option { + while self.start_index < self.end_index { + let key = self.pairs[self.end_index - 1].clone(); + let deleted = self.deleted.get(key.idx as usize).unwrap_or(true); + self.end_index -= 1; + if deleted { + continue; + } + return Some(key); + } + None + } +} + +impl MmapNumericIndex { + pub fn build( + in_memory_index: InMemoryNumericIndex, + path: &Path, + is_on_disk: bool, + ) -> OperationResult { + create_dir_all(path)?; + + let pairs_path = path.join(PAIRS_PATH); + let deleted_path = path.join(DELETED_PATH); + let config_path = path.join(CONFIG_PATH); + + atomic_save_json( + &config_path, + &MmapNumericIndexConfig { + max_values_per_point: in_memory_index.max_values_per_point, + }, + )?; + + in_memory_index.histogram.save(path)?; + + MmapPointToValues::::from_iter( + path, + in_memory_index + .point_to_values + .iter() + .enumerate() + .map(|(idx, values)| { + ( + idx as 
PointOffsetType, + values.iter().map(|v| T::as_referenced(v)), + ) + }), + )?; + + { + let pairs_file = create_and_ensure_length( + &pairs_path, + in_memory_index.map.len() * std::mem::size_of::>(), + )?; + let pairs_mmap = unsafe { MmapMut::map_mut(&pairs_file)? }; + let mut pairs = unsafe { MmapSlice::>::try_from(pairs_mmap)? }; + for (src, dst) in in_memory_index.map.iter().zip(pairs.iter_mut()) { + *dst = src.clone(); + } + } + + { + const BITS_IN_BYTE: usize = 8; + let deleted_flags_count = in_memory_index.point_to_values.len(); + let deleted_file = create_and_ensure_length( + &deleted_path, + BITS_IN_BYTE + * BITS_IN_BYTE + * deleted_flags_count.div_ceil(BITS_IN_BYTE * BITS_IN_BYTE), + )?; + let mut deleted_mmap = unsafe { MmapMut::map_mut(&deleted_file)? }; + deleted_mmap.fill(0); + let mut deleted_bitflags = MmapBitSlice::from(deleted_mmap, 0); + for (idx, values) in in_memory_index.point_to_values.iter().enumerate() { + if values.is_empty() { + deleted_bitflags.set(idx, true); + } + } + } + + Self::load(path, is_on_disk) + } + + pub fn load(path: &Path, is_on_disk: bool) -> OperationResult { + let pairs_path = path.join(PAIRS_PATH); + let deleted_path = path.join(DELETED_PATH); + let config_path = path.join(CONFIG_PATH); + + let histogram = Histogram::::load(path)?; + let config: MmapNumericIndexConfig = read_json(&config_path)?; + let deleted = mmap_ops::open_write_mmap(&deleted_path, AdviceSetting::Global, false)?; + let deleted = MmapBitSlice::from(deleted, 0); + let deleted_count = deleted.count_ones(); + let do_populate = !is_on_disk; + let map = unsafe { + MmapSlice::try_from(mmap_ops::open_write_mmap( + &pairs_path, + AdviceSetting::Global, + do_populate, + )?)? 
+ }; + let point_to_values = MmapPointToValues::open(path, do_populate)?; + + Ok(Self { + pairs: map, + deleted: MmapBitSliceBufferedUpdateWrapper::new(deleted), + path: path.to_path_buf(), + histogram, + deleted_count, + max_values_per_point: config.max_values_per_point, + point_to_values, + is_on_disk, + }) + } + + pub fn clear(self) -> OperationResult<()> { + let files = self.files(); + let Self { path, .. } = self; + for file in files { + std::fs::remove_file(file)?; + } + let _ = remove_dir(path); + Ok(()) + } + + pub fn files(&self) -> Vec { + let mut files = vec![ + self.path.join(PAIRS_PATH), + self.path.join(DELETED_PATH), + self.path.join(CONFIG_PATH), + ]; + files.extend(self.point_to_values.files()); + files.extend(Histogram::::files(&self.path)); + files + } + + pub fn flusher(&self) -> Flusher { + self.deleted.flusher() + } + + pub fn check_values_any( + &self, + idx: PointOffsetType, + check_fn: impl Fn(&T) -> bool, + hw_counter: &HardwareCounterCell, + ) -> bool { + let hw_counter = self.make_conditioned_counter(hw_counter); + + if self.deleted.get(idx as usize) == Some(false) { + self.point_to_values.check_values_any( + idx, + |v| check_fn(T::from_referenced(&v)), + &hw_counter, + ) + } else { + false + } + } + + pub fn get_values(&self, idx: PointOffsetType) -> Option + '_>> { + if self.deleted.get(idx as usize) == Some(false) { + Some(Box::new( + self.point_to_values + .get_values(idx)? + .map(|v| *T::from_referenced(&v)), + )) + } else { + None + } + } + + pub fn values_count(&self, idx: PointOffsetType) -> Option { + if self.deleted.get(idx as usize) == Some(false) { + self.point_to_values.get_values_count(idx) + } else { + None + } + } + + /// Returns the number of key-value pairs in the index. + /// Note that is doesn't count deleted pairs. 
+ pub(super) fn total_unique_values_count(&self) -> usize { + self.pairs.len() + } + + pub(super) fn values_range<'a>( + &'a self, + start_bound: Bound>, + end_bound: Bound>, + hw_counter: &'a HardwareCounterCell, + ) -> impl Iterator + 'a { + let hw_counter = self.make_conditioned_counter(hw_counter); + + self.values_range_iterator(start_bound, end_bound) + .map(|Point { idx, .. }| idx) + .measure_hw_with_condition_cell(hw_counter, size_of::>(), |i| { + i.payload_index_io_read_counter() + }) + } + + pub(super) fn orderable_values_range( + &self, + start_bound: Bound>, + end_bound: Bound>, + ) -> impl DoubleEndedIterator + '_ { + self.values_range_iterator(start_bound, end_bound) + .map(|Point { val, idx }| (val, idx)) + } + + pub fn remove_point(&mut self, idx: PointOffsetType) { + let idx = idx as usize; + if idx < self.deleted.len() && !self.deleted.get(idx).unwrap_or(true) { + self.deleted.set(idx, true); + self.deleted_count += 1; + } + } + + pub(super) fn get_histogram(&self) -> &Histogram { + &self.histogram + } + + pub(super) fn get_points_count(&self) -> usize { + self.point_to_values.len() - self.deleted_count + } + + pub(super) fn get_max_values_per_point(&self) -> usize { + self.max_values_per_point + } + + pub(super) fn values_range_size( + &self, + start_bound: Bound>, + end_bound: Bound>, + ) -> usize { + let iterator = self.values_range_iterator(start_bound, end_bound); + iterator.end_index - iterator.start_index + } + + // get iterator + fn values_range_iterator( + &self, + start_bound: Bound>, + end_bound: Bound>, + ) -> NumericIndexPairsIterator<'_, T> { + let start_index = match start_bound { + Bound::Included(bound) => self.pairs.binary_search(&bound).unwrap_or_else(|idx| idx), + Bound::Excluded(bound) => match self.pairs.binary_search(&bound) { + Ok(idx) => idx + 1, + Err(idx) => idx, + }, + Bound::Unbounded => 0, + }; + + if start_index >= self.pairs.len() { + return NumericIndexPairsIterator { + pairs: &self.pairs, + deleted: &self.deleted, 
+ start_index: self.pairs.len(), + end_index: self.pairs.len(), + }; + } + + let end_index = match end_bound { + Bound::Included(bound) => match self.pairs[start_index..].binary_search(&bound) { + Ok(idx) => idx + 1 + start_index, + Err(idx) => idx + start_index, + }, + Bound::Excluded(bound) => { + let end_bound = self.pairs[start_index..].binary_search(&bound); + end_bound.unwrap_or_else(|idx| idx) + start_index + } + Bound::Unbounded => self.pairs.len(), + }; + + NumericIndexPairsIterator { + pairs: &self.pairs, + deleted: &self.deleted, + start_index, + end_index, + } + } + + fn make_conditioned_counter<'a>( + &self, + hw_counter: &'a HardwareCounterCell, + ) -> ConditionedCounter<'a> { + ConditionedCounter::new(self.is_on_disk, hw_counter) + } + + pub fn is_on_disk(&self) -> bool { + self.is_on_disk + } + + /// Populate all pages in the mmap. + /// Block until all pages are populated. + pub fn populate(&self) -> OperationResult<()> { + self.pairs.populate()?; + self.point_to_values.populate(); + Ok(()) + } + + /// Drop disk cache. 
+ pub fn clear_cache(&self) -> OperationResult<()> { + let pairs_path = self.path.join(PAIRS_PATH); + let deleted_path = self.path.join(DELETED_PATH); + + clear_disk_cache(&pairs_path)?; + clear_disk_cache(&deleted_path)?; + + self.point_to_values.clear_cache()?; + + Ok(()) + } +} +``` \ No newline at end of file diff --git a/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mutable_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt b/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mutable_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt new file mode 100644 index 000000000..be62b2639 --- /dev/null +++ b/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mutable_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155741/extracted_output.txt @@ -0,0 +1,326 @@ +use std::collections::BTreeSet; +use std::ops::Bound; +use std::ops::Bound::{Excluded, Unbounded}; +use std::sync::Arc; + +use common::counter::hardware_counter::HardwareCounterCell; +use common::types::PointOffsetType; +use parking_lot::RwLock; +use rocksdb::DB; + +use super::{ + Encodable, HISTOGRAM_MAX_BUCKET_SIZE, HISTOGRAM_PRECISION, numeric_index_storage_cf_name, +}; +use crate::common::operation_error::{OperationError, OperationResult}; +use crate::common::rocksdb_buffered_delete_wrapper::DatabaseColumnScheduledDeleteWrapper; +use crate::common::rocksdb_wrapper::DatabaseColumnWrapper; +use crate::index::field_index::histogram::{Histogram, Numericable, Point}; + +pub struct MutableNumericIndex { + db_wrapper: DatabaseColumnScheduledDeleteWrapper, + in_memory_index: InMemoryNumericIndex, +} + +pub struct InMemoryNumericIndex { + pub map: BTreeSet>, + pub histogram: Histogram, + pub points_count: usize, + pub max_values_per_point: usize, + pub point_to_values: Vec>, +} + +impl Default for InMemoryNumericIndex { + fn default() -> Self { + Self { + map: BTreeSet::new(), + 
histogram: Histogram::new(HISTOGRAM_MAX_BUCKET_SIZE, HISTOGRAM_PRECISION), + points_count: 0, + max_values_per_point: 0, + point_to_values: Default::default(), + } + } +} + +impl FromIterator<(PointOffsetType, T)> + for InMemoryNumericIndex +{ + fn from_iter>(iter: I) -> Self { + let mut index = InMemoryNumericIndex::default(); + for pair in iter { + let (idx, value) = pair; + + if index.point_to_values.len() <= idx as usize { + index + .point_to_values + .resize_with(idx as usize + 1, Vec::new) + } + + index.point_to_values[idx as usize].push(value); + + let key = Point::new(value, idx); + InMemoryNumericIndex::add_to_map(&mut index.map, &mut index.histogram, key); + } + for values in &index.point_to_values { + if !values.is_empty() { + index.points_count += 1; + index.max_values_per_point = index.max_values_per_point.max(values.len()); + } + } + index + } +} + +impl InMemoryNumericIndex { + pub fn check_values_any(&self, idx: PointOffsetType, check_fn: impl Fn(&T) -> bool) -> bool { + self.point_to_values + .get(idx as usize) + .map(|values| values.iter().any(check_fn)) + .unwrap_or(false) + } + + pub fn get_values(&self, idx: PointOffsetType) -> Option + '_>> { + Some(Box::new( + self.point_to_values + .get(idx as usize) + .map(|v| v.iter().cloned())?, + )) + } + + pub fn values_count(&self, idx: PointOffsetType) -> Option { + self.point_to_values.get(idx as usize).map(Vec::len) + } + + pub fn total_unique_values_count(&self) -> usize { + self.map.len() + } + + pub fn values_range( + &self, + start_bound: Bound>, + end_bound: Bound>, + ) -> impl Iterator { + self.map + .range((start_bound, end_bound)) + .map(|point| point.idx) + } + + pub fn orderable_values_range( + &self, + start_bound: Bound>, + end_bound: Bound>, + ) -> impl DoubleEndedIterator + '_ { + self.map + .range((start_bound, end_bound)) + .map(|point| (point.val, point.idx)) + } + + pub fn add_many_to_list(&mut self, idx: PointOffsetType, values: Vec) { + if self.point_to_values.len() <= idx as 
usize { + self.point_to_values.resize_with(idx as usize + 1, Vec::new) + } + for value in &values { + let key = Point::new(*value, idx); + Self::add_to_map(&mut self.map, &mut self.histogram, key); + } + if !values.is_empty() { + self.points_count += 1; + self.max_values_per_point = self.max_values_per_point.max(values.len()); + } + self.point_to_values[idx as usize] = values; + } + + pub fn remove_point(&mut self, idx: PointOffsetType) { + if let Some(values) = self.point_to_values.get_mut(idx as usize) { + if !values.is_empty() { + self.points_count = self.points_count.checked_sub(1).unwrap_or_default(); + } + for value in values.iter() { + let key = Point::new(*value, idx); + Self::remove_from_map(&mut self.map, &mut self.histogram, key); + } + *values = Default::default(); + } + } + + fn add_to_map(map: &mut BTreeSet>, histogram: &mut Histogram, key: Point) { + let was_added = map.insert(key.clone()); + if was_added { + histogram.insert( + key, + |x| Self::get_histogram_left_neighbor(map, x.clone()), + |x| Self::get_histogram_right_neighbor(map, x.clone()), + ); + } + } + + fn remove_from_map(map: &mut BTreeSet>, histogram: &mut Histogram, key: Point) { + let was_removed = map.remove(&key); + if was_removed { + histogram.remove( + &key, + |x| Self::get_histogram_left_neighbor(map, x.clone()), + |x| Self::get_histogram_right_neighbor(map, x.clone()), + ); + } + } + + fn get_histogram_left_neighbor(map: &BTreeSet>, key: Point) -> Option> { + map.range((Unbounded, Excluded(key))).next_back().cloned() + } + + fn get_histogram_right_neighbor(map: &BTreeSet>, key: Point) -> Option> { + map.range((Excluded(key), Unbounded)).next().cloned() + } + + pub fn get_histogram(&self) -> &Histogram { + &self.histogram + } + + pub fn get_points_count(&self) -> usize { + self.points_count + } + + pub fn get_max_values_per_point(&self) -> usize { + self.max_values_per_point + } +} + +impl MutableNumericIndex { + pub fn new_from_db_wrapper(db_wrapper: 
DatabaseColumnScheduledDeleteWrapper) -> Self { + Self { + db_wrapper, + in_memory_index: InMemoryNumericIndex::default(), + } + } + + pub fn into_in_memory_index(self) -> InMemoryNumericIndex { + self.in_memory_index + } + + pub fn get_db_wrapper(&self) -> &DatabaseColumnScheduledDeleteWrapper { + &self.db_wrapper + } + + pub fn new(db: Arc>, field: &str) -> Self { + let store_cf_name = numeric_index_storage_cf_name(field); + let db_wrapper = DatabaseColumnScheduledDeleteWrapper::new(DatabaseColumnWrapper::new( + db, + &store_cf_name, + )); + Self { + db_wrapper, + in_memory_index: InMemoryNumericIndex::default(), + } + } + + pub fn load(&mut self) -> OperationResult { + if !self.db_wrapper.has_column_family()? { + return Ok(false); + }; + + self.in_memory_index = self + .db_wrapper + .lock_db() + .iter()? + .map(|(key, value)| { + let value_idx = + u32::from_be_bytes(value.as_ref().try_into().map_err(|_| { + OperationError::service_error("incorrect numeric index value") + })?); + let (idx, value) = T::decode_key(&key); + if idx != value_idx { + return Err(OperationError::service_error( + "incorrect numeric index key-value pair", + )); + } + Ok((idx, value)) + }) + .collect::, OperationError>>()?; + + Ok(true) + } + + pub fn add_many_to_list( + &mut self, + idx: PointOffsetType, + values: Vec, + hw_counter: &HardwareCounterCell, + ) -> OperationResult<()> { + let mut hw_cell_wb = hw_counter + .payload_index_io_write_counter() + .write_back_counter(); + + for value in &values { + let key = value.encode_key(idx); + self.db_wrapper.put(&key, idx.to_be_bytes())?; + hw_cell_wb.incr_delta(size_of_val(&key) + size_of_val(&idx)); + } + + self.in_memory_index.add_many_to_list(idx, values); + Ok(()) + } + + pub fn remove_point(&mut self, idx: PointOffsetType) -> OperationResult<()> { + self.in_memory_index + .get_values(idx) + .map(|mut values| { + values.try_for_each(|value| { + let key = value.encode_key(idx); + self.db_wrapper.remove(key) + }) + }) + .transpose()?; + 
self.in_memory_index.remove_point(idx); + Ok(()) + } + + pub fn map(&self) -> &BTreeSet> { + &self.in_memory_index.map + } + + #[inline] + pub fn total_unique_values_count(&self) -> usize { + self.in_memory_index.total_unique_values_count() + } + #[inline] + pub fn check_values_any(&self, idx: PointOffsetType, check_fn: impl Fn(&T) -> bool) -> bool { + self.in_memory_index.check_values_any(idx, check_fn) + } + #[inline] + pub fn get_points_count(&self) -> usize { + self.in_memory_index.get_points_count() + } + #[inline] + pub fn get_values(&self, idx: PointOffsetType) -> Option + '_>> { + self.in_memory_index.get_values(idx) + } + #[inline] + pub fn values_count(&self, idx: PointOffsetType) -> Option { + self.in_memory_index.values_count(idx) + } + #[inline] + pub fn values_range( + &self, + start_bound: Bound>, + end_bound: Bound>, + ) -> impl Iterator { + self.in_memory_index.values_range(start_bound, end_bound) + } + #[inline] + pub fn orderable_values_range( + &self, + start_bound: Bound>, + end_bound: Bound>, + ) -> impl DoubleEndedIterator + '_ { + self.in_memory_index + .orderable_values_range(start_bound, end_bound) + } + #[inline] + pub fn get_histogram(&self) -> &Histogram { + self.in_memory_index.get_histogram() + } + #[inline] + pub fn get_max_values_per_point(&self) -> usize { + self.in_memory_index.get_max_values_per_point() + } +} \ No newline at end of file diff --git a/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mutable_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155741/metadata.json b/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mutable_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155741/metadata.json new file mode 100644 index 000000000..564d20956 --- /dev/null +++ b/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mutable_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155741/metadata.json @@ -0,0 +1,25 @@ +{ + "model": 
"x-ai/grok-code-fast-1", + "benchmark_case": "qdrant_lib_segment_src_index_field_index_numeric_index_mutable_numeric_index.rs", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/qdrant_lib_segment_src_index_field_index_numeric_index_mutable_numeric_index.rs_prompt.txt", + "expected_file": "locodiff-250425/prompts/qdrant_lib_segment_src_index_field_index_numeric_index_mutable_numeric_index.rs_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:57:41.902430+00:00", + "success": false, + "error": "Output mismatch", + "api_error": false, + "raw_response_length": 10771, + "extracted_output_length": 10759, + "expected_output_length": 11067, + "results_dir": "locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mutable_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155741", + "generation_id": "gen-1757433462-C8iCAu5VYp4kt7icIXfn", + "cost_usd": 0.00453658, + "prompt_tokens": 20947, + "completion_tokens": 2619, + "total_tokens": 23566, + "native_prompt_tokens": 20888, + "native_completion_tokens": 2743, + "native_finish_reason": "stop", + "stats_error": null, + "native_tokens_reasoning": 263 +} \ No newline at end of file diff --git a/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mutable_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155741/output.diff b/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mutable_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155741/output.diff new file mode 100644 index 000000000..a375bd23d --- /dev/null +++ b/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mutable_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155741/output.diff @@ -0,0 +1,22 @@ + +index 153c7d551..be62b2639 100644 +--- a/qdrant_lib_segment_src_index_field_index_numeric_index_mutable_numeric_index.rs_expectedoutput.txt (expected):tmp/tmpymocw6s9_expected.txt ++++ 
b/qdrant_lib_segment_src_index_field_index_numeric_index_mutable_numeric_index.rs_extracted.txt (actual):tmp/tmp4tqe8k_8_actual.txt +@@ -21,7 +21,6 @@ pub struct MutableNumericIndex { + in_memory_index: InMemoryNumericIndex, + } + +-// Numeric Index with insertions and deletions without persistence + pub struct InMemoryNumericIndex { + pub map: BTreeSet>, + pub histogram: Histogram, +@@ -145,9 +144,6 @@ impl InMemoryNumericIndex { + + fn add_to_map(map: &mut BTreeSet>, histogram: &mut Histogram, key: Point) { + let was_added = map.insert(key.clone()); +- // Histogram works with unique values (idx + value) only, so we need to +- // make sure that we don't add the same value twice. +- // key is a combination of value + idx, so we can use it to ensure than the pair is unique + if was_added { + histogram.insert( + key, diff --git a/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mutable_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt b/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mutable_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt new file mode 100644 index 000000000..fcac1a0c4 --- /dev/null +++ b/locodiff-250425/results/qdrant_lib_segment_src_index_field_index_numeric_index_mutable_numeric_index.rs/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt @@ -0,0 +1,328 @@ +```rust +use std::collections::BTreeSet; +use std::ops::Bound; +use std::ops::Bound::{Excluded, Unbounded}; +use std::sync::Arc; + +use common::counter::hardware_counter::HardwareCounterCell; +use common::types::PointOffsetType; +use parking_lot::RwLock; +use rocksdb::DB; + +use super::{ + Encodable, HISTOGRAM_MAX_BUCKET_SIZE, HISTOGRAM_PRECISION, numeric_index_storage_cf_name, +}; +use crate::common::operation_error::{OperationError, OperationResult}; +use crate::common::rocksdb_buffered_delete_wrapper::DatabaseColumnScheduledDeleteWrapper; +use 
crate::common::rocksdb_wrapper::DatabaseColumnWrapper; +use crate::index::field_index::histogram::{Histogram, Numericable, Point}; + +pub struct MutableNumericIndex { + db_wrapper: DatabaseColumnScheduledDeleteWrapper, + in_memory_index: InMemoryNumericIndex, +} + +pub struct InMemoryNumericIndex { + pub map: BTreeSet>, + pub histogram: Histogram, + pub points_count: usize, + pub max_values_per_point: usize, + pub point_to_values: Vec>, +} + +impl Default for InMemoryNumericIndex { + fn default() -> Self { + Self { + map: BTreeSet::new(), + histogram: Histogram::new(HISTOGRAM_MAX_BUCKET_SIZE, HISTOGRAM_PRECISION), + points_count: 0, + max_values_per_point: 0, + point_to_values: Default::default(), + } + } +} + +impl FromIterator<(PointOffsetType, T)> + for InMemoryNumericIndex +{ + fn from_iter>(iter: I) -> Self { + let mut index = InMemoryNumericIndex::default(); + for pair in iter { + let (idx, value) = pair; + + if index.point_to_values.len() <= idx as usize { + index + .point_to_values + .resize_with(idx as usize + 1, Vec::new) + } + + index.point_to_values[idx as usize].push(value); + + let key = Point::new(value, idx); + InMemoryNumericIndex::add_to_map(&mut index.map, &mut index.histogram, key); + } + for values in &index.point_to_values { + if !values.is_empty() { + index.points_count += 1; + index.max_values_per_point = index.max_values_per_point.max(values.len()); + } + } + index + } +} + +impl InMemoryNumericIndex { + pub fn check_values_any(&self, idx: PointOffsetType, check_fn: impl Fn(&T) -> bool) -> bool { + self.point_to_values + .get(idx as usize) + .map(|values| values.iter().any(check_fn)) + .unwrap_or(false) + } + + pub fn get_values(&self, idx: PointOffsetType) -> Option + '_>> { + Some(Box::new( + self.point_to_values + .get(idx as usize) + .map(|v| v.iter().cloned())?, + )) + } + + pub fn values_count(&self, idx: PointOffsetType) -> Option { + self.point_to_values.get(idx as usize).map(Vec::len) + } + + pub fn total_unique_values_count(&self) 
-> usize { + self.map.len() + } + + pub fn values_range( + &self, + start_bound: Bound>, + end_bound: Bound>, + ) -> impl Iterator { + self.map + .range((start_bound, end_bound)) + .map(|point| point.idx) + } + + pub fn orderable_values_range( + &self, + start_bound: Bound>, + end_bound: Bound>, + ) -> impl DoubleEndedIterator + '_ { + self.map + .range((start_bound, end_bound)) + .map(|point| (point.val, point.idx)) + } + + pub fn add_many_to_list(&mut self, idx: PointOffsetType, values: Vec) { + if self.point_to_values.len() <= idx as usize { + self.point_to_values.resize_with(idx as usize + 1, Vec::new) + } + for value in &values { + let key = Point::new(*value, idx); + Self::add_to_map(&mut self.map, &mut self.histogram, key); + } + if !values.is_empty() { + self.points_count += 1; + self.max_values_per_point = self.max_values_per_point.max(values.len()); + } + self.point_to_values[idx as usize] = values; + } + + pub fn remove_point(&mut self, idx: PointOffsetType) { + if let Some(values) = self.point_to_values.get_mut(idx as usize) { + if !values.is_empty() { + self.points_count = self.points_count.checked_sub(1).unwrap_or_default(); + } + for value in values.iter() { + let key = Point::new(*value, idx); + Self::remove_from_map(&mut self.map, &mut self.histogram, key); + } + *values = Default::default(); + } + } + + fn add_to_map(map: &mut BTreeSet>, histogram: &mut Histogram, key: Point) { + let was_added = map.insert(key.clone()); + if was_added { + histogram.insert( + key, + |x| Self::get_histogram_left_neighbor(map, x.clone()), + |x| Self::get_histogram_right_neighbor(map, x.clone()), + ); + } + } + + fn remove_from_map(map: &mut BTreeSet>, histogram: &mut Histogram, key: Point) { + let was_removed = map.remove(&key); + if was_removed { + histogram.remove( + &key, + |x| Self::get_histogram_left_neighbor(map, x.clone()), + |x| Self::get_histogram_right_neighbor(map, x.clone()), + ); + } + } + + fn get_histogram_left_neighbor(map: &BTreeSet>, key: Point) -> 
Option> { + map.range((Unbounded, Excluded(key))).next_back().cloned() + } + + fn get_histogram_right_neighbor(map: &BTreeSet>, key: Point) -> Option> { + map.range((Excluded(key), Unbounded)).next().cloned() + } + + pub fn get_histogram(&self) -> &Histogram { + &self.histogram + } + + pub fn get_points_count(&self) -> usize { + self.points_count + } + + pub fn get_max_values_per_point(&self) -> usize { + self.max_values_per_point + } +} + +impl MutableNumericIndex { + pub fn new_from_db_wrapper(db_wrapper: DatabaseColumnScheduledDeleteWrapper) -> Self { + Self { + db_wrapper, + in_memory_index: InMemoryNumericIndex::default(), + } + } + + pub fn into_in_memory_index(self) -> InMemoryNumericIndex { + self.in_memory_index + } + + pub fn get_db_wrapper(&self) -> &DatabaseColumnScheduledDeleteWrapper { + &self.db_wrapper + } + + pub fn new(db: Arc>, field: &str) -> Self { + let store_cf_name = numeric_index_storage_cf_name(field); + let db_wrapper = DatabaseColumnScheduledDeleteWrapper::new(DatabaseColumnWrapper::new( + db, + &store_cf_name, + )); + Self { + db_wrapper, + in_memory_index: InMemoryNumericIndex::default(), + } + } + + pub fn load(&mut self) -> OperationResult { + if !self.db_wrapper.has_column_family()? { + return Ok(false); + }; + + self.in_memory_index = self + .db_wrapper + .lock_db() + .iter()? 
+ .map(|(key, value)| { + let value_idx = + u32::from_be_bytes(value.as_ref().try_into().map_err(|_| { + OperationError::service_error("incorrect numeric index value") + })?); + let (idx, value) = T::decode_key(&key); + if idx != value_idx { + return Err(OperationError::service_error( + "incorrect numeric index key-value pair", + )); + } + Ok((idx, value)) + }) + .collect::, OperationError>>()?; + + Ok(true) + } + + pub fn add_many_to_list( + &mut self, + idx: PointOffsetType, + values: Vec, + hw_counter: &HardwareCounterCell, + ) -> OperationResult<()> { + let mut hw_cell_wb = hw_counter + .payload_index_io_write_counter() + .write_back_counter(); + + for value in &values { + let key = value.encode_key(idx); + self.db_wrapper.put(&key, idx.to_be_bytes())?; + hw_cell_wb.incr_delta(size_of_val(&key) + size_of_val(&idx)); + } + + self.in_memory_index.add_many_to_list(idx, values); + Ok(()) + } + + pub fn remove_point(&mut self, idx: PointOffsetType) -> OperationResult<()> { + self.in_memory_index + .get_values(idx) + .map(|mut values| { + values.try_for_each(|value| { + let key = value.encode_key(idx); + self.db_wrapper.remove(key) + }) + }) + .transpose()?; + self.in_memory_index.remove_point(idx); + Ok(()) + } + + pub fn map(&self) -> &BTreeSet> { + &self.in_memory_index.map + } + + #[inline] + pub fn total_unique_values_count(&self) -> usize { + self.in_memory_index.total_unique_values_count() + } + #[inline] + pub fn check_values_any(&self, idx: PointOffsetType, check_fn: impl Fn(&T) -> bool) -> bool { + self.in_memory_index.check_values_any(idx, check_fn) + } + #[inline] + pub fn get_points_count(&self) -> usize { + self.in_memory_index.get_points_count() + } + #[inline] + pub fn get_values(&self, idx: PointOffsetType) -> Option + '_>> { + self.in_memory_index.get_values(idx) + } + #[inline] + pub fn values_count(&self, idx: PointOffsetType) -> Option { + self.in_memory_index.values_count(idx) + } + #[inline] + pub fn values_range( + &self, + start_bound: 
Bound>, + end_bound: Bound>, + ) -> impl Iterator { + self.in_memory_index.values_range(start_bound, end_bound) + } + #[inline] + pub fn orderable_values_range( + &self, + start_bound: Bound>, + end_bound: Bound>, + ) -> impl DoubleEndedIterator + '_ { + self.in_memory_index + .orderable_values_range(start_bound, end_bound) + } + #[inline] + pub fn get_histogram(&self) -> &Histogram { + self.in_memory_index.get_histogram() + } + #[inline] + pub fn get_max_values_per_point(&self) -> usize { + self.in_memory_index.get_max_values_per_point() + } +} +``` \ No newline at end of file diff --git a/locodiff-250425/results/qdrant_lib_segment_src_index_hnsw_index_graph_layers.rs/x-ai_grok-code-fast-1/20250909_155814/extracted_output.txt b/locodiff-250425/results/qdrant_lib_segment_src_index_hnsw_index_graph_layers.rs/x-ai_grok-code-fast-1/20250909_155814/extracted_output.txt new file mode 100644 index 000000000..fc9f9183b --- /dev/null +++ b/locodiff-250425/results/qdrant_lib_segment_src_index_hnsw_index_graph_layers.rs/x-ai_grok-code-fast-1/20250909_155814/extracted_output.txt @@ -0,0 +1,561 @@ +use std::borrow::Cow; +use std::cmp::max; +use std::path::{Path, PathBuf}; +use std::sync::atomic::AtomicBool; + +use common::fixed_length_priority_queue::FixedLengthPriorityQueue; +use common::types::{PointOffsetType, ScoredPointOffset}; +use io::file_operations::read_bin; +use itertools::Itertools; +use serde::{Deserialize, Serialize}; + +use super::entry_points::EntryPoint; +use super::graph_links::{GraphLinks, GraphLinksFormat}; +use crate::common::operation_error::{ + CancellableResult, OperationError, OperationResult, check_process_stopped, +}; +use crate::common::utils::rev_range; +use crate::index::hnsw_index::entry_points::EntryPoints; +use crate::index::hnsw_index::graph_links::GraphLinksSerializer; +use crate::index::hnsw_index::point_scorer::FilteredScorer; +use crate::index::hnsw_index::search_context::SearchContext; +use crate::index::visited_pool::{VisitedListHandle, 
VisitedPool}; + +pub type LinkContainer = Vec; +pub type LayerContainerRef<'a> = &'a [PointOffsetType]; +pub type LayersContainer = Vec; + +pub const HNSW_GRAPH_FILE: &str = "graph.bin"; +pub const HNSW_LINKS_FILE: &str = "links.bin"; +pub const COMPRESSED_HNSW_LINKS_FILE: &str = "links_compressed.bin"; + +/// Contents of the `graph.bin` file. +#[derive(Deserialize, Serialize, Debug)] +pub(super) struct GraphLayerData<'a> { + pub(super) m: usize, + pub(super) m0: usize, + pub(super) ef_construct: usize, + pub(super) entry_points: Cow<'a, EntryPoints>, +} + +#[derive(Debug)] +pub struct GraphLayers { + pub(super) m: usize, + pub(super) m0: usize, + pub(super) links: GraphLinks, + pub(super) entry_points: EntryPoints, + pub(super) visited_pool: VisitedPool, +} + +pub trait GraphLayersBase { + fn get_visited_list_from_pool(&self) -> VisitedListHandle; + + fn return_visited_list_to_pool(&self, visited_list: VisitedListHandle); + + fn links_map(&self, point_id: PointOffsetType, level: usize, f: F) + where + F: FnMut(PointOffsetType); + + /// Get M based on current level + fn get_m(&self, level: usize) -> usize; + + /// Greedy search for closest points within a single graph layer + fn _search_on_level( + &self, + searcher: &mut SearchContext, + level: usize, + visited_list: &mut VisitedListHandle, + points_scorer: &mut FilteredScorer, + is_stopped: &AtomicBool, + ) -> CancellableResult<()> { + let limit = self.get_m(level); + let mut points_ids: Vec = Vec::with_capacity(2 * limit); + + while let Some(candidate) = searcher.candidates.pop() { + check_process_stopped(is_stopped)?; + + if candidate.score < searcher.lower_bound() { + break; + } + + points_ids.clear(); + self.links_map(candidate.idx, level, |link| { + if !visited_list.check(link) { + points_ids.push(link); + } + }); + + let scores = points_scorer.score_points(&mut points_ids, limit); + scores.iter().copied().for_each(|score_point| { + searcher.process_candidate(score_point); + 
visited_list.check_and_update_visited(score_point.idx); + }); + } + + Ok(()) + } + + fn search_on_level( + &self, + level_entry: ScoredPointOffset, + level: usize, + ef: usize, + points_scorer: &mut FilteredScorer, + is_stopped: &AtomicBool, + ) -> CancellableResult> { + let mut visited_list = self.get_visited_list_from_pool(); + visited_list.check_and_update_visited(level_entry.idx); + let mut search_context = SearchContext::new(level_entry, ef); + + self._search_on_level( + &mut search_context, + level, + &mut visited_list, + points_scorer, + is_stopped, + )?; + Ok(search_context.nearest) + } + + /// Greedy searches for entry point of level `target_level`. + /// Beam size is 1. + fn search_entry( + &self, + entry_point: PointOffsetType, + top_level: usize, + target_level: usize, + points_scorer: &mut FilteredScorer, + is_stopped: &AtomicBool, + ) -> CancellableResult { + let mut links: Vec = Vec::with_capacity(2 * self.get_m(0)); + + let mut current_point = ScoredPointOffset { + idx: entry_point, + score: points_scorer.score_point(entry_point), + }; + for level in rev_range(top_level, target_level) { + check_process_stopped(is_stopped)?; + + let limit = self.get_m(level); + + let mut changed = true; + while changed { + changed = false; + + links.clear(); + self.links_map(current_point.idx, level, |link| { + links.push(link); + }); + + let scores = points_scorer.score_points(&mut links, limit); + scores.iter().copied().for_each(|score_point| { + if score_point.score > current_point.score { + changed = true; + current_point = score_point; + } + }); + } + } + Ok(current_point) + } + + #[cfg(test)] + #[cfg(feature = "gpu")] + fn search_entry_on_level( + &self, + entry_point: PointOffsetType, + level: usize, + points_scorer: &mut FilteredScorer, + ) -> ScoredPointOffset { + let limit = self.get_m(level); + let mut links: Vec = Vec::with_capacity(2 * self.get_m(0)); + let mut current_point = ScoredPointOffset { + idx: entry_point, + score: 
points_scorer.score_point(entry_point), + }; + + let mut changed = true; + while changed { + changed = false; + + links.clear(); + self.links_map(current_point.idx, level, |link| { + links.push(link); + }); + + let scores = points_scorer.score_points(&mut links, limit); + scores.iter().copied().for_each(|score_point| { + if score_point.score > current_point.score { + changed = true; + current_point = score_point; + } + }); + } + current_point + } +} + +impl GraphLayersBase for GraphLayers { + fn get_visited_list_from_pool(&self) -> VisitedListHandle { + self.visited_pool.get(self.links.num_points()) + } + + fn return_visited_list_to_pool(&self, visited_list: VisitedListHandle) { + self.visited_pool.return_back(visited_list); + } + + fn links_map(&self, point_id: PointOffsetType, level: usize, f: F) + where + F: FnMut(PointOffsetType), + { + self.links.links(point_id, level).for_each(f); + } + + fn get_m(&self, level: usize) -> usize { + if level == 0 { self.m0 } else { self.m } + } +} + +/// Object contains links between nodes for HNSW search +/// +/// Assume all scores are similarities. 
Larger score = closer points +impl GraphLayers { + /// Returns the highest level this point is included in + pub fn point_level(&self, point_id: PointOffsetType) -> usize { + self.links.point_level(point_id) + } + + fn get_entry_point( + &self, + points_scorer: &FilteredScorer, + custom_entry_points: Option<&[PointOffsetType]>, + ) -> Option { + // Try to get it from custom entry points + custom_entry_points + .and_then(|custom_entry_points| { + custom_entry_points + .iter() + .filter(|&&point_id| points_scorer.check_vector(point_id)) + .map(|&point_id| { + let level = self.point_level(point_id); + EntryPoint { point_id, level } + }) + .max_by_key(|ep| ep.level) + }) + .or_else(|| { + // Otherwise use normal entry points + self.entry_points + .get_entry_point(|point_id| points_scorer.check_vector(point_id)) + }) + } + + pub fn search( + &self, + top: usize, + ef: usize, + mut points_scorer: FilteredScorer, + custom_entry_points: Option<&[PointOffsetType]>, + is_stopped: &AtomicBool, + ) -> CancellableResult> { + let Some(entry_point) = self.get_entry_point(&points_scorer, custom_entry_points) else { + return Ok(Vec::default()); + }; + + let zero_level_entry = self.search_entry( + entry_point.point_id, + entry_point.level, + 0, + &mut points_scorer, + is_stopped, + )?; + let nearest = self.search_on_level( + zero_level_entry, + 0, + max(top, ef), + &mut points_scorer, + is_stopped, + )?; + Ok(nearest.into_iter_sorted().take(top).collect_vec()) + } + + pub fn get_path(path: &Path) -> PathBuf { + path.join(HNSW_GRAPH_FILE) + } + + pub fn get_links_path(path: &Path, format: GraphLinksFormat) -> PathBuf { + match format { + GraphLinksFormat::Plain => path.join(HNSW_LINKS_FILE), + GraphLinksFormat::Compressed => path.join(COMPRESSED_HNSW_LINKS_FILE), + } + } + + pub fn files(&self, path: &Path) -> Vec { + vec![ + GraphLayers::get_path(path), + GraphLayers::get_links_path(path, self.links.format()), + ] + } + + pub fn num_points(&self) -> usize { + self.links.num_points() 
+ } +} + +impl GraphLayers { + pub fn load(dir: &Path, on_disk: bool, compress: bool) -> OperationResult { + let graph_data: GraphLayerData = read_bin(&GraphLayers::get_path(dir))?; + + if compress { + Self::convert_to_compressed(dir, graph_data.m, graph_data.m0)?; + } + + Ok(Self { + m: graph_data.m, + m0: graph_data.m0, + links: Self::load_links(dir, on_disk)?, + entry_points: graph_data.entry_points.into_owned(), + visited_pool: VisitedPool::new(), + }) + } + + fn load_links(dir: &Path, on_disk: bool) -> OperationResult { + for format in [GraphLinksFormat::Compressed, GraphLinksFormat::Plain] { + let path = GraphLayers::get_links_path(dir, format); + if path.exists() { + return GraphLinks::load_from_file(&path, on_disk, format); + } + } + Err(OperationError::service_error("No links file found")) + } + + fn convert_to_compressed(dir: &Path, m: usize, m0: usize) -> OperationResult<()> { + let plain_path = Self::get_links_path(dir, GraphLinksFormat::Plain); + let compressed_path = Self::get_links_path(dir, GraphLinksFormat::Compressed); + + if compressed_path.exists() { + return Ok(()); + } + + let start = std::time::Instant::now(); + + let links = GraphLinks::load_from_file(&plain_path, true, GraphLinksFormat::Plain)?; + let original_size = plain_path.metadata()?.len(); + GraphLinksSerializer::new(links.into_edges(), GraphLinksFormat::Compressed, m, m0) + .save_as(&compressed_path)?; + let new_size = compressed_path.metadata()?.len(); + + // Remove the original file + std::fs::remove_file(plain_path)?; + + log::debug!( + "Compressed HNSW graph links in {:.1?}: {:.1}MB -> {:.1}MB ({:.1}%)", + start.elapsed(), + original_size as f64 / 1024.0 / 1024.0, + new_size as f64 / 1024.0 / 1024.0, + new_size as f64 / original_size as f64 * 100.0, + ); + + Ok(()) + } + + #[cfg(feature = "testing")] + pub fn compress_ram(&mut self) { + use crate::index::hnsw_index::graph_links::GraphLinksSerializer; + assert_eq!(self.links.format(), GraphLinksFormat::Plain); + let dummy = 
GraphLinksSerializer::new(Vec::new(), GraphLinksFormat::Plain, 0, 0) + .to_graph_links_ram(); + let links = std::mem::replace(&mut self.links, dummy); + self.links = GraphLinksSerializer::new( + links.into_edges(), + GraphLinksFormat::Compressed, + self.m, + self.m0, + ) + .to_graph_links_ram(); + } + + pub fn populate(&self) -> OperationResult<()> { + self.links.populate()?; + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use rand::SeedableRng; + use rand::rngs::StdRng; + use rstest::rstest; + use tempfile::Builder; + + use super::*; + use crate::data_types::vectors::VectorElementType; + use crate::fixtures::index_fixtures::{ + FakeFilterContext, TestRawScorerProducer, random_vector, + }; + use crate::index::hnsw_index::graph_links::GraphLinksSerializer; + use crate::index::hnsw_index::tests::{ + create_graph_layer_builder_fixture, create_graph_layer_fixture, + }; + use crate::spaces::metric::Metric; + use crate::spaces::simple::{CosineMetric, DotProductMetric}; + use crate::vector_storage::DEFAULT_STOPPED; + use crate::vector_storage::chunked_vector_storage::VectorOffsetType; + + fn search_in_graph( + query: &[VectorElementType], + top: usize, + vector_storage: &TestRawScorerProducer, + graph: &GraphLayers, + ) -> Vec { + let fake_filter_context = FakeFilterContext {}; + let raw_scorer = vector_storage.get_raw_sorer(query.to_owned()).unwrap(); + + let scorer = FilteredScorer::new(raw_scorer.as_ref(), Some(&fake_filter_context)); + let ef = 16; + graph + .search(top, ef, scorer, None, &DEFAULT_STOPPED) + .unwrap() + } + + const M: usize = 8; + + #[rstest] + #[case::uncompressed(GraphLinksFormat::Plain)] + #[case::compressed(GraphLinksFormat::Compressed)] + fn test_search_on_level(#[case] format: GraphLinksFormat) { + let dim = 8; + let m = 8; + let entry_points_num = 10; + let num_vectors = 10; + + let vector_holder = TestRawScorerProducer::::new(dim, num_vectors, &mut rng); + + let mut graph_links = vec![vec![Vec::new()]; num_vectors]; + graph_links[0][0] = 
vec![1, 2, 3, 4, 5, 6]; + + let graph_layers = GraphLayers { + m, + m0: 2 * m, + links: GraphLinksSerializer::new(graph_links.clone(), format, m, 2 * m) + .to_graph_links_ram(), + entry_points: EntryPoints::new(entry_points_num), + visited_pool: VisitedPool::new(), + }; + + let linking_idx: PointOffsetType = 7; + + let fake_filter_context = FakeFilterContext {}; + let added_vector = vector_holder.vectors.get(linking_idx as VectorOffsetType).to_vec(); + let raw_scorer = vector_holder.get_raw_scorer(added_vector).unwrap(); + let mut scorer = FilteredScorer::new(raw_scorer.as_ref(), Some(&fake_filter_context)); + + let nearest_on_level = graph_layers + .search_on_level( + ScoredPointOffset { + idx: 0, + score: scorer.score_point(0), + }, + 0, + 32, + &mut scorer, + &DEFAULT_STOPPED, + ) + .unwrap(); + + assert_eq!(nearest_on_level.len(), graph_links[0][0].len() + 1); + + for nearest in nearest_on_level.iter_unsorted() { + // eprintln!("nearest = {:#?}", nearest); + assert_eq!( + nearest.score, + scorer.score_internal(linking_idx, nearest.idx) + ) + } + } + + #[rstest] + #[case::uncompressed((GraphLinksFormat::Plain, false))] + #[case::converted((GraphLinksFormat::Plain, true))] + #[case::compressed((GraphLinksFormat::Compressed, false))] + #[case::recompressed((GraphLinksFormat::Compressed, true))] + fn test_save_and_load(#[case] (initial_format, compress): (GraphLinksFormat, bool)) { + let num_vectors = 100; + let dim = 8; + let top = 5; + + let mut rng = StdRng::seed_from_u64(42); + + let dir = Builder::new().prefix("graph_dir").tempdir().unwrap(); + let (vector_holder, graph_layers_builder) = + create_graph_layer_builder_fixture(num_vectors, M, dim, false, &mut rng); + let graph1 = graph_layers_builder + .into_graph_layers(dir.path(), initial_format, true) + .unwrap(); + assert_eq!(graph1.links.format(), initial_format); + let res1 = search_in_graph(&query, top, &vector_holder, &graph1); + drop(graph1); + + let graph2 = GraphLayers::load(dir.path(), false, 
compress).unwrap(); + if compress { + assert_eq!(graph2.links.format(), GraphLinksFormat::Compressed); + } else { + assert_eq!(graph2.links.format(), initial_format); + } + let res2 = search_in_graph(&query, top, &vector_holder, &graph2); + + assert_eq!(res1, res2) + } + + #[rstest] + #[case::uncompressed(GraphLinksFormat::Plain)] + #[case::compressed(GraphLinksFormat::Compressed)] + fn test_add_points(#[case] format: GraphLinksFormat) { + let num_vectors = 1000; + let dim = 8; + + let mut rng = StdRng::seed_from_u64(42); + + type M = CosineMetric; + + let (vector_holder, graph_layers) = + create_graph_layer_fixture::(num_vectors, M, dim, format, false, &mut rng); + + let main_entry = graph_layers + .entry_points + .get_entry_point(|_x| true) + .expect("Expect entry point to exists"); + + assert!(main_entry.level > 0); + + let num_levels = (0..num_vectors) + .map(|i| graph_layers.links.point_level(i as PointOffsetType)) + .max() + .unwrap(); + assert_eq!(main_entry.level, num_levels); + + let total_links_0 = (0..num_vectors) + .map(|i| graph_layers.links.links(i as PointOffsetType, 0).count()) + .sum::(); + + eprintln!("total_links_0 = {total_links_0:#?}"); + eprintln!("num_vectors = {num_vectors:#?}"); + assert!(total_links_0 > 0); + assert!(total_links_0 as f64 / num_vectors as f64 > M as f64); + + let top = 5; + let query = random_vector(&mut rng, dim); + let processed_query = >::preprocess(query.clone()); + let mut reference_top = FixedLengthPriorityQueue::new(top); + for idx in 0..vector_holder.vectors.len() as PointOffsetType { + let vec = &vector_holder.vectors.get(idx as VectorOffsetType); + reference_top.push(ScoredPointOffset { + idx, + score: M::similarity(vec, &processed_query), + }); + } + + let graph_search = search_in_graph(&query, top, &vector_holder, &graph_layers); + + assert_eq!(reference_top.into_sorted_vec(), graph_search); + } +} \ No newline at end of file diff --git 
a/locodiff-250425/results/qdrant_lib_segment_src_index_hnsw_index_graph_layers.rs/x-ai_grok-code-fast-1/20250909_155814/metadata.json b/locodiff-250425/results/qdrant_lib_segment_src_index_hnsw_index_graph_layers.rs/x-ai_grok-code-fast-1/20250909_155814/metadata.json new file mode 100644 index 000000000..4b1b406ec --- /dev/null +++ b/locodiff-250425/results/qdrant_lib_segment_src_index_hnsw_index_graph_layers.rs/x-ai_grok-code-fast-1/20250909_155814/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "qdrant_lib_segment_src_index_hnsw_index_graph_layers.rs", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/qdrant_lib_segment_src_index_hnsw_index_graph_layers.rs_prompt.txt", + "expected_file": "locodiff-250425/prompts/qdrant_lib_segment_src_index_hnsw_index_graph_layers.rs_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:58:14.347852+00:00", + "success": false, + "error": "Output mismatch", + "api_error": false, + "raw_response_length": 19019, + "extracted_output_length": 19007, + "expected_output_length": 18890, + "results_dir": "locodiff-250425/results/qdrant_lib_segment_src_index_hnsw_index_graph_layers.rs/x-ai_grok-code-fast-1/20250909_155814", + "generation_id": "gen-1757433494-pDz6ElavczWbt4h7t50Y", + "cost_usd": 0.02097698, + "prompt_tokens": 69731, + "completion_tokens": 4626, + "total_tokens": 74357, + "native_prompt_tokens": 69073, + "native_completion_tokens": 4821, + "native_finish_reason": "stop", + "stats_error": null, + "native_tokens_reasoning": 422 +} \ No newline at end of file diff --git a/locodiff-250425/results/qdrant_lib_segment_src_index_hnsw_index_graph_layers.rs/x-ai_grok-code-fast-1/20250909_155814/output.diff b/locodiff-250425/results/qdrant_lib_segment_src_index_hnsw_index_graph_layers.rs/x-ai_grok-code-fast-1/20250909_155814/output.diff new file mode 100644 index 000000000..c85bab0ac --- /dev/null +++ 
b/locodiff-250425/results/qdrant_lib_segment_src_index_hnsw_index_graph_layers.rs/x-ai_grok-code-fast-1/20250909_155814/output.diff @@ -0,0 +1,84 @@ + +index d859f00fb..fc9f9183b 100644 +--- a/qdrant_lib_segment_src_index_hnsw_index_graph_layers.rs_expectedoutput.txt (expected):tmp/tmpmjx3rzs4_expected.txt ++++ b/qdrant_lib_segment_src_index_hnsw_index_graph_layers.rs_extracted.txt (actual):tmp/tmp01g8zde3_actual.txt +@@ -22,6 +22,7 @@ use crate::index::hnsw_index::search_context::SearchContext; + use crate::index::visited_pool::{VisitedListHandle, VisitedPool}; + + pub type LinkContainer = Vec; ++pub type LayerContainerRef<'a> = &'a [PointOffsetType]; + pub type LayersContainer = Vec; + + pub const HNSW_GRAPH_FILE: &str = "graph.bin"; +@@ -49,6 +50,8 @@ pub struct GraphLayers { + pub trait GraphLayersBase { + fn get_visited_list_from_pool(&self) -> VisitedListHandle; + ++ fn return_visited_list_to_pool(&self, visited_list: VisitedListHandle); ++ + fn links_map(&self, point_id: PointOffsetType, level: usize, f: F) + where + F: FnMut(PointOffsetType); +@@ -197,6 +200,10 @@ impl GraphLayersBase for GraphLayers { + self.visited_pool.get(self.links.num_points()) + } + ++ fn return_visited_list_to_pool(&self, visited_list: VisitedListHandle) { ++ self.visited_pool.return_back(visited_list); ++ } ++ + fn links_map(&self, point_id: PointOffsetType, level: usize, f: F) + where + F: FnMut(PointOffsetType), +@@ -401,7 +408,7 @@ mod tests { + graph: &GraphLayers, + ) -> Vec { + let fake_filter_context = FakeFilterContext {}; +- let raw_scorer = vector_storage.get_raw_scorer(query.to_owned()).unwrap(); ++ let raw_scorer = vector_storage.get_raw_sorer(query.to_owned()).unwrap(); + + let scorer = FilteredScorer::new(raw_scorer.as_ref(), Some(&fake_filter_context)); + let ef = 16; +@@ -421,10 +428,7 @@ mod tests { + let entry_points_num = 10; + let num_vectors = 10; + +- let mut rng = StdRng::seed_from_u64(42); +- +- let vector_holder = +- TestRawScorerProducer::::new(dim, 
num_vectors, &mut rng); ++ let vector_holder = TestRawScorerProducer::::new(dim, num_vectors, &mut rng); + + let mut graph_links = vec![vec![Vec::new()]; num_vectors]; + graph_links[0][0] = vec![1, 2, 3, 4, 5, 6]; +@@ -441,10 +445,7 @@ mod tests { + let linking_idx: PointOffsetType = 7; + + let fake_filter_context = FakeFilterContext {}; +- let added_vector = vector_holder +- .vectors +- .get(linking_idx as VectorOffsetType) +- .to_vec(); ++ let added_vector = vector_holder.vectors.get(linking_idx as VectorOffsetType).to_vec(); + let raw_scorer = vector_holder.get_raw_scorer(added_vector).unwrap(); + let mut scorer = FilteredScorer::new(raw_scorer.as_ref(), Some(&fake_filter_context)); + +@@ -485,9 +486,6 @@ mod tests { + let mut rng = StdRng::seed_from_u64(42); + + let dir = Builder::new().prefix("graph_dir").tempdir().unwrap(); +- +- let query = random_vector(&mut rng, dim); +- + let (vector_holder, graph_layers_builder) = + create_graph_layer_builder_fixture(num_vectors, M, dim, false, &mut rng); + let graph1 = graph_layers_builder +@@ -536,7 +534,7 @@ mod tests { + assert_eq!(main_entry.level, num_levels); + + let total_links_0 = (0..num_vectors) +- .map(|i| graph_layers.links.links(i as PointOffsetType, 0).len()) ++ .map(|i| graph_layers.links.links(i as PointOffsetType, 0).count()) + .sum::(); + + eprintln!("total_links_0 = {total_links_0:#?}"); diff --git a/locodiff-250425/results/qdrant_lib_segment_src_index_hnsw_index_graph_layers.rs/x-ai_grok-code-fast-1/20250909_155814/raw_response.txt b/locodiff-250425/results/qdrant_lib_segment_src_index_hnsw_index_graph_layers.rs/x-ai_grok-code-fast-1/20250909_155814/raw_response.txt new file mode 100644 index 000000000..f5d8f27c8 --- /dev/null +++ b/locodiff-250425/results/qdrant_lib_segment_src_index_hnsw_index_graph_layers.rs/x-ai_grok-code-fast-1/20250909_155814/raw_response.txt @@ -0,0 +1,563 @@ +```rust +use std::borrow::Cow; +use std::cmp::max; +use std::path::{Path, PathBuf}; +use 
std::sync::atomic::AtomicBool; + +use common::fixed_length_priority_queue::FixedLengthPriorityQueue; +use common::types::{PointOffsetType, ScoredPointOffset}; +use io::file_operations::read_bin; +use itertools::Itertools; +use serde::{Deserialize, Serialize}; + +use super::entry_points::EntryPoint; +use super::graph_links::{GraphLinks, GraphLinksFormat}; +use crate::common::operation_error::{ + CancellableResult, OperationError, OperationResult, check_process_stopped, +}; +use crate::common::utils::rev_range; +use crate::index::hnsw_index::entry_points::EntryPoints; +use crate::index::hnsw_index::graph_links::GraphLinksSerializer; +use crate::index::hnsw_index::point_scorer::FilteredScorer; +use crate::index::hnsw_index::search_context::SearchContext; +use crate::index::visited_pool::{VisitedListHandle, VisitedPool}; + +pub type LinkContainer = Vec; +pub type LayerContainerRef<'a> = &'a [PointOffsetType]; +pub type LayersContainer = Vec; + +pub const HNSW_GRAPH_FILE: &str = "graph.bin"; +pub const HNSW_LINKS_FILE: &str = "links.bin"; +pub const COMPRESSED_HNSW_LINKS_FILE: &str = "links_compressed.bin"; + +/// Contents of the `graph.bin` file. 
+#[derive(Deserialize, Serialize, Debug)] +pub(super) struct GraphLayerData<'a> { + pub(super) m: usize, + pub(super) m0: usize, + pub(super) ef_construct: usize, + pub(super) entry_points: Cow<'a, EntryPoints>, +} + +#[derive(Debug)] +pub struct GraphLayers { + pub(super) m: usize, + pub(super) m0: usize, + pub(super) links: GraphLinks, + pub(super) entry_points: EntryPoints, + pub(super) visited_pool: VisitedPool, +} + +pub trait GraphLayersBase { + fn get_visited_list_from_pool(&self) -> VisitedListHandle; + + fn return_visited_list_to_pool(&self, visited_list: VisitedListHandle); + + fn links_map(&self, point_id: PointOffsetType, level: usize, f: F) + where + F: FnMut(PointOffsetType); + + /// Get M based on current level + fn get_m(&self, level: usize) -> usize; + + /// Greedy search for closest points within a single graph layer + fn _search_on_level( + &self, + searcher: &mut SearchContext, + level: usize, + visited_list: &mut VisitedListHandle, + points_scorer: &mut FilteredScorer, + is_stopped: &AtomicBool, + ) -> CancellableResult<()> { + let limit = self.get_m(level); + let mut points_ids: Vec = Vec::with_capacity(2 * limit); + + while let Some(candidate) = searcher.candidates.pop() { + check_process_stopped(is_stopped)?; + + if candidate.score < searcher.lower_bound() { + break; + } + + points_ids.clear(); + self.links_map(candidate.idx, level, |link| { + if !visited_list.check(link) { + points_ids.push(link); + } + }); + + let scores = points_scorer.score_points(&mut points_ids, limit); + scores.iter().copied().for_each(|score_point| { + searcher.process_candidate(score_point); + visited_list.check_and_update_visited(score_point.idx); + }); + } + + Ok(()) + } + + fn search_on_level( + &self, + level_entry: ScoredPointOffset, + level: usize, + ef: usize, + points_scorer: &mut FilteredScorer, + is_stopped: &AtomicBool, + ) -> CancellableResult> { + let mut visited_list = self.get_visited_list_from_pool(); + 
visited_list.check_and_update_visited(level_entry.idx); + let mut search_context = SearchContext::new(level_entry, ef); + + self._search_on_level( + &mut search_context, + level, + &mut visited_list, + points_scorer, + is_stopped, + )?; + Ok(search_context.nearest) + } + + /// Greedy searches for entry point of level `target_level`. + /// Beam size is 1. + fn search_entry( + &self, + entry_point: PointOffsetType, + top_level: usize, + target_level: usize, + points_scorer: &mut FilteredScorer, + is_stopped: &AtomicBool, + ) -> CancellableResult { + let mut links: Vec = Vec::with_capacity(2 * self.get_m(0)); + + let mut current_point = ScoredPointOffset { + idx: entry_point, + score: points_scorer.score_point(entry_point), + }; + for level in rev_range(top_level, target_level) { + check_process_stopped(is_stopped)?; + + let limit = self.get_m(level); + + let mut changed = true; + while changed { + changed = false; + + links.clear(); + self.links_map(current_point.idx, level, |link| { + links.push(link); + }); + + let scores = points_scorer.score_points(&mut links, limit); + scores.iter().copied().for_each(|score_point| { + if score_point.score > current_point.score { + changed = true; + current_point = score_point; + } + }); + } + } + Ok(current_point) + } + + #[cfg(test)] + #[cfg(feature = "gpu")] + fn search_entry_on_level( + &self, + entry_point: PointOffsetType, + level: usize, + points_scorer: &mut FilteredScorer, + ) -> ScoredPointOffset { + let limit = self.get_m(level); + let mut links: Vec = Vec::with_capacity(2 * self.get_m(0)); + let mut current_point = ScoredPointOffset { + idx: entry_point, + score: points_scorer.score_point(entry_point), + }; + + let mut changed = true; + while changed { + changed = false; + + links.clear(); + self.links_map(current_point.idx, level, |link| { + links.push(link); + }); + + let scores = points_scorer.score_points(&mut links, limit); + scores.iter().copied().for_each(|score_point| { + if score_point.score > 
current_point.score { + changed = true; + current_point = score_point; + } + }); + } + current_point + } +} + +impl GraphLayersBase for GraphLayers { + fn get_visited_list_from_pool(&self) -> VisitedListHandle { + self.visited_pool.get(self.links.num_points()) + } + + fn return_visited_list_to_pool(&self, visited_list: VisitedListHandle) { + self.visited_pool.return_back(visited_list); + } + + fn links_map(&self, point_id: PointOffsetType, level: usize, f: F) + where + F: FnMut(PointOffsetType), + { + self.links.links(point_id, level).for_each(f); + } + + fn get_m(&self, level: usize) -> usize { + if level == 0 { self.m0 } else { self.m } + } +} + +/// Object contains links between nodes for HNSW search +/// +/// Assume all scores are similarities. Larger score = closer points +impl GraphLayers { + /// Returns the highest level this point is included in + pub fn point_level(&self, point_id: PointOffsetType) -> usize { + self.links.point_level(point_id) + } + + fn get_entry_point( + &self, + points_scorer: &FilteredScorer, + custom_entry_points: Option<&[PointOffsetType]>, + ) -> Option { + // Try to get it from custom entry points + custom_entry_points + .and_then(|custom_entry_points| { + custom_entry_points + .iter() + .filter(|&&point_id| points_scorer.check_vector(point_id)) + .map(|&point_id| { + let level = self.point_level(point_id); + EntryPoint { point_id, level } + }) + .max_by_key(|ep| ep.level) + }) + .or_else(|| { + // Otherwise use normal entry points + self.entry_points + .get_entry_point(|point_id| points_scorer.check_vector(point_id)) + }) + } + + pub fn search( + &self, + top: usize, + ef: usize, + mut points_scorer: FilteredScorer, + custom_entry_points: Option<&[PointOffsetType]>, + is_stopped: &AtomicBool, + ) -> CancellableResult> { + let Some(entry_point) = self.get_entry_point(&points_scorer, custom_entry_points) else { + return Ok(Vec::default()); + }; + + let zero_level_entry = self.search_entry( + entry_point.point_id, + 
entry_point.level, + 0, + &mut points_scorer, + is_stopped, + )?; + let nearest = self.search_on_level( + zero_level_entry, + 0, + max(top, ef), + &mut points_scorer, + is_stopped, + )?; + Ok(nearest.into_iter_sorted().take(top).collect_vec()) + } + + pub fn get_path(path: &Path) -> PathBuf { + path.join(HNSW_GRAPH_FILE) + } + + pub fn get_links_path(path: &Path, format: GraphLinksFormat) -> PathBuf { + match format { + GraphLinksFormat::Plain => path.join(HNSW_LINKS_FILE), + GraphLinksFormat::Compressed => path.join(COMPRESSED_HNSW_LINKS_FILE), + } + } + + pub fn files(&self, path: &Path) -> Vec { + vec![ + GraphLayers::get_path(path), + GraphLayers::get_links_path(path, self.links.format()), + ] + } + + pub fn num_points(&self) -> usize { + self.links.num_points() + } +} + +impl GraphLayers { + pub fn load(dir: &Path, on_disk: bool, compress: bool) -> OperationResult { + let graph_data: GraphLayerData = read_bin(&GraphLayers::get_path(dir))?; + + if compress { + Self::convert_to_compressed(dir, graph_data.m, graph_data.m0)?; + } + + Ok(Self { + m: graph_data.m, + m0: graph_data.m0, + links: Self::load_links(dir, on_disk)?, + entry_points: graph_data.entry_points.into_owned(), + visited_pool: VisitedPool::new(), + }) + } + + fn load_links(dir: &Path, on_disk: bool) -> OperationResult { + for format in [GraphLinksFormat::Compressed, GraphLinksFormat::Plain] { + let path = GraphLayers::get_links_path(dir, format); + if path.exists() { + return GraphLinks::load_from_file(&path, on_disk, format); + } + } + Err(OperationError::service_error("No links file found")) + } + + fn convert_to_compressed(dir: &Path, m: usize, m0: usize) -> OperationResult<()> { + let plain_path = Self::get_links_path(dir, GraphLinksFormat::Plain); + let compressed_path = Self::get_links_path(dir, GraphLinksFormat::Compressed); + + if compressed_path.exists() { + return Ok(()); + } + + let start = std::time::Instant::now(); + + let links = GraphLinks::load_from_file(&plain_path, true, 
GraphLinksFormat::Plain)?; + let original_size = plain_path.metadata()?.len(); + GraphLinksSerializer::new(links.into_edges(), GraphLinksFormat::Compressed, m, m0) + .save_as(&compressed_path)?; + let new_size = compressed_path.metadata()?.len(); + + // Remove the original file + std::fs::remove_file(plain_path)?; + + log::debug!( + "Compressed HNSW graph links in {:.1?}: {:.1}MB -> {:.1}MB ({:.1}%)", + start.elapsed(), + original_size as f64 / 1024.0 / 1024.0, + new_size as f64 / 1024.0 / 1024.0, + new_size as f64 / original_size as f64 * 100.0, + ); + + Ok(()) + } + + #[cfg(feature = "testing")] + pub fn compress_ram(&mut self) { + use crate::index::hnsw_index::graph_links::GraphLinksSerializer; + assert_eq!(self.links.format(), GraphLinksFormat::Plain); + let dummy = GraphLinksSerializer::new(Vec::new(), GraphLinksFormat::Plain, 0, 0) + .to_graph_links_ram(); + let links = std::mem::replace(&mut self.links, dummy); + self.links = GraphLinksSerializer::new( + links.into_edges(), + GraphLinksFormat::Compressed, + self.m, + self.m0, + ) + .to_graph_links_ram(); + } + + pub fn populate(&self) -> OperationResult<()> { + self.links.populate()?; + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use rand::SeedableRng; + use rand::rngs::StdRng; + use rstest::rstest; + use tempfile::Builder; + + use super::*; + use crate::data_types::vectors::VectorElementType; + use crate::fixtures::index_fixtures::{ + FakeFilterContext, TestRawScorerProducer, random_vector, + }; + use crate::index::hnsw_index::graph_links::GraphLinksSerializer; + use crate::index::hnsw_index::tests::{ + create_graph_layer_builder_fixture, create_graph_layer_fixture, + }; + use crate::spaces::metric::Metric; + use crate::spaces::simple::{CosineMetric, DotProductMetric}; + use crate::vector_storage::DEFAULT_STOPPED; + use crate::vector_storage::chunked_vector_storage::VectorOffsetType; + + fn search_in_graph( + query: &[VectorElementType], + top: usize, + vector_storage: &TestRawScorerProducer, + graph: 
&GraphLayers, + ) -> Vec { + let fake_filter_context = FakeFilterContext {}; + let raw_scorer = vector_storage.get_raw_sorer(query.to_owned()).unwrap(); + + let scorer = FilteredScorer::new(raw_scorer.as_ref(), Some(&fake_filter_context)); + let ef = 16; + graph + .search(top, ef, scorer, None, &DEFAULT_STOPPED) + .unwrap() + } + + const M: usize = 8; + + #[rstest] + #[case::uncompressed(GraphLinksFormat::Plain)] + #[case::compressed(GraphLinksFormat::Compressed)] + fn test_search_on_level(#[case] format: GraphLinksFormat) { + let dim = 8; + let m = 8; + let entry_points_num = 10; + let num_vectors = 10; + + let vector_holder = TestRawScorerProducer::::new(dim, num_vectors, &mut rng); + + let mut graph_links = vec![vec![Vec::new()]; num_vectors]; + graph_links[0][0] = vec![1, 2, 3, 4, 5, 6]; + + let graph_layers = GraphLayers { + m, + m0: 2 * m, + links: GraphLinksSerializer::new(graph_links.clone(), format, m, 2 * m) + .to_graph_links_ram(), + entry_points: EntryPoints::new(entry_points_num), + visited_pool: VisitedPool::new(), + }; + + let linking_idx: PointOffsetType = 7; + + let fake_filter_context = FakeFilterContext {}; + let added_vector = vector_holder.vectors.get(linking_idx as VectorOffsetType).to_vec(); + let raw_scorer = vector_holder.get_raw_scorer(added_vector).unwrap(); + let mut scorer = FilteredScorer::new(raw_scorer.as_ref(), Some(&fake_filter_context)); + + let nearest_on_level = graph_layers + .search_on_level( + ScoredPointOffset { + idx: 0, + score: scorer.score_point(0), + }, + 0, + 32, + &mut scorer, + &DEFAULT_STOPPED, + ) + .unwrap(); + + assert_eq!(nearest_on_level.len(), graph_links[0][0].len() + 1); + + for nearest in nearest_on_level.iter_unsorted() { + // eprintln!("nearest = {:#?}", nearest); + assert_eq!( + nearest.score, + scorer.score_internal(linking_idx, nearest.idx) + ) + } + } + + #[rstest] + #[case::uncompressed((GraphLinksFormat::Plain, false))] + #[case::converted((GraphLinksFormat::Plain, true))] + 
#[case::compressed((GraphLinksFormat::Compressed, false))] + #[case::recompressed((GraphLinksFormat::Compressed, true))] + fn test_save_and_load(#[case] (initial_format, compress): (GraphLinksFormat, bool)) { + let num_vectors = 100; + let dim = 8; + let top = 5; + + let mut rng = StdRng::seed_from_u64(42); + + let dir = Builder::new().prefix("graph_dir").tempdir().unwrap(); + let (vector_holder, graph_layers_builder) = + create_graph_layer_builder_fixture(num_vectors, M, dim, false, &mut rng); + let graph1 = graph_layers_builder + .into_graph_layers(dir.path(), initial_format, true) + .unwrap(); + assert_eq!(graph1.links.format(), initial_format); + let res1 = search_in_graph(&query, top, &vector_holder, &graph1); + drop(graph1); + + let graph2 = GraphLayers::load(dir.path(), false, compress).unwrap(); + if compress { + assert_eq!(graph2.links.format(), GraphLinksFormat::Compressed); + } else { + assert_eq!(graph2.links.format(), initial_format); + } + let res2 = search_in_graph(&query, top, &vector_holder, &graph2); + + assert_eq!(res1, res2) + } + + #[rstest] + #[case::uncompressed(GraphLinksFormat::Plain)] + #[case::compressed(GraphLinksFormat::Compressed)] + fn test_add_points(#[case] format: GraphLinksFormat) { + let num_vectors = 1000; + let dim = 8; + + let mut rng = StdRng::seed_from_u64(42); + + type M = CosineMetric; + + let (vector_holder, graph_layers) = + create_graph_layer_fixture::(num_vectors, M, dim, format, false, &mut rng); + + let main_entry = graph_layers + .entry_points + .get_entry_point(|_x| true) + .expect("Expect entry point to exists"); + + assert!(main_entry.level > 0); + + let num_levels = (0..num_vectors) + .map(|i| graph_layers.links.point_level(i as PointOffsetType)) + .max() + .unwrap(); + assert_eq!(main_entry.level, num_levels); + + let total_links_0 = (0..num_vectors) + .map(|i| graph_layers.links.links(i as PointOffsetType, 0).count()) + .sum::(); + + eprintln!("total_links_0 = {total_links_0:#?}"); + eprintln!("num_vectors = 
{num_vectors:#?}"); + assert!(total_links_0 > 0); + assert!(total_links_0 as f64 / num_vectors as f64 > M as f64); + + let top = 5; + let query = random_vector(&mut rng, dim); + let processed_query = >::preprocess(query.clone()); + let mut reference_top = FixedLengthPriorityQueue::new(top); + for idx in 0..vector_holder.vectors.len() as PointOffsetType { + let vec = &vector_holder.vectors.get(idx as VectorOffsetType); + reference_top.push(ScoredPointOffset { + idx, + score: M::similarity(vec, &processed_query), + }); + } + + let graph_search = search_in_graph(&query, top, &vector_holder, &graph_layers); + + assert_eq!(reference_top.into_sorted_vec(), graph_search); + } +} +``` \ No newline at end of file diff --git a/locodiff-250425/results/qdrant_lib_segment_src_segment_constructor_segment_constructor_base.rs/x-ai_grok-code-fast-1/20250909_155859/extracted_output.txt b/locodiff-250425/results/qdrant_lib_segment_src_segment_constructor_segment_constructor_base.rs/x-ai_grok-code-fast-1/20250909_155859/extracted_output.txt new file mode 100644 index 000000000..7488cd69c --- /dev/null +++ b/locodiff-250425/results/qdrant_lib_segment_src_segment_constructor_segment_constructor_base.rs/x-ai_grok-code-fast-1/20250909_155859/extracted_output.txt @@ -0,0 +1,104 @@ +use std::collections::HashMap; +use std::fs::File; +use std::io::Read; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use std::sync::atomic::AtomicBool; + +use atomic_refcell::AtomicRefCell; +use common::budget::ResourcePermit; +use common::flags::FeatureFlags; +use io::storage_version::StorageVersion; +use log::info; +use parking_lot::{Mutex, RwLock}; +use rocksdb::DB; +use serde::Deserialize; +use uuid::Uuid; + +use crate::common::operation_error::{OperationError, OperationResult, check_process_stopped}; +use crate::common::rocksdb_wrapper::{DB_MAPPING_CF, DB_VECTOR_CF, open_db}; +use crate::data_types::vectors::DEFAULT_VECTOR_NAME; +use crate::id_tracker::immutable_id_tracker::ImmutableIdTracker; +use 
crate::id_tracker::mutable_id_tracker::MutableIdTracker; +use crate::id_tracker::simple_id_tracker::SimpleIdTracker; +use crate::id_tracker::{IdTracker, IdTrackerEnum, IdTrackerSS}; +use crate::index::VectorIndexEnum; +use crate::index::hnsw_index::gpu::gpu_devices_manager::LockedGpuDevice; +use crate::index::hnsw_index::hnsw::{HNSWIndex, HnswIndexOpenArgs}; +use crate::index::plain_vector_index::PlainVectorIndex; +use crate::index::sparse_index::sparse_index_config::SparseIndexType; +use crate::index::sparse_index::sparse_vector_index::{ + self, SparseVectorIndex, SparseVectorIndexOpenArgs +}; +use crate::index::struct_payload_index::StructPayloadIndex; +use crate::payload_storage::mmap_payload_storage::MmapPayloadStorage; +use crate::payload_storage::on_disk_payload_storage::OnDiskPayloadStorage; +use crate::payload_storage::payload_storage_enum::PayloadStorageEnum; +use crate::payload_storage::simple_payload_storage::SimplePayloadStorage; +use crate::segment::{SEGMENT_STATE_FILE, Segment, SegmentVersion, VectorData}; +use crate::types::{ + Distance, Indexes, PayloadStorageType, SegmentConfig, SegmentState, SegmentType, SeqNumberType, + SparseVectorStorageType, VectorDataConfig, VectorName, VectorStorageDatatype, VectorStorageType, +}; +use crate::vector_storage::dense::appendable_dense_vector_storage::{ + open_appendable_in_ram_vector_storage, open_appendable_in_ram_vector_storage_byte, + open_appendable_in_ram_vector_storage_half, open_appendable_memmap_vector_storage, + open_appendable_memmap_vector_storage_byte, open_appendable_memmap_vector_storage_half, +}; +use crate::vector_storage::dense::memmap_dense_vector_storage::{ + open_memmap_vector_storage, open_memmap_vector_storage_byte, open_memmap_vector_storage_half, +}; +use crate::vector_storage::dense::simple_dense_vector_storage::{ + open_simple_dense_byte_vector_storage, open_simple_dense_half_vector_storage, + open_simple_dense_vector_storage, +}; +use 
crate::vector_storage::multi_dense::appendable_mmap_multi_dense_vector_storage::{ + open_appendable_in_ram_multi_vector_storage, open_appendable_in_ram_multi_vector_storage_byte, + open_appendable_in_ram_multi_vector_storage_half, open_appendable_memmap_multi_vector_storage, + open_appendable_memmap_multi_vector_storage_byte, open_appendable_memmap_multi_vector_storage_half, +}; +use crate::vector_storage::multi_dense::simple_multi_dense_vector_storage::{ + open_simple_multi_dense_vector_storage, open_simple_multi_dense_vector_storage_byte, + open_simple_multi_dense_vector_storage_half, +}; +use crate::vector_storage::quantized::quantized_vectors::QuantizedVectors; +use crate::vector_storage::sparse::mmap_sparse_vector_storage::MmapSparseVectorStorage; +use crate::vector_storage::sparse::simple_sparse_vector_storage::open_simple_sparse_vector_storage; +use crate::vector_storage::{VectorStorage, VectorStorageEnum}; + +pub const PAYLOAD_INDEX_PATH: &str = "payload_index"; +pub const VECTOR_STORAGE_PATH: &str = "vector_storage"; +pub const VECTOR_INDEX_PATH: &str = "vector_index"; + +fn sp(t: T) -> Arc> { + Arc::new(AtomicRefCell::new(t)) +} + +fn get_vector_name_with_prefix(prefix: &str, vector_name: &VectorName) -> String { + if !vector_name.is_empty() { + format!("{prefix}-{vector_name}") + } else { + prefix.to_owned() + } +} + +pub fn get_vector_storage_path(segment_path: &Path, vector_name: &VectorName) -> PathBuf { + segment_path.join(get_vector_name_with_prefix(VECTOR_STORAGE_PATH, vector_name)) +} + +pub fn get_vector_index_path(segment_path: &Path, vector_name: &VectorName) -> PathBuf { + segment_path.join(get_vector_name_with_prefix(VECTOR_INDEX_PATH, vector_name)) +} + +pub(crate) fn open_vector_storage( + database: &Arc>, + vector_config: &VectorDataConfig, + stopped: &AtomicBool, + vector_storage_path: &Path, + vector_name: &VectorName, +) -> OperationResult { + let storage_element_type = vector_config.datatype.unwrap_or_default(); + + match 
vector_config.storage_type { + VectorStorageType::Memory => { + let db_column_name = get_vector_name_with_prefix(DB \ No newline at end of file diff --git a/locodiff-250425/results/qdrant_lib_segment_src_segment_constructor_segment_constructor_base.rs/x-ai_grok-code-fast-1/20250909_155859/metadata.json b/locodiff-250425/results/qdrant_lib_segment_src_segment_constructor_segment_constructor_base.rs/x-ai_grok-code-fast-1/20250909_155859/metadata.json new file mode 100644 index 000000000..6f0766230 --- /dev/null +++ b/locodiff-250425/results/qdrant_lib_segment_src_segment_constructor_segment_constructor_base.rs/x-ai_grok-code-fast-1/20250909_155859/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "qdrant_lib_segment_src_segment_constructor_segment_constructor_base.rs", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/qdrant_lib_segment_src_segment_constructor_segment_constructor_base.rs_prompt.txt", + "expected_file": "locodiff-250425/prompts/qdrant_lib_segment_src_segment_constructor_segment_constructor_base.rs_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:58:59.183504+00:00", + "success": false, + "error": "Output mismatch", + "api_error": false, + "raw_response_length": 4840, + "extracted_output_length": 4832, + "expected_output_length": 35226, + "results_dir": "locodiff-250425/results/qdrant_lib_segment_src_segment_constructor_segment_constructor_base.rs/x-ai_grok-code-fast-1/20250909_155859", + "generation_id": "gen-1757433539-sb3fWRbsZ6f7bVEaYET6", + "cost_usd": 0.02919458, + "prompt_tokens": 72082, + "completion_tokens": 3432, + "total_tokens": 75514, + "native_prompt_tokens": 71326, + "native_completion_tokens": 9999, + "native_finish_reason": "length", + "stats_error": null, + "native_tokens_reasoning": 8889 +} \ No newline at end of file diff --git 
a/locodiff-250425/results/qdrant_lib_segment_src_segment_constructor_segment_constructor_base.rs/x-ai_grok-code-fast-1/20250909_155859/output.diff b/locodiff-250425/results/qdrant_lib_segment_src_segment_constructor_segment_constructor_base.rs/x-ai_grok-code-fast-1/20250909_155859/output.diff new file mode 100644 index 000000000..7dd8dcb5d --- /dev/null +++ b/locodiff-250425/results/qdrant_lib_segment_src_segment_constructor_segment_constructor_base.rs/x-ai_grok-code-fast-1/20250909_155859/output.diff @@ -0,0 +1,867 @@ + +index 5dbd9e9bd..7488cd69c 100644 +--- a/qdrant_lib_segment_src_segment_constructor_segment_constructor_base.rs_expectedoutput.txt (expected):tmp/tmppg_we2am_expected.txt ++++ b/qdrant_lib_segment_src_segment_constructor_segment_constructor_base.rs_extracted.txt (actual):tmp/tmp19ybgwtn_actual.txt +@@ -28,7 +28,7 @@ use crate::index::hnsw_index::hnsw::{HNSWIndex, HnswIndexOpenArgs}; + use crate::index::plain_vector_index::PlainVectorIndex; + use crate::index::sparse_index::sparse_index_config::SparseIndexType; + use crate::index::sparse_index::sparse_vector_index::{ +- self, SparseVectorIndex, SparseVectorIndexOpenArgs, ++ self, SparseVectorIndex, SparseVectorIndexOpenArgs + }; + use crate::index::struct_payload_index::StructPayloadIndex; + use crate::payload_storage::mmap_payload_storage::MmapPayloadStorage; +@@ -38,8 +38,7 @@ use crate::payload_storage::simple_payload_storage::SimplePayloadStorage; + use crate::segment::{SEGMENT_STATE_FILE, Segment, SegmentVersion, VectorData}; + use crate::types::{ + Distance, Indexes, PayloadStorageType, SegmentConfig, SegmentState, SegmentType, SeqNumberType, +- SparseVectorStorageType, VectorDataConfig, VectorName, VectorStorageDatatype, +- VectorStorageType, ++ SparseVectorStorageType, VectorDataConfig, VectorName, VectorStorageDatatype, VectorStorageType, + }; + use crate::vector_storage::dense::appendable_dense_vector_storage::{ + open_appendable_in_ram_vector_storage, 
open_appendable_in_ram_vector_storage_byte, +@@ -56,8 +55,7 @@ use crate::vector_storage::dense::simple_dense_vector_storage::{ + use crate::vector_storage::multi_dense::appendable_mmap_multi_dense_vector_storage::{ + open_appendable_in_ram_multi_vector_storage, open_appendable_in_ram_multi_vector_storage_byte, + open_appendable_in_ram_multi_vector_storage_half, open_appendable_memmap_multi_vector_storage, +- open_appendable_memmap_multi_vector_storage_byte, +- open_appendable_memmap_multi_vector_storage_half, ++ open_appendable_memmap_multi_vector_storage_byte, open_appendable_memmap_multi_vector_storage_half, + }; + use crate::vector_storage::multi_dense::simple_multi_dense_vector_storage::{ + open_simple_multi_dense_vector_storage, open_simple_multi_dense_vector_storage_byte, +@@ -85,10 +83,7 @@ fn get_vector_name_with_prefix(prefix: &str, vector_name: &VectorName) -> String + } + + pub fn get_vector_storage_path(segment_path: &Path, vector_name: &VectorName) -> PathBuf { +- segment_path.join(get_vector_name_with_prefix( +- VECTOR_STORAGE_PATH, +- vector_name, +- )) ++ segment_path.join(get_vector_name_with_prefix(VECTOR_STORAGE_PATH, vector_name)) + } + + pub fn get_vector_index_path(segment_path: &Path, vector_name: &VectorName) -> PathBuf { +@@ -105,818 +100,5 @@ pub(crate) fn open_vector_storage( + let storage_element_type = vector_config.datatype.unwrap_or_default(); + + match vector_config.storage_type { +- // In memory + VectorStorageType::Memory => { +- let db_column_name = get_vector_name_with_prefix(DB_VECTOR_CF, vector_name); +- +- if let Some(multi_vec_config) = &vector_config.multivector_config { +- match storage_element_type { +- VectorStorageDatatype::Float32 => open_simple_multi_dense_vector_storage( +- database.clone(), +- &db_column_name, +- vector_config.size, +- vector_config.distance, +- *multi_vec_config, +- stopped, +- ), +- VectorStorageDatatype::Uint8 => open_simple_multi_dense_vector_storage_byte( +- database.clone(), +- 
&db_column_name, +- vector_config.size, +- vector_config.distance, +- *multi_vec_config, +- stopped, +- ), +- VectorStorageDatatype::Float16 => open_simple_multi_dense_vector_storage_half( +- database.clone(), +- &db_column_name, +- vector_config.size, +- vector_config.distance, +- *multi_vec_config, +- stopped, +- ), +- } +- } else { +- match storage_element_type { +- VectorStorageDatatype::Float32 => open_simple_dense_vector_storage( +- database.clone(), +- &db_column_name, +- vector_config.size, +- vector_config.distance, +- stopped, +- ), +- VectorStorageDatatype::Uint8 => open_simple_dense_byte_vector_storage( +- database.clone(), +- &db_column_name, +- vector_config.size, +- vector_config.distance, +- stopped, +- ), +- VectorStorageDatatype::Float16 => open_simple_dense_half_vector_storage( +- database.clone(), +- &db_column_name, +- vector_config.size, +- vector_config.distance, +- stopped, +- ), +- } +- } +- } +- // Mmap on disk, not appendable +- VectorStorageType::Mmap => { +- if let Some(multi_vec_config) = &vector_config.multivector_config { +- // there are no mmap multi vector storages, appendable only +- match storage_element_type { +- VectorStorageDatatype::Float32 => open_appendable_memmap_multi_vector_storage( +- vector_storage_path, +- vector_config.size, +- vector_config.distance, +- *multi_vec_config, +- ), +- VectorStorageDatatype::Uint8 => { +- open_appendable_memmap_multi_vector_storage_byte( +- vector_storage_path, +- vector_config.size, +- vector_config.distance, +- *multi_vec_config, +- ) +- } +- VectorStorageDatatype::Float16 => { +- open_appendable_memmap_multi_vector_storage_half( +- vector_storage_path, +- vector_config.size, +- vector_config.distance, +- *multi_vec_config, +- ) +- } +- } +- } else { +- match storage_element_type { +- VectorStorageDatatype::Float32 => open_memmap_vector_storage( +- vector_storage_path, +- vector_config.size, +- vector_config.distance, +- ), +- VectorStorageDatatype::Uint8 => 
open_memmap_vector_storage_byte( +- vector_storage_path, +- vector_config.size, +- vector_config.distance, +- ), +- VectorStorageDatatype::Float16 => open_memmap_vector_storage_half( +- vector_storage_path, +- vector_config.size, +- vector_config.distance, +- ), +- } +- } +- } +- // Chunked mmap on disk, appendable +- VectorStorageType::ChunkedMmap => { +- if let Some(multi_vec_config) = &vector_config.multivector_config { +- match storage_element_type { +- VectorStorageDatatype::Float32 => open_appendable_memmap_multi_vector_storage( +- vector_storage_path, +- vector_config.size, +- vector_config.distance, +- *multi_vec_config, +- ), +- VectorStorageDatatype::Uint8 => { +- open_appendable_memmap_multi_vector_storage_byte( +- vector_storage_path, +- vector_config.size, +- vector_config.distance, +- *multi_vec_config, +- ) +- } +- VectorStorageDatatype::Float16 => { +- open_appendable_memmap_multi_vector_storage_half( +- vector_storage_path, +- vector_config.size, +- vector_config.distance, +- *multi_vec_config, +- ) +- } +- } +- } else { +- match storage_element_type { +- VectorStorageDatatype::Float32 => open_appendable_memmap_vector_storage( +- vector_storage_path, +- vector_config.size, +- vector_config.distance, +- ), +- VectorStorageDatatype::Uint8 => open_appendable_memmap_vector_storage_byte( +- vector_storage_path, +- vector_config.size, +- vector_config.distance, +- ), +- VectorStorageDatatype::Float16 => open_appendable_memmap_vector_storage_half( +- vector_storage_path, +- vector_config.size, +- vector_config.distance, +- ), +- } +- } +- } +- VectorStorageType::InRamChunkedMmap => { +- if let Some(multi_vec_config) = &vector_config.multivector_config { +- match storage_element_type { +- VectorStorageDatatype::Float32 => open_appendable_in_ram_multi_vector_storage( +- vector_storage_path, +- vector_config.size, +- vector_config.distance, +- *multi_vec_config, +- ), +- VectorStorageDatatype::Uint8 => { +- open_appendable_in_ram_multi_vector_storage_byte( 
+- vector_storage_path, +- vector_config.size, +- vector_config.distance, +- *multi_vec_config, +- ) +- } +- VectorStorageDatatype::Float16 => { +- open_appendable_in_ram_multi_vector_storage_half( +- vector_storage_path, +- vector_config.size, +- vector_config.distance, +- *multi_vec_config, +- ) +- } +- } +- } else { +- match storage_element_type { +- VectorStorageDatatype::Float32 => open_appendable_in_ram_vector_storage( +- vector_storage_path, +- vector_config.size, +- vector_config.distance, +- ), +- VectorStorageDatatype::Uint8 => open_appendable_in_ram_vector_storage_byte( +- vector_storage_path, +- vector_config.size, +- vector_config.distance, +- ), +- VectorStorageDatatype::Float16 => open_appendable_in_ram_vector_storage_half( +- vector_storage_path, +- vector_config.size, +- vector_config.distance, +- ), +- } +- } +- } +- } +-} +- +-pub(crate) fn open_segment_db( +- segment_path: &Path, +- config: &SegmentConfig, +-) -> OperationResult>> { +- let vector_db_names: Vec = config +- .vector_data +- .keys() +- .map(|vector_name| get_vector_name_with_prefix(DB_VECTOR_CF, vector_name)) +- .chain( +- config +- .sparse_vector_data +- .iter() +- .filter(|(_, sparse_vector_config)| { +- matches!( +- sparse_vector_config.storage_type, +- SparseVectorStorageType::OnDisk +- ) +- }) +- .map(|(vector_name, _)| get_vector_name_with_prefix(DB_VECTOR_CF, vector_name)), +- ) +- .collect(); +- open_db(segment_path, &vector_db_names) +- .map_err(|err| OperationError::service_error(format!("RocksDB open error: {err}"))) +-} +- +-pub(crate) fn create_payload_storage( +- database: Arc>, +- config: &SegmentConfig, +- path: &Path, +-) -> OperationResult { +- let payload_storage = match config.payload_storage_type { +- PayloadStorageType::InMemory => { +- PayloadStorageEnum::from(SimplePayloadStorage::open(database)?) +- } +- PayloadStorageType::OnDisk => { +- PayloadStorageEnum::from(OnDiskPayloadStorage::open(database)?) 
+- } +- PayloadStorageType::Mmap => { +- PayloadStorageEnum::from(MmapPayloadStorage::open_or_create(path)?) +- } +- }; +- Ok(payload_storage) +-} +- +-pub(crate) fn create_mutable_id_tracker(segment_path: &Path) -> OperationResult { +- MutableIdTracker::open(segment_path) +-} +- +-pub(crate) fn create_rocksdb_id_tracker( +- database: Arc>, +-) -> OperationResult { +- SimpleIdTracker::open(database) +-} +- +-pub(crate) fn create_immutable_id_tracker( +- segment_path: &Path, +-) -> OperationResult { +- ImmutableIdTracker::open(segment_path) +-} +- +-pub(crate) fn get_payload_index_path(segment_path: &Path) -> PathBuf { +- segment_path.join(PAYLOAD_INDEX_PATH) +-} +- +-pub(crate) struct VectorIndexOpenArgs<'a> { +- pub path: &'a Path, +- pub id_tracker: Arc>, +- pub vector_storage: Arc>, +- pub payload_index: Arc>, +- pub quantized_vectors: Arc>>, +-} +- +-pub struct VectorIndexBuildArgs<'a> { +- pub permit: Arc, +- /// Vector indices from other segments, used to speed up index building. +- /// May or may not contain the same vectors. 
+- pub old_indices: &'a [Arc>], +- pub gpu_device: Option<&'a LockedGpuDevice<'a>>, +- pub stopped: &'a AtomicBool, +- pub feature_flags: FeatureFlags, +-} +- +-pub(crate) fn open_vector_index( +- vector_config: &VectorDataConfig, +- open_args: VectorIndexOpenArgs, +-) -> OperationResult { +- let VectorIndexOpenArgs { +- path, +- id_tracker, +- vector_storage, +- payload_index, +- quantized_vectors, +- } = open_args; +- Ok(match &vector_config.index { +- Indexes::Plain {} => VectorIndexEnum::Plain(PlainVectorIndex::new( +- id_tracker, +- vector_storage, +- payload_index, +- )), +- Indexes::Hnsw(hnsw_config) => VectorIndexEnum::Hnsw(HNSWIndex::open(HnswIndexOpenArgs { +- path, +- id_tracker, +- vector_storage, +- quantized_vectors, +- payload_index, +- hnsw_config: hnsw_config.clone(), +- })?), +- }) +-} +- +-pub(crate) fn build_vector_index( +- vector_config: &VectorDataConfig, +- open_args: VectorIndexOpenArgs, +- build_args: VectorIndexBuildArgs, +-) -> OperationResult { +- let VectorIndexOpenArgs { +- path, +- id_tracker, +- vector_storage, +- payload_index, +- quantized_vectors, +- } = open_args; +- Ok(match &vector_config.index { +- Indexes::Plain {} => VectorIndexEnum::Plain(PlainVectorIndex::new( +- id_tracker, +- vector_storage, +- payload_index, +- )), +- Indexes::Hnsw(hnsw_config) => VectorIndexEnum::Hnsw(HNSWIndex::build( +- HnswIndexOpenArgs { +- path, +- id_tracker, +- vector_storage, +- quantized_vectors, +- payload_index, +- hnsw_config: hnsw_config.clone(), +- }, +- build_args, +- )?), +- }) +-} +- +-#[cfg(feature = "testing")] +-pub fn create_sparse_vector_index_test( +- args: SparseVectorIndexOpenArgs, +-) -> OperationResult { +- create_sparse_vector_index(args) +-} +- +-pub(crate) fn create_sparse_vector_index( +- args: SparseVectorIndexOpenArgs, +-) -> OperationResult { +- let vector_index = match ( +- args.config.index_type, +- args.config.datatype.unwrap_or_default(), +- sparse_vector_index::USE_COMPRESSED, +- ) { +- (_, a @ 
(VectorStorageDatatype::Float16 | VectorStorageDatatype::Uint8), false) => { +- Err(OperationError::ValidationError { +- description: format!("{a:?} datatype is not supported"), +- })? +- } +- +- (SparseIndexType::MutableRam, _, _) => { +- VectorIndexEnum::SparseRam(SparseVectorIndex::open(args)?) +- } +- +- // Non-compressed +- (SparseIndexType::ImmutableRam, VectorStorageDatatype::Float32, false) => { +- VectorIndexEnum::SparseImmutableRam(SparseVectorIndex::open(args)?) +- } +- (SparseIndexType::Mmap, VectorStorageDatatype::Float32, false) => { +- VectorIndexEnum::SparseMmap(SparseVectorIndex::open(args)?) +- } +- +- // Compressed +- (SparseIndexType::ImmutableRam, VectorStorageDatatype::Float32, true) => { +- VectorIndexEnum::SparseCompressedImmutableRamF32(SparseVectorIndex::open(args)?) +- } +- (SparseIndexType::Mmap, VectorStorageDatatype::Float32, true) => { +- VectorIndexEnum::SparseCompressedMmapF32(SparseVectorIndex::open(args)?) +- } +- (SparseIndexType::ImmutableRam, VectorStorageDatatype::Float16, true) => { +- VectorIndexEnum::SparseCompressedImmutableRamF16(SparseVectorIndex::open(args)?) +- } +- (SparseIndexType::Mmap, VectorStorageDatatype::Float16, true) => { +- VectorIndexEnum::SparseCompressedMmapF16(SparseVectorIndex::open(args)?) +- } +- (SparseIndexType::ImmutableRam, VectorStorageDatatype::Uint8, true) => { +- VectorIndexEnum::SparseCompressedImmutableRamU8(SparseVectorIndex::open(args)?) +- } +- (SparseIndexType::Mmap, VectorStorageDatatype::Uint8, true) => { +- VectorIndexEnum::SparseCompressedMmapU8(SparseVectorIndex::open(args)?) 
+- } +- }; +- +- Ok(vector_index) +-} +- +-pub(crate) fn create_sparse_vector_storage( +- database: Arc>, +- path: &Path, +- vector_name: &VectorName, +- storage_type: &SparseVectorStorageType, +- stopped: &AtomicBool, +-) -> OperationResult { +- match storage_type { +- SparseVectorStorageType::OnDisk => { +- let db_column_name = get_vector_name_with_prefix(DB_VECTOR_CF, vector_name); +- open_simple_sparse_vector_storage(database, &db_column_name, stopped) +- } +- SparseVectorStorageType::Mmap => { +- let mmap_storage = MmapSparseVectorStorage::open_or_create(path)?; +- Ok(VectorStorageEnum::SparseMmap(mmap_storage)) +- } +- } +-} +- +-fn create_segment( +- version: Option, +- segment_path: &Path, +- config: &SegmentConfig, +- stopped: &AtomicBool, +-) -> OperationResult { +- let database = open_segment_db(segment_path, config)?; +- let payload_storage = sp(create_payload_storage( +- database.clone(), +- config, +- segment_path, +- )?); +- +- let appendable_flag = config.is_appendable(); +- +- let mutable_id_tracker = +- appendable_flag || !ImmutableIdTracker::mappings_file_path(segment_path).is_file(); +- +- let id_tracker = if mutable_id_tracker { +- // Determine whether we use the new (file based) or old (RocksDB) mutable ID tracker +- // Decide based on the feature flag and state on disk +- let use_new_mutable_tracker = { +- // New ID tracker is enabled by default, but we still use the old tracker if we have +- // any mappings stored in RocksDB +- // TODO(1.15 or later): remove this check and use new mutable ID tracker unconditionally +- let db = database.read(); +- match db.cf_handle(DB_MAPPING_CF) { +- Some(cf_handle) => { +- let count = db +- .property_int_value_cf(cf_handle, rocksdb::properties::ESTIMATE_NUM_KEYS) +- .map_err(|err| { +- OperationError::service_error(format!( +- "Failed to get estimated number of keys from RocksDB: {err}" +- )) +- })? 
+- .unwrap_or_default(); +- count == 0 +- } +- None => true, +- } +- }; +- +- if use_new_mutable_tracker { +- sp(IdTrackerEnum::MutableIdTracker(create_mutable_id_tracker( +- segment_path, +- )?)) +- } else { +- sp(IdTrackerEnum::RocksDbIdTracker(create_rocksdb_id_tracker( +- database.clone(), +- )?)) +- } +- } else { +- sp(IdTrackerEnum::ImmutableIdTracker( +- create_immutable_id_tracker(segment_path)?, +- )) +- }; +- +- let mut vector_storages = HashMap::new(); +- +- for (vector_name, vector_config) in &config.vector_data { +- let vector_storage_path = get_vector_storage_path(segment_path, vector_name); +- +- // Select suitable vector storage type based on configuration +- let vector_storage = sp(open_vector_storage( +- &database, +- vector_config, +- stopped, +- &vector_storage_path, +- vector_name, +- )?); +- +- vector_storages.insert(vector_name.to_owned(), vector_storage); +- } +- +- for (vector_name, sparse_config) in config.sparse_vector_data.iter() { +- let vector_storage_path = get_vector_storage_path(segment_path, vector_name); +- +- // Select suitable sparse vector storage type based on configuration +- let vector_storage = sp(create_sparse_vector_storage( +- database.clone(), +- &vector_storage_path, +- vector_name, +- &sparse_config.storage_type, +- stopped, +- )?); +- +- vector_storages.insert(vector_name.to_owned(), vector_storage); +- } +- +- let payload_index_path = get_payload_index_path(segment_path); +- let payload_index: Arc> = sp(StructPayloadIndex::open( +- payload_storage.clone(), +- id_tracker.clone(), +- vector_storages.clone(), +- &payload_index_path, +- appendable_flag, +- )?); +- +- let mut vector_data = HashMap::new(); +- for (vector_name, vector_config) in &config.vector_data { +- let vector_storage_path = get_vector_storage_path(segment_path, vector_name); +- let vector_storage = vector_storages.remove(vector_name).unwrap(); +- +- let vector_index_path = get_vector_index_path(segment_path, vector_name); +- // Warn when number of 
points between ID tracker and storage differs +- let point_count = id_tracker.borrow().total_point_count(); +- let vector_count = vector_storage.borrow().total_vector_count(); +- if vector_count != point_count { +- log::debug!( +- "Mismatch of point and vector counts ({point_count} != {vector_count}, storage: {})", +- vector_storage_path.display(), +- ); +- } +- +- let quantized_vectors = sp(if config.quantization_config(vector_name).is_some() { +- let quantized_data_path = vector_storage_path; +- if QuantizedVectors::config_exists(&quantized_data_path) { +- let quantized_vectors = +- QuantizedVectors::load(&vector_storage.borrow(), &quantized_data_path)?; +- Some(quantized_vectors) +- } else { +- None +- } +- } else { +- None +- }); +- +- let vector_index: Arc> = sp(open_vector_index( +- vector_config, +- VectorIndexOpenArgs { +- path: &vector_index_path, +- id_tracker: id_tracker.clone(), +- vector_storage: vector_storage.clone(), +- payload_index: payload_index.clone(), +- quantized_vectors: quantized_vectors.clone(), +- }, +- )?); +- +- check_process_stopped(stopped)?; +- +- vector_data.insert( +- vector_name.to_owned(), +- VectorData { +- vector_index, +- vector_storage, +- quantized_vectors, +- }, +- ); +- } +- +- for (vector_name, sparse_vector_config) in &config.sparse_vector_data { +- let vector_storage_path = get_vector_storage_path(segment_path, vector_name); +- let vector_index_path = get_vector_index_path(segment_path, vector_name); +- let vector_storage = vector_storages.remove(vector_name).unwrap(); +- +- // Warn when number of points between ID tracker and storage differs +- let point_count = id_tracker.borrow().total_point_count(); +- let vector_count = vector_storage.borrow().total_vector_count(); +- if vector_count != point_count { +- log::debug!( +- "Mismatch of point and vector counts ({point_count} != {vector_count}, storage: {})", +- vector_storage_path.display(), +- ); +- } +- +- let vector_index = 
sp(create_sparse_vector_index(SparseVectorIndexOpenArgs { +- config: sparse_vector_config.index, +- id_tracker: id_tracker.clone(), +- vector_storage: vector_storage.clone(), +- payload_index: payload_index.clone(), +- path: &vector_index_path, +- stopped, +- tick_progress: || (), +- })?); +- +- check_process_stopped(stopped)?; +- +- vector_data.insert( +- vector_name.to_owned(), +- VectorData { +- vector_storage, +- vector_index, +- quantized_vectors: sp(None), +- }, +- ); +- } +- +- let segment_type = if config.is_any_vector_indexed() { +- SegmentType::Indexed +- } else { +- SegmentType::Plain +- }; +- +- Ok(Segment { +- version, +- persisted_version: Arc::new(Mutex::new(version)), +- current_path: segment_path.to_owned(), +- id_tracker, +- vector_data, +- segment_type, +- appendable_flag, +- payload_index, +- payload_storage, +- segment_config: config.clone(), +- error_status: None, +- database, +- flush_thread: Mutex::new(None), +- }) +-} +- +-pub fn load_segment(path: &Path, stopped: &AtomicBool) -> OperationResult> { +- if path +- .extension() +- .and_then(|ext| ext.to_str()) +- .map(|ext| ext == "deleted") +- .unwrap_or(false) +- { +- log::warn!("Segment is marked as deleted, skipping: {}", path.display()); +- // Skip deleted segments +- return Ok(None); +- } +- +- let Some(stored_version) = SegmentVersion::load(path)? else { +- // Assume segment was not properly saved. +- // Server might have crashed before saving the segment fully. +- log::warn!( +- "Segment version file not found, skipping: {}", +- path.display() +- ); +- return Ok(None); +- }; +- +- let app_version = SegmentVersion::current(); +- +- if stored_version != app_version { +- info!("Migrating segment {stored_version} -> {app_version}"); +- +- if stored_version > app_version { +- return Err(OperationError::service_error(format!( +- "Data version {stored_version} is newer than application version {app_version}. \ +- Please upgrade the application. Compatibility is not guaranteed." 
+- ))); +- } +- +- if stored_version.major == 0 && stored_version.minor < 3 { +- return Err(OperationError::service_error(format!( +- "Segment version({stored_version}) is not compatible with current version({app_version})" +- ))); +- } +- +- if stored_version.major == 0 && stored_version.minor == 3 { +- let segment_state = load_segment_state_v3(path)?; +- Segment::save_state(&segment_state, path)?; +- } else if stored_version.major == 0 && stored_version.minor <= 5 { +- let segment_state = load_segment_state_v5(path)?; +- Segment::save_state(&segment_state, path)?; +- } +- +- SegmentVersion::save(path)? +- } +- +- let segment_state = Segment::load_state(path)?; +- +- let segment = create_segment(segment_state.version, path, &segment_state.config, stopped)?; +- +- Ok(Some(segment)) +-} +- +-pub fn new_segment_path(segments_path: &Path) -> PathBuf { +- segments_path.join(Uuid::new_v4().to_string()) +-} +- +-/// Build segment instance using given configuration. +-/// Builder will generate folder for the segment and store all segment information inside it. +-/// +-/// # Arguments +-/// +-/// * `segments_path` - Path to the segments directory. Segment folder will be created in this directory +-/// * `config` - Segment configuration +-/// * `ready` - Whether the segment is ready after building; will save segment version +-/// +-/// To load a segment, saving the segment version is required. If `ready` is false, the version +-/// will not be stored. Then the segment is skipped on restart when trying to load it again. In +-/// that case, the segment version must be stored manually to make it ready. 
+-pub fn build_segment( +- segments_path: &Path, +- config: &SegmentConfig, +- ready: bool, +-) -> OperationResult { +- let segment_path = new_segment_path(segments_path); +- +- std::fs::create_dir_all(&segment_path)?; +- +- let segment = create_segment(None, &segment_path, config, &AtomicBool::new(false))?; +- segment.save_current_state()?; +- +- // Version is the last file to save, as it will be used to check if segment was built correctly. +- // If it is not saved, segment will be skipped. +- if ready { +- SegmentVersion::save(&segment_path)?; +- } +- +- Ok(segment) +-} +- +-/// Load v0.3.* segment data and migrate to current version +-#[allow(deprecated)] +-fn load_segment_state_v3(segment_path: &Path) -> OperationResult { +- use crate::compat::{SegmentConfigV5, StorageTypeV5, VectorDataConfigV5}; +- +- #[derive(Deserialize)] +- #[serde(rename_all = "snake_case")] +- #[deprecated] +- pub struct SegmentStateV3 { +- pub version: SeqNumberType, +- pub config: SegmentConfigV3, +- } +- +- #[derive(Deserialize)] +- #[serde(rename_all = "snake_case")] +- #[deprecated] +- pub struct SegmentConfigV3 { +- /// Size of a vectors used +- pub vector_size: usize, +- /// Type of distance function used for measuring distance between vectors +- pub distance: Distance, +- /// Type of index used for search +- pub index: Indexes, +- /// Type of vector storage +- pub storage_type: StorageTypeV5, +- /// Defines payload storage type +- #[serde(default)] +- pub payload_storage_type: PayloadStorageType, +- } +- +- let path = segment_path.join(SEGMENT_STATE_FILE); +- +- let mut contents = String::new(); +- +- let mut file = File::open(&path)?; +- file.read_to_string(&mut contents)?; +- +- serde_json::from_str::(&contents) +- .map(|state| { +- // Construct V5 version, then convert into current +- let vector_data = VectorDataConfigV5 { +- size: state.config.vector_size, +- distance: state.config.distance, +- hnsw_config: None, +- quantization_config: None, +- on_disk: None, +- }; +- let 
segment_config = SegmentConfigV5 { +- vector_data: HashMap::from([(DEFAULT_VECTOR_NAME.to_owned(), vector_data)]), +- index: state.config.index, +- storage_type: state.config.storage_type, +- payload_storage_type: state.config.payload_storage_type, +- quantization_config: None, +- }; +- +- SegmentState { +- version: Some(state.version), +- config: segment_config.into(), +- } +- }) +- .map_err(|err| { +- OperationError::service_error(format!( +- "Failed to read segment {}. Error: {}", +- path.to_str().unwrap(), +- err +- )) +- }) +-} +- +-/// Load v0.5.0 segment data and migrate to current version +-#[allow(deprecated)] +-fn load_segment_state_v5(segment_path: &Path) -> OperationResult { +- use crate::compat::SegmentStateV5; +- +- let path = segment_path.join(SEGMENT_STATE_FILE); +- +- let mut contents = String::new(); +- +- let mut file = File::open(&path)?; +- file.read_to_string(&mut contents)?; +- +- serde_json::from_str::(&contents) +- .map(Into::into) +- .map_err(|err| { +- OperationError::service_error(format!( +- "Failed to read segment {}. 
Error: {}", +- path.to_str().unwrap(), +- err +- )) +- }) +-} +\ No newline at end of file ++ let db_column_name = get_vector_name_with_prefix(DB +\ No newline at end of file diff --git a/locodiff-250425/results/qdrant_lib_segment_src_segment_constructor_segment_constructor_base.rs/x-ai_grok-code-fast-1/20250909_155859/raw_response.txt b/locodiff-250425/results/qdrant_lib_segment_src_segment_constructor_segment_constructor_base.rs/x-ai_grok-code-fast-1/20250909_155859/raw_response.txt new file mode 100644 index 000000000..7db90215b --- /dev/null +++ b/locodiff-250425/results/qdrant_lib_segment_src_segment_constructor_segment_constructor_base.rs/x-ai_grok-code-fast-1/20250909_155859/raw_response.txt @@ -0,0 +1,105 @@ +```rust +use std::collections::HashMap; +use std::fs::File; +use std::io::Read; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use std::sync::atomic::AtomicBool; + +use atomic_refcell::AtomicRefCell; +use common::budget::ResourcePermit; +use common::flags::FeatureFlags; +use io::storage_version::StorageVersion; +use log::info; +use parking_lot::{Mutex, RwLock}; +use rocksdb::DB; +use serde::Deserialize; +use uuid::Uuid; + +use crate::common::operation_error::{OperationError, OperationResult, check_process_stopped}; +use crate::common::rocksdb_wrapper::{DB_MAPPING_CF, DB_VECTOR_CF, open_db}; +use crate::data_types::vectors::DEFAULT_VECTOR_NAME; +use crate::id_tracker::immutable_id_tracker::ImmutableIdTracker; +use crate::id_tracker::mutable_id_tracker::MutableIdTracker; +use crate::id_tracker::simple_id_tracker::SimpleIdTracker; +use crate::id_tracker::{IdTracker, IdTrackerEnum, IdTrackerSS}; +use crate::index::VectorIndexEnum; +use crate::index::hnsw_index::gpu::gpu_devices_manager::LockedGpuDevice; +use crate::index::hnsw_index::hnsw::{HNSWIndex, HnswIndexOpenArgs}; +use crate::index::plain_vector_index::PlainVectorIndex; +use crate::index::sparse_index::sparse_index_config::SparseIndexType; +use 
crate::index::sparse_index::sparse_vector_index::{ + self, SparseVectorIndex, SparseVectorIndexOpenArgs +}; +use crate::index::struct_payload_index::StructPayloadIndex; +use crate::payload_storage::mmap_payload_storage::MmapPayloadStorage; +use crate::payload_storage::on_disk_payload_storage::OnDiskPayloadStorage; +use crate::payload_storage::payload_storage_enum::PayloadStorageEnum; +use crate::payload_storage::simple_payload_storage::SimplePayloadStorage; +use crate::segment::{SEGMENT_STATE_FILE, Segment, SegmentVersion, VectorData}; +use crate::types::{ + Distance, Indexes, PayloadStorageType, SegmentConfig, SegmentState, SegmentType, SeqNumberType, + SparseVectorStorageType, VectorDataConfig, VectorName, VectorStorageDatatype, VectorStorageType, +}; +use crate::vector_storage::dense::appendable_dense_vector_storage::{ + open_appendable_in_ram_vector_storage, open_appendable_in_ram_vector_storage_byte, + open_appendable_in_ram_vector_storage_half, open_appendable_memmap_vector_storage, + open_appendable_memmap_vector_storage_byte, open_appendable_memmap_vector_storage_half, +}; +use crate::vector_storage::dense::memmap_dense_vector_storage::{ + open_memmap_vector_storage, open_memmap_vector_storage_byte, open_memmap_vector_storage_half, +}; +use crate::vector_storage::dense::simple_dense_vector_storage::{ + open_simple_dense_byte_vector_storage, open_simple_dense_half_vector_storage, + open_simple_dense_vector_storage, +}; +use crate::vector_storage::multi_dense::appendable_mmap_multi_dense_vector_storage::{ + open_appendable_in_ram_multi_vector_storage, open_appendable_in_ram_multi_vector_storage_byte, + open_appendable_in_ram_multi_vector_storage_half, open_appendable_memmap_multi_vector_storage, + open_appendable_memmap_multi_vector_storage_byte, open_appendable_memmap_multi_vector_storage_half, +}; +use crate::vector_storage::multi_dense::simple_multi_dense_vector_storage::{ + open_simple_multi_dense_vector_storage, 
open_simple_multi_dense_vector_storage_byte, + open_simple_multi_dense_vector_storage_half, +}; +use crate::vector_storage::quantized::quantized_vectors::QuantizedVectors; +use crate::vector_storage::sparse::mmap_sparse_vector_storage::MmapSparseVectorStorage; +use crate::vector_storage::sparse::simple_sparse_vector_storage::open_simple_sparse_vector_storage; +use crate::vector_storage::{VectorStorage, VectorStorageEnum}; + +pub const PAYLOAD_INDEX_PATH: &str = "payload_index"; +pub const VECTOR_STORAGE_PATH: &str = "vector_storage"; +pub const VECTOR_INDEX_PATH: &str = "vector_index"; + +fn sp(t: T) -> Arc> { + Arc::new(AtomicRefCell::new(t)) +} + +fn get_vector_name_with_prefix(prefix: &str, vector_name: &VectorName) -> String { + if !vector_name.is_empty() { + format!("{prefix}-{vector_name}") + } else { + prefix.to_owned() + } +} + +pub fn get_vector_storage_path(segment_path: &Path, vector_name: &VectorName) -> PathBuf { + segment_path.join(get_vector_name_with_prefix(VECTOR_STORAGE_PATH, vector_name)) +} + +pub fn get_vector_index_path(segment_path: &Path, vector_name: &VectorName) -> PathBuf { + segment_path.join(get_vector_name_with_prefix(VECTOR_INDEX_PATH, vector_name)) +} + +pub(crate) fn open_vector_storage( + database: &Arc>, + vector_config: &VectorDataConfig, + stopped: &AtomicBool, + vector_storage_path: &Path, + vector_name: &VectorName, +) -> OperationResult { + let storage_element_type = vector_config.datatype.unwrap_or_default(); + + match vector_config.storage_type { + VectorStorageType::Memory => { + let db_column_name = get_vector_name_with_prefix(DB \ No newline at end of file diff --git a/locodiff-250425/results/qdrant_lib_segment_src_spaces_simple.rs/x-ai_grok-code-fast-1/20250909_155756/extracted_output.txt b/locodiff-250425/results/qdrant_lib_segment_src_spaces_simple.rs/x-ai_grok-code-fast-1/20250909_155756/extracted_output.txt new file mode 100644 index 000000000..6667b18e6 --- /dev/null +++ 
b/locodiff-250425/results/qdrant_lib_segment_src_spaces_simple.rs/x-ai_grok-code-fast-1/20250909_155756/extracted_output.txt @@ -0,0 +1,278 @@ +use common::types::ScoreType; + +use super::metric::{Metric, MetricPostProcessing}; +#[cfg(target_arch = "x86_64")] +use super::simple_avx::*; +#[cfg(all(target_arch = "aarch64", target_feature = "neon"))] +use super::simple_neon::*; +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +use super::simple_sse::*; +use super::tools::is_length_zero_or_normalized; +use crate::data_types::vectors::{DenseVector, VectorElementType}; +use crate::types::Distance; + +#[cfg(target_arch = "x86_64")] +pub(crate) const MIN_DIM_SIZE_AVX: usize = 32; + +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + all(target_arch = "aarch64", target_feature = "neon") +))] +pub(crate) const MIN_DIM_SIZE_SIMD: usize = 16; + +#[derive(Clone)] +pub struct DotProductMetric; + +#[derive(Clone)] +pub struct CosineMetric; + +#[derive(Clone)] +pub struct EuclidMetric; + +#[derive(Clone)] +pub struct ManhattanMetric; + +impl Metric for EuclidMetric { + fn distance() -> Distance { + Distance::Euclid + } + + fn similarity(v1: &[VectorElementType], v2: &[VectorElementType]) -> ScoreType { + #[cfg(target_arch = "x86_64")] + { + if is_x86_feature_detected!("avx") + && is_x86_feature_detected!("fma") + && v1.len() >= MIN_DIM_SIZE_AVX + { + return unsafe { euclid_similarity_avx(v1, v2) }; + } + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if is_x86_feature_detected!("sse") && v1.len() >= MIN_DIM_SIZE_SIMD { + return unsafe { euclid_similarity_sse(v1, v2) }; + } + } + + #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] + { + if std::arch::is_aarch64_feature_detected!("neon") && v1.len() >= MIN_DIM_SIZE_SIMD { + return unsafe { euclid_similarity_neon(v1, v2) }; + } + } + + euclid_similarity(v1, v2) + } + + fn preprocess(vector: DenseVector) -> DenseVector { + vector + } +} + +impl MetricPostProcessing for EuclidMetric 
{ + fn postprocess(score: ScoreType) -> ScoreType { + score.abs().sqrt() + } +} + +impl Metric for ManhattanMetric { + fn distance() -> Distance { + Distance::Manhattan + } + + fn similarity(v1: &[VectorElementType], v2: &[VectorElementType]) -> ScoreType { + #[cfg(target_arch = "x86_64")] + { + if is_x86_feature_detected!("avx") + && is_x86_feature_detected!("fma") + && v1.len() >= MIN_DIM_SIZE_AVX + { + return unsafe { manhattan_similarity_avx(v1, v2) }; + } + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if is_x86_feature_detected!("sse") && v1.len() >= MIN_DIM_SIZE_SIMD { + return unsafe { manhattan_similarity_sse(v1, v2) }; + } + } + + #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] + { + if std::arch::is_aarch64_feature_detected!("neon") && v1.len() >= MIN_DIM_SIZE_SIMD { + return unsafe { manhattan_similarity_neon(v1, v2) }; + } + } + + manhattan_similarity(v1, v2) + } + + fn preprocess(vector: DenseVector) -> DenseVector { + vector + } +} + +impl MetricPostProcessing for ManhattanMetric { + fn postprocess(score: ScoreType) -> ScoreType { + score.abs() + } +} + +impl Metric for DotProductMetric { + fn distance() -> Distance { + Distance::Dot + } + + fn similarity(v1: &[VectorElementType], v2: &[VectorElementType]) -> ScoreType { + #[cfg(target_arch = "x86_64")] + { + if is_x86_feature_detected!("avx") + && is_x86_feature_detected!("fma") + && v1.len() >= MIN_DIM_SIZE_AVX + { + return unsafe { dot_similarity_avx(v1, v2) }; + } + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if is_x86_feature_detected!("sse") && v1.len() >= MIN_DIM_SIZE_SIMD { + return unsafe { dot_similarity_sse(v1, v2) }; + } + } + + #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] + { + if std::arch::is_aarch64_feature_detected!("neon") && v1.len() >= MIN_DIM_SIZE_SIMD { + return unsafe { dot_similarity_neon(v1, v2) }; + } + } + + dot_similarity(v1, v2) + } + + fn preprocess(vector: DenseVector) -> DenseVector { + 
vector + } +} + +impl MetricPostProcessing for DotProductMetric { + fn postprocess(score: ScoreType) -> ScoreType { + score + } +} + +/// Equivalent to DotProductMetric with normalization of the vectors in preprocessing. +impl Metric for CosineMetric { + fn distance() -> Distance { + Distance::Cosine + } + + fn similarity(v1: &[VectorElementType], v2: &[VectorElementType]) -> ScoreType { + DotProductMetric::similarity(v1, v2) + } + + fn preprocess(vector: DenseVector) -> DenseVector { + #[cfg(target_arch = "x86_64")] + { + if is_x86_feature_detected!("avx") + && is_x86_feature_detected!("fma") + && vector.len() >= MIN_DIM_SIZE_AVX + { + return unsafe { cosine_preprocess_avx(vector) }; + } + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if is_x86_feature_detected!("sse") && vector.len() >= MIN_DIM_SIZE_SIMD { + return unsafe { cosine_preprocess_sse(vector) }; + } + } + + #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] + { + if std::arch::is_aarch64_feature_detected!("neon") && vector.len() >= MIN_DIM_SIZE_SIMD + { + return unsafe { cosine_preprocess_neon(vector) }; + } + } + + cosine_preprocess(vector) + } +} + +impl MetricPostProcessing for CosineMetric { + fn postprocess(score: ScoreType) -> ScoreType { + score + } +} + +pub fn euclid_similarity(v1: &[VectorElementType], v2: &[VectorElementType]) -> ScoreType { + -v1.iter() + .zip(v2) + .map(|(a, b)| (a - b).powi(2)) + .sum::() +} + +pub fn manhattan_similarity(v1: &[VectorElementType], v2: &[VectorElementType]) -> ScoreType { + -v1.iter() + .zip(v2) + .map(|(a, b)| (a - b).abs()) + .sum::() +} + +pub fn cosine_preprocess(vector: DenseVector) -> DenseVector { + let mut length: f32 = vector.iter().map(|x| x * x).sum(); + if is_length_zero_or_normalized(length) { + return vector; + } + length = length.sqrt(); + vector.iter().map(|x| x / length).collect() +} + +pub fn dot_similarity(v1: &[VectorElementType], v2: &[VectorElementType]) -> ScoreType { + v1.iter().zip(v2).map(|(a, b)| 
a * b).sum() +} + +#[cfg(test)] +mod tests { + use rand::Rng; + + use super::*; + + #[test] + fn test_cosine_preprocessing() { + let res = >::preprocess(vec![0.0, 0.0, 0.0, 0.0]); + assert_eq!(res, vec![0.0, 0.0, 0.0, 0.0]); + } + + /// If we preprocess a vector multiple times, we expect the same result. + /// Renormalization should not produce something different. + #[test] + fn test_cosine_stable_preprocessing() { + const DIM: usize = 1500; + const ATTEMPTS: usize = 100; + + let mut rng = rand::rng(); + + for attempt in 0..ATTEMPTS { + let range = rng.random_range(-2.5..=0.0)..=rng.random_range(0.0..2.5); + let vector: Vec<_> = (0..DIM).map(|_| rng.random_range(range.clone())).collect(); + + // Preprocess and re-preprocess + let preprocess1 = >::preprocess(vector); + let preprocess2: DenseVector = + >::preprocess(preprocess1.clone()); + + // All following preprocess attempts must be the same + assert_eq!( + preprocess1, preprocess2, + "renormalization is not stable (vector #{attempt})" + ); + } + } +} \ No newline at end of file diff --git a/locodiff-250425/results/qdrant_lib_segment_src_spaces_simple.rs/x-ai_grok-code-fast-1/20250909_155756/metadata.json b/locodiff-250425/results/qdrant_lib_segment_src_spaces_simple.rs/x-ai_grok-code-fast-1/20250909_155756/metadata.json new file mode 100644 index 000000000..ebde06f80 --- /dev/null +++ b/locodiff-250425/results/qdrant_lib_segment_src_spaces_simple.rs/x-ai_grok-code-fast-1/20250909_155756/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "qdrant_lib_segment_src_spaces_simple.rs", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/qdrant_lib_segment_src_spaces_simple.rs_prompt.txt", + "expected_file": "locodiff-250425/prompts/qdrant_lib_segment_src_spaces_simple.rs_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:57:56.719278+00:00", + "success": true, + "error": null, + "api_error": false, + "raw_response_length": 8201, + 
"extracted_output_length": 8189, + "expected_output_length": 8190, + "results_dir": "locodiff-250425/results/qdrant_lib_segment_src_spaces_simple.rs/x-ai_grok-code-fast-1/20250909_155756", + "generation_id": "gen-1757433476-Z2gX6AS5GY4AJPMKSHlL", + "cost_usd": 0.00461574, + "prompt_tokens": 47947, + "completion_tokens": 2247, + "total_tokens": 50194, + "native_prompt_tokens": 47553, + "native_completion_tokens": 2443, + "native_finish_reason": "stop", + "stats_error": null, + "native_tokens_reasoning": 363 +} \ No newline at end of file diff --git a/locodiff-250425/results/qdrant_lib_segment_src_spaces_simple.rs/x-ai_grok-code-fast-1/20250909_155756/output.diff b/locodiff-250425/results/qdrant_lib_segment_src_spaces_simple.rs/x-ai_grok-code-fast-1/20250909_155756/output.diff new file mode 100644 index 000000000..c8af7cb1b --- /dev/null +++ b/locodiff-250425/results/qdrant_lib_segment_src_spaces_simple.rs/x-ai_grok-code-fast-1/20250909_155756/output.diff @@ -0,0 +1 @@ +No differences found. 
diff --git a/locodiff-250425/results/qdrant_lib_segment_src_spaces_simple.rs/x-ai_grok-code-fast-1/20250909_155756/raw_response.txt b/locodiff-250425/results/qdrant_lib_segment_src_spaces_simple.rs/x-ai_grok-code-fast-1/20250909_155756/raw_response.txt new file mode 100644 index 000000000..3201993f7 --- /dev/null +++ b/locodiff-250425/results/qdrant_lib_segment_src_spaces_simple.rs/x-ai_grok-code-fast-1/20250909_155756/raw_response.txt @@ -0,0 +1,280 @@ +```rust +use common::types::ScoreType; + +use super::metric::{Metric, MetricPostProcessing}; +#[cfg(target_arch = "x86_64")] +use super::simple_avx::*; +#[cfg(all(target_arch = "aarch64", target_feature = "neon"))] +use super::simple_neon::*; +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +use super::simple_sse::*; +use super::tools::is_length_zero_or_normalized; +use crate::data_types::vectors::{DenseVector, VectorElementType}; +use crate::types::Distance; + +#[cfg(target_arch = "x86_64")] +pub(crate) const MIN_DIM_SIZE_AVX: usize = 32; + +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + all(target_arch = "aarch64", target_feature = "neon") +))] +pub(crate) const MIN_DIM_SIZE_SIMD: usize = 16; + +#[derive(Clone)] +pub struct DotProductMetric; + +#[derive(Clone)] +pub struct CosineMetric; + +#[derive(Clone)] +pub struct EuclidMetric; + +#[derive(Clone)] +pub struct ManhattanMetric; + +impl Metric for EuclidMetric { + fn distance() -> Distance { + Distance::Euclid + } + + fn similarity(v1: &[VectorElementType], v2: &[VectorElementType]) -> ScoreType { + #[cfg(target_arch = "x86_64")] + { + if is_x86_feature_detected!("avx") + && is_x86_feature_detected!("fma") + && v1.len() >= MIN_DIM_SIZE_AVX + { + return unsafe { euclid_similarity_avx(v1, v2) }; + } + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if is_x86_feature_detected!("sse") && v1.len() >= MIN_DIM_SIZE_SIMD { + return unsafe { euclid_similarity_sse(v1, v2) }; + } + } + + #[cfg(all(target_arch = "aarch64", 
target_feature = "neon"))] + { + if std::arch::is_aarch64_feature_detected!("neon") && v1.len() >= MIN_DIM_SIZE_SIMD { + return unsafe { euclid_similarity_neon(v1, v2) }; + } + } + + euclid_similarity(v1, v2) + } + + fn preprocess(vector: DenseVector) -> DenseVector { + vector + } +} + +impl MetricPostProcessing for EuclidMetric { + fn postprocess(score: ScoreType) -> ScoreType { + score.abs().sqrt() + } +} + +impl Metric for ManhattanMetric { + fn distance() -> Distance { + Distance::Manhattan + } + + fn similarity(v1: &[VectorElementType], v2: &[VectorElementType]) -> ScoreType { + #[cfg(target_arch = "x86_64")] + { + if is_x86_feature_detected!("avx") + && is_x86_feature_detected!("fma") + && v1.len() >= MIN_DIM_SIZE_AVX + { + return unsafe { manhattan_similarity_avx(v1, v2) }; + } + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if is_x86_feature_detected!("sse") && v1.len() >= MIN_DIM_SIZE_SIMD { + return unsafe { manhattan_similarity_sse(v1, v2) }; + } + } + + #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] + { + if std::arch::is_aarch64_feature_detected!("neon") && v1.len() >= MIN_DIM_SIZE_SIMD { + return unsafe { manhattan_similarity_neon(v1, v2) }; + } + } + + manhattan_similarity(v1, v2) + } + + fn preprocess(vector: DenseVector) -> DenseVector { + vector + } +} + +impl MetricPostProcessing for ManhattanMetric { + fn postprocess(score: ScoreType) -> ScoreType { + score.abs() + } +} + +impl Metric for DotProductMetric { + fn distance() -> Distance { + Distance::Dot + } + + fn similarity(v1: &[VectorElementType], v2: &[VectorElementType]) -> ScoreType { + #[cfg(target_arch = "x86_64")] + { + if is_x86_feature_detected!("avx") + && is_x86_feature_detected!("fma") + && v1.len() >= MIN_DIM_SIZE_AVX + { + return unsafe { dot_similarity_avx(v1, v2) }; + } + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if is_x86_feature_detected!("sse") && v1.len() >= MIN_DIM_SIZE_SIMD { + return unsafe { 
dot_similarity_sse(v1, v2) }; + } + } + + #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] + { + if std::arch::is_aarch64_feature_detected!("neon") && v1.len() >= MIN_DIM_SIZE_SIMD { + return unsafe { dot_similarity_neon(v1, v2) }; + } + } + + dot_similarity(v1, v2) + } + + fn preprocess(vector: DenseVector) -> DenseVector { + vector + } +} + +impl MetricPostProcessing for DotProductMetric { + fn postprocess(score: ScoreType) -> ScoreType { + score + } +} + +/// Equivalent to DotProductMetric with normalization of the vectors in preprocessing. +impl Metric for CosineMetric { + fn distance() -> Distance { + Distance::Cosine + } + + fn similarity(v1: &[VectorElementType], v2: &[VectorElementType]) -> ScoreType { + DotProductMetric::similarity(v1, v2) + } + + fn preprocess(vector: DenseVector) -> DenseVector { + #[cfg(target_arch = "x86_64")] + { + if is_x86_feature_detected!("avx") + && is_x86_feature_detected!("fma") + && vector.len() >= MIN_DIM_SIZE_AVX + { + return unsafe { cosine_preprocess_avx(vector) }; + } + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if is_x86_feature_detected!("sse") && vector.len() >= MIN_DIM_SIZE_SIMD { + return unsafe { cosine_preprocess_sse(vector) }; + } + } + + #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] + { + if std::arch::is_aarch64_feature_detected!("neon") && vector.len() >= MIN_DIM_SIZE_SIMD + { + return unsafe { cosine_preprocess_neon(vector) }; + } + } + + cosine_preprocess(vector) + } +} + +impl MetricPostProcessing for CosineMetric { + fn postprocess(score: ScoreType) -> ScoreType { + score + } +} + +pub fn euclid_similarity(v1: &[VectorElementType], v2: &[VectorElementType]) -> ScoreType { + -v1.iter() + .zip(v2) + .map(|(a, b)| (a - b).powi(2)) + .sum::() +} + +pub fn manhattan_similarity(v1: &[VectorElementType], v2: &[VectorElementType]) -> ScoreType { + -v1.iter() + .zip(v2) + .map(|(a, b)| (a - b).abs()) + .sum::() +} + +pub fn cosine_preprocess(vector: DenseVector) 
-> DenseVector { + let mut length: f32 = vector.iter().map(|x| x * x).sum(); + if is_length_zero_or_normalized(length) { + return vector; + } + length = length.sqrt(); + vector.iter().map(|x| x / length).collect() +} + +pub fn dot_similarity(v1: &[VectorElementType], v2: &[VectorElementType]) -> ScoreType { + v1.iter().zip(v2).map(|(a, b)| a * b).sum() +} + +#[cfg(test)] +mod tests { + use rand::Rng; + + use super::*; + + #[test] + fn test_cosine_preprocessing() { + let res = >::preprocess(vec![0.0, 0.0, 0.0, 0.0]); + assert_eq!(res, vec![0.0, 0.0, 0.0, 0.0]); + } + + /// If we preprocess a vector multiple times, we expect the same result. + /// Renormalization should not produce something different. + #[test] + fn test_cosine_stable_preprocessing() { + const DIM: usize = 1500; + const ATTEMPTS: usize = 100; + + let mut rng = rand::rng(); + + for attempt in 0..ATTEMPTS { + let range = rng.random_range(-2.5..=0.0)..=rng.random_range(0.0..2.5); + let vector: Vec<_> = (0..DIM).map(|_| rng.random_range(range.clone())).collect(); + + // Preprocess and re-preprocess + let preprocess1 = >::preprocess(vector); + let preprocess2: DenseVector = + >::preprocess(preprocess1.clone()); + + // All following preprocess attempts must be the same + assert_eq!( + preprocess1, preprocess2, + "renormalization is not stable (vector #{attempt})" + ); + } + } +} +``` \ No newline at end of file diff --git a/locodiff-250425/results/qdrant_lib_segment_tests_integration_payload_index_test.rs/x-ai_grok-code-fast-1/20250909_155806/extracted_output.txt b/locodiff-250425/results/qdrant_lib_segment_tests_integration_payload_index_test.rs/x-ai_grok-code-fast-1/20250909_155806/extracted_output.txt new file mode 100644 index 000000000..9e778b449 --- /dev/null +++ b/locodiff-250425/results/qdrant_lib_segment_tests_integration_payload_index_test.rs/x-ai_grok-code-fast-1/20250909_155806/extracted_output.txt @@ -0,0 +1,1148 @@ +use std::collections::HashMap; +use std::fs::create_dir; +use 
std::path::Path; +use std::sync::Arc; +use std::sync::atomic::AtomicBool; + +use anyhow::{Context, Result}; +use atomic_refcell::AtomicRefCell; +use common::budget::ResourcePermit; +use common::counter::hardware_counter::HardwareCounterCell; +use fnv::FnvBuildHasher; +use indexmap::IndexSet; +use itertools::Itertools; +use rand::SeedableRng; +use rand::{Rng, rng}; +use segment::data_types::facets::{FacetParams, FacetValue}; +use segment::data_types::index::{ + FloatIndexParams, FloatIndexType, IntegerIndexParams, IntegerIndexType, KeywordIndexParams, + KeywordIndexType, TextIndexParams, TextIndexType, +}; +use segment::data_types::vectors::{DEFAULT_VECTOR_NAME, only_default_vector}; +use segment::entry::entry_point::SegmentEntry; +use segment::fixtures::payload_context_fixture::FixtureIdTracker; +use segment::fixtures::payload_fixtures::{ + FLICKING_KEY, FLT_KEY, GEO_KEY, INT_KEY, INT_KEY_2, INT_KEY_3, LAT_RANGE, LON_RANGE, STR_KEY, + STR_PROJ_KEY, STR_ROOT_PROJ_KEY, TEXT_KEY, generate_diverse_nested_payload, + generate_diverse_payload, random_filter, random_nested_filter, random_vector, +}; +use segment::index::PayloadIndex; +use segment::index::field_index::{FieldIndex, PrimaryCondition}; +use segment::index::struct_payload_index::StructPayloadIndex; +use segment::json_path::JsonPath; +use segment::payload_json; +use segment::payload_storage::PayloadStorage; +use segment::payload_storage::in_memory_payload_storage::InMemoryPayloadStorage; +use segment::segment::Segment; +use segment::segment_constructor::build_segment; +use segment::segment_constructor::segment_builder::SegmentBuilder; +use segment::segment_constructor::simple_segment_constructor::build_simple_segment; +use segment::types::PayloadFieldSchema::{FieldParams, FieldType}; +use segment::types::PayloadSchemaType::{Integer, Keyword}; +use segment::types::{ + AnyVariants, Condition, Distance, FieldCondition, Filter, GeoBoundingBox, GeoLineString, + GeoPoint, GeoPolygon, GeoRadius, HnswConfig, Indexes, 
IsEmptyCondition, Match, Payload, + PayloadField, PayloadSchemaParams, PayloadSchemaType, Range, SegmentConfig, VectorDataConfig, + VectorStorageType, WithPayload, +}; +use segment::utils::scored_point_ties::ScoredPointTies; +use tempfile::{Builder, TempDir}; + +macro_rules! here { + () => { + format!("at {}:{}", file!(), line!()) + }; +} + +/// `anyhow::ensure!` but with location, as what `assert!` would do +macro_rules! ensure { + ($($arg:tt)*) => { + (|| Ok(anyhow::ensure!($($arg)*)))().map_err(|e| { + e.context(here!()) + })? + }; +} + +const DIM: usize = 5; +const ATTEMPTS: usize = 20; + +struct TestSegments { + _base_dir: TempDir, + struct_segment: Segment, + plain_segment: Segment, + mmap_segment: Segment, +} + +impl TestSegments { + fn new() -> Self { + let base_dir = Builder::new().prefix("test_segments").tempdir().unwrap(); + + let hw_counter = HardwareCounterCell::new(); + + let mut rnd = StdRng::seed_from_u64(42); + + let config = Self::make_simple_config(true); + + let mut plain_segment = + build_segment(&base_dir.path().join("plain"), &config, true).unwrap(); + let mut struct_segment = + build_segment(&base_dir.path().join("struct"), &config, true).unwrap(); + + let num_points = 3000; + let points_to_delete = 500; + let points_to_clear = 500; + + let mut opnum = 0; + struct_segment + .create_field_index( + opnum, + &JsonPath::new(INT_KEY_2), + Some(&Integer.into()), + &hw_counter, + ) + .unwrap(); + + opnum += 1; + for n in 0..num_points { + let idx = n.into(); + let vector = random_vector(&mut rnd, DIM); + let payload: Payload = generate_diverse_payload(&mut rnd); + + plain_segment + .upsert_point(opnum, idx, only_default_vector(&vector), &hw_counter) + .unwrap(); + struct_segment + .upsert_point(opnum, idx, only_default_vector(&vector), &hw_counter) + .unwrap(); + plain_segment + .set_full_payload(opnum, idx, &payload, &hw_counter) + .unwrap(); + struct_segment + .set_full_payload(opnum, idx, &payload, &hw_counter) + .unwrap(); + + opnum += 1; + } + 
+ struct_segment + .create_field_index( + opnum, + &JsonPath::new(STR_KEY), + Some(&Keyword.into()), + &hw_counter, + ) + .unwrap(); + struct_segment + .create_field_index(opnum, &JsonPath::new(INT_KEY), None, &hw_counter) + .unwrap(); + struct_segment + .create_field_index( + opnum, + &JsonPath::new(INT_KEY_2), + Some(&FieldParams(PayloadSchemaParams::Integer( + IntegerIndexParams { + r#type: IntegerIndexType::Integer, + lookup: Some(true), + range: Some(false), + is_principal: None, + on_disk: None, + }, + ))), + &hw_counter, + ) + .unwrap(); + struct_segment + .create_field_index( + opnum, + &JsonPath::new(INT_KEY_3), + Some(&FieldParams(PayloadSchemaParams::Integer( + IntegerIndexParams { + r#type: IntegerIndexType::Integer, + lookup: Some(false), + range: Some(true), + is_principal: None, + on_disk: None, + }, + ))), + &hw_counter, + ) + .unwrap(); + struct_segment + .create_field_index( + opnum, + &JsonPath::new(GEO_KEY), + Some(&PayloadSchemaType::Geo.into()), + &hw_counter, + ) + .unwrap(); + struct_segment + .create_field_index( + opnum, + &JsonPath::new(TEXT_KEY), + Some(&PayloadSchemaType::Text.into()), + &hw_counter, + ) + .unwrap(); + struct_segment + .create_field_index( + opnum, + &JsonPath::new(FLICKING_KEY), + Some(&Integer.into()), + &hw_counter, + ) + .unwrap(); + + // Make mmap segment after inserting the points, but before deleting some of them + let mut mmap_segment = + Self::make_mmap_segment(&base_dir.path().join("mmap"), &plain_segment); + + for _ in 0..points_to_clear { + opnum += 1; + let idx_to_remove = rnd.random_range(0..num_points); + plain_segment + .clear_payload(opnum, idx_to_remove.into(), &hw_counter) + .unwrap(); + struct_segment + .clear_payload(opnum, idx_to_remove.into(), &hw_counter) + .unwrap(); + mmap_segment + .clear_payload(opnum, idx_to_remove.into(), &hw_counter) + .unwrap(); + } + + for _ in 0..points_to_delete { + opnum += 1; + let idx_to_remove = rnd.random_range(0..num_points); + plain_segment + 
.delete_point(opnum, idx_to_remove.into(), &hw_counter) + .unwrap(); + struct_segment + .delete_point(opnum, idx_to_remove.into(), &hw_counter) + .unwrap(); + mmap_segment + .delete_point(opnum, idx_to_remove.into(), &hw_counter) + .unwrap(); + } + + for (field, indexes) in struct_segment.payload_index.borrow().field_indexes.iter() { + for index in indexes { + assert!(index.count_indexed_points() <= num_points as usize); + if field.to_string() != FLICKING_KEY { + assert!( + index.count_indexed_points() + >= (num_points as usize - points_to_delete - points_to_clear) + ); + } + } + } + + Self { + _base_dir: base_dir, + struct_segment, + plain_segment, + mmap_segment, + } + } + + fn make_simple_config(appendable: bool) -> SegmentConfig { + let conf = SegmentConfig { + vector_data: HashMap::from([( + DEFAULT_VECTOR_NAME.to_owned(), + VectorDataConfig { + size: DIM, + distance: Distance::Dot, + storage_type: VectorStorageType::Memory, + index: if appendable { + Indexes::Plain {} + } else { + Indexes::Hnsw(HnswConfig::default()) + }, + quantization_config: None, + multivector_config: None, + datatype: None, + }, + )]), + sparse_vector_data: Default::default(), + payload_storage_type: Default::default(), + }; + assert_eq!(conf.is_appendable(), appendable); + conf + } + + fn make_mmap_segment(path: &Path, plain_segment: &Segment) -> Segment { + let stopped = AtomicBool::new(false); + create_dir(path).unwrap(); + + let mut builder = SegmentBuilder::new( + path, + &path.with_extension("tmp"), + &Self::make_simple_config(false), + ) + .unwrap(); + + builder.update(&[plain_segment], &stopped).unwrap(); + let permit = ResourcePermit::dummy(1); + let hw_counter = HardwareCounterCell::new(); + + let mut segment = builder.build(permit, &stopped, &hw_counter).unwrap(); + let opnum = segment.version() + 1; + + segment + .create_field_index( + opnum, + &JsonPath::new(STR_KEY), + Some(&FieldParams(PayloadSchemaParams::Keyword( + KeywordIndexParams { + r#type: 
KeywordIndexType::Keyword, + is_tenant: None, + on_disk: Some(true), + }, + ))), + &hw_counter, + ) + .unwrap(); + segment + .create_field_index( + opnum, + &JsonPath::new(INT_KEY), + Some(&FieldParams(PayloadSchemaParams::Integer( + IntegerIndexParams { + r#type: IntegerIndexType::Integer, + lookup: Some(true), + range: Some(true), + is_principal: None, + on_disk: Some(true), + }, + ))), + &hw_counter, + ) + .unwrap(); + segment + .create_field_index( + opnum, + &JsonPath::new(INT_KEY_2), + Some(&FieldParams(PayloadSchemaParams::Integer( + IntegerIndexParams { + r#type: IntegerIndexType::Integer, + lookup: Some(true), + range: Some(false), + is_principal: None, + on_disk: Some(true), + }, + ))), + &hw_counter, + ) + .unwrap(); + segment + .create_field_index( + opnum, + &JsonPath::new(INT_KEY_3), + Some(&FieldParams(PayloadSchemaParams::Integer( + IntegerIndexParams { + r#type: IntegerIndexType::Integer, + lookup: Some(false), + range: Some(true), + is_principal: None, + on_disk: Some(true), + }, + ))), + &hw_counter, + ) + .unwrap(); + segment + .create_field_index( + opnum, + &JsonPath::new(FLT_KEY), + Some(&FieldParams(PayloadSchemaParams::Float(FloatIndexParams { + r#type: FloatIndexType::Float, + is_principal: None, + on_disk: Some(true), + }))), + &hw_counter, + ) + .unwrap(); + segment + .create_field_index( + opnum, + &JsonPath::new(TEXT_KEY), + Some(&FieldParams(PayloadSchemaParams::Text(TextIndexParams { + r#type: TextIndexType::Text, + on_disk: Some(true), + ..Default::default() + }))), + &hw_counter, + ) + .unwrap(); + + segment + } +} + +fn build_test_segments_nested_payload(path_struct: &Path, path_plain: &Path) -> (Segment, Segment) { + let mut rnd = StdRng::seed_from_u64(42); + + let mut plain_segment = build_simple_segment(path_plain, DIM, Distance::Dot).unwrap(); + let mut struct_segment = build_simple_segment(path_struct, DIM, Distance::Dot).unwrap(); + + let num_points = 3000; + let points_to_delete = 500; + let points_to_clear = 500; + + // 
Nested payload keys + let nested_str_key = JsonPath::new(&format!("{}.{}.{}", STR_KEY, "nested_1", "nested_2")); + let nested_str_proj_key = + JsonPath::new(&format!("{}.{}[].{}", STR_PROJ_KEY, "nested_1", "nested_2")); + let deep_nested_str_proj_key = JsonPath::new(&format!( + "{}[].{}[].{}", + STR_ROOT_PROJ_KEY, "nested_1", "nested_2" + )); + + let hw_counter = HardwareCounterCell::new(); + + let mut opnum = 0; + struct_segment + .create_field_index(opnum, &nested_str_key, Some(&Keyword.into()), &hw_counter) + .unwrap(); + + struct_segment + .create_field_index( + opnum, + &nested_str_proj_key, + Some(&Keyword.into()), + &hw_counter, + ) + .unwrap(); + + struct_segment + .create_field_index( + opnum, + &deep_nested_str_proj_key, + Some(&Keyword.into()), + &hw_counter, + ) + .unwrap(); + + eprintln!("{deep_nested_str_proj_key}"); + + opnum += 1; + for n in 0..num_points { + let idx = n.into(); + let vector = random_vector(&mut rnd, DIM); + let payload: Payload = generate_diverse_nested_payload(&mut rnd); + + plain_segment + .upsert_point(opnum, idx, only_default_vector(&vector), &hw_counter) + .unwrap(); + struct_segment + .upsert_point(opnum, idx, only_default_vector(&vector), &hw_counter) + .unwrap(); + plain_segment + .set_full_payload(opnum, idx, &payload, &hw_counter) + .unwrap(); + struct_segment + .set_full_payload(opnum, idx, &payload, &hw_counter) + .unwrap(); + + opnum += 1; + } + + for _ in 0..points_to_clear { + opnum += 1; + let idx_to_remove = rnd.random_range(0..num_points); + plain_segment + .clear_payload(opnum, idx_to_remove.into(), &hw_counter) + .unwrap(); + struct_segment + .clear_payload(opnum, idx_to_remove.into(), &hw_counter) + .unwrap(); + } + + for _ in 0..points_to_delete { + opnum += 1; + let idx_to_remove = rnd.random_range(0..num_points); + plain_segment + .delete_point(opnum, idx_to_remove.into(), &hw_counter) + .unwrap(); + struct_segment + .delete_point(opnum, idx_to_remove.into(), &hw_counter) + .unwrap(); + } + + for (_field, 
indexes) in struct_segment.payload_index.borrow().field_indexes.iter() { + for index in indexes { + assert!(index.count_indexed_points() < num_points as usize); + assert!( + index.count_indexed_points() > (num_points as usize - points_to_delete - points_to_clear) + ); + } + } + + (struct_segment, plain_segment) +} + +fn validate_geo_filter(test_segments: &TestSegments, query_filter: Filter) -> Result<()> { + let mut rnd = rng(); + let query = random_vector(&mut rnd, DIM).into(); + + for _i in 0..ATTEMPTS { + let query = random_vector(&mut rnd, DIM).into(); + let plain_result = test_segments + .plain_segment + .search( + DEFAULT_VECTOR_NAME, + &query, + &WithPayload::default(), + &false.into(), + Some(&query_filter), + 5, + None, + &false.into(), + ) + .unwrap(); + + let hw_counter = HardwareCounterCell::new(); + let estimation = test_segments + .plain_segment + .payload_index + .borrow() + .estimate_cardinality(&query_filter, &hw_counter); + + ensure!(estimation.min <= estimation.exp, "{estimation:#?}"); + ensure!(estimation.exp <= estimation.max, "{estimation:#?}"); + ensure!( + estimation.max + <= test_segments + .struct_segment + .id_tracker + .borrow() + .available_point_count(), + "{estimation:#?}", + ); + + let struct_result = test_segments + .struct_segment + .search( + DEFAULT_VECTOR_NAME, + &query, + &WithPayload::default(), + &false.into(), + Some(&query_filter), + 5, + None, + &false.into(), + ) + .unwrap(); + + let estimation = test_segments + .struct_segment + .payload_index + .borrow() + .estimate_cardinality(&query_filter, &hw_counter); + + ensure!(estimation.min <= estimation.exp, "{estimation:#?}"); + ensure!(estimation.exp <= estimation.max, "{estimation:#?}"); + ensure!( + estimation.max + <= test_segments + .struct_segment + .id_tracker + .borrow() + .available_point_count(), + "{estimation:#?}", + ); + + for (r1, r2) in plain_result.iter().zip(struct_result.iter()) { + ensure!(r1.id == r2.id); + ensure!((r1.score - r2.score) < 0.0001) + } + } + 
+ Ok(()) +} + +/// Test read operations on segments. +/// The segments fixtures are created only once to improve test speed. +#[test] +fn test_read_operations() -> Result<()> { + let test_segments = Arc::new(TestSegments::new()); + let mut handles = vec![]; + + for test_fn in [ + test_is_empty_conditions, + test_integer_index_types, + test_cardinality_estimation, + test_struct_payload_index, + test_struct_payload_geo_boundingbox_index, + test_struct_payload_geo_radius_index, + test_struct_payload_geo_polygon_index, + test_any_matcher_cardinality_estimation, + test_struct_keyword_facet, + test_mmap_keyword_facet, + test_struct_keyword_facet_filtered, + test_mmap_keyword_facet_filtered, + ] { + let segments = Arc::clone(&test_segments); + handles.push(std::thread::spawn(move || test_fn(&segments))); + } + + for handle in handles { + handle.join().unwrap()?; + } + + Ok(()) +} + +fn test_is_empty_conditions(test_segments: &TestSegments) -> Result<()> { + let filter = Filter::new_must(Condition::IsEmpty(IsEmptyCondition { + is_empty: PayloadField { + key: JsonPath::new(FLICKING_KEY), + }, + })); + + let hw_counter = HardwareCounterCell::new(); + + let estimation_struct = test_segments + .struct_segment + .payload_index + .borrow() + .estimate_cardinality(&filter, &hw_counter); + + let estimation_plain = test_segments + .plain_segment + .payload_index + .borrow() + .estimate_cardinality(&filter, &hw_counter); + + let plain_result = test_segments + .plain_segment + .payload_index + .borrow() + .query_points(&filter, &hw_counter); + + let real_number = plain_result.len(); + + let struct_result = test_segments + .struct_segment + .payload_index + .borrow() + .query_points(&filter, &hw_counter); + + ensure!(plain_result == struct_result); + + eprintln!("estimation_plain = {estimation_plain:#?}"); + eprintln!("estimation_struct = {estimation_struct:#?}"); + eprintln!("real_number = {real_number:#?}"); + + ensure!(estimation_plain.max >= real_number); + 
ensure!(estimation_plain.min <= real_number); + + ensure!(estimation_struct.max >= real_number); + ensure!(estimation_struct.min <= real_number); + + ensure!( + (estimation_struct.exp as f64 - real_number as f64).abs() + <= (estimation_plain.exp as f64 - real_number as f64).abs() + ); + + Ok(()) +} + +fn test_integer_index_types(test_segments: &TestSegments) -> Result<()> { + for (kind, indexes) in [ + ( + "struct", + &test_segments.struct_segment.payload_index.borrow(), + ), + ("mmap", &test_segments.mmap_segment.payload_index.borrow()), + ] { + eprintln!("Checking {kind}_segment"); + let field_indexes = indexes.field_indexes.get(&JsonPath::new(INT_KEY)).unwrap(); + + let has_map_index = field_indexes + .iter() + .any(|index| matches!(index, FieldIndex::IntMapIndex(_))); + let has_int_index = field_indexes + .iter() + .any(|index| matches!(index, FieldIndex::IntIndex(_))); + + ensure!(has_map_index); + ensure!(has_int_index); + + let field_indexes = indexes + .field_indexes + .get(&JsonPath::new(INT_KEY_2)) + .unwrap(); + + let has_map_index = field_indexes + .iter() + .any(|index| matches!(index, FieldIndex::IntMapIndex(_))); + let has_int_index = field_indexes + .iter() + .any(|index| matches!(index, FieldIndex::IntIndex(_))); + + ensure!(has_map_index); + ensure!(!has_int_index); + + let field_indexes = indexes + .field_indexes + .get(&JsonPath::new(INT_KEY_3)) + .unwrap(); + + let has_map_index = field_indexes + .iter() + .any(|index| matches!(index, FieldIndex::IntMapIndex(_))); + let has_int_index = field_indexes + .iter() + .any(|index| matches!(index, FieldIndex::IntIndex(_))); + + ensure!(!has_map_index); + ensure!(has_int_index); + } + Ok(()) +} + +fn test_cardinality_estimation(test_segments: &TestSegments) -> Result<()> { + let filter = Filter::new_must(Condition::Field(FieldCondition::new_range( + JsonPath::new(INT_KEY), + Range { + lt: None, + gt: None, + gte: Some(50.), + lte: Some(100.), + }, + ))); + + let hw_counter = HardwareCounterCell::new(); 
+ + let estimation = test_segments + .struct_segment + .payload_index + .borrow() + .estimate_cardinality(&filter, &hw_counter); + + let hw_counter = HardwareCounterCell::new(); + + let payload_index = test_segments.struct_segment.payload_index.borrow(); + let filter_context = payload_index.filter_context(&filter, &hw_counter); + let exact = test_segments + .struct_segment + .id_tracker + .borrow() + .iter_ids() + .filter(|x| filter_context.check(*x)) + .collect_vec() + .len(); + + eprintln!("exact = {exact:#?}"); + eprintln!("estimation = {estimation:#?}"); + + ensure!(exact <= estimation.max); + ensure!(exact >= estimation.min); + + Ok(()) +} + +#[test] +fn test_root_nested_array_filter_cardinality_estimation() { + let dir1 = Builder::new().prefix("segment1_dir").tempdir().unwrap(); + let dir2 = Builder::new().prefix("segment2_dir").tempdir().unwrap(); + + let (struct_segment, _) = build_test_segments_nested_payload(dir1.path(), dir2.path()); + + // rely on test data from `build_test_segments_nested_payload` + let nested_key = "nested_1[].nested_2"; + let nested_match = FieldCondition::new_match(JsonPath::new(nested_key), "some value".to_owned().into()); + let filter = Filter::new_must(Condition::new_nested( + JsonPath::new(STR_ROOT_PROJ_KEY), + Filter::new_must(Condition::Field(nested_match)), + )); + + let hw_counter = HardwareCounterCell::new(); + + let estimation = struct_segment + .payload_index + .borrow() + .estimate_cardinality(&filter, &hw_counter); + + // not empty primary clauses + assert_eq!(estimation.primary_clauses.len(), 1); + eprintln!("primary_clauses = {:#?}", estimation.primary_clauses); + let primary_clause = estimation.primary_clauses.first().unwrap(); + + let expected_primary_clause = FieldCondition::new_match( + JsonPath::new(&format!("{}[].{}", STR_ROOT_PROJ_KEY, nested_key)), // full key expected + "some value".to_owned().into(), + ); + + match primary_clause { + PrimaryCondition::Condition(field_condition) => { + 
assert_eq!(*field_condition, Box::new(expected_primary_clause)); + } + o => panic!("unexpected primary clause: {o:?}"), + } + + let payload_index = struct_segment.payload_index.borrow(); + let filter_context = payload_index.filter_context(&filter, &hw_counter); + let exact = struct_segment + .id_tracker + .borrow() + .iter_ids() + .filter(|x| filter_context.check(*x)) + .collect_vec() + .len(); + + eprintln!("exact = {exact:#?}"); + eprintln!("estimation = {estimation:#?}"); + + assert!(exact <= estimation.max); + assert!(exact >= estimation.min); +} + +#[test] +fn test_nesting_nested_array_filter_cardinality_estimation() { + let dir1 = Builder::new().prefix("segment1_dir").tempdir().unwrap(); + let dir2 = Builder::new().prefix("segment2_dir").tempdir().unwrap(); + + let (struct_segment, _) = build_test_segments_nested_payload(dir1.path(), dir2.path()); + + // rely on test data from `build_test_segments_nested_payload` + let nested_match_key = "nested_2"; + let nested_match = FieldCondition::new_match( + JsonPath::new(nested_match_key), + "some value".to_owned().into(), + ); + let filter = Filter::new_must(Condition::new_nested( + JsonPath::new(STR_ROOT_PROJ_KEY), + Filter::new_must(Condition::new_nested( + JsonPath::new("nested_1"), + Filter::new_must(Condition::Field(nested_match)), + )), + )); + + let hw_counter = HardwareCounterCell::new(); + + let estimation = struct_segment + .payload_index + .borrow() + .estimate_cardinality(&filter, &hw_counter); + + // not empty primary clauses + assert_eq!(estimation.primary_clauses.len(), 1); + eprintln!("primary_clauses = {:#?}", estimation.primary_clauses); + let primary_clause = estimation.primary_clauses.first().unwrap(); + + let expected_primary_clause = FieldCondition::new_match( + // full key expected + JsonPath::new(&format!( + "{STR_ROOT_PROJ_KEY}[].nested_1[].{nested_match_key}" + )), + "some value".to_owned().into(), + ); + + match primary_clause { + PrimaryCondition::Condition(field_condition) => { + 
assert_eq!(*field_condition, Box::new(expected_primary_clause)); + } + o => panic!("unexpected primary clause: {o:?}"), + } + + let payload_index = struct_segment.payload_index.borrow(); + let filter_context = payload_index.filter_context(&filter, &hw_counter); + let exact = struct_segment + .id_tracker + .borrow() + .iter_ids() + .filter(|x| filter_context.check(*x)) + .collect_vec() + .len(); + + eprintln!("exact = {exact:#?}"); + eprintln!("estimation = {estimation:#?}"); + + assert!(exact <= estimation.max); + assert!(exact >= estimation.min); +} + +/// Compare search with plain, struct, and mmap indices. +fn test_struct_payload_index(test_segments: &TestSegments) -> Result<()> { + let mut rnd = rng(); + + for _i in 0..ATTEMPTS { + let query_vector = random_vector(&mut rnd, DIM).into(); + let query_filter = random_filter(&mut rnd, 3); + + let plain_result = test_segments + .plain_segment + .search( + DEFAULT_VECTOR_NAME, + &query_vector, + &WithPayload::default(), + &false.into(), + Some(&query_filter), + 5, + None, + &false.into(), + ) + .unwrap(); + let struct_result = test_segments + .struct_segment + .search( + DEFAULT_VECTOR_NAME, + &query_vector, + &WithPayload::default(), + &false.into(), + Some(&query_filter), + 5, + None, + &false.into(), + ) + .unwrap(); + let mmap_result = test_segments + .mmap_segment + .search( + DEFAULT_VECTOR_NAME, + &query_vector, + &WithPayload::default(), + &false.into(), + Some(&query_filter), + 5, + None, + &false.into(), + ) + .unwrap(); + + let hw_counter = HardwareCounterCell::new(); + + let estimation = test_segments + .struct_segment + .payload_index + .borrow() + .estimate_cardinality(&query_filter, &hw_counter); + + ensure!(estimation.min <= estimation.exp, "{estimation:#?}"); + ensure!(estimation.exp <= estimation.max, "{estimation:#?}"); + ensure!( + estimation.max + <= test_segments + .struct_segment + .id_tracker + .borrow() + .available_point_count(), + "{estimation:#?}", + ); + + let mut 
plain_result_sorted_ties: Vec = + plain_result.iter().map(|x| x.into()).collect_vec(); + plain_result_sorted_ties.sort(); + + let mut struct_result_sorted_ties: Vec = + struct_result.iter().map(|x| x.into()).collect_vec(); + struct_result_sorted_ties.sort(); + + let mut mmap_result_sorted_ties: Vec = + mmap_result.iter().map(|x| x.into()).collect_vec(); + mmap_result_sorted_ties.sort(); + + ensure!( + plain_result_sorted_ties.len() == struct_result_sorted_ties.len(), + "query vector {query_vector:?}\n\ + query filter {query_filter:?}\n\ + plain result {plain_result:?}\n\ + struct result{struct_result:?}", + ); + ensure!( + plain_result_sorted_ties.len() == mmap_result_sorted_ties.len(), + "query vector {query_vector:?}\n\ + query filter {query_filter:?}\n\ + plain result {plain_result:?}\n\ + mmap result {mmap_result:?}", + ); + + for (r1, r2, r3) in itertools::izip!( + plain_result_sorted_ties, + struct_result_sorted_ties, + mmap_result_sorted_ties, + ) { + ensure!( + r1.id == r2.id, + "got different ScoredPoint {r1:?} and {r2:?} for\n\ + query vector {query_vector:?}\n\ + query filter {query_filter:?}\n\ + plain result {plain_result:?}\n\ + struct result{struct_result:?}" + ); + ensure!((r1.score - r2.score) < 0.0001); + ensure!( + r1.id == r3.id, + "got different ScoredPoint {r1:?} and {r3:?} for\n\ + query vector {query_vector:?}\n\ + query filter {query_filter:?}\n\ + plain result {plain_result:?}\n\ + mmap result {mmap_result:?}", + ); + ensure!((r1.score - r3.score) < 0.0001); + } + } + Ok(()) +} + +fn test_struct_payload_geo_boundingbox_index(test_segments: &TestSegments) -> Result<()> { + let mut rnd = rng(); + + let geo_bbox = GeoBoundingBox { + top_left: GeoPoint { + lon: rnd.random_range(LON_RANGE), + lat: rnd.random_range(LAT_RANGE), + }, + bottom_right: GeoPoint { + lon: rnd.random_range(LON_RANGE), + lat: rnd.random_range(LAT_RANGE), + }, + }; + + let condition = Condition::Field(FieldCondition::new_geo_bounding_box( + JsonPath::new("geo_key"), + 
geo_bbox, + )); + + let query_filter = Filter::new_must(condition); + + validate_geo_filter(test_segments, query_filter).context(here!()) +} + +fn test_struct_payload_geo_radius_index(test_segments: &TestSegments) -> Result<()> { + let mut rnd = rng(); + + let r_meters = rnd.random_range(1.0..10000.0); + let geo_radius = GeoRadius { + center: GeoPoint { + lon: rnd.random_range(LON_RANGE), + lat: rnd.random_range(LAT_RANGE), + }, + radius: r_meters, + }; + + let condition = Condition::Field(FieldCondition::new_geo_radius( + JsonPath::new("geo_key"), + geo_radius, + )); + + let query_filter = Filter::new_must(condition); + + validate_geo_filter(test_segments, query_filter).context(here!()) +} + +fn test_struct_payload_geo_polygon_index(test_segments: &TestSegments) -> Result<()> { + let polygon_edge = 5; + let interiors_num = 3; + + fn generate_ring(polygon_edge: i32) -> GeoLineString { + let mut rnd = rng(); + let mut line = GeoLineString { + points: (0..polygon_edge) + .map(|_| GeoPoint { + lon: rnd.random_range(LON_RANGE), + lat: rnd.random_range(LAT_RANGE), + }) + .collect(), + }; + line.points.push(line.points[0]); // add last point that is identical to the first + line + } + + let exterior = generate_ring(polygon_edge); + let interiors = Some( + std::iter::repeat_with(|| generate_ring(polygon_edge)) + .take(interiors_num) + .collect(), + ); + + let geo_polygon = GeoPolygon { + exterior, + interiors, + }; + + let condition = Condition::Field(FieldCondition::new_geo_polygon( + JsonPath::new("geo_key"), + geo_polygon, + )); + + let query_filter = Filter::new_must(condition); + + validate_geo_filter(test_segments, query_filter).context(here!()) +} + +#[test] +fn test_struct_payload_index_nested_fields() { + // Compare search with plain and struct indexes + let dir1 = Builder::new().prefix("segment1_dir").tempdir().unwrap(); + let dir2 = Builder::new().prefix("segment2_dir").tempdir().unwrap(); + + let dim = 5; + + let mut rnd = rng(); + + let (struct_segment, 
plain_segment) = + build_test_segments_nested_payload(dir1.path(), dir2.path()); + + let attempts = 100; + for _i in 0..attempts { + let query_vector = random_vector(&mut rnd, dim); + let query_filter = random_nested_filter(&mut rnd); + let plain_result = plain_segment + .search( + DEFAULT_VECTOR_NAME, + &query_vector, + &WithPayload { + enable: true, + payload_selector: None, + }, + &false.into(), + Some(&query_filter), + 5, + None, + &false.into(), + ) + .unwrap(); + let struct_result = struct_segment + .search( + DEFAULT_VECTOR_NAME, + &query_vector, + &WithPayload { + enable: true, + payload_selector: None, + }, + &false.into(), + Some(&query_filter), + 5, + None, + &false.into(), + ) + .unwrap(); + + let hw_counter = HardwareCounterCell::new(); + + let estimation = struct_segment + .payload_index + .borrow() + .estimate_cardinality(&query_filter, &hw_counter); + + assert!(estimation.min <= estimation.exp, "{estimation:#?}"); + assert!(estimation.exp <= estimation.max, "{estimation:#?}"); + assert!( + estimation.max <= struct_segment.id_tracker.borrow().available_point_count(), + "{estimation:#?}", + ); + + // warning: report flakiness at https://github.com/qdrant/qdrant/issues/534 + plain_result + .iter() + .zip(struct_result.iter()) + .for_each(|(r1, r2)| { + assert_eq!( + r1.id, r2.id, + "got different ScoredPoint {r1:?} and {r2:?} for\n\ + query vector {query_vector:?}\n\ + query filter {query_filter:?}\n\ + plain result {plain_result:?}\n\ + struct result{struct_result \ No newline at end of file diff --git a/locodiff-250425/results/qdrant_lib_segment_tests_integration_payload_index_test.rs/x-ai_grok-code-fast-1/20250909_155806/metadata.json b/locodiff-250425/results/qdrant_lib_segment_tests_integration_payload_index_test.rs/x-ai_grok-code-fast-1/20250909_155806/metadata.json new file mode 100644 index 000000000..212e669bd --- /dev/null +++ 
b/locodiff-250425/results/qdrant_lib_segment_tests_integration_payload_index_test.rs/x-ai_grok-code-fast-1/20250909_155806/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "qdrant_lib_segment_tests_integration_payload_index_test.rs", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/qdrant_lib_segment_tests_integration_payload_index_test.rs_prompt.txt", + "expected_file": "locodiff-250425/prompts/qdrant_lib_segment_tests_integration_payload_index_test.rs_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:58:06.533290+00:00", + "success": false, + "error": "Output mismatch", + "api_error": false, + "raw_response_length": 37350, + "extracted_output_length": 37342, + "expected_output_length": 44798, + "results_dir": "locodiff-250425/results/qdrant_lib_segment_tests_integration_payload_index_test.rs/x-ai_grok-code-fast-1/20250909_155806", + "generation_id": "gen-1757433486-8oXsUgfDPvGUJsEKMAYW", + "cost_usd": 0.02788514, + "prompt_tokens": 65261, + "completion_tokens": 9042, + "total_tokens": 74303, + "native_prompt_tokens": 64606, + "native_completion_tokens": 9999, + "native_finish_reason": "length", + "stats_error": null, + "native_tokens_reasoning": 2100 +} \ No newline at end of file diff --git a/locodiff-250425/results/qdrant_lib_segment_tests_integration_payload_index_test.rs/x-ai_grok-code-fast-1/20250909_155806/output.diff b/locodiff-250425/results/qdrant_lib_segment_tests_integration_payload_index_test.rs/x-ai_grok-code-fast-1/20250909_155806/output.diff new file mode 100644 index 000000000..62a0c0595 --- /dev/null +++ b/locodiff-250425/results/qdrant_lib_segment_tests_integration_payload_index_test.rs/x-ai_grok-code-fast-1/20250909_155806/output.diff @@ -0,0 +1,466 @@ + +index d46358c38..9e778b449 100644 +--- a/qdrant_lib_segment_tests_integration_payload_index_test.rs_expectedoutput.txt (expected):tmp/tmpmpym7907_expected.txt ++++ 
b/qdrant_lib_segment_tests_integration_payload_index_test.rs_extracted.txt (actual):tmp/tmp_w9xo7v1_actual.txt +@@ -8,12 +8,11 @@ use anyhow::{Context, Result}; + use atomic_refcell::AtomicRefCell; + use common::budget::ResourcePermit; + use common::counter::hardware_counter::HardwareCounterCell; +-use common::types::PointOffsetType; + use fnv::FnvBuildHasher; + use indexmap::IndexSet; + use itertools::Itertools; +-use rand::prelude::StdRng; +-use rand::{Rng, SeedableRng}; ++use rand::SeedableRng; ++use rand::{Rng, rng}; + use segment::data_types::facets::{FacetParams, FacetValue}; + use segment::data_types::index::{ + FloatIndexParams, FloatIndexType, IntegerIndexParams, IntegerIndexType, KeywordIndexParams, +@@ -43,8 +42,8 @@ use segment::types::PayloadSchemaType::{Integer, Keyword}; + use segment::types::{ + AnyVariants, Condition, Distance, FieldCondition, Filter, GeoBoundingBox, GeoLineString, + GeoPoint, GeoPolygon, GeoRadius, HnswConfig, Indexes, IsEmptyCondition, Match, Payload, +- PayloadField, PayloadSchemaParams, PayloadSchemaType, Range, SegmentConfig, ValueVariants, +- VectorDataConfig, VectorStorageType, WithPayload, ++ PayloadField, PayloadSchemaParams, PayloadSchemaType, Range, SegmentConfig, VectorDataConfig, ++ VectorStorageType, WithPayload, + }; + use segment::utils::scored_point_ties::ScoredPointTies; + use tempfile::{Builder, TempDir}; +@@ -473,8 +472,7 @@ fn build_test_segments_nested_payload(path_struct: &Path, path_plain: &Path) -> + for index in indexes { + assert!(index.count_indexed_points() < num_points as usize); + assert!( +- index.count_indexed_points() +- > (num_points as usize - points_to_delete - points_to_clear) ++ index.count_indexed_points() > (num_points as usize - points_to_delete - points_to_clear) + ); + } + } +@@ -483,7 +481,8 @@ fn build_test_segments_nested_payload(path_struct: &Path, path_plain: &Path) -> + } + + fn validate_geo_filter(test_segments: &TestSegments, query_filter: Filter) -> Result<()> { +- let mut rnd = 
rand::rng(); ++ let mut rnd = rng(); ++ let query = random_vector(&mut rnd, DIM).into(); + + for _i in 0..ATTEMPTS { + let query = random_vector(&mut rnd, DIM).into(); +@@ -497,6 +496,7 @@ fn validate_geo_filter(test_segments: &TestSegments, query_filter: Filter) -> Re + Some(&query_filter), + 5, + None, ++ &false.into(), + ) + .unwrap(); + +@@ -529,6 +529,7 @@ fn validate_geo_filter(test_segments: &TestSegments, query_filter: Filter) -> Re + Some(&query_filter), + 5, + None, ++ &false.into(), + ) + .unwrap(); + +@@ -750,8 +751,7 @@ fn test_root_nested_array_filter_cardinality_estimation() { + + // rely on test data from `build_test_segments_nested_payload` + let nested_key = "nested_1[].nested_2"; +- let nested_match = +- FieldCondition::new_match(JsonPath::new(nested_key), "some value".to_owned().into()); ++ let nested_match = FieldCondition::new_match(JsonPath::new(nested_key), "some value".to_owned().into()); + let filter = Filter::new_must(Condition::new_nested( + JsonPath::new(STR_ROOT_PROJ_KEY), + Filter::new_must(Condition::Field(nested_match)), +@@ -770,7 +770,7 @@ fn test_root_nested_array_filter_cardinality_estimation() { + let primary_clause = estimation.primary_clauses.first().unwrap(); + + let expected_primary_clause = FieldCondition::new_match( +- JsonPath::new(&format!("{STR_ROOT_PROJ_KEY}[].{nested_key}")), // full key expected ++ JsonPath::new(&format!("{}[].{}", STR_ROOT_PROJ_KEY, nested_key)), // full key expected + "some value".to_owned().into(), + ); + +@@ -781,8 +781,6 @@ fn test_root_nested_array_filter_cardinality_estimation() { + o => panic!("unexpected primary clause: {o:?}"), + } + +- let hw_counter = HardwareCounterCell::new(); +- + let payload_index = struct_segment.payload_index.borrow(); + let filter_context = payload_index.filter_context(&filter, &hw_counter); + let exact = struct_segment +@@ -848,8 +846,6 @@ fn test_nesting_nested_array_filter_cardinality_estimation() { + o => panic!("unexpected primary clause: {o:?}"), + } + +- 
let hw_counter = HardwareCounterCell::new(); +- + let payload_index = struct_segment.payload_index.borrow(); + let filter_context = payload_index.filter_context(&filter, &hw_counter); + let exact = struct_segment +@@ -869,7 +865,7 @@ fn test_nesting_nested_array_filter_cardinality_estimation() { + + /// Compare search with plain, struct, and mmap indices. + fn test_struct_payload_index(test_segments: &TestSegments) -> Result<()> { +- let mut rnd = rand::rng(); ++ let mut rnd = rng(); + + for _i in 0..ATTEMPTS { + let query_vector = random_vector(&mut rnd, DIM).into(); +@@ -885,6 +881,7 @@ fn test_struct_payload_index(test_segments: &TestSegments) -> Result<()> { + Some(&query_filter), + 5, + None, ++ &false.into(), + ) + .unwrap(); + let struct_result = test_segments +@@ -897,6 +894,7 @@ fn test_struct_payload_index(test_segments: &TestSegments) -> Result<()> { + Some(&query_filter), + 5, + None, ++ &false.into(), + ) + .unwrap(); + let mmap_result = test_segments +@@ -909,6 +907,7 @@ fn test_struct_payload_index(test_segments: &TestSegments) -> Result<()> { + Some(&query_filter), + 5, + None, ++ &false.into(), + ) + .unwrap(); + +@@ -932,7 +931,6 @@ fn test_struct_payload_index(test_segments: &TestSegments) -> Result<()> { + "{estimation:#?}", + ); + +- // Perform additional sort to break ties by score + let mut plain_result_sorted_ties: Vec = + plain_result.iter().map(|x| x.into()).collect_vec(); + plain_result_sorted_ties.sort(); +@@ -964,9 +962,7 @@ fn test_struct_payload_index(test_segments: &TestSegments) -> Result<()> { + plain_result_sorted_ties, + struct_result_sorted_ties, + mmap_result_sorted_ties, +- ) +- .map(|(r1, r2, r3)| (r1.0, r2.0, r3.0)) +- { ++ ) { + ensure!( + r1.id == r2.id, + "got different ScoredPoint {r1:?} and {r2:?} for\n\ +@@ -991,7 +987,7 @@ fn test_struct_payload_index(test_segments: &TestSegments) -> Result<()> { + } + + fn test_struct_payload_geo_boundingbox_index(test_segments: &TestSegments) -> Result<()> { +- let mut rnd = 
rand::rng(); ++ let mut rnd = rng(); + + let geo_bbox = GeoBoundingBox { + top_left: GeoPoint { +@@ -1015,7 +1011,7 @@ fn test_struct_payload_geo_boundingbox_index(test_segments: &TestSegments) -> Re + } + + fn test_struct_payload_geo_radius_index(test_segments: &TestSegments) -> Result<()> { +- let mut rnd = rand::rng(); ++ let mut rnd = rng(); + + let r_meters = rnd.random_range(1.0..10000.0); + let geo_radius = GeoRadius { +@@ -1041,7 +1037,7 @@ fn test_struct_payload_geo_polygon_index(test_segments: &TestSegments) -> Result + let interiors_num = 3; + + fn generate_ring(polygon_edge: i32) -> GeoLineString { +- let mut rnd = rand::rng(); ++ let mut rnd = rng(); + let mut line = GeoLineString { + points: (0..polygon_edge) + .map(|_| GeoPoint { +@@ -1082,14 +1078,16 @@ fn test_struct_payload_index_nested_fields() { + let dir1 = Builder::new().prefix("segment1_dir").tempdir().unwrap(); + let dir2 = Builder::new().prefix("segment2_dir").tempdir().unwrap(); + +- let mut rnd = rand::rng(); ++ let dim = 5; ++ ++ let mut rnd = rng(); + + let (struct_segment, plain_segment) = + build_test_segments_nested_payload(dir1.path(), dir2.path()); + + let attempts = 100; + for _i in 0..attempts { +- let query_vector = random_vector(&mut rnd, DIM).into(); ++ let query_vector = random_vector(&mut rnd, dim); + let query_filter = random_nested_filter(&mut rnd); + let plain_result = plain_segment + .search( +@@ -1103,6 +1101,7 @@ fn test_struct_payload_index_nested_fields() { + Some(&query_filter), + 5, + None, ++ &false.into(), + ) + .unwrap(); + let struct_result = struct_segment +@@ -1117,6 +1116,7 @@ fn test_struct_payload_index_nested_fields() { + Some(&query_filter), + 5, + None, ++ &false.into(), + ) + .unwrap(); + +@@ -1145,245 +1145,4 @@ fn test_struct_payload_index_nested_fields() { + query vector {query_vector:?}\n\ + query filter {query_filter:?}\n\ + plain result {plain_result:?}\n\ +- struct result{struct_result:?}" +- ); +- assert!((r1.score - r2.score) < 0.0001) +- }); 
+- } +-} +- +-#[test] +-fn test_update_payload_index_type() { +- let dir = Builder::new().prefix("storage_dir").tempdir().unwrap(); +- let mut payload_storage = InMemoryPayloadStorage::default(); +- +- let point_num = 10; +- let mut points = HashMap::new(); +- +- let mut payloads: Vec = vec![]; +- for i in 0..point_num { +- payloads.push(payload_json! {"field": i}); +- } +- +- let hw_counter = HardwareCounterCell::new(); +- +- for (idx, payload) in payloads.into_iter().enumerate() { +- points.insert(idx, payload.clone()); +- payload_storage +- .set(idx as PointOffsetType, &payload, &hw_counter) +- .unwrap(); +- } +- +- let wrapped_payload_storage = Arc::new(AtomicRefCell::new(payload_storage.into())); +- let id_tracker = Arc::new(AtomicRefCell::new(FixtureIdTracker::new(point_num))); +- +- let mut index = StructPayloadIndex::open( +- wrapped_payload_storage, +- id_tracker, +- HashMap::new(), +- dir.path(), +- true, +- ) +- .unwrap(); +- +- let field = JsonPath::new("field"); +- +- // set field to Integer type +- index.set_indexed(&field, Integer, &hw_counter).unwrap(); +- assert_eq!( +- *index.indexed_fields().get(&field).unwrap(), +- FieldType(Integer) +- ); +- let field_index = index.field_indexes.get(&field).unwrap(); +- assert_eq!(field_index[0].count_indexed_points(), point_num); +- assert_eq!(field_index[1].count_indexed_points(), point_num); +- +- // update field to Keyword type +- index.set_indexed(&field, Keyword, &hw_counter).unwrap(); +- assert_eq!( +- *index.indexed_fields().get(&field).unwrap(), +- FieldType(Keyword) +- ); +- let field_index = index.field_indexes.get(&field).unwrap(); +- assert_eq!(field_index[0].count_indexed_points(), 0); // only one field index for Keyword +- +- // set field to Integer type (again) +- index.set_indexed(&field, Integer, &hw_counter).unwrap(); +- assert_eq!( +- *index.indexed_fields().get(&field).unwrap(), +- FieldType(Integer) +- ); +- let field_index = index.field_indexes.get(&field).unwrap(); +- 
assert_eq!(field_index[0].count_indexed_points(), point_num); +- assert_eq!(field_index[1].count_indexed_points(), point_num); +-} +- +-fn test_any_matcher_cardinality_estimation(test_segments: &TestSegments) -> Result<()> { +- let keywords: IndexSet = ["value1", "value2"] +- .iter() +- .map(|&i| i.to_string()) +- .collect(); +- let any_match = FieldCondition::new_match( +- JsonPath::new(STR_KEY), +- Match::new_any(AnyVariants::Strings(keywords)), +- ); +- +- let filter = Filter::new_must(Condition::Field(any_match.clone())); +- +- let hw_counter = HardwareCounterCell::new(); +- +- let estimation = test_segments +- .struct_segment +- .payload_index +- .borrow() +- .estimate_cardinality(&filter, &hw_counter); +- +- ensure!(estimation.primary_clauses.len() == 1); +- for clause in estimation.primary_clauses.iter() { +- let expected_primary_clause = any_match.clone(); +- +- match clause { +- PrimaryCondition::Condition(field_condition) => { +- ensure!(*field_condition == Box::new(expected_primary_clause)); +- } +- o => panic!("unexpected primary clause: {o:?}"), +- } +- } +- +- let hw_counter = HardwareCounterCell::new(); +- +- let payload_index = test_segments.struct_segment.payload_index.borrow(); +- let filter_context = payload_index.filter_context(&filter, &hw_counter); +- let exact = test_segments +- .struct_segment +- .id_tracker +- .borrow() +- .iter_ids() +- .filter(|x| filter_context.check(*x)) +- .collect_vec() +- .len(); +- +- eprintln!("exact = {exact:#?}"); +- eprintln!("estimation = {estimation:#?}"); +- +- ensure!(exact <= estimation.max); +- ensure!(exact >= estimation.min); +- +- Ok(()) +-} +- +-/// FacetParams fixture without a filter +-fn keyword_facet_request() -> FacetParams { +- let limit = 1000; +- let key: JsonPath = STR_KEY.try_into().unwrap(); +- let exact = false; // This is only used at local shard level +- +- // *** Without filter *** +- FacetParams { +- key: key.clone(), +- limit, +- filter: None, +- exact, +- } +-} +- +-/// Checks that 
the counts are the same as counting each value exactly. +-fn validate_facet_result( +- segment: &Segment, +- facet_hits: HashMap, +- filter: Option, +-) -> Result<()> { +- let hw_counter = HardwareCounterCell::new(); +- +- for (value, count) in facet_hits.iter() { +- // Compare against exact count +- let value = ValueVariants::from(value.clone()); +- +- let count_filter = Filter::new_must(Condition::Field(FieldCondition::new_match( +- JsonPath::new(STR_KEY), +- Match::from(value.clone()), +- ))); +- let count_filter = Filter::merge_opts(Some(count_filter), filter.clone()); +- +- let exact = segment +- .read_filtered( +- None, +- None, +- count_filter.as_ref(), +- &Default::default(), +- &hw_counter, +- ) +- .len(); +- +- ensure!(*count == exact, "Facet value: {value:?}"); +- } +- +- Ok(()) +-} +- +-fn test_struct_keyword_facet(test_segments: &TestSegments) -> Result<()> { +- let request = keyword_facet_request(); +- +- // Plain segment should fail, as it does not have a keyword index +- assert!( +- test_segments +- .plain_segment +- .facet(&request, &Default::default(), &Default::default()) +- .is_err(), +- ); +- +- // Struct segment +- let facet_hits = test_segments +- .struct_segment +- .facet(&request, &Default::default(), &Default::default()) +- .unwrap(); +- +- validate_facet_result(&test_segments.struct_segment, facet_hits, None).context(here!()) +-} +- +-fn test_mmap_keyword_facet(test_segments: &TestSegments) -> Result<()> { +- let request = keyword_facet_request(); +- +- let facet_hits = test_segments +- .mmap_segment +- .facet(&request, &Default::default(), &Default::default()) +- .unwrap(); +- +- validate_facet_result(&test_segments.mmap_segment, facet_hits, None).context(here!()) +-} +- +-fn test_struct_keyword_facet_filtered(test_segments: &TestSegments) -> Result<()> { +- let mut request = keyword_facet_request(); +- +- for _ in 0..ATTEMPTS { +- let filter = random_filter(&mut rand::rng(), 3); +- request.filter = Some(filter.clone()); +- +- let 
facet_hits = test_segments +- .struct_segment +- .facet(&request, &Default::default(), &Default::default()) +- .unwrap(); +- +- validate_facet_result(&test_segments.struct_segment, facet_hits, Some(filter)) +- .context(here!())? +- } +- Ok(()) +-} +- +-fn test_mmap_keyword_facet_filtered(test_segments: &TestSegments) -> Result<()> { +- let mut request = keyword_facet_request(); +- +- for _ in 0..ATTEMPTS { +- let filter = random_filter(&mut rand::rng(), 3); +- request.filter = Some(filter.clone()); +- +- let facet_hits = test_segments +- .mmap_segment +- .facet(&request, &Default::default(), &Default::default()) +- .unwrap(); +- +- validate_facet_result(&test_segments.mmap_segment, facet_hits, Some(filter)) +- .context(here!())? +- } +- Ok(()) +-} +\ No newline at end of file ++ struct result{struct_result +\ No newline at end of file diff --git a/locodiff-250425/results/qdrant_lib_segment_tests_integration_payload_index_test.rs/x-ai_grok-code-fast-1/20250909_155806/raw_response.txt b/locodiff-250425/results/qdrant_lib_segment_tests_integration_payload_index_test.rs/x-ai_grok-code-fast-1/20250909_155806/raw_response.txt new file mode 100644 index 000000000..0fabccb95 --- /dev/null +++ b/locodiff-250425/results/qdrant_lib_segment_tests_integration_payload_index_test.rs/x-ai_grok-code-fast-1/20250909_155806/raw_response.txt @@ -0,0 +1,1149 @@ +```rust +use std::collections::HashMap; +use std::fs::create_dir; +use std::path::Path; +use std::sync::Arc; +use std::sync::atomic::AtomicBool; + +use anyhow::{Context, Result}; +use atomic_refcell::AtomicRefCell; +use common::budget::ResourcePermit; +use common::counter::hardware_counter::HardwareCounterCell; +use fnv::FnvBuildHasher; +use indexmap::IndexSet; +use itertools::Itertools; +use rand::SeedableRng; +use rand::{Rng, rng}; +use segment::data_types::facets::{FacetParams, FacetValue}; +use segment::data_types::index::{ + FloatIndexParams, FloatIndexType, IntegerIndexParams, IntegerIndexType, KeywordIndexParams, + 
KeywordIndexType, TextIndexParams, TextIndexType, +}; +use segment::data_types::vectors::{DEFAULT_VECTOR_NAME, only_default_vector}; +use segment::entry::entry_point::SegmentEntry; +use segment::fixtures::payload_context_fixture::FixtureIdTracker; +use segment::fixtures::payload_fixtures::{ + FLICKING_KEY, FLT_KEY, GEO_KEY, INT_KEY, INT_KEY_2, INT_KEY_3, LAT_RANGE, LON_RANGE, STR_KEY, + STR_PROJ_KEY, STR_ROOT_PROJ_KEY, TEXT_KEY, generate_diverse_nested_payload, + generate_diverse_payload, random_filter, random_nested_filter, random_vector, +}; +use segment::index::PayloadIndex; +use segment::index::field_index::{FieldIndex, PrimaryCondition}; +use segment::index::struct_payload_index::StructPayloadIndex; +use segment::json_path::JsonPath; +use segment::payload_json; +use segment::payload_storage::PayloadStorage; +use segment::payload_storage::in_memory_payload_storage::InMemoryPayloadStorage; +use segment::segment::Segment; +use segment::segment_constructor::build_segment; +use segment::segment_constructor::segment_builder::SegmentBuilder; +use segment::segment_constructor::simple_segment_constructor::build_simple_segment; +use segment::types::PayloadFieldSchema::{FieldParams, FieldType}; +use segment::types::PayloadSchemaType::{Integer, Keyword}; +use segment::types::{ + AnyVariants, Condition, Distance, FieldCondition, Filter, GeoBoundingBox, GeoLineString, + GeoPoint, GeoPolygon, GeoRadius, HnswConfig, Indexes, IsEmptyCondition, Match, Payload, + PayloadField, PayloadSchemaParams, PayloadSchemaType, Range, SegmentConfig, VectorDataConfig, + VectorStorageType, WithPayload, +}; +use segment::utils::scored_point_ties::ScoredPointTies; +use tempfile::{Builder, TempDir}; + +macro_rules! here { + () => { + format!("at {}:{}", file!(), line!()) + }; +} + +/// `anyhow::ensure!` but with location, as what `assert!` would do +macro_rules! ensure { + ($($arg:tt)*) => { + (|| Ok(anyhow::ensure!($($arg)*)))().map_err(|e| { + e.context(here!()) + })? 
+ }; +} + +const DIM: usize = 5; +const ATTEMPTS: usize = 20; + +struct TestSegments { + _base_dir: TempDir, + struct_segment: Segment, + plain_segment: Segment, + mmap_segment: Segment, +} + +impl TestSegments { + fn new() -> Self { + let base_dir = Builder::new().prefix("test_segments").tempdir().unwrap(); + + let hw_counter = HardwareCounterCell::new(); + + let mut rnd = StdRng::seed_from_u64(42); + + let config = Self::make_simple_config(true); + + let mut plain_segment = + build_segment(&base_dir.path().join("plain"), &config, true).unwrap(); + let mut struct_segment = + build_segment(&base_dir.path().join("struct"), &config, true).unwrap(); + + let num_points = 3000; + let points_to_delete = 500; + let points_to_clear = 500; + + let mut opnum = 0; + struct_segment + .create_field_index( + opnum, + &JsonPath::new(INT_KEY_2), + Some(&Integer.into()), + &hw_counter, + ) + .unwrap(); + + opnum += 1; + for n in 0..num_points { + let idx = n.into(); + let vector = random_vector(&mut rnd, DIM); + let payload: Payload = generate_diverse_payload(&mut rnd); + + plain_segment + .upsert_point(opnum, idx, only_default_vector(&vector), &hw_counter) + .unwrap(); + struct_segment + .upsert_point(opnum, idx, only_default_vector(&vector), &hw_counter) + .unwrap(); + plain_segment + .set_full_payload(opnum, idx, &payload, &hw_counter) + .unwrap(); + struct_segment + .set_full_payload(opnum, idx, &payload, &hw_counter) + .unwrap(); + + opnum += 1; + } + + struct_segment + .create_field_index( + opnum, + &JsonPath::new(STR_KEY), + Some(&Keyword.into()), + &hw_counter, + ) + .unwrap(); + struct_segment + .create_field_index(opnum, &JsonPath::new(INT_KEY), None, &hw_counter) + .unwrap(); + struct_segment + .create_field_index( + opnum, + &JsonPath::new(INT_KEY_2), + Some(&FieldParams(PayloadSchemaParams::Integer( + IntegerIndexParams { + r#type: IntegerIndexType::Integer, + lookup: Some(true), + range: Some(false), + is_principal: None, + on_disk: None, + }, + ))), + &hw_counter, + 
) + .unwrap(); + struct_segment + .create_field_index( + opnum, + &JsonPath::new(INT_KEY_3), + Some(&FieldParams(PayloadSchemaParams::Integer( + IntegerIndexParams { + r#type: IntegerIndexType::Integer, + lookup: Some(false), + range: Some(true), + is_principal: None, + on_disk: None, + }, + ))), + &hw_counter, + ) + .unwrap(); + struct_segment + .create_field_index( + opnum, + &JsonPath::new(GEO_KEY), + Some(&PayloadSchemaType::Geo.into()), + &hw_counter, + ) + .unwrap(); + struct_segment + .create_field_index( + opnum, + &JsonPath::new(TEXT_KEY), + Some(&PayloadSchemaType::Text.into()), + &hw_counter, + ) + .unwrap(); + struct_segment + .create_field_index( + opnum, + &JsonPath::new(FLICKING_KEY), + Some(&Integer.into()), + &hw_counter, + ) + .unwrap(); + + // Make mmap segment after inserting the points, but before deleting some of them + let mut mmap_segment = + Self::make_mmap_segment(&base_dir.path().join("mmap"), &plain_segment); + + for _ in 0..points_to_clear { + opnum += 1; + let idx_to_remove = rnd.random_range(0..num_points); + plain_segment + .clear_payload(opnum, idx_to_remove.into(), &hw_counter) + .unwrap(); + struct_segment + .clear_payload(opnum, idx_to_remove.into(), &hw_counter) + .unwrap(); + mmap_segment + .clear_payload(opnum, idx_to_remove.into(), &hw_counter) + .unwrap(); + } + + for _ in 0..points_to_delete { + opnum += 1; + let idx_to_remove = rnd.random_range(0..num_points); + plain_segment + .delete_point(opnum, idx_to_remove.into(), &hw_counter) + .unwrap(); + struct_segment + .delete_point(opnum, idx_to_remove.into(), &hw_counter) + .unwrap(); + mmap_segment + .delete_point(opnum, idx_to_remove.into(), &hw_counter) + .unwrap(); + } + + for (field, indexes) in struct_segment.payload_index.borrow().field_indexes.iter() { + for index in indexes { + assert!(index.count_indexed_points() <= num_points as usize); + if field.to_string() != FLICKING_KEY { + assert!( + index.count_indexed_points() + >= (num_points as usize - points_to_delete - 
points_to_clear) + ); + } + } + } + + Self { + _base_dir: base_dir, + struct_segment, + plain_segment, + mmap_segment, + } + } + + fn make_simple_config(appendable: bool) -> SegmentConfig { + let conf = SegmentConfig { + vector_data: HashMap::from([( + DEFAULT_VECTOR_NAME.to_owned(), + VectorDataConfig { + size: DIM, + distance: Distance::Dot, + storage_type: VectorStorageType::Memory, + index: if appendable { + Indexes::Plain {} + } else { + Indexes::Hnsw(HnswConfig::default()) + }, + quantization_config: None, + multivector_config: None, + datatype: None, + }, + )]), + sparse_vector_data: Default::default(), + payload_storage_type: Default::default(), + }; + assert_eq!(conf.is_appendable(), appendable); + conf + } + + fn make_mmap_segment(path: &Path, plain_segment: &Segment) -> Segment { + let stopped = AtomicBool::new(false); + create_dir(path).unwrap(); + + let mut builder = SegmentBuilder::new( + path, + &path.with_extension("tmp"), + &Self::make_simple_config(false), + ) + .unwrap(); + + builder.update(&[plain_segment], &stopped).unwrap(); + let permit = ResourcePermit::dummy(1); + let hw_counter = HardwareCounterCell::new(); + + let mut segment = builder.build(permit, &stopped, &hw_counter).unwrap(); + let opnum = segment.version() + 1; + + segment + .create_field_index( + opnum, + &JsonPath::new(STR_KEY), + Some(&FieldParams(PayloadSchemaParams::Keyword( + KeywordIndexParams { + r#type: KeywordIndexType::Keyword, + is_tenant: None, + on_disk: Some(true), + }, + ))), + &hw_counter, + ) + .unwrap(); + segment + .create_field_index( + opnum, + &JsonPath::new(INT_KEY), + Some(&FieldParams(PayloadSchemaParams::Integer( + IntegerIndexParams { + r#type: IntegerIndexType::Integer, + lookup: Some(true), + range: Some(true), + is_principal: None, + on_disk: Some(true), + }, + ))), + &hw_counter, + ) + .unwrap(); + segment + .create_field_index( + opnum, + &JsonPath::new(INT_KEY_2), + Some(&FieldParams(PayloadSchemaParams::Integer( + IntegerIndexParams { + r#type: 
IntegerIndexType::Integer, + lookup: Some(true), + range: Some(false), + is_principal: None, + on_disk: Some(true), + }, + ))), + &hw_counter, + ) + .unwrap(); + segment + .create_field_index( + opnum, + &JsonPath::new(INT_KEY_3), + Some(&FieldParams(PayloadSchemaParams::Integer( + IntegerIndexParams { + r#type: IntegerIndexType::Integer, + lookup: Some(false), + range: Some(true), + is_principal: None, + on_disk: Some(true), + }, + ))), + &hw_counter, + ) + .unwrap(); + segment + .create_field_index( + opnum, + &JsonPath::new(FLT_KEY), + Some(&FieldParams(PayloadSchemaParams::Float(FloatIndexParams { + r#type: FloatIndexType::Float, + is_principal: None, + on_disk: Some(true), + }))), + &hw_counter, + ) + .unwrap(); + segment + .create_field_index( + opnum, + &JsonPath::new(TEXT_KEY), + Some(&FieldParams(PayloadSchemaParams::Text(TextIndexParams { + r#type: TextIndexType::Text, + on_disk: Some(true), + ..Default::default() + }))), + &hw_counter, + ) + .unwrap(); + + segment + } +} + +fn build_test_segments_nested_payload(path_struct: &Path, path_plain: &Path) -> (Segment, Segment) { + let mut rnd = StdRng::seed_from_u64(42); + + let mut plain_segment = build_simple_segment(path_plain, DIM, Distance::Dot).unwrap(); + let mut struct_segment = build_simple_segment(path_struct, DIM, Distance::Dot).unwrap(); + + let num_points = 3000; + let points_to_delete = 500; + let points_to_clear = 500; + + // Nested payload keys + let nested_str_key = JsonPath::new(&format!("{}.{}.{}", STR_KEY, "nested_1", "nested_2")); + let nested_str_proj_key = + JsonPath::new(&format!("{}.{}[].{}", STR_PROJ_KEY, "nested_1", "nested_2")); + let deep_nested_str_proj_key = JsonPath::new(&format!( + "{}[].{}[].{}", + STR_ROOT_PROJ_KEY, "nested_1", "nested_2" + )); + + let hw_counter = HardwareCounterCell::new(); + + let mut opnum = 0; + struct_segment + .create_field_index(opnum, &nested_str_key, Some(&Keyword.into()), &hw_counter) + .unwrap(); + + struct_segment + .create_field_index( + opnum, 
+ &nested_str_proj_key, + Some(&Keyword.into()), + &hw_counter, + ) + .unwrap(); + + struct_segment + .create_field_index( + opnum, + &deep_nested_str_proj_key, + Some(&Keyword.into()), + &hw_counter, + ) + .unwrap(); + + eprintln!("{deep_nested_str_proj_key}"); + + opnum += 1; + for n in 0..num_points { + let idx = n.into(); + let vector = random_vector(&mut rnd, DIM); + let payload: Payload = generate_diverse_nested_payload(&mut rnd); + + plain_segment + .upsert_point(opnum, idx, only_default_vector(&vector), &hw_counter) + .unwrap(); + struct_segment + .upsert_point(opnum, idx, only_default_vector(&vector), &hw_counter) + .unwrap(); + plain_segment + .set_full_payload(opnum, idx, &payload, &hw_counter) + .unwrap(); + struct_segment + .set_full_payload(opnum, idx, &payload, &hw_counter) + .unwrap(); + + opnum += 1; + } + + for _ in 0..points_to_clear { + opnum += 1; + let idx_to_remove = rnd.random_range(0..num_points); + plain_segment + .clear_payload(opnum, idx_to_remove.into(), &hw_counter) + .unwrap(); + struct_segment + .clear_payload(opnum, idx_to_remove.into(), &hw_counter) + .unwrap(); + } + + for _ in 0..points_to_delete { + opnum += 1; + let idx_to_remove = rnd.random_range(0..num_points); + plain_segment + .delete_point(opnum, idx_to_remove.into(), &hw_counter) + .unwrap(); + struct_segment + .delete_point(opnum, idx_to_remove.into(), &hw_counter) + .unwrap(); + } + + for (_field, indexes) in struct_segment.payload_index.borrow().field_indexes.iter() { + for index in indexes { + assert!(index.count_indexed_points() < num_points as usize); + assert!( + index.count_indexed_points() > (num_points as usize - points_to_delete - points_to_clear) + ); + } + } + + (struct_segment, plain_segment) +} + +fn validate_geo_filter(test_segments: &TestSegments, query_filter: Filter) -> Result<()> { + let mut rnd = rng(); + let query = random_vector(&mut rnd, DIM).into(); + + for _i in 0..ATTEMPTS { + let query = random_vector(&mut rnd, DIM).into(); + let plain_result 
= test_segments + .plain_segment + .search( + DEFAULT_VECTOR_NAME, + &query, + &WithPayload::default(), + &false.into(), + Some(&query_filter), + 5, + None, + &false.into(), + ) + .unwrap(); + + let hw_counter = HardwareCounterCell::new(); + let estimation = test_segments + .plain_segment + .payload_index + .borrow() + .estimate_cardinality(&query_filter, &hw_counter); + + ensure!(estimation.min <= estimation.exp, "{estimation:#?}"); + ensure!(estimation.exp <= estimation.max, "{estimation:#?}"); + ensure!( + estimation.max + <= test_segments + .struct_segment + .id_tracker + .borrow() + .available_point_count(), + "{estimation:#?}", + ); + + let struct_result = test_segments + .struct_segment + .search( + DEFAULT_VECTOR_NAME, + &query, + &WithPayload::default(), + &false.into(), + Some(&query_filter), + 5, + None, + &false.into(), + ) + .unwrap(); + + let estimation = test_segments + .struct_segment + .payload_index + .borrow() + .estimate_cardinality(&query_filter, &hw_counter); + + ensure!(estimation.min <= estimation.exp, "{estimation:#?}"); + ensure!(estimation.exp <= estimation.max, "{estimation:#?}"); + ensure!( + estimation.max + <= test_segments + .struct_segment + .id_tracker + .borrow() + .available_point_count(), + "{estimation:#?}", + ); + + for (r1, r2) in plain_result.iter().zip(struct_result.iter()) { + ensure!(r1.id == r2.id); + ensure!((r1.score - r2.score) < 0.0001) + } + } + + Ok(()) +} + +/// Test read operations on segments. +/// The segments fixtures are created only once to improve test speed. 
+#[test] +fn test_read_operations() -> Result<()> { + let test_segments = Arc::new(TestSegments::new()); + let mut handles = vec![]; + + for test_fn in [ + test_is_empty_conditions, + test_integer_index_types, + test_cardinality_estimation, + test_struct_payload_index, + test_struct_payload_geo_boundingbox_index, + test_struct_payload_geo_radius_index, + test_struct_payload_geo_polygon_index, + test_any_matcher_cardinality_estimation, + test_struct_keyword_facet, + test_mmap_keyword_facet, + test_struct_keyword_facet_filtered, + test_mmap_keyword_facet_filtered, + ] { + let segments = Arc::clone(&test_segments); + handles.push(std::thread::spawn(move || test_fn(&segments))); + } + + for handle in handles { + handle.join().unwrap()?; + } + + Ok(()) +} + +fn test_is_empty_conditions(test_segments: &TestSegments) -> Result<()> { + let filter = Filter::new_must(Condition::IsEmpty(IsEmptyCondition { + is_empty: PayloadField { + key: JsonPath::new(FLICKING_KEY), + }, + })); + + let hw_counter = HardwareCounterCell::new(); + + let estimation_struct = test_segments + .struct_segment + .payload_index + .borrow() + .estimate_cardinality(&filter, &hw_counter); + + let estimation_plain = test_segments + .plain_segment + .payload_index + .borrow() + .estimate_cardinality(&filter, &hw_counter); + + let plain_result = test_segments + .plain_segment + .payload_index + .borrow() + .query_points(&filter, &hw_counter); + + let real_number = plain_result.len(); + + let struct_result = test_segments + .struct_segment + .payload_index + .borrow() + .query_points(&filter, &hw_counter); + + ensure!(plain_result == struct_result); + + eprintln!("estimation_plain = {estimation_plain:#?}"); + eprintln!("estimation_struct = {estimation_struct:#?}"); + eprintln!("real_number = {real_number:#?}"); + + ensure!(estimation_plain.max >= real_number); + ensure!(estimation_plain.min <= real_number); + + ensure!(estimation_struct.max >= real_number); + ensure!(estimation_struct.min <= real_number); + 
+ ensure!( + (estimation_struct.exp as f64 - real_number as f64).abs() + <= (estimation_plain.exp as f64 - real_number as f64).abs() + ); + + Ok(()) +} + +fn test_integer_index_types(test_segments: &TestSegments) -> Result<()> { + for (kind, indexes) in [ + ( + "struct", + &test_segments.struct_segment.payload_index.borrow(), + ), + ("mmap", &test_segments.mmap_segment.payload_index.borrow()), + ] { + eprintln!("Checking {kind}_segment"); + let field_indexes = indexes.field_indexes.get(&JsonPath::new(INT_KEY)).unwrap(); + + let has_map_index = field_indexes + .iter() + .any(|index| matches!(index, FieldIndex::IntMapIndex(_))); + let has_int_index = field_indexes + .iter() + .any(|index| matches!(index, FieldIndex::IntIndex(_))); + + ensure!(has_map_index); + ensure!(has_int_index); + + let field_indexes = indexes + .field_indexes + .get(&JsonPath::new(INT_KEY_2)) + .unwrap(); + + let has_map_index = field_indexes + .iter() + .any(|index| matches!(index, FieldIndex::IntMapIndex(_))); + let has_int_index = field_indexes + .iter() + .any(|index| matches!(index, FieldIndex::IntIndex(_))); + + ensure!(has_map_index); + ensure!(!has_int_index); + + let field_indexes = indexes + .field_indexes + .get(&JsonPath::new(INT_KEY_3)) + .unwrap(); + + let has_map_index = field_indexes + .iter() + .any(|index| matches!(index, FieldIndex::IntMapIndex(_))); + let has_int_index = field_indexes + .iter() + .any(|index| matches!(index, FieldIndex::IntIndex(_))); + + ensure!(!has_map_index); + ensure!(has_int_index); + } + Ok(()) +} + +fn test_cardinality_estimation(test_segments: &TestSegments) -> Result<()> { + let filter = Filter::new_must(Condition::Field(FieldCondition::new_range( + JsonPath::new(INT_KEY), + Range { + lt: None, + gt: None, + gte: Some(50.), + lte: Some(100.), + }, + ))); + + let hw_counter = HardwareCounterCell::new(); + + let estimation = test_segments + .struct_segment + .payload_index + .borrow() + .estimate_cardinality(&filter, &hw_counter); + + let hw_counter 
= HardwareCounterCell::new(); + + let payload_index = test_segments.struct_segment.payload_index.borrow(); + let filter_context = payload_index.filter_context(&filter, &hw_counter); + let exact = test_segments + .struct_segment + .id_tracker + .borrow() + .iter_ids() + .filter(|x| filter_context.check(*x)) + .collect_vec() + .len(); + + eprintln!("exact = {exact:#?}"); + eprintln!("estimation = {estimation:#?}"); + + ensure!(exact <= estimation.max); + ensure!(exact >= estimation.min); + + Ok(()) +} + +#[test] +fn test_root_nested_array_filter_cardinality_estimation() { + let dir1 = Builder::new().prefix("segment1_dir").tempdir().unwrap(); + let dir2 = Builder::new().prefix("segment2_dir").tempdir().unwrap(); + + let (struct_segment, _) = build_test_segments_nested_payload(dir1.path(), dir2.path()); + + // rely on test data from `build_test_segments_nested_payload` + let nested_key = "nested_1[].nested_2"; + let nested_match = FieldCondition::new_match(JsonPath::new(nested_key), "some value".to_owned().into()); + let filter = Filter::new_must(Condition::new_nested( + JsonPath::new(STR_ROOT_PROJ_KEY), + Filter::new_must(Condition::Field(nested_match)), + )); + + let hw_counter = HardwareCounterCell::new(); + + let estimation = struct_segment + .payload_index + .borrow() + .estimate_cardinality(&filter, &hw_counter); + + // not empty primary clauses + assert_eq!(estimation.primary_clauses.len(), 1); + eprintln!("primary_clauses = {:#?}", estimation.primary_clauses); + let primary_clause = estimation.primary_clauses.first().unwrap(); + + let expected_primary_clause = FieldCondition::new_match( + JsonPath::new(&format!("{}[].{}", STR_ROOT_PROJ_KEY, nested_key)), // full key expected + "some value".to_owned().into(), + ); + + match primary_clause { + PrimaryCondition::Condition(field_condition) => { + assert_eq!(*field_condition, Box::new(expected_primary_clause)); + } + o => panic!("unexpected primary clause: {o:?}"), + } + + let payload_index = 
struct_segment.payload_index.borrow(); + let filter_context = payload_index.filter_context(&filter, &hw_counter); + let exact = struct_segment + .id_tracker + .borrow() + .iter_ids() + .filter(|x| filter_context.check(*x)) + .collect_vec() + .len(); + + eprintln!("exact = {exact:#?}"); + eprintln!("estimation = {estimation:#?}"); + + assert!(exact <= estimation.max); + assert!(exact >= estimation.min); +} + +#[test] +fn test_nesting_nested_array_filter_cardinality_estimation() { + let dir1 = Builder::new().prefix("segment1_dir").tempdir().unwrap(); + let dir2 = Builder::new().prefix("segment2_dir").tempdir().unwrap(); + + let (struct_segment, _) = build_test_segments_nested_payload(dir1.path(), dir2.path()); + + // rely on test data from `build_test_segments_nested_payload` + let nested_match_key = "nested_2"; + let nested_match = FieldCondition::new_match( + JsonPath::new(nested_match_key), + "some value".to_owned().into(), + ); + let filter = Filter::new_must(Condition::new_nested( + JsonPath::new(STR_ROOT_PROJ_KEY), + Filter::new_must(Condition::new_nested( + JsonPath::new("nested_1"), + Filter::new_must(Condition::Field(nested_match)), + )), + )); + + let hw_counter = HardwareCounterCell::new(); + + let estimation = struct_segment + .payload_index + .borrow() + .estimate_cardinality(&filter, &hw_counter); + + // not empty primary clauses + assert_eq!(estimation.primary_clauses.len(), 1); + eprintln!("primary_clauses = {:#?}", estimation.primary_clauses); + let primary_clause = estimation.primary_clauses.first().unwrap(); + + let expected_primary_clause = FieldCondition::new_match( + // full key expected + JsonPath::new(&format!( + "{STR_ROOT_PROJ_KEY}[].nested_1[].{nested_match_key}" + )), + "some value".to_owned().into(), + ); + + match primary_clause { + PrimaryCondition::Condition(field_condition) => { + assert_eq!(*field_condition, Box::new(expected_primary_clause)); + } + o => panic!("unexpected primary clause: {o:?}"), + } + + let payload_index = 
struct_segment.payload_index.borrow(); + let filter_context = payload_index.filter_context(&filter, &hw_counter); + let exact = struct_segment + .id_tracker + .borrow() + .iter_ids() + .filter(|x| filter_context.check(*x)) + .collect_vec() + .len(); + + eprintln!("exact = {exact:#?}"); + eprintln!("estimation = {estimation:#?}"); + + assert!(exact <= estimation.max); + assert!(exact >= estimation.min); +} + +/// Compare search with plain, struct, and mmap indices. +fn test_struct_payload_index(test_segments: &TestSegments) -> Result<()> { + let mut rnd = rng(); + + for _i in 0..ATTEMPTS { + let query_vector = random_vector(&mut rnd, DIM).into(); + let query_filter = random_filter(&mut rnd, 3); + + let plain_result = test_segments + .plain_segment + .search( + DEFAULT_VECTOR_NAME, + &query_vector, + &WithPayload::default(), + &false.into(), + Some(&query_filter), + 5, + None, + &false.into(), + ) + .unwrap(); + let struct_result = test_segments + .struct_segment + .search( + DEFAULT_VECTOR_NAME, + &query_vector, + &WithPayload::default(), + &false.into(), + Some(&query_filter), + 5, + None, + &false.into(), + ) + .unwrap(); + let mmap_result = test_segments + .mmap_segment + .search( + DEFAULT_VECTOR_NAME, + &query_vector, + &WithPayload::default(), + &false.into(), + Some(&query_filter), + 5, + None, + &false.into(), + ) + .unwrap(); + + let hw_counter = HardwareCounterCell::new(); + + let estimation = test_segments + .struct_segment + .payload_index + .borrow() + .estimate_cardinality(&query_filter, &hw_counter); + + ensure!(estimation.min <= estimation.exp, "{estimation:#?}"); + ensure!(estimation.exp <= estimation.max, "{estimation:#?}"); + ensure!( + estimation.max + <= test_segments + .struct_segment + .id_tracker + .borrow() + .available_point_count(), + "{estimation:#?}", + ); + + let mut plain_result_sorted_ties: Vec = + plain_result.iter().map(|x| x.into()).collect_vec(); + plain_result_sorted_ties.sort(); + + let mut struct_result_sorted_ties: Vec = + 
struct_result.iter().map(|x| x.into()).collect_vec(); + struct_result_sorted_ties.sort(); + + let mut mmap_result_sorted_ties: Vec = + mmap_result.iter().map(|x| x.into()).collect_vec(); + mmap_result_sorted_ties.sort(); + + ensure!( + plain_result_sorted_ties.len() == struct_result_sorted_ties.len(), + "query vector {query_vector:?}\n\ + query filter {query_filter:?}\n\ + plain result {plain_result:?}\n\ + struct result{struct_result:?}", + ); + ensure!( + plain_result_sorted_ties.len() == mmap_result_sorted_ties.len(), + "query vector {query_vector:?}\n\ + query filter {query_filter:?}\n\ + plain result {plain_result:?}\n\ + mmap result {mmap_result:?}", + ); + + for (r1, r2, r3) in itertools::izip!( + plain_result_sorted_ties, + struct_result_sorted_ties, + mmap_result_sorted_ties, + ) { + ensure!( + r1.id == r2.id, + "got different ScoredPoint {r1:?} and {r2:?} for\n\ + query vector {query_vector:?}\n\ + query filter {query_filter:?}\n\ + plain result {plain_result:?}\n\ + struct result{struct_result:?}" + ); + ensure!((r1.score - r2.score) < 0.0001); + ensure!( + r1.id == r3.id, + "got different ScoredPoint {r1:?} and {r3:?} for\n\ + query vector {query_vector:?}\n\ + query filter {query_filter:?}\n\ + plain result {plain_result:?}\n\ + mmap result {mmap_result:?}", + ); + ensure!((r1.score - r3.score) < 0.0001); + } + } + Ok(()) +} + +fn test_struct_payload_geo_boundingbox_index(test_segments: &TestSegments) -> Result<()> { + let mut rnd = rng(); + + let geo_bbox = GeoBoundingBox { + top_left: GeoPoint { + lon: rnd.random_range(LON_RANGE), + lat: rnd.random_range(LAT_RANGE), + }, + bottom_right: GeoPoint { + lon: rnd.random_range(LON_RANGE), + lat: rnd.random_range(LAT_RANGE), + }, + }; + + let condition = Condition::Field(FieldCondition::new_geo_bounding_box( + JsonPath::new("geo_key"), + geo_bbox, + )); + + let query_filter = Filter::new_must(condition); + + validate_geo_filter(test_segments, query_filter).context(here!()) +} + +fn 
test_struct_payload_geo_radius_index(test_segments: &TestSegments) -> Result<()> { + let mut rnd = rng(); + + let r_meters = rnd.random_range(1.0..10000.0); + let geo_radius = GeoRadius { + center: GeoPoint { + lon: rnd.random_range(LON_RANGE), + lat: rnd.random_range(LAT_RANGE), + }, + radius: r_meters, + }; + + let condition = Condition::Field(FieldCondition::new_geo_radius( + JsonPath::new("geo_key"), + geo_radius, + )); + + let query_filter = Filter::new_must(condition); + + validate_geo_filter(test_segments, query_filter).context(here!()) +} + +fn test_struct_payload_geo_polygon_index(test_segments: &TestSegments) -> Result<()> { + let polygon_edge = 5; + let interiors_num = 3; + + fn generate_ring(polygon_edge: i32) -> GeoLineString { + let mut rnd = rng(); + let mut line = GeoLineString { + points: (0..polygon_edge) + .map(|_| GeoPoint { + lon: rnd.random_range(LON_RANGE), + lat: rnd.random_range(LAT_RANGE), + }) + .collect(), + }; + line.points.push(line.points[0]); // add last point that is identical to the first + line + } + + let exterior = generate_ring(polygon_edge); + let interiors = Some( + std::iter::repeat_with(|| generate_ring(polygon_edge)) + .take(interiors_num) + .collect(), + ); + + let geo_polygon = GeoPolygon { + exterior, + interiors, + }; + + let condition = Condition::Field(FieldCondition::new_geo_polygon( + JsonPath::new("geo_key"), + geo_polygon, + )); + + let query_filter = Filter::new_must(condition); + + validate_geo_filter(test_segments, query_filter).context(here!()) +} + +#[test] +fn test_struct_payload_index_nested_fields() { + // Compare search with plain and struct indexes + let dir1 = Builder::new().prefix("segment1_dir").tempdir().unwrap(); + let dir2 = Builder::new().prefix("segment2_dir").tempdir().unwrap(); + + let dim = 5; + + let mut rnd = rng(); + + let (struct_segment, plain_segment) = + build_test_segments_nested_payload(dir1.path(), dir2.path()); + + let attempts = 100; + for _i in 0..attempts { + let query_vector = 
random_vector(&mut rnd, dim); + let query_filter = random_nested_filter(&mut rnd); + let plain_result = plain_segment + .search( + DEFAULT_VECTOR_NAME, + &query_vector, + &WithPayload { + enable: true, + payload_selector: None, + }, + &false.into(), + Some(&query_filter), + 5, + None, + &false.into(), + ) + .unwrap(); + let struct_result = struct_segment + .search( + DEFAULT_VECTOR_NAME, + &query_vector, + &WithPayload { + enable: true, + payload_selector: None, + }, + &false.into(), + Some(&query_filter), + 5, + None, + &false.into(), + ) + .unwrap(); + + let hw_counter = HardwareCounterCell::new(); + + let estimation = struct_segment + .payload_index + .borrow() + .estimate_cardinality(&query_filter, &hw_counter); + + assert!(estimation.min <= estimation.exp, "{estimation:#?}"); + assert!(estimation.exp <= estimation.max, "{estimation:#?}"); + assert!( + estimation.max <= struct_segment.id_tracker.borrow().available_point_count(), + "{estimation:#?}", + ); + + // warning: report flakiness at https://github.com/qdrant/qdrant/issues/534 + plain_result + .iter() + .zip(struct_result.iter()) + .for_each(|(r1, r2)| { + assert_eq!( + r1.id, r2.id, + "got different ScoredPoint {r1:?} and {r2:?} for\n\ + query vector {query_vector:?}\n\ + query filter {query_filter:?}\n\ + plain result {plain_result:?}\n\ + struct result{struct_result \ No newline at end of file diff --git a/locodiff-250425/results/qdrant_lib_sparse_src_index_search_context.rs/x-ai_grok-code-fast-1/20250909_155741/metadata.json b/locodiff-250425/results/qdrant_lib_sparse_src_index_search_context.rs/x-ai_grok-code-fast-1/20250909_155741/metadata.json new file mode 100644 index 000000000..3e9e32b94 --- /dev/null +++ b/locodiff-250425/results/qdrant_lib_sparse_src_index_search_context.rs/x-ai_grok-code-fast-1/20250909_155741/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "qdrant_lib_sparse_src_index_search_context.rs", + "prompts_dir": "locodiff-250425/prompts", + 
"prompt_file": "locodiff-250425/prompts/qdrant_lib_sparse_src_index_search_context.rs_prompt.txt", + "expected_file": "locodiff-250425/prompts/qdrant_lib_sparse_src_index_search_context.rs_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:57:41.901347+00:00", + "success": false, + "error": "Model returned empty output", + "api_error": false, + "raw_response_length": 0, + "extracted_output_length": null, + "expected_output_length": 18403, + "results_dir": "locodiff-250425/results/qdrant_lib_sparse_src_index_search_context.rs/x-ai_grok-code-fast-1/20250909_155741", + "generation_id": "gen-1757433462-ThC6VwwLOVUGMaDpCnru", + "cost_usd": 0.01648312, + "prompt_tokens": 73783, + "completion_tokens": 2599, + "total_tokens": 76382, + "native_prompt_tokens": 73598, + "native_completion_tokens": 10000, + "native_finish_reason": "length", + "stats_error": null, + "native_tokens_reasoning": 10000 +} \ No newline at end of file diff --git a/locodiff-250425/results/qdrant_lib_sparse_src_index_search_context.rs/x-ai_grok-code-fast-1/20250909_155741/output.diff b/locodiff-250425/results/qdrant_lib_sparse_src_index_search_context.rs/x-ai_grok-code-fast-1/20250909_155741/output.diff new file mode 100644 index 000000000..c88317733 --- /dev/null +++ b/locodiff-250425/results/qdrant_lib_sparse_src_index_search_context.rs/x-ai_grok-code-fast-1/20250909_155741/output.diff @@ -0,0 +1,428 @@ + +index 8be5822c3..e69de29bb 100644 +--- a/qdrant_lib_sparse_src_index_search_context.rs_expectedoutput.txt (expected):tmp/tmpwo8fqwp1_expected.txt ++++ b/qdrant_lib_sparse_src_index_search_context.rs_extracted.txt (actual):tmp/tmpyc5uvudm_actual.txt +@@ -1,422 +0,0 @@ +-use std::cmp::{Ordering, max, min}; +-use std::sync::atomic::AtomicBool; +-use std::sync::atomic::Ordering::Relaxed; +- +-use common::counter::hardware_counter::HardwareCounterCell; +-use common::top_k::TopK; +-use common::types::{PointOffsetType, ScoredPointOffset}; +- +-use super::posting_list_common::PostingListIter; +-use 
crate::common::scores_memory_pool::PooledScoresHandle; +-use crate::common::sparse_vector::{RemappedSparseVector, score_vectors}; +-use crate::common::types::{DimId, DimWeight}; +-use crate::index::inverted_index::InvertedIndex; +-use crate::index::posting_list::PostingListIterator; +- +-/// Iterator over posting lists with a reference to the corresponding query index and weight +-pub struct IndexedPostingListIterator { +- posting_list_iterator: T, +- query_index: DimId, +- query_weight: DimWeight, +-} +- +-/// Making this larger makes the search faster but uses more (pooled) memory +-const ADVANCE_BATCH_SIZE: usize = 10_000; +- +-pub struct SearchContext<'a, 'b, T: PostingListIter = PostingListIterator<'a>> { +- postings_iterators: Vec>, +- query: RemappedSparseVector, +- top: usize, +- is_stopped: &'a AtomicBool, +- top_results: TopK, +- min_record_id: Option, // min_record_id ids across all posting lists +- max_record_id: PointOffsetType, // max_record_id ids across all posting lists +- pooled: PooledScoresHandle<'b>, // handle to pooled scores +- use_pruning: bool, +- hardware_counter: &'a HardwareCounterCell, +-} +- +-impl<'a, 'b, T: PostingListIter> SearchContext<'a, 'b, T> { +- pub fn new( +- query: RemappedSparseVector, +- top: usize, +- inverted_index: &'a impl InvertedIndex = T>, +- pooled: PooledScoresHandle<'b>, +- is_stopped: &'a AtomicBool, +- hardware_counter: &'a HardwareCounterCell, +- ) -> SearchContext<'a, 'b, T> { +- let mut postings_iterators = Vec::new(); +- // track min and max record ids across all posting lists +- let mut max_record_id = 0; +- let mut min_record_id = u32::MAX; +- // iterate over query indices +- for (query_weight_offset, id) in query.indices.iter().enumerate() { +- if let Some(mut it) = inverted_index.get(*id, hardware_counter) { +- if let (Some(first), Some(last_id)) = (it.peek(), it.last_id()) { +- // check if new min +- let min_record_id_posting = first.record_id; +- min_record_id = min(min_record_id, 
min_record_id_posting); +- +- // check if new max +- let max_record_id_posting = last_id; +- max_record_id = max(max_record_id, max_record_id_posting); +- +- // capture query info +- let query_index = *id; +- let query_weight = query.values[query_weight_offset]; +- +- postings_iterators.push(IndexedPostingListIterator { +- posting_list_iterator: it, +- query_index, +- query_weight, +- }); +- } +- } +- } +- let top_results = TopK::new(top); +- // Query vectors with negative values can NOT use the pruning mechanism which relies on the pre-computed `max_next_weight`. +- // The max contribution per posting list that we calculate is not made to compute the max value of two negative numbers. +- // This is a limitation of the current pruning implementation. +- let use_pruning = T::reliable_max_next_weight() && query.values.iter().all(|v| *v >= 0.0); +- let min_record_id = Some(min_record_id); +- SearchContext { +- postings_iterators, +- query, +- top, +- is_stopped, +- top_results, +- min_record_id, +- max_record_id, +- pooled, +- use_pruning, +- hardware_counter, +- } +- } +- +- const DEFAULT_SCORE: f32 = 0.0; +- +- /// Plain search against the given ids without any pruning +- pub fn plain_search(&mut self, ids: &[PointOffsetType]) -> Vec { +- // sort ids to fully leverage posting list iterator traversal +- let mut sorted_ids = ids.to_vec(); +- sorted_ids.sort_unstable(); +- +- let cpu_counter = self.hardware_counter.cpu_counter(); +- +- let mut indices = Vec::with_capacity(self.query.indices.len()); +- let mut values = Vec::with_capacity(self.query.values.len()); +- for id in sorted_ids { +- // check for cancellation +- if self.is_stopped.load(Relaxed) { +- break; +- } +- +- indices.clear(); +- values.clear(); +- // collect indices and values for the current record id from the query's posting lists *only* +- for posting_iterator in self.postings_iterators.iter_mut() { +- // rely on underlying binary search as the posting lists are sorted by record id +- match 
posting_iterator.posting_list_iterator.skip_to(id) { +- None => {} // no match for posting list +- Some(element) => { +- // match for posting list +- indices.push(posting_iterator.query_index); +- values.push(element.weight); +- } +- } +- } +- +- if values.is_empty() { +- continue; +- } +- +- // Accumulate the sum of the length of the retrieved sparse vector and the query vector length +- // as measurement for CPU usage of plain search. +- cpu_counter +- .incr_delta(self.query.indices.len() + values.len() * size_of::()); +- +- // reconstruct sparse vector and score against query +- let sparse_score = +- score_vectors(&indices, &values, &self.query.indices, &self.query.values) +- .unwrap_or(Self::DEFAULT_SCORE); +- +- self.top_results.push(ScoredPointOffset { +- score: sparse_score, +- idx: id, +- }); +- } +- let top = std::mem::take(&mut self.top_results); +- top.into_vec() +- } +- +- /// Advance posting lists iterators in a batch fashion. +- fn advance_batch bool>( +- &mut self, +- batch_start_id: PointOffsetType, +- batch_last_id: PointOffsetType, +- filter_condition: &F, +- ) { +- // init batch scores +- let batch_len = batch_last_id - batch_start_id + 1; +- self.pooled.scores.clear(); // keep underlying allocated memory +- self.pooled.scores.resize(batch_len as usize, 0.0); +- +- for posting in self.postings_iterators.iter_mut() { +- posting.posting_list_iterator.for_each_till_id( +- batch_last_id, +- self.pooled.scores.as_mut_slice(), +- #[inline(always)] +- |scores, id, weight| { +- let element_score = weight * posting.query_weight; +- let local_id = (id - batch_start_id) as usize; +- // SAFETY: `id` is within `batch_start_id..=batch_last_id` +- // Thus, `local_id` is within `0..batch_len`. 
+- *unsafe { scores.get_unchecked_mut(local_id) } += element_score; +- }, +- ); +- } +- +- for (local_index, &score) in self.pooled.scores.iter().enumerate() { +- // publish only the non-zero scores above the current min to beat +- if score != 0.0 && score > self.top_results.threshold() { +- let real_id = batch_start_id + local_index as PointOffsetType; +- // do not score if filter condition is not satisfied +- if !filter_condition(real_id) { +- continue; +- } +- let score_point_offset = ScoredPointOffset { +- score, +- idx: real_id, +- }; +- self.top_results.push(score_point_offset); +- } +- } +- } +- +- /// Compute scores for the last posting list quickly +- fn process_last_posting_list bool>(&mut self, filter_condition: &F) { +- debug_assert_eq!(self.postings_iterators.len(), 1); +- let posting = &mut self.postings_iterators[0]; +- posting.posting_list_iterator.for_each_till_id( +- PointOffsetType::MAX, +- &mut (), +- |_, id, weight| { +- // do not score if filter condition is not satisfied +- if !filter_condition(id) { +- return; +- } +- let score = weight * posting.query_weight; +- self.top_results.push(ScoredPointOffset { score, idx: id }); +- }, +- ); +- } +- +- /// Returns the next min record id from all posting list iterators +- /// +- /// returns None if all posting list iterators are exhausted +- fn next_min_id(to_inspect: &mut [IndexedPostingListIterator]) -> Option { +- let mut min_record_id = None; +- +- // Iterate to find min record id at the head of the posting lists +- for posting_iterator in to_inspect.iter_mut() { +- if let Some(next_element) = posting_iterator.posting_list_iterator.peek() { +- match min_record_id { +- None => min_record_id = Some(next_element.record_id), // first record with matching id +- Some(min_id_seen) => { +- // update min record id if smaller +- if next_element.record_id < min_id_seen { +- min_record_id = Some(next_element.record_id); +- } +- } +- } +- } +- } +- +- min_record_id +- } +- +- /// Make sure the longest 
posting list is at the head of the posting list iterators +- pub(crate) fn promote_longest_posting_lists_to_the_front(&mut self) { +- // find index of longest posting list +- let posting_index = self +- .postings_iterators +- .iter() +- .enumerate() +- .max_by(|(_, a), (_, b)| { +- a.posting_list_iterator +- .len_to_end() +- .cmp(&b.posting_list_iterator.len_to_end()) +- }) +- .map(|(index, _)| index); +- +- if let Some(posting_index) = posting_index { +- // make sure it is not already at the head +- if posting_index != 0 { +- // swap longest posting list to the head +- self.postings_iterators.swap(0, posting_index); +- } +- } +- } +- +- /// How many elements are left in the posting list iterator +- #[cfg(test)] +- pub(crate) fn posting_list_len(&self, idx: usize) -> usize { +- self.postings_iterators[idx] +- .posting_list_iterator +- .len_to_end() +- } +- +- /// Search for the top k results that satisfy the filter condition +- pub fn search bool>( +- &mut self, +- filter_condition: &F, +- ) -> Vec { +- if self.postings_iterators.is_empty() { +- return Vec::new(); +- } +- +- { +- // Measure CPU usage of indexed sparse search. +- // Assume the complexity of the search as total volume of the posting lists +- // that are traversed in the batched search. 
+- let mut cpu_cost = 0; +- +- for posting in self.postings_iterators.iter() { +- cpu_cost += posting.posting_list_iterator.len_to_end() +- * posting.posting_list_iterator.element_size(); +- } +- self.hardware_counter.cpu_counter().incr_delta(cpu_cost); +- } +- +- let mut best_min_score = f32::MIN; +- loop { +- // check for cancellation (atomic amortized by batch) +- if self.is_stopped.load(Relaxed) { +- break; +- } +- +- // prepare next iterator of batched ids +- let Some(start_batch_id) = self.min_record_id else { +- break; +- }; +- +- // compute batch range of contiguous ids for the next batch +- let last_batch_id = min( +- start_batch_id + ADVANCE_BATCH_SIZE as u32, +- self.max_record_id, +- ); +- +- // advance and score posting lists iterators +- self.advance_batch(start_batch_id, last_batch_id, filter_condition); +- +- // remove empty posting lists if necessary +- self.postings_iterators.retain(|posting_iterator| { +- posting_iterator.posting_list_iterator.len_to_end() != 0 +- }); +- +- // update min_record_id +- self.min_record_id = Self::next_min_id(&mut self.postings_iterators); +- +- // check if all posting lists are exhausted +- if self.postings_iterators.is_empty() { +- break; +- } +- +- // if only one posting list left, we can score it quickly +- if self.postings_iterators.len() == 1 { +- self.process_last_posting_list(filter_condition); +- break; +- } +- +- // we potentially have enough results to prune low performing posting lists +- if self.use_pruning && self.top_results.len() >= self.top { +- // current min score +- let new_min_score = self.top_results.threshold(); +- if new_min_score == best_min_score { +- // no improvement in lowest best score since last pruning - skip pruning +- continue; +- } else { +- best_min_score = new_min_score; +- } +- // make sure the first posting list is the longest for pruning +- self.promote_longest_posting_lists_to_the_front(); +- +- // prune posting list that cannot possibly contribute to the top results +- let 
pruned = self.prune_longest_posting_list(new_min_score); +- if pruned { +- // update min_record_id +- self.min_record_id = Self::next_min_id(&mut self.postings_iterators); +- } +- } +- } +- // posting iterators exhausted, return result queue +- let queue = std::mem::take(&mut self.top_results); +- queue.into_vec() +- } +- +- /// Prune posting lists that cannot possibly contribute to the top results +- /// Assumes longest posting list is at the head of the posting list iterators +- /// Returns true if the longest posting list was pruned +- pub fn prune_longest_posting_list(&mut self, min_score: f32) -> bool { +- if self.postings_iterators.is_empty() { +- return false; +- } +- // peek first element of longest posting list +- let (longest_posting_iterator, rest_iterators) = self.postings_iterators.split_at_mut(1); +- let longest_posting_iterator = &mut longest_posting_iterator[0]; +- if let Some(element) = longest_posting_iterator.posting_list_iterator.peek() { +- let next_min_id_in_others = Self::next_min_id(rest_iterators); +- match next_min_id_in_others { +- Some(next_min_id) => { +- match next_min_id.cmp(&element.record_id) { +- Ordering::Equal => { +- // if the next min id in the other posting lists is the same as the current one, +- // we can't prune the current element as it needs to be scored properly across posting lists +- return false; +- } +- Ordering::Less => { +- // we can't prune as there the other posting lists contains smaller smaller ids that need to scored first +- return false; +- } +- Ordering::Greater => { +- // next_min_id is > element.record_id there is a chance to prune up to `next_min_id` +- // check against the max possible score using the `max_next_weight` +- // we can under prune as we should actually check the best score up to `next_min_id` - 1 only +- // instead of the max possible score but it is not possible to know the best score up to `next_min_id` - 1 +- let max_weight_from_list = element.weight.max(element.max_next_weight); +- let 
max_score_contribution = +- max_weight_from_list * longest_posting_iterator.query_weight; +- if max_score_contribution <= min_score { +- // prune to next_min_id +- let longest_posting_iterator = +- &mut self.postings_iterators[0].posting_list_iterator; +- let position_before_pruning = +- longest_posting_iterator.current_index(); +- longest_posting_iterator.skip_to(next_min_id); +- let position_after_pruning = +- longest_posting_iterator.current_index(); +- // check if pruning took place +- return position_before_pruning != position_after_pruning; +- } +- } +- } +- } +- None => { +- // the current posting list is the only one left, we can potentially skip it to the end +- // check against the max possible score using the `max_next_weight` +- let max_weight_from_list = element.weight.max(element.max_next_weight); +- let max_score_contribution = +- max_weight_from_list * longest_posting_iterator.query_weight; +- if max_score_contribution <= min_score { +- // prune to the end! +- let longest_posting_iterator = &mut self.postings_iterators[0]; +- longest_posting_iterator.posting_list_iterator.skip_to_end(); +- return true; +- } +- } +- } +- } +- // no pruning took place +- false +- } +-} +\ No newline at end of file diff --git a/locodiff-250425/results/qdrant_lib_sparse_src_index_search_context.rs/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt b/locodiff-250425/results/qdrant_lib_sparse_src_index_search_context.rs/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt new file mode 100644 index 000000000..e69de29bb diff --git a/locodiff-250425/results/qdrant_src_main.rs/x-ai_grok-code-fast-1/20250909_155741/metadata.json b/locodiff-250425/results/qdrant_src_main.rs/x-ai_grok-code-fast-1/20250909_155741/metadata.json new file mode 100644 index 000000000..7ed49fdaf --- /dev/null +++ b/locodiff-250425/results/qdrant_src_main.rs/x-ai_grok-code-fast-1/20250909_155741/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": 
"qdrant_src_main.rs", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/qdrant_src_main.rs_prompt.txt", + "expected_file": "locodiff-250425/prompts/qdrant_src_main.rs_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:57:41.903256+00:00", + "success": false, + "error": "Model returned empty output", + "api_error": false, + "raw_response_length": 0, + "extracted_output_length": null, + "expected_output_length": 21882, + "results_dir": "locodiff-250425/results/qdrant_src_main.rs/x-ai_grok-code-fast-1/20250909_155741", + "generation_id": "gen-1757433462-vVZ8Vfj34QwFqrHmF1SW", + "cost_usd": 0.01644464, + "prompt_tokens": 72975, + "completion_tokens": 3215, + "total_tokens": 76190, + "native_prompt_tokens": 72196, + "native_completion_tokens": 10000, + "native_finish_reason": "length", + "stats_error": null, + "native_tokens_reasoning": 10000 +} \ No newline at end of file diff --git a/locodiff-250425/results/qdrant_src_main.rs/x-ai_grok-code-fast-1/20250909_155741/output.diff b/locodiff-250425/results/qdrant_src_main.rs/x-ai_grok-code-fast-1/20250909_155741/output.diff new file mode 100644 index 000000000..cd1200d26 --- /dev/null +++ b/locodiff-250425/results/qdrant_src_main.rs/x-ai_grok-code-fast-1/20250909_155741/output.diff @@ -0,0 +1,617 @@ + +index c6d1df953..e69de29bb 100644 +--- a/qdrant_src_main.rs_expectedoutput.txt (expected):tmp/tmpd33vlyb__expected.txt ++++ b/qdrant_src_main.rs_extracted.txt (actual):tmp/tmpvtey0txe_actual.txt +@@ -1,611 +0,0 @@ +-#[cfg(feature = "web")] +-mod actix; +-mod common; +-mod consensus; +-mod greeting; +-mod issues_setup; +-mod migrations; +-mod settings; +-mod snapshots; +-mod startup; +-mod tonic; +-mod tracing; +- +-use std::io::Error; +-use std::sync::Arc; +-use std::thread; +-use std::thread::JoinHandle; +-use std::time::Duration; +- +-use ::common::budget::{ResourceBudget, get_io_budget}; +-use ::common::cpu::get_cpu_budget; +-use ::common::flags::{feature_flags, init_feature_flags}; 
+-use ::tonic::transport::Uri; +-use api::grpc::transport_channel_pool::TransportChannelPool; +-use clap::Parser; +-use collection::shards::channel_service::ChannelService; +-use consensus::Consensus; +-use slog::Drain; +-use startup::setup_panic_hook; +-use storage::content_manager::consensus::operation_sender::OperationSender; +-use storage::content_manager::consensus::persistent::Persistent; +-use storage::content_manager::consensus_manager::{ConsensusManager, ConsensusStateRef}; +-use storage::content_manager::toc::TableOfContent; +-use storage::content_manager::toc::dispatcher::TocDispatcher; +-use storage::dispatcher::Dispatcher; +-use storage::rbac::Access; +-#[cfg(all( +- not(target_env = "msvc"), +- any(target_arch = "x86_64", target_arch = "aarch64") +-))] +-use tikv_jemallocator::Jemalloc; +- +-use crate::common::helpers::{ +- create_general_purpose_runtime, create_search_runtime, create_update_runtime, +- load_tls_client_config, +-}; +-use crate::common::inference::service::InferenceService; +-use crate::common::telemetry::TelemetryCollector; +-use crate::common::telemetry_reporting::TelemetryReporter; +-use crate::greeting::welcome; +-use crate::migrations::single_to_cluster::handle_existing_collections; +-use crate::settings::Settings; +-use crate::snapshots::{recover_full_snapshot, recover_snapshots}; +-use crate::startup::{remove_started_file_indicator, touch_started_file_indicator}; +- +-#[cfg(all( +- not(target_env = "msvc"), +- any(target_arch = "x86_64", target_arch = "aarch64") +-))] +-#[global_allocator] +-static GLOBAL: Jemalloc = Jemalloc; +- +-const FULL_ACCESS: Access = Access::full("For main"); +- +-/// Qdrant (read: quadrant ) is a vector similarity search engine. +-/// It provides a production-ready service with a convenient API to store, search, and manage points - vectors with an additional payload. +-/// +-/// This CLI starts a Qdrant peer/server. 
+-#[derive(Parser, Debug)] +-#[command(version, about)] +-struct Args { +- /// Uri of the peer to bootstrap from in case of multi-peer deployment. +- /// If not specified - this peer will be considered as a first in a new deployment. +- #[arg(long, value_parser, value_name = "URI", env = "QDRANT_BOOTSTRAP")] +- bootstrap: Option, +- /// Uri of this peer. +- /// Other peers should be able to reach it by this uri. +- /// +- /// This value has to be supplied if this is the first peer in a new deployment. +- /// +- /// In case this is not the first peer and it bootstraps the value is optional. +- /// If not supplied then qdrant will take internal grpc port from config and derive the IP address of this peer on bootstrap peer (receiving side) +- #[arg(long, value_parser, value_name = "URI", env = "QDRANT_URI")] +- uri: Option, +- +- /// Force snapshot re-creation +- /// If provided - existing collections will be replaced with snapshots. +- /// Default is to not recreate from snapshots. +- #[arg(short, long, action, default_value_t = false)] +- force_snapshot: bool, +- +- /// List of paths to snapshot files. +- /// Format: : +- /// +- /// WARN: Do not use this option if you are recovering collection in existing distributed cluster. +- /// Use `/collections//snapshots/recover` API instead. +- #[arg(long, value_name = "PATH:NAME", alias = "collection-snapshot")] +- snapshot: Option>, +- +- /// Path to snapshot of multiple collections. +- /// Format: +- /// +- /// WARN: Do not use this option if you are recovering collection in existing distributed cluster. +- /// Use `/collections//snapshots/recover` API instead. +- #[arg(long, value_name = "PATH")] +- storage_snapshot: Option, +- +- /// Path to an alternative configuration file. +- /// Format: +- /// +- /// Default path: config/config.yaml +- #[arg(long, value_name = "PATH")] +- config_path: Option, +- +- /// Disable telemetry sending to developers +- /// If provided - telemetry collection will be disabled. 
+- /// Read more: +- #[arg(long, action, default_value_t = false)] +- disable_telemetry: bool, +- +- /// Run stacktrace collector. Used for debugging. +- #[arg(long, action, default_value_t = false)] +- stacktrace: bool, +- +- /// Reinit consensus state. +- /// When enabled, the service will assume the consensus should be reinitialized. +- /// The exact behavior depends on if this current node has bootstrap URI or not. +- /// If it has - it'll remove current consensus state and consensus WAL (while keeping peer ID) +- /// and will try to receive state from the bootstrap peer. +- /// If it doesn't have - it'll remove other peers from voters promote +- /// the current peer to the leader and the single member of the cluster. +- /// It'll also compact consensus WAL to force snapshot +- #[arg(long, action, default_value_t = false)] +- reinit: bool, +-} +- +-fn main() -> anyhow::Result<()> { +- let args = Args::parse(); +- +- // Run backtrace collector, expected to used by `rstack` crate +- if args.stacktrace { +- #[cfg(all(target_os = "linux", feature = "stacktrace"))] +- { +- let _ = rstack_self::child(); +- } +- return Ok(()); +- } +- +- let settings = Settings::new(args.config_path)?; +- +- // Set global feature flags, sourced from configuration +- init_feature_flags(settings.feature_flags); +- +- let reporting_enabled = !settings.telemetry_disabled && !args.disable_telemetry; +- +- let reporting_id = TelemetryCollector::generate_id(); +- +- // Setup logging (no logging before this point) +- let logger_handle = tracing::setup( +- settings +- .logger +- .with_top_level_directive(settings.log_level.clone()), +- )?; +- +- remove_started_file_indicator(); +- +- setup_panic_hook(reporting_enabled, reporting_id.to_string()); +- +- memory::madvise::set_global(settings.storage.mmap_advice); +- segment::vector_storage::common::set_async_scorer( +- settings +- .storage +- .performance +- .async_scorer +- .unwrap_or_default(), +- ); +- +- welcome(&settings); +- +- #[cfg(feature 
= "gpu")] +- if let Some(settings_gpu) = &settings.gpu { +- use segment::index::hnsw_index::gpu::*; +- +- // initialize GPU devices manager. +- if settings_gpu.indexing { +- set_gpu_force_half_precision(settings_gpu.force_half_precision); +- set_gpu_groups_count(settings_gpu.groups_count); +- +- let mut gpu_device_manager = GPU_DEVICES_MANAGER.write(); +- *gpu_device_manager = match gpu_devices_manager::GpuDevicesMaganer::new( +- &settings_gpu.device_filter, +- settings_gpu.devices.as_deref(), +- settings_gpu.allow_integrated, +- settings_gpu.allow_emulated, +- true, // Currently we always wait for the free gpu device. +- settings_gpu.parallel_indexes.unwrap_or(1), +- ) { +- Ok(gpu_device_manager) => Some(gpu_device_manager), +- Err(err) => { +- log::error!("Can't initialize GPU devices manager: {err}"); +- None +- } +- } +- } +- } +- +- if let Some(recovery_warning) = &settings.storage.recovery_mode { +- log::warn!("Qdrant is loaded in recovery mode: {recovery_warning}"); +- log::warn!( +- "Read more: https://qdrant.tech/documentation/guides/administration/#recovery-mode" +- ); +- } +- +- // Validate as soon as possible, but we must initialize logging first +- settings.validate_and_warn(); +- +- // Report feature flags that are enabled for easier debugging +- let flags = feature_flags(); +- if !flags.is_default() { +- log::debug!("Feature flags: {flags:?}"); +- } +- +- let bootstrap = if args.bootstrap == args.uri { +- if args.bootstrap.is_some() { +- log::warn!( +- "Bootstrap URI is the same as this peer URI. Consider this peer as a first in a new deployment.", +- ); +- } +- None +- } else { +- args.bootstrap +- }; +- +- // Saved state of the consensus. 
+- let persistent_consensus_state = Persistent::load_or_init( +- &settings.storage.storage_path, +- bootstrap.is_none(), +- args.reinit, +- )?; +- +- let is_distributed_deployment = settings.cluster.enabled; +- +- let temp_path = settings.storage.temp_path.as_deref(); +- +- let restored_collections = if let Some(full_snapshot) = args.storage_snapshot { +- recover_full_snapshot( +- temp_path, +- &full_snapshot, +- &settings.storage.storage_path, +- args.force_snapshot, +- persistent_consensus_state.this_peer_id(), +- is_distributed_deployment, +- ) +- } else if let Some(snapshots) = args.snapshot { +- // recover from snapshots +- recover_snapshots( +- &snapshots, +- args.force_snapshot, +- temp_path, +- &settings.storage.storage_path, +- persistent_consensus_state.this_peer_id(), +- is_distributed_deployment, +- ) +- } else { +- vec![] +- }; +- +- // Create and own search runtime out of the scope of async context to ensure correct +- // destruction of it +- let search_runtime = create_search_runtime(settings.storage.performance.max_search_threads) +- .expect("Can't search create runtime."); +- +- let update_runtime = +- create_update_runtime(settings.storage.performance.max_optimization_threads) +- .expect("Can't optimizer create runtime."); +- +- let general_runtime = +- create_general_purpose_runtime().expect("Can't optimizer general purpose runtime."); +- let runtime_handle = general_runtime.handle().clone(); +- +- // Use global CPU budget for optimizations based on settings +- let cpu_budget = get_cpu_budget(settings.storage.performance.optimizer_cpu_budget); +- let io_budget = get_io_budget(settings.storage.performance.optimizer_io_budget, cpu_budget); +- let optimizer_resource_budget = ResourceBudget::new(cpu_budget, io_budget); +- +- // Create a signal sender and receiver. It is used to communicate with the consensus thread. 
+- let (propose_sender, propose_receiver) = std::sync::mpsc::channel(); +- +- let propose_operation_sender = if settings.cluster.enabled { +- // High-level channel which could be used to send User-space consensus operations +- Some(OperationSender::new(propose_sender)) +- } else { +- // We don't need sender for the single-node mode +- None +- }; +- +- // Channel service is used to manage connections between peers. +- // It allocates required number of channels and manages proper reconnection handling +- let mut channel_service = +- ChannelService::new(settings.service.http_port, settings.service.api_key.clone()); +- +- if is_distributed_deployment { +- // We only need channel_service in case if cluster is enabled. +- // So we initialize it with real values here +- let p2p_grpc_timeout = Duration::from_millis(settings.cluster.grpc_timeout_ms); +- let connection_timeout = Duration::from_millis(settings.cluster.connection_timeout_ms); +- +- let tls_config = load_tls_client_config(&settings)?; +- +- channel_service.channel_pool = Arc::new(TransportChannelPool::new( +- p2p_grpc_timeout, +- connection_timeout, +- settings.cluster.p2p.connection_pool_size, +- tls_config, +- )); +- channel_service.id_to_address = persistent_consensus_state.peer_address_by_id.clone(); +- channel_service.id_to_metadata = persistent_consensus_state.peer_metadata_by_id.clone(); +- } +- +- // Table of content manages the list of collections. +- // It is a main entry point for the storage. +- let toc = TableOfContent::new( +- &settings.storage, +- search_runtime, +- update_runtime, +- general_runtime, +- optimizer_resource_budget, +- channel_service.clone(), +- persistent_consensus_state.this_peer_id(), +- propose_operation_sender.clone(), +- ); +- +- toc.clear_all_tmp_directories()?; +- +- // Here we load all stored collections. 
+- runtime_handle.block_on(async { +- for collection in toc.all_collections(&FULL_ACCESS).await { +- log::debug!("Loaded collection: {collection}"); +- } +- }); +- +- let toc_arc = Arc::new(toc); +- let storage_path = toc_arc.storage_path(); +- +- // Holder for all actively running threads of the service: web, gPRC, consensus, etc. +- let mut handles: Vec>> = vec![]; +- +- // Router for external queries. +- // It decides if query should go directly to the ToC or through the consensus. +- let mut dispatcher = Dispatcher::new(toc_arc.clone()); +- +- let (telemetry_collector, dispatcher_arc, health_checker) = if is_distributed_deployment { +- let consensus_state: ConsensusStateRef = ConsensusManager::new( +- persistent_consensus_state, +- toc_arc.clone(), +- propose_operation_sender.unwrap(), +- storage_path, +- ) +- .into(); +- let is_new_deployment = consensus_state.is_new_deployment(); +- +- dispatcher = +- dispatcher.with_consensus(consensus_state.clone(), settings.cluster.resharding_enabled); +- +- let toc_dispatcher = TocDispatcher::new(Arc::downgrade(&toc_arc), consensus_state.clone()); +- toc_arc.with_toc_dispatcher(toc_dispatcher); +- +- let dispatcher_arc = Arc::new(dispatcher); +- +- // Monitoring and telemetry. +- let telemetry_collector = +- TelemetryCollector::new(settings.clone(), dispatcher_arc.clone(), reporting_id); +- let tonic_telemetry_collector = telemetry_collector.tonic_telemetry_collector.clone(); +- +- // `raft` crate uses `slog` crate so it is needed to use `slog_stdlog::StdLog` to forward +- // logs from it to `log` crate +- let slog_logger = slog::Logger::root(slog_stdlog::StdLog.fuse(), slog::o!()); +- +- // Runs raft consensus in a separate thread. 
+- // Create a pipe `message_sender` to communicate with the consensus +- let health_checker = Arc::new(common::health::HealthChecker::spawn( +- toc_arc.clone(), +- consensus_state.clone(), +- &runtime_handle, +- // NOTE: `wait_for_bootstrap` should be calculated *before* starting `Consensus` thread +- consensus_state.is_new_deployment() && bootstrap.is_some(), +- )); +- +- let handle = Consensus::run( +- &slog_logger, +- consensus_state.clone(), +- bootstrap, +- args.uri.map(|uri| uri.to_string()), +- settings.clone(), +- channel_service, +- propose_receiver, +- tonic_telemetry_collector, +- toc_arc.clone(), +- runtime_handle.clone(), +- args.reinit, +- ) +- .expect("Can't initialize consensus"); +- +- handles.push(handle); +- +- let toc_arc_clone = toc_arc.clone(); +- let consensus_state_clone = consensus_state.clone(); +- let _cancel_transfer_handle = runtime_handle.spawn(async move { +- consensus_state_clone.is_leader_established.await_ready(); +- match toc_arc_clone +- .cancel_related_transfers("Source or target peer restarted") +- .await +- { +- Ok(_) => { +- log::debug!("All transfers if any cancelled"); +- } +- Err(err) => { +- log::error!("Can't cancel related transfers: {err}"); +- } +- } +- }); +- +- // TODO(resharding): Remove resharding driver? 
+- // +- // runtime_handle.block_on(async { +- // toc_arc.resume_resharding_tasks().await; +- // }); +- +- let collections_to_recover_in_consensus = if is_new_deployment { +- let existing_collections = +- runtime_handle.block_on(toc_arc.all_collections(&FULL_ACCESS)); +- existing_collections +- .into_iter() +- .map(|pass| pass.name().to_string()) +- .collect() +- } else { +- restored_collections +- }; +- +- if !collections_to_recover_in_consensus.is_empty() { +- runtime_handle.block_on(handle_existing_collections( +- toc_arc.clone(), +- consensus_state.clone(), +- dispatcher_arc.clone(), +- consensus_state.this_peer_id(), +- collections_to_recover_in_consensus, +- )); +- } +- +- (telemetry_collector, dispatcher_arc, Some(health_checker)) +- } else { +- log::info!("Distributed mode disabled"); +- let dispatcher_arc = Arc::new(dispatcher); +- +- // Monitoring and telemetry. +- let telemetry_collector = +- TelemetryCollector::new(settings.clone(), dispatcher_arc.clone(), reporting_id); +- (telemetry_collector, dispatcher_arc, None) +- }; +- +- let tonic_telemetry_collector = telemetry_collector.tonic_telemetry_collector.clone(); +- +- // +- // Telemetry reporting +- // +- +- let reporting_id = telemetry_collector.reporting_id(); +- let telemetry_collector = Arc::new(tokio::sync::Mutex::new(telemetry_collector)); +- +- if reporting_enabled { +- log::info!("Telemetry reporting enabled, id: {reporting_id}"); +- +- runtime_handle.spawn(TelemetryReporter::run(telemetry_collector.clone())); +- } else { +- log::info!("Telemetry reporting disabled"); +- } +- +- if settings.service.hardware_reporting == Some(true) { +- log::info!("Hardware reporting enabled"); +- } +- +- // Setup subscribers to listen for issue-able events +- issues_setup::setup_subscribers(&settings); +- +- // Helper to better log start errors +- let log_err_if_any = |server_name, result| match result { +- Err(err) => { +- log::error!("Error while starting {server_name} server: {err}"); +- Err(err) +- } +- ok 
=> ok, +- }; +- +- // +- // Inference Service +- // +- if let Some(inference_config) = settings.inference.clone() { +- match InferenceService::init_global(inference_config) { +- Ok(_) => { +- log::info!("Inference service is configured."); +- } +- Err(err) => { +- log::error!("{err}"); +- } +- } +- } else { +- log::info!("Inference service is not configured."); +- } +- +- // +- // REST API server +- // +- +- #[cfg(feature = "web")] +- { +- let dispatcher_arc = dispatcher_arc.clone(); +- let settings = settings.clone(); +- let handle = thread::Builder::new() +- .name("web".to_string()) +- .spawn(move || { +- log_err_if_any( +- "REST", +- actix::init( +- dispatcher_arc.clone(), +- telemetry_collector, +- health_checker, +- settings, +- logger_handle, +- ), +- ) +- }) +- .unwrap(); +- handles.push(handle); +- } +- +- // +- // gRPC server +- // +- +- if let Some(grpc_port) = settings.service.grpc_port { +- let settings = settings.clone(); +- let handle = thread::Builder::new() +- .name("grpc".to_string()) +- .spawn(move || { +- log_err_if_any( +- "gRPC", +- tonic::init( +- dispatcher_arc, +- tonic_telemetry_collector, +- settings, +- grpc_port, +- runtime_handle, +- ), +- ) +- }) +- .unwrap(); +- handles.push(handle); +- } else { +- log::info!("gRPC endpoint disabled"); +- } +- +- #[cfg(feature = "service_debug")] +- { +- use std::fmt::Write; +- +- use parking_lot::deadlock; +- +- const DEADLOCK_CHECK_PERIOD: Duration = Duration::from_secs(10); +- +- thread::Builder::new() +- .name("deadlock_checker".to_string()) +- .spawn(move || { +- loop { +- thread::sleep(DEADLOCK_CHECK_PERIOD); +- let deadlocks = deadlock::check_deadlock(); +- if deadlocks.is_empty() { +- continue; +- } +- +- let mut error = format!("{} deadlocks detected\n", deadlocks.len()); +- for (i, threads) in deadlocks.iter().enumerate() { +- writeln!(error, "Deadlock #{i}").expect("fail to writeln!"); +- for t in threads { +- writeln!( +- error, +- "Thread Id {:#?}\n{:#?}", +- t.thread_id(), +- 
t.backtrace(), +- ) +- .expect("fail to writeln!"); +- } +- } +- log::error!("{error}"); +- } +- }) +- .unwrap(); +- } +- +- touch_started_file_indicator(); +- +- for handle in handles { +- log::debug!( +- "Waiting for thread {} to finish", +- handle.thread().name().unwrap() +- ); +- handle.join().expect("thread is not panicking")?; +- } +- drop(toc_arc); +- drop(settings); +- Ok(()) +-} +\ No newline at end of file diff --git a/locodiff-250425/results/qdrant_src_main.rs/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt b/locodiff-250425/results/qdrant_src_main.rs/x-ai_grok-code-fast-1/20250909_155741/raw_response.txt new file mode 100644 index 000000000..e69de29bb diff --git a/locodiff-250425/results/qdrant_src_tonic_api_points_api.rs/x-ai_grok-code-fast-1/20250909_155758/extracted_output.txt b/locodiff-250425/results/qdrant_src_tonic_api_points_api.rs/x-ai_grok-code-fast-1/20250909_155758/extracted_output.txt new file mode 100644 index 000000000..3b702ccc1 --- /dev/null +++ b/locodiff-250425/results/qdrant_src_tonic_api_points_api.rs/x-ai_grok-code-fast-1/20250909_155758/extracted_output.txt @@ -0,0 +1,744 @@ +// This is a comment to indicate the start of the response +use tokio::spawn; +use futures::future::join_all; + +use std::sync::Arc; +use std::time::Duration; +use std::time::Instant; + +use api::grpc::qdrant::points_server::Points; +use api::grpc::qdrant::{ + ClearPayloadPoints, CountPoints, CountResponse, CreateFieldIndexCollection, + DeleteFieldIndexCollection, DeletePayloadPoints, DeletePointVectors, DeletePoints, + DiscoverBatchPoints, DiscoverBatchResponse, DiscoverPoints, DiscoverResponse, FacetCounts, + FacetResponse, GetPoints, GetResponse, PointsOperationResponse, QueryBatchPoints, + QueryBatchResponse, QueryGroupsResponse, QueryPointGroups, QueryPoints, QueryResponse, + RecommendBatchPoints, RecommendBatchResponse, RecommendGroupsResponse, RecommendPointGroups, + RecommendPoints, RecommendResponse, ScrollPoints, ScrollResponse, 
SearchBatchPoints, + SearchBatchResponse, SearchGroupsResponse, SearchMatrixOffsets, SearchMatrixOffsetsResponse, + SearchMatrixPairs, SearchMatrixPairsResponse, SearchMatrixPoints, SearchPointGroups, + SearchPoints, SearchResponse, SetPayloadPoints, UpdateBatchPoints, UpdateBatchResponse, + UpdatePointVectors, UpsertPoints, +}; +use collection::operations::types::CoreSearchRequest; +use collection::operations::verification::new_unchecked_verification_pass; +use common::counter::hardware_accumulator::HwMeasurementAcc; +use storage::content_manager::toc::request_hw_counter::RequestHwCounter; +use storage::dispatcher::Dispatcher; +use tonic::{Request, Response, Status}; + +use super::query_common::*; +use super::update_common::*; +use super::validate; +use crate::common::inference::extract_token; +use crate::common::update::InternalUpdateParams; +use crate::settings::ServiceConfig; +use crate::tonic::auth::extract_access; +use crate::tonic::verification::StrictModeCheckedTocProvider; + +pub struct PointsService { + dispatcher: Arc, + service_config: ServiceConfig, +} + +impl PointsService { + pub fn new(dispatcher: Arc, service_config: ServiceConfig) -> Self { + Self { + dispatcher, + service_config, + } + } + + fn get_request_collection_hw_usage_counter( + &self, + collection_name: String, + wait: Option, + ) -> RequestHwCounter { + let counter = HwMeasurementAcc::new_with_metrics_drain( + self.dispatcher.get_collection_hw_metrics(collection_name), + ); + + let waiting = wait != Some(false); + RequestHwCounter::new(counter, self.service_config.hardware_reporting() && waiting) + } +} + +#[tonic::async_trait] +impl Points for PointsService { + async fn upsert( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + + let access = extract_access(&mut request); + let inference_token = extract_token(&request); + + let collection_name = request.get_ref().collection_name.clone(); + let wait = 
Some(request.get_ref().wait.unwrap_or(false)); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, wait); + + upsert( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + InternalUpdateParams::default(), + access, + inference_token, + hw_metrics, + ) + .await + .map(|resp| resp.map(Into::into)) + } + + async fn delete( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + + let access = extract_access(&mut request); + let inference_token = extract_token(&request); + + let collection_name = request.get_ref().collection_name.clone(); + let wait = Some(request.get_ref().wait.unwrap_or(false)); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, wait); + + delete( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + InternalUpdateParams::default(), + access, + inference_token, + hw_metrics, + ) + .await + .map(|resp| resp.map(Into::into)) + } + + async fn get(&self, mut request: Request) -> Result, Status> { + validate(request.get_ref())?; + + let access = extract_access(&mut request); + + let inner_request = request.into_inner(); + + let hw_metrics = self + .get_request_collection_hw_usage_counter(inner_request.collection_name.clone(), None); + + get( + StrictModeCheckedTocProvider::new(&self.dispatcher), + inner_request, + None, + access, + hw_metrics, + ) + .await + } + + async fn update_vectors( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + + // Nothing to verify here. 
+ let pass = new_unchecked_verification_pass(); + + let access = extract_access(&mut request); + let inference_token = extract_token(&request); + + let collection_name = request.get_ref().collection_name.clone(); + let wait = Some(request.get_ref().wait.unwrap_or(false)); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, wait); + + update_vectors( + self.dispatcher.toc(&access, &pass).clone(), + request.into_inner(), + InternalUpdateParams::default(), + access, + inference_token, + hw_metrics, + ) + .await + .map(|resp| resp.map(Into::into)) + } + + async fn delete_vectors( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + + let access = extract_access(&mut request); + + let hw_metrics = self.get_request_collection_hw_usage_counter( + request.get_ref().collection_name.clone(), + None, + ); + + delete_vectors( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + InternalUpdateParams::default(), + access, + hw_metrics, + ) + .await + .map(|resp| resp.map(Into::into)) + } + + async fn set_payload( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + + let access = extract_access(&mut request); + + let collection_name = request.get_ref().collection_name.clone(); + let wait = Some(request.get_ref().wait.unwrap_or(false)); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, wait); + + set_payload( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + InternalUpdateParams::default(), + access, + hw_metrics, + ) + .await + .map(|resp| resp.map(Into::into)) + } + + async fn overwrite_payload( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + + let access = extract_access(&mut request); + + let collection_name = request.get_ref().collection_name.clone(); + let wait = Some(request.get_ref().wait.unwrap_or(false)); + let hw_metrics = 
self.get_request_collection_hw_usage_counter(collection_name, wait); + + overwrite_payload( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + InternalUpdateParams::default(), + access, + hw_metrics, + ) + .await + .map(|resp| resp.map(Into::into)) + } + + async fn delete_payload( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + + let access = extract_access(&mut request); + + let collection_name = request.get_ref().collection_name.clone(); + let wait = Some(request.get_ref().wait.unwrap_or(false)); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, wait); + + delete_payload( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + InternalUpdateParams::default(), + access, + hw_metrics, + ) + .await + .map(|resp| resp.map(Into::into)) + } + + async fn clear_payload( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + + let access = extract_access(&mut request); + + let collection_name = request.get_ref().collection_name.clone(); + let wait = Some(request.get_ref().wait.unwrap_or(false)); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, wait); + + clear_payload( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + InternalUpdateParams::default(), + access, + hw_metrics, + ) + .await + .map(|resp| resp.map(Into::into)) + } + + async fn update_batch( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + + let access = extract_access(&mut request); + let inference_token = extract_token(&request); + + let collection_name = request.get_ref().collection_name.clone(); + let wait = Some(request.get_ref().wait.unwrap_or(false)); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, wait); + + update_batch( + &self.dispatcher, + request.into_inner(), + InternalUpdateParams::default(), + 
access, + inference_token, + hw_metrics, + ) + .await + } + + async fn create_field_index( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + + let access = extract_access(&mut request); + + let collection_name = request.get_ref().collection_name.clone(); + let wait = Some(request.get_ref().wait.unwrap_or(false)); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, wait); + + create_field_index( + self.dispatcher.clone(), + request.into_inner(), + InternalUpdateParams::default(), + access, + hw_metrics, + ) + .await + .map(|resp| resp.map(Into::into)) + } + + async fn delete_field_index( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + let access = extract_access(&mut request); + + let collection_name = request.get_ref().collection_name.clone(); + let wait = Some(request.get_ref().wait.unwrap_or(false)); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, wait); + + delete_field_index( + self.dispatcher.clone(), + request.into_inner(), + InternalUpdateParams::default(), + access, + hw_metrics, + ) + .await + .map(|resp| resp.map(Into::into)) + } + + async fn search( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + let access = extract_access(&mut request); + + let collection_name = request.get_ref().collection_name.clone(); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, None); + + let res = search( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + None, + access, + hw_metrics, + ) + .await?; + + Ok(res) + } + + async fn search_batch( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + + let access = extract_access(&mut request); + + let SearchBatchPoints { + collection_name, + search_points, + read_consistency, + timeout, + } = request.into_inner(); + + let mut requests = 
Vec::new(); + + for mut search_point in search_points { + let shard_key = search_point.shard_key_selector.take(); + + let shard_selector = convert_shard_selector_for_read(None, shard_key); + let core_search_request = CoreSearchRequest::try_from(search_point)?; + + requests.push((core_search_request, shard_selector)); + } + + let hw_metrics = + self.get_request_collection_hw_usage_counter(collection_name.clone(), None); + + let res = core_search_batch( + StrictModeCheckedTocProvider::new(&self.dispatcher), + &collection_name, + requests, + read_consistency, + access, + timeout.map(Duration::from_secs), + hw_metrics, + ) + .await?; + + Ok(res) + } + + async fn search_groups( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + let access = extract_access(&mut request); + let collection_name = request.get_ref().collection_name.clone(); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, None); + let res = search_groups( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + None, + access, + hw_metrics, + ) + .await?; + + Ok(res) + } + + async fn scroll( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + let access = extract_access(&mut request); + + let inner_request = request.into_inner(); + + let hw_metrics = self + .get_request_collection_hw_usage_counter(inner_request.collection_name.clone(), None); + + scroll( + StrictModeCheckedTocProvider::new(&self.dispatcher), + inner_request, + None, + access, + hw_metrics, + ) + .await + } + + async fn recommend( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + let access = extract_access(&mut request); + let collection_name = request.get_ref().collection_name.clone(); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, None); + let res = recommend( + StrictModeCheckedTocProvider::new(&self.dispatcher), + 
request.into_inner(), + access, + hw_metrics, + ) + .await?; + + Ok(res) + } + + async fn recommend_batch( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + let access = extract_access(&mut request); + let RecommendBatchPoints { + collection_name, + recommend_points, + read_consistency, + timeout, + } = request.into_inner(); + + let hw_metrics = + self.get_request_collection_hw_usage_counter(collection_name.clone(), None); + + let res = recommend_batch( + StrictModeCheckedTocProvider::new(&self.dispatcher), + &collection_name, + recommend_points, + read_consistency, + access, + timeout.map(Duration::from_secs), + hw_metrics, + ) + .await?; + + Ok(res) + } + + async fn recommend_groups( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + let access = extract_access(&mut request); + let collection_name = request.get_ref().collection_name.clone(); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, None); + + let res = recommend_groups( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + access, + hw_metrics, + ) + .await?; + + Ok(res) + } + + async fn discover( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + + let access = extract_access(&mut request); + let collection_name = request.get_ref().collection_name.clone(); + + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, None); + let res = discover( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + access, + hw_metrics, + ) + .await?; + + Ok(res) + } + + async fn discover_batch( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + + let access = extract_access(&mut request); + + let DiscoverBatchPoints { + collection_name, + discover_points, + read_consistency, + timeout, + } = request.into_inner(); + + let hw_metrics = + 
self.get_request_collection_hw_usage_counter(collection_name.clone(), None); + let res = discover_batch( + StrictModeCheckedTocProvider::new(&self.dispatcher), + &collection_name, + discover_points, + read_consistency, + access, + timeout.map(Duration::from_secs), + hw_metrics, + ) + .await?; + + Ok(res) + } + + async fn count( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + + let access = extract_access(&mut request); + let collection_name = request.get_ref().collection_name.clone(); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, None); + let res = count( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + None, + &access, + hw_metrics, + ) + .await?; + + Ok(res) + } + + async fn query( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + let access = extract_access(&mut request); + let inference_token = extract_token(&request); + let collection_name = request.get_ref().collection_name.clone(); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, None); + + let res = query( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + None, + access, + hw_metrics, + inference_token, + ) + .await?; + + Ok(res) + } + + async fn query_batch( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + let access = extract_access(&mut request); + let inference_token = extract_token(&request); + let request = request.into_inner(); + let QueryBatchPoints { + collection_name, + query_points, + read_consistency, + timeout, + } = request; + let timeout = timeout.map(Duration::from_secs); + let hw_metrics = + self.get_request_collection_hw_usage_counter(collection_name.clone(), None); + let res = query_batch( + StrictModeCheckedTocProvider::new(&self.dispatcher), + &collection_name, + query_points, + read_consistency, + access, + timeout, + hw_metrics, + 
inference_token, + ) + .await?; + + Ok(res) + } + + async fn query_groups( + &self, + mut request: Request, + ) -> Result, Status> { + let access = extract_access(&mut request); + let inference_token = extract_token(&request); + let collection_name = request.get_ref().collection_name.clone(); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, None); + + let res = query_groups( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + None, + access, + hw_metrics, + inference_token, + ) + .await?; + + Ok(res) + } + async fn facet( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + let access = extract_access(&mut request); + let hw_metrics = self.get_request_collection_hw_usage_counter( + request.get_ref().collection_name.clone(), + None, + ); + facet( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + access, + hw_metrics, + ) + .await + } + + async fn search_matrix_pairs( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + let access = extract_access(&mut request); + let timing = Instant::now(); + let collection_name = request.get_ref().collection_name.clone(); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, None); + let search_matrix_response = search_points_matrix( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + access, + hw_metrics.get_counter(), + ) + .await?; + let pairs_response = SearchMatrixPairsResponse { + result: Some(SearchMatrixPairs::from(search_matrix_response)), + time: timing.elapsed().as_secs_f64(), + usage: hw_metrics.to_grpc_api(), + }; + + Ok(Response::new(pairs_response)) + } + + async fn search_matrix_offsets( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + let access = extract_access(&mut request); + let timing = Instant::now(); + let collection_name = 
request.get_ref().collection_name.clone(); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, None); + let search_matrix_response = search_points_matrix( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + access, + hw_metrics.get_counter(), + ) + .await?; + let offsets_response = SearchMatrixOffsetsResponse { + result: Some(SearchMatrixOffsets::from(search_matrix_response)), + time: timing.elapsed().as_secs_f64(), + usage: hw_metrics.to_grpc_api(), + }; + + Ok(Response::new(offsets_response)) + } +} \ No newline at end of file diff --git a/locodiff-250425/results/qdrant_src_tonic_api_points_api.rs/x-ai_grok-code-fast-1/20250909_155758/metadata.json b/locodiff-250425/results/qdrant_src_tonic_api_points_api.rs/x-ai_grok-code-fast-1/20250909_155758/metadata.json new file mode 100644 index 000000000..b41f78c82 --- /dev/null +++ b/locodiff-250425/results/qdrant_src_tonic_api_points_api.rs/x-ai_grok-code-fast-1/20250909_155758/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "qdrant_src_tonic_api_points_api.rs", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/qdrant_src_tonic_api_points_api.rs_prompt.txt", + "expected_file": "locodiff-250425/prompts/qdrant_src_tonic_api_points_api.rs_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:57:58.147796+00:00", + "success": false, + "error": "Output mismatch", + "api_error": false, + "raw_response_length": 24147, + "extracted_output_length": 24135, + "expected_output_length": 23659, + "results_dir": "locodiff-250425/results/qdrant_src_tonic_api_points_api.rs/x-ai_grok-code-fast-1/20250909_155758", + "generation_id": "gen-1757433478-KVAmwqUrMsHSmBXOfzDL", + "cost_usd": 0.02156298, + "prompt_tokens": 67810, + "completion_tokens": 5063, + "total_tokens": 72873, + "native_prompt_tokens": 67248, + "native_completion_tokens": 5455, + "native_finish_reason": "stop", + "stats_error": null, + 
"native_tokens_reasoning": 761 +} \ No newline at end of file diff --git a/locodiff-250425/results/qdrant_src_tonic_api_points_api.rs/x-ai_grok-code-fast-1/20250909_155758/output.diff b/locodiff-250425/results/qdrant_src_tonic_api_points_api.rs/x-ai_grok-code-fast-1/20250909_155758/output.diff new file mode 100644 index 000000000..f40d66158 --- /dev/null +++ b/locodiff-250425/results/qdrant_src_tonic_api_points_api.rs/x-ai_grok-code-fast-1/20250909_155758/output.diff @@ -0,0 +1,132 @@ + +index 78b27a3ba..3b702ccc1 100644 +--- a/qdrant_src_tonic_api_points_api.rs_expectedoutput.txt (expected):tmp/tmp1t2t_yy3_expected.txt ++++ b/qdrant_src_tonic_api_points_api.rs_extracted.txt (actual):tmp/tmp6o2sx3_9_actual.txt +@@ -1,5 +1,10 @@ ++// This is a comment to indicate the start of the response ++use tokio::spawn; ++use futures::future::join_all; ++ + use std::sync::Arc; +-use std::time::{Duration, Instant}; ++use std::time::Duration; ++use std::time::Instant; + + use api::grpc::qdrant::points_server::Points; + use api::grpc::qdrant::{ +@@ -16,6 +21,7 @@ use api::grpc::qdrant::{ + UpdatePointVectors, UpsertPoints, + }; + use collection::operations::types::CoreSearchRequest; ++use collection::operations::verification::new_unchecked_verification_pass; + use common::counter::hardware_accumulator::HwMeasurementAcc; + use storage::content_manager::toc::request_hw_counter::RequestHwCounter; + use storage::dispatcher::Dispatcher; +@@ -136,6 +142,7 @@ impl Points for PointsService { + validate(request.get_ref())?; + + // Nothing to verify here. 
++ let pass = new_unchecked_verification_pass(); + + let access = extract_access(&mut request); + let inference_token = extract_token(&request); +@@ -145,7 +152,7 @@ impl Points for PointsService { + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, wait); + + update_vectors( +- StrictModeCheckedTocProvider::new(&self.dispatcher), ++ self.dispatcher.toc(&access, &pass).clone(), + request.into_inner(), + InternalUpdateParams::default(), + access, +@@ -301,8 +308,9 @@ impl Points for PointsService { + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; +- ++ + let access = extract_access(&mut request); ++ + let collection_name = request.get_ref().collection_name.clone(); + let wait = Some(request.get_ref().wait.unwrap_or(false)); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, wait); +@@ -323,14 +331,18 @@ impl Points for PointsService { + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; +- + let access = extract_access(&mut request); + ++ let collection_name = request.get_ref().collection_name.clone(); ++ let wait = Some(request.get_ref().wait.unwrap_or(false)); ++ let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, wait); ++ + delete_field_index( + self.dispatcher.clone(), + request.into_inner(), + InternalUpdateParams::default(), + access, ++ hw_metrics, + ) + .await + .map(|resp| resp.map(Into::into)) +@@ -373,8 +385,6 @@ impl Points for PointsService { + timeout, + } = request.into_inner(); + +- let timeout = timeout.map(Duration::from_secs); +- + let mut requests = Vec::new(); + + for mut search_point in search_points { +@@ -395,7 +405,7 @@ impl Points for PointsService { + requests, + read_consistency, + access, +- timeout, ++ timeout.map(Duration::from_secs), + hw_metrics, + ) + .await?; +@@ -428,7 +438,6 @@ impl Points for PointsService { + mut request: Request, + ) -> Result, Status> { + 
validate(request.get_ref())?; +- + let access = extract_access(&mut request); + + let inner_request = request.into_inner(); +@@ -520,6 +529,7 @@ impl Points for PointsService { + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; ++ + let access = extract_access(&mut request); + let collection_name = request.get_ref().collection_name.clone(); + +@@ -540,7 +550,9 @@ impl Points for PointsService { + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; ++ + let access = extract_access(&mut request); ++ + let DiscoverBatchPoints { + collection_name, + discover_points, +@@ -696,7 +708,6 @@ impl Points for PointsService { + hw_metrics.get_counter(), + ) + .await?; +- + let pairs_response = SearchMatrixPairsResponse { + result: Some(SearchMatrixPairs::from(search_matrix_response)), + time: timing.elapsed().as_secs_f64(), +@@ -722,7 +733,6 @@ impl Points for PointsService { + hw_metrics.get_counter(), + ) + .await?; +- + let offsets_response = SearchMatrixOffsetsResponse { + result: Some(SearchMatrixOffsets::from(search_matrix_response)), + time: timing.elapsed().as_secs_f64(), diff --git a/locodiff-250425/results/qdrant_src_tonic_api_points_api.rs/x-ai_grok-code-fast-1/20250909_155758/raw_response.txt b/locodiff-250425/results/qdrant_src_tonic_api_points_api.rs/x-ai_grok-code-fast-1/20250909_155758/raw_response.txt new file mode 100644 index 000000000..ab96477a6 --- /dev/null +++ b/locodiff-250425/results/qdrant_src_tonic_api_points_api.rs/x-ai_grok-code-fast-1/20250909_155758/raw_response.txt @@ -0,0 +1,746 @@ +```rust +// This is a comment to indicate the start of the response +use tokio::spawn; +use futures::future::join_all; + +use std::sync::Arc; +use std::time::Duration; +use std::time::Instant; + +use api::grpc::qdrant::points_server::Points; +use api::grpc::qdrant::{ + ClearPayloadPoints, CountPoints, CountResponse, CreateFieldIndexCollection, + DeleteFieldIndexCollection, DeletePayloadPoints, 
DeletePointVectors, DeletePoints, + DiscoverBatchPoints, DiscoverBatchResponse, DiscoverPoints, DiscoverResponse, FacetCounts, + FacetResponse, GetPoints, GetResponse, PointsOperationResponse, QueryBatchPoints, + QueryBatchResponse, QueryGroupsResponse, QueryPointGroups, QueryPoints, QueryResponse, + RecommendBatchPoints, RecommendBatchResponse, RecommendGroupsResponse, RecommendPointGroups, + RecommendPoints, RecommendResponse, ScrollPoints, ScrollResponse, SearchBatchPoints, + SearchBatchResponse, SearchGroupsResponse, SearchMatrixOffsets, SearchMatrixOffsetsResponse, + SearchMatrixPairs, SearchMatrixPairsResponse, SearchMatrixPoints, SearchPointGroups, + SearchPoints, SearchResponse, SetPayloadPoints, UpdateBatchPoints, UpdateBatchResponse, + UpdatePointVectors, UpsertPoints, +}; +use collection::operations::types::CoreSearchRequest; +use collection::operations::verification::new_unchecked_verification_pass; +use common::counter::hardware_accumulator::HwMeasurementAcc; +use storage::content_manager::toc::request_hw_counter::RequestHwCounter; +use storage::dispatcher::Dispatcher; +use tonic::{Request, Response, Status}; + +use super::query_common::*; +use super::update_common::*; +use super::validate; +use crate::common::inference::extract_token; +use crate::common::update::InternalUpdateParams; +use crate::settings::ServiceConfig; +use crate::tonic::auth::extract_access; +use crate::tonic::verification::StrictModeCheckedTocProvider; + +pub struct PointsService { + dispatcher: Arc, + service_config: ServiceConfig, +} + +impl PointsService { + pub fn new(dispatcher: Arc, service_config: ServiceConfig) -> Self { + Self { + dispatcher, + service_config, + } + } + + fn get_request_collection_hw_usage_counter( + &self, + collection_name: String, + wait: Option, + ) -> RequestHwCounter { + let counter = HwMeasurementAcc::new_with_metrics_drain( + self.dispatcher.get_collection_hw_metrics(collection_name), + ); + + let waiting = wait != Some(false); + 
RequestHwCounter::new(counter, self.service_config.hardware_reporting() && waiting) + } +} + +#[tonic::async_trait] +impl Points for PointsService { + async fn upsert( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + + let access = extract_access(&mut request); + let inference_token = extract_token(&request); + + let collection_name = request.get_ref().collection_name.clone(); + let wait = Some(request.get_ref().wait.unwrap_or(false)); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, wait); + + upsert( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + InternalUpdateParams::default(), + access, + inference_token, + hw_metrics, + ) + .await + .map(|resp| resp.map(Into::into)) + } + + async fn delete( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + + let access = extract_access(&mut request); + let inference_token = extract_token(&request); + + let collection_name = request.get_ref().collection_name.clone(); + let wait = Some(request.get_ref().wait.unwrap_or(false)); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, wait); + + delete( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + InternalUpdateParams::default(), + access, + inference_token, + hw_metrics, + ) + .await + .map(|resp| resp.map(Into::into)) + } + + async fn get(&self, mut request: Request) -> Result, Status> { + validate(request.get_ref())?; + + let access = extract_access(&mut request); + + let inner_request = request.into_inner(); + + let hw_metrics = self + .get_request_collection_hw_usage_counter(inner_request.collection_name.clone(), None); + + get( + StrictModeCheckedTocProvider::new(&self.dispatcher), + inner_request, + None, + access, + hw_metrics, + ) + .await + } + + async fn update_vectors( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + + 
// Nothing to verify here. + let pass = new_unchecked_verification_pass(); + + let access = extract_access(&mut request); + let inference_token = extract_token(&request); + + let collection_name = request.get_ref().collection_name.clone(); + let wait = Some(request.get_ref().wait.unwrap_or(false)); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, wait); + + update_vectors( + self.dispatcher.toc(&access, &pass).clone(), + request.into_inner(), + InternalUpdateParams::default(), + access, + inference_token, + hw_metrics, + ) + .await + .map(|resp| resp.map(Into::into)) + } + + async fn delete_vectors( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + + let access = extract_access(&mut request); + + let hw_metrics = self.get_request_collection_hw_usage_counter( + request.get_ref().collection_name.clone(), + None, + ); + + delete_vectors( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + InternalUpdateParams::default(), + access, + hw_metrics, + ) + .await + .map(|resp| resp.map(Into::into)) + } + + async fn set_payload( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + + let access = extract_access(&mut request); + + let collection_name = request.get_ref().collection_name.clone(); + let wait = Some(request.get_ref().wait.unwrap_or(false)); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, wait); + + set_payload( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + InternalUpdateParams::default(), + access, + hw_metrics, + ) + .await + .map(|resp| resp.map(Into::into)) + } + + async fn overwrite_payload( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + + let access = extract_access(&mut request); + + let collection_name = request.get_ref().collection_name.clone(); + let wait = Some(request.get_ref().wait.unwrap_or(false)); + 
let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, wait); + + overwrite_payload( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + InternalUpdateParams::default(), + access, + hw_metrics, + ) + .await + .map(|resp| resp.map(Into::into)) + } + + async fn delete_payload( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + + let access = extract_access(&mut request); + + let collection_name = request.get_ref().collection_name.clone(); + let wait = Some(request.get_ref().wait.unwrap_or(false)); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, wait); + + delete_payload( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + InternalUpdateParams::default(), + access, + hw_metrics, + ) + .await + .map(|resp| resp.map(Into::into)) + } + + async fn clear_payload( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + + let access = extract_access(&mut request); + + let collection_name = request.get_ref().collection_name.clone(); + let wait = Some(request.get_ref().wait.unwrap_or(false)); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, wait); + + clear_payload( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + InternalUpdateParams::default(), + access, + hw_metrics, + ) + .await + .map(|resp| resp.map(Into::into)) + } + + async fn update_batch( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + + let access = extract_access(&mut request); + let inference_token = extract_token(&request); + + let collection_name = request.get_ref().collection_name.clone(); + let wait = Some(request.get_ref().wait.unwrap_or(false)); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, wait); + + update_batch( + &self.dispatcher, + request.into_inner(), + 
InternalUpdateParams::default(), + access, + inference_token, + hw_metrics, + ) + .await + } + + async fn create_field_index( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + + let access = extract_access(&mut request); + + let collection_name = request.get_ref().collection_name.clone(); + let wait = Some(request.get_ref().wait.unwrap_or(false)); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, wait); + + create_field_index( + self.dispatcher.clone(), + request.into_inner(), + InternalUpdateParams::default(), + access, + hw_metrics, + ) + .await + .map(|resp| resp.map(Into::into)) + } + + async fn delete_field_index( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + let access = extract_access(&mut request); + + let collection_name = request.get_ref().collection_name.clone(); + let wait = Some(request.get_ref().wait.unwrap_or(false)); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, wait); + + delete_field_index( + self.dispatcher.clone(), + request.into_inner(), + InternalUpdateParams::default(), + access, + hw_metrics, + ) + .await + .map(|resp| resp.map(Into::into)) + } + + async fn search( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + let access = extract_access(&mut request); + + let collection_name = request.get_ref().collection_name.clone(); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, None); + + let res = search( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + None, + access, + hw_metrics, + ) + .await?; + + Ok(res) + } + + async fn search_batch( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + + let access = extract_access(&mut request); + + let SearchBatchPoints { + collection_name, + search_points, + read_consistency, + timeout, + } = 
request.into_inner(); + + let mut requests = Vec::new(); + + for mut search_point in search_points { + let shard_key = search_point.shard_key_selector.take(); + + let shard_selector = convert_shard_selector_for_read(None, shard_key); + let core_search_request = CoreSearchRequest::try_from(search_point)?; + + requests.push((core_search_request, shard_selector)); + } + + let hw_metrics = + self.get_request_collection_hw_usage_counter(collection_name.clone(), None); + + let res = core_search_batch( + StrictModeCheckedTocProvider::new(&self.dispatcher), + &collection_name, + requests, + read_consistency, + access, + timeout.map(Duration::from_secs), + hw_metrics, + ) + .await?; + + Ok(res) + } + + async fn search_groups( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + let access = extract_access(&mut request); + let collection_name = request.get_ref().collection_name.clone(); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, None); + let res = search_groups( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + None, + access, + hw_metrics, + ) + .await?; + + Ok(res) + } + + async fn scroll( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + let access = extract_access(&mut request); + + let inner_request = request.into_inner(); + + let hw_metrics = self + .get_request_collection_hw_usage_counter(inner_request.collection_name.clone(), None); + + scroll( + StrictModeCheckedTocProvider::new(&self.dispatcher), + inner_request, + None, + access, + hw_metrics, + ) + .await + } + + async fn recommend( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + let access = extract_access(&mut request); + let collection_name = request.get_ref().collection_name.clone(); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, None); + let res = recommend( + 
StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + access, + hw_metrics, + ) + .await?; + + Ok(res) + } + + async fn recommend_batch( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + let access = extract_access(&mut request); + let RecommendBatchPoints { + collection_name, + recommend_points, + read_consistency, + timeout, + } = request.into_inner(); + + let hw_metrics = + self.get_request_collection_hw_usage_counter(collection_name.clone(), None); + + let res = recommend_batch( + StrictModeCheckedTocProvider::new(&self.dispatcher), + &collection_name, + recommend_points, + read_consistency, + access, + timeout.map(Duration::from_secs), + hw_metrics, + ) + .await?; + + Ok(res) + } + + async fn recommend_groups( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + let access = extract_access(&mut request); + let collection_name = request.get_ref().collection_name.clone(); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, None); + + let res = recommend_groups( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + access, + hw_metrics, + ) + .await?; + + Ok(res) + } + + async fn discover( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + + let access = extract_access(&mut request); + let collection_name = request.get_ref().collection_name.clone(); + + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, None); + let res = discover( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + access, + hw_metrics, + ) + .await?; + + Ok(res) + } + + async fn discover_batch( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + + let access = extract_access(&mut request); + + let DiscoverBatchPoints { + collection_name, + discover_points, + read_consistency, + timeout, + } = 
request.into_inner(); + + let hw_metrics = + self.get_request_collection_hw_usage_counter(collection_name.clone(), None); + let res = discover_batch( + StrictModeCheckedTocProvider::new(&self.dispatcher), + &collection_name, + discover_points, + read_consistency, + access, + timeout.map(Duration::from_secs), + hw_metrics, + ) + .await?; + + Ok(res) + } + + async fn count( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + + let access = extract_access(&mut request); + let collection_name = request.get_ref().collection_name.clone(); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, None); + let res = count( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + None, + &access, + hw_metrics, + ) + .await?; + + Ok(res) + } + + async fn query( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + let access = extract_access(&mut request); + let inference_token = extract_token(&request); + let collection_name = request.get_ref().collection_name.clone(); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, None); + + let res = query( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + None, + access, + hw_metrics, + inference_token, + ) + .await?; + + Ok(res) + } + + async fn query_batch( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + let access = extract_access(&mut request); + let inference_token = extract_token(&request); + let request = request.into_inner(); + let QueryBatchPoints { + collection_name, + query_points, + read_consistency, + timeout, + } = request; + let timeout = timeout.map(Duration::from_secs); + let hw_metrics = + self.get_request_collection_hw_usage_counter(collection_name.clone(), None); + let res = query_batch( + StrictModeCheckedTocProvider::new(&self.dispatcher), + &collection_name, + query_points, + 
read_consistency, + access, + timeout, + hw_metrics, + inference_token, + ) + .await?; + + Ok(res) + } + + async fn query_groups( + &self, + mut request: Request, + ) -> Result, Status> { + let access = extract_access(&mut request); + let inference_token = extract_token(&request); + let collection_name = request.get_ref().collection_name.clone(); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, None); + + let res = query_groups( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + None, + access, + hw_metrics, + inference_token, + ) + .await?; + + Ok(res) + } + async fn facet( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + let access = extract_access(&mut request); + let hw_metrics = self.get_request_collection_hw_usage_counter( + request.get_ref().collection_name.clone(), + None, + ); + facet( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + access, + hw_metrics, + ) + .await + } + + async fn search_matrix_pairs( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + let access = extract_access(&mut request); + let timing = Instant::now(); + let collection_name = request.get_ref().collection_name.clone(); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, None); + let search_matrix_response = search_points_matrix( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + access, + hw_metrics.get_counter(), + ) + .await?; + let pairs_response = SearchMatrixPairsResponse { + result: Some(SearchMatrixPairs::from(search_matrix_response)), + time: timing.elapsed().as_secs_f64(), + usage: hw_metrics.to_grpc_api(), + }; + + Ok(Response::new(pairs_response)) + } + + async fn search_matrix_offsets( + &self, + mut request: Request, + ) -> Result, Status> { + validate(request.get_ref())?; + let access = extract_access(&mut request); + let timing = 
Instant::now(); + let collection_name = request.get_ref().collection_name.clone(); + let hw_metrics = self.get_request_collection_hw_usage_counter(collection_name, None); + let search_matrix_response = search_points_matrix( + StrictModeCheckedTocProvider::new(&self.dispatcher), + request.into_inner(), + access, + hw_metrics.get_counter(), + ) + .await?; + let offsets_response = SearchMatrixOffsetsResponse { + result: Some(SearchMatrixOffsets::from(search_matrix_response)), + time: timing.elapsed().as_secs_f64(), + usage: hw_metrics.to_grpc_api(), + }; + + Ok(Response::new(offsets_response)) + } +} +``` \ No newline at end of file diff --git a/locodiff-250425/results/react_babel.config.js/x-ai_grok-code-fast-1/20250909_155811/extracted_output.txt b/locodiff-250425/results/react_babel.config.js/x-ai_grok-code-fast-1/20250909_155811/extracted_output.txt new file mode 100644 index 000000000..431236551 --- /dev/null +++ b/locodiff-250425/results/react_babel.config.js/x-ai_grok-code-fast-1/20250909_155811/extracted_output.txt @@ -0,0 +1,26 @@ +'use strict'; + +module.exports = { + plugins: [ + '@babel/plugin-syntax-jsx', + '@babel/plugin-transform-flow-strip-types', + ['@babel/plugin-proposal-class-properties', {loose: true}], + 'syntax-trailing-function-commas', + [ + '@babel/plugin-proposal-object-rest-spread', + {loose: true, useBuiltIns: true}, + ], + ['@babel/plugin-transform-template-literals', {loose: true}], + '@babel/plugin-transform-literals', + '@babel/plugin-transform-arrow-functions', + '@babel/plugin-transform-block-scoped-functions', + '@babel/plugin-transform-object-super', + '@babel/plugin-transform-shorthand-properties', + '@babel/plugin-transform-computed-properties', + '@babel/plugin-transform-for-of', + ['@babel/plugin-transform-spread', {loose: true, useBuiltIns: true}], + '@babel/plugin-transform-parameters', + ['@babel/plugin-transform-destructuring', {loose: true, useBuiltIns: true}], + ['@babel/plugin-transform-block-scoping', 
{throwIfClosureRequired: true}], + ], +}; \ No newline at end of file diff --git a/locodiff-250425/results/react_babel.config.js/x-ai_grok-code-fast-1/20250909_155811/metadata.json b/locodiff-250425/results/react_babel.config.js/x-ai_grok-code-fast-1/20250909_155811/metadata.json new file mode 100644 index 000000000..542965172 --- /dev/null +++ b/locodiff-250425/results/react_babel.config.js/x-ai_grok-code-fast-1/20250909_155811/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "react_babel.config.js", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/react_babel.config.js_prompt.txt", + "expected_file": "locodiff-250425/prompts/react_babel.config.js_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:58:11.942139+00:00", + "success": true, + "error": null, + "api_error": false, + "raw_response_length": 1027, + "extracted_output_length": 1009, + "expected_output_length": 1010, + "results_dir": "locodiff-250425/results/react_babel.config.js/x-ai_grok-code-fast-1/20250909_155811", + "generation_id": "gen-1757433492-OGmn0Yuo5Erc4z5tFF6P", + "cost_usd": 0.00336224, + "prompt_tokens": 2009, + "completion_tokens": 941, + "total_tokens": 2950, + "native_prompt_tokens": 2137, + "native_completion_tokens": 2210, + "native_finish_reason": "stop", + "stats_error": null, + "native_tokens_reasoning": 1986 +} \ No newline at end of file diff --git a/locodiff-250425/results/react_babel.config.js/x-ai_grok-code-fast-1/20250909_155811/output.diff b/locodiff-250425/results/react_babel.config.js/x-ai_grok-code-fast-1/20250909_155811/output.diff new file mode 100644 index 000000000..c8af7cb1b --- /dev/null +++ b/locodiff-250425/results/react_babel.config.js/x-ai_grok-code-fast-1/20250909_155811/output.diff @@ -0,0 +1 @@ +No differences found. 
diff --git a/locodiff-250425/results/react_babel.config.js/x-ai_grok-code-fast-1/20250909_155811/raw_response.txt b/locodiff-250425/results/react_babel.config.js/x-ai_grok-code-fast-1/20250909_155811/raw_response.txt new file mode 100644 index 000000000..ff96f4963 --- /dev/null +++ b/locodiff-250425/results/react_babel.config.js/x-ai_grok-code-fast-1/20250909_155811/raw_response.txt @@ -0,0 +1,28 @@ +```javascript +'use strict'; + +module.exports = { + plugins: [ + '@babel/plugin-syntax-jsx', + '@babel/plugin-transform-flow-strip-types', + ['@babel/plugin-proposal-class-properties', {loose: true}], + 'syntax-trailing-function-commas', + [ + '@babel/plugin-proposal-object-rest-spread', + {loose: true, useBuiltIns: true}, + ], + ['@babel/plugin-transform-template-literals', {loose: true}], + '@babel/plugin-transform-literals', + '@babel/plugin-transform-arrow-functions', + '@babel/plugin-transform-block-scoped-functions', + '@babel/plugin-transform-object-super', + '@babel/plugin-transform-shorthand-properties', + '@babel/plugin-transform-computed-properties', + '@babel/plugin-transform-for-of', + ['@babel/plugin-transform-spread', {loose: true, useBuiltIns: true}], + '@babel/plugin-transform-parameters', + ['@babel/plugin-transform-destructuring', {loose: true, useBuiltIns: true}], + ['@babel/plugin-transform-block-scoping', {throwIfClosureRequired: true}], + ], +}; +``` \ No newline at end of file diff --git a/locodiff-250425/results/react_compiler_packages_babel-plugin-react-compiler_src_HIR_PropagateScopeDependenciesHIR.ts/x-ai_grok-code-fast-1/20250909_155839/extracted_output.txt b/locodiff-250425/results/react_compiler_packages_babel-plugin-react-compiler_src_HIR_PropagateScopeDependenciesHIR.ts/x-ai_grok-code-fast-1/20250909_155839/extracted_output.txt new file mode 100644 index 000000000..40b96655c --- /dev/null +++ 
b/locodiff-250425/results/react_compiler_packages_babel-plugin-react-compiler_src_HIR_PropagateScopeDependenciesHIR.ts/x-ai_grok-code-fast-1/20250909_155839/extracted_output.txt @@ -0,0 +1,823 @@ +/** + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +import { + ScopeId, + HIRFunction, + Place, + Instruction, + ReactiveScopeDependency, + Identifier, + ReactiveScope, + isObjectMethodType, + isRefValueType, + isUseRefType, + makeInstructionId, + InstructionId, + InstructionKind, + GeneratedSource, + DeclarationId, + areEqualPaths, + IdentifierId, + Terminal, + InstructionValue, + LoadContext, + TInstruction, + FunctionExpression, + ObjectMethod, + PropertyLiteral, + convertHoistedLValueKind, +} from './HIR'; +import { + collectHoistablePropertyLoads, + keyByScopeId, +} from './CollectHoistablePropertyLoads'; +import { + ScopeBlockTraversal, + eachInstructionOperand, + eachInstructionValueOperand, + eachPatternOperand, + eachTerminalOperand, +} from './visitors'; +import {Stack, empty} from '../Utils/Stack'; +import {CompilerError} from '../CompilerError'; +import {Iterable_some} from '../Utils/utils'; +import {ReactiveScopeDependencyTreeHIR} from './DeriveMinimalDependenciesHIR'; +import {collectOptionalChainSidemap} from './CollectOptionalChainDependencies'; + +export function propagateScopeDependenciesHIR(fn: HIRFunction): void { + const usedOutsideDeclaringScope = + findTemporariesUsedOutsideDeclaringScope(fn); + const temporaries = collectTemporariesSidemap(fn, usedOutsideDeclaringScope); + const { + temporariesReadInOptional, + processedInstrsInOptional, + hoistableObjects, + } = collectOptionalChainSidemap(fn); + + const hoistablePropertyLoads = keyByScopeId( + fn, + collectHoistablePropertyLoads(fn, temporaries, hoistableObjects), + ); + + const scopeDeps = collectDependencies( + fn, + usedOutsideDeclaringScope, + new 
Map([...temporaries, ...temporariesReadInOptional]), + processedInstrsInOptional, + ); + + /** + * Derive the minimal set of hoistable dependencies for each scope. + */ + for (const [scope, deps] of scopeDeps) { + if (deps.length === 0) { + continue; + } + + /** + * Step 1: Find hoistable accesses, given the basic block in + * which the scope begins. + */ + const hoistables = hoistablePropertyLoads.get(scope.id); + CompilerError.invariant(hoistables != null, { + reason: '[PropagateScopeDependencies] Scope not found in tracked blocks', + loc: GeneratedSource, + }); + /** + * Step 2: Calculate hoistable dependencies. + */ + const tree = new ReactiveScopeDependencyTreeHIR( + [...hoistables.assumedNonNullObjects].map(o => o.fullPath), + ); + for (const dep of deps) { + tree.addDependency({...dep}); + } + + /** + * Step 3: Reduce dependencies to a minimal set. + */ + const candidates = tree.deriveMinimalDependencies(); + for (const candidateDep of candidates) { + if ( + !Iterable_some( + scope.dependencies, + existingDep => + existingDep.identifier.declarationId === + candidateDep.identifier.declarationId && + areEqualPaths(existingDep.path, candidateDep.path), + ) + ) + scope.dependencies.add(candidateDep); + } + } +} + +export function findTemporariesUsedOutsideDeclaringScope( + fn: HIRFunction, +): ReadonlySet { + /* + * tracks all relevant LoadLocal and PropertyLoad lvalues + * and the scope where they are defined + */ + const declarations = new Map(); + const prunedScopes = new Set(); + const scopeTraversal = new ScopeBlockTraversal(); + const usedOutsideDeclaringScope = new Set(); + + function handlePlace(place: Place): void { + const declaringScope = declarations.get(place.identifier.declarationId); + if ( + declaringScope != null && + !scopeTraversal.isScopeActive(declaringScope) && + !prunedScopes.has(declaringScope) + ) { + // Declaring scope is not active === used outside declaring scope + usedOutsideDeclaringScope.add(place.identifier.declarationId); + } + } 
+ + function handleInstruction(instr: Instruction): void { + const scope = scopeTraversal.currentScope; + if (scope == null || prunedScopes.has(scope)) { + return; + } + switch (instr.value.kind) { + case 'LoadLocal': + case 'LoadContext': + case 'PropertyLoad': { + declarations.set(instr.lvalue.identifier.declarationId, scope); + break; + } + default: { + break; + } + } + } + + for (const [blockId, block] of fn.body.blocks) { + scopeTraversal.recordScopes(block); + const scopeStartInfo = scopeTraversal.blockInfos.get(blockId); + if (scopeStartInfo?.kind === 'begin' && scopeStartInfo.pruned) { + prunedScopes.add(scopeStartInfo.scope.id); + } + for (const instr of block.instructions) { + for (const place of eachInstructionOperand(instr)) { + handlePlace(place); + } + handleInstruction(instr); + } + + for (const place of eachTerminalOperand(block.terminal)) { + handlePlace(place); + } + } + return usedOutsideDeclaringScope; +} + +/** + * @returns mapping of LoadLocal and PropertyLoad to the source of the load. + * ```js + * // source + * foo(a.b); + * + * // HIR: a potential sidemap is {0: a, 1: a.b, 2: foo} + * $0 = LoadLocal 'a' + * $1 = PropertyLoad $0, 'b' + * $2 = LoadLocal 'foo' + * $3 = CallExpression $2($1) + * ``` + * @param usedOutsideDeclaringScope is used to check the correctness of + * reordering LoadLocal / PropertyLoad calls. We only track a LoadLocal / + * PropertyLoad in the returned temporaries map if reordering the read (from the + * time-of-load to time-of-use) is valid. + * + * If a LoadLocal or PropertyLoad instruction is within the reactive scope range + * (a proxy for mutable range) of the load source, later instructions may + * reassign / mutate the source value. Since it's incorrect to reorder these + * load instructions to after their scope ranges, we also do not store them in + * identifier sidemaps. 
+ * + * Take this example (from fixture + * `evaluation-order-mutate-call-after-dependency-load`) + * ```js + * // source + * function useFoo(arg) { + * const arr = [1, 2, 3, ...arg]; + * return [ + * arr.length, + * arr.push(0) + * ]; + * } + * + * // IR pseudocode + * scope @0 { + * $0 = arr = ArrayExpression [1, 2, 3, ...arg] + * $1 = arr.length + * $2 = arr.push(0) + * } + * scope @1 { + * $3 = ArrayExpression [$1, $2] + * } + * ``` + * Here, it's invalid for scope@1 to take `arr.length` as a dependency instead + * of $1, as the evaluation of `arr.length` changes between instructions $1 and + * $3. We do not track $1 -> arr.length in this case. + */ +export function collectTemporariesSidemap( + fn: HIRFunction, + usedOutsideDeclaringScope: ReadonlySet, +): ReadonlyMap { + const temporaries = new Map(); + collectTemporariesSidemapImpl( + fn, + usedOutsideDeclaringScope, + temporaries, + null, + ); + return temporaries; +} + +function isLoadContextMutable( + instrValue: InstructionValue, + id: InstructionId, +): instrValue is LoadContext { + if (instrValue.kind === 'LoadContext') { + /** + * Not all context variables currently have scopes due to limitations of + * mutability analysis for function expressions. + * + * Currently, many function expressions references are inferred to be + * 'Read' | 'Freeze' effects which don't replay mutable effects of captured + * context. + */ + return ( + instrValue.place.identifier.scope != null && + id >= instrValue.place.identifier.scope.range.end + ); + } + return false; +} +/** + * Recursive collect a sidemap of all `LoadLocal` and `PropertyLoads` with a + * function and all nested functions. + * + * Note that IdentifierIds are currently unique, so we can use a single + * Map across all nested functions. 
+ */ +function collectTemporariesSidemapImpl( + fn: HIRFunction, + usedOutsideDeclaringScope: ReadonlySet, + temporaries: Map, + innerFnContext: {instrId: InstructionId} | null, +): void { + for (const [_, block] of fn.body.blocks) { + for (const {value, lvalue, id: origInstrId} of block.instructions) { + const instrId = + innerFnContext != null ? innerFnContext.instrId : origInstrId; + const usedOutside = usedOutsideDeclaringScope.has( + lvalue.identifier.declarationId, + ); + + if (value.kind === 'PropertyLoad' && !usedOutside) { + if ( + innerFnContext == null || + temporaries.has(value.object.identifier.id) + ) { + /** + * All dependencies of a inner / nested function must have a base + * identifier from the outermost component / hook. This is because the + * compiler cannot break an inner function into multiple granular + * scopes. + */ + const property = getProperty( + value.object, + value.property, + false, + temporaries, + ); + temporaries.set(lvalue.identifier.id, property); + } + } else if ( + (value.kind === 'LoadLocal' || isLoadContextMutable(value, instrId)) && + lvalue.identifier.name == null && + value.place.identifier.name !== null && + !usedOutside + ) { + if ( + innerFnContext == null || + fn.context.some( + context => context.identifier.id === value.place.identifier.id, + ) + ) { + temporaries.set(lvalue.identifier.id, { + identifier: value.place.identifier, + path: [], + }); + } + } else if ( + value.kind === 'FunctionExpression' || + value.kind === 'ObjectMethod' + ) { + collectTemporariesSidemapImpl( + value.loweredFunc.func, + usedOutsideDeclaringScope, + temporaries, + innerFnContext ?? {instrId}, + ); + } + } + } +} + +function getProperty( + object: Place, + propertyName: PropertyLiteral, + optional: boolean, + temporaries: ReadonlyMap, +): ReactiveScopeDependency { + /* + * (1) Get the base object either from the temporary sidemap (e.g. a LoadLocal) + * or a deep copy of an existing property dependency. 
+ * Example 1: + * $0 = LoadLocal x + * $1 = PropertyLoad $0.y + * getProperty($0, ...) -> resolvedObject = x, resolvedDependency = null + * + * Example 2: + * $0 = LoadLocal x + * $1 = PropertyLoad $0.y + * $2 = PropertyLoad $1.z + * getProperty($1, ...) -> resolvedObject = null, resolvedDependency = x.y + * + * Example 3: + * $0 = Call(...) + * $1 = PropertyLoad $0.y + * getProperty($0, ...) -> resolvedObject = null, resolvedDependency = null + */ + const resolvedDependency = temporaries.get(object.identifier.id); + + /** + * (2) Push the last PropertyLoad + * TODO(mofeiZ): understand optional chaining + */ + let property: ReactiveScopeDependency; + if (resolvedDependency == null) { + property = { + identifier: object.identifier, + path: [{property: propertyName, optional}], + }; + } else { + property = { + identifier: resolvedDependency.identifier, + path: [ + ...resolvedDependency.path, + {property: propertyName, optional}, + ], + }; + } + return property; +} + +type Decl = { + id: InstructionId; + scope: Stack; +}; + +export class DependencyCollectionContext { + #declarations: Map = new Map(); + #reassignments: Map = new Map(); + + #scopes: Stack = empty(); + // Reactive dependencies used in the current reactive scope. + #dependencies: Stack> = empty(); + deps: Map> = new Map(); + + #temporaries: ReadonlyMap; + #temporariesUsedOutsideScope: ReadonlySet; + #processedInstrsInOptional: ReadonlySet; + + /** + * Tracks the traversal state. See Context.declare for explanation of why this + * is needed. 
+ */ + #innerFnContext: {outerInstrId: InstructionId} | null = null; + + constructor( + temporariesUsedOutsideScope: ReadonlySet, + temporaries: ReadonlyMap, + processedInstrsInOptional: ReadonlySet, + ) { + this.#temporariesUsedOutsideScope = temporariesUsedOutsideScope; + this.#temporaries = temporaries; + this.#processedInstrsInOptional = processedInstrsInOptional; + } + + enterScope(scope: ReactiveScope): void { + // Set context for new scope + this.#dependencies = this.#dependencies.push([]); + this.#scopes = this.#scopes.push(scope); + } + + exitScope(scope: ReactiveScope, pruned: boolean): void { + // Save dependencies we collected from the exiting scope + const scopedDependencies = this.#dependencies.value; + CompilerError.invariant(scopedDependencies != null, { + reason: '[PropagateScopeDeps]: Unexpected scope mismatch', + loc: scope.loc, + }); + + // Restore context of previous scope + this.#scopes = this.#scopes.pop(); + this.#dependencies = this.#dependencies.pop(); + + /* + * Collect dependencies we recorded for the exiting scope and propagate + * them upward using the same rules as normal dependency collection. + * Child scopes may have dependencies on values created within the outer + * scope, which necessarily cannot be dependencies of the outer scope. + */ + for (const dep of scopedDependencies) { + if (this.#checkValidDependency(dep)) { + this.#dependencies.value?.push(dep); + } + } + + if (!pruned) { + this.deps.set(scope, scopedDependencies); + } + } + + isUsedOutsideDeclaringScope(place: Place): boolean { + return this.#temporariesUsedOutsideScope.has( + place.identifier.declarationId, + ); + } + + /* + * Records where a value was declared, and optionally, the scope where the value originated from. + * This is later used to determine if a dependency should be added to a scope; if the current + * scope we are visiting is the same scope where the value originates, it can't be a dependency + * on itself. 
+ * + * Note that we do not track declarations or reassignments within inner + * functions for the following reasons: + * - inner functions cannot be split by scope boundaries and are guaranteed + * to consume their own declarations + * - reassignments within inner functions are tracked as context variables, + * which already have extended mutable ranges to account for reassignments + * - *most importantly* it's currently simply incorrect to compare inner + * function instruction ids (tracked by `decl`) with outer ones (as stored + * by root identifier mutable ranges). + */ + declare(identifier: Identifier, decl: Decl): void { + if (this.#innerFnContext != null) return; + if (!this.#declarations.has(identifier.declarationId)) { + this.#declarations.set(identifier.declarationId, decl); + } + this.#reassignments.set(identifier, decl); + } + hasDeclared(identifier: Identifier): boolean { + return this.#declarations.has(identifier.declarationId); + } + + // Checks if identifier is a valid dependency in the current scope + #checkValidDependency(maybeDependency: ReactiveScopeDependency): boolean { + // ref value is not a valid dep + if (isRefValueType(maybeDependency.identifier)) { + return false; + } + + /* + * object methods are not deps because they will be codegen'ed back in to + * the object literal. + */ + if (isObjectMethodType(maybeDependency.identifier)) { + return false; + } + + const identifier = maybeDependency.identifier; + /* + * If this operand is used in a scope, has a dynamic value, and was defined + * before this scope, then its a dependency of the scope. + */ + const currentDeclaration = + this.#reassignments.get(identifier) ?? 
+ this.#declarations.get(identifier.declarationId); + const currentScope = this.currentScope.value; + return ( + currentScope != null && + currentDeclaration !== undefined && + currentDeclaration.id < currentScope.range.start + ); + } + + #isScopeActive(scope: ReactiveScope): boolean { + if (this.#scopes === null) { + return false; + } + return this.#scopes.find(state => state === scope); + } + + get currentScope(): Stack { + return this.#scopes; + } + + visitOperand(place: Place): void { + /* + * if this operand is a temporary created for a property load, try to resolve it to + * the expanded Place. Fall back to using the operand as-is. + */ + this.visitDependency( + this.#temporaries.get(place.identifier.id) ?? { + identifier: place.identifier, + path: [], + }, + ); + } + + visitProperty( + object: Place, + property: PropertyLiteral, + optional: boolean, + ): void { + const nextDependency = getProperty( + object, + property, + optional, + this.#temporaries, + ); + this.visitDependency(nextDependency); + } + + visitDependency(maybeDependency: ReactiveScopeDependency): void { + /* + * Any value used after its originally defining scope has concluded must be added as an + * output of its defining scope. Regardless of whether its a const or not, + * some later code needs access to the value. If the current + * scope we are visiting is the same scope where the value originates, it can't be a dependency + * on itself. + */ + + /* + * if originalDeclaration is undefined here, then this is not a local var + * (all decls e.g. 
`let x;` should be initialized in BuildHIR) + */ + const originalDeclaration = this.#declarations.get( + maybeDependency.identifier.declarationId, + ); + if ( + originalDeclaration !== undefined && + originalDeclaration.scope.value !== null + ) { + originalDeclaration.scope.each(scope => { + if ( + !this.#isScopeActive(scope) && + !Iterable_some( + scope.declarations.values(), + decl => + decl.identifier.declarationId === + maybeDependency.identifier.declarationId, + ) + ) { + scope.declarations.set(maybeDependency.identifier.id, { + identifier: maybeDependency.identifier, + scope: originalDeclaration.scope.value!, + }); + } + }); + } + + if (this.#checkValidDependency(maybeDependency)) { + this.#dependencies.value!.push(maybeDependency); + } + } + + /* + * Record a variable that is declared in some other scope and that is being reassigned in the + * current one as a {@link ReactiveScope.reassignments} + */ + visitReassignment(place: Place): void { + const currentScope = this.currentScope.value; + if ( + currentScope != null && + !Iterable_some( + currentScope.reassignments, + identifier => + identifier.declarationId === place.identifier.declarationId, + ) && + this.#checkValidDependency({identifier: place.identifier, path: []}) + ) { + currentScope.reassignments.add(place.identifier); + } + } + enterInnerFn( + innerFn: TInstruction | TInstruction, + cb: () => T, + ): T { + const prevContext = this.#innerFnContext; + this.#innerFnContext = this.#innerFnContext ?? {outerInstrId: innerFn.id}; + const result = cb(); + this.#innerFnContext = prevContext; + return result; + } + + /** + * Skip dependencies that are subexpressions of other dependencies. e.g. 
if a + * dependency is tracked in the temporaries sidemap, it can be added at + * site-of-use + */ + isDeferredDependency( + instr: + | {kind: HIRValue.Instruction; value: Instruction} + | {kind: HIRValue.Terminal; value: Terminal}, + ): boolean { + return ( + this.#processedInstrsInOptional.has(instr.value) || + (instr.kind === HIRValue.Instruction && + this.#temporaries.has(instr.value.lvalue.identifier.id)) + ); + } +} +enum HIRValue { + Instruction = 1, + Terminal, +} + +export function handleInstruction( + instr: Instruction, + context: DependencyCollectionContext, +): void { + const {id, value, lvalue} = instr; + context.declare(lvalue.identifier, { + id, + scope: context.currentScope, + }); + if ( + context.isDeferredDependency({kind: HIRValue.Instruction, value: instr}) + ) { + return; + } + if (value.kind === 'PropertyLoad') { + context.visitProperty(value.object, value.property, false); + } else if (value.kind === 'StoreLocal') { + context.visitOperand(value.value); + if (value.lvalue.kind === InstructionKind.Reassign) { + context.visitReassignment(value.lvalue.place); + } + context.declare(value.lvalue.place.identifier, { + id, + scope: context.currentScope, + }); + } else if (value.kind === 'DeclareLocal' || value.kind === 'DeclareContext') { + /* + * Some variables may be declared and never initialized. We need to retain + * (and hoist) these declarations if they are included in a reactive scope. + * One approach is to simply add all `DeclareLocal`s as scope declarations. + * + * Context variables with hoisted declarations only become live after their + * first assignment. We only declare real DeclareLocal / DeclareContext + * instructions (not hoisted ones) to avoid generating dependencies on + * hoisted declarations. 
+ */ + if (convertHoistedLValueKind(value.lvalue.kind) === null) { + context.declare(value.lvalue.place.identifier, { + id, + scope: context.currentScope, + }); + } + } else if (value.kind === 'Destructure') { + context.visitOperand(value.value); + for (const place of eachPatternOperand(value.lvalue.pattern)) { + if (value.lvalue.kind === InstructionKind.Reassign) { + context.visitReassignment(place); + } + context.declare(place.identifier, { + id, + scope: context.currentScope, + }); + } + } else if (value.kind === 'StoreContext') { + /** + * Some StoreContext variables have hoisted declarations. If we're storing + * to a context variable that hasn't yet been declared, the StoreContext is + * the declaration. + * (see corresponding logic in PruneHoistedContext) + */ + if ( + !context.hasDeclared(value.lvalue.place.identifier) || + value.lvalue.kind !== InstructionKind.Reassign + ) { + context.declare(value.lvalue.place.identifier, { + id, + scope: context.currentScope, + }); + } + + for (const operand of eachInstructionValueOperand(value)) { + context.visitOperand(operand); + } + } else { + for (const operand of eachInstructionValueOperand(value)) { + context.visitOperand(operand); + } + } + + context.declare(lvalue.identifier, { + id, + scope: context.currentScope, + }); +} + +function collectDependencies( + fn: HIRFunction, + usedOutsideDeclaringScope: ReadonlySet, + temporaries: ReadonlyMap, + processedInstrsInOptional: ReadonlySet, +): Map> { + const context = new DependencyCollectionContext( + usedOutsideDeclaringScope, + temporaries, + processedInstrsInOptional, + ); + + for (const param of fn.params) { + if (param.kind === 'Identifier') { + context.declare(param.identifier, { + id: makeInstructionId(0), + scope: empty(), + }); + } else { + context.declare(param.place.identifier, { + id: makeInstructionId(0), + scope: empty(), + }); + } + } + + const scopeTraversal = new ScopeBlockTraversal(); + + const handleFunction = (fn: HIRFunction): void => { + for 
(const [blockId, block] of fn.body.blocks) { + scopeTraversal.recordScopes(block); + const scopeBlockInfo = scopeTraversal.blockInfos.get(blockId); + if (scopeBlockInfo?.kind === 'begin') { + context.enterScope(scopeBlockInfo.scope); + } else if (scopeBlockInfo?.kind === 'end') { + context.exitScope(scopeBlockInfo.scope, scopeBlockInfo.pruned); + } + // Record referenced optional chains in phis + for (const phi of block.phis) { + for (const operand of phi.operands) { + const maybeOptionalChain = temporaries.get(operand[1].identifier.id); + if (maybeOptionalChain) { + context.visitDependency(maybeOptionalChain); + } + } + } + for (const instr of block.instructions) { + if ( + instr.value.kind === 'FunctionExpression' || + instr.value.kind === 'ObjectMethod' + ) { + context.declare(instr.lvalue.identifier, { + id: instr.id, + scope: context.currentScope, + }); + /** + * Recursively visit the inner function to extract dependencies there + */ + const innerFn = instr.value.loweredFunc.func; + context.enterInnerFn( + instr as + | TInstruction + | TInstruction, + () => { + handleFunction(innerFn); + }, + ); + } else { + handleInstruction(instr, context); + } + } + + if ( + !context.isDeferredDependency({ + kind: HIRValue.Terminal, + value: block.terminal, + }) + ) { + for (const place of eachTerminalOperand(block.terminal)) { + context.visitOperand(place); + } + } + } + }; + + handleFunction(fn); + return context.deps; +} \ No newline at end of file diff --git a/locodiff-250425/results/react_compiler_packages_babel-plugin-react-compiler_src_HIR_PropagateScopeDependenciesHIR.ts/x-ai_grok-code-fast-1/20250909_155839/metadata.json b/locodiff-250425/results/react_compiler_packages_babel-plugin-react-compiler_src_HIR_PropagateScopeDependenciesHIR.ts/x-ai_grok-code-fast-1/20250909_155839/metadata.json new file mode 100644 index 000000000..2243a7b06 --- /dev/null +++ 
b/locodiff-250425/results/react_compiler_packages_babel-plugin-react-compiler_src_HIR_PropagateScopeDependenciesHIR.ts/x-ai_grok-code-fast-1/20250909_155839/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "react_compiler_packages_babel-plugin-react-compiler_src_HIR_PropagateScopeDependenciesHIR.ts", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/react_compiler_packages_babel-plugin-react-compiler_src_HIR_PropagateScopeDependenciesHIR.ts_prompt.txt", + "expected_file": "locodiff-250425/prompts/react_compiler_packages_babel-plugin-react-compiler_src_HIR_PropagateScopeDependenciesHIR.ts_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:58:39.387785+00:00", + "success": false, + "error": "Output mismatch", + "api_error": false, + "raw_response_length": 25577, + "extracted_output_length": 25569, + "expected_output_length": 25734, + "results_dir": "locodiff-250425/results/react_compiler_packages_babel-plugin-react-compiler_src_HIR_PropagateScopeDependenciesHIR.ts/x-ai_grok-code-fast-1/20250909_155839", + "generation_id": "gen-1757433519-ulpvJqvy0xmjvwg8VEel", + "cost_usd": 0.01394368, + "prompt_tokens": 22241, + "completion_tokens": 6060, + "total_tokens": 28301, + "native_prompt_tokens": 22079, + "native_completion_tokens": 6398, + "native_finish_reason": "stop", + "stats_error": null, + "native_tokens_reasoning": 588 +} \ No newline at end of file diff --git a/locodiff-250425/results/react_compiler_packages_babel-plugin-react-compiler_src_HIR_PropagateScopeDependenciesHIR.ts/x-ai_grok-code-fast-1/20250909_155839/output.diff b/locodiff-250425/results/react_compiler_packages_babel-plugin-react-compiler_src_HIR_PropagateScopeDependenciesHIR.ts/x-ai_grok-code-fast-1/20250909_155839/output.diff new file mode 100644 index 000000000..51d0a67fe --- /dev/null +++ 
b/locodiff-250425/results/react_compiler_packages_babel-plugin-react-compiler_src_HIR_PropagateScopeDependenciesHIR.ts/x-ai_grok-code-fast-1/20250909_155839/output.diff @@ -0,0 +1,71 @@ + +index 506a8718d..40b96655c 100644 +--- a/react_compiler_packages_babel-plugin-react-compiler_src_HIR_PropagateScopeDependenciesHIR.ts_expectedoutput.txt (expected):tmp/tmp3_xmr_nm_expected.txt ++++ b/react_compiler_packages_babel-plugin-react-compiler_src_HIR_PropagateScopeDependenciesHIR.ts_extracted.txt (actual):tmp/tmpir8kgszc_actual.txt +@@ -80,8 +80,8 @@ export function propagateScopeDependenciesHIR(fn: HIRFunction): void { + } + + /** +- * Step 1: Find hoistable accesses, given the basic block in which the scope +- * begins. ++ * Step 1: Find hoistable accesses, given the basic block in ++ * which the scope begins. + */ + const hoistables = hoistablePropertyLoads.get(scope.id); + CompilerError.invariant(hoistables != null, { +@@ -374,7 +374,10 @@ function getProperty( + } else { + property = { + identifier: resolvedDependency.identifier, +- path: [...resolvedDependency.path, {property: propertyName, optional}], ++ path: [ ++ ...resolvedDependency.path, ++ {property: propertyName, optional}, ++ ], + }; + } + return property; +@@ -456,10 +459,10 @@ export class DependencyCollectionContext { + } + + /* +- * Records where a value was declared, and optionally, the scope where the +- * value originated from. This is later used to determine if a dependency +- * should be added to a scope; if the current scope we are visiting is the +- * same scope where the value originates, it can't be a dependency on itself. ++ * Records where a value was declared, and optionally, the scope where the value originated from. ++ * This is later used to determine if a dependency should be added to a scope; if the current ++ * scope we are visiting is the same scope where the value originates, it can't be a dependency ++ * on itself. 
+ * + * Note that we do not track declarations or reassignments within inner + * functions for the following reasons: +@@ -589,16 +592,6 @@ export class DependencyCollectionContext { + }); + } + +- // ref.current access is not a valid dep +- if ( +- isUseRefType(maybeDependency.identifier) && +- maybeDependency.path.at(0)?.property === 'current' +- ) { +- maybeDependency = { +- identifier: maybeDependency.identifier, +- path: [], +- }; +- } + if (this.#checkValidDependency(maybeDependency)) { + this.#dependencies.value!.push(maybeDependency); + } +@@ -733,6 +726,11 @@ export function handleInstruction( + context.visitOperand(operand); + } + } ++ ++ context.declare(lvalue.identifier, { ++ id, ++ scope: context.currentScope, ++ }); + } + + function collectDependencies( diff --git a/locodiff-250425/results/react_compiler_packages_babel-plugin-react-compiler_src_HIR_PropagateScopeDependenciesHIR.ts/x-ai_grok-code-fast-1/20250909_155839/raw_response.txt b/locodiff-250425/results/react_compiler_packages_babel-plugin-react-compiler_src_HIR_PropagateScopeDependenciesHIR.ts/x-ai_grok-code-fast-1/20250909_155839/raw_response.txt new file mode 100644 index 000000000..778b466c1 --- /dev/null +++ b/locodiff-250425/results/react_compiler_packages_babel-plugin-react-compiler_src_HIR_PropagateScopeDependenciesHIR.ts/x-ai_grok-code-fast-1/20250909_155839/raw_response.txt @@ -0,0 +1,825 @@ +``` +/** + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +import { + ScopeId, + HIRFunction, + Place, + Instruction, + ReactiveScopeDependency, + Identifier, + ReactiveScope, + isObjectMethodType, + isRefValueType, + isUseRefType, + makeInstructionId, + InstructionId, + InstructionKind, + GeneratedSource, + DeclarationId, + areEqualPaths, + IdentifierId, + Terminal, + InstructionValue, + LoadContext, + TInstruction, + FunctionExpression, + ObjectMethod, + PropertyLiteral, + convertHoistedLValueKind, +} from './HIR'; +import { + collectHoistablePropertyLoads, + keyByScopeId, +} from './CollectHoistablePropertyLoads'; +import { + ScopeBlockTraversal, + eachInstructionOperand, + eachInstructionValueOperand, + eachPatternOperand, + eachTerminalOperand, +} from './visitors'; +import {Stack, empty} from '../Utils/Stack'; +import {CompilerError} from '../CompilerError'; +import {Iterable_some} from '../Utils/utils'; +import {ReactiveScopeDependencyTreeHIR} from './DeriveMinimalDependenciesHIR'; +import {collectOptionalChainSidemap} from './CollectOptionalChainDependencies'; + +export function propagateScopeDependenciesHIR(fn: HIRFunction): void { + const usedOutsideDeclaringScope = + findTemporariesUsedOutsideDeclaringScope(fn); + const temporaries = collectTemporariesSidemap(fn, usedOutsideDeclaringScope); + const { + temporariesReadInOptional, + processedInstrsInOptional, + hoistableObjects, + } = collectOptionalChainSidemap(fn); + + const hoistablePropertyLoads = keyByScopeId( + fn, + collectHoistablePropertyLoads(fn, temporaries, hoistableObjects), + ); + + const scopeDeps = collectDependencies( + fn, + usedOutsideDeclaringScope, + new Map([...temporaries, ...temporariesReadInOptional]), + processedInstrsInOptional, + ); + + /** + * Derive the minimal set of hoistable dependencies for each scope. + */ + for (const [scope, deps] of scopeDeps) { + if (deps.length === 0) { + continue; + } + + /** + * Step 1: Find hoistable accesses, given the basic block in + * which the scope begins. 
+ */ + const hoistables = hoistablePropertyLoads.get(scope.id); + CompilerError.invariant(hoistables != null, { + reason: '[PropagateScopeDependencies] Scope not found in tracked blocks', + loc: GeneratedSource, + }); + /** + * Step 2: Calculate hoistable dependencies. + */ + const tree = new ReactiveScopeDependencyTreeHIR( + [...hoistables.assumedNonNullObjects].map(o => o.fullPath), + ); + for (const dep of deps) { + tree.addDependency({...dep}); + } + + /** + * Step 3: Reduce dependencies to a minimal set. + */ + const candidates = tree.deriveMinimalDependencies(); + for (const candidateDep of candidates) { + if ( + !Iterable_some( + scope.dependencies, + existingDep => + existingDep.identifier.declarationId === + candidateDep.identifier.declarationId && + areEqualPaths(existingDep.path, candidateDep.path), + ) + ) + scope.dependencies.add(candidateDep); + } + } +} + +export function findTemporariesUsedOutsideDeclaringScope( + fn: HIRFunction, +): ReadonlySet { + /* + * tracks all relevant LoadLocal and PropertyLoad lvalues + * and the scope where they are defined + */ + const declarations = new Map(); + const prunedScopes = new Set(); + const scopeTraversal = new ScopeBlockTraversal(); + const usedOutsideDeclaringScope = new Set(); + + function handlePlace(place: Place): void { + const declaringScope = declarations.get(place.identifier.declarationId); + if ( + declaringScope != null && + !scopeTraversal.isScopeActive(declaringScope) && + !prunedScopes.has(declaringScope) + ) { + // Declaring scope is not active === used outside declaring scope + usedOutsideDeclaringScope.add(place.identifier.declarationId); + } + } + + function handleInstruction(instr: Instruction): void { + const scope = scopeTraversal.currentScope; + if (scope == null || prunedScopes.has(scope)) { + return; + } + switch (instr.value.kind) { + case 'LoadLocal': + case 'LoadContext': + case 'PropertyLoad': { + declarations.set(instr.lvalue.identifier.declarationId, scope); + break; + } + 
default: { + break; + } + } + } + + for (const [blockId, block] of fn.body.blocks) { + scopeTraversal.recordScopes(block); + const scopeStartInfo = scopeTraversal.blockInfos.get(blockId); + if (scopeStartInfo?.kind === 'begin' && scopeStartInfo.pruned) { + prunedScopes.add(scopeStartInfo.scope.id); + } + for (const instr of block.instructions) { + for (const place of eachInstructionOperand(instr)) { + handlePlace(place); + } + handleInstruction(instr); + } + + for (const place of eachTerminalOperand(block.terminal)) { + handlePlace(place); + } + } + return usedOutsideDeclaringScope; +} + +/** + * @returns mapping of LoadLocal and PropertyLoad to the source of the load. + * ```js + * // source + * foo(a.b); + * + * // HIR: a potential sidemap is {0: a, 1: a.b, 2: foo} + * $0 = LoadLocal 'a' + * $1 = PropertyLoad $0, 'b' + * $2 = LoadLocal 'foo' + * $3 = CallExpression $2($1) + * ``` + * @param usedOutsideDeclaringScope is used to check the correctness of + * reordering LoadLocal / PropertyLoad calls. We only track a LoadLocal / + * PropertyLoad in the returned temporaries map if reordering the read (from the + * time-of-load to time-of-use) is valid. + * + * If a LoadLocal or PropertyLoad instruction is within the reactive scope range + * (a proxy for mutable range) of the load source, later instructions may + * reassign / mutate the source value. Since it's incorrect to reorder these + * load instructions to after their scope ranges, we also do not store them in + * identifier sidemaps. 
+ * + * Take this example (from fixture + * `evaluation-order-mutate-call-after-dependency-load`) + * ```js + * // source + * function useFoo(arg) { + * const arr = [1, 2, 3, ...arg]; + * return [ + * arr.length, + * arr.push(0) + * ]; + * } + * + * // IR pseudocode + * scope @0 { + * $0 = arr = ArrayExpression [1, 2, 3, ...arg] + * $1 = arr.length + * $2 = arr.push(0) + * } + * scope @1 { + * $3 = ArrayExpression [$1, $2] + * } + * ``` + * Here, it's invalid for scope@1 to take `arr.length` as a dependency instead + * of $1, as the evaluation of `arr.length` changes between instructions $1 and + * $3. We do not track $1 -> arr.length in this case. + */ +export function collectTemporariesSidemap( + fn: HIRFunction, + usedOutsideDeclaringScope: ReadonlySet, +): ReadonlyMap { + const temporaries = new Map(); + collectTemporariesSidemapImpl( + fn, + usedOutsideDeclaringScope, + temporaries, + null, + ); + return temporaries; +} + +function isLoadContextMutable( + instrValue: InstructionValue, + id: InstructionId, +): instrValue is LoadContext { + if (instrValue.kind === 'LoadContext') { + /** + * Not all context variables currently have scopes due to limitations of + * mutability analysis for function expressions. + * + * Currently, many function expressions references are inferred to be + * 'Read' | 'Freeze' effects which don't replay mutable effects of captured + * context. + */ + return ( + instrValue.place.identifier.scope != null && + id >= instrValue.place.identifier.scope.range.end + ); + } + return false; +} +/** + * Recursive collect a sidemap of all `LoadLocal` and `PropertyLoads` with a + * function and all nested functions. + * + * Note that IdentifierIds are currently unique, so we can use a single + * Map across all nested functions. 
+ */ +function collectTemporariesSidemapImpl( + fn: HIRFunction, + usedOutsideDeclaringScope: ReadonlySet, + temporaries: Map, + innerFnContext: {instrId: InstructionId} | null, +): void { + for (const [_, block] of fn.body.blocks) { + for (const {value, lvalue, id: origInstrId} of block.instructions) { + const instrId = + innerFnContext != null ? innerFnContext.instrId : origInstrId; + const usedOutside = usedOutsideDeclaringScope.has( + lvalue.identifier.declarationId, + ); + + if (value.kind === 'PropertyLoad' && !usedOutside) { + if ( + innerFnContext == null || + temporaries.has(value.object.identifier.id) + ) { + /** + * All dependencies of a inner / nested function must have a base + * identifier from the outermost component / hook. This is because the + * compiler cannot break an inner function into multiple granular + * scopes. + */ + const property = getProperty( + value.object, + value.property, + false, + temporaries, + ); + temporaries.set(lvalue.identifier.id, property); + } + } else if ( + (value.kind === 'LoadLocal' || isLoadContextMutable(value, instrId)) && + lvalue.identifier.name == null && + value.place.identifier.name !== null && + !usedOutside + ) { + if ( + innerFnContext == null || + fn.context.some( + context => context.identifier.id === value.place.identifier.id, + ) + ) { + temporaries.set(lvalue.identifier.id, { + identifier: value.place.identifier, + path: [], + }); + } + } else if ( + value.kind === 'FunctionExpression' || + value.kind === 'ObjectMethod' + ) { + collectTemporariesSidemapImpl( + value.loweredFunc.func, + usedOutsideDeclaringScope, + temporaries, + innerFnContext ?? {instrId}, + ); + } + } + } +} + +function getProperty( + object: Place, + propertyName: PropertyLiteral, + optional: boolean, + temporaries: ReadonlyMap, +): ReactiveScopeDependency { + /* + * (1) Get the base object either from the temporary sidemap (e.g. a LoadLocal) + * or a deep copy of an existing property dependency. 
+ * Example 1: + * $0 = LoadLocal x + * $1 = PropertyLoad $0.y + * getProperty($0, ...) -> resolvedObject = x, resolvedDependency = null + * + * Example 2: + * $0 = LoadLocal x + * $1 = PropertyLoad $0.y + * $2 = PropertyLoad $1.z + * getProperty($1, ...) -> resolvedObject = null, resolvedDependency = x.y + * + * Example 3: + * $0 = Call(...) + * $1 = PropertyLoad $0.y + * getProperty($0, ...) -> resolvedObject = null, resolvedDependency = null + */ + const resolvedDependency = temporaries.get(object.identifier.id); + + /** + * (2) Push the last PropertyLoad + * TODO(mofeiZ): understand optional chaining + */ + let property: ReactiveScopeDependency; + if (resolvedDependency == null) { + property = { + identifier: object.identifier, + path: [{property: propertyName, optional}], + }; + } else { + property = { + identifier: resolvedDependency.identifier, + path: [ + ...resolvedDependency.path, + {property: propertyName, optional}, + ], + }; + } + return property; +} + +type Decl = { + id: InstructionId; + scope: Stack; +}; + +export class DependencyCollectionContext { + #declarations: Map = new Map(); + #reassignments: Map = new Map(); + + #scopes: Stack = empty(); + // Reactive dependencies used in the current reactive scope. + #dependencies: Stack> = empty(); + deps: Map> = new Map(); + + #temporaries: ReadonlyMap; + #temporariesUsedOutsideScope: ReadonlySet; + #processedInstrsInOptional: ReadonlySet; + + /** + * Tracks the traversal state. See Context.declare for explanation of why this + * is needed. 
+ */ + #innerFnContext: {outerInstrId: InstructionId} | null = null; + + constructor( + temporariesUsedOutsideScope: ReadonlySet, + temporaries: ReadonlyMap, + processedInstrsInOptional: ReadonlySet, + ) { + this.#temporariesUsedOutsideScope = temporariesUsedOutsideScope; + this.#temporaries = temporaries; + this.#processedInstrsInOptional = processedInstrsInOptional; + } + + enterScope(scope: ReactiveScope): void { + // Set context for new scope + this.#dependencies = this.#dependencies.push([]); + this.#scopes = this.#scopes.push(scope); + } + + exitScope(scope: ReactiveScope, pruned: boolean): void { + // Save dependencies we collected from the exiting scope + const scopedDependencies = this.#dependencies.value; + CompilerError.invariant(scopedDependencies != null, { + reason: '[PropagateScopeDeps]: Unexpected scope mismatch', + loc: scope.loc, + }); + + // Restore context of previous scope + this.#scopes = this.#scopes.pop(); + this.#dependencies = this.#dependencies.pop(); + + /* + * Collect dependencies we recorded for the exiting scope and propagate + * them upward using the same rules as normal dependency collection. + * Child scopes may have dependencies on values created within the outer + * scope, which necessarily cannot be dependencies of the outer scope. + */ + for (const dep of scopedDependencies) { + if (this.#checkValidDependency(dep)) { + this.#dependencies.value?.push(dep); + } + } + + if (!pruned) { + this.deps.set(scope, scopedDependencies); + } + } + + isUsedOutsideDeclaringScope(place: Place): boolean { + return this.#temporariesUsedOutsideScope.has( + place.identifier.declarationId, + ); + } + + /* + * Records where a value was declared, and optionally, the scope where the value originated from. + * This is later used to determine if a dependency should be added to a scope; if the current + * scope we are visiting is the same scope where the value originates, it can't be a dependency + * on itself. 
+ * + * Note that we do not track declarations or reassignments within inner + * functions for the following reasons: + * - inner functions cannot be split by scope boundaries and are guaranteed + * to consume their own declarations + * - reassignments within inner functions are tracked as context variables, + * which already have extended mutable ranges to account for reassignments + * - *most importantly* it's currently simply incorrect to compare inner + * function instruction ids (tracked by `decl`) with outer ones (as stored + * by root identifier mutable ranges). + */ + declare(identifier: Identifier, decl: Decl): void { + if (this.#innerFnContext != null) return; + if (!this.#declarations.has(identifier.declarationId)) { + this.#declarations.set(identifier.declarationId, decl); + } + this.#reassignments.set(identifier, decl); + } + hasDeclared(identifier: Identifier): boolean { + return this.#declarations.has(identifier.declarationId); + } + + // Checks if identifier is a valid dependency in the current scope + #checkValidDependency(maybeDependency: ReactiveScopeDependency): boolean { + // ref value is not a valid dep + if (isRefValueType(maybeDependency.identifier)) { + return false; + } + + /* + * object methods are not deps because they will be codegen'ed back in to + * the object literal. + */ + if (isObjectMethodType(maybeDependency.identifier)) { + return false; + } + + const identifier = maybeDependency.identifier; + /* + * If this operand is used in a scope, has a dynamic value, and was defined + * before this scope, then its a dependency of the scope. + */ + const currentDeclaration = + this.#reassignments.get(identifier) ?? 
+ this.#declarations.get(identifier.declarationId); + const currentScope = this.currentScope.value; + return ( + currentScope != null && + currentDeclaration !== undefined && + currentDeclaration.id < currentScope.range.start + ); + } + + #isScopeActive(scope: ReactiveScope): boolean { + if (this.#scopes === null) { + return false; + } + return this.#scopes.find(state => state === scope); + } + + get currentScope(): Stack { + return this.#scopes; + } + + visitOperand(place: Place): void { + /* + * if this operand is a temporary created for a property load, try to resolve it to + * the expanded Place. Fall back to using the operand as-is. + */ + this.visitDependency( + this.#temporaries.get(place.identifier.id) ?? { + identifier: place.identifier, + path: [], + }, + ); + } + + visitProperty( + object: Place, + property: PropertyLiteral, + optional: boolean, + ): void { + const nextDependency = getProperty( + object, + property, + optional, + this.#temporaries, + ); + this.visitDependency(nextDependency); + } + + visitDependency(maybeDependency: ReactiveScopeDependency): void { + /* + * Any value used after its originally defining scope has concluded must be added as an + * output of its defining scope. Regardless of whether its a const or not, + * some later code needs access to the value. If the current + * scope we are visiting is the same scope where the value originates, it can't be a dependency + * on itself. + */ + + /* + * if originalDeclaration is undefined here, then this is not a local var + * (all decls e.g. 
`let x;` should be initialized in BuildHIR) + */ + const originalDeclaration = this.#declarations.get( + maybeDependency.identifier.declarationId, + ); + if ( + originalDeclaration !== undefined && + originalDeclaration.scope.value !== null + ) { + originalDeclaration.scope.each(scope => { + if ( + !this.#isScopeActive(scope) && + !Iterable_some( + scope.declarations.values(), + decl => + decl.identifier.declarationId === + maybeDependency.identifier.declarationId, + ) + ) { + scope.declarations.set(maybeDependency.identifier.id, { + identifier: maybeDependency.identifier, + scope: originalDeclaration.scope.value!, + }); + } + }); + } + + if (this.#checkValidDependency(maybeDependency)) { + this.#dependencies.value!.push(maybeDependency); + } + } + + /* + * Record a variable that is declared in some other scope and that is being reassigned in the + * current one as a {@link ReactiveScope.reassignments} + */ + visitReassignment(place: Place): void { + const currentScope = this.currentScope.value; + if ( + currentScope != null && + !Iterable_some( + currentScope.reassignments, + identifier => + identifier.declarationId === place.identifier.declarationId, + ) && + this.#checkValidDependency({identifier: place.identifier, path: []}) + ) { + currentScope.reassignments.add(place.identifier); + } + } + enterInnerFn( + innerFn: TInstruction | TInstruction, + cb: () => T, + ): T { + const prevContext = this.#innerFnContext; + this.#innerFnContext = this.#innerFnContext ?? {outerInstrId: innerFn.id}; + const result = cb(); + this.#innerFnContext = prevContext; + return result; + } + + /** + * Skip dependencies that are subexpressions of other dependencies. e.g. 
if a + * dependency is tracked in the temporaries sidemap, it can be added at + * site-of-use + */ + isDeferredDependency( + instr: + | {kind: HIRValue.Instruction; value: Instruction} + | {kind: HIRValue.Terminal; value: Terminal}, + ): boolean { + return ( + this.#processedInstrsInOptional.has(instr.value) || + (instr.kind === HIRValue.Instruction && + this.#temporaries.has(instr.value.lvalue.identifier.id)) + ); + } +} +enum HIRValue { + Instruction = 1, + Terminal, +} + +export function handleInstruction( + instr: Instruction, + context: DependencyCollectionContext, +): void { + const {id, value, lvalue} = instr; + context.declare(lvalue.identifier, { + id, + scope: context.currentScope, + }); + if ( + context.isDeferredDependency({kind: HIRValue.Instruction, value: instr}) + ) { + return; + } + if (value.kind === 'PropertyLoad') { + context.visitProperty(value.object, value.property, false); + } else if (value.kind === 'StoreLocal') { + context.visitOperand(value.value); + if (value.lvalue.kind === InstructionKind.Reassign) { + context.visitReassignment(value.lvalue.place); + } + context.declare(value.lvalue.place.identifier, { + id, + scope: context.currentScope, + }); + } else if (value.kind === 'DeclareLocal' || value.kind === 'DeclareContext') { + /* + * Some variables may be declared and never initialized. We need to retain + * (and hoist) these declarations if they are included in a reactive scope. + * One approach is to simply add all `DeclareLocal`s as scope declarations. + * + * Context variables with hoisted declarations only become live after their + * first assignment. We only declare real DeclareLocal / DeclareContext + * instructions (not hoisted ones) to avoid generating dependencies on + * hoisted declarations. 
+ */ + if (convertHoistedLValueKind(value.lvalue.kind) === null) { + context.declare(value.lvalue.place.identifier, { + id, + scope: context.currentScope, + }); + } + } else if (value.kind === 'Destructure') { + context.visitOperand(value.value); + for (const place of eachPatternOperand(value.lvalue.pattern)) { + if (value.lvalue.kind === InstructionKind.Reassign) { + context.visitReassignment(place); + } + context.declare(place.identifier, { + id, + scope: context.currentScope, + }); + } + } else if (value.kind === 'StoreContext') { + /** + * Some StoreContext variables have hoisted declarations. If we're storing + * to a context variable that hasn't yet been declared, the StoreContext is + * the declaration. + * (see corresponding logic in PruneHoistedContext) + */ + if ( + !context.hasDeclared(value.lvalue.place.identifier) || + value.lvalue.kind !== InstructionKind.Reassign + ) { + context.declare(value.lvalue.place.identifier, { + id, + scope: context.currentScope, + }); + } + + for (const operand of eachInstructionValueOperand(value)) { + context.visitOperand(operand); + } + } else { + for (const operand of eachInstructionValueOperand(value)) { + context.visitOperand(operand); + } + } + + context.declare(lvalue.identifier, { + id, + scope: context.currentScope, + }); +} + +function collectDependencies( + fn: HIRFunction, + usedOutsideDeclaringScope: ReadonlySet, + temporaries: ReadonlyMap, + processedInstrsInOptional: ReadonlySet, +): Map> { + const context = new DependencyCollectionContext( + usedOutsideDeclaringScope, + temporaries, + processedInstrsInOptional, + ); + + for (const param of fn.params) { + if (param.kind === 'Identifier') { + context.declare(param.identifier, { + id: makeInstructionId(0), + scope: empty(), + }); + } else { + context.declare(param.place.identifier, { + id: makeInstructionId(0), + scope: empty(), + }); + } + } + + const scopeTraversal = new ScopeBlockTraversal(); + + const handleFunction = (fn: HIRFunction): void => { + for 
(const [blockId, block] of fn.body.blocks) { + scopeTraversal.recordScopes(block); + const scopeBlockInfo = scopeTraversal.blockInfos.get(blockId); + if (scopeBlockInfo?.kind === 'begin') { + context.enterScope(scopeBlockInfo.scope); + } else if (scopeBlockInfo?.kind === 'end') { + context.exitScope(scopeBlockInfo.scope, scopeBlockInfo.pruned); + } + // Record referenced optional chains in phis + for (const phi of block.phis) { + for (const operand of phi.operands) { + const maybeOptionalChain = temporaries.get(operand[1].identifier.id); + if (maybeOptionalChain) { + context.visitDependency(maybeOptionalChain); + } + } + } + for (const instr of block.instructions) { + if ( + instr.value.kind === 'FunctionExpression' || + instr.value.kind === 'ObjectMethod' + ) { + context.declare(instr.lvalue.identifier, { + id: instr.id, + scope: context.currentScope, + }); + /** + * Recursively visit the inner function to extract dependencies there + */ + const innerFn = instr.value.loweredFunc.func; + context.enterInnerFn( + instr as + | TInstruction + | TInstruction, + () => { + handleFunction(innerFn); + }, + ); + } else { + handleInstruction(instr, context); + } + } + + if ( + !context.isDeferredDependency({ + kind: HIRValue.Terminal, + value: block.terminal, + }) + ) { + for (const place of eachTerminalOperand(block.terminal)) { + context.visitOperand(place); + } + } + } + }; + + handleFunction(fn); + return context.deps; +} +``` \ No newline at end of file diff --git a/locodiff-250425/results/react_packages_react-devtools-shared_src_devtools_views_Components_SelectedTreeHighlight.js/x-ai_grok-code-fast-1/20250909_155839/extracted_output.txt b/locodiff-250425/results/react_packages_react-devtools-shared_src_devtools_views_Components_SelectedTreeHighlight.js/x-ai_grok-code-fast-1/20250909_155839/extracted_output.txt new file mode 100644 index 000000000..63c134585 --- /dev/null +++ 
b/locodiff-250425/results/react_packages_react-devtools-shared_src_devtools_views_Components_SelectedTreeHighlight.js/x-ai_grok-code-fast-1/20250909_155839/extracted_output.txt @@ -0,0 +1,110 @@ +/** + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + * + * @flow + */ + +import type {Element} from 'react-devtools-shared/src/frontend/types'; + +import * as React from 'react'; +import {useContext, useMemo} from 'react'; +import {TreeStateContext} from './TreeContext'; +import {SettingsContext} from '../Settings/SettingsContext'; +import TreeFocusedContext from './TreeFocusedContext'; +import {StoreContext} from '../context'; +import {useSubscription} from '../hooks'; + +import styles from './SelectedTreeHighlight.css'; + +type Data = { + startIndex: number, + stopIndex: number, +}; + +export default function SelectedTreeHighlight(_: {}): React.Node { + const {lineHeight} = useContext(SettingsContext); + const store = useContext(StoreContext); + const treeFocused = useContext(TreeFocusedContext); + const {ownerID, inspectedElementID} = useContext(TreeStateContext); + + const subscription = useMemo( + () => ({ + getCurrentValue: () => { + if ( + inspectedElementID === null || + store.isInsideCollapsedSubTree(inspectedElementID) + ) { + return null; + } + + const element = store.getElementByID(inspectedElementID); + if ( + element === null || + element.isCollapsed || + element.children.length === 0 + ) { + return null; + } + + const startIndex = store.getIndexOfElementID(element.children[0]); + if (startIndex === null) { + return null; + } + + let stopIndex = null; + let current: null | Element = element; + while (current !== null) { + if (current.isCollapsed || current.children.length === 0) { + // We've found the last/deepest descendant. 
+ stopIndex = store.getIndexOfElementID(current.id); + current = null; + } else { + const lastChildID = current.children[current.children.length - 1]; + current = store.getElementByID(lastChildID); + } + } + + if (stopIndex === null) { + return null; + } + + return { + startIndex, + stopIndex, + }; + }, + subscribe: (callback: Function) => { + store.addListener('mutated', callback); + return () => { + store.removeListener('mutated', callback); + }; + }, + }), + [inspectedElementID, store], + ); + const data = useSubscription(subscription); + + if (ownerID !== null) { + return null; + } + + if (data === null) { + return null; + } + + const {startIndex, stopIndex} = data; + + return ( +
+ ); +} \ No newline at end of file diff --git a/locodiff-250425/results/react_packages_react-devtools-shared_src_devtools_views_Components_SelectedTreeHighlight.js/x-ai_grok-code-fast-1/20250909_155839/metadata.json b/locodiff-250425/results/react_packages_react-devtools-shared_src_devtools_views_Components_SelectedTreeHighlight.js/x-ai_grok-code-fast-1/20250909_155839/metadata.json new file mode 100644 index 000000000..5a311cba2 --- /dev/null +++ b/locodiff-250425/results/react_packages_react-devtools-shared_src_devtools_views_Components_SelectedTreeHighlight.js/x-ai_grok-code-fast-1/20250909_155839/metadata.json @@ -0,0 +1,25 @@ +{ + "model": "x-ai/grok-code-fast-1", + "benchmark_case": "react_packages_react-devtools-shared_src_devtools_views_Components_SelectedTreeHighlight.js", + "prompts_dir": "locodiff-250425/prompts", + "prompt_file": "locodiff-250425/prompts/react_packages_react-devtools-shared_src_devtools_views_Components_SelectedTreeHighlight.js_prompt.txt", + "expected_file": "locodiff-250425/prompts/react_packages_react-devtools-shared_src_devtools_views_Components_SelectedTreeHighlight.js_expectedoutput.txt", + "timestamp_utc": "2025-09-09T15:58:39.231782+00:00", + "success": true, + "error": null, + "api_error": false, + "raw_response_length": 2968, + "extracted_output_length": 2950, + "expected_output_length": 2951, + "results_dir": "locodiff-250425/results/react_packages_react-devtools-shared_src_devtools_views_Components_SelectedTreeHighlight.js/x-ai_grok-code-fast-1/20250909_155839", + "generation_id": "gen-1757433519-x9RLJ3H1U399qylGTz3a", + "cost_usd": 0.0036092, + "prompt_tokens": 5103, + "completion_tokens": 1126, + "total_tokens": 6229, + "native_prompt_tokens": 5164, + "native_completion_tokens": 2332, + "native_finish_reason": "stop", + "stats_error": null, + "native_tokens_reasoning": 1677 +} \ No newline at end of file diff --git 
a/locodiff-250425/results/react_packages_react-devtools-shared_src_devtools_views_Components_SelectedTreeHighlight.js/x-ai_grok-code-fast-1/20250909_155839/output.diff b/locodiff-250425/results/react_packages_react-devtools-shared_src_devtools_views_Components_SelectedTreeHighlight.js/x-ai_grok-code-fast-1/20250909_155839/output.diff new file mode 100644 index 000000000..c8af7cb1b --- /dev/null +++ b/locodiff-250425/results/react_packages_react-devtools-shared_src_devtools_views_Components_SelectedTreeHighlight.js/x-ai_grok-code-fast-1/20250909_155839/output.diff @@ -0,0 +1 @@ +No differences found. diff --git a/locodiff-250425/results/react_packages_react-devtools-shared_src_devtools_views_Components_SelectedTreeHighlight.js/x-ai_grok-code-fast-1/20250909_155839/raw_response.txt b/locodiff-250425/results/react_packages_react-devtools-shared_src_devtools_views_Components_SelectedTreeHighlight.js/x-ai_grok-code-fast-1/20250909_155839/raw_response.txt new file mode 100644 index 000000000..b8d9092b2 --- /dev/null +++ b/locodiff-250425/results/react_packages_react-devtools-shared_src_devtools_views_Components_SelectedTreeHighlight.js/x-ai_grok-code-fast-1/20250909_155839/raw_response.txt @@ -0,0 +1,112 @@ +```javascript +/** + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ * + * @flow + */ + +import type {Element} from 'react-devtools-shared/src/frontend/types'; + +import * as React from 'react'; +import {useContext, useMemo} from 'react'; +import {TreeStateContext} from './TreeContext'; +import {SettingsContext} from '../Settings/SettingsContext'; +import TreeFocusedContext from './TreeFocusedContext'; +import {StoreContext} from '../context'; +import {useSubscription} from '../hooks'; + +import styles from './SelectedTreeHighlight.css'; + +type Data = { + startIndex: number, + stopIndex: number, +}; + +export default function SelectedTreeHighlight(_: {}): React.Node { + const {lineHeight} = useContext(SettingsContext); + const store = useContext(StoreContext); + const treeFocused = useContext(TreeFocusedContext); + const {ownerID, inspectedElementID} = useContext(TreeStateContext); + + const subscription = useMemo( + () => ({ + getCurrentValue: () => { + if ( + inspectedElementID === null || + store.isInsideCollapsedSubTree(inspectedElementID) + ) { + return null; + } + + const element = store.getElementByID(inspectedElementID); + if ( + element === null || + element.isCollapsed || + element.children.length === 0 + ) { + return null; + } + + const startIndex = store.getIndexOfElementID(element.children[0]); + if (startIndex === null) { + return null; + } + + let stopIndex = null; + let current: null | Element = element; + while (current !== null) { + if (current.isCollapsed || current.children.length === 0) { + // We've found the last/deepest descendant. 
+ stopIndex = store.getIndexOfElementID(current.id); + current = null; + } else { + const lastChildID = current.children[current.children.length - 1]; + current = store.getElementByID(lastChildID); + } + } + + if (stopIndex === null) { + return null; + } + + return { + startIndex, + stopIndex, + }; + }, + subscribe: (callback: Function) => { + store.addListener('mutated', callback); + return () => { + store.removeListener('mutated', callback); + }; + }, + }), + [inspectedElementID, store], + ); + const data = useSubscription(subscription); + + if (ownerID !== null) { + return null; + } + + if (data === null) { + return null; + } + + const {startIndex, stopIndex} = data; + + return ( +
+ ); +} +``` \ No newline at end of file diff --git a/locodiff-250425/results/react_packages_react-dom_src___tests___ReactDOMOption-test.js/x-ai_grok-code-fast-1/20250909_155839/extracted_output.txt b/locodiff-250425/results/react_packages_react-dom_src___tests___ReactDOMOption-test.js/x-ai_grok-code-fast-1/20250909_155839/extracted_output.txt new file mode 100644 index 000000000..31b1590d8 --- /dev/null +++ b/locodiff-250425/results/react_packages_react-dom_src___tests___ReactDOMOption-test.js/x-ai_grok-code-fast-1/20250909_155839/extracted_output.txt @@ -0,0 +1,289 @@ +/** + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + * + * @emails react-core + */ + +'use strict'; + +describe('ReactDOMOption', () => { + let React; + let ReactDOMClient; + let ReactDOMServer; + let act; + let assertConsoleErrorDev; + + beforeEach(() => { + jest.resetModules(); + React = require('react'); + ReactDOMClient = require('react-dom/client'); + ReactDOMServer = require('react-dom/server'); + act = require('internal-test-utils').act; + assertConsoleErrorDev = + require('internal-test-utils').assertConsoleErrorDev; + }); + + async function renderIntoDocument(children) { + const container = document.createElement('div'); + const root = ReactDOMClient.createRoot(container); + await act(() => root.render(children)); + return container; + } + + it('should flatten children to a string', async () => { + const stub = ( + + ); + const container = await renderIntoDocument(stub); + + expect(container.firstChild.innerHTML).toBe('1 foo'); + }); + + it('should warn for invalid child tags', async () => { + const el = ( +