diff --git a/cpp/include/cuvs/neighbors/cagra.hpp b/cpp/include/cuvs/neighbors/cagra.hpp index a7e1249677..b7277a3e3d 100644 --- a/cpp/include/cuvs/neighbors/cagra.hpp +++ b/cpp/include/cuvs/neighbors/cagra.hpp @@ -2497,9 +2497,9 @@ auto merge(raft::resources const& res, * @} */ -/// \defgroup mg_cpp_index_build ANN MG index build +/// \defgroup mg_cpp_cagra_index_build ANN MG CAGRA index build -/// \ingroup mg_cpp_index_build +/// \ingroup mg_cpp_cagra_index_build /** * @brief Builds a multi-GPU index * @@ -2521,7 +2521,7 @@ auto build(const raft::resources& clique, raft::host_matrix_view index_dataset) -> cuvs::neighbors::mg_index, float, uint32_t>; -/// \ingroup mg_cpp_index_build +/// \ingroup mg_cpp_cagra_index_build /** * @brief Builds a multi-GPU index * @@ -2543,7 +2543,7 @@ auto build(const raft::resources& clique, raft::host_matrix_view index_dataset) -> cuvs::neighbors::mg_index, half, uint32_t>; -/// \ingroup mg_cpp_index_build +/// \ingroup mg_cpp_cagra_index_build /** * @brief Builds a multi-GPU index * @@ -2565,7 +2565,7 @@ auto build(const raft::resources& clique, raft::host_matrix_view index_dataset) -> cuvs::neighbors::mg_index, int8_t, uint32_t>; -/// \ingroup mg_cpp_index_build +/// \ingroup mg_cpp_cagra_index_build /** * @brief Builds a multi-GPU index * @@ -2587,9 +2587,9 @@ auto build(const raft::resources& clique, raft::host_matrix_view index_dataset) -> cuvs::neighbors::mg_index, uint8_t, uint32_t>; -/// \defgroup mg_cpp_index_extend ANN MG index extend +/// \defgroup mg_cpp_cagra_index_extend ANN MG CAGRA index extend -/// \ingroup mg_cpp_index_extend +/// \ingroup mg_cpp_cagra_index_extend /** * @brief Extends a multi-GPU index * @@ -2613,7 +2613,7 @@ void extend(const raft::resources& clique, raft::host_matrix_view new_vectors, std::optional> new_indices); -/// \ingroup mg_cpp_index_extend +/// \ingroup mg_cpp_cagra_index_extend /** * @brief Extends a multi-GPU index * @@ -2637,7 +2637,7 @@ void extend(const raft::resources& clique, 
raft::host_matrix_view new_vectors, std::optional> new_indices); -/// \ingroup mg_cpp_index_extend +/// \ingroup mg_cpp_cagra_index_extend /** * @brief Extends a multi-GPU index * @@ -2661,7 +2661,7 @@ void extend(const raft::resources& clique, raft::host_matrix_view new_vectors, std::optional> new_indices); -/// \ingroup mg_cpp_index_extend +/// \ingroup mg_cpp_cagra_index_extend /** * @brief Extends a multi-GPU index * @@ -2685,9 +2685,9 @@ void extend(const raft::resources& clique, raft::host_matrix_view new_vectors, std::optional> new_indices); -/// \defgroup mg_cpp_index_search ANN MG index search +/// \defgroup mg_cpp_cagra_index_search ANN MG CAGRA index search -/// \ingroup mg_cpp_index_search +/// \ingroup mg_cpp_cagra_index_search /** * @brief Searches a multi-GPU index * @@ -2716,7 +2716,7 @@ void search(const raft::resources& clique, raft::host_matrix_view neighbors, raft::host_matrix_view distances); -/// \ingroup mg_cpp_index_search +/// \ingroup mg_cpp_cagra_index_search /** * @brief Searches a multi-GPU index * @@ -2745,7 +2745,7 @@ void search(const raft::resources& clique, raft::host_matrix_view neighbors, raft::host_matrix_view distances); -/// \ingroup mg_cpp_index_search +/// \ingroup mg_cpp_cagra_index_search /** * @brief Searches a multi-GPU index * @@ -2775,7 +2775,7 @@ void search( raft::host_matrix_view neighbors, raft::host_matrix_view distances); -/// \ingroup mg_cpp_index_search +/// \ingroup mg_cpp_cagra_index_search /** * @brief Searches a multi-GPU index * @@ -2805,7 +2805,7 @@ void search( raft::host_matrix_view neighbors, raft::host_matrix_view distances); -/// \ingroup mg_cpp_index_search +/// \ingroup mg_cpp_cagra_index_search /** * @brief Searches a multi-GPU index * @@ -2834,7 +2834,7 @@ void search(const raft::resources& clique, raft::host_matrix_view neighbors, raft::host_matrix_view distances); -/// \ingroup mg_cpp_index_search +/// \ingroup mg_cpp_cagra_index_search /** * @brief Searches a multi-GPU index * @@ -2863,7 
+2863,7 @@ void search(const raft::resources& clique, raft::host_matrix_view neighbors, raft::host_matrix_view distances); -/// \ingroup mg_cpp_index_search +/// \ingroup mg_cpp_cagra_index_search /** * @brief Searches a multi-GPU index * @@ -2893,7 +2893,7 @@ void search( raft::host_matrix_view neighbors, raft::host_matrix_view distances); -/// \ingroup mg_cpp_index_search +/// \ingroup mg_cpp_cagra_index_search /** * @brief Searches a multi-GPU index * @@ -2923,9 +2923,9 @@ void search( raft::host_matrix_view neighbors, raft::host_matrix_view distances); -/// \defgroup mg_cpp_serialize ANN MG index serialization +/// \defgroup mg_cpp_cagra_serialize ANN MG CAGRA index serialization -/// \ingroup mg_cpp_serialize +/// \ingroup mg_cpp_cagra_serialize /** * @brief Serializes a multi-GPU index * @@ -2948,7 +2948,7 @@ void serialize( const cuvs::neighbors::mg_index, float, uint32_t>& index, const std::string& filename); -/// \ingroup mg_cpp_serialize +/// \ingroup mg_cpp_cagra_serialize /** * @brief Serializes a multi-GPU index * @@ -2970,7 +2970,7 @@ void serialize(const raft::resources& clique, const cuvs::neighbors::mg_index, half, uint32_t>& index, const std::string& filename); -/// \ingroup mg_cpp_serialize +/// \ingroup mg_cpp_cagra_serialize /** * @brief Serializes a multi-GPU index * @@ -2993,7 +2993,7 @@ void serialize( const cuvs::neighbors::mg_index, int8_t, uint32_t>& index, const std::string& filename); -/// \ingroup mg_cpp_serialize +/// \ingroup mg_cpp_cagra_serialize /** * @brief Serializes a multi-GPU index * @@ -3016,9 +3016,9 @@ void serialize( const cuvs::neighbors::mg_index, uint8_t, uint32_t>& index, const std::string& filename); -/// \defgroup mg_cpp_deserialize ANN MG index deserialization +/// \defgroup mg_cpp_cagra_deserialize ANN MG CAGRA index deserialization -/// \ingroup mg_cpp_deserialize +/// \ingroup mg_cpp_cagra_deserialize /** * @brief Deserializes a CAGRA multi-GPU index * @@ -3041,9 +3041,9 @@ template auto deserialize(const 
raft::resources& clique, const std::string& filename) -> cuvs::neighbors::mg_index, T, IdxT>; -/// \defgroup mg_cpp_distribute ANN MG local index distribution +/// \defgroup mg_cpp_cagra_distribute ANN MG CAGRA local index distribution -/// \ingroup mg_cpp_distribute +/// \ingroup mg_cpp_cagra_distribute /** * @brief Replicates a locally built and serialized CAGRA index to all GPUs to form a distributed * multi-GPU index diff --git a/cpp/include/cuvs/neighbors/ivf_flat.hpp b/cpp/include/cuvs/neighbors/ivf_flat.hpp index f214db295c..86dfc4350d 100644 --- a/cpp/include/cuvs/neighbors/ivf_flat.hpp +++ b/cpp/include/cuvs/neighbors/ivf_flat.hpp @@ -2023,9 +2023,9 @@ void deserialize(raft::resources const& handle, * @} */ -/// \defgroup mg_cpp_index_build ANN MG index build +/// \defgroup mg_cpp_ivf_flat_index_build ANN MG IVF-Flat index build -/// \ingroup mg_cpp_index_build +/// \ingroup mg_cpp_ivf_flat_index_build /** * @brief Builds a multi-GPU index * @@ -2047,7 +2047,7 @@ auto build(const raft::resources& clique, raft::host_matrix_view index_dataset) -> cuvs::neighbors::mg_index, float, int64_t>; -/// \ingroup mg_cpp_index_build +/// \ingroup mg_cpp_ivf_flat_index_build /** * @brief Builds a multi-GPU index * @@ -2069,7 +2069,7 @@ auto build(const raft::resources& clique, raft::host_matrix_view index_dataset) -> cuvs::neighbors::mg_index, half, int64_t>; -/// \ingroup mg_cpp_index_build +/// \ingroup mg_cpp_ivf_flat_index_build /** * @brief Builds a multi-GPU index * @@ -2091,7 +2091,7 @@ auto build(const raft::resources& clique, raft::host_matrix_view index_dataset) -> cuvs::neighbors::mg_index, int8_t, int64_t>; -/// \ingroup mg_cpp_index_build +/// \ingroup mg_cpp_ivf_flat_index_build /** * @brief Builds a multi-GPU index * @@ -2113,9 +2113,9 @@ auto build(const raft::resources& clique, raft::host_matrix_view index_dataset) -> cuvs::neighbors::mg_index, uint8_t, int64_t>; -/// \defgroup mg_cpp_index_extend ANN MG index extend +/// \defgroup 
mg_cpp_ivf_flat_index_extend ANN MG IVF-Flat index extend -/// \ingroup mg_cpp_index_extend +/// \ingroup mg_cpp_ivf_flat_index_extend /** * @brief Extends a multi-GPU index * @@ -2139,7 +2139,7 @@ void extend(const raft::resources& clique, raft::host_matrix_view new_vectors, std::optional> new_indices); -/// \ingroup mg_cpp_index_extend +/// \ingroup mg_cpp_ivf_flat_index_extend /** * @brief Extends a multi-GPU index * @@ -2163,7 +2163,7 @@ void extend(const raft::resources& clique, raft::host_matrix_view new_vectors, std::optional> new_indices); -/// \ingroup mg_cpp_index_extend +/// \ingroup mg_cpp_ivf_flat_index_extend /** * @brief Extends a multi-GPU index * @@ -2187,7 +2187,7 @@ void extend(const raft::resources& clique, raft::host_matrix_view new_vectors, std::optional> new_indices); -/// \ingroup mg_cpp_index_extend +/// \ingroup mg_cpp_ivf_flat_index_extend /** * @brief Extends a multi-GPU index * @@ -2211,9 +2211,9 @@ void extend(const raft::resources& clique, raft::host_matrix_view new_vectors, std::optional> new_indices); -/// \defgroup mg_cpp_index_search ANN MG index search +/// \defgroup mg_cpp_ivf_flat_index_search ANN MG IVF-Flat index search -/// \ingroup mg_cpp_index_search +/// \ingroup mg_cpp_ivf_flat_index_search /** * @brief Searches a multi-GPU index * @@ -2242,7 +2242,7 @@ void search(const raft::resources& clique, raft::host_matrix_view neighbors, raft::host_matrix_view distances); -/// \ingroup mg_cpp_index_search +/// \ingroup mg_cpp_ivf_flat_index_search /** * @brief Searches a multi-GPU index * @@ -2271,7 +2271,7 @@ void search(const raft::resources& clique, raft::host_matrix_view neighbors, raft::host_matrix_view distances); -/// \ingroup mg_cpp_index_search +/// \ingroup mg_cpp_ivf_flat_index_search /** * @brief Searches a multi-GPU index * @@ -2301,7 +2301,7 @@ void search( raft::host_matrix_view neighbors, raft::host_matrix_view distances); -/// \ingroup mg_cpp_index_search +/// \ingroup mg_cpp_ivf_flat_index_search /** * @brief 
Searches a multi-GPU index * @@ -2331,9 +2331,9 @@ void search( raft::host_matrix_view neighbors, raft::host_matrix_view distances); -/// \defgroup mg_cpp_serialize ANN MG index serialization +/// \defgroup mg_cpp_ivf_flat_serialize ANN MG IVF-Flat index serialization -/// \ingroup mg_cpp_serialize +/// \ingroup mg_cpp_ivf_flat_serialize /** * @brief Serializes a multi-GPU index * @@ -2356,7 +2356,7 @@ void serialize( const cuvs::neighbors::mg_index, float, int64_t>& index, const std::string& filename); -/// \ingroup mg_cpp_serialize +/// \ingroup mg_cpp_ivf_flat_serialize /** * @brief Serializes a multi-GPU index * @@ -2379,7 +2379,7 @@ void serialize( const cuvs::neighbors::mg_index, half, int64_t>& index, const std::string& filename); -/// \ingroup mg_cpp_serialize +/// \ingroup mg_cpp_ivf_flat_serialize /** * @brief Serializes a multi-GPU index * @@ -2402,7 +2402,7 @@ void serialize( const cuvs::neighbors::mg_index, int8_t, int64_t>& index, const std::string& filename); -/// \ingroup mg_cpp_serialize +/// \ingroup mg_cpp_ivf_flat_serialize /** * @brief Serializes a multi-GPU index * @@ -2425,7 +2425,9 @@ void serialize( const cuvs::neighbors::mg_index, uint8_t, int64_t>& index, const std::string& filename); -/// \ingroup mg_cpp_deserialize +/// \defgroup mg_cpp_ivf_flat_deserialize ANN MG IVF-Flat index deserialization + +/// \ingroup mg_cpp_ivf_flat_deserialize /** * @brief Deserializes an IVF-Flat multi-GPU index * @@ -2448,9 +2450,9 @@ template auto deserialize(const raft::resources& clique, const std::string& filename) -> cuvs::neighbors::mg_index, T, IdxT>; -/// \defgroup mg_cpp_distribute ANN MG local index distribution +/// \defgroup mg_cpp_ivf_flat_distribute ANN MG IVF-Flat local index distribution -/// \ingroup mg_cpp_distribute +/// \ingroup mg_cpp_ivf_flat_distribute /** * @brief Replicates a locally built and serialized IVF-Flat index to all GPUs to form a distributed * multi-GPU index diff --git a/cpp/include/cuvs/neighbors/ivf_pq.hpp 
b/cpp/include/cuvs/neighbors/ivf_pq.hpp index 1fa7a3a7d6..7a8dfb129c 100644 --- a/cpp/include/cuvs/neighbors/ivf_pq.hpp +++ b/cpp/include/cuvs/neighbors/ivf_pq.hpp @@ -2110,9 +2110,9 @@ void deserialize(raft::resources const& handle, * @} */ -/// \defgroup mg_cpp_index_build ANN MG index build +/// \defgroup mg_cpp_ivf_pq_index_build ANN MG IVF-PQ index build -/// \ingroup mg_cpp_index_build +/// \ingroup mg_cpp_ivf_pq_index_build /** * @brief Builds a multi-GPU index * @@ -2134,7 +2134,7 @@ auto build(const raft::resources& clique, raft::host_matrix_view index_dataset) -> cuvs::neighbors::mg_index, float, int64_t>; -/// \ingroup mg_cpp_index_build +/// \ingroup mg_cpp_ivf_pq_index_build /** * @brief Builds a multi-GPU index * @@ -2156,7 +2156,7 @@ auto build(const raft::resources& clique, raft::host_matrix_view index_dataset) -> cuvs::neighbors::mg_index, half, int64_t>; -/// \ingroup mg_cpp_index_build +/// \ingroup mg_cpp_ivf_pq_index_build /** * @brief Builds a multi-GPU index * @@ -2178,7 +2178,7 @@ auto build(const raft::resources& clique, raft::host_matrix_view index_dataset) -> cuvs::neighbors::mg_index, int8_t, int64_t>; -/// \ingroup mg_cpp_index_build +/// \ingroup mg_cpp_ivf_pq_index_build /** * @brief Builds a multi-GPU index * @@ -2200,9 +2200,9 @@ auto build(const raft::resources& clique, raft::host_matrix_view index_dataset) -> cuvs::neighbors::mg_index, uint8_t, int64_t>; -/// \defgroup mg_cpp_index_extend ANN MG index extend +/// \defgroup mg_cpp_ivf_pq_index_extend ANN MG IVF-PQ index extend -/// \ingroup mg_cpp_index_extend +/// \ingroup mg_cpp_ivf_pq_index_extend /** * @brief Extends a multi-GPU index * @@ -2226,7 +2226,7 @@ void extend(const raft::resources& clique, raft::host_matrix_view new_vectors, std::optional> new_indices); -/// \ingroup mg_cpp_index_extend +/// \ingroup mg_cpp_ivf_pq_index_extend /** * @brief Extends a multi-GPU index * @@ -2250,7 +2250,7 @@ void extend(const raft::resources& clique, raft::host_matrix_view new_vectors, 
std::optional> new_indices); -/// \ingroup mg_cpp_index_extend +/// \ingroup mg_cpp_ivf_pq_index_extend /** * @brief Extends a multi-GPU index * @@ -2274,7 +2274,7 @@ void extend(const raft::resources& clique, raft::host_matrix_view new_vectors, std::optional> new_indices); -/// \ingroup mg_cpp_index_extend +/// \ingroup mg_cpp_ivf_pq_index_extend /** * @brief Extends a multi-GPU index * @@ -2298,9 +2298,9 @@ void extend(const raft::resources& clique, raft::host_matrix_view new_vectors, std::optional> new_indices); -/// \defgroup mg_cpp_index_search ANN MG index search +/// \defgroup mg_cpp_ivf_pq_index_search ANN MG IVF-PQ index search -/// \ingroup mg_cpp_index_search +/// \ingroup mg_cpp_ivf_pq_index_search /** * @brief Searches a multi-GPU index * @@ -2329,7 +2329,7 @@ void search(const raft::resources& clique, raft::host_matrix_view neighbors, raft::host_matrix_view distances); -/// \ingroup mg_cpp_index_search +/// \ingroup mg_cpp_ivf_pq_index_search /** * @brief Searches a multi-GPU index * @@ -2358,7 +2358,7 @@ void search(const raft::resources& clique, raft::host_matrix_view neighbors, raft::host_matrix_view distances); -/// \ingroup mg_cpp_index_search +/// \ingroup mg_cpp_ivf_pq_index_search /** * @brief Searches a multi-GPU index * @@ -2387,7 +2387,7 @@ void search(const raft::resources& clique, raft::host_matrix_view neighbors, raft::host_matrix_view distances); -/// \ingroup mg_cpp_index_search +/// \ingroup mg_cpp_ivf_pq_index_search /** * @brief Searches a multi-GPU index * @@ -2416,9 +2416,9 @@ void search(const raft::resources& clique, raft::host_matrix_view neighbors, raft::host_matrix_view distances); -/// \defgroup mg_cpp_serialize ANN MG index serialization +/// \defgroup mg_cpp_ivf_pq_serialize ANN MG IVF-PQ index serialization -/// \ingroup mg_cpp_serialize +/// \ingroup mg_cpp_ivf_pq_serialize /** * @brief Serializes a multi-GPU index * @@ -2440,7 +2440,7 @@ void serialize(const raft::resources& clique, const cuvs::neighbors::mg_index, 
float, int64_t>& index, const std::string& filename); -/// \ingroup mg_cpp_serialize +/// \ingroup mg_cpp_ivf_pq_serialize /** * @brief Serializes a multi-GPU index * @@ -2462,7 +2462,7 @@ void serialize(const raft::resources& clique, const cuvs::neighbors::mg_index, half, int64_t>& index, const std::string& filename); -/// \ingroup mg_cpp_serialize +/// \ingroup mg_cpp_ivf_pq_serialize /** * @brief Serializes a multi-GPU index * @@ -2484,7 +2484,7 @@ void serialize(const raft::resources& clique, const cuvs::neighbors::mg_index, int8_t, int64_t>& index, const std::string& filename); -/// \ingroup mg_cpp_serialize +/// \ingroup mg_cpp_ivf_pq_serialize /** * @brief Serializes a multi-GPU index * @@ -2506,7 +2506,9 @@ void serialize(const raft::resources& clique, const cuvs::neighbors::mg_index, uint8_t, int64_t>& index, const std::string& filename); -/// \ingroup mg_cpp_deserialize +/// \defgroup mg_cpp_ivf_pq_deserialize ANN MG IVF-PQ index deserialization + +/// \ingroup mg_cpp_ivf_pq_deserialize /** * @brief Deserializes an IVF-PQ multi-GPU index * @@ -2528,9 +2530,9 @@ template auto deserialize(const raft::resources& clique, const std::string& filename) -> cuvs::neighbors::mg_index, T, IdxT>; -/// \defgroup mg_cpp_distribute ANN MG local index distribution +/// \defgroup mg_cpp_ivf_pq_distribute ANN MG IVF-PQ local index distribution -/// \ingroup mg_cpp_distribute +/// \ingroup mg_cpp_ivf_pq_distribute /** * @brief Replicates a locally built and serialized IVF-PQ index to all GPUs to form a distributed * multi-GPU index diff --git a/docs/source/_static/collapse_overloads.js b/docs/source/_static/collapse_overloads.js index 2ec2e710fb..b40f2627df 100644 --- a/docs/source/_static/collapse_overloads.js +++ b/docs/source/_static/collapse_overloads.js @@ -1,5 +1,6 @@ document.addEventListener("DOMContentLoaded", () => { - const toc = document.querySelector(".bd-toc-nav"); + const toc = document.querySelector("#pst-page-toc-nav") || + 
document.querySelector(".bd-toc-nav"); if (!toc) return; // Get all TOC links diff --git a/docs/source/api_docs.rst b/docs/source/api_docs.rst index 68d184c72c..55f00a37a6 100644 --- a/docs/source/api_docs.rst +++ b/docs/source/api_docs.rst @@ -2,7 +2,7 @@ API Reference ============= .. toctree:: - :maxdepth: 3 + :maxdepth: 5 c_api.rst cpp_api.rst diff --git a/docs/source/c_api.rst b/docs/source/c_api.rst index c65eee06ef..93a35b11ed 100644 --- a/docs/source/c_api.rst +++ b/docs/source/c_api.rst @@ -8,7 +8,7 @@ C API Documentation :maxdepth: 4 c_api/core_c_api.rst - c_api/distance.rst c_api/cluster.rst + c_api/distance.rst c_api/neighbors.rst c_api/preprocessing.rst diff --git a/docs/source/c_api/cluster.rst b/docs/source/c_api/cluster.rst index 34795e45bf..f6d614c7a6 100644 --- a/docs/source/c_api/cluster.rst +++ b/docs/source/c_api/cluster.rst @@ -1,5 +1,5 @@ -Clustering -========== +Cluster +======= .. role:: py(code) :language: c diff --git a/docs/source/c_api/neighbors.rst b/docs/source/c_api/neighbors.rst index 305364bb2a..97ee08b8c2 100644 --- a/docs/source/c_api/neighbors.rst +++ b/docs/source/c_api/neighbors.rst @@ -16,4 +16,5 @@ Nearest Neighbors neighbors_ivf_flat_c.rst neighbors_ivf_pq_c.rst neighbors_mg.rst + neighbors_nn_descent_c.rst neighbors_vamana_c.rst diff --git a/docs/source/c_api/neighbors_bruteforce_c.rst b/docs/source/c_api/neighbors_bruteforce_c.rst index 36ba96f424..16018cb60b 100644 --- a/docs/source/c_api/neighbors_bruteforce_c.rst +++ b/docs/source/c_api/neighbors_bruteforce_c.rst @@ -1,5 +1,5 @@ -Bruteforce -========== +Brute Force KNN +=============== The bruteforce method is running the KNN algorithm. It performs an extensive search, and in contrast to ANN methods produces an exact result. 
diff --git a/docs/source/c_api/neighbors_mg.rst b/docs/source/c_api/neighbors_mg.rst index bffe3fc4c5..2fa6cac103 100644 --- a/docs/source/c_api/neighbors_mg.rst +++ b/docs/source/c_api/neighbors_mg.rst @@ -7,251 +7,10 @@ The Multi-GPU (SNMG - single-node multi-GPUs) C API provides a set of functions :language: c :class: highlight -Common Types and Enums -====================== +.. toctree:: + :maxdepth: 2 -Common types and enums used across multi-GPU ANN algorithms. - -``#include `` - -.. doxygengroup:: mg_c_common_types - :project: cuvs - :members: - :content-only: - -Multi-GPU IVF-Flat -================== - -The Multi-GPU IVF-Flat method extends the IVF-Flat ANN algorithm to work across multiple GPUs. It provides two distribution modes: replicated (for higher throughput) and sharded (for handling larger datasets). - -``#include `` - -IVF-Flat Index Build Parameters -------------------------------- - -.. doxygengroup:: mg_ivf_flat_c_index_params - :project: cuvs - :members: - :content-only: - -IVF-Flat Index Search Parameters --------------------------------- - -.. doxygengroup:: mg_ivf_flat_c_search_params - :project: cuvs - :members: - :content-only: - -IVF-Flat Index --------------- - -.. doxygengroup:: mg_ivf_flat_c_index - :project: cuvs - :members: - :content-only: - -IVF-Flat Index Build --------------------- - -.. doxygengroup:: mg_ivf_flat_c_index_build - :project: cuvs - :members: - :content-only: - -IVF-Flat Index Search ---------------------- - -.. doxygengroup:: mg_ivf_flat_c_index_search - :project: cuvs - :members: - :content-only: - -IVF-Flat Index Extend ---------------------- - -.. doxygengroup:: mg_ivf_flat_c_index_extend - :project: cuvs - :members: - :content-only: - -IVF-Flat Index Serialize ------------------------- - -.. doxygengroup:: mg_ivf_flat_c_index_serialize - :project: cuvs - :members: - :content-only: - -IVF-Flat Index Deserialize ---------------------------- - -.. 
doxygengroup:: mg_ivf_flat_c_index_deserialize - :project: cuvs - :members: - :content-only: - -IVF-Flat Index Distribute --------------------------- - -.. doxygengroup:: mg_ivf_flat_c_index_distribute - :project: cuvs - :members: - :content-only: - -Multi-GPU IVF-PQ -================= - -The Multi-GPU IVF-PQ method extends the IVF-PQ ANN algorithm to work across multiple GPUs. It provides two distribution modes: replicated (for higher throughput) and sharded (for handling larger datasets). - -``#include `` - -IVF-PQ Index Build Parameters ------------------------------ - -.. doxygengroup:: mg_ivf_pq_c_index_params - :project: cuvs - :members: - :content-only: - -IVF-PQ Index Search Parameters ------------------------------- - -.. doxygengroup:: mg_ivf_pq_c_search_params - :project: cuvs - :members: - :content-only: - -IVF-PQ Index ------------- - -.. doxygengroup:: mg_ivf_pq_c_index - :project: cuvs - :members: - :content-only: - -IVF-PQ Index Build ------------------- - -.. doxygengroup:: mg_ivf_pq_c_index_build - :project: cuvs - :members: - :content-only: - -IVF-PQ Index Search -------------------- - -.. doxygengroup:: mg_ivf_pq_c_index_search - :project: cuvs - :members: - :content-only: - -IVF-PQ Index Extend -------------------- - -.. doxygengroup:: mg_ivf_pq_c_index_extend - :project: cuvs - :members: - :content-only: - -IVF-PQ Index Serialize ----------------------- - -.. doxygengroup:: mg_ivf_pq_c_index_serialize - :project: cuvs - :members: - :content-only: - -IVF-PQ Index Deserialize ------------------------- - -.. doxygengroup:: mg_ivf_pq_c_index_deserialize - :project: cuvs - :members: - :content-only: - -IVF-PQ Index Distribute ------------------------ - -.. doxygengroup:: mg_ivf_pq_c_index_distribute - :project: cuvs - :members: - :content-only: - -Multi-GPU CAGRA -================ - -The Multi-GPU CAGRA method extends the CAGRA graph-based ANN algorithm to work across multiple GPUs. 
It provides two distribution modes: replicated (for higher throughput) and sharded (for handling larger datasets). - -``#include `` - -CAGRA Index Build Parameters ----------------------------- - -.. doxygengroup:: mg_cagra_c_index_params - :project: cuvs - :members: - :content-only: - -CAGRA Index Search Parameters ------------------------------ - -.. doxygengroup:: mg_cagra_c_search_params - :project: cuvs - :members: - :content-only: - -CAGRA Index ------------ - -.. doxygengroup:: mg_cagra_c_index - :project: cuvs - :members: - :content-only: - -CAGRA Index Build ------------------ - -.. doxygengroup:: mg_cagra_c_index_build - :project: cuvs - :members: - :content-only: - -CAGRA Index Search ------------------- - -.. doxygengroup:: mg_cagra_c_index_search - :project: cuvs - :members: - :content-only: - -CAGRA Index Extend ------------------- - -.. doxygengroup:: mg_cagra_c_index_extend - :project: cuvs - :members: - :content-only: - -CAGRA Index Serialize ---------------------- - -.. doxygengroup:: mg_cagra_c_index_serialize - :project: cuvs - :members: - :content-only: - -CAGRA Index Deserialize ------------------------ - -.. doxygengroup:: mg_cagra_c_index_deserialize - :project: cuvs - :members: - :content-only: - -CAGRA Index Distribute ----------------------- - -.. doxygengroup:: mg_cagra_c_index_distribute - :project: cuvs - :members: - :content-only: + neighbors_mg_all_neighbors_c.rst + neighbors_mg_cagra_c.rst + neighbors_mg_ivf_flat_c.rst + neighbors_mg_ivf_pq_c.rst diff --git a/docs/source/c_api/neighbors_mg_all_neighbors_c.rst b/docs/source/c_api/neighbors_mg_all_neighbors_c.rst new file mode 100644 index 0000000000..2c1aee2d81 --- /dev/null +++ b/docs/source/c_api/neighbors_mg_all_neighbors_c.rst @@ -0,0 +1,33 @@ +Multi-GPU All-Neighbors +======================= + +Unlike the other multi-GPU nearest neighbors algorithms (CAGRA, IVF-Flat, IVF-PQ), all-neighbors does not require a separate multi-GPU API. 
Multi-GPU support is built into the unified ``cuvsAllNeighborsBuild`` function. + +To enable multi-GPU execution: + +1. Create a multi-GPU ``cuvsResources_t`` handle using ``cuvsMultiGpuResourcesCreate`` (instead of ``cuvsResourcesCreate`` for single-GPU). You can optionally specify device IDs with ``cuvsMultiGpuResourcesCreateWithDeviceIds``. +2. Set ``n_clusters > 1`` in ``cuvsAllNeighborsIndexParams`` to enable data partitioning across GPUs. +3. Provide the dataset on host memory. + +The function automatically detects whether the resources handle is multi-GPU and distributes clusters across the available GPUs. When ``n_clusters == 1``, the build runs on a single GPU. + +.. code-block:: c + + #include + #include + + // Create multi-GPU resources (uses all available GPUs) + cuvsResources_t handle; + cuvsMultiGpuResourcesCreate(&handle); + + cuvsAllNeighborsIndexParams_t params; + cuvsAllNeighborsIndexParamsCreate(¶ms); + params->n_clusters = 8; + params->overlap_factor = 2; + + cuvsAllNeighborsBuild(handle, params, dataset, indices, distances, NULL, 1.0); + + cuvsAllNeighborsIndexParamsDestroy(params); + cuvsMultiGpuResourcesDestroy(handle); + +For the full API reference (parameters and build function), see :doc:`All-Neighbors `. diff --git a/docs/source/c_api/neighbors_mg_cagra_c.rst b/docs/source/c_api/neighbors_mg_cagra_c.rst new file mode 100644 index 0000000000..bab5f18f6a --- /dev/null +++ b/docs/source/c_api/neighbors_mg_cagra_c.rst @@ -0,0 +1,92 @@ +Multi-GPU CAGRA +=============== + +The Multi-GPU CAGRA method extends the CAGRA graph-based ANN algorithm to work across multiple GPUs. It provides two distribution modes: replicated (for higher throughput) and sharded (for handling larger datasets). + +.. role:: py(code) + :language: c + :class: highlight + +``#include `` + +Common Types and Enums +---------------------- + +``#include `` + +.. 
doxygengroup:: mg_c_common_types + :project: cuvs + :members: + :content-only: + +Index Build Parameters +---------------------- + +.. doxygengroup:: mg_cagra_c_index_params + :project: cuvs + :members: + :content-only: + +Index Search Parameters +----------------------- + +.. doxygengroup:: mg_cagra_c_search_params + :project: cuvs + :members: + :content-only: + +Index +----- + +.. doxygengroup:: mg_cagra_c_index + :project: cuvs + :members: + :content-only: + +Index Build +----------- + +.. doxygengroup:: mg_cagra_c_index_build + :project: cuvs + :members: + :content-only: + +Index Search +------------ + +.. doxygengroup:: mg_cagra_c_index_search + :project: cuvs + :members: + :content-only: + +Index Extend +------------ + +.. doxygengroup:: mg_cagra_c_index_extend + :project: cuvs + :members: + :content-only: + +Index Serialize +--------------- + +.. doxygengroup:: mg_cagra_c_index_serialize + :project: cuvs + :members: + :content-only: + +Index Deserialize +----------------- + +.. doxygengroup:: mg_cagra_c_index_deserialize + :project: cuvs + :members: + :content-only: + +Index Distribute +---------------- + +.. doxygengroup:: mg_cagra_c_index_distribute + :project: cuvs + :members: + :content-only: diff --git a/docs/source/c_api/neighbors_mg_ivf_flat_c.rst b/docs/source/c_api/neighbors_mg_ivf_flat_c.rst new file mode 100644 index 0000000000..0c6045ec1a --- /dev/null +++ b/docs/source/c_api/neighbors_mg_ivf_flat_c.rst @@ -0,0 +1,92 @@ +Multi-GPU IVF-Flat +================== + +The Multi-GPU IVF-Flat method extends the IVF-Flat ANN algorithm to work across multiple GPUs. It provides two distribution modes: replicated (for higher throughput) and sharded (for handling larger datasets). + +.. role:: py(code) + :language: c + :class: highlight + +``#include `` + +Common Types and Enums +---------------------- + +``#include `` + +.. doxygengroup:: mg_c_common_types + :project: cuvs + :members: + :content-only: + +Index Build Parameters +---------------------- + +.. 
doxygengroup:: mg_ivf_flat_c_index_params + :project: cuvs + :members: + :content-only: + +Index Search Parameters +----------------------- + +.. doxygengroup:: mg_ivf_flat_c_search_params + :project: cuvs + :members: + :content-only: + +Index +----- + +.. doxygengroup:: mg_ivf_flat_c_index + :project: cuvs + :members: + :content-only: + +Index Build +----------- + +.. doxygengroup:: mg_ivf_flat_c_index_build + :project: cuvs + :members: + :content-only: + +Index Search +------------ + +.. doxygengroup:: mg_ivf_flat_c_index_search + :project: cuvs + :members: + :content-only: + +Index Extend +------------ + +.. doxygengroup:: mg_ivf_flat_c_index_extend + :project: cuvs + :members: + :content-only: + +Index Serialize +--------------- + +.. doxygengroup:: mg_ivf_flat_c_index_serialize + :project: cuvs + :members: + :content-only: + +Index Deserialize +----------------- + +.. doxygengroup:: mg_ivf_flat_c_index_deserialize + :project: cuvs + :members: + :content-only: + +Index Distribute +---------------- + +.. doxygengroup:: mg_ivf_flat_c_index_distribute + :project: cuvs + :members: + :content-only: diff --git a/docs/source/c_api/neighbors_mg_ivf_pq_c.rst b/docs/source/c_api/neighbors_mg_ivf_pq_c.rst new file mode 100644 index 0000000000..20ea82bbc8 --- /dev/null +++ b/docs/source/c_api/neighbors_mg_ivf_pq_c.rst @@ -0,0 +1,92 @@ +Multi-GPU IVF-PQ +================ + +The Multi-GPU IVF-PQ method extends the IVF-PQ ANN algorithm to work across multiple GPUs. It provides two distribution modes: replicated (for higher throughput) and sharded (for handling larger datasets). + +.. role:: py(code) + :language: c + :class: highlight + +``#include `` + +Common Types and Enums +---------------------- + +``#include `` + +.. doxygengroup:: mg_c_common_types + :project: cuvs + :members: + :content-only: + +Index Build Parameters +---------------------- + +.. 
doxygengroup:: mg_ivf_pq_c_index_params + :project: cuvs + :members: + :content-only: + +Index Search Parameters +----------------------- + +.. doxygengroup:: mg_ivf_pq_c_search_params + :project: cuvs + :members: + :content-only: + +Index +----- + +.. doxygengroup:: mg_ivf_pq_c_index + :project: cuvs + :members: + :content-only: + +Index Build +----------- + +.. doxygengroup:: mg_ivf_pq_c_index_build + :project: cuvs + :members: + :content-only: + +Index Search +------------ + +.. doxygengroup:: mg_ivf_pq_c_index_search + :project: cuvs + :members: + :content-only: + +Index Extend +------------ + +.. doxygengroup:: mg_ivf_pq_c_index_extend + :project: cuvs + :members: + :content-only: + +Index Serialize +--------------- + +.. doxygengroup:: mg_ivf_pq_c_index_serialize + :project: cuvs + :members: + :content-only: + +Index Deserialize +----------------- + +.. doxygengroup:: mg_ivf_pq_c_index_deserialize + :project: cuvs + :members: + :content-only: + +Index Distribute +---------------- + +.. doxygengroup:: mg_ivf_pq_c_index_distribute + :project: cuvs + :members: + :content-only: diff --git a/docs/source/c_api/neighbors_nn_descent_c.rst b/docs/source/c_api/neighbors_nn_descent_c.rst new file mode 100644 index 0000000000..3269dcea50 --- /dev/null +++ b/docs/source/c_api/neighbors_nn_descent_c.rst @@ -0,0 +1,34 @@ +NN-Descent +========== + +NN-Descent is a graph-based algorithm for building approximate k-nearest neighbor graphs. It iteratively refines an initial random graph by exploring neighbors of neighbors. + +.. role:: py(code) + :language: c + :class: highlight + +``#include `` + +Index Parameters +---------------- + +.. doxygengroup:: nn_descent_c_index_params + :project: cuvs + :members: + :content-only: + +Index +----- + +.. doxygengroup:: nn_descent_c_index + :project: cuvs + :members: + :content-only: + +Index Build +----------- + +.. 
doxygengroup:: nn_descent_c_index_build + :project: cuvs + :members: + :content-only: diff --git a/docs/source/c_api/preprocessing.rst b/docs/source/c_api/preprocessing.rst index 1c65455de0..52df3c5f6c 100644 --- a/docs/source/c_api/preprocessing.rst +++ b/docs/source/c_api/preprocessing.rst @@ -5,34 +5,8 @@ Preprocessing :language: c :class: highlight -Binary Quantizer ----------------- +.. toctree:: + :maxdepth: 2 -.. doxygengroup:: preprocessing_c_binary - :project: cuvs - :members: - :content-only: - -Product Quantizer ------------------ - -.. doxygengroup:: preprocessing_c_pq - :project: cuvs - :members: - :content-only: - -PCA (Principal Component Analysis) ------------------------------------ - -.. doxygengroup:: preprocessing_c_pca - :project: cuvs - :members: - :content-only: - -Scalar Quantizer ----------------- - -.. doxygengroup:: preprocessing_c_scalar - :project: cuvs - :members: - :content-only: + preprocessing_pca.rst + preprocessing_quantize.rst diff --git a/docs/source/c_api/preprocessing_pca.rst b/docs/source/c_api/preprocessing_pca.rst new file mode 100644 index 0000000000..4b2bc6772e --- /dev/null +++ b/docs/source/c_api/preprocessing_pca.rst @@ -0,0 +1,13 @@ +PCA +=== + +.. role:: py(code) + :language: c + :class: highlight + +``#include `` + +.. doxygengroup:: preprocessing_c_pca + :project: cuvs + :members: + :content-only: diff --git a/docs/source/c_api/preprocessing_quantize.rst b/docs/source/c_api/preprocessing_quantize.rst new file mode 100644 index 0000000000..3dbd461c2c --- /dev/null +++ b/docs/source/c_api/preprocessing_quantize.rst @@ -0,0 +1,13 @@ +Quantization +============ + +.. role:: py(code) + :language: c + :class: highlight + +.. 
toctree:: + :maxdepth: 2 + + preprocessing_quantize_binary.rst + preprocessing_quantize_pq.rst + preprocessing_quantize_scalar.rst diff --git a/docs/source/c_api/preprocessing_quantize_binary.rst b/docs/source/c_api/preprocessing_quantize_binary.rst new file mode 100644 index 0000000000..cb8d34c998 --- /dev/null +++ b/docs/source/c_api/preprocessing_quantize_binary.rst @@ -0,0 +1,13 @@ +Binary Quantizer +================ + +.. role:: py(code) + :language: c + :class: highlight + +``#include `` + +.. doxygengroup:: preprocessing_c_binary + :project: cuvs + :members: + :content-only: diff --git a/docs/source/c_api/preprocessing_quantize_pq.rst b/docs/source/c_api/preprocessing_quantize_pq.rst new file mode 100644 index 0000000000..4000eaaa9e --- /dev/null +++ b/docs/source/c_api/preprocessing_quantize_pq.rst @@ -0,0 +1,13 @@ +Product Quantizer +================= + +.. role:: py(code) + :language: c + :class: highlight + +``#include `` + +.. doxygengroup:: preprocessing_c_pq + :project: cuvs + :members: + :content-only: diff --git a/docs/source/c_api/preprocessing_quantize_scalar.rst b/docs/source/c_api/preprocessing_quantize_scalar.rst new file mode 100644 index 0000000000..71827ee37b --- /dev/null +++ b/docs/source/c_api/preprocessing_quantize_scalar.rst @@ -0,0 +1,13 @@ +Scalar Quantizer +================ + +.. role:: py(code) + :language: c + :class: highlight + +``#include `` + +.. 
doxygengroup:: preprocessing_c_scalar + :project: cuvs + :members: + :content-only: diff --git a/docs/source/choosing_and_configuring_indexes.rst b/docs/source/choosing_and_configuring_indexes.rst index b4c140f295..34493667f7 100644 --- a/docs/source/choosing_and_configuring_indexes.rst +++ b/docs/source/choosing_and_configuring_indexes.rst @@ -1,5 +1,5 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Primer on vector search indexes +Primer on Vector Search Indexes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Vector search indexes often use approximations to trade-off accuracy of the results for speed, either through lowering latency (end-to-end single query speed) or by increasing throughput (the number of query vectors that can be satisfied in a short period of time). Vector search indexes, especially ones that use approximations, are very closely related to machine learning models but they are optimized for fast search and accuracy of results. @@ -18,14 +18,14 @@ For this reason, we focus on 4 primary data sizes: #. Large datasets (> 1 million vectors), goal is fast index creation at the expense of search quality #. Large datasets where high quality is preferred at the expense of fast index creation -Like other machine learning algorithms, vector search indexes generally have a training step – which means building the index – and an inference – or search step. The hyper-parameters also tend to be broken down into build and search parameters. +Like other machine learning algorithms, vector search indexes generally have a training step -- which means building the index -- and an inference -- or search step. The hyper-parameters also tend to be broken down into build and search parameters. -While not always the case, a general trend is often observed where the search speed decreases as the quality increases. This also tends to be the case with the index build performance, though different algorithms have different relationships between build time, quality, and search time. 
It’s important to understand that there’s no free lunch so there will always be trade-offs for each index type. +While not always the case, a general trend is often observed where the search speed decreases as the quality increases. This also tends to be the case with the index build performance, though different algorithms have different relationships between build time, quality, and search time. It's important to understand that there's no free lunch so there will always be trade-offs for each index type. Definition of quality ===================== -What do we mean when we say quality of an index? In machine learning terminology, we measure this using recall, which is sometimes used interchangeably to mean accuracy, even though the two are slightly different measures. Recall, when used in vector search, essentially means “out of all of my results, which results would have been included in the exact results?” In vector search, the objective is to find some number of vectors that are closest to a given query vector so recall tends to be more relaxed than accuracy, discriminating only on set inclusion, rather than on exact ordered list matching, which would be closer to an accuracy measure. +What do we mean when we say quality of an index? In machine learning terminology, we measure this using recall, which is sometimes used interchangeably to mean accuracy, even though the two are slightly different measures. Recall, when used in vector search, essentially means "out of all of my results, which results would have been included in the exact results?" In vector search, the objective is to find some number of vectors that are closest to a given query vector so recall tends to be more relaxed than accuracy, discriminating only on set inclusion, rather than on exact ordered list matching, which would be closer to an accuracy measure. 
Choosing vector search indexes ============================== @@ -35,7 +35,7 @@ Many vector search algorithms improve scalability while reducing the number of d Tiny datasets (< 100 thousand vectors) -------------------------------------- -These datasets are very small and it’s questionable whether or not the GPU would provide any value at all. If the dimensionality is also relatively small (< 1024), you could just use brute-force or HNSW on the CPU and get great performance. If the dimensionality is relatively large (1536, 2048, 4096), you should consider using HNSW. If build time performance is critical, you should consider using CAGRA to build the graph and convert it to an HNSW graph for search (this capability exists today in the standalone cuVS/RAFT libraries and will soon be added to Milvus). An IVF flat index can also be a great candidate here, as it can improve the search performance over brute-force by partitioning the vector space and thus reducing the search space. +These datasets are very small and it's questionable whether or not the GPU would provide any value at all. If the dimensionality is also relatively small (< 1024), you could just use brute-force or HNSW on the CPU and get great performance. If the dimensionality is relatively large (1536, 2048, 4096), you should consider using HNSW. If build time performance is critical, you should consider using CAGRA to build the graph and convert it to an HNSW graph for search (this capability exists today in the standalone cuVS/RAFT libraries and will soon be added to Milvus). An IVF flat index can also be a great candidate here, as it can improve the search performance over brute-force by partitioning the vector space and thus reducing the search space. 
Small datasets where GPU might not be needed (< 1 million vectors) ------------------------------------------------------------------ @@ -49,7 +49,7 @@ Large datasets (> 1 million vectors), goal is fast index creation at the expense For fast ingest where slightly lower search quality is acceptable (85% recall and above), the IVF (inverted file index) methods can be very useful, as they can be very fast to build and still have acceptable search performance. IVF-flat index will partition the vectors into some number of clusters (specified by the user as n_lists) and at search time, some number of closest clusters (defined by n_probes) will be searched with brute-force for each query vector. -IVF-PQ is similar to IVF-flat with the major difference that the vectors are compressed using a lossy product quantized compression so the index can have a much smaller footprint on the GPU. In general, it’s advised to set n_lists = sqrt(n_vectors) and set n_probes to some percentage of n_lists (e.g. 1%, 2%, 4%, 8%, 16%). Because IVF-PQ is a lossy compression, a refinement step can be performed by initially increasing the number of neighbors (by some multiple factor) and using the raw vectors to compute the exact distances, ultimately reducing the neighborhoods down to size k. Even a refinement of 2x (which would query initially for k*2) can be quite effective in making up for recall lost by the PQ compression, but it does come at the expense of having to keep the raw vectors around (keeping in mind many databases store the raw vectors anyways). +IVF-PQ is similar to IVF-flat with the major difference that the vectors are compressed using a lossy product quantized compression so the index can have a much smaller footprint on the GPU. In general, it's advised to set n_lists = sqrt(n_vectors) and set n_probes to some percentage of n_lists (e.g. 1%, 2%, 4%, 8%, 16%). 
Because IVF-PQ is a lossy compression, a refinement step can be performed by initially increasing the number of neighbors (by some multiple factor) and using the raw vectors to compute the exact distances, ultimately reducing the neighborhoods down to size k. Even a refinement of 2x (which would query initially for k*2) can be quite effective in making up for recall lost by the PQ compression, but it does come at the expense of having to keep the raw vectors around (keeping in mind many databases store the raw vectors anyways). Large datasets (> 1 million vectors), goal is high quality search at the expense of fast index creation ------------------------------------------------------------------------------------------------------- diff --git a/docs/source/comparing_indexes.rst b/docs/source/comparing_indexes.rst index 167aa2e072..47396614a7 100644 --- a/docs/source/comparing_indexes.rst +++ b/docs/source/comparing_indexes.rst @@ -1,14 +1,14 @@ .. _comparing_indexes: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Comparing performance of vector indexes +Comparing Performance of Vector Indexes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This document provides a brief overview methodology for comparing vector search indexes and models. For guidance on how to choose and configure an index type, please refer to :doc:`this ` guide. Unlike traditional database indexes, which will generally return correct results even without performance tuning, vector search indexes are more closely related to ML models and they can return absolutely garbage results if they have not been tuned. -For this reason, it’s important to consider the parameters that an index is built upon, both for its potential quality and throughput/latency, when comparing two trained indexes. 
While easier to build an index on its default parameters than having to tune them, a well tuned index can have a significantly better search quality AND perform within search perf constraints like maximal throughput and minimal latency. +For this reason, it's important to consider the parameters that an index is built upon, both for its potential quality and throughput/latency, when comparing two trained indexes. While easier to build an index on its default parameters than having to tune them, a well tuned index can have a significantly better search quality AND perform within search perf constraints like maximal throughput and minimal latency. What is recall? @@ -40,7 +40,7 @@ We suggest averaging performance within a range of recall. For general guidance, .. image:: images/recall_buckets.png -This allows us to make observations such as “at 95% recall level, model A can be built 3x faster than model B, but model B has 2x lower latency than model A” +This allows us to make observations such as "at 95% recall level, model A can be built 3x faster than model B, but model B has 2x lower latency than model A" .. image:: images/build_benchmarks.png @@ -53,7 +53,7 @@ The resulting data points will construct a curve known as a Pareto optimum. Plea How do I do this on large vector databases? =========================================== -It turns out that most vector databases, like Milvus for example, make many smaller vector search indexing models for a single “index”, and the distribution of the vectors across the smaller index models are assumed to be completely uniform. This means we can use subsampling to our benefit, and tune on smaller sub-samples of the overall dataset. +It turns out that most vector databases, like Milvus for example, make many smaller vector search indexing models for a single "index", and the distribution of the vectors across the smaller index models are assumed to be completely uniform. 
This means we can use subsampling to our benefit, and tune on smaller sub-samples of the overall dataset. Please note, however, that there are often caps on the size of each of these smaller indexes, and that needs to be taken into consideration when choosing the size of the sub sample to tune. diff --git a/docs/source/conf.py b/docs/source/conf.py index ffec63ded9..84d118fe42 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -117,6 +117,7 @@ "twitter_url": "https://twitter.com/rapidsai", "show_toc_level": 1, "navbar_align": "right", + "navigation_depth": 5, } # Add any paths that contain custom static files (such as style sheets) here, diff --git a/docs/source/cpp_api/neighbors_bruteforce.rst b/docs/source/cpp_api/neighbors_bruteforce.rst index 1a3f2f7154..67638671de 100644 --- a/docs/source/cpp_api/neighbors_bruteforce.rst +++ b/docs/source/cpp_api/neighbors_bruteforce.rst @@ -1,5 +1,5 @@ -Bruteforce -========== +Brute Force KNN +=============== The bruteforce method is running the KNN algorithm. It performs an extensive search, and in contrast to ANN methods produces an exact result. diff --git a/docs/source/cpp_api/neighbors_mg.rst b/docs/source/cpp_api/neighbors_mg.rst index a03490a157..8cf255a45b 100644 --- a/docs/source/cpp_api/neighbors_mg.rst +++ b/docs/source/cpp_api/neighbors_mg.rst @@ -7,70 +7,10 @@ The Multi-GPU (SNMG - single-node multi-GPUs) nearest neighbors API provides a s :language: c++ :class: highlight -``#include `` +.. toctree:: + :maxdepth: 2 -namespace *cuvs::neighbors* - -Index build parameters ----------------------- - -.. doxygengroup:: mg_cpp_index_params - :project: cuvs - :members: - :content-only: - -Search parameters ------------------ - -.. doxygengroup:: mg_cpp_search_params - :project: cuvs - :members: - :content-only: - -Index build ------------ - -.. doxygengroup:: mg_cpp_index_build - :project: cuvs - :members: - :content-only: - -Index extend ------------- - -.. 
doxygengroup:: mg_cpp_index_extend - :project: cuvs - :members: - :content-only: - -Index search ------------- - -.. doxygengroup:: mg_cpp_index_search - :project: cuvs - :members: - :content-only: - -Index serialize ---------------- - -.. doxygengroup:: mg_cpp_serialize - :project: cuvs - :members: - :content-only: - -Index deserialize ------------------ - -.. doxygengroup:: mg_cpp_deserialize - :project: cuvs - :members: - :content-only: - -Distribute pre-built local index --------------------------------- - -.. doxygengroup:: mg_cpp_distribute - :project: cuvs - :members: - :content-only: + neighbors_mg_all_neighbors.rst + neighbors_mg_cagra.rst + neighbors_mg_ivf_flat.rst + neighbors_mg_ivf_pq.rst diff --git a/docs/source/cpp_api/neighbors_mg_all_neighbors.rst b/docs/source/cpp_api/neighbors_mg_all_neighbors.rst new file mode 100644 index 0000000000..c4f066b98d --- /dev/null +++ b/docs/source/cpp_api/neighbors_mg_all_neighbors.rst @@ -0,0 +1,26 @@ +Multi-GPU All-Neighbors +======================= + +Unlike the other multi-GPU nearest neighbors algorithms (CAGRA, IVF-Flat, IVF-PQ), all-neighbors does not require a separate multi-GPU API. Multi-GPU support is built into the unified ``all_neighbors::build`` function. + +To enable multi-GPU execution: + +1. Use ``raft::device_resources_snmg`` as the resources handle (instead of ``raft::resources``). This handle automatically detects all available GPUs. +2. Set ``n_clusters > 1`` in ``all_neighbors_params`` to enable data partitioning across GPUs. +3. Provide the dataset on host memory (``host_matrix_view``). + +The algorithm checks ``raft::resource::is_multi_gpu(handle)`` at runtime and distributes clusters across the available GPUs. When ``n_clusters == 1`` (the default), the build runs on a single GPU. + +.. 
code-block:: c++ + + #include + #include + + raft::device_resources_snmg handle; + cuvs::neighbors::all_neighbors::all_neighbors_params params; + params.n_clusters = 8; // partition data into 8 clusters + params.overlap_factor = 2; // each point assigned to 2 clusters + + all_neighbors::build(handle, params, dataset, indices, distances); + +For the full API reference (parameters and build function), see :doc:`All-Neighbors `. diff --git a/docs/source/cpp_api/neighbors_mg_cagra.rst b/docs/source/cpp_api/neighbors_mg_cagra.rst new file mode 100644 index 0000000000..306569a355 --- /dev/null +++ b/docs/source/cpp_api/neighbors_mg_cagra.rst @@ -0,0 +1,76 @@ +Multi-GPU CAGRA +=============== + +The Multi-GPU CAGRA method extends the CAGRA graph-based ANN algorithm to work across multiple GPUs. It provides two distribution modes: replicated (for higher throughput) and sharded (for handling larger datasets). + +.. role:: py(code) + :language: c++ + :class: highlight + +``#include `` + +namespace *cuvs::neighbors::cagra* + +Index Build Parameters +---------------------- + +.. doxygengroup:: mg_cpp_index_params + :project: cuvs + :members: + :content-only: + +Search Parameters +----------------- + +.. doxygengroup:: mg_cpp_search_params + :project: cuvs + :members: + :content-only: + +Index Build +----------- + +.. doxygengroup:: mg_cpp_cagra_index_build + :project: cuvs + :members: + :content-only: + +Index Extend +------------ + +.. doxygengroup:: mg_cpp_cagra_index_extend + :project: cuvs + :members: + :content-only: + +Index Search +------------ + +.. doxygengroup:: mg_cpp_cagra_index_search + :project: cuvs + :members: + :content-only: + +Index Serialize +--------------- + +.. doxygengroup:: mg_cpp_cagra_serialize + :project: cuvs + :members: + :content-only: + +Index Deserialize +----------------- + +.. doxygengroup:: mg_cpp_cagra_deserialize + :project: cuvs + :members: + :content-only: + +Index Distribute +---------------- + +.. 
doxygengroup:: mg_cpp_cagra_distribute + :project: cuvs + :members: + :content-only: diff --git a/docs/source/cpp_api/neighbors_mg_ivf_flat.rst b/docs/source/cpp_api/neighbors_mg_ivf_flat.rst new file mode 100644 index 0000000000..11d8f1509e --- /dev/null +++ b/docs/source/cpp_api/neighbors_mg_ivf_flat.rst @@ -0,0 +1,76 @@ +Multi-GPU IVF-Flat +================== + +The Multi-GPU IVF-Flat method extends the IVF-Flat ANN algorithm to work across multiple GPUs. It provides two distribution modes: replicated (for higher throughput) and sharded (for handling larger datasets). + +.. role:: py(code) + :language: c++ + :class: highlight + +``#include `` + +namespace *cuvs::neighbors::ivf_flat* + +Index Build Parameters +---------------------- + +.. doxygengroup:: mg_cpp_index_params + :project: cuvs + :members: + :content-only: + +Search Parameters +----------------- + +.. doxygengroup:: mg_cpp_search_params + :project: cuvs + :members: + :content-only: + +Index Build +----------- + +.. doxygengroup:: mg_cpp_ivf_flat_index_build + :project: cuvs + :members: + :content-only: + +Index Extend +------------ + +.. doxygengroup:: mg_cpp_ivf_flat_index_extend + :project: cuvs + :members: + :content-only: + +Index Search +------------ + +.. doxygengroup:: mg_cpp_ivf_flat_index_search + :project: cuvs + :members: + :content-only: + +Index Serialize +--------------- + +.. doxygengroup:: mg_cpp_ivf_flat_serialize + :project: cuvs + :members: + :content-only: + +Index Deserialize +----------------- + +.. doxygengroup:: mg_cpp_ivf_flat_deserialize + :project: cuvs + :members: + :content-only: + +Index Distribute +---------------- + +.. 
doxygengroup:: mg_cpp_ivf_flat_distribute + :project: cuvs + :members: + :content-only: diff --git a/docs/source/cpp_api/neighbors_mg_ivf_pq.rst b/docs/source/cpp_api/neighbors_mg_ivf_pq.rst new file mode 100644 index 0000000000..999a4be01e --- /dev/null +++ b/docs/source/cpp_api/neighbors_mg_ivf_pq.rst @@ -0,0 +1,76 @@ +Multi-GPU IVF-PQ +================ + +The Multi-GPU IVF-PQ method extends the IVF-PQ ANN algorithm to work across multiple GPUs. It provides two distribution modes: replicated (for higher throughput) and sharded (for handling larger datasets). + +.. role:: py(code) + :language: c++ + :class: highlight + +``#include `` + +namespace *cuvs::neighbors::ivf_pq* + +Index Build Parameters +---------------------- + +.. doxygengroup:: mg_cpp_index_params + :project: cuvs + :members: + :content-only: + +Search Parameters +----------------- + +.. doxygengroup:: mg_cpp_search_params + :project: cuvs + :members: + :content-only: + +Index Build +----------- + +.. doxygengroup:: mg_cpp_ivf_pq_index_build + :project: cuvs + :members: + :content-only: + +Index Extend +------------ + +.. doxygengroup:: mg_cpp_ivf_pq_index_extend + :project: cuvs + :members: + :content-only: + +Index Search +------------ + +.. doxygengroup:: mg_cpp_ivf_pq_index_search + :project: cuvs + :members: + :content-only: + +Index Serialize +--------------- + +.. doxygengroup:: mg_cpp_ivf_pq_serialize + :project: cuvs + :members: + :content-only: + +Index Deserialize +----------------- + +.. doxygengroup:: mg_cpp_ivf_pq_deserialize + :project: cuvs + :members: + :content-only: + +Index Distribute +---------------- + +.. 
doxygengroup:: mg_cpp_ivf_pq_distribute + :project: cuvs + :members: + :content-only: diff --git a/docs/source/cpp_api/preprocessing_quantize.rst b/docs/source/cpp_api/preprocessing_quantize.rst index fe8bf1ed8e..c624c4d586 100644 --- a/docs/source/cpp_api/preprocessing_quantize.rst +++ b/docs/source/cpp_api/preprocessing_quantize.rst @@ -1,45 +1,13 @@ -Quantize -======== - -This page provides C++ class references for the publicly-exposed elements of the -`cuvs/preprocessing/quantize` package. +Quantization +============ .. role:: py(code) :language: c++ :class: highlight -Binary Quantizer ----------------- - -``#include `` - -namespace *cuvs::preprocessing::quantize::binary* - -.. doxygengroup:: binary - :project: cuvs - :members: - :content-only: - -Product Quantizer ------------------ - -``#include `` - -namespace *cuvs::preprocessing::quantize::pq* - -.. doxygengroup:: pq - :project: cuvs - :members: - :content-only: - -Scalar Quantizer ----------------- - -``#include `` - -namespace *cuvs::preprocessing::quantize::scalar* +.. toctree:: + :maxdepth: 2 -.. doxygengroup:: scalar - :project: cuvs - :members: - :content-only: + preprocessing_quantize_binary.rst + preprocessing_quantize_pq.rst + preprocessing_quantize_scalar.rst diff --git a/docs/source/cpp_api/preprocessing_quantize_binary.rst b/docs/source/cpp_api/preprocessing_quantize_binary.rst new file mode 100644 index 0000000000..acdd53cc92 --- /dev/null +++ b/docs/source/cpp_api/preprocessing_quantize_binary.rst @@ -0,0 +1,15 @@ +Binary Quantizer +================ + +.. role:: py(code) + :language: c++ + :class: highlight + +``#include `` + +namespace *cuvs::preprocessing::quantize::binary* + +.. 
doxygengroup:: binary + :project: cuvs + :members: + :content-only: diff --git a/docs/source/cpp_api/preprocessing_quantize_pq.rst b/docs/source/cpp_api/preprocessing_quantize_pq.rst new file mode 100644 index 0000000000..7b72282647 --- /dev/null +++ b/docs/source/cpp_api/preprocessing_quantize_pq.rst @@ -0,0 +1,15 @@ +Product Quantizer +================= + +.. role:: py(code) + :language: c++ + :class: highlight + +``#include `` + +namespace *cuvs::preprocessing::quantize::pq* + +.. doxygengroup:: pq + :project: cuvs + :members: + :content-only: diff --git a/docs/source/cpp_api/preprocessing_quantize_scalar.rst b/docs/source/cpp_api/preprocessing_quantize_scalar.rst new file mode 100644 index 0000000000..4e76497b11 --- /dev/null +++ b/docs/source/cpp_api/preprocessing_quantize_scalar.rst @@ -0,0 +1,15 @@ +Scalar Quantizer +================ + +.. role:: py(code) + :language: c++ + :class: highlight + +``#include `` + +namespace *cuvs::preprocessing::quantize::scalar* + +.. doxygengroup:: scalar + :project: cuvs + :members: + :content-only: diff --git a/docs/source/filtering.rst b/docs/source/filtering.rst index cb168f94c8..2f64400274 100644 --- a/docs/source/filtering.rst +++ b/docs/source/filtering.rst @@ -1,7 +1,7 @@ .. _filtering: ~~~~~~~~~~~~~~~~~~~~~~~~ -Filtering vector indexes +Filtering Vector Indexes ~~~~~~~~~~~~~~~~~~~~~~~~ cuVS supports different type of filtering depending on the vector index being used. 
The main method used in all of the vector indexes diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst index 656bdf32e4..a8060ce398 100644 --- a/docs/source/getting_started.rst +++ b/docs/source/getting_started.rst @@ -4,17 +4,17 @@ Getting Started - `New to vector search?`_ - * :doc:`Primer on vector search ` + * :doc:`Primer on Vector Search ` - * :doc:`Vector search indexes vs vector databases ` + * :doc:`Vector Search Indexes vs Vector Databases ` - * :doc:`Index tuning guide ` + * :doc:`Index Tuning Guide ` - * :doc:`Comparing vector search index performance ` + * :doc:`Comparing Vector Search Index Performance ` - `Supported indexes`_ - * :doc:`Vector search index guide ` + * :doc:`Vector Search Index Guide ` - `Using cuVS APIs`_ @@ -60,7 +60,7 @@ Supported indexes cuVS supports many of the standard index types with the list continuing to grow and stay current with the state-of-the-art. Please refer to our :doc:`vector search index guide ` to learn more about each individual index type, when they can be useful on the GPU, the tuning knobs they offer to trade off performance and quality. -The primary goal of cuVS is to enable speed, scale, and flexibility (in that order)- and one of the important value propositions is to enhance existing software deployments with extensible GPU capabilities to improve pain points while not interrupting parts of the system that work well today with CPU. +The primary goal of cuVS is to enable speed, scale, and flexibility (in that order) -- and one of the important value propositions is to enhance existing software deployments with extensible GPU capabilities to improve pain points while not interrupting parts of the system that work well today with CPU. 
Using cuVS APIs diff --git a/docs/source/index.rst b/docs/source/index.rst index ecf92ffa8e..e601659b4d 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -11,8 +11,8 @@ Useful Resources - `Example Notebooks `_: Example notebooks - `Code Examples `_: Self-contained code examples - `RAPIDS Community `_: Get help, contribute, and collaborate. -- `GitHub repository `_: Download the cuVS source code. -- `Issue tracker `_: Report issues or request features. +- `GitHub Repository `_: Download the cuVS source code. +- `Issue Tracker `_: Report issues or request features. diff --git a/docs/source/neighbors/all_neighbors.rst b/docs/source/neighbors/all_neighbors.rst index a70414fe06..ffba60be0f 100644 --- a/docs/source/neighbors/all_neighbors.rst +++ b/docs/source/neighbors/all_neighbors.rst @@ -1,4 +1,4 @@ -All-neighbors +All-Neighbors ============= All-neighbors is a specialized algorithm for building approximate all-neighbors k-NN graphs. Unlike traditional nearest neighbor indexes that are designed for searching, all-neighbors focuses on constructing complete k-NN graphs for entire datasets. diff --git a/docs/source/neighbors/bruteforce.rst b/docs/source/neighbors/bruteforce.rst index 3dc1155073..f3046dd638 100644 --- a/docs/source/neighbors/bruteforce.rst +++ b/docs/source/neighbors/bruteforce.rst @@ -1,5 +1,5 @@ -Brute-force -=========== +Brute Force KNN +=============== Brute-force, or flat index, is the most simple index type, as it ultimately boils down to an exhaustive matrix multiplication. diff --git a/docs/source/neighbors/neighbors.rst b/docs/source/neighbors/neighbors.rst index f66b73f867..f9e2b535cd 100644 --- a/docs/source/neighbors/neighbors.rst +++ b/docs/source/neighbors/neighbors.rst @@ -1,16 +1,16 @@ -Nearest Neighbor -================ +Nearest Neighbors +================= .. 
toctree:: :maxdepth: 3 :caption: Contents: + all_neighbors.rst bruteforce.rst cagra.rst ivfflat.rst ivfpq.rst vamana.rst - all_neighbors.rst Indices and tables diff --git a/docs/source/neighbors/vamana.rst b/docs/source/neighbors/vamana.rst index 4f5c2eb5f0..ce6fedae23 100644 --- a/docs/source/neighbors/vamana.rst +++ b/docs/source/neighbors/vamana.rst @@ -1,22 +1,22 @@ Vamana ====== -VAMANA is the underlying graph construction algorithm used to construct indexes for the DiskANN vector search solution. DiskANN and the Vamana algorithm are described in detail in the `published paper `, and a highly optimized `open-source repository ` includes many features for index construction and search. In cuVS, we provide a version of the Vamana algorithm optimized for GPU architectures to accelreate graph construction to build DiskANN idnexes. At a high level, the Vamana algorithm operates as follows: +Vamana is the underlying graph construction algorithm used to construct indexes for the DiskANN vector search solution. DiskANN and the Vamana algorithm are described in detail in the `published paper `_, and a highly optimized `open-source repository `_ includes many features for index construction and search. In cuVS, we provide a version of the Vamana algorithm optimized for GPU architectures to accelerate graph construction to build DiskANN indexes. At a high level, the Vamana algorithm operates as follows: * 1. Starting with an empty graph, select a medoid vector from the D-dimension vector dataset and insert it into the graph. * 2. Iteratively insert batches of dataset vectors into the graph, connecting each inserted vector to neighbors based on a graph traversal. * 3. For each batch, create reverse edges and prune unnecessary edges. -There are many algorithmic details that are outlined in the `paper `, and many GPU-specific optimizations are included in this implementation.
+There are many algorithmic details that are outlined in the `paper `_, and many GPU-specific optimizations are included in this implementation. -The current implementation of DiskANN in cuVS only includes the 'in-memory' graph construction and a serialization step that writes the index to a file. This index file can be then used by the `open-source DiskANN ` library to perform efficient search. Additional DiskANN functionality, including GPU-accelerated search and 'ssd' index build are planned for future cuVS releases. +The current implementation of DiskANN in cuVS only includes the 'in-memory' graph construction and a serialization step that writes the index to a file. This index file can then be used by the `open-source DiskANN `_ library to perform efficient search. Additional DiskANN functionality, including GPU-accelerated search and 'ssd' index build are planned for future cuVS releases. [ :doc:`C++ API <../cpp_api/neighbors_vamana>` ] Interoperability with CPU DiskANN --------------------------------- -The 'vamana::serialize' API calls writes the index to a file with a format that is compatible with the `open-source DiskANN repositoriy `. This allows cuVS to be used to accelerate index construction while leveraging the efficient CPU-based search currently available. +The 'vamana::serialize' API call writes the index to a file with a format that is compatible with the `open-source DiskANN repository `_. This allows cuVS to be used to accelerate index construction while leveraging the efficient CPU-based search currently available.
Configuration parameters ------------------------ diff --git a/docs/source/python_api/cluster.rst b/docs/source/python_api/cluster.rst index b5c0ab957c..6330aa6257 100644 --- a/docs/source/python_api/cluster.rst +++ b/docs/source/python_api/cluster.rst @@ -10,3 +10,4 @@ Cluster :caption: Contents: cluster_kmeans.rst + cluster_spectral.rst diff --git a/docs/source/python_api/cluster_spectral.rst b/docs/source/python_api/cluster_spectral.rst new file mode 100644 index 0000000000..b62888003a --- /dev/null +++ b/docs/source/python_api/cluster_spectral.rst @@ -0,0 +1,11 @@ +Spectral Clustering +=================== + +To use spectral clustering in Python, see cuML's `SpectralClustering `_ estimator. + +The `cuML `_ library offers a Python interface powered by cuVS under the hood. + +See Also +-------- + +- :doc:`Spectral Clustering C++ API <../cpp_api/cluster_spectral>` diff --git a/docs/source/python_api/neighbors_mg_all_neighbors.rst b/docs/source/python_api/neighbors_mg_all_neighbors.rst new file mode 100644 index 0000000000..1ecad5f284 --- /dev/null +++ b/docs/source/python_api/neighbors_mg_all_neighbors.rst @@ -0,0 +1,28 @@ +Multi-GPU All-Neighbors +======================= + +Unlike the other multi-GPU nearest neighbors algorithms (CAGRA, IVF-Flat, IVF-PQ), all-neighbors does not require a separate multi-GPU API. Multi-GPU support is built into the unified ``all_neighbors.build`` function. + +To enable multi-GPU execution: + +1. Use ``MultiGpuResources`` instead of ``Resources`` as the resources handle. +2. Set ``n_clusters > 1`` in ``AllNeighborsParams`` to enable data partitioning across GPUs. +3. Provide the dataset on host memory (e.g. a NumPy array). + +The algorithm detects multi-GPU resources at runtime and distributes clusters across the available GPUs. When ``n_clusters == 1`` (the default), the build runs on a single GPU. + +.. 
code-block:: python + + import numpy as np + from cuvs.common import MultiGpuResources + from cuvs.neighbors.all_neighbors import AllNeighborsParams, build + + handle = MultiGpuResources() + + params = AllNeighborsParams(n_clusters=8, overlap_factor=2) + dataset = np.random.random_sample((100000, 128)).astype(np.float32) + + indices, distances = build(dataset, k=10, params=params, resources=handle) + handle.sync() + +For the full API reference (parameters and build function), see :doc:`All-Neighbors `. diff --git a/docs/source/python_api/neighbors_multi_gpu.rst b/docs/source/python_api/neighbors_multi_gpu.rst index bb3a5a07ed..58ce6ac3b0 100644 --- a/docs/source/python_api/neighbors_multi_gpu.rst +++ b/docs/source/python_api/neighbors_multi_gpu.rst @@ -112,7 +112,7 @@ Algorithm-Specific Documentation :maxdepth: 2 :caption: Multi-GPU Algorithms: - neighbors_all_neighbors.rst + neighbors_mg_all_neighbors.rst neighbors_mg_cagra.rst neighbors_mg_ivf_flat.rst neighbors_mg_ivf_pq.rst diff --git a/docs/source/python_api/preprocessing.rst b/docs/source/python_api/preprocessing.rst index bbf1337710..5c0494b5df 100644 --- a/docs/source/python_api/preprocessing.rst +++ b/docs/source/python_api/preprocessing.rst @@ -5,51 +5,10 @@ Preprocessing :language: python :class: highlight -PCA (Principal Component Analysis) -################################### -.. autoclass:: cuvs.preprocessing.pca.Params - :members: +.. toctree:: + :maxdepth: 2 -.. autofunction:: cuvs.preprocessing.pca.fit - -.. autofunction:: cuvs.preprocessing.pca.fit_transform - -.. autofunction:: cuvs.preprocessing.pca.transform - -.. autofunction:: cuvs.preprocessing.pca.inverse_transform - -Binary Quantizer -################ - -.. autofunction:: cuvs.preprocessing.quantize.binary.transform - -Product Quantizer -################# - -.. autoclass:: cuvs.preprocessing.quantize.pq.Quantizer - :members: - -.. autoclass:: cuvs.preprocessing.quantize.pq.QuantizerParams - :members: - -.. 
autofunction:: cuvs.preprocessing.quantize.pq.build - -.. autofunction:: cuvs.preprocessing.quantize.pq.transform - -.. autofunction:: cuvs.preprocessing.quantize.pq.inverse_transform - -Scalar Quantizer -################ - -.. autoclass:: cuvs.preprocessing.quantize.scalar.Quantizer - :members: - -.. autoclass:: cuvs.preprocessing.quantize.scalar.QuantizerParams - :members: - -.. autofunction:: cuvs.preprocessing.quantize.scalar.train - -.. autofunction:: cuvs.preprocessing.quantize.scalar.transform - -.. autofunction:: cuvs.preprocessing.quantize.scalar.inverse_transform + preprocessing_pca.rst + preprocessing_quantize.rst + preprocessing_spectral_embedding.rst diff --git a/docs/source/python_api/preprocessing_pca.rst b/docs/source/python_api/preprocessing_pca.rst new file mode 100644 index 0000000000..7ca5fed126 --- /dev/null +++ b/docs/source/python_api/preprocessing_pca.rst @@ -0,0 +1,17 @@ +PCA +=== + +.. role:: py(code) + :language: python + :class: highlight + +.. autoclass:: cuvs.preprocessing.pca.Params + :members: + +.. autofunction:: cuvs.preprocessing.pca.fit + +.. autofunction:: cuvs.preprocessing.pca.fit_transform + +.. autofunction:: cuvs.preprocessing.pca.transform + +.. autofunction:: cuvs.preprocessing.pca.inverse_transform diff --git a/docs/source/python_api/preprocessing_quantize.rst b/docs/source/python_api/preprocessing_quantize.rst new file mode 100644 index 0000000000..ee02b57b41 --- /dev/null +++ b/docs/source/python_api/preprocessing_quantize.rst @@ -0,0 +1,13 @@ +Quantization +============ + +.. role:: py(code) + :language: python + :class: highlight + +.. 
toctree:: + :maxdepth: 2 + + preprocessing_quantize_binary.rst + preprocessing_quantize_pq.rst + preprocessing_quantize_scalar.rst diff --git a/docs/source/python_api/preprocessing_quantize_binary.rst b/docs/source/python_api/preprocessing_quantize_binary.rst new file mode 100644 index 0000000000..fb63abd5f4 --- /dev/null +++ b/docs/source/python_api/preprocessing_quantize_binary.rst @@ -0,0 +1,8 @@ +Binary Quantizer +================ + +.. role:: py(code) + :language: python + :class: highlight + +.. autofunction:: cuvs.preprocessing.quantize.binary.transform diff --git a/docs/source/python_api/preprocessing_quantize_pq.rst b/docs/source/python_api/preprocessing_quantize_pq.rst new file mode 100644 index 0000000000..d2893e5a00 --- /dev/null +++ b/docs/source/python_api/preprocessing_quantize_pq.rst @@ -0,0 +1,18 @@ +Product Quantizer +================= + +.. role:: py(code) + :language: python + :class: highlight + +.. autoclass:: cuvs.preprocessing.quantize.pq.Quantizer + :members: + +.. autoclass:: cuvs.preprocessing.quantize.pq.QuantizerParams + :members: + +.. autofunction:: cuvs.preprocessing.quantize.pq.build + +.. autofunction:: cuvs.preprocessing.quantize.pq.transform + +.. autofunction:: cuvs.preprocessing.quantize.pq.inverse_transform diff --git a/docs/source/python_api/preprocessing_quantize_scalar.rst b/docs/source/python_api/preprocessing_quantize_scalar.rst new file mode 100644 index 0000000000..ac9543eb72 --- /dev/null +++ b/docs/source/python_api/preprocessing_quantize_scalar.rst @@ -0,0 +1,18 @@ +Scalar Quantizer +================ + +.. role:: py(code) + :language: python + :class: highlight + +.. autoclass:: cuvs.preprocessing.quantize.scalar.Quantizer + :members: + +.. autoclass:: cuvs.preprocessing.quantize.scalar.QuantizerParams + :members: + +.. autofunction:: cuvs.preprocessing.quantize.scalar.train + +.. autofunction:: cuvs.preprocessing.quantize.scalar.transform + +.. 
autofunction:: cuvs.preprocessing.quantize.scalar.inverse_transform diff --git a/docs/source/python_api/preprocessing_spectral_embedding.rst b/docs/source/python_api/preprocessing_spectral_embedding.rst new file mode 100644 index 0000000000..445292b9e7 --- /dev/null +++ b/docs/source/python_api/preprocessing_spectral_embedding.rst @@ -0,0 +1,11 @@ +Spectral Embedding +================== + +To use spectral embedding in Python, see cuML's `SpectralEmbedding `_ estimator. + +The `cuML `_ library offers a Python interface powered by cuVS under the hood. + +See Also +-------- + +- :doc:`Spectral Embedding C++ API <../cpp_api/preprocessing_spectral_embedding>` diff --git a/docs/source/tuning_guide.rst b/docs/source/tuning_guide.rst index fd54fc42ae..b735047913 100644 --- a/docs/source/tuning_guide.rst +++ b/docs/source/tuning_guide.rst @@ -1,18 +1,18 @@ ~~~~~~~~~~~~~~~~~~~~~~ -Automated tuning Guide +Automated Tuning Guide ~~~~~~~~~~~~~~~~~~~~~~ Introduction ============ -A Method for tuning and evaluating Vector Search Indexes At Scale in Locally Indexed Vector Databases. For more information on the differences between locally and globally indexed vector databases, please see :doc:`this guide `. The goal of this guide is to give users a scalable and effective approach for tuning a vector search index, no matter how large. Evaluation of a vector search index “model” that measures recall in proportion to build time so that it penalizes the recall when the build time is really high (should ultimately optimize for finding a lower build time and higher recall). +A Method for tuning and evaluating Vector Search Indexes At Scale in Locally Indexed Vector Databases. For more information on the differences between locally and globally indexed vector databases, please see :doc:`this guide `. The goal of this guide is to give users a scalable and effective approach for tuning a vector search index, no matter how large. 
Evaluation of a vector search index "model" that measures recall in proportion to build time so that it penalizes the recall when the build time is really high (should ultimately optimize for finding a lower build time and higher recall). For more information on the various different types of vector search indexes, please see our :doc:`guide to choosing vector search indexes ` Why automated tuning? ===================== -As much as 75% of users have told us they will not be able to tune a vector database beyond one or two simple knobs and we suggest that an ideal “knob” would be to balance training time and search time with search quality. The more time, the higher the quality, and the more needed to find an acceptable search performance. Even the 25% of users that want to tune are still asking for simple tools for doing so. These users also ask for some simple guidelines for setting tuning parameters, like :doc:`this guide `. +As much as 75% of users have told us they will not be able to tune a vector database beyond one or two simple knobs and we suggest that an ideal "knob" would be to balance training time and search time with search quality. The more time, the higher the quality, and the more needed to find an acceptable search performance. Even the 25% of users that want to tune are still asking for simple tools for doing so. These users also ask for some simple guidelines for setting tuning parameters, like :doc:`this guide `. Since vector search indexes are more closely related to machine learning models than traditional databases indexes, one option for easing the parameter tuning burden is to use hyper-parameter optimization tools like `Ray Tune `_ and `Optuna `_. to verify this. 
diff --git a/docs/source/vector_databases_vs_vector_search.rst b/docs/source/vector_databases_vs_vector_search.rst index 5c43ee5508..4eed0c30ce 100644 --- a/docs/source/vector_databases_vs_vector_search.rst +++ b/docs/source/vector_databases_vs_vector_search.rst @@ -1,10 +1,10 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Vector search indexes vs vector databases +Vector Search Indexes vs Vector Databases ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ This guide provides information on the differences between vector search indexes and fully-fledged vector databases. For more information on selecting and configuring vector search indexes, please refer to our :doc:`guide on choosing and configuring indexes ` -One of the primary differences between vector database indexes and traditional database indexes is that vector search often uses approximations to trade-off accuracy of the results for speed. Because of this, while many mature databases offer mechanisms to tune their indexes and achieve better performance, vector database indexes can return completely garbage results if they aren’t tuned for a reasonable level of search quality in addition to performance tuning. This is because vector database indexes are more closely related to machine learning models than they are to traditional database indexes. +One of the primary differences between vector database indexes and traditional database indexes is that vector search often uses approximations to trade-off accuracy of the results for speed. Because of this, while many mature databases offer mechanisms to tune their indexes and achieve better performance, vector database indexes can return completely garbage results if they aren't tuned for a reasonable level of search quality in addition to performance tuning. This is because vector database indexes are more closely related to machine learning models than they are to traditional database indexes. 
What are the differences between vector databases and vector search indexes? ============================================================================ @@ -22,7 +22,7 @@ FAISS and cuVS are examples of standalone vector search libraries, which again a How is vector search used by vector databases? ============================================== -Within the context of vector databases, there are two primary ways in which vector search indexes are used and it’s important to understand which you are working with because it can have an effect on the behavior of the parameters with respect to the data. +Within the context of vector databases, there are two primary ways in which vector search indexes are used and it's important to understand which you are working with because it can have an effect on the behavior of the parameters with respect to the data. Many vector search algorithms improve scalability while reducing the number of distances by partitioning the vector space into smaller pieces, often through the use of clustering, hashing, trees, and other techniques. Another popular technique is to reduce the width or dimensionality of the space in order to decrease the cost of computing each distance. In contrast, databases often partition the data, but may only do so to improve things like io performance, partition tolerance, or scale, without regards to the underlying data distributions which are ultimately going to be used for vector search. @@ -33,19 +33,19 @@ Locally partitioned vector search indexes Most databases follow this design, and vectors are often first written to a write-ahead log for durability. After some number of vectors are written, the write-ahead logs become immutable and may be merged with other write-ahead logs before eventually being converted to a new vector search index. -The search is generally done over each locally partitioned index and the results combined. 
When setting hyperparameters, only the local vector search indexes need to be considered, though the same hyperparameters are going to be used across all of the local partitions. So, for example, if you’ve ingested 100M vectors but each partition only contains about 10M vectors, the size of the index only needs to consider its local 10M vectors. Details like number of vectors in the index are important, for example, when setting the number of clusters in an IVF-based (inverted file index) method, as I’ll cover below. +The search is generally done over each locally partitioned index and the results combined. When setting hyperparameters, only the local vector search indexes need to be considered, though the same hyperparameters are going to be used across all of the local partitions. So, for example, if you've ingested 100M vectors but each partition only contains about 10M vectors, the size of the index only needs to consider its local 10M vectors. Details like number of vectors in the index are important, for example, when setting the number of clusters in an IVF-based (inverted file index) method, as I'll cover below. Globally partitioned vector search indexes ------------------------------------------ -Some special-purpose vector databases follow this design, such as Yahoo’s Vespa and Google’s Spanner. A global index is trained to partition the entire database’s vectors up front as soon as there are enough vectors to do so (usually these databases are at a large enough scale that a significant number of vectors are bootstrapped initially and so it avoids the cold start problem). Ingested vectors are first run through the global index (clustering, for example, but tree- and graph-based methods have also been used) to determine which partition they belong to and the vectors are then (sent to, and) written directly to that partition. The individual partitions can contain a graph, tree, or a simple IVF list. 
These types of indexes have been able to scale to hundreds of billions to trillions of vectors, and since the partitions are themselves often implicitly based on neighborhoods, rather than being based on uniformly random distributed vectors like the locally partitioned architectures, the partitions can be grouped together or intentionally separated to support localized searches or load balancing, depending upon the needs of the system. +Some special-purpose vector databases follow this design, such as Yahoo's Vespa and Google's Spanner. A global index is trained to partition the entire database's vectors up front as soon as there are enough vectors to do so (usually these databases are at a large enough scale that a significant number of vectors are bootstrapped initially and so it avoids the cold start problem). Ingested vectors are first run through the global index (clustering, for example, but tree- and graph-based methods have also been used) to determine which partition they belong to and the vectors are then (sent to, and) written directly to that partition. The individual partitions can contain a graph, tree, or a simple IVF list. These types of indexes have been able to scale to hundreds of billions to trillions of vectors, and since the partitions are themselves often implicitly based on neighborhoods, rather than being based on uniformly random distributed vectors like the locally partitioned architectures, the partitions can be grouped together or intentionally separated to support localized searches or load balancing, depending upon the needs of the system. The challenge when setting hyper-parameters for globally partitioned indexes is that they need to account for the entire set of vectors, and thus the hyperparameters of the global index generally account for all of the vectors in the database, rather than any local partition. -Of course, the two approaches outlined above can also be used together (e.g. 
training a global “coarse” index and then creating localized vector search indexes within each of the global indexes) but to my knowledge, no such architecture has implemented this pattern. +Of course, the two approaches outlined above can also be used together (e.g. training a global "coarse" index and then creating localized vector search indexes within each of the global indexes) but to my knowledge, no such architecture has implemented this pattern. -A challenge with GPUs in vector databases today is that the resulting vector indexes are expected to fit into the memory of available GPUs for fast search. That is to say, there doesn’t exist today an efficient mechanism for offloading or swapping GPU indexes so they can be cached from disk or host memory, for example. We are working on mechanisms to do this, and to also utilize technologies like GPUDirect Storage and GPUDirect RDMA to improve the IO performance further. +A challenge with GPUs in vector databases today is that the resulting vector indexes are expected to fit into the memory of available GPUs for fast search. That is to say, there doesn't exist today an efficient mechanism for offloading or swapping GPU indexes so they can be cached from disk or host memory, for example. We are working on mechanisms to do this, and to also utilize technologies like GPUDirect Storage and GPUDirect RDMA to improve the IO performance further. Tuning and hyperparameter optimization ======================================