Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions c/src/cluster/kmeans.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ void _fit(cuvsResources_t res,
}

T inertia_temp;
IdxT n_iter_temp;
int n_iter_temp;

auto kmeans_params = convert_params(params);
cuvs::cluster::kmeans::fit(*res_ptr,
Expand All @@ -92,7 +92,7 @@ void _fit(cuvsResources_t res,
sample_weight,
centroids_view,
raft::make_host_scalar_view<T>(&inertia_temp),
raft::make_host_scalar_view<IdxT>(&n_iter_temp));
raft::make_host_scalar_view<int>(&n_iter_temp));

*inertia = inertia_temp;
*n_iter = n_iter_temp;
Expand All @@ -118,7 +118,7 @@ void _fit(cuvsResources_t res,
}
} else {
T inertia_temp;
IdxT n_iter_temp;
int n_iter_temp;

std::optional<raft::device_vector_view<T const, IdxT>> sample_weight;
if (sample_weight_tensor != NULL) {
Expand All @@ -133,14 +133,14 @@ void _fit(cuvsResources_t res,
sample_weight,
cuvs::core::from_dlpack<mdspan_type>(centroids_tensor),
raft::make_host_scalar_view<T>(&inertia_temp),
raft::make_host_scalar_view<IdxT>(&n_iter_temp));
raft::make_host_scalar_view<int>(&n_iter_temp));
*inertia = inertia_temp;
*n_iter = n_iter_temp;
}
}
}

template <typename T, typename IdxT = int32_t, typename LabelsT = int32_t>
template <typename T, typename IdxT = int64_t, typename LabelsT = uint32_t>
void _predict(cuvsResources_t res,
const cuvsKMeansParams& params,
DLManagedTensor* X_tensor,
Expand Down
525 changes: 15 additions & 510 deletions cpp/include/cuvs/cluster/kmeans.hpp

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions cpp/include/cuvs/cluster/spectral.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ struct params {
void fit_predict(raft::resources const& handle,
params config,
raft::device_coo_matrix_view<float, int, int, int> connectivity_graph,
raft::device_vector_view<int, int> labels);
raft::device_vector_view<uint32_t, int> labels);

/**
* @brief Perform spectral clustering on a connectivity graph
Expand Down Expand Up @@ -122,7 +122,7 @@ void fit_predict(raft::resources const& handle,
void fit_predict(raft::resources const& handle,
params config,
raft::device_coo_matrix_view<double, int, int, int> connectivity_graph,
raft::device_vector_view<int, int> labels);
raft::device_vector_view<uint32_t, int> labels);

/**
* @brief Perform spectral clustering on a dense dataset
Expand Down Expand Up @@ -155,7 +155,7 @@ void fit_predict(raft::resources const& handle,
void fit_predict(raft::resources const& handle,
params config,
raft::device_matrix_view<float, int, raft::row_major> dataset,
raft::device_vector_view<int, int> labels);
raft::device_vector_view<uint32_t, int> labels);
/**
* @}
*/
Expand Down
26 changes: 14 additions & 12 deletions cpp/src/cluster/detail/kmeans.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ void kmeans_fit_main(raft::resources const& handle,
raft::device_vector_view<const DataT, IndexT> weight,
raft::device_matrix_view<DataT, IndexT> centroidsRawData,
raft::host_scalar_view<DataT> inertia,
raft::host_scalar_view<IndexT> n_iter,
raft::host_scalar_view<int> n_iter,
rmm::device_uvector<char>& workspace)
{
raft::common::nvtx::range<cuvs::common::nvtx::domain::cuvs> fun_scope("kmeans_fit_main");
Expand Down Expand Up @@ -650,7 +650,7 @@ void initScalableKMeansPlusPlus(raft::resources const& handle,
handle, params, potentialCentroids, centroidsRawData, workspace);

auto inertia = raft::make_host_scalar<DataT>(0);
auto n_iter = raft::make_host_scalar<IndexT>(0);
auto n_iter = raft::make_host_scalar<int>(0);
cuvs::cluster::kmeans::params default_params;
default_params.n_clusters = params.n_clusters;

Expand Down Expand Up @@ -727,7 +727,7 @@ void kmeans_fit(raft::resources const& handle,
std::optional<raft::device_vector_view<const DataT, IndexT>> sample_weight,
raft::device_matrix_view<DataT, IndexT> centroids,
raft::host_scalar_view<DataT> inertia,
raft::host_scalar_view<IndexT> n_iter)
raft::host_scalar_view<int> n_iter)
{
raft::common::nvtx::range<cuvs::common::nvtx::domain::cuvs> fun_scope("kmeans_fit");
auto n_samples = X.extent(0);
Expand Down Expand Up @@ -798,8 +798,8 @@ void kmeans_fit(raft::resources const& handle,
cuvs::cluster::kmeans::params iter_params = pams;
iter_params.rng_state.seed = gen();

DataT iter_inertia = std::numeric_limits<DataT>::max();
IndexT n_current_iter = 0;
DataT iter_inertia = std::numeric_limits<DataT>::max();
int n_current_iter = 0;
if (iter_params.init == cuvs::cluster::kmeans::params::InitMethod::Random) {
// initializing with random samples from input dataset
RAFT_LOG_DEBUG(
Expand Down Expand Up @@ -844,7 +844,7 @@ void kmeans_fit(raft::resources const& handle,
weight.view(),
centroidsRawData.view(),
raft::make_host_scalar_view<DataT>(&iter_inertia),
raft::make_host_scalar_view<IndexT>(&n_current_iter),
raft::make_host_scalar_view<int>(&n_current_iter),
workspace);
if (iter_inertia < inertia[0]) {
inertia[0] = iter_inertia;
Expand Down Expand Up @@ -872,7 +872,7 @@ void kmeans_fit(raft::resources const& handle,
IndexT n_samples,
IndexT n_features,
DataT& inertia,
IndexT& n_iter)
int& n_iter)
{
auto XView = raft::make_device_matrix_view<const DataT, IndexT>(X, n_samples, n_features);
auto centroidsView =
Expand All @@ -894,7 +894,7 @@ void kmeans_predict(raft::resources const& handle,
raft::device_matrix_view<const DataT, IndexT> X,
std::optional<raft::device_vector_view<const DataT, IndexT>> sample_weight,
raft::device_matrix_view<const DataT, IndexT> centroids,
raft::device_vector_view<IndexT, IndexT> labels,
raft::device_vector_view<uint32_t, IndexT> labels,
bool normalize_weight,
raft::host_scalar_view<DataT> inertia)
{
Expand Down Expand Up @@ -981,8 +981,10 @@ void kmeans_predict(raft::resources const& handle,
raft::value_op{},
raft::add_op{});

raft::linalg::map(
handle, labels, raft::key_op{}, raft::make_const_mdspan(minClusterAndDistance.view()));
raft::linalg::map(handle,
labels,
raft::compose_op(raft::cast_op<uint32_t>{}, raft::key_op{}),
raft::make_const_mdspan(minClusterAndDistance.view()));

inertia[0] = clusterCostD.value(stream);
}
Expand All @@ -995,7 +997,7 @@ void kmeans_predict(raft::resources const& handle,
const DataT* centroids,
IndexT n_samples,
IndexT n_features,
IndexT* labels,
uint32_t* labels,
bool normalize_weight,
DataT& inertia)
{
Expand All @@ -1006,7 +1008,7 @@ void kmeans_predict(raft::resources const& handle,
if (sample_weight)
sample_weightView.emplace(
raft::make_device_vector_view<const DataT, IndexT>(sample_weight, n_samples));
auto labelsView = raft::make_device_vector_view<IndexT, IndexT>(labels, n_samples);
auto labelsView = raft::make_device_vector_view<uint32_t, IndexT>(labels, n_samples);
auto inertiaView = raft::make_host_scalar_view(&inertia);

cuvs::cluster::kmeans::detail::kmeans_predict<DataT, IndexT>(handle,
Expand Down
46 changes: 24 additions & 22 deletions cpp/src/cluster/detail/kmeans_auto_find_k.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION.
* SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

Expand All @@ -25,46 +25,50 @@ void compute_dispersion(raft::resources const& handle,
raft::device_matrix_view<const value_t, idx_t> X,
cuvs::cluster::kmeans::params& params,
raft::device_matrix_view<value_t, idx_t> centroids_view,
raft::device_vector_view<idx_t, idx_t> labels,
raft::device_vector_view<idx_t, idx_t> clusterSizes,
raft::device_vector_view<uint32_t, idx_t> labels,
raft::device_vector_view<int64_t, idx_t> clusterSizes,
rmm::device_uvector<char>& workspace,
raft::host_vector_view<value_t> clusterDispertionView,
raft::host_vector_view<value_t> resultsView,
raft::host_scalar_view<value_t> residual,
raft::host_scalar_view<idx_t> n_iter,
raft::host_scalar_view<int> n_iter,
int val,
idx_t n,
idx_t d)
{
auto centroids_const_view =
raft::make_device_matrix_view<const value_t, idx_t>(centroids_view.data_handle(), val, d);

idx_t* clusterSizes_ptr = clusterSizes.data_handle();
auto cluster_sizes_view =
raft::make_device_vector_view<const idx_t, idx_t>(clusterSizes_ptr, val);
raft::make_device_vector_view<const int64_t, idx_t>(clusterSizes.data_handle(), val);

params.n_clusters = val;

cuvs::cluster::kmeans::fit_predict(
handle, params, X, std::nullopt, std::make_optional(centroids_view), labels, residual, n_iter);

detail::countLabels(handle, labels.data_handle(), clusterSizes.data_handle(), n, val, workspace);
detail::countLabels(
handle, labels.data_handle(), clusterSizes.data_handle(), n, idx_t(val), workspace);

resultsView[val] = residual[0];
clusterDispertionView[val] = raft::stats::cluster_dispersion(
handle, centroids_const_view, cluster_sizes_view, std::nullopt, n);
resultsView[val] = residual[0];
clusterDispertionView[val] =
raft::stats::cluster_dispersion(handle,
centroids_const_view,
raft::make_const_mdspan(cluster_sizes_view),
std::nullopt,
n);
}

template <typename idx_t, typename value_t>
void find_k(raft::resources const& handle,
raft::device_matrix_view<const value_t, idx_t> X,
raft::host_scalar_view<idx_t> best_k,
raft::host_scalar_view<int> best_k,
raft::host_scalar_view<value_t> residual,
raft::host_scalar_view<idx_t> n_iter,
idx_t kmax,
idx_t kmin = 1,
idx_t maxiter = 100,
value_t tol = 1e-2)
raft::host_scalar_view<int> n_iter,
int kmax,
int kmin = 1,
int maxiter = 100,
value_t tol = 1e-2)
{
idx_t n = X.extent(0);
idx_t d = X.extent(1);
Expand All @@ -79,13 +83,11 @@ void find_k(raft::resources const& handle,
// Device memory

auto centroids = raft::make_device_matrix<value_t, idx_t>(handle, kmax, X.extent(1));
auto clusterSizes = raft::make_device_vector<idx_t>(handle, kmax);
auto labels = raft::make_device_vector<idx_t>(handle, n);
auto clusterSizes = raft::make_device_vector<int64_t, idx_t>(handle, kmax);
auto labels = raft::make_device_vector<uint32_t, idx_t>(handle, n);

rmm::device_uvector<char> workspace(0, raft::resource::get_cuda_stream(handle));

idx_t* clusterSizes_ptr = clusterSizes.data_handle();

// Host memory
auto results = raft::make_host_vector<value_t>(kmax + 1);
auto clusterDispersion = raft::make_host_vector<value_t>(kmax + 1);
Expand All @@ -109,9 +111,9 @@ void find_k(raft::resources const& handle,

auto centroids_view =
raft::make_device_matrix_view<value_t, idx_t>(centroids.data_handle(), left, d);
auto labels_view = raft::make_device_vector_view<idx_t, idx_t>(labels.data_handle(), n);
auto labels_view = raft::make_device_vector_view<uint32_t, idx_t>(labels.data_handle(), n);
auto clusterSizes_view =
raft::make_device_vector_view<idx_t, idx_t>(clusterSizes.data_handle(), kmax);
raft::make_device_vector_view<int64_t, idx_t>(clusterSizes.data_handle(), kmax);
compute_dispersion<value_t, idx_t>(handle,
X,
params,
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/cluster/detail/kmeans_batched.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ void fit(raft::resources const& handle,
std::optional<raft::host_vector_view<const T, IdxT>> sample_weight,
raft::device_matrix_view<T, IdxT> centroids,
raft::host_scalar_view<T> inertia,
raft::host_scalar_view<IdxT> n_iter)
raft::host_scalar_view<int> n_iter)
{
cudaStream_t stream = raft::resource::get_cuda_stream(handle);
auto n_samples = X.extent(0);
Expand Down Expand Up @@ -272,7 +272,7 @@ void fit(raft::resources const& handle,
? raft::make_device_matrix<T, IdxT>(handle, n_clusters, n_features)
: raft::make_device_matrix<T, IdxT>(handle, 0, 0);
T best_inertia = std::numeric_limits<T>::max();
IdxT best_n_iter = 0;
int best_n_iter = 0;

std::mt19937 gen(params.rng_state.seed);

Expand Down
4 changes: 0 additions & 4 deletions cpp/src/cluster/detail/kmeans_common.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -383,9 +383,7 @@ void minClusterAndDistanceCompute(
rmm::device_uvector<char>& workspace);

EXTERN_TEMPLATE_MIN_CLUSTER_AND_DISTANCE(float, int64_t)
EXTERN_TEMPLATE_MIN_CLUSTER_AND_DISTANCE(float, int)
EXTERN_TEMPLATE_MIN_CLUSTER_AND_DISTANCE(double, int64_t)
EXTERN_TEMPLATE_MIN_CLUSTER_AND_DISTANCE(double, int)

#undef EXTERN_TEMPLATE_MIN_CLUSTER_AND_DISTANCE

Expand Down Expand Up @@ -416,8 +414,6 @@ void minClusterDistanceCompute(raft::resources const& handle,

EXTERN_TEMPLATE_MIN_CLUSTER_DISTANCE(float, int64_t)
EXTERN_TEMPLATE_MIN_CLUSTER_DISTANCE(double, int64_t)
EXTERN_TEMPLATE_MIN_CLUSTER_DISTANCE(float, int)
EXTERN_TEMPLATE_MIN_CLUSTER_DISTANCE(double, int)

#undef EXTERN_TEMPLATE_MIN_CLUSTER_DISTANCE

Expand Down
4 changes: 2 additions & 2 deletions cpp/src/cluster/detail/kmeans_mg.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,7 @@ void initKMeansPlusPlus(const raft::resources& handle,
handle, params, const_centroids, centroidsRawData, workspace);

auto inertia = raft::make_host_scalar<DataT>(0);
auto n_iter = raft::make_host_scalar<IndexT>(0);
auto n_iter = raft::make_host_scalar<int>(0);
auto weight_view =
raft::make_device_vector_view<const DataT, IndexT>(weight.data_handle(), weight.extent(0));
cuvs::cluster::kmeans::params params_copy = params;
Expand Down Expand Up @@ -504,7 +504,7 @@ void fit(const raft::resources& handle,
std::optional<raft::device_vector_view<const DataT, IndexT>> sample_weight,
raft::device_matrix_view<DataT, IndexT> centroids,
raft::host_scalar_view<DataT> inertia,
raft::host_scalar_view<IndexT> n_iter,
raft::host_scalar_view<int> n_iter,
rmm::device_uvector<char>& workspace)
{
const auto& comm = raft::resource::get_comms(handle);
Expand Down
4 changes: 0 additions & 4 deletions cpp/src/cluster/detail/minClusterDistanceCompute.cu
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,6 @@ void minClusterAndDistanceCompute(

INSTANTIATE_MIN_CLUSTER_AND_DISTANCE(float, int64_t)
INSTANTIATE_MIN_CLUSTER_AND_DISTANCE(double, int64_t)
INSTANTIATE_MIN_CLUSTER_AND_DISTANCE(float, int)
INSTANTIATE_MIN_CLUSTER_AND_DISTANCE(double, int)

#undef INSTANTIATE_MIN_CLUSTER_AND_DISTANCE

Expand Down Expand Up @@ -294,8 +292,6 @@ void minClusterDistanceCompute(raft::resources const& handle,

INSTANTIATE_MIN_CLUSTER_DISTANCE(float, int64_t)
INSTANTIATE_MIN_CLUSTER_DISTANCE(double, int64_t)
INSTANTIATE_MIN_CLUSTER_DISTANCE(float, int)
INSTANTIATE_MIN_CLUSTER_DISTANCE(double, int)

#undef INSTANTIATE_MIN_CLUSTER_DISTANCE

Expand Down
14 changes: 9 additions & 5 deletions cpp/src/cluster/detail/spectral.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
* SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

Expand All @@ -18,7 +18,7 @@ template <typename DataT>
void fit_predict(raft::resources const& handle,
params config,
raft::device_coo_matrix_view<DataT, int, int, int> connectivity_graph,
raft::device_vector_view<int, int> labels)
raft::device_vector_view<uint32_t, int> labels)
{
int n_samples = connectivity_graph.structure_view().get_n_rows();
DataT inertia;
Expand Down Expand Up @@ -51,20 +51,24 @@ void fit_predict(raft::resources const& handle,
config.n_components,
raft::resource::get_cuda_stream(handle));

auto X_i64 = raft::make_device_matrix_view<DataT, int64_t>(
embedding_row_major.data_handle(), int64_t(n_samples), int64_t(config.n_components));
auto labels_i64 = raft::make_device_vector_view<uint32_t, int64_t>(
labels.data_handle(), int64_t(n_samples));
cuvs::cluster::kmeans::fit_predict(handle,
kmeans_config,
embedding_row_major.view(),
raft::make_const_mdspan(X_i64),
std::nullopt,
std::nullopt,
labels,
labels_i64,
raft::make_host_scalar_view(&inertia),
raft::make_host_scalar_view(&n_iter));
}

void fit_predict(raft::resources const& handle,
params config,
raft::device_matrix_view<float, int, raft::row_major> dataset,
raft::device_vector_view<int, int> labels)
raft::device_vector_view<uint32_t, int> labels)
{
int n_samples = dataset.extent(0);

Expand Down
Loading
Loading