Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/knn.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,6 @@ void partial_sort(TmpDistances distances, SimplexLUT out, int n_lib, int n_pred,
const int team_size = std::min(32, std::max(n_lib / top_k, 1));
#else
const int team_size = 1;
using std::min;
#endif

const size_t scratch_size =
Expand Down Expand Up @@ -234,7 +233,8 @@ void partial_sort(TmpDistances distances, SimplexLUT out, int n_lib, int n_pred,

int k = 0;
// Shift elements until the insertion point is found
for (k = min(static_cast<int>(j) / team_size, top_k - 1);
for (k = Kokkos::min(static_cast<int>(j) / team_size,
top_k - 1);
k > 0; k--) {
if (scratch_dist(r, k - 1) <= cur_dist) {
break;
Expand Down Expand Up @@ -274,7 +274,7 @@ void partial_sort(TmpDistances distances, SimplexLUT out, int n_lib, int n_pred,
scratch_idx(min_rank, scratch_head(min_rank)) + shift;

scratch_head(min_rank) =
min(scratch_head(min_rank) + 1, top_k);
Kokkos::min(scratch_head(min_rank) + 1, top_k);
}
});
});
Expand Down
47 changes: 15 additions & 32 deletions src/stats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,72 +5,55 @@ namespace edm

// Correlation coefficient between two time series.
//
// Runs one Kokkos::parallel_reduce over the first min(x.size(), y.size())
// elements, folding each pair (x(i), y(i)) into a correlation state through
// the state's operator+= and a Kokkos::Sum reducer, then returns the reduced
// state's rho().
//
// NOTE(review): this span is a rendered diff and contains both the removed
// lines (CorrcoefState, unqualified min, the using-declarations) and the
// added lines (CorrcoefSimpleState, Kokkos::min); it is not compilable as
// shown.
float corrcoef(TimeSeries x, TimeSeries y)
{
#ifndef KOKKOS_ENABLE_CUDA
using std::min;
using std::sqrt;
#endif

CorrcoefState state;
CorrcoefSimpleState state;

Kokkos::parallel_reduce(
"EDM::stats::corrcoef", min(x.size(), y.size()),
KOKKOS_LAMBDA(int i, CorrcoefState &upd) {
upd += CorrcoefState(x(i), y(i));
"EDM::stats::corrcoef", Kokkos::min(x.size(), y.size()),
KOKKOS_LAMBDA(int i, CorrcoefSimpleState &upd) {
upd += CorrcoefSimpleState(x(i), y(i));
},
Kokkos::Sum<CorrcoefState>(state));
Kokkos::Sum<CorrcoefSimpleState>(state));

return state.rho();
}

// Correlation coefficient between x and every column of ds.
//
// Launches a team policy with ds.extent(1) teams; team j (its league rank)
// reduces the pairs (x(i), ds(i, j)) for i in
// [0, min(x.extent(0), ds.extent(0))) with a team-level parallel_reduce and
// stores the reduced state's rho() in rho(j).
//
// NOTE(review): this span is a rendered diff and contains both the removed
// lines (CorrcoefState, unqualified min, the using-declarations) and the
// added lines (CorrcoefSimpleState, Kokkos::min); it is not compilable as
// shown.
void corrcoef(CrossMap rho, Dataset ds, TimeSeries x)
{
#ifndef KOKKOS_ENABLE_CUDA
using std::min;
using std::sqrt;
#endif

Kokkos::parallel_for(
"EDM::stats::corrcoef",
Kokkos::TeamPolicy<>(ds.extent(1), Kokkos::AUTO),
KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type &member) {
const int j = member.league_rank();
CorrcoefState state;
CorrcoefSimpleState state;

Kokkos::parallel_reduce(
Kokkos::TeamThreadRange(member, min(x.extent(0), ds.extent(0))),
[=](int i, CorrcoefState &upd) {
upd += CorrcoefState(x(i), ds(i, j));
Kokkos::TeamThreadRange(member,
Kokkos::min(x.extent(0), ds.extent(0))),
[=](int i, CorrcoefSimpleState &upd) {
upd += CorrcoefSimpleState(x(i), ds(i, j));
},
Kokkos::Sum<CorrcoefState>(state));
Kokkos::Sum<CorrcoefSimpleState>(state));

rho(j) = state.rho();
});
}

// Mean absolute error between two time series.
//
// Sums |x(i) - y(i)| over the first n = min(x.size(), y.size()) elements with
// a Kokkos::parallel_reduce and returns that sum divided by n.
// NOTE(review): n == 0 makes the final division 0-by-0 -- confirm callers
// never pass empty series.
//
// NOTE(review): this span is a rendered diff and contains both the removed
// lines (unqualified min/abs, the using-declarations) and the added lines
// (Kokkos::min, Kokkos::abs); it is not compilable as shown.
float mae(TimeSeries x, TimeSeries y)
{
#ifndef KOKKOS_ENABLE_CUDA
using std::abs;
using std::min;
#endif

int n = min(x.size(), y.size());
int n = Kokkos::min(x.size(), y.size());
float sum;

Kokkos::parallel_reduce(
"EDM::stats::mae", n,
KOKKOS_LAMBDA(int i, float &upd) { upd += Kokkos::abs(x(i) - y(i)); },
KOKKOS_LAMBDA(int i, float &upd) { upd += abs(x(i) - y(i)); }, sum);
sum);

return sum / n;
}

float mse(TimeSeries x, TimeSeries y)
{
#ifndef KOKKOS_ENABLE_CUDA
using std::min;
#endif

int n = min(x.size(), y.size());
int n = Kokkos::min(x.size(), y.size());
float sum;

Kokkos::parallel_reduce(
Expand Down
71 changes: 50 additions & 21 deletions src/stats.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,49 @@

namespace edm
{

// Simple sum-of-products state for corrcoef (faster parallel reduction).
//
// Accumulates the raw sums needed to evaluate the correlation coefficient of
// paired samples (x, y) in a single pass: sum of x, sum of y, sum of x^2,
// sum of y^2, sum of x*y, and the sample count n. Two partial states are
// combined by member-wise addition (operator+=), so the type can be used as
// the value type of a sum reduction.
typedef struct corrcoef_simple_state {
    float sum_x, sum_y, sum_x2, sum_y2, sum_xy;
    int n;

    // Empty state: every sum and the sample count are zero.
    KOKKOS_INLINE_FUNCTION corrcoef_simple_state()
        : sum_x(0.0f), sum_y(0.0f), sum_x2(0.0f), sum_y2(0.0f), sum_xy(0.0f),
          n(0)
    {
    }

    // State holding exactly one sample pair (x, y).
    KOKKOS_INLINE_FUNCTION corrcoef_simple_state(float x, float y)
        : sum_x(x), sum_y(y), sum_x2(x * x), sum_y2(y * y), sum_xy(x * y), n(1)
    {
    }

    // Returns cov(x, y) / sqrt(var(x) * var(y)), with the moments computed as
    // divide-by-n averages of the accumulated sums.
    //
    // No guarding is done for degenerate input: with n == 0 the means are
    // 0/0, and with a zero variance the denominator is zero, so the result is
    // then not a finite correlation (NaN or infinity under IEEE arithmetic).
    KOKKOS_INLINE_FUNCTION
    float rho() const
    {
        const float mean_x = sum_x / n;
        const float mean_y = sum_y / n;
        const float var_x = sum_x2 / n - mean_x * mean_x;
        const float var_y = sum_y2 / n - mean_y * mean_y;
        const float cov_xy = sum_xy / n - mean_x * mean_y;
        // Kokkos::sqrt selects the float overload on every backend; an
        // unqualified sqrt can bind to ::sqrt(double) in host builds and
        // silently promote the computation to double.
        return cov_xy / Kokkos::sqrt(var_x * var_y);
    }

    // Merges another partial state into this one by adding each sum and the
    // sample count; returns *this.
    KOKKOS_INLINE_FUNCTION
    corrcoef_simple_state &operator+=(const corrcoef_simple_state &src)
    {
        sum_x += src.sum_x;
        sum_y += src.sum_y;
        sum_x2 += src.sum_x2;
        sum_y2 += src.sum_y2;
        sum_xy += src.sum_xy;
        n += src.n;
        return *this;
    }

} CorrcoefSimpleState;

// Welford-based state for corrcoef (better numerical stability)
typedef struct corrcoef_state {
int n;
float x_mean, y_mean, x_m2, y_m2, xy_m2;
Expand Down Expand Up @@ -44,27 +87,6 @@ typedef struct corrcoef_state {

return *this;
}

KOKKOS_INLINE_FUNCTION void
operator+=(const volatile corrcoef_state &src) volatile
{
const int n_a = n, n_b = src.n;
const int n_ab = n_a + n_b;

const float x_delta = src.x_mean - x_mean;
const float y_delta = src.y_mean - y_mean;

n = n_ab;

if (n == 0) return;

x_mean += x_delta * n_b / n_ab;
y_mean += y_delta * n_b / n_ab;

x_m2 += src.x_m2 + x_delta * x_delta * n_a * n_b / n_ab;
y_m2 += src.y_m2 + y_delta * y_delta * n_a * n_b / n_ab;
xy_m2 += src.xy_m2 + x_delta * y_delta * n_a * n_b / n_ab;
}
} CorrcoefState;

float corrcoef(TimeSeries x, TimeSeries y);
Expand All @@ -77,6 +99,13 @@ float mse(TimeSeries x, TimeSeries y);
namespace Kokkos
{

// Sum-reduction identity for edm::CorrcoefSimpleState: a default-constructed
// state.
template <> struct reduction_identity<edm::CorrcoefSimpleState> {
    KOKKOS_FORCEINLINE_FUNCTION static edm::CorrcoefSimpleState sum()
    {
        edm::CorrcoefSimpleState zero;
        return zero;
    }
};

template <> struct reduction_identity<edm::CorrcoefState> {
KOKKOS_FORCEINLINE_FUNCTION static edm::CorrcoefState sum()
{
Expand Down