Skip to content

Commit f17fe1d

Browse files
authored
Merge pull request #242 from cppalliance/drbg_nvcc
Add NVCC HASH and HMAC DRBG Testing
2 parents c3e2562 + b47793d commit f17fe1d

File tree

6 files changed

+246
-4
lines changed

6 files changed

+246
-4
lines changed

include/boost/crypt/utility/config.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@
127127
// ----- Has something -----
128128

129129
// ----- Unreachable -----
130-
#if defined(__GNUC__) || defined(__clang__)
130+
#if defined(__GNUC__) || defined(__clang__) || defined(BOOST_CRYPT_HAS_CUDA)
131131
# define BOOST_CRYPT_UNREACHABLE __builtin_unreachable()
132132
#elif defined(_MSC_VER)
133133
# define BOOST_CRYPT_UNREACHABLE __assume(0)

include/boost/crypt2/drbg/detail/hash_drbg.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#define BOOST_CRYPT2_DRBG_HASH_DRBG_HPP
77

88
#include <boost/crypt2/detail/config.hpp>
9+
#include <boost/crypt2/detail/assert.hpp>
910
#include <boost/crypt2/detail/compat.hpp>
1011
#include <boost/crypt2/detail/concepts.hpp>
1112
#include <boost/crypt2/detail/clear_mem.hpp>
@@ -61,7 +62,7 @@ class hash_drbg
6162
static constexpr compat::uint64_t reseed_interval {281474976710656ULL}; // 2^48
6263

6364
compat::array<compat::byte, seedlen_bytes> constant_ {};
64-
compat::span<const std::byte, seedlen_bytes> constant_span_ {constant_};
65+
compat::span<const compat::byte, seedlen_bytes> constant_span_ {constant_};
6566
compat::array<compat::byte, seedlen_bytes> value_ {};
6667
compat::span<const compat::byte, seedlen_bytes> value_span_ {value_};
6768

@@ -108,7 +109,7 @@ class hash_drbg
108109
compat::span<const compat::byte, Extent3> personalization = compat::span<const compat::byte, 0>{}) noexcept -> state;
109110

110111
template <concepts::sized_range SizedRange1,
111-
concepts::sized_range SizedRange2,
112+
concepts::sized_range SizedRange2 = compat::span<const compat::byte, 0U>,
112113
concepts::sized_range SizedRange3 = compat::span<const compat::byte, 0U>>
113114
BOOST_CRYPT_GPU_ENABLED auto init(SizedRange1&& entropy,
114115
SizedRange2&& nonce = compat::span<const compat::byte, 0U> {},

include/boost/crypt2/drbg/detail/hmac_drbg.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ BOOST_CRYPT_GPU_ENABLED_CONSTEXPR auto hmac_drbg<HMACType, max_hasher_security,
135135
const auto provided_data_size {provided_data_1.size() + provided_data_2.size() + provided_data_3.size()};
136136

137137
// Step 1: V || 0x00 || provided data
138-
compat::array<compat::byte, 1U> storage_gap {std::byte{0x00}};
138+
compat::array<compat::byte, 1U> storage_gap {compat::byte{0x00}};
139139
compat::span<const compat::byte, 1U> storage_gap_span {storage_gap};
140140

141141
HMACType hmac;

test/nvcc_jamfile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,6 @@ run test_shake128_nvcc.cu ;
2424
run test_shake256_nvcc.cu ;
2525

2626
run test_hmac.cu ;
27+
28+
run test_hmac_drbg.cu ;
29+
run test_hash_drbg.cu ;

test/test_hash_drbg.cu

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
// Copyright Matt Borland 2024
2+
// Use, modification and distribution are subject to the
3+
// Boost Software License, Version 1.0. (See accompanying file
4+
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5+
6+
#include <cuda_runtime.h>
7+
#include <boost/crypt2/drbg/sha1_drbg.hpp>
8+
#include "cuda_managed_ptr.hpp"
9+
#include "stopwatch.hpp"
10+
#include "generate_random_strings.hpp"
11+
#include <iostream>
12+
#include <iomanip>
13+
#include <exception>
14+
#include <memory>
15+
#include <span>
16+
17+
using digest_type = typename cuda::std::array<cuda::std::byte, 80>;
18+
19+
// The kernel function
20+
__global__ void cuda_test(char** in, digest_type* out, int numElements)
21+
{
22+
int i = blockIdx.x * blockDim.x + threadIdx.x;
23+
24+
if (i < numElements)
25+
{
26+
boost::crypt::sha1_hash_drbg drbg;
27+
cuda::std::span<char> in_span {in[i], static_cast<cuda::std::size_t>(64)};
28+
drbg.init(in_span);
29+
drbg.generate(out[i], 640U);
30+
}
31+
}
32+
33+
int main()
34+
{
35+
try
36+
{
37+
// Error code to check return values for CUDA calls
38+
cudaError_t err = cudaSuccess;
39+
40+
// Print the vector length to be used, and compute its size
41+
constexpr int numElements = 50000;
42+
constexpr std::size_t elementSize = 64;
43+
44+
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
45+
46+
// Allocate the managed input vector A
47+
char** input_vector1;
48+
cudaMallocManaged(&input_vector1, numElements * sizeof(char*));
49+
50+
// Allocate the managed output vector C
51+
cuda_managed_ptr<digest_type> output_vector(numElements);
52+
53+
for (int i = 0; i < numElements; ++i)
54+
{
55+
cudaMallocManaged(&input_vector1[i], elementSize * sizeof(char));
56+
if (input_vector1[i] == nullptr)
57+
{
58+
throw std::runtime_error("Failed to allocate memory for input_vector1");
59+
}
60+
boost::crypt::generate_random_string(input_vector1[i], elementSize);
61+
}
62+
63+
// Launch the Vector Add CUDA Kernel
64+
int threadsPerBlock = 256;
65+
int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
66+
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
67+
68+
watch w;
69+
cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1, output_vector.get(), numElements);
70+
cudaDeviceSynchronize();
71+
std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
72+
73+
err = cudaGetLastError();
74+
if (err != cudaSuccess)
75+
{
76+
std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
77+
return EXIT_FAILURE;
78+
}
79+
80+
// Verify that the result vector is correct
81+
std::vector<digest_type> results;
82+
results.reserve(numElements);
83+
w.reset();
84+
for(int i = 0; i < numElements; ++i)
85+
{
86+
digest_type out;
87+
boost::crypt::sha1_hash_drbg drbg;
88+
std::span<char> in_span(input_vector1[i], static_cast<std::size_t>(64));
89+
drbg.init(in_span);
90+
drbg.generate(out, 640U);
91+
results.emplace_back(out);
92+
}
93+
double t = w.elapsed();
94+
95+
// check the results
96+
for(int i = 0; i < numElements; ++i)
97+
{
98+
if (output_vector[i][0] != results[i][0])
99+
{
100+
std::cerr << "Result verification failed at element " << i << "!" << std::endl;
101+
return EXIT_FAILURE;
102+
}
103+
}
104+
105+
std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
106+
std::cout << "Done\n";
107+
108+
// Cleanup all the memory we allocated
109+
for (int i = 0; i < numElements; ++i)
110+
{
111+
cudaFree(input_vector1[i]);
112+
}
113+
cudaFree(input_vector1);
114+
}
115+
catch (const std::exception& e)
116+
{
117+
std::cerr << "Terminated with exception: " << e.what() << std::endl;
118+
}
119+
}

test/test_hmac_drbg.cu

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
// Copyright Matt Borland 2024
2+
// Use, modification and distribution are subject to the
3+
// Boost Software License, Version 1.0. (See accompanying file
4+
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
5+
6+
#include <cuda_runtime.h>
7+
#include <boost/crypt2/drbg/sha1_drbg.hpp>
8+
#include "cuda_managed_ptr.hpp"
9+
#include "stopwatch.hpp"
10+
#include "generate_random_strings.hpp"
11+
#include <iostream>
12+
#include <iomanip>
13+
#include <exception>
14+
#include <memory>
15+
#include <span>
16+
17+
using digest_type = typename cuda::std::array<cuda::std::byte, 80>;
18+
19+
// The kernel function
20+
__global__ void cuda_test(char** in, digest_type* out, int numElements)
21+
{
22+
int i = blockIdx.x * blockDim.x + threadIdx.x;
23+
24+
if (i < numElements)
25+
{
26+
boost::crypt::sha1_hmac_drbg drbg;
27+
cuda::std::span<char> in_span {in[i], static_cast<cuda::std::size_t>(64)};
28+
drbg.init(in_span);
29+
drbg.generate(out[i], 640U);
30+
}
31+
}
32+
33+
int main()
34+
{
35+
try
36+
{
37+
// Error code to check return values for CUDA calls
38+
cudaError_t err = cudaSuccess;
39+
40+
// Print the vector length to be used, and compute its size
41+
constexpr int numElements = 50000;
42+
constexpr std::size_t elementSize = 64;
43+
44+
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;
45+
46+
// Allocate the managed input vector A
47+
char** input_vector1;
48+
cudaMallocManaged(&input_vector1, numElements * sizeof(char*));
49+
50+
// Allocate the managed output vector C
51+
cuda_managed_ptr<digest_type> output_vector(numElements);
52+
53+
for (int i = 0; i < numElements; ++i)
54+
{
55+
cudaMallocManaged(&input_vector1[i], elementSize * sizeof(char));
56+
if (input_vector1[i] == nullptr)
57+
{
58+
throw std::runtime_error("Failed to allocate memory for input_vector1");
59+
}
60+
boost::crypt::generate_random_string(input_vector1[i], elementSize);
61+
}
62+
63+
// Launch the Vector Add CUDA Kernel
64+
int threadsPerBlock = 256;
65+
int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
66+
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;
67+
68+
watch w;
69+
cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector1, output_vector.get(), numElements);
70+
cudaDeviceSynchronize();
71+
std::cout << "CUDA kernal done in " << w.elapsed() << "s" << std::endl;
72+
73+
err = cudaGetLastError();
74+
if (err != cudaSuccess)
75+
{
76+
std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
77+
return EXIT_FAILURE;
78+
}
79+
80+
// Verify that the result vector is correct
81+
std::vector<digest_type> results;
82+
results.reserve(numElements);
83+
w.reset();
84+
for(int i = 0; i < numElements; ++i)
85+
{
86+
digest_type out;
87+
boost::crypt::sha1_hmac_drbg drbg;
88+
std::span<char> in_span(input_vector1[i], static_cast<std::size_t>(64));
89+
drbg.init(in_span);
90+
drbg.generate(out, 640U);
91+
results.emplace_back(out);
92+
}
93+
double t = w.elapsed();
94+
95+
// check the results
96+
for(int i = 0; i < numElements; ++i)
97+
{
98+
if (output_vector[i][0] != results[i][0])
99+
{
100+
std::cerr << "Result verification failed at element " << i << "!" << std::endl;
101+
return EXIT_FAILURE;
102+
}
103+
}
104+
105+
std::cout << "Test PASSED with calculation time: " << t << "s" << std::endl;
106+
std::cout << "Done\n";
107+
108+
// Cleanup all the memory we allocated
109+
for (int i = 0; i < numElements; ++i)
110+
{
111+
cudaFree(input_vector1[i]);
112+
}
113+
cudaFree(input_vector1);
114+
}
115+
catch (const std::exception& e)
116+
{
117+
std::cerr << "Terminated with exception: " << e.what() << std::endl;
118+
}
119+
}

0 commit comments

Comments
 (0)