-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathParMatrixMul.cpp
More file actions
95 lines (78 loc) · 2.42 KB
/
ParMatrixMul.cpp
File metadata and controls
95 lines (78 loc) · 2.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <string>
#include <vector>
#include "LSH.h"
#include "SignedRandomProjection.h"
// Out-of-class definition of the static member Bucket::_size, initialised to 64.
// NOTE(review): Bucket is not declared in this file -- presumably it comes from
// LSH.h, and _size is presumably the per-bucket capacity; confirm there.
int Bucket::_size = 64; //use more if the buckets are heavy
// Brings all of namespace std into scope; the code below uses cout/endl unqualified.
using namespace std;
/**
 * Approximate matrix product restricted to LSH candidates: each thread handles
 * one row of A and accumulates into C only the columns of B whose ids are
 * returned by the LSH lookup for that row.
 *
 * Layout used by the indexing below (all row-major):
 *   A : Ndim x Pdim   (row i starts at A[i * Pdim])
 *   B : Pdim x Mdim   (element (k, c) is B[k * Mdim + c])
 *   C : Ndim x Mdim   (accumulated with +=, so it must be initialised by the caller)
 *
 * Launch: 1-D; the row index is blockIdx.x * blockDim.x + threadIdx.x and
 * threads whose index is >= Ndim exit immediately, so any 1-D configuration
 * that covers Ndim rows is valid (including a single block of Ndim threads).
 *
 * retrieved[] format as consumed here: retrieved[0] is the number of candidate
 * ids, the ids themselves start at retrieved[2] and are 1-based (the host
 * inserts columns as c + 1). retrieved[1] is not read by this kernel.
 *
 * NOTE(review): queryhashes and retrieved are never released here; whether
 * getHash()/retrieve() transfer ownership is not visible from this file.
 * NOTE(review): _Algo and proj are dereferenced on the device, so both objects
 * and their member functions must be device-accessible -- confirm in LSH.h /
 * SignedRandomProjection.h.
 */
__global__
void divide_row(int Mdim, int Ndim, int Pdim, double* A, double* B, double* C, LSH* _Algo, SignedRandomProjection* proj)
{
    // Global row index; reduces to threadIdx.x for a single-block launch.
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i >= Ndim)
    {
        return; // surplus thread: no row of A to process
    }

    // Row i of A is Pdim elements long -- the same slice that is hashed.
    double *rowA = &A[(size_t)i * Pdim];
    double *rowC = &C[(size_t)i * Mdim];

    int *queryhashes = proj->getHash(rowA, Pdim);
    int *retrieved = _Algo->retrieve(queryhashes); // ids of the columns that collided

    const int end = retrieved[0] + 2;
    for (int l = 2; l < end; l++)
    {
        const int col = retrieved[l] - 1; // ids are stored 1-based
        if (col < 0 || col >= Mdim)
        {
            continue; // never write outside row i of C
        }

        // Accumulate in a register, starting from the current C value so the
        // summation order matches a plain "C += a * b" loop.
        double acc = rowC[col];
        for (int k = 0; k < Pdim; k++)
        {
            acc += rowA[k] * B[(size_t)k * Mdim + col];
        }
        rowC[col] = acc;
    }
}
void par_mat_mul(int K, int L, double* A, double* B, double* C, int N, int M, int P)
{
//Create LSH (Assuming that this runs in parallel) =============================
LSH *_Algo = new LSH(K, L);
SignedRandomProjection *proj = new SignedRandomProjection(Mdim, K * L);
cout<<"Making Hash Table from B."<<endl;
for (size_t c = 0; c < Mdim; c++)
{
double arr[Pdim];
for (size_t r = 0; r < Pdim; r++)
{
arr[r] = B[r * Mdim + c];
cout<<arr[r]<<" ";
}
cout<<endl;
int * hashes = proj->getHash(arr, Pdim);
_Algo->add(hashes, c + 1);
}
//===============================================================================
const dim3 blockSize(N, 1, 1);
const dim3 gridSize(1, 1, 1);
mat_mul<<<gridSize, blockSize>>>(M, N, P, (double *) A, (double *) B, (double *) C, (LSH *) _Algo, (SignedRandomProjection *) proj);
cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError());
}
/**
 * Demo driver: multiplies a fixed 3x5 matrix A by a fixed 5x5 matrix B through
 * par_mat_mul() and prints the 3x5 result, one row per line with a space after
 * every value.
 */
int main() {
    // LSH parameters (very critical to set properly).
    int K = 25;
    int L = 10;

    // Problem dimensions passed to par_mat_mul as (N, M, P).
    int N = 3, M = 5, P = 5;

    // Input data.
    double A[3][5] =
    {
        {5,    9,   13,  24,  26},
        {18.3, 3.3, 4,   -12, 2},
        {1,    -8,  12,  2,   -25}
    };
    double B[5][5] =
    {
        {1,   5,  5,  19,  4},
        {-8,  9,  9,  3,   8},
        {12,  13, 13, 4,   12},
        {2,   24, 24, -13, 26},
        {-25, 26, 26, 2,   26}
    };

    // Result buffer, zero-initialised.
    double C[3][5] = {};

    par_mat_mul(K, L, &A[0][0], &B[0][0], &C[0][0], N, M, P);

    // Dump the result matrix.
    for (int row = 0; row < N; ++row)
    {
        for (int col = 0; col < M; ++col)
        {
            std::cout << C[row][col] << " ";
        }
        std::cout << std::endl;
    }
    return 0;
}