2626#define MSWEEP_LIKELIHOOD_HPP
2727
2828#include " Matrix.hpp"
29- #include " telescope.hpp"
3029
3130#include " mSWEEP_openmp_config.hpp"
3231
4140#include < algorithm>
4241#include < memory>
4342
43+ #include " mSWEEP_alignment.hpp"
4444#include " Grouping.hpp"
4545
4646namespace mSWEEP {
@@ -106,52 +106,91 @@ class LL_WOR21 : public Likelihood<T> {
106106 return ll_mat;
107107 }
108108
109- void fill_ll_mat (const telescope ::Alignment &alignment, const std::vector<V> &group_sizes, const size_t n_groups, const size_t min_hits) {
109+ void fill_ll_mat (const mSWEEP ::Alignment &alignment, const std::vector<V> &group_sizes, const size_t n_groups, const size_t min_hits) {
110110 size_t num_ecs = alignment.n_ecs ();
111+ size_t n_targets = alignment.get_n_targets ();
112+
113+ size_t n_threads = 1 ;
114+ #if defined(MSWEEP_OPENMP_SUPPORT) && (MSWEEP_OPENMP_SUPPORT) == 1
115+ #pragma omp parallel
116+ {
117+ n_threads = omp_get_num_threads ();
118+ }
119+ #endif
120+
121+ // This double loop is currently the slowest part in the input reading
122+ std::vector<bm::sparse_vector<V, bm::bvector<>>> local_counts (n_threads);
123+ #pragma omp parallel for schedule(static)
124+ for (size_t i = 0 ; i < num_ecs; ++i) {
125+ for (size_t j = 0 ; j < n_targets; ++j) {
126+ if (alignment (i, j)) {
127+ #if defined(MSWEEP_OPENMP_SUPPORT) && (MSWEEP_OPENMP_SUPPORT) == 1
128+ local_counts[omp_get_thread_num ()].inc ((size_t )((size_t )alignment.get_groups ()[j]*num_ecs) + i);
129+ #else
130+ local_counts[0 ].inc ((size_t )((size_t )alignment.get_groups ()[j]*num_ecs) + i);
131+ #endif
132+ }
133+ }
134+ }
135+
136+ bm::sparse_vector<V, bm::bvector<>> group_counts = std::move (local_counts[0 ]);
137+ for (size_t i = 1 ; i < n_threads; ++i) {
138+ group_counts.merge (local_counts[i]);
139+ }
111140
112141 bool mask_groups = min_hits > 0 ;
113142 this ->groups_mask = std::vector<bool >(n_groups, !mask_groups);
114143 std::vector<V> masked_group_sizes;
144+ std::vector<size_t > groups_pos (n_groups, 0 );
145+ size_t n_masked_groups = 0 ;
115146 if (mask_groups) {
116147 std::vector<size_t > group_hit_counts (n_groups, (size_t )0 );
117148 // Create mask identifying groups that have at least 1 alignment
118- for (size_t i = 0 ; i < num_ecs; ++i) {
119- for (size_t j = 0 ; j < n_groups; ++j) {
120- group_hit_counts[j] += (alignment (j, i) > 0 ) * alignment.reads_in_ec (i);
149+ #pragma omp parallel for schedule(static) reduction(vec_size_t_plus:group_hit_counts)
150+ for (size_t j = 0 ; j < n_groups; ++j) {
151+ for (size_t i = 0 ; i < num_ecs; ++i) {
152+ group_hit_counts[j] += (group_counts[j*num_ecs + i] > 0 ) * alignment.reads_in_ec (i);
121153 }
122154 }
155+
123156 for (size_t i = 0 ; i < n_groups; ++i) {
124157 this ->groups_mask [i] = groups_mask[i] || (group_hit_counts[i] >= min_hits);
125158 if (this ->groups_mask [i]) {
159+ groups_pos[i] = n_masked_groups;
126160 masked_group_sizes.push_back (group_sizes[i]);
161+ ++n_masked_groups;
127162 }
128163 }
129164 } else {
130165 masked_group_sizes = group_sizes;
166+ #pragma omp parallel for schedule(static)
167+ for (size_t i = 0 ; i < n_groups; ++i) {
168+ groups_pos[i] = i;
169+ }
131170 }
132- size_t n_masked_groups = masked_group_sizes.size ();
171+ n_masked_groups = masked_group_sizes.size ();
133172
134173 this ->update_bb_parameters (masked_group_sizes, n_masked_groups, this ->bb_constants );
135174 const seamat::DenseMatrix<T> &precalc_lls_mat = this ->precalc_lls (masked_group_sizes, n_masked_groups);
136175
137176 this ->log_likelihoods .resize (n_masked_groups, num_ecs, std::log (this ->zero_inflation ));
138- for ( size_t j = 0 ; j < num_ecs; ++j) {
139- size_t groups_pos = 0 ;
140- for (size_t i = 0 ; i < n_groups; ++i) {
177+
178+ # pragma omp parallel for schedule(static)
179+ for (size_t i = 0 ; i < n_groups; ++i) {
141180 if (this ->groups_mask [i]) {
142- this ->log_likelihoods (groups_pos, j) = precalc_lls_mat (groups_pos, alignment (i, j));
143- ++groups_pos;
181+ for (size_t j = 0 ; j < num_ecs; ++j) {
182+ this ->log_likelihoods (groups_pos[i], j) = precalc_lls_mat (groups_pos[i], group_counts[i*num_ecs + j]);
183+ }
144184 }
145- }
146185 }
147186 }
148187
149- void fill_ec_counts (const telescope ::Alignment &alignment) {
188+ void fill_ec_counts (const mSWEEP ::Alignment &alignment) {
150189 // Fill log ec counts.
151190 this ->log_ec_counts .resize (alignment.n_ecs (), 0 );
152191#pragma omp parallel for schedule(static)
153192 for (size_t i = 0 ; i < alignment.n_ecs (); ++i) {
154- this ->log_ec_counts [i] = std::log (alignment.reads_in_ec (i));
193+ this ->log_ec_counts [i] = std::log (alignment.reads_in_ec (i));
155194 }
156195 }
157196
@@ -170,14 +209,14 @@ class LL_WOR21 : public Likelihood<T> {
170209public:
171210 LL_WOR21 () = default ;
172211
173- LL_WOR21 (const std::vector<V> &group_sizes, const telescope ::Alignment &alignment, const size_t n_groups, const T tol, const T frac_mu, const size_t min_hits, const T _zero_inflation) {
212+ LL_WOR21 (const std::vector<V> &group_sizes, const mSWEEP ::Alignment &alignment, const size_t n_groups, const T tol, const T frac_mu, const size_t min_hits, const T _zero_inflation) {
174213 this ->bb_constants [0 ] = tol;
175214 this ->bb_constants [1 ] = frac_mu;
176215 this ->zero_inflation = _zero_inflation;
177216 this ->from_grouped_alignment (alignment, group_sizes, n_groups, min_hits);
178217 }
179218
180- void from_grouped_alignment (const telescope ::Alignment &alignment, const std::vector<V> &group_sizes, const size_t n_groups, const size_t min_hits) {
219+ void from_grouped_alignment (const mSWEEP ::Alignment &alignment, const std::vector<V> &group_sizes, const size_t n_groups, const size_t min_hits) {
181220 this ->fill_ll_mat (alignment, group_sizes, n_groups, min_hits);
182221 this ->fill_ec_counts (alignment);
183222 }
@@ -292,7 +331,7 @@ class LL_WOR21 : public Likelihood<T> {
292331 const std::vector<bool >& groups_considered () const override { return this ->groups_mask ; };
293332};
294333template <typename T>
295- std::unique_ptr<Likelihood<T>> ConstructAdaptiveLikelihood (const telescope ::Alignment &alignment, const Grouping &grouping, const T q, const T e, const size_t min_hits, const T zero_inflation) {
334+ std::unique_ptr<Likelihood<T>> ConstructAdaptiveLikelihood (const mSWEEP ::Alignment &alignment, const Grouping &grouping, const T q, const T e, const size_t min_hits, const T zero_inflation) {
296335 size_t max_group_size = grouping.max_group_size ();
297336 size_t n_groups = grouping.get_n_groups ();
298337 std::unique_ptr<Likelihood<T>> log_likelihoods;
0 commit comments