Skip to content

Commit 12a35bc

Browse files
committed
Fix clear before finalize
1 parent 5f5d692 commit 12a35bc

File tree

4 files changed

+43
-29
lines changed

4 files changed

+43
-29
lines changed

libtiledbvcf/src/stats/allele_count.cc

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -336,19 +336,20 @@ void AlleleCount::flush(bool finalize) {
336336
fragment_sample_names_.insert(sample_names_.begin(), sample_names_.end());
337337
sample_names_.clear();
338338

339-
// Clear buffers
340-
contig_buffer_.clear();
341-
contig_offsets_.clear();
342-
pos_buffer_.clear();
343-
ref_buffer_.clear();
344-
ref_offsets_.clear();
345-
alt_buffer_.clear();
346-
alt_offsets_.clear();
347-
filter_buffer_.clear();
348-
filter_offsets_.clear();
349-
gt_buffer_.clear();
350-
gt_offsets_.clear();
351-
count_buffer_.clear();
339+
// Shrink buffers to zero size but keep storage (resize(0)) so REST/client
340+
// can still read from the same memory if the submit response or finalize uses it.
341+
contig_buffer_.resize(0);
342+
contig_offsets_.resize(0);
343+
pos_buffer_.resize(0);
344+
ref_buffer_.resize(0);
345+
ref_offsets_.resize(0);
346+
alt_buffer_.resize(0);
347+
alt_offsets_.resize(0);
348+
filter_buffer_.resize(0);
349+
filter_offsets_.resize(0);
350+
gt_buffer_.resize(0);
351+
gt_offsets_.resize(0);
352+
count_buffer_.resize(0);
352353
}
353354

354355
if (finalize) {

libtiledbvcf/src/stats/variant_stats.cc

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -441,20 +441,22 @@ void VariantStats::flush(bool finalize) {
441441
fragment_sample_names_.insert(sample_names_.begin(), sample_names_.end());
442442
sample_names_.clear();
443443

444-
// Clear buffers
445-
contig_buffer_.clear();
446-
contig_offsets_.clear();
447-
pos_buffer_.clear();
448-
sample_buffer_.clear();
449-
sample_offsets_.clear();
450-
allele_buffer_.clear();
451-
allele_offsets_.clear();
452-
ac_buffer_.clear();
453-
an_buffer_.clear();
454-
n_hom_buffer_.clear();
444+
// Shrink buffers to zero size but keep storage (resize(0)) so REST async
445+
// upload can still read from the same memory if submit() returned before
446+
// the server finished copying; avoids data corruption from buffer reuse.
447+
contig_buffer_.resize(0);
448+
contig_offsets_.resize(0);
449+
pos_buffer_.resize(0);
450+
sample_buffer_.resize(0);
451+
sample_offsets_.resize(0);
452+
allele_buffer_.resize(0);
453+
allele_offsets_.resize(0);
454+
ac_buffer_.resize(0);
455+
an_buffer_.resize(0);
456+
n_hom_buffer_.resize(0);
455457
if (array_version_ >= 3) {
456-
max_length_buffer_.clear();
457-
end_buffer_.clear();
458+
max_length_buffer_.resize(0);
459+
end_buffer_.resize(0);
458460
}
459461
}
460462

libtiledbvcf/src/utils/buffer.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,9 @@ void Buffer::append(const void* data, size_t bytes) {
9696
}
9797

9898
void Buffer::clear() {
99-
offsets_.clear();
99+
// Use resize(0) so storage is preserved; REST async upload may still read
100+
// from these buffers after submit() returns; this avoids S3 BadDigest errors.
101+
offsets_.resize(0);
100102
data_size_ = 0;
101103
offset_nelts_ = 0;
102104
data_effective_size_ = 0;

libtiledbvcf/src/write/writer.cc

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -993,6 +993,9 @@ std::pair<uint64_t, uint64_t> Writer::ingest_samples_v4(
993993
// Write and finalize anchors stored in the anchor worker.
994994
anchors_ingested += write_anchors(anchor_worker);
995995

996+
// Unset buffer sizes on the query (same ptrs, size 0). The Query
997+
// must retain the submit response for finalize; do not discard
998+
// the query or its state until submit_and_finalize() completes.
996999
worker->buffers().clear_query_buffers(
9971000
query_.get(), dataset_->metadata().version);
9981001
// Finalize fragment for this contig async
@@ -1104,8 +1107,8 @@ std::pair<uint64_t, uint64_t> Writer::ingest_samples_v4(
11041107
}
11051108
}
11061109

1107-
// When an ingestion worker is finished, clear its query buffers
1108-
// only if the query buffers are not empty.
1110+
// When an ingestion worker is finished, unset buffer sizes on the query
1111+
// (Query retains submit response for finalize). Only do so if buffers are set.
11091112
if (finished && utils::query_buffers_set(query_.get())) {
11101113
worker->buffers().clear_query_buffers(
11111114
query_.get(), dataset_->metadata().version);
@@ -1389,6 +1392,12 @@ void Writer::dataset_version(int32_t* version) const {
13891392
*version = dataset_->metadata().version;
13901393
}
13911394

1395+
// REST write protocol: Submit (WRITE) returns serialized query state in the
1396+
// response. The client must retain that exact state and send it as the
1397+
// finalize request body. If the client discards the submit response before
1398+
// calling finalize, the server cannot deserialize the query correctly. The
1399+
// TileDB Query object must hold the submit response until submit_and_finalize()
1400+
// completes, because that call is what sends the submit response.
13921401
void Writer::finalize_query(std::unique_ptr<tiledb::Query> query) {
13931402
if (utils::query_buffers_set(query.get())) {
13941403
LOG_FATAL("Cannot submit_and_finalize query with buffers set.");

0 commit comments

Comments
 (0)