Commit 3b3976e

Merge pull request #29196 from dotnwat/ct-l0-reader
ct: a few simplifications in level zero reader
2 parents 30b3d25 + 308f5fc

File tree: 7 files changed (+56 −52 lines)

src/v/cloud_topics/batch_cache/batch_cache.cc

Lines changed: 15 additions & 0 deletions

@@ -47,6 +47,10 @@ ss::future<> batch_cache::stop() {
 }
 
 void batch_cache::put(const model::ntp& ntp, const model::record_batch& b) {
+    vassert(
+      b.term() > model::term_id{-1},
+      "Batch without term in the cache: {}",
+      b.header());
     if (_lm == nullptr) {
         return;
     }
@@ -81,6 +85,17 @@ batch_cache::get(const model::ntp& ntp, model::offset o) {
     if (auto it = _index.find(ntp); it != _index.end()) {
         auto rb = it->second->get(o);
         if (rb.has_value()) {
+            vassert(
+              rb->term() > model::term_id{-1},
+              "Batch without term in the cache: {}",
+              rb->header());
+            vassert(
+              rb->base_offset() <= o && o <= rb->last_offset(),
+              "Unexpected batch for {}, got range: [{},{}] for offset {}",
+              ntp,
+              rb->base_offset(),
+              rb->last_offset(),
+              o);
             _probe.register_get(rb->size_bytes());
         } else {
             _probe.register_miss();
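
The two new `vassert` calls pin down the cache's contract: every cached batch must carry a real term (term id greater than -1), and a hit for offset `o` must return a batch whose `[base_offset, last_offset]` range contains `o`. A minimal standalone sketch of that check, using a hypothetical simplified struct rather than Redpanda's `model::record_batch`:

// Hypothetical simplified stand-in for model::record_batch; illustrative only.
#include <cassert>
#include <cstdint>

struct cached_batch {
    int64_t base_offset;
    int64_t last_offset; // inclusive
    int64_t term;        // -1 means "term not set"
};

// A cache hit for offset `o` is valid only if the batch's term is set and
// its [base_offset, last_offset] range actually contains `o`.
bool valid_cache_hit(const cached_batch& b, int64_t o) {
    return b.term > -1 && b.base_offset <= o && o <= b.last_offset;
}

int main() {
    cached_batch b{.base_offset = 10, .last_offset = 14, .term = 3};
    assert(valid_cache_hit(b, 12));  // offset inside the batch
    assert(!valid_cache_hit(b, 15)); // offset past the batch: invariant violated
    return 0;
}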

src/v/cloud_topics/level_one/frontend_reader/level_one_reader.cc

Lines changed: 3 additions & 3 deletions

@@ -257,7 +257,7 @@ level_one_log_reader_impl::read_batches(l1::object_reader& reader) {
             set_end_of_stream();
             break;
         }
-        _config.bytes_consumed += batch_size;
+        _bytes_consumed += batch_size;
 
         batches.push_back(std::move(batch));
 
@@ -355,8 +355,8 @@ bool level_one_log_reader_impl::is_end_of_stream() const {
 }
 
 bool level_one_log_reader_impl::is_over_limit_with_bytes(size_t size) const {
-    return (_config.strict_max_bytes || _config.bytes_consumed > 0)
-           && (_config.bytes_consumed + size) > _config.max_bytes;
+    return (_config.strict_max_bytes || _bytes_consumed > 0)
+           && (_bytes_consumed + size) > _config.max_bytes;
 }
 
 } // namespace cloud_topics
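
The predicate now reads the reader-local `_bytes_consumed`, but its semantics are unchanged: unless `strict_max_bytes` is set, the first batch is always admitted even if it alone exceeds `max_bytes`, so a fetch can always make progress. A standalone sketch of the same logic, with the members flattened into parameters (names below are illustrative, not the real API):

#include <cassert>
#include <cstddef>

// Free-standing version of the reader's byte-budget predicate; the member
// state (_bytes_consumed, _config.max_bytes, ...) becomes plain parameters.
bool is_over_limit_with_bytes(
  size_t bytes_consumed, size_t size, size_t max_bytes, bool strict_max_bytes) {
    // In non-strict mode the check only kicks in once something has been
    // consumed, so a single oversized first batch still goes through.
    return (strict_max_bytes || bytes_consumed > 0)
           && (bytes_consumed + size) > max_bytes;
}

int main() {
    assert(!is_over_limit_with_bytes(0, 2048, 1024, false)); // oversized first batch admitted
    assert(is_over_limit_with_bytes(0, 2048, 1024, true));   // rejected under strict_max_bytes
    assert(is_over_limit_with_bytes(512, 600, 1024, false)); // later batch busts the budget
    return 0;
}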

src/v/cloud_topics/level_one/frontend_reader/level_one_reader.h

Lines changed: 1 addition & 0 deletions

@@ -135,6 +135,7 @@ class level_one_log_reader_impl : public model::record_batch_reader::impl {
     l1::metastore* _metastore;
     l1::io* _io;
     prefix_logger _log;
+    size_t _bytes_consumed{0};
 };
 
 } // namespace cloud_topics

src/v/cloud_topics/level_zero/frontend_reader/level_zero_reader.cc

Lines changed: 32 additions & 42 deletions

@@ -32,6 +32,7 @@ level_zero_log_reader_impl::level_zero_log_reader_impl(
   ss::lw_shared_ptr<cluster::partition> ctp,
   data_plane_api* ct_api)
   : _config(cfg)
+  , _next_offset(_config.start_offset)
   , _ctp(std::move(ctp))
   , _ct_api(ct_api)
   , _log(cd_log, fmt::format("[{}/{}]", fmt::ptr(this), _ctp->ntp())) {
@@ -52,10 +53,8 @@ level_zero_log_reader_impl::do_load_slice(
     // the 'empty' state. It doesn't make any difference if the reader is in
     // the 'materialized' state. If we're in 'ready' state we risk to go out
     // of sync with cached metadata so it's safer to hydrate.
-    if (cache_enabled()) {
-        if (auto cached = maybe_load_slices_from_cache()) {
-            co_return std::move(cached.value());
-        }
+    if (auto cached = maybe_read_batches_from_cache(); !cached.empty()) {
+        co_return cached;
     }
 
     chunked_circular_buffer<model::record_batch> res;
@@ -88,63 +87,54 @@ level_zero_log_reader_impl::do_load_slice(
     co_return res;
 }
 
-std::optional<chunked_circular_buffer<model::record_batch>>
-level_zero_log_reader_impl::maybe_load_slices_from_cache() {
+chunked_circular_buffer<model::record_batch>
+level_zero_log_reader_impl::maybe_read_batches_from_cache() {
     chunked_circular_buffer<model::record_batch> ret;
-    auto current = _config.start_offset;
-    while (current <= _config.max_offset) {
+    if (!cache_enabled()) {
+        return ret;
+    }
+
+    /*
+     * Fetch batches from the cache starting at `_next_offset` until we hit a
+     * gap or a control batch and must then fetch the data from object storage.
+     */
+    while (_next_offset <= _config.max_offset) {
         auto batch = _ct_api->cache_get(
-          _ctp->ntp(), kafka::offset_cast(current));
+          _ctp->ntp(), kafka::offset_cast(_next_offset));
         if (!batch.has_value()) {
-            // We hit a gap in the cache and have to download objects
-            // from S3.
-            //
-            // NOTE: this can also happen when transactions are used and
-            // we would encounter reading a control batch from the local log.
             break;
         }
+
         vlog(
          _log.trace,
          "Loaded batch from cache for {}: {} @ term {}",
-          current,
+          _next_offset,
          batch.value().base_offset(),
          batch.value().term());
-        vassert(
-          batch.value().term() > model::term_id{-1},
-          "Batch without term in the cache: {}",
-          batch.value().header());
+
        auto batch_size = batch.value().size_bytes();
        if (is_over_limit(batch_size)) {
            break;
        }
-        vassert(
-          batch->base_offset() <= kafka::offset_cast(current)
-            && kafka::offset_cast(current) <= batch->last_offset(),
-          "Unexpected batch for {}, got range: [{},{}] for offset {}",
-          _ctp->ntp(),
-          batch->base_offset(),
-          batch->last_offset(),
-          current);
+
        ret.push_back(std::move(batch.value()));
-        _config.bytes_consumed += batch_size;
-        current = model::offset_cast(
+        _bytes_consumed += batch_size;
+        _next_offset = model::offset_cast(
          model::next_offset(ret.back().last_offset()));
    }
-    _config.start_offset = current;
-    if (_config.start_offset > _config.max_offset) {
+
+    if (_next_offset > _config.max_offset) {
        vlog(
          _log.debug,
          "reached end of stream, start offset: {}, max offset: {}, "
-          "current: {}",
+          "next offset: {}",
          _config.start_offset,
          _config.max_offset,
-          current);
+          _next_offset);
        _current = state::end_of_stream_state;
    }
-    if (!ret.empty()) {
-        return ret;
-    }
-    return std::nullopt;
+
+    return ret;
 }
 
 storage::local_log_reader_config level_zero_log_reader_impl::ctp_read_config() {
@@ -155,7 +145,7 @@ storage::local_log_reader_config level_zero_log_reader_impl::ctp_read_config() {
     */
    auto ot_state = _ctp->get_offset_translator_state();
    auto start_offset = ot_state->to_log_offset(
-      kafka::offset_cast(_config.start_offset));
+      kafka::offset_cast(_next_offset));
    auto max_offset = ot_state->to_log_offset(
      kafka::offset_cast(_config.max_offset));
 
@@ -324,7 +314,7 @@ ss::future<> level_zero_log_reader_impl::materialize_batches(
              hydrated_batch_size);
            break;
        }
-        _config.bytes_consumed += hydrated_batch_size;
+        _bytes_consumed += hydrated_batch_size;
        if (
          auto* meta = std::get_if<cloud_topics::extent_meta>(
            &unhydrated_it->data)) {
@@ -453,7 +443,7 @@ void level_zero_log_reader_impl::consume_materialized_batches(
      _hydrated.size(),
      _unhydrated.size());
    *dest = std::exchange(_hydrated, {});
-    _config.start_offset = model::offset_cast(
+    _next_offset = model::offset_cast(
      model::next_offset(dest->back().last_offset()));
    _current = _unhydrated.empty() ? state::empty_state : state::ready_state;
}
@@ -467,7 +457,7 @@ bool level_zero_log_reader_impl::is_end_of_stream() const {
}
 
bool level_zero_log_reader_impl::is_over_limit(size_t size) const {
-    return (_config.strict_max_bytes || _config.bytes_consumed > 0)
-           && (_config.bytes_consumed + size) > _config.max_bytes;
+    return (_config.strict_max_bytes || _bytes_consumed > 0)
+           && (_bytes_consumed + size) > _config.max_bytes;
}
} // namespace cloud_topics
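
The new comment inside `maybe_read_batches_from_cache` describes the loop: serve contiguous batches from the cache starting at `_next_offset`, stop at a gap (or a control batch) where the reader must fall back to object storage, and advance the cursor past each batch served. A self-contained sketch of that control flow, with a toy in-memory map standing in for the data plane's `cache_get` (all types and helpers below are hypothetical simplifications, not Redpanda's API):

#include <cstdint>
#include <map>
#include <optional>
#include <vector>

// Toy stand-in for a cached record batch; the real reader works with
// kafka::offset, model::record_batch and chunked_circular_buffer.
struct batch {
    int64_t base_offset;
    int64_t last_offset; // inclusive
};

std::map<int64_t, batch> cache; // keyed by base offset

// Return the cached batch covering `offset`, or nullopt on a gap.
std::optional<batch> cache_get(int64_t offset) {
    for (const auto& entry : cache) {
        const batch& b = entry.second;
        if (b.base_offset <= offset && offset <= b.last_offset) {
            return b;
        }
    }
    return std::nullopt;
}

// Mirrors the loop structure of maybe_read_batches_from_cache(), minus the
// byte-budget and end-of-stream bookkeeping.
std::vector<batch> read_from_cache(int64_t& next_offset, int64_t max_offset) {
    std::vector<batch> ret;
    while (next_offset <= max_offset) {
        auto b = cache_get(next_offset);
        if (!b.has_value()) {
            break; // gap (or control batch): fall back to object storage
        }
        next_offset = b->last_offset + 1; // resume after this batch
        ret.push_back(*b);
    }
    return ret;
}

int main() {
    cache[0] = {0, 4};
    cache[5] = {5, 9};    // contiguous with the first batch
    cache[15] = {15, 19}; // unreachable: gap at offsets 10..14
    int64_t next_offset = 0;
    auto got = read_from_cache(next_offset, 100);
    // Two batches served from cache; the cursor stops at the gap (offset 10),
    // which is where the object-storage read path would take over.
    return (got.size() == 2 && next_offset == 10) ? 0 : 1;
}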

src/v/cloud_topics/level_zero/frontend_reader/level_zero_reader.h

Lines changed: 4 additions & 2 deletions

@@ -101,8 +101,8 @@ class level_zero_log_reader_impl : public model::record_batch_reader::impl {
     // Return data from the record batch cache.
     // This method could change state of the reader to end_of_stream_state
     // when it reaches committed offset.
-    std::optional<chunked_circular_buffer<model::record_batch>>
-    maybe_load_slices_from_cache();
+    chunked_circular_buffer<model::record_batch>
+    maybe_read_batches_from_cache();
 
     // If adding a batch of `size` would cause this to go over the bytes limit.
     bool is_over_limit(size_t size) const;
@@ -136,9 +136,11 @@ class level_zero_log_reader_impl : public model::record_batch_reader::impl {
     chunked_circular_buffer<model::record_batch> _hydrated;
 
     cloud_topic_log_reader_config _config;
+    kafka::offset _next_offset;
     ss::lw_shared_ptr<cluster::partition> _ctp;
     data_plane_api* _ct_api;
     prefix_logger _log;
+    size_t _bytes_consumed{0};
 };
 
 } // namespace cloud_topics

src/v/cloud_topics/log_reader_config.cc

Lines changed: 1 addition & 2 deletions

@@ -20,7 +20,7 @@ fmt::iterator cloud_topic_log_reader_config::format_to(fmt::iterator it) const {
       it,
       "start_offset:{}, max_offset:{}, min_bytes:{}, max_bytes:{}, "
       "strict_max_bytes:{}, type_filter: {}, first_timestamp:{}, "
-      "bytes_consumed:{}, skip_cache:{}, abortable:{}, "
+      "skip_cache:{}, abortable:{}, "
       "client_address:{}",
       start_offset,
       max_offset,
@@ -29,7 +29,6 @@ fmt::iterator cloud_topic_log_reader_config::format_to(fmt::iterator it) const {
       strict_max_bytes,
       type_filter,
       first_timestamp,
-      bytes_consumed,
       skip_cache,
       abort_source.has_value(),
       client_address);

src/v/cloud_topics/log_reader_config.h

Lines changed: 0 additions & 3 deletions

@@ -80,9 +80,6 @@ struct cloud_topic_log_reader_config {
 
     model::opt_client_address_t client_address;
 
-    // used by log reader
-    size_t bytes_consumed{0};
-
     // do not let the lower level readers go over budget even when that means
     // that the reader will return no batches.
     bool strict_max_bytes{false};
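
Taken together with the reader changes above, these last two files complete the refactor: the mutable read-progress state that previously lived in the shared `cloud_topic_log_reader_config` (the advancing `start_offset` plus the `bytes_consumed` counter) now lives on the reader implementations as `_next_offset` and `_bytes_consumed`, so the config is no longer mutated over the course of a read.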
