Skip to content

Commit 6a75511

Browse files
xiaoxmengfacebook-github-bot
authored andcommitted
misc: Minor index related code improvement (facebookincubator#396)
Summary: X-link: facebookincubator/velox#15878 This diff adds support for cluster index bounds filtering to ScanSpec and refactors some index-related code for better organization. * Index Constants Refactoring (dwio/nimble/index/IndexConstants.h): Introduced new header file defining kKeyStreamId constant (UINT32_MAX) as a placeholder stream ID for cluster index key streams. This reserved ID ensures no conflicts with regular data streams * StripeIndexGroup Enhancement (dwio/nimble/index/StripeIndexGroup.h): Added streamId() accessor method to StreamIndex class for retrieving the stream ID * IndexWriter Cleanup (dwio/nimble/velox/IndexWriter.cpp): Refactored to use the new kKeyStreamId constant instead of inline std::numeric_limits<offset_size>::max() Reviewed By: duxiao1212, Yuhta Differential Revision: D89942683
1 parent 19d1297 commit 6a75511

File tree

4 files changed

+66
-4
lines changed

4 files changed

+66
-4
lines changed

dwio/nimble/index/IndexConstants.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
#pragma once
17+
18+
#include <cstdint>
19+
20+
namespace facebook::nimble {
21+
22+
// Placeholder stream ID for the key stream used in cluster index.
23+
// This is a reserved stream ID that should not be used by regular data streams.
24+
// The key stream contains encoded keys for efficient range-based filtering.
25+
constexpr uint32_t kKeyStreamId = UINT32_MAX;
26+
27+
} // namespace facebook::nimble

dwio/nimble/index/StripeIndexGroup.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,11 @@ class StreamIndex {
118118
/// Returns chunk offset and size, or std::nullopt if not found
119119
std::optional<ChunkLocation> lookupChunk(uint32_t rowId) const;
120120

121+
/// Returns the stream ID this index is for
122+
uint32_t streamId() const {
123+
return streamId_;
124+
}
125+
121126
private:
122127
StreamIndex(
123128
const StripeIndexGroup* stripeIndexGroup,
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include <gtest/gtest.h>
18+
#include <limits>
19+
20+
#include "dwio/nimble/index/IndexConstants.h"
21+
22+
namespace facebook::nimble::index::test {
23+
24+
TEST(IndexConstantsTest, kKeyStreamId) {
25+
// kKeyStreamId should be UINT32_MAX to ensure it doesn't conflict with
26+
// any real stream IDs.
27+
EXPECT_EQ(kKeyStreamId, std::numeric_limits<uint32_t>::max());
28+
EXPECT_EQ(kKeyStreamId, UINT32_MAX);
29+
}
30+
31+
} // namespace facebook::nimble::index::test

dwio/nimble/velox/IndexWriter.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,24 +15,23 @@
1515
*/
1616
#include "dwio/nimble/velox/IndexWriter.h"
1717

18-
#include <limits>
19-
18+
#include "dwio/nimble/index/IndexConstants.h"
2019
#include "dwio/nimble/velox/BufferGrowthPolicy.h"
2120
#include "dwio/nimble/velox/ChunkedStreamWriter.h"
2221
#include "dwio/nimble/velox/StreamChunker.h"
2322

2423
namespace facebook::nimble::index {
2524

2625
namespace {
27-
// The key stream uses a placeholder offset value (max offset_size) that does
26+
// The key stream uses a placeholder offset value (kKeyStreamId) that does
2827
// not correspond to any actual stream position. This offset is not used for
2928
// data retrieval but is required to conform to Nimble's stream typing
3029
// framework, which expects all streams to have an associated offset.
3130
// Returns a reference to a static descriptor to ensure lifetime validity for
3231
// ContentStreamData which stores a reference to the descriptor.
3332
const StreamDescriptorBuilder& keyStreamDescriptor() {
3433
static const StreamDescriptorBuilder descriptor{
35-
std::numeric_limits<offset_size>::max(), ScalarKind::Binary};
34+
kKeyStreamId, ScalarKind::Binary};
3635
return descriptor;
3736
}
3837

0 commit comments

Comments
 (0)