Skip to content

Commit 5dfbea0

Browse files
committed
polish impls
1 parent 655becf commit 5dfbea0

File tree

6 files changed

+112
-143
lines changed

6 files changed

+112
-143
lines changed

src/iceberg/metrics_config.cc

Lines changed: 42 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
#include "iceberg/table_properties.h"
3131
#include "iceberg/util/checked_cast.h"
3232
#include "iceberg/util/type_util.h"
33-
#include "iceberg/util/visit_type.h"
3433

3534
namespace iceberg {
3635

@@ -41,16 +40,15 @@ constexpr std::string_view kCountsName = "counts";
4140
constexpr std::string_view kFullName = "full";
4241
constexpr std::string_view kTruncatePrefix = "truncate(";
4342
constexpr int32_t kDefaultTruncateLength = 16;
44-
const std::shared_ptr<MetricsMode> kDefaultMetricsMode =
45-
std::make_shared<MetricsMode>(MetricsMode::Kind::kTruncate, kDefaultTruncateLength);
43+
constexpr MetricsMode kDefaultMetricsMode = {.kind = MetricsMode::Kind::kTruncate,
44+
.length = kDefaultTruncateLength};
4645

47-
std::shared_ptr<MetricsMode> SortedColumnDefaultMode(
48-
std::shared_ptr<MetricsMode> default_mode) {
49-
if (default_mode->kind == MetricsMode::Kind::kNone ||
50-
default_mode->kind == MetricsMode::Kind::kCounts) {
46+
MetricsMode SortedColumnDefaultMode(MetricsMode default_mode) {
47+
if (default_mode.kind == MetricsMode::Kind::kNone ||
48+
default_mode.kind == MetricsMode::Kind::kCounts) {
5149
return kDefaultMetricsMode;
5250
} else {
53-
return std::move(default_mode);
51+
return default_mode;
5452
}
5553
}
5654

@@ -64,32 +62,19 @@ int32_t MaxInferredColumns(const TableProperties& properties) {
6462
return max_inferred_columns;
6563
}
6664

67-
Result<std::shared_ptr<MetricsMode>> ParseMode(const std::string& mode,
68-
std::shared_ptr<MetricsMode> fallback) {
69-
if (auto metrics_mode = MetricsMode::FromString(mode); metrics_mode.has_value()) {
70-
return std::move(metrics_mode.value());
71-
}
72-
return std::move(fallback);
65+
Result<MetricsMode> ParseMode(std::string_view mode, MetricsMode fallback) {
66+
return MetricsMode::FromString(mode).value_or(fallback);
7367
}
7468

7569
} // namespace
7670

77-
const std::shared_ptr<MetricsMode>& MetricsMode::None() {
78-
static const auto none = std::make_shared<MetricsMode>(Kind::kNone);
79-
return none;
80-
}
71+
MetricsMode MetricsMode::None() { return {.kind = Kind::kNone}; }
8172

82-
const std::shared_ptr<MetricsMode>& MetricsMode::Counts() {
83-
static const auto counts = std::make_shared<MetricsMode>(Kind::kCounts);
84-
return counts;
85-
}
73+
MetricsMode MetricsMode::Counts() { return {.kind = Kind::kCounts}; }
8674

87-
const std::shared_ptr<MetricsMode>& MetricsMode::Full() {
88-
static const auto full = std::make_shared<MetricsMode>(Kind::kFull);
89-
return full;
90-
}
75+
MetricsMode MetricsMode::Full() { return {.kind = Kind::kFull}; }
9176

92-
Result<std::shared_ptr<MetricsMode>> MetricsMode::FromString(std::string_view mode) {
77+
Result<MetricsMode> MetricsMode::FromString(std::string_view mode) {
9378
if (StringUtils::EqualsIgnoreCase(mode, kNoneName)) {
9479
return MetricsMode::None();
9580
} else if (StringUtils::EqualsIgnoreCase(mode, kCountsName)) {
@@ -109,47 +94,42 @@ Result<std::shared_ptr<MetricsMode>> MetricsMode::FromString(std::string_view mo
10994
return kDefaultMetricsMode;
11095
}
11196
ICEBERG_PRECHECK(length > 0, "Truncate length should be positive.");
112-
return std::make_shared<MetricsMode>(Kind::kTruncate, length);
97+
return MetricsMode{.kind = Kind::kTruncate, .length = length};
11398
}
11499
return InvalidArgument("Invalid metrics mode: {}", mode);
115100
}
116101

117-
MetricsConfig::MetricsConfig(
118-
std::unordered_map<std::string, std::shared_ptr<MetricsMode>> column_modes,
119-
std::shared_ptr<MetricsMode> default_mode)
120-
: column_modes_(std::move(column_modes)), default_mode_(std::move(default_mode)) {}
102+
MetricsConfig::MetricsConfig(ColumnModeMap column_modes, MetricsMode default_mode)
103+
: column_modes_(std::move(column_modes)), default_mode_(default_mode) {}
121104

122105
const std::shared_ptr<MetricsConfig>& MetricsConfig::Default() {
123-
static const std::shared_ptr<MetricsConfig> default_config(
124-
new MetricsConfig({}, kDefaultMetricsMode));
125-
return default_config;
106+
static const std::shared_ptr<MetricsConfig> kDefaultConfig(
107+
new MetricsConfig(/*column_modes=*/{}, kDefaultMetricsMode));
108+
return kDefaultConfig;
126109
}
127110

128111
Result<std::shared_ptr<MetricsConfig>> MetricsConfig::Make(const Table& table) {
129112
ICEBERG_ASSIGN_OR_RAISE(auto schema, table.schema());
130-
131113
auto sort_order = table.sort_order();
132-
return MakeInternal(
133-
table.properties(), *schema,
134-
sort_order.has_value() ? *sort_order.value() : *SortOrder::Unsorted());
114+
return MakeInternal(table.properties(), *schema,
115+
*sort_order.value_or(SortOrder::Unsorted()));
135116
}
136117

137118
Result<std::shared_ptr<MetricsConfig>> MetricsConfig::MakeInternal(
138119
const TableProperties& props, const Schema& schema, const SortOrder& order) {
139-
std::unordered_map<std::string, std::shared_ptr<MetricsMode>> column_modes;
120+
ColumnModeMap column_modes;
140121

141-
std::shared_ptr<MetricsMode> default_mode = kDefaultMetricsMode;
122+
MetricsMode default_mode = kDefaultMetricsMode;
142123
if (props.configs().contains(TableProperties::kDefaultWriteMetricsMode.key())) {
143124
std::string configured_metrics_mode =
144125
props.Get(TableProperties::kDefaultWriteMetricsMode);
145126
ICEBERG_ASSIGN_OR_RAISE(default_mode,
146127
ParseMode(configured_metrics_mode, kDefaultMetricsMode));
147128
} else {
148129
int32_t max_inferred_columns = MaxInferredColumns(props);
149-
GetProjectedIdsVisitor visitor(true);
150-
ICEBERG_RETURN_UNEXPECTED(
151-
visitor.Visit(internal::checked_cast<const StructType&>(schema)));
152-
int32_t projected_columns = visitor.Finish().size();
130+
GetProjectedIdsVisitor visitor(/*include_struct_ids=*/true);
131+
ICEBERG_RETURN_UNEXPECTED(visitor.Visit(schema));
132+
auto projected_columns = static_cast<int32_t>(visitor.Finish().size());
153133
if (max_inferred_columns < projected_columns) {
154134
ICEBERG_ASSIGN_OR_RAISE(auto limit_field_ids,
155135
LimitFieldIds(schema, max_inferred_columns));
@@ -166,8 +146,8 @@ Result<std::shared_ptr<MetricsConfig>> MetricsConfig::MakeInternal(
166146
// First set sorted column with sorted column default (can be overridden by user)
167147
auto sorted_col_default_mode = SortedColumnDefaultMode(default_mode);
168148
auto sorted_columns = SortOrder::OrderPreservingSortedColumns(schema, order);
169-
for (const auto& sc : sorted_columns) {
170-
column_modes[std::string(sc)] = sorted_col_default_mode;
149+
for (const auto& sorted_column : sorted_columns) {
150+
column_modes[std::string(sorted_column)] = sorted_col_default_mode;
171151
}
172152

173153
// Handle user overrides of defaults
@@ -181,7 +161,7 @@ Result<std::shared_ptr<MetricsConfig>> MetricsConfig::MakeInternal(
181161
}
182162

183163
return std::shared_ptr<MetricsConfig>(
184-
new MetricsConfig(std::move(column_modes), std::move(default_mode)));
164+
new MetricsConfig(std::move(column_modes), default_mode));
185165
}
186166

187167
Result<std::unordered_set<int32_t>> MetricsConfig::LimitFieldIds(const Schema& schema,
@@ -190,30 +170,36 @@ Result<std::unordered_set<int32_t>> MetricsConfig::LimitFieldIds(const Schema& s
190170
public:
191171
explicit Visitor(int32_t limit) : limit_(limit) {}
192172

193-
Status Visit(const Type& type) { return VisitNestedType(type, this); }
173+
Status Visit(const Type& type) {
174+
if (type.is_nested()) {
175+
return VisitNested(internal::checked_cast<const NestedType&>(type));
176+
} else {
177+
return VisitPrimitive(internal::checked_cast<const PrimitiveType&>(type));
178+
}
179+
}
194180

195181
Status VisitNested(const NestedType& type) {
196-
for (auto& field : type.fields()) {
182+
for (const auto& field : type.fields()) {
197183
if (!ShouldContinue()) {
198184
break;
199185
}
200-
// TODO(zhuo.wang) or is_variant
186+
// TODO(zhuo.wang): variant type should also be handled here
201187
if (field.type()->is_primitive()) {
202188
ids_.insert(field.field_id());
203189
}
204190
}
205191

206-
for (auto& field : type.fields()) {
192+
for (const auto& field : type.fields()) {
207193
if (ShouldContinue()) {
208194
ICEBERG_RETURN_UNEXPECTED(Visit(*field.type()));
209195
}
210196
}
211197
return {};
212198
}
213199

214-
Status VisitNonNested(const Type& type) { return {}; }
200+
Status VisitPrimitive(const PrimitiveType& type) { return {}; }
215201

216-
std::unordered_set<int32_t> Finish() { return ids_; }
202+
std::unordered_set<int32_t> Finish() const { return ids_; }
217203

218204
private:
219205
bool ShouldContinue() { return ids_.size() < limit_; }
@@ -224,7 +210,7 @@ Result<std::unordered_set<int32_t>> MetricsConfig::LimitFieldIds(const Schema& s
224210
};
225211

226212
Visitor visitor(limit);
227-
ICEBERG_RETURN_UNEXPECTED(visitor.Visit(internal::checked_cast<const Type&>(schema)));
213+
ICEBERG_RETURN_UNEXPECTED(visitor.Visit(schema));
228214
return visitor.Finish();
229215
}
230216

@@ -245,8 +231,7 @@ Status MetricsConfig::VerifyReferencedColumns(
245231
return {};
246232
}
247233

248-
std::shared_ptr<MetricsMode> MetricsConfig::ColumnMode(
249-
const std::string& column_name) const {
234+
MetricsMode MetricsConfig::ColumnMode(std::string_view column_name) const {
250235
if (auto it = column_modes_.find(column_name); it != column_modes_.end()) {
251236
return it->second;
252237
}

src/iceberg/metrics_config.h

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include "iceberg/iceberg_export.h"
3333
#include "iceberg/result.h"
3434
#include "iceberg/type_fwd.h"
35+
#include "iceberg/util/string_util.h"
3536

3637
namespace iceberg {
3738

@@ -44,17 +45,16 @@ struct ICEBERG_EXPORT MetricsMode {
4445
kFull,
4546
};
4647

47-
static Result<std::shared_ptr<MetricsMode>> FromString(std::string_view mode);
48-
49-
static const std::shared_ptr<MetricsMode>& None();
50-
static const std::shared_ptr<MetricsMode>& Counts();
51-
static const std::shared_ptr<MetricsMode>& Full();
48+
static Result<MetricsMode> FromString(std::string_view mode);
49+
static MetricsMode None();
50+
static MetricsMode Counts();
51+
static MetricsMode Full();
5252

5353
Kind kind;
5454
std::variant<std::monostate, int32_t> length;
5555
};
5656

57-
/// \brief Configuration utilities for table metrics
57+
/// \brief Configuration for collecting column metrics for an Iceberg table.
5858
class ICEBERG_EXPORT MetricsConfig {
5959
public:
6060
/// \brief Get the default metrics config.
@@ -77,12 +77,13 @@ class ICEBERG_EXPORT MetricsConfig {
7777
/// \brief Get the metrics mode for a specific column
7878
/// \param column_name The full name of the column
7979
/// \return The metrics mode for the column
80-
std::shared_ptr<MetricsMode> ColumnMode(const std::string& column_name) const;
80+
MetricsMode ColumnMode(std::string_view column_name) const;
8181

8282
private:
83-
MetricsConfig(
84-
std::unordered_map<std::string, std::shared_ptr<MetricsMode>> column_modes,
85-
std::shared_ptr<MetricsMode> default_mode);
83+
using ColumnModeMap =
84+
std::unordered_map<std::string, MetricsMode, StringHash, StringEqual>;
85+
86+
MetricsConfig(ColumnModeMap column_modes, MetricsMode default_mode);
8687

8788
/// \brief Generate a MetricsConfig for all columns based on overrides, schema, and sort
8889
/// order.
@@ -96,9 +97,8 @@ class ICEBERG_EXPORT MetricsConfig {
9697
const Schema& schema,
9798
const SortOrder& order);
9899

99-
private:
100-
std::unordered_map<std::string, std::shared_ptr<MetricsMode>> column_modes_;
101-
std::shared_ptr<MetricsMode> default_mode_;
100+
ColumnModeMap column_modes_;
101+
MetricsMode default_mode_;
102102
};
103103

104104
} // namespace iceberg

0 commit comments

Comments
 (0)