Skip to content

Commit 84814bc

Browse files
authored
feat: impl Transform::ToHumanString (#505)
1 parent 08e8127 commit 84814bc

File tree

8 files changed

+302
-7
lines changed

8 files changed

+302
-7
lines changed

src/iceberg/partition_spec.cc

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,8 @@ Result<std::string> PartitionSpec::PartitionPath(const PartitionValues& data) co
111111
if (i > 0) {
112112
ss << "/";
113113
}
114-
// TODO(zhuo.wang): transform for partition value, will be fixed after transform util
115-
// is ready
116-
std::string partition_value = value.get().ToString();
114+
ICEBERG_ASSIGN_OR_RAISE(auto partition_value,
115+
fields_[i].transform()->ToHumanString(value));
117116
ss << UrlEncoder::Encode(fields_[i].name()) << "="
118117
<< UrlEncoder::Encode(partition_value);
119118
}

src/iceberg/test/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ add_iceberg_test(schema_test
6868
schema_util_test.cc
6969
sort_field_test.cc
7070
sort_order_test.cc
71+
transform_human_string_test.cc
7172
transform_test.cc
7273
type_test.cc)
7374

src/iceberg/test/location_provider_test.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ TEST_F(LocationProviderTest, ObjectStorageWithPartition) {
112112

113113
std::vector<std::string> parts = SplitString(location, '/');
114114
ASSERT_GT(parts.size(), 2);
115-
EXPECT_EQ("data%231=%22val%231%22", parts[parts.size() - 2]);
115+
EXPECT_EQ("data%231=val%231", parts[parts.size() - 2]);
116116
}
117117

118118
TEST_F(LocationProviderTest, ObjectStorageExcludePartitionInPath) {

src/iceberg/test/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ iceberg_tests = {
4040
'schema_util_test.cc',
4141
'sort_field_test.cc',
4242
'sort_order_test.cc',
43+
'transform_human_string_test.cc',
4344
'transform_test.cc',
4445
'type_test.cc',
4546
),

src/iceberg/test/partition_spec_test.cc

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -458,8 +458,7 @@ TEST(PartitionSpecTest, PartitionPath) {
458458
PartitionValues part_data(
459459
{Literal::Int(123), Literal::String("val2"), Literal::Date(19489)});
460460
ICEBERG_UNWRAP_OR_FAIL(auto path, spec->PartitionPath(part_data));
461-
std::string expected =
462-
"id_partition=123/name_partition=%22val2%22/ts_partition=19489";
461+
std::string expected = "id_partition=123/name_partition=val2/ts_partition=2023-05-12";
463462
EXPECT_EQ(expected, path);
464463
}
465464

@@ -469,7 +468,7 @@ TEST(PartitionSpecTest, PartitionPath) {
469468
{Literal::Int(123), Literal::String("val#2"), Literal::Date(19489)});
470469
ICEBERG_UNWRAP_OR_FAIL(auto path, spec->PartitionPath(part_data));
471470
std::string expected =
472-
"id_partition=123/name_partition=%22val%232%22/ts_partition=19489";
471+
"id_partition=123/name_partition=val%232/ts_partition=2023-05-12";
473472
EXPECT_EQ(expected, path);
474473
}
475474
}
Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include <memory>
21+
#include <string>
22+
23+
#include <gtest/gtest.h>
24+
25+
#include "iceberg/test/matchers.h"
26+
#include "iceberg/transform.h"
27+
28+
namespace iceberg {
29+
30+
// One parameterized case shared by the human-string test suites in this file.
struct HumanStringTestParam {
31+
// Case label; the name-generator lambdas passed to INSTANTIATE_TEST_SUITE_P return it.
std::string test_name;
32+
// Source type handed to Transform::Bind by the timestamp suite; the identity and date
// cases leave it unset.
std::shared_ptr<Type> source_type;
33+
// Input literal for the case.
Literal literal;
34+
// Expected strings, indexed in parallel with the fixture's transforms_ vector.
std::vector<std::string> expecteds;
35+
};
36+
37+
// Fixture holding only the identity transform, so every case supplies exactly one
// expected string.
class IdentityHumanStringTest : public ::testing::TestWithParam<HumanStringTestParam> {
 protected:
  std::vector<std::shared_ptr<Transform>> transforms_ = {Transform::Identity()};
};
41+
42+
// Verifies that the identity transform renders each literal as the expected human
// string. No Bind/Transform step is needed: the literal is passed straight through.
TEST_P(IdentityHumanStringTest, ToHumanString) {
  const auto& param = GetParam();

  // The loop indexes both vectors in parallel; fail fast on a mis-sized case instead of
  // reading past the end of `expecteds`.
  ASSERT_EQ(transforms_.size(), param.expecteds.size());

  // size_t matches vector::size(), avoiding the signed/unsigned comparison that the
  // previous int32_t index produced.
  for (size_t i = 0; i < transforms_.size(); ++i) {
    EXPECT_THAT(transforms_[i]->ToHumanString(param.literal),
                HasValue(::testing::Eq(param.expecteds[i])));
  }
}
49+
50+
INSTANTIATE_TEST_SUITE_P(
51+
IdentityHumanStringTestCases, IdentityHumanStringTest,
52+
::testing::Values(
53+
HumanStringTestParam{.test_name = "Null",
54+
.literal = Literal::Null(std::make_shared<IntType>()),
55+
.expecteds{"null"}},
56+
HumanStringTestParam{.test_name = "Binary",
57+
.literal = Literal::Binary(std::vector<uint8_t>{1, 2, 3}),
58+
.expecteds{"AQID"}},
59+
HumanStringTestParam{.test_name = "Fixed",
60+
.literal = Literal::Fixed(std::vector<uint8_t>{1, 2, 3}),
61+
.expecteds{"AQID"}},
62+
HumanStringTestParam{.test_name = "Date",
63+
.literal = Literal::Date(17501),
64+
.expecteds{"2017-12-01"}},
65+
HumanStringTestParam{.test_name = "Time",
66+
.literal = Literal::Time(36775038194),
67+
.expecteds{"10:12:55.038194"}},
68+
HumanStringTestParam{.test_name = "TimestampWithZone",
69+
.literal = Literal::TimestampTz(1512151975038194),
70+
.expecteds{"2017-12-01T18:12:55.038194+00:00"}},
71+
HumanStringTestParam{.test_name = "TimestampWithoutZone",
72+
.literal = Literal::Timestamp(1512123175038194),
73+
.expecteds{"2017-12-01T10:12:55.038194"}},
74+
HumanStringTestParam{.test_name = "Long",
75+
.literal = Literal::Long(-1234567890000L),
76+
.expecteds{"-1234567890000"}},
77+
HumanStringTestParam{.test_name = "String",
78+
.literal = Literal::String("a/b/c=d"),
79+
.expecteds{"a/b/c=d"}}),
80+
[](const ::testing::TestParamInfo<HumanStringTestParam>& info) {
81+
return info.param.test_name;
82+
});
83+
84+
// Fixture for date sources: `expecteds` in each case is ordered year, month, day to
// line up with this vector.
class DateHumanStringTest : public ::testing::TestWithParam<HumanStringTestParam> {
 protected:
  std::vector<std::shared_ptr<Transform>> transforms_ = {
      Transform::Year(),
      Transform::Month(),
      Transform::Day(),
  };
};
89+
90+
// For each date transform: bind it to DateType, apply it to the case's literal, then
// check the human string of the transformed value.
TEST_P(DateHumanStringTest, ToHumanString) {
  const auto& test_case = GetParam();

  for (uint32_t idx = 0; idx < transforms_.size(); ++idx) {
    const auto& transform = transforms_[idx];
    ICEBERG_UNWRAP_OR_FAIL(auto bound_func, transform->Bind(std::make_shared<DateType>()));
    ICEBERG_UNWRAP_OR_FAIL(auto transformed, bound_func->Transform(test_case.literal));

    const auto& expected = test_case.expecteds[idx];
    EXPECT_THAT(transform->ToHumanString(transformed), HasValue(::testing::Eq(expected)));
  }
}
101+
102+
INSTANTIATE_TEST_SUITE_P(
103+
DateHumanStringTestCases, DateHumanStringTest,
104+
::testing::Values(
105+
HumanStringTestParam{.test_name = "Date",
106+
.literal = Literal::Date(17501),
107+
.expecteds = {"2017", "2017-12", "2017-12-01"}},
108+
HumanStringTestParam{.test_name = "NegativeDate",
109+
.literal = Literal::Date(-2),
110+
.expecteds = {"1969", "1969-12", "1969-12-30"}},
111+
HumanStringTestParam{.test_name = "DateLowerBound",
112+
.literal = Literal::Date(0),
113+
.expecteds = {"1970", "1970-01", "1970-01-01"}},
114+
HumanStringTestParam{.test_name = "NegativeDateLowerBound",
115+
.literal = Literal::Date(-365),
116+
.expecteds = {"1969", "1969-01", "1969-01-01"}},
117+
HumanStringTestParam{.test_name = "NegativeDateUpperBound",
118+
.literal = Literal::Date(-1),
119+
.expecteds = {"1969", "1969-12", "1969-12-31"}},
120+
HumanStringTestParam{.test_name = "Null",
121+
.literal = Literal::Null(std::make_shared<DateType>()),
122+
.expecteds = {"null", "null", "null"}}),
123+
[](const ::testing::TestParamInfo<HumanStringTestParam>& info) {
124+
return info.param.test_name;
125+
});
126+
127+
// Fixture for timestamp sources: `expecteds` in each case is ordered year, month, day,
// hour to line up with this vector.
class TimestampHumanStringTest : public ::testing::TestWithParam<HumanStringTestParam> {
 protected:
  std::vector<std::shared_ptr<Transform>> transforms_ = {
      Transform::Year(),
      Transform::Month(),
      Transform::Day(),
      Transform::Hour(),
  };
};
132+
133+
// Every temporal transform must reject out-of-range sentinel values and literals whose
// payload type does not match, reporting kNotSupported with a descriptive message.
TEST_F(TimestampHumanStringTest, InvalidType) {
  constexpr int64_t kLongMax = std::numeric_limits<int64_t>::max();
  constexpr int64_t kLongMin = std::numeric_limits<int64_t>::min();

  // Casting the int64 extremes down to int yields the values whose ToString() output is
  // asserted below ("aboveMax" / "belowMin").
  ICEBERG_UNWRAP_OR_FAIL(auto above_max,
                         Literal::Long(kLongMax).CastTo(std::make_shared<IntType>()));
  ICEBERG_UNWRAP_OR_FAIL(auto below_min,
                         Literal::Long(kLongMin).CastTo(std::make_shared<IntType>()));

  // A long literal: in range, but not the payload type the temporal transforms accept.
  auto unmatch_type_literal = Literal::Long(kLongMax);

  for (const auto& transform : transforms_) {
    auto above_result = transform->ToHumanString(above_max);
    EXPECT_THAT(above_result, IsError(ErrorKind::kNotSupported));
    EXPECT_THAT(above_result,
                HasErrorMessage("Cannot transfrom human string for value: aboveMax"));

    auto below_result = transform->ToHumanString(below_min);
    EXPECT_THAT(below_result, IsError(ErrorKind::kNotSupported));
    EXPECT_THAT(below_result,
                HasErrorMessage("Cannot transfrom human string for value: belowMin"));

    auto mismatch_result = transform->ToHumanString(unmatch_type_literal);
    EXPECT_THAT(mismatch_result, IsError(ErrorKind::kNotSupported));
    EXPECT_THAT(mismatch_result,
                HasErrorMessage(std::format(
                    "Transfrom human {} from type {} is not supported",
                    TransformTypeToString(transform->transform_type()),
                    unmatch_type_literal.type()->ToString())));
  }
}
162+
163+
// For each temporal transform: bind it to the case's source type, apply it to the
// literal, then check the human string of the transformed value.
TEST_P(TimestampHumanStringTest, ToHumanString) {
  const auto& test_case = GetParam();

  for (uint32_t idx = 0; idx < transforms_.size(); ++idx) {
    const auto& transform = transforms_[idx];
    ICEBERG_UNWRAP_OR_FAIL(auto bound_func, transform->Bind(test_case.source_type));
    ICEBERG_UNWRAP_OR_FAIL(auto transformed, bound_func->Transform(test_case.literal));

    const auto& expected = test_case.expecteds[idx];
    EXPECT_THAT(transform->ToHumanString(transformed), HasValue(::testing::Eq(expected)));
  }
}
172+
173+
INSTANTIATE_TEST_SUITE_P(
174+
TimestampHumanStringTestCases, TimestampHumanStringTest,
175+
::testing::Values(
176+
HumanStringTestParam{
177+
.test_name = "Timestamp",
178+
.source_type = std::make_shared<TimestampType>(),
179+
.literal = Literal::Timestamp(1512123175038194),
180+
.expecteds = {"2017", "2017-12", "2017-12-01", "2017-12-01-10"}},
181+
HumanStringTestParam{
182+
.test_name = "NegativeTimestamp",
183+
.source_type = std::make_shared<TimestampType>(),
184+
.literal = Literal::Timestamp(-136024961806),
185+
.expecteds = {"1969", "1969-12", "1969-12-30", "1969-12-30-10"}},
186+
HumanStringTestParam{
187+
.test_name = "TimestampLowerBound",
188+
.source_type = std::make_shared<TimestampType>(),
189+
.literal = Literal::Timestamp(0),
190+
.expecteds = {"1970", "1970-01", "1970-01-01", "1970-01-01-00"}},
191+
HumanStringTestParam{
192+
.test_name = "NegativeTimestampLowerBound",
193+
.source_type = std::make_shared<TimestampType>(),
194+
.literal = Literal::Timestamp(-172800000000),
195+
.expecteds = {"1969", "1969-12", "1969-12-30", "1969-12-30-00"},
196+
},
197+
HumanStringTestParam{
198+
.test_name = "NegativeTimestampUpperBound",
199+
.source_type = std::make_shared<TimestampType>(),
200+
.literal = Literal::Timestamp(-1),
201+
.expecteds = {"1969", "1969-12", "1969-12-31", "1969-12-31-23"}},
202+
HumanStringTestParam{
203+
.test_name = "TimestampTz",
204+
.source_type = std::make_shared<TimestampTzType>(),
205+
.literal = Literal::TimestampTz(1512151975038194),
206+
.expecteds = {"2017", "2017-12", "2017-12-01", "2017-12-01-18"}},
207+
HumanStringTestParam{.test_name = "Null",
208+
.source_type = std::make_shared<TimestampType>(),
209+
.literal = Literal::Null(std::make_shared<TimestampType>()),
210+
.expecteds = {"null", "null", "null", "null"}}),
211+
[](const ::testing::TestParamInfo<HumanStringTestParam>& info) {
212+
return info.param.test_name;
213+
});
214+
215+
} // namespace iceberg

src/iceberg/transform.cc

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "iceberg/util/checked_cast.h"
3232
#include "iceberg/util/macros.h"
3333
#include "iceberg/util/projection_util_internal.h"
34+
#include "iceberg/util/transform_util.h"
3435

3536
namespace iceberg {
3637
namespace {
@@ -366,6 +367,79 @@ Result<std::unique_ptr<UnboundPredicate>> Transform::ProjectStrict(
366367
std::unreachable();
367368
}
368369

370+
/// Renders an already-transformed value as human-readable text.
///
/// - Null values produce "null".
/// - Values flagged as above-max / below-min are rejected with NotSupported.
/// - year/month/day/hour require an int32 payload and delegate to the matching
///   TransformUtil::Human* formatter; any other payload yields NotSupported.
/// - identity/bucket/truncate/unknown/void format by the literal's own type: temporal
///   types via TransformUtil, binary/fixed as Base64, decimals with the type's scale,
///   strings verbatim, and everything else via Literal::ToString().
Result<std::string> Transform::ToHumanString(const Literal& value) {
  if (value.IsNull()) {
    return "null";
  }
  if (value.IsAboveMax() || value.IsBelowMin()) [[unlikely]] {
    return NotSupported("Cannot transfrom human string for value: {}", value.ToString());
  }

  const auto& payload = value.value();

  switch (transform_type_) {
    case TransformType::kYear: {
      if (const auto* years = std::get_if<int32_t>(&payload)) [[likely]] {
        return TransformUtil::HumanYear(*years);
      }
      return NotSupported("Transfrom human year from type {} is not supported",
                          value.type()->ToString());
    }
    case TransformType::kMonth: {
      if (const auto* months = std::get_if<int32_t>(&payload)) [[likely]] {
        return TransformUtil::HumanMonth(*months);
      }
      return NotSupported("Transfrom human month from type {} is not supported",
                          value.type()->ToString());
    }
    case TransformType::kDay: {
      if (const auto* days = std::get_if<int32_t>(&payload)) [[likely]] {
        return TransformUtil::HumanDay(*days);
      }
      return NotSupported("Transfrom human day from type {} is not supported",
                          value.type()->ToString());
    }
    case TransformType::kHour: {
      if (const auto* hours = std::get_if<int32_t>(&payload)) [[likely]] {
        return TransformUtil::HumanHour(*hours);
      }
      return NotSupported("Transfrom human hour from type {} is not supported",
                          value.type()->ToString());
    }
    case TransformType::kIdentity:
    case TransformType::kBucket:
    case TransformType::kTruncate:
    case TransformType::kUnknown:
    case TransformType::kVoid: {
      // These transforms have no dedicated formatter here, so the rendering is chosen
      // from the literal's own type.
      switch (value.type()->type_id()) {
        case TypeId::kDate:
          return TransformUtil::HumanDay(std::get<int32_t>(payload));
        case TypeId::kTime:
          return TransformUtil::HumanTime(std::get<int64_t>(payload));
        case TypeId::kTimestamp:
          return TransformUtil::HumanTimestamp(std::get<int64_t>(payload));
        case TypeId::kTimestampTz:
          return TransformUtil::HumanTimestampWithZone(std::get<int64_t>(payload));
        case TypeId::kFixed:
        case TypeId::kBinary: {
          // Raw bytes are handed to the Base64 encoder as a (pointer, length) pair.
          const auto& bytes = std::get<std::vector<uint8_t>>(payload);
          return TransformUtil::Base64Encode(
              {reinterpret_cast<const char*>(bytes.data()), bytes.size()});
        }
        case TypeId::kDecimal: {
          // The scale lives on the type, not on the stored decimal value.
          const auto& dec_type = internal::checked_cast<DecimalType&>(*value.type());
          const auto& dec_value = std::get<::iceberg::Decimal>(payload);
          return dec_value.ToString(dec_type.scale());
        }
        case TypeId::kString:
          return std::get<std::string>(payload);
        default:
          return value.ToString();
      }
    }
  }
  std::unreachable();
}
442+
369443
bool TransformFunction::Equals(const TransformFunction& other) const {
370444
return transform_type_ == other.transform_type_ && *source_type_ == *other.source_type_;
371445
}

src/iceberg/transform.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,12 @@ class ICEBERG_EXPORT Transform : public util::Formattable {
194194
Result<std::unique_ptr<UnboundPredicate>> ProjectStrict(
195195
std::string_view name, const std::shared_ptr<BoundPredicate>& predicate);
196196

197+
/// \brief Returns a human-readable string representation of a transformed value.
198+
///
199+
/// \param value The already-transformed value (the output of applying this transform),
/// not the original source value.
200+
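/// \return The human-readable string ("null" for a null value), or a NotSupported error
/// if the value is above-max/below-min or its type does not match this transform.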
201+
Result<std::string> ToHumanString(const Literal& value);
202+
197203
/// \brief Returns a string representation of this transform (e.g., "bucket[16]").
198204
std::string ToString() const override;
199205

0 commit comments

Comments
 (0)