Skip to content

Commit 3f358df

Browse files
committed
feat: Impl Transform::ToHumanString
1 parent 2bd493c commit 3f358df

File tree

7 files changed

+242
-6
lines changed

7 files changed

+242
-6
lines changed

src/iceberg/partition_spec.cc

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,8 @@ Result<std::string> PartitionSpec::PartitionPath(const PartitionValues& data) co
111111
if (i > 0) {
112112
ss << "/";
113113
}
114-
// TODO(zhuo.wang): transform for partition value, will be fixed after transform util
115-
// is ready
116-
std::string partition_value = value.get().ToString();
114+
ICEBERG_ASSIGN_OR_RAISE(auto partition_value,
115+
fields_[i].transform()->ToHumanString(value));
117116
ss << UrlEncoder::Encode(fields_[i].name()) << "="
118117
<< UrlEncoder::Encode(partition_value);
119118
}

src/iceberg/test/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ add_iceberg_test(schema_test
6868
schema_util_test.cc
6969
sort_field_test.cc
7070
sort_order_test.cc
71+
transform_human_string_test.cc
7172
transform_test.cc
7273
type_test.cc)
7374

src/iceberg/test/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ iceberg_tests = {
4040
'schema_util_test.cc',
4141
'sort_field_test.cc',
4242
'sort_order_test.cc',
43+
'transform_human_string_test.cc',
4344
'transform_test.cc',
4445
'type_test.cc',
4546
),

src/iceberg/test/partition_spec_test.cc

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -458,8 +458,7 @@ TEST(PartitionSpecTest, PartitionPath) {
458458
PartitionValues part_data(
459459
{Literal::Int(123), Literal::String("val2"), Literal::Date(19489)});
460460
ICEBERG_UNWRAP_OR_FAIL(auto path, spec->PartitionPath(part_data));
461-
std::string expected =
462-
"id_partition=123/name_partition=%22val2%22/ts_partition=19489";
461+
std::string expected = "id_partition=123/name_partition=val2/ts_partition=2023-05-12";
463462
EXPECT_EQ(expected, path);
464463
}
465464

@@ -469,7 +468,7 @@ TEST(PartitionSpecTest, PartitionPath) {
469468
{Literal::Int(123), Literal::String("val#2"), Literal::Date(19489)});
470469
ICEBERG_UNWRAP_OR_FAIL(auto path, spec->PartitionPath(part_data));
471470
std::string expected =
472-
"id_partition=123/name_partition=%22val%232%22/ts_partition=19489";
471+
"id_partition=123/name_partition=val%232/ts_partition=2023-05-12";
473472
EXPECT_EQ(expected, path);
474473
}
475474
}
src/iceberg/test/transform_human_string_test.cc

Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
22+
23+
#include <gtest/gtest.h>
24+
25+
#include "iceberg/test/matchers.h"
26+
#include "iceberg/transform.h"
27+
28+
namespace iceberg {
29+
30+
struct HumanStringTestParam {
31+
std::string test_name;
32+
std::shared_ptr<Type> source_type;
33+
Literal literal;
34+
std::vector<std::string> expecteds;
35+
};
36+
37+
/// Fixture holding the single transform (identity) exercised by the identity suite.
class IdentityHumanStringTest : public ::testing::TestWithParam<HumanStringTestParam> {
 protected:
  // One entry only; each case therefore carries exactly one expected string.
  std::vector<std::shared_ptr<Transform>> transforms_{Transform::Identity()};
};
41+
42+
// Verifies that each transform in the fixture renders the case's literal as the
// expected string at the same index.
TEST_P(IdentityHumanStringTest, ToHumanString) {
  const auto& param = GetParam();
  // expecteds is indexed in lockstep with transforms_; stop here on a malformed case
  // instead of reading past the end of the vector.
  ASSERT_EQ(param.expecteds.size(), transforms_.size());
  // std::size_t matches vector::size() and avoids the signed/unsigned comparison.
  for (std::size_t i = 0; i < transforms_.size(); ++i) {
    EXPECT_THAT(transforms_[i]->ToHumanString(param.literal),
                HasValue(::testing::Eq(param.expecteds[i])));
  }
}
49+
50+
INSTANTIATE_TEST_SUITE_P(
51+
IdentityHumanStringTestCases, IdentityHumanStringTest,
52+
::testing::Values(
53+
HumanStringTestParam{.test_name = "Null",
54+
.literal = Literal::Null(std::make_shared<IntType>()),
55+
.expecteds{"null"}},
56+
HumanStringTestParam{.test_name = "Binary",
57+
.literal = Literal::Binary(std::vector<uint8_t>{1, 2, 3}),
58+
.expecteds{"AQID"}},
59+
HumanStringTestParam{.test_name = "Fixed",
60+
.literal = Literal::Fixed(std::vector<uint8_t>{1, 2, 3}),
61+
.expecteds{"AQID"}},
62+
HumanStringTestParam{.test_name = "Date",
63+
.literal = Literal::Date(17501),
64+
.expecteds{"2017-12-01"}},
65+
HumanStringTestParam{.test_name = "Time",
66+
.literal = Literal::Time(36775038194),
67+
.expecteds{"10:12:55.038194"}},
68+
HumanStringTestParam{.test_name = "TimestampWithZone",
69+
.literal = Literal::TimestampTz(1512151975038194),
70+
.expecteds{"2017-12-01T18:12:55.038194+00:00"}},
71+
HumanStringTestParam{.test_name = "TimestampWithoutZone",
72+
.literal = Literal::Timestamp(1512123175038194),
73+
.expecteds{"2017-12-01T10:12:55.038194"}},
74+
HumanStringTestParam{.test_name = "Long",
75+
.literal = Literal::Long(-1234567890000L),
76+
.expecteds{"-1234567890000"}},
77+
HumanStringTestParam{.test_name = "String",
78+
.literal = Literal::String("a/b/c=d"),
79+
.expecteds{"a/b/c=d"}}),
80+
[](const ::testing::TestParamInfo<HumanStringTestParam>& info) {
81+
return info.param.test_name;
82+
});
83+
84+
/// Fixture for date inputs: year, month and day transforms, in that order.
class DateHumanStringTest : public ::testing::TestWithParam<HumanStringTestParam> {
 protected:
  // Order defines the meaning of each index in HumanStringTestParam::expecteds.
  std::vector<std::shared_ptr<Transform>> transforms_{
      Transform::Year(),
      Transform::Month(),
      Transform::Day(),
  };
};
89+
90+
// For each transform in the fixture: bind it to DateType, apply it to the case's
// literal, and check the human string of the transformed value.
TEST_P(DateHumanStringTest, ToHumanString) {
  const auto& param = GetParam();
  // expecteds is indexed in lockstep with transforms_; stop here on a malformed case
  // instead of reading past the end of the vector.
  ASSERT_EQ(param.expecteds.size(), transforms_.size());

  // Every transform in this suite is bound to the same source type; build it once.
  const auto date_type = std::make_shared<DateType>();
  for (std::size_t i = 0; i < transforms_.size(); ++i) {
    ICEBERG_UNWRAP_OR_FAIL(auto trans_func, transforms_[i]->Bind(date_type));
    ICEBERG_UNWRAP_OR_FAIL(auto literal, trans_func->Transform(param.literal));
    EXPECT_THAT(transforms_[i]->ToHumanString(literal),
                HasValue(::testing::Eq(param.expecteds[i])));
  }
}
101+
102+
INSTANTIATE_TEST_SUITE_P(
103+
DateHumanStringTestCases, DateHumanStringTest,
104+
::testing::Values(
105+
HumanStringTestParam{.test_name = "Date",
106+
.literal = Literal::Date(17501),
107+
.expecteds = {"2017", "2017-12", "2017-12-01"}},
108+
HumanStringTestParam{.test_name = "NegativeDate",
109+
.literal = Literal::Date(-2),
110+
.expecteds = {"1969", "1969-12", "1969-12-30"}},
111+
HumanStringTestParam{.test_name = "DateLowerBound",
112+
.literal = Literal::Date(0),
113+
.expecteds = {"1970", "1970-01", "1970-01-01"}},
114+
HumanStringTestParam{.test_name = "NegativeDateLowerBound",
115+
.literal = Literal::Date(-365),
116+
.expecteds = {"1969", "1969-01", "1969-01-01"}},
117+
HumanStringTestParam{.test_name = "NegativeDateUpperBound",
118+
.literal = Literal::Date(-1),
119+
.expecteds = {"1969", "1969-12", "1969-12-31"}},
120+
HumanStringTestParam{.test_name = "Null",
121+
.literal = Literal::Null(std::make_shared<DateType>()),
122+
.expecteds = {"null", "null", "null"}}),
123+
[](const ::testing::TestParamInfo<HumanStringTestParam>& info) {
124+
return info.param.test_name;
125+
});
126+
127+
/// Fixture for timestamp inputs: year, month, day and hour transforms, in that order.
class TimestampHumanStringTest : public ::testing::TestWithParam<HumanStringTestParam> {
 protected:
  // Order defines the meaning of each index in HumanStringTestParam::expecteds.
  std::vector<std::shared_ptr<Transform>> transforms_{
      Transform::Year(),
      Transform::Month(),
      Transform::Day(),
      Transform::Hour(),
  };
};
132+
133+
// For each transform in the fixture: bind it to the case's source type, apply it to
// the case's literal, and check the human string of the transformed value.
TEST_P(TimestampHumanStringTest, ToHumanString) {
  const auto& param = GetParam();
  // expecteds is indexed in lockstep with transforms_; stop here on a malformed case
  // instead of reading past the end of the vector.
  ASSERT_EQ(param.expecteds.size(), transforms_.size());
  // std::size_t matches vector::size(), so the index cannot be narrower than the bound.
  for (std::size_t i = 0; i < transforms_.size(); ++i) {
    ICEBERG_UNWRAP_OR_FAIL(auto trans_func, transforms_[i]->Bind(param.source_type));
    ICEBERG_UNWRAP_OR_FAIL(auto literal, trans_func->Transform(param.literal));
    EXPECT_THAT(transforms_[i]->ToHumanString(literal),
                HasValue(::testing::Eq(param.expecteds[i])));
  }
}
142+
143+
INSTANTIATE_TEST_SUITE_P(
144+
TimestampHumanStringTestCases, TimestampHumanStringTest,
145+
::testing::Values(
146+
HumanStringTestParam{
147+
.test_name = "Timestamp",
148+
.source_type = std::make_shared<TimestampType>(),
149+
.literal = Literal::Timestamp(1512123175038194),
150+
.expecteds = {"2017", "2017-12", "2017-12-01", "2017-12-01-10"}},
151+
HumanStringTestParam{
152+
.test_name = "NegativeTimestamp",
153+
.source_type = std::make_shared<TimestampType>(),
154+
.literal = Literal::Timestamp(-136024961806),
155+
.expecteds = {"1969", "1969-12", "1969-12-30", "1969-12-30-10"}},
156+
HumanStringTestParam{
157+
.test_name = "TimestampLowerBound",
158+
.source_type = std::make_shared<TimestampType>(),
159+
.literal = Literal::Timestamp(0),
160+
.expecteds = {"1970", "1970-01", "1970-01-01", "1970-01-01-00"}},
161+
HumanStringTestParam{
162+
.test_name = "NegativeTimestampLowerBound",
163+
.source_type = std::make_shared<TimestampType>(),
164+
.literal = Literal::Timestamp(-172800000000),
165+
.expecteds = {"1969", "1969-12", "1969-12-30", "1969-12-30-00"},
166+
},
167+
HumanStringTestParam{
168+
.test_name = "NegativeTimestampUpperBound",
169+
.source_type = std::make_shared<TimestampType>(),
170+
.literal = Literal::Timestamp(-1),
171+
.expecteds = {"1969", "1969-12", "1969-12-31", "1969-12-31-23"}},
172+
HumanStringTestParam{
173+
.test_name = "TimestampTz",
174+
.source_type = std::make_shared<TimestampTzType>(),
175+
.literal = Literal::TimestampTz(1512151975038194),
176+
.expecteds = {"2017", "2017-12", "2017-12-01", "2017-12-01-18"}},
177+
HumanStringTestParam{.test_name = "Null",
178+
.source_type = std::make_shared<TimestampType>(),
179+
.literal = Literal::Null(std::make_shared<TimestampType>()),
180+
.expecteds = {"null", "null", "null", "null"}}),
181+
[](const ::testing::TestParamInfo<HumanStringTestParam>& info) {
182+
return info.param.test_name;
183+
});
184+
185+
} // namespace iceberg

src/iceberg/transform.cc

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "iceberg/util/checked_cast.h"
3232
#include "iceberg/util/macros.h"
3333
#include "iceberg/util/projection_util_internal.h"
34+
#include "iceberg/util/transform_util.h"
3435

3536
namespace iceberg {
3637
namespace {
@@ -366,6 +367,50 @@ Result<std::unique_ptr<UnboundPredicate>> Transform::ProjectStrict(
366367
std::unreachable();
367368
}
368369

370+
/// Renders an already-transformed value as human-readable text.
///
/// A null literal yields "null". For the year/month/day/hour transforms the value is
/// read as int32_t and formatted by the matching TransformUtil::Human* helper. For
/// every other transform the rendering is selected by the literal's own type.
///
/// NOTE(review): std::get is used unchecked, so a literal whose stored alternative
/// does not match would presumably throw std::bad_variant_access rather than return
/// an error Result - confirm callers always pass a matching literal.
Result<std::string> Transform::ToHumanString(const Literal& value) {
  if (value.IsNull()) {
    return "null";
  }

  // Temporal transforms: dispatch on the transform, not on the literal's type.
  switch (transform_type_) {
    case TransformType::kYear:
      return TransformUtil::HumanYear(std::get<int32_t>(value.value()));
    case TransformType::kMonth:
      return TransformUtil::HumanMonth(std::get<int32_t>(value.value()));
    case TransformType::kDay:
      return TransformUtil::HumanDay(std::get<int32_t>(value.value()));
    case TransformType::kHour:
      return TransformUtil::HumanHour(std::get<int32_t>(value.value()));
    default:
      break;
  }

  // All remaining transforms: format according to the literal's type.
  switch (value.type()->type_id()) {
    case TypeId::kDate:
      return TransformUtil::HumanDay(std::get<int32_t>(value.value()));
    case TypeId::kTime:
      return TransformUtil::HumanTime(std::get<int64_t>(value.value()));
    case TypeId::kTimestamp:
      return TransformUtil::HumanTimestamp(std::get<int64_t>(value.value()));
    case TypeId::kTimestampTz:
      return TransformUtil::HumanTimestampWithZone(std::get<int64_t>(value.value()));
    case TypeId::kFixed:
    case TypeId::kBinary: {
      // Raw bytes are handed to the Base64 encoder as a (pointer, length) character view.
      const auto& bytes = std::get<std::vector<uint8_t>>(value.value());
      return TransformUtil::Base64Encode(
          {reinterpret_cast<const char*>(bytes.data()), bytes.size()});
    }
    case TypeId::kDecimal: {
      // The scale comes from the literal's DecimalType, not from the stored Decimal.
      const auto& decimal_type = internal::checked_cast<DecimalType&>(*value.type());
      const auto& unscaled = std::get<::iceberg::Decimal>(value.value());
      return unscaled.ToString(decimal_type.scale());
    }
    case TypeId::kString:
      // Return the stored string itself rather than going through Literal::ToString().
      return std::get<std::string>(value.value());
    default:
      return value.ToString();
  }
}
413+
369414
bool TransformFunction::Equals(const TransformFunction& other) const {
370415
return transform_type_ == other.transform_type_ && *source_type_ == *other.source_type_;
371416
}

src/iceberg/transform.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,12 @@ class ICEBERG_EXPORT Transform : public util::Formattable {
194194
Result<std::unique_ptr<UnboundPredicate>> ProjectStrict(
195195
std::string_view name, const std::shared_ptr<BoundPredicate>& predicate);
196196

197+
/// \brief Returns a human-readable String representation of a transformed value.
198+
///
199+
/// \param value The literal to render, i.e. a value already produced by this transform.
200+
/// \return A human-readable string representation of the value.
201+
Result<std::string> ToHumanString(const Literal& value);
202+
197203
/// \brief Returns a string representation of this transform (e.g., "bucket[16]").
198204
std::string ToString() const override;
199205

0 commit comments

Comments
 (0)