Skip to content

Commit 52fbc54

Browse files
committed
refactor location provider & fix some comments
1 parent d855a40 commit 52fbc54

File tree

6 files changed

+61
-72
lines changed

6 files changed

+61
-72
lines changed

src/iceberg/expression/literal.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -488,7 +488,6 @@ std::string Literal::ToString() const {
488488
.value_or("invalid literal of type decimal");
489489
}
490490
case TypeId::kString: {
491-
// TODO(zhuo.wang): escape string?
492491
return "\"" + std::get<std::string>(value_) + "\"";
493492
}
494493
case TypeId::kUuid: {

src/iceberg/location_provider.cc

Lines changed: 50 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ constexpr int32_t kEntropyDirLength = 4;
3535
constexpr int32_t kEntropyDirDepth = 3;
3636

3737
std::string DataLocation(const TableProperties& properties,
38-
const std::string& table_location) {
38+
std::string_view table_location) {
3939
auto data_location = properties.Get(TableProperties::kWriteDataLocation);
4040
if (data_location.empty()) {
4141
data_location = std::format("{}/data", table_location);
@@ -44,11 +44,11 @@ std::string DataLocation(const TableProperties& properties,
4444
}
4545

4646
std::string PathContext(std::string_view table_location) {
47-
std::string path = LocationUtil::StripTrailingSlash(table_location);
47+
std::string_view path = LocationUtil::StripTrailingSlash(table_location);
4848

4949
size_t last_slash = path.find_last_of('/');
5050
if (last_slash != std::string::npos && last_slash < path.length() - 1) {
51-
std::string_view data_path(path.data(), path.size() - last_slash - 1);
51+
std::string_view data_path = path.substr(last_slash + 1);
5252
std::string_view parent_path(path.data(), last_slash);
5353
size_t parent_last_slash = parent_path.find_last_of('/');
5454

@@ -74,7 +74,7 @@ std::string PathContext(std::string_view table_location) {
7474
std::string DirsFromHash(int32_t hash) {
7575
std::string hash_with_dirs;
7676

77-
for (int i = 0; i < kEntropyDirDepth * kEntropyDirLength; i += kEntropyDirLength) {
77+
for (int32_t i = 0; i < kEntropyDirDepth * kEntropyDirLength; i += kEntropyDirLength) {
7878
if (i > 0) {
7979
hash_with_dirs += "/";
8080
}
@@ -97,23 +97,25 @@ std::string ComputeHash(std::string_view file_name) {
9797

9898
} // namespace
9999

100-
Result<std::unique_ptr<LocationProvider>> LocationProviderFactory::For(
101-
const std::string& input_location, const TableProperties& properties) {
102-
std::string location = LocationUtil::StripTrailingSlash(input_location);
100+
/// \brief DefaultLocationProvider privides default location provider for local file
101+
/// system.
102+
class DefaultLocationProvider : public LocationProvider {
103+
public:
104+
DefaultLocationProvider(std::string_view table_location,
105+
const TableProperties& properties);
103106

104-
// Note: Not support dynamic constructor according to kWriteLocationProviderImpl
107+
std::string NewDataLocation(const std::string& filename) override;
105108

106-
properties.Get(TableProperties::kObjectStoreEnabled);
109+
Result<std::string> NewDataLocation(const PartitionSpec& spec,
110+
const PartitionValues& partition_data,
111+
const std::string& filename) override;
107112

108-
if (properties.Get(TableProperties::kObjectStoreEnabled)) {
109-
return std::make_unique<ObjectStoreLocationProvider>(location, properties);
110-
} else {
111-
return std::make_unique<DefaultLocationProvider>(location, properties);
112-
}
113-
}
113+
private:
114+
std::string data_location_;
115+
};
114116

115117
// Implementation of DefaultLocationProvider
116-
DefaultLocationProvider::DefaultLocationProvider(const std::string& table_location,
118+
DefaultLocationProvider::DefaultLocationProvider(std::string_view table_location,
117119
const TableProperties& properties)
118120
: data_location_(
119121
LocationUtil::StripTrailingSlash(DataLocation(properties, table_location))) {}
@@ -129,9 +131,27 @@ Result<std::string> DefaultLocationProvider::NewDataLocation(
129131
return std::format("{}/{}/{}", data_location_, partition_path, filename);
130132
}
131133

134+
/// \brief ObjectStoreLocationProvider provides location provider for object stores.
135+
class ObjectStoreLocationProvider : public LocationProvider {
136+
public:
137+
ObjectStoreLocationProvider(std::string_view table_location,
138+
const TableProperties& properties);
139+
140+
std::string NewDataLocation(const std::string& filename) override;
141+
142+
Result<std::string> NewDataLocation(const PartitionSpec& spec,
143+
const PartitionValues& partition_data,
144+
const std::string& filename) override;
145+
146+
private:
147+
std::string storage_location_;
148+
std::string context_;
149+
bool include_partition_paths_;
150+
};
151+
132152
// Implementation of ObjectStoreLocationProvider
133153
ObjectStoreLocationProvider::ObjectStoreLocationProvider(
134-
const std::string& table_location, const TableProperties& properties)
154+
std::string_view table_location, const TableProperties& properties)
135155
: include_partition_paths_(
136156
properties.Get(TableProperties::kWriteObjectStorePartitionedPaths)) {
137157
storage_location_ =
@@ -173,4 +193,17 @@ Result<std::string> ObjectStoreLocationProvider::NewDataLocation(
173193
}
174194
}
175195

196+
Result<std::unique_ptr<LocationProvider>> LocationProvider::Make(
197+
const std::string& input_location, const TableProperties& properties) {
198+
std::string_view location = LocationUtil::StripTrailingSlash(input_location);
199+
200+
// TODO(xxx): Support dynamic constructor according to kWriteLocationProviderImpl
201+
202+
if (properties.Get(TableProperties::kObjectStoreEnabled)) {
203+
return std::make_unique<ObjectStoreLocationProvider>(location, properties);
204+
} else {
205+
return std::make_unique<DefaultLocationProvider>(location, properties);
206+
}
207+
}
208+
176209
} // namespace iceberg

src/iceberg/location_provider.h

Lines changed: 2 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -47,60 +47,17 @@ class ICEBERG_EXPORT LocationProvider {
4747
/// given spec
4848
/// \param filename a file name
4949
/// \return a fully-qualified location URI for a data file
50-
///
51-
/// TODO(wgtmac): StructLike is not well thought yet, we may wrap an ArrowArray
52-
/// with single row in StructLike.
5350
virtual Result<std::string> NewDataLocation(const PartitionSpec& spec,
5451
const PartitionValues& partition_data,
5552
const std::string& filename) = 0;
56-
};
57-
58-
class ICEBERG_EXPORT LocationProviderFactory {
59-
public:
60-
virtual ~LocationProviderFactory() = default;
6153

6254
/// \brief Create a LocationProvider for the given table location and properties.
6355
///
6456
/// \param input_location the table location
6557
/// \param properties the table properties
6658
/// \return a LocationProvider instance
67-
static Result<std::unique_ptr<LocationProvider>> For(const std::string& input_location,
68-
const TableProperties& properties);
69-
};
70-
71-
/// \brief DefaultLocationProvider privides default location provider for local file
72-
/// system.
73-
class ICEBERG_EXPORT DefaultLocationProvider : public LocationProvider {
74-
public:
75-
DefaultLocationProvider(const std::string& table_location,
76-
const TableProperties& properties);
77-
78-
std::string NewDataLocation(const std::string& filename) override;
79-
80-
Result<std::string> NewDataLocation(const PartitionSpec& spec,
81-
const PartitionValues& partition_data,
82-
const std::string& filename) override;
83-
84-
private:
85-
std::string data_location_;
86-
};
87-
88-
/// \brief ObjectStoreLocationProvider provides location provider for object stores.
89-
class ICEBERG_EXPORT ObjectStoreLocationProvider : public LocationProvider {
90-
public:
91-
ObjectStoreLocationProvider(const std::string& table_location,
92-
const TableProperties& properties);
93-
94-
std::string NewDataLocation(const std::string& filename) override;
95-
96-
Result<std::string> NewDataLocation(const PartitionSpec& spec,
97-
const PartitionValues& partition_data,
98-
const std::string& filename) override;
99-
100-
private:
101-
std::string storage_location_;
102-
std::string context_;
103-
bool include_partition_paths_;
59+
static Result<std::unique_ptr<LocationProvider>> Make(
60+
const std::string& input_location, const TableProperties& properties);
10461
};
10562

10663
} // namespace iceberg

src/iceberg/partition_spec.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ Result<std::string> PartitionSpec::PartitionPath(const PartitionValues& data) co
107107
if (i > 0) {
108108
ss << "/";
109109
}
110-
// TODO(zhuo.wang): json parse string literal?
110+
// TODO(zhuo.wang): transform partition value to string
111111
std::string partition_value = value.get().ToString();
112112
// TODO(zhuo.wang): UrlEncoder::Encode for partition name and value
113113
ss << fields_[i].name() << "="

src/iceberg/test/location_provider_test.cc

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ class LocationProviderTest : public ::testing::Test {
6363
TEST_F(LocationProviderTest, DefaultLocationProvider) {
6464
properties_ = {}; // Empty properties to use defaults
6565
ICEBERG_UNWRAP_OR_FAIL(auto provider,
66-
LocationProviderFactory::For(table_location_, properties_));
66+
LocationProvider::Make(table_location_, properties_));
6767

6868
auto location = provider->NewDataLocation("my_file");
6969
EXPECT_EQ(std::format("{}/data/my_file", table_location_), location);
@@ -73,7 +73,7 @@ TEST_F(LocationProviderTest, DefaultLocationProviderWithCustomDataLocation) {
7373
std::ignore =
7474
properties_.Set(TableProperties::kWriteDataLocation, std::string("new_location"));
7575
ICEBERG_UNWRAP_OR_FAIL(auto provider,
76-
LocationProviderFactory::For(table_location_, properties_));
76+
LocationProvider::Make(table_location_, properties_));
7777

7878
auto location = provider->NewDataLocation("my_file");
7979
EXPECT_EQ("new_location/my_file", location);
@@ -82,7 +82,7 @@ TEST_F(LocationProviderTest, DefaultLocationProviderWithCustomDataLocation) {
8282
TEST_F(LocationProviderTest, ObjectStorageLocationProvider) {
8383
std::ignore = properties_.Set(TableProperties::kObjectStoreEnabled, true);
8484
ICEBERG_UNWRAP_OR_FAIL(auto provider,
85-
LocationProviderFactory::For(table_location_, properties_));
85+
LocationProvider::Make(table_location_, properties_));
8686

8787
auto location = provider->NewDataLocation("test.parquet");
8888
std::string relative_location = location;
@@ -103,7 +103,7 @@ TEST_F(LocationProviderTest, ObjectStorageLocationProvider) {
103103
TEST_F(LocationProviderTest, ObjectStorageWithPartition) {
104104
std::ignore = properties_.Set(TableProperties::kObjectStoreEnabled, true);
105105
ICEBERG_UNWRAP_OR_FAIL(auto provider,
106-
LocationProviderFactory::For(table_location_, properties_));
106+
LocationProvider::Make(table_location_, properties_));
107107

108108
ICEBERG_UNWRAP_OR_FAIL(
109109
auto mock_spec,
@@ -126,7 +126,7 @@ TEST_F(LocationProviderTest, ObjectStorageExcludePartitionInPath) {
126126
std::ignore = properties_.Set(TableProperties::kObjectStoreEnabled, true)
127127
.Set(TableProperties::kWriteObjectStorePartitionedPaths, false);
128128
ICEBERG_UNWRAP_OR_FAIL(auto provider,
129-
LocationProviderFactory::For(table_location_, properties_));
129+
LocationProvider::Make(table_location_, properties_));
130130

131131
auto location = provider->NewDataLocation("test.parquet");
132132

@@ -138,7 +138,7 @@ TEST_F(LocationProviderTest, ObjectStorageExcludePartitionInPath) {
138138
TEST_F(LocationProviderTest, HashInjection) {
139139
std::ignore = properties_.Set(TableProperties::kObjectStoreEnabled, true);
140140
ICEBERG_UNWRAP_OR_FAIL(auto provider,
141-
LocationProviderFactory::For(table_location_, properties_));
141+
LocationProvider::Make(table_location_, properties_));
142142

143143
auto location_a = provider->NewDataLocation("a");
144144
EXPECT_THAT(location_a, testing::EndsWith("/data/0101/0110/1001/10110010/a"));

src/iceberg/util/location_util.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,15 +27,15 @@ namespace iceberg {
2727

2828
class ICEBERG_EXPORT LocationUtil {
2929
public:
30-
static std::string StripTrailingSlash(std::string_view path) {
30+
static std::string_view StripTrailingSlash(std::string_view path) {
3131
if (path.empty()) {
3232
return "";
3333
}
3434

3535
while (path.ends_with("/") && !path.ends_with("://")) {
3636
path.remove_suffix(1);
3737
}
38-
return std::string(path);
38+
return path;
3939
}
4040
};
4141

0 commit comments

Comments
 (0)