Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/iceberg/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ set(ICEBERG_SOURCES
transform.cc
transform_function.cc
type.cc
update/expire_snapshots.cc
update/pending_update.cc
update/snapshot_update.cc
update/update_partition_spec.cc
Expand Down
1 change: 1 addition & 0 deletions src/iceberg/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ iceberg_sources = files(
'transform.cc',
'transform_function.cc',
'type.cc',
'update/expire_snapshots.cc',
'update/pending_update.cc',
'update/snapshot_update.cc',
'update/update_partition_spec.cc',
Expand Down
13 changes: 13 additions & 0 deletions src/iceberg/snapshot.cc
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,19 @@ SnapshotRefType SnapshotRef::type() const noexcept {
retention);
}

/// \brief Read the `max_ref_age_ms` setting from this reference's retention policy.
///
/// Every alternative held by `retention` exposes a `max_ref_age_ms` member, so one
/// generic visitor covers all of them.
///
/// \return The stored `max_ref_age_ms` value, or an empty optional if none is stored.
std::optional<int64_t> SnapshotRef::max_ref_age_ms() const noexcept {
  const auto read_max_ref_age = [](const auto& policy) -> std::optional<int64_t> {
    return policy.max_ref_age_ms;
  };
  return std::visit(read_max_ref_age, retention);
}

Status SnapshotRef::Validate() const {
if (type() == SnapshotRefType::kBranch) {
const auto& branch = std::get<Branch>(this->retention);
Expand Down
2 changes: 2 additions & 0 deletions src/iceberg/snapshot.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ struct ICEBERG_EXPORT SnapshotRef {

SnapshotRefType type() const noexcept;

/// \brief Get the `max_ref_age_ms` value of this reference's retention policy.
///
/// \return The stored value, or an empty optional if the retention policy has none.
std::optional<int64_t> max_ref_age_ms() const noexcept;

/// \brief Create a branch reference
///
/// \param snapshot_id The snapshot ID for the branch
Expand Down
8 changes: 8 additions & 0 deletions src/iceberg/table.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "iceberg/table_properties.h"
#include "iceberg/table_scan.h"
#include "iceberg/transaction.h"
#include "iceberg/update/expire_snapshots.h"
#include "iceberg/update/update_partition_spec.h"
#include "iceberg/update/update_properties.h"
#include "iceberg/update/update_schema.h"
Expand Down Expand Up @@ -179,6 +180,13 @@ Result<std::shared_ptr<UpdateSchema>> Table::NewUpdateSchema() {
return transaction->NewUpdateSchema();
}

// Builds an ExpireSnapshots update on top of a new auto-committing update
// transaction created for this table. Any error from creating the transaction is
// propagated to the caller.
Result<std::shared_ptr<ExpireSnapshots>> Table::NewExpireSnapshots() {
  ICEBERG_ASSIGN_OR_RAISE(auto txn, Transaction::Make(shared_from_this(),
                                                      Transaction::Kind::kUpdate,
                                                      /*auto_commit=*/true));
  return txn->NewExpireSnapshots();
}

Result<std::shared_ptr<StagedTable>> StagedTable::Make(
TableIdentifier identifier, std::shared_ptr<TableMetadata> metadata,
std::string metadata_location, std::shared_ptr<FileIO> io,
Expand Down
4 changes: 4 additions & 0 deletions src/iceberg/table.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,10 @@ class ICEBERG_EXPORT Table : public std::enable_shared_from_this<Table> {
/// changes.
virtual Result<std::shared_ptr<UpdateSchema>> NewUpdateSchema();

/// \brief Create a new ExpireSnapshots to remove expired snapshots and commit the
/// changes.
///
/// \return The new ExpireSnapshots update, or an error if it could not be created.
virtual Result<std::shared_ptr<ExpireSnapshots>> NewExpireSnapshots();

protected:
Table(TableIdentifier identifier, std::shared_ptr<TableMetadata> metadata,
std::string metadata_location, std::shared_ptr<FileIO> io,
Expand Down
92 changes: 88 additions & 4 deletions src/iceberg/table_metadata.cc
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,9 @@ class TableMetadataBuilder::Impl {
Status SetBranchSnapshot(int64_t snapshot_id, const std::string& branch);
Status SetBranchSnapshot(std::shared_ptr<Snapshot> snapshot, const std::string& branch);
Status SetRef(const std::string& name, std::shared_ptr<SnapshotRef> ref);
// Removes the ref called `name`; a RemoveSnapshotRef change is recorded only when
// such a ref existed.
Status RemoveRef(const std::string& name);
// Removes the snapshots with the given IDs, then removes refs whose snapshot is no
// longer tracked.
Status RemoveSnapshots(const std::vector<int64_t>& snapshot_ids);
// Removes the partition specs with the given IDs; the default spec cannot be removed.
Status RemovePartitionSpecs(const std::vector<int32_t>& spec_ids);

Result<std::unique_ptr<TableMetadata>> Build();

Expand Down Expand Up @@ -1334,6 +1337,84 @@ int32_t TableMetadataBuilder::Impl::ReuseOrCreateNewSchemaId(
return new_schema_id;
}

/// \brief Remove the snapshot reference called `name` from the metadata being built.
///
/// \param name Name of the reference to remove.
/// \return Always an empty (successful) Status.
Status TableMetadataBuilder::Impl::RemoveRef(const std::string& name) {
  // Removing the main branch resets the current snapshot. This happens even when no
  // ref with that name is currently registered.
  const bool removing_main = (name == SnapshotRef::kMainBranch);
  if (removing_main) {
    metadata_.current_snapshot_id = kInvalidSnapshotId;
  }

  // Record a change only if a ref was actually erased.
  const auto erased_count = metadata_.refs.erase(name);
  if (erased_count == 0) {
    return {};
  }

  changes_.push_back(std::make_unique<table::RemoveSnapshotRef>(name));
  return {};
}

/// \brief Remove the given snapshots from the metadata being built.
///
/// IDs that match no stored snapshot are ignored. When at least one snapshot is
/// dropped, a RemoveSnapshots change listing the IDs actually removed is recorded.
/// Afterwards every ref whose snapshot ID is not present in `snapshots_by_id_` is
/// removed through RemoveRef.
///
/// \param snapshot_ids IDs of the snapshots to remove; may contain duplicates or IDs
///        that are not present in the metadata.
/// \return An empty Status on success, or the error raised when a null snapshot entry
///         is encountered in the metadata.
Status TableMetadataBuilder::Impl::RemoveSnapshots(
    const std::vector<int64_t>& snapshot_ids) {
  if (snapshot_ids.empty()) {
    return {};
  }

  const std::unordered_set<int64_t> ids_to_remove(snapshot_ids.begin(),
                                                  snapshot_ids.end());

  // The request may name more IDs than there are stored snapshots (duplicates or
  // unknown IDs). Clamp the capacity hints so the unsigned subtraction cannot wrap
  // around and make reserve() throw std::length_error.
  const auto stored_count = metadata_.snapshots.size();
  const auto requested_count = ids_to_remove.size();

  std::vector<std::shared_ptr<Snapshot>> retained_snapshots;
  retained_snapshots.reserve(stored_count > requested_count
                                 ? stored_count - requested_count
                                 : 0);
  std::vector<int64_t> snapshot_ids_to_remove;
  snapshot_ids_to_remove.reserve(stored_count < requested_count ? stored_count
                                                                : requested_count);

  for (auto& snapshot : metadata_.snapshots) {
    ICEBERG_CHECK(snapshot != nullptr, "Encountered null snapshot in metadata");
    const int64_t snapshot_id = snapshot->snapshot_id;
    if (ids_to_remove.contains(snapshot_id)) {
      snapshots_by_id_.erase(snapshot_id);
      snapshot_ids_to_remove.push_back(snapshot_id);
      // FIXME: implement statistics removal and uncomment below
      // ICEBERG_RETURN_UNEXPECTED(RemoveStatistics(snapshot_id));
      // ICEBERG_RETURN_UNEXPECTED(RemovePartitionStatistics(snapshot_id));
    } else {
      retained_snapshots.push_back(std::move(snapshot));
    }
  }

  // Only the IDs that were really present end up in the recorded change.
  if (!snapshot_ids_to_remove.empty()) {
    changes_.push_back(
        std::make_unique<table::RemoveSnapshots>(std::move(snapshot_ids_to_remove)));
  }

  metadata_.snapshots = std::move(retained_snapshots);

  // Remove any refs that are no longer valid (dangling refs). Names are collected
  // first because RemoveRef erases from `metadata_.refs`, which must not happen
  // while iterating over it.
  std::vector<std::string> dangling_refs;
  for (const auto& [ref_name, ref] : metadata_.refs) {
    if (!snapshots_by_id_.contains(ref->snapshot_id)) {
      dangling_refs.push_back(ref_name);
    }
  }
  for (const auto& ref_name : dangling_refs) {
    ICEBERG_RETURN_UNEXPECTED(RemoveRef(ref_name));
  }

  return {};
}

/// \brief Remove the partition specs with the given IDs from the metadata being built.
///
/// \param spec_ids IDs of the partition specs to remove. An empty list is a no-op.
/// \return An empty Status on success, or the precondition error raised when the
///         default partition spec is among `spec_ids`.
Status TableMetadataBuilder::Impl::RemovePartitionSpecs(
    const std::vector<int32_t>& spec_ids) {
  // Nothing requested: no validation and no recorded change.
  if (spec_ids.empty()) {
    return {};
  }

  const std::unordered_set<int32_t> spec_ids_to_remove(spec_ids.begin(),
                                                       spec_ids.end());
  ICEBERG_PRECHECK(!spec_ids_to_remove.contains(metadata_.default_spec_id),
                   "Cannot remove the default partition spec");

  // Keep, in their original order, only the specs that were not asked to be removed.
  const auto is_kept = [&spec_ids_to_remove](const auto& spec) {
    return !spec_ids_to_remove.contains(spec->spec_id());
  };
  auto kept_specs = metadata_.partition_specs | std::views::filter(is_kept) |
                    std::ranges::to<std::vector<std::shared_ptr<PartitionSpec>>>();
  metadata_.partition_specs = std::move(kept_specs);

  // The change carries the IDs exactly as requested by the caller.
  changes_.push_back(std::make_unique<table::RemovePartitionSpecs>(spec_ids));
  return {};
}

TableMetadataBuilder::TableMetadataBuilder(int8_t format_version)
: impl_(std::make_unique<Impl>(format_version)) {}

Expand Down Expand Up @@ -1436,7 +1517,8 @@ TableMetadataBuilder& TableMetadataBuilder::AddPartitionSpec(

/// \brief Remove the partition specs with the given IDs.
///
/// Delegates to the implementation object; a failure is handled by
/// ICEBERG_BUILDER_RETURN_IF_ERROR rather than thrown from here.
///
/// \param spec_ids IDs of the partition specs to remove.
/// \return Reference to this builder for call chaining.
TableMetadataBuilder& TableMetadataBuilder::RemovePartitionSpecs(
    const std::vector<int32_t>& spec_ids) {
  ICEBERG_BUILDER_RETURN_IF_ERROR(impl_->RemovePartitionSpecs(spec_ids));
  return *this;
}

TableMetadataBuilder& TableMetadataBuilder::RemoveSchemas(
Expand Down Expand Up @@ -1464,7 +1546,7 @@ TableMetadataBuilder& TableMetadataBuilder::AddSortOrder(

/// \brief Add a snapshot to the metadata being built.
///
/// The snapshot pointer is handed to the implementation exactly once, by move, so
/// no extra shared_ptr copy is made.
///
/// \param snapshot The snapshot to add.
/// \return Reference to this builder for call chaining.
TableMetadataBuilder& TableMetadataBuilder::AddSnapshot(
    std::shared_ptr<Snapshot> snapshot) {
  ICEBERG_BUILDER_RETURN_IF_ERROR(impl_->AddSnapshot(std::move(snapshot)));
  return *this;
}

Expand All @@ -1487,7 +1569,8 @@ TableMetadataBuilder& TableMetadataBuilder::SetRef(const std::string& name,
}

/// \brief Remove the snapshot reference called `name`.
///
/// Delegates to the implementation object; a failure is handled by
/// ICEBERG_BUILDER_RETURN_IF_ERROR rather than thrown from here.
///
/// \param name Name of the reference to remove.
/// \return Reference to this builder for call chaining.
TableMetadataBuilder& TableMetadataBuilder::RemoveRef(const std::string& name) {
  ICEBERG_BUILDER_RETURN_IF_ERROR(impl_->RemoveRef(name));
  return *this;
}

TableMetadataBuilder& TableMetadataBuilder::RemoveSnapshots(
Expand All @@ -1497,7 +1580,8 @@ TableMetadataBuilder& TableMetadataBuilder::RemoveSnapshots(

/// \brief Remove the snapshots with the given IDs.
///
/// Delegates to the implementation object; a failure is handled by
/// ICEBERG_BUILDER_RETURN_IF_ERROR rather than thrown from here.
///
/// \param snapshot_ids IDs of the snapshots to remove.
/// \return Reference to this builder for call chaining.
TableMetadataBuilder& TableMetadataBuilder::RemoveSnapshots(
    const std::vector<int64_t>& snapshot_ids) {
  ICEBERG_BUILDER_RETURN_IF_ERROR(impl_->RemoveSnapshots(snapshot_ids));
  return *this;
}

TableMetadataBuilder& TableMetadataBuilder::SuppressHistoricalSnapshots() {
Expand Down
8 changes: 5 additions & 3 deletions src/iceberg/table_update.cc
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ std::unique_ptr<TableUpdate> SetDefaultPartitionSpec::Clone() const {
// RemovePartitionSpecs

// Applies this update to `builder` by removing the partition specs in `spec_ids_`.
void RemovePartitionSpecs::ApplyTo(TableMetadataBuilder& builder) const {
  builder.RemovePartitionSpecs(spec_ids_);
}

void RemovePartitionSpecs::GenerateRequirements(TableUpdateContext& context) const {
Expand Down Expand Up @@ -301,7 +301,9 @@ std::unique_ptr<TableUpdate> AddSnapshot::Clone() const {

// RemoveSnapshots

// Applies this update to `builder` by removing the snapshots in `snapshot_ids_`.
void RemoveSnapshots::ApplyTo(TableMetadataBuilder& builder) const {
  builder.RemoveSnapshots(snapshot_ids_);
}

void RemoveSnapshots::GenerateRequirements(TableUpdateContext& context) const {
// RemoveSnapshots doesn't generate any requirements
Expand All @@ -322,7 +324,7 @@ std::unique_ptr<TableUpdate> RemoveSnapshots::Clone() const {
// RemoveSnapshotRef

// Applies this update to `builder` by removing the ref named `ref_name_`.
void RemoveSnapshotRef::ApplyTo(TableMetadataBuilder& builder) const {
  builder.RemoveRef(ref_name_);
}

void RemoveSnapshotRef::GenerateRequirements(TableUpdateContext& context) const {
Expand Down
1 change: 1 addition & 0 deletions src/iceberg/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ if(ICEBERG_BUILD_BUNDLE)
add_iceberg_test(table_update_test
USE_BUNDLE
SOURCES
expire_snapshots_test.cc
transaction_test.cc
update_partition_spec_test.cc
update_properties_test.cc
Expand Down
68 changes: 68 additions & 0 deletions src/iceberg/test/expire_snapshots_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include "iceberg/update/expire_snapshots.h"

#include "iceberg/test/matchers.h"
#include "iceberg/test/update_test_base.h"

namespace iceberg {

// Fixture for the ExpireSnapshots tests. It adds nothing of its own; the tests use
// the `table_` member, presumably provided by UpdateTestBase (confirm there).
class ExpireSnapshotsTest : public UpdateTestBase {};

// With no explicit configuration, applying the update is expected to select exactly
// one snapshot of the fixture table for removal.
TEST_F(ExpireSnapshotsTest, DefaultExpireByAge) {
  ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewExpireSnapshots());
  ICEBERG_UNWRAP_OR_FAIL(auto result, update->Apply());
  // Fatal assertion: stop here on a size mismatch so that at(0) below is never
  // evaluated on an empty vector (which would throw std::out_of_range).
  ASSERT_EQ(result.snapshot_ids_to_remove.size(), 1U);
  EXPECT_EQ(result.snapshot_ids_to_remove.at(0), 3051729675574597004);
}

// After RetainLast(2), applying the update is expected to select neither snapshots
// nor refs for removal.
TEST_F(ExpireSnapshotsTest, KeepAll) {
  ICEBERG_UNWRAP_OR_FAIL(auto expire, table_->NewExpireSnapshots());
  expire->RetainLast(2);

  ICEBERG_UNWRAP_OR_FAIL(auto outcome, expire->Apply());

  EXPECT_TRUE(outcome.snapshot_ids_to_remove.empty());
  EXPECT_TRUE(outcome.refs_to_remove.empty());
}

// Explicitly expiring one snapshot ID is expected to select exactly that snapshot
// for removal.
TEST_F(ExpireSnapshotsTest, ExpireById) {
  ICEBERG_UNWRAP_OR_FAIL(auto update, table_->NewExpireSnapshots());
  update->ExpireSnapshotId(3051729675574597004);
  ICEBERG_UNWRAP_OR_FAIL(auto result, update->Apply());
  // Fatal assertion: stop here on a size mismatch so that at(0) below is never
  // evaluated on an empty vector (which would throw std::out_of_range).
  ASSERT_EQ(result.snapshot_ids_to_remove.size(), 1U);
  EXPECT_EQ(result.snapshot_ids_to_remove.at(0), 3051729675574597004);
}

// Checks ExpireOlderThan with a cutoff just below and just above a pivot value:
// nothing is expected to expire below it, one snapshot above it.
TEST_F(ExpireSnapshotsTest, ExpireOlderThan) {
  // NOTE(review): presumably the timestamp of the oldest snapshot in the fixture
  // table -- confirm against the metadata loaded by UpdateTestBase.
  constexpr int64_t kPivot = 1515100955770;

  struct Scenario {
    int64_t cutoff;
    size_t expired_count;
  };
  const Scenario scenarios[] = {
      {.cutoff = kPivot - 1, .expired_count = 0},
      {.cutoff = kPivot + 1, .expired_count = 1},
  };

  for (const Scenario& scenario : scenarios) {
    ICEBERG_UNWRAP_OR_FAIL(auto expire, table_->NewExpireSnapshots());
    expire->ExpireOlderThan(scenario.cutoff);
    ICEBERG_UNWRAP_OR_FAIL(auto outcome, expire->Apply());
    EXPECT_EQ(outcome.snapshot_ids_to_remove.size(), scenario.expired_count);
  }
}

} // namespace iceberg
Loading
Loading