Skip to content

Commit 455ebf1

Browse files
duckdblabs-bot authored and github-actions[bot] committed
Update vendored DuckDB sources to 60a92c8693
1 parent ffed2ab commit 455ebf1

File tree

25 files changed

+301
-97
lines changed

25 files changed

+301
-97
lines changed

src/duckdb/extension/core_functions/function_list.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,7 @@ static const StaticFunctionDefinition core_functions[] = {
294294
DUCKDB_SCALAR_FUNCTION_SET(ParseDirnameFun),
295295
DUCKDB_SCALAR_FUNCTION_SET(ParseDirpathFun),
296296
DUCKDB_SCALAR_FUNCTION_SET(ParseFilenameFun),
297+
DUCKDB_SCALAR_FUNCTION(ParseFormattedBytesFun),
297298
DUCKDB_SCALAR_FUNCTION_SET(ParsePathFun),
298299
DUCKDB_SCALAR_FUNCTION(PiFun),
299300
DUCKDB_SCALAR_FUNCTION_ALIAS(PositionFun),

src/duckdb/extension/core_functions/include/core_functions/scalar/string_functions.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,16 @@ struct FormatreadablesizeFun {
113113
static constexpr const char *Name = "formatReadableSize";
114114
};
115115

116+
// Descriptor for the `parse_formatted_bytes` scalar function: the string constants below hold its
// SQL name, parameter list, description, usage example and categories; GetFunction() is declared
// here and defined elsewhere.
struct ParseFormattedBytesFun {
117+
static constexpr const char *Name = "parse_formatted_bytes";
118+
static constexpr const char *Parameters = "string";
119+
static constexpr const char *Description = "Parses a human-readable representation of a size in bytes into an integer.";
120+
static constexpr const char *Example = "parse_formatted_bytes('16 KiB')";
121+
static constexpr const char *Categories = "string,numeric";
122+
123+
// Returns the ScalarFunction object for this function.
static ScalarFunction GetFunction();
124+
};
125+
116126
struct FormatreadabledecimalsizeFun {
117127
static constexpr const char *Name = "formatReadableDecimalSize";
118128
static constexpr const char *Parameters = "integer";
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#include "duckdb/function/scalar_function.hpp"
2+
3+
#include "core_functions/scalar/string_functions.hpp"
4+
5+
namespace duckdb {
6+
//! Scalar implementation of parse_formatted_bytes: converts every input string (e.g. '16 KiB') into a byte count.
//! Rows are driven through UnaryExecutor; the actual parsing is delegated to StringUtil::ParseFormattedBytes.
//! \param args   Input chunk; only the first column is read
//! \param state  Expression state (unused)
//! \param result Output vector receiving one idx_t per input row
static void ParseFormattedBytesFunction(DataChunk &args, ExpressionState &state, Vector &result) {
	// Bind the argument column by reference; there is no need to construct a second Vector object for it
	auto &input = args.data[0];
	UnaryExecutor::Execute<string_t, idx_t>(input, result, args.size(), [&](string_t str) {
		// Exceptions thrown by ParseFormattedBytes for unparsable input are deliberately not caught here:
		// they propagate to the caller unchanged
		return StringUtil::ParseFormattedBytes(str.GetString());
	});
}
13+
14+
//! Builds the scalar function object for parse_formatted_bytes: a single VARCHAR argument, UBIGINT result,
//! executed by ParseFormattedBytesFunction.
ScalarFunction ParseFormattedBytesFun::GetFunction() {
	ScalarFunction parse_function({LogicalType::VARCHAR}, LogicalType::UBIGINT, ParseFormattedBytesFunction);
	return parse_function;
}
17+
} // namespace duckdb

src/duckdb/src/common/enum_util.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@
174174
#include "duckdb/planner/bound_result_modifier.hpp"
175175
#include "duckdb/planner/table_filter.hpp"
176176
#include "duckdb/storage/buffer/block_handle.hpp"
177-
#include "duckdb/storage/caching_file_system_wrapper.hpp"
177+
#include "duckdb/storage/caching_mode.hpp"
178178
#include "duckdb/storage/compression/bitpacking.hpp"
179179
#include "duckdb/storage/magic_bytes.hpp"
180180
#include "duckdb/storage/statistics/base_statistics.hpp"
@@ -913,19 +913,20 @@ CacheValidationMode EnumUtil::FromString<CacheValidationMode>(const char *value)
913913
const StringUtil::EnumStringLiteral *GetCachingModeValues() {
914914
static constexpr StringUtil::EnumStringLiteral values[] {
915915
{ static_cast<uint32_t>(CachingMode::ALWAYS_CACHE), "ALWAYS_CACHE" },
916-
{ static_cast<uint32_t>(CachingMode::CACHE_REMOTE_ONLY), "CACHE_REMOTE_ONLY" }
916+
{ static_cast<uint32_t>(CachingMode::CACHE_REMOTE_ONLY), "CACHE_REMOTE_ONLY" },
917+
{ static_cast<uint32_t>(CachingMode::NO_CACHING), "NO_CACHING" }
917918
};
918919
return values;
919920
}
920921

921922
template<>
922923
const char* EnumUtil::ToChars<CachingMode>(CachingMode value) {
923-
return StringUtil::EnumToString(GetCachingModeValues(), 2, "CachingMode", static_cast<uint32_t>(value));
924+
return StringUtil::EnumToString(GetCachingModeValues(), 3, "CachingMode", static_cast<uint32_t>(value));
924925
}
925926

926927
template<>
927928
CachingMode EnumUtil::FromString<CachingMode>(const char *value) {
928-
return static_cast<CachingMode>(StringUtil::StringToEnum(GetCachingModeValues(), 2, "CachingMode", value));
929+
return static_cast<CachingMode>(StringUtil::StringToEnum(GetCachingModeValues(), 3, "CachingMode", value));
929930
}
930931

931932
const StringUtil::EnumStringLiteral *GetCatalogLookupBehaviorValues() {

src/duckdb/src/common/sort/sorted_run_merger.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ SortedRunMergerLocalState::SortedRunMergerLocalState(SortedRunMergerGlobalState
299299
: iterator_state_type(gstate.iterator_state_type), sort_key_type(gstate.sort_key_type),
300300
task(SortedRunMergerTask::FINISHED), run_boundaries(gstate.num_runs),
301301
merged_partition_count(DConstants::INVALID_INDEX), merged_partition_index(DConstants::INVALID_INDEX),
302-
sorted_run_scan_state(gstate.context, gstate.merger.sort), sort_key_pointers(LogicalType::POINTER) {
302+
sort_key_pointers(LogicalType::POINTER), sorted_run_scan_state(gstate.context, gstate.merger.sort) {
303303
for (const auto &run : gstate.merger.sorted_runs) {
304304
auto &key_data = *run->key_data;
305305
switch (iterator_state_type) {

src/duckdb/src/common/string_util.cpp

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "jaro_winkler.hpp"
1212
#include "utf8proc_wrapper.hpp"
1313
#include "duckdb/common/types/string_type.hpp"
14+
#include "duckdb/common/operator/cast_operators.hpp"
1415

1516
#include <algorithm>
1617
#include <cctype>
@@ -266,6 +267,89 @@ string StringUtil::BytesToHumanReadableString(idx_t bytes, idx_t multiplier) {
266267
return to_string(array[0]) + (bytes == 1 ? " byte" : " bytes");
267268
}
268269

270+
//! Parses a human-readable size such as "1GB", "16 KiB" or "1.5 megabytes" into a number of bytes.
//!
//! The input is read as: optional leading whitespace, a number (digits, '.', 'e'/'E' and '-'), optional whitespace,
//! and a case-insensitive unit token that extends up to the next whitespace character. Anything following the unit
//! token is ignored. byte/kilobyte/megabyte/gigabyte/terabyte (and their plural and abbreviated forms) are powers
//! of 1000; KiB/MiB/GiB/TiB are powers of 1024.
//!
//! \param arg    The text to parse
//! \param result Receives the size in bytes on success; left untouched on failure
//! \return An empty string on success, otherwise a message describing why the input was rejected
string StringUtil::TryParseFormattedBytes(const string &arg, idx_t &result) {
	const idx_t arg_size = arg.size();

	// skip leading whitespace
	idx_t idx = 0;
	while (idx < arg_size && StringUtil::CharacterIsSpace(arg[idx])) {
		idx++;
	}

	// split based on the number/non-number
	const idx_t num_start = idx;
	while (idx < arg_size && ((arg[idx] >= '0' && arg[idx] <= '9') || arg[idx] == '.' || arg[idx] == 'e' ||
	                          arg[idx] == 'E' || arg[idx] == '-')) {
		idx++;
	}
	if (idx == num_start) {
		return "Memory must have a number (e.g. 1GB)";
	}
	string number = arg.substr(num_start, idx - num_start);

	// try to parse the number
	double limit = 0;
	bool success = TryCast::Operation<string_t, double>(string_t(number), limit);
	if (!success) {
		return StringUtil::Format("Invalid memory limit: '%s'", number);
	}

	// now parse the memory limit unit (e.g. bytes, gb, etc)
	while (idx < arg_size && StringUtil::CharacterIsSpace(arg[idx])) {
		idx++;
	}
	const idx_t start = idx;
	while (idx < arg_size && !StringUtil::CharacterIsSpace(arg[idx])) {
		idx++;
	}

	if (limit < 0) {
		return "Memory cannot be negative";
	}

	string unit = StringUtil::Lower(arg.substr(start, idx - start));
	idx_t multiplier;
	if (unit == "byte" || unit == "bytes" || unit == "b") {
		multiplier = 1;
	} else if (unit == "kilobyte" || unit == "kilobytes" || unit == "kb" || unit == "k") {
		multiplier = 1000LL;
	} else if (unit == "megabyte" || unit == "megabytes" || unit == "mb" || unit == "m") {
		multiplier = 1000LL * 1000LL;
	} else if (unit == "gigabyte" || unit == "gigabytes" || unit == "gb" || unit == "g") {
		multiplier = 1000LL * 1000LL * 1000LL;
	} else if (unit == "terabyte" || unit == "terabytes" || unit == "tb" || unit == "t") {
		multiplier = 1000LL * 1000LL * 1000LL * 1000LL;
	} else if (unit == "kib") {
		multiplier = 1024LL;
	} else if (unit == "mib") {
		multiplier = 1024LL * 1024LL;
	} else if (unit == "gib") {
		multiplier = 1024LL * 1024LL * 1024LL;
	} else if (unit == "tib") {
		multiplier = 1024LL * 1024LL * 1024LL * 1024LL;
	} else {
		return StringUtil::Format("Unknown unit for memory: '%s' (expected: KB, MB, GB, TB for 1000^i units or KiB, "
		                          "MiB, GiB, TiB for 1024^i units)",
		                          unit);
	}

	// Make sure the result fits into `idx_t`.
	// NOTE: assuming idx_t is a 64-bit unsigned integer, its maximum is not exactly representable as a double and
	// rounds up to one past the valid range. A product equal to that bound must therefore be rejected as well
	// (hence `>=` rather than `>`): converting an out-of-range double to an integer is undefined behavior.
	constexpr double max_value = static_cast<double>(NumericLimits<idx_t>::Maximum());
	const double double_multiplier = static_cast<double>(multiplier);

	if (limit >= (max_value / double_multiplier)) {
		return "Memory value out of range: value is too large";
	}

	result = LossyNumericCast<idx_t>(double_multiplier * limit);
	return string();
}
343+
344+
//! Throwing counterpart of TryParseFormattedBytes: returns the parsed number of bytes, or raises an
//! InvalidInputException carrying the parser's error message when the input is rejected.
idx_t StringUtil::ParseFormattedBytes(const string &arg) {
	idx_t parsed_bytes;
	const string failure_message = TryParseFormattedBytes(arg, parsed_bytes);
	if (failure_message.empty()) {
		return parsed_bytes;
	}
	throw InvalidInputException(failure_message);
}
352+
269353
string StringUtil::Upper(const string &str) {
270354
string copy(str);
271355
transform(copy.begin(), copy.end(), copy.begin(), [](unsigned char c) { return std::toupper(c); });

src/duckdb/src/common/virtual_file_system.cpp

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include "duckdb/common/pipe_file_system.hpp"
66
#include "duckdb/common/string_util.hpp"
77
#include "duckdb/main/client_context.hpp"
8+
#include "duckdb/storage/caching_file_system_wrapper.hpp"
89

910
namespace duckdb {
1011

@@ -33,14 +34,29 @@ unique_ptr<FileHandle> VirtualFileSystem::OpenFileExtended(const OpenFileInfo &f
3334
compression = FileCompressionType::UNCOMPRESSED;
3435
}
3536
}
36-
// open the base file handle in UNCOMPRESSED mode
3737

38+
// open the base file handle in UNCOMPRESSED mode
3839
flags.SetCompression(FileCompressionType::UNCOMPRESSED);
39-
auto file_handle = FindFileSystem(file.path, opener).OpenFile(file, flags, opener);
40+
41+
auto &internal_filesystem = FindFileSystem(file.path, opener);
42+
43+
// File handle gets created.
44+
unique_ptr<FileHandle> file_handle = nullptr;
45+
46+
// Handle caching logic.
47+
if (flags.GetCachingMode() != CachingMode::NO_CACHING) {
48+
auto caching_filesystem =
49+
make_shared_ptr<CachingFileSystemWrapper>(internal_filesystem, opener, flags.GetCachingMode());
50+
// caching filesystem's lifecycle is extended inside of caching file handle.
51+
file_handle = caching_filesystem->OpenFile(file, flags, opener);
52+
} else {
53+
file_handle = internal_filesystem.OpenFile(file, flags, opener);
54+
}
4055
if (!file_handle) {
4156
return nullptr;
4257
}
4358

59+
// Evaluate and apply compression option then.
4460
const auto context = !flags.MultiClientAccess() ? FileOpener::TryGetClientContext(opener) : QueryContext();
4561
if (file_handle->GetType() == FileType::FILE_TYPE_FIFO) {
4662
file_handle = PipeFileSystem::OpenPipe(context, std::move(file_handle));

src/duckdb/src/function/scalar/string/concat.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -305,9 +305,22 @@ unique_ptr<FunctionData> BindConcatFunctionInternal(ClientContext &context, Scal
305305
all_null = false;
306306
}
307307
}
308-
if (list_concat || all_null) {
308+
if (list_concat) {
309309
return BindListConcat(context, bound_function, arguments, is_operator);
310310
}
311+
if (all_null) {
312+
if (is_operator) {
313+
SetArgumentType(bound_function, LogicalTypeId::SQLNULL, is_operator);
314+
return make_uniq<ConcatFunctionData>(bound_function.GetReturnType(), is_operator);
315+
} else if (bound_function.varargs.id() == LogicalTypeId::LIST ||
316+
bound_function.varargs.id() == LogicalTypeId::ARRAY) {
317+
SetArgumentType(bound_function, LogicalTypeId::SQLNULL, is_operator);
318+
return make_uniq<ConcatFunctionData>(bound_function.GetReturnType(), is_operator);
319+
} else {
320+
SetArgumentType(bound_function, LogicalTypeId::VARCHAR, is_operator);
321+
return make_uniq<ConcatFunctionData>(bound_function.GetReturnType(), is_operator);
322+
}
323+
}
311324
auto return_type = all_blob ? LogicalType::BLOB : LogicalType::VARCHAR;
312325

313326
// we can now assume that the input is a string or castable to a string

src/duckdb/src/function/table/direct_file_reader.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include "duckdb/common/serializer/memory_stream.hpp"
44
#include "duckdb/function/table/read_file.hpp"
55
#include "duckdb/storage/caching_file_system_wrapper.hpp"
6+
#include "duckdb/storage/caching_mode.hpp"
67

78
namespace duckdb {
89

@@ -55,7 +56,7 @@ AsyncResult DirectFileReader::Scan(ClientContext &context, GlobalTableFunctionSt
5556

5657
auto files = state.file_list;
5758

58-
auto caching_fs = CachingFileSystemWrapper::Get(context);
59+
auto &fs = FileSystem::GetFileSystem(context);
5960
const idx_t out_idx = 0;
6061

6162
// We utilize projection pushdown here to only read the file content if the 'data' column is requested
@@ -67,11 +68,12 @@ AsyncResult DirectFileReader::Scan(ClientContext &context, GlobalTableFunctionSt
6768
if (FileSystem::IsRemoteFile(file.path)) {
6869
flags |= FileFlags::FILE_FLAGS_DIRECT_IO;
6970
}
70-
file_handle = caching_fs.OpenFile(file, flags);
71+
flags.SetCachingMode(CachingMode::CACHE_REMOTE_ONLY);
72+
file_handle = fs.OpenFile(file, flags);
7173
} else {
7274
// At least verify that the file exist
7375
// The globbing behavior in remote filesystems can lead to files being listed that do not actually exist
74-
if (FileSystem::IsRemoteFile(file.path) && !caching_fs.FileExists(file.path)) {
76+
if (FileSystem::IsRemoteFile(file.path) && !fs.FileExists(file.path)) {
7577
output.SetCardinality(0);
7678
done = true;
7779
return SourceResultType::FINISHED;
@@ -146,7 +148,7 @@ AsyncResult DirectFileReader::Scan(ClientContext &context, GlobalTableFunctionSt
146148
// This can sometimes fail (e.g. httpfs file system cant always parse the last modified time
147149
// correctly)
148150
try {
149-
const auto timestamp_seconds = caching_fs.GetLastModifiedTime(*file_handle);
151+
const auto timestamp_seconds = fs.GetLastModifiedTime(*file_handle);
150152
FlatVector::GetData<timestamp_tz_t>(last_modified_vector)[out_idx] =
151153
timestamp_tz_t(timestamp_seconds);
152154
} catch (std::exception &ex) {

src/duckdb/src/function/table/version/pragma_version.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#ifndef DUCKDB_PATCH_VERSION
2-
#define DUCKDB_PATCH_VERSION "0-dev5206"
2+
#define DUCKDB_PATCH_VERSION "0-dev5253"
33
#endif
44
#ifndef DUCKDB_MINOR_VERSION
55
#define DUCKDB_MINOR_VERSION 5
@@ -8,10 +8,10 @@
88
#define DUCKDB_MAJOR_VERSION 1
99
#endif
1010
#ifndef DUCKDB_VERSION
11-
#define DUCKDB_VERSION "v1.5.0-dev5206"
11+
#define DUCKDB_VERSION "v1.5.0-dev5253"
1212
#endif
1313
#ifndef DUCKDB_SOURCE_ID
14-
#define DUCKDB_SOURCE_ID "99a37abe11"
14+
#define DUCKDB_SOURCE_ID "60a92c8693"
1515
#endif
1616
#include "duckdb/function/table/system_functions.hpp"
1717
#include "duckdb/main/database.hpp"

0 commit comments

Comments
 (0)