Skip to content

Commit 8241dd3

Browse files
suketa and Copilot committed
Add BLOB support with test for binary data
Add Vector#assign_string_element_len to properly handle BLOB data containing null bytes. The existing assign_string_element uses StringValueCStr, which stops at null bytes, making it unsuitable for binary data.

Changes:
- Add Vector#assign_string_element_len using StringValuePtr + RSTRING_LEN
- Update DataChunk#set_value to use the _len variant for the BLOB type
- Add a comprehensive BLOB test with null bytes
- Update CHANGELOG.md

Test covers:
- Binary data with leading null bytes (\x00\x01\x02\x03)
- Binary data with embedded null bytes (binary\x00data)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 1ccc7a1 commit 8241dd3

File tree

4 files changed

+69
-2
lines changed

4 files changed

+69
-2
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ All notable changes to this project will be documented in this file.
66
- add DuckDB::DataChunk#set_value method for high-level value writing with automatic type conversion.
77
- add DuckDB::MemoryHelper write methods for all numeric types (boolean, tinyint, smallint, utinyint, usmallint, uinteger, ubigint, float).
88
- add DuckDB::Vector#set_validity method to mark rows as NULL or valid.
9+
- add DuckDB::Vector#assign_string_element_len method for BLOB data with null bytes.
910
- add DuckDB::Vector#logical_type method to get the column type of a vector.
1011
- add DuckDB::TableFunction class (Phase 1: Core container).
1112
- add DuckDB::TableFunction#initialize for standard Ruby allocation pattern.

ext/duckdb/vector.c

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ static size_t memsize(const void *p);
88
static VALUE rbduckdb_vector_get_data(VALUE self);
99
static VALUE rbduckdb_vector_get_validity(VALUE self);
1010
static VALUE rbduckdb_vector_assign_string_element(VALUE self, VALUE index, VALUE str);
11+
static VALUE rbduckdb_vector_assign_string_element_len(VALUE self, VALUE index, VALUE str);
1112
static VALUE rbduckdb_vector_logical_type(VALUE self);
1213
static VALUE rbduckdb_vector_set_validity(VALUE self, VALUE index, VALUE valid);
1314

@@ -104,6 +105,32 @@ static VALUE rbduckdb_vector_assign_string_element(VALUE self, VALUE index, VALU
104105
return self;
105106
}
106107

108+
/*
109+
* call-seq:
110+
* vector.assign_string_element_len(index, str) -> self
111+
*
112+
* Assigns a string/blob value at the specified index with explicit length.
113+
* Supports strings containing null bytes (for BLOB columns).
114+
*
115+
* vector.assign_string_element_len(0, "\x00\x01\x02\x03")
116+
*/
117+
static VALUE rbduckdb_vector_assign_string_element_len(VALUE self, VALUE index, VALUE str) {
118+
rubyDuckDBVector *ctx;
119+
idx_t idx;
120+
const char *string_val;
121+
idx_t str_len;
122+
123+
TypedData_Get_Struct(self, rubyDuckDBVector, &vector_data_type, ctx);
124+
125+
idx = NUM2ULL(index);
126+
string_val = StringValuePtr(str);
127+
str_len = RSTRING_LEN(str);
128+
129+
duckdb_vector_assign_string_element_len(ctx->vector, idx, string_val, str_len);
130+
131+
return self;
132+
}
133+
107134
/*
108135
* call-seq:
109136
* vector.logical_type -> DuckDB::LogicalType
@@ -168,6 +195,7 @@ void rbduckdb_init_duckdb_vector(void) {
168195
rb_define_method(cDuckDBVector, "get_data", rbduckdb_vector_get_data, 0);
169196
rb_define_method(cDuckDBVector, "get_validity", rbduckdb_vector_get_validity, 0);
170197
rb_define_method(cDuckDBVector, "assign_string_element", rbduckdb_vector_assign_string_element, 2);
198+
rb_define_method(cDuckDBVector, "assign_string_element_len", rbduckdb_vector_assign_string_element_len, 2);
171199
rb_define_method(cDuckDBVector, "logical_type", rbduckdb_vector_logical_type, 0);
172200
rb_define_method(cDuckDBVector, "set_validity", rbduckdb_vector_set_validity, 2);
173201
}

lib/duckdb/data_chunk.rb

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,10 @@ def set_value(col_idx, row_idx, value)
9090
when :double
9191
data = vector.get_data
9292
MemoryHelper.write_double(data, row_idx, value)
93-
when :varchar, :blob
93+
when :varchar
9494
vector.assign_string_element(row_idx, value.to_s)
95+
when :blob
96+
vector.assign_string_element_len(row_idx, value.to_s)
9597
else
9698
raise ArgumentError, "Unsupported type for DataChunk#set_value: #{type_id}"
9799
end

test/duckdb_test/data_chunk_test.rb

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,43 @@ def test_data_chunk_set_value_null # rubocop:disable Metrics/AbcSize, Metrics/Me
267267
assert_equal 100, rows[2].first
268268
end
269269

270-
# Test 10: DataChunk#set_value with multiple columns
270+
# Test 10: DataChunk#set_value with BLOB
271+
def test_data_chunk_set_value_blob # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
  @conn.execute('SET threads=1')

  # One payload starts with a null byte, the other has one in the middle.
  payloads = ["\x00\x01\x02\x03".b, "binary\x00data".b]
  emitted = false

  blob_function = DuckDB::TableFunction.new
  blob_function.name = 'test_set_value_blob'
  blob_function.bind { |bind_info| bind_info.add_result_column('data', DuckDB::LogicalType::BLOB) }
  blob_function.init { |_init_info| emitted = false }

  # First call writes both rows; any later call reports an empty chunk.
  blob_function.execute do |_func_info, output|
    if emitted
      output.size = 0
    else
      payloads.each_with_index { |blob, row_idx| output.set_value(0, row_idx, blob) }
      output.size = 2
      emitted = true
    end
  end

  @conn.register_table_function(blob_function)
  rows = @conn.query('SELECT * FROM test_set_value_blob()').each.to_a

  assert_equal 2, rows.length
  assert_equal "\x00\x01\x02\x03".b, rows[0].first
  assert_equal "binary\x00data".b, rows[1].first
end
305+
306+
# Test 11: DataChunk#set_value with multiple columns
271307
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Minitest/MultipleAssertions
272308
def test_data_chunk_set_value_multiple_columns
273309
@conn.execute('SET threads=1')

0 commit comments

Comments
 (0)