Skip to content

Commit 2ac9be5

Browse files
authored
Merge pull request #1113 from suketa/example_of_issue920
create sample script for ISSUE930
2 parents e829b4f + f865551 commit 2ac9be5

File tree

2 files changed

+116
-0
lines changed

2 files changed

+116
-0
lines changed

sample/issue930.rb

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# frozen_string_literal: true
2+
3+
require 'duckdb'
4+
require 'csv'
5+
6+
require 'stringio'
7+
8+
module DuckDB
  class Connection
    # Registers the CSV stream +io+ as a DuckDB table function named +name+,
    # so it can be queried via "SELECT * FROM name()".
    # Every column is exposed as VARCHAR (no type inference); +csv_options+
    # are forwarded to CSV.new (e.g. headers: true).
    def register_as_table(name, io, csv_options: {}) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
      csv = CSV.new(io, **csv_options)
      # Read the first row to discover header names, then rewind so the
      # table function scans from the beginning of the data.
      headers = csv.first.headers
      csv.rewind
      columns = headers.each_with_object({}) { |header, hash| hash[header] = LogicalType::VARCHAR }
      tf = DuckDB::TableFunction.create(
        name:,
        columns:
      ) do |_func_info, output|
        # DuckDB invokes this block repeatedly; the block's return value is
        # the number of rows produced this invocation (0 signals end of data).
        line = csv.readline
        if line
          # cell is a [header, value] pair from the CSV::Row; write the value
          # into column +index+ of the single row (row 0) emitted per call.
          line.each_with_index do |cell, index|
            output.set_value(index, 0, cell[1])
          end
          1
        else
          # Exhausted: rewind so the table function can be scanned again
          # by a subsequent query.
          csv.rewind
          0
        end
      end
      register_table_function(tf)
    end
  end
end
34+
35+
# Build an in-memory CSV: three fixed rows followed by 30,000 generated ones.
parts = ["id,name,age\n1,Alice,30\n2,Bob,25\n3,Charlie,35"]
30_000.times { |i| parts << "\n#{i + 4},Name#{i + 4},#{rand(0..100)}" }
io = StringIO.new(parts.join)

database = DuckDB::Database.open
connection = database.connect
# Single-threaded execution keeps the table-function callback sequence simple.
connection.query('SET threads=1')
connection.register_as_table('csv_io', io, csv_options: { headers: true })
rows = connection.query('SELECT * FROM csv_io()').to_a

p rows
# All columns were registered as VARCHAR, so values come back as strings.
puts rows.first.first == '1'
puts rows.first[1] == 'Alice'

sample/issue930_benchmark.rb

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# frozen_string_literal: true
2+
3+
require 'duckdb'
4+
require 'csv'
5+
6+
require 'stringio'
7+
8+
module DuckDB
  class Connection
    # Registers the CSV stream +io+ as a DuckDB table function named +name+,
    # queryable via "SELECT * FROM name()". Every column is exposed as
    # VARCHAR; +csv_options+ are forwarded to CSV.new (e.g. headers: true).
    def register_as_table_with_table_function(name, io, csv_options: {}) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
      csv = CSV.new(io, **csv_options)
      # Peek at the first row for header names, then rewind to the start.
      headers = csv.first.headers
      csv.rewind
      columns = headers.each_with_object({}) { |header, hash| hash[header] = LogicalType::VARCHAR }
      tf = DuckDB::TableFunction.create(
        name:,
        columns:
      ) do |_func_info, output|
        # Invoked repeatedly by DuckDB; return the number of rows produced
        # (1 per call here, 0 once the CSV is exhausted).
        line = csv.readline
        if line
          line.each_with_index do |cell, index|
            # cell is a [header, value] pair from CSV::Row.
            output.set_value(index, 0, cell[1])
          end
          1
        else
          # Rewind so the function can be scanned again by a later query.
          csv.rewind
          0
        end
      end
      register_table_function(tf)
    end

    # Materializes the CSV stream +io+ into a real table +name+ (all VARCHAR
    # columns) with one INSERT per CSV row. +name+ and the CSV header names
    # are interpolated into SQL directly and must come from trusted input.
    def register_as_table_with_create_table(name, io, csv_options: {})
      csv = CSV.new(io, **csv_options)
      headers = csv.first.headers
      csv.rewind
      execute("CREATE OR REPLACE TABLE #{name} (#{headers.map { |h| "#{h} VARCHAR" }.join(', ')})")
      csv.each do |row|
        # Double embedded single quotes so values containing "'" neither
        # break nor inject into the generated INSERT statement.
        values = row.map { |cell| "'#{cell[1].to_s.gsub("'", "''")}'" }.join(', ')
        execute("INSERT INTO #{name} VALUES (#{values})")
      end
    end
  end
end
45+
46+
# Benchmark: compare exposing a CSV stream through a DuckDB table function
# against materializing it with CREATE TABLE + row-by-row INSERTs.

# Generate 100,000 data rows of (id, name, random age).
csv_data = 'id,name,age'
csv_data += 100_000.times.map { |i| "\n#{i + 1},Name#{i + 1},#{rand(0..100)}" }.join
csv_io = StringIO.new(csv_data)

db = DuckDB::Database.open
con = db.connect
con.query('SET threads=1')

start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
con.register_as_table_with_table_function('csv_tf', csv_io, csv_options: { headers: true })
con.query('SELECT * FROM csv_tf()').to_a
end_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)

# The first approach consumed csv_io; rewind explicitly instead of relying
# on the table function's end-of-data rewind side effect.
csv_io.rewind

start_time2 = Process.clock_gettime(Process::CLOCK_MONOTONIC)
con.register_as_table_with_create_table('csv_ct', csv_io, csv_options: { headers: true })
con.query('SELECT * FROM csv_ct').to_a
end_time2 = Process.clock_gettime(Process::CLOCK_MONOTONIC)

con.close
db.close

puts "Time taken for table function approach: #{end_time - start_time} seconds"
puts "Time taken for CREATE TABLE approach: #{end_time2 - start_time2} seconds"

0 commit comments

Comments
 (0)