Skip to content

Commit 36c0d34

Browse files
schlubbiCopilotarielvalentin
authored
feat: optimize extract_statement_type regex and encoding in helpers-mysql (#2088)
* perf: optimize extract_statement_type regex and encoding Remove trailing .* from QUERY_NAME_REGEX to avoid scanning the entire SQL string after matching the keyword. Only the first capture group (the SQL operation) is used — the .* forced O(n) cost for no benefit. Skip utf8_encode when the string is already valid UTF-8, which is the common case for strings from ActiveRecord/Trilogy. Fall back to binary: true encoding only for non-UTF-8 or invalid byte sequences. Benchmarked improvements (combined): - Short queries (53c): 1.91µs → 833ns (2.3×) - Long JOINs (768c): 6.97µs → 1.13µs (6.2×) - IN clauses (2,406c): 15.19µs → 886ns (17.1×) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * style: fix rubocop offenses - Use modifier unless for single-line body - Use unary plus (+) instead of .dup for unfreezing strings - Use single-quoted strings where interpolation is not needed - Remove redundant .freeze on string literals (frozen_string_literal) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --------- Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Co-authored-by: Ariel Valentin <arielvalentin@users.noreply.github.com>
1 parent 57bf94f commit 36c0d34

File tree

2 files changed

+141
-26
lines changed

2 files changed

+141
-26
lines changed

helpers/mysql/lib/opentelemetry/helpers/mysql.rb

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ module MySQL
4242
# Ignore query names that might appear in comments prepended to the
4343
# statement.
4444
PREPENDED_COMMENTS_REGEX = %r{(?:\/\*.*?\*\/)}m
45-
QUERY_NAME_REGEX = Regexp.new("^\s*(?:#{PREPENDED_COMMENTS_REGEX})?\s*\\b(#{QUERY_NAMES.join('|')})\\b.*", Regexp::IGNORECASE)
45+
QUERY_NAME_REGEX = Regexp.new("^\s*(?:#{PREPENDED_COMMENTS_REGEX})?\s*\\b(#{QUERY_NAMES.join('|')})\\b", Regexp::IGNORECASE)
4646

4747
# This is a span naming utility intended for use in MySQL database
4848
# adapter instrumentation.
@@ -68,9 +68,11 @@ def database_span_name(sql, operation, database_name, config)
6868

6969
# @api private
7070
def extract_statement_type(sql)
71-
sql = OpenTelemetry::Common::Utilities.utf8_encode(sql, binary: true)
71+
return unless sql
7272

73-
QUERY_NAME_REGEX.match(sql) { |match| match[1].downcase } unless sql.nil?
73+
sql = OpenTelemetry::Common::Utilities.utf8_encode(sql, binary: true) unless sql.encoding == ::Encoding::UTF_8 && sql.valid_encoding?
74+
75+
QUERY_NAME_REGEX.match(sql) { |match| match[1].downcase }
7476
rescue StandardError => e
7577
OpenTelemetry.handle_error(message: 'Error extracting SQL statement type', exception: e)
7678
nil

helpers/mysql/test/helpers/mysql_test.rb

Lines changed: 136 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -102,55 +102,168 @@
102102
end
103103
end
104104

105-
describe 'when sql contains invalid byte sequences' do
106-
let(:sql) { "SELECT * from users where users.id = 1 and users.email = 'test@test.com\255'" }
105+
describe 'recognizes all supported query names' do
106+
{
107+
'SELECT 1' => 'select',
108+
'INSERT INTO users VALUES (1)' => 'insert',
109+
'UPDATE users SET name = "a"' => 'update',
110+
'DELETE FROM users' => 'delete',
111+
'BEGIN' => 'begin',
112+
'COMMIT' => 'commit',
113+
'ROLLBACK' => 'rollback',
114+
'SAVEPOINT sp1' => 'savepoint',
115+
'RELEASE SAVEPOINT sp1' => 'release savepoint',
116+
'EXPLAIN SELECT 1' => 'explain',
117+
'DROP DATABASE test_db' => 'drop database',
118+
'DROP TABLE users' => 'drop table',
119+
'CREATE DATABASE test_db' => 'create database',
120+
'CREATE TABLE users (id INT)' => 'create table',
121+
"SET NAMES 'utf8mb4'" => 'set names'
122+
}.each do |query, expected|
123+
it "extracts '#{expected}' from: #{query[0..40]}" do
124+
assert_equal(expected, OpenTelemetry::Helpers::MySQL.extract_statement_type(query))
125+
end
126+
end
127+
end
107128

108-
it 'extracts the statement' do
109-
assert_equal('select', extract_statement_type)
129+
describe 'case insensitivity' do
130+
it 'handles uppercase' do
131+
assert_equal('select', OpenTelemetry::Helpers::MySQL.extract_statement_type('SELECT 1'))
132+
end
133+
134+
it 'handles lowercase' do
135+
assert_equal('select', OpenTelemetry::Helpers::MySQL.extract_statement_type('select 1'))
136+
end
137+
138+
it 'handles mixed case' do
139+
assert_equal('select', OpenTelemetry::Helpers::MySQL.extract_statement_type('SeLeCt 1'))
140+
end
141+
142+
it 'handles mixed case for multi-word keywords' do
143+
assert_equal('drop table', OpenTelemetry::Helpers::MySQL.extract_statement_type('Drop Table users'))
110144
end
111145
end
112146

113-
describe 'when sql contains unknown query statement' do
114-
let(:sql) { 'DESELECT 1' }
147+
describe 'leading whitespace' do
148+
it 'handles leading spaces' do
149+
assert_equal('select', OpenTelemetry::Helpers::MySQL.extract_statement_type(' SELECT 1'))
150+
end
115151

116-
# nil sets the span name to 'mysql'
117-
it 'returns nil' do
118-
assert_nil(extract_statement_type)
152+
it 'handles leading newlines' do
153+
assert_equal('select', OpenTelemetry::Helpers::MySQL.extract_statement_type("\nSELECT 1"))
119154
end
120155
end
121156

122-
describe 'when sql contains multiple query statements' do
123-
let(:sql) { 'EXPLAIN SELECT 1' }
157+
describe 'queries with long trailing content' do
158+
it 'extracts from queries with large IN clauses' do
159+
ids = (1..500).to_a.join(', ')
160+
sql = "SELECT * FROM users WHERE id IN (#{ids})"
161+
assert_equal('select', OpenTelemetry::Helpers::MySQL.extract_statement_type(sql))
162+
end
124163

125-
it 'extracts the statement type that begins the query' do
126-
assert_equal('explain', extract_statement_type)
164+
it 'extracts from queries with multiple JOINs' do
165+
sql = <<~SQL
166+
SELECT u.*, p.*, c.*, o.*
167+
FROM users u
168+
JOIN profiles p ON p.user_id = u.id
169+
JOIN companies c ON c.id = u.company_id
170+
JOIN orders o ON o.user_id = u.id
171+
WHERE u.active = 1
172+
SQL
173+
assert_equal('select', OpenTelemetry::Helpers::MySQL.extract_statement_type(sql))
174+
end
175+
176+
it 'extracts from long INSERT with many values' do
177+
values = (1..100).map { |i| "(#{i}, 'user#{i}')" }.join(', ')
178+
sql = "INSERT INTO users (id, name) VALUES #{values}"
179+
assert_equal('insert', OpenTelemetry::Helpers::MySQL.extract_statement_type(sql))
127180
end
128181
end
129182

130-
describe 'when sql with marginalia-style prepended comments includes a query statement' do
131-
let(:sql) do
132-
"/*action='update',application='TrilogyTest',controller='users'*/ SELECT `users`.* FROM `users` WHERE `users`.`id` = 1 LIMIT 1"
183+
describe 'prepended comments' do
184+
it 'handles marginalia-style comments' do
185+
sql = "/*action='update',application='TrilogyTest',controller='users'*/ SELECT `users`.* FROM `users` WHERE `users`.`id` = 1 LIMIT 1"
186+
assert_equal('select', OpenTelemetry::Helpers::MySQL.extract_statement_type(sql))
133187
end
134188

135-
it 'does not capture the query statement within the comment' do
136-
assert_equal('select', extract_statement_type)
189+
it 'does not capture a keyword that appears only inside a comment' do
190+
sql = "/*action='delete'*/ SELECT 1"
191+
assert_equal('select', OpenTelemetry::Helpers::MySQL.extract_statement_type(sql))
192+
end
193+
194+
it 'handles comments with whitespace before and after' do
195+
sql = ' /* comment */ SELECT 1'
196+
assert_equal('select', OpenTelemetry::Helpers::MySQL.extract_statement_type(sql))
197+
end
198+
199+
it 'handles multi-line comments' do
200+
sql = "/* multi\nline\ncomment */ SELECT 1"
201+
assert_equal('select', OpenTelemetry::Helpers::MySQL.extract_statement_type(sql))
202+
end
203+
end
204+
205+
describe 'encoding handling' do
206+
it 'handles UTF-8 encoded strings' do
207+
sql = (+'SELECT * FROM users').force_encoding('UTF-8')
208+
assert_equal('select', OpenTelemetry::Helpers::MySQL.extract_statement_type(sql))
209+
end
210+
211+
it 'handles ASCII encoded strings' do
212+
sql = (+'SELECT * FROM users').force_encoding('ASCII')
213+
assert_equal('select', OpenTelemetry::Helpers::MySQL.extract_statement_type(sql))
214+
end
215+
216+
it 'handles binary (ASCII-8BIT) encoded strings' do
217+
sql = (+'SELECT * FROM users').force_encoding('ASCII-8BIT')
218+
assert_equal('select', OpenTelemetry::Helpers::MySQL.extract_statement_type(sql))
219+
end
220+
221+
it 'handles strings with invalid byte sequences' do
222+
sql = "SELECT * from users where users.id = 1 and users.email = 'test@test.com\255'"
223+
assert_equal('select', OpenTelemetry::Helpers::MySQL.extract_statement_type(sql))
224+
end
225+
226+
it 'handles frozen strings' do
227+
sql = 'SELECT * FROM users'
228+
assert_equal('select', OpenTelemetry::Helpers::MySQL.extract_statement_type(sql))
137229
end
138230
end
139231

140-
describe 'when sql is nil' do
141-
let(:sql) { nil }
232+
describe 'nil and empty inputs' do
233+
it 'returns nil for nil' do
234+
assert_nil(OpenTelemetry::Helpers::MySQL.extract_statement_type(nil))
235+
end
142236

143-
it 'returns nil' do
144-
assert_nil(extract_statement_type)
237+
it 'returns nil for empty string' do
238+
assert_nil(OpenTelemetry::Helpers::MySQL.extract_statement_type(''))
239+
end
240+
241+
it 'returns nil for whitespace-only string' do
242+
assert_nil(OpenTelemetry::Helpers::MySQL.extract_statement_type(' '))
243+
end
244+
end
245+
246+
describe 'unrecognized statements' do
247+
it 'returns nil for unknown keywords' do
248+
assert_nil(OpenTelemetry::Helpers::MySQL.extract_statement_type('DESELECT 1'))
249+
end
250+
251+
it 'returns nil for partial keyword matches' do
252+
assert_nil(OpenTelemetry::Helpers::MySQL.extract_statement_type('SELECTIONS 1'))
253+
end
254+
255+
it 'does not match keywords embedded mid-string' do
256+
assert_nil(OpenTelemetry::Helpers::MySQL.extract_statement_type('MYSELECT 1'))
145257
end
146258
end
147259

148260
describe 'when an error is raised' do
149261
it 'logs a message' do
262+
sql = (+'SELECT 1').force_encoding('ASCII-8BIT')
150263
result = nil
151264
OpenTelemetry::TestHelpers.with_test_logger do |log_stream|
152265
allow(OpenTelemetry::Common::Utilities).to receive(:utf8_encode) { |_| raise 'boom!' }
153-
extract_statement_type
266+
result = OpenTelemetry::Helpers::MySQL.extract_statement_type(sql)
154267

155268
assert_nil(result)
156269
assert_match(/Error extracting/, log_stream.string)

0 commit comments

Comments
 (0)