Skip to content

Commit 6368564

Browse files
committed
Implement time to first token
1 parent 2f38b51 commit 6368564

File tree

2 files changed

+102
-72
lines changed

2 files changed

+102
-72
lines changed

newrelic/hooks/mlmodel_gemini.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,9 @@ def wrap_generate_content_stream_sync(wrapped, instance, args, kwargs):
284284
# Wrap returned generator in a generator proxy
285285
proxied_return_val = LLMStreamProxy(
286286
return_val,
287-
on_stream_chunk=_handle_stream_chunk(streaming_events=streaming_events),
287+
on_stream_chunk=_handle_stream_chunk(
288+
streaming_events=streaming_events, request_timestamp=request_timestamp
289+
),
288290
on_stop_iteration=_handle_streaming_generation_success(
289291
linking_metadata=linking_metadata,
290292
completion_id=completion_id,
@@ -377,7 +379,9 @@ async def wrap_generate_content_stream_async(wrapped, instance, args, kwargs):
377379
# Wrap returned async generator in an async generator proxy
378380
proxied_return_val = AsyncLLMStreamProxy(
379381
return_val,
380-
on_stream_chunk=_handle_stream_chunk(streaming_events=streaming_events),
382+
on_stream_chunk=_handle_stream_chunk(
383+
streaming_events=streaming_events, request_timestamp=request_timestamp
384+
),
381385
on_stop_iteration=_handle_streaming_generation_success(
382386
linking_metadata=linking_metadata,
383387
completion_id=completion_id,
@@ -531,7 +535,15 @@ def _handle_generation_success(
531535

532536

533537
def _record_generation_success(
534-
transaction, linking_metadata, completion_id, kwargs, ft, response, output_message_list=None, request_timestamp=None
538+
transaction,
539+
linking_metadata,
540+
completion_id,
541+
kwargs,
542+
ft,
543+
response,
544+
output_message_list=None,
545+
request_timestamp=None,
546+
time_to_first_token=None,
535547
):
536548
span_id = linking_metadata.get("span.id")
537549
trace_id = linking_metadata.get("trace.id")
@@ -592,6 +604,7 @@ def _record_generation_success(
592604
# separate request (every input and output from the LLM)
593605
"response.number_of_messages": 1 + len(output_message_list),
594606
"timestamp": request_timestamp,
607+
"time_to_first_token": time_to_first_token,
595608
}
596609

597610
llm_metadata = _get_llm_attributes(transaction)
@@ -646,6 +659,7 @@ def _on_stop_iteration(self, transaction):
646659
response=response,
647660
output_message_list=output_message_list,
648661
request_timestamp=request_timestamp,
662+
time_to_first_token=getattr(self, "_nr_time_to_first_token", None),
649663
)
650664
except Exception:
651665
_logger.warning(STREAM_PARSING_FAILURE_LOG_MESSAGE, exc_info=True)
@@ -656,9 +670,11 @@ def _on_stop_iteration(self, transaction):
656670
return _on_stop_iteration
657671

658672

659-
def _handle_stream_chunk(streaming_events):
673+
def _handle_stream_chunk(streaming_events, request_timestamp=None):
660674
def _on_stream_chunk(self, chunk):
661675
streaming_events.append(chunk)
676+
if not hasattr(self, "_nr_time_to_first_token") and request_timestamp:
677+
self._nr_time_to_first_token = int(1000.0 * time.time()) - request_timestamp
662678

663679
return _on_stream_chunk
664680

tests/mlmodel_gemini/test_text_generation.py

Lines changed: 82 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414

1515
import google.genai
16+
import pytest
1617
from conftest import GEMINI_VERSION_METRIC
1718
from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes
1819
from testing_support.ml_testing_utils import (
@@ -33,70 +34,79 @@
3334
from newrelic.api.llm_custom_attributes import WithLlmCustomAttributes
3435
from newrelic.api.transaction import add_custom_attribute
3536

36-
text_generation_recorded_events = [
37-
(
38-
{"type": "LlmChatCompletionSummary"},
39-
{
40-
"id": None, # UUID that varies with each run
41-
"timestamp": None,
42-
"llm.conversation_id": "my-awesome-id",
43-
"llm.foo": "bar",
44-
"span_id": None,
45-
"trace_id": "trace-id",
46-
"duration": None, # Response time varies each test run
47-
"request.model": "gemini-2.0-flash",
48-
"response.model": "gemini-2.0-flash",
49-
"request.temperature": 0.7,
50-
"request.max_tokens": 100,
51-
"response.choices.finish_reason": "STOP",
52-
"vendor": "gemini",
53-
"ingest_source": "Python",
54-
"response.number_of_messages": 2,
55-
},
56-
),
57-
(
58-
{"type": "LlmChatCompletionMessage"},
59-
{
60-
"id": None,
61-
"timestamp": None,
62-
"llm.conversation_id": "my-awesome-id",
63-
"llm.foo": "bar",
64-
"span_id": None,
65-
"trace_id": "trace-id",
66-
"content": "How many letters are in the word Python?",
67-
"role": "user",
68-
"completion_id": None,
69-
"sequence": 0,
70-
"response.model": "gemini-2.0-flash",
71-
"vendor": "gemini",
72-
"ingest_source": "Python",
73-
},
74-
),
75-
(
76-
{"type": "LlmChatCompletionMessage"},
77-
{
78-
"id": None,
79-
"llm.conversation_id": "my-awesome-id",
80-
"llm.foo": "bar",
81-
"span_id": None,
82-
"trace_id": "trace-id",
83-
"content": 'There are 6 letters in the word "Python".\n',
84-
"role": "model",
85-
"completion_id": None,
86-
"sequence": 1,
87-
"response.model": "gemini-2.0-flash",
88-
"vendor": "gemini",
89-
"is_response": True,
90-
"ingest_source": "Python",
91-
},
92-
),
93-
]
37+
38+
@pytest.fixture
39+
def text_generation_events(is_streaming):
40+
events = [
41+
(
42+
{"type": "LlmChatCompletionSummary"},
43+
{
44+
"id": None, # UUID that varies with each run
45+
"timestamp": None,
46+
"llm.conversation_id": "my-awesome-id",
47+
"llm.foo": "bar",
48+
"span_id": None,
49+
"trace_id": "trace-id",
50+
"duration": None, # Response time varies each test run
51+
"request.model": "gemini-2.0-flash",
52+
"response.model": "gemini-2.0-flash",
53+
"request.temperature": 0.7,
54+
"request.max_tokens": 100,
55+
"response.choices.finish_reason": "STOP",
56+
"vendor": "gemini",
57+
"ingest_source": "Python",
58+
"response.number_of_messages": 2,
59+
},
60+
),
61+
(
62+
{"type": "LlmChatCompletionMessage"},
63+
{
64+
"id": None,
65+
"timestamp": None,
66+
"llm.conversation_id": "my-awesome-id",
67+
"llm.foo": "bar",
68+
"span_id": None,
69+
"trace_id": "trace-id",
70+
"content": "How many letters are in the word Python?",
71+
"role": "user",
72+
"completion_id": None,
73+
"sequence": 0,
74+
"response.model": "gemini-2.0-flash",
75+
"vendor": "gemini",
76+
"ingest_source": "Python",
77+
},
78+
),
79+
(
80+
{"type": "LlmChatCompletionMessage"},
81+
{
82+
"id": None,
83+
"llm.conversation_id": "my-awesome-id",
84+
"llm.foo": "bar",
85+
"span_id": None,
86+
"trace_id": "trace-id",
87+
"content": 'There are 6 letters in the word "Python".\n',
88+
"role": "model",
89+
"completion_id": None,
90+
"sequence": 1,
91+
"response.model": "gemini-2.0-flash",
92+
"vendor": "gemini",
93+
"is_response": True,
94+
"ingest_source": "Python",
95+
},
96+
),
97+
]
98+
99+
if is_streaming:
100+
# Only valid for streaming, and varies each test run
101+
events[0][1]["time_to_first_token"] = None
102+
103+
return events
94104

95105

96106
@reset_core_stats_engine()
97-
def test_gemini_text_generation(exercise_text_model, text_generation_metrics, set_trace_info):
107+
def test_gemini_text_generation(exercise_text_model, text_generation_metrics, set_trace_info, text_generation_events):
98108
# Expect one summary event, one message event for the input, and message event for the output
99-
@validate_custom_events(events_with_context_attrs(text_generation_recorded_events))
109+
@validate_custom_events(events_with_context_attrs(text_generation_events))
100110
@validate_custom_event_count(count=3)
101111
@validate_transaction_metrics(
102112
name="test_gemini_text_generation",
@@ -156,8 +166,9 @@ def _test():
156166

157167

158168
@reset_core_stats_engine()
159-
def test_gemini_text_generation_with_llm_metadata(exercise_text_model, text_generation_metrics, set_trace_info):
160-
@validate_custom_events(events_with_context_attrs(text_generation_recorded_events))
169+
def test_gemini_text_generation_with_llm_metadata(
170+
exercise_text_model, text_generation_metrics, set_trace_info, text_generation_events
171+
):
161172
@validate_custom_event_count(count=3)
162173
@validate_transaction_metrics(
163174
name="test_gemini_text_generation_with_llm_metadata",
@@ -185,8 +196,9 @@ def _test():
185196

186197
@reset_core_stats_engine()
187198
@disabled_ai_monitoring_record_content_settings
188-
def test_gemini_text_generation_no_content(exercise_text_model, text_generation_metrics, set_trace_info):
189-
@validate_custom_events(events_sans_content(text_generation_recorded_events))
199+
def test_gemini_text_generation_no_content(
200+
exercise_text_model, text_generation_metrics, set_trace_info, text_generation_events
201+
):
190202
@validate_custom_event_count(count=3)
191203
@validate_transaction_metrics(
192204
name="test_gemini_text_generation_no_content",
@@ -212,8 +224,9 @@ def _test():
212224

213225
@reset_core_stats_engine()
214226
@override_llm_token_callback_settings(llm_token_count_callback)
215-
def test_gemini_text_generation_with_token_count(exercise_text_model, text_generation_metrics, set_trace_info):
216-
@validate_custom_events(add_token_count_to_events(text_generation_recorded_events))
227+
def test_gemini_text_generation_with_token_count(
228+
exercise_text_model, text_generation_metrics, set_trace_info, text_generation_events
229+
):
217230
@validate_custom_event_count(count=3)
218231
@validate_transaction_metrics(
219232
name="test_gemini_text_generation_with_token_count",
@@ -238,8 +251,9 @@ def _test():
238251

239252

240253
@reset_core_stats_engine()
241-
def test_gemini_text_generation_no_llm_metadata(exercise_text_model, text_generation_metrics, set_trace_info):
242-
@validate_custom_events(events_sans_llm_metadata(text_generation_recorded_events))
254+
def test_gemini_text_generation_no_llm_metadata(
255+
exercise_text_model, text_generation_metrics, set_trace_info, text_generation_events
256+
):
243257
# One summary event, one system message, one user message, and one response message from the assistant
244258
@validate_custom_event_count(count=3)
245259
@validate_transaction_metrics(

0 commit comments

Comments
 (0)