-
Notifications
You must be signed in to change notification settings - Fork 116
Expand file tree
/
Copy pathagent.py
More file actions
94 lines (78 loc) · 2.48 KB
/
agent.py
File metadata and controls
94 lines (78 loc) · 2.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
"""
---
title: Hedra Avatar with Pipeline
category: avatars
tags: [hedra, avatar, static_image, pipeline, inworld_tts, multilingual]
difficulty: intermediate
description: Visual avatar using Hedra with static image, pipeline architecture, and Inworld TTS
demonstrates:
- Hedra avatar session with static image loading
- Pipeline architecture with separate STT/LLM/TTS components
- Inworld TTS voice integration
- Multilingual turn detection model
- Noise cancellation with BVC
---
"""
from dotenv import load_dotenv
from livekit import agents
from livekit.agents import (
AgentSession,
Agent,
RoomInputOptions,
get_job_context,
)
from livekit.plugins import (
openai,
inworld,
deepgram,
noise_cancellation,
silero,
hedra,
)
from livekit.plugins.turn_detector.multilingual import MultilingualModel
from PIL import Image
import os
# Load variables from a local .env file into the process environment at import
# time, before any plugin client below is constructed.
# NOTE(review): presumably this is where the provider API keys come from — confirm.
load_dotenv()
class StaticAvatarAgent(Agent):
    """Voice assistant agent used together with a static-image avatar.

    The class adds no behavior of its own; it only fixes the instruction
    prompt that is passed to the base ``Agent``.
    """

    def __init__(self) -> None:
        """Initialize the base agent with the fixed assistant prompt."""
        prompt = "You are a helpful voice AI assistant with a visual avatar."
        super().__init__(instructions=prompt)
async def entrypoint(ctx: agents.JobContext):
    """Run a voice agent session fronted by a Hedra static-image avatar.

    Looks for ``avatar.png``, ``avatar.jpg`` or ``avatar.jpeg`` (in that
    order) in the directory containing this file, passes the first match to
    a Hedra avatar session, then starts the STT/LLM/TTS pipeline in the
    job's room and asks the agent to greet the user.

    Args:
        ctx: Job context passed to this entrypoint; its ``room`` is used for
            both the avatar session and the agent session.

    Raises:
        FileNotFoundError: If none of the candidate avatar images exists.
    """
    avatar_dir = os.path.dirname(os.path.abspath(__file__))

    # Resolve the path first and only then open it, so the "nothing found"
    # case is an explicit ``None`` check rather than a truthiness test on an
    # image object.
    candidate_paths = (
        os.path.join(avatar_dir, f"avatar{ext}")
        for ext in (".png", ".jpg", ".jpeg")
    )
    image_path = next(
        (path for path in candidate_paths if os.path.exists(path)),
        None,
    )
    if image_path is None:
        raise FileNotFoundError("No avatar image found. Please place an avatar.png, avatar.jpg, or avatar.jpeg in the avatars directory.")
    avatar_image = Image.open(image_path)

    avatar_session = hedra.AvatarSession(
        avatar_participant_identity="static-avatar",
        avatar_image=avatar_image,
    )

    session = AgentSession(
        stt=deepgram.STT(model="nova-3", language="multi"),
        llm=openai.LLM(),
        tts=inworld.TTS(voice="Alex"),
        vad=silero.VAD.load(),
        turn_detection=MultilingualModel(),
    )

    # Use the room from the ``ctx`` argument for both sessions instead of
    # fetching the job context a second time through a global lookup.
    # The avatar is started before the agent session, as in the original
    # ordering. NOTE(review): confirm against the plugin docs before reordering.
    await avatar_session.start(session, room=ctx.room)

    await session.start(
        room=ctx.room,
        agent=StaticAvatarAgent(),
        room_input_options=RoomInputOptions(
            noise_cancellation=noise_cancellation.BVC(),
        ),
    )

    await session.generate_reply(
        instructions="Greet the user and offer your assistance."
    )
if __name__ == "__main__":
    # When executed as a script, hand control to the agents CLI with
    # `entrypoint` registered as the worker's entrypoint function.
    worker_options = agents.WorkerOptions(entrypoint_fnc=entrypoint)
    agents.cli.run_app(worker_options)