Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/deploy-examples.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
strategy:
fail-fast: false
matrix:
example: [healthcare, survey, frontdesk, drive-thru, inference, avatar]
example: [healthcare, survey, frontdesk, drive-thru, inference, avatar, hotel_receptionist]
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
with:
Expand Down
35 changes: 33 additions & 2 deletions examples/drive-thru/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
function_tool,
inference,
)
from livekit.agents.voice import UserStateChangedEvent, presets

load_dotenv()

Expand Down Expand Up @@ -483,9 +484,17 @@ async def drive_thru_agent(ctx: JobContext) -> None:
],
},
),
llm=inference.LLM("openai/gpt-4.1-mini"),
tts=inference.TTS("cartesia/sonic-3", voice="f786b574-daa5-4673-aa0c-cbe3e8534c02"),
llm=inference.LLM("google/gemma-4-31b-it"),
tts=inference.TTS(
"inworld/inworld-tts-2",
voice="Sarah",
extra_kwargs={"delivery_mode": "CREATIVE", "speaking_rate": 1.1},
),
expressive=presets.CUSTOMER_SERVICE,
max_tool_steps=10,
# Flip user_state to "away" after 10s of mutual silence so we can
# check whether they're still there (default is 15s).
user_away_timeout=10.0,
)

background_audio = BackgroundAudioPlayer(
Expand Down Expand Up @@ -543,6 +552,28 @@ async def push_cart() -> None:

userdata.order.on_change = push_cart

idle_task: asyncio.Task[None] | None = None

async def _nudge_while_idle() -> None:
    """Repeatedly ask the agent to check whether the idle user is still there.

    Each pass logs the check, awaits ``session.generate_reply`` with the nudge
    instructions, then sleeps 10 seconds. The loop has no exit of its own; it
    stops only when the task running it is cancelled (the
    ``user_state_changed`` handler does that once the user leaves "away").
    """
    nudge_instructions = "The user has been idle, see if they're still there"
    while True:
        logger.info("user idle — checking if they're still there")
        await session.generate_reply(instructions=nudge_instructions)
        # Pause before the next check-in.
        await asyncio.sleep(10)

@session.on("user_state_changed")
def _on_user_state_changed(ev: UserStateChangedEvent) -> None:
nonlocal idle_task
if ev.new_state == "away":
if idle_task is None or idle_task.done():
idle_task = asyncio.create_task(_nudge_while_idle())
elif idle_task is not None:
idle_task.cancel()
idle_task = None

await session.start(agent=DriveThruAgent(userdata=userdata), room=ctx.room)
await background_audio.start(room=ctx.room, agent_session=session)

Expand Down
79 changes: 39 additions & 40 deletions examples/drive-thru/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,49 +6,48 @@
from pydantic import BaseModel

COMMON_INSTRUCTIONS = (
"You are a quick and friendly McDonald’s drive-thru attendant. \n"
"Your job is to guide the customer smoothly through their order, speaking in short, natural voice responses. \n"
"This is a voice interaction-assume the customer just pulled up and is speaking to you through a drive-thru speaker. \n"
"Respond like you're hearing them, not reading text. \n"
"Assume they want food, even if they don’t start with a clear request, and help them get what they’re looking for. \n"
# Outcome — what a great interaction looks like.
"You are Mac, a quick and friendly McDonald’s drive-thru attendant, and a customer has just "
"pulled up to the speaker. A great interaction ends with their complete, correct order in the "
"ordering system — every item they asked for, at the right size, with nothing they didn’t ask "
"for — reached in as few, as natural exchanges as possible. \n"
"\n\n"
"If an item comes in different sizes, always ask for the size unless the customer already gave one. \n"
"If a customer orders a 'large meal', automatically assume both the fries and the drink should be large. \n"
"Do not ask again to confirm the size of the drink or fries. This inference is meant to streamline the interaction. \n"
"If the customer clearly indicates a different size for the fries or drink, respect their preference. \n"
# Voice & personality — keep it short and human.
"Your output is synthesized directly to speech, so produce a natural verbatim transcript, not "
"polished text. Start responses with real reactions (oh, hmm, ah) and fillers (um, uh, like) "
'rather than "Absolutely" or "Certainly", and let mid-sentence fillers (like, you know, I '
"mean) fall where they naturally would. Use informal phrasing: yeah, gonna, kinda, gotcha, "
"lemme. Keep replies short, upbeat, and snappy, and ask about one thing at a time so you never "
"overwhelm the customer. Confirm choices warmly ('Alright, one Big Mac Combo!'), and when "
"something’s missing or unavailable, say so with empathy and offer the closest option ('Ah, "
"we’re out of Sweet Tea right now — can I get you a Coke instead?'). \n"
"\n\n"
"Never infer or assume any detail the customer has not explicitly stated — especially the drink for a combo meal. \n"
"If a required detail is missing, always ask the customer before calling any tool. \n"
# How to work — infer intent, acknowledge before acting, stop when you have enough.
"Assume the customer wants food even if they don’t open with a clear request, and guide them "
"toward it. Treat each transcript as a rough draft of what was said — it may contain "
"speech-to-text errors, so don’t mention the transcript or repeat its mistakes. When you can "
"reasonably infer intent and it’s safe to, just go with it; when the input is genuinely "
"ambiguous or nonsensical, ask the customer to repeat. \n"
"Before a tool call that takes a moment, give a brief spoken acknowledgment first ('lemme get "
"that added') so there’s no dead air. After each step, ask yourself whether you now have "
"everything needed to complete the customer’s request: if you do, act; if a required detail "
"is still missing, ask for just that one detail. \n"
"\n\n"
"Be fast-keep responses short and snappy. \n"
"Sound human-sprinkle in light vocal pauses like 'Mmh…', 'Let me see…', or 'Alright…' at natural moments-but not too often. \n"
"Keep everything upbeat and easy to follow. Never overwhelm the customer, don't ask multiple questions at the same time. \n"
"\n\n"
"When a customer is confused or asks for something that doesn’t exist, let them know politely and suggest something close. \n"
"Always confirm what they picked in a warm, clear way, like: 'Alright, one Big Mac Combo!' \n"
"If something’s unavailable, say so with empathy: 'Ah, we're out of Sweet Tea right now-can I get you a Coke instead?' \n"
"\n\n"
"Whenever a customer asks for, changes, or removes something from their order, you MUST use a tool to make it happen. \n"
"Don’t fake it. Don’t pretend something was added - actually **call** the tool and make it real on the ordering system. \n"
"\n\n"
"Transcripts often contain speech-to-text errors-don’t mention the transcript, don’t repeat its mistakes. \n"
"Instead treat each user input as a rough draft of what was said. \n"
"If you can guess the user’s intent and it’s safe to do so, infer their meaning and respond naturally. \n"
"If the transcript is ambiguous/nonsense and you can’t guess their intent, ask the customer to repeat again. \n"
"Stay on-topic; if input is nonsensical in a drive-thru context, ask for concise clarification. \n"
"\n\n"
"Do not add any item on the user's behalf unless they specifically request it. If the user hasn't asked for an item, NEVER add it. \n"
"\n\n"
"When a customer changes an item or meal, make sure to remove the previous version before adding the new one. \n"
"Otherwise, the order may contain duplicates. \n"
"\n\n"
"Stricly stick to the defined menu, Do not invent or suggest any new sizes or items. \n"
"If the item specified by the user is unclear or not **exactly** on the menu, ask for clarification or say you don't have this specific item \n"
"E.g: a hamburger isn't a cheeseburger\n"
"Do not ask for size unless the item has more than one size option specified. \n"
"If an item does not require a size according to the menu, **NEVER** ask the customer to choose one or mention size at all. \n"
"\n\n"
"If there is any error from the tool, you should inform the customer and ask them to try again."
# Hard constraints — these are invariants, not judgment calls.
"Constraints that always hold:\n"
"- Stick strictly to the defined menu. Never invent items or sizes. If what the customer wants "
"isn’t *exactly* on the menu, say you don’t have it and offer the closest match (a hamburger "
"isn’t a cheeseburger). \n"
"- Any add, change, or removal must go through a tool call — actually call it, never pretend. "
"When a customer swaps an item, remove the old one before adding the new so the order has no "
"duplicates. \n"
"- Only add items the customer explicitly asked for; never add anything on their behalf. \n"
"- Don’t assume unstated details — especially the drink in a combo. If a required detail is "
"missing, ask before calling the tool. \n"
"- Ask about size only for items that actually have more than one size; if an item has a single "
"size, don’t mention size at all. For a 'large meal', make both the fries and drink large "
"without re-confirming, unless the customer specifies different sizes. \n"
"- If a tool returns an error, tell the customer and ask them to try again. \n"
)


Expand Down
89 changes: 74 additions & 15 deletions examples/frontdesk/agent.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import asyncio
import datetime
import logging
import os
Expand Down Expand Up @@ -38,6 +39,7 @@
task_completion_judge,
tool_use_judge,
)
from livekit.agents.voice import UserStateChangedEvent, presets

load_dotenv()

Expand All @@ -63,24 +65,51 @@ def __init__(self, *, timezone: str) -> None:

super().__init__(
instructions=(
f"You are Front-Desk, a helpful and efficient voice assistant. "
f"Today is {today}. Your main goal is to schedule an appointment for the user. "
"This is a voice conversation — speak naturally, clearly, and concisely. "
"When the user says hello or greets you, don’t just respond with a greeting — use it as an opportunity to move things forward. "
"For example, follow up with a helpful question like: 'Would you like to book a time?' "
"When asked for availability, call list_available_slots and offer a few clear, simple options. "
"Say things like 'Monday at 2 PM' — avoid timezones, timestamps, and avoid saying 'AM' or 'PM'. "
"Use natural phrases like 'in the morning' or 'in the evening', and don’t mention the year unless it’s different from the current one. "
"Offer a few options at a time, pause for a response, then guide the user to confirm. "
"If the time is no longer available, let them know gently and offer the next options. "
"Always keep the conversation flowing — be proactive, human, and focused on helping the user schedule with ease."
# Outcome — what a great interaction looks like.
f"You are Front-Desk, a helpful and efficient voice assistant. Today is {today}. "
"A great interaction ends with the user booked into an appointment slot that works "
"for them, reached through a warm, flowing conversation with as little "
"back-and-forth as possible. "
# Voice & personality — keep it short and human.
"Your output is synthesized directly to speech, so produce a natural verbatim "
"transcript, not polished text. Start responses with real reactions (oh, hmm, ah) "
'and fillers (um, uh, like) rather than "Absolutely" or "Certainly", with '
"mid-sentence fillers (like, you know, I mean) where they’d naturally fall. Mirror "
"the user's formality: if they're casual, use informal phrasing (gotcha, alright, "
"gonna, kinda, lemme, yeah); if they're more formal, keep your speech cleaner. Vary "
"your openers across turns — if you opened the last turn with 'gotcha', pick "
"'alright' or 'okay' this turn; don't repeat the same opener back-to-back. "
# How to work — be proactive, acknowledge before acting, stop when you can move forward.
"Be proactive: when the user greets you, use it to move things forward (e.g. "
"'Would you like to book a time?') rather than just greeting back. Before a tool "
"call that takes a moment, give a brief spoken acknowledgment so there’s no dead "
"air. After each result, check whether you can now move the user toward a booking: "
"if so, do it; if you're missing something, ask for just that. "
# Speaking about times — constraints that keep it natural over voice.
"When talking about availability, call list_available_slots and offer a few clear "
"options at a time, then pause for a response and guide the user to confirm. Say "
"times like 'Monday at 2' — avoid timezones, timestamps, and the words 'AM'/'PM'; "
"use natural phrases like 'in the morning' or 'in the evening', and don’t mention "
"the year unless it differs from the current one. When listing several times in the "
"same window, group them ('in the evening at 4, 5, or 6') instead of repeating the "
"time-of-day qualifier on each slot. If a chosen time is no longer available, let "
"them know gently and offer the next options."
)
)

self._slots_map: dict[str, AvailableSlot] = {}

async def on_enter(self) -> None:
await self.session.say("Hello, I can help you schedule an appointment!")
hour = datetime.datetime.now(self.tz).hour
time_of_day = "morning" if hour < 12 else "afternoon" if hour < 17 else "evening"
await self.session.generate_reply(
instructions=(
f"Say hello and welcome to the caller — it's currently {time_of_day} their time. "
"You're the front desk of an office and you're here to help them schedule a visit. "
"Invite them to book an appointment to visit, and ask what time works. "
"Keep it warm and brief."
)
)

@function_tool
async def schedule_appointment(
Expand All @@ -100,7 +129,7 @@ async def schedule_appointment(
email_result = await beta.workflows.GetEmailTask(chat_ctx=self.chat_ctx)

if ctx.speech_handle.interrupted:
return
return None

ctx.disallow_interruptions()

Expand Down Expand Up @@ -262,11 +291,41 @@ async def frontdesk_agent(ctx: JobContext):
session = AgentSession[Userdata](
userdata=userdata,
stt=inference.STT("deepgram/nova-3"),
llm=inference.LLM("google/gemini-2.5-flash"),
tts=inference.TTS("cartesia/sonic-3", voice="39b376fc-488e-4d0c-8b37-e00b72059fdd"),
llm=inference.LLM("google/gemma-4-31b-it"),
tts=inference.TTS(
"inworld/inworld-tts-2",
voice="Nadia",
extra_kwargs={"delivery_mode": "CREATIVE", "speaking_rate": 1.1},
),
expressive=presets.CUSTOMER_SERVICE,
max_tool_steps=1,
# Flip user_state to "away" after 10s of mutual silence so we can
# check whether they're still there (default is 15s).
user_away_timeout=10.0,
)

idle_task: asyncio.Task[None] | None = None

async def _nudge_while_idle() -> None:
    """Repeatedly ask the agent to check whether the idle user is still there.

    Each pass logs the check, awaits ``session.generate_reply`` with the nudge
    instructions, then sleeps 10 seconds. The loop has no exit of its own; it
    stops only when the task running it is cancelled (the
    ``user_state_changed`` handler does that once the user leaves "away").
    """
    nudge_instructions = "The user has been idle, see if they're still there"
    while True:
        logger.info("user idle — checking if they're still there")
        await session.generate_reply(instructions=nudge_instructions)
        # Pause before the next check-in.
        await asyncio.sleep(10)

@session.on("user_state_changed")
def _on_user_state_changed(ev: UserStateChangedEvent) -> None:
nonlocal idle_task
if ev.new_state == "away":
if idle_task is None or idle_task.done():
idle_task = asyncio.create_task(_nudge_while_idle())
elif idle_task is not None:
idle_task.cancel()
idle_task = None

await session.start(agent=FrontDeskAgent(timezone=timezone), room=ctx.room)


Expand Down
42 changes: 39 additions & 3 deletions examples/healthcare/agent.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import asyncio
import json
import logging
import os
Expand Down Expand Up @@ -32,6 +33,7 @@
WarmTransferTask,
)
from livekit.agents.llm import ToolError, function_tool
from livekit.agents.voice import UserStateChangedEvent, presets

logger = logging.getLogger("HealthcareAgent")

Expand Down Expand Up @@ -563,7 +565,11 @@ def __init__(self, database=None) -> None:

async def on_enter(self) -> None:
await self.session.generate_reply(
instructions="Greet the user and gather the reason for their call."
instructions=(
"Warmly welcome the user to the healthcare clinic and ask how you can help "
'them today, e.g. "Welcome to the healthcare clinic, how can I help you?" '
"Then gather the reason for their call."
)
)

async def task_completed_callback(self, event, task_group):
Expand Down Expand Up @@ -751,11 +757,41 @@ async def entrypoint(ctx: JobContext):
session = AgentSession(
userdata=userdata,
stt=inference.STT("deepgram/nova-3", language="multi"),
llm=inference.LLM("openai/gpt-4.1-mini"),
tts=inference.TTS("inworld/inworld-tts-1"),
llm=inference.LLM("google/gemma-4-31b-it"),
tts=inference.TTS(
"inworld/inworld-tts-2",
voice="Luna",
extra_kwargs={"delivery_mode": "CREATIVE", "speaking_rate": 1.1},

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this required to make expressive work?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

speaking rate was preference from feedback but creative delivery_mode is optimal for expressive mode, not required

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't we just set it automatically?

),
expressive=presets.HEALTHCARE,
preemptive_generation=True,
# Flip user_state to "away" after 10s of mutual silence so we can
# check whether they're still there (default is 15s).
user_away_timeout=10.0,
)

idle_task: asyncio.Task[None] | None = None

async def _nudge_while_idle() -> None:
    """Repeatedly ask the agent to check whether the idle user is still there.

    Each pass logs the check, awaits ``session.generate_reply`` with the nudge
    instructions, then sleeps 10 seconds. The loop has no exit of its own; it
    stops only when the task running it is cancelled (the
    ``user_state_changed`` handler does that once the user leaves "away").
    """
    nudge_instructions = "The user has been idle, see if they're still there"
    while True:
        logger.info("user idle — checking if they're still there")
        await session.generate_reply(instructions=nudge_instructions)
        # Pause before the next check-in.
        await asyncio.sleep(10)

@session.on("user_state_changed")
def _on_user_state_changed(ev: UserStateChangedEvent) -> None:
nonlocal idle_task
if ev.new_state == "away":
if idle_task is None or idle_task.done():
idle_task = asyncio.create_task(_nudge_while_idle())
elif idle_task is not None:
idle_task.cancel()
idle_task = None

await session.start(
agent=HealthcareAgent(database=db),
room=ctx.room,
Expand Down
Loading