from textwrap import dedent
from agno.agent import Agent
from agno.models.openai import OpenAIChat
from agno.tools.cartesia import CartesiaTools
from agno.utils.audio import write_audio_to_file
# System instructions for the agent: an ordered checklist that walks from
# translation through emotion analysis and voice selection/localization to the
# final text-to-speech call.
# The backslash after the opening quotes suppresses the leading newline, and
# dedent() strips the common source indentation, so every step ends up flush
# with the left margin in the resulting string.
agent_instructions = dedent(
    """\
    Follow these steps SEQUENTIALLY to translate text and generate a localized voice note:
    1. Identify the text to translate and the target language from the user request.
    2. Translate the text accurately to the target language.
    3. Analyze the emotion conveyed by the translated text.
    4. Call `list_voices` to retrieve available voices.
    5. Select a base voice matching the language and emotion.
    6. Call `localize_voice` to create a new localized voice.
    7. Call `text_to_speech` to generate the final audio.
    """
)
# Agent wiring: an OpenAI chat model driven by the step-by-step instructions,
# with the Cartesia toolkit attached. enable_localize_voice=True presumably
# exposes the `localize_voice` tool that step 6 of the instructions relies on
# (confirm against the CartesiaTools defaults).
agent = Agent(
    name="Emotion-Aware Translator Agent",
    model=OpenAIChat(id="gpt-5-mini"),
    tools=[CartesiaTools(enable_localize_voice=True)],
    description=(
        "Translates text, analyzes emotion, selects a suitable voice, "
        "creates a localized voice, and generates a voice note (audio file) "
        "using Cartesia TTS tools."
    ),
    instructions=agent_instructions,
)
# Run the agent once on a sample request and print its reply.
agent.print_response(
    "Translate 'Hello! How are you? Tell me more about the weather in Paris?' to French and create a voice note."
)

# Pick up the response recorded by the run that just finished. It is checked
# for None so that a run which produced no response skips the save step
# instead of raising AttributeError on `.audio`.
response = agent.run_response
if response is not None and response.audio:
    # Only the first audio artifact is written; its `base64_audio` payload
    # (presumably base64-encoded audio data) goes to a file in the current
    # working directory.
    write_audio_to_file(
        response.audio[0].base64_audio,
        filename="french_weather.mp3",
    )