50 lines
1.5 KiB
Python
50 lines
1.5 KiB
Python
"""TTS synthesis routes for per-head voice output."""
|
|
|
|
from typing import Any
|
|
|
|
from fastapi import APIRouter, HTTPException
|
|
|
|
from fusionagi.api.dependencies import get_session_store
|
|
from fusionagi.config.head_voices import get_voice_id_for_head
|
|
from fusionagi.schemas.head import HeadId
|
|
|
|
# Router that the TTS endpoint(s) in this module register on.
# NOTE(review): where this router is mounted (URL prefix) is not visible here.
router = APIRouter()
|
|
|
|
|
|
@router.post("/{session_id}/synthesize")
async def synthesize(
    session_id: str,
    body: dict[str, Any],
) -> dict[str, Any]:
    """
    Synthesize text to audio for a head.

    Body: { "text": "...", "head_id": "logic" }
    Returns: { "audio_base64": "..." | null, "voice_id": ... }

    ``audio_base64`` is currently always null because no TTS adapter is
    wired in yet. ``voice_id`` is whatever ``get_voice_id_for_head``
    returns for the resolved head.

    A missing or unrecognised ``head_id`` is not an error: the request
    falls back to ``HeadId.LOGIC``.

    Raises:
        HTTPException(503): the session store is not available.
        HTTPException(404): no session exists for ``session_id``.
        HTTPException(400): ``text`` is missing, not a string, or blank.
    """
    store = get_session_store()
    if not store:
        raise HTTPException(status_code=503, detail="Service not initialized")

    if not store.get(session_id):
        raise HTTPException(status_code=404, detail="Session not found")

    # The body is an untyped JSON object, so "text" may be any JSON value.
    # Only a non-blank string is meaningful input for speech synthesis.
    text = body.get("text")
    if not isinstance(text, str) or not text.strip():
        raise HTTPException(status_code=400, detail="text is required")

    # Deliberate best-effort: an unknown head still gets a voice (LOGIC's).
    try:
        head_id = HeadId(body.get("head_id", ""))
    except ValueError:
        head_id = HeadId.LOGIC

    voice_id = get_voice_id_for_head(head_id)

    # TODO: Wire TTSAdapter (ElevenLabs, Azure, etc.): await
    # tts_adapter.synthesize(text, voice_id=voice_id) and, when it yields
    # audio bytes, return them base64-encoded in "audio_base64".
    audio_base64 = None

    return {"audio_base64": audio_base64, "voice_id": voice_id}
|