# FusionAGI/fusionagi/api/routes/tts.py

"""TTS synthesis routes for per-head voice output."""

from typing import Any

from fastapi import APIRouter, HTTPException

from fusionagi.api.dependencies import get_session_store
from fusionagi.config.head_voices import get_voice_id_for_head
from fusionagi.schemas.head import HeadId

# Router holding the TTS endpoints defined in this module; no prefix or tags
# are configured here.
router = APIRouter()


@router.post("/{session_id}/synthesize")
async def synthesize(
    session_id: str,
    body: dict[str, Any],
) -> dict[str, Any]:
    """Synthesize text to audio for a head.

    Body: ``{ "text": "...", "head_id": "logic" }``

    Args:
        session_id: Session identifier taken from the URL path.
        body: JSON request body. ``text`` is required and must be a non-empty
            string. ``head_id`` is optional; a missing or unrecognised value
            falls back to ``HeadId.LOGIC``.

    Returns:
        ``{"audio_base64": ..., "voice_id": ...}``. ``audio_base64`` is
        currently always ``None`` because no TTS adapter is wired in yet;
        ``voice_id`` is the value returned by ``get_voice_id_for_head`` for
        the resolved head.

    Raises:
        HTTPException: 503 when the session store is unavailable, 404 when
            the session is unknown, 400 when ``text`` is missing, empty, or
            not a string.
    """
    store = get_session_store()
    # Compare against None explicitly: an initialised-but-empty store must not
    # be reported as "not initialized" if the store type defines __len__.
    if store is None:
        raise HTTPException(status_code=503, detail="Service not initialized")
    # Same reasoning for the session: only an absent session is a 404.
    if store.get(session_id) is None:
        raise HTTPException(status_code=404, detail="Session not found")

    text = body.get("text", "")
    if not text:
        raise HTTPException(status_code=400, detail="text is required")
    # Truthy non-strings (numbers, lists, objects) would otherwise slip
    # through and reach the synthesis backend once it is wired in.
    if not isinstance(text, str):
        raise HTTPException(status_code=400, detail="text must be a string")

    try:
        head_id = HeadId(body.get("head_id", ""))
    except ValueError:
        # Best-effort: unknown or missing heads use the default voice head
        # instead of failing the request.
        head_id = HeadId.LOGIC

    voice_id = get_voice_id_for_head(head_id)
    # TODO: Wire TTSAdapter (ElevenLabs, Azure, etc.) and synthesize: await
    # tts_adapter.synthesize(text, voice_id=voice_id) and, when it yields
    # bytes, return them base64-encoded as audio_base64.
    audio_base64 = None
    return {"audio_base64": audio_base64, "voice_id": voice_id}