# virtual-banker/docs/openapi.yaml

# Version of the OpenAPI Specification this document is written against.
openapi: '3.0.3'
info:
  title: Virtual Banker API
  # Version of this API description (separate from the `openapi` field above).
  version: '1.0.0'
  description: |
    REST API for the Virtual Banker platform, including voice operations (TTS/ASR),
    session management, conversation orchestration, and avatar control.

    Features:
    - Voice session management
    - Text-to-speech (TTS) synthesis with viseme support
    - Speech-to-text (ASR) transcription
    - Conversation orchestration
    - WebRTC signaling for real-time communication
    - Avatar animation and lip sync

  contact:
    name: Virtual Banker Support
    email: 'support@d-bis.org'
  license:
    name: MIT
    url: 'https://opensource.org/licenses/MIT'

# Base URLs the paths below are resolved against, one entry per environment.
servers:
  - description: Production server
    url: 'https://virtual-banker.d-bis.org/v1'
  - description: Sandbox server
    url: 'https://sandbox-virtual-banker.d-bis.org/v1'
  - description: Development server
    url: 'http://localhost:8081/v1'

# Document-level default: operations require BearerAuth unless they declare
# their own `security` list.
security:
  - {BearerAuth: []}

# Groups referenced by the `tags` field of each operation.
tags:
  - {name: Sessions, description: Session management operations}
  - {name: Voice, description: 'Voice operations (TTS/ASR)'}
  - {name: Conversation, description: Conversation orchestration}
  - {name: Avatar, description: Avatar control and animation}
  - {name: Providers, description: Voice provider management}
  - {name: Health, description: Health check endpoints}

paths:
  /health:
    get:
      tags: [Health]
      summary: Health check
      description: Returns the health status of the Virtual Banker API and service connections
      operationId: getHealth
      # Empty requirement list: this operation declares no security requirement.
      security: []
      responses:
        '200':
          description: Service is healthy
          content:
            application/json:
              schema:
                type: object
                properties:
                  status: {type: string, example: 'healthy'}
                  services:
                    type: object
                    properties:
                      # asr, tts and llm share one shape: provider name + status.
                      asr:
                        type: object
                        properties:
                          provider: {type: string, example: 'mock'}
                          status: {type: string, example: 'available'}
                      tts:
                        type: object
                        properties:
                          provider: {type: string, example: 'mock'}
                          status: {type: string, example: 'available'}
                      llm:
                        type: object
                        properties:
                          provider: {type: string, example: 'mock'}
                          status: {type: string, example: 'available'}
                      # Backing stores are reported as a plain status string.
                      database: {type: string, example: 'connected'}
                      redis: {type: string, example: 'connected'}
                  timestamp: {type: string, format: date-time}

  /sessions:
    # POST creates a session; GET lists the caller's active sessions.
    post:
      tags: [Sessions]
      summary: Create session
      description: Creates a new virtual banker session
      operationId: createSession
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateSessionRequest'
            example:
              tenant_id: 'tenant-123'
              user_id: 'user-456'
              auth_assertion: 'jwt-token'
              portal_context:
                route: '/account'
                account_id: 'acc-789'
      responses:
        '201':
          description: Session created successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SessionResponse'
        '400': {$ref: '#/components/responses/BadRequest'}
        '401': {$ref: '#/components/responses/Unauthorized'}
        '500': {$ref: '#/components/responses/InternalServerError'}

    get:
      tags: [Sessions]
      summary: List sessions
      description: Returns a list of active sessions for the authenticated user
      operationId: listSessions
      responses:
        '200':
          description: List of sessions
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SessionListResponse'
        '401': {$ref: '#/components/responses/Unauthorized'}
        '500': {$ref: '#/components/responses/InternalServerError'}

  /sessions/{sessionId}:
    # GET returns one session; DELETE ends it.
    get:
      tags: [Sessions]
      summary: Get session
      description: Returns session details
      operationId: getSession
      parameters:
        - {$ref: '#/components/parameters/SessionId'}
      responses:
        '200':
          description: Session details
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SessionResponse'
        '401': {$ref: '#/components/responses/Unauthorized'}
        '404': {$ref: '#/components/responses/NotFound'}
        '500': {$ref: '#/components/responses/InternalServerError'}

    delete:
      tags: [Sessions]
      summary: End session
      description: Ends a virtual banker session
      operationId: endSession
      parameters:
        - {$ref: '#/components/parameters/SessionId'}
      responses:
        '200':
          description: Session ended
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/BaseResponse'
        '401': {$ref: '#/components/responses/Unauthorized'}
        '404': {$ref: '#/components/responses/NotFound'}
        '500': {$ref: '#/components/responses/InternalServerError'}

  /sessions/{sessionId}/refresh-token:
    post:
      tags: [Sessions]
      summary: Refresh ephemeral token
      description: Refreshes the ephemeral token for a session
      operationId: refreshToken
      parameters:
        - {$ref: '#/components/parameters/SessionId'}
      responses:
        '200':
          description: Token refreshed
          content:
            application/json:
              schema:
                type: object
                properties:
                  success: {type: boolean}
                  # New token together with its expiry instant.
                  data:
                    type: object
                    properties:
                      ephemeral_token: {type: string}
                      expires_at: {type: string, format: date-time}
                  timestamp: {type: string, format: date-time}
        '401': {$ref: '#/components/responses/Unauthorized'}
        '404': {$ref: '#/components/responses/NotFound'}
        '500': {$ref: '#/components/responses/InternalServerError'}

  /voice/sessions:
    post:
      tags: [Voice]
      summary: Create voice session
      description: Creates a voice session for real-time TTS/ASR operations
      operationId: createVoiceSession
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateVoiceSessionRequest'
            example:
              session_id: 'sess-abc123'
              provider: {asr: 'deepgram', tts: 'elevenlabs'}
              voice_config:
                voice_id: '21m00Tcm4TlvDq8ikWAM'
                model_id: 'eleven_multilingual_v2'
                stability: 0.5
                similarity_boost: 0.75
      responses:
        '201':
          description: Voice session created
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/VoiceSessionResponse'
        '400': {$ref: '#/components/responses/BadRequest'}
        '401': {$ref: '#/components/responses/Unauthorized'}
        '404': {$ref: '#/components/responses/NotFound'}
        '500': {$ref: '#/components/responses/InternalServerError'}

  /voice/sessions/{sessionId}/transcribe:
    post:
      tags: [Voice]
      summary: Transcribe audio (ASR)
      description: Transcribes audio to text using speech-to-text service
      operationId: transcribeAudio
      parameters:
        - $ref: '#/components/parameters/SessionId'
      requestBody:
        required: true
        # Both accepted encodings live under ONE `content` map, keyed by media
        # type. A second `content` key would be a duplicate mapping key, and
        # parsers that keep the last value would drop the multipart variant.
        content:
          # Variant 1: raw audio file upload.
          multipart/form-data:
            schema:
              type: object
              required:
                - audio
              properties:
                audio:
                  type: string
                  format: binary
                  description: Audio file (WAV, MP3, OGG, etc.)
                language:
                  type: string
                  description: Language code (optional, auto-detect if not provided)
                  example: "en-US"
                format:
                  type: string
                  enum: [wav, mp3, ogg, webm]
                  default: "wav"
                include_words:
                  type: boolean
                  description: Include word-level timestamps
                  default: false
          # Variant 2: Base64-encoded audio embedded in a JSON body.
          application/json:
            schema:
              type: object
              required:
                - audio_data
              properties:
                audio_data:
                  type: string
                  format: byte
                  description: Base64-encoded audio data
                language:
                  type: string
                  example: "en-US"
                include_words:
                  type: boolean
                  default: false
      responses:
        '200':
          description: Transcription result
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/TranscriptionResponse'
        '400':
          $ref: '#/components/responses/BadRequest'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'
        '500':
          $ref: '#/components/responses/InternalServerError'

  /voice/sessions/{sessionId}/transcribe-stream:
    post:
      tags: [Voice]
      summary: Stream transcription (ASR)
      description: Streams audio for real-time transcription (returns SSE stream)
      operationId: transcribeAudioStream
      parameters:
        - {$ref: '#/components/parameters/SessionId'}
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              type: object
              required: [audio_stream]
              properties:
                audio_stream:
                  type: string
                  format: binary
                  description: Audio stream
                language: {type: string, example: 'en-US'}
      responses:
        '200':
          description: Transcription event stream (SSE)
          content:
            text/event-stream:
              schema: {type: string}
              # Sample stream: one `partial` event followed by one `final` event.
              example: |
                event: partial
                data: {"type":"partial","text":"Hello, how can I","confidence":0.95,"timestamp":1704067200}

                event: final
                data: {"type":"final","text":"Hello, how can I help you today?","confidence":0.98,"timestamp":1704067210,"words":[{"word":"Hello","start_time":0.0,"end_time":0.5},{"word":"how","start_time":0.6,"end_time":0.8}]}
        '400': {$ref: '#/components/responses/BadRequest'}
        '401': {$ref: '#/components/responses/Unauthorized'}
        '404': {$ref: '#/components/responses/NotFound'}
        '500': {$ref: '#/components/responses/InternalServerError'}

  /voice/sessions/{sessionId}/synthesize:
    post:
      tags: [Voice]
      summary: Synthesize speech (TTS)
      description: Converts text to speech audio
      operationId: synthesizeSpeech
      parameters:
        - $ref: '#/components/parameters/SessionId'
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/SynthesizeRequest'
            example:
              text: "Hello, how can I help you today?"
              voice_config:
                voice_id: "21m00Tcm4TlvDq8ikWAM"
                model_id: "eleven_multilingual_v2"
                stability: 0.5
                similarity_boost: 0.75
              format: "mp3"
              sample_rate: 44100
      responses:
        '200':
          description: Audio synthesis result
          # All response representations live under ONE `content` map, keyed by
          # media type. A second `content` key would be a duplicate mapping key,
          # and parsers that keep the last value would drop the JSON variant.
          content:
            # JSON envelope: Base64 audio plus metadata and viseme events.
            application/json:
              schema:
                type: object
                properties:
                  success:
                    type: boolean
                  data:
                    type: object
                    properties:
                      audio_data:
                        type: string
                        format: byte
                        description: Base64-encoded audio data
                      format:
                        type: string
                        example: "mp3"
                      sample_rate:
                        type: integer
                        example: 44100
                      duration:
                        type: number
                        format: float
                        example: 2.5
                      visemes:
                        type: array
                        items:
                          $ref: '#/components/schemas/VisemeEvent'
                  timestamp:
                    type: string
                    format: date-time
            # Raw binary audio representations.
            audio/mpeg:
              schema:
                type: string
                format: binary
            audio/wav:
              schema:
                type: string
                format: binary
        '400':
          $ref: '#/components/responses/BadRequest'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'
        '500':
          $ref: '#/components/responses/InternalServerError'

  /voice/sessions/{sessionId}/synthesize-stream:
    post:
      tags: [Voice]
      summary: Stream speech synthesis (TTS)
      description: Streams text for real-time speech synthesis (returns audio stream)
      operationId: synthesizeSpeechStream
      parameters:
        - $ref: '#/components/parameters/SessionId'
      requestBody:
        required: true
        content:
          application/json:
            # Same body as the non-streaming synthesize operation. Referencing
            # the shared component (text, voice_config, format, sample_rate with
            # identical enums and defaults) keeps the two from drifting apart.
            schema:
              $ref: '#/components/schemas/SynthesizeRequest'
      responses:
        '200':
          description: Audio stream
          content:
            audio/mpeg:
              schema:
                type: string
                format: binary
            audio/wav:
              schema:
                type: string
                format: binary
        '400':
          $ref: '#/components/responses/BadRequest'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'
        '500':
          $ref: '#/components/responses/InternalServerError'

  /voice/sessions/{sessionId}/visemes:
    post:
      tags: [Voice]
      summary: Get viseme events
      description: Returns viseme (lip shape) events for text, used for avatar lip sync
      operationId: getVisemes
      parameters:
        - {$ref: '#/components/parameters/SessionId'}
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              required: [text]
              properties:
                text:
                  type: string
                  example: 'Hello, how can I help you today?'
      responses:
        '200':
          description: Viseme events
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/VisemeResponse'
        '400': {$ref: '#/components/responses/BadRequest'}
        '401': {$ref: '#/components/responses/Unauthorized'}
        '404': {$ref: '#/components/responses/NotFound'}
        '500': {$ref: '#/components/responses/InternalServerError'}

  /voice/sessions/{sessionId}/status:
    get:
      tags: [Voice]
      summary: Get voice session status
      description: Returns the status of a voice session
      operationId: getVoiceSessionStatus
      parameters:
        - {$ref: '#/components/parameters/SessionId'}
      responses:
        '200':
          description: Voice session status
          content:
            application/json:
              schema:
                type: object
                properties:
                  success: {type: boolean}
                  data:
                    type: object
                    properties:
                      session_id: {type: string}
                      state:
                        type: string
                        enum: [idle, listening, thinking, speaking]
                      # ASR and TTS each report one of the same three statuses.
                      asr_status:
                        type: string
                        enum: [active, inactive, error]
                      tts_status:
                        type: string
                        enum: [active, inactive, error]
                      provider:
                        type: object
                        properties:
                          asr: {type: string}
                          tts: {type: string}
                  timestamp: {type: string, format: date-time}
        '401': {$ref: '#/components/responses/Unauthorized'}
        '404': {$ref: '#/components/responses/NotFound'}
        '500': {$ref: '#/components/responses/InternalServerError'}

  /voice/providers:
    get:
      tags: [Providers]
      summary: List voice providers
      description: Returns a list of available TTS and ASR providers
      operationId: listVoiceProviders
      responses:
        '200':
          description: List of providers
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ProviderListResponse'
        '401': {$ref: '#/components/responses/Unauthorized'}
        '500': {$ref: '#/components/responses/InternalServerError'}

  /voice/webhooks:
    post:
      tags: [Voice]
      summary: Register webhook
      description: Registers a webhook URL for voice session events
      operationId: registerVoiceWebhook
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/RegisterWebhookRequest'
            example:
              url: 'https://api.example.com/webhooks/voice'
              events:
                - 'transcription.complete'
                - 'synthesis.complete'
                - 'session.state_changed'
              secret: 'webhook_secret_token'
      responses:
        '201':
          description: Webhook registered
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/WebhookResponse'
        '400': {$ref: '#/components/responses/BadRequest'}
        '401': {$ref: '#/components/responses/Unauthorized'}
        '403': {$ref: '#/components/responses/Forbidden'}
        '500': {$ref: '#/components/responses/InternalServerError'}

  /realtime/{sessionId}:
    get:
      tags: [Conversation]
      summary: WebRTC signaling endpoint
      description: WebSocket endpoint for WebRTC signaling (SDP exchange, ICE candidates)
      operationId: webrtcSignaling
      parameters:
        - {$ref: '#/components/parameters/SessionId'}
      responses:
        # Success is a protocol upgrade rather than a 2xx response with a body.
        '101':
          description: Switching protocols to WebSocket
          headers:
            Upgrade:
              schema: {type: string, example: 'websocket'}
            Connection:
              schema: {type: string, example: 'Upgrade'}
        '400': {$ref: '#/components/responses/BadRequest'}
        '401': {$ref: '#/components/responses/Unauthorized'}
        '404': {$ref: '#/components/responses/NotFound'}

components:
  securitySchemes:
    # HTTP bearer authentication; referenced by name from `security` lists.
    BearerAuth:
      description: JWT token or ephemeral session token
      type: http
      scheme: bearer
      bearerFormat: JWT

  parameters:
    # Path parameter referenced by the `{sessionId}` routes.
    SessionId:
      name: sessionId
      in: path
      description: Session ID
      required: true
      schema: {type: string, example: 'sess-abc123'}

  schemas:
    # Request body for creating a session; portal_context is optional.
    CreateSessionRequest:
      type: object
      required: [tenant_id, user_id, auth_assertion]
      properties:
        tenant_id:
          type: string
          description: Tenant identifier
          example: 'tenant-123'
        user_id:
          type: string
          description: User identifier
          example: 'user-456'
        auth_assertion:
          type: string
          description: JWT authentication token
          example: 'jwt-token'
        portal_context:
          type: object
          description: Portal context for session
          properties:
            route: {type: string, example: '/account'}
            account_id: {type: string, example: 'acc-789'}

    # Request body for creating a voice session; only session_id is required.
    CreateVoiceSessionRequest:
      type: object
      required: [session_id]
      properties:
        session_id:
          type: string
          description: Parent session ID
          example: 'sess-abc123'
        provider:
          type: object
          properties:
            # Both provider selectors default to the mock implementation.
            asr:
              type: string
              enum: [mock, deepgram, google]
              default: 'mock'
              example: 'deepgram'
            tts:
              type: string
              enum: [mock, elevenlabs, azure]
              default: 'mock'
              example: 'elevenlabs'
        voice_config:
          $ref: '#/components/schemas/VoiceConfig'

    # Voice tuning options; every property is optional.
    VoiceConfig:
      type: object
      properties:
        voice_id:
          description: Voice ID (provider-specific)
          type: string
          example: '21m00Tcm4TlvDq8ikWAM'
        model_id:
          description: Model ID (provider-specific)
          type: string
          example: 'eleven_multilingual_v2'
        # The three numeric knobs below are all bounded to the closed range 0.0-1.0.
        stability:
          description: Stability parameter (0.0-1.0)
          type: number
          format: float
          minimum: 0.0
          maximum: 1.0
          example: 0.5
        similarity_boost:
          description: Similarity boost parameter (0.0-1.0)
          type: number
          format: float
          minimum: 0.0
          maximum: 1.0
          example: 0.75
        style:
          description: Style parameter (0.0-1.0, ElevenLabs)
          type: number
          format: float
          minimum: 0.0
          maximum: 1.0
        use_speaker_boost:
          description: Enable speaker boost (ElevenLabs)
          type: boolean

    # Request body for text-to-speech synthesis; only `text` is required.
    SynthesizeRequest:
      type: object
      required: [text]
      properties:
        text:
          type: string
          description: Text to synthesize
          example: 'Hello, how can I help you today?'
        voice_config:
          $ref: '#/components/schemas/VoiceConfig'
        format:
          type: string
          enum: [mp3, wav, pcm]
          default: 'mp3'
          example: 'mp3'
        sample_rate:
          type: integer
          enum: [16000, 22050, 44100]
          default: 44100
          example: 44100

    # Request body for registering a webhook; url and events are required.
    RegisterWebhookRequest:
      type: object
      required: [url, events]
      properties:
        url:
          type: string
          format: uri
          description: Webhook URL
          example: 'https://api.example.com/webhooks/voice'
        events:
          type: array
          description: Events to subscribe to
          items:
            type: string
            # Block sequence keeps the list within the line-length limit.
            enum:
              - transcription.complete
              - transcription.partial
              - synthesis.complete
              - session.state_changed
              - session.created
              - session.ended
          example: ['transcription.complete', 'synthesis.complete']
        secret:
          type: string
          description: Webhook secret for signature verification
          example: 'webhook_secret_token'
        active:
          type: boolean
          default: true

    # Session resource as returned inside the response envelopes below.
    Session:
      type: object
      properties:
        session_id: {type: string}
        tenant_id: {type: string}
        user_id: {type: string}
        ephemeral_token: {type: string}
        config:
          type: object
          properties:
            theme:
              type: object
              properties:
                primaryColor: {type: string}
            avatar_enabled: {type: boolean}
            greeting: {type: string}
            allowed_tools:
              type: array
              items: {type: string}
            policy:
              type: object
              properties:
                max_session_duration_minutes: {type: integer}
                rate_limit_per_minute: {type: integer}
                require_consent: {type: boolean}
        expires_at: {type: string, format: date-time}
        created_at: {type: string, format: date-time}

    # BaseResponse envelope whose `data` is a single Session.
    SessionResponse:
      allOf:
        - {$ref: '#/components/schemas/BaseResponse'}
        - type: object
          properties:
            data: {$ref: '#/components/schemas/Session'}

    # BaseResponse envelope whose `data.sessions` is an array of Session.
    SessionListResponse:
      allOf:
        - {$ref: '#/components/schemas/BaseResponse'}
        - type: object
          properties:
            data:
              type: object
              properties:
                sessions:
                  type: array
                  items: {$ref: '#/components/schemas/Session'}

    # Voice session resource: current state, chosen providers and voice config.
    VoiceSession:
      type: object
      properties:
        session_id: {type: string}
        state:
          type: string
          enum: [idle, listening, thinking, speaking]
        provider:
          type: object
          properties:
            asr: {type: string}
            tts: {type: string}
        voice_config: {$ref: '#/components/schemas/VoiceConfig'}
        created_at: {type: string, format: date-time}

    # BaseResponse envelope whose `data` is a VoiceSession.
    VoiceSessionResponse:
      allOf:
        - {$ref: '#/components/schemas/BaseResponse'}
        - type: object
          properties:
            data: {$ref: '#/components/schemas/VoiceSession'}

    # One transcription result, either partial or final, with optional words.
    TranscriptionEvent:
      type: object
      properties:
        type:
          type: string
          enum: [partial, final]
        text: {type: string}
        confidence:
          type: number
          format: float
          minimum: 0.0
          maximum: 1.0
        timestamp: {type: integer, format: int64}
        words:
          type: array
          items: {$ref: '#/components/schemas/Word'}

    # A single word with its start/end times and confidence.
    Word:
      type: object
      properties:
        word: {type: string}
        start_time: {type: number, format: float}
        end_time: {type: number, format: float}
        confidence:
          type: number
          format: float
          minimum: 0.0
          maximum: 1.0

    # BaseResponse envelope whose `data` is a TranscriptionEvent.
    TranscriptionResponse:
      allOf:
        - {$ref: '#/components/schemas/BaseResponse'}
        - type: object
          properties:
            data: {$ref: '#/components/schemas/TranscriptionEvent'}

    # One timed viseme with an optional phoneme label.
    VisemeEvent:
      type: object
      properties:
        viseme:
          description: Viseme identifier (e.g., "sil", "aa", "ee", "oh", "ou")
          type: string
          example: 'aa'
        start_time:
          description: Start time in seconds
          type: number
          format: float
          example: 0.1
        end_time:
          description: End time in seconds
          type: number
          format: float
          example: 0.3
        phoneme:
          description: Phoneme identifier (optional)
          type: string
          example: '/a/'

    # BaseResponse envelope carrying the text, its visemes and a duration.
    VisemeResponse:
      allOf:
        - {$ref: '#/components/schemas/BaseResponse'}
        - type: object
          properties:
            data:
              type: object
              properties:
                text: {type: string}
                visemes:
                  type: array
                  items: {$ref: '#/components/schemas/VisemeEvent'}
                duration: {type: number, format: float}

    # Descriptor of one ASR or TTS provider.
    Provider:
      type: object
      properties:
        id: {type: string}
        name: {type: string}
        type:
          type: string
          enum: [asr, tts]
        available: {type: boolean}
        configured: {type: boolean}
        features:
          type: array
          items: {type: string}
          example: ['streaming', 'word_timestamps', 'custom_voice']

    # BaseResponse envelope whose `data.providers` is an array of Provider.
    ProviderListResponse:
      allOf:
        - {$ref: '#/components/schemas/BaseResponse'}
        - type: object
          properties:
            data:
              type: object
              properties:
                providers:
                  type: array
                  items: {$ref: '#/components/schemas/Provider'}

    # Registered webhook as returned by the API (no `secret` property here).
    Webhook:
      type: object
      properties:
        webhook_id: {type: string}
        url: {type: string}
        events:
          type: array
          items: {type: string}
        active: {type: boolean}
        created_at: {type: string, format: date-time}

    # BaseResponse envelope whose `data` is a Webhook.
    WebhookResponse:
      allOf:
        - {$ref: '#/components/schemas/BaseResponse'}
        - type: object
          properties:
            data: {$ref: '#/components/schemas/Webhook'}

    # Common envelope fields; the *Response schemas extend this via allOf.
    BaseResponse:
      type: object
      properties:
        success: {type: boolean, example: true}
        timestamp: {type: string, format: date-time}

    # Error envelope used by the shared error responses.
    ErrorResponse:
      type: object
      properties:
        success: {type: boolean, example: false}
        error:
          type: object
          properties:
            code: {type: string, example: 'VALIDATION_ERROR'}
            message: {type: string, example: 'Invalid request parameters'}
            details: {type: object}
        timestamp: {type: string, format: date-time}

  # Reusable error responses; each one returns an ErrorResponse JSON body.
  responses:
    BadRequest:
      description: Bad request - validation error
      content:
        application/json:
          schema: {$ref: '#/components/schemas/ErrorResponse'}

    Unauthorized:
      description: Unauthorized - missing or invalid authentication
      content:
        application/json:
          schema: {$ref: '#/components/schemas/ErrorResponse'}

    Forbidden:
      description: Forbidden - insufficient permissions
      content:
        application/json:
          schema: {$ref: '#/components/schemas/ErrorResponse'}

    NotFound:
      description: Resource not found
      content:
        application/json:
          schema: {$ref: '#/components/schemas/ErrorResponse'}

    InternalServerError:
      description: Internal server error
      content:
        application/json:
          schema: {$ref: '#/components/schemas/ErrorResponse'}