# OpenAPI document header: specification version plus API identity.
# Version-like values are quoted so no parser can retype them.
openapi: "3.1.0"
info:
  title: Xantly Gateway API
  version: "1.0.0"
  # Folded scalar: the line breaks below collapse to single spaces.
  description: >
    Universal API Gateway for intelligent LLM routing,
    multi-agent orchestration, and cost-optimized inference.
    Production base URL is https://api.xantly.ai/v1.
    All requests require a Bearer token.

# Base URLs against which the relative entries under `paths` resolve.
servers:
  - description: Production
    url: 'https://api.xantly.ai/v1'
  - description: Local development
    url: 'http://localhost:8085/v1'

components:
  # Named authentication schemes that `security` requirements refer to.
  securitySchemes:
    # HTTP authentication using the `bearer` scheme (token sent in the
    # Authorization header).
    bearerAuth:
      type: http
      scheme: bearer

  schemas:
    # Request body of POST /chat/completions. Only `model` and `messages`
    # are required; every other property is optional.
    ChatCompletionRequest:
      type: object
      required:
        - model
        - messages
      properties:
        model:
          type: string
          example: auto
        messages:
          # At least one message must be supplied.
          type: array
          minItems: 1
          items:
            $ref: '#/components/schemas/ChatMessage'
        stream:
          type: boolean
          default: false
        n:
          type: integer
          minimum: 1
          maximum: 8
          default: 1
        max_tokens:
          type: integer
        temperature:
          type: number
          minimum: 0.0
          maximum: 2.0
        top_p:
          # Bounded like the other sampling parameters: top_p is a
          # cumulative probability, so only values in [0, 1] make sense.
          type: number
          minimum: 0.0
          maximum: 1.0
        frequency_penalty:
          type: number
          minimum: -2.0
          maximum: 2.0
          default: 0
        presence_penalty:
          type: number
          minimum: -2.0
          maximum: 2.0
          default: 0
        stop:
          # Either a single stop string or a list of them.
          oneOf:
            - type: string
            - type: array
              items:
                type: string
        tools:
          # Tool definitions are accepted as free-form objects.
          type: array
          items:
            type: object
        tool_choice:
          oneOf:
            - type: string
            - type: object
        response_format:
          type: object
        seed:
          type: integer
        user:
          type: string
        metadata:
          # String-to-string map; non-string values fail validation.
          type: object
          additionalProperties:
            type: string
        logprobs:
          type: boolean
          default: false
        top_logprobs:
          # Lower bound added to pair with the existing upper bound; a
          # negative count is meaningless.
          type: integer
          minimum: 0
          maximum: 20
        stream_options:
          type: object
          properties:
            include_usage:
              type: boolean
        reasoning_effort:
          type: string
          enum:
            - low
            - medium
            - high
        service_tier:
          type: string
        # Gateway-specific extensions, each defined by its own schema.
        routing_hints:
          $ref: '#/components/schemas/RoutingHints'
        routing_override:
          $ref: '#/components/schemas/RoutingOverride'
        xantly:
          $ref: '#/components/schemas/XantlyOrchestration'

    # A single conversation message; `role` and `content` are mandatory.
    # NOTE(review): ChatChoice.message reuses this schema and
    # finish_reason allows `tool_calls`. If tool-call replies can carry a
    # null or absent `content`, this schema would reject them — confirm.
    ChatMessage:
      type: object
      required:
        - role
        - content
      properties:
        role:
          type: string
          enum:
            - system
            - user
            - assistant
            - tool
        content:
          # A plain string, or an array whose item shape is unconstrained.
          oneOf:
            - type: string
            - type: array
        name:
          type: string
        tool_calls:
          type: array
          items:
            type: object
        tool_call_id:
          type: string

    # Routing preferences attached to a request; no property is required.
    # NOTE(review): max_tier is an integer (1-3) here, while
    # RoutingOverride.force_tier is a string (T1-T3) — confirm the
    # difference is intentional.
    RoutingHints:
      description: Soft routing preferences
      type: object
      properties:
        mode:
          type: string
          enum:
            - fast
            - balanced
            - quality
            - cost_optimized
            - free_models_only
        preference_dial:
          # Constrained to the closed interval [0, 1].
          type: number
          minimum: 0.0
          maximum: 1.0
        prefer_latency:
          type: boolean
        prefer_quality:
          type: boolean
        max_cost_per_token:
          type: number
        max_latency_ms:
          type: integer
        max_tier:
          type: integer
          enum:
            - 1
            - 2
            - 3
        required_capabilities:
          type: array
          items:
            type: string
        task_complexity:
          type: string
          enum:
            - trivial
            - standard
            - complex
            - expert
        chain_routing:
          type: string
          enum:
            - sticky
            - mixed
        allow_free_fallback:
          type: boolean

    # Explicit routing selections; every property is optional.
    RoutingOverride:
      description: Hard routing overrides
      type: object
      properties:
        force_tier:
          type: string
          enum:
            - T1
            - T2
            - T3
        force_lane:
          type: string
          enum:
            - smart
            - turbo
        # Provider and model are free-form strings; no enum is declared.
        force_provider:
          type: string
        force_model:
          type: string

    # Per-request orchestration switches carried in the `xantly` request
    # property. All properties are optional; several declare defaults.
    XantlyOrchestration:
      description: Orchestration controls
      type: object
      properties:
        workflow_type:
          type: string
          enum:
            - chat
            - execution
            - planning
            - research
        chain_id:
          type: string
        conversation_id:
          type: string
        planning_mode:
          type: string
        max_chain_steps:
          # Defaults to 10, capped at 50; no lower bound is declared.
          type: integer
          default: 10
          maximum: 50
        chain_timeout_secs:
          # Defaults to 120, capped at 600.
          type: integer
          default: 120
          maximum: 600
        chain_routing:
          type: string
          enum:
            - sticky
            - mixed
        reliability_level:
          type: string
          enum:
            - standard
            - high
            - critical
          default: standard
        enable_memory:
          type: boolean
          default: false
        # Speculation and hedging declare no default.
        enable_speculation:
          type: boolean
        enable_hedging:
          type: boolean
        enable_cache:
          type: boolean
          default: true
        cache_ttl_secs:
          # Upper bound 86400 equals the number of seconds in one day.
          type: integer
          maximum: 86400
        output_verification:
          type: string
          enum:
            - none
            - native
            - schema
            - cross_model
          default: none
        compress_context:
          type: boolean
          default: false
        redact_pii:
          type: boolean
          default: false
        voice_mode:
          type: string
        enable_tool_reranking:
          type: boolean
          default: false

    # Body of a successful POST /chat/completions response. No property
    # is declared as required.
    ChatCompletionResponse:
      type: object
      properties:
        id:
          type: string
        object:
          # Single-value enum: always the literal `chat.completion`.
          type: string
          enum:
            - chat.completion
        created:
          type: integer
        model:
          type: string
        choices:
          type: array
          items:
            $ref: '#/components/schemas/ChatChoice'
        usage:
          $ref: '#/components/schemas/Usage'
        xantly_metadata:
          $ref: '#/components/schemas/XantlyMetadata'

    # One entry of ChatCompletionResponse.choices.
    ChatChoice:
      type: object
      properties:
        index:
          type: integer
        message:
          $ref: '#/components/schemas/ChatMessage'
        finish_reason:
          type: string
          enum:
            - stop
            - length
            - tool_calls
            - content_filter

    # Token accounting attached to ChatCompletionResponse.usage.
    # All three counters are integers and none is marked required.
    Usage:
      type: object
      properties:
        prompt_tokens:
          type: integer
        completion_tokens:
          type: integer
        # presumably prompt_tokens + completion_tokens — TODO confirm
        total_tokens:
          type: integer

    # Gateway-specific details returned in
    # ChatCompletionResponse.xantly_metadata. No property is required.
    XantlyMetadata:
      type: object
      description: Routing and execution transparency
      properties:
        request_id:
          type: string
        routing_decision:
          type: string
        provider:
          type: string
        tier:
          type: string
        latency_ms:
          type: integer
        estimated_cost_usd:
          type: number
        healer_report:
          # OpenAPI 3.1 removed the 3.0 `nullable` keyword; nullability is
          # expressed by listing 'null' among the allowed types.
          type:
            - object
            - 'null'

    # Error envelope: a single `error` object holding the details.
    ErrorResponse:
      type: object
      properties:
        error:
          type: object
          properties:
            message:
              type: string
            type:
              type: string
            code:
              type: string
            param:
              # OpenAPI 3.1 removed the 3.0 `nullable` keyword; a nullable
              # string is declared by listing 'null' among the types.
              type:
                - string
                - 'null'

    # Budget record; GET /quotas/budgets returns an array of these.
    # NOTE(review): the POST /quotas/budgets request body names its
    # numeric field `limit`, whereas this schema exposes `amount` —
    # confirm whether both refer to the same value.
    Budget:
      type: object
      properties:
        id:
          type: string
        # camelCase here, unlike the snake_case used by the chat schemas.
        entityId:
          type: string
        amount:
          type: number
        used:
          type: number
        # Free-form string; no enum of allowed periods is declared.
        period:
          type: string
        currency:
          type: string

# Document-wide default: every operation that does not declare its own
# `security` requires bearerAuth. The empty list is the scope list,
# which is unused for HTTP bearer schemes.
security:
  - bearerAuth: []

paths:
  # Chat completion endpoint. Takes a ChatCompletionRequest and, on
  # success, returns a ChatCompletionResponse.
  /chat/completions:
    post:
      operationId: createChatCompletion
      summary: Create chat completion
      description: Routes to optimal model based on task complexity, cost, latency, and quality.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ChatCompletionRequest'
      responses:
        '200':
          description: Successful completion.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ChatCompletionResponse'
        '400':
          description: Validation error.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        # The error statuses below now document the same body as 400 so
        # clients can parse every failure uniformly.
        # NOTE(review): assumes the gateway emits the ErrorResponse
        # envelope for 401/429/500 as well — confirm against the server.
        '401':
          description: Authentication error.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '429':
          description: Rate limit exceeded.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '500':
          description: Internal server error.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'

  # Budget management: GET lists budgets, POST sets a limit.
  /quotas/budgets:
    get:
      operationId: listBudgets
      summary: List budgets
      responses:
        '200':
          description: Array of budgets.
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/Budget'
    post:
      operationId: setBudgetLimit
      summary: Set budget limit
      requestBody:
        required: true
        content:
          application/json:
            # Inline schema; none of the three properties is marked
            # required, so an empty object passes validation.
            # NOTE(review): confirm whether entityId/limit should be
            # required.
            schema:
              type: object
              properties:
                entityId:
                  type: string
                limit:
                  type: number
                period:
                  type: string
      responses:
        # No response body is documented for the confirmation.
        '200':
          description: Updated budget confirmation.
