services:

  # Whisper service: built from ./whisper-service and published on host port
  # 8001. One NVIDIA GPU is reserved for the container (see deploy below).
  whisper-service:
    build: ./whisper-service
    ports:
      - "8001:8001"
    volumes:
      # Named volume that pyannote-service also mounts at /data.
      - shared-data:/data
      # Keeps the Hugging Face cache directory across container re-creation.
      - whisper-cache:/root/.cache/huggingface
    environment:
      # Every value can be overridden from the shell or an .env file; the
      # defaults after ":-" are used when the variable is unset or empty.
      - WHISPER_MODEL=${WHISPER_MODEL:-large-v3}
      # NOTE(review): the GPU reservation under deploy stays in place
      # whatever this is set to, so a non-cuda value still needs an NVIDIA
      # host. Confirm whether CPU-only runs should be supported.
      - WHISPER_DEVICE=${WHISPER_DEVICE:-cuda}
      - WHISPER_COMPUTE_TYPE=${WHISPER_COMPUTE_TYPE:-float16}
      - WHISPER_BATCH_SIZE=${WHISPER_BATCH_SIZE:-16}
      # NOTE(review): the unit of this value is not visible in this file;
      # confirm against the service code.
      - WHISPER_UNLOAD_AFTER=${WHISPER_UNLOAD_AFTER:-10}
    deploy:
      resources:
        reservations:
          devices:
            # Reserve exactly one GPU through the NVIDIA driver.
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    healthcheck:
      # NOTE(review): this needs curl inside the image; confirm that the
      # Dockerfile installs it. gateway waits for this check to pass.
      test: ["CMD", "curl", "-f", "http://localhost:8001/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Failed probes during the first 60s do not count towards retries.
      start_period: 60s
    restart: unless-stopped

  # Pyannote service: built from ./pyannote-service and published on host
  # port 8002. One NVIDIA GPU is reserved for the container (see deploy below).
  pyannote-service:
    build: ./pyannote-service
    ports:
      - "8002:8002"
    volumes:
      # Named volume that whisper-service also mounts at /data.
      - shared-data:/data
      # Keeps the Hugging Face cache directory across container re-creation.
      - pyannote-cache:/root/.cache/huggingface
    environment:
      # Taken from the shell or an .env file. When it is unset, Compose warns
      # and passes an empty string into the container.
      - HF_TOKEN=${HF_TOKEN}
      # NOTE(review): the GPU reservation under deploy stays in place
      # whatever this is set to, so a non-cuda value still needs an NVIDIA
      # host. Confirm whether CPU-only runs should be supported.
      - PYANNOTE_DEVICE=${PYANNOTE_DEVICE:-cuda}
      # Overridable like PYANNOTE_DEVICE; the default keeps the old value.
      # NOTE(review): the unit of this value is not visible in this file;
      # confirm against the service code.
      - PYANNOTE_UNLOAD_AFTER=${PYANNOTE_UNLOAD_AFTER:-10}
    deploy:
      resources:
        reservations:
          devices:
            # Reserve exactly one GPU through the NVIDIA driver.
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    healthcheck:
      # NOTE(review): this needs curl inside the image; confirm that the
      # Dockerfile installs it. gateway waits for this check to pass.
      test: ["CMD", "curl", "-f", "http://localhost:8002/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Failed probes during the first 120s do not count towards retries.
      start_period: 120s
    restart: unless-stopped

  # Gateway service: built from ./gateway and published on host port 8000.
  gateway:
    build:
      context: ./gateway
    ports:
      - "8000:8000"
    environment:
      # Backends are addressed by their Compose service names.
      WHISPER_URL: "http://whisper-service:8001"
      PYANNOTE_URL: "http://pyannote-service:8002"
    depends_on:
      # Start only once both backends pass their healthchecks.
      pyannote-service:
        condition: service_healthy
      whisper-service:
        condition: service_healthy
    restart: unless-stopped

# Named volumes, each using the default driver and options.
volumes:
  # Mounted at /data by both whisper-service and pyannote-service.
  shared-data: {}
  # Hugging Face cache directory of whisper-service.
  whisper-cache: {}
  # Hugging Face cache directory of pyannote-service.
  pyannote-cache: {}