---
# Docker Compose stack: two GPU-backed services (whisper-service,
# pyannote-service) plus a gateway that is started only after both report
# healthy.

services:
  whisper-service:
    build: ./whisper-service
    ports:
      - "8001:8001"
    volumes:
      - shared-data:/data
      # Named volume mounted over the Hugging Face cache directory so its
      # contents survive container re-creation.
      - whisper-cache:/root/.cache/huggingface
    environment:
      # ${VAR:-default}: value comes from the host environment / .env file,
      # falling back to the default after ":-" when unset or empty.
      - WHISPER_MODEL=${WHISPER_MODEL:-large-v3}
      - WHISPER_DEVICE=cuda
      - WHISPER_COMPUTE_TYPE=float16
      - WHISPER_BATCH_SIZE=${WHISPER_BATCH_SIZE:-16}
      # NOTE(review): presumably an idle threshold before the model is
      # unloaded; the unit is not visible here - confirm in the service code.
      - WHISPER_UNLOAD_AFTER=10
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU for this container.
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    healthcheck:
      # Exec-form check run inside the container, so the image must
      # provide curl.
      test: ["CMD", "curl", "-f", "http://localhost:8001/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Failed probes during this window do not count towards "retries".
      start_period: 60s
    restart: unless-stopped

  pyannote-service:
    build: ./pyannote-service
    ports:
      - "8002:8002"
    volumes:
      - shared-data:/data
      # Separate named volume for this service's Hugging Face cache.
      - pyannote-cache:/root/.cache/huggingface
    environment:
      # No default: if HF_TOKEN is unset on the host, Compose substitutes an
      # empty string (and prints a warning).
      - HF_TOKEN=${HF_TOKEN}
      - PYANNOTE_DEVICE=${PYANNOTE_DEVICE:-cuda}
      # NOTE(review): presumably the same meaning as WHISPER_UNLOAD_AFTER;
      # confirm in the service code.
      - PYANNOTE_UNLOAD_AFTER=10
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU for this container.
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    healthcheck:
      # Exec-form check run inside the container, so the image must
      # provide curl.
      test: ["CMD", "curl", "-f", "http://localhost:8002/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Longer grace period than whisper-service (120s vs 60s).
      start_period: 120s
    restart: unless-stopped

  gateway:
    build: ./gateway
    ports:
      - "8000:8000"
    environment:
      # Backends are addressed by Compose service name on the project network.
      - WHISPER_URL=http://whisper-service:8001
      - PYANNOTE_URL=http://pyannote-service:8002
    depends_on:
      # Start the gateway only once both backends pass their healthchecks.
      whisper-service:
        condition: service_healthy
      pyannote-service:
        condition: service_healthy
    restart: unless-stopped

# Named volumes with default driver settings (no per-volume options).
volumes:
  shared-data:
  whisper-cache:
  pyannote-cache: