# Compose stack: two services that each reserve an NVIDIA GPU, plus a gateway
# that is handed the URLs of both and starts only once they report healthy.

services:
  # Built from ./whisper-service and published on host port 8001.
  whisper-service:
    build: ./whisper-service
    ports:
      - "8001:8001"
    volumes:
      # Named volume that pyannote-service also mounts at /data.
      - shared-data:/data
      # Dedicated named volume for the Hugging Face cache directory.
      - whisper-cache:/root/.cache/huggingface
    environment:
      # ${VAR:-default} reads VAR from the host environment / .env file and
      # falls back to the default when VAR is unset or empty.
      - WHISPER_MODEL=${WHISPER_MODEL:-large-v3}
      # Device and compute type are fixed here, not overridable from the host.
      - WHISPER_DEVICE=cuda
      - WHISPER_COMPUTE_TYPE=float16
      - WHISPER_BATCH_SIZE=${WHISPER_BATCH_SIZE:-16}
      # NOTE(review): the unit of this value is not visible in this file;
      # confirm against the service code.
      - WHISPER_UNLOAD_AFTER=10
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU for this container.
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    healthcheck:
      # Runs inside the container, so curl must exist in the image
      # (confirm against the Dockerfile).
      test: ["CMD", "curl", "-f", "http://localhost:8001/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Failures during the first 60s do not count towards `retries`.
      start_period: 60s
    restart: unless-stopped

  # Built from ./pyannote-service and published on host port 8002.
  pyannote-service:
    build: ./pyannote-service
    ports:
      - "8002:8002"
    volumes:
      # Named volume that whisper-service also mounts at /data.
      - shared-data:/data
      # Dedicated named volume for the Hugging Face cache directory.
      - pyannote-cache:/root/.cache/huggingface
    environment:
      # Passed through from the host environment / .env file. There is no
      # default, so the container receives an empty value when it is unset.
      - HF_TOKEN=${HF_TOKEN}
      # Overridable from the host; defaults to cuda.
      - PYANNOTE_DEVICE=${PYANNOTE_DEVICE:-cuda}
      # NOTE(review): the unit of this value is not visible in this file;
      # confirm against the service code.
      - PYANNOTE_UNLOAD_AFTER=10
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU for this container.
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    healthcheck:
      # Runs inside the container, so curl must exist in the image
      # (confirm against the Dockerfile).
      test: ["CMD", "curl", "-f", "http://localhost:8002/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Longer grace period than whisper-service (120s vs 60s).
      start_period: 120s
    restart: unless-stopped

  # Built from ./gateway and published on host port 8000.
  gateway:
    build: ./gateway
    ports:
      - "8000:8000"
    environment:
      # Service names resolve on the Compose network, so these URLs point at
      # the two containers defined above.
      - WHISPER_URL=http://whisper-service:8001
      - PYANNOTE_URL=http://pyannote-service:8002
    depends_on:
      # Start the gateway only after both healthchecks have passed.
      whisper-service:
        condition: service_healthy
      pyannote-service:
        condition: service_healthy
    restart: unless-stopped

# Named volumes with default driver settings.
volumes:
  shared-data:
  whisper-cache:
  pyannote-cache: