This commit is contained in:
Михаил Краевский 2025-09-10 21:41:42 +03:00
parent f7ed0cb14e
commit 1ca7efe4d1
14 changed files with 3020 additions and 2164 deletions

View File

@ -24,10 +24,13 @@ OPENAI_MODEL=gpt-4o-mini
OPENAI_EMBEDDINGS_MODEL=text-embedding-3-small
# LiveKit Configuration (for interview feature)
LIVEKIT_URL=ws://localhost:7880
LIVEKIT_URL=wss://hackaton-eizc9zqk.livekit.cloud
LIVEKIT_API_KEY=devkey
LIVEKIT_API_SECRET=devkey_secret_32chars_minimum_length
# App Configuration
APP_ENV=development
DEBUG=true
# Domain for Caddy (use your domain for automatic HTTPS)
DOMAIN=hr.aiquity.xyz

2
.env.local Normal file
View File

@ -0,0 +1,2 @@
NEXT_PUBLIC_API_BASE_URL=http://localhost:8000/api
NEXT_PUBLIC_LIVEKIT_URL=ws://localhost:7880

106
Caddyfile Normal file
View File

@ -0,0 +1,106 @@
# Caddyfile for HR AI Backend with automatic HTTPS
# Environment variable DOMAIN will be used, defaults to localhost
# NOTE(review): when DOMAIN is unset this block's address is "localhost",
# which collides with the explicit localhost block below - Caddy rejects
# duplicate site addresses; confirm with `caddy validate`.
{$DOMAIN:localhost} {
    # Backend API routes
    handle /api/* {
        reverse_proxy backend:8000
    }

    # Health check endpoint
    handle /health {
        reverse_proxy backend:8000
    }

    # LiveKit WebSocket and HTTP endpoints
    handle /livekit/* {
        reverse_proxy livekit:7880
    }

    # LiveKit WebSocket upgrade
    handle /rtc {
        reverse_proxy livekit:7880
    }

    # Frontend (SPA) - serve everything else
    handle {
        reverse_proxy frontend:3000

        # SPA fallback - serve index.html for all non-API routes
        # NOTE(review): the `file` matcher resolves {path} against a `root`
        # directive, which is not set here; combined with reverse_proxy in
        # the same handle this fallback probably never matches - verify.
        @notapi {
            not path /api/*
            not path /health
            not path /livekit/*
            not path /rtc
            file {
                try_files {path} /index.html
            }
        }
        rewrite @notapi /index.html
    }

    # Enable gzip compression
    encode gzip

    # Security headers
    header {
        # HSTS
        Strict-Transport-Security "max-age=31536000; includeSubDomains; preload"
        # XSS Protection
        X-Content-Type-Options "nosniff"
        X-Frame-Options "DENY"
        # NOTE(review): X-XSS-Protection is deprecated in current browsers;
        # harmless, but can be dropped.
        X-XSS-Protection "1; mode=block"
        # CORS for API (adjust origins as needed)
        # NOTE(review): wildcard origin - tighten before exposing
        # authenticated endpoints publicly.
        Access-Control-Allow-Origin "*"
        Access-Control-Allow-Methods "GET, POST, PUT, DELETE, OPTIONS"
        Access-Control-Allow-Headers "Content-Type, Authorization"
    }

    # Logging
    log {
        output file /var/log/caddy/access.log
        format json
    }
}

# Development/localhost configuration (no HTTPS)
localhost {
    # Backend API routes
    handle /api/* {
        reverse_proxy backend:8000
    }

    # Health check endpoint
    handle /health {
        reverse_proxy backend:8000
    }

    # LiveKit endpoints
    handle /livekit/* {
        reverse_proxy livekit:7880
    }
    handle /rtc {
        reverse_proxy livekit:7880
    }

    # Frontend
    handle {
        reverse_proxy frontend:3000
        @notapi {
            not path /api/*
            not path /health
            not path /livekit/*
            not path /rtc
            file {
                try_files {path} /index.html
            }
        }
        rewrite @notapi /index.html
    }

    encode gzip
}

75
Dockerfile Normal file
View File

@ -0,0 +1,75 @@
# Backend image for HR AI (pinned to amd64 for the cloud registry/hosts).
FROM --platform=linux/amd64 python:3.11-slim

# Set working directory
WORKDIR /app

# Install system dependencies: gcc/g++ for native wheels, curl for the
# HEALTHCHECK below, git for VCS-pinned packages.
RUN apt-get update && apt-get install -y \
    gcc \
    g++ \
    curl \
    git \
    && rm -rf /var/lib/apt/lists/*

# Install uv for faster package management
RUN pip install uv

# Copy dependency files
COPY pyproject.toml uv.lock ./

# Install Python dependencies
RUN uv sync --frozen --no-dev

# Install Playwright and Chromium for PDF generation
RUN uv run playwright install-deps
RUN uv run playwright install chromium

# Copy application code
COPY . .

# Create directories for agent communication
RUN mkdir -p /tmp/agent_commands

# Expose the port
EXPOSE 8000

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

# Create startup script (heredoc COPY requires BuildKit; \$ keeps variables
# unexpanded at build time so they are evaluated when the script runs).
COPY <<EOF /app/start.sh
#!/bin/bash
set -e

# Load environment variables from .env file if it exists
# Only set variables that are not already set
if [ -f .env ]; then
    echo "Loading environment variables from .env file..."
    set -a # automatically export all variables
    source .env
    set +a # stop auto-exporting
fi

# Run database migrations
echo "Running database migrations..."
uv run alembic upgrade head

# Start Celery worker in background
echo "Starting Celery worker in background..."
uv run celery -A celery_worker.celery_app worker --loglevel=info --pool=solo &

# Start FastAPI server
# NOTE(review): dev serves main.py while production serves app.main:app -
# confirm both entry points exist and stay in sync.
if [ "\$APP_ENV" = "development" ]; then
    echo "Starting FastAPI development server..."
    exec uv run fastapi dev main.py --host 0.0.0.0 --port 8000
else
    echo "Starting FastAPI production server..."
    exec uv run uvicorn app.main:app --host 0.0.0.0 --port 8000
fi
EOF

RUN chmod +x /app/start.sh

# Default command
CMD ["/app/start.sh"]

View File

@ -42,7 +42,7 @@ POST /api/interview/{resumeId}/token
{
"token": "livekit_jwt_token_here",
"roomName": "interview_room_123",
"serverUrl": "wss://your-livekit-server.com"
"serverUrl": "ws://your-livekit-server.com"
}
```
@ -50,7 +50,7 @@ POST /api/interview/{resumeId}/token
#### Environment Variables
```env
NEXT_PUBLIC_LIVEKIT_URL=wss://your-livekit-server.com
NEXT_PUBLIC_LIVEKIT_URL=ws://your-livekit-server.com
LIVEKIT_API_KEY=your_api_key
LIVEKIT_API_SECRET=your_api_secret
```

View File

@ -82,6 +82,28 @@ class InterviewAgent:
return self.sections[self.current_section]
return {}
def _format_questions(self, questions):
"""
Форматирует список вопросов в строку, независимо от их структуры
"""
if not questions:
return "Нет вопросов"
formatted = []
for question in questions:
if isinstance(question, str):
# Простая строка
formatted.append(question)
elif isinstance(question, dict):
# Объект с полями (например, из LLM генерации)
question_text = question.get("question", question.get("text", str(question)))
formatted.append(question_text)
else:
# Любой другой тип - приводим к строке
formatted.append(str(question))
return ", ".join(formatted)
def get_next_question(self) -> str:
"""Получить следующий вопрос"""
section = self.get_current_section()
@ -129,7 +151,7 @@ class InterviewAgent:
# Формируем план интервью для агента
sections_info = "\n".join(
[
f"- {section.get('name', 'Секция')}: {', '.join(section.get('questions', []))}"
f"- {section.get('name', 'Секция')}: {self._format_questions(section.get('questions', []))}"
for section in self.sections
]
)
@ -748,10 +770,23 @@ async def entrypoint(ctx: JobContext):
def main():
logging.basicConfig(level=logging.INFO)
asyncio.set_event_loop_policy(
asyncio.WindowsSelectorEventLoopPolicy()
) # фикс для Windows
# Настройка логирования для продакшена
if os.getenv("APP_ENV") == "production":
logging.basicConfig(
level=logging.INFO,
format='{"timestamp": "%(asctime)s", "level": "%(levelname)s", "logger": "%(name)s", "message": "%(message)s", "module": "%(module)s", "line": %(lineno)d}',
datefmt='%Y-%m-%dT%H:%M:%S'
)
else:
logging.basicConfig(
level=logging.DEBUG if os.getenv("DEBUG") == "true" else logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Устанавливаем политику цикла событий только для Windows
if os.name == "nt": # Windows
asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))

View File

@ -1,4 +1,6 @@
import logging
from celery import Celery
from celery.signals import setup_logging
from rag.settings import settings
@ -15,4 +17,24 @@ celery_app.conf.update(
result_serializer="json",
timezone="UTC",
enable_utc=True,
worker_log_format="[%(asctime)s: %(levelname)s/%(processName)s] %(message)s",
worker_task_log_format="[%(asctime)s: %(levelname)s/%(processName)s][%(task_name)s(%(task_id)s)] %(message)s",
task_acks_late=True,
worker_prefetch_multiplier=1,
task_reject_on_worker_lost=True,
result_extended=True,
)
@setup_logging.connect
def config_loggers(*args, **kwargs):
    """Configure logging for Celery worker.

    Connecting a handler to Celery's ``setup_logging`` signal prevents
    Celery from hijacking the root logger with its own configuration, so
    this basicConfig is the single source of truth for worker log format.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="[%(asctime)s: %(levelname)s/%(name)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
    # Set specific log levels for Celery's own loggers.
    logging.getLogger("celery").setLevel(logging.INFO)
    logging.getLogger("celery.worker").setLevel(logging.INFO)
    logging.getLogger("celery.task").setLevel(logging.INFO)

View File

@ -1,4 +1,3 @@
import asyncio
import json
import logging
from datetime import datetime
@ -100,16 +99,45 @@ def generate_interview_report(resume_id: int):
# Сохраняем отчет в БД
report_instance = _save_report_to_db(db, resume_id, report)
# Генерируем и загружаем PDF отчет
# Запускаем отдельную задачу для генерации PDF
if report_instance:
asyncio.run(
_generate_and_upload_pdf_report(
db,
report_instance,
resume.applicant_name,
vacancy.get("title", "Unknown Position"),
resume.resume_file_url,
)
from celery_worker.tasks import generate_pdf_report_task
report_data = {
"id": report_instance.id,
"interview_session_id": report_instance.interview_session_id,
"overall_score": report_instance.overall_score,
"technical_skills_score": report_instance.technical_skills_score,
"technical_skills_justification": report_instance.technical_skills_justification,
"technical_skills_concerns": report_instance.technical_skills_concerns,
"communication_score": report_instance.communication_score,
"communication_justification": report_instance.communication_justification,
"communication_concerns": report_instance.communication_concerns,
"problem_solving_score": report_instance.problem_solving_score,
"problem_solving_justification": report_instance.problem_solving_justification,
"problem_solving_concerns": report_instance.problem_solving_concerns,
"experience_relevance_score": report_instance.experience_relevance_score,
"experience_relevance_justification": report_instance.experience_relevance_justification,
"experience_relevance_concerns": report_instance.experience_relevance_concerns,
"cultural_fit_score": report_instance.cultural_fit_score,
"cultural_fit_justification": report_instance.cultural_fit_justification,
"cultural_fit_concerns": report_instance.cultural_fit_concerns,
"recommendation": report_instance.recommendation,
"strengths": report_instance.strengths,
"weaknesses": report_instance.weaknesses,
"red_flags": report_instance.red_flags,
"questions_analysis": report_instance.questions_analysis,
"next_steps": report_instance.next_steps,
"analysis_method": report_instance.analysis_method,
"created_at": report_instance.created_at.isoformat() if report_instance.created_at else None,
"updated_at": report_instance.updated_at.isoformat() if report_instance.updated_at else None,
}
generate_pdf_report_task.delay(
report_data=report_data,
candidate_name=resume.applicant_name,
position=vacancy.get("title", "Unknown Position"),
resume_file_url=resume.resume_file_url,
)
logger.info(

View File

@ -1,8 +1,12 @@
import json
import logging
import os
from typing import Any
from celery_worker.celery_app import celery_app
# Настраиваем логгер для задач
logger = logging.getLogger(__name__)
from celery_worker.database import (
SyncResumeRepository,
SyncVacancyRepository,
@ -223,7 +227,7 @@ def generate_interview_plan(
}
except Exception as e:
print(f"Ошибка генерации плана интервью: {str(e)}")
logger.error(f"Ошибка генерации плана интервью: {str(e)}", exc_info=True)
return None
@ -236,24 +240,37 @@ def parse_resume_task(self, resume_id: str, file_path: str):
resume_id: ID резюме
file_path: Путь к PDF файлу резюме
"""
logger.info(f"=== НАЧАЛО ОБРАБОТКИ РЕЗЮМЕ {resume_id} ===")
logger.info(f"Путь к файлу: {file_path}")
try:
# Шаг 0: Обновляем статус в БД - начали парсинг
logger.info(f"Шаг 0: Обновляем статус резюме {resume_id} на 'parsing'")
with get_sync_session() as session:
repo = SyncResumeRepository(session)
repo.update_status(int(resume_id), "parsing")
logger.info(f"Статус резюме {resume_id} успешно обновлен на 'parsing'")
# Обновляем статус задачи
logger.info(f"Обновляем состояние Celery задачи на PENDING")
self.update_state(
state="PENDING",
meta={"status": "Начинаем парсинг резюме...", "progress": 10},
)
logger.info(f"Состояние Celery задачи обновлено")
# Инициализируем модели из registry
logger.info(f"Шаг 1: Инициализируем модели из registry")
try:
logger.info("Получаем chat_model из registry")
chat_model = registry.get_chat_model()
logger.info("Chat model успешно получен")
logger.info("Получаем vector_store из registry")
vector_store = registry.get_vector_store()
logger.info("Vector store успешно получен")
except Exception as e:
logger.error(f"ОШИБКА при инициализации моделей: {str(e)}", exc_info=True)
# Обновляем статус в БД - ошибка инициализации
with get_sync_session() as session:
repo = SyncResumeRepository(session)
@ -262,17 +279,23 @@ def parse_resume_task(self, resume_id: str, file_path: str):
"failed",
error_message=f"Ошибка инициализации моделей: {str(e)}",
)
raise Exception(f"Ошибка инициализации моделей: {str(e)}")
raise RuntimeError(f"Ошибка инициализации моделей: {str(e)}")
# Шаг 1: Парсинг резюме
# Шаг 2: Парсинг резюме
logger.info(f"Шаг 2: Начинаем парсинг резюме")
self.update_state(
state="PROGRESS",
meta={"status": "Извлекаем текст из PDF...", "progress": 20},
)
logger.info(f"Состояние Celery обновлено на PROGRESS (20%)")
logger.info(f"Создаем ResumeParser")
parser = ResumeParser(chat_model)
logger.info(f"ResumeParser создан успешно")
logger.info(f"Проверяем существование файла: {file_path}")
if not os.path.exists(file_path):
logger.error(f"ФАЙЛ НЕ НАЙДЕН: {file_path}")
# Обновляем статус в БД - файл не найден
with get_sync_session() as session:
repo = SyncResumeRepository(session)
@ -281,61 +304,88 @@ def parse_resume_task(self, resume_id: str, file_path: str):
"failed",
error_message=f"Файл не найден: {file_path}",
)
raise Exception(f"Файл не найден: {file_path}")
logger.info(f"Статус резюме {resume_id} обновлен на 'failed' в БД")
raise FileNotFoundError(f"Файл не найден: {file_path}")
logger.info(f"Файл существует, начинаем парсинг")
parsed_resume = parser.parse_resume_from_file(file_path)
logger.info(f"Парсинг резюме завершен, получены данные: {list(parsed_resume.keys())}")
# Получаем оригинальные данные из формы
logger.info(f"Шаг 3: Получаем данные резюме из БД")
with get_sync_session() as session:
repo = SyncResumeRepository(session)
resume_record = repo.get_by_id(int(resume_id))
if not resume_record:
raise Exception(f"Резюме с ID {resume_id} не найдено в базе данных")
logger.error(f"РЕЗЮМЕ С ID {resume_id} НЕ НАЙДЕНО В БД")
raise ValueError(f"Резюме с ID {resume_id} не найдено в базе данных")
# Извлекаем нужные данные пока сессия активна
applicant_name = resume_record.applicant_name
applicant_email = resume_record.applicant_email
applicant_phone = resume_record.applicant_phone
logger.info(f"Данные резюме получены: name={applicant_name}, email={applicant_email}, phone={applicant_phone}")
# Создаем комбинированные данные: навыки и опыт из парсинга, контакты из формы
logger.info(f"Шаг 4: Объединяем данные из парсинга и формы")
combined_data = parsed_resume.copy()
combined_data["name"] = applicant_name or parsed_resume.get("name", "")
combined_data["email"] = applicant_email or parsed_resume.get("email", "")
combined_data["phone"] = applicant_phone or parsed_resume.get("phone", "")
logger.info(f"Комбинированные данные подготовлены")
# Шаг 2: Векторизация и сохранение в Milvus
# Шаг 5: Векторизация и сохранение в Milvus
logger.info(f"Шаг 5: Векторизация и сохранение в Milvus")
self.update_state(
state="PENDING",
meta={"status": "Сохраняем в векторную базу...", "progress": 60},
)
logger.info(f"Состояние Celery обновлено на 60%")
logger.info(f"Добавляем профиль кандидата в vector store")
vector_store.add_candidate_profile(str(resume_id), combined_data)
logger.info(f"Профиль кандидата добавлен в vector store")
# Шаг 3: Обновляем статус в PostgreSQL - успешно обработано
# Шаг 6: Обновляем статус в PostgreSQL
logger.info(f"Шаг 6: Подготовка к обновлению статуса в БД")
self.update_state(
state="PENDING",
meta={"status": "Обновляем статус в базе данных...", "progress": 85},
)
logger.info(f"Состояние Celery обновлено на 85%")
# Шаг 4: Генерируем план интервью
# Шаг 7: Генерируем план интервью
logger.info(f"Шаг 7: Генерация плана интервью")
self.update_state(
state="PENDING",
meta={"status": "Генерируем план интервью...", "progress": 90},
)
logger.info(f"Состояние Celery обновлено на 90%")
logger.info(f"Вызываем generate_interview_plan для резюме {resume_id}")
interview_plan = generate_interview_plan(int(resume_id), combined_data)
logger.info(f"План интервью сгенерирован: {interview_plan is not None}")
logger.info(f"Шаг 8: Обновляем статус в БД на основе плана интервью")
with get_sync_session() as session:
repo = SyncResumeRepository(session)
# Проверяем результат генерации плана интервью
print("interview_plan", interview_plan)
logger.info(f"Анализируем план интервью для резюме {resume_id}")
logger.info(f"План интервью: {interview_plan}")
if interview_plan and interview_plan.get("is_suitable", True):
logger.info(f"Кандидат подходит, обновляем статус на 'parsed'")
# Кандидат подходит - обновляем статус на parsed
repo.update_status(int(resume_id), "parsed", parsed_data=combined_data)
logger.info(f"Статус резюме {resume_id} обновлен на 'parsed'")
# Сохраняем план интервью
logger.info(f"Сохраняем план интервью для резюме {resume_id}")
repo.update_interview_plan(int(resume_id), interview_plan)
logger.info(f"План интервью сохранен")
else:
logger.info(f"Кандидат НЕ подходит, отклоняем")
# Кандидат не подходит - отклоняем
rejection_reason = (
interview_plan.get(
@ -344,14 +394,17 @@ def parse_resume_task(self, resume_id: str, file_path: str):
if interview_plan
else "Ошибка анализа соответствия"
)
logger.info(f"Причина отклонения: {rejection_reason}")
repo.update_status(
int(resume_id),
"rejected",
parsed_data=combined_data,
rejection_reason=rejection_reason,
)
logger.info(f"Статус резюме {resume_id} обновлен на 'rejected'")
# Завершаем с информацией об отклонении
logger.info(f"Обновляем состояние Celery на SUCCESS (отклонен)")
self.update_state(
state="SUCCESS",
meta={
@ -362,6 +415,7 @@ def parse_resume_task(self, resume_id: str, file_path: str):
"rejection_reason": rejection_reason,
},
)
logger.info(f"=== ЗАВЕРШЕНИЕ ОБРАБОТКИ РЕЗЮМЕ {resume_id} (ОТКЛОНЕН) ===")
return {
"resume_id": resume_id,
@ -371,6 +425,7 @@ def parse_resume_task(self, resume_id: str, file_path: str):
}
# Завершено успешно
logger.info(f"Обновляем состояние Celery на SUCCESS (принят)")
self.update_state(
state="SUCCESS",
meta={
@ -379,6 +434,7 @@ def parse_resume_task(self, resume_id: str, file_path: str):
"result": combined_data,
},
)
logger.info(f"=== УСПЕШНОЕ ЗАВЕРШЕНИЕ ОБРАБОТКИ РЕЗЮМЕ {resume_id} ===")
return {
"resume_id": resume_id,
@ -387,13 +443,16 @@ def parse_resume_task(self, resume_id: str, file_path: str):
}
except Exception as e:
error_message = str(e)
logger.error(f"Ошибка при обработке резюме {resume_id}: {error_message}", exc_info=True)
# В случае ошибки
self.update_state(
state="FAILURE",
meta={
"status": f"Ошибка при обработке резюме: {str(e)}",
"status": f"Ошибка при обработке резюме: {error_message}",
"progress": 0,
"error": str(e),
"error": error_message,
},
)
@ -401,11 +460,16 @@ def parse_resume_task(self, resume_id: str, file_path: str):
try:
with get_sync_session() as session:
repo = SyncResumeRepository(session)
repo.update_status(int(resume_id), "failed", error_message=str(e))
repo.update_status(int(resume_id), "failed", error_message=error_message)
except Exception as db_error:
print(f"Ошибка при обновлении статуса в БД: {str(db_error)}")
logger.error(f"Ошибка при обновлении статуса в БД: {str(db_error)}", exc_info=True)
raise
# Возвращаем стандартное исключение вместо re-raise
return {
"resume_id": resume_id,
"status": "failed",
"error": error_message,
}
# Функция больше не нужна - используем SyncResumeRepository напрямую
@ -431,7 +495,7 @@ def generate_interview_questions_task(self, resume_id: str, job_description: str
chat_model = registry.get_chat_model()
vector_store = registry.get_vector_store()
except Exception as e:
raise Exception(f"Ошибка инициализации моделей: {str(e)}")
raise RuntimeError(f"Ошибка инициализации моделей: {str(e)}")
# Шаг 1: Получить parsed резюме из базы данных
self.update_state(

133
docker-compose.yml Normal file
View File

@ -0,0 +1,133 @@
services:
  # PostgreSQL Database
  postgres:
    image: postgres:15
    # NOTE(review): hard-coded dev credentials; override via env/secrets
    # before real production use.
    environment:
      POSTGRES_DB: hr_ai
      POSTGRES_USER: hr_user
      POSTGRES_PASSWORD: hr_password
    volumes:
      - postgres_data:/var/lib/postgresql/data
    ports:
      - "5432:5432"
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U hr_user -d hr_ai"]
      interval: 30s
      timeout: 10s
      retries: 5

  # Redis for Celery and caching
  redis:
    image: redis:7-alpine
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 30s
      timeout: 10s
      retries: 3

  # LiveKit Server
  livekit:
    image: livekit/livekit-server:latest
    ports:
      - "7880:7880"       # WebSocket/HTTP API
      - "7881:7881"       # RTC over TCP
      - "7882:7882/udp"
      - "3478:3478/udp"   # TURN/UDP
    # Shares Caddy's data volume so LiveKit's embedded TURN/TLS server can
    # read the ACME certificates Caddy obtains for ${DOMAIN}.
    volumes:
      - caddy_data:/certs
    depends_on:
      - caddy
    restart: unless-stopped
    environment:
      # Inline LiveKit YAML configuration.
      # NOTE(review): devkey secret is a development placeholder - rotate
      # for production.
      LIVEKIT_CONFIG: |
        keys:
          devkey: devkey_secret_32chars_minimum_length
        webhook:
          api_key: devkey
        turn:
          enabled: true
          tls_port: 5349
          domain: hr.aiquity.xyz
          cert_file: /certs/certificates/acme-v02.api.letsencrypt.org-directory/${DOMAIN:-localhost}/${DOMAIN:-localhost}.crt
          key_file: /certs/certificates/acme-v02.api.letsencrypt.org-directory/${DOMAIN:-localhost}/${DOMAIN:-localhost}.key
        port: 7880
        rtc:
          tcp_port: 7881
          port_range_start: 50000
          port_range_end: 60000
          use_external_ip: true
        redis:
          address: redis:6379

  # HR AI Backend
  backend:
    image: cr.yandex/crp9p5rtbnbop36duusi/hr-ai-backend:latest
    expose:
      - "8000"
    env_file:
      - .env
    environment:
      - DATABASE_URL=postgresql+asyncpg://hr_user:hr_password@postgres:5432/hr_ai
      - REDIS_CACHE_URL=redis
      - REDIS_CACHE_PORT=6379
      - REDIS_CACHE_DB=0
      - LIVEKIT_URL=ws://livekit:7880
      - LIVEKIT_API_KEY=devkey
      - LIVEKIT_API_SECRET=devkey_secret_32chars_minimum_length
      - APP_ENV=development
      - DEBUG=true
    volumes:
      - ./agent_commands:/tmp/agent_commands
      - backend_uploads:/app/uploads
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
      livekit:
        condition: service_started
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3

  # Caddy reverse proxy with automatic HTTPS
  caddy:
    image: caddy:2-alpine
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./Caddyfile:/etc/caddy/Caddyfile:ro
      - caddy_data:/data
      - caddy_config:/config
    # Fix: the original also listed livekit here, creating a depends_on
    # cycle (livekit -> caddy -> livekit) that docker compose rejects.
    # Caddy only needs backend for startup ordering; livekit keeps its
    # dependency on caddy for the shared certificate volume.
    depends_on:
      - backend
    environment:
      - DOMAIN=${DOMAIN:-localhost}
    restart: unless-stopped

  # Frontend (served behind Caddy)
  frontend:
    image: cr.yandex/crp9p5rtbnbop36duusi/hr-ai-frontend:latest
    expose:
      - "3000"
    environment:
      - NODE_ENV=production
    restart: unless-stopped
    volumes:
      - ./.env.local:/app/.env.local:ro

# Named volumes shared between services
volumes:
  postgres_data:
  redis_data:
  backend_uploads:
  caddy_data:
  caddy_config:

View File

@ -36,6 +36,7 @@ dependencies = [
"greenlet>=3.2.4",
"xhtml2pdf>=0.2.17",
"playwright>=1.55.0",
"celery-types==0.23.0",
]
[build-system]

71
scripts/build-and-push.sh Executable file
View File

@ -0,0 +1,71 @@
#!/bin/bash

# Build and push script for Yandex Cloud Container Registry
# Usage: ./scripts/build-and-push.sh [tag]

set -e

# Configuration
REGISTRY_ID="${YANDEX_REGISTRY_ID:-your-registry-id}"
IMAGE_NAME="hr-ai-backend"
TAG="${1:-latest}"
FULL_IMAGE_NAME="cr.yandex/${REGISTRY_ID}/${IMAGE_NAME}:${TAG}"

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

echo -e "${YELLOW}Building and pushing HR AI Backend to Yandex Cloud Container Registry${NC}"

# Check if required environment variables are set
if [ -z "$REGISTRY_ID" ] || [ "$REGISTRY_ID" = "your-registry-id" ]; then
    echo -e "${RED}Error: YANDEX_REGISTRY_ID environment variable is not set${NC}"
    echo "Please set it to your Yandex Cloud Container Registry ID"
    echo "Example: export YANDEX_REGISTRY_ID=crp1234567890abcdef"
    exit 1
fi

# Check if yc CLI is installed and authenticated
if ! command -v yc &> /dev/null; then
    echo -e "${RED}Error: Yandex Cloud CLI (yc) is not installed${NC}"
    echo "Please install it from: https://cloud.yandex.ru/docs/cli/quickstart"
    exit 1
fi

# Check authentication
# NOTE(review): grepping for "token:" only detects OAuth/IAM-token profiles;
# service-account-key auth would be reported as unauthenticated - confirm.
if ! yc config list | grep -q "token:"; then
    echo -e "${RED}Error: Not authenticated with Yandex Cloud${NC}"
    echo "Please run: yc init"
    exit 1
fi

echo -e "${YELLOW}Configuring Docker for Yandex Cloud Container Registry...${NC}"
yc container registry configure-docker

echo -e "${YELLOW}Building Docker image: ${FULL_IMAGE_NAME}${NC}"
docker build -t "${FULL_IMAGE_NAME}" .

echo -e "${YELLOW}Pushing image to registry...${NC}"
docker push "${FULL_IMAGE_NAME}"

echo -e "${GREEN}✓ Successfully built and pushed: ${FULL_IMAGE_NAME}${NC}"

# Also tag as latest if a specific tag was provided
if [ "$TAG" != "latest" ]; then
    LATEST_IMAGE_NAME="cr.yandex/${REGISTRY_ID}/${IMAGE_NAME}:latest"
    echo -e "${YELLOW}Tagging as latest...${NC}"
    docker tag "${FULL_IMAGE_NAME}" "${LATEST_IMAGE_NAME}"
    docker push "${LATEST_IMAGE_NAME}"
    echo -e "${GREEN}✓ Also pushed as: ${LATEST_IMAGE_NAME}${NC}"
fi

echo -e "${GREEN}Build and push completed successfully!${NC}"
echo ""
echo "Image is available at:"
echo " ${FULL_IMAGE_NAME}"
echo ""
echo "To use in production, update your docker-compose.prod.yml:"
echo " backend:"
echo " image: ${FULL_IMAGE_NAME}"

301
scripts/deploy.sh Executable file
View File

@ -0,0 +1,301 @@
#!/bin/bash

# SSH Deploy script for HR AI Backend
# Usage: ./scripts/deploy.sh [environment] [image_tag]

set -e

# Configuration
ENVIRONMENT="${1:-production}"
IMAGE_TAG="${2:-latest}"
REGISTRY_ID="${YANDEX_REGISTRY_ID:-your-registry-id}"
IMAGE_NAME="hr-ai-backend"
FULL_IMAGE_NAME="cr.yandex/${REGISTRY_ID}/${IMAGE_NAME}:${IMAGE_TAG}"

# Server configuration (set these as environment variables)
SERVER_HOST="${DEPLOY_HOST:-your-server.com}"
SERVER_USER="${DEPLOY_USER:-deploy}"
SERVER_PORT="${DEPLOY_PORT:-22}"
DEPLOY_PATH="${DEPLOY_PATH:-/opt/hr-ai-backend}"

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

echo -e "${YELLOW}Deploying HR AI Backend to ${ENVIRONMENT} environment${NC}"

# Check if required environment variables are set.
# NOTE(review): a deploy user literally named "deploy" (or host
# "your-server.com") is treated as "not configured" - confirm acceptable.
missing_vars=""
if [ -z "$SERVER_HOST" ] || [ "$SERVER_HOST" = "your-server.com" ]; then
    missing_vars="$missing_vars DEPLOY_HOST"
fi
if [ -z "$SERVER_USER" ] || [ "$SERVER_USER" = "deploy" ]; then
    missing_vars="$missing_vars DEPLOY_USER"
fi
if [ -z "$REGISTRY_ID" ] || [ "$REGISTRY_ID" = "your-registry-id" ]; then
    missing_vars="$missing_vars YANDEX_REGISTRY_ID"
fi

if [ -n "$missing_vars" ]; then
    echo -e "${RED}Error: Required environment variables are not set:${NC}"
    for var in $missing_vars; do
        echo " - $var"
    done
    echo ""
    echo "Example configuration:"
    echo "export DEPLOY_HOST=your-server.example.com"
    echo "export DEPLOY_USER=deploy"
    echo "export YANDEX_REGISTRY_ID=crp1234567890abcdef"
    echo "export DEPLOY_PATH=/opt/hr-ai-backend # optional"
    echo "export DEPLOY_PORT=22 # optional"
    exit 1
fi

# Test SSH connection.
# StrictHostKeyChecking=no auto-accepts unknown host keys - convenient for
# first deploys but weakens MITM protection.
echo -e "${BLUE}Testing SSH connection to ${SERVER_USER}@${SERVER_HOST}:${SERVER_PORT}...${NC}"
if ! ssh -p "${SERVER_PORT}" -o ConnectTimeout=10 -o StrictHostKeyChecking=no "${SERVER_USER}@${SERVER_HOST}" "echo 'SSH connection successful'"; then
    echo -e "${RED}Error: Cannot connect to server via SSH${NC}"
    echo "Please check your SSH key configuration and server details"
    exit 1
fi
# Create deployment directory structure on server.
# The remote command string is double-quoted locally, so ${DEPLOY_PATH} and
# ${SERVER_USER} expand on THIS machine before being sent over SSH.
echo -e "${BLUE}Creating deployment directories on server...${NC}"
ssh -p "${SERVER_PORT}" "${SERVER_USER}@${SERVER_HOST}" "
sudo mkdir -p ${DEPLOY_PATH}/{config,logs,data,agent_commands}
sudo mkdir -p ${DEPLOY_PATH}/data/{postgres,redis,uploads,caddy_data,caddy_config}
sudo mkdir -p ${DEPLOY_PATH}/logs/{caddy}
sudo chown -R ${SERVER_USER}:${SERVER_USER} ${DEPLOY_PATH}
"

# Copy configuration files to server
echo -e "${BLUE}Copying configuration files to server...${NC}"
scp -P "${SERVER_PORT}" docker-compose.yml "${SERVER_USER}@${SERVER_HOST}:${DEPLOY_PATH}/"
scp -P "${SERVER_PORT}" Caddyfile "${SERVER_USER}@${SERVER_HOST}:${DEPLOY_PATH}/"
# Create frontend environment file.
# Fix: these values were written as \${SERVER_HOST}. The local shell turned
# that into the literal text ${SERVER_HOST}, and because the remote heredoc
# delimiter ('EOF') is quoted the remote shell never expanded it either - so
# the deployed .env.local contained literal '${SERVER_HOST}' placeholders.
# Expanding locally (no backslash) writes the real host name into the file.
echo -e "${BLUE}Creating frontend environment configuration...${NC}"
ssh -p "${SERVER_PORT}" "${SERVER_USER}@${SERVER_HOST}" "
cat > ${DEPLOY_PATH}/.env.local << 'EOF'
# Frontend Environment Configuration
NODE_ENV=production
# API URL (adjust based on your setup)
NEXT_PUBLIC_API_URL=https://${SERVER_HOST}/api
REACT_APP_API_URL=https://${SERVER_HOST}/api
VUE_APP_API_URL=https://${SERVER_HOST}/api
# LiveKit Configuration for frontend
NEXT_PUBLIC_LIVEKIT_URL=ws://${SERVER_HOST}/rtc
REACT_APP_LIVEKIT_URL=ws://${SERVER_HOST}/rtc
VUE_APP_LIVEKIT_URL=ws://${SERVER_HOST}/rtc
# For localhost development (no HTTPS)
# NEXT_PUBLIC_API_URL=http://${SERVER_HOST}/api
# REACT_APP_API_URL=http://${SERVER_HOST}/api
# VUE_APP_API_URL=http://${SERVER_HOST}/api
# NEXT_PUBLIC_LIVEKIT_URL=ws://${SERVER_HOST}/rtc
# REACT_APP_LIVEKIT_URL=ws://${SERVER_HOST}/rtc
# VUE_APP_LIVEKIT_URL=ws://${SERVER_HOST}/rtc
# Add your frontend-specific environment variables here
EOF
"
# Create production environment file on server.
# The heredoc delimiter 'EOF' is quoted, so nothing expands remotely;
# ${SERVER_HOST:-localhost} below expands LOCALLY before the command is sent
# (intended - the server receives the resolved domain).
# NOTE(review): dev-grade DB and LiveKit credentials are baked in - rotate
# them for real production use.
echo -e "${BLUE}Creating production environment configuration...${NC}"
ssh -p "${SERVER_PORT}" "${SERVER_USER}@${SERVER_HOST}" "
cat > ${DEPLOY_PATH}/.env << 'EOF'
# Production Environment Configuration
DATABASE_URL=postgresql+asyncpg://hr_user:hr_password@postgres:5432/hr_ai
REDIS_CACHE_URL=redis
REDIS_CACHE_PORT=6379
REDIS_CACHE_DB=0
# LiveKit Configuration
LIVEKIT_URL=ws://livekit:7880
LIVEKIT_API_KEY=devkey
LIVEKIT_API_SECRET=devkey_secret_32chars_minimum_length
# Caddy Domain Configuration (set your domain for automatic HTTPS)
DOMAIN=${SERVER_HOST:-localhost}
# App Configuration
APP_ENV=production
DEBUG=false
# Add your production API keys here:
# OPENAI_API_KEY=your-openai-api-key
# DEEPGRAM_API_KEY=your-deepgram-api-key
# CARTESIA_API_KEY=your-cartesia-api-key
# ELEVENLABS_API_KEY=your-elevenlabs-api-key
# S3 Storage Configuration (optional)
# S3_ENDPOINT_URL=https://s3.storage.selcloud.ru
# S3_ACCESS_KEY_ID=your_s3_access_key
# S3_SECRET_ACCESS_KEY=your_s3_secret_key
# S3_BUCKET_NAME=your-bucket-name
# S3_REGION=ru-1
# Milvus Vector Database Configuration (optional)
# MILVUS_URI=http://milvus:19530
# MILVUS_COLLECTION=hr_candidate_profiles
EOF
"
# Create production docker compose override.
# ${FULL_IMAGE_NAME} expands locally (intended - pins the exact image tag);
# the escaped \" quotes survive into the remote YAML file.
echo -e "${BLUE}Creating production docker compose configuration...${NC}"
ssh -p "${SERVER_PORT}" "${SERVER_USER}@${SERVER_HOST}" "
cat > ${DEPLOY_PATH}/docker-compose.prod.yml << 'EOF'
services:
  backend:
    image: ${FULL_IMAGE_NAME}
    env_file:
      - .env
    restart: unless-stopped
    volumes:
      - ./agent_commands:/tmp/agent_commands
      - ./data/uploads:/app/uploads
      - ./logs:/app/logs
  postgres:
    restart: unless-stopped
    volumes:
      - ./data/postgres:/var/lib/postgresql/data
  redis:
    restart: unless-stopped
    volumes:
      - ./data/redis:/data
  livekit:
    restart: unless-stopped
    ports:
      - \"3478:3478/udp\"
  caddy:
    env_file:
      - .env
    restart: unless-stopped
    volumes:
      - ./data/caddy_data:/data
      - ./data/caddy_config:/config
      - ./logs/caddy:/var/log/caddy
  frontend:
    restart: unless-stopped
EOF
"
# Pull latest image and deploy.
# Everything below runs on the remote host; \$ and \" are escaped so they
# are evaluated remotely, while ${FULL_IMAGE_NAME}/${DEPLOY_PATH} expand
# locally before the command is sent.
echo -e "${BLUE}Pulling latest image and starting services...${NC}"
ssh -p "${SERVER_PORT}" "${SERVER_USER}@${SERVER_HOST}" "
cd ${DEPLOY_PATH}
# Configure Docker for Yandex Cloud Registry
echo 'Configuring Docker for Yandex Cloud Registry...'
# Completely reset Docker config to fix credential helper issues
echo 'Resetting Docker configuration...'
mkdir -p ~/.docker
cat > ~/.docker/config.json << 'DOCKER_CONFIG'
{
\"auths\": {},
\"HttpHeaders\": {
\"User-Agent\": \"Docker-Client/20.10.0 (linux)\"
}
}
DOCKER_CONFIG
# Install yc CLI if not found
if ! command -v yc &> /dev/null; then
echo 'Installing Yandex Cloud CLI...'
curl -sSL https://storage.yandexcloud.net/yandexcloud-yc/install.sh | bash
source ~/.bashrc || source ~/.bash_profile || export PATH=\"\$HOME/yandex-cloud/bin:\$PATH\"
fi
# Use manual login instead of yc configure-docker.
# NOTE(review): 'yc iam create-token' returns an IAM token; Yandex Cloud
# docs pair IAM tokens with '--username iam' ('oauth' is for OAuth tokens).
# Verify this login actually succeeds with 'oauth'.
if command -v yc &> /dev/null; then
echo 'Getting Yandex Cloud token and logging in manually...'
YC_TOKEN=\$(yc iam create-token 2>/dev/null)
if [ ! -z \"\$YC_TOKEN\" ]; then
echo \"\$YC_TOKEN\" | docker login --username oauth --password-stdin cr.yandex
echo 'Docker login successful'
else
echo 'Error: Could not get YC token. Please run: yc init'
echo 'You need to authenticate yc CLI first on the server'
exit 1
fi
else
echo 'Error: yc CLI installation failed'
exit 1
fi
echo 'Current Docker config:'
cat ~/.docker/config.json
# Pull only our custom images from Yandex Registry first
echo 'Pulling custom images from Yandex Registry...'
docker pull ${FULL_IMAGE_NAME} || echo 'Failed to pull backend image'
docker pull cr.yandex/crp9p5rtbnbop36duusi/hr-ai-frontend:latest || echo 'Failed to pull frontend image'
# Reset Docker config to default for pulling public images
echo 'Resetting Docker config for public images...'
cat > ~/.docker/config.json << 'DOCKER_CONFIG'
{
\"auths\": {}
}
DOCKER_CONFIG
# Stop old containers
echo 'Stopping existing services...'
docker compose -f docker-compose.yml -f docker-compose.prod.yml down --remove-orphans
# Start new containers
echo 'Starting services with new image...'
docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d
# Wait for services to start
echo 'Waiting for services to start...'
sleep 10
# Run database migrations
echo 'Running database migrations...'
docker compose -f docker-compose.yml -f docker-compose.prod.yml exec -T backend uv run alembic upgrade head || echo 'Migration failed or already up to date'
# Show service status
echo 'Service status:'
docker compose -f docker-compose.yml -f docker-compose.prod.yml ps
"
# Health check: give the stack time to come up, then probe through Caddy.
echo -e "${BLUE}Performing health check...${NC}"
sleep 20

if ssh -p "${SERVER_PORT}" "${SERVER_USER}@${SERVER_HOST}" "curl -f http://localhost/health" >/dev/null 2>&1; then
    echo -e "${GREEN}✓ Deployment successful! Service is healthy.${NC}"
else
    echo -e "${YELLOW}⚠ Service deployed but health check failed. Check logs:${NC}"
    echo "ssh -p ${SERVER_PORT} ${SERVER_USER}@${SERVER_HOST} 'cd ${DEPLOY_PATH} && docker compose logs backend caddy'"
fi

echo -e "${GREEN}Deployment completed!${NC}"
echo ""
echo "Service URLs:"
# Fix: the original tested [ "\$DOMAIN" != "localhost" ] - the backslash made
# the left side the literal string '$DOMAIN', which never equals "localhost",
# so the HTTPS URLs were always printed. Compare the actual DOMAIN value,
# defaulting to localhost when unset.
if [ "${DOMAIN:-localhost}" != "localhost" ]; then
    echo " Main site: https://${SERVER_HOST}"
    echo " API: https://${SERVER_HOST}/api"
    echo " LiveKit: https://${SERVER_HOST}/livekit"
else
    echo " Main site: http://${SERVER_HOST}"
    echo " API: http://${SERVER_HOST}/api"
    echo " LiveKit: http://${SERVER_HOST}/livekit"
fi

echo ""
echo "Useful commands:"
echo " Check logs: ssh ${SERVER_USER}@${SERVER_HOST} 'cd ${DEPLOY_PATH} && docker compose logs -f'"
echo " Service status: ssh ${SERVER_USER}@${SERVER_HOST} 'cd ${DEPLOY_PATH} && docker compose ps'"
echo " Restart: ssh ${SERVER_USER}@${SERVER_HOST} 'cd ${DEPLOY_PATH} && docker compose restart'"

4241
uv.lock

File diff suppressed because it is too large Load Diff