diff --git a/ai_interviewer_agent.py b/ai_interviewer_agent.py
index 240f9dd..a571263 100644
--- a/ai_interviewer_agent.py
+++ b/ai_interviewer_agent.py
@@ -3,7 +3,7 @@ import json
import logging
import os
import time
-from datetime import datetime
+from datetime import UTC, datetime
# Принудительно устанавливаем UTF-8 для Windows
if os.name == "nt": # Windows
@@ -404,10 +404,10 @@ async def entrypoint(ctx: JobContext):
# STT
stt = (
- deepgram.STT(
- model="nova-2-general", language="ru", api_key=settings.deepgram_api_key
+ openai.STT(
+ model="whisper-1", language="ru", api_key=settings.openai_api_key
)
- if settings.deepgram_api_key
+ if settings.openai_api_key
else openai.STT(
model="whisper-1", language="ru", api_key=settings.openai_api_key
)
@@ -415,18 +415,16 @@ async def entrypoint(ctx: JobContext):
# LLM
llm = openai.LLM(
- model="gpt-4o", api_key=settings.openai_api_key, temperature=0.7
+ model="gpt-5-mini", api_key=settings.openai_api_key
)
# TTS
tts = (
- cartesia.TTS(
- model="sonic-turbo",
- language="ru",
- voice="da05e96d-ca10-4220-9042-d8acef654fa9",
- api_key=settings.cartesia_api_key,
+ openai.TTS(
+ model="gpt-4o-mini-tts",
+ api_key=settings.openai_api_key,
)
- if settings.cartesia_api_key
+ if settings.openai_api_key
else silero.TTS(language="ru", model="v4_ru")
)
@@ -664,6 +662,27 @@ async def entrypoint(ctx: JobContext):
logger.info(
"[SEQUENCE] Step 2: Room closure failed, but continuing sequence"
)
+ # Шаг 3: освобождаем агента через файл команд
+ logger.info("[SEQUENCE] Step 3: Releasing agent session")
+ try:
+ # Сигнализируем менеджеру агентов о завершении сессии
+ command_file = "agent_commands.json"
+ release_command = {
+ "action": "session_completed",
+ "session_id": session_id,
+ "room_name": room_name,
+ "timestamp": datetime.now(UTC).isoformat(),
+ }
+
+ with open(command_file, "w", encoding="utf-8") as f:
+ json.dump(release_command, f, ensure_ascii=False, indent=2)
+
+ logger.info(f"[SEQUENCE] Step 3: Session {session_id} release signal sent")
+
+ except Exception as e:
+ logger.error(f"[SEQUENCE] Step 3: Failed to send release signal: {str(e)}")
+ logger.info("[SEQUENCE] Step 3: Continuing without release signal")
+
# --- Упрощенная логика обработки пользовательского ответа ---
diff --git a/app/core/config.py b/app/core/config.py
index 13e5e82..a9b606d 100644
--- a/app/core/config.py
+++ b/app/core/config.py
@@ -24,7 +24,7 @@ class Settings(BaseSettings):
# LLM API Keys
openai_api_key: str | None = None
anthropic_api_key: str | None = None
- openai_model: str = "gpt-4o-mini"
+ openai_model: str = "gpt-5-mini"
openai_embeddings_model: str = "text-embedding-3-small"
# AI Agent API Keys (for voice interviewer)
diff --git a/app/models/interview_report.py b/app/models/interview_report.py
index 6be0c61..1dd8a10 100644
--- a/app/models/interview_report.py
+++ b/app/models/interview_report.py
@@ -77,6 +77,9 @@ class InterviewReportBase(SQLModel):
llm_model_used: str | None = Field(default=None, max_length=100)
analysis_duration_seconds: int | None = Field(default=None, ge=0)
+ # PDF отчет
+ pdf_report_url: str | None = Field(default=None, max_length=500)
+
class InterviewReport(InterviewReportBase, table=True):
"""Полный отчет по интервью с ID и временными метками"""
@@ -137,6 +140,7 @@ class InterviewReportCreate(SQLModel):
analysis_method: str | None = "openai_gpt4"
llm_model_used: str | None = None
analysis_duration_seconds: int | None = None
+ pdf_report_url: str | None = None
class InterviewReportUpdate(SQLModel):
@@ -181,6 +185,7 @@ class InterviewReportUpdate(SQLModel):
analysis_method: str | None = None
llm_model_used: str | None = None
analysis_duration_seconds: int | None = None
+ pdf_report_url: str | None = None
class InterviewReportRead(InterviewReportBase):
diff --git a/app/repositories/vacancy_repository.py b/app/repositories/vacancy_repository.py
index 3bf8cbd..b309700 100644
--- a/app/repositories/vacancy_repository.py
+++ b/app/repositories/vacancy_repository.py
@@ -14,6 +14,11 @@ class VacancyRepository(BaseRepository[Vacancy]):
def __init__(self, session: Annotated[AsyncSession, Depends(get_session)]):
super().__init__(Vacancy, session)
+ async def get_by_id(self, vacancy_id: int) -> Vacancy | None:
+ statement = select(Vacancy).where(Vacancy.id == vacancy_id)
+ result = await self._session.execute(statement)
+ return result.scalar_one_or_none()
+
async def get_by_company(self, company_name: str) -> list[Vacancy]:
statement = select(Vacancy).where(Vacancy.company_name == company_name)
result = await self._session.execute(statement)
diff --git a/app/routers/admin_router.py b/app/routers/admin_router.py
index dc2da77..036bc7b 100644
--- a/app/routers/admin_router.py
+++ b/app/routers/admin_router.py
@@ -1,3 +1,7 @@
+import json
+import os
+from datetime import UTC, datetime
+
from fastapi import APIRouter, Depends, HTTPException
from app.services.admin_service import AdminService
@@ -116,3 +120,50 @@ async def generate_reports_for_vacancy(
raise HTTPException(status_code=404, detail=result["error"])
return result
+
+
+@router.post("/interview/{session_id}/force-end")
+async def force_end_interview(session_id: int) -> dict:
+ """Принудительно завершить активное интервью"""
+ try:
+ # Получаем статус агента
+ agent_status = agent_manager.get_status()
+
+ if agent_status["status"] != "active":
+ raise HTTPException(
+ status_code=400,
+ detail=f"Agent is not active, current status: {agent_status['status']}"
+ )
+
+ if agent_status["session_id"] != session_id:
+ raise HTTPException(
+ status_code=400,
+ detail=f"Agent is not handling session {session_id}, current session: {agent_status['session_id']}"
+ )
+
+ # Записываем команду завершения в файл команд
+ command_file = "agent_commands.json"
+ end_command = {
+ "action": "end_session",
+ "session_id": session_id,
+ "timestamp": datetime.now(UTC).isoformat(),
+ "initiated_by": "admin_api"
+ }
+
+ with open(command_file, "w", encoding="utf-8") as f:
+ json.dump(end_command, f, ensure_ascii=False, indent=2)
+
+ return {
+ "success": True,
+ "message": f"Force end command sent for session {session_id}",
+ "session_id": session_id,
+ "command_file": command_file
+ }
+
+ except HTTPException:
+ raise
+ except Exception as e:
+ raise HTTPException(
+ status_code=500,
+ detail=f"Failed to send force end command: {str(e)}"
+ )
diff --git a/app/routers/analysis_router.py b/app/routers/analysis_router.py
index 7036578..3050260 100644
--- a/app/routers/analysis_router.py
+++ b/app/routers/analysis_router.py
@@ -1,6 +1,8 @@
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException
+from fastapi.responses import RedirectResponse
from pydantic import BaseModel
+from app.core.database import get_session
from app.repositories.resume_repository import ResumeRepository
from celery_worker.interview_analysis_task import (
analyze_multiple_candidates,
@@ -228,3 +230,150 @@ async def get_analysis_statistics(
else 0,
},
}
+
+
+@router.get("/pdf-report/{resume_id}")
+async def get_pdf_report(
+ resume_id: int,
+ session=Depends(get_session),
+ resume_repo: ResumeRepository = Depends(ResumeRepository),
+):
+ """
+ Получить PDF отчет по интервью
+
+ Если отчет готов - перенаправляет на S3 URL
+ Если отчета нет - возвращает информацию о статусе
+ """
+ from sqlmodel import select
+
+ from app.models.interview import InterviewSession
+ from app.models.interview_report import InterviewReport
+
+ # Проверяем, существует ли резюме
+ resume = await resume_repo.get_by_id(resume_id)
+ if not resume:
+ raise HTTPException(status_code=404, detail="Resume not found")
+
+ # Ищем сессию интервью и отчет
+ statement = (
+ select(InterviewReport, InterviewSession)
+ .join(
+ InterviewSession,
+ InterviewReport.interview_session_id == InterviewSession.id,
+ )
+ .where(InterviewSession.resume_id == resume_id)
+ )
+
+ result = await session.exec(statement)
+ report_session = result.first()
+
+ if not report_session:
+ # Если отчета нет - возможно, нужно запустить анализ
+ raise HTTPException(
+ status_code=404,
+ detail="Interview report not found. Run analysis first using POST /analysis/interview-report/{resume_id}",
+ )
+
+ report, interview_session = report_session
+
+ if not report.pdf_report_url:
+ # PDF еще не сгенерирован
+ return {
+ "status": "pdf_not_ready",
+ "message": "PDF report is being generated or failed to generate",
+ "report_id": report.id,
+ "candidate_name": resume.applicant_name,
+ }
+
+ # Перенаправляем на S3 URL
+ return RedirectResponse(url=report.pdf_report_url, status_code=302)
+
+
+@router.get("/report-data/{resume_id}")
+async def get_report_data(
+ resume_id: int,
+ session=Depends(get_session),
+ resume_repo: ResumeRepository = Depends(ResumeRepository),
+):
+ """
+ Получить данные отчета в JSON формате (без PDF)
+ """
+ from sqlmodel import select
+
+ from app.models.interview import InterviewSession
+ from app.models.interview_report import InterviewReport
+
+ # Проверяем, существует ли резюме
+ resume = await resume_repo.get_by_id(resume_id)
+ if not resume:
+ raise HTTPException(status_code=404, detail="Resume not found")
+
+ # Ищем отчет
+ statement = (
+ select(InterviewReport, InterviewSession)
+ .join(
+ InterviewSession,
+ InterviewReport.interview_session_id == InterviewSession.id,
+ )
+ .where(InterviewSession.resume_id == resume_id)
+ )
+
+ result = await session.exec(statement)
+ report_session = result.first()
+
+ if not report_session:
+ raise HTTPException(status_code=404, detail="Interview report not found")
+
+ report, interview_session = report_session
+
+ return {
+ "report_id": report.id,
+ "candidate_name": resume.applicant_name,
+ "position": "Unknown Position", # Можно расширить через vacancy
+ "interview_date": report.created_at.isoformat(),
+ "overall_score": report.overall_score,
+ "recommendation": report.recommendation.value,
+ "scores": {
+ "technical_skills": {
+ "score": report.technical_skills_score,
+ "justification": report.technical_skills_justification,
+ "concerns": report.technical_skills_concerns,
+ },
+ "experience_relevance": {
+ "score": report.experience_relevance_score,
+ "justification": report.experience_relevance_justification,
+ "concerns": report.experience_relevance_concerns,
+ },
+ "communication": {
+ "score": report.communication_score,
+ "justification": report.communication_justification,
+ "concerns": report.communication_concerns,
+ },
+ "problem_solving": {
+ "score": report.problem_solving_score,
+ "justification": report.problem_solving_justification,
+ "concerns": report.problem_solving_concerns,
+ },
+ "cultural_fit": {
+ "score": report.cultural_fit_score,
+ "justification": report.cultural_fit_justification,
+ "concerns": report.cultural_fit_concerns,
+ },
+ },
+ "strengths": report.strengths,
+ "weaknesses": report.weaknesses,
+ "red_flags": report.red_flags,
+ "next_steps": report.next_steps,
+ "metrics": {
+ "interview_duration_minutes": report.interview_duration_minutes,
+ "dialogue_messages_count": report.dialogue_messages_count,
+ "questions_quality_score": report.questions_quality_score,
+ },
+ "pdf_available": bool(report.pdf_report_url),
+ "pdf_url": report.pdf_report_url,
+ "analysis_metadata": {
+ "method": report.analysis_method,
+ "model_used": report.llm_model_used,
+ "analysis_duration": report.analysis_duration_seconds,
+ },
+ }
diff --git a/app/services/agent_manager.py b/app/services/agent_manager.py
index cb6b8cd..b193a63 100644
--- a/app/services/agent_manager.py
+++ b/app/services/agent_manager.py
@@ -42,6 +42,7 @@ class AgentManager:
self.api_secret = (
settings.livekit_api_secret or "devkey_secret_32chars_minimum_length"
)
+ self._monitoring_task = None
async def start_agent(self) -> bool:
"""Запускает AI агента в режиме ожидания (без конкретной сессии)"""
@@ -95,6 +96,11 @@ class AgentManager:
)
logger.info(f"AI Agent started with PID {process.pid}")
+
+ # Запускаем мониторинг команд
+ if not self._monitoring_task:
+ self._monitoring_task = asyncio.create_task(self._monitor_commands())
+
return True
except Exception as e:
@@ -126,6 +132,12 @@ class AgentManager:
logger.info(f"AI Agent with PID {self._agent_process.pid} stopped")
self._agent_process = None
+
+ # Останавливаем мониторинг команд
+ if self._monitoring_task:
+ self._monitoring_task.cancel()
+ self._monitoring_task = None
+
return True
except Exception as e:
@@ -134,7 +146,11 @@ class AgentManager:
return False
async def assign_session(
- self, session_id: int, room_name: str, interview_plan: dict, vacancy_data: dict = None
+ self,
+ session_id: int,
+ room_name: str,
+ interview_plan: dict,
+ vacancy_data: dict = None,
) -> bool:
"""Назначает агенту конкретную сессию интервью"""
async with self._lock:
@@ -159,11 +175,11 @@ class AgentManager:
"interview_plan": interview_plan,
"command": "start_interview",
}
-
+
# Добавляем данные вакансии если они переданы
if vacancy_data:
metadata["vacancy_data"] = vacancy_data
-
+
with open(metadata_file, "w", encoding="utf-8") as f:
json.dump(metadata, f, ensure_ascii=False, indent=2)
@@ -239,6 +255,39 @@ class AgentManager:
logger.error(f"Error releasing agent session: {e}")
return False
+ async def handle_session_completed(self, session_id: int, room_name: str) -> bool:
+ """Обрабатывает сигнал о завершении сессии от агента"""
+ async with self._lock:
+ if not self._agent_process:
+ logger.warning(f"No agent process to handle session_completed for {session_id}")
+ return False
+
+ if self._agent_process.session_id != session_id:
+ logger.warning(
+ f"Session mismatch: expected {self._agent_process.session_id}, got {session_id}"
+ )
+ return False
+
+ try:
+ # Очищаем файлы метаданных
+ try:
+ os.remove(f"session_metadata_{session_id}.json")
+ except FileNotFoundError:
+ pass
+
+ # Возвращаем агента в режим ожидания
+ old_session_id = self._agent_process.session_id
+ self._agent_process.session_id = None
+ self._agent_process.room_name = None
+ self._agent_process.status = "idle"
+
+ logger.info(f"Agent automatically released from session {old_session_id}")
+ return True
+
+ except Exception as e:
+ logger.error(f"Error handling session_completed: {e}")
+ return False
+
def get_status(self) -> dict:
"""Возвращает текущий статус агента"""
if not self._agent_process:
@@ -293,6 +342,45 @@ class AgentManager:
except Exception:
return False
+ async def _monitor_commands(self):
+ """Мониторит файл команд для обработки сигналов от агента"""
+ command_file = "agent_commands.json"
+ last_processed_timestamp = None
+
+ logger.info("[MONITOR] Starting command monitoring")
+
+ try:
+ while True:
+ try:
+ if os.path.exists(command_file):
+ with open(command_file, "r", encoding="utf-8") as f:
+ command = json.load(f)
+
+ # Проверяем timestamp чтобы избежать повторной обработки
+ command_timestamp = command.get("timestamp")
+ if command_timestamp and command_timestamp != last_processed_timestamp:
+ action = command.get("action")
+
+ if action == "session_completed":
+ session_id = command.get("session_id")
+ room_name = command.get("room_name")
+
+ logger.info(f"[MONITOR] Processing session_completed for {session_id}")
+ await self.handle_session_completed(session_id, room_name)
+
+ last_processed_timestamp = command_timestamp
+
+ await asyncio.sleep(2) # Проверяем каждые 2 секунды
+
+ except Exception as e:
+ logger.error(f"[MONITOR] Error processing command: {e}")
+ await asyncio.sleep(5) # Больший интервал при ошибке
+
+ except asyncio.CancelledError:
+ logger.info("[MONITOR] Command monitoring stopped")
+ except Exception as e:
+ logger.error(f"[MONITOR] Command monitoring failed: {e}")
+
# Глобальный экземпляр менеджера
agent_manager = AgentManager()
diff --git a/app/services/interview_service.py b/app/services/interview_service.py
index 4a0e5e9..b4acc66 100644
--- a/app/services/interview_service.py
+++ b/app/services/interview_service.py
@@ -135,7 +135,7 @@ class InterviewRoomService:
# Получаем готовый план интервью для AI агента
interview_plan = await self.get_resume_data_for_interview(resume_id)
-
+
# Получаем данные вакансии
resume = await self.resume_repo.get(resume_id)
vacancy_data = None
@@ -162,7 +162,10 @@ class InterviewRoomService:
# Назначаем сессию агенту через менеджер
success = await agent_manager.assign_session(
- interview_session.id, interview_session.room_name, interview_plan, vacancy_data
+ interview_session.id,
+ interview_session.room_name,
+ interview_plan,
+ vacancy_data,
)
if not success:
diff --git a/app/services/pdf_report_service.py b/app/services/pdf_report_service.py
new file mode 100644
index 0000000..8675dad
--- /dev/null
+++ b/app/services/pdf_report_service.py
@@ -0,0 +1,426 @@
+import io
+from datetime import datetime
+
+from reportlab.lib import colors
+from reportlab.lib.enums import TA_CENTER, TA_JUSTIFY
+from reportlab.lib.pagesizes import A4
+from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
+from reportlab.lib.units import inch
+from reportlab.platypus import (
+ Paragraph,
+ SimpleDocTemplate,
+ Spacer,
+ Table,
+ TableStyle,
+)
+
+from app.core.s3 import s3_service
+from app.models.interview_report import InterviewReport, RecommendationType
+
+
+class PDFReportService:
+ """Сервис для генерации PDF отчетов по интервью"""
+
+ def __init__(self):
+ self.styles = getSampleStyleSheet()
+ self._setup_custom_styles()
+
+ def _setup_custom_styles(self):
+ """Настройка кастомных стилей для документа"""
+ # Заголовок отчета
+ self.styles.add(
+ ParagraphStyle(
+ name="ReportTitle",
+ parent=self.styles["Title"],
+ fontSize=18,
+ spaceAfter=30,
+ alignment=TA_CENTER,
+ textColor=colors.HexColor("#2E3440"),
+ fontName="Helvetica-Bold",
+ )
+ )
+
+ # Заголовки секций
+ self.styles.add(
+ ParagraphStyle(
+ name="SectionHeader",
+ parent=self.styles["Heading1"],
+ fontSize=14,
+ spaceAfter=12,
+ spaceBefore=20,
+ textColor=colors.HexColor("#5E81AC"),
+ fontName="Helvetica-Bold",
+ )
+ )
+
+ # Подзаголовки
+ self.styles.add(
+ ParagraphStyle(
+ name="SubHeader",
+ parent=self.styles["Heading2"],
+ fontSize=12,
+ spaceAfter=8,
+ spaceBefore=15,
+ textColor=colors.HexColor("#81A1C1"),
+ fontName="Helvetica-Bold",
+ )
+ )
+
+ # Обычный текст
+ self.styles.add(
+ ParagraphStyle(
+ name="BodyText",
+ parent=self.styles["Normal"],
+ fontSize=10,
+ spaceAfter=6,
+ alignment=TA_JUSTIFY,
+ textColor=colors.HexColor("#2E3440"),
+ )
+ )
+
+ # Стиль для метрик
+ self.styles.add(
+ ParagraphStyle(
+ name="MetricValue",
+ parent=self.styles["Normal"],
+ fontSize=12,
+ alignment=TA_CENTER,
+ textColor=colors.HexColor("#5E81AC"),
+ fontName="Helvetica-Bold",
+ )
+ )
+
+ async def generate_interview_report_pdf(
+ self, report: InterviewReport, candidate_name: str, position: str
+ ) -> bytes:
+ """
+ Генерирует PDF отчет по интервью
+
+ Args:
+ report: Модель отчета из БД
+ candidate_name: Имя кандидата
+ position: Название позиции
+
+ Returns:
+ bytes: PDF файл в виде байтов
+ """
+ buffer = io.BytesIO()
+ doc = SimpleDocTemplate(
+ buffer,
+ pagesize=A4,
+ rightMargin=72,
+ leftMargin=72,
+ topMargin=72,
+ bottomMargin=72,
+ )
+
+ # Собираем элементы документа
+ story = []
+
+ # Заголовок отчета
+ story.append(
+ Paragraph(
+            f"Отчет по собеседованию<br/>{candidate_name}",
+ self.styles["ReportTitle"],
+ )
+ )
+
+ # Основная информация
+ story.append(Paragraph("Основная информация", self.styles["SectionHeader"]))
+
+ basic_info = [
+ ["Кандидат:", candidate_name],
+ ["Позиция:", position],
+ ["Дата интервью:", report.created_at.strftime("%d.%m.%Y %H:%M")],
+ ["Общий балл:", f"{report.overall_score}/100"],
+ ["Рекомендация:", self._format_recommendation(report.recommendation)],
+ ]
+
+ basic_table = Table(basic_info, colWidths=[2 * inch, 4 * inch])
+ basic_table.setStyle(
+ TableStyle(
+ [
+ ("ALIGN", (0, 0), (-1, -1), "LEFT"),
+ ("FONTNAME", (0, 0), (0, -1), "Helvetica-Bold"),
+ ("FONTSIZE", (0, 0), (-1, -1), 10),
+ ("BOTTOMPADDING", (0, 0), (-1, -1), 6),
+ ("TOPPADDING", (0, 0), (-1, -1), 6),
+ ]
+ )
+ )
+ story.append(basic_table)
+ story.append(Spacer(1, 20))
+
+ # Оценки по критериям
+ story.append(Paragraph("Детальная оценка", self.styles["SectionHeader"]))
+
+ criteria_data = [
+ ["Критерий", "Балл", "Обоснование", "Риски"],
+ [
+ "Технические навыки",
+ f"{report.technical_skills_score}/100",
+ report.technical_skills_justification or "—",
+ report.technical_skills_concerns or "—",
+ ],
+ [
+ "Релевантность опыта",
+ f"{report.experience_relevance_score}/100",
+ report.experience_relevance_justification or "—",
+ report.experience_relevance_concerns or "—",
+ ],
+ [
+ "Коммуникация",
+ f"{report.communication_score}/100",
+ report.communication_justification or "—",
+ report.communication_concerns or "—",
+ ],
+ [
+ "Решение задач",
+ f"{report.problem_solving_score}/100",
+ report.problem_solving_justification or "—",
+ report.problem_solving_concerns or "—",
+ ],
+ [
+ "Культурное соответствие",
+ f"{report.cultural_fit_score}/100",
+ report.cultural_fit_justification or "—",
+ report.cultural_fit_concerns or "—",
+ ],
+ ]
+
+ criteria_table = Table(
+ criteria_data, colWidths=[2 * inch, 0.8 * inch, 2.2 * inch, 1.8 * inch]
+ )
+ criteria_table.setStyle(
+ TableStyle(
+ [
+ ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#5E81AC")),
+ ("TEXTCOLOR", (0, 0), (-1, 0), colors.whitesmoke),
+ ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
+ ("FONTSIZE", (0, 0), (-1, -1), 9),
+ ("ALIGN", (1, 1), (1, -1), "CENTER"),
+ ("GRID", (0, 0), (-1, -1), 1, colors.HexColor("#D8DEE9")),
+ ("VALIGN", (0, 0), (-1, -1), "TOP"),
+ ("BOTTOMPADDING", (0, 0), (-1, -1), 8),
+ ("TOPPADDING", (0, 0), (-1, -1), 8),
+ ("LEFTPADDING", (0, 0), (-1, -1), 6),
+ ("RIGHTPADDING", (0, 0), (-1, -1), 6),
+ ]
+ )
+ )
+
+ # Цветовое кодирование баллов
+ for i in range(1, 6): # строки с баллами
+ score_cell = (1, i)
+ if hasattr(
+ report,
+ [
+ "technical_skills_score",
+ "experience_relevance_score",
+ "communication_score",
+ "problem_solving_score",
+ "cultural_fit_score",
+ ][i - 1],
+ ):
+ score = getattr(
+ report,
+ [
+ "technical_skills_score",
+ "experience_relevance_score",
+ "communication_score",
+ "problem_solving_score",
+ "cultural_fit_score",
+ ][i - 1],
+ )
+ if score >= 80:
+ criteria_table.setStyle(
+ TableStyle(
+ [
+ (
+ "BACKGROUND",
+ score_cell,
+ score_cell,
+ colors.HexColor("#A3BE8C"),
+ )
+ ]
+ )
+ )
+ elif score >= 60:
+ criteria_table.setStyle(
+ TableStyle(
+ [
+ (
+ "BACKGROUND",
+ score_cell,
+ score_cell,
+ colors.HexColor("#EBCB8B"),
+ )
+ ]
+ )
+ )
+ else:
+ criteria_table.setStyle(
+ TableStyle(
+ [
+ (
+ "BACKGROUND",
+ score_cell,
+ score_cell,
+ colors.HexColor("#BF616A"),
+ )
+ ]
+ )
+ )
+
+ story.append(criteria_table)
+ story.append(Spacer(1, 20))
+
+ # Сильные и слабые стороны
+ if report.strengths or report.weaknesses:
+ story.append(Paragraph("Анализ кандидата", self.styles["SectionHeader"]))
+
+ if report.strengths:
+ story.append(Paragraph("Сильные стороны:", self.styles["SubHeader"]))
+ for strength in report.strengths:
+ story.append(Paragraph(f"• {strength}", self.styles["BodyText"]))
+ story.append(Spacer(1, 10))
+
+ if report.weaknesses:
+ story.append(
+ Paragraph("Области для развития:", self.styles["SubHeader"])
+ )
+ for weakness in report.weaknesses:
+ story.append(Paragraph(f"• {weakness}", self.styles["BodyText"]))
+ story.append(Spacer(1, 10))
+
+ # Красные флаги
+ if report.red_flags:
+ story.append(Paragraph("Важные риски:", self.styles["SubHeader"]))
+ for red_flag in report.red_flags:
+ story.append(
+ Paragraph(
+ f"⚠ {red_flag}",
+ ParagraphStyle(
+ name="RedFlag",
+ parent=self.styles["BodyText"],
+ textColor=colors.HexColor("#BF616A"),
+ ),
+ )
+ )
+ story.append(Spacer(1, 15))
+
+ # Рекомендации и следующие шаги
+ if report.next_steps:
+ story.append(Paragraph("Рекомендации:", self.styles["SectionHeader"]))
+ story.append(Paragraph(report.next_steps, self.styles["BodyText"]))
+ story.append(Spacer(1, 15))
+
+ # Метрики интервью
+ if any(
+ [
+ report.interview_duration_minutes,
+ report.dialogue_messages_count,
+ report.questions_quality_score,
+ ]
+ ):
+ story.append(Paragraph("Метрики интервью", self.styles["SectionHeader"]))
+
+ metrics = []
+ if report.interview_duration_minutes:
+ metrics.append(
+ ["Длительность:", f"{report.interview_duration_minutes} мин"]
+ )
+ if report.dialogue_messages_count:
+ metrics.append(
+ ["Сообщений в диалоге:", str(report.dialogue_messages_count)]
+ )
+ if report.questions_quality_score:
+ metrics.append(
+ ["Качество ответов:", f"{report.questions_quality_score:.1f}/10"]
+ )
+
+ metrics_table = Table(metrics, colWidths=[2 * inch, 2 * inch])
+ metrics_table.setStyle(
+ TableStyle(
+ [
+ ("FONTNAME", (0, 0), (0, -1), "Helvetica-Bold"),
+ ("FONTSIZE", (0, 0), (-1, -1), 10),
+ ("BOTTOMPADDING", (0, 0), (-1, -1), 6),
+ ]
+ )
+ )
+ story.append(metrics_table)
+
+ # Подпись
+ story.append(Spacer(1, 30))
+ story.append(
+ Paragraph(
+ f"Отчет сгенерирован автоматически • {datetime.now().strftime('%d.%m.%Y %H:%M')}",
+ ParagraphStyle(
+ name="Footer",
+ parent=self.styles["Normal"],
+ fontSize=8,
+ alignment=TA_CENTER,
+ textColor=colors.HexColor("#4C566A"),
+ ),
+ )
+ )
+
+ # Генерируем PDF
+ doc.build(story)
+ buffer.seek(0)
+ return buffer.getvalue()
+
+ def _format_recommendation(self, recommendation: RecommendationType) -> str:
+ """Форматирует рекомендацию для отображения"""
+ recommendation_map = {
+ RecommendationType.STRONGLY_RECOMMEND: "✅ Настоятельно рекомендуем",
+ RecommendationType.RECOMMEND: "👍 Рекомендуем",
+ RecommendationType.CONSIDER: "🤔 Рассмотреть кандидатуру",
+ RecommendationType.REJECT: "❌ Не рекомендуем",
+ }
+ return recommendation_map.get(recommendation, str(recommendation))
+
+ async def generate_and_upload_pdf(
+ self, report: InterviewReport, candidate_name: str, position: str
+ ) -> str | None:
+ """
+ Генерирует PDF отчет и загружает его в S3
+
+ Args:
+ report: Модель отчета из БД
+ candidate_name: Имя кандидата
+ position: Название позиции
+
+ Returns:
+ str | None: URL файла в S3 или None при ошибке
+ """
+ try:
+ # Генерируем PDF
+ pdf_bytes = await self.generate_interview_report_pdf(
+ report, candidate_name, position
+ )
+
+ # Формируем имя файла
+ safe_name = "".join(
+ c for c in candidate_name if c.isalnum() or c in (" ", "-", "_")
+ ).strip()
+ safe_name = safe_name.replace(" ", "_")
+ filename = f"interview_report_{safe_name}_{report.id}.pdf"
+
+ # Загружаем в S3
+ file_url = await s3_service.upload_file(
+ file_content=pdf_bytes,
+ file_name=filename,
+ content_type="application/pdf",
+ )
+
+ return file_url
+
+ except Exception as e:
+ print(f"Error generating and uploading PDF report: {e}")
+ return None
+
+
+# Экземпляр сервиса
+pdf_report_service = PDFReportService()
diff --git a/celery_worker/interview_analysis_task.py b/celery_worker/interview_analysis_task.py
index 81a7586..6a4064f 100644
--- a/celery_worker/interview_analysis_task.py
+++ b/celery_worker/interview_analysis_task.py
@@ -1,3 +1,4 @@
+import asyncio
import json
import logging
from datetime import datetime
@@ -51,12 +52,18 @@ def generate_interview_report(resume_id: int):
# Получаем историю интервью
interview_session = _get_interview_session(db, resume_id)
- logger.info(f"[INTERVIEW_ANALYSIS] Found interview_session: {interview_session is not None}")
-
+ logger.info(
+ f"[INTERVIEW_ANALYSIS] Found interview_session: {interview_session is not None}"
+ )
+
if interview_session:
- logger.info(f"[INTERVIEW_ANALYSIS] Session ID: {interview_session.id}, dialogue_history length: {len(interview_session.dialogue_history) if interview_session.dialogue_history else 0}")
+ logger.info(
+ f"[INTERVIEW_ANALYSIS] Session ID: {interview_session.id}, dialogue_history length: {len(interview_session.dialogue_history) if interview_session.dialogue_history else 0}"
+ )
else:
- logger.warning(f"[INTERVIEW_ANALYSIS] No interview session found for resume_id: {resume_id}")
+ logger.warning(
+ f"[INTERVIEW_ANALYSIS] No interview session found for resume_id: {resume_id}"
+ )
# Парсим JSON данные
parsed_resume = _parse_json_field(resume.parsed_data)
@@ -68,13 +75,17 @@ def generate_interview_report(resume_id: int):
dialogue_history = interview_session.dialogue_history
elif isinstance(interview_session.dialogue_history, str):
try:
- dialogue_history = json.loads(interview_session.dialogue_history)
+ dialogue_history = json.loads(
+ interview_session.dialogue_history
+ )
if not isinstance(dialogue_history, list):
dialogue_history = []
except (json.JSONDecodeError, TypeError):
dialogue_history = []
-
- logger.info(f"[INTERVIEW_ANALYSIS] Parsed dialogue_history length: {len(dialogue_history)}")
+
+ logger.info(
+ f"[INTERVIEW_ANALYSIS] Parsed dialogue_history length: {len(dialogue_history)}"
+ )
# Генерируем отчет
report = _generate_comprehensive_report(
@@ -87,7 +98,18 @@ def generate_interview_report(resume_id: int):
)
# Сохраняем отчет в БД
- _save_report_to_db(db, resume_id, report)
+ report_instance = _save_report_to_db(db, resume_id, report)
+
+ # Генерируем и загружаем PDF отчет
+ if report_instance:
+ asyncio.run(
+ _generate_and_upload_pdf_report(
+ db,
+ report_instance,
+ resume.applicant_name,
+ vacancy.get("title", "Unknown Position"),
+ )
+ )
logger.info(
f"[INTERVIEW_ANALYSIS] Analysis completed for resume_id: {resume_id}, score: {report['overall_score']}"
@@ -155,25 +177,33 @@ def _get_interview_session(db, resume_id: int):
try:
from app.models.interview import InterviewSession
- logger.info(f"[GET_SESSION] Looking for interview session with resume_id: {resume_id}")
-
+ logger.info(
+ f"[GET_SESSION] Looking for interview session with resume_id: {resume_id}"
+ )
+
session = (
db.query(InterviewSession)
.filter(InterviewSession.resume_id == resume_id)
.first()
)
-
+
if session:
- logger.info(f"[GET_SESSION] Found session {session.id} for resume {resume_id}")
+ logger.info(
+ f"[GET_SESSION] Found session {session.id} for resume {resume_id}"
+ )
logger.info(f"[GET_SESSION] Session status: {session.status}")
- logger.info(f"[GET_SESSION] Dialogue history type: {type(session.dialogue_history)}")
+ logger.info(
+ f"[GET_SESSION] Dialogue history type: {type(session.dialogue_history)}"
+ )
if session.dialogue_history:
- logger.info(f"[GET_SESSION] Raw dialogue_history preview: {str(session.dialogue_history)[:200]}...")
+ logger.info(
+ f"[GET_SESSION] Raw dialogue_history preview: {str(session.dialogue_history)[:200]}..."
+ )
else:
logger.warning(f"[GET_SESSION] No session found for resume_id: {resume_id}")
-
+
return session
-
+
except Exception as e:
logger.error(f"Error getting interview session: {e}")
return None
@@ -497,7 +527,11 @@ def _calculate_experience_score(parsed_resume: dict, vacancy: dict) -> int:
def _save_report_to_db(db, resume_id: int, report: dict):
- """Сохраняет отчет в базу данных в таблицу interview_reports"""
+ """Сохраняет отчет в базу данных в таблицу interview_reports
+
+ Returns:
+ InterviewReport | None: Созданный или обновленный экземпляр отчета
+ """
try:
from app.models.interview import InterviewSession
@@ -514,7 +548,7 @@ def _save_report_to_db(db, resume_id: int, report: dict):
logger.warning(
f"[INTERVIEW_ANALYSIS] No interview session found for resume_id: {resume_id}"
)
- return
+ return None
# Проверяем, есть ли уже отчет для этой сессии
existing_report = (
@@ -531,6 +565,9 @@ def _save_report_to_db(db, resume_id: int, report: dict):
_update_report_from_dict(existing_report, report)
existing_report.updated_at = datetime.utcnow()
db.add(existing_report)
+ db.commit()
+ db.refresh(existing_report)
+ return existing_report
else:
logger.info(
f"[INTERVIEW_ANALYSIS] Creating new report for session: {interview_session.id}"
@@ -538,13 +575,46 @@ def _save_report_to_db(db, resume_id: int, report: dict):
# Создаем новый отчет
new_report = _create_report_from_dict(interview_session.id, report)
db.add(new_report)
-
- logger.info(
- f"[INTERVIEW_ANALYSIS] Report saved for resume_id: {resume_id}, session: {interview_session.id}"
- )
+ db.commit()
+ db.refresh(new_report)
+ return new_report
except Exception as e:
logger.error(f"[INTERVIEW_ANALYSIS] Error saving report: {str(e)}")
+ return None
+
+
+async def _generate_and_upload_pdf_report(
+ db, report_instance: "InterviewReport", candidate_name: str, position: str
+):
+ """Генерирует PDF отчет и загружает его в S3"""
+ try:
+ from app.services.pdf_report_service import pdf_report_service
+
+ logger.info(
+ f"[PDF_GENERATION] Starting PDF generation for report ID: {report_instance.id}"
+ )
+
+ # Генерируем и загружаем PDF
+ pdf_url = await pdf_report_service.generate_and_upload_pdf(
+ report=report_instance, candidate_name=candidate_name, position=position
+ )
+
+ if pdf_url:
+ # Сохраняем URL в базу данных
+ report_instance.pdf_report_url = pdf_url
+ db.add(report_instance)
+ db.commit()
+ logger.info(
+ f"[PDF_GENERATION] PDF generated and uploaded successfully: {pdf_url}"
+ )
+ else:
+ logger.error(
+ f"[PDF_GENERATION] Failed to generate or upload PDF for report ID: {report_instance.id}"
+ )
+
+ except Exception as e:
+ logger.error(f"[PDF_GENERATION] Error generating PDF report: {str(e)}")
def _create_report_from_dict(
diff --git a/celery_worker/tasks.py b/celery_worker/tasks.py
index 246bda9..eec173f 100644
--- a/celery_worker/tasks.py
+++ b/celery_worker/tasks.py
@@ -3,7 +3,11 @@ import os
from typing import Any
from celery_worker.celery_app import celery_app
-from celery_worker.database import SyncResumeRepository, SyncVacancyRepository, get_sync_session
+from celery_worker.database import (
+ SyncResumeRepository,
+ SyncVacancyRepository,
+ get_sync_session,
+)
from rag.llm.model import ResumeParser
from rag.registry import registry
@@ -19,19 +23,22 @@ def generate_interview_plan(
with get_sync_session() as session:
resume_repo = SyncResumeRepository(session)
vacancy_repo = SyncVacancyRepository(session)
-
+
resume_record = resume_repo.get_by_id(resume_id)
if not resume_record:
- return {"is_suitable": False, "rejection_reason": "Резюме не найдено в БД"}
+ return {
+ "is_suitable": False,
+ "rejection_reason": "Резюме не найдено в БД",
+ }
# Получаем данные вакансии
vacancy_record = None
if resume_record.vacancy_id:
vacancy_record = vacancy_repo.get_by_id(resume_record.vacancy_id)
-
+
if not vacancy_record:
return {"is_suitable": False, "rejection_reason": "Вакансия не найдена"}
-
+
vacancy_data = {
"title": vacancy_record.title,
"description": vacancy_record.description,
@@ -43,17 +50,17 @@ def generate_interview_plan(
# Сначала проверяем соответствие резюме и вакансии через LLM
chat_model = registry.get_chat_model()
-
+
# Формируем опыт кандидата
experience_map = {
"noExperience": "Без опыта",
"between1And3": "1-3 года",
"between3And6": "3-6 лет",
- "moreThan6": "Более 6 лет"
+ "moreThan6": "Более 6 лет",
}
-
+
compatibility_prompt = f"""
- Проанализируй (не строго!) соответствие кандидата вакансии и определи, стоит ли проводить интервью.
+ Проанализируй соответствие кандидата вакансии и определи, стоит ли проводить интервью.
КЛЮЧЕВОЙ И ЕДИНСТВЕННЫй КРИТЕРИЙ ОТКЛОНЕНИЯ:
1. Профессиональная область кандидата: Полное несоответствие сферы деятельности вакансии (иначе 100 за критерий)
@@ -63,6 +70,7 @@ def generate_interview_plan(
3. Учитывай опыт с аналогичными, похожими, смежными технологиями
4. Когда смотришь на вакансию и кандидата не учитывай строгие слова, такие как "Требования", "Ключевые" и тп. Это лишь маркеры,
но не оценочные указатели
+ 5. Если есть спорные вопросы соответствия, лучше допустить к собеседованию и уточнить их там
КАНДИДАТ:
- Имя: {combined_data.get("name", "Не указано")}
@@ -86,19 +94,19 @@ def generate_interview_plan(
"rejection_reason": "Конкретная подробная причина отклонения с цитированием (если is_suitable=false)",
}}
"""
-
+
from langchain.schema import HumanMessage, SystemMessage
-
+
compatibility_messages = [
SystemMessage(
content="Ты эксперт по подбору персонала. Анализируй соответствие кандидатов вакансиям строго и объективно."
),
HumanMessage(content=compatibility_prompt),
]
-
+
compatibility_response = chat_model.get_llm().invoke(compatibility_messages)
compatibility_text = compatibility_response.content.strip()
-
+
# Парсим ответ о соответствии
compatibility_result = None
if compatibility_text.startswith("{") and compatibility_text.endswith("}"):
@@ -111,13 +119,19 @@ def generate_interview_plan(
compatibility_result = json.loads(compatibility_text[start:end])
# Если кандидат не подходит - возвращаем результат отклонения
- if not compatibility_result or not compatibility_result.get("is_suitable", True):
+ if not compatibility_result or not compatibility_result.get(
+ "is_suitable", True
+ ):
return {
"is_suitable": False,
- "rejection_reason": compatibility_result.get("rejection_reason", "Кандидат не соответствует требованиям вакансии") if compatibility_result else "Ошибка анализа соответствия",
- "match_details": compatibility_result
+ "rejection_reason": compatibility_result.get(
+ "rejection_reason", "Кандидат не соответствует требованиям вакансии"
+ )
+ if compatibility_result
+ else "Ошибка анализа соответствия",
+ "match_details": compatibility_result,
}
-
+
# Если кандидат подходит - генерируем план интервью
plan_prompt = f"""
Создай детальный план интервью для кандидата на основе его резюме и требований вакансии.
@@ -201,11 +215,11 @@ def generate_interview_plan(
interview_plan["is_suitable"] = True
interview_plan["match_details"] = compatibility_result
return interview_plan
-
+
return {
"is_suitable": True,
"match_details": compatibility_result,
- "error": "Не удалось сгенерировать план интервью"
+ "error": "Не удалось сгенерировать план интервью",
}
except Exception as e:
@@ -313,7 +327,7 @@ def parse_resume_task(self, resume_id: str, file_path: str):
with get_sync_session() as session:
repo = SyncResumeRepository(session)
-
+
# Проверяем результат генерации плана интервью
print("interview_plan", interview_plan)
if interview_plan and interview_plan.get("is_suitable", True):
@@ -323,14 +337,20 @@ def parse_resume_task(self, resume_id: str, file_path: str):
repo.update_interview_plan(int(resume_id), interview_plan)
else:
# Кандидат не подходит - отклоняем
- rejection_reason = interview_plan.get("rejection_reason", "Не соответствует требованиям вакансии") if interview_plan else "Ошибка анализа соответствия"
- repo.update_status(
- int(resume_id),
- "rejected",
- parsed_data=combined_data,
- rejection_reason=rejection_reason
+ rejection_reason = (
+ interview_plan.get(
+ "rejection_reason", "Не соответствует требованиям вакансии"
+ )
+ if interview_plan
+ else "Ошибка анализа соответствия"
)
-
+ repo.update_status(
+ int(resume_id),
+ "rejected",
+ parsed_data=combined_data,
+ rejection_reason=rejection_reason,
+ )
+
# Завершаем с информацией об отклонении
self.update_state(
state="SUCCESS",
@@ -339,15 +359,15 @@ def parse_resume_task(self, resume_id: str, file_path: str):
"progress": 100,
"result": combined_data,
"rejected": True,
- "rejection_reason": rejection_reason
+ "rejection_reason": rejection_reason,
},
)
-
+
return {
"resume_id": resume_id,
"status": "rejected",
"parsed_data": combined_data,
- "rejection_reason": rejection_reason
+ "rejection_reason": rejection_reason,
}
# Завершено успешно
diff --git a/migrations/versions/86cfa6ee73af_add_pdf_report_url_to_interview_reports.py b/migrations/versions/86cfa6ee73af_add_pdf_report_url_to_interview_reports.py
new file mode 100644
index 0000000..7319bb0
--- /dev/null
+++ b/migrations/versions/86cfa6ee73af_add_pdf_report_url_to_interview_reports.py
@@ -0,0 +1,40 @@
+"""add pdf_report_url to interview_reports
+
+Revision ID: 86cfa6ee73af
+Revises: 9c60c15f7846
+Create Date: 2025-09-07 18:20:54.026422
+
+"""
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+import sqlmodel
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "86cfa6ee73af"
+down_revision: str | Sequence[str] | None = "9c60c15f7846"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Upgrade schema: add nullable interview_reports.pdf_report_url."""
+    # NOTE(review): autogenerate also proposed dropping five reporting
+    # indexes and retyping interview_sessions.transcript/ai_feedback
+    # (TEXT -> VARCHAR); that is unrelated metadata drift, not part of
+    # this change, so only the new column is applied here.
+    op.add_column(
+        "interview_reports",
+        sa.Column(
+            "pdf_report_url",
+            sqlmodel.sql.sqltypes.AutoString(length=500),
+            nullable=True,
+        ),
+    )
+
+
+def downgrade() -> None:
+    """Downgrade schema: remove interview_reports.pdf_report_url."""
+    op.drop_column("interview_reports", "pdf_report_url")
diff --git a/pyproject.toml b/pyproject.toml
index c3b7994..06fd15d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,6 +29,8 @@ dependencies = [
"livekit-agents[cartesia,deepgram,openai,silero,resemble,turn-detector]~=1.2",
"textract>=1.5.0",
"comtypes>=1.4.12",
+ "reportlab>=4.4.3",
+ "yandex-speechkit>=1.5.0",
]
[build-system]
diff --git a/rag/llm/model.py b/rag/llm/model.py
index 84b5129..53fd3e9 100644
--- a/rag/llm/model.py
+++ b/rag/llm/model.py
@@ -108,25 +108,29 @@ class ResumeParser:
try:
# Метод 1: COM автоматизация Word (самый надежный для Windows)
import os
- if os.name == 'nt': # Windows
+
+ if os.name == "nt": # Windows
try:
import comtypes.client
+
print(f"[DEBUG] Trying Word COM automation for {file_path}")
-
- word = comtypes.client.CreateObject('Word.Application')
+
+ word = comtypes.client.CreateObject("Word.Application")
word.Visible = False
-
+
doc = word.Documents.Open(file_path)
text = doc.Content.Text
doc.Close()
word.Quit()
-
+
if text and text.strip():
- print(f"[DEBUG] Word COM successfully extracted {len(text)} characters")
+ print(
+ f"[DEBUG] Word COM successfully extracted {len(text)} characters"
+ )
return text.strip()
except Exception as e:
print(f"[DEBUG] Word COM failed: {e}")
-
+
# Метод 2: Для .doc файлов используем python-docx
if Document:
try:
@@ -140,10 +144,13 @@ class ResumeParser:
# Попытка использовать textract (универсальная библиотека для извлечения текста)
try:
import textract
+
print(f"[DEBUG] Using textract to process {file_path}")
- text = textract.process(file_path).decode('utf-8')
+ text = textract.process(file_path).decode("utf-8")
if text and text.strip():
- print(f"[DEBUG] textract successfully extracted {len(text)} characters")
+ print(
+ f"[DEBUG] textract successfully extracted {len(text)} characters"
+ )
return text.strip()
else:
print("[DEBUG] textract returned empty text")
@@ -151,10 +158,11 @@ class ResumeParser:
print(f"[DEBUG] textract not available: {e}")
except Exception as e:
print(f"[DEBUG] textract failed: {e}")
-
+
# Попытка использовать docx2txt
try:
import docx2txt
+
text = docx2txt.process(file_path)
if text:
return text.strip()
@@ -162,12 +170,12 @@ class ResumeParser:
pass
except Exception:
pass
-
+
# Попытка использовать oletools для старых DOC файлов
try:
- from oletools.olevba import VBA_Parser
from oletools import olefile
-
+ from oletools.olevba import VBA_Parser
+
if olefile.isOleFile(file_path):
# Это старый формат DOC, пытаемся извлечь текст
# Пока что возвращаем информативную ошибку
diff --git a/rag/registry.py b/rag/registry.py
index ce04353..7df73e3 100644
--- a/rag/registry.py
+++ b/rag/registry.py
@@ -32,7 +32,7 @@ class ModelRegistry:
if self._chat_model is None:
if settings.openai_api_key:
llm = ChatOpenAI(
- api_key=settings.openai_api_key, model="gpt-4o-mini", temperature=0
+ api_key=settings.openai_api_key, model="gpt-5-mini"
)
self._chat_model = ChatModel(llm)
else:
diff --git a/rag/settings.py b/rag/settings.py
index 2b53b53..7230a69 100644
--- a/rag/settings.py
+++ b/rag/settings.py
@@ -24,7 +24,7 @@ class RagSettings(BaseSettings):
# LLM Settings
openai_api_key: str | None = None
anthropic_api_key: str | None = None
- openai_model: str = "gpt-4o-mini"
+ openai_model: str = "gpt-5-mini"
openai_embeddings_model: str = "text-embedding-3-small"
# AI Agent Settings
diff --git a/rag/vector_store.py b/rag/vector_store.py
index 7840fe3..edcc356 100644
--- a/rag/vector_store.py
+++ b/rag/vector_store.py
@@ -57,7 +57,9 @@ class MilvusVectorStore:
return True
except Exception as e:
- raise Exception(f"Ошибка при добавлении кандидата в Milvus: {str(e)}") from e
+ raise Exception(
+ f"Ошибка при добавлении кандидата в Milvus: {str(e)}"
+ ) from e
def search_similar_candidates(self, query: str, k: int = 5) -> list[dict[str, Any]]:
"""Поиск похожих кандидатов по запросу"""
@@ -92,7 +94,9 @@ class MilvusVectorStore:
return None
except Exception as e:
- raise Exception(f"Ошибка при получении кандидата из Milvus: {str(e)}") from e
+ raise Exception(
+ f"Ошибка при получении кандидата из Milvus: {str(e)}"
+ ) from e
def delete_candidate(self, candidate_id: str):
"""Удаляет кандидата из векторной базы"""
diff --git a/uv.lock b/uv.lock
index c2a3198..6f88e5b 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1010,9 +1010,11 @@ dependencies = [
{ name = "python-dotenv" },
{ name = "python-multipart" },
{ name = "redis" },
+ { name = "reportlab" },
{ name = "sqlmodel" },
{ name = "textract" },
{ name = "uvicorn", extra = ["standard"] },
+ { name = "yandex-speechkit" },
]
[package.dev-dependencies]
@@ -1048,9 +1050,11 @@ requires-dist = [
{ name = "python-dotenv", specifier = ">=1.0.0" },
{ name = "python-multipart", specifier = ">=0.0.6" },
{ name = "redis", specifier = ">=5.0.0" },
+ { name = "reportlab", specifier = ">=4.4.3" },
{ name = "sqlmodel", specifier = ">=0.0.14" },
{ name = "textract", specifier = ">=1.5.0" },
{ name = "uvicorn", extras = ["standard"], specifier = ">=0.24.0" },
+ { name = "yandex-speechkit", specifier = ">=1.5.0" },
]
[package.metadata.requires-dev]
@@ -2733,6 +2737,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/58/f0/427018098906416f580e3cf1366d3b1abfb408a0652e9f31600c24a1903c/pydantic_settings-2.10.1-py3-none-any.whl", hash = "sha256:a60952460b99cf661dc25c29c0ef171721f98bfcb52ef8d9ea4c943d7c8cc796", size = 45235 },
]
+[[package]]
+name = "pydub"
+version = "0.25.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/fe/9a/e6bca0eed82db26562c73b5076539a4a08d3cffd19c3cc5913a3e61145fd/pydub-0.25.1.tar.gz", hash = "sha256:980a33ce9949cab2a569606b65674d748ecbca4f0796887fd6f46173a7b0d30f", size = 38326 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/a6/53/d78dc063216e62fc55f6b2eebb447f6a4b0a59f55c8406376f76bf959b08/pydub-0.25.1-py2.py3-none-any.whl", hash = "sha256:65617e33033874b59d87db603aa1ed450633288aefead953b30bded59cb599a6", size = 32327 },
+]
+
[[package]]
name = "pygments"
version = "2.19.2"
@@ -3004,6 +3017,19 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/cf/3e/7d7ac6fd085023312421e0d69dfabdfb28e116e513fadbe9afe710c01893/regex-2025.9.1-cp314-cp314-win_arm64.whl", hash = "sha256:f46d525934871ea772930e997d577d48c6983e50f206ff7b66d4ac5f8941e993", size = 271860 },
]
+[[package]]
+name = "reportlab"
+version = "4.4.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "charset-normalizer" },
+ { name = "pillow" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/2f/83/3d44b873fa71ddc7d323c577fe4cfb61e05b34d14e64b6a232f9cfbff89d/reportlab-4.4.3.tar.gz", hash = "sha256:073b0975dab69536acd3251858e6b0524ed3e087e71f1d0d1895acb50acf9c7b", size = 3887532 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/52/c8/aaf4e08679e7b1dc896ad30de0d0527f0fd55582c2e6deee4f2cc899bf9f/reportlab-4.4.3-py3-none-any.whl", hash = "sha256:df905dc5ec5ddaae91fc9cb3371af863311271d555236410954961c5ee6ee1b5", size = 1953896 },
+]
+
[[package]]
name = "requests"
version = "2.32.5"
@@ -3843,6 +3869,20 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/fa/34/a22e6664211f0c8879521328000bdcae9bf6dbafa94a923e531f6d5b3f73/xlsxwriter-3.2.5-py3-none-any.whl", hash = "sha256:4f4824234e1eaf9d95df9a8fe974585ff91d0f5e3d3f12ace5b71e443c1c6abd", size = 172347 },
]
+[[package]]
+name = "yandex-speechkit"
+version = "1.5.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "grpcio" },
+ { name = "protobuf" },
+ { name = "pydub" },
+ { name = "requests" },
+]
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/aa/56/30ae86f0efbbdd98f35f856822a46ddfca998b419e9a888672796fdaab6e/yandex_speechkit-1.5.0-py3-none-any.whl", hash = "sha256:ca44b10c30d6acb6440ef623e559341205b269672ae7fecc52290cc6375884fb", size = 118729 },
+]
+
[[package]]
name = "yarl"
version = "1.20.1"