add matching resume and vacancy

2025-09-07 01:23:55 +05:00 · 2025-09-07 01:23:55 +05:00 · 6ecb66514e
commit 6ecb66514e
parent 6e1c631d70
8 changed files with 2553 additions and 2049 deletions
--- a/ai_interviewer_agent.py
+++ b/ai_interviewer_agent.py
@ -24,7 +24,6 @@ from app.core.database import get_session
 from app.repositories.interview_repository import InterviewRepository
 from app.repositories.resume_repository import ResumeRepository
 from app.services.interview_finalization_service import InterviewFinalizationService
-from app.services.interview_service import InterviewRoomService
 from rag.settings import settings

 logger = logging.getLogger("ai-interviewer")
@ -50,8 +49,9 @@ async def close_room(room_name: str):
 class InterviewAgent:
    """AI Agent для проведения собеседований с управлением диалогом"""

-    def __init__(self, interview_plan: dict):
+    def __init__(self, interview_plan: dict, vacancy_data=None):
        self.interview_plan = interview_plan
+        self.vacancy_data = vacancy_data
        self.conversation_history = []

        # Состояние диалога
@ -155,7 +155,49 @@ class InterviewAgent:
        else:
            time_status = "НОРМАЛЬНО"

-        return f"""Ты опытный HR-интервьюер, который проводит адаптивное голосовое собеседование.
+        # Vacancy details for the system prompt; left empty when no vacancy data was supplied.
+        vacancy_info = ""
+        if self.vacancy_data:
+            vacancy = self.vacancy_data
+            employment_type_map = {
+                "full": "Полная занятость",
+                "part": "Частичная занятость",
+                "project": "Проектная работа",
+                "volunteer": "Волонтёрство",
+                "probation": "Стажировка",
+            }
+            experience_map = {
+                "noExperience": "Без опыта",
+                "between1And3": "1-3 года",
+                "between3And6": "3-6 лет",
+                "moreThan6": "Более 6 лет",
+            }
+            schedule_map = {
+                "fullDay": "Полный день",
+                "shift": "Сменный график",
+                "flexible": "Гибкий график",
+                "remote": "Удалённая работа",
+                "flyInFlyOut": "Вахтовый метод",
+            }
+
+            def describe(field, labels, fallback):
+                """Map a coded vacancy field to its label; keep unmapped values, use fallback when empty."""
+                raw = vacancy.get(field)
+                return labels.get(raw, raw) or fallback
+
+            # Every field falls back via `or` rather than a .get() default, so a key
+            # that is present but holds None is not rendered as the text "None".
+            # The section heading is not repeated here because the prompt template
+            # that embeds vacancy_info already prints it.
+            vacancy_info = f"""- Должность: {vacancy.get('title') or 'Не указана'}
+- Описание: {vacancy.get('description') or 'Не указано'}
+- Ключевые навыки: {vacancy.get('key_skills') or 'Не указаны'}
+- Тип занятости: {describe('employment_type', employment_type_map, 'Не указан')}
+- Опыт работы: {describe('experience', experience_map, 'Не указан')}
+- График работы: {describe('schedule', schedule_map, 'Не указан')}
+- Регион: {vacancy.get('area_name') or 'Не указан'}
+- Профессиональные роли: {vacancy.get('professional_roles') or 'Не указаны'}
+- Контактное лицо: {vacancy.get('contacts_name') or 'Не указано'}"""
+
+        return f"""
+Ты опытный HR-интервьюер, который проводит адаптивное голосовое собеседование. Представься контактным именем из вакансии (если оно есть)
+
+ИНФОРМАЦИЯ О ВАКАНСИИ:
+
+{vacancy_info}

 ИНФОРМАЦИЯ О КАНДИДАТЕ:
 - Имя: {candidate_name}
@ -279,6 +321,7 @@ async def entrypoint(ctx: JobContext):
    # План интервью - получаем из метаданных сессии
    interview_plan = {}
    session_id = None
+    vacancy_data = None

    # Проверяем файлы команд для получения сессии
    command_file = "agent_commands.json"
@ -313,11 +356,15 @@ async def entrypoint(ctx: JobContext):
            with open(metadata_file, encoding="utf-8") as f:
                metadata = json.load(f)
                interview_plan = metadata.get("interview_plan", {})
+                vacancy_data = metadata.get("vacancy_data", None)
                session_id = metadata.get("session_id", session_id)
                logger.info(f"[INIT] Loaded interview plan for session {session_id}")
+                if vacancy_data:
+                    logger.info(f"[INIT] Loaded vacancy data from metadata: {vacancy_data.get('title', 'Unknown')}")
        except Exception as e:
            logger.warning(f"[INIT] Failed to load metadata: {str(e)}")
            interview_plan = {}
+            vacancy_data = None

    # Используем дефолтный план если план пустой или нет секций
    if not interview_plan or not interview_plan.get("interview_structure", {}).get(
@ -350,7 +397,7 @@ async def entrypoint(ctx: JobContext):
            "key_evaluation_points": ["Коммуникация"],
        }

-    interviewer = InterviewAgent(interview_plan)
+    interviewer = InterviewAgent(interview_plan, vacancy_data)
    logger.info(
        f"[INIT] InterviewAgent created with {len(interviewer.sections)} sections"
    )
@ -489,23 +536,10 @@ async def entrypoint(ctx: JobContext):
                )

                if not interviewer.interview_finalized:
-                    # Запускаем полную цепочку завершения интервью
-                    try:
-                        session_generator = get_session()
-                        db = await anext(session_generator)
-                        try:
-                            interview_repo = InterviewRepository(db)
-                            resume_repo = ResumeRepository(db)
-                            interview_service = InterviewRoomService(
-                                interview_repo, resume_repo
-                            )
-                            await interview_service.end_interview_session(session_id)
-                        finally:
-                            await session_generator.aclose()
-                    except Exception as e:
-                        logger.error(f"[FINALIZE] Error finalizing interview: {str(e)}")
-                    return True
-                break
+                    await complete_interview_sequence(
+                        ctx.room.name, interviewer
+                    )
+                    break

        return False

@ -544,19 +578,62 @@ async def entrypoint(ctx: JobContext):
    asyncio.create_task(monitor_end_commands())
    
    # --- Обработчик состояния пользователя (замена мониторинга тишины) ---
+    disconnect_timer: asyncio.Task | None = None
+    # Incremented every time the user starts speaking. An "away" handler that is
+    # still waiting for its check-in reply uses it to notice that the user came
+    # back before the disconnect timer was armed.
+    speaking_events = 0
+    
    @session.on("user_state_changed")
    def on_user_state_changed(event):
        """Обработчик изменения состояния пользователя (активен/неактивен)"""
+        
        async def on_change():
+            nonlocal disconnect_timer, speaking_events
+
            logger.info(f"[USER_STATE] User state changed to: {event.new_state}")

-            if event.new_state == "away" and interviewer.intro_done:
-                logger.info("[USER_STATE] User went away, generating response...")
+            # The user started speaking: drop any pending disconnect timer.
+            if event.new_state == "speaking":
+                speaking_events += 1
+                if disconnect_timer is not None:
+                    logger.info("[USER_STATE] Cancelling disconnect timer due to speaking")
+                    disconnect_timer.cancel()
+                    disconnect_timer = None

-                # Генерируем ответ через LLM с инструкциями
-                await session.generate_reply(
-                    instructions="Клиент молчит уже больше 10 секунд. Проверь связь фразой вроде 'Приём! Ты меня слышишь?' или 'Связь не пропала?'"
+            # The session reported the user as "away" and the intro is already done.
+            elif event.new_state == "away" and interviewer.intro_done:
+                logger.info("[USER_STATE] User away detected, sending check-in message...")
+                speaking_events_at_away = speaking_events
+
+                # 1) First message: check whether the connection is still alive.
+                handle = await session.generate_reply(
+                    instructions=(
+                        "Клиент молчит уже больше 10 секунд. "
+                        "Проверь связь фразой вроде 'Приём! Ты меня слышишь?' "
+                        "или 'Связь не пропала?'"
+                    )
                )
+                await handle  # wait until the check-in reply has finished
+
+                # If the user spoke while the check-in was being generated or played,
+                # the "speaking" branch ran with no timer to cancel. Arming the timer
+                # now would announce a disconnect to a user who is present.
+                if speaking_events != speaking_events_at_away:
+                    logger.info("[USER_STATE] User spoke during check-in, disconnect timer not started")
+                    return
+
+                # 2) 30-second timer that ends with the "client disconnected" reply.
+                async def disconnect_timeout():
+                    try:
+                        await asyncio.sleep(30)
+                        logger.info("[DISCONNECT_TIMER] 30 seconds passed, sending disconnect message")
+
+                        # Second message: treat the client as disconnected.
+                        await session.generate_reply(
+                            instructions="Похоже клиент отключился"
+                        )
+                        
+                        logger.info("[DISCONNECT_TIMER] Disconnect message sent successfully")
+                    except asyncio.CancelledError:
+                        logger.info("[DISCONNECT_TIMER] Timer cancelled before completion")
+                    except Exception as e:
+                        logger.error(f"[DISCONNECT_TIMER] Error in disconnect timeout: {e}")
+
+                # 3) Replace a timer left over from an earlier "away" event.
+                if disconnect_timer is not None:
+                    disconnect_timer.cancel()
+
+                disconnect_timer = asyncio.create_task(disconnect_timeout())
+
        asyncio.create_task(on_change())

    # --- Полная цепочка завершения интервью ---
--- a/app/services/agent_manager.py
+++ b/app/services/agent_manager.py
@ -134,7 +134,7 @@ class AgentManager:
                return False

    async def assign_session(
-        self, session_id: int, room_name: str, interview_plan: dict
+        self, session_id: int, room_name: str, interview_plan: dict, vacancy_data: dict = None
    ) -> bool:
        """Назначает агенту конкретную сессию интервью"""
        async with self._lock:
@ -153,18 +153,19 @@ class AgentManager:
            try:
                # Создаем файл метаданных для сессии
                metadata_file = f"session_metadata_{session_id}.json"
+                metadata = {
+                    "session_id": session_id,
+                    "room_name": room_name,
+                    "interview_plan": interview_plan,
+                    "command": "start_interview",
+                }
+                
+                # Добавляем данные вакансии если они переданы
+                if vacancy_data:
+                    metadata["vacancy_data"] = vacancy_data
+                    
                with open(metadata_file, "w", encoding="utf-8") as f:
-                    json.dump(
-                        {
-                            "session_id": session_id,
-                            "room_name": room_name,
-                            "interview_plan": interview_plan,
-                            "command": "start_interview",
-                        },
-                        f,
-                        ensure_ascii=False,
-                        indent=2,
-                    )
+                    json.dump(metadata, f, ensure_ascii=False, indent=2)

                # Отправляем сигнал агенту через файл команд
                command_file = "agent_commands.json"
--- a/app/services/interview_service.py
+++ b/app/services/interview_service.py
@ -13,6 +13,7 @@ from app.models.interview import (
 from app.models.resume import ResumeStatus
 from app.repositories.interview_repository import InterviewRepository
 from app.repositories.resume_repository import ResumeRepository
+from app.repositories.vacancy_repository import VacancyRepository
 from app.services.agent_manager import agent_manager
 from rag.settings import settings

@ -22,9 +23,11 @@ class InterviewRoomService:
        self,
        interview_repo: Annotated[InterviewRepository, Depends(InterviewRepository)],
        resume_repo: Annotated[ResumeRepository, Depends(ResumeRepository)],
+        vacancy_repo: Annotated[VacancyRepository, Depends(VacancyRepository)],
    ):
        self.interview_repo = interview_repo
        self.resume_repo = resume_repo
+        self.vacancy_repo = vacancy_repo
        self.livekit_url = settings.livekit_url or "ws://localhost:7880"
        self.api_key = settings.livekit_api_key or "devkey"
        self.api_secret = settings.livekit_api_secret or "secret"
@ -103,11 +106,6 @@ class InterviewRoomService:
    async def get_livekit_token(self, resume_id: int) -> LiveKitTokenResponse | None:
        """Создает сессию собеседования и возвращает токен для LiveKit"""
        try:
-            # Проверяем доступность агента
-            if not agent_manager.is_available():
-                print("[ERROR] AI Agent is not available for new interview")
-                return None
-
            # Валидируем резюме
            validation = await self.validate_resume_for_interview(resume_id)
            if not validation.can_interview:
@ -124,35 +122,59 @@ class InterviewRoomService:
                    f"[DEBUG] Using existing interview session: {interview_session.id}"
                )
            else:
+                # Проверяем доступность агента
+                if not agent_manager.is_available():
+                    print("[ERROR] AI Agent is not available for new interview")
+                    return None
+
                # Создаем новую сессию собеседования
                interview_session = await self.create_interview_session(resume_id)
                if not interview_session:
                    return None
                print(f"[DEBUG] Created new interview session: {interview_session.id}")

+                # Получаем готовый план интервью для AI агента
+                interview_plan = await self.get_resume_data_for_interview(resume_id)
+                
+                # Получаем данные вакансии
+                resume = await self.resume_repo.get(resume_id)
+                vacancy_data = None
+                if resume and resume.vacancy_id:
+                    vacancy = await self.vacancy_repo.get_by_id(resume.vacancy_id)
+                    if vacancy:
+                        # Конвертируем объект вакансии в словарь для JSON сериализации
+                        vacancy_data = {
+                            "title": vacancy.title,
+                            "description": vacancy.description,
+                            "key_skills": vacancy.key_skills,
+                            "employment_type": vacancy.employment_type,
+                            "experience": vacancy.experience,
+                            "schedule": vacancy.schedule,
+                            "area_name": vacancy.area_name,
+                            "professional_roles": vacancy.professional_roles,
+                            "contacts_name": vacancy.contacts_name,
+                        }
+
+                # Обновляем статус сессии на ACTIVE
+                await self.interview_repo.update_session_status(
+                    interview_session.id, "active"
+                )
+
+                # Назначаем сессию агенту через менеджер
+                success = await agent_manager.assign_session(
+                    interview_session.id, interview_session.room_name, interview_plan, vacancy_data
+                )
+
+                if not success:
+                    print("[ERROR] Failed to assign session to AI agent")
+                    return None
+
            # Генерируем токен
            participant_name = f"user_{resume_id}"
            token = self.generate_access_token(
                interview_session.room_name, participant_name
            )

-            # Получаем готовый план интервью для AI агента
-            interview_plan = await self.get_resume_data_for_interview(resume_id)
-
-            # Обновляем статус сессии на ACTIVE
-            await self.interview_repo.update_session_status(
-                interview_session.id, "active"
-            )
-
-            # Назначаем сессию агенту через менеджер
-            success = await agent_manager.assign_session(
-                interview_session.id, interview_session.room_name, interview_plan
-            )
-
-            if not success:
-                print("[ERROR] Failed to assign session to AI agent")
-                return None
-
            return LiveKitTokenResponse(
                token=token,
                room_name=interview_session.room_name,
--- a/celery_worker/database.py
+++ b/celery_worker/database.py
@ -51,6 +51,7 @@ class SyncResumeRepository:
        status: str,
        parsed_data: dict = None,
        error_message: str = None,
+        rejection_reason: str = None,
    ):
        """Обновить статус резюме"""
        from datetime import datetime
@ -71,6 +72,10 @@ class SyncResumeRepository:
                resume.status = ResumeStatus.PARSE_FAILED
                if error_message:
                    resume.parse_error = error_message
+            elif status == "rejected":
+                resume.status = ResumeStatus.REJECTED
+                if rejection_reason:
+                    resume.notes = f"ОТКЛОНЕНО: {rejection_reason}"

            resume.updated_at = datetime.utcnow()
            self.session.add(resume)
@ -121,3 +126,16 @@ class SyncResumeRepository:
                    return data
            else:
                return data
+
+
+class SyncVacancyRepository:
+    """Synchronous repository for reading Vacancy rows from Celery tasks."""
+
+    def __init__(self, session: Session):
+        self.session = session
+
+    def get_by_id(self, vacancy_id: int):
+        """Return the first Vacancy whose id equals ``vacancy_id``."""
+        # Imported at call time, as in the original.
+        from app.models.vacancy import Vacancy
+
+        matching = self.session.query(Vacancy).filter(Vacancy.id == vacancy_id)
+        return matching.first()
--- a/celery_worker/tasks.py
+++ b/celery_worker/tasks.py
@ -3,7 +3,7 @@ import os
 from typing import Any

 from celery_worker.celery_app import celery_app
-from celery_worker.database import SyncResumeRepository, get_sync_session
+from celery_worker.database import SyncResumeRepository, SyncVacancyRepository, get_sync_session
 from rag.llm.model import ResumeParser
 from rag.registry import registry

@ -17,24 +17,105 @@ def generate_interview_plan(
    try:
        # Получаем данные о вакансии из БД
        with get_sync_session() as session:
-            repo = SyncResumeRepository(session)
-            resume_record = repo.get_by_id(resume_id)
+            resume_repo = SyncResumeRepository(session)
+            vacancy_repo = SyncVacancyRepository(session)
            
+            resume_record = resume_repo.get_by_id(resume_id)
            if not resume_record:
-                return None
+                return {"is_suitable": False, "rejection_reason": "Резюме не найдено в БД"}
+
+            # Получаем данные вакансии
+            vacancy_record = None
+            if resume_record.vacancy_id:
+                vacancy_record = vacancy_repo.get_by_id(resume_record.vacancy_id)
+            
+            if not vacancy_record:
+                return {"is_suitable": False, "rejection_reason": "Вакансия не найдена"}
            
-            # Здесь нужно получить данные вакансии
-            # Пока используем заглушку, потом добавим связь с vacancy
            vacancy_data = {
-                "title": "Python Developer",
-                "requirements": "Python, FastAPI, PostgreSQL, Docker",
-                "company_name": "Tech Company",
-                "experience_level": "Middle",
+                "title": vacancy_record.title,
+                "description": vacancy_record.description,
+                "key_skills": vacancy_record.key_skills,
+                "experience": vacancy_record.experience,
+                "area_name": vacancy_record.area_name,
+                "professional_roles": vacancy_record.professional_roles,
            }

-        # Генерируем план через LLM
+        # Сначала проверяем соответствие резюме и вакансии через LLM
        chat_model = registry.get_chat_model()
        
+        # Формируем опыт кандидата
+        experience_map = {
+            "noExperience": "Без опыта",
+            "between1And3": "1-3 года",
+            "between3And6": "3-6 лет",
+            "moreThan6": "Более 6 лет"
+        }
+        
+        # Prompt for the resume/vacancy compatibility check.
+        # - The JSON example has no trailing comma: the reply is parsed with
+        #   json.loads, which rejects one, and the model tends to copy the example.
+        # - `skills` falls back via `or` so a key that is present but None does not
+        #   break str.join.
+        compatibility_prompt = f"""
+        Проанализируй (не строго!) соответствие кандидата вакансии и определи, стоит ли проводить интервью.
+        
+        КЛЮЧЕВЫЕ И ЕДИНСТВЕННЫЕ КРИТЕРИИ ОТКЛОНЕНИЯ:
+        1. Профессиональная область кандидата: Полное несоответствие сферы деятельности вакансии (иначе 100 за критерий)
+        2. Остальные показатели кандидата хотя бы примерно соответствуют вакансии: скиллы кандидата похожи или смежны вакансионным, опыт не сильно отдален 
+        от указанного 
+        
+        КАНДИДАТ:
+        - Имя: {combined_data.get("name", "Не указано")}
+        - Навыки: {", ".join(combined_data.get("skills") or [])}
+        - Общий опыт: {combined_data.get("total_years", 0)} лет
+        - Образование: {combined_data.get("education", "Не указано")}
+        - Про работу: {combined_data.get("experience", "Не указано")}
+        - Саммари: {combined_data.get("summary", "Не указано")}
+        
+        ВАКАНСИЯ:
+        - Должность: {vacancy_data["title"]}
+        - Описание: {vacancy_data["description"]}...
+        - Ключевые навыки: {vacancy_data["key_skills"] or "Не указаны"}
+        - Требуемый опыт: {experience_map.get(vacancy_data["experience"], "Не указан")}
+        - Профессиональные роли: {vacancy_data["professional_roles"] or "Не указаны"}
+        
+
+        Верни ответ в JSON формате:
+        {{
+            "is_suitable": true/false,
+            "rejection_reason": "Конкретная подробная причина отклонения с цитированием (если is_suitable=false)"
+        }}
+        """
+        
+        from langchain.schema import HumanMessage, SystemMessage
+        
+        # The system message must not contradict the "не строго" instruction in the
+        # prompt above, otherwise the model gets two opposite strictness levels.
+        compatibility_messages = [
+            SystemMessage(
+                content="Ты эксперт по подбору персонала. Анализируй соответствие кандидатов вакансиям объективно, без излишней строгости."
+            ),
+            HumanMessage(content=compatibility_prompt),
+        ]
+        
+        compatibility_response = chat_model.get_llm().invoke(compatibility_messages)
+        compatibility_text = compatibility_response.content.strip()
+        
+        # Парсим ответ о соответствии
+        compatibility_result = None
+        if compatibility_text.startswith("{") and compatibility_text.endswith("}"):
+            compatibility_result = json.loads(compatibility_text)
+        else:
+            # Ищем JSON в тексте
+            start = compatibility_text.find("{")
+            end = compatibility_text.rfind("}") + 1
+            if start != -1 and end > start:
+                compatibility_result = json.loads(compatibility_text[start:end])
+        print("compatibility_text", compatibility_text)
+        print("compatibility_result", compatibility_result)
+        # Если кандидат не подходит - возвращаем результат отклонения
+        if not compatibility_result or not compatibility_result.get("is_suitable", True):
+            return {
+                "is_suitable": False,
+                "rejection_reason": compatibility_result.get("rejection_reason", "Кандидат не соответствует требованиям вакансии") if compatibility_result else "Ошибка анализа соответствия",
+                "match_details": compatibility_result
+            }
+        
+        # Если кандидат подходит - генерируем план интервью
        plan_prompt = f"""
        Создай детальный план интервью для кандидата на основе его резюме и требований вакансии.
        
@ -45,10 +126,10 @@ def generate_interview_plan(
        - Образование: {combined_data.get("education", "Не указано")}
        
        ВАКАНСИЯ:
-        - Позиция: {vacancy_data["title"]}
-        - Требования: {vacancy_data["requirements"]}
-        - Компания: {vacancy_data["company_name"]}
-        - Уровень: {vacancy_data["experience_level"]}
+        - Должность: {vacancy_data["title"]}
+        - Описание: {vacancy_data["description"]}...
+        - Ключевые навыки: {vacancy_data["key_skills"] or "Не указаны"}
+        - Требуемый опыт: {experience_map.get(vacancy_data["experience"], "Не указан")}
        
        Создай план интервью в формате JSON:
        {{
@ -102,16 +183,27 @@ def generate_interview_plan(
        response_text = response.content.strip()

        # Парсим JSON ответ
+        interview_plan = None
        if response_text.startswith("{") and response_text.endswith("}"):
-            return json.loads(response_text)
+            interview_plan = json.loads(response_text)
        else:
            # Ищем JSON в тексте
            start = response_text.find("{")
            end = response_text.rfind("}") + 1
            if start != -1 and end > start:
-                return json.loads(response_text[start:end])
+                interview_plan = json.loads(response_text[start:end])

-        return None
+        if interview_plan:
+            # Добавляем информацию о том, что кандидат подходит
+            interview_plan["is_suitable"] = True
+            interview_plan["match_details"] = compatibility_result
+            return interview_plan
+            
+        return {
+            "is_suitable": True,
+            "match_details": compatibility_result,
+            "error": "Не удалось сгенерировать план интервью"
+        }

    except Exception as e:
        print(f"Ошибка генерации плана интервью: {str(e)}")
@ -143,7 +235,6 @@ def parse_resume_task(self, resume_id: str, file_path: str):
        # Инициализируем модели из registry
        try:
            chat_model = registry.get_chat_model()
-            embeddings_model = registry.get_embeddings_model()
            vector_store = registry.get_vector_store()
        except Exception as e:
            # Обновляем статус в БД - ошибка инициализации
@ -191,8 +282,8 @@ def parse_resume_task(self, resume_id: str, file_path: str):

        # Создаем комбинированные данные: навыки и опыт из парсинга, контакты из формы
        combined_data = parsed_resume.copy()
-        combined_data["name"] = applicant_name
-        combined_data["email"] = applicant_email
+        combined_data["name"] = applicant_name or parsed_resume.get("name", "")
+        combined_data["email"] = applicant_email or parsed_resume.get("email", "")
        combined_data["phone"] = applicant_phone or parsed_resume.get("phone", "")

        # Шаг 2: Векторизация и сохранение в Milvus
@ -219,10 +310,42 @@ def parse_resume_task(self, resume_id: str, file_path: str):

        with get_sync_session() as session:
            repo = SyncResumeRepository(session)
-            repo.update_status(int(resume_id), "parsed", parsed_data=combined_data)
-            # Сохраняем план интервью
-            if interview_plan:
+            
+            # Проверяем результат генерации плана интервью
+            print("interview_plan", interview_plan)
+            if interview_plan and interview_plan.get("is_suitable", True):
+                # Кандидат подходит - обновляем статус на parsed
+                repo.update_status(int(resume_id), "parsed", parsed_data=combined_data)
+                # Сохраняем план интервью
                repo.update_interview_plan(int(resume_id), interview_plan)
+            else:
+                # Кандидат не подходит - отклоняем
+                rejection_reason = interview_plan.get("rejection_reason", "Не соответствует требованиям вакансии") if interview_plan else "Ошибка анализа соответствия"
+                repo.update_status(
+                    int(resume_id), 
+                    "rejected", 
+                    parsed_data=combined_data,
+                    rejection_reason=rejection_reason
+                )
+                
+                # Завершаем с информацией об отклонении
+                self.update_state(
+                    state="SUCCESS",
+                    meta={
+                        "status": f"Резюме обработано, но кандидат отклонен: {rejection_reason}",
+                        "progress": 100,
+                        "result": combined_data,
+                        "rejected": True,
+                        "rejection_reason": rejection_reason
+                    },
+                )
+                
+                return {
+                    "resume_id": resume_id,
+                    "status": "rejected",
+                    "parsed_data": combined_data,
+                    "rejection_reason": rejection_reason
+                }

        # Завершено успешно
        self.update_state(
--- a/pyproject.toml
+++ b/pyproject.toml
@ -27,6 +27,8 @@ dependencies = [
    "livekit>=1.0.12",
    "livekit-api>=1.0.5",
    "livekit-agents[cartesia,deepgram,openai,silero,resemble,turn-detector]~=1.2",
+    "textract>=1.5.0",
+    "comtypes>=1.4.12",
 ]

 [build-system]
--- a/rag/llm/model.py
+++ b/rag/llm/model.py
@ -106,7 +106,28 @@ class ResumeParser:
    def extract_text_from_doc(self, file_path: str) -> str:
        """Извлекает текст из DOC файла"""
        try:
-            # Для .doc файлов используем antiword (если установлен) или попробуем python-docx
+            # Method 1: Word COM automation (only attempted on Windows).
+            import os
+            if os.name == 'nt':  # Windows
+                try:
+                    import comtypes.client
+                    print(f"[DEBUG] Trying Word COM automation for {file_path}")
+
+                    word = comtypes.client.CreateObject('Word.Application')
+                    try:
+                        word.Visible = False
+
+                        # Pass an absolute path: Word is a separate process and does not
+                        # necessarily share this process's working directory.
+                        doc = word.Documents.Open(os.path.abspath(file_path))
+                        try:
+                            text = doc.Content.Text
+                        finally:
+                            doc.Close()
+                    finally:
+                        # Always shut Word down, otherwise a failure above leaves a
+                        # hidden Word process running.
+                        word.Quit()
+
+                    if text and text.strip():
+                        print(f"[DEBUG] Word COM successfully extracted {len(text)} characters")
+                        return text.strip()
+                except Exception as e:
+                    # Best effort: fall through to the next extraction method.
+                    print(f"[DEBUG] Word COM failed: {e}")
+            
+            # Метод 2: Для .doc файлов используем python-docx
            if Document:
                try:
                    doc = Document(file_path)
@ -116,20 +137,53 @@ class ResumeParser:
                    # Если python-docx не может прочитать .doc, пытаемся использовать системные утилиты
                    pass

-            # Попытка использовать системную команду antiword (для Linux/Mac)
-            import subprocess
-
+            # Попытка использовать textract (универсальная библиотека для извлечения текста)
            try:
-                result = subprocess.run(
-                    ["antiword", file_path], capture_output=True, text=True
-                )
-                if result.returncode == 0:
-                    return result.stdout.strip()
-            except FileNotFoundError:
+                import textract
+                print(f"[DEBUG] Using textract to process {file_path}")
+                text = textract.process(file_path).decode('utf-8')
+                if text and text.strip():
+                    print(f"[DEBUG] textract successfully extracted {len(text)} characters")
+                    return text.strip()
+                else:
+                    print("[DEBUG] textract returned empty text")
+            except ImportError as e:
+                print(f"[DEBUG] textract not available: {e}")
+            except Exception as e:
+                print(f"[DEBUG] textract failed: {e}")
+            
+            # Попытка использовать docx2txt
+            try:
+                import docx2txt
+                text = docx2txt.process(file_path)
+                if text:
+                    return text.strip()
+            except ImportError:
+                pass
+            except Exception:
+                pass
+            
+            # Попытка использовать oletools для старых DOC файлов
+            try:
+                from oletools.olevba import VBA_Parser
+                from oletools import olefile
+                
+                if olefile.isOleFile(file_path):
+                    # Это старый формат DOC, пытаемся извлечь текст
+                    # Пока что возвращаем информативную ошибку
+                    pass
+            except ImportError:
+                pass
+            except Exception:
                pass

            raise Exception(
-                "Не удалось найти подходящий инструмент для чтения DOC файлов. Рекомендуется использовать DOCX формат."
+                "Не удалось извлечь текст из DOC файла ни одним из методов. "
+                "Возможные причины:\n"
+                "1. Microsoft Word не установлен (для COM автоматизации)\n"
+                "2. Файл поврежден или не содержит текста\n"
+                "3. Файл защищен паролем\n"
+                "Рекомендуется конвертировать DOC в DOCX формат."
            )
        except Exception as e:
            raise Exception(f"Ошибка при чтении DOC: {str(e)}") from e
--- a/uv.lock
+++ b/uv.lock