Add resume-to-vacancy matching

2025-09-07 01:23:55 +05:00 · 2025-09-07 01:23:55 +05:00 · 6ecb66514e
commit 6ecb66514e
parent 6e1c631d70
8 changed files with 2553 additions and 2049 deletions
--- a/ai_interviewer_agent.py
+++ b/ai_interviewer_agent.py
@ -24,7 +24,6 @@ from app.core.database import get_session
 from app.repositories.interview_repository import InterviewRepository
 from app.repositories.resume_repository import ResumeRepository
 from app.services.interview_finalization_service import InterviewFinalizationService
 from app.services.interview_service import InterviewRoomService
 from rag.settings import settings
 logger = logging.getLogger("ai-interviewer")
@ -50,8 +49,9 @@ async def close_room(room_name: str):
 class InterviewAgent:
    """AI Agent для проведения собеседований с управлением диалогом"""
-    def __init__(self, interview_plan: dict):
+    def __init__(self, interview_plan: dict, vacancy_data=None):
        self.interview_plan = interview_plan
        self.vacancy_data = vacancy_data
        self.conversation_history = []
        # Состояние диалога
@ -155,7 +155,49 @@ class InterviewAgent:
        else:
            time_status = "НОРМАЛЬНО"
-        return f"""Ты опытный HR-интервьюер, который проводит адаптивное голосовое собеседование.
+        # Информация о вакансии
        vacancy_info = ""
        if self.vacancy_data:
            employment_type_map = {
                "full": "Полная занятость",
                "part": "Частичная занятость", 
                "project": "Проектная работа",
                "volunteer": "Волонтёрство",
                "probation": "Стажировка"
            }
            experience_map = {
                "noExperience": "Без опыта",
                "between1And3": "1-3 года",
                "between3And6": "3-6 лет", 
                "moreThan6": "Более 6 лет"
            }
            schedule_map = {
                "fullDay": "Полный день",
                "shift": "Сменный график",
                "flexible": "Гибкий график",
                "remote": "Удалённая работа",
                "flyInFlyOut": "Вахтовый метод"
            }
            vacancy_info = f"""
 ИНФОРМАЦИЯ О ВАКАНСИИ:
 - Должность: {self.vacancy_data.get('title', 'Не указана')}
 - Описание: {self.vacancy_data.get('description', 'Не указано')}
 - Ключевые навыки: {self.vacancy_data.get('key_skills') or 'Не указаны'}
 - Тип занятости: {employment_type_map.get(self.vacancy_data.get('employment_type'), self.vacancy_data.get('employment_type', 'Не указан'))}
 - Опыт работы: {experience_map.get(self.vacancy_data.get('experience'), self.vacancy_data.get('experience', 'Не указан'))}
 - График работы: {schedule_map.get(self.vacancy_data.get('schedule'), self.vacancy_data.get('schedule', 'Не указан'))}
 - Регион: {self.vacancy_data.get('area_name', 'Не указан')}
 - Профессиональные роли: {self.vacancy_data.get('professional_roles') or 'Не указаны'}
 - Контактное лицо: {self.vacancy_data.get('contacts_name') or 'Не указано'}"""
        return f"""
 Ты опытный HR-интервьюер, который проводит адаптивное голосовое собеседование. Представься контактным именем из вакансии (если оно есть)
 ИНФОРМАЦИЯ О ВАКАНСИИ:
 {vacancy_info}
 ИНФОРМАЦИЯ О КАНДИДАТЕ:
 - Имя: {candidate_name}
@ -279,6 +321,7 @@ async def entrypoint(ctx: JobContext):
    # План интервью - получаем из метаданных сессии
    interview_plan = {}
    session_id = None
    vacancy_data = None
    # Проверяем файлы команд для получения сессии
    command_file = "agent_commands.json"
@ -313,11 +356,15 @@ async def entrypoint(ctx: JobContext):
            with open(metadata_file, encoding="utf-8") as f:
                metadata = json.load(f)
                interview_plan = metadata.get("interview_plan", {})
                vacancy_data = metadata.get("vacancy_data", None)
                session_id = metadata.get("session_id", session_id)
                logger.info(f"[INIT] Loaded interview plan for session {session_id}")
                if vacancy_data:
                    logger.info(f"[INIT] Loaded vacancy data from metadata: {vacancy_data.get('title', 'Unknown')}")
        except Exception as e:
            logger.warning(f"[INIT] Failed to load metadata: {str(e)}")
            interview_plan = {}
            vacancy_data = None
    # Используем дефолтный план если план пустой или нет секций
    if not interview_plan or not interview_plan.get("interview_structure", {}).get(
@ -350,7 +397,7 @@ async def entrypoint(ctx: JobContext):
            "key_evaluation_points": ["Коммуникация"],
        }
-    interviewer = InterviewAgent(interview_plan)
+    interviewer = InterviewAgent(interview_plan, vacancy_data)
    logger.info(
        f"[INIT] InterviewAgent created with {len(interviewer.sections)} sections"
    )
@ -489,23 +536,10 @@ async def entrypoint(ctx: JobContext):
                )
                if not interviewer.interview_finalized:
-                    # Запускаем полную цепочку завершения интервью
+                    await complete_interview_sequence(
-                    try:
+                        ctx.room.name, interviewer
-                        session_generator = get_session()
+                    )
-                        db = await anext(session_generator)
+                    break
                        try:
                            interview_repo = InterviewRepository(db)
                            resume_repo = ResumeRepository(db)
                            interview_service = InterviewRoomService(
                                interview_repo, resume_repo
                            )
                            await interview_service.end_interview_session(session_id)
                        finally:
                            await session_generator.aclose()
                    except Exception as e:
                        logger.error(f"[FINALIZE] Error finalizing interview: {str(e)}")
                    return True
                break
        return False
@ -544,19 +578,62 @@ async def entrypoint(ctx: JobContext):
    asyncio.create_task(monitor_end_commands())
    # --- Обработчик состояния пользователя (замена мониторинга тишины) ---
    disconnect_timer: asyncio.Task | None = None
    @session.on("user_state_changed")
    def on_user_state_changed(event):
        """Обработчик изменения состояния пользователя (активен/неактивен)"""
        async def on_change():
            nonlocal disconnect_timer
            logger.info(f"[USER_STATE] User state changed to: {event.new_state}")
-            if event.new_state == "away" and interviewer.intro_done:
+            # === Пользователь начал говорить ===
-                logger.info("[USER_STATE] User went away, generating response...")
+            if event.new_state == "speaking":
                # Если есть таймер на 30 секунд — отменяем его
                if disconnect_timer is not None:
                    logger.info("[USER_STATE] Cancelling disconnect timer due to speaking")
                    disconnect_timer.cancel()
                    disconnect_timer = None
-                # Генерируем ответ через LLM с инструкциями
+            # === Пользователь молчит более 10 секунд (state == away) ===
-                await session.generate_reply(
+            elif event.new_state == "away" and interviewer.intro_done:
-                    instructions="Клиент молчит уже больше 10 секунд. Проверь связь фразой вроде 'Приём! Ты меня слышишь?' или 'Связь не пропала?'"
+                logger.info("[USER_STATE] User away detected, sending check-in message...")
                # 1) Первое сообщение — проверка связи
                handle = await session.generate_reply(
                    instructions=(
                        "Клиент молчит уже больше 10 секунд. "
                        "Проверь связь фразой вроде 'Приём! Ты меня слышишь?' "
                        "или 'Связь не пропала?'"
                    )
                )
                await handle  # ждем завершения первой реплики
                # 2) Таймер на 30 секунд
                async def disconnect_timeout():
                    """Wait 30 seconds, then ask the session for a "client disconnected" reply.

                    Sleeps for 30 seconds and then calls ``session.generate_reply`` with
                    the instruction that the client appears to have disconnected.
                    Cancellation of this coroutine is logged and swallowed (not
                    re-raised); any other exception is logged as an error and also
                    swallowed, so nothing propagates out of the coroutine.
                    """
                    try:
                        await asyncio.sleep(30)
                        logger.info("[DISCONNECT_TIMER] 30 seconds passed, sending disconnect message")
                        # Second message: treat the client as disconnected
                        await session.generate_reply(
                            instructions="Похоже клиент отключился"
                        )
                        logger.info("[DISCONNECT_TIMER] Disconnect message sent successfully")
                    except asyncio.CancelledError:
                        # Raised when the timer is cancelled while sleeping or replying.
                        logger.info("[DISCONNECT_TIMER] Timer cancelled before completion")
                    except Exception as e:
                        logger.error(f"[DISCONNECT_TIMER] Error in disconnect timeout: {e}")
                # 3) Если уже есть активный таймер — отменяем его перед запуском нового
                if disconnect_timer is not None:
                    disconnect_timer.cancel()
                disconnect_timer = asyncio.create_task(disconnect_timeout())
        asyncio.create_task(on_change())
    # --- Полная цепочка завершения интервью ---
--- a/app/services/agent_manager.py
+++ b/app/services/agent_manager.py
@ -134,7 +134,7 @@ class AgentManager:
                return False
    async def assign_session(
-        self, session_id: int, room_name: str, interview_plan: dict
+        self, session_id: int, room_name: str, interview_plan: dict, vacancy_data: dict = None
    ) -> bool:
        """Назначает агенту конкретную сессию интервью"""
        async with self._lock:
@ -153,18 +153,19 @@ class AgentManager:
            try:
                # Создаем файл метаданных для сессии
                metadata_file = f"session_metadata_{session_id}.json"
                metadata = {
                    "session_id": session_id,
                    "room_name": room_name,
                    "interview_plan": interview_plan,
                    "command": "start_interview",
                }
                # Добавляем данные вакансии если они переданы
                if vacancy_data:
                    metadata["vacancy_data"] = vacancy_data
                with open(metadata_file, "w", encoding="utf-8") as f:
-                    json.dump(
+                    json.dump(metadata, f, ensure_ascii=False, indent=2)
                        {
                            "session_id": session_id,
                            "room_name": room_name,
                            "interview_plan": interview_plan,
                            "command": "start_interview",
                        },
                        f,
                        ensure_ascii=False,
                        indent=2,
                    )
                # Отправляем сигнал агенту через файл команд
                command_file = "agent_commands.json"
--- a/app/services/interview_service.py
+++ b/app/services/interview_service.py
@ -13,6 +13,7 @@ from app.models.interview import (
 from app.models.resume import ResumeStatus
 from app.repositories.interview_repository import InterviewRepository
 from app.repositories.resume_repository import ResumeRepository
 from app.repositories.vacancy_repository import VacancyRepository
 from app.services.agent_manager import agent_manager
 from rag.settings import settings
@ -22,9 +23,11 @@ class InterviewRoomService:
        self,
        interview_repo: Annotated[InterviewRepository, Depends(InterviewRepository)],
        resume_repo: Annotated[ResumeRepository, Depends(ResumeRepository)],
        vacancy_repo: Annotated[VacancyRepository, Depends(VacancyRepository)],
    ):
        self.interview_repo = interview_repo
        self.resume_repo = resume_repo
        self.vacancy_repo = vacancy_repo
        self.livekit_url = settings.livekit_url or "ws://localhost:7880"
        self.api_key = settings.livekit_api_key or "devkey"
        self.api_secret = settings.livekit_api_secret or "secret"
@ -103,11 +106,6 @@ class InterviewRoomService:
    async def get_livekit_token(self, resume_id: int) -> LiveKitTokenResponse | None:
        """Создает сессию собеседования и возвращает токен для LiveKit"""
        try:
            # Проверяем доступность агента
            if not agent_manager.is_available():
                print("[ERROR] AI Agent is not available for new interview")
                return None
            # Валидируем резюме
            validation = await self.validate_resume_for_interview(resume_id)
            if not validation.can_interview:
@ -124,35 +122,59 @@ class InterviewRoomService:
                    f"[DEBUG] Using existing interview session: {interview_session.id}"
                )
            else:
                # Проверяем доступность агента
                if not agent_manager.is_available():
                    print("[ERROR] AI Agent is not available for new interview")
                    return None
                # Создаем новую сессию собеседования
                interview_session = await self.create_interview_session(resume_id)
                if not interview_session:
                    return None
                print(f"[DEBUG] Created new interview session: {interview_session.id}")
                # Получаем готовый план интервью для AI агента
                interview_plan = await self.get_resume_data_for_interview(resume_id)
                # Получаем данные вакансии
                resume = await self.resume_repo.get(resume_id)
                vacancy_data = None
                if resume and resume.vacancy_id:
                    vacancy = await self.vacancy_repo.get_by_id(resume.vacancy_id)
                    if vacancy:
                        # Конвертируем объект вакансии в словарь для JSON сериализации
                        vacancy_data = {
                            "title": vacancy.title,
                            "description": vacancy.description,
                            "key_skills": vacancy.key_skills,
                            "employment_type": vacancy.employment_type,
                            "experience": vacancy.experience,
                            "schedule": vacancy.schedule,
                            "area_name": vacancy.area_name,
                            "professional_roles": vacancy.professional_roles,
                            "contacts_name": vacancy.contacts_name,
                        }
                # Обновляем статус сессии на ACTIVE
                await self.interview_repo.update_session_status(
                    interview_session.id, "active"
                )
                # Назначаем сессию агенту через менеджер
                success = await agent_manager.assign_session(
                    interview_session.id, interview_session.room_name, interview_plan, vacancy_data
                )
                if not success:
                    print("[ERROR] Failed to assign session to AI agent")
                    return None
            # Генерируем токен
            participant_name = f"user_{resume_id}"
            token = self.generate_access_token(
                interview_session.room_name, participant_name
            )
            # Получаем готовый план интервью для AI агента
            interview_plan = await self.get_resume_data_for_interview(resume_id)
            # Обновляем статус сессии на ACTIVE
            await self.interview_repo.update_session_status(
                interview_session.id, "active"
            )
            # Назначаем сессию агенту через менеджер
            success = await agent_manager.assign_session(
                interview_session.id, interview_session.room_name, interview_plan
            )
            if not success:
                print("[ERROR] Failed to assign session to AI agent")
                return None
            return LiveKitTokenResponse(
                token=token,
                room_name=interview_session.room_name,
--- a/celery_worker/database.py
+++ b/celery_worker/database.py
@ -51,6 +51,7 @@ class SyncResumeRepository:
        status: str,
        parsed_data: dict = None,
        error_message: str = None,
        rejection_reason: str = None,
    ):
        """Обновить статус резюме"""
        from datetime import datetime
@ -71,6 +72,10 @@ class SyncResumeRepository:
                resume.status = ResumeStatus.PARSE_FAILED
                if error_message:
                    resume.parse_error = error_message
            elif status == "rejected":
                resume.status = ResumeStatus.REJECTED
                if rejection_reason:
                    resume.notes = f"ОТКЛОНЕНО: {rejection_reason}"
            resume.updated_at = datetime.utcnow()
            self.session.add(resume)
@ -121,3 +126,16 @@ class SyncResumeRepository:
                    return data
            else:
                return data
 class SyncVacancyRepository:
    """Synchronous repository for reading Vacancy records inside Celery tasks."""
    def __init__(self, session: Session):
        # Keep the caller-supplied session; every query below goes through it.
        self.session = session
    def get_by_id(self, vacancy_id: int):
        """Look up a vacancy by its ID and return whatever ``.first()`` yields."""
        # The model is imported lazily inside the method, as in the original code.
        from app.models.vacancy import Vacancy
        vacancy_query = self.session.query(Vacancy)
        matching = vacancy_query.filter(Vacancy.id == vacancy_id)
        return matching.first()
--- a/celery_worker/tasks.py
+++ b/celery_worker/tasks.py
@ -3,7 +3,7 @@ import os
 from typing import Any
 from celery_worker.celery_app import celery_app
-from celery_worker.database import SyncResumeRepository, get_sync_session
+from celery_worker.database import SyncResumeRepository, SyncVacancyRepository, get_sync_session
 from rag.llm.model import ResumeParser
 from rag.registry import registry
@ -17,24 +17,105 @@ def generate_interview_plan(
    try:
        # Получаем данные о вакансии из БД
        with get_sync_session() as session:
-            repo = SyncResumeRepository(session)
+            resume_repo = SyncResumeRepository(session)
-            resume_record = repo.get_by_id(resume_id)
+            vacancy_repo = SyncVacancyRepository(session)
-
+            
            resume_record = resume_repo.get_by_id(resume_id)
            if not resume_record:
-                return None
+                return {"is_suitable": False, "rejection_reason": "Резюме не найдено в БД"}
-            # Здесь нужно получить данные вакансии
+            # Получаем данные вакансии
-            # Пока используем заглушку, потом добавим связь с vacancy
+            vacancy_record = None
            if resume_record.vacancy_id:
                vacancy_record = vacancy_repo.get_by_id(resume_record.vacancy_id)
            if not vacancy_record:
                return {"is_suitable": False, "rejection_reason": "Вакансия не найдена"}
            vacancy_data = {
-                "title": "Python Developer",
+                "title": vacancy_record.title,
-                "requirements": "Python, FastAPI, PostgreSQL, Docker",
+                "description": vacancy_record.description,
-                "company_name": "Tech Company",
+                "key_skills": vacancy_record.key_skills,
-                "experience_level": "Middle",
+                "experience": vacancy_record.experience,
                "area_name": vacancy_record.area_name,
                "professional_roles": vacancy_record.professional_roles,
            }
-        # Генерируем план через LLM
+        # Сначала проверяем соответствие резюме и вакансии через LLM
        chat_model = registry.get_chat_model()
        # Формируем опыт кандидата
        experience_map = {
            "noExperience": "Без опыта",
            "between1And3": "1-3 года",
            "between3And6": "3-6 лет",
            "moreThan6": "Более 6 лет"
        }
        compatibility_prompt = f"""
        Проанализируй (не строго!) соответствие кандидата вакансии и определи, стоит ли проводить интервью.
        КЛЮЧЕВЫЕ И ЕДИНСТВЕННЫЕ КРИТЕРИИ ОТКЛОНЕНИЯ:
        1. Профессиональная область кандидата: Полное несоответствие сферы деятельности вакансии (иначе 100 за критерий)
        2. Остальные показатели кандидата хотя бы примерно соответствуют вакансии: скиллы кандидата похожи или смежны вакансионным, опыт не сильно отдален 
        от указанного 
        КАНДИДАТ:
        - Имя: {combined_data.get("name", "Не указано")}
        - Навыки: {", ".join(combined_data.get("skills", []))}
        - Общий опыт: {combined_data.get("total_years", 0)} лет
        - Образование: {combined_data.get("education", "Не указано")}
        - Про работу: {combined_data.get("experience", "Не указано")}
        - Саммари: {combined_data.get("summary", "Не указано")}
        ВАКАНСИЯ:
        - Должность: {vacancy_data["title"]}
        - Описание: {vacancy_data["description"]}...
        - Ключевые навыки: {vacancy_data["key_skills"] or "Не указаны"}
        - Требуемый опыт: {experience_map.get(vacancy_data["experience"], "Не указан")}
        - Профессиональные роли: {vacancy_data["professional_roles"] or "Не указаны"}
        Верни ответ в JSON формате:
        {{
            "is_suitable": true/false,
            "rejection_reason": "Конкретная подробная причина отклонения с цитированием (если is_suitable=false)",
        }}
        """
        from langchain.schema import HumanMessage, SystemMessage
        compatibility_messages = [
            SystemMessage(
                content="Ты эксперт по подбору персонала. Анализируй соответствие кандидатов вакансиям строго и объективно."
            ),
            HumanMessage(content=compatibility_prompt),
        ]
        compatibility_response = chat_model.get_llm().invoke(compatibility_messages)
        compatibility_text = compatibility_response.content.strip()
        # Парсим ответ о соответствии
        compatibility_result = None
        if compatibility_text.startswith("{") and compatibility_text.endswith("}"):
            compatibility_result = json.loads(compatibility_text)
        else:
            # Ищем JSON в тексте
            start = compatibility_text.find("{")
            end = compatibility_text.rfind("}") + 1
            if start != -1 and end > start:
                compatibility_result = json.loads(compatibility_text[start:end])
        print("compatibility_text", compatibility_text)
        print("compatibility_result", compatibility_result)
        # Если кандидат не подходит - возвращаем результат отклонения
        if not compatibility_result or not compatibility_result.get("is_suitable", True):
            return {
                "is_suitable": False,
                "rejection_reason": compatibility_result.get("rejection_reason", "Кандидат не соответствует требованиям вакансии") if compatibility_result else "Ошибка анализа соответствия",
                "match_details": compatibility_result
            }
        # Если кандидат подходит - генерируем план интервью
        plan_prompt = f"""
        Создай детальный план интервью для кандидата на основе его резюме и требований вакансии.
@ -45,10 +126,10 @@ def generate_interview_plan(
        - Образование: {combined_data.get("education", "Не указано")}
        ВАКАНСИЯ:
-        - Позиция: {vacancy_data["title"]}
+        - Должность: {vacancy_data["title"]}
-        - Требования: {vacancy_data["requirements"]}
+        - Описание: {vacancy_data["description"]}...
-        - Компания: {vacancy_data["company_name"]}
+        - Ключевые навыки: {vacancy_data["key_skills"] or "Не указаны"}
-        - Уровень: {vacancy_data["experience_level"]}
+        - Требуемый опыт: {experience_map.get(vacancy_data["experience"], "Не указан")}
        Создай план интервью в формате JSON:
        {{
@ -102,16 +183,27 @@ def generate_interview_plan(
        response_text = response.content.strip()
        # Парсим JSON ответ
        interview_plan = None
        if response_text.startswith("{") and response_text.endswith("}"):
-            return json.loads(response_text)
+            interview_plan = json.loads(response_text)
        else:
            # Ищем JSON в тексте
            start = response_text.find("{")
            end = response_text.rfind("}") + 1
            if start != -1 and end > start:
-                return json.loads(response_text[start:end])
+                interview_plan = json.loads(response_text[start:end])
-        return None
+        if interview_plan:
            # Добавляем информацию о том, что кандидат подходит
            interview_plan["is_suitable"] = True
            interview_plan["match_details"] = compatibility_result
            return interview_plan
        return {
            "is_suitable": True,
            "match_details": compatibility_result,
            "error": "Не удалось сгенерировать план интервью"
        }
    except Exception as e:
        print(f"Ошибка генерации плана интервью: {str(e)}")
@ -143,7 +235,6 @@ def parse_resume_task(self, resume_id: str, file_path: str):
        # Инициализируем модели из registry
        try:
            chat_model = registry.get_chat_model()
            embeddings_model = registry.get_embeddings_model()
            vector_store = registry.get_vector_store()
        except Exception as e:
            # Обновляем статус в БД - ошибка инициализации
@ -191,8 +282,8 @@ def parse_resume_task(self, resume_id: str, file_path: str):
        # Создаем комбинированные данные: навыки и опыт из парсинга, контакты из формы
        combined_data = parsed_resume.copy()
-        combined_data["name"] = applicant_name
+        combined_data["name"] = applicant_name or parsed_resume.get("name", "")
-        combined_data["email"] = applicant_email
+        combined_data["email"] = applicant_email or parsed_resume.get("email", "")
        combined_data["phone"] = applicant_phone or parsed_resume.get("phone", "")
        # Шаг 2: Векторизация и сохранение в Milvus
@ -219,10 +310,42 @@ def parse_resume_task(self, resume_id: str, file_path: str):
        with get_sync_session() as session:
            repo = SyncResumeRepository(session)
-            repo.update_status(int(resume_id), "parsed", parsed_data=combined_data)
+            
-            # Сохраняем план интервью
+            # Проверяем результат генерации плана интервью
-            if interview_plan:
+            print("interview_plan", interview_plan)
            if interview_plan and interview_plan.get("is_suitable", True):
                # Кандидат подходит - обновляем статус на parsed
                repo.update_status(int(resume_id), "parsed", parsed_data=combined_data)
                # Сохраняем план интервью
                repo.update_interview_plan(int(resume_id), interview_plan)
            else:
                # Кандидат не подходит - отклоняем
                rejection_reason = interview_plan.get("rejection_reason", "Не соответствует требованиям вакансии") if interview_plan else "Ошибка анализа соответствия"
                repo.update_status(
                    int(resume_id), 
                    "rejected", 
                    parsed_data=combined_data,
                    rejection_reason=rejection_reason
                )
                # Завершаем с информацией об отклонении
                self.update_state(
                    state="SUCCESS",
                    meta={
                        "status": f"Резюме обработано, но кандидат отклонен: {rejection_reason}",
                        "progress": 100,
                        "result": combined_data,
                        "rejected": True,
                        "rejection_reason": rejection_reason
                    },
                )
                return {
                    "resume_id": resume_id,
                    "status": "rejected",
                    "parsed_data": combined_data,
                    "rejection_reason": rejection_reason
                }
        # Завершено успешно
        self.update_state(
--- a/pyproject.toml
+++ b/pyproject.toml
@ -27,6 +27,8 @@ dependencies = [
    "livekit>=1.0.12",
    "livekit-api>=1.0.5",
    "livekit-agents[cartesia,deepgram,openai,silero,resemble,turn-detector]~=1.2",
    "textract>=1.5.0",
    "comtypes>=1.4.12",
 ]
 [build-system]
--- a/rag/llm/model.py
+++ b/rag/llm/model.py
@ -106,7 +106,28 @@ class ResumeParser:
    def extract_text_from_doc(self, file_path: str) -> str:
        """Извлекает текст из DOC файла"""
        try:
-            # Для .doc файлов используем antiword (если установлен) или попробуем python-docx
+            # Метод 1: COM автоматизация Word (самый надежный для Windows)
            import os
            if os.name == 'nt':  # Windows
                try:
                    import comtypes.client
                    print(f"[DEBUG] Trying Word COM automation for {file_path}")
                    word = comtypes.client.CreateObject('Word.Application')
                    word.Visible = False
                    doc = word.Documents.Open(file_path)
                    text = doc.Content.Text
                    doc.Close()
                    word.Quit()
                    if text and text.strip():
                        print(f"[DEBUG] Word COM successfully extracted {len(text)} characters")
                        return text.strip()
                except Exception as e:
                    print(f"[DEBUG] Word COM failed: {e}")
            # Метод 2: Для .doc файлов используем python-docx
            if Document:
                try:
                    doc = Document(file_path)
@ -116,20 +137,53 @@ class ResumeParser:
                    # Если python-docx не может прочитать .doc, пытаемся использовать системные утилиты
                    pass
-            # Попытка использовать системную команду antiword (для Linux/Mac)
+            # Попытка использовать textract (универсальная библиотека для извлечения текста)
            import subprocess
            try:
-                result = subprocess.run(
+                import textract
-                    ["antiword", file_path], capture_output=True, text=True
+                print(f"[DEBUG] Using textract to process {file_path}")
-                )
+                text = textract.process(file_path).decode('utf-8')
-                if result.returncode == 0:
+                if text and text.strip():
-                    return result.stdout.strip()
+                    print(f"[DEBUG] textract successfully extracted {len(text)} characters")
-            except FileNotFoundError:
+                    return text.strip()
                else:
                    print("[DEBUG] textract returned empty text")
            except ImportError as e:
                print(f"[DEBUG] textract not available: {e}")
            except Exception as e:
                print(f"[DEBUG] textract failed: {e}")
            # Попытка использовать docx2txt
            try:
                import docx2txt
                text = docx2txt.process(file_path)
                if text:
                    return text.strip()
            except ImportError:
                pass
            except Exception:
                pass
            # Попытка использовать oletools для старых DOC файлов
            try:
                from oletools.olevba import VBA_Parser
                from oletools import olefile
                if olefile.isOleFile(file_path):
                    # Это старый формат DOC, пытаемся извлечь текст
                    # Пока что возвращаем информативную ошибку
                    pass
            except ImportError:
                pass
            except Exception:
                pass
            raise Exception(
-                "Не удалось найти подходящий инструмент для чтения DOC файлов. Рекомендуется использовать DOCX формат."
+                "Не удалось извлечь текст из DOC файла ни одним из методов. "
                "Возможные причины:\n"
                "1. Microsoft Word не установлен (для COM автоматизации)\n"
                "2. Файл поврежден или не содержит текста\n"
                "3. Файл защищен паролем\n"
                "Рекомендуется конвертировать DOC в DOCX формат."
            )
        except Exception as e:
            raise Exception(f"Ошибка при чтении DOC: {str(e)}") from e
--- a/uv.lock
+++ b/uv.lock