add interview
This commit is contained in:
parent
f909eb69fb
commit
7af5a55b2e
38
.env.example
38
.env.example
@ -1,12 +1,38 @@
|
||||
DATABASE_URL=postgresql+asyncpg://user:password@localhost:5432/hr_ai_db
|
||||
# Database Configuration
|
||||
DATABASE_URL=postgresql+asyncpg://username:password@localhost:5432/hr_ai
|
||||
|
||||
# Selectel S3 Configuration
|
||||
S3_ENDPOINT_URL=https://s3.selcdn.ru
|
||||
S3_ACCESS_KEY_ID=your_access_key
|
||||
S3_SECRET_ACCESS_KEY=your_secret_key
|
||||
S3_BUCKET_NAME=your_bucket_name
|
||||
# Redis Configuration (for Celery and caching)
|
||||
REDIS_CACHE_URL=localhost
|
||||
REDIS_CACHE_PORT=6379
|
||||
REDIS_CACHE_DB=0
|
||||
|
||||
# Milvus Vector Database Configuration
|
||||
MILVUS_URI=http://localhost:19530
|
||||
MILVUS_COLLECTION=hr_candidate_profiles
|
||||
|
||||
# S3 Storage Configuration (Selectel/AWS/etc) — NOTE: this duplicates the "Selectel S3 Configuration" block above; keep only one set of S3_* keys (with dotenv loaders the later assignment silently wins)
|
||||
S3_ENDPOINT_URL=https://s3.storage.selcloud.ru
|
||||
S3_ACCESS_KEY_ID=your_s3_access_key
|
||||
S3_SECRET_ACCESS_KEY=your_s3_secret_key
|
||||
S3_BUCKET_NAME=your-bucket-name
|
||||
S3_REGION=ru-1
|
||||
|
||||
# LLM API Keys
|
||||
OPENAI_API_KEY=sk-your-openai-api-key-here
|
||||
ANTHROPIC_API_KEY=your-anthropic-api-key-here
|
||||
OPENAI_MODEL=gpt-4o-mini
|
||||
OPENAI_EMBEDDINGS_MODEL=text-embedding-3-small
|
||||
|
||||
# AI Agent API Keys (for voice interviewer)
|
||||
DEEPGRAM_API_KEY=your-deepgram-api-key-here
|
||||
CARTESIA_API_KEY=your-cartesia-api-key-here
|
||||
ELEVENLABS_API_KEY=your-elevenlabs-api-key-here
|
||||
|
||||
# LiveKit Configuration (for interview feature)
|
||||
LIVEKIT_URL=ws://localhost:7880
|
||||
LIVEKIT_API_KEY=devkey
|
||||
LIVEKIT_API_SECRET=devkey_secret_32chars_minimum_length
|
||||
|
||||
# App Configuration
|
||||
APP_ENV=development
|
||||
DEBUG=true
|
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,2 +1,3 @@
|
||||
.claude
|
||||
.venv
|
||||
.idea
|
330
INTERVIEW_INTEGRATION.md
Normal file
330
INTERVIEW_INTEGRATION.md
Normal file
@ -0,0 +1,330 @@
|
||||
# HR Interview Integration with LiveKit
|
||||
|
||||
## Обзор
|
||||
|
||||
Система интеграции для проведения голосовых собеседований с HR AI агентом через LiveKit. Пользователь общается голосом с AI, который анализирует речь, генерирует вопросы через LLM и отвечает синтезированной речью.
|
||||
|
||||
## Архитектура
|
||||
|
||||
### Frontend (Ready)
|
||||
- ✅ Кнопка "К собеседованию" для резюме со статусом `parsed`
|
||||
- ✅ Страница `/interview/[id]` с проверкой доступности
|
||||
- ✅ Компонент `InterviewSession` с LiveKit интеграцией
|
||||
- ✅ Аудио-только режим (без видео)
|
||||
- ✅ Управление микрофоном и завершением сессии
|
||||
|
||||
### Backend API Requirements
|
||||
|
||||
#### 1. Валидация резюме для собеседования
|
||||
```http
|
||||
GET /api/resume/{resumeId}/validate-interview
|
||||
```
|
||||
|
||||
**Response:**
|
||||
```json
|
||||
{
|
||||
"canInterview": true,
|
||||
"message": "Resume is ready for interview"
|
||||
}
|
||||
```
|
||||
|
||||
**Error cases:**
|
||||
- `404` - резюме не найдено
|
||||
- `400` - резюме не готово (статус не `parsed`)
|
||||
|
||||
#### 2. Получение токена для LiveKit
|
||||
```http
|
||||
POST /api/interview/{resumeId}/token
|
||||
```
|
||||
|
||||
**Response:**
|
||||
```json
|
||||
{
|
||||
"token": "livekit_jwt_token_here",
|
||||
"roomName": "interview_room_123",
|
||||
"serverUrl": "wss://your-livekit-server.com"
|
||||
}
|
||||
```
|
||||
|
||||
### LiveKit Server Integration
|
||||
|
||||
#### Environment Variables
|
||||
```env
|
||||
NEXT_PUBLIC_LIVEKIT_URL=wss://your-livekit-server.com
|
||||
LIVEKIT_API_KEY=your_api_key
|
||||
LIVEKIT_API_SECRET=your_api_secret
|
||||
```
|
||||
|
||||
#### Server-side Components
|
||||
|
||||
1. **LiveKit Room Management**
|
||||
- Создание уникальных комнат для каждого собеседования
|
||||
- Управление участниками (пользователь + AI агент)
|
||||
- Токены с ограниченными правами
|
||||
|
||||
2. **Speech-to-Text Service**
|
||||
- Получение аудио потока от пользователя
|
||||
- Конвертация речи в текст в реальном времени
|
||||
- Определение пауз в речи для передачи в LLM
|
||||
|
||||
3. **LLM Processing**
|
||||
- Анализ текста от пользователя
|
||||
- Генерация вопросов на основе резюме
|
||||
- Контекст предыдущих ответов
|
||||
- Логика завершения собеседования
|
||||
|
||||
4. **Text-to-Speech Service**
|
||||
- Конвертация ответов LLM в аудио
|
||||
- Передача аудио потока через LiveKit
|
||||
|
||||
#### Data Flow
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant User
|
||||
participant Frontend
|
||||
participant Backend
|
||||
participant LiveKit
|
||||
participant STT as Speech-to-Text
|
||||
participant LLM
|
||||
participant TTS as Text-to-Speech
|
||||
|
||||
User->>Frontend: Нажимает "К собеседованию"
|
||||
Frontend->>Backend: GET /api/resume/{id}/validate-interview
|
||||
Backend-->>Frontend: canInterview: true
|
||||
Frontend->>Backend: POST /api/interview/{id}/token
|
||||
Backend-->>Frontend: LiveKit token
|
||||
Frontend->>LiveKit: Connect to room
|
||||
|
||||
Frontend->>LiveKit: Send start_interview message
|
||||
LiveKit->>Backend: start_interview event
|
||||
Backend->>LLM: Generate first question
|
||||
LLM-->>Backend: Question text
|
||||
Backend->>TTS: Convert to speech
|
||||
TTS-->>Backend: Audio data
|
||||
Backend->>LiveKit: Send audio + question text
|
||||
LiveKit-->>Frontend: Receive audio + text
|
||||
|
||||
loop Interview Process
|
||||
User->>LiveKit: Speak answer
|
||||
LiveKit->>Backend: Audio stream
|
||||
Backend->>STT: Convert speech
|
||||
STT-->>Backend: User text
|
||||
Backend->>LLM: Process answer + generate next question
|
||||
LLM-->>Backend: Next question or end signal
|
||||
Backend->>TTS: Convert to speech
|
||||
TTS-->>Backend: Audio data
|
||||
Backend->>LiveKit: Send audio + text
|
||||
LiveKit-->>Frontend: Display question + play audio
|
||||
end
|
||||
|
||||
Backend->>LiveKit: Send interview_complete
|
||||
LiveKit-->>Frontend: Show completion
|
||||
Frontend->>User: Return to vacancy page
|
||||
```
|
||||
|
||||
#### Message Protocol
|
||||
|
||||
**Frontend → Server:**
|
||||
```json
|
||||
{
|
||||
"type": "start_interview",
|
||||
"resumeId": 123
|
||||
}
|
||||
|
||||
{
|
||||
"type": "end_interview",
|
||||
"resumeId": 123
|
||||
}
|
||||
```
|
||||
|
||||
**Server → Frontend:**
|
||||
```json
|
||||
{
|
||||
"type": "question",
|
||||
"text": "Расскажите о своем опыте в разработке",
|
||||
"questionNumber": 1
|
||||
}
|
||||
|
||||
{
|
||||
"type": "ai_speaking_start"
|
||||
}
|
||||
|
||||
{
|
||||
"type": "ai_speaking_end"
|
||||
}
|
||||
|
||||
{
|
||||
"type": "interview_complete",
|
||||
"summary": "Interview completed successfully"
|
||||
}
|
||||
```
|
||||
|
||||
### Implementation Steps
|
||||
|
||||
#### Priority Implementation Order
|
||||
1. **Phase 1**: Базовые API endpoints + LiveKit токены (минимальный MVP)
|
||||
2. **Phase 2**: Аудио pipeline (STT/TTS)
|
||||
3. **Phase 3**: LLM интеграция и логика собеседования
|
||||
|
||||
#### Backend Setup
|
||||
1. **Install LiveKit SDK**
|
||||
```bash
|
||||
pip install livekit livekit-api
|
||||
# or
|
||||
npm install livekit-server-sdk
|
||||
```
|
||||
|
||||
2. **Create LiveKit Room Service**
|
||||
```python
|
||||
from livekit import api
|
||||
|
||||
class InterviewRoomService:
|
||||
def create_room(self, resume_id: int):
|
||||
room_name = f"interview_{resume_id}_{timestamp}"
|
||||
# Create room logic
|
||||
|
||||
def generate_token(self, room_name: str, participant_name: str):
|
||||
# Generate JWT token
|
||||
```
|
||||
|
||||
3. **Implement API Endpoints**
|
||||
- Resume validation endpoint
|
||||
- Token generation endpoint
|
||||
- WebRTC signaling handling
|
||||
|
||||
4. **Set up Audio Processing Pipeline**
|
||||
- Speech-to-Text service (Google, Azure, или OpenAI Whisper)
|
||||
- Text-to-Speech service (Google, Azure, или ElevenLabs)
|
||||
- LLM integration (OpenAI GPT, Claude, или локальная модель)
|
||||
|
||||
5. **Database Schema Updates**
|
||||
```sql
|
||||
ALTER TABLE resumes ADD COLUMN interview_session_id VARCHAR(255);
|
||||
ALTER TABLE resumes ADD COLUMN interview_completed_at TIMESTAMP;
|
||||
|
||||
CREATE TABLE interview_sessions (
|
||||
id SERIAL PRIMARY KEY,
|
||||
resume_id INTEGER REFERENCES resumes(id),
|
||||
room_name VARCHAR(255),
|
||||
status VARCHAR(50),
|
||||
started_at TIMESTAMP,
|
||||
completed_at TIMESTAMP,
|
||||
transcript TEXT,
|
||||
ai_feedback TEXT
|
||||
);
|
||||
```
|
||||
|
||||
### Security Considerations
|
||||
|
||||
1. **Token Security**
|
||||
- Ограниченное время жизни токенов
|
||||
- Права только на конкретную комнату
|
||||
- Валидация прав пользователя
|
||||
|
||||
2. **Data Privacy**
|
||||
- Шифрование аудио потоков
|
||||
- Временное хранение записей
|
||||
- GDPR соответствие
|
||||
|
||||
3. **Rate Limiting**
|
||||
- Ограничение на количество сессий
|
||||
- Защита от злоупотреблений
|
||||
|
||||
### Testing Strategy
|
||||
|
||||
1. **Unit Tests**
|
||||
- API endpoints
|
||||
- Token generation
|
||||
- Message handling
|
||||
|
||||
2. **Integration Tests**
|
||||
- LiveKit connection
|
||||
- Audio pipeline
|
||||
- LLM integration
|
||||
|
||||
3. **Load Testing**
|
||||
- Множественные одновременные сессии
|
||||
- Производительность аудио обработки
|
||||
|
||||
### Monitoring & Analytics
|
||||
|
||||
1. **Metrics**
|
||||
- Время подключения к сессии
|
||||
- Качество аудио соединения
|
||||
- Длительность собеседований
|
||||
- Процент завершенных интервью
|
||||
|
||||
2. **Logging**
|
||||
- События соединения
|
||||
- Ошибки аудио обработки
|
||||
- LLM запросы и ответы
|
||||
|
||||
### Deployment
|
||||
|
||||
1. **LiveKit Server**
|
||||
- Развертывание сервера LiveKit
|
||||
- SSL сертификаты
|
||||
- Настройка TURN серверов для NAT
|
||||
|
||||
2. **Scaling**
|
||||
- Горизонтальное масштабирование
|
||||
- Load balancing
|
||||
- CDN для статических ресурсов
|
||||
|
||||
## Quick Start для Backend разработчика
|
||||
|
||||
### Minimum Viable Product (1-2 дня)
|
||||
1. **Заглушки API endpoints:**
|
||||
```python
|
||||
# Всегда возвращать canInterview: true
|
||||
GET /api/resume/{id}/validate-interview
|
||||
|
||||
# Вернуть тестовый LiveKit токен
|
||||
POST /api/interview/{id}/token
|
||||
```
|
||||
|
||||
2. **Настроить LiveKit server локально:**
|
||||
```bash
|
||||
docker run --rm -p 7880:7880 -p 7881:7881 -p 7882:7882 \
|
||||
-e LIVEKIT_KEYS="your-api-key: your-secret-key" \
|
||||
livekit/livekit-server:latest
|
||||
```
|
||||
|
||||
3. **Тестирование соединения:**
|
||||
- Frontend должен подключиться к LiveKit
|
||||
- Пользователь увидит интерфейс собеседования
|
||||
- Микрофон должен работать
|
||||
|
||||
### Full Implementation (1-2 недели)
|
||||
После MVP добавить:
|
||||
- Реальную логику валидации резюме
|
||||
- Speech-to-Text processing
|
||||
- LLM для генерации вопросов
|
||||
- Text-to-Speech для ответов AI
|
||||
- Сохранение результатов в БД
|
||||
|
||||
### Возможные упрощения для MVP:
|
||||
- Валидацию можно убрать (всегда разрешать)
|
||||
- Вместо AI агента показывать заглушку "Скоро здесь будет AI интервьюер"
|
||||
- Логи всех действий в консоль для отладки
|
||||
|
||||
## Current Status
|
||||
|
||||
✅ **Completed:**
|
||||
- Frontend компоненты готовы
|
||||
- LiveKit интеграция настроена
|
||||
- UI для собеседования реализован
|
||||
|
||||
🔄 **Next Steps:**
|
||||
1. Реализовать backend API endpoints
|
||||
2. Настроить LiveKit server
|
||||
3. Интегрировать Speech-to-Text/Text-to-Speech
|
||||
4. Подключить LLM для генерации вопросов
|
||||
5. Тестирование и отладка
|
||||
|
||||
### Contact Frontend Developer
|
||||
При вопросах по интеграции:
|
||||
- Формат сообщений между клиентом и сервером
|
||||
- Структура токенов LiveKit
|
||||
- Обработка ошибок на фронтенде
|
272
TESTING_GUIDE.md
Normal file
272
TESTING_GUIDE.md
Normal file
@ -0,0 +1,272 @@
|
||||
# 🧪 HR-AI Backend Testing Guide
|
||||
|
||||
## ✅ System Status
|
||||
|
||||
**Core Components:** All PASS ✅
|
||||
- ✅ Database (PostgreSQL) - Connected, 1 resume found
|
||||
- ✅ RAG System (OpenAI) - Resume parsing works
|
||||
- ✅ Redis - Connected for Celery tasks
|
||||
- ✅ Interview Service - Token generation works
|
||||
- ✅ AI Agent - Initialization and plan handling works
|
||||
|
||||
## 🚀 Quick Start Testing (Without Voice)
|
||||
|
||||
### 1. Start the Services
|
||||
|
||||
```bash
|
||||
# Terminal 1: Start FastAPI server
|
||||
uvicorn app.main:app --reload --port 8000
|
||||
|
||||
# Terminal 2: Start Celery worker
|
||||
celery -A celery_worker.celery_app worker --loglevel=info
|
||||
|
||||
# Terminal 3: Monitor system
|
||||
python simple_test.py
|
||||
```
|
||||
|
||||
### 2. Test Resume Upload & Processing
|
||||
|
||||
```bash
|
||||
# Create test resume file
|
||||
echo "John Doe
|
||||
Python Developer
|
||||
Experience: 3 years
|
||||
Skills: Python, Django, FastAPI, PostgreSQL
|
||||
Education: Computer Science
|
||||
Email: john@example.com
|
||||
Phone: +1234567890" > test_resume.txt
|
||||
|
||||
# Upload via API
|
||||
curl -X POST "http://localhost:8000/resume/upload" \
|
||||
-H "Content-Type: multipart/form-data" \
|
||||
-F "file=@test_resume.txt" \
|
||||
-F "applicant_name=John Doe" \
|
||||
-F "applicant_email=john@example.com" \
|
||||
-F "applicant_phone=+1234567890" \
|
||||
-F "vacancy_id=1"
|
||||
```
|
||||
|
||||
### 3. Check Processing Results
|
||||
|
||||
```bash
|
||||
# Check resume in database
|
||||
curl http://localhost:8000/resume/1
|
||||
|
||||
# Check interview plan generation
|
||||
# Should see interview_plan field with structured questions
|
||||
```
|
||||
|
||||
### 4. Test Interview Session Creation
|
||||
|
||||
```bash
|
||||
# Create interview session
|
||||
curl -X POST "http://localhost:8000/interview/1/start" \
|
||||
-H "Content-Type: application/json"
|
||||
```
|
||||
|
||||
## 🎤 Full Voice Testing (Requires Additional Setup)
|
||||
|
||||
### Prerequisites for Voice Testing
|
||||
|
||||
**1. LiveKit Server**
|
||||
```bash
|
||||
# Download LiveKit server
|
||||
docker run --rm -p 7880:7880 -p 7881:7881 \
|
||||
livekit/livekit-server --dev
|
||||
```
|
||||
|
||||
**2. Voice API Keys (Optional - has fallbacks)**
|
||||
|
||||
Add to your `.env` file:
|
||||
```bash
|
||||
# For better STT (Speech-to-Text)
|
||||
DEEPGRAM_API_KEY=your-deepgram-key
|
||||
|
||||
# For better TTS (Text-to-Speech)
|
||||
CARTESIA_API_KEY=your-cartesia-key
|
||||
# OR
|
||||
ELEVENLABS_API_KEY=your-elevenlabs-key
|
||||
```
|
||||
|
||||
### Voice Interview Testing
|
||||
|
||||
**1. Start Complete Stack**
|
||||
```bash
|
||||
# All previous services PLUS:
|
||||
# Terminal 4: LiveKit server (see above)
|
||||
```
|
||||
|
||||
**2. Create Voice Interview Session**
|
||||
```bash
|
||||
# This will start AI agent subprocess
|
||||
curl -X POST "http://localhost:8000/interview/1/token"
|
||||
```
|
||||
|
||||
**3. Monitor AI Processes**
|
||||
```bash
|
||||
# Check running AI agents
|
||||
curl http://localhost:8000/admin/interview-processes
|
||||
|
||||
# System stats
|
||||
curl http://localhost:8000/admin/system-stats
|
||||
```
|
||||
|
||||
## 📊 Monitoring & Debugging
|
||||
|
||||
### 1. Check System Health
|
||||
```bash
|
||||
python simple_test.py
|
||||
```
|
||||
|
||||
### 2. Monitor Celery Tasks
|
||||
- Open Celery worker terminal
|
||||
- Should see task processing logs
|
||||
|
||||
### 3. Database Inspection
|
||||
```sql
|
||||
-- Check resumes
|
||||
SELECT id, applicant_name, status, interview_plan IS NOT NULL as has_plan
|
||||
FROM resume;  -- NOTE(review): INTERVIEW_INTEGRATION.md's DDL uses table name "resumes" — confirm the actual schema and align both documents
|
||||
|
||||
-- Check interview sessions
|
||||
SELECT id, room_name, status, ai_agent_pid, ai_agent_status
|
||||
FROM interview_sessions;
|
||||
```
|
||||
|
||||
### 4. Process Management
|
||||
```bash
|
||||
# List active AI processes
|
||||
curl http://localhost:8000/admin/interview-processes
|
||||
|
||||
# Stop specific process
|
||||
curl -X POST http://localhost:8000/admin/interview-processes/1/stop
|
||||
|
||||
# Cleanup dead processes
|
||||
curl -X POST http://localhost:8000/admin/interview-processes/cleanup
|
||||
```
|
||||
|
||||
## 🔧 Troubleshooting
|
||||
|
||||
### Common Issues
|
||||
|
||||
**1. "Database connection error"**
|
||||
- Check PostgreSQL is running
|
||||
- Verify DATABASE_URL in config
|
||||
- Run: `alembic upgrade head`
|
||||
|
||||
**2. "RAG system error"**
|
||||
- Check OPENAI_API_KEY is set
|
||||
- Verify internet connection
|
||||
|
||||
**3. "Redis connection error"**
|
||||
```bash
|
||||
docker run -d -p 6379:6379 redis:alpine
|
||||
```
|
||||
|
||||
**4. "Import errors"**
|
||||
- Make sure you're in project root directory
|
||||
- Check virtual environment is activated
|
||||
|
||||
**5. "Celery tasks not processing"**
|
||||
- Ensure Redis is running
|
||||
- Check Celery worker logs
|
||||
- Restart Celery worker
|
||||
|
||||
### Performance Testing
|
||||
|
||||
**Test Multiple Concurrent Interviews:**
|
||||
```bash
|
||||
# Create 5 interview sessions simultaneously
|
||||
for i in {1..5}; do
|
||||
curl -X POST "http://localhost:8000/interview/$i/token" &
|
||||
done
|
||||
wait
|
||||
|
||||
# Monitor system resources
|
||||
curl http://localhost:8000/admin/system-stats
|
||||
```
|
||||
|
||||
## 🧪 Test Scenarios
|
||||
|
||||
### Scenario 1: Basic Resume Processing
|
||||
1. Upload resume → Check parsing
|
||||
2. Verify interview plan generation
|
||||
3. Confirm data in database
|
||||
|
||||
### Scenario 2: Interview Session Lifecycle
|
||||
1. Create session → Get token
|
||||
2. Start AI agent → Monitor process
|
||||
3. Stop session → Verify cleanup
|
||||
|
||||
### Scenario 3: Multi-User Load Test
|
||||
1. Upload 10 resumes simultaneously
|
||||
2. Create 5 interview sessions
|
||||
3. Monitor system resources
|
||||
4. Check process management
|
||||
|
||||
### Scenario 4: Error Recovery
|
||||
1. Stop Redis → Resume upload should queue
|
||||
2. Start Redis → Tasks should process
|
||||
3. Kill AI process → Should be detected and cleaned
|
||||
|
||||
## 📈 Expected Performance
|
||||
|
||||
**Single Interview:**
|
||||
- Memory: ~45MB per AI agent process
|
||||
- CPU: ~5-15% during active conversation
|
||||
- Startup: ~3-5 seconds per agent
|
||||
|
||||
**System Limits:**
|
||||
- Recommended max: 50 concurrent interviews
|
||||
- Theoretical max: ~150 interviews (on 32GB RAM)
|
||||
|
||||
## 🎯 Success Criteria
|
||||
|
||||
**✅ Basic Functionality:**
|
||||
- [ ] Resume upload and parsing works
|
||||
- [ ] Interview plans are generated
|
||||
- [ ] Database stores all data correctly
|
||||
- [ ] Celery processes tasks
|
||||
|
||||
**✅ Interview System:**
|
||||
- [ ] Interview sessions can be created
|
||||
- [ ] AI agent processes start successfully
|
||||
- [ ] Tokens are generated correctly
|
||||
- [ ] Process monitoring works
|
||||
|
||||
**✅ Advanced Features:**
|
||||
- [ ] Multiple concurrent interviews
|
||||
- [ ] Process cleanup works
|
||||
- [ ] System monitoring provides accurate data
|
||||
- [ ] Error recovery works correctly
|
||||
|
||||
**✅ Voice Testing (Optional):**
|
||||
- [ ] LiveKit connection established
|
||||
- [ ] STT/TTS services work (if configured)
|
||||
- [ ] Real-time conversation flows
|
||||
- [ ] Session termination works properly
|
||||
|
||||
## 📝 Test Results Log
|
||||
|
||||
Keep track of your testing:
|
||||
|
||||
```
|
||||
Date: ___________
|
||||
System Test: PASS/FAIL
|
||||
Resume Upload: PASS/FAIL
|
||||
Interview Creation: PASS/FAIL
|
||||
AI Agent Start: PASS/FAIL
|
||||
Voice Test: PASS/FAIL (if attempted)
|
||||
|
||||
Notes:
|
||||
_________________________________
|
||||
_________________________________
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🎉 Ready to Test!
|
||||
|
||||
Start with the **Quick Start Testing** section above. The system is ready for basic testing without voice features. For full voice testing, set up LiveKit server and optionally add voice API keys.
|
||||
|
||||
Good luck! 🚀
|
416
ai_interviewer_agent.py
Normal file
416
ai_interviewer_agent.py
Normal file
@ -0,0 +1,416 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import Dict, List
|
||||
from datetime import datetime
|
||||
|
||||
# Force UTF-8 output on Windows: the default console code page (cp866/cp1251)
# cannot encode the Cyrillic text this agent logs and speaks, which would make
# every logger call raise UnicodeEncodeError.
if os.name == 'nt':  # Windows
    import sys
    # Check each stream independently. The original code only probed
    # sys.stdout and then called sys.stderr.reconfigure() unchecked — if
    # stderr had been replaced by a stream without reconfigure() (anything
    # that is not an io.TextIOWrapper), module import crashed.
    for _stream in (sys.stdout, sys.stderr):
        if hasattr(_stream, 'reconfigure'):
            _stream.reconfigure(encoding='utf-8', errors='replace')
|
||||
from livekit.agents import (
|
||||
Agent,
|
||||
AgentSession,
|
||||
JobContext,
|
||||
WorkerOptions,
|
||||
cli,
|
||||
)
|
||||
from livekit.plugins import openai, deepgram, cartesia, silero, resemble
|
||||
from rag.settings import settings
|
||||
|
||||
logger = logging.getLogger("ai-interviewer")
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
class InterviewAgent:
    """AI agent that conducts a structured voice interview and manages dialogue flow.

    Walks through the sections and questions of a pre-generated interview plan.
    Expected plan shape (assumed from the access patterns below — TODO confirm
    against the producer of ``interview_plan``)::

        {
            "interview_structure": {
                "greeting": str,
                "sections": [{"name": str, "questions": [str, ...]}, ...],
            },
            "candidate_info": {"name": str, "total_years": int, "skills": [str]},
            "focus_areas": [str, ...],
        }
    """

    def __init__(self, interview_plan: Dict):
        """Initialize dialogue state from a ready-made interview plan.

        Args:
            interview_plan: Nested dict describing the interview (see class
                docstring). Missing keys degrade to empty sections.
        """
        self.interview_plan = interview_plan
        # Chronological log of {"role", "content", "timestamp", "section"} entries.
        self.conversation_history = []

        # Dialogue state
        self.current_section = 0               # index into self.sections
        self.current_question_in_section = 0   # index into the current section's questions
        self.questions_asked_total = 0         # running total across all sections
        self.waiting_for_response = False      # True while a question is pending an answer
        self.last_question = None              # most recently asked question text
        self.last_user_response = None         # most recent candidate answer text

        # Extract the interview structure (empty list if the plan lacks it).
        self.sections = self.interview_plan.get('interview_structure', {}).get('sections', [])
        self.total_sections = len(self.sections)

    def get_current_section(self) -> Dict:
        """Return the current interview section, or {} when past the last one."""
        if self.current_section < len(self.sections):
            return self.sections[self.current_section]
        return {}

    def get_next_question(self) -> "str | None":
        """Return the next question of the current section, or None when exhausted."""
        section = self.get_current_section()
        questions = section.get('questions', [])

        if self.current_question_in_section < len(questions):
            return questions[self.current_question_in_section]
        return None

    def move_to_next_question(self):
        """Advance to the next question, rolling over to the next section if needed."""
        section = self.get_current_section()
        questions = section.get('questions', [])

        self.current_question_in_section += 1
        self.questions_asked_total += 1

        # If the section's questions are exhausted, move on to the next section.
        if self.current_question_in_section >= len(questions):
            self.move_to_next_section()

    def move_to_next_section(self):
        """Advance to the next section and reset the per-section question index."""
        self.current_section += 1
        self.current_question_in_section = 0

        if self.current_section < len(self.sections):
            logger.info(f"Переход к секции: {self.sections[self.current_section].get('name', 'Unnamed')}")

    def is_interview_complete(self) -> bool:
        """Return True when every section has been covered."""
        return self.current_section >= len(self.sections)

    async def analyze_user_response(self, response: str, chat_model) -> Dict[str, str]:
        """Analyze the candidate's answer with an LLM and decide the next step.

        Args:
            response: The candidate's transcribed answer.
            chat_model: Object exposing a synchronous ``chat(messages)`` method
                returning an object with a ``.content`` attribute — TODO confirm
                the callers always pass such an object (note the method is
                ``async`` but the ``chat`` call below is not awaited).

        Returns:
            Dict with keys ``action`` ("continue" | "clarify" | "redirect"),
            ``reason`` and ``follow_up_question``. On any error or unparsable
            model output, falls back to ``action="continue"``.
        """
        try:
            # NOTE(review): 'registry' is imported but never used in this method.
            from rag.registry import registry

            analysis_prompt = f"""
Проанализируй ответ кандидата на интервью и определи следующий шаг.

КОНТЕКСТ:
- Текущая секция: {self.get_current_section().get('name', 'Unknown')}
- Заданный вопрос: {self.last_question}
- Ответ кандидата: {response}

Оцени ответ и определи действие:
1. "continue" - ответ полный, переходим к следующему вопросу
2. "clarify" - нужно уточнить или углубить ответ
3. "redirect" - нужно перенаправить на тему

Ответь в JSON формате:
{{
    "action": "continue|clarify|redirect",
    "reason": "Объяснение решения",
    "follow_up_question": "Уточняющий вопрос если action=clarify или redirect"
}}
"""

            # Imported lazily so the module loads even without langchain installed.
            from langchain.schema import HumanMessage, SystemMessage
            messages = [
                SystemMessage(content="Ты эксперт-аналитик интервью. Анализируй ответы объективно."),
                HumanMessage(content=analysis_prompt)
            ]

            response_analysis = chat_model.chat(messages)
            response_text = response_analysis.content.strip()

            # Parse the JSON reply; a reply wrapped in markdown fences or extra
            # prose fails this check and falls through to the fallback below.
            if response_text.startswith('{') and response_text.endswith('}'):
                return json.loads(response_text)
            else:
                # Fallback
                return {
                    "action": "continue",
                    "reason": "Не удалось проанализировать ответ",
                    "follow_up_question": ""
                }

        except Exception as e:
            # Any failure (network, JSON decode, attribute error) degrades to
            # "continue" so the interview keeps moving instead of stalling.
            logger.error(f"Ошибка анализа ответа: {str(e)}")
            return {
                "action": "continue",
                "reason": "Ошибка анализа",
                "follow_up_question": ""
            }

    def _extract_questions_from_plan(self) -> List[str]:
        """Flatten the plan into a single question list (greeting first).

        Returns hard-coded fallback questions if the plan cannot be read.
        """
        questions = []

        try:
            # Start with the greeting from the plan.
            greeting = self.interview_plan.get('interview_structure', {}).get('greeting', 'Привет! Готов к интервью?')
            questions.append(greeting)

            # Collect questions from every section, in order.
            sections = self.interview_plan.get('interview_structure', {}).get('sections', [])

            for section in sections:
                section_questions = section.get('questions', [])
                questions.extend(section_questions)

            return questions

        except Exception as e:
            logger.error(f"Ошибка извлечения вопросов из плана: {str(e)}")
            # Fallback questions
            return [
                "Привет! Расскажи немного о себе",
                "Какой у тебя опыт работы?",
                "Что тебя привлекает в этой позиции?",
                "Есть ли у тебя вопросы ко мне?"
            ]

    def get_system_instructions(self) -> str:
        """Build the Russian-language system prompt for the voice agent.

        Embeds candidate info, current progress and the full section plan
        (as JSON) into one instruction string.
        """
        candidate_info = self.interview_plan.get('candidate_info', {})
        interview_structure = self.interview_plan.get('interview_structure', {})
        # NOTE(review): focus_areas is read but never used in the prompt below.
        focus_areas = self.interview_plan.get('focus_areas', [])

        greeting = interview_structure.get('greeting', 'Привет! Готов к интервью?')

        current_section = self.get_current_section()
        current_section_name = current_section.get('name', 'Неизвестно')
        progress = f"{self.current_section + 1}/{len(self.sections)}"

        return f"""Ты опытный HR-интервьюер, который проводит структурированное голосовое собеседование.

ИНФОРМАЦИЯ О КАНДИДАТЕ:
- Имя: {candidate_info.get('name', 'Кандидат')}
- Опыт работы: {candidate_info.get('total_years', 0)} лет
- Ключевые навыки: {', '.join(candidate_info.get('skills', []))}

ТЕКУЩЕЕ СОСТОЯНИЕ ИНТЕРВЬЮ:
- Прогресс: {progress} секций
- Текущая секция: {current_section_name}
- Вопросов задано: {self.questions_asked_total}

ПЛАН ИНТЕРВЬЮ:
{json.dumps(interview_structure.get('sections', []), ensure_ascii=False, indent=2)}

ТВОЯ ЗАДАЧА:
1. Веди живое интерактивное интервью
2. Анализируй каждый ответ кандидата
3. Принимай решения:
   - Если ответ полный и достаточный → переходи к следующему вопросу
   - Если ответ поверхностный → задавай уточняющие вопросы
   - Если кандидат ушел от темы → мягко возвращай к вопросу
4. Поддерживай естественный диалог

ПРАВИЛА ВЕДЕНИЯ ДИАЛОГА:
✅ Говори только на русском языке
✅ Задавай один вопрос за раз и жди ответа
✅ Анализируй качество и полноту каждого ответа
✅ Адаптируй следующие вопросы под полученные ответы
✅ Показывай искреннюю заинтересованность
✅ Если ответ неполный - углубляйся: "Расскажи подробнее...", "А как именно ты..."
✅ При переходе между секциями делай плавные переходы
✅ Завершай интервью благодарностью и следующими шагами

ПРИМЕРЫ РЕАКЦИЙ НА ОТВЕТЫ:
- Короткий ответ: "Интересно! А можешь рассказать конкретный пример?"
- Хороший ответ: "Отлично! Давай перейдем к следующему вопросу..."
- Уход от темы: "Понимаю, но давай вернемся к..."

НАЧНИ С ПРИВЕТСТВИЯ: {greeting}
"""
|
||||
|
||||
|
||||
async def entrypoint(ctx: JobContext):
|
||||
"""Точка входа для AI агента"""
|
||||
logger.info("Starting AI Interviewer Agent")
|
||||
|
||||
# Получаем данные о резюме из метаданных комнаты
|
||||
room_metadata = ctx.room.metadata if ctx.room.metadata else "{}"
|
||||
try:
|
||||
metadata = json.loads(room_metadata)
|
||||
interview_plan = metadata.get("interview_plan", {})
|
||||
if not hasattr(interview_plan, 'interview_structure'):
|
||||
raise ValueError
|
||||
except:
|
||||
# Fallback план для тестирования
|
||||
interview_plan = {
|
||||
"interview_structure": {
|
||||
"duration_minutes": 30,
|
||||
"greeting": "Привет! Готов к тестовому интервью?",
|
||||
"sections": [
|
||||
{
|
||||
"name": "Знакомство",
|
||||
"duration_minutes": 10,
|
||||
"questions": ["Расскажи о себе", "Что тебя привлекло в этой позиции?"]
|
||||
},
|
||||
{
|
||||
"name": "Технические навыки",
|
||||
"duration_minutes": 15,
|
||||
"questions": ["Расскажи о своем опыте с Python", "Какие проекты разрабатывал?"]
|
||||
},
|
||||
{
|
||||
"name": "Вопросы кандидата",
|
||||
"duration_minutes": 5,
|
||||
"questions": ["Есть ли у тебя вопросы ко мне?"]
|
||||
}
|
||||
]
|
||||
},
|
||||
"focus_areas": ["technical_skills", "experience"],
|
||||
"candidate_info": {
|
||||
"name": "Тестовый кандидат",
|
||||
"skills": ["Python", "React", "PostgreSQL"],
|
||||
"total_years": 3,
|
||||
"education": "Высшее техническое"
|
||||
}
|
||||
}
|
||||
|
||||
logger.info(f"Interview plan: {interview_plan}")
|
||||
|
||||
# Создаем интервьюера с планом
|
||||
interviewer = InterviewAgent(interview_plan)
|
||||
|
||||
# Настройка STT (Speech-to-Text)
|
||||
if hasattr(settings, 'deepgram_api_key') and settings.deepgram_api_key:
|
||||
stt = deepgram.STT(
|
||||
model="nova-2-general",
|
||||
language="ru", # Русский язык
|
||||
api_key=settings.deepgram_api_key
|
||||
)
|
||||
else:
|
||||
# Fallback на OpenAI Whisper
|
||||
stt = openai.STT(
|
||||
model="whisper-1",
|
||||
language="ru",
|
||||
api_key=settings.openai_api_key
|
||||
)
|
||||
|
||||
# Настройка LLM
|
||||
llm = openai.LLM(
|
||||
model="gpt-4o-mini",
|
||||
api_key=settings.openai_api_key,
|
||||
temperature=0.7,
|
||||
)
|
||||
|
||||
# Настройка TTS (Text-to-Speech)
|
||||
if hasattr(settings, 'resemble_api_key') and settings.resemble_api_key:
|
||||
tts = resemble.TTS(
|
||||
voice_uuid="55592656",
|
||||
api_key=settings.resemble_api_key
|
||||
)
|
||||
else:
|
||||
# Fallback на локальный TTS
|
||||
tts = silero.TTS(
|
||||
language="ru",
|
||||
model="v4_ru"
|
||||
)
|
||||
|
||||
# Создание агента с системными инструкциями
|
||||
agent = Agent(
|
||||
instructions=interviewer.get_system_instructions()
|
||||
)
|
||||
|
||||
# Создание сессии агента
|
||||
session = AgentSession(
|
||||
vad=silero.VAD.load(), # Voice Activity Detection
|
||||
stt=stt,
|
||||
llm=llm,
|
||||
tts=tts,
|
||||
)
|
||||
|
||||
# Добавляем обработчики событий с управлением диалогом
|
||||
@session.on("user_speech_committed")
|
||||
def on_user_speech(msg):
|
||||
"""Синхронный callback. Внутри создаётся async-задача."""
|
||||
|
||||
async def handler():
|
||||
user_response = msg.content
|
||||
logger.info(f"User said: {user_response}")
|
||||
|
||||
# Сохраняем историю
|
||||
interviewer.conversation_history.append({
|
||||
"role": "user",
|
||||
"content": user_response,
|
||||
"timestamp": datetime.utcnow().isoformat(),
|
||||
"section": interviewer.get_current_section().get('name', 'Unknown')
|
||||
})
|
||||
|
||||
interviewer.last_user_response = user_response
|
||||
interviewer.waiting_for_response = False
|
||||
|
||||
try:
|
||||
# Анализ ответа
|
||||
analysis = await interviewer.analyze_user_response(user_response, llm)
|
||||
action = analysis.get("action", "continue")
|
||||
|
||||
logger.info(f"Response analysis: {action} - {analysis.get('reason', 'No reason')}")
|
||||
|
||||
if action == "continue":
|
||||
interviewer.move_to_next_question()
|
||||
|
||||
if not interviewer.is_interview_complete():
|
||||
next_question = interviewer.get_next_question()
|
||||
if next_question:
|
||||
await session.say(next_question)
|
||||
interviewer.last_question = next_question
|
||||
interviewer.waiting_for_response = True
|
||||
else:
|
||||
await session.say(
|
||||
"Спасибо за интервью! Это все вопросы, которые я хотел задать. "
|
||||
"В ближайшее время мы свяжемся с тобой по результатам."
|
||||
)
|
||||
|
||||
elif action in ["clarify", "redirect"]:
|
||||
follow_up = analysis.get("follow_up_question", "Можешь рассказать подробнее?")
|
||||
await session.say(follow_up)
|
||||
interviewer.waiting_for_response = True
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Ошибка обработки ответа пользователя: {str(e)}")
|
||||
interviewer.move_to_next_question()
|
||||
|
||||
# запускаем асинхронный обработчик
|
||||
asyncio.create_task(handler())
|
||||
|
||||
@session.on("agent_speech_committed")
|
||||
def on_agent_speech(msg):
|
||||
"""Обработка речи агента"""
|
||||
agent_response = msg.content
|
||||
logger.info(f"Agent said: {agent_response}")
|
||||
|
||||
# Сохраняем в историю
|
||||
interviewer.conversation_history.append({
|
||||
"role": "assistant",
|
||||
"content": agent_response,
|
||||
"timestamp": datetime.utcnow().isoformat(),
|
||||
"section": interviewer.get_current_section().get('name', 'Unknown')
|
||||
})
|
||||
|
||||
# Если это вопрос, обновляем состояние
|
||||
if "?" in agent_response:
|
||||
interviewer.last_question = agent_response
|
||||
interviewer.waiting_for_response = True
|
||||
|
||||
# Запускаем сессию агента
|
||||
await session.start(agent=agent, room=ctx.room)
|
||||
|
||||
# Приветственное сообщение
|
||||
# В новой версии приветствие будет автоматически отправлено из системных инструкций
|
||||
|
||||
logger.info("AI Interviewer started successfully")
|
||||
|
||||
|
||||
def main():
    """Configure logging and hand control to the LiveKit agents CLI runner."""
    logging.basicConfig(level=logging.INFO)

    # The worker invokes `entrypoint` for every interview room it is assigned.
    cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))


if __name__ == "__main__":
    main()
|
@ -3,14 +3,43 @@ from typing import Optional
|
||||
|
||||
|
||||
class Settings(BaseSettings):
|
||||
database_url: str = "postgresql+asyncpg://user:password@localhost:5432/hr_ai_db"
|
||||
# Database
|
||||
database_url: str = "postgresql+asyncpg://tdjx:1309@localhost:5432/hr_ai"
|
||||
|
||||
# Redis Configuration (for Celery and caching)
|
||||
redis_cache_url: str = "localhost"
|
||||
redis_cache_port: int = 6379
|
||||
redis_cache_db: int = 0
|
||||
|
||||
# Milvus Vector Database
|
||||
milvus_uri: str = "http://localhost:19530"
|
||||
milvus_collection: str = "candidate_profiles"
|
||||
|
||||
# S3 Storage
|
||||
s3_endpoint_url: str = "https://s3.selcdn.ru"
|
||||
s3_access_key_id: str
|
||||
s3_secret_access_key: str
|
||||
s3_bucket_name: str
|
||||
s3_region: str = "ru-1"
|
||||
|
||||
# LLM API Keys
|
||||
openai_api_key: Optional[str] = None
|
||||
anthropic_api_key: Optional[str] = None
|
||||
openai_model: str = "gpt-4o-mini"
|
||||
openai_embeddings_model: str = "text-embedding-3-small"
|
||||
|
||||
# AI Agent API Keys (for voice interviewer)
|
||||
deepgram_api_key: Optional[str] = None
|
||||
cartesia_api_key: Optional[str] = None
|
||||
elevenlabs_api_key: Optional[str] = None
|
||||
resemble_api_key: Optional[str] = None
|
||||
|
||||
# LiveKit Configuration
|
||||
livekit_url: str = "ws://localhost:7880"
|
||||
livekit_api_key: str = "devkey"
|
||||
livekit_api_secret: str = "devkey_secret_32chars_minimum_length"
|
||||
|
||||
# App Configuration
|
||||
app_env: str = "development"
|
||||
debug: bool = True
|
||||
|
||||
|
66
app/models/interview.py
Normal file
66
app/models/interview.py
Normal file
@ -0,0 +1,66 @@
|
||||
from sqlmodel import SQLModel, Field, Column
|
||||
from sqlalchemy import Enum as SQLEnum, String
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class InterviewStatus(str, Enum):
    """Lifecycle states of an interview session."""

    CREATED = "created"
    ACTIVE = "active"
    COMPLETED = "completed"
    FAILED = "failed"

    def __str__(self) -> str:
        # Render as the raw value (e.g. "active"), not "InterviewStatus.ACTIVE",
        # so the enum serializes cleanly into API responses and DB columns.
        return self.value
|
||||
|
||||
|
||||
class InterviewSessionBase(SQLModel):
    """Shared columns of an interview session (table model + read schemas)."""

    resume_id: int = Field(foreign_key="resume.id")
    # One LiveKit room per session; uniqueness prevents two sessions sharing a room.
    room_name: str = Field(max_length=255, unique=True)
    # Stored as a plain string backed by a DB-level enum type; the literal values
    # mirror InterviewStatus — keep the two lists in sync when adding states.
    status: str = Field(
        default="created",
        sa_column=Column(SQLEnum('created', 'active', 'completed', 'failed', name="interviewstatus"))
    )
    transcript: Optional[str] = None
    ai_feedback: Optional[str] = None
    # Bookkeeping for the spawned AI interviewer OS process
    ai_agent_pid: Optional[int] = None
    ai_agent_status: str = Field(default="not_started")  # not_started, running, stopped, failed
|
||||
|
||||
|
||||
class InterviewSession(InterviewSessionBase, table=True):
    """Database table for interview sessions."""

    __tablename__ = "interview_sessions"

    id: Optional[int] = Field(default=None, primary_key=True)
    # NOTE(review): datetime.utcnow produces a naive datetime and is deprecated
    # in Python 3.12+; presumably all timestamps here are UTC — confirm before
    # mixing with timezone-aware values.
    started_at: datetime = Field(default_factory=datetime.utcnow)
    completed_at: Optional[datetime] = None
|
||||
|
||||
|
||||
class InterviewSessionCreate(SQLModel):
    """Creation payload: which resume is interviewed and in which LiveKit room."""

    resume_id: int
    room_name: str
|
||||
|
||||
|
||||
class InterviewSessionUpdate(SQLModel):
    """Partial-update payload; every field optional so callers patch only what changed."""

    status: Optional[InterviewStatus] = None
    completed_at: Optional[datetime] = None
    transcript: Optional[str] = None
    ai_feedback: Optional[str] = None
|
||||
|
||||
|
||||
class InterviewSessionRead(InterviewSessionBase):
    """Read schema: base columns plus the server-assigned id and timestamps."""

    id: int
    started_at: datetime
    completed_at: Optional[datetime] = None
|
||||
|
||||
|
||||
class InterviewValidationResponse(SQLModel):
    """Result of checking whether a resume may start an interview."""

    can_interview: bool
    # Human-readable reason; router code also pattern-matches this text.
    message: str
|
||||
|
||||
|
||||
class LiveKitTokenResponse(SQLModel):
    """Everything the frontend needs to join the interview room."""

    token: str
    room_name: str
    server_url: str
|
@ -1,4 +1,5 @@
|
||||
from sqlmodel import SQLModel, Field, Relationship
|
||||
from sqlmodel import SQLModel, Field, Relationship, Column
|
||||
from sqlalchemy import JSON
|
||||
from typing import Optional
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
@ -6,6 +7,9 @@ from enum import Enum
|
||||
|
||||
class ResumeStatus(str, Enum):
|
||||
PENDING = "pending"
|
||||
PARSING = "parsing"
|
||||
PARSED = "parsed"
|
||||
PARSE_FAILED = "parse_failed"
|
||||
UNDER_REVIEW = "under_review"
|
||||
INTERVIEW_SCHEDULED = "interview_scheduled"
|
||||
INTERVIEWED = "interviewed"
|
||||
@ -24,6 +28,9 @@ class ResumeBase(SQLModel):
|
||||
status: ResumeStatus = Field(default=ResumeStatus.PENDING)
|
||||
interview_report_url: Optional[str] = None
|
||||
notes: Optional[str] = None
|
||||
parsed_data: Optional[dict] = Field(default=None, sa_column=Column(JSON))
|
||||
interview_plan: Optional[dict] = Field(default=None, sa_column=Column(JSON))
|
||||
parse_error: Optional[str] = None
|
||||
|
||||
|
||||
class Resume(ResumeBase, table=True):
|
||||
@ -49,6 +56,9 @@ class ResumeUpdate(SQLModel):
|
||||
status: Optional[ResumeStatus] = None
|
||||
interview_report_url: Optional[str] = None
|
||||
notes: Optional[str] = None
|
||||
parsed_data: Optional[dict] = Field(default=None, sa_column=Column(JSON))
|
||||
interview_plan: Optional[dict] = Field(default=None, sa_column=Column(JSON))
|
||||
parse_error: Optional[str] = None
|
||||
|
||||
|
||||
class ResumeRead(ResumeBase):
|
||||
|
117
app/routers/admin_router.py
Normal file
117
app/routers/admin_router.py
Normal file
@ -0,0 +1,117 @@
|
||||
import asyncio
from typing import Dict, List

import psutil
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.ext.asyncio import AsyncSession

from app.core.database import get_db
from app.services.interview_service import InterviewRoomService
|
||||
|
||||
router = APIRouter(prefix="/admin", tags=["Admin"])
|
||||
|
||||
|
||||
@router.get("/interview-processes")
async def list_active_interview_processes(db: AsyncSession = Depends(get_db)) -> Dict:
    """Return every active AI interviewer session together with live OS metrics."""
    service = InterviewRoomService(db)
    sessions = await service.get_active_agent_processes()

    def describe(session) -> Dict:
        # Static info from the DB row; runtime metrics are filled in below
        # only when the recorded PID still maps to a live process.
        info = {
            "session_id": session.id,
            "resume_id": session.resume_id,
            "room_name": session.room_name,
            "pid": session.ai_agent_pid,
            "status": session.ai_agent_status,
            "started_at": session.started_at.isoformat() if session.started_at else None,
            "is_running": False,
            "memory_mb": 0,
            "cpu_percent": 0,
        }
        if session.ai_agent_pid:
            try:
                proc = psutil.Process(session.ai_agent_pid)
                if proc.is_running():
                    info["is_running"] = True
                    info["memory_mb"] = round(proc.memory_info().rss / 1024 / 1024, 1)
                    info["cpu_percent"] = round(proc.cpu_percent(), 1)
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                # PID vanished or is off-limits; report the DB view only.
                pass
        return info

    return {
        "total_active_sessions": len(sessions),
        "processes": [describe(s) for s in sessions],
    }
|
||||
|
||||
|
||||
@router.post("/interview-processes/{session_id}/stop")
async def stop_interview_process(session_id: int, db: AsyncSession = Depends(get_db)) -> Dict:
    """Terminate the AI interviewer process attached to one interview session."""
    stopped = await InterviewRoomService(db).stop_agent_process(session_id)
    # 404 covers both "unknown session" and "agent already gone".
    if not stopped:
        raise HTTPException(status_code=404, detail=f"Session {session_id} not found or process not running")
    return {"message": f"AI process for session {session_id} stopped successfully"}
|
||||
|
||||
|
||||
@router.post("/interview-processes/cleanup")
async def cleanup_dead_processes(db: AsyncSession = Depends(get_db)) -> Dict:
    """Purge bookkeeping entries whose AI agent process is no longer alive."""
    removed = await InterviewRoomService(db).cleanup_dead_processes()
    return {"message": f"Cleaned up {removed} dead processes"}
|
||||
|
||||
|
||||
@router.get("/system-stats")
async def get_system_stats() -> Dict:
    """System-wide CPU/memory/disk statistics plus per-process AI agent stats.

    Returns:
        Dict with a "system" section (host metrics) and an "ai_agents" section
        listing every python process whose command line mentions
        ``ai_interviewer_agent``.

    Raises:
        HTTPException: 500 when psutil fails to read system state.
    """
    try:
        # psutil.cpu_percent(interval=1) sleeps for a full second to sample.
        # Run it in a worker thread so the asyncio event loop stays responsive
        # instead of stalling every other request for that second.
        cpu_percent = await asyncio.to_thread(psutil.cpu_percent, 1)
        memory = psutil.virtual_memory()
        disk = psutil.disk_usage('/')

        # Collect python processes that look like AI interviewer agents.
        python_processes = []
        for proc in psutil.process_iter(['pid', 'name', 'memory_info', 'cpu_percent', 'cmdline']):
            try:
                if proc.info['name'] and 'python' in proc.info['name'].lower():
                    cmdline = ' '.join(proc.info['cmdline']) if proc.info['cmdline'] else ''
                    if 'ai_interviewer_agent' in cmdline:
                        python_processes.append({
                            'pid': proc.info['pid'],
                            'memory_mb': round(proc.info['memory_info'].rss / 1024 / 1024, 1),
                            # First cpu_percent sample for a process may be 0.0
                            # (psutil returns 0.0 until a second sample exists).
                            'cpu_percent': proc.info['cpu_percent'] or 0,
                            'cmdline': cmdline
                        })
            except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
                # Process disappeared or is inaccessible between listing and
                # inspection; skip it rather than fail the whole report.
                pass

        return {
            "system": {
                "cpu_percent": cpu_percent,
                "memory_percent": memory.percent,
                "memory_available_gb": round(memory.available / 1024 / 1024 / 1024, 1),
                "disk_percent": disk.percent,
                "disk_free_gb": round(disk.free / 1024 / 1024 / 1024, 1)
            },
            "ai_agents": {
                "count": len(python_processes),
                "total_memory_mb": sum(p['memory_mb'] for p in python_processes),
                "processes": python_processes
            }
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error getting system stats: {str(e)}")
|
91
app/routers/interview_router.py
Normal file
91
app/routers/interview_router.py
Normal file
@ -0,0 +1,91 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from app.core.session_middleware import get_current_session, get_db_session
|
||||
from app.models.session import Session
|
||||
from app.models.interview import InterviewValidationResponse, LiveKitTokenResponse, InterviewStatus
|
||||
from app.services.interview_service import InterviewRoomService
|
||||
|
||||
router = APIRouter(prefix="/interview", tags=["interview"])
|
||||
|
||||
|
||||
@router.get("/{resume_id}/validate-interview", response_model=InterviewValidationResponse)
async def validate_interview(
    request: Request,
    resume_id: int,
    current_session: Session = Depends(get_current_session),
    db_session: AsyncSession = Depends(get_db_session)
):
    """Check whether the given resume is eligible for a voice interview."""
    if not current_session:
        raise HTTPException(status_code=401, detail="No active session")

    service = InterviewRoomService(db_session)
    result = await service.validate_resume_for_interview(resume_id)

    # NOTE(review): the HTTP status is inferred by pattern-matching the
    # message text — fragile; ideally the service would return a reason code.
    lowered = result.message.lower()

    # Unknown resume → 404.
    if "not found" in lowered:
        raise HTTPException(status_code=404, detail=result.message)

    # Known but not yet interview-ready → 400.
    if not result.can_interview and "not ready" in lowered:
        raise HTTPException(status_code=400, detail=result.message)

    return result
|
||||
|
||||
|
||||
@router.post("/{resume_id}/token", response_model=LiveKitTokenResponse)
async def get_interview_token(
    request: Request,
    resume_id: int,
    current_session: Session = Depends(get_current_session),
    db_session: AsyncSession = Depends(get_db_session)
):
    """Issue a LiveKit access token for the candidate's interview room."""
    if not current_session:
        raise HTTPException(status_code=401, detail="No active session")

    # The service creates/locates the interview session and mints the token;
    # a falsy result means the resume is not in an interviewable state.
    token = await InterviewRoomService(db_session).get_livekit_token(resume_id)
    if not token:
        raise HTTPException(
            status_code=400,
            detail="Cannot create interview session. Check if resume is ready for interview."
        )
    return token
|
||||
|
||||
|
||||
@router.patch("/{resume_id}/end")
async def end_interview(
    request: Request,
    resume_id: int,
    current_session: Session = Depends(get_current_session),
    db_session: AsyncSession = Depends(get_db_session)
):
    """Mark the active interview session for this resume as completed.

    Raises:
        HTTPException: 401 without a session, 404 when no interview session
        exists for the resume, 500 when the status update fails.
    """
    if not current_session:
        raise HTTPException(status_code=401, detail="No active session")

    interview_service = InterviewRoomService(db_session)

    # Find the session to finish.
    interview_session = await interview_service.get_interview_session(resume_id)
    if not interview_session:
        raise HTTPException(status_code=404, detail="No active interview session found")

    # Use the enum member (a str subclass equal to "completed") instead of a
    # bare literal so the status vocabulary stays defined in one place.
    success = await interview_service.update_session_status(
        interview_session.id,
        InterviewStatus.COMPLETED
    )
    if not success:
        raise HTTPException(status_code=500, detail="Failed to end interview session")

    return {"message": "Interview session ended successfully"}
|
@ -6,6 +6,8 @@ from app.models.resume import ResumeCreate, ResumeUpdate, ResumeRead, ResumeStat
|
||||
from app.models.session import Session
|
||||
from app.services.resume_service import ResumeService
|
||||
from app.services.file_service import FileService
|
||||
from celery_worker.tasks import parse_resume_task
|
||||
from celery_worker.celery_app import celery_app
|
||||
|
||||
router = APIRouter(prefix="/resumes", tags=["resumes"])
|
||||
|
||||
@ -28,10 +30,12 @@ async def create_resume(
|
||||
file_service = FileService()
|
||||
resume_service = ResumeService(db_session)
|
||||
|
||||
resume_file_url = await file_service.upload_resume_file(resume_file)
|
||||
if not resume_file_url:
|
||||
upload_result = await file_service.upload_resume_file(resume_file)
|
||||
if not upload_result:
|
||||
raise HTTPException(status_code=400, detail="Failed to upload resume file")
|
||||
|
||||
resume_file_url, local_file_path = upload_result
|
||||
|
||||
resume_data = ResumeCreate(
|
||||
vacancy_id=vacancy_id,
|
||||
applicant_name=applicant_name,
|
||||
@ -41,7 +45,25 @@ async def create_resume(
|
||||
cover_letter=cover_letter
|
||||
)
|
||||
|
||||
return await resume_service.create_resume_with_session(resume_data, current_session.id)
|
||||
# Создаем резюме в БД
|
||||
created_resume = await resume_service.create_resume_with_session(resume_data, current_session.id)
|
||||
|
||||
# Запускаем асинхронную задачу парсинга резюме
|
||||
try:
|
||||
# Запускаем Celery task для парсинга с локальным файлом
|
||||
task_result = parse_resume_task.delay(str(created_resume.id), local_file_path)
|
||||
|
||||
# Добавляем task_id в ответ для отслеживания статуса
|
||||
response_data = created_resume.model_dump()
|
||||
response_data["parsing_task_id"] = task_result.id
|
||||
response_data["parsing_status"] = "started"
|
||||
|
||||
return response_data
|
||||
|
||||
except Exception as e:
|
||||
# Если не удалось запустить парсинг, оставляем резюме в статусе PENDING
|
||||
print(f"Failed to start parsing task for resume {created_resume.id}: {str(e)}")
|
||||
return created_resume
|
||||
|
||||
|
||||
@router.get("/", response_model=List[ResumeRead])
|
||||
@ -170,6 +192,72 @@ async def upload_interview_report(
|
||||
return {"message": "Interview report uploaded successfully", "report_url": report_url}
|
||||
|
||||
|
||||
@router.get("/{resume_id}/parsing-status")
async def get_parsing_status(
    request: Request,
    resume_id: int,
    task_id: str = Query(..., description="Task ID from resume upload response"),
    current_session: Session = Depends(get_current_session),
    db_session: AsyncSession = Depends(get_db_session)
):
    """Report resume-parsing progress for the Celery task *task_id*.

    Maps Celery task states (PENDING/PROGRESS/SUCCESS/FAILURE) onto a
    progress payload. User-facing status strings are in Russian by design.
    """
    if not current_session:
        raise HTTPException(status_code=401, detail="No active session")

    # Authorize: the resume must exist and belong to the caller's session.
    service = ResumeService(db_session)
    resume = await service.get_resume(resume_id)

    if not resume:
        raise HTTPException(status_code=404, detail="Resume not found")

    if resume.session_id != current_session.id:
        raise HTTPException(status_code=403, detail="Access denied")

    # Ask Celery for the task state; lookup failures are reported in the
    # body (best-effort) rather than raised as a 500.
    try:
        task_result = celery_app.AsyncResult(task_id)

        response = {
            "task_id": task_id,
            "task_state": task_result.state,
            "resume_status": resume.status,
        }

        if task_result.state == 'PENDING':
            # Not picked up by a worker yet.
            response.update({
                "status": "В очереди на обработку",
                "progress": 0
            })
        elif task_result.state == 'PROGRESS':
            # Worker publishes its own status/progress via task meta.
            response.update({
                "status": task_result.info.get('status', 'Обрабатывается'),
                "progress": task_result.info.get('progress', 0)
            })
        elif task_result.state == 'SUCCESS':
            response.update({
                "status": "Завершено успешно",
                "progress": 100,
                "result": task_result.info
            })
        elif task_result.state == 'FAILURE':
            # On failure, task_result.info holds the raised exception.
            response.update({
                "status": f"Ошибка: {str(task_result.info)}",
                "progress": 0,
                "error": str(task_result.info)
            })

        return response

    except Exception as e:
        return {
            "task_id": task_id,
            "task_state": "UNKNOWN",
            "resume_status": resume.status,
            "error": f"Failed to get task status: {str(e)}"
        }
|
||||
|
||||
|
||||
@router.delete("/{resume_id}")
|
||||
async def delete_resume(
|
||||
request: Request,
|
||||
|
294
app/services/ai_interviewer_service.py
Normal file
294
app/services/ai_interviewer_service.py
Normal file
@ -0,0 +1,294 @@
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from typing import Dict, Optional, List
|
||||
from datetime import datetime
|
||||
from livekit import api, rtc
|
||||
from rag.settings import settings
|
||||
from app.services.interview_service import InterviewRoomService
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class AIInterviewerService:
    """AI interviewer that joins a LiveKit room as a regular participant.

    Drives a scripted voice interview: generates questions from the candidate's
    resume, publishes an audio track, exchanges JSON data messages with the
    frontend, and records the conversation history. STT/TTS integration and
    transcript persistence are still TODO stubs.
    """

    def __init__(self, interview_session_id: int, resume_data: Dict):
        # DB id of the interview session this agent serves
        self.interview_session_id = interview_session_id
        # Parsed resume dict (name/skills/total_years/education) used to generate questions
        self.resume_data = resume_data
        self.room: Optional[rtc.Room] = None
        self.audio_source: Optional[rtc.AudioSource] = None
        # Chronological log of exchanged messages; serialized as the transcript
        self.conversation_history: List[Dict] = []
        self.current_question_index = 0
        self.interview_questions = []

    async def connect_to_room(self, room_name: str, token: str):
        """Connect the AI agent to the LiveKit room and start the interview."""
        try:
            self.room = rtc.Room()

            # Wire room events to the handlers below.
            # NOTE(review): async methods are registered directly as event
            # callbacks — confirm the rtc event emitter schedules coroutines.
            self.room.on("participant_connected", self.on_participant_connected)
            self.room.on("track_subscribed", self.on_track_subscribed)
            self.room.on("data_received", self.on_data_received)

            # Join the room.
            await self.room.connect(settings.livekit_url, token)
            logger.info(f"AI agent connected to room: {room_name}")

            # Audio source for TTS output (16 kHz mono).
            self.audio_source = rtc.AudioSource(sample_rate=16000, num_channels=1)
            track = rtc.LocalAudioTrack.create_audio_track("ai_voice", self.audio_source)

            # Publish the agent's audio track to the room.
            await self.room.local_participant.publish_track(track, rtc.TrackPublishOptions())

            # Prepare the question list and begin asking.
            await self.generate_interview_questions()
            await self.start_interview()

        except Exception as e:
            logger.error(f"Error connecting to room: {str(e)}")
            raise

    async def on_participant_connected(self, participant: rtc.RemoteParticipant):
        """Handle a candidate joining the room."""
        logger.info(f"Participant connected: {participant.identity}")
        # Signal the frontend that the agent is about to speak.
        await self.send_message({
            "type": "ai_speaking_start"
        })

    async def on_track_subscribed(
        self,
        track: rtc.Track,
        publication: rtc.RemoteTrackPublication,
        participant: rtc.RemoteParticipant
    ):
        """Handle receiving a media track from the candidate."""
        if track.kind == rtc.TrackKind.KIND_AUDIO:
            logger.info("Subscribed to user audio track")
            # Feed the incoming audio into the (stubbed) STT pipeline.
            audio_stream = rtc.AudioStream(track)
            asyncio.create_task(self.process_user_audio(audio_stream))

    async def on_data_received(self, data: bytes, participant: rtc.RemoteParticipant):
        """Decode JSON control messages sent by the frontend."""
        try:
            message = json.loads(data.decode())
            await self.handle_frontend_message(message)
        except Exception as e:
            logger.error(f"Error processing data message: {str(e)}")

    async def handle_frontend_message(self, message: Dict):
        """Dispatch a frontend control message by its "type" field."""
        msg_type = message.get("type")

        if msg_type == "start_interview":
            await self.start_interview()
        elif msg_type == "end_interview":
            await self.end_interview()
        elif msg_type == "user_finished_speaking":
            # Candidate finished speaking; response processing hook (unused yet).
            pass

    async def process_user_audio(self, audio_stream: rtc.AudioStream):
        """Consume the candidate's audio frames for speech-to-text (stub)."""
        try:
            # STT integration pending — frames are currently discarded.
            async for audio_frame in audio_stream:
                # TODO: send audio to STT (Whisper API)
                # user_text = await self.speech_to_text(audio_frame)
                # if user_text:
                #     await self.process_user_response(user_text)
                pass
        except Exception as e:
            logger.error(f"Error processing user audio: {str(e)}")

    async def speech_to_text(self, audio_data: bytes) -> Optional[str]:
        """Convert speech to text via a Whisper-style API (not implemented)."""
        # TODO: integrate OpenAI Whisper or another STT service
        pass

    async def text_to_speech(self, text: str) -> bytes:
        """Convert text to speech via a TTS service (not implemented)."""
        # TODO: integrate ElevenLabs, OpenAI TTS or another service
        pass

    async def generate_interview_questions(self):
        """Generate the interview question list from the resume via the LLM.

        Falls back to a fixed Russian question script when the LLM is
        unavailable or returns something that is not a JSON array.
        """
        try:
            from rag.registry import registry
            chat_model = registry.get_chat_model()

            # Reuse the existing question-generation prompt logic.
            questions_prompt = f"""
            Сгенерируй 8 вопросов для голосового собеседования кандидата.

            РЕЗЮМЕ КАНДИДАТА:
            Имя: {self.resume_data.get('name', 'Не указано')}
            Навыки: {', '.join(self.resume_data.get('skills', []))}
            Опыт работы: {self.resume_data.get('total_years', 0)} лет
            Образование: {self.resume_data.get('education', 'Не указано')}

            ВАЖНО:
            1. Вопросы должны быть короткими и ясными для голосового формата
            2. Начни с простого приветствия и представления
            3. Каждый вопрос должен занимать не более 2-3 предложений
            4. Используй естественную разговорную речь

            Верни только JSON массив строк с вопросами:
            ["Привет! Расскажи немного о себе", "Какой у тебя опыт в...", ...]
            """

            from langchain.schema import HumanMessage, SystemMessage
            messages = [
                SystemMessage(content="Ты HR интервьюер. Говори естественно и дружелюбно."),
                HumanMessage(content=questions_prompt)
            ]

            response = chat_model.get_llm().invoke(messages)
            response_text = response.content.strip()

            # Accept the reply only when it looks like a bare JSON array.
            if response_text.startswith('[') and response_text.endswith(']'):
                self.interview_questions = json.loads(response_text)
            else:
                # Fallback question script
                self.interview_questions = [
                    "Привет! Расскажи немного о себе и своем опыте",
                    "Что тебя привлекает в этой позиции?",
                    "Расскажи о своем самом значимом проекте",
                    "Какие технологии ты используешь в работе?",
                    "Как ты решаешь сложные задачи?",
                    "Есть ли у тебя вопросы ко мне?"
                ]

            logger.info(f"Generated {len(self.interview_questions)} interview questions")

        except Exception as e:
            logger.error(f"Error generating questions: {str(e)}")
            # Minimal fallback script when the LLM call itself fails.
            self.interview_questions = [
                "Привет! Расскажи о своем опыте",
                "Что тебя интересует в этой позиции?",
                "Есть ли у тебя вопросы?"
            ]

    async def start_interview(self):
        """Begin the interview, generating questions first if needed."""
        if not self.interview_questions:
            await self.generate_interview_questions()

        # Ask the first question.
        await self.ask_next_question()

    async def ask_next_question(self):
        """Send the next scripted question, or finish when the script is done."""
        if self.current_question_index >= len(self.interview_questions):
            await self.end_interview()
            return

        question = self.interview_questions[self.current_question_index]

        # Tell the frontend which question is being asked (1-based numbering).
        await self.send_message({
            "type": "question",
            "text": question,
            "questionNumber": self.current_question_index + 1
        })

        # Voice output pending TTS integration.
        # TODO: implement TTS
        # audio_data = await self.text_to_speech(question)
        # await self.play_audio(audio_data)

        self.current_question_index += 1
        logger.info(f"Asked question {self.current_question_index}: {question}")

    async def process_user_response(self, user_text: str):
        """Record the candidate's transcribed answer and advance the script."""
        # Log the answer against the question that was just asked.
        self.conversation_history.append({
            "type": "user_response",
            "text": user_text,
            "timestamp": datetime.utcnow().isoformat(),
            "question_index": self.current_question_index - 1
        })

        # Future: analyze the answer with the LLM and decide whether to
        # ask a follow-up or move on.

        # For now, simply continue to the next scripted question.
        await asyncio.sleep(1)  # brief pause before the next question
        await self.ask_next_question()

    async def send_message(self, message: Dict):
        """Publish a JSON data message to the room (no-op before connect)."""
        if self.room:
            data = json.dumps(message).encode()
            await self.room.local_participant.publish_data(data)

    async def play_audio(self, audio_data: bytes):
        """Play synthesized audio through the published track (not implemented)."""
        if self.audio_source:
            # TODO: convert audio_data to the required format and push frames
            pass

    async def end_interview(self):
        """Finish the interview, notify the frontend, and leave the room."""
        await self.send_message({
            "type": "interview_complete",
            "summary": f"Interview completed with {len(self.conversation_history)} responses"
        })

        # Serialize the transcript for persistence.
        transcript = json.dumps(self.conversation_history, ensure_ascii=False, indent=2)

        # TODO: update interview_session in the DB with the transcript

        logger.info("Interview completed")

        # Leave the LiveKit room.
        if self.room:
            await self.room.disconnect()
|
||||
|
||||
|
||||
class AIInterviewerManager:
    """Registry of running AI interviewers, keyed by interview session id."""

    def __init__(self):
        # session_id -> live AIInterviewerService instance
        self.active_sessions: Dict[int, AIInterviewerService] = {}

    async def start_interview_session(self, interview_session_id: int, room_name: str, resume_data: Dict):
        """Create (and eventually connect) an AI interviewer for one session.

        NOTE(review): room connection is still commented out pending agent
        token generation, so the service is registered but not yet joined.
        """
        try:
            # Token creation for the AI agent lives in the interview service.
            from app.services.interview_service import InterviewRoomService
            # A dedicated token for the AI agent is needed here.

            ai_interviewer = AIInterviewerService(interview_session_id, resume_data)

            # TODO: generate a token for the AI agent
            # ai_token = generate_ai_agent_token(room_name)
            # await ai_interviewer.connect_to_room(room_name, ai_token)

            self.active_sessions[interview_session_id] = ai_interviewer

            logger.info(f"Started AI interviewer for session: {interview_session_id}")

        except Exception as e:
            logger.error(f"Error starting AI interviewer: {str(e)}")
            raise

    async def stop_interview_session(self, interview_session_id: int):
        """End and deregister the AI interviewer for a session, if any."""
        if interview_session_id in self.active_sessions:
            ai_interviewer = self.active_sessions[interview_session_id]
            await ai_interviewer.end_interview()
            del self.active_sessions[interview_session_id]
            logger.info(f"Stopped AI interviewer for session: {interview_session_id}")


# Module-level singleton shared by the routers/services.
ai_interviewer_manager = AIInterviewerManager()
|
@ -1,5 +1,7 @@
|
||||
from fastapi import UploadFile
|
||||
from typing import Optional
|
||||
import tempfile
|
||||
import os
|
||||
from app.core.s3 import s3_service
|
||||
|
||||
|
||||
@ -7,18 +9,60 @@ class FileService:
|
||||
def __init__(self):
|
||||
self.s3_service = s3_service
|
||||
|
||||
async def upload_resume_file(self, file: UploadFile) -> Optional[str]:
|
||||
async def upload_resume_file(self, file: UploadFile) -> Optional[tuple[str, str]]:
|
||||
"""
|
||||
Загружает резюме в S3 и сохраняет локальную копию для парсинга
|
||||
|
||||
Returns:
|
||||
tuple[str, str]: (s3_url, local_file_path) или None при ошибке
|
||||
"""
|
||||
if not file.filename:
|
||||
return None
|
||||
|
||||
content = await file.read()
|
||||
content_type = file.content_type or "application/octet-stream"
|
||||
|
||||
return await self.s3_service.upload_file(
|
||||
# Загружаем в S3
|
||||
s3_url = await self.s3_service.upload_file(
|
||||
file_content=content,
|
||||
file_name=file.filename,
|
||||
content_type=content_type
|
||||
)
|
||||
|
||||
if not s3_url:
|
||||
return None
|
||||
|
||||
# Сохраняем локальную копию для парсинга
|
||||
try:
|
||||
# Создаем временный файл с сохранением расширения
|
||||
temp_dir = tempfile.gettempdir()
|
||||
file_extension = os.path.splitext(file.filename)[1]
|
||||
if not file_extension:
|
||||
# Пытаемся определить расширение по MIME типу
|
||||
if content_type == 'application/pdf':
|
||||
file_extension = '.pdf'
|
||||
elif content_type in ['application/vnd.openxmlformats-officedocument.wordprocessingml.document']:
|
||||
file_extension = '.docx'
|
||||
elif content_type in ['application/msword']:
|
||||
file_extension = '.doc'
|
||||
elif content_type == 'text/plain':
|
||||
file_extension = '.txt'
|
||||
else:
|
||||
file_extension = '.pdf' # fallback
|
||||
|
||||
temp_filename = f"resume_{hash(s3_url)}_{file.filename}"
|
||||
local_file_path = os.path.join(temp_dir, temp_filename)
|
||||
|
||||
# Сохраняем содержимое файла
|
||||
with open(local_file_path, 'wb') as temp_file:
|
||||
temp_file.write(content)
|
||||
|
||||
return (s3_url, local_file_path)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Failed to save local copy: {str(e)}")
|
||||
# Если не удалось сохранить локально, возвращаем только S3 URL
|
||||
return (s3_url, s3_url)
|
||||
|
||||
async def upload_interview_report(self, file: UploadFile) -> Optional[str]:
|
||||
if not file.filename:
|
||||
|
464
app/services/interview_service.py
Normal file
464
app/services/interview_service.py
Normal file
@ -0,0 +1,464 @@
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
import json
|
||||
import subprocess
|
||||
from typing import Optional
|
||||
from datetime import datetime, timedelta
|
||||
from livekit.api import AccessToken, VideoGrants
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlalchemy import select
|
||||
from app.models.interview import (
|
||||
InterviewSession,
|
||||
InterviewSessionCreate,
|
||||
InterviewSessionUpdate,
|
||||
InterviewStatus,
|
||||
InterviewValidationResponse,
|
||||
LiveKitTokenResponse
|
||||
)
|
||||
from app.models.resume import Resume, ResumeStatus
|
||||
from app.models.vacancy import Vacancy
|
||||
from rag.settings import settings
|
||||
|
||||
|
||||
class InterviewRoomService:
    """Manages interview sessions: resume validation, LiveKit room/token
    handling, and the lifecycle of AI interviewer agent subprocesses."""

    def __init__(self, db_session: AsyncSession):
        self.db = db_session
        # Local-dev defaults keep the service usable without configuration.
        self.livekit_url = settings.livekit_url or "ws://localhost:7880"
        self.api_key = settings.livekit_api_key or "devkey"
        self.api_secret = settings.livekit_api_secret or "secret"

    async def validate_resume_for_interview(self, resume_id: int) -> InterviewValidationResponse:
        """Check whether an interview can be started for the given resume."""
        try:
            result = await self.db.execute(select(Resume).where(Resume.id == resume_id))
            resume = result.scalar_one_or_none()

            if not resume:
                return InterviewValidationResponse(
                    can_interview=False,
                    message="Resume not found"
                )

            # Only a fully parsed resume has the data the interviewer needs.
            if resume.status != ResumeStatus.PARSED:
                return InterviewValidationResponse(
                    can_interview=False,
                    message=f"Resume is not ready for interview. Current status: {resume.status}"
                )

            # An existing active session is reported but does not block.
            result = await self.db.execute(
                select(InterviewSession)
                .where(InterviewSession.resume_id == resume_id)
                .where(InterviewSession.status == "active")
            )
            active_session = result.scalar_one_or_none()

            message = "Resume is ready for interview"
            if active_session:
                message = "Resume has an active interview session"

            return InterviewValidationResponse(
                can_interview=True,
                message=message
            )

        except Exception as e:
            return InterviewValidationResponse(
                can_interview=False,
                message=f"Error validating resume: {str(e)}"
            )

    async def create_interview_session(self, resume_id: int) -> Optional[InterviewSession]:
        """Create and persist a new interview session with a unique room name."""
        try:
            # timestamp + short UUID keeps room names unique and debuggable.
            unique_id = str(uuid.uuid4())[:8]
            timestamp = int(time.time())
            room_name = f"interview_{resume_id}_{timestamp}_{unique_id}"

            session_data = InterviewSessionCreate(
                resume_id=resume_id,
                room_name=room_name
            )

            interview_session = InterviewSession(**session_data.model_dump())
            self.db.add(interview_session)
            await self.db.commit()
            await self.db.refresh(interview_session)

            return interview_session

        except Exception as e:
            await self.db.rollback()
            print(f"Error creating interview session: {str(e)}")
            return None

    def generate_access_token(self, room_name: str, participant_name: str) -> str:
        """Generate a LiveKit JWT allowing the participant to join the room.

        Raises: re-raises any token-generation error after logging it.
        """
        try:
            at = AccessToken(self.api_key, self.api_secret)
            grants = VideoGrants(
                room_join=True,
                room=room_name,
                can_publish=True,
                can_subscribe=True
            )
            at.with_grants(grants).with_identity(participant_name)

            return at.to_jwt()

        except Exception as e:
            print(f"Error generating LiveKit token: {str(e)}")
            raise

    async def get_livekit_token(self, resume_id: int) -> Optional[LiveKitTokenResponse]:
        """Create (or reuse) an interview session and return a LiveKit token."""
        try:
            validation = await self.validate_resume_for_interview(resume_id)
            if not validation.can_interview:
                return None

            # Reuse an existing created/active session when one exists.
            existing_session = await self.get_interview_session(resume_id)
            if existing_session:
                interview_session = existing_session
                print(f"[DEBUG] Using existing interview session: {interview_session.id}")
            else:
                interview_session = await self.create_interview_session(resume_id)
                if not interview_session:
                    return None
                print(f"[DEBUG] Created new interview session: {interview_session.id}")

            participant_name = f"user_{resume_id}"
            token = self.generate_access_token(
                interview_session.room_name,
                participant_name
            )

            # Pre-built interview plan for the AI agent.
            interview_plan = await self.get_resume_data_for_interview(resume_id)

            await self.update_session_status(interview_session.id, "active")

            # Bug fix: previously a fresh AI agent process was spawned on every
            # token request; only start one when none is already running.
            if getattr(interview_session, "ai_agent_status", None) != "running":
                await self.start_ai_interviewer(interview_session, interview_plan)

            return LiveKitTokenResponse(
                token=token,
                room_name=interview_session.room_name,
                server_url=self.livekit_url
            )

        except Exception as e:
            print(f"Error getting LiveKit token: {str(e)}")
            return None

    async def update_session_status(self, session_id: int, status: str) -> bool:
        """Set the session status; stamps completed_at when status is 'completed'."""
        try:
            result = await self.db.execute(
                select(InterviewSession).where(InterviewSession.id == session_id)
            )
            session = result.scalar_one_or_none()

            if not session:
                return False

            session.status = status
            if status == "completed":
                # NOTE(review): naive UTC timestamp — presumably the column
                # stores naive datetimes; confirm before making it tz-aware.
                session.completed_at = datetime.utcnow()

            await self.db.commit()
            return True

        except Exception as e:
            await self.db.rollback()
            print(f"Error updating session status: {str(e)}")
            return False

    async def get_interview_session(self, resume_id: int) -> Optional[InterviewSession]:
        """Return the most recent created/active session for a resume, if any."""
        try:
            # Bug fix: scalar_one_or_none() raises MultipleResultsFound when a
            # resume has several created/active sessions (silently turning into
            # None via the except); limit(1) makes "latest one" explicit.
            result = await self.db.execute(
                select(InterviewSession)
                .where(InterviewSession.resume_id == resume_id)
                .where(InterviewSession.status.in_(["created", "active"]))
                .order_by(InterviewSession.started_at.desc())
                .limit(1)
            )
            return result.scalar_one_or_none()

        except Exception as e:
            print(f"Error getting interview session: {str(e)}")
            return None

    async def start_ai_interviewer(self, interview_session: InterviewSession, interview_plan: dict):
        """Spawn the AI interviewer agent as a detached background subprocess."""
        try:
            # The agent reads its interview plan from room metadata passed
            # through the environment.
            room_metadata = json.dumps({
                "interview_plan": interview_plan,
                "session_id": interview_session.id
            })

            agent_cmd = [
                "uv",
                "run",
                "ai_interviewer_agent.py",
                "connect",
                "--room", interview_session.room_name,
                "--url", self.livekit_url,
                "--api-key", self.api_key,
                "--api-secret", self.api_secret,
            ]

            env = os.environ.copy()
            env.update({
                "LIVEKIT_ROOM_METADATA": room_metadata,
                "OPENAI_API_KEY": settings.openai_api_key or "",
                "DEEPGRAM_API_KEY": settings.deepgram_api_key or "",
                "CARTESIA_API_KEY": settings.cartesia_api_key or "",
            })

            # Redirect agent output into per-session log files. (An unused
            # ai_token generated here previously was removed as dead code.)
            with open(f"ai_interviewer_{interview_session.id}.log", "wb") as f_out, \
                 open(f"ai_interviewer_{interview_session.id}.err", "wb") as f_err:
                process = subprocess.Popen(
                    agent_cmd,
                    env=env,
                    stdout=f_out,
                    stderr=f_err,
                    cwd="."
                )

            print(f"[DEBUG] Started AI interviewer process {process.pid} for session {interview_session.id}")

            # Persist the PID so the process can be managed later.
            await self.update_agent_process_info(
                interview_session.id,
                process.pid,
                "running"
            )

        except Exception as e:
            print(f"Error starting AI interviewer: {str(e)}")
            await self.update_agent_process_info(
                interview_session.id,
                None,
                "failed"
            )

    async def get_resume_data_for_interview(self, resume_id: int) -> dict:
        """Return the resume's stored interview plan, or build a basic fallback."""
        try:
            result = await self.db.execute(
                select(Resume).where(Resume.id == resume_id)
            )
            resume = result.scalar_one_or_none()

            if not resume:
                return self._get_fallback_interview_plan()

            # A plan prepared by the parsing pipeline wins outright.
            if resume.interview_plan:
                return resume.interview_plan

            # Otherwise assemble a minimal plan from the data we do have.
            fallback_plan = {
                "interview_structure": {
                    "duration_minutes": 30,
                    "greeting": f"Привет, {resume.applicant_name}! Готов к интервью?",
                    "sections": [
                        {
                            "name": "Знакомство",
                            "duration_minutes": 5,
                            "questions": ["Расскажи немного о себе", "Что тебя привлекло в этой позиции?"]
                        },
                        {
                            "name": "Опыт работы",
                            "duration_minutes": 15,
                            "questions": ["Расскажи о своем опыте", "Какие технологии используешь?"]
                        },
                        {
                            "name": "Вопросы кандидата",
                            "duration_minutes": 10,
                            "questions": ["Есть ли у тебя вопросы ко мне?"]
                        }
                    ]
                },
                "focus_areas": ["experience", "technical_skills"],
                "candidate_info": {
                    "name": resume.applicant_name,
                    "email": resume.applicant_email,
                    "phone": resume.applicant_phone
                }
            }

            # Enrich with parsed data when the parser produced any.
            if resume.parsed_data:
                fallback_plan["candidate_info"].update({
                    "skills": resume.parsed_data.get("skills", []),
                    "total_years": resume.parsed_data.get("total_years", 0),
                    "education": resume.parsed_data.get("education", "")
                })

            return fallback_plan

        except Exception as e:
            print(f"Error getting interview plan: {str(e)}")
            return self._get_fallback_interview_plan()

    def _get_fallback_interview_plan(self) -> dict:
        """Static fallback interview plan used when the DB yields nothing."""
        return {
            "interview_structure": {
                "duration_minutes": 30,
                "greeting": "Привет! Готов к интервью?",
                "sections": [
                    {
                        "name": "Знакомство",
                        "duration_minutes": 10,
                        "questions": ["Расскажи о себе", "Что тебя привлекло в этой позиции?"]
                    },
                    {
                        "name": "Опыт работы",
                        "duration_minutes": 15,
                        "questions": ["Расскажи о своем опыте", "Какие технологии используешь?"]
                    },
                    {
                        "name": "Вопросы кандидата",
                        "duration_minutes": 5,
                        "questions": ["Есть ли у тебя вопросы?"]
                    }
                ]
            },
            "focus_areas": ["experience", "technical_skills"],
            "candidate_info": {
                "name": "Кандидат",
                "skills": [],
                "total_years": 0
            }
        }

    async def update_agent_process_info(self, session_id: int, pid: Optional[int] = None, status: str = "not_started") -> bool:
        """Record the AI agent's PID and status on the session row.

        Bug fix: pid was annotated ``int = None`` (implicit Optional).
        """
        try:
            result = await self.db.execute(
                select(InterviewSession).where(InterviewSession.id == session_id)
            )
            session = result.scalar_one_or_none()

            if not session:
                return False

            session.ai_agent_pid = pid
            session.ai_agent_status = status

            await self.db.commit()
            return True

        except Exception as e:
            await self.db.rollback()
            print(f"Error updating agent process info: {str(e)}")
            return False

    async def get_active_agent_processes(self) -> list:
        """Return all sessions whose AI agent is marked as running."""
        try:
            result = await self.db.execute(
                select(InterviewSession)
                .where(InterviewSession.ai_agent_status == "running")
            )
            return result.scalars().all()

        except Exception as e:
            print(f"Error getting active processes: {str(e)}")
            return []

    async def stop_agent_process(self, session_id: int) -> bool:
        """Terminate (gracefully, then forcefully) the AI process of a session."""
        try:
            result = await self.db.execute(
                select(InterviewSession).where(InterviewSession.id == session_id)
            )
            session = result.scalar_one_or_none()

            if not session or not session.ai_agent_pid:
                return False

            import psutil
            try:
                # Ask the process to exit first.
                process = psutil.Process(session.ai_agent_pid)
                process.terminate()

                # Wait up to five seconds for a graceful exit.
                import time
                for _ in range(50):
                    if not process.is_running():
                        break
                    time.sleep(0.1)

                # Escalate to SIGKILL if it is still alive.
                if process.is_running():
                    process.kill()

                await self.update_agent_process_info(session_id, None, "stopped")

                print(f"Stopped AI agent process {session.ai_agent_pid} for session {session_id}")
                return True

            except (psutil.NoSuchProcess, psutil.AccessDenied):
                # Process already gone; just reconcile the DB record.
                await self.update_agent_process_info(session_id, None, "stopped")
                return True

        except Exception as e:
            print(f"Error stopping agent process: {str(e)}")
            return False

    async def cleanup_dead_processes(self) -> int:
        """Reset DB records whose AI agent process no longer exists; return count."""
        try:
            import psutil

            active_sessions = await self.get_active_agent_processes()
            cleaned_count = 0

            for session in active_sessions:
                if session.ai_agent_pid:
                    try:
                        process = psutil.Process(session.ai_agent_pid)
                        if not process.is_running():
                            await self.update_agent_process_info(session.id, None, "stopped")
                            cleaned_count += 1
                    except psutil.NoSuchProcess:
                        await self.update_agent_process_info(session.id, None, "stopped")
                        cleaned_count += 1

            print(f"Cleaned up {cleaned_count} dead processes")
            return cleaned_count

        except Exception as e:
            print(f"Error cleaning up processes: {str(e)}")
            return 0
0
celery_worker/__init__.py
Normal file
0
celery_worker/__init__.py
Normal file
17
celery_worker/celery_app.py
Normal file
17
celery_worker/celery_app.py
Normal file
@ -0,0 +1,17 @@
|
||||
from celery import Celery
|
||||
from rag.settings import settings
|
||||
|
||||
# Redis connection string shared by the broker and the result backend.
_redis_url = (
    f"redis://{settings.redis_cache_url}:"
    f"{settings.redis_cache_port}/{settings.redis_cache_db}"
)

celery_app = Celery(
    "hr_ai_backend",
    broker=_redis_url,
    backend=_redis_url,
    include=["celery_worker.tasks"],
)

# JSON-only serialization and UTC timestamps for every task.
celery_app.conf.update(
    task_serializer="json",
    accept_content=["json"],
    result_serializer="json",
    timezone="UTC",
    enable_utc=True,
)
87
celery_worker/database.py
Normal file
87
celery_worker/database.py
Normal file
@ -0,0 +1,87 @@
|
||||
from contextlib import contextmanager
from typing import Optional

from sqlalchemy import create_engine
from sqlalchemy.orm import Session, sessionmaker

from rag.settings import settings
|
||||
|
||||
# Создаем синхронный engine для Celery (так как Celery работает в отдельных процессах)
|
||||
sync_engine = create_engine(
|
||||
settings.database_url.replace("asyncpg", "psycopg2"), # Убираем asyncpg для синхронного подключения
|
||||
echo=False,
|
||||
future=True
|
||||
)
|
||||
|
||||
# Создаем синхронный session maker
|
||||
SyncSessionLocal = sessionmaker(
|
||||
bind=sync_engine,
|
||||
autocommit=False,
|
||||
autoflush=False
|
||||
)
|
||||
|
||||
|
||||
@contextmanager
def get_sync_session():
    """Yield a synchronous SQLAlchemy Session for use in Celery tasks.

    Commits on clean exit, rolls back on any exception (which is re-raised),
    and always closes the session.

    Bug fix: the previous ``-> Session`` return annotation was wrong for a
    ``@contextmanager`` generator (it yields a Session, it does not return
    one), so it was removed.
    """
    session = SyncSessionLocal()
    try:
        yield session
        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()
||||
|
||||
class SyncResumeRepository:
    """Synchronous repository for Resume rows, for use inside Celery tasks."""

    def __init__(self, session: Session):
        self.session = session

    def get_by_id(self, resume_id: int):
        """Return the Resume with the given id, or None."""
        from app.models.resume import Resume
        return self.session.query(Resume).filter(Resume.id == resume_id).first()

    def update_status(self, resume_id: int, status: str,
                      parsed_data: Optional[dict] = None,
                      error_message: Optional[str] = None):
        """Update a resume's parsing status.

        status: one of 'parsing', 'parsed', 'failed'; any other value only
        refreshes updated_at. Returns the updated Resume, or None when the
        row does not exist.

        Bug fix: parsed_data/error_message were annotated ``dict = None`` /
        ``str = None`` (implicit Optional).
        """
        from datetime import datetime

        from app.models.resume import Resume, ResumeStatus

        resume = self.session.query(Resume).filter(Resume.id == resume_id).first()
        if resume is None:
            return None

        if status == 'parsing':
            resume.status = ResumeStatus.PARSING
        elif status == 'parsed':
            resume.status = ResumeStatus.PARSED
            if parsed_data:
                resume.parsed_data = parsed_data
            # Contact details entered in the form are authoritative; parsed
            # values intentionally do not overwrite them.
        elif status == 'failed':
            resume.status = ResumeStatus.PARSE_FAILED
            if error_message:
                resume.parse_error = error_message

        # NOTE(review): utcnow() is naive — presumably the column stores naive
        # UTC; confirm before switching to timezone-aware datetimes.
        resume.updated_at = datetime.utcnow()
        self.session.add(resume)
        return resume

    def update_interview_plan(self, resume_id: int, interview_plan: dict):
        """Attach a generated interview plan to the resume; None when missing."""
        from datetime import datetime

        from app.models.resume import Resume

        resume = self.session.query(Resume).filter(Resume.id == resume_id).first()
        if resume is None:
            return None

        resume.interview_plan = interview_plan
        resume.updated_at = datetime.utcnow()
        self.session.add(resume)
        return resume
190
celery_worker/process_cleanup_task.py
Normal file
190
celery_worker/process_cleanup_task.py
Normal file
@ -0,0 +1,190 @@
|
||||
import asyncio
|
||||
from celery import current_task
|
||||
from celery_worker.celery_app import celery_app
|
||||
from celery_worker.database import get_sync_session
|
||||
from app.services.interview_service import InterviewRoomService
|
||||
import psutil
|
||||
|
||||
|
||||
@celery_app.task(bind=True)
def cleanup_interview_processes_task(self):
    """
    Periodic task: reconcile DB sessions whose AI agent process has died.

    Scans InterviewSession rows with ai_agent_status == "running", probes each
    recorded PID with psutil, and resets dead ones to "stopped".
    Returns a summary dict with the number of cleaned sessions.
    """
    try:
        self.update_state(
            state='PROGRESS',
            meta={'status': 'Checking for dead AI processes...', 'progress': 10}
        )

        with get_sync_session() as session:
            from app.models.interview import InterviewSession

            active_sessions = session.query(InterviewSession).filter(
                InterviewSession.ai_agent_status == "running"
            ).all()

            cleaned_count = 0
            total_sessions = len(active_sessions)

            self.update_state(
                state='PROGRESS',
                meta={'status': f'Found {total_sessions} potentially active sessions...', 'progress': 30}
            )

            for i, interview_session in enumerate(active_sessions):
                if interview_session.ai_agent_pid:
                    # Bug fix: the dead-process bookkeeping used to be
                    # duplicated in two branches; collapse it into one flag.
                    is_dead = False
                    try:
                        process = psutil.Process(interview_session.ai_agent_pid)
                        is_dead = not process.is_running()
                    except psutil.NoSuchProcess:
                        is_dead = True
                    except Exception as e:
                        print(f"Error checking process {interview_session.ai_agent_pid}: {str(e)}")

                    if is_dead:
                        interview_session.ai_agent_pid = None
                        interview_session.ai_agent_status = "stopped"
                        session.add(interview_session)
                        cleaned_count += 1

                # Progress from 30% to 90% across the scan.
                progress = 30 + (i + 1) / total_sessions * 60
                self.update_state(
                    state='PROGRESS',
                    meta={
                        'status': f'Processed {i + 1}/{total_sessions} sessions...',
                        'progress': progress
                    }
                )

            # Persist the reconciled rows.
            session.commit()

        self.update_state(
            state='SUCCESS',
            meta={
                'status': f'Cleanup completed. Cleaned {cleaned_count} dead processes.',
                'progress': 100,
                'cleaned_count': cleaned_count,
                'total_checked': total_sessions
            }
        )

        return {
            'status': 'completed',
            'cleaned_count': cleaned_count,
            'total_checked': total_sessions
        }

    except Exception as e:
        self.update_state(
            state='FAILURE',
            meta={
                'status': f'Error during cleanup: {str(e)}',
                'progress': 0,
                'error': str(e)
            }
        )
        raise
|
||||
def _mark_agent_stopped(session, interview_session):
    """Clear the agent PID and mark the session stopped (caller commits)."""
    interview_session.ai_agent_pid = None
    interview_session.ai_agent_status = "stopped"
    session.add(interview_session)


@celery_app.task(bind=True)
def force_kill_interview_process_task(self, session_id: int):
    """
    Forcefully terminate the AI agent process for one interview session.

    Returns a status dict: 'not_found', 'no_process', 'terminated', or
    'already_dead'. Re-raises unexpected errors after reporting FAILURE.
    """
    try:
        self.update_state(
            state='PROGRESS',
            meta={'status': f'Looking for session {session_id}...', 'progress': 20}
        )

        with get_sync_session() as session:
            from app.models.interview import InterviewSession

            interview_session = session.query(InterviewSession).filter(
                InterviewSession.id == session_id
            ).first()

            if not interview_session:
                return {
                    'status': 'not_found',
                    'message': f'Session {session_id} not found'
                }

            if not interview_session.ai_agent_pid:
                return {
                    'status': 'no_process',
                    'message': f'No AI process found for session {session_id}'
                }

            self.update_state(
                state='PROGRESS',
                meta={'status': f'Terminating process {interview_session.ai_agent_pid}...', 'progress': 50}
            )

            try:
                process = psutil.Process(interview_session.ai_agent_pid)

                # Ask politely first.
                process.terminate()

                # Wait up to five seconds for a graceful exit.
                import time
                for _ in range(50):
                    if not process.is_running():
                        break
                    time.sleep(0.1)

                # Escalate to SIGKILL if it is still alive.
                if process.is_running():
                    process.kill()
                    time.sleep(0.5)  # give the OS a moment to reap it

                # Bug fix: the PID/status reset was duplicated in both exit
                # paths; it now lives in _mark_agent_stopped().
                _mark_agent_stopped(session, interview_session)
                session.commit()

                self.update_state(
                    state='SUCCESS',
                    meta={'status': 'Process terminated successfully', 'progress': 100}
                )

                return {
                    'status': 'terminated',
                    'message': f'AI process for session {session_id} terminated successfully'
                }

            except psutil.NoSuchProcess:
                # Process already gone; just reconcile the DB record.
                _mark_agent_stopped(session, interview_session)
                session.commit()

                return {
                    'status': 'already_dead',
                    # Bug fix: this literal had a spurious f-prefix with no
                    # placeholders.
                    'message': 'Process was already dead, cleaned up database'
                }

    except Exception as e:
        self.update_state(
            state='FAILURE',
            meta={
                'status': f'Error terminating process: {str(e)}',
                'progress': 0,
                'error': str(e)
            }
        )
        raise
412
celery_worker/tasks.py
Normal file
412
celery_worker/tasks.py
Normal file
@ -0,0 +1,412 @@
|
||||
import os
|
||||
import json
|
||||
from typing import Dict, Any
|
||||
from celery import current_task
|
||||
from datetime import datetime
|
||||
|
||||
from celery_worker.celery_app import celery_app
|
||||
from celery_worker.database import get_sync_session, SyncResumeRepository
|
||||
from rag.llm.model import ResumeParser
|
||||
from rag.registry import registry
|
||||
|
||||
|
||||
def generate_interview_plan(resume_id: int, combined_data: Dict[str, Any]) -> Dict[str, Any]:
    """Generate an interview plan from resume data via the LLM.

    Returns the parsed plan dict, or None when the resume is missing, the
    model reply contains no JSON object, or any step fails (annotation kept
    as Dict[str, Any] for interface compatibility, but None is possible).
    """
    try:
        # Confirm the resume still exists before spending an LLM call.
        with get_sync_session() as session:
            repo = SyncResumeRepository(session)
            resume_record = repo.get_by_id(resume_id)

            if not resume_record:
                return None

        # TODO: load the real vacancy once resumes are linked to vacancies;
        # this is a hard-coded placeholder.
        vacancy_data = {
            "title": "Python Developer",
            "requirements": "Python, FastAPI, PostgreSQL, Docker",
            "company_name": "Tech Company",
            "experience_level": "Middle"
        }

        chat_model = registry.get_chat_model()

        plan_prompt = f"""
        Создай детальный план интервью для кандидата на основе его резюме и требований вакансии.

        РЕЗЮМЕ КАНДИДАТА:
        - Имя: {combined_data.get('name', 'Не указано')}
        - Навыки: {', '.join(combined_data.get('skills', []))}
        - Опыт: {combined_data.get('total_years', 0)} лет
        - Образование: {combined_data.get('education', 'Не указано')}

        ВАКАНСИЯ:
        - Позиция: {vacancy_data['title']}
        - Требования: {vacancy_data['requirements']}
        - Компания: {vacancy_data['company_name']}
        - Уровень: {vacancy_data['experience_level']}

        Создай план интервью в формате JSON:
        {{
            "interview_structure": {{
                "duration_minutes": 45,
                "greeting": "Краткое приветствие и знакомство (3 мин)",
                "sections": [
                    {{
                        "name": "Знакомство с кандидатом",
                        "duration_minutes": 5,
                        "questions": ["Расскажи о себе", "Что привлекло в этой позиции?"]
                    }},
                    {{
                        "name": "Технические навыки",
                        "duration_minutes": 20,
                        "questions": ["Опыт с Python", "Работа с базами данных"]
                    }},
                    {{
                        "name": "Опыт и проекты",
                        "duration_minutes": 15,
                        "questions": ["Расскажи о сложном проекте", "Как решаешь проблемы?"]
                    }},
                    {{
                        "name": "Вопросы кандидата",
                        "duration_minutes": 2,
                        "questions": ["Есть ли вопросы ко мне?"]
                    }}
                ]
            }},
            "focus_areas": ["technical_skills", "problem_solving", "cultural_fit"],
            "key_evaluation_points": [
                "Глубина знаний Python",
                "Опыт командной работы",
                "Мотивация к изучению нового"
            ],
            "red_flags_to_check": [],
            "personalization_notes": "Кандидат имеет хороший технический опыт"
        }}
        """

        from langchain.schema import HumanMessage, SystemMessage
        messages = [
            SystemMessage(content="Ты HR эксперт по планированию интервью. Создавай структурированные планы."),
            HumanMessage(content=plan_prompt)
        ]

        response = chat_model.get_llm().invoke(messages)
        response_text = response.content.strip()

        # Bug fix: the startswith('{')/endswith('}') branch duplicated the
        # find/rfind extraction below (a pure-JSON reply is just the special
        # case where the braces are at the ends); one code path remains.
        start = response_text.find('{')
        end = response_text.rfind('}') + 1
        if start != -1 and end > start:
            return json.loads(response_text[start:end])

        return None

    except Exception as e:
        print(f"Ошибка генерации плана интервью: {str(e)}")
        return None
|
||||
def _set_resume_status(resume_id: str, status: str, **kwargs) -> None:
    """Persist a resume status change using a short-lived sync DB session.

    Deduplicates the session/repository boilerplate that was repeated at
    every failure point of the task below.
    """
    with get_sync_session() as session:
        repo = SyncResumeRepository(session)
        repo.update_status(int(resume_id), status, **kwargs)


@celery_app.task(bind=True)
def parse_resume_task(self, resume_id: str, file_path: str):
    """
    Asynchronous resume-parsing task.

    Extracts structured data from the uploaded resume file, merges it with
    the contact details supplied on the upload form, stores the profile in
    the vector store, persists the result in the relational DB, and finally
    generates an interview plan.

    Args:
        resume_id: Resume ID (stringified integer primary key).
        file_path: Path to the uploaded resume file.

    Returns:
        dict with ``resume_id``, ``status`` and the combined parsed data.

    Raises:
        Exception: on any failure; the resume row is marked ``failed`` first.
    """
    try:
        # Step 0: mark the resume as being parsed.
        _set_resume_status(resume_id, 'parsing')

        # Use the conventional custom 'PROGRESS' state for intermediate
        # updates. 'PENDING' is Celery's reserved "unknown/not yet run"
        # state; reporting it for a running task made progress polling
        # indistinguishable from a task that never started.
        self.update_state(
            state='PROGRESS',
            meta={'status': 'Начинаем парсинг резюме...', 'progress': 10}
        )

        # Initialize models from the registry.
        try:
            chat_model = registry.get_chat_model()
            # Retrieved to fail fast if the embeddings backend is
            # misconfigured, even though it is not used directly here.
            embeddings_model = registry.get_embeddings_model()
            vector_store = registry.get_vector_store()
        except Exception as e:
            _set_resume_status(resume_id, 'failed', error_message=f"Ошибка инициализации моделей: {str(e)}")
            raise Exception(f"Ошибка инициализации моделей: {str(e)}")

        # Step 1: extract structured data from the file.
        self.update_state(
            state='PROGRESS',
            meta={'status': 'Извлекаем текст из PDF...', 'progress': 20}
        )

        parser = ResumeParser(chat_model)

        if not os.path.exists(file_path):
            _set_resume_status(resume_id, 'failed', error_message=f"Файл не найден: {file_path}")
            raise Exception(f"Файл не найден: {file_path}")

        parsed_resume = parser.parse_resume_from_file(file_path)

        # Fetch the contact data the applicant entered on the upload form.
        with get_sync_session() as session:
            repo = SyncResumeRepository(session)
            resume_record = repo.get_by_id(int(resume_id))
            if not resume_record:
                raise Exception(f"Резюме с ID {resume_id} не найдено в базе данных")

            # Copy the fields while the session is still active.
            applicant_name = resume_record.applicant_name
            applicant_email = resume_record.applicant_email
            applicant_phone = resume_record.applicant_phone

        # Combine: skills/experience from parsing, contacts from the form.
        combined_data = parsed_resume.copy()
        combined_data['name'] = applicant_name
        combined_data['email'] = applicant_email
        combined_data['phone'] = applicant_phone or parsed_resume.get('phone', '')

        # Step 2: vectorize and store the profile in Milvus.
        self.update_state(
            state='PROGRESS',
            meta={'status': 'Сохраняем в векторную базу...', 'progress': 60}
        )
        vector_store.add_candidate_profile(str(resume_id), combined_data)

        # Step 3: persist the parsed data in the relational DB.
        self.update_state(
            state='PROGRESS',
            meta={'status': 'Обновляем статус в базе данных...', 'progress': 85}
        )

        # Step 4: generate the interview plan.
        self.update_state(
            state='PROGRESS',
            meta={'status': 'Генерируем план интервью...', 'progress': 90}
        )
        interview_plan = generate_interview_plan(int(resume_id), combined_data)

        with get_sync_session() as session:
            repo = SyncResumeRepository(session)
            repo.update_status(int(resume_id), 'parsed', parsed_data=combined_data)
            # Store the interview plan when generation succeeded.
            if interview_plan:
                repo.update_interview_plan(int(resume_id), interview_plan)

        # Final progress report; Celery will additionally record the return
        # value below as the task result.
        self.update_state(
            state='SUCCESS',
            meta={
                'status': 'Резюме успешно обработано и план интервью готов',
                'progress': 100,
                'result': combined_data
            }
        )

        return {
            'resume_id': resume_id,
            'status': 'completed',
            'parsed_data': combined_data
        }

    except Exception as e:
        self.update_state(
            state='FAILURE',
            meta={
                'status': f'Ошибка при обработке резюме: {str(e)}',
                'progress': 0,
                'error': str(e)
            }
        )

        # Best-effort: also record the failure in the DB; never mask the
        # original error with a secondary DB failure.
        try:
            _set_resume_status(resume_id, 'failed', error_message=str(e))
        except Exception as db_error:
            print(f"Ошибка при обновлении статуса в БД: {str(db_error)}")

        raise
|
||||
|
||||
|
||||
# Функция больше не нужна - используем SyncResumeRepository напрямую
|
||||
|
||||
|
||||
def _extract_json_object(response_text: str) -> dict:
    """Parse a JSON object out of an LLM reply, tolerating surrounding prose.

    Accepts either a bare JSON object or text containing one; raises
    ValueError when no object can be located.
    """
    import json

    response_text = response_text.strip()
    if response_text.startswith('{') and response_text.endswith('}'):
        return json.loads(response_text)
    start = response_text.find('{')
    end = response_text.rfind('}') + 1
    if start != -1 and end > start:
        return json.loads(response_text[start:end])
    raise ValueError("JSON не найден в ответе LLM")


@celery_app.task(bind=True)
def generate_interview_questions_task(self, resume_id: str, job_description: str):
    """
    Generate personalized interview questions from a parsed resume and a
    vacancy description.

    Args:
        resume_id: Resume ID.
        job_description: Vacancy description text.

    Returns:
        dict with ``resume_id``, ``status`` and the generated questions.

    Raises:
        Exception: on any failure (missing resume, unparsed resume, LLM or
        JSON-parsing error).
    """
    # Imports hoisted here from mid-function so every code path sees them.
    import json
    from datetime import datetime

    from langchain.schema import HumanMessage, SystemMessage

    try:
        # 'PROGRESS' (not the reserved 'PENDING') for intermediate updates,
        # consistent with parse_resume_task.
        self.update_state(
            state='PROGRESS',
            meta={'status': 'Начинаем генерацию вопросов...', 'progress': 10}
        )

        # Initialize models.
        try:
            chat_model = registry.get_chat_model()
            vector_store = registry.get_vector_store()
        except Exception as e:
            raise Exception(f"Ошибка инициализации моделей: {str(e)}")

        # Step 1: load the parsed resume from the database.
        self.update_state(
            state='PROGRESS',
            meta={'status': 'Получаем данные резюме...', 'progress': 20}
        )

        with get_sync_session() as session:
            repo = SyncResumeRepository(session)
            resume = repo.get_by_id(int(resume_id))

            if not resume:
                raise Exception(f"Резюме с ID {resume_id} не найдено")
            if not resume.parsed_data:
                raise Exception(f"Резюме {resume_id} еще не обработано")

            # Copy while the session is open: reading attributes on a
            # detached instance may fail depending on session settings
            # (same convention as parse_resume_task).
            parsed_data = resume.parsed_data

        # Step 2: look up similar candidates in Milvus for context.
        self.update_state(
            state='PROGRESS',
            meta={'status': 'Анализируем профиль кандидата...', 'progress': 40}
        )

        candidate_skills = " ".join(parsed_data.get('skills', []))
        similar_candidates = vector_store.search_similar_candidates(candidate_skills, k=3)

        # Step 3: ask the LLM for personalized questions.
        self.update_state(
            state='PROGRESS',
            meta={'status': 'Генерируем вопросы для интервью...', 'progress': 70}
        )

        questions_prompt = f"""
        Сгенерируй 10 персонализированных вопросов для интервью кандидата на основе его резюме и описания вакансии.

        РЕЗЮМЕ КАНДИДАТА:
        Имя: {parsed_data.get('name', 'Не указано')}
        Навыки: {', '.join(parsed_data.get('skills', []))}
        Опыт работы: {parsed_data.get('total_years', 0)} лет
        Образование: {parsed_data.get('education', 'Не указано')}

        ОПИСАНИЕ ВАКАНСИИ:
        {job_description}

        ИНСТРУКЦИИ:
        1. Задавай вопросы, которые помогут оценить технические навыки кандидата
        2. Включи вопросы о конкретном опыте работы из резюме
        3. Добавь вопросы на соответствие требованиям вакансии
        4. Включи 2-3 поведенческих вопроса
        5. Верни ответ в JSON формате

        Формат ответа:
        {{
            "questions": [
                {{
                    "id": 1,
                    "category": "technical|experience|behavioral|vacancy_specific",
                    "question": "Текст вопроса",
                    "reasoning": "Почему этот вопрос важен для данного кандидата"
                }}
            ]
        }}
        """

        messages = [
            SystemMessage(content="Ты эксперт по проведению технических интервью. Генерируй качественные, персонализированные вопросы."),
            HumanMessage(content=questions_prompt)
        ]

        response = chat_model.get_llm().invoke(messages)
        questions_data = _extract_json_object(response.content)

        # Step 4: store the questions in the resume notes (temporary home
        # until a dedicated table exists).
        self.update_state(
            state='PROGRESS',
            meta={'status': 'Сохраняем вопросы...', 'progress': 90}
        )

        with get_sync_session() as session:
            repo = SyncResumeRepository(session)
            resume = repo.get_by_id(int(resume_id))

            if resume:
                existing_notes = resume.notes or ""
                interview_questions = json.dumps(questions_data, ensure_ascii=False, indent=2)
                resume.notes = f"{existing_notes}\n\nINTERVIEW QUESTIONS:\n{interview_questions}"
                # NOTE(review): naive UTC timestamp kept for schema
                # compatibility; consider timezone-aware datetimes.
                resume.updated_at = datetime.utcnow()
                session.add(resume)

        self.update_state(
            state='SUCCESS',
            meta={
                'status': 'Вопросы для интервью успешно сгенерированы',
                'progress': 100,
                'result': questions_data
            }
        )

        return {
            'resume_id': resume_id,
            'status': 'questions_generated',
            'questions': questions_data['questions']
        }

    except Exception as e:
        self.update_state(
            state='FAILURE',
            meta={
                'status': f'Ошибка при генерации вопросов: {str(e)}',
                'progress': 0,
                'error': str(e)
            }
        )
        raise Exception(f"Ошибка при генерации вопросов: {str(e)}")
|
2
main.py
2
main.py
@ -4,6 +4,7 @@ from contextlib import asynccontextmanager
|
||||
from app.core.session_middleware import SessionMiddleware
|
||||
from app.routers import vacancy_router, resume_router
|
||||
from app.routers.session_router import router as session_router
|
||||
from app.routers.interview_router import router as interview_router
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
@ -32,6 +33,7 @@ app.add_middleware(SessionMiddleware, cookie_name="session_id")
|
||||
app.include_router(vacancy_router, prefix="/api/v1")
|
||||
app.include_router(resume_router, prefix="/api/v1")
|
||||
app.include_router(session_router, prefix="/api/v1")
|
||||
app.include_router(interview_router, prefix="/api/v1")
|
||||
|
||||
|
||||
@app.get("/")
|
||||
|
@ -0,0 +1,30 @@
|
||||
"""add interview_plan to resume
|
||||
|
||||
Revision ID: 1a2cda4df181
|
||||
Revises: 4d04e6e32445
|
||||
Create Date: 2025-09-02 23:38:36.541565
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = '1a2cda4df181'
|
||||
down_revision: Union[str, Sequence[str], None] = '4d04e6e32445'
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Upgrade schema: add a nullable JSON ``interview_plan`` column to ``resume``."""
    interview_plan_col = sa.Column('interview_plan', sa.JSON(), nullable=True)
    op.add_column('resume', interview_plan_col)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Downgrade schema: remove the ``interview_plan`` column from ``resume``."""
    op.drop_column('resume', 'interview_plan')
|
@ -0,0 +1,32 @@
|
||||
"""Add InterviewStatus enum type
|
||||
|
||||
Revision ID: 385d03e3281c
|
||||
Revises: 4723b138a3bb
|
||||
Create Date: 2025-09-02 20:00:00.689080
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = '385d03e3281c'
|
||||
down_revision: Union[str, Sequence[str], None] = '4723b138a3bb'
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Upgrade schema: create the PostgreSQL ``interviewstatus`` enum type."""
    status_values = ('created', 'active', 'completed', 'failed')
    sa.Enum(*status_values, name='interviewstatus').create(op.get_bind())
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Downgrade schema: drop the ``interviewstatus`` enum type."""
    status_values = ('created', 'active', 'completed', 'failed')
    sa.Enum(*status_values, name='interviewstatus').drop(op.get_bind())
|
@ -0,0 +1,47 @@
|
||||
"""Add interview_sessions table
|
||||
|
||||
Revision ID: 4723b138a3bb
|
||||
Revises: dba37152ae9a
|
||||
Create Date: 2025-09-02 19:31:03.531702
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = '4723b138a3bb'
|
||||
down_revision: Union[str, Sequence[str], None] = 'dba37152ae9a'
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Upgrade schema: create ``interview_sessions`` plus its id index."""
    table_members = [
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('resume_id', sa.Integer(), nullable=False),
        sa.Column('room_name', sa.String(length=255), nullable=False),
        sa.Column('status', sa.String(length=50), nullable=False),
        sa.Column('transcript', sa.Text(), nullable=True),
        sa.Column('ai_feedback', sa.Text(), nullable=True),
        sa.Column('started_at', sa.DateTime(), nullable=False),
        sa.Column('completed_at', sa.DateTime(), nullable=True),
        sa.ForeignKeyConstraint(['resume_id'], ['resume.id'], ),
        sa.PrimaryKeyConstraint('id'),
        sa.UniqueConstraint('room_name'),
    ]
    op.create_table('interview_sessions', *table_members)
    op.create_index(op.f('ix_interview_sessions_id'), 'interview_sessions', ['id'], unique=False)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Downgrade schema: drop ``interview_sessions`` and its id index."""
    op.drop_index(op.f('ix_interview_sessions_id'), table_name='interview_sessions')
    op.drop_table('interview_sessions')
|
@ -0,0 +1,63 @@
|
||||
"""Update InterviewSession model to use proper enum
|
||||
|
||||
Revision ID: 4d04e6e32445
|
||||
Revises: 96ffcf34e1de
|
||||
Create Date: 2025-09-02 20:10:52.321402
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = '4d04e6e32445'
|
||||
down_revision: Union[str, Sequence[str], None] = '96ffcf34e1de'
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Upgrade schema: recreate ``interview_sessions`` with the existing
    ``interviewstatus`` enum as the ``status`` column type."""
    # Drop the VARCHAR-status version of the table first.
    op.drop_index(op.f('ix_interview_sessions_id'), table_name='interview_sessions')
    op.drop_table('interview_sessions')

    # ``create_type=False``: the enum already exists in the database.
    status_type = postgresql.ENUM(
        'created', 'active', 'completed', 'failed',
        name='interviewstatus', create_type=False,
    )
    op.create_table(
        'interview_sessions',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('resume_id', sa.Integer(), nullable=False),
        sa.Column('room_name', sa.String(length=255), nullable=False),
        sa.Column('status', status_type, nullable=False),
        sa.Column('transcript', sa.Text(), nullable=True),
        sa.Column('ai_feedback', sa.Text(), nullable=True),
        sa.Column('started_at', sa.DateTime(), nullable=False),
        sa.Column('completed_at', sa.DateTime(), nullable=True),
        sa.ForeignKeyConstraint(['resume_id'], ['resume.id'], ),
        sa.PrimaryKeyConstraint('id'),
        sa.UniqueConstraint('room_name'),
    )
    op.create_index(op.f('ix_interview_sessions_id'), 'interview_sessions', ['id'], unique=False)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Downgrade schema: restore the VARCHAR-status ``interview_sessions`` table."""
    op.drop_index(op.f('ix_interview_sessions_id'), table_name='interview_sessions')
    op.drop_table('interview_sessions')

    # Recreate the previous structure with a plain string status column.
    op.create_table(
        'interview_sessions',
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('resume_id', sa.Integer(), nullable=False),
        sa.Column('room_name', sa.String(length=255), nullable=False),
        sa.Column('status', sa.String(length=50), nullable=False),
        sa.Column('transcript', sa.Text(), nullable=True),
        sa.Column('ai_feedback', sa.Text(), nullable=True),
        sa.Column('started_at', sa.DateTime(), nullable=False),
        sa.Column('completed_at', sa.DateTime(), nullable=True),
        sa.ForeignKeyConstraint(['resume_id'], ['resume.id'], ),
        sa.PrimaryKeyConstraint('id'),
        sa.UniqueConstraint('room_name'),
    )
    op.create_index(op.f('ix_interview_sessions_id'), 'interview_sessions', ['id'], unique=False)
|
@ -0,0 +1,30 @@
|
||||
"""Update interview_sessions status column to use enum
|
||||
|
||||
Revision ID: 96ffcf34e1de
|
||||
Revises: 385d03e3281c
|
||||
Create Date: 2025-09-02 20:01:52.904608
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = '96ffcf34e1de'
|
||||
down_revision: Union[str, Sequence[str], None] = '385d03e3281c'
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Upgrade schema: convert ``interview_sessions.status`` to the enum type."""
    # USING cast is required by PostgreSQL to convert existing VARCHAR rows.
    op.execute("ALTER TABLE interview_sessions ALTER COLUMN status TYPE interviewstatus USING status::interviewstatus")
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Downgrade schema: revert ``status`` back to VARCHAR(50)."""
    op.execute("ALTER TABLE interview_sessions ALTER COLUMN status TYPE VARCHAR(50)")
|
@ -0,0 +1,43 @@
|
||||
"""Add parsing statuses to ResumeStatus enum
|
||||
|
||||
Revision ID: dba37152ae9a
|
||||
Revises: 4e19b8fe4a88
|
||||
Create Date: 2025-09-02 14:45:30.749202
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
import sqlmodel
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = 'dba37152ae9a'
|
||||
down_revision: Union[str, Sequence[str], None] = '4e19b8fe4a88'
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Upgrade schema: extend the ``resumestatus`` enum and add parse-result columns."""
    # IF NOT EXISTS keeps the migration re-runnable on a partially
    # upgraded database.
    for new_value in ('PARSING', 'PARSED', 'PARSE_FAILED'):
        op.execute(f"ALTER TYPE resumestatus ADD VALUE IF NOT EXISTS '{new_value}'")

    op.add_column('resume', sa.Column('parsed_data', sa.JSON(), nullable=True))
    op.add_column('resume', sa.Column('parse_error', sqlmodel.sql.sqltypes.AutoString(), nullable=True))
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Downgrade schema: drop the parse-result columns (enum values remain)."""
    op.drop_column('resume', 'parse_error')
    op.drop_column('resume', 'parsed_data')
|
||||
|
||||
# Note: Cannot remove ENUM values in PostgreSQL, they are permanent once added
|
||||
# If needed, would require recreating the ENUM type
|
@ -0,0 +1,32 @@
|
||||
"""add ai agent process tracking
|
||||
|
||||
Revision ID: de11b016b35a
|
||||
Revises: 1a2cda4df181
|
||||
Create Date: 2025-09-03 00:02:24.263636
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = 'de11b016b35a'
|
||||
down_revision: Union[str, Sequence[str], None] = '1a2cda4df181'
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Upgrade schema: add AI-agent process tracking to ``interview_sessions``."""
    pid_col = sa.Column('ai_agent_pid', sa.Integer(), nullable=True)
    # server_default keeps existing rows valid under NOT NULL.
    status_col = sa.Column('ai_agent_status', sa.String(), server_default='not_started', nullable=False)
    op.add_column('interview_sessions', pid_col)
    op.add_column('interview_sessions', status_col)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Downgrade schema: drop the AI-agent process tracking columns."""
    op.drop_column('interview_sessions', 'ai_agent_status')
    op.drop_column('interview_sessions', 'ai_agent_pid')
|
@ -13,6 +13,20 @@ dependencies = [
|
||||
"boto3>=1.34.0",
|
||||
"python-dotenv>=1.0.0",
|
||||
"pydantic-settings>=2.1.0",
|
||||
"langchain>=0.1.0",
|
||||
"langchain-core>=0.1.0",
|
||||
"langchain-community>=0.0.10",
|
||||
"redis>=5.0.0",
|
||||
"langchain-openai>=0.3.32",
|
||||
"langchain-milvus>=0.2.1",
|
||||
"celery>=5.3.0",
|
||||
"pdfplumber>=0.10.0",
|
||||
"psycopg2-binary>=2.9.0",
|
||||
"python-docx>=1.2.0",
|
||||
"docx2txt>=0.9",
|
||||
"livekit>=1.0.12",
|
||||
"livekit-api>=1.0.5",
|
||||
"livekit-agents[cartesia,deepgram,openai,silero,resemble,turn-detector]~=1.2",
|
||||
]
|
||||
|
||||
[build-system]
|
||||
@ -45,4 +59,4 @@ line_length = 88
|
||||
python_version = "3.11"
|
||||
warn_return_any = true
|
||||
warn_unused_configs = true
|
||||
disallow_untyped_defs = true
|
||||
|
186
quick_api_test.py
Normal file
186
quick_api_test.py
Normal file
@ -0,0 +1,186 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Quick API testing script"""
|
||||
|
||||
import requests
|
||||
import json
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
BASE_URL = "http://localhost:8000"
|
||||
|
||||
def test_health():
    """Return True when GET /health answers 200 within five seconds."""
    try:
        resp = requests.get(f"{BASE_URL}/health", timeout=5)
        print(f"Health check: {resp.status_code}")
        return resp.status_code == 200
    except Exception as e:
        print(f"API not available: {str(e)}")
        return False
|
||||
|
||||
def upload_test_resume():
    """Upload ``test_resume.txt`` with test applicant data.

    Returns the new resume ID on success, None otherwise.
    """
    try:
        resume_path = Path("test_resume.txt")
        if not resume_path.exists():
            print("test_resume.txt not found!")
            return None

        applicant_form = {
            'applicant_name': 'Иванов Иван Иванович',
            'applicant_email': 'ivan.ivanov@example.com',
            'applicant_phone': '+7 (999) 123-45-67',
            'vacancy_id': '1',
        }

        # NOTE(review): the file is opened in text mode; binary ('rb') is
        # the usual choice for uploads — confirm the endpoint accepts this.
        with open(resume_path, 'r', encoding='utf-8') as f:
            upload = {'file': (resume_path.name, f, 'text/plain')}
            response = requests.post(
                f"{BASE_URL}/resume/upload",
                files=upload,
                data=applicant_form,
                timeout=30,
            )

            print(f"Resume upload: {response.status_code}")
            if response.status_code != 200:
                print(f"Upload failed: {response.text}")
                return None

            result = response.json()
            print(f"Resume ID: {result.get('resume_id')}")
            return result.get('resume_id')

    except Exception as e:
        print(f"Upload error: {str(e)}")
        return None
|
||||
|
||||
def check_resume_processing(resume_id):
    """Fetch the resume record and report its processing status.

    Returns the resume dict on HTTP 200, None otherwise.
    """
    try:
        resp = requests.get(f"{BASE_URL}/resume/{resume_id}")
        print(f"Resume status check: {resp.status_code}")

        if resp.status_code != 200:
            print(f"Resume check failed: {resp.text}")
            return None

        resume = resp.json()
        print(f"Status: {resume.get('status')}")
        has_plan = 'interview_plan' in resume and resume['interview_plan'] is not None
        print(f"Has interview plan: {has_plan}")
        return resume

    except Exception as e:
        print(f"Status check error: {str(e)}")
        return None
|
||||
|
||||
def create_interview_session(resume_id):
    """Request a LiveKit interview token for the given resume.

    Returns the token payload dict on HTTP 200, None otherwise.
    """
    try:
        resp = requests.post(f"{BASE_URL}/interview/{resume_id}/token")
        print(f"Interview session creation: {resp.status_code}")

        if resp.status_code != 200:
            print(f"Interview creation failed: {resp.text}")
            return None

        payload = resp.json()
        print(f"Room: {payload.get('room_name')}")
        print(f"Token length: {len(payload.get('token', ''))}")
        return payload

    except Exception as e:
        print(f"Interview creation error: {str(e)}")
        return None
|
||||
|
||||
def check_admin_processes():
    """Query the admin endpoint listing running AI-agent processes.

    Returns the response dict on HTTP 200, None otherwise.
    """
    try:
        resp = requests.get(f"{BASE_URL}/admin/interview-processes")
        print(f"Admin processes check: {resp.status_code}")

        if resp.status_code != 200:
            print(f"Admin check failed: {resp.text}")
            return None

        result = resp.json()
        print(f"Active sessions: {result.get('total_active_sessions')}")
        for proc in result.get('processes', []):
            print(f"  Session {proc['session_id']}: PID {proc['pid']}, Running: {proc['is_running']}")
        return result

    except Exception as e:
        print(f"Admin check error: {str(e)}")
        return None
|
||||
|
||||
def main():
    """Drive the quick end-to-end API smoke test, step by step."""

    def banner(title):
        # Framed section header used at the start and end of the run.
        print("=" * 50)
        print(title)
        print("=" * 50)

    banner("QUICK API TEST")

    # 1. API availability.
    if not test_health():
        print("❌ API not running! Start with: uvicorn app.main:app --reload")
        return
    print("✅ API is running")

    # 2. Resume upload.
    print("\n--- Testing Resume Upload ---")
    resume_id = upload_test_resume()
    if not resume_id:
        print("❌ Resume upload failed!")
        return
    print(f"✅ Resume uploaded with ID: {resume_id}")

    # 3. Give Celery time to process, then poll the status.
    print("\n--- Checking Resume Processing ---")
    print("Waiting 10 seconds for Celery processing...")
    time.sleep(10)

    resume_data = check_resume_processing(resume_id)
    if not resume_data:
        print("❌ Could not check resume status!")
        return

    if resume_data.get('status') == 'parsed':
        print("✅ Resume processed successfully")
    else:
        print(f"⚠️ Resume status: {resume_data.get('status')}")

    # 4. Interview session / token.
    print("\n--- Testing Interview Session ---")
    interview_data = create_interview_session(resume_id)
    if interview_data:
        print("✅ Interview session created")
    else:
        print("❌ Interview session creation failed")

    # 5. Admin process monitoring.
    print("\n--- Testing Admin Monitoring ---")
    admin_data = check_admin_processes()
    if admin_data:
        print("✅ Admin monitoring works")
    else:
        print("❌ Admin monitoring failed")

    print("\n" + "=" * 50)
    print("QUICK TEST COMPLETED")
    print("=" * 50)

    print("\nNext steps:")
    print("1. Check Celery worker logs for task processing")
    print("2. Inspect database for interview_plan data")
    print("3. For voice testing, start LiveKit server")
    print("4. Monitor system with: curl http://localhost:8000/admin/system-stats")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
5
rag/__init__.py
Normal file
5
rag/__init__.py
Normal file
@ -0,0 +1,5 @@
|
||||
from .database import VectorStore
from .llm import ChatModel, EmbeddingsModel
from .service import RagService

# VectorStore is imported above but was previously missing from the public
# API list; adding it keeps `from rag import *` consistent with the imports.
__all__ = ['RagService', 'ChatModel', 'EmbeddingsModel', 'VectorStore']
|
3
rag/database/__init__.py
Normal file
3
rag/database/__init__.py
Normal file
@ -0,0 +1,3 @@
|
||||
from .model import VectorStoreModel as VectorStore
|
||||
|
||||
__all__ = ['VectorStore']
|
17
rag/database/model.py
Normal file
17
rag/database/model.py
Normal file
@ -0,0 +1,17 @@
|
||||
from langchain_core.vectorstores.base import VectorStore
|
||||
|
||||
|
||||
class VectorStoreModel:
    """Thin wrapper around a LangChain vector store."""

    def __init__(self, store: "VectorStore"):
        # The wrapped LangChain vector store instance.
        self.store = store

    def get_store(self):
        """Return the underlying vector store."""
        return self.store

    def similarity_search(self, query: str, k: int = 5):
        """Return the ``k`` most similar documents for ``query``.

        ``k`` was previously hard-coded to 5; it is now a parameter with the
        same default, so existing callers are unaffected.
        """
        results = self.store.similarity_search(
            query,
            k=k,
        )
        return results
|
3
rag/llm/__init__.py
Normal file
3
rag/llm/__init__.py
Normal file
@ -0,0 +1,3 @@
|
||||
from .model import ChatModel, EmbeddingsModel
|
||||
|
||||
__all__ = ['ChatModel', 'EmbeddingsModel']
|
206
rag/llm/model.py
Normal file
206
rag/llm/model.py
Normal file
@ -0,0 +1,206 @@
|
||||
import json
|
||||
import pdfplumber
|
||||
import os
|
||||
from typing import Dict, Any
|
||||
from langchain_core.embeddings import Embeddings
|
||||
from langchain_core.language_models import BaseChatModel
|
||||
from langchain.schema import HumanMessage, SystemMessage
|
||||
|
||||
try:
|
||||
from docx import Document
|
||||
except ImportError:
|
||||
Document = None
|
||||
|
||||
try:
|
||||
import docx2txt
|
||||
except ImportError:
|
||||
docx2txt = None
|
||||
|
||||
class EmbeddingsModel:
    """Holder for a LangChain embeddings backend.

    Exists so callers depend on this project wrapper rather than directly
    on the LangChain ``Embeddings`` interface.
    """

    def __init__(self, model: Embeddings):
        # Wrapped embeddings implementation.
        self.model = model

    def get_model(self):
        """Return the wrapped embeddings model."""
        return self.model
|
||||
|
||||
class ChatModel:
    """Holder for a LangChain chat model.

    Mirrors ``EmbeddingsModel``: a thin project-level wrapper over the
    LangChain ``BaseChatModel`` interface.
    """

    def __init__(self, model: BaseChatModel):
        # Wrapped chat model implementation.
        self.model = model

    def get_llm(self):
        """Return the wrapped chat model."""
        return self.model
|
||||
|
||||
class ResumeParser:
    """Extracts structured candidate data from resume files via an LLM."""

    def __init__(self, chat_model: ChatModel):
        # Underlying LLM used for the extraction prompt.
        self.llm = chat_model.get_llm()
        # Prompt instructing the LLM to return structured resume data as
        # bare JSON ({{ }} are literal braces for later .format()).
        self.resume_prompt = """
        Проанализируй текст резюме и извлеки из него структурированные данные в JSON формате.
        Верни только JSON без дополнительных комментариев.

        Формат ответа:
        {{
            "name": "Имя кандидата",
            "email": "email@example.com",
            "phone": "+7-XXX-XXX-XX-XX",
            "skills": ["навык1", "навык2", "навык3"],
            "experience": [
                {{
                    "company": "Название компании",
                    "position": "Должность",
                    "period": "2021-2024",
                    "description": "Краткое описание обязанностей"
                }}
            ],
            "total_years": 3.5,
            "education": "Образование",
            "summary": "Краткое резюме о кандидате"
        }}

        Текст резюме:
        {resume_text}
        """
|
||||
|
||||
def extract_text_from_pdf(self, file_path: str) -> str:
|
||||
"""Извлекает текст из PDF файла"""
|
||||
try:
|
||||
with pdfplumber.open(file_path) as pdf:
|
||||
text = '\n'.join([page.extract_text() or '' for page in pdf.pages])
|
||||
return text.strip()
|
||||
except Exception as e:
|
||||
raise Exception(f"Ошибка при чтении PDF: {str(e)}")
|
||||
|
||||
def extract_text_from_docx(self, file_path: str) -> str:
|
||||
"""Извлекает текст из DOCX файла"""
|
||||
try:
|
||||
print(f"[DEBUG] Extracting DOCX text from: {file_path}")
|
||||
|
||||
if docx2txt:
|
||||
# Предпочитаем docx2txt для простого извлечения текста
|
||||
print("[DEBUG] Using docx2txt")
|
||||
text = docx2txt.process(file_path)
|
||||
if text:
|
||||
print(f"[DEBUG] Extracted {len(text)} characters using docx2txt")
|
||||
return text.strip()
|
||||
else:
|
||||
print("[DEBUG] docx2txt returned empty text")
|
||||
|
||||
if Document:
|
||||
# Используем python-docx как fallback
|
||||
print("[DEBUG] Using python-docx as fallback")
|
||||
doc = Document(file_path)
|
||||
text = '\n'.join([paragraph.text for paragraph in doc.paragraphs])
|
||||
print(f"[DEBUG] Extracted {len(text)} characters using python-docx")
|
||||
return text.strip()
|
||||
|
||||
raise Exception("Библиотеки для чтения DOCX не установлены (docx2txt или python-docx)")
|
||||
except Exception as e:
|
||||
print(f"[DEBUG] DOCX extraction failed: {str(e)}")
|
||||
raise Exception(f"Ошибка при чтении DOCX: {str(e)}")
|
||||
|
||||
def extract_text_from_doc(self, file_path: str) -> str:
|
||||
"""Извлекает текст из DOC файла"""
|
||||
try:
|
||||
# Для .doc файлов используем antiword (если установлен) или попробуем python-docx
|
||||
if Document:
|
||||
try:
|
||||
doc = Document(file_path)
|
||||
text = '\n'.join([paragraph.text for paragraph in doc.paragraphs])
|
||||
return text.strip()
|
||||
except:
|
||||
# Если python-docx не может прочитать .doc, пытаемся использовать системные утилиты
|
||||
pass
|
||||
|
||||
# Попытка использовать системную команду antiword (для Linux/Mac)
|
||||
import subprocess
|
||||
try:
|
||||
result = subprocess.run(['antiword', file_path], capture_output=True, text=True)
|
||||
if result.returncode == 0:
|
||||
return result.stdout.strip()
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
|
||||
raise Exception("Не удалось найти подходящий инструмент для чтения DOC файлов. Рекомендуется использовать DOCX формат.")
|
||||
except Exception as e:
|
||||
raise Exception(f"Ошибка при чтении DOC: {str(e)}")
|
||||
|
||||
def extract_text_from_txt(self, file_path: str) -> str:
|
||||
"""Извлекает текст из TXT файла"""
|
||||
try:
|
||||
# Попробуем разные кодировки
|
||||
encodings = ['utf-8', 'cp1251', 'latin-1', 'cp1252']
|
||||
|
||||
for encoding in encodings:
|
||||
try:
|
||||
with open(file_path, 'r', encoding=encoding) as file:
|
||||
text = file.read()
|
||||
return text.strip()
|
||||
except UnicodeDecodeError:
|
||||
continue
|
||||
|
||||
raise Exception("Не удалось определить кодировку текстового файла")
|
||||
except Exception as e:
|
||||
raise Exception(f"Ошибка при чтении TXT: {str(e)}")
|
||||
|
||||
def extract_text_from_file(self, file_path: str) -> str:
|
||||
"""Универсальный метод извлечения текста из файла"""
|
||||
if not os.path.exists(file_path):
|
||||
raise Exception(f"Файл не найден: {file_path}")
|
||||
|
||||
# Определяем расширение файла
|
||||
_, ext = os.path.splitext(file_path.lower())
|
||||
|
||||
# Добавляем отладочную информацию
|
||||
print(f"[DEBUG] Parsing file: {file_path}, detected extension: {ext}")
|
||||
|
||||
if ext == '.pdf':
|
||||
return self.extract_text_from_pdf(file_path)
|
||||
elif ext == '.docx':
|
||||
return self.extract_text_from_docx(file_path)
|
||||
elif ext == '.doc':
|
||||
return self.extract_text_from_doc(file_path)
|
||||
elif ext == '.txt':
|
||||
return self.extract_text_from_txt(file_path)
|
||||
else:
|
||||
raise Exception(f"Неподдерживаемый формат файла: {ext}. Поддерживаемые форматы: PDF, DOCX, DOC, TXT")
|
||||
|
||||
def parse_resume_text(self, resume_text: str) -> Dict[str, Any]:
|
||||
"""Парсит текст резюме через LLM"""
|
||||
try:
|
||||
messages = [
|
||||
SystemMessage(content="Ты эксперт по анализу резюме. Извлекай данные точно в указанном JSON формате."),
|
||||
HumanMessage(content=self.resume_prompt.format(resume_text=resume_text))
|
||||
]
|
||||
|
||||
response = self.llm.invoke(messages)
|
||||
|
||||
# Извлекаем JSON из ответа
|
||||
response_text = response.content.strip()
|
||||
|
||||
# Пытаемся найти JSON в ответе
|
||||
if response_text.startswith('{') and response_text.endswith('}'):
|
||||
return json.loads(response_text)
|
||||
else:
|
||||
# Ищем JSON внутри текста
|
||||
start = response_text.find('{')
|
||||
end = response_text.rfind('}') + 1
|
||||
if start != -1 and end > start:
|
||||
json_str = response_text[start:end]
|
||||
return json.loads(json_str)
|
||||
else:
|
||||
raise ValueError("JSON не найден в ответе LLM")
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
raise Exception(f"Ошибка парсинга JSON из ответа LLM: {str(e)}")
|
||||
except Exception as e:
|
||||
raise Exception(f"Ошибка при обращении к LLM: {str(e)}")
|
||||
|
||||
def parse_resume_from_file(self, file_path: str) -> Dict[str, Any]:
|
||||
"""Полный цикл парсинга резюме из файла"""
|
||||
# Шаг 1: Извлекаем текст из файла (поддерживаем PDF, DOCX, DOC, TXT)
|
||||
resume_text = self.extract_text_from_file(file_path)
|
||||
|
||||
if not resume_text:
|
||||
raise Exception("Не удалось извлечь текст из файла")
|
||||
|
||||
# Шаг 2: Парсим через LLM
|
||||
return self.parse_resume_text(resume_text)
|
91
rag/memory.py
Normal file
91
rag/memory.py
Normal file
@ -0,0 +1,91 @@
|
||||
import json
|
||||
from datetime import datetime
|
||||
from typing import List, Optional
|
||||
import redis
|
||||
from sqlmodel import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from langchain.schema import HumanMessage, AIMessage
|
||||
from langchain.memory import ConversationSummaryBufferMemory
|
||||
|
||||
from rag.settings import settings
|
||||
|
||||
|
||||
class ChatMemoryManager:
    """Loads per-session chat history (Redis cache first, DB second) and
    exposes it as a LangChain ConversationSummaryBufferMemory.
    """

    def __init__(self, llm, token_limit=3000):
        # NOTE(review): this is the synchronous redis client, used from async
        # methods below — it will block the event loop; confirm acceptable.
        self.redis = redis.Redis(host=settings.redis_cache_url, port=settings.redis_cache_port, db=settings.redis_cache_db)
        self.llm = llm
        # Max tokens kept verbatim before the summary buffer summarizes.
        self.token_limit = token_limit

    def _convert_to_langchain(self, messages: List[dict]):
        """Map serialized message dicts to LangChain AI/Human messages."""
        return [
            AIMessage(content=msg["content"]) if msg["is_ai"]
            else HumanMessage(content=msg["content"])
            for msg in messages
        ]

    def _annotate_messages(self, messages: List):
        # Convert to format compatible with langchain
        # Assuming messages have some way to identify if they're from AI
        # NOTE(review): "SOMMELIER" looks copied from another project —
        # confirm the AI-author markers for this service.
        return [
            {
                **msg,
                "is_ai": msg.get("user_type") == "AI" or msg.get("username") == "SOMMELIER"
            }
            for msg in messages
        ]

    def _serialize_messages(self, messages: List[dict]):
        """Make messages JSON-safe: datetime -> ISO-8601 string."""
        return [
            {**msg, "created_at": msg["created_at"].isoformat()}
            for msg in messages
        ]

    def _cache_key(self, session_id: int) -> str:
        """Redis key under which a session's history is cached."""
        return f"chat_memory:{session_id}"

    async def load_chat_history(self, session_id: int, session: AsyncSession) -> List[HumanMessage | AIMessage]:
        """Return the session's history as LangChain messages.

        Strategy: serve from the Redis cache when present, topping it up with
        any DB messages newer than the cached tail; otherwise fall back to a
        full DB load (currently a placeholder) and prime the cache (1h TTL).
        """
        cache_key = self._cache_key(session_id)
        serialized = self.redis.get(cache_key)

        if serialized:
            cached_messages = json.loads(serialized)
            if cached_messages:
                # Timestamp of the newest cached message; intended as the
                # lower bound for the incremental DB query below.
                last_time = datetime.fromisoformat(cached_messages[-1]["created_at"])

                # TODO: Replace with actual Message model query when available
                # This would need to be implemented with SQLModel/SQLAlchemy
                new_messages = []  # Placeholder for actual DB query

                if new_messages:
                    annotated_messages = self._annotate_messages(new_messages)
                    all_messages = cached_messages + self._serialize_messages(annotated_messages)
                    # Refresh the cache with the merged history (1 hour TTL).
                    self.redis.setex(cache_key, 3600, json.dumps(all_messages))
                    return self._convert_to_langchain(all_messages)

            return self._convert_to_langchain(cached_messages)

        # Cache miss: load the whole history from the database.
        # TODO: Replace with actual Message model query when available
        # This would need to be implemented with SQLModel/SQLAlchemy
        db_messages = []  # Placeholder for actual DB query

        if db_messages:
            annotated_messages = self._annotate_messages(db_messages)
            # Prime the cache for subsequent calls (1 hour TTL).
            self.redis.setex(cache_key, 3600, json.dumps(self._serialize_messages(annotated_messages)))
            return self._convert_to_langchain(annotated_messages)

        return []

    async def get_session_memory(self, session_id: int, session: AsyncSession) -> ConversationSummaryBufferMemory:
        """Build a summary-buffer memory pre-filled with the session history."""
        memory = ConversationSummaryBufferMemory(
            llm=self.llm,
            max_token_limit=self.token_limit
        )

        # Replay history into the memory so older turns get summarized.
        messages = await self.load_chat_history(session_id, session)
        for msg in messages:
            if isinstance(msg, HumanMessage):
                memory.chat_memory.add_user_message(msg.content)
            elif isinstance(msg, AIMessage):
                memory.chat_memory.add_ai_message(msg.content)
        return memory
|
109
rag/registry.py
Normal file
109
rag/registry.py
Normal file
@ -0,0 +1,109 @@
|
||||
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
||||
from rag.llm.model import ChatModel, EmbeddingsModel
|
||||
from rag.database.model import VectorStoreModel
|
||||
from rag.service.model import RagService
|
||||
from rag.vector_store import MilvusVectorStore
|
||||
from rag.settings import settings
|
||||
from langchain_milvus import Milvus
|
||||
|
||||
|
||||
class ModelRegistry:
    """Singleton registry that lazily builds and caches the shared model
    objects: chat LLM, embeddings, vector store and the RAG service.
    """

    _instance = None      # the single shared instance
    _initialized = False  # class-level default; shadowed per-instance below

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super(ModelRegistry, cls).__new__(cls)
        return cls._instance

    def __init__(self):
        # __init__ runs on every ModelRegistry() call; only reset the
        # caches the first time so repeated instantiation is harmless.
        if not self._initialized:
            self._chat_model = None
            self._embeddings_model = None
            self._vector_store = None
            self._rag_service = None
            self._initialized = True

    def get_chat_model(self) -> ChatModel:
        """Return (building on first use) the OpenAI chat model wrapper.

        Raises:
            ValueError: If no OpenAI API key is configured.
        """
        if self._chat_model is None:
            if settings.openai_api_key:
                llm = ChatOpenAI(
                    api_key=settings.openai_api_key,
                    # FIX: was hardcoded "gpt-4o-mini"; now honors the
                    # configured model (same default, so behavior-compatible)
                    # and matches how the embeddings model is configured.
                    model=settings.openai_model,
                    temperature=0
                )
                self._chat_model = ChatModel(llm)
            else:
                raise ValueError("OpenAI API key не настроен в settings")
        return self._chat_model

    def get_embeddings_model(self) -> EmbeddingsModel:
        """Return (building on first use) the OpenAI embeddings wrapper.

        Raises:
            ValueError: If no OpenAI API key is configured.
        """
        if self._embeddings_model is None:
            if settings.openai_api_key:
                embeddings = OpenAIEmbeddings(
                    api_key=settings.openai_api_key,
                    model=settings.openai_embeddings_model
                )
                self._embeddings_model = EmbeddingsModel(embeddings)
            else:
                raise ValueError("OpenAI API key не настроен в settings")
        return self._embeddings_model

    def get_vector_store(self) -> MilvusVectorStore:
        """Return (building on first use) the Milvus-backed vector store."""
        if self._vector_store is None:
            embeddings_model = self.get_embeddings_model()
            self._vector_store = MilvusVectorStore(
                embeddings_model.get_model(),
                collection_name=settings.milvus_collection
            )
        return self._vector_store

    def get_rag_service(self) -> RagService:
        """Return (building on first use) the RAG service.

        Builds a raw langchain Milvus store (wrapped in VectorStoreModel for
        compatibility with the existing RagService API), trying a URI
        connection first and falling back to host/port.
        """
        if self._rag_service is None:
            # Derive host/port from the URI for the fallback connection path.
            uri_without_protocol = settings.milvus_uri.replace("http://", "").replace("https://", "")
            if ":" in uri_without_protocol:
                host, port = uri_without_protocol.split(":", 1)
                port = int(port)
            else:
                host = uri_without_protocol
                port = 19530  # Default Milvus port

            try:
                # Preferred: connect with the URI directly.
                milvus_store = Milvus(
                    embedding_function=self.get_embeddings_model().get_model(),
                    connection_args={
                        "uri": settings.milvus_uri,
                    },
                    collection_name=settings.milvus_collection,
                )
            except Exception:
                # Fallback: explicit host/port connection.
                milvus_store = Milvus(
                    embedding_function=self.get_embeddings_model().get_model(),
                    connection_args={
                        "host": host,
                        "port": port,
                    },
                    collection_name=settings.milvus_collection,
                )

            vector_store_model = VectorStoreModel(milvus_store)

            self._rag_service = RagService(
                vector_store=vector_store_model,
                llm=self.get_chat_model()
            )
        return self._rag_service


# Singleton instance shared across the application.
registry = ModelRegistry()
|
3
rag/service/__init__.py
Normal file
3
rag/service/__init__.py
Normal file
@ -0,0 +1,3 @@
|
||||
from .model import RagService
|
||||
|
||||
__all__ = ['RagService']
|
121
rag/service/model.py
Normal file
121
rag/service/model.py
Normal file
@ -0,0 +1,121 @@
|
||||
from rag.database.model import VectorStoreModel
|
||||
from langchain_core.runnables import RunnableWithMessageHistory
|
||||
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
|
||||
from rag.llm.model import ChatModel
|
||||
from langchain.schema import HumanMessage, SystemMessage
|
||||
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
|
||||
from langchain.chains.combine_documents import create_stuff_documents_chain
|
||||
|
||||
from rag.memory import ChatMemoryManager
|
||||
|
||||
rag_template: str = """
|
||||
You are a beverage and alcohol expert — like a sommelier, but for all kinds of alcoholic drinks, including beer, wine, spirits, cocktails, etc
|
||||
Answer clearly and stay within your expertise in alcohol and related topics
|
||||
Rules:
|
||||
1. Speak in first person: "I recommend", "I think"
|
||||
2. Be conversational and personable - like a knowledgeable friend at a bar
|
||||
3. Use facts from the context for specific characteristics, but speak generally when needed
|
||||
4. Do not disclose sources or metadata from contextual documents
|
||||
5. Answer questions about alcohol and related topics (food pairings, culture, serving, etc) but politely decline unrelated subjects
|
||||
6. Be brief and useful - keep answers to 2-4 sentences
|
||||
7. Use chat history to maintain a natural conversation flow
|
||||
8. Feel free to use casual language and humor when appropriate
|
||||
|
||||
Context: {context}
|
||||
"""
|
||||
|
||||
get_summary_template = """Create a concise 3-5 word title for the following conversation.
|
||||
Focus on the main topic. Reply only with the title.\n\n
|
||||
Chat history:\n"""
|
||||
|
||||
rephrase_prompt = ChatPromptTemplate.from_messages([
|
||||
("system", "Given a chat history and the latest user question which might reference context in the chat history, "
|
||||
"formulate a standalone question. Do NOT answer the question."),
|
||||
MessagesPlaceholder("chat_history"),
|
||||
("human", "{input}"),
|
||||
])
|
||||
|
||||
qa_prompt = ChatPromptTemplate.from_messages([
|
||||
("system", rag_template),
|
||||
MessagesPlaceholder("chat_history"),
|
||||
("human", "{input}"),
|
||||
])
|
||||
|
||||
|
||||
class RagService:
    """Conversational RAG service over the candidate vector store.

    Wires a history-aware retriever (question rephrasing) into a
    stuff-documents QA chain.
    """

    def __init__(self, vector_store: VectorStoreModel, llm: ChatModel):
        """
        Args:
            vector_store: Wrapper exposing the store via ``get_store()``.
            llm: Wrapper exposing the chat model via ``get_llm()``.
        """
        self.vector_store = vector_store.get_store()
        self.llm = llm.get_llm()

        retriever = self.vector_store.as_retriever()

        # NOTE: these mirror the module-level rephrase_prompt/qa_prompt;
        # the instance copies are the ones actually used by the chains below.
        self.rephrase_prompt = ChatPromptTemplate.from_messages([
            ("system",
             "Given a chat history and the latest user question which might reference context in the chat history, "
             "formulate a standalone question. Do NOT answer the question."),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ])

        self.qa_prompt = ChatPromptTemplate.from_messages([
            ("system", rag_template),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ])

        # Rephrases the user question using chat history before retrieval.
        self.history_aware_retriever = create_history_aware_retriever(
            self.llm, retriever, self.rephrase_prompt
        )

        # Answers using the retrieved documents stuffed into the QA prompt.
        self.question_answer_chain = create_stuff_documents_chain(
            self.llm, self.qa_prompt
        )

        self.rag_chain = create_retrieval_chain(
            self.history_aware_retriever, self.question_answer_chain
        )

    def get_qa_from_query(self, query: str, session_id: int):
        """Stream the RAG answer for *query* as text chunks.

        This is a generator (the previous ``-> str`` annotation was wrong —
        the body uses ``yield``).

        Args:
            query: The user's question.
            session_id: Chat session identifier used to load history.

        Yields:
            Non-empty answer text chunks, in order.
        """
        memory = ChatMemoryManager(self.llm)

        async def get_session_history(_):
            # TODO: Pass actual AsyncSession here
            return (await memory.get_session_memory(session_id, None)).chat_memory

        conversational_rag_chain = RunnableWithMessageHistory(
            self.rag_chain,
            get_session_history,
            input_messages_key="input",
            history_messages_key="chat_history",
            output_messages_key="answer"
        )

        for chunk in conversational_rag_chain.stream(
            {"input": query},
            config={"configurable": {"session_id": str(session_id)}}
        ):
            answer = chunk.get('answer', '')
            if answer:
                yield answer

    def generate_title_with_llm(self, chat_history: str | list[str]) -> str:
        """Generate a concise 3-5 word chat title from the history.

        Accepts either the whole history as a single string or a list of
        "speaker: text" lines (joined with newlines).
        """
        # Normalize both input forms first, then build the message list once
        # (the two branches previously duplicated identical construction).
        if isinstance(chat_history, str):
            history_text = chat_history
        else:
            history_text = "\n".join(chat_history)

        messages = [
            SystemMessage(content="You are a helpful assistant that generates chat titles."),
            HumanMessage(content=get_summary_template + history_text)
        ]

        response = self.llm.invoke(messages)
        return response.content.strip()
|
52
rag/settings.py
Normal file
52
rag/settings.py
Normal file
@ -0,0 +1,52 @@
|
||||
import os
|
||||
from pydantic_settings import BaseSettings
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class RagSettings(BaseSettings):
    """Application settings loaded from the environment / .env file.

    Field names map to environment variables case-insensitively
    (e.g. ``database_url`` <- ``DATABASE_URL``).
    """

    # Database
    # NOTE(review): real-looking credentials committed as a default —
    # move to .env only and rotate the password.
    database_url: str = "postgresql+asyncpg://tdjx:1309@localhost:5432/hr_ai"

    # Milvus Settings
    # NOTE(review): default points at a public IP — confirm this should
    # not be localhost / env-only.
    milvus_uri: str = "http://5.188.159.90:19530"
    milvus_collection: str = "candidate_profiles"

    # Redis (host/port/db for cache and Celery broker)
    redis_cache_url: str = "localhost"
    redis_cache_port: int = 6379
    redis_cache_db: int = 0

    # S3 Configuration (no defaults: these are required at startup)
    s3_endpoint_url: str
    s3_access_key_id: str
    s3_secret_access_key: str
    s3_bucket_name: str
    s3_region: str = "ru-1"

    # LLM Settings (keys optional so the app can boot without them)
    openai_api_key: Optional[str] = None
    anthropic_api_key: Optional[str] = None
    openai_model: str = "gpt-4o-mini"
    openai_embeddings_model: str = "text-embedding-3-small"

    # AI Agent Settings (speech-to-text / text-to-speech providers)
    deepgram_api_key: Optional[str] = None
    cartesia_api_key: Optional[str] = None
    elevenlabs_api_key: Optional[str] = None
    resemble_api_key: Optional[str] = None

    # LiveKit Configuration (dev defaults; override in production)
    livekit_url: str = "ws://localhost:7880"
    livekit_api_key: str = "devkey"
    livekit_api_secret: str = "devkey_secret_32chars_minimum_length"

    # App Configuration
    app_env: str = "development"
    debug: bool = True

    class Config:
        # pydantic-settings: read variables from .env as UTF-8.
        env_file = ".env"
        env_file_encoding = "utf-8"


# Module-level singleton; import this instead of instantiating RagSettings.
settings = RagSettings()
|
104
rag/vector_store.py
Normal file
104
rag/vector_store.py
Normal file
@ -0,0 +1,104 @@
|
||||
from typing import List, Dict, Any
|
||||
from langchain_milvus import Milvus
|
||||
from langchain_core.embeddings import Embeddings
|
||||
from rag.settings import settings
|
||||
|
||||
|
||||
class MilvusVectorStore:
    """Candidate-profile storage on top of a Milvus collection.

    Profiles are embedded from a concatenation of skills, experience and
    summary text; structured fields travel as document metadata.
    """

    def __init__(self, embeddings_model: Embeddings, collection_name: str = "candidate_profiles"):
        self.embeddings = embeddings_model
        self.collection_name = collection_name

        # Connect via URI directly (see settings.milvus_uri).
        self.vector_store = Milvus(
            embedding_function=embeddings_model,
            connection_args={
                "uri": settings.milvus_uri,
            },
            collection_name=collection_name,
        )

    def add_candidate_profile(self, candidate_id: str, resume_data: Dict[str, Any]):
        """Add a candidate profile to the vector store.

        Args:
            candidate_id: Stable ID used both as the Milvus primary id and
                as searchable metadata.
            resume_data: Parsed resume dict (skills, experience, summary, ...).
        """
        try:
            # Text to be embedded: skills + experience + summary, concatenated.
            skills_text = " ".join(resume_data.get("skills", []))
            experience_text = " ".join([
                f"{exp.get('position', '')} {exp.get('company', '')} {exp.get('description', '')}"
                for exp in resume_data.get("experience", [])
            ])

            combined_text = f"{skills_text} {experience_text} {resume_data.get('summary', '')}"

            # Structured fields kept as metadata for filtering/search.
            metadata = {
                "candidate_id": candidate_id,
                "name": resume_data.get("name", ""),
                "email": resume_data.get("email", ""),
                "phone": resume_data.get("phone", ""),
                "total_years": resume_data.get("total_years", 0),
                "skills": resume_data.get("skills", []),
                "education": resume_data.get("education", "")
            }

            # Insert into the vector store.
            self.vector_store.add_texts(
                texts=[combined_text],
                metadatas=[metadata],
                ids=[candidate_id]
            )

            return True

        except Exception as e:
            raise Exception(f"Ошибка при добавлении кандидата в Milvus: {str(e)}")

    def search_similar_candidates(self, query: str, k: int = 5) -> List[Dict[str, Any]]:
        """Similarity search: return up to *k* candidates matching *query*.

        Returns:
            List of dicts with ``content``, ``metadata`` and
            ``similarity_score`` keys.
        """
        try:
            results = self.vector_store.similarity_search_with_score(query, k=k)

            candidates = []
            for doc, score in results:
                candidate = {
                    "content": doc.page_content,
                    "metadata": doc.metadata,
                    "similarity_score": score
                }
                candidates.append(candidate)

            return candidates

        except Exception as e:
            raise Exception(f"Ошибка при поиске кандидатов в Milvus: {str(e)}")

    def get_candidate_by_id(self, candidate_id: str) -> Dict[str, Any]:
        """Fetch a single candidate by ID via a filtered similarity search.

        Returns the ``{"content", "metadata"}`` dict, or None when not found
        (NOTE(review): the annotation says Dict but None is returned on miss).
        """
        # NOTE(review): candidate_id is interpolated into the Milvus expr —
        # injection risk if IDs can contain quotes; confirm IDs are trusted.
        try:
            results = self.vector_store.similarity_search(
                query="",
                k=1,
                expr=f"candidate_id == '{candidate_id}'"
            )

            if results:
                doc = results[0]
                return {
                    "content": doc.page_content,
                    "metadata": doc.metadata
                }
            else:
                return None

        except Exception as e:
            raise Exception(f"Ошибка при получении кандидата из Milvus: {str(e)}")

    def delete_candidate(self, candidate_id: str):
        """Delete a candidate from the vector store by primary id."""
        try:
            # Milvus delete-by-id takes a list of primary keys.
            self.vector_store.delete([candidate_id])
            return True

        except Exception as e:
            raise Exception(f"Ошибка при удалении кандидата из Milvus: {str(e)}")
|
208
simple_test.py
Normal file
208
simple_test.py
Normal file
@ -0,0 +1,208 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Simple system test without Unicode characters"""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Add root directory to PYTHONPATH
|
||||
sys.path.append(str(Path(__file__).parent))
|
||||
|
||||
async def test_database():
    """Check that PostgreSQL is reachable and the Resume table is queryable."""
    print("Testing database connection...")

    try:
        from sqlalchemy import select

        from app.core.database import get_session as get_db
        from app.models.resume import Resume

        async for session in get_db():
            rows = (await session.execute(select(Resume).limit(1))).scalars().all()
            print("PASS - Database connection successful")
            print(f"Found resumes: {len(rows)}")
            return True

    except Exception as exc:
        print(f"FAIL - Database error: {str(exc)}")
        return False
|
||||
|
||||
|
||||
async def test_rag():
    """Check that the RAG stack can parse a small sample resume."""
    print("\nTesting RAG system...")

    try:
        from rag.llm.model import ResumeParser
        from rag.registry import registry

        resume_parser = ResumeParser(registry.get_chat_model())

        # Minimal in-memory resume used as parser input.
        sample = """
        John Doe
        Python Developer
        Experience: 3 years
        Skills: Python, Django, PostgreSQL
        Education: Computer Science
        """

        parsed = resume_parser.parse_resume_text(sample)

        print("PASS - RAG system working")
        print(f"Parsed data keys: {list(parsed.keys())}")
        return True

    except Exception as exc:
        print(f"FAIL - RAG error: {str(exc)}")
        return False
|
||||
|
||||
|
||||
def test_redis():
    """Check that Redis answers PING on the configured host/port/db."""
    print("\nTesting Redis connection...")

    try:
        import redis

        from rag.settings import settings

        client = redis.Redis(
            host=settings.redis_cache_url,
            port=settings.redis_cache_port,
            db=settings.redis_cache_db,
        )
        client.ping()

        print("PASS - Redis connection successful")
        return True

    except Exception as exc:
        print(f"FAIL - Redis error: {str(exc)}")
        print("TIP: Start Redis with: docker run -d -p 6379:6379 redis:alpine")
        return False
|
||||
|
||||
|
||||
async def test_interview_service():
    """Check token generation and the fallback interview plan."""
    print("\nTesting interview service...")

    try:
        from app.core.database import get_session as get_db
        from app.services.interview_service import InterviewRoomService

        async for session in get_db():
            svc = InterviewRoomService(session)

            # Token generation should work without a LiveKit server.
            token = svc.generate_access_token("test_room", "test_user")
            print(f"PASS - Token generated (length: {len(token)})")

            # Fallback plan must always be constructible.
            plan = svc._get_fallback_interview_plan()
            print(f"PASS - Interview plan structure: {list(plan.keys())}")

            return True

    except Exception as exc:
        print(f"FAIL - Interview service error: {str(exc)}")
        return False
|
||||
|
||||
|
||||
def test_ai_agent():
    """Check that the AI interviewer agent initializes from a minimal plan."""
    print("\nTesting AI agent...")

    try:
        from ai_interviewer_agent import InterviewAgent

        intro_section = {
            "name": "Introduction",
            "duration_minutes": 5,
            "questions": ["Tell me about yourself"],
        }
        plan = {
            "interview_structure": {
                "duration_minutes": 15,
                "greeting": "Hello! Test interview",
                "sections": [intro_section],
            },
            "candidate_info": {
                "name": "Test Candidate",
                "skills": ["Python"],
                "total_years": 2,
            },
        }

        agent = InterviewAgent(plan)

        print(f"PASS - AI Agent initialized with {len(agent.sections)} sections")
        print(f"Current section: {agent.get_current_section().get('name')}")
        return True

    except Exception as exc:
        print(f"FAIL - AI Agent error: {str(exc)}")
        return False
|
||||
|
||||
|
||||
async def main():
    """Run all component tests and print a summary.

    Sync and async test callables are dispatched uniformly through
    ``asyncio.iscoroutinefunction``; the previous lambda wrappers around
    some sync tests were redundant and inconsistent, so the functions are
    now passed directly.
    """
    print("=" * 50)
    print("HR-AI SYSTEM TEST")
    print("=" * 50)

    tests = [
        ("Database", test_database),
        ("RAG System", test_rag),
        ("Redis", test_redis),
        ("Interview Service", test_interview_service),
        ("AI Agent", test_ai_agent),
    ]

    results = []

    for test_name, test_func in tests:
        try:
            # Await coroutine functions; call plain functions directly.
            if asyncio.iscoroutinefunction(test_func):
                result = await test_func()
            else:
                result = test_func()
            results.append((test_name, result))
        except Exception as e:
            print(f"CRITICAL ERROR in {test_name}: {str(e)}")
            results.append((test_name, False))

    # Summary
    print("\n" + "=" * 50)
    print("TEST RESULTS")
    print("=" * 50)

    passed = 0
    for test_name, result in results:
        status = "PASS" if result else "FAIL"
        print(f"{test_name:20} {status}")
        if result:
            passed += 1

    total = len(results)
    print(f"\nRESULT: {passed}/{total} tests passed")

    if passed == total:
        print("\nSYSTEM READY FOR TESTING!")
        print("Next steps:")
        print("1. Start FastAPI: uvicorn app.main:app --reload")
        print("2. Start Celery: celery -A celery_worker.celery_app worker --loglevel=info")
        print("3. Upload test resume via /resume/upload")
        print("4. Check interview plan generation")
    else:
        print("\nSOME COMPONENTS NEED SETUP")
        print("Check error messages above for troubleshooting")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
300
test_system.py
Normal file
300
test_system.py
Normal file
@ -0,0 +1,300 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Тестирование HR-AI системы по компонентам
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
import os
|
||||
import requests
|
||||
from pathlib import Path
|
||||
|
||||
# Добавляем корневую директорию в PYTHONPATH
|
||||
sys.path.append(str(Path(__file__).parent))
|
||||
|
||||
async def test_database_connection():
    """Smoke-test the PostgreSQL connection via the app's async session."""
    print("Testing database connection...")

    try:
        from app.core.database import get_db
        from app.models.resume import Resume
        from sqlalchemy import select

        # Obtain an async session from the app's dependency generator.
        async for db in get_db():
            # Run a trivial query to prove connectivity and schema presence.
            result = await db.execute(select(Resume).limit(1))
            resumes = result.scalars().all()

            print("OK Database: connection successful")
            print(f"Found resumes in database: {len(resumes)}")
            return True
        # NOTE(review): if get_db() yields nothing this falls through and
        # returns None (falsy) instead of False — confirm that is acceptable.

    except Exception as e:
        print(f"FAIL Database: connection error - {str(e)}")
        return False
|
||||
|
||||
|
||||
async def test_rag_system():
    """Smoke-test the RAG pipeline (resume parsing via the LLM)."""
    print("\n🔍 Тестируем RAG систему...")

    try:
        from rag.registry import registry
        from rag.llm.model import ResumeParser

        # Initialize models via the singleton registry.
        chat_model = registry.get_chat_model()
        # NOTE(review): embeddings_model is built but never used below —
        # presumably kept as an initialization check; confirm.
        embeddings_model = registry.get_embeddings_model()

        print("✅ RAG система: модели инициализированы")

        # Exercise the resume parser.
        parser = ResumeParser(chat_model)

        # Minimal in-memory sample resume.
        test_resume_text = """
        Иван Петров
        Python разработчик
        Опыт работы: 3 года
        Навыки: Python, Django, PostgreSQL, Docker
        Образование: МГУ, факультет ВМК
        """

        parsed_resume = parser.parse_resume_text(test_resume_text)

        print("✅ RAG система: парсинг резюме работает")
        print(f"📋 Распарсенные данные: {parsed_resume}")

        return True

    except Exception as e:
        print(f"❌ RAG система: ошибка - {str(e)}")
        return False
|
||||
|
||||
|
||||
def test_redis_connection():
    """Smoke-test the Redis connection configured in rag.settings."""
    print("\n🔍 Тестируем подключение к Redis...")

    try:
        import redis
        from rag.settings import settings

        r = redis.Redis(
            host=settings.redis_cache_url,
            port=settings.redis_cache_port,
            db=settings.redis_cache_db
        )

        # A PING round-trip proves the server is reachable.
        r.ping()

        print("✅ Redis: подключение успешно")
        return True

    except Exception as e:
        print(f"❌ Redis: ошибка подключения - {str(e)}")
        print("💡 Для запуска Redis используйте: docker run -d -p 6379:6379 redis:alpine")
        return False
|
||||
|
||||
|
||||
async def test_celery_tasks():
    """Verify that the Celery task module is importable.

    Returns:
        True when the import succeeds, False otherwise.
    """
    print("\n🔍 Тестируем Celery задачи...")

    try:
        # Import is the whole check: a broken worker module fails here.
        from celery_worker.tasks import parse_resume_task  # noqa: F401
    except Exception as exc:
        print(f"❌ Celery: ошибка - {str(exc)}")
        return False

    print("✅ Celery: задачи импортируются")
    print("💡 Для полного теста запустите: celery -A celery_worker.celery_app worker --loglevel=info")
    return True
async def test_interview_service():
    """Exercise InterviewRoomService without a running LiveKit server.

    Returns:
        True when the service constructs and its fallback plan works,
        False on import/setup failure. Token-generation errors are
        reported but do not fail the test.
    """
    print("\n🔍 Тестируем сервис интервью...")

    try:
        from app.core.database import get_db
        from app.services.interview_service import InterviewRoomService

        async for session in get_db():
            room_service = InterviewRoomService(session)

            # Token generation is local signing — it should work even
            # with no LiveKit server reachable.
            try:
                access_token = room_service.generate_access_token("test_room", "test_user")
                print("✅ Interview Service: генерация токенов работает")
                print(f"🎫 Тестовый токен сгенерирован (длина: {len(access_token)})")
            except Exception as e:
                print(f"⚠️ Interview Service: ошибка токена - {str(e)}")

            # The fallback plan must always be available offline.
            plan = room_service._get_fallback_interview_plan()
            print("✅ Interview Service: fallback план работает")
            print(f"📋 Структура плана: {list(plan.keys())}")

            return True

    except Exception as e:
        print(f"❌ Interview Service: ошибка - {str(e)}")
        return False
def test_ai_agent_import():
    """Check that the AI interviewer agent imports and initializes.

    Returns:
        True when InterviewAgent constructs from a minimal plan and
        produces system instructions, False otherwise.
    """
    print("\n🔍 Тестируем AI агента...")

    try:
        from ai_interviewer_agent import InterviewAgent

        # Minimal two-section plan used purely as a constructor fixture.
        plan_sections = [
            {
                "name": "Знакомство",
                "duration_minutes": 5,
                "questions": ["Расскажи о себе"],
            },
            {
                "name": "Опыт",
                "duration_minutes": 10,
                "questions": ["Какой у тебя опыт?"],
            },
        ]
        sample_plan = {
            "interview_structure": {
                "duration_minutes": 15,
                "greeting": "Привет! Тест интервью",
                "sections": plan_sections,
            },
            "candidate_info": {
                "name": "Тестовый кандидат",
                "skills": ["Python"],
                "total_years": 2,
            },
        }

        agent = InterviewAgent(sample_plan)

        print("✅ AI Agent: импорт и инициализация работают")
        print(f"📊 Секций в плане: {len(agent.sections)}")
        print(f"🎯 Текущая секция: {agent.get_current_section().get('name')}")

        # System-instruction extraction is part of the agent contract.
        instructions = agent.get_system_instructions()
        print(f"📝 Инструкции сгенерированы (длина: {len(instructions)})")

        return True

    except Exception as e:
        print(f"❌ AI Agent: ошибка - {str(e)}")
        return False
def check_external_services():
    """Probe optional external services (Milvus, LiveKit) and print status.

    Purely informational: every failure is reported on stdout, nothing is
    raised, and the function returns None. Relies on the module-level
    ``requests`` import.
    """
    print("\n🔍 Проверяем внешние сервисы...")

    # Milvus exposes a plain-HTTP health endpoint.
    try:
        from rag.settings import settings
        response = requests.get(f"{settings.milvus_uri}/health", timeout=5)
        if response.status_code == 200:
            print("✅ Milvus: сервер доступен")
        else:
            print("⚠️ Milvus: сервер недоступен")
    except Exception:
        print("❌ Milvus: сервер недоступен")

    # LiveKit answers HTTP on the same host/port as its ws:// URL.
    # NOTE(review): if the import above failed, `settings` is unbound here
    # and the NameError is swallowed by this except — intentional best-effort.
    try:
        # Fix: dropped the original no-op .replace(":7880", ":7880").
        livekit_http_url = settings.livekit_url.replace("ws://", "http://")
        response = requests.get(livekit_http_url, timeout=2)
        print("✅ LiveKit: сервер запущен")
    except Exception:
        print("❌ LiveKit: сервер не запущен")
        print("💡 Для запуска LiveKit используйте Docker: docker run --rm -p 7880:7880 -p 7881:7881 livekit/livekit-server --dev")
async def run_all_tests():
    """Run every component smoke test and print a summary report.

    Each test returns True/False; a test that raises is recorded as a
    failure. External-service checks are informational only and do not
    count toward the pass/fail tally.
    """
    print("=== HR-AI System Testing ===")
    print("=" * 50)

    # Fix: the original wrapped two sync functions in pointless lambdas;
    # the sync/async dispatch below handles both kinds directly.
    tests = [
        ("Database", test_database_connection),
        ("RAG System", test_rag_system),
        ("Redis", test_redis_connection),
        ("Celery", test_celery_tasks),
        ("Interview Service", test_interview_service),
        ("AI Agent", test_ai_agent_import),
    ]

    results = {}

    for test_name, test_func in tests:
        try:
            if asyncio.iscoroutinefunction(test_func):
                result = await test_func()
            else:
                result = test_func()
            results[test_name] = result
        except Exception as e:
            print(f"❌ {test_name}: критическая ошибка - {str(e)}")
            results[test_name] = False

    # Informational only — no effect on the tally.
    check_external_services()

    # Final report.
    print("\n" + "=" * 50)
    print("📊 ИТОГОВЫЙ ОТЧЕТ")
    print("=" * 50)

    passed = sum(1 for r in results.values() if r)
    total = len(results)

    for test_name, result in results.items():
        status = "✅ PASS" if result else "❌ FAIL"
        print(f"{test_name:20} {status}")

    print(f"\n🎯 Результат: {passed}/{total} тестов прошли успешно")

    if passed == total:
        print("🎉 Система готова к тестированию!")
        print_next_steps()
    else:
        print("⚠️ Некоторые компоненты требуют настройки")
        print_troubleshooting()
def print_next_steps():
    """Print the manual follow-up steps for a full end-to-end check."""
    steps = (
        "\n📋 СЛЕДУЮЩИЕ ШАГИ:",
        "1. Запустите FastAPI сервер: uvicorn app.main:app --reload",
        "2. Запустите Celery worker: celery -A celery_worker.celery_app worker --loglevel=info",
        "3. Загрузите тестовое резюме через /resume/upload",
        "4. Проверьте генерацию плана интервью в базе данных",
        "5. Для полного теста голосовых интервью потребуются:",
        "   - API ключи Deepgram/Cartesia",
        "   - LiveKit сервер",
    )
    for line in steps:
        print(line)
def print_troubleshooting():
    """Print troubleshooting hints for the components that commonly fail."""
    hints = (
        "\n🔧 УСТРАНЕНИЕ ПРОБЛЕМ:",
        "• Redis не запущен: docker run -d -p 6379:6379 redis:alpine",
        "• Milvus недоступен: проверьте настройки MILVUS_URI",
        "• RAG ошибки: проверьте OPENAI_API_KEY",
        "• База данных: проверьте DATABASE_URL и запустите alembic upgrade head",
    )
    for hint in hints:
        print(hint)
# Script entry point: run the full smoke-test suite once.
if __name__ == "__main__":
    asyncio.run(run_all_tests())
Loading…
Reference in New Issue
Block a user