ai-hackaton-backend/rag/vector_store.py
2025-09-07 22:37:26 +05:00

110 lines
4.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from typing import Any, Optional

from langchain_core.embeddings import Embeddings
from langchain_milvus import Milvus

from rag.settings import settings
class MilvusVectorStore:
    """Wrapper around a LangChain ``Milvus`` store holding candidate profiles.

    Each candidate is stored as one text entry (skills, experience and summary
    joined into a single string) together with a metadata dict, using the
    candidate id as the entry id.
    """

    def __init__(
        self, embeddings_model: Embeddings, collection_name: str = "candidate_profiles"
    ):
        """Create the underlying ``Milvus`` store.

        Args:
            embeddings_model: Embedding function handed to ``Milvus``.
            collection_name: Name of the collection to work with.
        """
        self.embeddings = embeddings_model
        self.collection_name = collection_name
        # Connect using the URI from the settings directly.
        self.vector_store = Milvus(
            embedding_function=embeddings_model,
            connection_args={
                "uri": settings.milvus_uri,
            },
            collection_name=collection_name,
        )

    @staticmethod
    def _get_or_default(data: dict[str, Any], key: str, default: Any) -> Any:
        """Return ``data[key]``, or ``default`` when the key is missing or None."""
        value = data.get(key)
        return default if value is None else value

    @staticmethod
    def _quote_expr_string(value: str) -> str:
        """Return ``value`` as a single-quoted literal for a filter expression.

        Backslashes and single quotes are backslash-escaped so the value
        cannot terminate the literal early.
        """
        escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
        return f"'{escaped}'"

    def add_candidate_profile(self, candidate_id: str, resume_data: dict[str, Any]):
        """Add a candidate profile to the vector store.

        Fields of ``resume_data`` that are missing or explicitly ``None`` are
        replaced by their defaults (empty string, empty list, or 0), so they
        neither raise nor end up as the text "None" in the stored entry.

        Args:
            candidate_id: Id stored in the metadata and used as the entry id.
            resume_data: Parsed resume; the keys read are ``skills``,
                ``experience`` (items with ``position``, ``company``,
                ``description``), ``summary``, ``name``, ``email``, ``phone``,
                ``total_years`` and ``education``.

        Returns:
            True when the entry was added.

        Raises:
            Exception: Wraps any error raised while building or adding the
                entry; the original error is chained as ``__cause__``.
        """
        try:
            field = self._get_or_default

            # Build the text to embed from skills, experience and summary.
            skills = field(resume_data, "skills", [])
            skills_text = " ".join(str(skill) for skill in skills)

            experience_parts = []
            for exp in field(resume_data, "experience", []):
                position = field(exp, "position", "")
                company = field(exp, "company", "")
                description = field(exp, "description", "")
                experience_parts.append(f"{position} {company} {description}")
            experience_text = " ".join(experience_parts)

            summary = field(resume_data, "summary", "")
            combined_text = f"{skills_text} {experience_text} {summary}"

            # Metadata stored alongside the text.
            metadata = {
                "candidate_id": candidate_id,
                "name": field(resume_data, "name", ""),
                "email": field(resume_data, "email", ""),
                "phone": field(resume_data, "phone", ""),
                "total_years": field(resume_data, "total_years", 0),
                "skills": skills,
                "education": field(resume_data, "education", ""),
            }

            self.vector_store.add_texts(
                texts=[combined_text], metadatas=[metadata], ids=[candidate_id]
            )
            return True
        except Exception as e:
            raise Exception(
                f"Ошибка при добавлении кандидата в Milvus: {str(e)}"
            ) from e

    def search_similar_candidates(self, query: str, k: int = 5) -> list[dict[str, Any]]:
        """Search for candidates similar to ``query``.

        Args:
            query: Free-text search query.
            k: Number of results requested from the store.

        Returns:
            One dict per hit with the keys ``content``, ``metadata`` and
            ``similarity_score`` (the score exactly as returned by the store).

        Raises:
            Exception: Wraps any error raised by the search.
        """
        try:
            results = self.vector_store.similarity_search_with_score(query, k=k)
            return [
                {
                    "content": doc.page_content,
                    "metadata": doc.metadata,
                    "similarity_score": score,
                }
                for doc, score in results
            ]
        except Exception as e:
            raise Exception(f"Ошибка при поиске кандидатов в Milvus: {str(e)}") from e

    def get_candidate_by_id(self, candidate_id: str) -> Optional[dict[str, Any]]:
        """Fetch a candidate by id.

        The lookup is a similarity search with an empty query, restricted by a
        filter expression on the ``candidate_id`` metadata field.

        Args:
            candidate_id: Id to look up; it is escaped before being placed in
                the filter expression.

        Returns:
            A dict with ``content`` and ``metadata`` for the first hit, or
            None when nothing matches.

        Raises:
            Exception: Wraps any error raised by the search.
        """
        try:
            id_literal = self._quote_expr_string(candidate_id)
            results = self.vector_store.similarity_search(
                query="", k=1, expr=f"candidate_id == {id_literal}"
            )
            if not results:
                return None
            doc = results[0]
            return {"content": doc.page_content, "metadata": doc.metadata}
        except Exception as e:
            raise Exception(
                f"Ошибка при получении кандидата из Milvus: {str(e)}"
            ) from e

    def delete_candidate(self, candidate_id: str):
        """Delete a candidate from the vector store.

        Args:
            candidate_id: Entry id to delete.

        Returns:
            True when the delete call completed without raising.

        Raises:
            Exception: Wraps any error raised by the delete call.
        """
        try:
            # Delete by entry id.
            self.vector_store.delete([candidate_id])
            return True
        except Exception as e:
            raise Exception(f"Ошибка при удалении кандидата из Milvus: {str(e)}") from e