Spaces:
Runtime error
Runtime error
import json | |
import os | |
from dotenv import load_dotenv | |
from qdrant_client import QdrantClient | |
from qdrant_client.models import Distance, VectorParams, PointStruct | |
from langchain_huggingface import HuggingFaceEmbeddings | |
from langchain.schema import Document | |
# Pull settings from a .env file (if any) before the environment is read below.
load_dotenv()

# Qdrant client; the endpoint URL and API key come from the environment.
# NOTE(review): os.getenv returns None for an unset variable, so a missing
# QDRANT_URL / QDRANT_SERVICE_KEY is passed through as None - confirm that
# this is the intended fallback.
qdrant = QdrantClient(
    url=os.getenv("QDRANT_URL"),
    api_key=os.getenv("QDRANT_SERVICE_KEY"),
    timeout=60,
)

# Embedding model used to vectorize documents; configured to run on CPU.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/static-similarity-mrl-multilingual-v1",
    model_kwargs={"device": "cpu"},
)

# Name of the Qdrant collection this script works with.
collection_name = "documents"
def create_collection(vector_size: int = 1024):
    """Create the Qdrant collection if it does not exist yet.

    Existence is checked explicitly with ``collection_exists`` rather than by
    catching every exception from ``get_collection``, so connection or
    authentication failures propagate to the caller instead of being mistaken
    for "collection missing".

    Args:
        vector_size: Dimension configured for the collection's vectors. It
            should equal the length of the vectors that will be upserted.
            Defaults to 1024, the value this script has always used.
    """
    if qdrant.collection_exists(collection_name):
        print(f"Colección '{collection_name}' ya existe")
        return

    print(f"Creando colección '{collection_name}'...")
    qdrant.create_collection(
        collection_name=collection_name,
        vectors_config=VectorParams(
            size=vector_size,
            distance=Distance.COSINE,
        ),
    )
    print("Colección creada exitosamente")
def upload_embeddings_from_jsonl(file_path: str) -> bool:
    """Embed the question/answer records of a JSONL file and upsert them into Qdrant.

    Every non-blank line of the file is parsed as a JSON object that must
    provide the keys ``Question``, ``Final answer`` and ``task_id``. The
    question and answer are merged into one text, embedded, and stored as a
    point whose id is the record's 0-based position in the file.

    Args:
        file_path: Path of the JSONL file to read.

    Returns:
        True when the upsert call completed. False when the file held no
        records or when building/upserting the points failed; in the latter
        case the error is printed rather than raised.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks one of the required keys.
    """
    # JSON text is UTF-8 by specification; do not rely on the platform's
    # default encoding when decoding the file.
    with open(file_path, 'r', encoding='utf-8') as jsonl_file:
        # Blank lines (typically a trailing newline) are skipped because
        # json.loads would reject them.
        records = [json.loads(line) for line in jsonl_file if line.strip()]

    if not records:
        # Nothing to embed; avoid sending an empty upsert request.
        print(f"No hay documentos para subir en {file_path}")
        return False

    docs = []
    for sample in records:
        content = f"Question : {sample['Question']}\n\nFinal answer : {sample['Final answer']}"
        docs.append({
            "content": content,
            "metadata": {"source": sample['task_id']},
            "embedding": embeddings.embed_query(content),
        })

    print(f"Subiendo {len(docs)} documentos a Qdrant...")
    try:
        points = [
            PointStruct(
                id=idx,
                vector=doc["embedding"],
                payload={
                    "content": doc["content"],
                    "metadata": doc["metadata"],
                },
            )
            for idx, doc in enumerate(docs)
        ]
        response = qdrant.upsert(
            collection_name=collection_name,
            points=points,
            wait=True,
        )
    except Exception as exception:
        # Best effort: report the problem and let the caller decide what to do.
        print("Error inserting data into Qdrant:", exception)
        return False

    print(response)
    return True
def main():
    """Ensure the collection exists, then upload embeddings from the JSONL file."""
    create_collection()

    # Adjust the path if necessary.
    source_path = "./metadata.jsonl"
    if not os.path.exists(source_path):
        print(f"Archivo {source_path} no encontrado")
        return

    print(f"Subiendo embeddings desde {source_path}...")
    upload_embeddings_from_jsonl(source_path)
    # NOTE(review): this message is printed whenever the call above returns
    # without raising; it does not inspect the outcome of the upload.
    print("¡Embeddings subidos exitosamente!")
# Run the upload only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()