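# QuickLit: a small Streamlit app that answers questions about an uploaded PDF.
# It chunks the extracted text, embeds the chunks with a sentence-transformers
# model, retrieves the closest chunks from a FAISS index, and runs an extractive
# QA model over the retrieved context.
# To try it locally (the filename is an assumption): streamlit run app.py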
import streamlit as st
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
from transformers import pipeline
import faiss
import numpy as np

# Load models
embedder = SentenceTransformer('all-MiniLM-L6-v2')
qa_pipeline = pipeline('question-answering', model='distilbert-base-uncased-distilled-squad')
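# The MiniLM model maps text to dense vectors for retrieval; the DistilBERT
# SQuAD model extracts an answer span from the retrieved context. In a
# longer-lived app these would likely be wrapped in a function decorated with
# @st.cache_resource so Streamlit does not reload them on every rerun.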
st.set_page_config(page_title="QuickLit - AI Research Assistant")
st.title("📚 QuickLit: Literature Q&A Assistant")

# File upload
uploaded_file = st.file_uploader("Upload a research paper (PDF)", type=["pdf"])
if uploaded_file:
    reader = PdfReader(uploaded_file)
    full_text = ""
    for page in reader.pages:
        # extract_text() can return None for pages with no extractable text
        full_text += page.extract_text() or ""
    # Split text into chunks of ~3 sentences each
    sentences = full_text.split('. ')
    chunks = ['. '.join(sentences[i:i+3]) for i in range(0, len(sentences), 3)]
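    # This naive split on '. ' is enough for a demo; it ignores abbreviations
    # and line breaks, so a proper sentence tokenizer (e.g. NLTK's sent_tokenize)
    # would give cleaner chunks. Noted as an alternative, not a change to the
    # original approach.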
    # Generate embeddings
    st.info("🔍 Generating embeddings...")
    embeddings = embedder.encode(chunks)

    # Create FAISS index
    index = faiss.IndexFlatL2(embeddings[0].shape[0])
    index.add(np.array(embeddings))
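    # IndexFlatL2 does exact (brute-force) L2 nearest-neighbor search, which is
    # plenty fast for the few hundred chunks a single paper produces.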
    # Input question
    question = st.text_input("Ask a question about the paper:")

    if question:
        # Embed the question
        q_embedding = embedder.encode([question])

        # Retrieve the most similar chunks (distances D, chunk indices I);
        # cap k at the number of chunks so FAISS never returns padding (-1) ids
        k = min(3, len(chunks))
        D, I = index.search(np.array(q_embedding), k=k)
        retrieved_contexts = [chunks[i] for i in I[0]]
        context = " ".join(retrieved_contexts)

        # Answer with the extractive QA model over the retrieved context
        st.info("💡 Answering with AI...")
        answer = qa_pipeline(question=question, context=context)
        st.success(f"**Answer:** {answer['answer']}")
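        # The QA pipeline also returns a confidence score for the extracted span;
        # showing it, as sketched below, is an optional extension rather than a
        # required step.
        st.caption(f"Confidence: {answer['score']:.2f}")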