Spaces:

mihirinamdar
/

arxiv-rag-optimized

Running

App Files Community

mihirinamdar committed on Jun 12

Commit

da1b347

verified ·

1 Parent(s): 82723d8

Update app.py

Browse files

Files changed (1) hide show

app.py +614 -487

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 """
-Enhanced ArXiv RAG System - Hugging Face Spaces Compatible Version
 """
 import os
@@ -13,12 +13,16 @@ from datetime import datetime, timedelta
 import logging
 import tempfile
 import shutil
 # Core ML libraries
 import torch
 from sentence_transformers import SentenceTransformer, CrossEncoder
-from transformers import pipeline
 import gradio as gr
 # BM25 and text processing
 from sklearn.feature_extraction.text import TfidfVectorizer
@@ -32,17 +36,25 @@ from nltk.stem import PorterStemmer
 try:
     nltk.data.find('tokenizers/punkt')
 except LookupError:
-    nltk.download('punkt')
 try:
     nltk.data.find('corpora/stopwords')
 except LookupError:
-    nltk.download('stopwords')
 # Setup logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 @dataclass
 class Paper:
     """Data class for storing paper information"""
@@ -63,9 +75,36 @@ class Chunk:
     chunk_type: str
     metadata: Dict[str, Any]
-class BM25Retriever:
-    """BM25 retriever for keyword-based search"""
     def __init__(self, k1: float = 1.5, b: float = 0.75):
         self.k1 = k1
         self.b = b
@@ -77,542 +116,598 @@ class BM25Retriever:
             self.stop_words = set(stopwords.words('english'))
         except:
             self.stop_words = set()
     def preprocess_text(self, text: str) -> List[str]:
         """Preprocess text for BM25"""
-        tokens = word_tokenize(text.lower())
-        processed_tokens = [
-            self.stemmer.stem(token)
-            for token in tokens
-            if token.isalpha() and token not in self.stop_words
-        ]
-        return processed_tokens
     def fit(self, documents: List[str]):
-        """Fit BM25 on documents"""
-        self.documents = [self.preprocess_text(doc) for doc in documents]
-        self.doc_lengths = [len(doc) for doc in self.documents]
-        self.avg_doc_length = sum(self.doc_lengths) / len(self.doc_lengths) if self.doc_lengths else 0
-        vocab = set()
-        for doc in self.documents:
-            vocab.update(doc)
-        self.vocab = list(vocab)
-        self.term_freqs = []
-        for doc in self.documents:
-            tf = {}
-            for term in doc:
-                tf[term] = tf.get(term, 0) + 1
-            self.term_freqs.append(tf)
-        self.idf = {}
-        for term in self.vocab:
-            containing_docs = sum(1 for tf in self.term_freqs if term in tf)
-            self.idf[term] = np.log((len(self.documents) - containing_docs + 0.5) / (containing_docs + 0.5))
-    def score(self, query: str, top_k: int = 10) -> List[Tuple[int, float]]:
-        """Score documents against query"""
-        query_terms = self.preprocess_text(query)
-        scores = []
-        for i, (doc, tf, doc_len) in enumerate(zip(self.documents, self.term_freqs, self.doc_lengths)):
-            score = 0
-            for term in query_terms:
-                if term in tf:
-                    term_freq = tf[term]
-                    idf = self.idf.get(term, 0)
-                    numerator = term_freq * (self.k1 + 1)
-                    denominator = term_freq + self.k1 * (1 - self.b + self.b * (doc_len / self.avg_doc_length))
-                    score += idf * (numerator / denominator)
-            scores.append((i, score))
-        scores.sort(key=lambda x: x[1], reverse=True)
-        return scores[:top_k]
-class SimpleVectorStore:
-    """Simple in-memory vector store for HF Spaces compatibility"""
     def __init__(self):
-        self.embeddings = []
-        self.documents = []
-        self.metadatas = []
-        self.ids = []
-    def add(self, ids: List[str], embeddings: List[List[float]],
-            documents: List[str], metadatas: List[Dict]):
-        """Add documents to the store"""
-        self.ids.extend(ids)
-        self.embeddings.extend(embeddings)
-        self.documents.extend(documents)
-        self.metadatas.extend(metadatas)
-    def query(self, query_embedding: List[float], n_results: int = 10) -> Dict:
-        """Query the vector store"""
-        if not self.embeddings:
-            return {'ids': [[]], 'documents': [[]], 'metadatas': [[]]}
-        # Calculate cosine similarities
-        query_embedding = np.array(query_embedding)
-        similarities = []
-        for emb in self.embeddings:
-            emb_array = np.array(emb)
-            similarity = np.dot(query_embedding, emb_array) / (
-                np.linalg.norm(query_embedding) * np.linalg.norm(emb_array)
             )
-            similarities.append(similarity)
-        # Get top results
-        top_indices = np.argsort(similarities)[::-1][:n_results]
-        return {
-            'ids': [[self.ids[i] for i in top_indices]],
-            'documents': [[self.documents[i] for i in top_indices]],
-            'metadatas': [[self.metadatas[i] for i in top_indices]]
-        }
-    def get(self, ids: Optional[List[str]] = None) -> Dict:
-        """Get documents by IDs or all documents"""
-        if ids is None:
-            return {
-                'ids': self.ids,
-                'documents': self.documents,
-                'metadatas': self.metadatas
-            }
-        else:
-            indices = [self.ids.index(id_) for id_ in ids if id_ in self.ids]
-            return {
-                'ids': [self.ids[i] for i in indices],
-                'documents': [self.documents[i] for i in indices],
-                'metadatas': [self.metadatas[i] for i in indices]
-            }
-    def clear(self):
-        """Clear the store"""
-        self.embeddings.clear()
-        self.documents.clear()
-        self.metadatas.clear()
-        self.ids.clear()
-class EnhancedArxivRAG:
-    """Enhanced RAG system optimized for Hugging Face Spaces"""
-    def __init__(self):
-        logger.info("Initializing Enhanced ArXiv RAG System for HF Spaces...")
-        # Use smaller, faster models for HF Spaces
-        self.embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
-        self.reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-2-v2')  # Smaller reranker
-        self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn",
-                                  device=0 if torch.cuda.is_available() else -1)
-        # Use simple vector store instead of ChromaDB for HF Spaces
-        self.vector_store = SimpleVectorStore()
-        self.bm25_retriever = BM25Retriever()
-        # Cache for papers and chunks
-        self.papers_cache = {}
-        self.chunks_cache = {}
-        self.bm25_fitted = False
-        logger.info("RAG system initialized successfully!")
-    def fetch_papers(self, query: str, max_results: int = 15,
-                    categories: Optional[List[str]] = None) -> List[Paper]:
-        """Fetch papers from ArXiv"""
-        search_query = query
-        if categories:
-            category_filter = " OR ".join([f"cat:{cat}" for cat in categories])
-            search_query = f"({query}) AND ({category_filter})"
-        logger.info(f"Fetching papers with query: {search_query}")
         try:
             search = arxiv.Search(
                 query=search_query,
                 max_results=max_results,
-                sort_by=arxiv.SortCriterion.Relevance
             )
-            papers = []
             for result in search.results():
-                paper = Paper(
-                    id=result.entry_id.split('/')[-1],
-                    title=result.title.strip().replace('\n', ' '),
-                    abstract=result.summary.strip().replace('\n', ' '),
-                    authors=[author.name for author in result.authors],
-                    categories=result.categories,
-                    published=result.published.replace(tzinfo=None),
-                    url=result.entry_id
-                )
-                papers.append(paper)
-                self.papers_cache[paper.id] = paper
-            logger.info(f"Fetched {len(papers)} papers")
             return papers
         except Exception as e:
-            logger.error(f"Error fetching papers: {e}")
             return []
     def create_chunks(self, papers: List[Paper]) -> List[Chunk]:
         """Create text chunks from papers"""
         chunks = []
         for paper in papers:
-            # Title chunk
-            title_chunk = Chunk(
-                id=f"{paper.id}_title",
-                paper_id=paper.id,
-                text=paper.title,
-                chunk_type="title",
-                metadata={
-                    "authors": paper.authors,
-                    "categories": paper.categories,
-                    "published": paper.published.isoformat(),
-                    "url": paper.url
-                }
-            )
-            # Abstract chunk
-            abstract_chunk = Chunk(
-                id=f"{paper.id}_abstract",
-                paper_id=paper.id,
-                text=paper.abstract,
-                chunk_type="abstract",
-                metadata={
-                    "authors": paper.authors,
-                    "categories": paper.categories,
-                    "published": paper.published.isoformat(),
-                    "url": paper.url
-                }
-            )
-            # Combined chunk
-            combined_text = f"Title: {paper.title}\n\nAbstract: {paper.abstract}"
-            combined_chunk = Chunk(
-                id=f"{paper.id}_combined",
-                paper_id=paper.id,
-                text=combined_text,
-                chunk_type="combined",
-                metadata={
-                    "authors": paper.authors,
-                    "categories": paper.categories,
-                    "published": paper.published.isoformat(),
-                    "url": paper.url
-                }
-            )
-            chunks.extend([title_chunk, abstract_chunk, combined_chunk])
-            # Cache chunks
-            for chunk in [title_chunk, abstract_chunk, combined_chunk]:
-                self.chunks_cache[chunk.id] = chunk
-        return chunks
-    def process_and_store(self, papers: List[Paper]):
-        """Process papers and store in vector store"""
-        logger.info("Processing and storing papers...")
-        # Clear previous data
-        self.vector_store.clear()
-        # Create chunks
-        chunks = self.create_chunks(papers)
-        if not chunks:
-            return
-        # Generate embeddings
-        texts = [chunk.text for chunk in chunks]
-        logger.info("Generating embeddings...")
-        embeddings = self.embedding_model.encode(texts, show_progress_bar=False)
-        # Store in vector store
-        ids = [chunk.id for chunk in chunks]
-        metadatas = [chunk.metadata for chunk in chunks]
-        self.vector_store.add(
-            ids=ids,
-            embeddings=embeddings.tolist(),
-            documents=texts,
-            metadatas=metadatas
-        )
-        # Fit BM25
-        logger.info("Fitting BM25...")
-        self.bm25_retriever.fit(texts)
-        self.bm25_fitted = True
-        logger.info(f"Stored {len(chunks)} chunks")
-    def hybrid_search(self, query: str, top_k: int = 10,
-                     semantic_weight: float = 0.7) -> List[Dict]:
-        """Perform hybrid search"""
-        # Semantic search
-        query_embedding = self.embedding_model.encode([query])
-        semantic_results = self.vector_store.query(
-            query_embedding=query_embedding[0].tolist(),
-            n_results=top_k * 2
-        )
-        # BM25 search
-        bm25_results = []
-        if self.bm25_fitted:
-            all_docs = self.vector_store.get()
-            bm25_scores = self.bm25_retriever.score(query, top_k * 2)
-            for idx, score in bm25_scores:
-                if idx < len(all_docs['ids']):
-                    bm25_results.append({
-                        'id': all_docs['ids'][idx],
-                        'document': all_docs['documents'][idx],
-                        'metadata': all_docs['metadatas'][idx],
-                        'score': score
-                    })
-        # Combine results using RRF
-        combined_scores = {}
-        bm25_weight = 1.0 - semantic_weight
-        # Add semantic scores
-        for i, doc_id in enumerate(semantic_results['ids'][0]):
-            rank = i + 1
-            combined_scores[doc_id] = combined_scores.get(doc_id, 0) + semantic_weight / rank
-        # Add BM25 scores
-        for i, result in enumerate(bm25_results):
-            doc_id = result['id']
-            rank = i + 1
-            combined_scores[doc_id] = combined_scores.get(doc_id, 0) + bm25_weight / rank
-        # Sort by combined score
-        sorted_results = sorted(combined_scores.items(), key=lambda x: x[1], reverse=True)
-        # Prepare final results
-        final_results = []
-        for doc_id, score in sorted_results[:top_k]:
-            doc_result = self.vector_store.get(ids=[doc_id])
-            if doc_result['ids']:
-                final_results.append({
-                    'id': doc_id,
-                    'document': doc_result['documents'][0],
-                    'metadata': doc_result['metadatas'][0],
-                    'combined_score': score
-                })
-        return final_results
-    def rerank_results(self, query: str, results: List[Dict], top_k: int = 5) -> List[Dict]:
-        """Rerank results using cross-encoder"""
-        if not results:
             return results
-        # Prepare query-document pairs
-        query_doc_pairs = [(query, result['document']) for result in results]
-        # Get reranking scores
-        rerank_scores = self.reranker.predict(query_doc_pairs)
-        # Add rerank scores to results
-        for i, result in enumerate(results):
-            result['rerank_score'] = float(rerank_scores[i])
-        # Sort by rerank score
-        reranked_results = sorted(results, key=lambda x: x['rerank_score'], reverse=True)
-        return reranked_results[:top_k]
-    def generate_answer(self, query: str, context_chunks: List[Dict]) -> str:
-        """Generate answer using retrieved context"""
-        if not context_chunks:
-            return "No relevant information found to answer your query."
-        # Combine context from top chunks
-        context_texts = [chunk['document'] for chunk in context_chunks[:3]]
-        combined_context = "\n\n".join(context_texts)
-        # Limit context length
-        max_context_length = 800
-        if len(combined_context) > max_context_length:
-            combined_context = combined_context[:max_context_length] + "..."
         try:
-            summary_input = f"Based on the following research papers, answer this question: {query}\n\nContext: {combined_context}"
-            summary = self.summarizer(summary_input,
-                                    max_length=120,
-                                    min_length=30,
-                                    do_sample=False)[0]['summary_text']
-            return summary
         except Exception as e:
-            logger.error(f"Error generating summary: {e}")
-            return f"Based on the retrieved papers about '{query}', here are the key findings:\n\n" + \
-                   "\n\n".join([chunk['document'][:150] + "..." for chunk in context_chunks[:2]])
-    def search_and_answer(self, query: str, max_papers: int = 15,
-                         top_k_retrieval: int = 10, top_k_rerank: int = 5,
-                         categories: Optional[List[str]] = None,
-                         semantic_weight: float = 0.7) -> Dict[str, Any]:
-        """Main search and answer pipeline"""
-        if not query.strip():
-            return {
-                'answer': "Please enter a valid research query.",
-                'papers': [],
-                'retrieved_chunks': [],
-                'search_stats': {'papers_found': 0, 'chunks_retrieved': 0}
-            }
         try:
-            # Fetch papers
-            papers = self.fetch_papers(query, max_papers, categories)
-            if not papers:
-                return {
-                    'answer': "No papers found for your query. Please try different keywords.",
-                    'papers': [],
-                    'retrieved_chunks': [],
-                    'search_stats': {'papers_found': 0, 'chunks_retrieved': 0}
-                }
-            # Process and store papers
-            self.process_and_store(papers)
-            # Hybrid search
-            search_results = self.hybrid_search(query, top_k_retrieval, semantic_weight)
-            # Rerank results
-            reranked_results = self.rerank_results(query, search_results, top_k_rerank)
-            # Generate answer
-            answer = self.generate_answer(query, reranked_results)
-            # Prepare unique papers
-            unique_papers = {}
-            for chunk in reranked_results:
-                paper_id = chunk['id'].split('_')[0]
-                if paper_id in self.papers_cache and paper_id not in unique_papers:
-                    paper = self.papers_cache[paper_id]
-                    unique_papers[paper_id] = {
-                        'title': paper.title,
-                        'authors': paper.authors,
-                        'abstract': paper.abstract,
-                        'url': paper.url,
-                        'categories': paper.categories,
-                        'published': paper.published.strftime('%Y-%m-%d')
                     }
-            return {
-                'answer': answer,
-                'papers': list(unique_papers.values()),
-                'retrieved_chunks': reranked_results,
-                'search_stats': {
-                    'papers_found': len(papers),
-                    'chunks_retrieved': len(reranked_results),
-                    'unique_papers_in_results': len(unique_papers)
-                }
-            }
         except Exception as e:
-            logger.error(f"Error in search_and_answer: {e}")
-            return {
-                'answer': f"An error occurred while processing your query: {str(e)}",
-                'papers': [],
-                'retrieved_chunks': [],
-                'search_stats': {'papers_found': 0, 'chunks_retrieved': 0}
-            }
-# Global RAG instance
 rag_system = None
-def initialize_rag():
-    """Initialize RAG system"""
     global rag_system
-    if rag_system is None:
-        rag_system = EnhancedArxivRAG()
-    return rag_system
-def search_papers(query: str, max_papers: int = 15, top_k_retrieval: int = 10,
-                 top_k_rerank: int = 5, categories: str = "",
-                 semantic_weight: float = 0.7) -> tuple:
-    """Main search function for Gradio interface"""
-    if not query.strip():
-        return "❌ Please enter a research topic or question.", "", ""
     try:
-        # Initialize RAG system
-        rag = initialize_rag()
         # Parse categories
-        category_list = None
         if categories.strip():
             category_list = [cat.strip() for cat in categories.split(',') if cat.strip()]
-        # Perform search
-        result = rag.search_and_answer(
-            query=query,
-            max_papers=max_papers,
-            top_k_retrieval=top_k_retrieval,
-            top_k_rerank=top_k_rerank,
-            categories=category_list,
-            semantic_weight=semantic_weight
-        )
-        # Format answer
-        answer = f"## 🤖 AI-Generated Answer\n\n{result['answer']}\n\n"
-        answer += f"**Search Statistics:**\n"
-        answer += f"- Papers found: {result['search_stats']['papers_found']}\n"
-        answer += f"- Chunks retrieved: {result['search_stats']['chunks_retrieved']}\n"
-        answer += f"- Unique papers in results: {result['search_stats']['unique_papers_in_results']}\n\n"
-        # Format papers
-        papers_md = "## 📚 Relevant Papers\n\n"
-        for i, paper in enumerate(result['papers'], 1):
-            papers_md += f"### {i}. {paper['title']}\n\n"
-            papers_md += f"**Authors:** {', '.join(paper['authors'][:3])}{'...' if len(paper['authors']) > 3 else ''}\n\n"
-            papers_md += f"**Categories:** {', '.join(paper['categories'])}\n\n"
-            papers_md += f"**Published:** {paper['published']}\n\n"
-            papers_md += f"**Abstract:** {paper['abstract'][:250]}{'...' if len(paper['abstract']) > 250 else ''}\n\n"
-            papers_md += f"**URL:** [{paper['url']}]({paper['url']})\n\n"
-            papers_md += "---\n\n"
-        # Create papers dataframe
-        papers_df = pd.DataFrame([
-            {
-                'Title': paper['title'][:50] + '...' if len(paper['title']) > 50 else paper['title'],
-                'Authors': ', '.join(paper['authors'][:2]) + ('...' if len(paper['authors']) > 2 else ''),
-                'Categories': ', '.join(paper['categories'][:2]),
-                'Published': paper['published'],
-                'URL': paper['url']
-            }
-            for paper in result['papers']
-        ])
-        return answer, papers_md, papers_df
     except Exception as e:
-        logger.error(f"Error processing query: {e}")
         error_msg = f"❌ An error occurred: {str(e)}\n\nPlease try different keywords or check your internet connection."
         return error_msg, "", pd.DataFrame()
 # Create Gradio interface
 def create_interface():
-    """Create Gradio interface"""
     css = """
     .gradio-container {
         font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
     }
     """
-    with gr.Blocks(css=css, title="Enhanced ArXiv RAG System") as interface:
-        gr.HTML("""
         <div style="text-align: center; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); color: white; padding: 2rem; border-radius: 10px; margin-bottom: 2rem;">
             <h1>🚀 Enhanced ArXiv RAG System</h1>
-            <p>Advanced scientific paper discovery with semantic search, BM25, and neural reranking</p>
         </div>
         """)
@@ -634,29 +729,45 @@ def create_interface():
                     value=""
                 )
-                with gr.Accordion("Advanced Settings", open=False):
                     with gr.Row():
                         top_k_retrieval = gr.Slider(5, 15, value=10, step=1, label="Top-K Retrieval")
                         top_k_rerank = gr.Slider(3, 8, value=5, step=1, label="Top-K Reranking")
-                search_btn = gr.Button("🔍 Search Papers", variant="primary")
             with gr.Column(scale=1):
                 gr.HTML("""
                 <div style="background: #e3f2fd; padding: 1rem; border-radius: 8px;">
-                    <h4>💡 Tips</h4>
                     <ul>
                         <li>Use specific technical terms</li>
                         <li>Try different category filters</li>
                         <li>Adjust semantic weight for different search styles</li>
                     </ul>
-                    <h4>📊 Categories</h4>
                     <ul>
                         <li><code>cs.AI</code> - Artificial Intelligence</li>
                         <li><code>cs.CL</code> - Computation and Language</li>
                         <li><code>cs.LG</code> - Machine Learning</li>
                         <li><code>cs.CV</code> - Computer Vision</li>
                     </ul>
                 </div>
                 """)
@@ -669,15 +780,20 @@ def create_interface():
                 papers_output = gr.Markdown(label="Relevant Papers")
             with gr.TabItem("📊 Papers Table"):
-                papers_table = gr.Dataframe(label="Papers Summary")
         # Examples
         gr.Examples(
             examples=[
                 ["transformer attention mechanisms", 15, 10, 5, "cs.CL, cs.AI", 0.7],
-                ["graph neural networks", 12, 8, 4, "cs.LG", 0.6],
                 ["computer vision deep learning", 15, 10, 5, "cs.CV", 0.8],
-                ["reinforcement learning", 18, 10, 5, "cs.AI", 0.7]
             ],
             inputs=[query_input, max_papers, top_k_retrieval, top_k_rerank, categories_input, semantic_weight]
         )
@@ -691,7 +807,8 @@ def create_interface():
         gr.HTML("""
         <div style="text-align: center; margin-top: 2rem; padding: 1rem; background: #f5f5f5; border-radius: 8px;">
-            <p><strong>Enhanced ArXiv RAG System</strong> | Semantic Search + BM25 + Neural Reranking</p>
         </div>
         """)
@@ -699,6 +816,16 @@ def create_interface():
 # Launch interface
 if __name__ == "__main__":
     interface = create_interface()
-    interface.launch()

 """
+Enhanced ArXiv RAG System - GPU Optimized for Hugging Face Spaces
 """
 import os
 import logging
 import tempfile
 import shutil
+import gc
+import time
 # Core ML libraries
 import torch
+import torch.nn.functional as F
 from sentence_transformers import SentenceTransformer, CrossEncoder
+from transformers import pipeline, AutoTokenizer, AutoModel
 import gradio as gr
+import spaces  # HuggingFace Spaces GPU support
 # BM25 and text processing
 from sklearn.feature_extraction.text import TfidfVectorizer
 try:
     nltk.data.find('tokenizers/punkt')
 except LookupError:
+    nltk.download('punkt', quiet=True)
 try:
     nltk.data.find('corpora/stopwords')
 except LookupError:
+    nltk.download('stopwords', quiet=True)
 # Setup logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+# GPU Configuration
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+logger.info(f"Using device: {DEVICE}")
+if DEVICE == "cuda":
+    logger.info(f"GPU: {torch.cuda.get_device_name()}")
+    logger.info(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
 @dataclass
 class Paper:
     """Data class for storing paper information"""
     chunk_type: str
     metadata: Dict[str, Any]
+class GPUMemoryManager:
+    """Manages GPU memory efficiently"""
+    @staticmethod
+    def clear_cache():
+        """Clear GPU cache"""
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+            gc.collect()
+    @staticmethod
+    def get_memory_info():
+        """Get GPU memory information"""
+        if torch.cuda.is_available():
+            allocated = torch.cuda.memory_allocated() / 1e9
+            cached = torch.cuda.memory_reserved() / 1e9
+            return f"Allocated: {allocated:.1f}GB, Cached: {cached:.1f}GB"
+        return "CPU mode"
+    @staticmethod
+    def optimize_memory():
+        """Optimize memory usage"""
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+            torch.backends.cudnn.benchmark = True
+            torch.backends.cuda.matmul.allow_tf32 = True
+class BM25Retriever:
+    """Optimized BM25 retriever for keyword-based search"""
     def __init__(self, k1: float = 1.5, b: float = 0.75):
         self.k1 = k1
         self.b = b
             self.stop_words = set(stopwords.words('english'))
         except:
             self.stop_words = set()
     def preprocess_text(self, text: str) -> List[str]:
         """Preprocess text for BM25"""
+        try:
+            tokens = word_tokenize(text.lower())
+            processed_tokens = [
+                self.stemmer.stem(token)
+                for token in tokens
+                if token.isalpha() and token not in self.stop_words and len(token) > 2
+            ]
+            return processed_tokens
+        except Exception as e:
+            logger.warning(f"Text preprocessing error: {e}")
+            return text.lower().split()
     def fit(self, documents: List[str]):
+        """Fit BM25 on documents with memory optimization"""
+        try:
+            self.documents = [self.preprocess_text(doc) for doc in documents]
+            self.doc_lengths = [len(doc) for doc in self.documents]
+            self.avg_doc_length = sum(self.doc_lengths) / len(self.doc_lengths) if self.doc_lengths else 0
+            # Build vocabulary
+            vocab = set()
+            for doc in self.documents:
+                vocab.update(doc)
+            self.vocab = list(vocab)
+            # Calculate term frequencies
+            self.term_freqs = []
+            for doc in self.documents:
+                tf = {}
+                for term in doc:
+                    tf[term] = tf.get(term, 0) + 1
+                self.term_freqs.append(tf)
+            # Calculate IDF
+            self.idf = {}
+            for term in self.vocab:
+                df = sum(1 for doc in self.documents if term in doc)
+                self.idf[term] = np.log((len(self.documents) - df + 0.5) / (df + 0.5))
+        except Exception as e:
+            logger.error(f"BM25 fitting error: {e}")
+            self.documents = []
+    def get_scores(self, query: str) -> np.ndarray:
+        """Get BM25 scores for query"""
+        try:
+            query_terms = self.preprocess_text(query)
+            scores = np.zeros(len(self.documents))
+            for i, doc_tf in enumerate(self.term_freqs):
+                score = 0
+                doc_length = self.doc_lengths[i]
+                for term in query_terms:
+                    if term in doc_tf:
+                        tf = doc_tf[term]
+                        idf = self.idf.get(term, 0)
+                        score += idf * (tf * (self.k1 + 1)) / (
+                            tf + self.k1 * (1 - self.b + self.b * (doc_length / self.avg_doc_length))
+                        )
+                scores[i] = score
+            return scores
+        except Exception as e:
+            logger.error(f"BM25 scoring error: {e}")
+            return np.zeros(len(self.documents))
+class OptimizedRagSystem:
+    """GPU-optimized RAG system for ArXiv papers"""
     def __init__(self):
+        self.papers = []
+        self.chunks = []
+        self.embeddings = None
+        self.embedding_model = None
+        self.reranker = None
+        self.bm25 = BM25Retriever()
+        self.generator = None
+        self.memory_manager = GPUMemoryManager()
+        # Initialize models
+        self._load_models()
+    def _load_models(self):
+        """Load models with GPU optimization"""
+        try:
+            logger.info("Loading models...")
+            # Load embedding model
+            self.embedding_model = SentenceTransformer(
+                'sentence-transformers/all-MiniLM-L6-v2',
+                device=DEVICE
             )
+            # Optimize for GPU if available
+            if DEVICE == "cuda":
+                self.embedding_model.half()  # Use FP16 for memory efficiency
+            # Load reranker (smaller model for efficiency)
+            self.reranker = CrossEncoder(
+                'cross-encoder/ms-marco-MiniLM-L-6-v2',
+                device=DEVICE
+            )
+            # Load text generator with optimization
+            self.generator = pipeline(
+                "text-generation",
+                model="microsoft/DialoGPT-small",  # Smaller model for efficiency
+                tokenizer="microsoft/DialoGPT-small",
+                device=0 if DEVICE == "cuda" else -1,
+                torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
+                return_full_text=False,
+                max_new_tokens=512,
+                do_sample=True,
+                temperature=0.7,
+                pad_token_id=50256
+            )
+            self.memory_manager.optimize_memory()
+            logger.info("Models loaded successfully")
+        except Exception as e:
+            logger.error(f"Model loading error: {e}")
+            raise
+    def search_arxiv(self, query: str, max_results: int = 15, categories: List[str] = None) -> List[Paper]:
+        """Search ArXiv and convert the results into ``Paper`` objects.
+
+        Args:
+            query: Free-text search query.
+            max_results: Maximum number of results requested from ArXiv.
+            categories: Optional ArXiv category codes; when given, the query
+                is AND-ed with an OR-list of ``cat:<code>`` filters.
+
+        Returns:
+            The papers that could be parsed. A result that fails to convert
+            is logged and skipped; if the search itself fails, the error is
+            logged and an empty list is returned.
+        """
         try:
+            papers = []
+            search_query = query
+            if categories:
+                category_filter = " OR ".join([f"cat:{cat.strip()}" for cat in categories])
+                search_query = f"({query}) AND ({category_filter})"
+            logger.info(f"Searching ArXiv for: {search_query}")
             search = arxiv.Search(
                 query=search_query,
                 max_results=max_results,
+                sort_by=arxiv.SortCriterion.Relevance,
+                sort_order=arxiv.SortOrder.Descending
             )
             for result in search.results():
+                try:
+                    paper = Paper(
+                        # Last path segment of the entry URL is used as the id.
+                        id=result.entry_id.split('/')[-1],
+                        title=result.title,
+                        abstract=result.summary,
+                        authors=[author.name for author in result.authors],
+                        categories=result.categories,
+                        published=result.published,
+                        url=result.entry_id
+                    )
+                    papers.append(paper)
+                    # Rate limiting
+                    # (a fixed 0.1 s pause after every successfully parsed result)
+                    time.sleep(0.1)
+                except Exception as e:
+                    logger.warning(f"Error processing paper: {e}")
+                    continue
+            logger.info(f"Found {len(papers)} papers")
             return papers
         except Exception as e:
+            logger.error(f"ArXiv search error: {e}")
             return []
     def create_chunks(self, papers: List[Paper]) -> List[Chunk]:
         """Create text chunks from papers"""
+        # Each paper yields one "title" chunk plus one "abstract" chunk per
+        # group of up to three abstract sentences. Every chunk keeps a
+        # reference to its paper under metadata["paper"]. If chunking a paper
+        # raises, a warning is logged and processing moves on to the next
+        # paper; chunks already appended for it are kept.
         chunks = []
         for paper in papers:
+            try:
+                # Title chunk
+                chunks.append(Chunk(
+                    id=f"{paper.id}_title",
+                    paper_id=paper.id,
+                    text=paper.title,
+                    chunk_type="title",
+                    metadata={"paper": paper}
+                ))
+                # Abstract chunks (split if too long)
+                abstract_sentences = sent_tokenize(paper.abstract)
+                chunk_size = 3  # sentences per chunk
+                for i in range(0, len(abstract_sentences), chunk_size):
+                    chunk_text = ' '.join(abstract_sentences[i:i + chunk_size])
+                    chunks.append(Chunk(
+                        # i is the index of the chunk's first sentence
+                        # (0, 3, 6, ...), not a running chunk counter.
+                        id=f"{paper.id}_abstract_{i}",
+                        paper_id=paper.id,
+                        text=chunk_text,
+                        chunk_type="abstract",
+                        metadata={"paper": paper}
+                    ))
+            except Exception as e:
+                logger.warning(f"Error creating chunks for paper {paper.id}: {e}")
+                continue
+        return chunks
+    @spaces.GPU(duration=120)  # HuggingFace Spaces GPU decorator
+    def embed_chunks(self, chunks: List[Chunk]) -> np.ndarray:
+        """Embed the text of every chunk with the sentence-embedding model.
+
+        Args:
+            chunks: Chunks whose ``text`` is encoded, in order.
+
+        Returns:
+            The per-batch embeddings stacked row-wise with ``np.vstack``, in
+            chunk order. An empty array is returned when ``chunks`` is empty
+            or when any error occurs (the error is logged, not raised).
+        """
+        try:
+            if not chunks:
+                return np.array([])
+            logger.info(f"Creating embeddings for {len(chunks)} chunks")
+            self.memory_manager.clear_cache()
+            texts = [chunk.text for chunk in chunks]
+            # Batch processing for efficiency
+            batch_size = 32 if DEVICE == "cuda" else 8
+            embeddings = []
+            for i in range(0, len(texts), batch_size):
+                batch_texts = texts[i:i + batch_size]
+                # Autocast context on CUDA, plain no_grad context otherwise.
+                with torch.cuda.amp.autocast() if DEVICE == "cuda" else torch.no_grad():
+                    batch_embeddings = self.embedding_model.encode(
+                        batch_texts,
+                        convert_to_tensor=True,
+                        show_progress_bar=False,
+                        batch_size=len(batch_texts)
+                    )
+                    # Move to host memory before converting to NumPy.
+                    if DEVICE == "cuda":
+                        batch_embeddings = batch_embeddings.cpu()
+                    embeddings.append(batch_embeddings.numpy())
+                # Memory management
+                # i advances in steps of batch_size, so this fires on every
+                # fourth batch, starting with the first one (i == 0).
+                if i % (batch_size * 4) == 0:
+                    self.memory_manager.clear_cache()
+            result = np.vstack(embeddings) if embeddings else np.array([])
+            self.memory_manager.clear_cache()
+            logger.info(f"Created embeddings shape: {result.shape}")
+            return result
+        except Exception as e:
+            logger.error(f"Embedding error: {e}")
+            self.memory_manager.clear_cache()
+            return np.array([])
+    @spaces.GPU(duration=60)  # HuggingFace Spaces GPU decorator
+    def hybrid_retrieval(self, query: str, top_k: int = 10, semantic_weight: float = 0.7) -> List[Tuple[Chunk, float]]:
+        """Rank the stored chunks by a blend of semantic and BM25 scores.
+
+        Args:
+            query: Search query.
+            top_k: Number of highest-scoring chunks to return.
+            semantic_weight: Weight of the normalised cosine-similarity score;
+                the normalised BM25 score gets ``1 - semantic_weight``.
+
+        Returns:
+            ``(chunk, combined_score)`` pairs sorted by descending score.
+            An empty list is returned when no chunks/embeddings are stored or
+            when any error occurs (the error is logged, not raised).
+        """
+        try:
+            if not self.chunks or self.embeddings is None or len(self.embeddings) == 0:
+                return []
+            self.memory_manager.clear_cache()
+            # Semantic search
+            with torch.cuda.amp.autocast() if DEVICE == "cuda" else torch.no_grad():
+                query_embedding = self.embedding_model.encode(
+                    [query],
+                    convert_to_tensor=True,
+                    show_progress_bar=False
+                )
+                if DEVICE == "cuda":
+                    query_embedding = query_embedding.cpu()
+                query_embedding = query_embedding.numpy()
+            semantic_scores = cosine_similarity(query_embedding, self.embeddings)[0]
+            # BM25 search
+            # NOTE(review): the code below calls .min()/.max() on this value,
+            # so get_scores() presumably returns a NumPy array -- confirm.
+            bm25_scores = self.bm25.get_scores(query)
+            # Ensure same length
+            # (truncates all three sequences to the shortest one)
+            min_length = min(len(semantic_scores), len(bm25_scores), len(self.chunks))
+            semantic_scores = semantic_scores[:min_length]
+            bm25_scores = bm25_scores[:min_length]
+            chunks = self.chunks[:min_length]
+            # Normalize scores
+            # Min-max scaling; the 1e-8 term avoids division by zero when all
+            # scores are equal (every score then becomes 0).
+            if len(semantic_scores) > 0:
+                semantic_scores = (semantic_scores - semantic_scores.min()) / (semantic_scores.max() - semantic_scores.min() + 1e-8)
+            if len(bm25_scores) > 0:
+                bm25_scores = (bm25_scores - bm25_scores.min()) / (bm25_scores.max() - bm25_scores.min() + 1e-8)
+            # Combine scores
+            combined_scores = semantic_weight * semantic_scores + (1 - semantic_weight) * bm25_scores
+            # Get top results
+            top_indices = np.argsort(combined_scores)[::-1][:top_k]
+            results = [(chunks[i], float(combined_scores[i])) for i in top_indices]
+            self.memory_manager.clear_cache()
             return results
+        except Exception as e:
+            logger.error(f"Retrieval error: {e}")
+            self.memory_manager.clear_cache()
+            return []
+    @spaces.GPU(duration=60)  # HuggingFace Spaces GPU decorator
+    def rerank_results(self, query: str, results: List[Tuple[Chunk, float]], top_k: int = 5) -> List[Tuple[Chunk, float]]:
+        """Rerank retrieved chunks with the cross-encoder.
+
+        Each candidate's final score is ``0.6 * rerank + 0.4 * original``,
+        where ``rerank`` is the cross-encoder score min-max scaled over the
+        candidate set and ``original`` is the score passed in.
+
+        Args:
+            query: Search query.
+            results: ``(chunk, score)`` pairs from ``hybrid_retrieval``.
+            top_k: Number of reranked pairs to return.
+
+        Returns:
+            The ``top_k`` pairs with the highest blended score, best first.
+            If there is nothing to rerank, no reranker is loaded, or an error
+            occurs, the first ``top_k`` input pairs are returned unchanged.
+        """
         try:
+            if not results or self.reranker is None:
+                return results[:top_k]
+            self.memory_manager.clear_cache()
+            pairs = [(query, chunk.text) for chunk, _ in results]
+            with torch.cuda.amp.autocast() if DEVICE == "cuda" else torch.no_grad():
+                rerank_scores = self.reranker.predict(pairs, show_progress_bar=False)
+            # The incoming scores were min-max scaled to [0, 1] by
+            # hybrid_retrieval, while cross-encoder outputs need not share that
+            # range. Scale them the same way (same epsilon) so the 0.6/0.4
+            # weights really express the intended balance.
+            rerank_scores = np.asarray(rerank_scores, dtype=np.float64).reshape(-1)
+            lowest = rerank_scores.min()
+            spread = rerank_scores.max() - lowest
+            rerank_scores = (rerank_scores - lowest) / (spread + 1e-8)
+            # Combine with original scores
+            reranked_results = [
+                (chunk, 0.6 * float(rerank_score) + 0.4 * original_score)
+                for (chunk, original_score), rerank_score in zip(results, rerank_scores)
+            ]
+            # Sort by new scores
+            reranked_results.sort(key=lambda x: x[1], reverse=True)
+            self.memory_manager.clear_cache()
+            return reranked_results[:top_k]
         except Exception as e:
+            logger.error(f"Reranking error: {e}")
+            self.memory_manager.clear_cache()
+            return results[:top_k]
+    @spaces.GPU(duration=90)  # HuggingFace Spaces GPU decorator
+    def generate_answer(self, query: str, context_chunks: List[Chunk]) -> str:
+        """Generate an answer to ``query`` from the retrieved chunks.
+
+        Only the first three chunks are used, and the assembled context is
+        cut to 2000 characters before being placed in the prompt.
+
+        Args:
+            query: The user's question.
+            context_chunks: Retrieved chunks, best first; each is expected to
+                carry its paper under ``metadata["paper"]``.
+
+        Returns:
+            The generated text, or an explanatory message when there is no
+            usable context, the model produces no text, or generation fails
+            (failures are logged, not raised).
+        """
         try:
+            if not context_chunks or self.generator is None:
+                return "No relevant information found to answer your query."
+            self.memory_manager.clear_cache()
+            # Create context
+            context_parts = []
+            for chunk in context_chunks[:3]:  # Limit context
+                paper = chunk.metadata.get("paper")
+                if paper:
+                    context_parts.append(f"Title: {paper.title}\nContent: {chunk.text}")
+            # Without any paper-backed chunk the prompt would contain no
+            # research context at all, so do not ask the model to answer.
+            if not context_parts:
+                return "No relevant information found to answer your query."
+            context = "\n\n".join(context_parts)
+            # Create prompt
+            prompt = f"""Based on the following research papers, provide a comprehensive answer to the query:
+Query: {query}
+Research Context:
+{context[:2000]}
+Answer:"""
+            with torch.cuda.amp.autocast() if DEVICE == "cuda" else torch.no_grad():
+                response = self.generator(
+                    prompt,
+                    max_new_tokens=300,
+                    temperature=0.7,
+                    do_sample=True,
+                    pad_token_id=50256
+                )
+            answer = response[0]['generated_text'].strip()
+            self.memory_manager.clear_cache()
+            # An empty generation would otherwise show up as a blank answer.
+            if not answer:
+                return "The model did not produce an answer for this query."
+            return answer
+        except Exception as e:
+            logger.error(f"Answer generation error: {e}")
+            self.memory_manager.clear_cache()
+            return f"Error generating answer: {str(e)}"
+    def format_results(self, results: List[Tuple[Chunk, float]]) -> Tuple[str, pd.DataFrame]:
+        """Format ranked chunks as a Markdown listing and a summary table.
+
+        Chunks are grouped by paper (chunks without ``metadata["paper"]`` are
+        ignored), each paper is scored by its best chunk, and the eight
+        best papers are rendered.
+
+        Args:
+            results: ``(chunk, score)`` pairs.
+
+        Returns:
+            ``(markdown, dataframe)``. For empty input the Markdown is a
+            "no papers" message; on error it is an error message. In both
+            cases the DataFrame is empty.
+        """
+        try:
+            if not results:
+                return "No relevant papers found.", pd.DataFrame()
+            # Group by paper
+            papers_dict = {}
+            for chunk, score in results:
+                paper = chunk.metadata.get("paper")
+                if paper and paper.id not in papers_dict:
+                    papers_dict[paper.id] = {
+                        'paper': paper,
+                        'max_score': score,
+                        'chunks': [(chunk, score)]
                     }
+                elif paper:
+                    papers_dict[paper.id]['chunks'].append((chunk, score))
+                    papers_dict[paper.id]['max_score'] = max(papers_dict[paper.id]['max_score'], score)
+            # Sort by max score
+            sorted_papers = sorted(papers_dict.values(), key=lambda x: x['max_score'], reverse=True)
+            # Format markdown
+            markdown_parts = []
+            table_data = []
+            for i, paper_info in enumerate(sorted_papers[:8], 1):
+                paper = paper_info['paper']
+                score = paper_info['max_score']
+                # Titles may contain line breaks or runs of spaces; a newline
+                # inside the "### [title](url)" line would break the heading
+                # and the link, so collapse all whitespace to single spaces.
+                title = " ".join(paper.title.split())
+                # Markdown format
+                authors_str = ", ".join(paper.authors[:3])
+                if len(paper.authors) > 3:
+                    authors_str += " et al."
+                categories_str = ", ".join(paper.categories[:3])
+                published_str = paper.published.strftime('%Y-%m-%d')
+                markdown_parts.append(f"""
+### {i}. [{title}]({paper.url})
+**Authors:** {authors_str}
+**Categories:** {categories_str}
+**Published:** {published_str}
+**Relevance Score:** {score:.3f}
+**Abstract:** {paper.abstract[:300]}{'...' if len(paper.abstract) > 300 else ''}
+---
+""")
+                # Table data
+                table_data.append({
+                    'Rank': i,
+                    'Title': title[:60] + ('...' if len(title) > 60 else ''),
+                    'Authors': authors_str,
+                    'Categories': categories_str,
+                    'Published': published_str,
+                    'Score': f"{score:.3f}",
+                    'URL': paper.url
+                })
+            markdown_text = "".join(markdown_parts)
+            df = pd.DataFrame(table_data)
+            return markdown_text, df
         except Exception as e:
+            logger.error(f"Formatting error: {e}")
+            return f"Error formatting results: {str(e)}", pd.DataFrame()
+# Global RAG system instance
+# (None until initialize_system() has built it)
 rag_system = None
+def initialize_system():
+    """Create the global ``rag_system`` if it does not exist yet.
+
+    Calling this again after a successful initialisation does nothing.
+
+    Raises:
+        Exception: any error from constructing ``OptimizedRagSystem`` is
+            logged and re-raised; ``rag_system`` then stays ``None``.
+    """
     global rag_system
     try:
+        if rag_system is None:
+            logger.info("Initializing RAG system...")
+            rag_system = OptimizedRagSystem()
+            logger.info("RAG system initialized successfully")
+    except Exception as e:
+        logger.error(f"System initialization error: {e}")
+        raise
+# Main search function
+@spaces.GPU(duration=180)  # HuggingFace Spaces GPU decorator for main function
+def search_papers(query: str, max_papers: int = 15, top_k_retrieval: int = 10,
+                 top_k_rerank: int = 5, categories: str = "", semantic_weight: float = 0.7):
+    """Run the full pipeline: search, chunk, embed, retrieve, rerank, answer.
+
+    The papers, chunks and embeddings of the current query are stored on the
+    global ``rag_system``, replacing those of the previous query.
+
+    Args:
+        query: Search query; a blank query returns an error message.
+        max_papers: Maximum number of papers requested from ArXiv.
+        top_k_retrieval: Number of chunks kept by hybrid retrieval.
+        top_k_rerank: Number of chunks kept after reranking.
+        categories: Comma-separated ArXiv category codes; may be empty.
+        semantic_weight: Weight of the semantic score in hybrid retrieval.
+
+    Returns:
+        A 3-tuple ``(stats_markdown, papers_markdown, papers_dataframe)``.
+        When a step yields nothing or an exception occurs, the first element
+        is an error message, the second is ``""`` and the third is an empty
+        DataFrame.
+    """
+    try:
+        if not query.strip():
+            return "❌ Please enter a search query.", "", pd.DataFrame()
+        # Initialize system if needed
+        initialize_system()
+        start_time = time.time()
         # Parse categories
+        category_list = []
         if categories.strip():
             category_list = [cat.strip() for cat in categories.split(',') if cat.strip()]
+        # Search ArXiv
+        papers = rag_system.search_arxiv(query, max_papers, category_list)
+        if not papers:
+            return "❌ No papers found. Try different keywords or check your internet connection.", "", pd.DataFrame()
+        # Create chunks and embeddings
+        rag_system.papers = papers
+        rag_system.chunks = rag_system.create_chunks(papers)
+        if not rag_system.chunks:
+            return "❌ Error processing papers.", "", pd.DataFrame()
+        # Create embeddings with GPU acceleration
+        rag_system.embeddings = rag_system.embed_chunks(rag_system.chunks)
+        if rag_system.embeddings is None or len(rag_system.embeddings) == 0:
+            return "❌ Error creating embeddings.", "", pd.DataFrame()
+        # Fit BM25
+        # (on the same chunk texts, in the same order, as the embeddings)
+        chunk_texts = [chunk.text for chunk in rag_system.chunks]
+        rag_system.bm25.fit(chunk_texts)
+        # Hybrid retrieval with GPU acceleration
+        retrieved_results = rag_system.hybrid_retrieval(query, top_k_retrieval, semantic_weight)
+        if not retrieved_results:
+            return "❌ No relevant content found.", "", pd.DataFrame()
+        # Rerank results with GPU acceleration
+        reranked_results = rag_system.rerank_results(query, retrieved_results, top_k_rerank)
+        # Generate answer with GPU acceleration
+        answer = rag_system.generate_answer(query, [chunk for chunk, _ in reranked_results])
+        # Format results
+        papers_md, papers_df = rag_system.format_results(reranked_results)
+        # Create response with statistics
+        end_time = time.time()
+        processing_time = end_time - start_time
+        stats = f"""
+## 🤖 AI-Generated Answer
+{answer}
+## 📊 Search Statistics
+- **Query:** {query}
+- **Papers Found:** {len(papers)}
+- **Chunks Processed:** {len(rag_system.chunks)}
+- **Top Results:** {len(reranked_results)}
+- **Processing Time:** {processing_time:.2f}s
+- **GPU Memory:** {rag_system.memory_manager.get_memory_info()}
+- **Semantic Weight:** {semantic_weight}
+---
+"""
+        # Clean up GPU memory
+        rag_system.memory_manager.clear_cache()
+        return stats, papers_md, papers_df
     except Exception as e:
+        logger.error(f"Search error: {e}")
         error_msg = f"❌ An error occurred: {str(e)}\n\nPlease try different keywords or check your internet connection."
         return error_msg, "", pd.DataFrame()
 # Create Gradio interface
 def create_interface():
+    """Create optimized Gradio interface"""
     css = """
     .gradio-container {
         font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
     }
+    .gpu-badge {
+        background: linear-gradient(45deg, #00d4aa, #00b4d8);
+        color: white;
+        padding: 0.5rem 1rem;
+        border-radius: 20px;
+        font-weight: bold;
+        display: inline-block;
+        margin-bottom: 1rem;
+    }
     """
+    with gr.Blocks(css=css, title="Enhanced ArXiv RAG System - GPU Optimized") as interface:
+        gr.HTML(f"""
         <div style="text-align: center; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); color: white; padding: 2rem; border-radius: 10px; margin-bottom: 2rem;">
             <h1>🚀 Enhanced ArXiv RAG System</h1>
+            <p>GPU-Optimized scientific paper discovery with semantic search, BM25, and neural reranking</p>
+            <div class="gpu-badge">
+                🔥 GPU Accelerated • Device: {DEVICE.upper()}
+            </div>
         </div>
         """)
                     value=""
                 )
+                with gr.Accordion("🔧 Advanced GPU Settings", open=False):
                     with gr.Row():
                         top_k_retrieval = gr.Slider(5, 15, value=10, step=1, label="Top-K Retrieval")
                         top_k_rerank = gr.Slider(3, 8, value=5, step=1, label="Top-K Reranking")
+                    gr.HTML(f"""
+                    <div style="background: #e8f5e8; padding: 1rem; border-radius: 8px; margin-top: 1rem;">
+                        <h4>⚡ GPU Optimization Info</h4>
+                        <ul>
+                            <li><strong>Device:</strong> {DEVICE.upper()}</li>
+                            <li><strong>Mixed Precision:</strong> {'Enabled' if DEVICE == 'cuda' else 'Disabled'}</li>
+                            <li><strong>Memory Management:</strong> Automatic cleanup</li>
+                            <li><strong>Batch Processing:</strong> Optimized for GPU</li>
+                        </ul>
+                    </div>
+                    """)
+                search_btn = gr.Button("🔍 Search Papers", variant="primary", size="lg")
             with gr.Column(scale=1):
                 gr.HTML("""
                 <div style="background: #e3f2fd; padding: 1rem; border-radius: 8px;">
+                    <h4>💡 Tips for Best Results</h4>
                     <ul>
                         <li>Use specific technical terms</li>
                         <li>Try different category filters</li>
                         <li>Adjust semantic weight for different search styles</li>
+                        <li>Higher semantic weight = more conceptual matching</li>
+                        <li>Lower semantic weight = more keyword matching</li>
                     </ul>
+                    <h4>📊 Popular Categories</h4>
                     <ul>
                         <li><code>cs.AI</code> - Artificial Intelligence</li>
                         <li><code>cs.CL</code> - Computation and Language</li>
                         <li><code>cs.LG</code> - Machine Learning</li>
                         <li><code>cs.CV</code> - Computer Vision</li>
+                        <li><code>cs.RO</code> - Robotics</li>
+                        <li><code>stat.ML</code> - Machine Learning (Stats)</li>
                     </ul>
                 </div>
                 """)
                 papers_output = gr.Markdown(label="Relevant Papers")
             with gr.TabItem("📊 Papers Table"):
+                papers_table = gr.Dataframe(
+                    label="Papers Summary",
+                    wrap=True,
+                    interactive=False
+                )
         # Examples
         gr.Examples(
             examples=[
                 ["transformer attention mechanisms", 15, 10, 5, "cs.CL, cs.AI", 0.7],
+                ["graph neural networks for molecular property prediction", 12, 8, 4, "cs.LG", 0.6],
                 ["computer vision deep learning", 15, 10, 5, "cs.CV", 0.8],
+                ["reinforcement learning robotics", 18, 10, 5, "cs.AI, cs.RO", 0.7],
+                ["large language models fine-tuning", 20, 12, 6, "cs.CL", 0.75]
             ],
             inputs=[query_input, max_papers, top_k_retrieval, top_k_rerank, categories_input, semantic_weight]
         )
         gr.HTML("""
         <div style="text-align: center; margin-top: 2rem; padding: 1rem; background: #f5f5f5; border-radius: 8px;">
+            <p><strong>Enhanced ArXiv RAG System</strong> | GPU-Optimized • Semantic Search + BM25 + Neural Reranking</p>
+            <p><em>Powered by Hugging Face Spaces GPU • Optimized for high-performance research</em></p>
         </div>
         """)
 # Launch interface
 if __name__ == "__main__":
+    # Pre-initialize system to reduce first-run latency
+    # A failure here is only logged: search_papers() calls
+    # initialize_system() again on the first query.
+    try:
+        initialize_system()
+    except Exception as e:
+        logger.error(f"Pre-initialization failed: {e}")
     interface = create_interface()
+    # Enable request queuing through queue(): the enable_queue keyword of
+    # launch() is deprecated and is rejected by newer Gradio releases.
+    interface.queue()
+    interface.launch(
+        show_error=True,
+        share=True,
+        max_threads=4
+    )