Spaces:

Rivalcoder
/

Issurance_Agent_Rag

Running

File size: 13,527 Bytes

import os
import warnings
import logging
import time
import json
import hashlib
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor
from threading import Lock
import re

# Set up cache directory for HuggingFace models
cache_dir = os.path.join(os.getcwd(), ".cache")
os.makedirs(cache_dir, exist_ok=True)
os.environ['HF_HOME'] = cache_dir
os.environ['TRANSFORMERS_CACHE'] = cache_dir

# Suppress TensorFlow warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
os.environ['TF_LOGGING_LEVEL'] = 'ERROR'
os.environ['TF_ENABLE_DEPRECATION_WARNINGS'] = '0'

warnings.filterwarnings('ignore', category=DeprecationWarning, module='tensorflow')
logging.getLogger('tensorflow').setLevel(logging.ERROR)

from fastapi import FastAPI, HTTPException, Depends, Header, Query
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from pdf_parser import parse_pdf_from_url_multithreaded as parse_pdf_from_url, parse_pdf_from_file_multithreaded as parse_pdf_from_file
from embedder import build_faiss_index, preload_model
from retriever import retrieve_chunks
from llm import query_gemini
import uvicorn

app = FastAPI(title="HackRx Insurance Policy Assistant", version="1.0.0")

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

@app.on_event("startup")
async def startup_event():
    print("Starting up HackRx Insurance Policy Assistant...")
    print("Preloading sentence transformer model...")
    preload_model()
    print("Model preloading completed. API is ready to serve requests!")

@app.get("/")
async def root():
    return {"message": "HackRx Insurance Policy Assistant API is running!"}

@app.get("/health")
async def health_check():
    return {"status": "healthy"}

class QueryRequest(BaseModel):
    documents: str
    questions: list[str]

class LocalQueryRequest(BaseModel):
    document_path: str
    questions: list[str]

def verify_token(authorization: str = Header(None)):
    if not authorization or not authorization.startswith("Bearer "):
        raise HTTPException(status_code=401, detail="Invalid authorization header")
    token = authorization.replace("Bearer ", "")
    if not token:
        raise HTTPException(status_code=401, detail="Invalid token")
    return token

def process_batch(batch_questions, context_chunks):
    return query_gemini(batch_questions, context_chunks)

def get_document_id_from_url(url: str) -> str:
    return hashlib.md5(url.encode()).hexdigest()

def get_cache_key(doc_id, question):
    return hashlib.md5(f"{doc_id}:{question.strip().lower()}".encode()).hexdigest()

BANNED_CACHE_QUESTIONS = {
    "what is my flight number?"
}

def is_banned_cache_question(q: str) -> bool:
    return q.strip().lower() in BANNED_CACHE_QUESTIONS

def question_has_https_link(q: str) -> bool:
    return bool(re.search(r"https://[^\s]+", q))

# Document cache with thread safety
doc_cache = {}
doc_cache_lock = Lock()

# Question-answer cache with thread safety
qa_cache = {}
qa_cache_lock = Lock()

# ----------------- CACHE CLEAR ENDPOINT -----------------
@app.delete("/api/v1/cache/clear")
async def clear_cache(doc_id: str = Query(None, description="Optional document ID to clear"),
                      url: str = Query(None, description="Optional document URL to clear"),
                      qa_only: bool = Query(False, description="If true, only clear QA cache"),
                      doc_only: bool = Query(False, description="If true, only clear document cache")):
    """
    Clear cache data.
    - No params: Clears ALL caches.
    - doc_id: Clears caches for that document only.
    - url: Same as doc_id but computed automatically from URL.
    - qa_only: Clears only QA cache.
    - doc_only: Clears only document cache.
    """
    cleared = {}

    # If URL is provided, convert to doc_id
    if url:
        doc_id = get_document_id_from_url(url)

    if doc_id:
        if not qa_only:
            with doc_cache_lock:
                if doc_id in doc_cache:
                    del doc_cache[doc_id]
                    cleared["doc_cache"] = f"Cleared document {doc_id}"
        if not doc_only:
            with qa_cache_lock:
                to_delete = [k for k in qa_cache if k.startswith(doc_id)]
                for k in to_delete:
                    del qa_cache[k]
                cleared["qa_cache"] = f"Cleared {len(to_delete)} QA entries for document {doc_id}"
    else:
        if not qa_only:
            with doc_cache_lock:
                doc_cache.clear()
                cleared["doc_cache"] = "Cleared ALL documents"
        if not doc_only:
            with qa_cache_lock:
                qa_cache.clear()
                cleared["qa_cache"] = "Cleared ALL QA entries"

    return {"status": "success", "cleared": cleared}

@app.post("/api/v1/hackrx/run")
async def run_query(request: QueryRequest, token: str = Depends(verify_token)):
    start_time = time.time()
    timing_data = {}
    try:
        print("=== INPUT JSON ===")
        print(json.dumps({"documents": request.documents, "questions": request.questions}, indent=2))
        print("==================\n")

        print(f"Processing {len(request.questions)} questions...")

        # PDF Parsing and FAISS Caching
        doc_id = get_document_id_from_url(request.documents)
        with doc_cache_lock:
            if doc_id in doc_cache:
                print("✅ Using cached document...")
                cached = doc_cache[doc_id]
                text_chunks = cached["chunks"]
                index = cached["index"]
                texts = cached["texts"]
            else:
                print("⚙️ Parsing and indexing new document...")
                pdf_start = time.time()
                text_chunks = parse_pdf_from_url(request.documents)
                timing_data['pdf_parsing'] = round(time.time() - pdf_start, 2)

                index_start = time.time()
                index, texts = build_faiss_index(text_chunks)
                timing_data['faiss_index_building'] = round(time.time() - index_start, 2)

                doc_cache[doc_id] = {
                    "chunks": text_chunks,
                    "index": index,
                    "texts": texts
                }

        # Chunk Retrieval + Question-level Cache Check
        retrieval_start = time.time()
        all_chunks = set()
        new_questions = []
        question_positions = {}
        results_dict = {}

        for idx, question in enumerate(request.questions):
            if question_has_https_link(question) or is_banned_cache_question(question):
                print(f"🌐 Question contains link, skipping cache: {question}")
                top_chunks = retrieve_chunks(index, texts, question)
                all_chunks.update(top_chunks)
                new_questions.append(question)
                question_positions.setdefault(question, []).append(idx)
                continue

            q_key = get_cache_key(doc_id, question)
            with qa_cache_lock:
                if q_key in qa_cache:
                    print(f"⚡ Using cached answer for question: {question}")
                    results_dict[idx] = qa_cache[q_key]
                else:
                    top_chunks = retrieve_chunks(index, texts, question)
                    all_chunks.update(top_chunks)
                    new_questions.append(question)
                    question_positions.setdefault(question, []).append(idx)

        timing_data['chunk_retrieval'] = round(time.time() - retrieval_start, 2)
        print(f"Retrieved {len(all_chunks)} unique chunks for new questions")

        # LLM Processing for only new questions
        if new_questions:
            context_chunks = list(all_chunks)
            batch_size = 10
            batches = [(i, new_questions[i:i + batch_size]) for i in range(0, len(new_questions), batch_size)]

            llm_start = time.time()
            with ThreadPoolExecutor(max_workers=min(5, len(batches))) as executor:
                futures = [executor.submit(process_batch, batch, context_chunks) for _, batch in batches]
                for (_, batch), future in zip(batches, futures):
                    try:
                        result = future.result()
                        if isinstance(result, dict) and "answers" in result:
                            for q, ans in zip(batch, result["answers"]):
                                if question_has_https_link(q) or is_banned_cache_question(q):
                                    print(f"⏩ Not caching answer for dynamic link question: {q}")
                                    for pos in question_positions[q]:
                                        results_dict[pos] = ans
                                    continue
                                q_key = get_cache_key(doc_id, q)
                                with qa_cache_lock:
                                    qa_cache[q_key] = ans
                                for pos in question_positions[q]:
                                    results_dict[pos] = ans
                        else:
                            for q in batch:
                                for pos in question_positions[q]:
                                    results_dict[pos] = "Error in response"
                    except Exception as e:
                        for q in batch:
                            for pos in question_positions[q]:
                                results_dict[pos] = f"Error: {str(e)}"
            timing_data['llm_processing'] = round(time.time() - llm_start, 2)
        else:
            timing_data['llm_processing'] = 0.0

        responses = [results_dict.get(i, "Not Found") for i in range(len(request.questions))]
        timing_data['total_time'] = round(time.time() - start_time, 2)

        print(f"\n=== TIMING BREAKDOWN ===")
        for k, v in timing_data.items():
            print(f"{k}: {v}s")
        print(f"=======================\n")

        print(f"=== OUTPUT JSON ===")
        print(json.dumps({"answers": responses}, indent=2))
        print(f"==================\n")

        return {"answers": responses}

    except Exception as e:
        print(f"Error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")

@app.post("/api/v1/hackrx/local")
async def run_local_query(request: LocalQueryRequest):
    start_time = time.time()
    timing_data = {}
    try:
        print("=== INPUT JSON ===")
        print(json.dumps({"document_path": request.document_path, "questions": request.questions}, indent=2))
        print("==================\n")

        print(f"Processing {len(request.questions)} questions locally...")

        pdf_start = time.time()
        text_chunks = parse_pdf_from_file(request.document_path)
        timing_data['pdf_parsing'] = round(time.time() - pdf_start, 2)
        print(f"Extracted {len(text_chunks)} text chunks from PDF")

        index_start = time.time()
        index, texts = build_faiss_index(text_chunks)
        timing_data['faiss_index_building'] = round(time.time() - index_start, 2)

        retrieval_start = time.time()
        all_chunks = set()
        for question in request.questions:
            top_chunks = retrieve_chunks(index, texts, question)
            all_chunks.update(top_chunks)
        timing_data['chunk_retrieval'] = round(time.time() - retrieval_start, 2)
        print(f"Retrieved {len(all_chunks)} unique chunks")

        questions = request.questions
        context_chunks = list(all_chunks)
        batch_size = 20
        batches = [(i, questions[i:i + batch_size]) for i in range(0, len(questions), batch_size)]

        llm_start = time.time()
        results_dict = {}
        with ThreadPoolExecutor(max_workers=min(5, len(batches))) as executor:
            futures = [executor.submit(process_batch, batch, context_chunks) for _, batch in batches]
            for (start_idx, batch), future in zip(batches, futures):
                try:
                    result = future.result()
                    if isinstance(result, dict) and "answers" in result:
                        for j, answer in enumerate(result["answers"]):
                            results_dict[start_idx + j] = answer
                    else:
                        for j in range(len(batch)):
                            results_dict[start_idx + j] = "Error in response"
                except Exception as e:
                    for j in range(len(batch)):
                        results_dict[start_idx + j] = f"Error: {str(e)}"
        timing_data['llm_processing'] = round(time.time() - llm_start, 2)

        responses = [results_dict.get(i, "Not Found") for i in range(len(questions))]
        timing_data['total_time'] = round(time.time() - start_time, 2)

        print(f"\n=== TIMING BREAKDOWN ===")
        for k, v in timing_data.items():
            print(f"{k}: {v}s")
        print(f"=======================\n")

        print(f"=== OUTPUT JSON ===")
        print(json.dumps({"answers": responses}, indent=2))
        print(f"==================\n")

        return {"answers": responses}

    except Exception as e:
        print(f"Error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")

if __name__ == "__main__":
    port = int(os.environ.get("PORT", 7860))
    uvicorn.run("app:app", host="0.0.0.0", port=port)