import fitz  # PyMuPDF for PDF processing
import re
from io import BytesIO
from config import (
    SUPABASE_URL,
    SUPABASE_KEY,
    HF_API_TOKEN,
    HF_HEADERS,
    supabase,
    HF_MODELS,
    query,
    embedding_model,
)
from sentence_transformers import util
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
import streamlit as st
from fuzzywuzzy import fuzz

# These functions are called from app.py

# Load the spaCy NLP model
nlp = spacy.load("en_core_web_sm")


def evaluate_resumes(uploaded_files, job_description, min_keyword_match=2):
    """Evaluates uploaded resumes, filters them by keywords and score, and returns shortlisted candidates."""
    candidates = []
    removed_candidates = []

    for pdf_file in uploaded_files:
        resume_text = parse_resume(pdf_file)
        score = score_candidate(resume_text, job_description)
        email = extract_email(resume_text)
        summary = summarize_resume(resume_text)

        # Remove candidates immediately if their similarity score is below 0.20
        if score < 0.20:
            removed_candidates.append({"name": pdf_file.name, "reason": "Low confidence score (< 0.20)"})
            continue  # Skip adding to the candidates list

        candidates.append({
            "name": pdf_file.name,
            "resume": resume_text,
            "score": score,
            "email": email,
            "summary": summary,
        })

    # Filter resumes based on job description keywords
    # (pass min_keyword_match through instead of hardcoding it)
    filtered_candidates, keyword_removed = filter_resumes_by_keywords(
        candidates, job_description, min_keyword_match=min_keyword_match
    )

    # Record keyword-filtered candidates with a reason
    for name in keyword_removed:
        removed_candidates.append({"name": name, "reason": "Insufficient keyword matches"})

    # Return the top five candidates by score, plus everyone who was removed
    return sorted(filtered_candidates, key=lambda x: x["score"], reverse=True)[:5], removed_candidates


def extract_keywords(text, top_n=10):
    """Extracts key terms from the job description using TF-IDF and spaCy."""
    if not text.strip():  # Handle empty job descriptions
        return []

    doc = nlp(text.lower())

    # Keep meaningful words (nouns, proper nouns, verbs, adjectives)
    keywords = [
        token.text
        for token in doc
        if token.pos_ in {"NOUN", "PROPN", "VERB", "ADJ"} and not token.is_stop
    ]

    if not keywords:  # If no valid keywords were found, return an empty list
        return []

    # Use TF-IDF to rank the keywords
    vectorizer = TfidfVectorizer(stop_words="english", ngram_range=(1, 2))
    try:
        tfidf_matrix = vectorizer.fit_transform([" ".join(keywords)])
        feature_array = vectorizer.get_feature_names_out()
        tfidf_scores = tfidf_matrix.toarray()[0]

        # Sort by highest TF-IDF score
        keyword_scores = sorted(zip(feature_array, tfidf_scores), key=lambda x: x[1], reverse=True)
        return [kw for kw, score in keyword_scores[:top_n]]
    except ValueError:  # Catch the empty-vocabulary error
        return []


def filter_resumes_by_keywords(resumes, job_description, min_keyword_match=2):
    """Filters resumes based on keyword presence and fuzzy similarity."""
    job_keywords = extract_keywords(job_description)
    filtered_resumes = []
    removed_resumes = []

    if len(job_keywords) < min_keyword_match:
        st.warning("⚠️ Job description is too short or missing for keyword filtering.")
        return resumes, []  # Skip keyword filtering if the job description lacks enough keywords

    for resume in resumes:
        resume_text = resume["resume"].lower()
        matched_keywords = set()

        # Fuzzy matching allows flexible keyword detection (e.g. plurals, suffixes)
        for keyword in job_keywords:
            for word in resume_text.split():
                if fuzz.partial_ratio(keyword, word) > 80:  # 80% similarity threshold
                    matched_keywords.add(keyword)
                    break  # One match per keyword is enough

        # Enforce the minimum number of keyword matches
        if len(matched_keywords) >= min_keyword_match:
            filtered_resumes.append(resume)
        else:
            removed_resumes.append(resume["name"])

    return filtered_resumes, removed_resumes
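# Quick illustration of the 80% fuzzy threshold used above. The scores shown
# are indicative only; fuzzywuzzy values can vary slightly across versions:
#
#     from fuzzywuzzy import fuzz
#     fuzz.partial_ratio("python", "python3")  # 100 -> counted as a match
#     fuzz.partial_ratio("python", "java")     # well below 80 -> no match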
def parse_resume(pdf_file):
    """Extracts text from a resume PDF."""
    with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
        return "\n".join(page.get_text("text") for page in doc)


def extract_email(resume_text):
    """Extracts the first email address found in the resume text."""
    match = re.search(r"[\w\.-]+@[\w\.-]+\.\w+", resume_text)
    return match.group(0) if match else None


def score_candidate(resume_text, job_description):
    """
    Scores the candidate's resume against the job description using sentence-transformers.

    :param resume_text: The extracted resume text.
    :param job_description: The job description to compare against.
    :return: A numerical score (cosine similarity, typically between 0 and 1).
    """
    try:
        # Generate embeddings for both texts
        resume_embedding = embedding_model.encode(resume_text, convert_to_tensor=True)
        job_embedding = embedding_model.encode(job_description, convert_to_tensor=True)

        # Compute cosine similarity
        score = util.pytorch_cos_sim(resume_embedding, job_embedding).item()
        return round(score, 4)  # Round the similarity score to 4 decimal places
    except Exception as e:
        print(f"Error computing similarity score: {e}")
        return 0  # Return 0 if scoring fails


def summarize_resume(resume_text):
    """Summarizes a resume using the Hugging Face BART model."""
    payload = {"inputs": f"Summarize this resume: {resume_text}"}
    response = query(payload, model="bart")  # Call the Hugging Face Inference API

    if not response:
        print("Error: API response is None")
        return "Summary could not be generated."

    try:
        # The API sometimes returns a list containing a single dict
        if isinstance(response, list) and len(response) > 0:
            response = response[0]

        # Handle both response formats the API may return
        if isinstance(response, dict) and ("generated_text" in response or "summary_text" in response):
            return response.get("generated_text", response.get("summary_text", "Summary not available"))

        print("Unexpected API response format:", response)
        return "Summary could not be generated."
    except (TypeError, ValueError) as e:
        print(f"Error parsing summary: {e}")
        return "Summary could not be generated."


def store_in_supabase(resume_text, score, candidate_name, email, summary):
    """
    Stores resume data in Supabase.

    :param resume_text: The extracted resume text.
    :param score: The candidate's score (must be a valid number).
    :param candidate_name: The candidate's name.
    :param email: The candidate's email address.
    :param summary: A summarized version of the resume.
    """
    if score is None:
        score = 0  # Ensure the score is never NULL

    data = {
        "name": candidate_name,
        "resume": resume_text,
        "score": score,
        "email": email,
        "summary": summary,
    }

    return supabase.table("candidates").insert(data).execute()


# TODO: test with ~10 resumes to confirm the shortlisting behaves as expected
def generate_pdf_report(shortlisted_candidates):
    """Generates a PDF summary of the shortlisted candidates."""
    pdf = BytesIO()
    doc = fitz.open()

    for candidate in shortlisted_candidates:
        page = doc.new_page()

        # Use the stored summary, or provide a fallback
        summary = candidate.get("summary", "No summary available")
        page.insert_text(
            (50, 50),
            f"Candidate: {candidate['name']}\n"
            f"Email: {candidate['email']}\n"
            f"Score: {candidate['score']}\n"
            f"Summary: {summary}"
        )

    doc.save(pdf)  # PyMuPDF can write directly to a file-like object
    pdf.seek(0)
    return pdf
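
# Minimal usage sketch, assuming the imports above resolve (a config module with
# a working Supabase client, `query`, and `embedding_model`) and a local file
# named "sample_resume.pdf" (a hypothetical path). evaluate_resumes expects
# file-like objects exposing `.name` and `.read()`, matching what Streamlit's
# st.file_uploader returns.
if __name__ == "__main__":
    class _LocalPDF:
        """Small stand-in for a Streamlit UploadedFile."""

        def __init__(self, path):
            self.name = path
            with open(path, "rb") as fh:
                self._data = fh.read()

        def read(self):
            return self._data

    job_description = "Looking for a Python developer with NLP and machine learning experience."
    shortlisted, removed = evaluate_resumes([_LocalPDF("sample_resume.pdf")], job_description)

    for candidate in shortlisted:
        print(f"{candidate['name']}: score={candidate['score']}, email={candidate['email']}")
    for entry in removed:
        print(f"Removed {entry['name']}: {entry['reason']}")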