# TalentLensAI / utils.py
# Johnny
# removed score and summarization functions until bug is fixed
# a5446bf
# raw
# history blame
# 4.89 kB
import fitz # PyMuPDF for PDF processing
import requests
import json
import re
from io import BytesIO
import supabase
from config import SUPABASE_URL, SUPABASE_KEY, HF_API_TOKEN, HF_HEADERS, supabase, HF_MODELS, query
# These functions will be called in the main.py file
def evaluate_resumes(uploaded_files, job_description):
    """Parse each uploaded resume, persist it, and return the candidate records.

    Scoring and summarization are currently disabled, so ``job_description``
    is accepted but unused and the returned list is neither ranked nor
    truncated.

    :param uploaded_files: Iterable of uploaded PDF file objects; each must
        provide ``read()`` and a ``name`` attribute.
    :param job_description: Job description text (unused while scoring is off).
    :return: List of dicts with ``name``, ``resume`` and ``email`` keys, in
        upload order.
    """
    processed = []
    for upload in uploaded_files:
        text = parse_resume(upload)
        address = extract_email(text)
        filename = upload.name

        record = {"name": filename, "resume": text, "email": address}
        processed.append(record)

        # Persist every resume right after it has been processed.
        store_in_supabase(text, filename, address)

    return processed
def parse_resume(pdf_file):
    """Extract the plain text of a resume PDF.

    :param pdf_file: File-like object whose ``read()`` returns the PDF bytes.
    :return: Text of all pages, joined with newlines.
    """
    # Open the PDF from memory. The context manager closes the document even
    # if text extraction raises, so the document's resources are released.
    with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
        return "\n".join(page.get_text("text") for page in doc)
# Local part allows word characters plus ".", "+" and "-"; the domain must
# contain at least one dot and end in an alphabetic label, so sentence
# punctuation following an address is not captured.
_EMAIL_RE = re.compile(r"[\w.+-]+@(?:[\w-]+\.)+[A-Za-z]{2,}")


def extract_email(resume_text):
    """Extract the first email address found in resume text.

    :param resume_text: The extracted resume text (may be empty or None).
    :return: The first matching address, or None when there is none.
    """
    if not resume_text:
        # Nothing to search; also avoids a TypeError on None input.
        return None
    match = _EMAIL_RE.search(resume_text)
    return match.group(0) if match else None
# def score_candidate(resume_text, job_description):
# """
# Scores the candidate's resume based on the job description using the Hugging Face API.
# :param resume_text: The extracted resume text.
# :param job_description: The job description for comparison.
# :return: A numerical score (default 0 if scoring fails).
# """
# payload = {"inputs": f"Resume: {resume_text}\nJob Description: {job_description}"}
# response_gemma = query(payload, model="gemma") # Call Hugging Face API
# if response_gemma is None:
# print("API response is None")
# return 0
# print("API Response:", response_gemma) # Debugging
# # Handle list response
# if isinstance(response_gemma, list) and len(response_gemma) > 0:
# response_gemma = response_gemma[0] # Extract first item if response is a list
# try:
# if isinstance(response_gemma, dict) and "score" in response_gemma:
# return float(response_gemma["score"])
# else:
# print("Unexpected API response format:", response_gemma) # Debugging
# return 0 # Default if score is missing
# except (TypeError, ValueError) as e:
# print(f"Error parsing score: {e}")
# return 0
# # summarize_resume function will use HuggingFace BART model
# def summarize_resume(resume_text):
# """
# Summarizes the resume using Facebook's BART-Large-CNN model.
# :param resume_text: The extracted resume text.
# :return: A summarized version of the resume or an error message.
# """
# payload = {"inputs": resume_text}
# response_bart = query(payload, model="bart")
# if response_bart is None:
# return "Summary could not be generated." # Handle API failures gracefully
# try:
# summary = response_bart[0].get("summary_text", "Summary not available.")
# return summary
# except (IndexError, KeyError):
# return "Summary not available."
def store_in_supabase(resume_text, candidate_name, email):
    """
    Insert one candidate row into the Supabase ``candidates`` table.

    Score and summary are not stored while those features are disabled.

    :param resume_text: The extracted resume text.
    :param candidate_name: The candidate's name (callers pass the uploaded
        file name).
    :param email: Candidate's email address, or None if none was found.
    :return: The result of executing the insert request.
    """
    row = {
        "name": candidate_name,
        "resume": resume_text,
        "email": email,
    }
    candidates_table = supabase.table("candidates")
    insert_request = candidates_table.insert(row)
    return insert_request.execute()
def generate_pdf_report(shortlisted_candidates):
    """Generate a PDF report with one page per shortlisted candidate.

    :param shortlisted_candidates: Iterable of dicts with ``name`` and
        ``email`` keys.
    :return: ``BytesIO`` holding the PDF data, positioned at the start.
    """
    pdf = BytesIO()
    # The context manager closes the document once it has been written out,
    # including when a candidate record is malformed and an error is raised.
    with fitz.open() as doc:
        for candidate in shortlisted_candidates:
            page = doc.new_page()
            page.insert_text(
                (50, 50),
                f"Candidate: {candidate['name']}\n"
                f"Email: {candidate['email']}\n",
            )

        if doc.page_count == 0:
            # PyMuPDF refuses to save a document without pages, so an empty
            # shortlist gets a single placeholder page instead of an error.
            doc.new_page().insert_text((50, 50), "No candidates to report.")

        doc.save(pdf)

    pdf.seek(0)
    return pdf