Spaces:
Running
Running
import fitz # PyMuPDF for PDF processing | |
import requests | |
import json | |
import re | |
from io import BytesIO | |
import supabase | |
from config import SUPABASE_URL, SUPABASE_KEY, HF_API_TOKEN, HF_HEADERS, supabase, HF_MODELS, query | |
# These functions will be called in the main.py file | |
def evaluate_resumes(uploaded_files, job_description):
    """Parse every uploaded resume, persist it, and return the candidate records.

    Scoring and summarisation are currently disabled, so ``job_description``
    is accepted but not used, and the returned list is neither ranked nor
    truncated.

    :param uploaded_files: Iterable of uploaded PDF file objects; each one is
        passed to ``parse_resume`` and must expose a ``name`` attribute.
    :param job_description: Job description text (currently unused).
    :return: List of dicts with the keys ``name``, ``resume`` and ``email``,
        one per uploaded file, in the order the files were given.
    """
    results = []
    for upload in uploaded_files:
        text = parse_resume(upload)
        address = extract_email(text)
        file_name = upload.name

        record = {"name": file_name, "resume": text, "email": address}
        results.append(record)

        # Persist the same fields that were just collected for this resume.
        store_in_supabase(text, file_name, address)
    return results
def parse_resume(pdf_file):
    """Extract the plain text of a resume PDF.

    :param pdf_file: Binary file-like object whose ``read()`` returns the
        bytes of a PDF document.
    :return: The text of all pages, in page order, joined with newlines.
    """
    pdf_bytes = pdf_file.read()
    # Open the document as a context manager so it is closed as soon as the
    # text has been collected, instead of staying open until garbage collection.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        return "\n".join(page.get_text("text") for page in doc)
# Local part: word characters, dots, plus signs and hyphens.
# Domain: a label followed by at least one ".label" group. Because every dot
# must be followed by further label characters, punctuation that directly
# follows an address in running text (e.g. a sentence-ending ".") is not
# captured as part of the match.
_EMAIL_PATTERN = re.compile(r"[\w.+-]+@[\w-]+(?:\.[\w-]+)+")


def extract_email(resume_text):
    """Return the first email address found in the resume text.

    :param resume_text: Text extracted from a resume; may be empty or None.
    :return: The first substring that looks like an email address (with a
        dotted domain), or None when the text is empty or has no address.
    """
    if not resume_text:
        # Nothing to search; also avoids a TypeError for None input.
        return None
    match = _EMAIL_PATTERN.search(resume_text)
    return match.group(0) if match else None
# def score_candidate(resume_text, job_description): | |
# """ | |
# Scores the candidate's resume based on the job description using the Hugging Face API. | |
# :param resume_text: The extracted resume text. | |
# :param job_description: The job description for comparison. | |
# :return: A numerical score (default 0 if scoring fails). | |
# """ | |
# payload = {"inputs": f"Resume: {resume_text}\nJob Description: {job_description}"} | |
# response_gemma = query(payload, model="gemma") # Call Hugging Face API | |
# if response_gemma is None: | |
# print("API response is None") | |
# return 0 | |
# print("API Response:", response_gemma) # Debugging | |
# # Handle list response | |
# if isinstance(response_gemma, list) and len(response_gemma) > 0: | |
# response_gemma = response_gemma[0] # Extract first item if response is a list | |
# try: | |
# if isinstance(response_gemma, dict) and "score" in response_gemma: | |
# return float(response_gemma["score"]) | |
# else: | |
# print("Unexpected API response format:", response_gemma) # Debugging | |
# return 0 # Default if score is missing | |
# except (TypeError, ValueError) as e: | |
# print(f"Error parsing score: {e}") | |
# return 0 | |
# # summarize_resume function will use HuggingFace BART model | |
# def summarize_resume(resume_text): | |
# """ | |
# Summarizes the resume using Facebook's BART-Large-CNN model. | |
# :param resume_text: The extracted resume text. | |
# :return: A summarized version of the resume or an error message. | |
# """ | |
# payload = {"inputs": resume_text} | |
# response_bart = query(payload, model="bart") | |
# if response_bart is None: | |
# return "Summary could not be generated." # Handle API failures gracefully | |
# try: | |
# summary = response_bart[0].get("summary_text", "Summary not available.") | |
# return summary | |
# except (IndexError, KeyError): | |
# return "Summary not available." | |
def store_in_supabase(resume_text, candidate_name, email):
    """Insert one candidate record into the Supabase ``candidates`` table.

    Score and summary are not stored at the moment; only the three fields
    below are written.

    :param resume_text: The extracted resume text (stored under ``resume``).
    :param candidate_name: The candidate's name (stored under ``name``).
    :param email: Candidate's email address (stored under ``email``).
    :return: The result of executing the insert request.
    """
    row = dict(name=candidate_name, resume=resume_text, email=email)
    candidates_table = supabase.table("candidates")
    return candidates_table.insert(row).execute()
def generate_pdf_report(shortlisted_candidates):
    """Generate a PDF with one page per shortlisted candidate.

    Each page lists the candidate's name and email. When no candidates are
    given, the report contains a single page stating so.

    :param shortlisted_candidates: Iterable of dicts that provide the keys
        ``name`` and ``email``.
    :return: A ``BytesIO`` holding the PDF, positioned at the start so it can
        be read or served directly.
    """
    pdf = BytesIO()
    # The context manager closes the in-memory document once it has been
    # written to the buffer.
    with fitz.open() as doc:
        for candidate in shortlisted_candidates:
            page = doc.new_page()
            page.insert_text(
                (50, 50),
                f"Candidate: {candidate['name']}\n"
                f"Email: {candidate['email']}\n",
            )
        if doc.page_count == 0:
            # PyMuPDF refuses to save a document without pages, so an empty
            # shortlist gets a placeholder page instead of raising.
            doc.new_page().insert_text((50, 50), "No shortlisted candidates.")
        doc.save(pdf)
    pdf.seek(0)
    return pdf