Spaces:
Running
Running
File size: 4,531 Bytes
56325dc edfcf73 19ea0c5 56325dc edfcf73 56325dc edfcf73 56325dc edfcf73 56325dc edfcf73 19ea0c5 edfcf73 56325dc edfcf73 56325dc 19ea0c5 edfcf73 56325dc 19ea0c5 edfcf73 56325dc 19ea0c5 edfcf73 56325dc edfcf73 56325dc edfcf73 56325dc edfcf73 19ea0c5 edfcf73 19ea0c5 edfcf73 56325dc edfcf73 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
import fitz # PyMuPDF for PDF processing
import requests
import json
import re
from io import BytesIO
import supabase
from config import SUPABASE_URL, SUPABASE_KEY, HF_API_TOKEN, HF_HEADERS, supabase, HF_MODELS, query
# These functions will be called in the main.py file
def evaluate_resumes(uploaded_files, job_description, top_n=5):
    """
    Evaluates uploaded resumes and returns the highest-scoring candidates.

    Each file is parsed, scored against the job description, searched for an
    email address and summarized. Every candidate (not only the shortlisted
    ones) is also written to Supabase through ``store_in_supabase``.

    :param uploaded_files: Iterable of uploaded PDF file objects exposing ``read()`` and ``name``.
    :param job_description: The job description each resume is scored against.
    :param top_n: Maximum number of candidates to return (default 5).
        Pass ``None`` to return every candidate.
    :return: Candidate dicts with the keys ``name``, ``resume``, ``score``,
        ``email`` and ``summary``, sorted by score with the highest first.
    """
    candidates = []
    for pdf_file in uploaded_files:
        resume_text = parse_resume(pdf_file)
        score = score_candidate(resume_text, job_description)
        email = extract_email(resume_text)
        # Generate a summary of the resume
        summary = summarize_resume(resume_text)
        candidates.append({
            "name": pdf_file.name,
            "resume": resume_text,
            "score": score,
            "email": email,
            "summary": summary
        })
        # Store all details including summary in Supabase
        store_in_supabase(resume_text, score, pdf_file.name, email, summary)

    # sorted() is stable, so candidates with equal scores keep their upload order
    ranked = sorted(candidates, key=lambda x: x["score"], reverse=True)
    if top_n is None:
        return ranked
    # Clamp at zero so a negative limit cannot turn into a "drop the last N" slice
    return ranked[:max(top_n, 0)]
def parse_resume(pdf_file):
    """
    Extracts text from a resume PDF.

    :param pdf_file: File-like object whose ``read()`` returns the PDF bytes.
    :return: The plain text of every page, joined with newlines.
    """
    # Open through the context manager so the document is closed even when
    # text extraction raises; the original left the document open.
    with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
        return "\n".join(page.get_text("text") for page in doc)
# Local part may contain word characters, dots, plus signs and hyphens; the
# domain must consist of dot-separated labels, so a sentence-ending period
# after the address is never captured.
_EMAIL_PATTERN = re.compile(r"[\w.+-]+@[\w-]+(?:\.[\w-]+)+")


def extract_email(resume_text):
    """
    Extracts the first email address found in the resume text.

    :param resume_text: The extracted resume text (may be empty or ``None``).
    :return: The first email address found, or ``None`` if there is none.
    """
    if not resume_text:
        # Nothing to search; also avoids a TypeError when text extraction gave None
        return None
    match = _EMAIL_PATTERN.search(resume_text)
    return match.group(0) if match else None
# TODO: Investigate why a score of 0 is returned even when the resume matches the job keywords
# The score_candidate function uses the Hugging Face Gemma model
def score_candidate(resume_text, job_description):
    """
    Scores the candidate's resume based on the job description using the Hugging Face API.

    The exact shape of the API response is not visible in this module, so the
    parsing is defensive: a mapping is read directly, a list is unwrapped to
    its first element, and anything else falls back to 0.

    :param resume_text: The extracted resume text.
    :param job_description: The job description for comparison.
    :return: A numerical score (default 0 if the call or the parsing fails).
    """
    payload = {"inputs": f"Resume: {resume_text}\nJob Description: {job_description}"}
    response_gemma = query(payload, model="gemma")  # Use Google Gemma Model for scoring

    if response_gemma is None:
        return 0  # Return 0 if API call fails

    # summarize_resume indexes its response as a list, so tolerate a
    # list-wrapped result here too instead of crashing on ``list.get``.
    if isinstance(response_gemma, list):
        response_gemma = response_gemma[0] if response_gemma else {}

    try:
        return float(response_gemma.get("score", 0))  # Ensure score is always a float
    except (AttributeError, TypeError, ValueError):
        # AttributeError: the response is not a mapping;
        # TypeError/ValueError: the score is not convertible to a number.
        return 0
# summarize_resume function will use HuggingFace BART model
def summarize_resume(resume_text):
    """
    Summarizes the resume through the model registered as "bart"
    (Facebook's BART-Large-CNN, per the original note on this function).

    :param resume_text: The extracted resume text.
    :return: A summarized version of the resume, or a fallback message when
        the API call fails or the response has an unexpected shape.
    """
    payload = {"inputs": resume_text}
    response_bart = query(payload, model="bart")

    if response_bart is None:
        return "Summary could not be generated."  # Handle API failures gracefully

    try:
        # Expected shape: a list whose first item is a dict with "summary_text"
        return response_bart[0].get("summary_text", "Summary not available.")
    except (IndexError, KeyError, TypeError, AttributeError):
        # IndexError: empty list; KeyError: a dict was returned instead of a list;
        # TypeError/AttributeError: the response or its first item is not the
        # expected container type.
        return "Summary not available."
def store_in_supabase(resume_text, score, candidate_name, email, summary):
    """
    Inserts one candidate record into the Supabase "candidates" table.

    :param resume_text: The extracted resume text.
    :param score: The candidate's score; ``None`` is stored as 0.
    :param candidate_name: The candidate's name.
    :param email: Candidate's email address.
    :param summary: A summarized version of the resume.
    :return: The response returned by executing the insert.
    """
    record = {
        "name": candidate_name,
        "resume": resume_text,
        # Ensure score is never NULL
        "score": 0 if score is None else score,
        "email": email,
        "summary": summary,
    }
    insert_request = supabase.table("candidates").insert(record)
    return insert_request.execute()
# TODO: Test with 10 resumes to verify which ones get shortlisted
def generate_pdf_report(shortlisted_candidates):
    """
    Generates a PDF summary of shortlisted candidates, one page per candidate.

    :param shortlisted_candidates: Iterable of candidate dicts with ``name``,
        ``email`` and ``score`` keys and an optional ``summary`` key.
    :return: A ``BytesIO`` rewound to offset 0 that holds the PDF bytes.
    """
    pdf = BytesIO()

    # The context manager closes the in-memory document once it has been saved
    with fitz.open() as doc:
        for candidate in shortlisted_candidates:
            page = doc.new_page()

            # Use the stored summary, or provide a fallback
            summary = candidate.get("summary", "No summary available")

            # NOTE(review): insert_text does not wrap lines, so a long summary
            # may run past the right page edge; consider insert_textbox.
            page.insert_text(
                (50, 50),
                f"Candidate: {candidate['name']}\n"
                f"Email: {candidate['email']}\n"
                f"Score: {candidate['score']}\n"
                f"Summary: {summary}"
            )

        if len(doc) == 0:
            # PyMuPDF refuses to save a document with zero pages, so an empty
            # shortlist gets a single placeholder page instead of an exception.
            doc.new_page().insert_text((50, 50), "No candidates shortlisted.")

        doc.save(pdf)

    pdf.seek(0)
    return pdf