Spaces:
Running
Running
File size: 4,922 Bytes
56325dc 4f034fb 19ea0c5 4f034fb 56325dc edfcf73 56325dc edfcf73 4f034fb 8f771eb edfcf73 4f034fb edfcf73 4f034fb 8f771eb 4f034fb 8f771eb 4f034fb edfcf73 4f034fb edfcf73 4f034fb edfcf73 4f034fb edfcf73 4f034fb 19ea0c5 4f034fb 56325dc 4f034fb 56325dc 19ea0c5 edfcf73 56325dc 19ea0c5 edfcf73 56325dc 19ea0c5 edfcf73 56325dc edfcf73 56325dc edfcf73 56325dc edfcf73 19ea0c5 edfcf73 19ea0c5 edfcf73 56325dc edfcf73 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
import fitz # PyMuPDF for PDF processing
import requests
import json
import re
from io import BytesIO
import supabase
from config import SUPABASE_URL, SUPABASE_KEY, HF_API_TOKEN, HF_HEADERS, supabase, HF_MODELS, query, embedding_model
from sentence_transformers import SentenceTransformer, util
# These functions will be called in the app.py file
def evaluate_resumes(uploaded_files, job_description, top_n=5):
    """Evaluate uploaded resumes and return the best-scoring candidates.

    Every uploaded PDF is parsed to text, scored against the job
    description, searched for an email address and summarized. Each
    candidate record is also stored in Supabase, whether or not it ends up
    in the returned shortlist.

    :param uploaded_files: Iterable of uploaded PDF file objects. Each one
        must expose a ``name`` attribute and is handed to ``parse_resume``.
    :param job_description: Job description text the resumes are scored against.
    :param top_n: Maximum number of candidates to return (default 5).
        ``None`` returns every candidate.
    :return: List of candidate dicts with the keys ``name``, ``resume``,
        ``score``, ``email`` and ``summary``, sorted by descending score.
    :raises ValueError: If ``top_n`` is negative.
    """
    # Fail fast: a negative slice bound would silently drop candidates from
    # the end of the ranking instead of limiting its size.
    if top_n is not None and top_n < 0:
        raise ValueError("top_n must be a non-negative integer or None")

    candidates = []
    for pdf_file in uploaded_files:
        resume_text = parse_resume(pdf_file)
        score = score_candidate(resume_text, job_description)
        email = extract_email(resume_text)
        # Generate a summary of the resume
        summary = summarize_resume(resume_text)

        candidates.append({
            "name": pdf_file.name,
            "resume": resume_text,
            "score": score,
            "email": email,
            "summary": summary,
        })

        # Store all details including summary in Supabase
        store_in_supabase(resume_text, score, pdf_file.name, email, summary)

    # sorted() is stable, so candidates with equal scores keep upload order.
    ranked = sorted(candidates, key=lambda c: c["score"], reverse=True)
    return ranked[:top_n]
def parse_resume(pdf_file):
    """Extract the plain text of a resume PDF.

    :param pdf_file: File-like object whose ``read()`` returns the PDF bytes.
    :return: The text of all pages, joined with newlines.
    """
    doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
    try:
        return "\n".join(page.get_text("text") for page in doc)
    finally:
        # Release the document's resources even if text extraction fails.
        doc.close()
# Local part: word characters plus ".", "+" and "-".
# Domain: at least two dot-separated labels, so a trailing sentence period
# or comma is never captured and bare "user@host" tokens are ignored.
_EMAIL_PATTERN = re.compile(r"[\w.+-]+@[\w-]+(?:\.[\w-]+)+")


def extract_email(resume_text):
    """Extract the first email address found in resume text.

    :param resume_text: The resume text to search; may be empty or ``None``.
    :return: The first matching email address, or ``None`` if there is none.
    """
    if not resume_text:
        return None
    match = _EMAIL_PATTERN.search(resume_text)
    return match.group(0) if match else None
def score_candidate(resume_text, job_description):
    """
    Scores the candidate's resume against the job description using sentence-transformers.

    :param resume_text: The extracted resume text.
    :param job_description: The job description for comparison.
    :return: The cosine similarity of the two embeddings, rounded to 4 decimal
        places. Cosine similarity lies in [-1, 1]; higher means more similar.
        Returns 0 when either text is missing or blank, or when scoring fails.
    """
    # A blank text (for example a scanned PDF with no extractable text) would
    # still be embedded and receive an arbitrary similarity, so score it as 0.
    for text in (resume_text, job_description):
        if text is None or (isinstance(text, str) and not text.strip()):
            return 0

    try:
        # Generate embeddings
        resume_embedding = embedding_model.encode(resume_text, convert_to_tensor=True)
        job_embedding = embedding_model.encode(job_description, convert_to_tensor=True)

        # Compute cosine similarity
        score = util.pytorch_cos_sim(resume_embedding, job_embedding).item()
        return round(score, 4)
    except Exception as e:
        # Deliberate best-effort: a scoring failure must not abort the batch.
        print(f"Error computing similarity score: {e}")
        return 0
def summarize_resume(resume_text):
    """
    Summarizes a resume by sending a prompt to the "gemma" model via ``query``.

    :param resume_text: The resume text to summarize.
    :return: The generated summary, or the fixed message
        "Summary could not be generated." when the API returns nothing,
        an unexpected shape, or an empty text.
    """
    fallback = "Summary could not be generated."
    prompt = f"Summarize this resume: {resume_text}"

    response = query({"inputs": prompt}, model="gemma")  # Use gemma for summarization

    if response is None:
        print("Error: API response is None")
        return fallback

    # If the response is a list, extract the first element
    if isinstance(response, list) and len(response) > 0:
        response = response[0]

    if not (isinstance(response, dict) and "generated_text" in response):
        print("Unexpected API response format:", response)
        return fallback

    summary = response["generated_text"]
    # Text-generation endpoints may echo the prompt in front of the generated
    # text; drop it so the stored summary does not contain the whole resume.
    if isinstance(summary, str) and summary.startswith(prompt):
        summary = summary[len(prompt):].lstrip()

    return summary or fallback
def store_in_supabase(resume_text, score, candidate_name, email, summary):
    """
    Inserts one candidate record into the Supabase "candidates" table.

    :param resume_text: The extracted resume text.
    :param score: The candidate's score; ``None`` is stored as 0 so the
        column is never NULL.
    :param candidate_name: The candidate's name.
    :param email: Candidate's email address.
    :param summary: A summarized version of the resume.
    :return: The result of executing the insert.
    """
    record = {
        "name": candidate_name,
        "resume": resume_text,
        "score": 0 if score is None else score,
        "email": email,
        "summary": summary,
    }
    return supabase.table("candidates").insert(record).execute()
# TODO: test with 10 resumes to confirm that the expected candidates are shortlisted.
def generate_pdf_report(shortlisted_candidates):
    """Generate a PDF report with one page per shortlisted candidate.

    :param shortlisted_candidates: Iterable of candidate dicts with the keys
        ``name``, ``email`` and ``score``, and optionally ``summary``.
    :return: A ``BytesIO`` positioned at offset 0 that holds the PDF bytes.
    """
    import textwrap  # local import keeps this block self-contained

    # insert_text() does not wrap lines that are too wide, so long summaries
    # would run off the right edge. 80 characters is an approximate fit for
    # the default font on the default page with a 50pt left margin.
    max_chars_per_line = 80

    pdf = BytesIO()
    doc = fitz.open()
    try:
        for candidate in shortlisted_candidates:
            page = doc.new_page()

            # Use the stored summary, or provide a fallback
            summary = candidate.get("summary", "No summary available")

            text = (
                f"Candidate: {candidate['name']}\n"
                f"Email: {candidate['email']}\n"
                f"Score: {candidate['score']}\n"
                f"Summary: {summary}"
            )
            wrapped_lines = []
            for line in text.split("\n"):
                # wrap() returns [] for an empty line; keep it as a blank line.
                wrapped_lines.extend(textwrap.wrap(line, width=max_chars_per_line) or [""])

            page.insert_text((50, 50), "\n".join(wrapped_lines))

        # A document without pages cannot be saved, so emit a placeholder
        # page when nobody was shortlisted.
        if len(doc) == 0:
            doc.new_page().insert_text((50, 50), "No candidates shortlisted.")

        doc.save(pdf)
    finally:
        # Release the document's resources even if rendering or saving fails.
        doc.close()

    pdf.seek(0)
    return pdf