# NOTE(review): web file-viewer residue (Space status, file size, commit hashes, gutter line numbers) was removed here; it was not part of the module.
import fitz # PyMuPDF for PDF processing
import requests
import json
import re
from io import BytesIO
import supabase
from config import SUPABASE_URL, SUPABASE_KEY, HF_API_TOKEN, HF_HEADERS, supabase, HF_MODELS, query
# These functions will be called in the main.py file
def evaluate_resumes(uploaded_files, job_description):
    """Parse every uploaded resume, persist it, and return the candidate records.

    :param uploaded_files: Iterable of uploaded PDF file objects; each one is
        passed to ``parse_resume`` and its ``name`` attribute is recorded.
    :param job_description: Currently unused; scoring against it is disabled.
    :return: List of dicts with the keys ``name``, ``resume`` and ``email``,
        in upload order.
    """
    records = []
    for upload in uploaded_files:
        text = parse_resume(upload)
        address = extract_email(text)

        # Scoring and summarisation are disabled, so only these fields exist.
        records.append({"name": upload.name, "resume": text, "email": address})

        # Persist each resume as soon as it has been processed.
        store_in_supabase(text, upload.name, address)

    return records
def parse_resume(pdf_file):
    """Extract the plain text of a resume PDF.

    :param pdf_file: Binary file-like object holding the PDF; the bytes from
        its ``read()`` are handed to PyMuPDF as an in-memory stream.
    :return: The text of all pages, joined with newlines.
    """
    # The context manager closes the document once the text has been
    # extracted, instead of leaving it open until garbage collection.
    with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
        return "\n".join(page.get_text("text") for page in doc)
def extract_email(resume_text):
    """Extract the first email address found in resume text.

    :param resume_text: The extracted resume text; may be empty or ``None``.
    :return: The first matching address, or ``None`` when there is none.
    """
    if not resume_text:
        return None
    # Local part allows "+" and "%" so plus-addressed mail is not truncated.
    # The domain must contain at least one dot, and every dot must be followed
    # by more label characters, so sentence punctuation right after the
    # address ("...@example.com.") is not captured.
    match = re.search(r"[\w.+%-]+@[\w-]+(?:\.[\w-]+)+", resume_text)
    return match.group(0) if match else None
# def score_candidate(resume_text, job_description):
# """
# Scores the candidate's resume based on the job description using the Hugging Face API.
# :param resume_text: The extracted resume text.
# :param job_description: The job description for comparison.
# :return: A numerical score (default 0 if scoring fails).
# """
# payload = {"inputs": f"Resume: {resume_text}\nJob Description: {job_description}"}
# response_gemma = query(payload, model="gemma") # Call Hugging Face API
# if response_gemma is None:
# print("API response is None")
# return 0
# print("API Response:", response_gemma) # Debugging
# # Handle list response
# if isinstance(response_gemma, list) and len(response_gemma) > 0:
# response_gemma = response_gemma[0] # Extract first item if response is a list
# try:
# if isinstance(response_gemma, dict) and "score" in response_gemma:
# return float(response_gemma["score"])
# else:
# print("Unexpected API response format:", response_gemma) # Debugging
# return 0 # Default if score is missing
# except (TypeError, ValueError) as e:
# print(f"Error parsing score: {e}")
# return 0
# # summarize_resume function will use HuggingFace BART model
# def summarize_resume(resume_text):
# """
# Summarizes the resume using Facebook's BART-Large-CNN model.
# :param resume_text: The extracted resume text.
# :return: A summarized version of the resume or an error message.
# """
# payload = {"inputs": resume_text}
# response_bart = query(payload, model="bart")
# if response_bart is None:
# return "Summary could not be generated." # Handle API failures gracefully
# try:
# summary = response_bart[0].get("summary_text", "Summary not available.")
# return summary
# except (IndexError, KeyError):
# return "Summary not available."
def store_in_supabase(resume_text, candidate_name, email):
    """Insert one candidate row into the Supabase ``candidates`` table.

    :param resume_text: The extracted resume text, stored under ``resume``.
    :param candidate_name: The candidate's name, stored under ``name``.
    :param email: Candidate's email address, stored under ``email``.
    :return: Whatever the insert query's ``execute()`` call returns.
    """
    # Score and summary columns are not written while scoring is disabled.
    row = {"name": candidate_name, "resume": resume_text, "email": email}
    return supabase.table("candidates").insert(row).execute()
def generate_pdf_report(shortlisted_candidates):
    """Generate an in-memory PDF with one page per shortlisted candidate.

    :param shortlisted_candidates: Iterable of dicts that each provide the
        keys ``name`` and ``email``.
    :return: A ``BytesIO`` holding the PDF, rewound to offset 0.
    """
    pdf = BytesIO()
    # The context manager closes the document after it has been saved.
    with fitz.open() as doc:
        for candidate in shortlisted_candidates:
            page = doc.new_page()
            page.insert_text(
                (50, 50),
                f"Candidate: {candidate['name']}\n"
                f"Email: {candidate['email']}\n",
            )
        if len(doc) == 0:
            # PyMuPDF refuses to save a document without pages, so an empty
            # shortlist gets a single placeholder page instead of an error.
            doc.new_page().insert_text((50, 50), "No shortlisted candidates.")
        doc.save(pdf)
    pdf.seek(0)
    return pdf