Spaces:
Running
Running
import fitz # PyMuPDF for PDF processing | |
import requests | |
import json | |
import re | |
from io import BytesIO | |
import supabase | |
from config import SUPABASE_URL, SUPABASE_KEY, HF_API_TOKEN, HF_API_URL, HF_HEADERS, supabase | |
#from config import supabase | |
# These functions will be called in the main.py file | |
def parse_resume(pdf_file):
    """Extract the plain text of a resume PDF.

    Args:
        pdf_file: A binary file-like object exposing ``read()`` that yields
            the raw PDF bytes (for example an uploaded file).

    Returns:
        The text of every page, in page order, joined with newlines.
    """
    # Open from memory and close the document deterministically; without the
    # context manager the document handle stays open for every resume parsed.
    with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
        return "\n".join(page.get_text("text") for page in doc)
# Local part allows word characters, dots, plus and hyphen; the domain must
# contain at least one dot-separated label so that a trailing sentence period
# is never captured as part of the address.
_EMAIL_PATTERN = re.compile(r"[\w.+-]+@[\w-]+(?:\.[\w-]+)+")


def extract_email(resume_text):
    """Extract the first email address found in resume text.

    Args:
        resume_text: The resume contents as a string. ``None`` or an empty
            string is accepted and yields ``None``.

    Returns:
        The first email-looking substring, or ``None`` when there is none.
    """
    if not resume_text:
        return None
    match = _EMAIL_PATTERN.search(resume_text)
    return match.group(0) if match else None
def score_candidate(resume_text, job_description, timeout=60):
    """Send a resume and job description to the Hugging Face API for scoring.

    This is best-effort: every failure path logs the problem and returns the
    default score of 0 rather than raising.

    Args:
        resume_text: Text of the candidate's resume.
        job_description: Text of the job description to score against.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        The value of the ``"score"`` key in the JSON response, or 0 when the
        request fails, the status is not 200, the body is not valid JSON, or
        the JSON is not an object containing ``"score"``.
    """
    payload = {"inputs": f"Resume: {resume_text}\nJob Description: {job_description}"}

    # Without a timeout requests can block indefinitely on a stalled
    # connection; connection errors and timeouts are treated like any other
    # API failure instead of crashing the caller.
    try:
        response = requests.post(
            HF_API_URL, headers=HF_HEADERS, json=payload, timeout=timeout
        )
    except requests.RequestException as exc:
        print(f"Error: request to scoring API failed: {exc}")
        return 0

    if response.status_code != 200:
        print(f"Error: {response.status_code}, {response.text}")  # Log any errors
        return 0  # Return default score if API fails

    # ValueError is the common base of the JSON decode errors raised by
    # response.json() across requests versions.
    try:
        result = response.json()
    except ValueError:
        print("Failed to decode JSON response:", response.text)  # Debugging output
        return 0  # Return default score if JSON decoding fails

    # A JSON array or scalar has no .get(); fall back to the default score.
    if not isinstance(result, dict):
        print("Unexpected response format:", result)
        return 0
    return result.get("score", 0)
def store_in_supabase(resume_text, score, candidate_name, email, summary):
    """Insert one candidate record into the Supabase ``candidates`` table.

    The record carries the candidate's name, resume text, score, email and
    summary. The response of the insert call is printed for debugging.
    """
    record = dict(
        name=candidate_name,
        resume=resume_text,
        score=score,
        email=email,
        summary=summary,
    )
    insert_query = supabase.table("candidates").insert(record)
    result = insert_query.execute()
    print("Inserted into Supabase:", result)  # Debugging output
def generate_pdf_report(shortlisted_candidates):
    """Generate a PDF summary of shortlisted candidates.

    Each candidate gets its own page listing name, email, score and summary.

    Args:
        shortlisted_candidates: Iterable of dicts with ``"name"``,
            ``"email"`` and ``"score"`` keys and an optional ``"summary"``.

    Returns:
        A ``BytesIO`` positioned at offset 0 containing the PDF bytes.
    """
    pdf = BytesIO()
    # The context manager closes the in-memory document once it is serialized.
    with fitz.open() as doc:
        for candidate in shortlisted_candidates:
            page = doc.new_page()
            summary = candidate.get("summary", "No summary available")  # Avoid KeyError
            page.insert_text(
                (50, 50),
                f"Candidate: {candidate['name']}\nEmail: {candidate['email']}\nScore: {candidate['score']}\nSummary: {summary}"
            )
        # A document with zero pages cannot be saved, so an empty shortlist
        # still produces a valid one-page report.
        if len(doc) == 0:
            doc.new_page().insert_text((50, 50), "No candidates shortlisted.")
        doc.save(pdf)
    pdf.seek(0)
    return pdf
def process_resumes(uploaded_files, job_description, top_n=5):
    """Process uploaded resumes and return the highest-scoring candidates.

    Every resume is parsed, scored against the job description, and stored in
    Supabase, whether or not it ends up in the shortlist.

    Args:
        uploaded_files: Iterable of uploaded PDF file objects; each must
            expose ``read()`` and a ``name`` attribute.
        job_description: Text of the job description to score against.
        top_n: Maximum number of candidates to return (defaults to 5).

    Returns:
        Up to ``top_n`` candidate dicts (``name``, ``resume``, ``score``,
        ``email``, ``summary``) sorted by score, highest first. Candidates
        with equal scores keep their upload order.
    """
    candidates = []
    for pdf_file in uploaded_files:
        resume_text = parse_resume(pdf_file)
        score = score_candidate(resume_text, job_description)
        email = extract_email(resume_text)
        # Generate summary (replace with actual summarization logic later)
        summary = f"{pdf_file.name} has a score of {score} for this job."
        candidates.append({
            "name": pdf_file.name,
            "resume": resume_text,
            "score": score,
            "email": email,
            "summary": summary,
        })
        # Store all details including summary in Supabase
        store_in_supabase(resume_text, score, pdf_file.name, email, summary)
    ranked = sorted(candidates, key=lambda c: c["score"], reverse=True)
    return ranked[:top_n]