File size: 4,922 Bytes
56325dc
 
 
 
 
 
4f034fb
 
19ea0c5
4f034fb
56325dc
edfcf73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56325dc
 
 
 
 
 
 
 
 
 
 
 
edfcf73
4f034fb
8f771eb
edfcf73
 
4f034fb
edfcf73
4f034fb
 
 
 
8f771eb
4f034fb
 
8f771eb
4f034fb
edfcf73
4f034fb
 
 
edfcf73
 
 
4f034fb
edfcf73
4f034fb
 
edfcf73
4f034fb
 
 
 
 
 
 
19ea0c5
4f034fb
 
 
56325dc
 
4f034fb
 
 
 
 
 
 
 
56325dc
19ea0c5
edfcf73
 
 
 
 
 
 
 
 
 
 
 
56325dc
19ea0c5
 
 
 
edfcf73
56325dc
19ea0c5
 
edfcf73
56325dc
edfcf73
56325dc
 
 
 
edfcf73
56325dc
 
edfcf73
 
 
 
19ea0c5
 
edfcf73
 
 
 
19ea0c5
edfcf73
56325dc
 
edfcf73
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import fitz  # PyMuPDF for PDF processing
import requests
import json
import re
from io import BytesIO
import supabase
from config import SUPABASE_URL, SUPABASE_KEY, HF_API_TOKEN, HF_HEADERS, supabase, HF_MODELS, query, embedding_model
from sentence_transformers import SentenceTransformer, util

# These functions will be called in the app.py file

def evaluate_resumes(uploaded_files, job_description, top_n=5):
    """Evaluate uploaded resumes and return the highest-scoring candidates.

    Each resume is parsed, scored against the job description, scanned for
    an email address and summarized. Every processed resume is stored in
    Supabase, not only the ones that end up on the shortlist.

    :param uploaded_files: Iterable of file-like PDF objects exposing
        ``read()`` and ``name``. ``None`` is treated as "no files".
    :param job_description: The job description the resumes are scored against.
    :param top_n: Maximum number of candidates to return (default 5).
        Pass ``None`` to return every candidate.
    :return: List of candidate dicts (``name``, ``resume``, ``score``,
        ``email``, ``summary``) sorted by score, best first.
    :raises ValueError: If ``top_n`` is negative.
    """
    # A negative slice bound would silently drop candidates from the end
    # of the ranking instead of limiting it, so reject it up front.
    if top_n is not None and top_n < 0:
        raise ValueError("top_n must be a non-negative integer or None")

    candidates = []
    for pdf_file in uploaded_files or ():
        resume_text = parse_resume(pdf_file)
        score = score_candidate(resume_text, job_description)
        email = extract_email(resume_text)

        # Generate a summary of the resume
        summary = summarize_resume(resume_text)

        candidates.append({
            "name": pdf_file.name,
            "resume": resume_text,
            "score": score,
            "email": email,
            "summary": summary,
        })

        # Store all details including summary in Supabase
        store_in_supabase(resume_text, score, pdf_file.name, email, summary)

    ranked = sorted(candidates, key=lambda c: c["score"], reverse=True)
    return ranked[:top_n]

def parse_resume(pdf_file):
    """Extract the plain text of a resume PDF.

    :param pdf_file: File-like object whose ``read()`` returns the PDF bytes.
    :return: The text of all pages, joined with newlines.
    """
    # The context manager closes the document even if text extraction
    # fails, so the underlying PyMuPDF resources are not leaked.
    with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
        return "\n".join(page.get_text("text") for page in doc)

# Compiled once at import time. Compared with a bare "[\w.-]+@[\w.-]+":
#   * the local part may contain "+" and "%" (e.g. plus-addressing),
#   * the domain must contain at least one dot-separated label, and
#   * a trailing "." (end of sentence) is not swallowed into the match.
_EMAIL_PATTERN = re.compile(r"\w[\w.+%-]*@[\w-]+(?:\.[\w-]+)+")


def extract_email(resume_text):
    """Extract the first email address found in resume text.

    :param resume_text: The extracted resume text (may be empty or None).
    :return: The first matching email address, or None if there is none.
    """
    if not resume_text:
        return None
    match = _EMAIL_PATTERN.search(resume_text)
    return match.group(0) if match else None

def score_candidate(resume_text, job_description):
    """
    Rates how closely a resume matches a job description.

    Both texts are embedded with the shared sentence-transformers model and
    compared by cosine similarity.

    :param resume_text: The extracted resume text.
    :param job_description: The job description for comparison.
    :return: The cosine similarity rounded to 4 decimal places, or 0 when
        the embedding or comparison step raises.
    """
    try:
        # Embed the resume first, then the job description
        resume_vec, job_vec = [
            embedding_model.encode(text, convert_to_tensor=True)
            for text in (resume_text, job_description)
        ]

        # Single-pair cosine similarity, unwrapped to a plain Python float
        similarity = util.pytorch_cos_sim(resume_vec, job_vec).item()
        return round(similarity, 4)
    except Exception as e:
        # Best-effort scoring: report the problem and fall back to 0
        print(f"Error computing similarity score: {e}")
        return 0

def summarize_resume(resume_text):
    """
    Produces a short summary of a resume via the "gemma" model.

    :param resume_text: The resume text to summarize.
    :return: The generated summary, or a fixed fallback message when the
        API returns nothing or an unexpected payload.
    """
    fallback = "Summary could not be generated."

    result = query(
        {"inputs": f"Summarize this resume: {resume_text}"},
        model="gemma",
    )

    if result is None:
        print("Error: API response is None")
        return fallback

    # A non-empty list response carries the payload in its first element
    if isinstance(result, list) and result:
        result = result[0]

    try:
        has_text = isinstance(result, dict) and "generated_text" in result
        if not has_text:
            print("Unexpected API response format:", result)
            return fallback
        return result["generated_text"]
    except (TypeError, ValueError) as err:
        print(f"Error parsing summary: {err}")
        return fallback

def store_in_supabase(resume_text, score, candidate_name, email, summary):
    """
    Inserts one candidate record into the Supabase "candidates" table.

    :param resume_text: The extracted resume text.
    :param score: The candidate's score; None is stored as 0.
    :param candidate_name: The candidate's name.
    :param email: Candidate's email address.
    :param summary: A summarized version of the resume.
    :return: The response object produced by executing the insert.
    """
    record = {
        "name": candidate_name,
        "resume": resume_text,
        # Never write NULL into the score column
        "score": 0 if score is None else score,
        "email": email,
        "summary": summary,
    }
    return supabase.table("candidates").insert(record).execute()

# TODO: Test with 10 resumes to check whether they are shortlisted correctly
def generate_pdf_report(shortlisted_candidates):
    """Generate a PDF summary of shortlisted candidates.

    Each candidate starts on a new page. Long lines are wrapped, and text
    that does not fit on one page continues on follow-up pages, so a long
    summary is no longer cut off at the page edge.

    :param shortlisted_candidates: Iterable of candidate dicts with the keys
        ``name``, ``email``, ``score`` and optionally ``summary``.
    :return: A ``BytesIO`` positioned at offset 0 containing the PDF.
    """
    import textwrap

    # Approximate limits for the default page size and default font used by
    # new_page() / insert_text(); insert_text() itself does not wrap text.
    # NOTE(review): tune these if the page size or font size is changed.
    max_chars_per_line = 80
    max_lines_per_page = 55

    pdf = BytesIO()
    doc = fitz.open()
    try:
        for candidate in shortlisted_candidates:
            # Fall back when the summary is missing *or* stored as None/""
            summary = candidate.get("summary") or "No summary available"

            raw_lines = [
                f"Candidate: {candidate['name']}",
                f"Email: {candidate['email']}",
                f"Score: {candidate['score']}",
                *f"Summary: {summary}".splitlines(),
            ]

            wrapped_lines = []
            for line in raw_lines:
                # textwrap.wrap("") returns [], so keep blank lines explicitly
                wrapped_lines.extend(
                    textwrap.wrap(line, width=max_chars_per_line) or [""]
                )

            for start in range(0, len(wrapped_lines), max_lines_per_page):
                page = doc.new_page()
                chunk = wrapped_lines[start:start + max_lines_per_page]
                page.insert_text((50, 50), "\n".join(chunk))

        if doc.page_count == 0:
            # Saving a document without pages fails, so emit a placeholder
            page = doc.new_page()
            page.insert_text((50, 50), "No candidates were shortlisted.")

        doc.save(pdf)
    finally:
        # Release the in-memory document whether or not saving succeeded
        doc.close()

    pdf.seek(0)
    return pdf