Commit 79b5c9c by Johnny
Parent: 102e49d

feat: Update resume builder with LFS-tracked assets
- Add header and footer images using Git LFS
- Update configuration and dependencies
- Improve resume builder and OpenAI extractor
- Update app components and utility functions
- Remove unused blank resume template
- .gitattributes +2 -0
- .gitignore +7 -0
- .streamlit/config.toml +0 -1
- app.py +57 -4
- config.py +105 -27
- footer.png +3 -0
- header.png +3 -0
- pages/Template.py +64 -13
- requirements.txt +2 -1
- templates/blank_resume.docx +0 -0
- utils/builder.py +192 -280
- utils/openai_extractor.py +142 -219
- utils/parser.py +1 -1
- utils/reporting.py +2 -2
- utils/screening.py +1 -1
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.docx filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
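
For context, everything matched by these patterns is stored in git as a small pointer file whose binary payload lives on the LFS server. A minimal sketch (hypothetical helper, not part of this commit) of how such a pointer can be recognized, based on the published LFS pointer format:

# Sketch: detect whether a checked-out file is an un-smudged Git LFS pointer.
# LFS pointer files begin with "version https://git-lfs.github.com/spec/v1";
# this helper is illustrative only and does not ship with the repo.
def is_lfs_pointer(path: str) -> bool:
    try:
        with open(path, "rb") as f:
            first_line = f.readline()
    except OSError:
        return False
    return first_line.startswith(b"version https://git-lfs.github.com/spec/v1")

print(is_lfs_pointer("header.png"))  # True if the file was never smudged locally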
.gitignore CHANGED
@@ -37,3 +37,10 @@ debug_*.docx
 .sfdx/
 *.cls
 apex.db
+
+.DS_Store
+utils/.DS_Store
+utils/cursor-updates
+utils/prompt-updates
+Youlin Joseph Li qvell.docx
+Template.py
.streamlit/config.toml CHANGED
@@ -7,7 +7,6 @@ font="sans serif"
 
 [ui]
 hideTopBar = false
-hideSidebarNav = true
 
 [server]
 headless = true
app.py CHANGED
@@ -1,6 +1,7 @@
 # TalentLens
 
 import os
+import time  # Add time module import
 from io import BytesIO
 
 import streamlit as st
@@ -8,13 +9,40 @@ import fitz  # PyMuPDF
 import requests
 from dotenv import load_dotenv
 
-from config import supabase, HF_API_TOKEN, HF_HEADERS,
+from config import supabase, HF_API_TOKEN, HF_HEADERS, HF_ENDPOINTS
 from utils.parser import parse_resume, extract_email, summarize_resume
 from utils.hybrid_extractor import extract_resume_sections
 from utils.builder import build_resume_from_data
 from utils.screening import evaluate_resumes
 from utils.reporting import generate_pdf_report, generate_interview_questions_from_summaries
 
+def toggle_endpoint(endpoint_name, action):
+    """Start or stop an endpoint"""
+    try:
+        from config import HF_HEADERS, HF_ENDPOINTS
+        # Use the health endpoint
+        endpoint_info = HF_ENDPOINTS[endpoint_name]
+        url = f"{endpoint_info['url']}/health"
+
+        # Use HEAD request to start the endpoint
+        response = requests.head(url, headers=HF_HEADERS)
+
+        if response.status_code == 503:
+            st.info("🚀 Starting endpoint... This may take 5-6 minutes. Click on 'Start' again to refresh status.")
+            time.sleep(2)  # Wait briefly before refreshing status
+            from config import check_endpoint_status
+            new_status = check_endpoint_status(endpoint_name)
+            st.session_state['endpoint_status'] = {endpoint_name: new_status}
+        elif response.status_code == 200:
+            st.success("✅ Endpoint is running")
+            time.sleep(2)  # Wait briefly before refreshing status
+            from config import check_endpoint_status
+            new_status = check_endpoint_status(endpoint_name)
+            st.session_state['endpoint_status'] = {endpoint_name: new_status}
+        else:
+            st.error(f"❌ Failed to {action} endpoint: {response.text}")
+    except Exception as e:
+        st.error(f"❌ Failed to {action} endpoint: {str(e)}")
 
 # ------------------------- Main App Function -------------------------
 def main():
@@ -61,11 +89,11 @@ def main():
 
     with col1:
         # Evaluation trigger
-        evaluate_clicked = st.button("
+        evaluate_clicked = st.button("\U0001F4CA Evaluate Resumes", type="primary", use_container_width=True)
 
     with col2:
         # Format Resume redirect button
-        format_clicked = st.button("
+        format_clicked = st.button("\U0001F4C4 Format Resume", use_container_width=True)
 
     # Handle Format Resume redirect
     if format_clicked:
@@ -81,7 +109,7 @@ def main():
         st.error("⚠️ Please upload at least one resume.")
         return
 
-    st.write("###
+    st.write("### �� Evaluating Resumes...")
 
     # Resume Evaluation
     shortlisted, removed_candidates = evaluate_resumes(uploaded_files, job_description)
@@ -109,6 +137,31 @@ def main():
     for removed in removed_candidates:
         st.write(f"**{removed['name']}** - {removed['reason']}")
 
+
+    # Get current status using DNS resolution
+    from config import check_endpoint_status
+    endpoint_name = "vzwjawyxvu030jsw"  # Updated to match endpoint ID
+    current_status = check_endpoint_status(endpoint_name)
+    state = current_status.get('status', 'unknown')
+
+    # Update session state with current status
+    st.session_state['endpoint_status'] = {endpoint_name: current_status}
+
+    # Show Start button and status
+    start_button = st.empty()  # Placeholder for Start button
+    if state in ['stopped', 'error']:
+        if start_button.button("▶️ Start", key=f"start_{endpoint_name}", use_container_width=True):
+            toggle_endpoint(endpoint_name, "start")
+            # Refresh status after starting
+            new_status = check_endpoint_status(endpoint_name)
+            st.session_state['endpoint_status'] = {endpoint_name: new_status}
+            if new_status.get('status') == 'running':
+                st.success("✅ Endpoint is running")
+            elif new_status.get('status') == 'starting':
+                st.info("🚀 Starting endpoint... This may take 5-6 minutes. Click on 'Start' again to refresh status.")
+            elif new_status.get('status') == 'error':
+                st.error(f"❌ Error: {new_status.get('error', 'Unknown error')}")
+
 # ------------------------- Run the App -------------------------
 if __name__ == "__main__":
     main()
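
Stripped of the Streamlit UI, the wake-up logic in toggle_endpoint above reduces to: probe the scaled-to-zero endpoint, treat 503 as a cold start in progress, and retry until 200. A minimal sketch under those assumptions (the URL, token, and timing values are placeholders, not from this commit):

import time
import requests

def wait_until_up(url: str, token: str, tries: int = 10, delay: float = 30.0) -> bool:
    """Ping an endpoint until it answers 200, treating 503 as 'still starting'."""
    headers = {"Authorization": f"Bearer {token}"}
    for _ in range(tries):
        resp = requests.head(url, headers=headers, timeout=10)
        if resp.status_code == 200:
            return True   # endpoint is serving
        if resp.status_code != 503:
            return False  # some other failure; don't keep polling
        time.sleep(delay)  # cold start in progress: wait and retry
    return False

The /health path used in the app follows the same pattern; the first request both wakes the endpoint and reports whether it is already warm.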
config.py CHANGED
@@ -2,6 +2,7 @@
 import os
 import time
 import requests
+import socket
 from dotenv import load_dotenv
 from supabase import create_client
 from sentence_transformers import SentenceTransformer
@@ -20,44 +21,121 @@ supabase = create_client(SUPABASE_URL, SUPABASE_KEY)
 # === Embedding Model for Scoring ===
 embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
 
-# === Hugging Face API Configuration
+# === Hugging Face API Configuration ===
 HF_API_TOKEN = os.getenv("HF_API_TOKEN")
 if not HF_API_TOKEN:
     raise ValueError("Missing Hugging Face API key. Check your .env file.")
+
+# Headers for API requests
 HF_HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"}
 
 # === Hugging Face Model Endpoints ===
-[old endpoint definitions not captured in the page extraction]
+HF_ENDPOINTS = {
+    "bart-large-cnn-ovt": {
+        "url": "https://hedemwou4oqkk65c.us-east-1.aws.endpoints.huggingface.cloud",
+        "task": "summarization",
+        "model_id": "facebook/bart-large-cnn"
+    },
+    "vzwjawyxvu030jsw": {  # Updated endpoint name to match URL
+        "url": "https://vzwjawyxvu030jsw.us-east-1.aws.endpoints.huggingface.cloud",
+        "task": "text-generation",
+        "model_id": "google/gemma-7b"
+    }
 }
 
+def check_endpoint_status(endpoint_name: str) -> dict:
+    """
+    Check the status of a private Hugging Face endpoint using DNS resolution
+    """
+    if endpoint_name not in HF_ENDPOINTS:
+        return {
+            "status": "error",
+            "error": f"Unknown endpoint: {endpoint_name}"
+        }
+
+    try:
+        endpoint_info = HF_ENDPOINTS[endpoint_name]
+        hostname = endpoint_info['url'].replace('https://', '').split('/')[0]
+
+        # Try DNS resolution
+        try:
+            socket.gethostbyname(hostname)
+            # If DNS resolves, endpoint exists but may be stopped
+            return {
+                "status": "stopped",
+                "scaled": True,
+                "pending": 0,
+                "error": None
+            }
+        except socket.gaierror:
+            # If DNS fails, endpoint doesn't exist
+            return {
+                "status": "error",
+                "error": "Endpoint not found"
+            }
+    except Exception as e:
+        return {
+            "status": "error",
+            "error": str(e)
+        }
+
+def toggle_endpoint(endpoint_name: str, action: str) -> dict:
+    """
+    Start or stop a private Hugging Face endpoint
+    """
+    try:
+        # For private endpoints, use the Endpoints API
+        api_base = "https://api.endpoints.huggingface.cloud"
+        action_url = f"{api_base}/v2/endpoint/{endpoint_name}/{action}"
+
+        response = requests.post(
+            action_url,
+            headers=HF_HEADERS,
+            timeout=10
+        )
+
+        if response.status_code in [200, 202]:
+            return {
+                "success": True,
+                "message": f"Successfully {action}ed endpoint"
+            }
+        else:
+            return {
+                "error": f"Failed to {action} endpoint: {response.text}"
+            }
+    except Exception as e:
+        return {
+            "error": f"Failed to {action} endpoint: {str(e)}"
+        }
 
-# ===
-def query(payload
+# === Query Helper ===
+def query(payload: dict, endpoint_name: str) -> dict:
     """
-    [old docstring line not captured]
+    Send a query to a Hugging Face endpoint
     """
-    if
-    [old query body largely not captured in the page extraction]
+    if endpoint_name not in HF_ENDPOINTS:
+        return {
+            "error": f"Unknown endpoint: {endpoint_name}"
+        }
+
+    endpoint_info = HF_ENDPOINTS[endpoint_name]
+    url = endpoint_info['url']
+
+    try:
+        response = requests.post(
+            url,
+            headers=HF_HEADERS,
+            json=payload,
+            timeout=30
+        )
+
+        if response.status_code == 200:
             return response.json()
+        else:
+            return {
+                "error": f"Query failed with status {response.status_code}: {response.text}"
+            }
+    except Exception as e:
+        return {
+            "error": str(e)
+        }
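
For reference, the new query() helper passes the payload dict straight through to the endpoint; for the summarization endpoint this would typically be the standard {"inputs": ...} request shape. A hedged usage sketch (the input text and the printed response shape are illustrative, not part of this commit):

from config import query

# Summarization endpoints conventionally accept {"inputs": <text>} bodies
result = query({"inputs": "Long resume text to condense..."}, "bart-large-cnn-ovt")
if "error" in result:
    print("Request failed:", result["error"])
else:
    print(result)  # typically [{"summary_text": "..."}] for summarization models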
footer.png
ADDED
Binary image (stored via Git LFS)

header.png
ADDED
Binary image (stored via Git LFS)
pages/Template.py CHANGED
@@ -1,8 +1,10 @@
-# pages/
+# pages/Format_Resume.py
 
 import os, sys, streamlit as st
 import json
 from io import BytesIO
+import time  # Added for API status check
+import requests  # Added for endpoint control
 
 # Add parent directory to path so we can import utils
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -11,17 +13,13 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from dotenv import load_dotenv
 load_dotenv(override=True)
 
+from config import HF_ENDPOINTS  # Update import
 from utils.hybrid_extractor import extract_resume_sections
 from utils.builder import build_resume_from_data
-from utils.parser import parse_resume
-
-# Path to your blank template (header/footer only)
-template_path = os.path.join(
-    os.path.dirname(__file__), '..', 'templates', 'blank_resume.docx'
-)
+from utils.parser import parse_resume
 
 st.set_page_config(
-    page_title='Resume
+    page_title='Resume Formatter',
     layout='centered',
     initial_sidebar_state="collapsed"
 )
@@ -40,17 +38,70 @@ st.markdown("""
 </style>
 """, unsafe_allow_html=True)
 
+def toggle_endpoint(endpoint_name, action):
+    """Start or stop an endpoint"""
+    try:
+        from config import HF_HEADERS, HF_ENDPOINTS
+        # Use the health endpoint
+        endpoint_info = HF_ENDPOINTS[endpoint_name]
+        url = f"{endpoint_info['url']}/health"
+
+        # Use HEAD request to start the endpoint
+        response = requests.head(url, headers=HF_HEADERS)
+
+        if response.status_code == 503:
+            st.info("🚀 Starting endpoint... This may take 3-4 minutes. Click on 'Start' again to refresh status.")
+            time.sleep(2)  # Wait briefly before refreshing status
+            from config import check_endpoint_status
+            new_status = check_endpoint_status(endpoint_name)
+            st.session_state['endpoint_status'] = {endpoint_name: new_status}
+        elif response.status_code == 200:
+            st.success("✅ Endpoint is running")
+            time.sleep(2)  # Wait briefly before refreshing status
+            from config import check_endpoint_status
+            new_status = check_endpoint_status(endpoint_name)
+            st.session_state['endpoint_status'] = {endpoint_name: new_status}
+        else:
+            st.error(f"❌ Failed to {action} endpoint: {response.text}")
+    except Exception as e:
+        st.error(f"❌ Failed to {action} endpoint: {str(e)}")
+
 # Home button at the top
-if st.button("
+if st.button("\U0001F3E0 Home", help="Return to main TalentLens.AI page"):
     st.switch_page("app.py")
 
-st.title('📄 Resume
+st.title('📄 Resume Formatter')
 st.markdown("---")
 
 uploaded = st.file_uploader('Upload Resume (PDF or DOCX)', type=['pdf','docx'])
 if not uploaded:
     st.info("Please upload a resume to get started.")
-    [old line not captured in the page extraction]
+
+    # Get current status using DNS resolution
+    from config import check_endpoint_status
+    endpoint_name = "bart-large-cnn-ovt"
+    current_status = check_endpoint_status(endpoint_name)
+    state = current_status.get('status', 'unknown')
+
+    # Update session state with current status
+    st.session_state['endpoint_status'] = {endpoint_name: current_status}
+
+    # Show Start button and status
+    start_button = st.empty()  # Placeholder for Start button
+    if state in ['stopped', 'error']:
+        if start_button.button("▶️ Start", key=f"start_{endpoint_name}", use_container_width=True):
+            toggle_endpoint(endpoint_name, "start")
+            # Refresh status after starting
+            new_status = check_endpoint_status(endpoint_name)
+            st.session_state['endpoint_status'] = {endpoint_name: new_status}
+            if new_status.get('status') == 'running':
+                st.success("✅ Endpoint is running")
+            elif new_status.get('status') == 'starting':
+                st.info("🚀 Starting endpoint... This may take 3-4 minutes. Click on 'Start' again to refresh status.")
+            elif new_status.get('status') == 'error':
+                st.error(f"❌ Error: {new_status.get('error', 'Unknown error')}")
+
+    st.stop()  # Stop here if no file is uploaded
 
 st.success(f'Uploaded: {uploaded.name}')
 
@@ -239,7 +290,7 @@ if st.button('📄 Generate Formatted Resume', type='primary'):
     try:
         with st.spinner('Building formatted resume...'):
             # Build the resume document
-            doc = build_resume_from_data(
+            doc = build_resume_from_data(tmpl="", sections=data)
 
             # Save to buffer
             buf = BytesIO()
@@ -329,4 +380,4 @@ st.markdown(
     "🚀 <strong>TalentLens.AI</strong> - Powered by AI for intelligent resume processing"
     "</div>",
     unsafe_allow_html=True
-)
+)
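
The added status block relies on Streamlit's top-to-bottom rerun model: st.stop() halts the script, so nothing after the uploader executes until a file is present. A stripped-down sketch of that gating pattern:

import streamlit as st

uploaded = st.file_uploader("Upload Resume (PDF or DOCX)", type=["pdf", "docx"])
if not uploaded:
    st.info("Please upload a resume to get started.")
    # ...status widgets can be rendered here while waiting...
    st.stop()  # nothing below this line runs on this rerun

st.success(f"Uploaded: {uploaded.name}")  # reached only once a file exists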
requirements.txt CHANGED
@@ -10,4 +10,5 @@ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1
 openai
 fuzzywuzzy
 python-docx
-numpy<2.0
+numpy<2.0
+from torch._C import * # noqa: F403
templates/blank_resume.docx
DELETED
Binary file (48.2 kB)
utils/builder.py CHANGED
@@ -1,20 +1,19 @@
+import logging
+import os
+import re
 from datetime import datetime
 from dateutil.parser import parse as date_parse
-import re, math
 from docx import Document
-from docx.
-from docx.
-import logging
+from docx.enum.text import WD_PARAGRAPH_ALIGNMENT, WD_TAB_ALIGNMENT
+from docx.shared import Inches, Pt
 
 logger = logging.getLogger(__name__)
 
-# ---------- helpers ---------------------------------------------------
-def _date(dt_str:str)->datetime:
-    try: return date_parse(dt_str, default=datetime(1900,1,1))
-    except: return datetime(1900,1,1)
-
-def fmt_range(raw:str)->str:
-    [old line not captured in the page extraction]
+def fmt_range(raw: str) -> str:
+    """Formats a date range string nicely."""
+    if not raw:
+        return ""
     parts = [p.strip() for p in re.split(r"\s*[–-]\s*", raw)]
 
     formatted_parts = []
@@ -23,284 +22,197 @@ def fmt_range(raw:str)->str:
         formatted_parts.append("Present")
     else:
         try:
-            date_obj =
-            [old lines not captured in the page extraction]
+            date_obj = date_parse(part, fuzzy=True, default=datetime(1900, 1, 1))
+            if date_obj.year == 1900:
+                formatted_parts.append(part)
+            else:
+                formatted_parts.append(date_obj.strftime("%B %Y"))
+        except (ValueError, TypeError):
+            formatted_parts.append(part)
 
     return " – ".join(formatted_parts)
 
-# ---------- main ------------------------------------------------------
-def build_resume_from_data(tmpl:str, sections:dict)->Document:
-    logger.info(f"BUILDER: Attempting to load document template from: {tmpl}")
-    doc = Document(tmpl)
-    logger.info(f"BUILDER: Template {tmpl} loaded successfully.")
 
-    [old lines not captured in the page extraction]
-    tables_to_remove = list(doc.tables)  # Create a copy of the list
-    for table in tables_to_remove:
-        tbl = table._element
-        tbl.getparent().remove(tbl)
-
-    logger.info(f"BUILDER: After clearing - Document has {len(doc.paragraphs)} paragraphs and {len(doc.tables)} tables")
-
-    # Verify headers/footers are still intact
-    logger.info(f"BUILDER: After clearing - Document still has {len(doc.sections)} sections")
-    for i, section_obj in enumerate(doc.sections):
-        if section_obj.header:
-            logger.info(f"BUILDER: Section {i} header still has {len(section_obj.header.paragraphs)} paragraphs")
-        if section_obj.footer:
-            logger.info(f"BUILDER: Section {i} footer still has {len(section_obj.footer.paragraphs)} paragraphs")
-
-    logger.info(f"BUILDER: Template preserved with original headers and footers")
-
-    # --- easy builders ---
-    def heading(txt): pg=doc.add_paragraph(); r=pg.add_run(txt); r.bold=True; r.font.size=Pt(12)
-    def bullet(txt,lvl=0): p=doc.add_paragraph(); p.paragraph_format.left_indent=Pt(lvl*12); p.add_run(f"• {txt}").font.size=Pt(11)
-    def two_col(l,r):
-        tbl=doc.add_table(rows=1,cols=2); tbl.autofit=True
-        tbl.cell(0,0).paragraphs[0].add_run(l).bold=True
-        rp = tbl.cell(0,1).paragraphs[0]; rp.alignment=WD_ALIGN_PARAGRAPH.RIGHT
-        rr = rp.add_run(r); rr.italic=True
-
-    # --- header (name + current role) ---
-    exps = sections.get("StructuredExperiences",[])
-    if exps:
-        try:
-            # Filter to only dictionary experiences
-            dict_exps = [e for e in exps if isinstance(e, dict)]
-            if dict_exps:
-                newest = max(dict_exps, key=lambda e: _date(e.get("date_range","").split("–")[0] if "–" in e.get("date_range","") else e.get("date_range","").split("-")[0] if "-" in e.get("date_range","") else e.get("date_range","")))
-                cur_title = newest.get("title","")
-            else:
-                cur_title = ""
-        except:
-            # Fallback: try to get title from first dictionary experience
-            for exp in exps:
-                if isinstance(exp, dict) and exp.get("title"):
-                    cur_title = exp.get("title","")
-                    break
-            else:
-                cur_title = ""
-    else:
-        # Try to extract job title from summary if no structured experiences
-        cur_title = ""
-        summary = sections.get("Summary", "")
-        if summary:
-            # Look for job titles in the summary
-            title_patterns = [
-                r'(?i)(.*?engineer)',
-                r'(?i)(.*?developer)',
-                r'(?i)(.*?analyst)',
-                r'(?i)(.*?manager)',
-                r'(?i)(.*?specialist)',
-                r'(?i)(.*?consultant)',
-                r'(?i)(.*?architect)',
-                r'(?i)(.*?lead)',
-                r'(?i)(.*?director)',
-                r'(?i)(.*?coordinator)'
-            ]
-
-            for pattern in title_patterns:
-                match = re.search(pattern, summary)
-                if match:
-                    potential_title = match.group(1).strip()
-                    # Clean up the title
-                    potential_title = re.sub(r'^(results-driven|experienced|senior|junior|lead)\s+', '', potential_title, flags=re.I)
-                    if len(potential_title) > 3 and len(potential_title) < 50:
-                        cur_title = potential_title.title()
-                        break
-
-    if sections.get("Name"):
-        p=doc.add_paragraph(); p.alignment=WD_PARAGRAPH_ALIGNMENT.CENTER
-        run=p.add_run(sections["Name"]); run.bold=True; run.font.size=Pt(16)
-    if cur_title:
-        p=doc.add_paragraph(); p.alignment=WD_PARAGRAPH_ALIGNMENT.CENTER
-        p.add_run(cur_title).font.size=Pt(12)
-
-    # --- summary ---
-    if sections.get("Summary"):
-        heading("Professional Summary:")
-        pg=doc.add_paragraph(); pg.paragraph_format.first_line_indent=Pt(12)
-        pg.add_run(sections["Summary"]).font.size=Pt(11)
-
-    # --- skills ---
-    if sections.get("Skills"):
-        heading("Skills:")
-        skills = sorted(set(sections["Skills"]))
-        cols = 3
-        rows = math.ceil(len(skills)/cols)
-        tbl = doc.add_table(rows=rows, cols=cols); tbl.autofit=True
-        k=0
-        for r in range(rows):
-            for c in range(cols):
-                if k < len(skills):
-                    tbl.cell(r,c).paragraphs[0].add_run(f"• {skills[k]}").font.size=Pt(11)
-                    k+=1
-
-    # --- experience ---
-    if exps:
-        heading("Professional Experience:")
-        for e in exps:
-            # Ensure e is a dictionary, not a string
-            if isinstance(e, str):
-                # If it's a string, create a basic experience entry
-                bullet(e, 0)
-                continue
-            elif not isinstance(e, dict):
-                # Skip if it's neither string nor dict
-                continue
-
-            # Process dictionary experience entry
-            title = e.get("title", "")
-            company = e.get("company", "")
-            date_range = e.get("date_range", "")
-            responsibilities = e.get("responsibilities", [])
-
-            # Create the job header
-            two_col(" | ".join(filter(None, [title, company])),
-                    fmt_range(date_range))
-
-            # Add responsibilities
-            if isinstance(responsibilities, list):
-                for resp in responsibilities:
-                    if isinstance(resp, str) and resp.strip():
-                        bullet(resp, 1)
-            elif isinstance(responsibilities, str) and responsibilities.strip():
-                bullet(responsibilities, 1)
-    else:
-        # If no structured experiences found, try to extract from summary
-        heading("Professional Experience:")
-        summary = sections.get("Summary", "")
-
-    [old lines not captured in the page extraction]
-
-    # Filter to only dictionary experiences and sort by date (most recent first)
-    dict_exps = [e for e in exps if isinstance(e, dict) and e.get("title") and e.get("date_range")]
-
-    [old education-processing lines partially lost in the page extraction]
-    # Clean up the education entry (remove bullets)
-    clean_ed = ed.replace('•', '').strip()
-    if re.match(r'^\d+\s+years?$', clean_ed, re.I):
-        # This is experience duration, not education
-        experience_years = clean_ed
-    else:
-        processed_education.append(clean_ed)
-        has_real_education = True
-
-    # Show education section
-    if has_real_education:
-        heading("Education:")
-        for ed in processed_education:
-            bullet(ed)
-    elif experience_years:
-        # If only experience years found, show it as a note
-        heading("Education:")
-        pg = doc.add_paragraph()
-        pg.add_run(f"Professional experience: {experience_years}").font.size = Pt(11)
-
-    # Ensure tr is a string
-    if isinstance(tr, str) and tr.strip():
-        bullet(tr)
-
-    # Final diagnostic before returning
-    logger.info(f"BUILDER: FINAL STATE - Document has {len(doc.sections)} sections")
-    for i, section_obj in enumerate(doc.sections):
-        if section_obj.header:
-            logger.info(f"BUILDER: FINAL - Section {i} header has {len(section_obj.header.paragraphs)} paragraphs")
-        if section_obj.footer:
-            logger.info(f"BUILDER: FINAL - Section {i} footer has {len(section_obj.footer.paragraphs)} paragraphs")
-
-    return doc
+def add_section_heading(doc, text):
+    """Adds a centered section heading."""
+    p = doc.add_paragraph()
+    run = p.add_run(text.upper())
+    run.bold = True
+    font = run.font
+    font.size = Pt(12)
+    font.name = 'Arial'
+    p.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
+    p.paragraph_format.space_after = Pt(6)
+
+
+def build_resume_from_data(tmpl: str, sections: dict, remove_blank_pages_enabled: bool = True) -> Document:
+    """
+    Builds a formatted resume from structured data, inserting header/footer images and logging the process.
+    """
+    logger.info("BUILDER: Starting image-based resume build process.")
+    try:
+        # 1. Create a new blank document, ignoring the template file
+        doc = Document()
+        logger.info("BUILDER: Successfully created a new blank document.")
+
+        # Get section and enable different first page header/footer
+        section = doc.sections[0]
+        section.different_first_page = True
+
+        # Move header and footer to the very edge of the page
+        section.header_distance = Pt(0)
+        section.footer_distance = Pt(0)
+        logger.info("BUILDER: Set header/footer distance to 0 to remove whitespace.")
+
+        # 2. Define image paths relative to the project root
+        script_dir = os.path.dirname(os.path.abspath(__file__))
+        project_root = os.path.dirname(script_dir)
+        header_path = os.path.join(project_root, 'header.png')
+        footer_path = os.path.join(project_root, 'footer.png')
+
+        logger.info(f"BUILDER: Attempting to use header image from: {header_path}")
+        logger.info(f"BUILDER: Attempting to use footer image from: {footer_path}")
+
+        if not os.path.exists(header_path):
+            logger.error(f"BUILDER FATAL: Header image not found at '{header_path}'. Cannot proceed.")
+            return doc  # Return empty doc
+        if not os.path.exists(footer_path):
+            logger.error(f"BUILDER FATAL: Footer image not found at '{footer_path}'. Cannot proceed.")
+            return doc  # Return empty doc
+
+        # 3. Setup Headers
+        candidate_name = sections.get("Name", "Candidate Name Not Found")
+        experiences = sections.get("StructuredExperiences", [])
+        job_title = experiences[0].get("title", "") if experiences else ""
+
+        # -- First Page Header (Image + Name + Title) --
+        first_page_header = section.first_page_header
+        first_page_header.is_linked_to_previous = False
+
+        # Safely get or create a paragraph for the image
+        p_header_img_first = first_page_header.paragraphs[0] if first_page_header.paragraphs else first_page_header.add_paragraph()
+        p_header_img_first.clear()
+
+        p_header_img_first.paragraph_format.space_before = Pt(0)
+        p_header_img_first.paragraph_format.space_after = Pt(0)
+        p_header_img_first.paragraph_format.left_indent = -section.left_margin
+        p_header_img_first.add_run().add_picture(header_path, width=section.page_width)
+        logger.info("BUILDER: Inserted header.png into FIRST PAGE header.")
+
+        # Add Name
+        p_name = first_page_header.add_paragraph()
+        run_name = p_name.add_run(candidate_name.upper())
+        run_name.font.name = 'Arial'
+        run_name.font.size = Pt(14)
+        run_name.bold = True
+        p_name.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
+        p_name.paragraph_format.space_before = Pt(6)
+        p_name.paragraph_format.space_after = Pt(0)
+        logger.info(f"BUILDER: Added candidate name '{candidate_name}' to FIRST PAGE header.")
+
+        # Add Job Title
+        if job_title:
+            p_title = first_page_header.add_paragraph()
+            run_title = p_title.add_run(job_title)
+            run_title.font.name = 'Arial'
+            run_title.font.size = Pt(11)
+            p_title.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
+            p_title.paragraph_format.space_before = Pt(0)
+            logger.info(f"BUILDER: Added job title '{job_title}' to FIRST PAGE header.")
+
+        # -- Primary Header for subsequent pages (Image Only) --
+        primary_header = section.header
+        primary_header.is_linked_to_previous = False
+
+        # Safely get or create a paragraph for the image
+        p_header_img_primary = primary_header.paragraphs[0] if primary_header.paragraphs else primary_header.add_paragraph()
+        p_header_img_primary.clear()
+
+        p_header_img_primary.paragraph_format.space_before = Pt(0)
+        p_header_img_primary.paragraph_format.space_after = Pt(0)
+        p_header_img_primary.paragraph_format.left_indent = -section.left_margin
+        p_header_img_primary.add_run().add_picture(header_path, width=section.page_width)
+        logger.info("BUILDER: Inserted header.png into PRIMARY header for subsequent pages.")
+
+        # 4. Insert Footer Image (same for all pages)
+        footer = section.footer
+        footer.is_linked_to_previous = False
+
+        # Safely get or create a paragraph for the image
+        p_footer_img = footer.paragraphs[0] if footer.paragraphs else footer.add_paragraph()
+        p_footer_img.clear()
+
+        p_footer_img.paragraph_format.space_before = Pt(0)
+        p_footer_img.paragraph_format.space_after = Pt(0)
+        p_footer_img.paragraph_format.left_indent = -section.left_margin
+        p_footer_img.add_run().add_picture(footer_path, width=section.page_width)
+
+        # Link the first page footer to the primary footer so we only define it once.
+        section.first_page_footer.is_linked_to_previous = True
+        logger.info("BUILDER: Inserted footer.png and configured for all pages.")
+
+        # 5. Build Resume Body
+        logger.info("BUILDER: Proceeding to add structured resume content to document body.")
+
+        # --- Professional Summary ---
+        if sections.get("Summary"):
+            add_section_heading(doc, "Professional Summary")
+            doc.add_paragraph(sections["Summary"]).paragraph_format.space_after = Pt(12)
+
+        # --- Skills ---
+        if sections.get("Skills"):
+            add_section_heading(doc, "Skills")
+            skills_text = ", ".join(sections["Skills"])
+            p = doc.add_paragraph(skills_text)
+            p.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
+            p.paragraph_format.space_after = Pt(12)
+
+        # --- Professional Experience ---
+        if experiences:
+            add_section_heading(doc, "Professional Experience")
+            for exp in experiences:
+                if not isinstance(exp, dict):
+                    continue
+
+                p = doc.add_paragraph()
+                p.add_run(exp.get("title", "N/A")).bold = True
+                p.add_run(" | ").bold = True
+                p.add_run(exp.get("company", "N/A")).italic = True
+                p.add_run(f'\t{fmt_range(exp.get("date_range", ""))}')
+
+                tab_stops = p.paragraph_format.tab_stops
+                tab_stops.add_tab_stop(Inches(6.5), WD_TAB_ALIGNMENT.RIGHT)
+
+                responsibilities = exp.get("responsibilities", [])
+                if responsibilities and isinstance(responsibilities, list):
+                    for resp in responsibilities:
+                        if resp.strip():
+                            try:
+                                p_resp = doc.add_paragraph(resp, style='List Bullet')
+                            except KeyError:
+                                p_resp = doc.add_paragraph(f"• {resp}")
+
+                            p_resp.paragraph_format.left_indent = Inches(0.25)
+                            p_resp.paragraph_format.space_before = Pt(0)
+                            p_resp.paragraph_format.space_after = Pt(3)
+
+                doc.add_paragraph().paragraph_format.space_after = Pt(6)
+
+        # --- Education ---
+        if sections.get("Education"):
+            add_section_heading(doc, "Education")
+            for edu in sections.get("Education", []):
+                if edu.strip():
+                    try:
+                        p_edu = doc.add_paragraph(edu, style='List Bullet')
+                    except KeyError:
+                        p_edu = doc.add_paragraph(f"• {edu}")
+
+                    p_edu.paragraph_format.left_indent = Inches(0.25)
+
+        logger.info("BUILDER: Resume build process completed successfully.")
+        return doc
+
+    except Exception:
+        logger.error("BUILDER: An unexpected error occurred during resume generation.", exc_info=True)
+        return Document()
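
Given the fmt_range() rewrite above, recognizable dates render as "Month Year" and anything dateutil cannot parse falls back to the raw token. Expected behavior, following directly from the code:

from utils.builder import fmt_range

print(fmt_range("Jan 2021 - Present"))   # January 2021 – Present
print(fmt_range("Jun 2017 - Dec 2020"))  # June 2017 – December 2020
print(fmt_range(""))                     # "" (guarded by the new early return)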
utils/openai_extractor.py CHANGED
@@ -1,165 +1,175 @@
-#!/usr/bin/env python3
 """
-OpenAI
-
-This module provides resume extraction using OpenAI's GPT-4o model (GPT-4.1),
-which is the latest and most capable model for complex resume parsing.
+OpenAI-based resume data extraction.
+Uses GPT models to extract structured information from resume text.
 """
 
 import json
 import re
 import logging
-import os
 from typing import Dict, Any, List, Optional
+
+import openai
 from openai import OpenAI
 
-#
-logging.basicConfig(level=logging.INFO)
+# Set up logging
 logger = logging.getLogger(__name__)
 
+
 class OpenAIResumeExtractor:
     """
-    [old class docstring not captured in the page extraction]
+    Resume data extractor using OpenAI's GPT models.
     """
 
     def __init__(self, api_key: Optional[str] = None, model: str = "gpt-4o"):
-        """
-        Args:
-            api_key: OpenAI API key (optional, will use env var if not provided)
-            model: OpenAI model to use (gpt-4o is the latest and most capable GPT-4 model)
-        """
-        self.api_key = api_key or os.getenv('OPENAI_API_KEY')
+        """Initialize with OpenAI API key and model."""
+        self.client = OpenAI(api_key=api_key) if api_key else OpenAI()
         self.model = model
-
-        if not self.api_key:
-            raise ValueError("No OpenAI API key found. Set OPENAI_API_KEY environment variable.")
-
-        self.client = OpenAI(api_key=self.api_key)
+        logger.info(f"OpenAI extractor initialized with model: {model}")
 
     def extract_sections_openai(self, text: str) -> Dict[str, Any]:
         """
-        Extract resume sections using OpenAI
+        Extract resume sections using OpenAI API.
 
         Args:
             text: Raw resume text
 
         Returns:
-            [old line not captured in the page extraction]
+            Dict containing extracted sections
         """
-        logger.info("Starting OpenAI
+        logger.info("Starting OpenAI extraction...")
 
         try:
-            # Create
+            # Create extraction prompt
             prompt = self._create_extraction_prompt(text)
 
-            #
+            # Call OpenAI API
             response = self.client.chat.completions.create(
                 model=self.model,
                 messages=[
-                    {
-                        "content": "You are an expert resume parser. Extract information accurately and return valid JSON only."
-                    },
-                    {
-                        "role": "user",
-                        "content": prompt
-                    }
+                    {"role": "system", "content": "You are an expert resume parser. Extract information and return ONLY valid JSON."},
+                    {"role": "user", "content": prompt}
                 ],
-            temperature=0.1,
+                temperature=0.1,
                 max_tokens=2000
             )
 
-            # Parse
+            # Parse response
+            content = response.choices[0].message.content.strip()
+            logger.debug(f"OpenAI response: {content[:200]}...")
 
-            # Clean
-            elif "```" in result_text:
-                result_text = result_text.split("```")[1]
+            # Clean and parse JSON
+            content = self._clean_json_response(content)
+            result = json.loads(content)
 
-            #
-            result = json.loads(result_text)
-
-            # Validate and clean the result
+            # Validate and enhance result
             result = self._validate_and_clean_result(result)
 
-            #
+            # Add contact info extraction
             contact_info = self._extract_contact_info(text)
             result["ContactInfo"] = contact_info
 
             logger.info("✅ OpenAI extraction completed successfully")
             return result
 
+        except json.JSONDecodeError as e:
+            logger.error(f"JSON parsing error: {e}")
+            logger.debug(f"Response content: {content}")
+            return self._fallback_extraction(text)
+
         except Exception as e:
             logger.error(f"OpenAI extraction failed: {e}")
-            return self._get_empty_result()
-
-            return self._fallback_extraction(text)
+            return self._fallback_extraction(text)
+
+    def _clean_json_response(self, content: str) -> str:
+        """Clean JSON response from OpenAI."""
+        # Remove markdown code blocks
+        content = re.sub(r'```json\s*', '', content)
+        content = re.sub(r'```\s*$', '', content)
+
+        # Remove any text before first {
+        start = content.find('{')
+        if start > 0:
+            content = content[start:]
+
+        # Remove any text after last }
+        end = content.rfind('}')
+        if end > 0 and end < len(content) - 1:
+            content = content[:end + 1]
+
+        return content.strip()
 
     def _create_extraction_prompt(self, text: str) -> str:
-        """Create
+        """Create prompt for OpenAI extraction."""
         prompt = f"""
-Extract
-
-RESUME TEXT:
-{text}
-
-Extract and return ONLY a JSON object with this exact structure:
+Extract information from this resume and return ONLY valid JSON in this exact format:
 
 {{
-    [old JSON schema not captured in the page extraction]
+    "Name": "Full Name with credentials (PhD, MBA, etc.)",
+    "Summary": "Professional summary or objective",
+    "Skills": ["skill1", "skill2", "skill3"],
+    "StructuredExperiences": [
+        {{
+            "title": "Job Title",
+            "company": "Company Name",
+            "date_range": "Start Date - End Date",
+            "responsibilities": ["responsibility1", "responsibility2"]
+        }}
+    ],
+    "Education": ["degree info", "school info"],
+    "Training": ["certification1", "training1"],
+    "Address": "Full address if available"
}}
 
-2. Summary: Extract the complete professional summary/objective section
-3. Skills: Extract technical skills only (programming languages, tools, frameworks)
-4. StructuredExperiences: For each job, extract:
-   - title: The job title/position
-   - company: Company name (include location if provided)
-   - date_range: Employment dates
-   - responsibilities: List of bullet points describing what they did
-5. Education: Extract degrees, institutions, and graduation years
-6. Training: Extract certifications, courses, training programs
+Resume text:
+{text}
 
+CRITICAL INSTRUCTIONS:
+- For NAME: Include ALL credentials (PhD, MBA, M.S., B.S., etc.) - example: "John Doe, PhD, MBA"
+- Read the ENTIRE resume text carefully, don't miss content
+- Extract ALL work experiences with full details
 - Return ONLY valid JSON, no explanations
 - If a section is not found, use empty string or empty array
-- For experiences, look for patterns like "Title | Company | Dates" or similar
-- Extract ALL job experiences found in the resume
-- Include ALL bullet points under each job as responsibilities
+- Extract actual technical skills, not company names
 """
         return prompt
 
+    def _extract_contact_info(self, text: str) -> Dict[str, str]:
+        """Extract contact information from resume text."""
+        contact_info = {}
+
+        # Extract email
+        email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
+        email_match = re.search(email_pattern, text)
+        if email_match:
+            contact_info['email'] = email_match.group()
+
+        # Extract phone number
+        phone_patterns = [
+            r'\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}',
+            r'\+1[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}',
+            r'\d{3}[-.\s]?\d{3}[-.\s]?\d{4}'
+        ]
+
+        for pattern in phone_patterns:
+            phone_match = re.search(pattern, text)
+            if phone_match:
+                contact_info['phone'] = phone_match.group().strip()
+                break
+
+        # Extract LinkedIn
+        linkedin_pattern = r'linkedin\.com/in/[A-Za-z0-9-]+'
+        linkedin_match = re.search(linkedin_pattern, text)
+        if linkedin_match:
+            contact_info['linkedin'] = linkedin_match.group()
+
+        logger.info(f"OPENAI: Extracted ContactInfo as dict: {contact_info}")
+        return contact_info
+
     def _validate_and_clean_result(self, result: Dict[str, Any]) -> Dict[str, Any]:
-        """Validate and clean the extraction result"""
+        """Validate and clean the extraction result."""
 
         # Ensure all required keys exist
-        required_keys = ["Name", "Summary", "Skills", "StructuredExperiences", "Education", "Training"]
+        required_keys = ["Name", "Summary", "Skills", "StructuredExperiences", "Education", "Training", "Address"]
        for key in required_keys:
             if key not in result:
                 result[key] = [] if key in ["Skills", "StructuredExperiences", "Education", "Training"] else ""
@@ -187,59 +197,45 @@ IMPORTANT:
 
         return result
 
-    def _get_empty_result(self) -> Dict[str, Any]:
-        """Return empty result structure for API failures"""
-        return {
-            "Name": "",
-            "Summary": "",
-            "Skills": [],
-            "StructuredExperiences": [],
-            "Education": [],
-            "Training": [],
-            "ContactInfo": {}
-        }
-
     def _is_company_name(self, text: str) -> bool:
-        """Check if text looks like a company name rather than a skill"""
+        """Check if text looks like a company name rather than a skill."""
         company_indicators = [
             "inc", "llc", "corp", "ltd", "company", "solutions", "services",
             "systems", "technologies", "financial", "insurance"
         ]
         text_lower = text.lower()
         return any(indicator in text_lower for indicator in company_indicators)
 
     def _fallback_extraction(self, text: str) -> Dict[str, Any]:
-        """Fallback to regex-based extraction if OpenAI fails"""
+        """Fallback to regex-based extraction if OpenAI fails."""
         logger.info("Using regex fallback extraction...")
-        [old return dict partially lost in the page extraction]
-            "Training": [],
-            "ContactInfo": self._extract_contact_info(text)
-        }
+
+        return {
+            "Name": self._extract_name_regex(text),
+            "Summary": self._extract_summary_regex(text),
+            "Skills": self._extract_skills_regex(text),
+            "StructuredExperiences": self._extract_experiences_regex(text),
+            "Education": self._extract_education_regex(text),
+            "Training": [],
+            "Address": self._extract_address_regex(text),
+            "ContactInfo": self._extract_contact_info(text)
+        }
 
     def _extract_name_regex(self, text: str) -> str:
-        """Regex fallback for name extraction"""
+        """Regex fallback for name extraction."""
         lines = text.split('\n')[:5]
         for line in lines:
             line = line.strip()
             if re.search(r'@|phone|email|linkedin|github', line.lower()):
                 continue
-            [old name pattern not captured in the page extraction]
+            # Match name with potential credentials (PhD, MBA, etc.)
+            name_match = re.match(r'^([A-Z][a-z]+ [A-Z][a-z]+(?:\s+[A-Z][a-z]+)?(?:,\s*[A-Z][a-z.]+(?:,\s*[A-Z][a-z.]+)?)?)', line)
             if name_match:
                 return name_match.group(1)
         return ""
 
     def _extract_summary_regex(self, text: str) -> str:
-        """Regex fallback for summary extraction"""
         summary_pattern = r'(?i)(?:professional\s+)?summary[:\s]*\n(.*?)(?=\n\s*(?:technical\s+skills?|skills?|experience|education))'
         match = re.search(summary_pattern, text, re.DOTALL)
         if match:
@@ -250,7 +246,7 @@ IMPORTANT:
         return ""
 
     def _extract_skills_regex(self, text: str) -> List[str]:
-        """Regex fallback for skills extraction"""
         skills = set()
 
         # Look for technical skills section
@@ -269,7 +265,7 @@ IMPORTANT:
         return sorted(list(skills))
 
     def _extract_experiences_regex(self, text: str) -> List[Dict[str, Any]]:
-        """Regex fallback for experience extraction"""
         experiences = []
 
         # Look for work experience section
@@ -303,7 +299,7 @@ IMPORTANT:
         return experiences
 
     def _extract_education_regex(self, text: str) -> List[str]:
-        """Regex fallback for education extraction"""
         education = []
 
         edu_pattern = r'(?i)education[:\s]*\n(.*?)(?=\n\s*(?:certifications?|projects?|$))'
@@ -319,98 +315,25 @@ IMPORTANT:
 
         return education
 
-    def
-        """
-        [old _extract_contact_info body partially lost in the page extraction]
-        contact_info["email"] = email_match.group(0)
-
-        # Extract phone
-        phone_patterns = [
-            r'\+?1?[-.\s]?\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4})',
-            r'(\d{3})[-.\s](\d{3})[-.\s](\d{4})',
-            r'\+\d{1,3}[-.\s]?\d{3}[-.\s]?\d{3}[-.\s]?\d{4}'
-        ]
-
-        for pattern in phone_patterns:
-            phone_match = re.search(pattern, text)
-            if phone_match:
-                contact_info["phone"] = phone_match.group(0)
-                break
-
-        # Extract LinkedIn
-        linkedin_patterns = [
-            r'linkedin\.com/in/[\w-]+',
-            r'linkedin\.com/[\w-]+',
-            r'(?i)linkedin[:\s]+[\w.-]+',
         ]
 
-        for pattern in
-            if
-                if not linkedin_url.startswith('http'):
-                    linkedin_url = f"https://{linkedin_url}"
-                contact_info["linkedin"] = linkedin_url
-                break
 
-        return
 
-#
 def extract_sections_openai(text: str, api_key: Optional[str] = None) -> Dict[str, Any]:
-    """
-    Extract resume sections using OpenAI GPT-4o (GPT-4.1)
-
-    Args:
-        text: Raw resume text
-        api_key: OpenAI API key (optional)
-
-    Returns:
-        Structured resume data
-    """
     extractor = OpenAIResumeExtractor(api_key=api_key)
-    return extractor.extract_sections_openai(text)
-
-# Test function
-def test_openai_extraction():
-    """Test the OpenAI extraction with sample resume"""
-
-    sample_text = """
-    John Doe
-    Selenium Java Automation Engineer
-    Email: johndoe@example.com | Phone: +1-123-456-7890
-
-    Professional Summary
-    Results-driven Automation Test Engineer with 8 years of experience in Selenium and Java,
-    specializing in automation frameworks for financial and insurance domains.
-
-    Technical Skills
-    Selenium WebDriver, Java, TestNG, Cucumber, Jenkins, Maven, Git, REST Assured, Postman,
-    JIRA, Agile/Scrum, CI/CD
-
-    Work Experience
-    Senior Automation Test Engineer | ABC Financial Services | Jan 2021 - Present
-    - Led automation framework enhancements using Selenium and Java, improving test efficiency.
-    - Automated end-to-end UI and API testing for financial applications, reducing manual effort by 40%.
-
-    Automation Test Engineer | XYZ Insurance Solutions | Jun 2017 - Dec 2020
-    - Designed and implemented Selenium automation framework using Java and TestNG.
-    - Developed automated test scripts for insurance policy management applications.
-
-    Education
-    Bachelor of Technology in Computer Science | ABC University | 2015
-    """
-
-    extractor = OpenAIResumeExtractor()
-    result = extractor.extract_sections_openai(sample_text)
-
-    print("OpenAI Extraction Results:")
-    print(json.dumps(result, indent=2))
-
-    return result
-
-if __name__ == "__main__":
-    test_openai_extraction()

[the new side of the remaining hunks was truncated in the page capture]
234 |
return name_match.group(1)
|
235 |
return ""
|
236 |
|
237 |
def _extract_summary_regex(self, text: str) -> str:
|
238 |
+
"""Regex fallback for summary extraction."""
|
239 |
summary_pattern = r'(?i)(?:professional\s+)?summary[:\s]*\n(.*?)(?=\n\s*(?:technical\s+skills?|skills?|experience|education))'
|
240 |
match = re.search(summary_pattern, text, re.DOTALL)
|
241 |
if match:
|
|
|
246 |
return ""
|
247 |
|
248 |
def _extract_skills_regex(self, text: str) -> List[str]:
|
249 |
+
"""Regex fallback for skills extraction."""
|
250 |
skills = set()
|
251 |
|
252 |
# Look for technical skills section
|
|
|
265 |
return sorted(list(skills))
|
266 |
|
267 |
def _extract_experiences_regex(self, text: str) -> List[Dict[str, Any]]:
|
268 |
+
"""Regex fallback for experience extraction."""
|
269 |
experiences = []
|
270 |
|
271 |
# Look for work experience section
|
|
|
299 |
return experiences
|
300 |
|
301 |
def _extract_education_regex(self, text: str) -> List[str]:
|
302 |
+
"""Regex fallback for education extraction."""
|
303 |
education = []
|
304 |
|
305 |
edu_pattern = r'(?i)education[:\s]*\n(.*?)(?=\n\s*(?:certifications?|projects?|$))'
|
|
|
315 |
|
316 |
return education
|
317 |
|
318 |
+
def _extract_address_regex(self, text: str) -> str:
|
319 |
+
"""Regex fallback for address extraction."""
|
320 |
+
# Look for address patterns like "6001 Tain Dr. Suite 203, Dublin, OH, 43016"
|
321 |
+
address_patterns = [
|
322 |
+
r'(\d+\s+[A-Za-z\s\.]+(?:Suite|Apt|Unit)\s+\d+,?\s*[A-Za-z\s]+,\s*[A-Z]{2}\s*\d{5})',
|
323 |
+
r'(\d+\s+[A-Za-z\s\.]+,?\s*[A-Za-z\s]+,\s*[A-Z]{2}\s*\d{5})',
|
324 |
+
r'([A-Za-z\s\d\.]+,\s*[A-Za-z\s]+,\s*[A-Z]{2}\s*\d{5})'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
325 |
]
|
326 |
|
327 |
+
for pattern in address_patterns:
|
328 |
+
match = re.search(pattern, text)
|
329 |
+
if match:
|
330 |
+
return match.group(1).strip()
|
|
|
|
|
|
|
|
|
331 |
|
332 |
+
return ""
|
333 |
+
|
334 |
|
335 |
+
# Main extraction function for compatibility
|
336 |
def extract_sections_openai(text: str, api_key: Optional[str] = None) -> Dict[str, Any]:
|
337 |
+
"""Extract resume sections using OpenAI API."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
338 |
extractor = OpenAIResumeExtractor(api_key=api_key)
|
339 |
+
return extractor.extract_sections_openai(text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
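For reference, here is a minimal usage sketch of the reworked extractor. It assumes OPENAI_API_KEY is set in the environment and that you run from the repo root; the sample resume text and the printed values are illustrative, not taken from the commit:

    from utils.openai_extractor import extract_sections_openai

    sample_resume = """Jane Roe, PhD
    jane.roe@example.com | (614) 555-0101 | linkedin.com/in/janeroe

    Professional Summary
    Data scientist with eight years of experience in NLP and MLOps.
    """

    # Falls back to the regex extractors automatically if the API call
    # or the JSON parse fails.
    sections = extract_sections_openai(sample_resume)
    print(sections["Name"])         # e.g. "Jane Roe, PhD" (credentials preserved)
    print(sections["ContactInfo"])  # e.g. {'email': 'jane.roe@example.com', 'phone': '(614) 555-0101', ...}

The _clean_json_response pass matters here because chat models routinely wrap their JSON in markdown fences; trimming everything outside the outermost braces before json.loads runs is what keeps the happy path from dropping into the regex fallback.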
utils/parser.py
CHANGED
@@ -3,7 +3,7 @@ import fitz  # PyMuPDF
3 |      import re
4 |      from io import BytesIO
5 |      from docx import Document
6 | -    from config import supabase, embedding_model,
6 | +    from config import supabase, embedding_model, HF_ENDPOINTS, query
7 |
8 |      def extract_name(resume_text: str) -> str:
9 |          # look at the very top lines for a capitalized full name
utils/reporting.py
CHANGED
@@ -2,7 +2,7 @@
 2 |      import re
 3 |      import fitz  # PyMuPDF
 4 |      from io import BytesIO
 5 | -    from config import supabase, embedding_model,
 5 | +    from config import supabase, embedding_model, query
 6 |      from .screening import evaluate_resumes
 7 |
 8 |      def generate_pdf_report(shortlisted_candidates, questions=None):
@@ -45,7 +45,7 @@ def generate_interview_questions_from_summaries(candidates):
45 |          )
46 |
47 |          try:
48 | -            response =
48 | +            response = supabase.ai.chat.completions.create(
49 |                  model="tgi",
50 |                  messages=[{"role": "user", "content": prompt}],
51 |                  temperature=0.7,
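The restored completion call uses the OpenAI-compatible chat interface that Hugging Face TGI endpoints expose, which is why the model is the literal string "tgi". A standalone sketch of that call pattern, assuming an OpenAI-style client and an illustrative endpoint URL (the client construction below is not part of this commit):

    from openai import OpenAI

    # Hypothetical TGI endpoint; in the app the configured client comes from config.py.
    client = OpenAI(base_url="https://my-tgi-endpoint.example.com/v1", api_key="hf_xxx")

    response = client.chat.completions.create(
        model="tgi",  # TGI's OpenAI-compatible route accepts this placeholder model name
        messages=[{"role": "user", "content": "Write one interview question for a data engineer."}],
        temperature=0.7,
    )
    print(response.choices[0].message.content)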
utils/screening.py
CHANGED
@@ -2,7 +2,7 @@
2 |      from .parser import parse_resume, extract_email, summarize_resume
3 |      from .hybrid_extractor import extract_resume_sections
4 |      from .spacy_loader import get_nlp, is_spacy_available
5 | -    from config import supabase, embedding_model
5 | +    from config import supabase, embedding_model
6 |      from fuzzywuzzy import fuzz
7 |      from sentence_transformers import util
8 |      import streamlit as st