Johnny committed
Commit 79b5c9c · Parent: 102e49d

feat: Update resume builder with LFS-tracked assets


- Add header and footer images using Git LFS
- Update configuration and dependencies
- Improve resume builder and OpenAI extractor
- Update app components and utility functions
- Remove unused blank resume template

.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.docx filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -37,3 +37,10 @@ debug_*.docx
 .sfdx/
 *.cls
 apex.db
+
+.DS_Store
+utils/.DS_Store
+utils/cursor-updates
+utils/prompt-updates
+Youlin Joseph Li qvell.docx
+Template.py
.streamlit/config.toml CHANGED
@@ -7,7 +7,6 @@ font="sans serif"

 [ui]
 hideTopBar = false
-hideSidebarNav = true

 [server]
 headless = true
app.py CHANGED
@@ -1,6 +1,7 @@
 # TalentLens

 import os
+import time  # Add time module import
 from io import BytesIO

 import streamlit as st
@@ -8,13 +9,40 @@ import fitz  # PyMuPDF
 import requests
 from dotenv import load_dotenv

-from config import supabase, HF_API_TOKEN, HF_HEADERS, HF_MODELS
+from config import supabase, HF_API_TOKEN, HF_HEADERS, HF_ENDPOINTS
 from utils.parser import parse_resume, extract_email, summarize_resume
 from utils.hybrid_extractor import extract_resume_sections
 from utils.builder import build_resume_from_data
 from utils.screening import evaluate_resumes
 from utils.reporting import generate_pdf_report, generate_interview_questions_from_summaries

+def toggle_endpoint(endpoint_name, action):
+    """Start or stop an endpoint"""
+    try:
+        from config import HF_HEADERS, HF_ENDPOINTS
+        # Use the health endpoint
+        endpoint_info = HF_ENDPOINTS[endpoint_name]
+        url = f"{endpoint_info['url']}/health"
+
+        # Use HEAD request to start the endpoint
+        response = requests.head(url, headers=HF_HEADERS)
+
+        if response.status_code == 503:
+            st.info("🚀 Starting endpoint... This may take 5-6 minutes. Click on 'Start' again to refresh status.")
+            time.sleep(2)  # Wait briefly before refreshing status
+            from config import check_endpoint_status
+            new_status = check_endpoint_status(endpoint_name)
+            st.session_state['endpoint_status'] = {endpoint_name: new_status}
+        elif response.status_code == 200:
+            st.success("✅ Endpoint is running")
+            time.sleep(2)  # Wait briefly before refreshing status
+            from config import check_endpoint_status
+            new_status = check_endpoint_status(endpoint_name)
+            st.session_state['endpoint_status'] = {endpoint_name: new_status}
+        else:
+            st.error(f"❌ Failed to {action} endpoint: {response.text}")
+    except Exception as e:
+        st.error(f"❌ Failed to {action} endpoint: {str(e)}")

 # ------------------------- Main App Function -------------------------
 def main():
@@ -61,11 +89,11 @@ def main():

     with col1:
         # Evaluation trigger
-        evaluate_clicked = st.button("📊 Evaluate Resumes", type="primary", use_container_width=True)
+        evaluate_clicked = st.button("\U0001F4CA Evaluate Resumes", type="primary", use_container_width=True)

     with col2:
         # Format Resume redirect button
-        format_clicked = st.button("📄 Format Resume", use_container_width=True)
+        format_clicked = st.button("\U0001F4C4 Format Resume", use_container_width=True)

     # Handle Format Resume redirect
     if format_clicked:
@@ -81,7 +109,7 @@
         st.error("⚠️ Please upload at least one resume.")
         return

-    st.write("### 📊 Evaluating Resumes...")
+    st.write("### 📊 Evaluating Resumes...")

     # Resume Evaluation
     shortlisted, removed_candidates = evaluate_resumes(uploaded_files, job_description)
@@ -109,6 +137,31 @@
     for removed in removed_candidates:
         st.write(f"**{removed['name']}** - {removed['reason']}")

+    # Get current status using DNS resolution
+    from config import check_endpoint_status
+    endpoint_name = "vzwjawyxvu030jsw"  # Updated to match endpoint ID
+    current_status = check_endpoint_status(endpoint_name)
+    state = current_status.get('status', 'unknown')
+
+    # Update session state with current status
+    st.session_state['endpoint_status'] = {endpoint_name: current_status}
+
+    # Show Start button and status
+    start_button = st.empty()  # Placeholder for Start button
+    if state in ['stopped', 'error']:
+        if start_button.button("▶️ Start", key=f"start_{endpoint_name}", use_container_width=True):
+            toggle_endpoint(endpoint_name, "start")
+            # Refresh status after starting
+            new_status = check_endpoint_status(endpoint_name)
+            st.session_state['endpoint_status'] = {endpoint_name: new_status}
+            if new_status.get('status') == 'running':
+                st.success("✅ Endpoint is running")
+            elif new_status.get('status') == 'starting':
+                st.info("🚀 Starting endpoint... This may take 5-6 minutes. Click on 'Start' again to refresh status.")
+            elif new_status.get('status') == 'error':
+                st.error(f"❌ Error: {new_status.get('error', 'Unknown error')}")
+
 # ------------------------- Run the App -------------------------
 if __name__ == "__main__":
     main()
config.py CHANGED
@@ -2,6 +2,7 @@
 import os
 import time
 import requests
+import socket
 from dotenv import load_dotenv
 from supabase import create_client
 from sentence_transformers import SentenceTransformer
@@ -20,44 +21,121 @@ supabase = create_client(SUPABASE_URL, SUPABASE_KEY)
 # === Embedding Model for Scoring ===
 embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

-# === Hugging Face API Configuration (for summarization/other) ===
+# === Hugging Face API Configuration ===
 HF_API_TOKEN = os.getenv("HF_API_TOKEN")
 if not HF_API_TOKEN:
     raise ValueError("Missing Hugging Face API key. Check your .env file.")
+
+# Headers for API requests
 HF_HEADERS = {"Authorization": f"Bearer {HF_API_TOKEN}"}

 # === Hugging Face Model Endpoints ===
-HF_MODELS = {
-    "pegasus": "https://router.huggingface.co/hf-inference/models/google/pegasus-xsum",
-    "gemma": "tgi"  # Used as the model name with OpenAI-compatible client
+HF_ENDPOINTS = {
+    "bart-large-cnn-ovt": {
+        "url": "https://hedemwou4oqkk65c.us-east-1.aws.endpoints.huggingface.cloud",
+        "task": "summarization",
+        "model_id": "facebook/bart-large-cnn"
+    },
+    "vzwjawyxvu030jsw": {  # Updated endpoint name to match URL
+        "url": "https://vzwjawyxvu030jsw.us-east-1.aws.endpoints.huggingface.cloud",
+        "task": "text-generation",
+        "model_id": "google/gemma-7b"
+    }
 }

-# === OpenAI-Compatible Client (for Gemma) ===
-client = OpenAI(
-    base_url="https://vzwjawyxvu030jsw.us-east-1.aws.endpoints.huggingface.cloud/v1/",
-    api_key=HF_API_TOKEN,
-)
+def check_endpoint_status(endpoint_name: str) -> dict:
+    """
+    Check the status of a private Hugging Face endpoint using DNS resolution
+    """
+    if endpoint_name not in HF_ENDPOINTS:
+        return {
+            "status": "error",
+            "error": f"Unknown endpoint: {endpoint_name}"
+        }
+
+    try:
+        endpoint_info = HF_ENDPOINTS[endpoint_name]
+        hostname = endpoint_info['url'].replace('https://', '').split('/')[0]
+
+        # Try DNS resolution
+        try:
+            socket.gethostbyname(hostname)
+            # If DNS resolves, endpoint exists but may be stopped
+            return {
+                "status": "stopped",
+                "scaled": True,
+                "pending": 0,
+                "error": None
+            }
+        except socket.gaierror:
+            # If DNS fails, endpoint doesn't exist
+            return {
+                "status": "error",
+                "error": "Endpoint not found"
+            }
+    except Exception as e:
+        return {
+            "status": "error",
+            "error": str(e)
+        }
+
+def toggle_endpoint(endpoint_name: str, action: str) -> dict:
+    """
+    Start or stop a private Hugging Face endpoint
+    """
+    try:
+        # For private endpoints, use the Endpoints API
+        api_base = "https://api.endpoints.huggingface.cloud"
+        action_url = f"{api_base}/v2/endpoint/{endpoint_name}/{action}"
+
+        response = requests.post(
+            action_url,
+            headers=HF_HEADERS,
+            timeout=10
+        )
+
+        if response.status_code in [200, 202]:
+            return {
+                "success": True,
+                "message": f"Successfully {action}ed endpoint"
+            }
+        else:
+            return {
+                "error": f"Failed to {action} endpoint: {response.text}"
+            }
+    except Exception as e:
+        return {
+            "error": f"Failed to {action} endpoint: {str(e)}"
+        }

-# === Optional: General Query Helper (for non-chat models like pegasus) ===
-def query(payload, model="pegasus", retries=5, delay=5):
+# === Query Helper ===
+def query(payload: dict, endpoint_name: str) -> dict:
     """
-    Sends a request to the Hugging Face API with retries and error handling.
+    Send a query to a Hugging Face endpoint
     """
-    if model not in HF_MODELS:
-        raise ValueError(f"Invalid model name: {model}. Available: {list(HF_MODELS.keys())}")
+    if endpoint_name not in HF_ENDPOINTS:
+        return {
+            "error": f"Unknown endpoint: {endpoint_name}"
+        }

-    api_url = HF_MODELS[model]
+    endpoint_info = HF_ENDPOINTS[endpoint_name]
+    url = endpoint_info['url']

-    for attempt in range(retries):
-        try:
-            response = requests.post(api_url, headers=HF_HEADERS, json=payload, timeout=10)
-            if response.status_code in (401, 402):
-                print(f"❌ HF error {response.status_code}")
-                return None
-            response.raise_for_status()
+    try:
+        response = requests.post(
+            url,
+            headers=HF_HEADERS,
+            json=payload,
+            timeout=30
+        )
+
+        if response.status_code == 200:
             return response.json()
-        except requests.exceptions.RequestException as e:
-            print(f"⚠️ Attempt {attempt+1} failed: {e}")
-            time.sleep(delay)
-    print("🚨 All retry attempts failed.")
-    return None
+        else:
+            return {
+                "error": f"Query failed with status {response.status_code}: {response.text}"
+            }
+    except Exception as e:
+        return {
+            "error": str(e)
+        }
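
For reference, a minimal usage sketch of the new config.py helpers (endpoint names come from HF_ENDPOINTS above; the payload shape is illustrative and depends on the endpoint's task):

    from config import check_endpoint_status, toggle_endpoint, query

    name = "bart-large-cnn-ovt"
    status = check_endpoint_status(name)              # DNS-based check: 'stopped' or 'error'
    if status.get("status") in ("stopped", "error"):
        print(toggle_endpoint(name, "start"))         # POST to the Endpoints API

    # Once the endpoint is up, send an inference request
    result = query({"inputs": "Summarize this resume text ..."}, name)
    print(result)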
footer.png ADDED

Git LFS Details

  • SHA256: b673e89dcea8e3e2533789aa14441e471ce5e169e734718da5813c4cf043e3b2
  • Pointer size: 130 Bytes
  • Size of remote file: 30 kB
header.png ADDED

Git LFS Details

  • SHA256: f7bc089bb6134bd545d9f8ef974ddb418b0e2157d357eafa32fb6b6e84dda726
  • Pointer size: 131 Bytes
  • Size of remote file: 184 kB
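
For context on the "Pointer size" values above: Git LFS stores a small text pointer in the repository instead of the binary itself. The pointer for footer.png would look roughly like this (oid taken from the entry above; the size line holds the file's exact byte count, which is not shown here):

    version https://git-lfs.github.com/spec/v1
    oid sha256:b673e89dcea8e3e2533789aa14441e471ce5e169e734718da5813c4cf043e3b2
    size <footer.png size in bytes>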
pages/Template.py CHANGED
@@ -1,8 +1,10 @@
-# pages/Template.py
+# pages/Format_Resume.py

 import os, sys, streamlit as st
 import json
 from io import BytesIO
+import time  # Added for API status check
+import requests  # Added for endpoint control

 # Add parent directory to path so we can import utils
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -11,17 +13,13 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from dotenv import load_dotenv
 load_dotenv(override=True)

+from config import HF_ENDPOINTS  # Update import
 from utils.hybrid_extractor import extract_resume_sections
 from utils.builder import build_resume_from_data
-from utils.parser import parse_resume  # whatever parse_resume you already have
-
-# Path to your blank template (header/footer only)
-template_path = os.path.join(
-    os.path.dirname(__file__), '..', 'templates', 'blank_resume.docx'
-)
+from utils.parser import parse_resume

 st.set_page_config(
-    page_title='Resume Template Builder',
+    page_title='Resume Formatter',
     layout='centered',
     initial_sidebar_state="collapsed"
 )
@@ -40,17 +38,70 @@ st.markdown("""
 </style>
 """, unsafe_allow_html=True)

+def toggle_endpoint(endpoint_name, action):
+    """Start or stop an endpoint"""
+    try:
+        from config import HF_HEADERS, HF_ENDPOINTS
+        # Use the health endpoint
+        endpoint_info = HF_ENDPOINTS[endpoint_name]
+        url = f"{endpoint_info['url']}/health"
+
+        # Use HEAD request to start the endpoint
+        response = requests.head(url, headers=HF_HEADERS)
+
+        if response.status_code == 503:
+            st.info("🚀 Starting endpoint... This may take 3-4 minutes. Click on 'Start' again to refresh status.")
+            time.sleep(2)  # Wait briefly before refreshing status
+            from config import check_endpoint_status
+            new_status = check_endpoint_status(endpoint_name)
+            st.session_state['endpoint_status'] = {endpoint_name: new_status}
+        elif response.status_code == 200:
+            st.success("✅ Endpoint is running")
+            time.sleep(2)  # Wait briefly before refreshing status
+            from config import check_endpoint_status
+            new_status = check_endpoint_status(endpoint_name)
+            st.session_state['endpoint_status'] = {endpoint_name: new_status}
+        else:
+            st.error(f"❌ Failed to {action} endpoint: {response.text}")
+    except Exception as e:
+        st.error(f"❌ Failed to {action} endpoint: {str(e)}")
+
 # Home button at the top
-if st.button("🏠 Home", help="Return to main TalentLens.AI page"):
+if st.button("\U0001F3E0 Home", help="Return to main TalentLens.AI page"):
     st.switch_page("app.py")

-st.title('📄 Resume Template Builder')
+st.title('📄 Resume Formatter')
 st.markdown("---")

 uploaded = st.file_uploader('Upload Resume (PDF or DOCX)', type=['pdf','docx'])
 if not uploaded:
     st.info("Please upload a resume to get started.")
-    st.stop()
+
+    # Get current status using DNS resolution
+    from config import check_endpoint_status
+    endpoint_name = "bart-large-cnn-ovt"
+    current_status = check_endpoint_status(endpoint_name)
+    state = current_status.get('status', 'unknown')
+
+    # Update session state with current status
+    st.session_state['endpoint_status'] = {endpoint_name: current_status}
+
+    # Show Start button and status
+    start_button = st.empty()  # Placeholder for Start button
+    if state in ['stopped', 'error']:
+        if start_button.button("▶️ Start", key=f"start_{endpoint_name}", use_container_width=True):
+            toggle_endpoint(endpoint_name, "start")
+            # Refresh status after starting
+            new_status = check_endpoint_status(endpoint_name)
+            st.session_state['endpoint_status'] = {endpoint_name: new_status}
+            if new_status.get('status') == 'running':
+                st.success("✅ Endpoint is running")
+            elif new_status.get('status') == 'starting':
+                st.info("🚀 Starting endpoint... This may take 3-4 minutes. Click on 'Start' again to refresh status.")
+            elif new_status.get('status') == 'error':
+                st.error(f"❌ Error: {new_status.get('error', 'Unknown error')}")
+
+    st.stop()  # Stop here if no file is uploaded

 st.success(f'Uploaded: {uploaded.name}')
@@ -239,7 +290,7 @@ if st.button('📄 Generate Formatted Resume', type='primary'):
     try:
         with st.spinner('Building formatted resume...'):
             # Build the resume document
-            doc = build_resume_from_data(template_path, data)
+            doc = build_resume_from_data(tmpl="", sections=data)

             # Save to buffer
             buf = BytesIO()
@@ -329,4 +380,4 @@ st.markdown(
     "🚀 <strong>TalentLens.AI</strong> - Powered by AI for intelligent resume processing"
     "</div>",
     unsafe_allow_html=True
-)
+)
requirements.txt CHANGED
@@ -10,4 +10,5 @@ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1
 openai
 fuzzywuzzy
 python-docx
-numpy<2.0
+numpy<2.0
+from torch._C import * # noqa: F403
templates/blank_resume.docx DELETED
Binary file (48.2 kB)
 
utils/builder.py CHANGED
@@ -1,20 +1,19 @@
+import logging
+import os
+import re
 from datetime import datetime
 from dateutil.parser import parse as date_parse
-import re, math
 from docx import Document
-from docx.shared import Pt
-from docx.enum.text import WD_PARAGRAPH_ALIGNMENT, WD_ALIGN_PARAGRAPH
-import logging
+from docx.enum.text import WD_PARAGRAPH_ALIGNMENT, WD_TAB_ALIGNMENT
+from docx.shared import Inches, Pt

 logger = logging.getLogger(__name__)

-# ---------- helpers ---------------------------------------------------
-def _date(dt_str:str)->datetime:
-    try: return date_parse(dt_str, default=datetime(1900,1,1))
-    except: return datetime(1900,1,1)
-
-def fmt_range(raw:str)->str:
-    if not raw: return ""
+
+def fmt_range(raw: str) -> str:
+    """Formats a date range string nicely."""
+    if not raw:
+        return ""
     parts = [p.strip() for p in re.split(r"\s*[–-]\s*", raw)]

     formatted_parts = []
@@ -23,284 +22,197 @@ def fmt_range(raw:str)->str:
             formatted_parts.append("Present")
         else:
             try:
-                date_obj = _date(part)
-                formatted_parts.append(date_obj.strftime("%B %Y"))
-            except:
-                formatted_parts.append(part)  # fallback to original text
+                date_obj = date_parse(part, fuzzy=True, default=datetime(1900, 1, 1))
+                if date_obj.year == 1900:
+                    formatted_parts.append(part)
+                else:
+                    formatted_parts.append(date_obj.strftime("%B %Y"))
+            except (ValueError, TypeError):
+                formatted_parts.append(part)

     return " – ".join(formatted_parts)

-# ---------- main ------------------------------------------------------
-def build_resume_from_data(tmpl:str, sections:dict)->Document:
-    logger.info(f"BUILDER: Attempting to load document template from: {tmpl}")
-    doc = Document(tmpl)
-    logger.info(f"BUILDER: Template {tmpl} loaded successfully.")
-
-    # Log the template state
-    logger.info(f"BUILDER: Template has {len(doc.sections)} sections")
-    for i, section_obj in enumerate(doc.sections):
-        if section_obj.header:
-            logger.info(f"BUILDER: Section {i} header has {len(section_obj.header.paragraphs)} paragraphs")
-        if section_obj.footer:
-            logger.info(f"BUILDER: Section {i} footer has {len(section_obj.footer.paragraphs)} paragraphs")
-
-    # MOST CONSERVATIVE APPROACH: Clear paragraph content but don't remove elements
-    # This should preserve all document structure including sections
-    logger.info(f"BUILDER: Before clearing - Document has {len(doc.paragraphs)} paragraphs and {len(doc.tables)} tables")
-
-    # Clear paragraph text content only, don't remove elements
-    for paragraph in doc.paragraphs:
-        # Clear all runs in the paragraph but keep the paragraph element
-        for run in paragraph.runs:
-            run.text = ""
-        # Also clear the paragraph text directly
-        paragraph.text = ""
-
-    # Remove tables (these are less likely to affect sections)
-    tables_to_remove = list(doc.tables)  # Create a copy of the list
-    for table in tables_to_remove:
-        tbl = table._element
-        tbl.getparent().remove(tbl)
-
-    logger.info(f"BUILDER: After clearing - Document has {len(doc.paragraphs)} paragraphs and {len(doc.tables)} tables")
-
-    # Verify headers/footers are still intact
-    logger.info(f"BUILDER: After clearing - Document still has {len(doc.sections)} sections")
-    for i, section_obj in enumerate(doc.sections):
-        if section_obj.header:
-            logger.info(f"BUILDER: Section {i} header still has {len(section_obj.header.paragraphs)} paragraphs")
-        if section_obj.footer:
-            logger.info(f"BUILDER: Section {i} footer still has {len(section_obj.footer.paragraphs)} paragraphs")
-
-    logger.info(f"BUILDER: Template preserved with original headers and footers")
-
-    # --- easy builders ---
-    def heading(txt): pg=doc.add_paragraph(); r=pg.add_run(txt); r.bold=True; r.font.size=Pt(12)
-    def bullet(txt,lvl=0): p=doc.add_paragraph(); p.paragraph_format.left_indent=Pt(lvl*12); p.add_run(f"• {txt}").font.size=Pt(11)
-    def two_col(l,r):
-        tbl=doc.add_table(rows=1,cols=2); tbl.autofit=True
-        tbl.cell(0,0).paragraphs[0].add_run(l).bold=True
-        rp = tbl.cell(0,1).paragraphs[0]; rp.alignment=WD_ALIGN_PARAGRAPH.RIGHT
-        rr = rp.add_run(r); rr.italic=True
-
-    # --- header (name + current role) ---
-    exps = sections.get("StructuredExperiences",[])
-    if exps:
-        try:
-            # Filter to only dictionary experiences
-            dict_exps = [e for e in exps if isinstance(e, dict)]
-            if dict_exps:
-                newest = max(dict_exps, key=lambda e: _date(e.get("date_range","").split("–")[0] if "–" in e.get("date_range","") else e.get("date_range","").split("-")[0] if "-" in e.get("date_range","") else e.get("date_range","")))
-                cur_title = newest.get("title","")
-            else:
-                cur_title = ""
-        except:
-            # Fallback: try to get title from first dictionary experience
-            for exp in exps:
-                if isinstance(exp, dict) and exp.get("title"):
-                    cur_title = exp.get("title","")
-                    break
-            else:
-                cur_title = ""
-    else:
-        # Try to extract job title from summary if no structured experiences
-        cur_title = ""
-        summary = sections.get("Summary", "")
-        if summary:
-            # Look for job titles in the summary
-            title_patterns = [
-                r'(?i)(.*?engineer)',
-                r'(?i)(.*?developer)',
-                r'(?i)(.*?analyst)',
-                r'(?i)(.*?manager)',
-                r'(?i)(.*?specialist)',
-                r'(?i)(.*?consultant)',
-                r'(?i)(.*?architect)',
-                r'(?i)(.*?lead)',
-                r'(?i)(.*?director)',
-                r'(?i)(.*?coordinator)'
-            ]
-
-            for pattern in title_patterns:
-                match = re.search(pattern, summary)
-                if match:
-                    potential_title = match.group(1).strip()
-                    # Clean up the title
-                    potential_title = re.sub(r'^(results-driven|experienced|senior|junior|lead)\s+', '', potential_title, flags=re.I)
-                    if len(potential_title) > 3 and len(potential_title) < 50:
-                        cur_title = potential_title.title()
-                        break
-
-    if sections.get("Name"):
-        p=doc.add_paragraph(); p.alignment=WD_PARAGRAPH_ALIGNMENT.CENTER
-        run=p.add_run(sections["Name"]); run.bold=True; run.font.size=Pt(16)
-    if cur_title:
-        p=doc.add_paragraph(); p.alignment=WD_PARAGRAPH_ALIGNMENT.CENTER
-        p.add_run(cur_title).font.size=Pt(12)
-
-    # --- summary ---
-    if sections.get("Summary"):
-        heading("Professional Summary:")
-        pg=doc.add_paragraph(); pg.paragraph_format.first_line_indent=Pt(12)
-        pg.add_run(sections["Summary"]).font.size=Pt(11)
-
-    # --- skills ---
-    if sections.get("Skills"):
-        heading("Skills:")
-        skills = sorted(set(sections["Skills"]))
-        cols = 3
-        rows = math.ceil(len(skills)/cols)
-        tbl = doc.add_table(rows=rows, cols=cols); tbl.autofit=True
-        k=0
-        for r in range(rows):
-            for c in range(cols):
-                if k < len(skills):
-                    tbl.cell(r,c).paragraphs[0].add_run(f"• {skills[k]}").font.size=Pt(11)
-                    k+=1
-
-    # --- experience ---
-    if exps:
-        heading("Professional Experience:")
-        for e in exps:
-            # Ensure e is a dictionary, not a string
-            if isinstance(e, str):
-                # If it's a string, create a basic experience entry
-                bullet(e, 0)
-                continue
-            elif not isinstance(e, dict):
-                # Skip if it's neither string nor dict
-                continue
-
-            # Process dictionary experience entry
-            title = e.get("title", "")
-            company = e.get("company", "")
-            date_range = e.get("date_range", "")
-            responsibilities = e.get("responsibilities", [])
-
-            # Create the job header
-            two_col(" | ".join(filter(None, [title, company])),
-                    fmt_range(date_range))
-
-            # Add responsibilities
-            if isinstance(responsibilities, list):
-                for resp in responsibilities:
-                    if isinstance(resp, str) and resp.strip():
-                        bullet(resp, 1)
-            elif isinstance(responsibilities, str) and responsibilities.strip():
-                bullet(responsibilities, 1)
-    else:
-        # If no structured experiences found, try to extract from summary
-        heading("Professional Experience:")
-        summary = sections.get("Summary", "")
-
-        if summary and cur_title:
-            # Extract years of experience from summary
-            years_match = re.search(r'(\d+)\s+years?\s+of\s+experience', summary, re.I)
-            years_text = f"{years_match.group(1)} years of experience" if years_match else "Multiple years of experience"
-
-            # Create a basic experience entry from summary
-            two_col(cur_title, years_text)
-
-            # Extract key responsibilities/skills from summary
-            sentences = re.split(r'[.!]', summary)
-            responsibilities = []
-
-            for sentence in sentences:
-                sentence = sentence.strip()
-                if len(sentence) > 30 and any(keyword in sentence.lower() for keyword in
-                    ['expert', 'specializing', 'experience', 'developing', 'designing', 'implementing', 'managing', 'leading']):
-                    responsibilities.append(sentence)
-
-            # Add responsibilities as bullet points
-            for resp in responsibilities[:5]:  # Limit to 5 key points
-                bullet(resp.strip(), 1)
-        else:
-            # Fallback message
-            pg = doc.add_paragraph()
-            pg.add_run("Experience details are included in the Professional Summary above.").font.size = Pt(11)
-            pg.add_run(" For specific job titles, companies, and dates, please refer to the original resume.").font.size = Pt(11)
-
-    # --- job history timeline (chronological list) ---
-    if exps:
-        # Filter to only dictionary experiences and sort by date (most recent first)
-        dict_exps = [e for e in exps if isinstance(e, dict) and e.get("title") and e.get("date_range")]
-
-        if dict_exps:
-            # Sort experiences by start date (most recent first)
-            try:
-                sorted_exps = sorted(dict_exps, key=lambda e: _date(
-                    e.get("date_range", "").split("–")[0] if "–" in e.get("date_range", "")
-                    else e.get("date_range", "").split("-")[0] if "-" in e.get("date_range", "")
-                    else e.get("date_range", "")
-                ), reverse=True)
-            except:
-                # If sorting fails, use original order
-                sorted_exps = dict_exps
-
-            heading("Career Timeline:")
-            for exp in sorted_exps:
-                title = exp.get("title", "")
-                company = exp.get("company", "")
-                date_range = exp.get("date_range", "")
-
-                # Format: "Job Title at Company (Dates)"
-                if company:
-                    timeline_entry = f"{title} at {company}"
-                else:
-                    timeline_entry = title
-
-                if date_range:
-                    timeline_entry += f" ({fmt_range(date_range)})"
-
-                bullet(timeline_entry, 0)
-
-    # --- education / training ---
-    education = sections.get("Education", [])
-    training = sections.get("Training", [])
-
-    # Check if we have any real education or if it's just experience duration
-    has_real_education = False
-    processed_education = []
-    experience_years = None
-
-    for ed in education:
-        # Ensure ed is a string
-        if not isinstance(ed, str):
-            continue
-
-        # Clean up the education entry (remove bullets)
-        clean_ed = ed.replace('•', '').strip()
-        if re.match(r'^\d+\s+years?$', clean_ed, re.I):
-            # This is experience duration, not education
-            experience_years = clean_ed
-        else:
-            processed_education.append(clean_ed)
-            has_real_education = True
-
-    # Show education section
-    if has_real_education:
-        heading("Education:")
-        for ed in processed_education:
-            bullet(ed)
-    elif experience_years:
-        # If only experience years found, show it as a note
-        heading("Education:")
-        pg = doc.add_paragraph()
-        pg.add_run(f"Professional experience: {experience_years}").font.size = Pt(11)
-
-    if training:
-        heading("Training:")
-        for tr in training:
-            # Ensure tr is a string
-            if isinstance(tr, str) and tr.strip():
-                bullet(tr)
-
-    # Final diagnostic before returning
-    logger.info(f"BUILDER: FINAL STATE - Document has {len(doc.sections)} sections")
-    for i, section_obj in enumerate(doc.sections):
-        if section_obj.header:
-            logger.info(f"BUILDER: FINAL - Section {i} header has {len(section_obj.header.paragraphs)} paragraphs")
-        if section_obj.footer:
-            logger.info(f"BUILDER: FINAL - Section {i} footer has {len(section_obj.footer.paragraphs)} paragraphs")
-
-    return doc
+def add_section_heading(doc, text):
+    """Adds a centered section heading."""
+    p = doc.add_paragraph()
+    run = p.add_run(text.upper())
+    run.bold = True
+    font = run.font
+    font.size = Pt(12)
+    font.name = 'Arial'
+    p.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
+    p.paragraph_format.space_after = Pt(6)
+
+
+def build_resume_from_data(tmpl: str, sections: dict, remove_blank_pages_enabled: bool = True) -> Document:
+    """
+    Builds a formatted resume from structured data, inserting header/footer images and logging the process.
+    """
+    logger.info("BUILDER: Starting image-based resume build process.")
+    try:
+        # 1. Create a new blank document, ignoring the template file
+        doc = Document()
+        logger.info("BUILDER: Successfully created a new blank document.")
+
+        # Get section and enable different first page header/footer
+        section = doc.sections[0]
+        section.different_first_page = True
+
+        # Move header and footer to the very edge of the page
+        section.header_distance = Pt(0)
+        section.footer_distance = Pt(0)
+        logger.info("BUILDER: Set header/footer distance to 0 to remove whitespace.")
+
+        # 2. Define image paths relative to the project root
+        script_dir = os.path.dirname(os.path.abspath(__file__))
+        project_root = os.path.dirname(script_dir)
+        header_path = os.path.join(project_root, 'header.png')
+        footer_path = os.path.join(project_root, 'footer.png')
+
+        logger.info(f"BUILDER: Attempting to use header image from: {header_path}")
+        logger.info(f"BUILDER: Attempting to use footer image from: {footer_path}")
+
+        if not os.path.exists(header_path):
+            logger.error(f"BUILDER FATAL: Header image not found at '{header_path}'. Cannot proceed.")
+            return doc  # Return empty doc
+        if not os.path.exists(footer_path):
+            logger.error(f"BUILDER FATAL: Footer image not found at '{footer_path}'. Cannot proceed.")
+            return doc  # Return empty doc
+
+        # 3. Setup Headers
+        candidate_name = sections.get("Name", "Candidate Name Not Found")
+        experiences = sections.get("StructuredExperiences", [])
+        job_title = experiences[0].get("title", "") if experiences else ""
+
+        # -- First Page Header (Image + Name + Title) --
+        first_page_header = section.first_page_header
+        first_page_header.is_linked_to_previous = False
+
+        # Safely get or create a paragraph for the image
+        p_header_img_first = first_page_header.paragraphs[0] if first_page_header.paragraphs else first_page_header.add_paragraph()
+        p_header_img_first.clear()
+
+        p_header_img_first.paragraph_format.space_before = Pt(0)
+        p_header_img_first.paragraph_format.space_after = Pt(0)
+        p_header_img_first.paragraph_format.left_indent = -section.left_margin
+        p_header_img_first.add_run().add_picture(header_path, width=section.page_width)
+        logger.info("BUILDER: Inserted header.png into FIRST PAGE header.")
+
+        # Add Name
+        p_name = first_page_header.add_paragraph()
+        run_name = p_name.add_run(candidate_name.upper())
+        run_name.font.name = 'Arial'
+        run_name.font.size = Pt(14)
+        run_name.bold = True
+        p_name.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
+        p_name.paragraph_format.space_before = Pt(6)
+        p_name.paragraph_format.space_after = Pt(0)
+        logger.info(f"BUILDER: Added candidate name '{candidate_name}' to FIRST PAGE header.")
+
+        # Add Job Title
+        if job_title:
+            p_title = first_page_header.add_paragraph()
+            run_title = p_title.add_run(job_title)
+            run_title.font.name = 'Arial'
+            run_title.font.size = Pt(11)
+            p_title.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
+            p_title.paragraph_format.space_before = Pt(0)
+            logger.info(f"BUILDER: Added job title '{job_title}' to FIRST PAGE header.")
+
+        # -- Primary Header for subsequent pages (Image Only) --
+        primary_header = section.header
+        primary_header.is_linked_to_previous = False
+
+        # Safely get or create a paragraph for the image
+        p_header_img_primary = primary_header.paragraphs[0] if primary_header.paragraphs else primary_header.add_paragraph()
+        p_header_img_primary.clear()
+
+        p_header_img_primary.paragraph_format.space_before = Pt(0)
+        p_header_img_primary.paragraph_format.space_after = Pt(0)
+        p_header_img_primary.paragraph_format.left_indent = -section.left_margin
+        p_header_img_primary.add_run().add_picture(header_path, width=section.page_width)
+        logger.info("BUILDER: Inserted header.png into PRIMARY header for subsequent pages.")
+
+        # 4. Insert Footer Image (same for all pages)
+        footer = section.footer
+        footer.is_linked_to_previous = False
+
+        # Safely get or create a paragraph for the image
+        p_footer_img = footer.paragraphs[0] if footer.paragraphs else footer.add_paragraph()
+        p_footer_img.clear()
+
+        p_footer_img.paragraph_format.space_before = Pt(0)
+        p_footer_img.paragraph_format.space_after = Pt(0)
+        p_footer_img.paragraph_format.left_indent = -section.left_margin
+        p_footer_img.add_run().add_picture(footer_path, width=section.page_width)
+
+        # Link the first page footer to the primary footer so we only define it once.
+        section.first_page_footer.is_linked_to_previous = True
+        logger.info("BUILDER: Inserted footer.png and configured for all pages.")
+
+        # 5. Build Resume Body
+        logger.info("BUILDER: Proceeding to add structured resume content to document body.")
+
+        # --- Professional Summary ---
+        if sections.get("Summary"):
+            add_section_heading(doc, "Professional Summary")
+            doc.add_paragraph(sections["Summary"]).paragraph_format.space_after = Pt(12)
+
+        # --- Skills ---
+        if sections.get("Skills"):
+            add_section_heading(doc, "Skills")
+            skills_text = ", ".join(sections["Skills"])
+            p = doc.add_paragraph(skills_text)
+            p.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
+            p.paragraph_format.space_after = Pt(12)
+
+        # --- Professional Experience ---
+        if experiences:
+            add_section_heading(doc, "Professional Experience")
+            for exp in experiences:
+                if not isinstance(exp, dict):
+                    continue
+
+                p = doc.add_paragraph()
+                p.add_run(exp.get("title", "N/A")).bold = True
+                p.add_run(" | ").bold = True
+                p.add_run(exp.get("company", "N/A")).italic = True
+                p.add_run(f'\t{fmt_range(exp.get("date_range", ""))}')
+
+                tab_stops = p.paragraph_format.tab_stops
+                tab_stops.add_tab_stop(Inches(6.5), WD_TAB_ALIGNMENT.RIGHT)
+
+                responsibilities = exp.get("responsibilities", [])
+                if responsibilities and isinstance(responsibilities, list):
+                    for resp in responsibilities:
+                        if resp.strip():
+                            try:
+                                p_resp = doc.add_paragraph(resp, style='List Bullet')
+                            except KeyError:
+                                p_resp = doc.add_paragraph(f"• {resp}")
+
+                            p_resp.paragraph_format.left_indent = Inches(0.25)
+                            p_resp.paragraph_format.space_before = Pt(0)
+                            p_resp.paragraph_format.space_after = Pt(3)
+
+            doc.add_paragraph().paragraph_format.space_after = Pt(6)
+
+        # --- Education ---
+        if sections.get("Education"):
+            add_section_heading(doc, "Education")
+            for edu in sections.get("Education", []):
+                if edu.strip():
+                    try:
+                        p_edu = doc.add_paragraph(edu, style='List Bullet')
+                    except KeyError:
+                        p_edu = doc.add_paragraph(f"• {edu}")
+
+                    p_edu.paragraph_format.left_indent = Inches(0.25)
+
+        logger.info("BUILDER: Resume build process completed successfully.")
+        return doc
+
+    except Exception:
+        logger.error("BUILDER: An unexpected error occurred during resume generation.", exc_info=True)
+        return Document()
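
For reference, a minimal usage sketch of the rewritten builder (field values are illustrative; it assumes header.png and footer.png sit at the project root, as added in this commit, and that python-docx is installed):

    from utils.builder import build_resume_from_data

    data = {
        "Name": "Jane Doe",
        "Summary": "Automation test engineer with Selenium and Java experience.",
        "Skills": ["Selenium", "Java", "TestNG"],
        "StructuredExperiences": [{
            "title": "QA Engineer",
            "company": "Acme Corp",
            "date_range": "Jan 2021 - Present",
            "responsibilities": ["Built UI regression suites."],
        }],
        "Education": ["B.S. Computer Science | State University | 2015"],
    }

    doc = build_resume_from_data(tmpl="", sections=data)  # tmpl is ignored; a fresh Document is created
    doc.save("formatted_resume.docx")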
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/openai_extractor.py CHANGED
@@ -1,165 +1,175 @@
-#!/usr/bin/env python3
 """
-OpenAI GPT-4o Resume Extractor
-
-This module provides resume extraction using OpenAI's GPT-4o model (GPT-4.1),
-which is the latest and most capable model for complex resume parsing.
+OpenAI-based resume data extraction.
+Uses GPT models to extract structured information from resume text.
 """

 import json
 import re
 import logging
-import os
 from typing import Dict, Any, List, Optional
+
+import openai
 from openai import OpenAI

-# Configure logging
-logging.basicConfig(level=logging.INFO)
+# Set up logging
 logger = logging.getLogger(__name__)

+
 class OpenAIResumeExtractor:
     """
-    Production-ready resume extractor using OpenAI GPT-4o (GPT-4.1)
+    Resume data extractor using OpenAI's GPT models.
     """

     def __init__(self, api_key: Optional[str] = None, model: str = "gpt-4o"):
-        """
-        Initialize the OpenAI extractor
-
-        Args:
-            api_key: OpenAI API key (optional, will use env var if not provided)
-            model: OpenAI model to use (gpt-4o is the latest and most capable GPT-4 model)
-        """
-        self.api_key = api_key or os.getenv('OPENAI_API_KEY')
+        """Initialize with OpenAI API key and model."""
+        self.client = OpenAI(api_key=api_key) if api_key else OpenAI()
         self.model = model
-
-        if not self.api_key:
-            raise ValueError("No OpenAI API key found. Set OPENAI_API_KEY environment variable.")
-
-        self.client = OpenAI(api_key=self.api_key)
+        logger.info(f"OpenAI extractor initialized with model: {model}")

     def extract_sections_openai(self, text: str) -> Dict[str, Any]:
         """
-        Extract resume sections using OpenAI GPT-4o
+        Extract resume sections using OpenAI API.

         Args:
             text: Raw resume text

         Returns:
-            Structured resume data
+            Dict containing extracted sections
         """
-        logger.info("Starting OpenAI GPT-4o extraction...")
+        logger.info("Starting OpenAI extraction...")

         try:
-            # Create a comprehensive prompt for structured extraction
+            # Create extraction prompt
             prompt = self._create_extraction_prompt(text)

-            # Make API call to OpenAI
+            # Call OpenAI API
             response = self.client.chat.completions.create(
                 model=self.model,
                 messages=[
-                    {
-                        "role": "system",
-                        "content": "You are an expert resume parser. Extract information accurately and return valid JSON only."
-                    },
-                    {
-                        "role": "user",
-                        "content": prompt
-                    }
+                    {"role": "system", "content": "You are an expert resume parser. Extract information and return ONLY valid JSON."},
+                    {"role": "user", "content": prompt}
                 ],
-                temperature=0.1,  # Low temperature for consistent results
+                temperature=0.1,
                 max_tokens=2000
             )

-            # Parse the response
-            result_text = response.choices[0].message.content.strip()
+            # Parse response
+            content = response.choices[0].message.content.strip()
+            logger.debug(f"OpenAI response: {content[:200]}...")

-            # Clean up the response to extract JSON
-            if "```json" in result_text:
-                result_text = result_text.split("```json")[1].split("```")[0]
-            elif "```" in result_text:
-                result_text = result_text.split("```")[1]
-
-            # Parse JSON
-            result = json.loads(result_text)
-
-            # Validate and clean the result
+            # Clean and parse JSON
+            content = self._clean_json_response(content)
+            result = json.loads(content)
+
+            # Validate and enhance result
             result = self._validate_and_clean_result(result)

-            # Extract contact info from the original text
+            # Add contact info extraction
             contact_info = self._extract_contact_info(text)
             result["ContactInfo"] = contact_info

             logger.info("✅ OpenAI extraction completed successfully")
             return result

+        except json.JSONDecodeError as e:
+            logger.error(f"JSON parsing error: {e}")
+            logger.debug(f"Response content: {content}")
+            return self._fallback_extraction(text)
+
         except Exception as e:
             logger.error(f"OpenAI extraction failed: {e}")
-
-            # Check if it's an API key issue
-            if "401" in str(e) or "invalid_api_key" in str(e):
-                logger.error("❌ Invalid OpenAI API key - please check your OPENAI_API_KEY environment variable")
-                # Return empty result to force hybrid system to try other methods
-                return self._get_empty_result()
-
-            # For other errors, fallback to regex extraction
-            return self._fallback_extraction(text)
+            return self._fallback_extraction(text)
+
+    def _clean_json_response(self, content: str) -> str:
+        """Clean JSON response from OpenAI."""
+        # Remove markdown code blocks
+        content = re.sub(r'```json\s*', '', content)
+        content = re.sub(r'```\s*$', '', content)
+
+        # Remove any text before first {
+        start = content.find('{')
+        if start > 0:
+            content = content[start:]
+
+        # Remove any text after last }
+        end = content.rfind('}')
+        if end > 0 and end < len(content) - 1:
+            content = content[:end + 1]
+
+        return content.strip()

     def _create_extraction_prompt(self, text: str) -> str:
-        """Create a comprehensive prompt for resume extraction"""
-
+        """Create prompt for OpenAI extraction."""
         prompt = f"""
-Extract the following information from this resume text and return it as valid JSON:
-
-RESUME TEXT:
-{text}
-
-Extract and return ONLY a JSON object with this exact structure:
+Extract information from this resume and return ONLY valid JSON in this exact format:

 {{
-    "Name": "Full name of the person",
-    "Summary": "Professional summary or objective (full text)",
-    "Skills": ["skill1", "skill2", "skill3"],
-    "StructuredExperiences": [
-        {{
-            "title": "Job title",
-            "company": "Company name",
-            "date_range": "Date range (e.g., Jan 2021 - Present)",
-            "responsibilities": ["responsibility 1", "responsibility 2"]
-        }}
-    ],
-    "Education": ["degree | institution | year"],
-    "Training": []
+    "Name": "Full Name with credentials (PhD, MBA, etc.)",
+    "Summary": "Professional summary or objective",
+    "Skills": ["skill1", "skill2", "skill3"],
+    "StructuredExperiences": [
+        {{
+            "title": "Job Title",
+            "company": "Company Name",
+            "date_range": "Start Date - End Date",
+            "responsibilities": ["responsibility1", "responsibility2"]
+        }}
+    ],
+    "Education": ["degree info", "school info"],
+    "Training": ["certification1", "training1"],
+    "Address": "Full address if available"
 }}

-EXTRACTION RULES:
-1. Name: Extract the full name from the top of the resume
-2. Summary: Extract the complete professional summary/objective section
-3. Skills: Extract technical skills only (programming languages, tools, frameworks)
-4. StructuredExperiences: For each job, extract:
-   - title: The job title/position
-   - company: Company name (include location if provided)
-   - date_range: Employment dates
-   - responsibilities: List of bullet points describing what they did
-5. Education: Extract degrees, institutions, and graduation years
-6. Training: Extract certifications, courses, training programs
+Resume text:
+{text}

-IMPORTANT:
+CRITICAL INSTRUCTIONS:
+- For NAME: Include ALL credentials (PhD, MBA, M.S., B.S., etc.) - example: "John Doe, PhD, MBA"
+- Read the ENTIRE resume text carefully, don't miss content
+- Extract ALL work experiences with full details
 - Return ONLY valid JSON, no explanations
 - If a section is not found, use empty string or empty array
-- For skills, exclude company names and focus on technical skills
-- For experiences, look for patterns like "Title | Company | Dates" or similar
-- Extract ALL job experiences found in the resume
-- Include ALL bullet points under each job as responsibilities
+- Extract actual technical skills, not company names
 """
-
         return prompt

+    def _extract_contact_info(self, text: str) -> Dict[str, str]:
+        """Extract contact information from resume text."""
+        contact_info = {}
+
+        # Extract email
+        email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
+        email_match = re.search(email_pattern, text)
+        if email_match:
+            contact_info['email'] = email_match.group()
+
+        # Extract phone number
+        phone_patterns = [
+            r'\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}',
+            r'\+1[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}',
+            r'\d{3}[-.\s]?\d{3}[-.\s]?\d{4}'
+        ]
+
+        for pattern in phone_patterns:
+            phone_match = re.search(pattern, text)
+            if phone_match:
+                contact_info['phone'] = phone_match.group().strip()
+                break
+
+        # Extract LinkedIn
+        linkedin_pattern = r'linkedin\.com/in/[A-Za-z0-9-]+'
+        linkedin_match = re.search(linkedin_pattern, text)
+        if linkedin_match:
+            contact_info['linkedin'] = linkedin_match.group()
+
+        logger.info(f"OPENAI: Extracted ContactInfo as dict: {contact_info}")
+        return contact_info
+
     def _validate_and_clean_result(self, result: Dict[str, Any]) -> Dict[str, Any]:
-        """Validate and clean the extraction result"""
+        """Validate and clean the extraction result."""

         # Ensure all required keys exist
-        required_keys = ["Name", "Summary", "Skills", "StructuredExperiences", "Education", "Training"]
+        required_keys = ["Name", "Summary", "Skills", "StructuredExperiences", "Education", "Training", "Address"]
         for key in required_keys:
             if key not in result:
                 result[key] = [] if key in ["Skills", "StructuredExperiences", "Education", "Training"] else ""
@@ -187,59 +197,45 @@ IMPORTANT:

         return result

-    def _get_empty_result(self) -> Dict[str, Any]:
-        """Return empty result structure for API failures"""
-        return {
-            "Name": "",
-            "Summary": "",
-            "Skills": [],
-            "StructuredExperiences": [],
-            "Education": [],
-            "Training": [],
-            "ContactInfo": {}
-        }
-
     def _is_company_name(self, text: str) -> bool:
-        """Check if text looks like a company name rather than a skill"""
+        """Check if text looks like a company name rather than a skill."""
         company_indicators = [
             "inc", "llc", "corp", "ltd", "company", "solutions", "services",
-            "systems", "technologies", "financial", "insurance", "abc", "xyz"
+            "systems", "technologies", "financial", "insurance"
         ]
         text_lower = text.lower()
         return any(indicator in text_lower for indicator in company_indicators)

     def _fallback_extraction(self, text: str) -> Dict[str, Any]:
-        """Fallback to regex-based extraction if OpenAI fails"""
+        """Fallback to regex-based extraction if OpenAI fails."""
         logger.info("Using regex fallback extraction...")
-        try:
-            from utils.hf_extractor_simple import extract_sections_hf_simple
-            return extract_sections_hf_simple(text)
-        except ImportError:
-            # Basic regex fallback
-            return {
-                "Name": self._extract_name_regex(text),
-                "Summary": self._extract_summary_regex(text),
-                "Skills": self._extract_skills_regex(text),
-                "StructuredExperiences": self._extract_experiences_regex(text),
-                "Education": self._extract_education_regex(text),
-                "Training": [],
-                "ContactInfo": self._extract_contact_info(text)
-            }
+
+        return {
+            "Name": self._extract_name_regex(text),
+            "Summary": self._extract_summary_regex(text),
+            "Skills": self._extract_skills_regex(text),
+            "StructuredExperiences": self._extract_experiences_regex(text),
+            "Education": self._extract_education_regex(text),
+            "Training": [],
+            "Address": self._extract_address_regex(text),
+            "ContactInfo": self._extract_contact_info(text)
+        }

     def _extract_name_regex(self, text: str) -> str:
-        """Regex fallback for name extraction"""
         lines = text.split('\n')[:5]
         for line in lines:
             line = line.strip()
             if re.search(r'@|phone|email|linkedin|github', line.lower()):
                 continue
-            name_match = re.match(r'^([A-Z][a-z]+ [A-Z][a-z]+(?:\s+[A-Z][a-z]+)?)', line)
             if name_match:
                 return name_match.group(1)
         return ""

     def _extract_summary_regex(self, text: str) -> str:
-        """Regex fallback for summary extraction"""
         summary_pattern = r'(?i)(?:professional\s+)?summary[:\s]*\n(.*?)(?=\n\s*(?:technical\s+skills?|skills?|experience|education))'
         match = re.search(summary_pattern, text, re.DOTALL)
         if match:
@@ -250,7 +246,7 @@ IMPORTANT:
         return ""

     def _extract_skills_regex(self, text: str) -> List[str]:
-        """Regex fallback for skills extraction"""
         skills = set()

         # Look for technical skills section
@@ -269,7 +265,7 @@ IMPORTANT:
         return sorted(list(skills))

     def _extract_experiences_regex(self, text: str) -> List[Dict[str, Any]]:
-        """Regex fallback for experience extraction"""
         experiences = []

         # Look for work experience section
@@ -303,7 +299,7 @@ IMPORTANT:
         return experiences

     def _extract_education_regex(self, text: str) -> List[str]:
-        """Regex fallback for education extraction"""
         education = []

         edu_pattern = r'(?i)education[:\s]*\n(.*?)(?=\n\s*(?:certifications?|projects?|$))'
@@ -319,98 +315,25 @@ IMPORTANT:
319
 
320
  return education
321
 
322
- def _extract_contact_info(self, text: str) -> Dict[str, str]:
323
- """Extract contact information (email, phone, LinkedIn)"""
324
- contact_info = {}
325
-
326
- # Extract email
327
- email_match = re.search(r'[\w\.-]+@[\w\.-]+\.\w+', text)
328
- if email_match:
329
- contact_info["email"] = email_match.group(0)
330
-
331
- # Extract phone
332
- phone_patterns = [
333
- r'\+?1?[-.\s]?\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4})',
334
- r'(\d{3})[-.\s](\d{3})[-.\s](\d{4})',
335
- r'\+\d{1,3}[-.\s]?\d{3}[-.\s]?\d{3}[-.\s]?\d{4}'
336
- ]
337
-
338
- for pattern in phone_patterns:
339
- phone_match = re.search(pattern, text)
340
- if phone_match:
341
- contact_info["phone"] = phone_match.group(0)
342
- break
343
-
344
- # Extract LinkedIn
345
- linkedin_patterns = [
346
- r'linkedin\.com/in/[\w-]+',
347
- r'linkedin\.com/[\w-]+',
348
- r'(?i)linkedin[:\s]+[\w.-]+',
349
  ]
350
 
351
- for pattern in linkedin_patterns:
352
- linkedin_match = re.search(pattern, text)
353
- if linkedin_match:
354
- linkedin_url = linkedin_match.group(0)
355
- if not linkedin_url.startswith('http'):
356
- linkedin_url = f"https://{linkedin_url}"
357
- contact_info["linkedin"] = linkedin_url
358
- break
359
 
360
- return contact_info
 
361
 
362
- # Convenience function for easy usage
363
  def extract_sections_openai(text: str, api_key: Optional[str] = None) -> Dict[str, Any]:
364
- """
365
- Extract resume sections using OpenAI GPT-4o (GPT-4.1)
366
-
367
- Args:
368
- text: Raw resume text
369
- api_key: OpenAI API key (optional)
370
-
371
- Returns:
372
- Structured resume data
373
- """
374
  extractor = OpenAIResumeExtractor(api_key=api_key)
375
- return extractor.extract_sections_openai(text)
376
-
377
- # Test function
378
- def test_openai_extraction():
379
- """Test the OpenAI extraction with sample resume"""
380
-
381
- sample_text = """
382
- John Doe
383
- Selenium Java Automation Engineer
384
- Email: johndoe@example.com | Phone: +1-123-456-7890
385
-
386
- Professional Summary
387
- Results-driven Automation Test Engineer with 8 years of experience in Selenium and Java,
388
- specializing in automation frameworks for financial and insurance domains.
389
-
390
- Technical Skills
391
- Selenium WebDriver, Java, TestNG, Cucumber, Jenkins, Maven, Git, REST Assured, Postman,
392
- JIRA, Agile/Scrum, CI/CD
393
-
394
- Work Experience
395
- Senior Automation Test Engineer | ABC Financial Services | Jan 2021 - Present
396
- - Led automation framework enhancements using Selenium and Java, improving test efficiency.
397
- - Automated end-to-end UI and API testing for financial applications, reducing manual effort by 40%.
398
-
399
- Automation Test Engineer | XYZ Insurance Solutions | Jun 2017 - Dec 2020
400
- - Designed and implemented Selenium automation framework using Java and TestNG.
401
- - Developed automated test scripts for insurance policy management applications.
402
-
403
- Education
404
- Bachelor of Technology in Computer Science | ABC University | 2015
405
- """
406
-
407
- extractor = OpenAIResumeExtractor()
408
- result = extractor.extract_sections_openai(sample_text)
409
-
410
- print("OpenAI Extraction Results:")
411
- print(json.dumps(result, indent=2))
412
-
413
- return result
414
-
415
- if __name__ == "__main__":
416
- test_openai_extraction()
 
 
  """
+ OpenAI-based resume data extraction.
+ Uses GPT models to extract structured information from resume text.
  """
  
  import json
  import re
  import logging
  from typing import Dict, Any, List, Optional
+ 
+ import openai
  from openai import OpenAI
  
+ # Set up logging
  logger = logging.getLogger(__name__)
  
+ 
  class OpenAIResumeExtractor:
      """
+     Resume data extractor using OpenAI's GPT models.
      """
  
      def __init__(self, api_key: Optional[str] = None, model: str = "gpt-4o"):
+         """Initialize with OpenAI API key and model."""
+         self.client = OpenAI(api_key=api_key) if api_key else OpenAI()
          self.model = model
+         logger.info(f"OpenAI extractor initialized with model: {model}")
  
      def extract_sections_openai(self, text: str) -> Dict[str, Any]:
          """
+         Extract resume sections using OpenAI API.
  
          Args:
              text: Raw resume text
  
          Returns:
+             Dict containing extracted sections
          """
+         logger.info("Starting OpenAI extraction...")
  
          try:
+             # Create extraction prompt
              prompt = self._create_extraction_prompt(text)
  
+             # Call OpenAI API
              response = self.client.chat.completions.create(
                  model=self.model,
                  messages=[
+                     {"role": "system", "content": "You are an expert resume parser. Extract information and return ONLY valid JSON."},
+                     {"role": "user", "content": prompt}
                  ],
+                 temperature=0.1,
                  max_tokens=2000
              )
  
+             # Parse response
+             content = response.choices[0].message.content.strip()
+             logger.debug(f"OpenAI response: {content[:200]}...")
  
+             # Clean and parse JSON
+             content = self._clean_json_response(content)
+             result = json.loads(content)
  
+             # Validate and enhance result
              result = self._validate_and_clean_result(result)
  
+             # Add contact info extraction
              contact_info = self._extract_contact_info(text)
              result["ContactInfo"] = contact_info
  
              logger.info("✅ OpenAI extraction completed successfully")
              return result
  
+         except json.JSONDecodeError as e:
+             logger.error(f"JSON parsing error: {e}")
+             logger.debug(f"Response content: {content}")
+             return self._fallback_extraction(text)
+ 
          except Exception as e:
              logger.error(f"OpenAI extraction failed: {e}")
+             return self._fallback_extraction(text)
+ 
+     def _clean_json_response(self, content: str) -> str:
+         """Clean JSON response from OpenAI."""
+         # Remove markdown code blocks
+         content = re.sub(r'```json\s*', '', content)
+         content = re.sub(r'```\s*$', '', content)
+ 
+         # Remove any text before first {
+         start = content.find('{')
+         if start > 0:
+             content = content[start:]
  
+         # Remove any text after last }
+         end = content.rfind('}')
+         if end > 0 and end < len(content) - 1:
+             content = content[:end + 1]
  
+         return content.strip()
  
      def _create_extraction_prompt(self, text: str) -> str:
+         """Create prompt for OpenAI extraction."""
          prompt = f"""
+ Extract information from this resume and return ONLY valid JSON in this exact format:
  
  {{
+     "Name": "Full Name with credentials (PhD, MBA, etc.)",
+     "Summary": "Professional summary or objective",
+     "Skills": ["skill1", "skill2", "skill3"],
+     "StructuredExperiences": [
+         {{
+             "title": "Job Title",
+             "company": "Company Name",
+             "date_range": "Start Date - End Date",
+             "responsibilities": ["responsibility1", "responsibility2"]
+         }}
+     ],
+     "Education": ["degree info", "school info"],
+     "Training": ["certification1", "training1"],
+     "Address": "Full address if available"
  }}
  
+ Resume text:
+ {text}
  
+ CRITICAL INSTRUCTIONS:
+ - For NAME: Include ALL credentials (PhD, MBA, M.S., B.S., etc.) - example: "John Doe, PhD, MBA"
+ - Read the ENTIRE resume text carefully, don't miss content
+ - Extract ALL work experiences with full details
  - Return ONLY valid JSON, no explanations
  - If a section is not found, use empty string or empty array
+ - Extract actual technical skills, not company names
  """
          return prompt
  
+     def _extract_contact_info(self, text: str) -> Dict[str, str]:
+         """Extract contact information from resume text."""
+         contact_info = {}
+ 
+         # Extract email
+         email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
+         email_match = re.search(email_pattern, text)
+         if email_match:
+             contact_info['email'] = email_match.group()
+ 
+         # Extract phone number
+         phone_patterns = [
+             r'\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}',
+             r'\+1[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}',
+             r'\d{3}[-.\s]?\d{3}[-.\s]?\d{4}'
+         ]
+ 
+         for pattern in phone_patterns:
+             phone_match = re.search(pattern, text)
+             if phone_match:
+                 contact_info['phone'] = phone_match.group().strip()
+                 break
+ 
+         # Extract LinkedIn
+         linkedin_pattern = r'linkedin\.com/in/[A-Za-z0-9-]+'
+         linkedin_match = re.search(linkedin_pattern, text)
+         if linkedin_match:
+             contact_info['linkedin'] = linkedin_match.group()
+ 
+         logger.info(f"OPENAI: Extracted ContactInfo as dict: {contact_info}")
+         return contact_info
+ 
      def _validate_and_clean_result(self, result: Dict[str, Any]) -> Dict[str, Any]:
+         """Validate and clean the extraction result."""
  
          # Ensure all required keys exist
+         required_keys = ["Name", "Summary", "Skills", "StructuredExperiences", "Education", "Training", "Address"]
          for key in required_keys:
              if key not in result:
                  result[key] = [] if key in ["Skills", "StructuredExperiences", "Education", "Training"] else ""
  ...
  
          return result
  
      def _is_company_name(self, text: str) -> bool:
+         """Check if text looks like a company name rather than a skill."""
          company_indicators = [
              "inc", "llc", "corp", "ltd", "company", "solutions", "services",
+             "systems", "technologies", "financial", "insurance"
          ]
          text_lower = text.lower()
          return any(indicator in text_lower for indicator in company_indicators)
  
      def _fallback_extraction(self, text: str) -> Dict[str, Any]:
+         """Fallback to regex-based extraction if OpenAI fails."""
          logger.info("Using regex fallback extraction...")
+ 
+         return {
+             "Name": self._extract_name_regex(text),
+             "Summary": self._extract_summary_regex(text),
+             "Skills": self._extract_skills_regex(text),
+             "StructuredExperiences": self._extract_experiences_regex(text),
+             "Education": self._extract_education_regex(text),
+             "Training": [],
+             "Address": self._extract_address_regex(text),
+             "ContactInfo": self._extract_contact_info(text)
+         }
  
      def _extract_name_regex(self, text: str) -> str:
+         """Regex fallback for name extraction."""
          lines = text.split('\n')[:5]
          for line in lines:
              line = line.strip()
              if re.search(r'@|phone|email|linkedin|github', line.lower()):
                  continue
+             # Match name with potential credentials (PhD, MBA, etc.)
+             name_match = re.match(r'^([A-Z][a-z]+ [A-Z][a-z]+(?:\s+[A-Z][a-z]+)?(?:,\s*[A-Z][a-z.]+(?:,\s*[A-Z][a-z.]+)?)?)', line)
              if name_match:
                  return name_match.group(1)
          return ""
  
      def _extract_summary_regex(self, text: str) -> str:
+         """Regex fallback for summary extraction."""
          summary_pattern = r'(?i)(?:professional\s+)?summary[:\s]*\n(.*?)(?=\n\s*(?:technical\s+skills?|skills?|experience|education))'
          match = re.search(summary_pattern, text, re.DOTALL)
          if match:
  ...
          return ""
  
      def _extract_skills_regex(self, text: str) -> List[str]:
+         """Regex fallback for skills extraction."""
          skills = set()
  
          # Look for technical skills section
  ...
          return sorted(list(skills))
  
      def _extract_experiences_regex(self, text: str) -> List[Dict[str, Any]]:
+         """Regex fallback for experience extraction."""
          experiences = []
  
          # Look for work experience section
  ...
          return experiences
  
      def _extract_education_regex(self, text: str) -> List[str]:
+         """Regex fallback for education extraction."""
          education = []
  
          edu_pattern = r'(?i)education[:\s]*\n(.*?)(?=\n\s*(?:certifications?|projects?|$))'
  ...
  
          return education
  
+     def _extract_address_regex(self, text: str) -> str:
+         """Regex fallback for address extraction."""
+         # Look for address patterns like "6001 Tain Dr. Suite 203, Dublin, OH, 43016"
+         address_patterns = [
+             r'(\d+\s+[A-Za-z\s\.]+(?:Suite|Apt|Unit)\s+\d+,?\s*[A-Za-z\s]+,\s*[A-Z]{2}\s*\d{5})',
+             r'(\d+\s+[A-Za-z\s\.]+,?\s*[A-Za-z\s]+,\s*[A-Z]{2}\s*\d{5})',
+             r'([A-Za-z\s\d\.]+,\s*[A-Za-z\s]+,\s*[A-Z]{2}\s*\d{5})'
+         ]
+ 
+         for pattern in address_patterns:
+             match = re.search(pattern, text)
+             if match:
+                 return match.group(1).strip()
+ 
+         return ""
+ 
  
+ # Main extraction function for compatibility
  def extract_sections_openai(text: str, api_key: Optional[str] = None) -> Dict[str, Any]:
+     """Extract resume sections using OpenAI API."""
      extractor = OpenAIResumeExtractor(api_key=api_key)
+     return extractor.extract_sections_openai(text)
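For reference, a minimal sketch of how the rewritten module can be exercised end to end; the sample resume text and environment handling are illustrative, not part of this commit:

    import os
    from utils.openai_extractor import extract_sections_openai

    sample = (
        "John Doe, PhD\n"
        "john.doe@example.com | (614) 555-0100\n"
        "Professional Summary\n"
        "Data scientist with 8 years of experience in NLP.\n"
    )

    # Falls back to the regex extractor automatically if the API call
    # or the JSON parsing fails.
    sections = extract_sections_openai(sample, api_key=os.getenv("OPENAI_API_KEY"))
    print(sections["Name"])         # e.g. "John Doe, PhD"
    print(sections["ContactInfo"])  # email and phone picked up by the regex patterns
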
utils/parser.py CHANGED
@@ -3,7 +3,7 @@ import fitz  # PyMuPDF
  import re
  from io import BytesIO
  from docx import Document
- from config import supabase, embedding_model, client, query
+ from config import supabase, embedding_model, HF_ENDPOINTS, query
  
  def extract_name(resume_text: str) -> str:
      # look at the very top lines for a capitalized full name
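
utils/parser.py now imports HF_ENDPOINTS in place of the removed client. The query helper's definition is not shown in this diff; a plausible shape, purely an assumption about config, would be:

    import requests
    from config import HF_ENDPOINTS, HF_HEADERS

    def query(payload: dict, endpoint_key: str = "summarization") -> dict:
        # Hypothetical wrapper: endpoint_key and the payload contract are
        # assumptions, not taken from this commit.
        response = requests.post(HF_ENDPOINTS[endpoint_key], headers=HF_HEADERS, json=payload)
        response.raise_for_status()
        return response.json()
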
utils/reporting.py CHANGED
@@ -2,7 +2,7 @@
  import re
  import fitz  # PyMuPDF
  from io import BytesIO
- from config import supabase, embedding_model, client, query
+ from config import supabase, embedding_model, query
  from .screening import evaluate_resumes
  
  def generate_pdf_report(shortlisted_candidates, questions=None):
@@ -45,7 +45,7 @@ def generate_interview_questions_from_summaries(candidates):
      )
  
      try:
-         response = client.chat.completions.create(
+         response = supabase.ai.chat.completions.create(
              model="tgi",
              messages=[{"role": "user", "content": prompt}],
              temperature=0.7,
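
The hunk ends at temperature=0.7, so the surrounding error handling is not visible here; a defensive sketch, assuming supabase.ai keeps the OpenAI-style response shape (the fallback text is illustrative):

    try:
        response = supabase.ai.chat.completions.create(
            model="tgi",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
        )
        questions = response.choices[0].message.content.strip()
    except Exception as exc:
        # Keep PDF report generation alive even if question generation fails.
        questions = f"Interview questions unavailable: {exc}"
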
utils/screening.py CHANGED
@@ -2,7 +2,7 @@
  from .parser import parse_resume, extract_email, summarize_resume
  from .hybrid_extractor import extract_resume_sections
  from .spacy_loader import get_nlp, is_spacy_available
- from config import supabase, embedding_model, client
+ from config import supabase, embedding_model
  from fuzzywuzzy import fuzz
  from sentence_transformers import util
  import streamlit as st
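
utils/screening.py keeps both the fuzzy-matching and embedding imports. A minimal sketch of how the two signals are commonly combined (the helper name and weights are assumptions, not the actual evaluate_resumes logic):

    from fuzzywuzzy import fuzz
    from sentence_transformers import util
    from config import embedding_model

    def hybrid_match_score(resume_text: str, job_description: str) -> float:
        # Lexical overlap on a 0..1 scale.
        fuzzy = fuzz.token_set_ratio(resume_text, job_description) / 100.0
        # Semantic similarity via sentence embeddings.
        emb = embedding_model.encode([resume_text, job_description], convert_to_tensor=True)
        semantic = float(util.cos_sim(emb[0], emb[1]))
        # Illustrative weighting between the two signals.
        return 0.4 * fuzzy + 0.6 * semantic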