Johnny committed
Commit 102e49d · 1 Parent(s): c07f72a

updated resume_format > template, hide sidebar, download Spacy model with spacy_loader.py

.streamlit/config.toml CHANGED
@@ -6,4 +6,8 @@ textColor="#262730"
 font="sans serif"
 
 [ui]
-sidebarState = "collapsed"
+hideTopBar = false
+hideSidebarNav = true
+
+[server]
+headless = true
README.md CHANGED
@@ -15,7 +15,7 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
 
 TalentLensAI is an AI-powered resume screening and evaluation tool that leverages Hugging Face models for summarization and scoring. It integrates with Supabase for candidate data storage and utilizes Streamlit for an interactive user interface.
 
-Features
+## Features
 
 Resume Summarization: Uses Facebook's BART model (facebook/bart-large-cnn) to generate a concise summary of the resume.
 
@@ -27,7 +27,24 @@ PDF Report Generation: Generates a PDF report summarizing the evaluation results
 
 Streamlit UI: Provides a user-friendly interface for uploading resumes and reviewing results.
 
-Setup Instructions
+## Deployment Notes
+
+### SpaCy Model Handling
+The application uses spaCy for natural language processing. To handle deployment environments where the `en_core_web_sm` model might not be available:
+
+- The spaCy model is automatically downloaded via requirements.txt
+- A fallback system (`utils/spacy_loader.py`) provides graceful degradation
+- If spaCy is unavailable, the system uses regex-based extraction methods
+
+### Hugging Face Spaces Deployment
+For Hugging Face Spaces deployment, the following files are configured:
+
+- `requirements.txt`: Includes a direct spaCy model download link
+- `packages.txt`: System dependencies for spaCy compilation
+- `utils/spacy_loader.py`: Robust model loading with fallbacks
+- NumPy version pinned to `<2.0` for compatibility
+
+## Setup Instructions
 
 1. Clone the Repository
 ```
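The graceful-degradation path described in the new Deployment Notes looks roughly like this from a consumer module's point of view (a sketch only; `get_nlp` and `is_spacy_available` are the real helpers added in `utils/spacy_loader.py` later in this commit, while `extract_tokens` is a hypothetical illustration):

```python
from utils.spacy_loader import get_nlp, is_spacy_available

nlp = get_nlp()  # full en_core_web_sm if installed, else downloaded, else a blank pipeline, else None

def extract_tokens(text: str) -> list[str]:
    # Hypothetical consumer: prefer spaCy, degrade to regex as the README describes
    if nlp and is_spacy_available():
        return [t.text for t in nlp(text) if not t.is_stop]
    import re
    return re.findall(r"\b[a-zA-Z]{3,}\b", text.lower())
```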
Template.py ADDED
@@ -0,0 +1,359 @@
+# pages/Format_Resume.py
+
+import os, sys, streamlit as st
+import json
+from io import BytesIO
+
+# Add parent directory to path so we can import utils
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+# Force reload environment variables for Streamlit
+from dotenv import load_dotenv
+load_dotenv(override=True)
+
+from utils.hybrid_extractor import extract_resume_sections
+from utils.builder import build_resume_from_data
+from utils.parser import parse_resume  # whatever parse_resume you already have
+
+# Path to your blank template (header/footer only)
+template_path = os.path.join(
+    os.path.dirname(__file__), '..', 'templates', 'blank_resume.docx'
+)
+
+st.set_page_config(
+    page_title='Resume Formatter',
+    layout='centered',
+    initial_sidebar_state="collapsed"
+)
+
+# Hide sidebar completely with CSS
+st.markdown("""
+<style>
+.css-1d391kg {display: none}
+.css-1rs6os {display: none}
+.css-17ziqus {display: none}
+[data-testid="stSidebar"] {display: none}
+[data-testid="collapsedControl"] {display: none}
+.css-1lcbmhc {display: none}
+.css-1outpf7 {display: none}
+.sidebar .sidebar-content {display: none}
+</style>
+""", unsafe_allow_html=True)
+
+# Home button at the top
+if st.button("🏠 Home", help="Return to main TalentLens.AI page"):
+    st.switch_page("app.py")
+
+st.title('📄 Resume Formatter')
+st.markdown("---")
+
+uploaded = st.file_uploader('Upload Resume (PDF or DOCX)', type=['pdf','docx'])
+if not uploaded:
+    st.info("Please upload a resume to get started.")
+
+    # Show help information when no file is uploaded
+    st.markdown("### 💡 How to Use Resume Formatter")
+    st.markdown("""
+    1. **Upload your resume** in PDF or DOCX format
+    2. **Review extracted data** - our AI will parse your resume sections
+    3. **Edit if needed** - make any corrections to the extracted information
+    4. **Generate formatted resume** - download a professionally formatted version
+    """)
+
+    st.markdown("### ✨ Features")
+    col1, col2 = st.columns(2)
+    with col1:
+        st.markdown("""
+        **🤖 AI-Powered Extraction:**
+        - OpenAI GPT-4o for highest accuracy
+        - Hugging Face Cloud as backup
+        - Regex fallback for reliability
+        """)
+    with col2:
+        st.markdown("""
+        **📄 Professional Formatting:**
+        - Clean, modern design
+        - Consistent layout
+        - ATS-friendly format
+        """)
+
+    st.stop()
+
+st.success(f'Uploaded: {uploaded.name}')
+
+# 1) Extract raw text
+ext = uploaded.name.split('.')[-1].lower()
+resume_text = parse_resume(uploaded, ext)
+
+st.subheader('📄 Raw Resume Text')
+st.text_area(
+    label='Raw Resume Text',
+    value=resume_text,
+    height=300,
+    label_visibility='visible'
+)
+
+# 2) Parse into structured fields using improved hybrid approach
+st.subheader('🔍 Extracting Resume Data...')
+
+# Show extraction progress
+with st.spinner('Analyzing resume with AI models...'):
+    # Use OpenAI as primary, HF Cloud as backup
+    data = extract_resume_sections(
+        resume_text,
+        prefer_ai=True,
+        use_openai=True,   # Try OpenAI GPT-4o first (best results)
+        use_hf_cloud=True  # Fallback to HF Cloud (good backup)
+    )
+
+# Show extraction success and method used
+from utils.hybrid_extractor import HybridResumeExtractor
+extractor = HybridResumeExtractor(prefer_ai=True, use_openai=True, use_hf_cloud=True)
+extractor.extract_sections(resume_text)  # Just to get the method used
+stats = extractor.get_extraction_stats()
+
+method_used = stats.get('method_used', 'unknown')
+if method_used == 'openai_gpt4o':
+    st.success('✅ Extracted using OpenAI GPT-4o (highest accuracy)')
+elif method_used == 'huggingface_cloud':
+    st.info('ℹ️ Extracted using Hugging Face Cloud (good accuracy)')
+else:
+    st.warning('⚠️ Used fallback extraction method')
+
+# Show extraction quality indicators
+name_found = bool(data.get('Name'))
+experiences_found = len(data.get('StructuredExperiences', []))
+skills_found = len(data.get('Skills', []))
+
+col1, col2, col3 = st.columns(3)
+with col1:
+    st.metric("Name", "✅" if name_found else "❌", "Found" if name_found else "Missing")
+with col2:
+    st.metric("Job Experiences", experiences_found, f"{experiences_found} positions")
+with col3:
+    st.metric("Technical Skills", skills_found, f"{skills_found} skills")
+
+# 👇 TEMP – remove after test (show raw JSON for debugging)
+with st.expander("🔧 Debug: Raw Extraction Data"):
+    import json, textwrap
+    st.code(textwrap.indent(json.dumps(data, indent=2), " "), language="json")
+
+st.subheader('📋 Parsed Resume Sections')
+
+# Display sections in a more user-friendly way
+col1, col2 = st.columns(2)
+
+with col1:
+    # Name and Summary
+    st.markdown("**👤 Personal Information**")
+    if data.get('Name'):
+        st.write(f"**Name:** {data['Name']}")
+    else:
+        st.error("❌ Name not found")
+
+    if data.get('Summary'):
+        st.markdown("**📝 Professional Summary:**")
+        st.write(data['Summary'])
+    else:
+        st.warning("⚠️ No professional summary found")
+
+    # Education
+    st.markdown("**🎓 Education**")
+    education = data.get('Education', [])
+    if education:
+        for edu in education:
+            st.write(f"• {edu}")
+    else:
+        st.warning("⚠️ No education information found")
+
+with col2:
+    # Skills
+    st.markdown("**🛠️ Technical Skills**")
+    skills = data.get('Skills', [])
+    if skills:
+        # Show skills in a nice format
+        skills_text = ", ".join(skills)
+        st.write(skills_text)
+
+        # Show skills quality
+        company_names = [s for s in skills if any(word in s.lower() for word in ['abc', 'xyz', 'financial', 'insurance', 'solutions'])]
+        if company_names:
+            st.warning(f"⚠️ Found {len(company_names)} company names in skills (will be cleaned)")
+    else:
+        st.error("❌ No technical skills found")
+
+    # Training/Certifications
+    training = data.get('Training', [])
+    if training:
+        st.markdown("**📜 Certifications/Training**")
+        for cert in training:
+            st.write(f"• {cert}")
+
+# Work Experience (full width)
+st.markdown("**💼 Professional Experience**")
+experiences = data.get('StructuredExperiences', [])
+if experiences:
+    for i, exp in enumerate(experiences, 1):
+        with st.expander(f"Job {i}: {exp.get('title', 'Unknown Title')} at {exp.get('company', 'Unknown Company')}"):
+            st.write(f"**Position:** {exp.get('title', 'N/A')}")
+            st.write(f"**Company:** {exp.get('company', 'N/A')}")
+            st.write(f"**Duration:** {exp.get('date_range', 'N/A')}")
+
+            responsibilities = exp.get('responsibilities', [])
+            if responsibilities:
+                st.write("**Key Responsibilities:**")
+                for resp in responsibilities:
+                    st.write(f"• {resp}")
+            else:
+                st.warning("⚠️ No responsibilities found for this position")
+else:
+    st.error("❌ No work experience found")
+
+# Show editable sections for user to modify if needed
+st.subheader('✏️ Edit Extracted Data (Optional)')
+with st.expander("Click to edit extracted data before formatting"):
+    for section, content in data.items():
+        st.markdown(f"**{section}:**")
+
+        # pure list of strings
+        if isinstance(content, list) and all(isinstance(i, str) for i in content):
+            edited_content = st.text_area(
+                label=section,
+                value="\n".join(content),
+                height=100,
+                label_visibility='collapsed',
+                key=f"edit_{section}"
+            )
+            # Update data with edited content
+            data[section] = [line.strip() for line in edited_content.split('\n') if line.strip()]
+
+        # list of dicts → show as JSON (read-only for now)
+        elif isinstance(content, list) and all(isinstance(i, dict) for i in content):
+            st.json(content)
+
+        # everything else (e.g. single string)
+        else:
+            edited_content = st.text_area(
+                label=section,
+                value=str(content),
+                height=100,
+                label_visibility='collapsed',
+                key=f"edit_{section}_str"
+            )
+            # Update data with edited content
+            data[section] = edited_content
+
+# 3) Build & download
+st.subheader('📄 Generate Formatted Resume')
+
+# Show what will be included in the formatted resume
+col1, col2, col3 = st.columns(3)
+with col1:
+    st.metric("Sections to Include", len([k for k, v in data.items() if v]), "sections")
+with col2:
+    total_content = sum(len(str(v)) for v in data.values() if v)
+    st.metric("Content Length", f"{total_content:,}", "characters")
+with col3:
+    quality_score = (
+        (1 if data.get('Name') else 0) +
+        (1 if data.get('Summary') else 0) +
+        (1 if data.get('StructuredExperiences') else 0) +
+        (1 if data.get('Skills') else 0)
+    ) * 25
+    st.metric("Quality Score", f"{quality_score}%", "completeness")
+
+if st.button('📄 Generate Formatted Resume', type='primary'):
+    try:
+        with st.spinner('Building formatted resume...'):
+            # Build the resume document
+            doc = build_resume_from_data(template_path, data)
+
+            # Save to buffer
+            buf = BytesIO()
+            doc.save(buf)
+            buf.seek(0)
+
+        st.success('✅ Resume formatted successfully!')
+
+        # Show what was included
+        st.info(f"""
+        **Formatted Resume Includes:**
+        • Name: {data.get('Name', 'Not found')}
+        • Professional Summary: {'✅' if data.get('Summary') else '❌'}
+        • Technical Skills: {len(data.get('Skills', []))} items
+        • Work Experience: {len(data.get('StructuredExperiences', []))} positions
+        • Education: {len(data.get('Education', []))} items
+        """)
+
+        # Generate filename with candidate name
+        candidate_name = data.get('Name', 'Resume').replace(' ', '_')
+        filename = f"{candidate_name}_Formatted_Resume.docx"
+
+        st.download_button(
+            '📥 Download Formatted Resume',
+            data=buf,
+            file_name=filename,
+            mime='application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+            help=f"Download the formatted resume for {data.get('Name', 'candidate')}"
+        )
+
+    except Exception as e:
+        st.error(f"❌ Error generating formatted resume: {str(e)}")
+        st.info("💡 Try editing the extracted data above to fix any issues, or contact support if the problem persists.")
+
+# Add helpful tips
+with st.expander("💡 Tips for Better Results"):
+    st.markdown("""
+    **For best extraction results:**
+    - Ensure your resume has clear section headers (e.g., "Professional Summary", "Technical Skills", "Work Experience")
+    - Use consistent formatting for job entries (Title | Company | Dates)
+    - List technical skills clearly, separated by commas
+    - Include bullet points for job responsibilities
+
+    **If extraction isn't perfect:**
+    - Use the "Edit Extracted Data" section above to make corrections
+    - The system will learn from different resume formats over time
+    - OpenAI GPT-4o provides the most accurate extraction when available
+    """)
+
+# Show extraction method info
+with st.expander("🔧 Extraction Method Details"):
+    st.markdown(f"""
+    **Method Used:** {method_used}
+
+    **Available Methods:**
+    - **OpenAI GPT-4o**: Highest accuracy, best for complex formats
+    - **Hugging Face Cloud**: Good accuracy, reliable backup
+    - **Regex Fallback**: Basic extraction, used when AI methods fail
+
+    **Current Status:**
+    - OpenAI Available: {'✅' if stats.get('ai_available') else '❌'}
+    - AI Preferred: {'✅' if stats.get('prefer_ai') else '❌'}
+    """)
+
+# Footer navigation and additional actions
+st.markdown("---")
+st.markdown("### 🚀 What's Next?")
+
+col1, col2, col3 = st.columns(3)
+
+with col1:
+    if st.button("🏠 Return to Home", use_container_width=True):
+        st.switch_page("app.py")
+
+with col2:
+    if st.button("📄 Format Another Resume", use_container_width=True):
+        st.rerun()
+
+with col3:
+    st.markdown("**Need Help?**")
+    st.markdown("Check the tips above or contact support")
+
+# Final footer
+st.markdown("---")
+st.markdown(
+    "<div style='text-align: center; color: #666; padding: 20px;'>"
+    "🚀 <strong>TalentLens.AI</strong> - Powered by AI for intelligent resume processing"
+    "</div>",
+    unsafe_allow_html=True
+)
app.py CHANGED
@@ -18,7 +18,25 @@ from utils.reporting import generate_pdf_report, generate_interview_questions_fr
 
 # ------------------------- Main App Function -------------------------
 def main():
-    st.set_page_config(page_title="TalentLens.AI", layout="centered")
+    st.set_page_config(
+        page_title="TalentLens.AI",
+        layout="centered",
+        initial_sidebar_state="collapsed"
+    )
+
+    # Hide sidebar completely with CSS
+    st.markdown("""
+    <style>
+    .css-1d391kg {display: none}
+    .css-1rs6os {display: none}
+    .css-17ziqus {display: none}
+    [data-testid="stSidebar"] {display: none}
+    [data-testid="collapsedControl"] {display: none}
+    .css-1lcbmhc {display: none}
+    .css-1outpf7 {display: none}
+    .sidebar .sidebar-content {display: none}
+    </style>
+    """, unsafe_allow_html=True)
 
     st.markdown("<h1 style='text-align: center;'>TalentLens.AI</h1>", unsafe_allow_html=True)
     st.divider()
@@ -38,8 +56,23 @@ def main():
     # Input job description
     job_description = st.text_area("Enter Job Description")
 
-    # Evaluation trigger
-    if st.button("Evaluate Resumes"):
+    # Main action buttons
+    col1, col2 = st.columns(2)
+
+    with col1:
+        # Evaluation trigger
+        evaluate_clicked = st.button("📊 Evaluate Resumes", type="primary", use_container_width=True)
+
+    with col2:
+        # Format Resume redirect button
+        format_clicked = st.button("📄 Format Resume", use_container_width=True)
+
+    # Handle Format Resume redirect
+    if format_clicked:
+        st.switch_page("pages/Template.py")
+
+    # Handle Evaluate Resumes
+    if evaluate_clicked:
         if not job_description:
             st.error("⚠️ Please enter a job description.")
             return
app_startup.py ADDED
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+"""
+Startup script for Hugging Face Spaces deployment
+Ensures spaCy model is available before starting the main app
+"""
+
+import subprocess
+import sys
+import os
+import logging
+
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+def install_spacy_model():
+    """Install spaCy English model if not already available"""
+    try:
+        import spacy
+
+        # Try to load the model
+        try:
+            nlp = spacy.load("en_core_web_sm")
+            logger.info("✅ spaCy model 'en_core_web_sm' is already available")
+            return True
+        except OSError:
+            logger.info("📦 spaCy model 'en_core_web_sm' not found, downloading...")
+
+        # Download the model
+        subprocess.check_call([
+            sys.executable, "-m", "spacy", "download", "en_core_web_sm"
+        ])
+
+        # Verify installation
+        nlp = spacy.load("en_core_web_sm")
+        logger.info("✅ spaCy model 'en_core_web_sm' downloaded and loaded successfully")
+        return True
+
+    except subprocess.CalledProcessError as e:
+        logger.error(f"❌ Failed to download spaCy model: {e}")
+        return False
+    except Exception as e:
+        logger.error(f"❌ Error setting up spaCy: {e}")
+        return False
+
+def main():
+    """Main startup function"""
+    logger.info("🚀 Starting TalentLens.AI...")
+
+    # Install spaCy model
+    model_success = install_spacy_model()
+
+    if not model_success:
+        logger.warning("⚠️ spaCy model not available, application will use fallback methods")
+
+    # Import and run the main app
+    try:
+        from app import main as app_main
+        logger.info("✅ Starting Streamlit app...")
+        app_main()
+    except Exception as e:
+        logger.error(f"❌ Failed to start main app: {e}")
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main()
packages.txt ADDED
@@ -0,0 +1,2 @@
+build-essential
+python3-dev
pages/{Format_Resume.py → Template.py} RENAMED
@@ -1,4 +1,4 @@
-# pages/Format_Resume.py
+# pages/Template.py
 
 import os, sys, streamlit as st
 import json
@@ -20,8 +20,32 @@ template_path = os.path.join(
     os.path.dirname(__file__), '..', 'templates', 'blank_resume.docx'
 )
 
-st.set_page_config(page_title='Resume Formatter', layout='centered')
-st.title('📄 Resume Formatter')
+st.set_page_config(
+    page_title='Resume Template Builder',
+    layout='centered',
+    initial_sidebar_state="collapsed"
+)
+
+# Hide sidebar completely with CSS
+st.markdown("""
+<style>
+.css-1d391kg {display: none}
+.css-1rs6os {display: none}
+.css-17ziqus {display: none}
+[data-testid="stSidebar"] {display: none}
+[data-testid="collapsedControl"] {display: none}
+.css-1lcbmhc {display: none}
+.css-1outpf7 {display: none}
+.sidebar .sidebar-content {display: none}
+</style>
+""", unsafe_allow_html=True)
+
+# Home button at the top
+if st.button("🏠 Home", help="Return to main TalentLens.AI page"):
+    st.switch_page("app.py")
+
+st.title('📄 Resume Template Builder')
+st.markdown("---")
 
 uploaded = st.file_uploader('Upload Resume (PDF or DOCX)', type=['pdf','docx'])
 if not uploaded:
@@ -279,3 +303,30 @@ with st.expander("🔧 Extraction Method Details"):
     - OpenAI Available: {'✅' if stats.get('ai_available') else '❌'}
     - AI Preferred: {'✅' if stats.get('prefer_ai') else '❌'}
     """)
+
+# Footer navigation and additional actions
+st.markdown("---")
+st.markdown("### 🚀 What's Next?")
+
+col1, col2, col3 = st.columns(3)
+
+with col1:
+    if st.button("🏠 Return to Home", use_container_width=True):
+        st.switch_page("app.py")
+
+with col2:
+    if st.button("📄 Format Another Resume", use_container_width=True):
+        st.rerun()
+
+with col3:
+    st.markdown("**Need Help?**")
+    st.markdown("Check the tips above or contact support")
+
+# Final footer
+st.markdown("---")
+st.markdown(
+    "<div style='text-align: center; color: #666; padding: 20px;'>"
+    "🚀 <strong>TalentLens.AI</strong> - Powered by AI for intelligent resume processing"
+    "</div>",
+    unsafe_allow_html=True
+)
requirements.txt CHANGED
@@ -6,7 +6,8 @@ PyMuPDF
 pytest
 sentence-transformers
 spacy
+https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl
 openai
 fuzzywuzzy
 python-docx
-numpy
+numpy<2.0
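Pinning the model as a direct wheel URL means pip installs `en_core_web_sm` like any other package at build time, so no `python -m spacy download` step is needed at runtime. A quick sanity check, assuming the pinned wheel installed cleanly:

```python
import spacy

# The wheel registers the model as an importable package, so loading resolves
# without a separate download step.
import en_core_web_sm

nlp = spacy.load("en_core_web_sm")  # or en_core_web_sm.load()
print(nlp.meta["lang"], nlp.meta["version"])  # expected: en 3.7.1
```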
utils/extractor_fixed.py CHANGED
@@ -1,26 +1,37 @@
-import os, re, json, subprocess, spacy
-from spacy.matcher import PhraseMatcher, Matcher
+import os, re, json, subprocess
 from utils.parser import extract_name   # <= your helper
+from utils.spacy_loader import get_nlp, is_spacy_available
 from datetime import datetime
 from dateutil.parser import parse as date_parse
 
-nlp = spacy.load("en_core_web_sm")  # assume already downloaded
+# Load spaCy model with fallback
+nlp = get_nlp()
 
-# ----------------------------- data lists -----------------------------
-BASE = os.path.dirname(__file__)
-SKILLS = json.load(open(os.path.join(BASE, "data/skills.json"))) \
-    if os.path.exists(os.path.join(BASE,"data/skills.json")) \
-    else ["python","sql","aws","selenium"]
-JOB_TITLES = json.load(open(os.path.join(BASE, "data/job_titles.json")))\
-    if os.path.exists(os.path.join(BASE,"data/job_titles.json"))\
-    else []
-
-skill_matcher = PhraseMatcher(nlp.vocab, attr="LOWER")
-skill_matcher.add("SKILL", [nlp.make_doc(s) for s in SKILLS])
-
-edu_matcher = Matcher(nlp.vocab)
-edu_matcher.add("EDU" , [[{"LOWER":"bachelor"},{"LOWER":"of"},{"IS_TITLE":True,"OP":"+"}]])
-edu_matcher.add("CERT", [[{"LOWER":"certified"},{"IS_TITLE":True,"OP":"+"}]])
+# Initialize spaCy matchers only if spaCy is available
+if nlp and is_spacy_available():
+    from spacy.matcher import PhraseMatcher, Matcher
+
+    # ----------------------------- data lists -----------------------------
+    BASE = os.path.dirname(__file__)
+    SKILLS = json.load(open(os.path.join(BASE, "data/skills.json"))) \
+        if os.path.exists(os.path.join(BASE,"data/skills.json")) \
+        else ["python","sql","aws","selenium"]
+    JOB_TITLES = json.load(open(os.path.join(BASE, "data/job_titles.json")))\
+        if os.path.exists(os.path.join(BASE,"data/job_titles.json"))\
+        else []
+
+    skill_matcher = PhraseMatcher(nlp.vocab, attr="LOWER")
+    skill_matcher.add("SKILL", [nlp.make_doc(s) for s in SKILLS])
+
+    edu_matcher = Matcher(nlp.vocab)
+    edu_matcher.add("EDU" , [[{"LOWER":"bachelor"},{"LOWER":"of"},{"IS_TITLE":True,"OP":"+"}]])
+    edu_matcher.add("CERT", [[{"LOWER":"certified"},{"IS_TITLE":True,"OP":"+"}]])
+else:
+    # Fallback: set matchers to None when spaCy is not available
+    skill_matcher = None
+    edu_matcher = None
+    SKILLS = ["python","sql","aws","selenium"]
+    JOB_TITLES = []
 
 # ----------------------------- regex helpers --------------------------
 # Jonathan's format: Company | Location | Title | Date
@@ -51,7 +62,9 @@ HEAD = re.compile(r"^\s*(summary|skills?|technical\s+skills?|education|tra
 # ----------------------------- main -----------------------------------
 def extract_sections_spacy_fixed(text:str)->dict:
     lines = [ln.rstrip() for ln in text.splitlines()]
-    doc = nlp(text)
+
+    # Only create spaCy doc if nlp is available
+    doc = nlp(text) if nlp and is_spacy_available() else None
 
     # Helper function for contact detection
     def is_contact(s): return bool(re.search(r"@\w|\d{3}[-.\s]?\d{3}",s))
@@ -194,10 +207,24 @@ def extract_sections_spacy_fixed(text:str)->dict:
         out["StructuredExperiences"].append(exp)
 
     # ---------- education / training / certifications -----------------------------------
-    doc2 = nlp(text)
-    for mid, s, e in edu_matcher(doc2):
-        bucket = "Education" if nlp.vocab.strings[mid]=="EDU" else "Training"
-        out[bucket].append(doc2[s:e].text)
+    # Use spaCy matchers if available, otherwise use regex fallback
+    if doc and edu_matcher and is_spacy_available():
+        for mid, s, e in edu_matcher(doc):
+            bucket = "Education" if nlp.vocab.strings[mid]=="EDU" else "Training"
+            out[bucket].append(doc[s:e].text)
+    else:
+        # Regex fallback for education extraction
+        edu_patterns = [
+            r"(?i)\b(?:bachelor|master|phd|doctorate|associate).*(?:degree|of|in)\s+([^,\n]+)",
+            r"(?i)\b(?:bs|ba|ms|ma|mba|phd)\s+(?:in\s+)?([^,\n]+)",
+            r"(?i)\b(?:university|college|institute).*\n?.*(?:bachelor|master|phd|degree)",
+        ]
+
+        for pattern in edu_patterns:
+            matches = re.findall(pattern, text)
+            for match in matches:
+                if isinstance(match, str) and len(match.strip()) > 3:
+                    out["Education"].append(match.strip())
 
     # Also extract certifications section manually
     cert_section_found = False
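For reference, here is how the first of the new `edu_patterns` behaves on a typical education line (a worked example, not part of the committed file):

```python
import re

line = "Bachelor of Science in Computer Science, XYZ University"
pattern = r"(?i)\b(?:bachelor|master|phd|doctorate|associate).*(?:degree|of|in)\s+([^,\n]+)"

# Greedy .* backtracks to the last "degree"/"of"/"in", so the capture group grabs
# the field of study; [^,\n]+ stops at the comma before the school name.
print(re.findall(pattern, line))  # ['Computer Science']
```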
utils/reporting.py CHANGED
@@ -1,4 +1,7 @@
 # utils/reporting.py
+import re
+import fitz  # PyMuPDF
+from io import BytesIO
 from config import supabase, embedding_model, client, query
 from .screening import evaluate_resumes
 
utils/screening.py CHANGED
@@ -1,14 +1,14 @@
 # utils/screening.py
 from .parser import parse_resume, extract_email, summarize_resume
 from .hybrid_extractor import extract_resume_sections
+from .spacy_loader import get_nlp, is_spacy_available
 from config import supabase, embedding_model, client
-import spacy
 from fuzzywuzzy import fuzz
 from sentence_transformers import util
 import streamlit as st
 
-# Load spaCy model for keyword extraction
-nlp = spacy.load("en_core_web_sm")
+# Load spaCy model for keyword extraction with fallback
+nlp = get_nlp()
 from sklearn.feature_extraction.text import TfidfVectorizer
 
 def extract_keywords(text, top_n=10):
@@ -18,8 +18,17 @@ def extract_keywords(text, top_n=10):
     if not text.strip():
         return []
 
-    doc = nlp(text.lower())
-    keywords = [t.text for t in doc if t.pos_ in {"NOUN", "PROPN", "VERB", "ADJ"} and not t.is_stop]
+    # Use spaCy for keyword extraction if available, otherwise use simple word extraction
+    if nlp and is_spacy_available():
+        doc = nlp(text.lower())
+        keywords = [t.text for t in doc if t.pos_ in {"NOUN", "PROPN", "VERB", "ADJ"} and not t.is_stop]
+    else:
+        # Fallback to simple word extraction without POS tagging
+        import re
+        words = re.findall(r'\b[a-zA-Z]{3,}\b', text.lower())
+        # Filter out common stop words manually
+        stop_words = {'the', 'and', 'for', 'are', 'but', 'not', 'you', 'all', 'can', 'had', 'her', 'was', 'one', 'our', 'out', 'day', 'get', 'has', 'him', 'his', 'how', 'man', 'new', 'now', 'old', 'see', 'two', 'way', 'who', 'its', 'did', 'yes', 'she', 'may', 'say', 'use', 'her', 'any', 'top', 'own', 'too', 'off', 'far', 'set', 'why', 'ask', 'men', 'run', 'end', 'put', 'lot', 'big', 'eye', 'try', 'yet', 'car', 'eat', 'job', 'sit', 'cut', 'let', 'got', 'buy', 'win', 'box', 'hit', 'add', 'oil', 'six', 'war', 'age', 'boy', 'due', 'bed', 'hot', 'cup', 'cut', 'gun', 'kid', 'red', 'sea', 'art', 'air', 'low', 'pay', 'act', 'bit', 'bad', 'law', 'dog', 'key', 'bit', 'arm', 'tax', 'gas'}
+        keywords = [word for word in words if word not in stop_words]
 
     if not keywords:
         return []
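Worked through on a short snippet, the new non-spaCy branch of `extract_keywords` reduces to two steps (a sketch; the stop-word set is abbreviated here):

```python
import re

text = "Built ETL pipelines in Python and SQL on AWS"

# Step 1: words of three or more letters ("in"/"on" are dropped by length alone)
words = re.findall(r"\b[a-zA-Z]{3,}\b", text.lower())
# ['built', 'etl', 'pipelines', 'python', 'and', 'sql', 'aws']

# Step 2: manual stop-word filter (the full set lives in the diff above)
stop_words = {"the", "and", "for"}
print([w for w in words if w not in stop_words])
# ['built', 'etl', 'pipelines', 'python', 'sql', 'aws']
```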
utils/spacy_loader.py ADDED
@@ -0,0 +1,67 @@
+"""
+SpaCy Model Loader with fallbacks for deployment environments
+"""
+import spacy
+import logging
+import subprocess
+import sys
+
+logger = logging.getLogger(__name__)
+
+def load_spacy_model(model_name="en_core_web_sm"):
+    """
+    Load spaCy model with fallbacks for deployment environments
+
+    Args:
+        model_name: Name of the spaCy model to load
+
+    Returns:
+        Loaded spaCy model or None if all attempts fail
+    """
+
+    # Try to load the model directly first
+    try:
+        nlp = spacy.load(model_name)
+        logger.info(f"✅ Successfully loaded spaCy model: {model_name}")
+        return nlp
+    except OSError as e:
+        logger.warning(f"Failed to load {model_name} directly: {e}")
+
+    # Try to download and install the model
+    try:
+        logger.info(f"Attempting to download {model_name}...")
+        subprocess.check_call([
+            sys.executable, "-m", "spacy", "download", model_name
+        ])
+        nlp = spacy.load(model_name)
+        logger.info(f"✅ Successfully downloaded and loaded spaCy model: {model_name}")
+        return nlp
+    except (subprocess.CalledProcessError, OSError) as e:
+        logger.warning(f"Failed to download {model_name}: {e}")
+
+    # Try to load blank English model as fallback
+    try:
+        logger.info("Loading blank English model as fallback...")
+        nlp = spacy.blank("en")
+        logger.info("✅ Successfully loaded blank English model")
+        return nlp
+    except Exception as e:
+        logger.error(f"Failed to load blank English model: {e}")
+
+    # Final fallback - return None
+    logger.error("❌ All spaCy model loading attempts failed")
+    return None
+
+# Global instance
+_nlp_instance = None
+
+def get_nlp():
+    """Get the global spaCy model instance"""
+    global _nlp_instance
+    if _nlp_instance is None:
+        _nlp_instance = load_spacy_model()
+    return _nlp_instance
+
+def is_spacy_available():
+    """Check if spaCy model is available"""
+    return get_nlp() is not None
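Typical usage of the new loader (a sketch): `get_nlp()` caches a module-level singleton, so repeated callers share one pipeline. One caveat worth noting: the `spacy.blank("en")` fallback is not `None`, so `is_spacy_available()` also returns True for a blank pipeline, in which case POS- and entity-dependent callers get empty results rather than an exception.

```python
from utils.spacy_loader import get_nlp, is_spacy_available

nlp = get_nlp()  # first call loads (or downloads) the model; later calls reuse _nlp_instance
if is_spacy_available():
    doc = nlp("Jane Doe is a senior QA engineer at Acme in Austin.")
    # Empty under the blank-"en" fallback, since that pipeline has no NER component
    print([(ent.text, ent.label_) for ent in doc.ents])
```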