# pages/Format_Resume.py
import os, sys, streamlit as st
import json
from io import BytesIO

# Add parent directory to path so we can import utils
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Force reload environment variables for Streamlit
from dotenv import load_dotenv
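# override=True lets values in .env replace variables already set in the process
# environment, so updated API keys take effect on the next rerun.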
load_dotenv(override=True)

from utils.hybrid_extractor import extract_resume_sections
from utils.builder import build_resume_from_data
from utils.parser import parse_resume  # whatever parse_resume you already have

# Path to your blank template (header/footer only)
template_path = os.path.join(
    os.path.dirname(__file__), '..', 'templates', 'blank_resume.docx'
)

st.set_page_config(
    page_title='Resume Formatter',
    layout='centered',
    initial_sidebar_state="collapsed"
)

# Hide sidebar completely with CSS
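# NOTE: the hashed css-* class names are auto-generated by Streamlit and change
# between releases; the [data-testid] selectors are the stable ones, and the
# class selectors are kept only as a best-effort fallback for older versions.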
st.markdown(""" | |
<style> | |
.css-1d391kg {display: none} | |
.css-1rs6os {display: none} | |
.css-17ziqus {display: none} | |
[data-testid="stSidebar"] {display: none} | |
[data-testid="collapsedControl"] {display: none} | |
.css-1lcbmhc {display: none} | |
.css-1outpf7 {display: none} | |
.sidebar .sidebar-content {display: none} | |
</style> | |
""", unsafe_allow_html=True) | |
# Home button at the top | |
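# st.switch_page takes a path relative to the app's entry script, so "app.py" returns to the main page.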
if st.button("🏠 Home", help="Return to main TalentLens.AI page"):
    st.switch_page("app.py")

st.title('📄 Resume Formatter')
st.markdown("---")

uploaded = st.file_uploader('Upload Resume (PDF or DOCX)', type=['pdf','docx'])

if not uploaded:
    st.info("Please upload a resume to get started.")

    # Show help information when no file is uploaded
    st.markdown("### 💡 How to Use Resume Formatter")
    st.markdown("""
    1. **Upload your resume** in PDF or DOCX format
    2. **Review extracted data** - our AI will parse your resume sections
    3. **Edit if needed** - make any corrections to the extracted information
    4. **Generate formatted resume** - download a professionally formatted version
    """)

    st.markdown("### ✨ Features")
    col1, col2 = st.columns(2)
    with col1:
        st.markdown("""
        **🤖 AI-Powered Extraction:**
        - OpenAI GPT-4o for highest accuracy
        - Hugging Face Cloud as backup
        - Regex fallback for reliability
        """)
    with col2:
        st.markdown("""
        **📋 Professional Formatting:**
        - Clean, modern design
        - Consistent layout
        - ATS-friendly format
        """)

    st.stop()
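# st.stop() above ends the run for the empty-state branch, so everything below
# only executes once a file has been uploaded.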
st.success(f'Uploaded: {uploaded.name}')

# 1) Extract raw text
ext = uploaded.name.split('.')[-1].lower()
resume_text = parse_resume(uploaded, ext)

st.subheader('📄 Raw Resume Text')
st.text_area(
    label='Raw Resume Text',
    value=resume_text,
    height=300,
    label_visibility='visible'
)

# 2) Parse into structured fields using improved hybrid approach
st.subheader('🔍 Extracting Resume Data...')

# Show extraction progress
with st.spinner('Analyzing resume with AI models...'):
    # Use OpenAI as primary, HF Cloud as backup
    data = extract_resume_sections(
        resume_text,
        prefer_ai=True,
        use_openai=True,    # Try OpenAI GPT-4o first (best results)
        use_hf_cloud=True   # Fallback to HF Cloud (good backup)
    )

# Show extraction success and method used
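# NOTE: extract_resume_sections() above doesn't report which backend produced
# the result, so a second extractor instance is created and run again purely to
# read its stats; this duplicates the extraction work.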
from utils.hybrid_extractor import HybridResumeExtractor
extractor = HybridResumeExtractor(prefer_ai=True, use_openai=True, use_hf_cloud=True)
extractor.extract_sections(resume_text)  # Just to get the method used
stats = extractor.get_extraction_stats()
method_used = stats.get('method_used', 'unknown')

if method_used == 'openai_gpt4o':
    st.success('✅ Extracted using OpenAI GPT-4o (highest accuracy)')
elif method_used == 'huggingface_cloud':
    st.info('ℹ️ Extracted using Hugging Face Cloud (good accuracy)')
else:
    st.warning('⚠️ Used fallback extraction method')

# Show extraction quality indicators
name_found = bool(data.get('Name'))
experiences_found = len(data.get('StructuredExperiences', []))
skills_found = len(data.get('Skills', []))

col1, col2, col3 = st.columns(3)
with col1:
    st.metric("Name", "✅" if name_found else "❌", "Found" if name_found else "Missing")
with col2:
    st.metric("Job Experiences", experiences_found, f"{experiences_found} positions")
with col3:
    st.metric("Technical Skills", skills_found, f"{skills_found} skills")

# 🚧 TEMP - remove after test (show raw JSON for debugging)
with st.expander("🔧 Debug: Raw Extraction Data"):
    import json, textwrap
    st.code(textwrap.indent(json.dumps(data, indent=2), " "), language="json")

st.subheader('📋 Parsed Resume Sections')

# Display sections in a more user-friendly way
col1, col2 = st.columns(2)

with col1:
    # Name and Summary
    st.markdown("**👤 Personal Information**")
    if data.get('Name'):
        st.write(f"**Name:** {data['Name']}")
    else:
        st.error("❌ Name not found")

    if data.get('Summary'):
        st.markdown("**📝 Professional Summary:**")
        st.write(data['Summary'])
    else:
        st.warning("⚠️ No professional summary found")

    # Education
    st.markdown("**🎓 Education**")
    education = data.get('Education', [])
    if education:
        for edu in education:
            st.write(f"• {edu}")
    else:
        st.warning("⚠️ No education information found")

with col2:
    # Skills
    st.markdown("**🛠️ Technical Skills**")
    skills = data.get('Skills', [])
    if skills:
        # Show skills in a nice format
        skills_text = ", ".join(skills)
        st.write(skills_text)

        # Show skills quality
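        # Heuristic: flag skill entries that look like company names (based on a
        # few placeholder keywords) so the user knows they will be cleaned before formatting.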
        company_names = [s for s in skills if any(word in s.lower() for word in ['abc', 'xyz', 'financial', 'insurance', 'solutions'])]
        if company_names:
            st.warning(f"⚠️ Found {len(company_names)} company names in skills (will be cleaned)")
    else:
        st.error("❌ No technical skills found")

    # Training/Certifications
    training = data.get('Training', [])
    if training:
        st.markdown("**📜 Certifications/Training**")
        for cert in training:
            st.write(f"• {cert}")

# Work Experience (full width)
st.markdown("**💼 Professional Experience**")
experiences = data.get('StructuredExperiences', [])
if experiences:
    for i, exp in enumerate(experiences, 1):
        with st.expander(f"Job {i}: {exp.get('title', 'Unknown Title')} at {exp.get('company', 'Unknown Company')}"):
            st.write(f"**Position:** {exp.get('title', 'N/A')}")
            st.write(f"**Company:** {exp.get('company', 'N/A')}")
            st.write(f"**Duration:** {exp.get('date_range', 'N/A')}")

            responsibilities = exp.get('responsibilities', [])
            if responsibilities:
                st.write("**Key Responsibilities:**")
                for resp in responsibilities:
                    st.write(f"• {resp}")
            else:
                st.warning("⚠️ No responsibilities found for this position")
else:
    st.error("❌ No work experience found")

# Show editable sections for user to modify if needed
st.subheader('✏️ Edit Extracted Data (Optional)')

with st.expander("Click to edit extracted data before formatting"):
    for section, content in data.items():
        st.markdown(f"**{section}:**")

        # pure list of strings
        if isinstance(content, list) and all(isinstance(i, str) for i in content):
            edited_content = st.text_area(
                label=section,
                value="\n".join(content),
                height=100,
                label_visibility='collapsed',
                key=f"edit_{section}"
            )
            # Update data with edited content
            data[section] = [line.strip() for line in edited_content.split('\n') if line.strip()]

        # list of dicts → show as JSON (read-only for now)
        elif isinstance(content, list) and all(isinstance(i, dict) for i in content):
            st.json(content)

        # everything else (e.g. single string)
        else:
            edited_content = st.text_area(
                label=section,
                value=str(content),
                height=100,
                label_visibility='collapsed',
                key=f"edit_{section}_str"
            )
            # Update data with edited content
            data[section] = edited_content

# 3) Build & download
st.subheader('🚀 Generate Formatted Resume')

# Show what will be included in the formatted resume
col1, col2, col3 = st.columns(3)
with col1:
    st.metric("Sections to Include", len([k for k, v in data.items() if v]), "sections")
with col2:
    total_content = sum(len(str(v)) for v in data.values() if v)
    st.metric("Content Length", f"{total_content:,}", "characters")
with col3:
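    # Simple completeness score: each of the four key sections (name, summary,
    # experience, skills) contributes 25%.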
    quality_score = (
        (1 if data.get('Name') else 0) +
        (1 if data.get('Summary') else 0) +
        (1 if data.get('StructuredExperiences') else 0) +
        (1 if data.get('Skills') else 0)
    ) * 25
    st.metric("Quality Score", f"{quality_score}%", "completeness")

if st.button('🚀 Generate Formatted Resume', type='primary'):
    try:
        with st.spinner('Building formatted resume...'):
            # Build the resume document
            doc = build_resume_from_data(template_path, data)

            # Save to an in-memory buffer and rewind it so the download button
            # streams the document from the start
            buf = BytesIO()
            doc.save(buf)
            buf.seek(0)

        st.success('✅ Resume formatted successfully!')

        # Show what was included
        st.info(f"""
        **Formatted Resume Includes:**
        • Name: {data.get('Name', 'Not found')}
        • Professional Summary: {'✅' if data.get('Summary') else '❌'}
        • Technical Skills: {len(data.get('Skills', []))} items
        • Work Experience: {len(data.get('StructuredExperiences', []))} positions
        • Education: {len(data.get('Education', []))} items
        """)

        # Generate filename with candidate name
        candidate_name = data.get('Name', 'Resume').replace(' ', '_')
        filename = f"{candidate_name}_Formatted_Resume.docx"

        st.download_button(
            '📥 Download Formatted Resume',
            data=buf,
            file_name=filename,
            mime='application/vnd.openxmlformats-officedocument.wordprocessingml.document',
            help=f"Download the formatted resume for {data.get('Name', 'candidate')}"
        )

    except Exception as e:
        st.error(f"❌ Error generating formatted resume: {str(e)}")
        st.info("💡 Try editing the extracted data above to fix any issues, or contact support if the problem persists.")

# Add helpful tips
with st.expander("💡 Tips for Better Results"):
    st.markdown("""
    **For best extraction results:**
    - Ensure your resume has clear section headers (e.g., "Professional Summary", "Technical Skills", "Work Experience")
    - Use consistent formatting for job entries (Title | Company | Dates)
    - List technical skills clearly, separated by commas
    - Include bullet points for job responsibilities

    **If extraction isn't perfect:**
    - Use the "Edit Extracted Data" section above to make corrections
    - The system will learn from different resume formats over time
    - OpenAI GPT-4o provides the most accurate extraction when available
    """)

# Show extraction method info
with st.expander("🔧 Extraction Method Details"):
    st.markdown(f"""
    **Method Used:** {method_used}

    **Available Methods:**
    - **OpenAI GPT-4o**: Highest accuracy, best for complex formats
    - **Hugging Face Cloud**: Good accuracy, reliable backup
    - **Regex Fallback**: Basic extraction, used when AI methods fail

    **Current Status:**
    - OpenAI Available: {'✅' if stats.get('ai_available') else '❌'}
    - AI Preferred: {'✅' if stats.get('prefer_ai') else '❌'}
    """)

# Footer navigation and additional actions
st.markdown("---")
st.markdown("### 🚀 What's Next?")

col1, col2, col3 = st.columns(3)
with col1:
    if st.button("🏠 Return to Home", use_container_width=True):
        st.switch_page("app.py")
with col2:
    if st.button("🔄 Format Another Resume", use_container_width=True):
        st.rerun()
with col3:
    st.markdown("**Need Help?**")
    st.markdown("Check the tips above or contact support")

# Final footer
st.markdown("---")
st.markdown(
    "<div style='text-align: center; color: #666; padding: 20px;'>"
    "🎯 <strong>TalentLens.AI</strong> - Powered by AI for intelligent resume processing"
    "</div>",
    unsafe_allow_html=True
)