# Spaces: gauravbox / TalentLensAI (Running)
# File size: 12,707 Bytes

# pages/Format_Resume.py

import os, sys, streamlit as st
import json
from io import BytesIO

# Add the parent of this file's directory to sys.path so the `utils`
# package imported below can be resolved when this page is run from the
# pages/ subdirectory.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Force reload environment variables for Streamlit.
# override=True is passed so that values from the .env file are meant to take
# precedence over variables already set in the process environment.
from dotenv import load_dotenv
load_dotenv(override=True)

from utils.hybrid_extractor import extract_resume_sections
from utils.builder   import build_resume_from_data
from utils.parser import parse_resume            # existing project parser; called below as parse_resume(file, ext)

# Location of the blank .docx template (header/footer only), expressed
# relative to the directory that contains this page.
template_path = os.path.join(os.path.dirname(__file__), '..', 'templates', 'blank_resume.docx')

# Page chrome: centered layout, sidebar collapsed on first render.
st.set_page_config(page_title='Resume Formatter', layout='centered', initial_sidebar_state="collapsed")

# Stylesheet that hides the sidebar and its collapse control. It targets
# several generated class names plus the sidebar's data-testid attributes.
_HIDE_SIDEBAR_CSS = """
    <style>
        .css-1d391kg {display: none}
        .css-1rs6os {display: none}
        .css-17ziqus {display: none}
        [data-testid="stSidebar"] {display: none}
        [data-testid="collapsedControl"] {display: none}
        .css-1lcbmhc {display: none}
        .css-1outpf7 {display: none}
        .sidebar .sidebar-content {display: none}
    </style>
"""
st.markdown(_HIDE_SIDEBAR_CSS, unsafe_allow_html=True)

# Navigation back to the main page, rendered above the title.
home_clicked = st.button("🏠 Home", help="Return to main TalentLens.AI page")
if home_clicked:
    st.switch_page("app.py")

st.title('📄 Resume Formatter')
st.markdown("---")

uploaded = st.file_uploader('Upload Resume (PDF or DOCX)', type=['pdf','docx'])
if not uploaded:
    # Nothing uploaded yet: render the onboarding content, then halt this
    # script run so none of the processing steps below execute.
    st.info("Please upload a resume to get started.")

    usage_steps = """
    1. **Upload your resume** in PDF or DOCX format
    2. **Review extracted data** - our AI will parse your resume sections
    3. **Edit if needed** - make any corrections to the extracted information
    4. **Generate formatted resume** - download a professionally formatted version
    """
    st.markdown("### 💡 How to Use Resume Formatter")
    st.markdown(usage_steps)

    # Feature blurbs, one per column (left to right).
    feature_blurbs = (
        """
        **🤖 AI-Powered Extraction:**
        - OpenAI GPT-4o for highest accuracy
        - Hugging Face Cloud as backup
        - Regex fallback for reliability
        """,
        """
        **📄 Professional Formatting:**
        - Clean, modern design
        - Consistent layout
        - ATS-friendly format
        """,
    )
    st.markdown("### ✨ Features")
    for feature_col, blurb in zip(st.columns(2), feature_blurbs):
        with feature_col:
            st.markdown(blurb)

    st.stop()

uploaded_name = uploaded.name
st.success(f'Uploaded: {uploaded_name}')

# 1) Extract raw text
# The extension is whatever follows the last dot of the file name,
# lower-cased; it is handed to parse_resume together with the upload.
ext = uploaded_name.rsplit('.', 1)[-1].lower()
resume_text = parse_resume(uploaded, ext)

# Show the parser's output in a text area.
st.subheader('📄 Raw Resume Text')
st.text_area(label='Raw Resume Text', value=resume_text, height=300, label_visibility='visible')

# 2) Parse into structured fields using improved hybrid approach
st.subheader('🔍 Extracting Resume Data...')

import copy
from utils.hybrid_extractor import HybridResumeExtractor

# Streamlit re-executes this whole script on every widget interaction
# (typing in an edit box, pressing a button). Without a cache each rerun would
# repeat the AI extraction, so the result is kept in session state and reused
# for as long as the raw resume text is unchanged. Trade-off: a fallback
# result is also reused until a different resume is uploaded or the session
# is restarted.
_extraction_cache = st.session_state.get('_resume_extraction')
if _extraction_cache is None or _extraction_cache.get('text') != resume_text:
    with st.spinner('Analyzing resume with AI models...'):
        # A single extractor instance supplies both the parsed sections and
        # the statistics, so the reported method is the one that actually
        # produced `data` (previously the pipeline ran twice: once for the
        # data and once more just to read the stats).
        # NOTE(review): assumes HybridResumeExtractor.extract_sections returns
        # the same sections mapping as utils.hybrid_extractor.extract_resume_sections
        # called with the same flags - confirm against utils/hybrid_extractor.py.
        extractor = HybridResumeExtractor(
            prefer_ai=True,
            use_openai=True,      # Try OpenAI GPT-4o first (best results)
            use_hf_cloud=True     # Fallback to HF Cloud (good backup)
        )
        _extracted_sections = extractor.extract_sections(resume_text)
        _extraction_stats = extractor.get_extraction_stats()
    _extraction_cache = {
        'text': resume_text,
        'data': _extracted_sections,
        'stats': _extraction_stats,
    }
    st.session_state['_resume_extraction'] = _extraction_cache

# Work on a copy: later sections of this page modify `data` in place, and the
# cached extraction must stay pristine for the next rerun.
data = copy.deepcopy(_extraction_cache['data'])
stats = _extraction_cache['stats']

# Show extraction success and method used
method_used = stats.get('method_used', 'unknown')
if method_used == 'openai_gpt4o':
    st.success('✅ Extracted using OpenAI GPT-4o (highest accuracy)')
elif method_used == 'huggingface_cloud':
    st.info('ℹ️ Extracted using Hugging Face Cloud (good accuracy)')
else:
    st.warning('⚠️ Used fallback extraction method')

# Show extraction quality indicators
# `or []` (rather than a .get default) also covers a key that is present but
# holds None, which would otherwise make len() raise TypeError.
name_found = bool(data.get('Name'))
experiences_found = len(data.get('StructuredExperiences') or [])
skills_found = len(data.get('Skills') or [])

# One metric per column: name presence, number of positions, number of skills.
col1, col2, col3 = st.columns(3)
with col1:
    st.metric("Name", "✅" if name_found else "❌", "Found" if name_found else "Missing")
with col2:
    st.metric("Job Experiences", experiences_found, f"{experiences_found} positions")
with col3:
    st.metric("Technical Skills", skills_found, f"{skills_found} skills")

# TEMP debugging aid (remove after testing): dump the extracted data as JSON.
import json
import textwrap

with st.expander("🔧 Debug: Raw Extraction Data"):
    raw_json = json.dumps(data, indent=2)
    # Every line of the dump is prefixed with two spaces before display.
    st.code(textwrap.indent(raw_json, "  "), language="json")

st.subheader('📋 Parsed Resume Sections')

# Two-column overview: personal details and education on the left,
# skills and certifications on the right.
left_col, right_col = st.columns(2)

with left_col:
    st.markdown("**👤 Personal Information**")

    parsed_name = data.get('Name')
    if not parsed_name:
        st.error("❌ Name not found")
    else:
        st.write(f"**Name:** {parsed_name}")

    summary_text = data.get('Summary')
    if not summary_text:
        st.warning("⚠️ No professional summary found")
    else:
        st.markdown("**📝 Professional Summary:**")
        st.write(summary_text)

    st.markdown("**🎓 Education**")
    education = data.get('Education', [])
    if not education:
        st.warning("⚠️ No education information found")
    else:
        for entry in education:
            st.write(f"• {entry}")

with right_col:
    st.markdown("**🛠️ Technical Skills**")
    skills = data.get('Skills', [])
    if not skills:
        st.error("❌ No technical skills found")
    else:
        # All skills on one comma-separated line.
        st.write(", ".join(skills))

        # Heuristic quality check: a skill whose lower-cased text contains
        # one of these substrings is counted as a company name.
        company_markers = ('abc', 'xyz', 'financial', 'insurance', 'solutions')
        company_names = []
        for skill in skills:
            lowered = skill.lower()
            if any(marker in lowered for marker in company_markers):
                company_names.append(skill)
        if company_names:
            st.warning(f"⚠️ Found {len(company_names)} company names in skills (will be cleaned)")

    # Certifications are optional: the heading only appears when there are entries.
    training = data.get('Training', [])
    if training:
        st.markdown("**📜 Certifications/Training**")
        for cert in training:
            st.write(f"• {cert}")
# Full-width work history: one collapsible panel per extracted position.
st.markdown("**💼 Professional Experience**")
experiences = data.get('StructuredExperiences', [])
if not experiences:
    st.error("❌ No work experience found")
else:
    # (label, key) pairs rendered at the top of each panel, in this order.
    detail_fields = (("Position", 'title'), ("Company", 'company'), ("Duration", 'date_range'))
    for position_no, job in enumerate(experiences, 1):
        header_title = job.get('title', 'Unknown Title')
        header_company = job.get('company', 'Unknown Company')
        with st.expander(f"Job {position_no}: {header_title} at {header_company}"):
            for label, field in detail_fields:
                st.write(f"**{label}:** {job.get(field, 'N/A')}")

            duties = job.get('responsibilities', [])
            if not duties:
                st.warning("⚠️ No responsibilities found for this position")
            else:
                st.write("**Key Responsibilities:**")
                for duty in duties:
                    st.write(f"• {duty}")

# Show editable sections for user to modify if needed.
# Each section of `data` is rendered according to its shape, and edited
# values are written back into `data` so the build step uses them.
st.subheader('✏️ Edit Extracted Data (Optional)')
with st.expander("Click to edit extracted data before formatting"):
    # Iterate over a snapshot because sections are reassigned inside the loop.
    for section, content in list(data.items()):
        st.markdown(f"**{section}:**")

        # pure list of strings → one entry per line in a text area
        if isinstance(content, list) and all(isinstance(item, str) for item in content):
            edited_content = st.text_area(
                label=section,
                value="\n".join(content),
                height=100,
                label_visibility='collapsed',
                key=f"edit_{section}"
            )
            # Update data with edited content; blank lines are dropped.
            data[section] = [line.strip() for line in edited_content.split('\n') if line.strip()]

        # any other structured value (list of dicts, mixed list, dict) →
        # show as JSON, read-only. Stringifying it into a text area and
        # writing that string back would corrupt the structure handed to the
        # resume builder.
        elif isinstance(content, (list, dict)):
            st.json(content)

        # everything else (e.g. single string)
        else:
            edited_content = st.text_area(
                label=section,
                # A missing value is shown as an empty box; str(None) would
                # put the literal text "None" into the resume data.
                value="" if content is None else str(content),
                height=100,
                label_visibility='collapsed',
                key=f"edit_{section}_str"
            )
            # Update data with edited content
            data[section] = edited_content

# 3) Build & download
st.subheader('📄 Generate Formatted Resume')

# Summary of what will go into the formatted resume, one metric per column.
sections_col, length_col, quality_col = st.columns(3)

with sections_col:
    # Number of sections holding a truthy (non-empty) value.
    populated_sections = sum(1 for value in data.values() if value)
    st.metric("Sections to Include", populated_sections, "sections")

with length_col:
    # Combined length of the string form of every non-empty section.
    total_content = 0
    for value in data.values():
        if value:
            total_content += len(str(value))
    st.metric("Content Length", f"{total_content:,}", "characters")

with quality_col:
    # Each of the four core sections that is present contributes 25 points.
    core_sections = ('Name', 'Summary', 'StructuredExperiences', 'Skills')
    quality_score = 25 * sum(1 for key in core_sections if data.get(key))
    st.metric("Quality Score", f"{quality_score}%", "completeness")

import re

# Build the formatted document on demand and offer it as a download.
if st.button('📄 Generate Formatted Resume', type='primary'):
    # Normalise the candidate name once. The key may be missing, None, or an
    # empty/whitespace-only string; a .get() default only covers the first
    # case, so every fallback below uses `or` instead.
    raw_name = data.get('Name')
    candidate_name = raw_name.strip() if isinstance(raw_name, str) else ''

    try:
        with st.spinner('Building formatted resume...'):
            # Build the resume document
            doc = build_resume_from_data(template_path, data)

            # Save to an in-memory buffer and rewind it for reading
            buf = BytesIO()
            doc.save(buf)
            buf.seek(0)

        st.success('✅ Resume formatted successfully!')

        # Show what was included (`or []` guards against None-valued sections)
        st.info(f"""
        **Formatted Resume Includes:**
        • Name: {candidate_name or 'Not found'}
        • Professional Summary: {'✅' if data.get('Summary') else '❌'}
        • Technical Skills: {len(data.get('Skills') or [])} items
        • Work Experience: {len(data.get('StructuredExperiences') or [])} positions
        • Education: {len(data.get('Education') or [])} items
        """)

        # Generate filename with candidate name: runs of characters other
        # than word characters, dots and hyphens (spaces, slashes, quotes, ...)
        # become a single underscore; an empty result falls back to 'Resume'.
        safe_name = re.sub(r'[^\w.-]+', '_', candidate_name).strip('._') or 'Resume'
        filename = f"{safe_name}_Formatted_Resume.docx"

        st.download_button(
            '📥 Download Formatted Resume',
            data=buf,
            file_name=filename,
            mime='application/vnd.openxmlformats-officedocument.wordprocessingml.document',
            help=f"Download the formatted resume for {candidate_name or 'candidate'}"
        )

    except Exception as e:
        # UI boundary: any build/save failure is reported to the user instead
        # of aborting the page.
        st.error(f"❌ Error generating formatted resume: {str(e)}")
        st.info("💡 Try editing the extracted data above to fix any issues, or contact support if the problem persists.")

# Static guidance on getting good extraction results.
tips_markdown = """
    **For best extraction results:**
    - Ensure your resume has clear section headers (e.g., "Professional Summary", "Technical Skills", "Work Experience")
    - Use consistent formatting for job entries (Title | Company | Dates)
    - List technical skills clearly, separated by commas
    - Include bullet points for job responsibilities
    
    **If extraction isn't perfect:**
    - Use the "Edit Extracted Data" section above to make corrections
    - The system will learn from different resume formats over time
    - OpenAI GPT-4o provides the most accurate extraction when available
    """
with st.expander("💡 Tips for Better Results"):
    st.markdown(tips_markdown)

# Extraction diagnostics: the method reported for this run plus two flags
# read from the extractor statistics.
ai_available_mark = '✅' if stats.get('ai_available') else '❌'
prefer_ai_mark = '✅' if stats.get('prefer_ai') else '❌'
with st.expander("🔧 Extraction Method Details"):
    st.markdown(f"""
    **Method Used:** {method_used}
    
    **Available Methods:**
    - **OpenAI GPT-4o**: Highest accuracy, best for complex formats
    - **Hugging Face Cloud**: Good accuracy, reliable backup
    - **Regex Fallback**: Basic extraction, used when AI methods fail
    
    **Current Status:**
    - OpenAI Available: {ai_available_mark}
    - AI Preferred: {prefer_ai_mark}
    """)

# Closing section: follow-up actions in three columns, then the page footer.
st.markdown("---")
st.markdown("### 🚀 What's Next?")

home_col, again_col, help_col = st.columns(3)

with home_col:
    go_home = st.button("🏠 Return to Home", use_container_width=True)
    if go_home:
        st.switch_page("app.py")

with again_col:
    # NOTE(review): st.rerun() only re-executes the script. The uploader near
    # the top of the page is created without a key, so this presumably leaves
    # the current upload selected - confirm whether a real reset is intended.
    format_another = st.button("📄 Format Another Resume", use_container_width=True)
    if format_another:
        st.rerun()

with help_col:
    for help_line in ("**Need Help?**", "Check the tips above or contact support"):
        st.markdown(help_line)

# Final footer: centered, muted branding line rendered as raw HTML.
st.markdown("---")
footer_html = "<div style='text-align: center; color: #666; padding: 20px;'>🚀 <strong>TalentLens.AI</strong> - Powered by AI for intelligent resume processing</div>"
st.markdown(footer_html, unsafe_allow_html=True)