# Spaces:
#
# gauravbox
# /
#
# TalentLensAI
#
# Running
#
# File size: 13,133 Bytes
#
# c2f9ec8

from datetime import datetime
from dateutil.parser import parse as date_parse
import re, math
from docx import Document
from docx.shared import Pt
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT, WD_ALIGN_PARAGRAPH
import logging

logger = logging.getLogger(__name__)

# ---------- helpers ---------------------------------------------------
def _date(dt_str:str)->datetime:
    try:    return date_parse(dt_str, default=datetime(1900,1,1))
    except: return datetime(1900,1,1)

def fmt_range(raw: str) -> str:
    """Normalise a date-range string to ``"<Month Year> – <Month Year>"``.

    The range is split on its separator, each side is parsed with ``_date``
    and rendered with ``strftime("%B %Y")``, and the sides are re-joined with
    an en dash. "present" (any case) is rendered as "Present". A side that
    cannot be parsed into a date with a year is kept as written instead of
    being rendered as a made-up "January 1900". Empty sides are dropped.

    Args:
        raw: The range text, e.g. ``"Jan 2020 - Present"``. Falsy values
            yield an empty string.

    Returns:
        The formatted range, or ``""`` when there is nothing to format.
    """
    if not raw:
        return ""

    # Pick the least ambiguous separator available so that hyphens *inside*
    # a date ("2020-01", "Mar-2021") are not mistaken for the range separator.
    if re.search(r"[\u2013\u2014]", raw):            # en dash / em dash
        pieces = re.split(r"\s*[\u2013\u2014]\s*", raw)
    elif re.search(r"\s-|-\s", raw):                 # hyphen next to whitespace
        pieces = re.split(r"\s+-\s*|\s*-\s+", raw)
    else:                                            # bare hyphen: "2019-2021"
        pieces = raw.split("-")

    formatted_parts = []
    for piece in pieces:
        piece = piece.strip()
        if not piece:
            # A trailing/leading separator ("2020 - ") yields an empty side.
            continue
        if piece.lower() == "present":
            formatted_parts.append("Present")
            continue

        parsed = _date(piece)
        # _date never raises: it fills every missing field from 1 Jan 1900 and
        # returns exactly that date on failure. A year of 1900 therefore means
        # no usable year was found, so keep the original text.
        if parsed.year == 1900:
            formatted_parts.append(piece)
        else:
            formatted_parts.append(parsed.strftime("%B %Y"))

    return " – ".join(formatted_parts)

# ---------- main ------------------------------------------------------
# Keywords tried, in this order, when guessing a job title from the summary.
_TITLE_KEYWORDS = (
    "engineer", "developer", "analyst", "manager", "specialist",
    "consultant", "architect", "lead", "director", "coordinator",
)


def _as_list(value) -> list:
    """Normalise a section value to a list (falsy -> [], lone scalar -> [scalar])."""
    if not value:
        return []
    if isinstance(value, (list, tuple, set, frozenset)):
        return list(value)
    # A bare string must not be iterated character by character.
    return [value]


def _range_start(exp: dict) -> datetime:
    """Return the parsed start of an experience entry's ``date_range``."""
    raw = exp.get("date_range", "")
    # Prefer the en dash as separator, then the hyphen, else use the whole text.
    for sep in ("–", "-"):
        if sep in raw:
            return _date(raw.split(sep)[0])
    return _date(raw)


def _log_header_footer_counts(doc, prefix: str = "", verb: str = "has") -> None:
    """Log how many paragraphs each section's header and footer contain."""
    for i, section_obj in enumerate(doc.sections):
        if section_obj.header:
            logger.info("BUILDER: %sSection %s header %s %s paragraphs",
                        prefix, i, verb, len(section_obj.header.paragraphs))
        if section_obj.footer:
            logger.info("BUILDER: %sSection %s footer %s %s paragraphs",
                        prefix, i, verb, len(section_obj.footer.paragraphs))


def _title_from_experiences(exps) -> str:
    """Return the title of the dict experience with the latest start date."""
    try:
        dict_exps = [e for e in exps if isinstance(e, dict)]
        if not dict_exps:
            return ""
        return max(dict_exps, key=_range_start).get("title", "")
    except Exception:
        # Malformed date_range values (e.g. None): fall back to the first
        # dict experience that carries a title.
        for exp in exps:
            if isinstance(exp, dict) and exp.get("title"):
                return exp.get("title", "")
        return ""


def _title_from_summary(summary: str) -> str:
    """Guess a job title from the summary text, or return ``""``."""
    for keyword in _TITLE_KEYWORDS:
        match = re.search(rf'(?i)(.*?{keyword})', summary)
        if not match:
            continue
        candidate = match.group(1).strip()
        # Drop one leading qualifier so "Senior Data Engineer" -> "Data Engineer".
        candidate = re.sub(r'^(results-driven|experienced|senior|junior|lead)\s+', '',
                           candidate, flags=re.I)
        if 3 < len(candidate) < 50:
            return candidate.title()
    return ""


def build_resume_from_data(tmpl: str, sections: dict) -> Document:
    """Build a formatted resume document from parsed resume sections.

    The template is opened, its body paragraphs are blanked and its body
    tables removed (headers and footers are not touched), and the resume
    content is then appended to the body.

    Args:
        tmpl: Template location passed straight to ``Document(...)``.
        sections: Parsed resume data. Keys read here:
            ``"Name"``, ``"Summary"``, ``"Skills"``, ``"Education"``,
            ``"Training"`` and ``"StructuredExperiences"``. Each structured
            experience is either a plain string (rendered as one bullet) or a
            dict with ``"title"``, ``"company"``, ``"date_range"`` and
            ``"responsibilities"`` (a list of strings or a single string).

    Returns:
        The populated document object; saving it is left to the caller.
    """
    logger.info("BUILDER: Attempting to load document template from: %s", tmpl)
    doc = Document(tmpl)
    logger.info("BUILDER: Template %s loaded successfully.", tmpl)

    # Log the template state
    logger.info("BUILDER: Template has %s sections", len(doc.sections))
    _log_header_footer_counts(doc)

    # MOST CONSERVATIVE APPROACH: blank the paragraphs but keep the elements,
    # so document structure (including section properties) is preserved.
    logger.info("BUILDER: Before clearing - Document has %s paragraphs and %s tables",
                len(doc.paragraphs), len(doc.tables))

    for paragraph in doc.paragraphs:
        # Assigning text replaces all existing runs, so no per-run clearing
        # is needed beforehand.
        paragraph.text = ""

    # Remove tables (these are less likely to affect sections)
    for table in list(doc.tables):
        tbl_element = table._element
        tbl_element.getparent().remove(tbl_element)

    logger.info("BUILDER: After clearing - Document has %s paragraphs and %s tables",
                len(doc.paragraphs), len(doc.tables))

    # Verify headers/footers are still intact
    logger.info("BUILDER: After clearing - Document still has %s sections", len(doc.sections))
    _log_header_footer_counts(doc, verb="still has")
    logger.info("BUILDER: Template preserved with original headers and footers")

    # --- easy builders ---
    def heading(txt):
        run = doc.add_paragraph().add_run(txt)
        run.bold = True
        run.font.size = Pt(12)

    def bullet(txt, lvl=0):
        para = doc.add_paragraph()
        para.paragraph_format.left_indent = Pt(lvl * 12)
        para.add_run(f"• {txt}").font.size = Pt(11)

    def two_col(left, right):
        # One-row table: bold text on the left, right-aligned italic on the right.
        tbl = doc.add_table(rows=1, cols=2)
        tbl.autofit = True
        tbl.cell(0, 0).paragraphs[0].add_run(left).bold = True
        right_para = tbl.cell(0, 1).paragraphs[0]
        right_para.alignment = WD_ALIGN_PARAGRAPH.RIGHT
        right_para.add_run(right).italic = True

    # --- header (name + current role) ---
    exps = sections.get("StructuredExperiences", [])
    if exps:
        cur_title = _title_from_experiences(exps)
    else:
        # No structured experiences: try to extract a job title from the summary
        summary = sections.get("Summary", "")
        cur_title = _title_from_summary(summary) if summary else ""

    if sections.get("Name"):
        p = doc.add_paragraph()
        p.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
        run = p.add_run(sections["Name"])
        run.bold = True
        run.font.size = Pt(16)
    if cur_title:
        p = doc.add_paragraph()
        p.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
        p.add_run(cur_title).font.size = Pt(12)

    # --- summary ---
    if sections.get("Summary"):
        heading("Professional Summary:")
        pg = doc.add_paragraph()
        pg.paragraph_format.first_line_indent = Pt(12)
        pg.add_run(sections["Summary"]).font.size = Pt(11)

    # --- skills ---
    # De-duplicate and sort; skip blanks and non-strings so neither sorted()
    # nor the table fill can fail on mixed input.
    skills = sorted({s.strip() for s in _as_list(sections.get("Skills"))
                     if isinstance(s, str) and s.strip()})
    if skills:
        heading("Skills:")
        cols = 3
        rows = math.ceil(len(skills) / cols)
        tbl = doc.add_table(rows=rows, cols=cols)
        tbl.autofit = True
        # Fill row by row, left to right.
        for k, skill in enumerate(skills):
            tbl.cell(k // cols, k % cols).paragraphs[0].add_run(f"• {skill}").font.size = Pt(11)

    # --- experience ---
    if exps:
        heading("Professional Experience:")
        for e in exps:
            if isinstance(e, str):
                # A plain string becomes a basic one-line experience entry
                bullet(e, 0)
                continue
            if not isinstance(e, dict):
                # Neither string nor dict: nothing sensible to render
                continue

            title = e.get("title", "")
            company = e.get("company", "")
            date_range = e.get("date_range", "")
            responsibilities = e.get("responsibilities", [])

            # Job header: "Title | Company" on the left, dates on the right
            two_col(" | ".join(filter(None, [title, company])),
                    fmt_range(date_range))

            if isinstance(responsibilities, list):
                for resp in responsibilities:
                    if isinstance(resp, str) and resp.strip():
                        bullet(resp, 1)
            elif isinstance(responsibilities, str) and responsibilities.strip():
                bullet(responsibilities, 1)
    else:
        # No structured experiences: derive a single entry from the summary
        heading("Professional Experience:")
        summary = sections.get("Summary", "")

        if summary and cur_title:
            years_match = re.search(r'(\d+)\s+years?\s+of\s+experience', summary, re.I)
            years_text = f"{years_match.group(1)} years of experience" if years_match else "Multiple years of experience"

            two_col(cur_title, years_text)

            # Keep longer sentences that mention an experience-related keyword
            keywords = ['expert', 'specializing', 'experience', 'developing',
                        'designing', 'implementing', 'managing', 'leading']
            responsibilities = []
            for sentence in re.split(r'[.!]', summary):
                sentence = sentence.strip()
                if len(sentence) > 30 and any(keyword in sentence.lower() for keyword in keywords):
                    responsibilities.append(sentence)

            for resp in responsibilities[:5]:  # Limit to 5 key points
                bullet(resp.strip(), 1)
        else:
            # Fallback message
            pg = doc.add_paragraph()
            pg.add_run("Experience details are included in the Professional Summary above.").font.size = Pt(11)
            pg.add_run(" For specific job titles, companies, and dates, please refer to the original resume.").font.size = Pt(11)

    # --- job history timeline (chronological list) ---
    if exps:
        # Only dict experiences that have both a title and a date range
        dict_exps = [e for e in exps if isinstance(e, dict) and e.get("title") and e.get("date_range")]

        if dict_exps:
            try:
                # Most recent start date first
                sorted_exps = sorted(dict_exps, key=_range_start, reverse=True)
            except Exception:
                # If sorting fails (e.g. non-string date_range), keep input order
                sorted_exps = dict_exps

            heading("Career Timeline:")
            for exp in sorted_exps:
                title = exp.get("title", "")
                company = exp.get("company", "")
                date_range = exp.get("date_range", "")

                # Format: "Job Title at Company (Dates)"
                timeline_entry = f"{title} at {company}" if company else title
                if date_range:
                    timeline_entry += f" ({fmt_range(date_range)})"

                bullet(timeline_entry, 0)

    # --- education / training ---
    processed_education = []
    experience_years = None

    for ed in _as_list(sections.get("Education")):
        if not isinstance(ed, str):
            continue

        # Clean up the education entry (remove bullets)
        clean_ed = ed.replace('•', '').strip()
        if not clean_ed:
            continue
        if re.match(r'^\d+\s+years?$', clean_ed, re.I):
            # "5 years" is an experience duration, not education
            experience_years = clean_ed
        else:
            processed_education.append(clean_ed)

    if processed_education:
        heading("Education:")
        for ed in processed_education:
            bullet(ed)
    elif experience_years:
        # Only an experience duration was found: show it as a note
        heading("Education:")
        pg = doc.add_paragraph()
        pg.add_run(f"Professional experience: {experience_years}").font.size = Pt(11)

    training = [tr for tr in _as_list(sections.get("Training"))
                if isinstance(tr, str) and tr.strip()]
    if training:
        heading("Training:")
        for tr in training:
            bullet(tr)

    # Final diagnostic before returning
    logger.info("BUILDER: FINAL STATE - Document has %s sections", len(doc.sections))
    _log_header_footer_counts(doc, prefix="FINAL - ")

    return doc