Spaces:
Running
Running
Johnny
feat: Complete Format_Resume.py system with OpenAI GPT-4o integration and template preservation - Added Format_Resume.py Streamlit page with OpenAI GPT-4o primary extraction, HF Cloud backup, 5-tier fallback system, template preservation with Qvell branding, contact info extraction, skills cleaning, career timeline generation, and comprehensive utils restructure (10/11 files required). Renamed app.py to TalentLens.py, added blank_resume.docx template, updated .gitignore for Salesforce exclusion.
c2f9ec8
from datetime import datetime | |
from dateutil.parser import parse as date_parse | |
import re, math | |
from docx import Document | |
from docx.shared import Pt | |
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT, WD_ALIGN_PARAGRAPH | |
import logging | |
logger = logging.getLogger(__name__) | |
# ---------- helpers --------------------------------------------------- | |
def _date(dt_str:str)->datetime: | |
try: return date_parse(dt_str, default=datetime(1900,1,1)) | |
except: return datetime(1900,1,1) | |
def fmt_range(raw:str)->str: | |
if not raw: return "" | |
parts = [p.strip() for p in re.split(r"\s*[β-]\s*", raw)] | |
formatted_parts = [] | |
for part in parts: | |
if part.lower() == "present": | |
formatted_parts.append("Present") | |
else: | |
try: | |
date_obj = _date(part) | |
formatted_parts.append(date_obj.strftime("%B %Y")) | |
except: | |
formatted_parts.append(part) # fallback to original text | |
return " β ".join(formatted_parts) | |
# ---------- main ------------------------------------------------------ | |
def build_resume_from_data(tmpl:str, sections:dict)->Document: | |
logger.info(f"BUILDER: Attempting to load document template from: {tmpl}") | |
doc = Document(tmpl) | |
logger.info(f"BUILDER: Template {tmpl} loaded successfully.") | |
# Log the template state | |
logger.info(f"BUILDER: Template has {len(doc.sections)} sections") | |
for i, section_obj in enumerate(doc.sections): | |
if section_obj.header: | |
logger.info(f"BUILDER: Section {i} header has {len(section_obj.header.paragraphs)} paragraphs") | |
if section_obj.footer: | |
logger.info(f"BUILDER: Section {i} footer has {len(section_obj.footer.paragraphs)} paragraphs") | |
# MOST CONSERVATIVE APPROACH: Clear paragraph content but don't remove elements | |
# This should preserve all document structure including sections | |
logger.info(f"BUILDER: Before clearing - Document has {len(doc.paragraphs)} paragraphs and {len(doc.tables)} tables") | |
# Clear paragraph text content only, don't remove elements | |
for paragraph in doc.paragraphs: | |
# Clear all runs in the paragraph but keep the paragraph element | |
for run in paragraph.runs: | |
run.text = "" | |
# Also clear the paragraph text directly | |
paragraph.text = "" | |
# Remove tables (these are less likely to affect sections) | |
tables_to_remove = list(doc.tables) # Create a copy of the list | |
for table in tables_to_remove: | |
tbl = table._element | |
tbl.getparent().remove(tbl) | |
logger.info(f"BUILDER: After clearing - Document has {len(doc.paragraphs)} paragraphs and {len(doc.tables)} tables") | |
# Verify headers/footers are still intact | |
logger.info(f"BUILDER: After clearing - Document still has {len(doc.sections)} sections") | |
for i, section_obj in enumerate(doc.sections): | |
if section_obj.header: | |
logger.info(f"BUILDER: Section {i} header still has {len(section_obj.header.paragraphs)} paragraphs") | |
if section_obj.footer: | |
logger.info(f"BUILDER: Section {i} footer still has {len(section_obj.footer.paragraphs)} paragraphs") | |
logger.info(f"BUILDER: Template preserved with original headers and footers") | |
# --- easy builders --- | |
def heading(txt): pg=doc.add_paragraph(); r=pg.add_run(txt); r.bold=True; r.font.size=Pt(12) | |
def bullet(txt,lvl=0): p=doc.add_paragraph(); p.paragraph_format.left_indent=Pt(lvl*12); p.add_run(f"β’ {txt}").font.size=Pt(11) | |
def two_col(l,r): | |
tbl=doc.add_table(rows=1,cols=2); tbl.autofit=True | |
tbl.cell(0,0).paragraphs[0].add_run(l).bold=True | |
rp = tbl.cell(0,1).paragraphs[0]; rp.alignment=WD_ALIGN_PARAGRAPH.RIGHT | |
rr = rp.add_run(r); rr.italic=True | |
# --- header (name + current role) --- | |
exps = sections.get("StructuredExperiences",[]) | |
if exps: | |
try: | |
# Filter to only dictionary experiences | |
dict_exps = [e for e in exps if isinstance(e, dict)] | |
if dict_exps: | |
newest = max(dict_exps, key=lambda e: _date(e.get("date_range","").split("β")[0] if "β" in e.get("date_range","") else e.get("date_range","").split("-")[0] if "-" in e.get("date_range","") else e.get("date_range",""))) | |
cur_title = newest.get("title","") | |
else: | |
cur_title = "" | |
except: | |
# Fallback: try to get title from first dictionary experience | |
for exp in exps: | |
if isinstance(exp, dict) and exp.get("title"): | |
cur_title = exp.get("title","") | |
break | |
else: | |
cur_title = "" | |
else: | |
# Try to extract job title from summary if no structured experiences | |
cur_title = "" | |
summary = sections.get("Summary", "") | |
if summary: | |
# Look for job titles in the summary | |
title_patterns = [ | |
r'(?i)(.*?engineer)', | |
r'(?i)(.*?developer)', | |
r'(?i)(.*?analyst)', | |
r'(?i)(.*?manager)', | |
r'(?i)(.*?specialist)', | |
r'(?i)(.*?consultant)', | |
r'(?i)(.*?architect)', | |
r'(?i)(.*?lead)', | |
r'(?i)(.*?director)', | |
r'(?i)(.*?coordinator)' | |
] | |
for pattern in title_patterns: | |
match = re.search(pattern, summary) | |
if match: | |
potential_title = match.group(1).strip() | |
# Clean up the title | |
potential_title = re.sub(r'^(results-driven|experienced|senior|junior|lead)\s+', '', potential_title, flags=re.I) | |
if len(potential_title) > 3 and len(potential_title) < 50: | |
cur_title = potential_title.title() | |
break | |
if sections.get("Name"): | |
p=doc.add_paragraph(); p.alignment=WD_PARAGRAPH_ALIGNMENT.CENTER | |
run=p.add_run(sections["Name"]); run.bold=True; run.font.size=Pt(16) | |
if cur_title: | |
p=doc.add_paragraph(); p.alignment=WD_PARAGRAPH_ALIGNMENT.CENTER | |
p.add_run(cur_title).font.size=Pt(12) | |
# --- summary --- | |
if sections.get("Summary"): | |
heading("Professional Summary:") | |
pg=doc.add_paragraph(); pg.paragraph_format.first_line_indent=Pt(12) | |
pg.add_run(sections["Summary"]).font.size=Pt(11) | |
# --- skills --- | |
if sections.get("Skills"): | |
heading("Skills:") | |
skills = sorted(set(sections["Skills"])) | |
cols = 3 | |
rows = math.ceil(len(skills)/cols) | |
tbl = doc.add_table(rows=rows, cols=cols); tbl.autofit=True | |
k=0 | |
for r in range(rows): | |
for c in range(cols): | |
if k < len(skills): | |
tbl.cell(r,c).paragraphs[0].add_run(f"β’ {skills[k]}").font.size=Pt(11) | |
k+=1 | |
# --- experience --- | |
if exps: | |
heading("Professional Experience:") | |
for e in exps: | |
# Ensure e is a dictionary, not a string | |
if isinstance(e, str): | |
# If it's a string, create a basic experience entry | |
bullet(e, 0) | |
continue | |
elif not isinstance(e, dict): | |
# Skip if it's neither string nor dict | |
continue | |
# Process dictionary experience entry | |
title = e.get("title", "") | |
company = e.get("company", "") | |
date_range = e.get("date_range", "") | |
responsibilities = e.get("responsibilities", []) | |
# Create the job header | |
two_col(" | ".join(filter(None, [title, company])), | |
fmt_range(date_range)) | |
# Add responsibilities | |
if isinstance(responsibilities, list): | |
for resp in responsibilities: | |
if isinstance(resp, str) and resp.strip(): | |
bullet(resp, 1) | |
elif isinstance(responsibilities, str) and responsibilities.strip(): | |
bullet(responsibilities, 1) | |
else: | |
# If no structured experiences found, try to extract from summary | |
heading("Professional Experience:") | |
summary = sections.get("Summary", "") | |
if summary and cur_title: | |
# Extract years of experience from summary | |
years_match = re.search(r'(\d+)\s+years?\s+of\s+experience', summary, re.I) | |
years_text = f"{years_match.group(1)} years of experience" if years_match else "Multiple years of experience" | |
# Create a basic experience entry from summary | |
two_col(cur_title, years_text) | |
# Extract key responsibilities/skills from summary | |
sentences = re.split(r'[.!]', summary) | |
responsibilities = [] | |
for sentence in sentences: | |
sentence = sentence.strip() | |
if len(sentence) > 30 and any(keyword in sentence.lower() for keyword in | |
['expert', 'specializing', 'experience', 'developing', 'designing', 'implementing', 'managing', 'leading']): | |
responsibilities.append(sentence) | |
# Add responsibilities as bullet points | |
for resp in responsibilities[:5]: # Limit to 5 key points | |
bullet(resp.strip(), 1) | |
else: | |
# Fallback message | |
pg = doc.add_paragraph() | |
pg.add_run("Experience details are included in the Professional Summary above.").font.size = Pt(11) | |
pg.add_run(" For specific job titles, companies, and dates, please refer to the original resume.").font.size = Pt(11) | |
# --- job history timeline (chronological list) --- | |
if exps: | |
# Filter to only dictionary experiences and sort by date (most recent first) | |
dict_exps = [e for e in exps if isinstance(e, dict) and e.get("title") and e.get("date_range")] | |
if dict_exps: | |
# Sort experiences by start date (most recent first) | |
try: | |
sorted_exps = sorted(dict_exps, key=lambda e: _date( | |
e.get("date_range", "").split("β")[0] if "β" in e.get("date_range", "") | |
else e.get("date_range", "").split("-")[0] if "-" in e.get("date_range", "") | |
else e.get("date_range", "") | |
), reverse=True) | |
except: | |
# If sorting fails, use original order | |
sorted_exps = dict_exps | |
heading("Career Timeline:") | |
for exp in sorted_exps: | |
title = exp.get("title", "") | |
company = exp.get("company", "") | |
date_range = exp.get("date_range", "") | |
# Format: "Job Title at Company (Dates)" | |
if company: | |
timeline_entry = f"{title} at {company}" | |
else: | |
timeline_entry = title | |
if date_range: | |
timeline_entry += f" ({fmt_range(date_range)})" | |
bullet(timeline_entry, 0) | |
# --- education / training --- | |
education = sections.get("Education", []) | |
training = sections.get("Training", []) | |
# Check if we have any real education or if it's just experience duration | |
has_real_education = False | |
processed_education = [] | |
experience_years = None | |
for ed in education: | |
# Ensure ed is a string | |
if not isinstance(ed, str): | |
continue | |
# Clean up the education entry (remove bullets) | |
clean_ed = ed.replace('β’', '').strip() | |
if re.match(r'^\d+\s+years?$', clean_ed, re.I): | |
# This is experience duration, not education | |
experience_years = clean_ed | |
else: | |
processed_education.append(clean_ed) | |
has_real_education = True | |
# Show education section | |
if has_real_education: | |
heading("Education:") | |
for ed in processed_education: | |
bullet(ed) | |
elif experience_years: | |
# If only experience years found, show it as a note | |
heading("Education:") | |
pg = doc.add_paragraph() | |
pg.add_run(f"Professional experience: {experience_years}").font.size = Pt(11) | |
if training: | |
heading("Training:") | |
for tr in training: | |
# Ensure tr is a string | |
if isinstance(tr, str) and tr.strip(): | |
bullet(tr) | |
# Final diagnostic before returning | |
logger.info(f"BUILDER: FINAL STATE - Document has {len(doc.sections)} sections") | |
for i, section_obj in enumerate(doc.sections): | |
if section_obj.header: | |
logger.info(f"BUILDER: FINAL - Section {i} header has {len(section_obj.header.paragraphs)} paragraphs") | |
if section_obj.footer: | |
logger.info(f"BUILDER: FINAL - Section {i} footer has {len(section_obj.footer.paragraphs)} paragraphs") | |
return doc | |