"""Build a résumé document from structured section data on top of a .docx template."""

import logging
import math
import re
from datetime import datetime
from typing import Optional

from dateutil.parser import parse as date_parse
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_PARAGRAPH_ALIGNMENT
from docx.shared import Pt

logger = logging.getLogger(__name__)

# Sentinel returned by ``_date`` when a string cannot be parsed; it is also the
# default used to fill in date fields that the input does not specify.
_FALLBACK_DATE = datetime(1900, 1, 1)

# Tried in order against the summary when no structured experience is given.
_TITLE_PATTERNS = (
    r'(?i)(.*?engineer)',
    r'(?i)(.*?developer)',
    r'(?i)(.*?analyst)',
    r'(?i)(.*?manager)',
    r'(?i)(.*?specialist)',
    r'(?i)(.*?consultant)',
    r'(?i)(.*?architect)',
    r'(?i)(.*?lead)',
    r'(?i)(.*?director)',
    r'(?i)(.*?coordinator)',
)

# A summary sentence must contain one of these to be reused as a bullet.
_RESPONSIBILITY_KEYWORDS = (
    'expert', 'specializing', 'experience', 'developing',
    'designing', 'implementing', 'managing', 'leading',
)


# ---------- helpers ---------------------------------------------------
def _try_parse_date(text) -> Optional[datetime]:
    """Parse *text* with dateutil; return ``None`` when it is not a date."""
    try:
        return date_parse(text, default=_FALLBACK_DATE)
    except (ValueError, OverflowError, TypeError):
        # dateutil documents ParserError (a ValueError) for unknown formats,
        # OverflowError for out-of-range values and TypeError for non-strings.
        return None


def _date(dt_str: str) -> datetime:
    """Parse *dt_str*, returning 1900-01-01 when it cannot be parsed."""
    parsed = _try_parse_date(dt_str)
    return parsed if parsed is not None else _FALLBACK_DATE


def _split_range(raw) -> list:
    """Split a date-range string into stripped parts.

    An en dash, or a hyphen with whitespace on at least one side, is
    preferred as the separator so that hyphens inside a date such as
    ``2020-01`` are left alone. Only when neither occurs is the string split
    on every hyphen. Non-string input yields an empty list; empty parts are
    kept so that index 0 is always the text before the first separator.
    """
    if not isinstance(raw, str):
        return []
    parts = re.split(r"\s*–\s*|\s+-\s*|\s*-\s+", raw)
    if len(parts) == 1:
        parts = re.split(r"\s*-\s*", raw)
    return [part.strip() for part in parts]


def _range_start(exp: dict) -> datetime:
    """Return the start date of an experience dict's ``date_range``.

    Never raises: a missing, non-string or unparseable range sorts as
    1900-01-01.
    """
    parts = _split_range(exp.get("date_range", ""))
    return _date(parts[0]) if parts else _FALLBACK_DATE


def _as_list(value) -> list:
    """Normalise a section value to a list.

    ``None`` becomes ``[]`` and a bare string becomes a one-item list (so it
    is not iterated character by character); other iterables are copied and
    non-iterables become ``[]``.
    """
    if value is None:
        return []
    if isinstance(value, str):
        return [value]
    try:
        return list(value)
    except TypeError:
        return []


def fmt_range(raw: str) -> str:
    """Format a date range such as ``"Jan 2020 - Present"`` for display.

    Each part is rendered as ``"<Month name> <Year>"``; the word "present"
    (any case) becomes ``"Present"``; a part that cannot be parsed as a date
    is kept verbatim. Parts are joined with ``" – "``. Empty or non-string
    input returns ``""``.
    """
    formatted_parts = []
    for part in _split_range(raw):
        if not part:
            # Skip empties (e.g. a trailing separator) to avoid a dangling dash.
            continue
        if part.lower() == "present":
            formatted_parts.append("Present")
            continue
        parsed = _try_parse_date(part)
        formatted_parts.append(
            parsed.strftime("%B %Y") if parsed is not None else part
        )
    return " – ".join(formatted_parts)


def _title_from_summary(summary) -> str:
    """Guess a job title from free-text *summary*; return ``""`` if none fits."""
    if not isinstance(summary, str) or not summary:
        return ""
    for pattern in _TITLE_PATTERNS:
        match = re.search(pattern, summary)
        if not match:
            continue
        candidate = re.sub(
            r'^(results-driven|experienced|senior|junior|lead)\s+',
            '',
            match.group(1).strip(),
            flags=re.I,
        )
        # Reject captures that are too short or too long to be a title and
        # move on to the next pattern.
        if 3 < len(candidate) < 50:
            return candidate.title()
    return ""


def _current_title(sections: dict, exps: list) -> str:
    """Return the title shown under the name.

    With experiences present, this is the title of the dict entry with the
    latest start date (``""`` if there are no dict entries); otherwise it is
    guessed from the summary text.
    """
    if not exps:
        return _title_from_summary(sections.get("Summary", ""))
    dict_exps = [e for e in exps if isinstance(e, dict)]
    if not dict_exps:
        return ""
    return max(dict_exps, key=_range_start).get("title", "") or ""


def _log_headers_and_footers(doc, prefix: str = "", verb: str = "has") -> None:
    """Log the paragraph count of every section's header and footer."""
    for index, section_obj in enumerate(doc.sections):
        if section_obj.header:
            logger.info(
                "BUILDER: %sSection %s header %s %s paragraphs",
                prefix, index, verb, len(section_obj.header.paragraphs),
            )
        if section_obj.footer:
            logger.info(
                "BUILDER: %sSection %s footer %s %s paragraphs",
                prefix, index, verb, len(section_obj.footer.paragraphs),
            )


def _clear_template_body(doc) -> None:
    """Blank every body paragraph and remove every body table of *doc*.

    Paragraph elements are kept (only their text is cleared) so that the
    document structure, including sections, is left in place.
    """
    logger.info(
        "BUILDER: Before clearing - Document has %s paragraphs and %s tables",
        len(doc.paragraphs), len(doc.tables),
    )
    for paragraph in doc.paragraphs:
        # Assigning ``text`` replaces all existing runs, so no per-run loop
        # is needed.
        paragraph.text = ""
    # Iterate over a copy because removal mutates the underlying body.
    for table in list(doc.tables):
        tbl = table._element
        tbl.getparent().remove(tbl)
    logger.info(
        "BUILDER: After clearing - Document has %s paragraphs and %s tables",
        len(doc.paragraphs), len(doc.tables),
    )


def _add_heading(doc, txt: str) -> None:
    """Append a bold 12pt heading paragraph."""
    run = doc.add_paragraph().add_run(txt)
    run.bold = True
    run.font.size = Pt(12)


def _add_bullet(doc, txt: str, lvl: int = 0) -> None:
    """Append an 11pt "• text" paragraph indented 12pt per level."""
    para = doc.add_paragraph()
    para.paragraph_format.left_indent = Pt(lvl * 12)
    para.add_run(f"• {txt}").font.size = Pt(11)


def _add_two_col(doc, left: str, right: str) -> None:
    """Append a 1x2 table: bold *left* text, right-aligned italic *right* text."""
    tbl = doc.add_table(rows=1, cols=2)
    tbl.autofit = True
    tbl.cell(0, 0).paragraphs[0].add_run(left).bold = True
    right_para = tbl.cell(0, 1).paragraphs[0]
    right_para.alignment = WD_ALIGN_PARAGRAPH.RIGHT
    right_para.add_run(right).italic = True


def _add_skills(doc, raw_skills) -> None:
    """Append the skills section as a three-column table of sorted, unique skills."""
    skills = sorted(
        {s for s in _as_list(raw_skills) if isinstance(s, str) and s.strip()}
    )
    if not skills:
        return
    _add_heading(doc, "Skills:")
    cols = 3
    rows = math.ceil(len(skills) / cols)
    tbl = doc.add_table(rows=rows, cols=cols)
    tbl.autofit = True
    # Fill row by row; trailing cells of the last row stay empty.
    for index, skill in enumerate(skills):
        row, col = divmod(index, cols)
        tbl.cell(row, col).paragraphs[0].add_run(f"• {skill}").font.size = Pt(11)


def _add_experience(doc, exps: list, summary, cur_title: str) -> None:
    """Append the "Professional Experience" section.

    Structured entries are rendered as a header row plus responsibility
    bullets. Without any, a single entry is synthesised from the summary, or
    a note pointing at the summary is written.
    """
    _add_heading(doc, "Professional Experience:")

    if exps:
        for entry in exps:
            if isinstance(entry, str):
                # A plain string is rendered as a single top-level bullet.
                _add_bullet(doc, entry, 0)
                continue
            if not isinstance(entry, dict):
                continue
            title = entry.get("title", "")
            company = entry.get("company", "")
            _add_two_col(
                doc,
                " | ".join(filter(None, [title, company])),
                fmt_range(entry.get("date_range", "")),
            )
            # ``responsibilities`` may be a list of strings or one string.
            for resp in _as_list(entry.get("responsibilities", [])):
                if isinstance(resp, str) and resp.strip():
                    _add_bullet(doc, resp, 1)
        return

    if summary and cur_title:
        years_match = re.search(r'(\d+)\s+years?\s+of\s+experience', summary, re.I)
        years_text = (
            f"{years_match.group(1)} years of experience"
            if years_match
            else "Multiple years of experience"
        )
        _add_two_col(doc, cur_title, years_text)

        sentences = (s.strip() for s in re.split(r'[.!]', summary))
        responsibilities = [
            s for s in sentences
            if len(s) > 30
            and any(keyword in s.lower() for keyword in _RESPONSIBILITY_KEYWORDS)
        ]
        for resp in responsibilities[:5]:  # Limit to 5 key points
            _add_bullet(doc, resp, 1)
        return

    para = doc.add_paragraph()
    para.add_run(
        "Experience details are included in the Professional Summary above."
    ).font.size = Pt(11)
    para.add_run(
        " For specific job titles, companies, and dates, please refer to the original resume."
    ).font.size = Pt(11)


def _add_timeline(doc, exps: list) -> None:
    """Append a "Career Timeline" list, most recent start date first.

    Only dict entries that have both a title and a date range are listed.
    """
    dated = [
        e for e in exps
        if isinstance(e, dict) and e.get("title") and e.get("date_range")
    ]
    if not dated:
        return
    _add_heading(doc, "Career Timeline:")
    for exp in sorted(dated, key=_range_start, reverse=True):
        title = exp.get("title", "")
        company = exp.get("company", "")
        # Format: "Job Title at Company (Dates)"
        timeline_entry = f"{title} at {company}" if company else title
        formatted = fmt_range(exp.get("date_range", ""))
        if formatted:
            timeline_entry += f" ({formatted})"
        _add_bullet(doc, timeline_entry, 0)


def _add_education_and_training(doc, sections: dict) -> None:
    """Append the "Education" and "Training" sections.

    An education entry that is only a duration (e.g. "5 years") is treated as
    experience length: it is shown as a note, and only when no other
    education entry exists.
    """
    processed_education = []
    experience_years = None
    for ed in _as_list(sections.get("Education")):
        if not isinstance(ed, str):
            continue
        clean_ed = ed.replace('•', '').strip()
        if not clean_ed:
            # A bullet-only or blank entry would render as an empty bullet.
            continue
        if re.match(r'^\d+\s+years?$', clean_ed, re.I):
            experience_years = clean_ed
        else:
            processed_education.append(clean_ed)

    if processed_education:
        _add_heading(doc, "Education:")
        for ed in processed_education:
            _add_bullet(doc, ed)
    elif experience_years:
        _add_heading(doc, "Education:")
        para = doc.add_paragraph()
        para.add_run(
            f"Professional experience: {experience_years}"
        ).font.size = Pt(11)

    training = [
        tr for tr in _as_list(sections.get("Training"))
        if isinstance(tr, str) and tr.strip()
    ]
    if training:
        _add_heading(doc, "Training:")
        for tr in training:
            _add_bullet(doc, tr)


# ---------- main ------------------------------------------------------
def build_resume_from_data(tmpl: str, sections: dict) -> Document:
    """Build a résumé document from *sections* on top of the template *tmpl*.

    The template's body paragraphs are blanked and its body tables removed;
    the résumé content is then appended in this order: name, current title,
    summary, skills, experience, career timeline, education, training.

    Args:
        tmpl: Path of the .docx template passed to ``Document``.
        sections: Parsed résumé data. Keys read here are ``"Name"``,
            ``"Summary"``, ``"Skills"``, ``"StructuredExperiences"``
            (dicts with ``title``/``company``/``date_range``/
            ``responsibilities``, or plain strings), ``"Education"`` and
            ``"Training"``. All keys are optional.

    Returns:
        The populated document object (not saved to disk by this function).
    """
    logger.info("BUILDER: Attempting to load document template from: %s", tmpl)
    doc = Document(tmpl)
    logger.info("BUILDER: Template %s loaded successfully.", tmpl)

    # Log the template state before touching it.
    logger.info("BUILDER: Template has %s sections", len(doc.sections))
    _log_headers_and_footers(doc)

    _clear_template_body(doc)

    # Verify headers/footers are still intact.
    logger.info(
        "BUILDER: After clearing - Document still has %s sections",
        len(doc.sections),
    )
    _log_headers_and_footers(doc, verb="still has")
    logger.info("BUILDER: Template preserved with original headers and footers")

    exps = _as_list(sections.get("StructuredExperiences"))
    summary = sections.get("Summary", "")
    cur_title = _current_title(sections, exps)

    # --- header (name + current role) ---
    if sections.get("Name"):
        para = doc.add_paragraph()
        para.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
        name_run = para.add_run(sections["Name"])
        name_run.bold = True
        name_run.font.size = Pt(16)
    if cur_title:
        para = doc.add_paragraph()
        para.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
        para.add_run(cur_title).font.size = Pt(12)

    # --- summary ---
    if summary:
        _add_heading(doc, "Professional Summary:")
        para = doc.add_paragraph()
        para.paragraph_format.first_line_indent = Pt(12)
        para.add_run(summary).font.size = Pt(11)

    # --- skills ---
    _add_skills(doc, sections.get("Skills"))

    # --- experience ---
    _add_experience(doc, exps, summary, cur_title)

    # --- job history timeline (chronological list) ---
    _add_timeline(doc, exps)

    # --- education / training ---
    _add_education_and_training(doc, sections)

    # Final diagnostic before returning.
    logger.info(
        "BUILDER: FINAL STATE - Document has %s sections", len(doc.sections)
    )
    _log_headers_and_footers(doc, prefix="FINAL - ")

    return doc