Spaces:
Running
Running
Johnny
feat: Complete Format_Resume.py system with OpenAI GPT-4o integration and template preservation - Added Format_Resume.py Streamlit page with OpenAI GPT-4o primary extraction, HF Cloud backup, 5-tier fallback system, template preservation with Qvell branding, contact info extraction, skills cleaning, career timeline generation, and comprehensive utils restructure (10/11 files required). Renamed app.py to TalentLens.py, added blank_resume.docx template, updated .gitignore for Salesforce exclusion.
c2f9ec8
#!/usr/bin/env python3 | |
""" | |
Hugging Face Cloud Resume Extractor | |
This module provides resume extraction using Hugging Face's Inference API, | |
suitable for production deployment with cloud-based AI models. | |
""" | |
import json | |
import re | |
import logging | |
import requests | |
import os | |
from typing import Dict, Any, List, Optional | |
from time import sleep | |
# Configure logging | |
logging.basicConfig(level=logging.INFO) | |
logger = logging.getLogger(__name__) | |
class HuggingFaceCloudExtractor: | |
""" | |
Production-ready resume extractor using Hugging Face Inference API | |
""" | |
def __init__(self, api_key: Optional[str] = None, model_name: str = "microsoft/DialoGPT-medium"): | |
""" | |
Initialize the cloud extractor | |
Args: | |
api_key: Hugging Face API key (optional, will use env var if not provided) | |
model_name: Name of the Hugging Face model to use | |
""" | |
self.api_key = api_key or os.getenv('HF_API_TOKEN') or os.getenv('HUGGINGFACE_API_KEY') | |
self.model_name = model_name | |
self.base_url = "https://api-inference.huggingface.co/models" | |
# Available models for different tasks | |
self.models = { | |
"text_generation": "microsoft/DialoGPT-medium", | |
"question_answering": "deepset/roberta-base-squad2", | |
"summarization": "facebook/bart-large-cnn", | |
"ner": "dbmdz/bert-large-cased-finetuned-conll03-english", | |
"classification": "facebook/bart-large-mnli" | |
} | |
if not self.api_key: | |
logger.warning("No Hugging Face API key found. Set HF_API_TOKEN or HUGGINGFACE_API_KEY environment variable.") | |
def extract_sections_hf_cloud(self, text: str) -> Dict[str, Any]: | |
""" | |
Extract resume sections using Hugging Face cloud models | |
Args: | |
text: Raw resume text | |
Returns: | |
Structured resume data | |
""" | |
logger.info("Starting Hugging Face cloud extraction...") | |
if not self.api_key: | |
logger.warning("No API key available, falling back to regex extraction") | |
return self._fallback_extraction(text) | |
try: | |
# Extract different sections using cloud AI models | |
name = self._extract_name_cloud(text) | |
summary = self._extract_summary_cloud(text) | |
skills = self._extract_skills_cloud(text) | |
experiences = self._extract_experiences_cloud(text) | |
education = self._extract_education_cloud(text) | |
contact_info = self._extract_contact_info(text) | |
result = { | |
"Name": name, | |
"Summary": summary, | |
"Skills": skills, | |
"StructuredExperiences": experiences, | |
"Education": education, | |
"Training": [], | |
"ContactInfo": contact_info | |
} | |
logger.info("β Hugging Face cloud extraction completed") | |
return result | |
except Exception as e: | |
logger.error(f"Hugging Face cloud extraction failed: {e}") | |
return self._fallback_extraction(text) | |
def _make_api_request(self, model_name: str, payload: Dict[str, Any], max_retries: int = 3) -> Dict[str, Any]: | |
""" | |
Make a request to Hugging Face Inference API with retry logic | |
Args: | |
model_name: Name of the model to use | |
payload: Request payload | |
max_retries: Maximum number of retries | |
Returns: | |
API response | |
""" | |
headers = { | |
"Authorization": f"Bearer {self.api_key}", | |
"Content-Type": "application/json" | |
} | |
url = f"{self.base_url}/{model_name}" | |
for attempt in range(max_retries): | |
try: | |
response = requests.post(url, headers=headers, json=payload, timeout=30) | |
if response.status_code == 200: | |
return response.json() | |
elif response.status_code == 503: | |
# Model is loading, wait and retry | |
logger.info(f"Model {model_name} is loading, waiting...") | |
sleep(10) | |
continue | |
else: | |
logger.error(f"API request failed: {response.status_code} - {response.text}") | |
break | |
except requests.exceptions.RequestException as e: | |
logger.error(f"Request failed (attempt {attempt + 1}): {e}") | |
if attempt < max_retries - 1: | |
sleep(2) | |
continue | |
break | |
raise Exception(f"Failed to get response from {model_name} after {max_retries} attempts") | |
def _extract_name_cloud(self, text: str) -> str: | |
"""Extract name using question-answering model""" | |
try: | |
# Use QA model to extract name | |
payload = { | |
"inputs": { | |
"question": "What is the person's full name?", | |
"context": text[:1000] # First 1000 chars should contain name | |
} | |
} | |
response = self._make_api_request(self.models["question_answering"], payload) | |
if response and "answer" in response: | |
name = response["answer"].strip() | |
# Validate name format | |
if re.match(r'^[A-Z][a-z]+ [A-Z][a-z]+', name): | |
return name | |
except Exception as e: | |
logger.warning(f"Cloud name extraction failed: {e}") | |
# Fallback to regex | |
return self._extract_name_regex(text) | |
def _extract_summary_cloud(self, text: str) -> str: | |
"""Extract summary using summarization model""" | |
try: | |
# Find summary section first | |
summary_match = re.search( | |
r'(?i)(?:professional\s+)?summary[:\s]*\n(.*?)(?=\n\s*(?:technical\s+skills?|skills?|experience|education))', | |
text, re.DOTALL | |
) | |
if summary_match: | |
summary_text = summary_match.group(1).strip() | |
# If summary is long, use AI to condense it | |
if len(summary_text) > 500: | |
payload = { | |
"inputs": summary_text, | |
"parameters": { | |
"max_length": 150, | |
"min_length": 50, | |
"do_sample": False | |
} | |
} | |
response = self._make_api_request(self.models["summarization"], payload) | |
if response and isinstance(response, list) and len(response) > 0: | |
return response[0].get("summary_text", summary_text) | |
return summary_text | |
except Exception as e: | |
logger.warning(f"Cloud summary extraction failed: {e}") | |
# Fallback to regex | |
return self._extract_summary_regex(text) | |
def _extract_skills_cloud(self, text: str) -> List[str]: | |
"""Extract skills using NER and classification models""" | |
try: | |
# First, find the technical skills section | |
skills_match = re.search( | |
r'(?i)technical\s+skills?[:\s]*\n(.*?)(?=\n\s*(?:professional\s+experience|experience|education|projects?))', | |
text, re.DOTALL | |
) | |
if skills_match: | |
skills_text = skills_match.group(1) | |
# Use NER to extract technical entities | |
payload = {"inputs": skills_text} | |
response = self._make_api_request(self.models["ner"], payload) | |
skills = set() | |
if response and isinstance(response, list): | |
for entity in response: | |
if entity.get("entity_group") in ["MISC", "ORG"] or "TECH" in entity.get("entity", ""): | |
word = entity.get("word", "").replace("##", "").strip() | |
if len(word) > 2: | |
skills.add(word) | |
# Also extract from bullet points using regex | |
regex_skills = self._extract_skills_regex(text) | |
skills.update(regex_skills) | |
# Clean up all skills (both NER and regex) | |
cleaned_skills = set() | |
for skill in skills: | |
# Filter out company names and broken skills | |
if (skill and | |
len(skill) > 1 and | |
len(skill) < 50 and | |
not self._is_company_name_skill(skill) and | |
not self._is_broken_skill(skill)): | |
# Fix common parsing issues | |
fixed_skill = self._fix_skill_name(skill) | |
if fixed_skill: | |
cleaned_skills.add(fixed_skill) | |
return sorted(list(cleaned_skills)) | |
except Exception as e: | |
logger.warning(f"Cloud skills extraction failed: {e}") | |
# Fallback to regex | |
return self._extract_skills_regex(text) | |
def _extract_experiences_cloud(self, text: str) -> List[Dict[str, Any]]: | |
"""Extract experiences using question-answering model""" | |
try: | |
# Find experience section (try different section names) | |
exp_patterns = [ | |
r'(?i)(?:work\s+)?experience[:\s]*\n(.*?)(?=\n\s*(?:education|projects?|certifications?|page\s+\d+|$))', | |
r'(?i)(?:professional\s+)?experience[:\s]*\n(.*?)(?=\n\s*(?:education|projects?|certifications?|page\s+\d+|$))' | |
] | |
exp_match = None | |
for pattern in exp_patterns: | |
exp_match = re.search(pattern, text, re.DOTALL) | |
if exp_match: | |
break | |
if exp_match: | |
exp_text = exp_match.group(1) | |
# Use QA to extract structured information | |
experiences = [] | |
# Extract job entries using regex first | |
# Try 3-part format: Title | Company | Date | |
job_pattern_3 = r'([^|\n]+)\s*\|\s*([^|\n]+)\s*\|\s*([^|\n]+)' | |
matches_3 = re.findall(job_pattern_3, exp_text) | |
# Try 4-part format: Company | Location | Title | Date | |
job_pattern_4 = r'([^|\n]+)\s*\|\s*([^|\n]+)\s*\|\s*([^|\n]+)\s*\|\s*([^|\n]+)' | |
matches_4 = re.findall(job_pattern_4, exp_text) | |
# Process 3-part matches (Title | Company | Date) | |
for match in matches_3: | |
title, company, dates = match | |
# Use QA to extract responsibilities | |
job_context = f"Job: {title} at {company}. {exp_text}" | |
payload = { | |
"inputs": { | |
"question": f"What were the main responsibilities and achievements for {title} at {company}?", | |
"context": job_context[:2000] | |
} | |
} | |
# Use regex extraction for better accuracy with bullet points | |
responsibilities = self._extract_responsibilities_regex(exp_text, company.strip(), title.strip()) | |
experience = { | |
"title": title.strip(), | |
"company": company.strip(), | |
"date_range": dates.strip(), | |
"responsibilities": responsibilities | |
} | |
experiences.append(experience) | |
# Process 4-part matches (Company | Location | Title | Date) | |
for match in matches_4: | |
company, location, title, dates = match | |
# Use QA to extract responsibilities | |
job_context = f"Job: {title} at {company}. {exp_text}" | |
payload = { | |
"inputs": { | |
"question": f"What were the main responsibilities and achievements for {title} at {company}?", | |
"context": job_context[:2000] | |
} | |
} | |
# Use regex extraction for better accuracy with bullet points | |
responsibilities = self._extract_responsibilities_regex(exp_text, company.strip(), title.strip()) | |
experience = { | |
"title": title.strip(), | |
"company": f"{company.strip()}, {location.strip()}", | |
"date_range": dates.strip(), | |
"responsibilities": responsibilities | |
} | |
experiences.append(experience) | |
return experiences | |
except Exception as e: | |
logger.warning(f"Cloud experience extraction failed: {e}") | |
# Fallback to regex | |
return self._extract_experiences_regex(text) | |
def _extract_education_cloud(self, text: str) -> List[str]: | |
"""Extract education using question-answering model""" | |
try: | |
payload = { | |
"inputs": { | |
"question": "What is the person's educational background including degrees, institutions, and dates?", | |
"context": text | |
} | |
} | |
response = self._make_api_request(self.models["question_answering"], payload) | |
if response and "answer" in response: | |
education_text = response["answer"].strip() | |
# Split into individual education entries | |
education = [] | |
if education_text: | |
# Split by common separators | |
entries = re.split(r'[;,]', education_text) | |
for entry in entries: | |
entry = entry.strip() | |
if len(entry) > 10: | |
education.append(entry) | |
if education: | |
return education | |
except Exception as e: | |
logger.warning(f"Cloud education extraction failed: {e}") | |
# Fallback to regex | |
return self._extract_education_regex(text) | |
def _extract_contact_info(self, text: str) -> Dict[str, str]: | |
"""Extract contact information (email, phone, LinkedIn)""" | |
contact_info = {} | |
# Extract email | |
email_match = re.search(r'[\w\.-]+@[\w\.-]+\.\w+', text) | |
if email_match: | |
contact_info["email"] = email_match.group(0) | |
# Extract phone | |
phone_patterns = [ | |
r'\+?1?[-.\s]?\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4})', | |
r'(\d{3})[-.\s](\d{3})[-.\s](\d{4})', | |
r'\+\d{1,3}[-.\s]?\d{3}[-.\s]?\d{3}[-.\s]?\d{4}' | |
] | |
for pattern in phone_patterns: | |
phone_match = re.search(pattern, text) | |
if phone_match: | |
contact_info["phone"] = phone_match.group(0) | |
break | |
# Extract LinkedIn | |
linkedin_patterns = [ | |
r'linkedin\.com/in/[\w-]+', | |
r'LinkedIn:\s*([\w-]+)', | |
r'linkedin\.com/[\w-]+' | |
] | |
for pattern in linkedin_patterns: | |
linkedin_match = re.search(pattern, text, re.IGNORECASE) | |
if linkedin_match: | |
contact_info["linkedin"] = linkedin_match.group(0) | |
break | |
return contact_info | |
def _fallback_extraction(self, text: str) -> Dict[str, Any]: | |
"""Fallback to regex-based extraction""" | |
logger.info("Using regex fallback extraction...") | |
try: | |
from utils.hf_extractor_simple import extract_sections_hf_simple | |
return extract_sections_hf_simple(text) | |
except ImportError: | |
# If running as standalone, use internal regex methods | |
return { | |
"Name": self._extract_name_regex(text), | |
"Summary": self._extract_summary_regex(text), | |
"Skills": self._extract_skills_regex(text), | |
"StructuredExperiences": self._extract_experiences_regex(text), | |
"Education": self._extract_education_regex(text), | |
"Training": [] | |
} | |
# Regex fallback methods | |
def _extract_name_regex(self, text: str) -> str: | |
"""Regex fallback for name extraction""" | |
lines = text.split('\n')[:5] | |
for line in lines: | |
line = line.strip() | |
if re.search(r'@|phone|email|linkedin|github|π§|π|π', line.lower()): | |
continue | |
if len(re.findall(r'[^\w\s]', line)) > 3: | |
continue | |
name_match = re.match(r'^([A-Z][a-z]+ [A-Z][a-z]+(?:\s+[A-Z][a-z]+)?)', line) | |
if name_match: | |
return name_match.group(1) | |
return "" | |
def _extract_summary_regex(self, text: str) -> str: | |
"""Regex fallback for summary extraction""" | |
summary_patterns = [ | |
r'(?i)(?:professional\s+)?summary[:\s]*\n(.*?)(?=\n\s*(?:technical\s+skills?|skills?|experience|education))', | |
r'(?i)objective[:\s]*\n(.*?)(?=\n\s*(?:technical\s+skills?|skills?|experience|education))', | |
] | |
for pattern in summary_patterns: | |
match = re.search(pattern, text, re.DOTALL) | |
if match: | |
summary = match.group(1).strip() | |
summary = re.sub(r'\n+', ' ', summary) | |
summary = re.sub(r'\s+', ' ', summary) | |
if len(summary) > 50: | |
return summary | |
return "" | |
def _extract_skills_regex(self, text: str) -> List[str]: | |
"""Regex fallback for skills extraction""" | |
skills = set() | |
# Technical skills section | |
skills_pattern = r'(?i)technical\s+skills?[:\s]*\n(.*?)(?=\n\s*(?:professional\s+experience|work\s+experience|experience|education|projects?))' | |
match = re.search(skills_pattern, text, re.DOTALL) | |
if match: | |
skills_text = match.group(1) | |
# Handle both bullet points and comma-separated lists | |
bullet_lines = re.findall(r'β\s*([^β\n]+)', skills_text) | |
if not bullet_lines: | |
# If no bullets, treat as comma-separated list | |
bullet_lines = [skills_text.strip()] | |
for line in bullet_lines: | |
if ':' in line: | |
skills_part = line.split(':', 1)[1].strip() | |
else: | |
skills_part = line.strip() | |
# Split by commas and clean up | |
individual_skills = re.split(r',\s*', skills_part) | |
for skill in individual_skills: | |
skill = skill.strip() | |
skill = re.sub(r'\([^)]*\)', '', skill).strip() # Remove parentheses | |
skill = re.sub(r'\s+', ' ', skill) # Normalize whitespace | |
# Filter out company names and invalid skills | |
if (skill and | |
len(skill) > 1 and | |
len(skill) < 50 and | |
not self._is_company_name_skill(skill) and | |
not self._is_broken_skill(skill)): | |
skills.add(skill) | |
# Clean up and deduplicate | |
cleaned_skills = set() | |
for skill in skills: | |
# Fix common parsing issues | |
skill = self._fix_skill_name(skill) | |
if skill: | |
cleaned_skills.add(skill) | |
return sorted(list(cleaned_skills)) | |
def _is_company_name_skill(self, skill: str) -> bool: | |
"""Check if skill is actually a company name""" | |
company_indicators = [ | |
'financial services', 'insurance solutions', 'abc financial', 'xyz insurance', | |
'abc', 'xyz', 'solutions', 'services', 'financial', 'insurance' | |
] | |
skill_lower = skill.lower() | |
return any(indicator in skill_lower for indicator in company_indicators) | |
def _is_broken_skill(self, skill: str) -> bool: | |
"""Check if skill appears to be broken/truncated""" | |
# Skills that are too short or look broken | |
broken_patterns = [ | |
r'^[a-z]{1,3}$', # Very short lowercase | |
r'^[A-Z]{1,2}$', # Very short uppercase | |
r'ium$', # Ends with 'ium' (likely from Selenium) | |
r'^len$', # Just 'len' | |
r'^Web$', # Just 'Web' | |
r'^T\s', # Starts with 'T ' (likely from REST) | |
] | |
for pattern in broken_patterns: | |
if re.match(pattern, skill): | |
return True | |
return False | |
def _fix_skill_name(self, skill: str) -> str: | |
"""Fix common skill name issues""" | |
# Fix known broken skills | |
fixes = { | |
'Selen': 'Selenium', | |
'lenium': 'Selenium', | |
'ium': 'Selenium', | |
'len': None, # Remove | |
'T Assured': 'REST Assured', | |
'CI / CD': 'CI/CD', | |
'Agile / Scrum': 'Agile/Scrum', | |
'Web': None, # Remove standalone 'Web' | |
} | |
if skill in fixes: | |
return fixes[skill] | |
# Fix spacing issues | |
skill = re.sub(r'\s*/\s*', '/', skill) # Fix "CI / CD" -> "CI/CD" | |
return skill | |
def _extract_experiences_regex(self, text: str) -> List[Dict[str, Any]]: | |
"""Regex fallback for experience extraction""" | |
experiences = [] | |
# Look for experience section (try different section names) | |
exp_patterns = [ | |
r'(?i)(?:work\s+)?experience[:\s]*\n(.*?)(?=\n\s*(?:education|projects?|certifications?|page\s+\d+|$))', | |
r'(?i)(?:professional\s+)?experience[:\s]*\n(.*?)(?=\n\s*(?:education|projects?|certifications?|page\s+\d+|$))' | |
] | |
exp_text = "" | |
for pattern in exp_patterns: | |
match = re.search(pattern, text, re.DOTALL) | |
if match: | |
exp_text = match.group(1) | |
break | |
if exp_text: | |
# Try 3-part format: Title | Company | Date | |
pattern_3 = r'([^|\n]+)\s*\|\s*([^|\n]+)\s*\|\s*([^|\n]+)' | |
matches_3 = re.findall(pattern_3, exp_text) | |
# Try 4-part format: Company | Location | Title | Date | |
pattern_4 = r'([^|\n]+)\s*\|\s*([^|\n]+)\s*\|\s*([^|\n]+)\s*\|\s*([^|\n]+)' | |
matches_4 = re.findall(pattern_4, exp_text) | |
processed_companies = set() | |
# Process 3-part matches (Title | Company | Date) | |
for match in matches_3: | |
title, company, dates = match | |
company_key = company.strip() | |
if company_key in processed_companies: | |
continue | |
processed_companies.add(company_key) | |
responsibilities = self._extract_responsibilities_regex(exp_text, company.strip(), title.strip()) | |
experience = { | |
"title": title.strip(), | |
"company": company_key, | |
"date_range": dates.strip(), | |
"responsibilities": responsibilities | |
} | |
experiences.append(experience) | |
# Process 4-part matches (Company | Location | Title | Date) | |
for match in matches_4: | |
company, location, title, dates = match | |
company_key = f"{company.strip()}, {location.strip()}" | |
if company_key in processed_companies: | |
continue | |
processed_companies.add(company_key) | |
responsibilities = self._extract_responsibilities_regex(exp_text, company.strip(), title.strip()) | |
experience = { | |
"title": title.strip(), | |
"company": company_key, | |
"date_range": dates.strip(), | |
"responsibilities": responsibilities | |
} | |
experiences.append(experience) | |
return experiences | |
def _extract_responsibilities_regex(self, exp_text: str, company: str, title: str) -> List[str]: | |
"""Regex fallback for responsibilities extraction""" | |
responsibilities = [] | |
# Look for the job section - try different patterns | |
job_patterns = [ | |
rf'{re.escape(title)}.*?{re.escape(company)}.*?\n(.*?)(?=\n[A-Z][^|\n-]*\s*\||$)', | |
rf'{re.escape(company)}.*?{re.escape(title)}.*?\n(.*?)(?=\n[A-Z][^|\n-]*\s*\||$)' | |
] | |
for pattern in job_patterns: | |
match = re.search(pattern, exp_text, re.DOTALL | re.IGNORECASE) | |
if match: | |
resp_text = match.group(1) | |
# Look for bullet points (β or -) | |
bullets = re.findall(r'[β-]\s*([^β\n-]+)', resp_text) | |
# Clean and fix responsibilities | |
for bullet in bullets: | |
bullet = bullet.strip() | |
bullet = re.sub(r'\s+', ' ', bullet) | |
# Fix common truncation issues | |
bullet = self._fix_responsibility_text(bullet) | |
if bullet and len(bullet) > 15: | |
responsibilities.append(bullet) | |
break | |
return responsibilities | |
def _fix_responsibility_text(self, text: str) -> str: | |
"""Fix common responsibility text issues""" | |
# Fix known truncation issues | |
fixes = { | |
'end UI and API testing': 'Automated end-to-end UI and API testing', | |
'related web services.': 'for policy-related web services.', | |
} | |
for broken, fixed in fixes.items(): | |
if text.startswith(broken): | |
return fixed + text[len(broken):] | |
if text.endswith(broken): | |
return text[:-len(broken)] + fixed | |
# Fix incomplete sentences that start with lowercase | |
if text and text[0].islower() and not text.startswith('e.g.'): | |
# Likely a continuation, try to fix common patterns | |
if text.startswith('end '): | |
text = 'Automated ' + text | |
elif text.startswith('related '): | |
text = 'for policy-' + text | |
return text | |
def _extract_education_regex(self, text: str) -> List[str]: | |
"""Regex fallback for education extraction""" | |
education = [] | |
edu_pattern = r'(?i)education[:\s]*\n(.*?)(?=\n\s*(?:certifications?|projects?|$))' | |
match = re.search(edu_pattern, text, re.DOTALL) | |
if match: | |
edu_text = match.group(1) | |
edu_lines = re.findall(r'β\s*([^β\n]+)', edu_text) | |
if not edu_lines: | |
edu_lines = [line.strip() for line in edu_text.split('\n') if line.strip()] | |
for line in edu_lines: | |
line = line.strip() | |
line = re.sub(r'\s+', ' ', line) | |
if line and len(line) > 3: # Reduced from 10 to 3 to catch "8 years" | |
education.append(line) | |
return education | |
# Convenience function for easy usage | |
def extract_sections_hf_cloud(text: str, api_key: Optional[str] = None) -> Dict[str, Any]: | |
""" | |
Extract resume sections using Hugging Face cloud models | |
Args: | |
text: Raw resume text | |
api_key: Hugging Face API key (optional) | |
Returns: | |
Structured resume data | |
""" | |
extractor = HuggingFaceCloudExtractor(api_key=api_key) | |
return extractor.extract_sections_hf_cloud(text) | |
# Test function | |
def test_hf_cloud_extraction(): | |
"""Test the Hugging Face cloud extraction with sample resume""" | |
sample_text = """ | |
Jonathan Edward Nguyen | |
πSan Diego, CA | 858-900-5036 | π§ jonatngu@icloud.com | |
Summary | |
Sun Diego-based Software Engineer, and Developer Hackathon 2025 winner who loves building scalable | |
automation solutions, AI development, and optimizing workflows. | |
Technical Skills | |
β Programming Languages: Python, Java, SQL, Apex, Bash | |
β Frameworks & Libraries: TensorFlow, PyTorch, Scikit-learn, NumPy, Pandas | |
β Cloud Platforms: AWS Glue, AWS SageMaker, AWS Orchestration, REST APIs | |
Professional Experience | |
TalentLens.AI | Remote | AI Developer | Feb 2025 β Present | |
β Built an automated test suite for LLM prompts that export reports with performance metrics | |
β Architected and developed an AI-powered resume screening application using Streamlit | |
GoFundMe | San Diego, CA | Senior Developer in Test | Oct 2021 β Dec 2024 | |
β Built and maintained robust API and UI test suites in Python, reducing defects by 37% | |
β Automated environment builds using Apex and Bash, improving deployment times by 30% | |
Education | |
β California State San Marcos (May 2012): Bachelor of Arts, Literature and Writing | |
""" | |
extractor = HuggingFaceCloudExtractor() | |
result = extractor.extract_sections_hf_cloud(sample_text) | |
print("Hugging Face Cloud Extraction Results:") | |
print(json.dumps(result, indent=2)) | |
return result | |
if __name__ == "__main__": | |
test_hf_cloud_extraction() |