# Spaces: Running
# app/services/text_cleaner.py
from symspellpy.symspellpy import SymSpell | |
from pathlib import Path | |
# File name of the English frequency dictionary used to seed SymSpell.
_DICTIONARY_NAME = "frequency_dictionary_en_82_765.txt"

# Shared spell-checker instance, built a single time when the module is imported
# so that every call to clean_text() reuses the same loaded dictionary.
sym_spell = SymSpell(max_dictionary_edit_distance=2, prefix_length=7)

# Look for the dictionary next to this module first, so the import works no
# matter which directory the process was started from. The original
# working-directory-relative location is kept as a fallback.
dict_path = Path(__file__).resolve().parent / _DICTIONARY_NAME
if not dict_path.is_file():
    dict_path = Path.cwd() / "app" / "services" / _DICTIONARY_NAME

# load_dictionary() reports a missing file by returning False instead of
# raising; fail loudly here rather than running with an empty dictionary.
if not sym_spell.load_dictionary(dict_path, term_index=0, count_index=1):
    raise FileNotFoundError(f"SymSpell dictionary not found: {dict_path}")
# Leetspeak normalizer
# Translation table built once at import time instead of on every call.
# Each key is a leetspeak character and each value the letter it stands for.
_LEET_TABLE = str.maketrans(
    {
        "0": "o",
        "1": "i",
        "4": "a",
        "!": "l",
        "3": "e",
        "$": "s",
        "@": "a",
        "5": "s",
        "#": "h",
        "+": "t",
    }
)


def leetspeak_normalizer(text: str) -> str:
    """Replace common leetspeak characters in *text* with plain letters.

    Every occurrence of a mapped character is substituted, wherever it
    appears; all other characters are returned unchanged.

    NOTE: the substitution is unconditional, so genuine digits and
    punctuation are rewritten too (e.g. ``"!"`` becomes ``"l"``).

    Args:
        text: The raw input string.

    Returns:
        A new string of the same length with leetspeak characters replaced.
    """
    return text.translate(_LEET_TABLE)
# Combined cleaning function
def clean_text(text: str) -> str:
    """Normalize leetspeak in *text* and run SymSpell compound correction.

    The input is first passed through ``leetspeak_normalizer``; the result
    is then handed to ``sym_spell.lookup_compound`` with a maximum edit
    distance of 2.

    Args:
        text: The raw input string.

    Returns:
        The ``term`` of the first suggestion, or the leetspeak-normalized
        text when the lookup yields no suggestions.
    """
    de_leeted = leetspeak_normalizer(text)
    candidates = sym_spell.lookup_compound(de_leeted, max_edit_distance=2)
    if not candidates:
        # Nothing suggested: fall back to the normalized input.
        return de_leeted
    return candidates[0].term