madankn79 committed
Commit d665d2a · 1 Parent(s): 3d19915
Files changed (1)
app.py +2 -1
app.py CHANGED
@@ -45,6 +45,7 @@ def emphasize_keywords(text, keywords, repeat=3):
 def clean_text(input_text):
     cleaned = re.sub(r"[^A-Za-z0-9\s]", " ", input_text)
     cleaned = re.sub(r"\b[A-Za-z]{2,}[0-9]{3,}\b", "", cleaned)  # SKU/product code pattern (letters followed by numbers)
+    cleaned = re.sub(r"\b[A-Za-z]{2,}[0-9]{2,}\b", "", cleaned)  # Also catch shorter codes (letters followed by 2+ digits)
     cleaned = re.sub(r"\b\d+\b", "", cleaned)  # Remove numbers as tokens

     # Example keyword list
@@ -98,7 +99,7 @@ def summarize_text(input_text, model_label, char_limit):
         do_sample=False,          # Disable sampling to avoid introducing new words
         num_beams=5,              # Beam search to find the most likely sequence of tokens
         early_stopping=True,      # Stop once a reasonable summary is generated
-        no_repeat_ngram_size=2    # Prevent repetition of n-grams (bigrams in this case)
+        no_repeat_ngram_size=1    # Prevent repetition of n-grams (unigrams here: no token appears twice)
     )
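For context, a minimal sketch of the cleaning behaviour after this commit, reproducing only the regex lines visible in the diff; the real clean_text in app.py has more surrounding logic, and the sample string below is invented for illustration.

import re

def clean_text(input_text):
    cleaned = re.sub(r"[^A-Za-z0-9\s]", " ", input_text)         # strip punctuation/symbols
    cleaned = re.sub(r"\b[A-Za-z]{2,}[0-9]{3,}\b", "", cleaned)   # codes: 2+ letters then 3+ digits
    cleaned = re.sub(r"\b[A-Za-z]{2,}[0-9]{2,}\b", "", cleaned)   # new in this commit: 2+ letters then 2+ digits
    cleaned = re.sub(r"\b\d+\b", "", cleaned)                     # standalone numbers
    return cleaned

print(clean_text("Order AB12 and SKU9876: 3 units"))
# product codes and bare numbers are stripped; whitespace is not collapsed at this step

On the generation side, no_repeat_ngram_size=1 tells generate() that no single token may appear twice in the output, which is stricter than the previous bigram (size 2) setting and can noticeably constrain longer summaries.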