|
import spacy |
|
from fastapi import FastAPI, HTTPException |
|
from pydantic import BaseModel |
|
import nltk |
|
from nltk.tokenize import word_tokenize |
|
from nltk.corpus import stopwords |
|
from collections import Counter |
|
import re |
|
|
|
|
|
# Fetch NLTK corpora at import time so the first request doesn't pay the
# download cost; these are no-ops when the data is already cached locally.
nltk.download('punkt')

nltk.download('stopwords')


# Small English spaCy pipeline, used below for named-entity recognition.
# NOTE(review): requires `python -m spacy download en_core_web_sm` to have
# been run beforehand, otherwise this raises at import time — confirm the
# deployment image installs the model.
nlp = spacy.load("en_core_web_sm")

app = FastAPI()
|
|
|
class PostText(BaseModel):
    """Request body schema: the raw post text to extract keywords from."""

    post: str
|
|
|
def clean_text(text):
    """Strip special characters from *text* and return it lowercased.

    Everything outside ``\\w`` (letters, digits, underscore) and ``\\s``
    (whitespace) is removed before lowercasing.
    """
    return re.sub(r'[^\w\s]', '', text).lower()
|
|
|
def extract_keywords(text):
    """Extract keyword candidates from *text* using NLTK and spaCy.

    Returns the 10 most frequent non-stopword tokens of the cleaned,
    lowercased text, followed by up to 10 named entities detected in the
    original text. The two parts may overlap (an entity can also be a
    frequent token); callers that need uniqueness must dedupe.
    """
    cleaned_text = clean_text(text)

    words = word_tokenize(cleaned_text)

    # Drop English stopwords; set membership keeps the filter O(1) per token.
    stop_words = set(stopwords.words("english"))
    filtered_words = [word for word in words if word not in stop_words]

    word_counts = Counter(filtered_words)

    # Run NER on the *original* text — casing and punctuation are important
    # cues for spaCy's entity recognizer, so don't feed it the cleaned text.
    doc = nlp(text)
    entities = [ent.text for ent in doc.ents]

    # Fix: the original also built `keywords = set(filtered_words + entities)`
    # here but never used it; that dead computation has been removed.
    return [keyword for keyword, _ in word_counts.most_common(10)] + entities[:10]
|
|
|
@app.get("/generate-keywords") |
|
async def generate_keywords(post_text: PostText): |
|
try: |
|
|
|
keywords = extract_keywords(post_text.post) |
|
|
|
return {"keywords": keywords} |
|
|
|
except Exception as e: |
|
raise HTTPException(status_code=400, detail=str(e)) |
|
|