"""Helpers for fetching headlines from NewsAPI and article text via newspaper."""

import logging
import os
from typing import Iterable, List, Optional, Set

from newsapi import NewsApiClient
from newspaper import Article

# ``__all__`` is the name Python consults for ``from module import *``.
__all__ = ["News"]
# Retained for backward compatibility with code that reads the old name.
__export__ = ["News"]

logger = logging.getLogger(__name__)


class News:
    """Query NewsAPI for US English sources/headlines and enrich them with text.

    The API key is read from the ``NEWS_API_KEY`` environment variable when the
    instance is created.
    """

    # Source display names that are filtered out of every source listing.
    __EX_SOURCES__ = {"ABC News", "Bloomberg", "The Hill", "Fox Sports", "Google News"}
    # Category names; not enforced anywhere in this class.
    __CATEGORIES__ = {
        "business",
        "entertainment",
        "general",
        "health",
        "science",
        "sports",
        "technology",
    }

    def __init__(self):
        """Create the NewsAPI client from the ``NEWS_API_KEY`` env variable.

        If the variable is unset, ``None`` is passed to the client as the key.
        """
        newsapi_key = os.environ.get("NEWS_API_KEY")
        self.newsapi = NewsApiClient(api_key=newsapi_key)

    def _fetch_sources(self, category: Optional[str] = None) -> List[dict]:
        """Return the raw source records (en/us) minus the excluded names."""
        response = self.newsapi.get_sources(
            language="en", country="us", category=category
        )
        return [
            source
            for source in response["sources"]
            if source["name"] not in self.__EX_SOURCES__
        ]

    def _source_ids(self, category: Optional[str] = None) -> str:
        """Return allowed source identifiers as a comma-separated string.

        The ``sources`` request parameter takes source identifiers (the ``id``
        field of a source record), not display names. Identifiers are sorted
        so the request is deterministic.
        """
        ids = {
            source["id"]
            for source in self._fetch_sources(category)
            if source.get("id")
        }
        return ",".join(sorted(ids))

    def get_sources(self, category: Optional[str] = None) -> Set[str]:
        """Return the display names of allowed en/us sources.

        Args:
            category: Optional category forwarded to the sources lookup.

        Returns:
            A set of source names, excluding those in ``__EX_SOURCES__``.
        """
        names = {source["name"] for source in self._fetch_sources(category)}
        logger.debug("Available sources: %s", names)
        return names

    def get_top_headlines(
        self, num_headlines: Optional[int] = None, category: Optional[str] = None
    ) -> List[dict]:
        """Return top-headline articles from the allowed sources.

        Args:
            num_headlines: Forwarded as ``page_size``; ``None`` leaves the
                page size to the client.
            category: Restricts which sources are queried.

        Returns:
            The ``"articles"`` list of the API response.
        """
        response = self.newsapi.get_top_headlines(
            sources=self._source_ids(category), page_size=num_headlines
        )
        return response["articles"]

    def get_headlines(
        self, num_headlines: Optional[int] = None, query: Optional[str] = None
    ) -> List[dict]:
        """Return articles matching ``query`` from the allowed sources.

        Args:
            num_headlines: Forwarded as ``page_size``; ``None`` leaves the
                page size to the client.
            query: Search expression forwarded as ``q``.

        Returns:
            The ``"articles"`` list of the API response.
        """
        # NOTE(review): the API may cap how many source identifiers one
        # request can carry - confirm against the endpoint documentation.
        response = self.newsapi.get_everything(
            q=query, sources=self._source_ids(), page_size=num_headlines
        )
        return response["articles"]

    def get_articles_from_headlines(self, headlines: Iterable[dict]) -> Iterable[dict]:
        """Download each headline's article and attach its text, in place.

        For every headline the article at ``headline["url"]`` is downloaded
        and parsed, its text replaces ``"content"``, the nested ``"source"``
        record is flattened to its name, and ``"author"`` is removed.

        Args:
            headlines: Article dicts shaped like NewsAPI results.

        Returns:
            The same ``headlines`` object, with its items mutated.
        """
        for headline in headlines:
            article = Article(headline["url"])
            article.download()
            article.parse()
            headline["content"] = article.text

            # Flatten only when still nested, so already-processed headlines
            # can be passed through again without a TypeError.
            source = headline.get("source")
            if isinstance(source, dict):
                headline["source"] = source["name"]

            # pop() tolerates headlines that have no "author" key.
            headline.pop("author", None)
        return headlines