"""Thin wrapper around the News API client that also downloads article text."""

from newsapi import NewsApiClient
from newspaper import Article
import os

# NOTE(review): Python itself only recognises ``__all__`` for declaring a
# module's public names; ``__export__`` is kept unchanged in case other
# project code reads it.
__export__ = ["News"]


class News:
    """Fetch headlines through News API and attach the full article text.

    The API key is read from the ``NEWS_API_KEY`` environment variable when
    an instance is created.
    """

    # Source names filtered out of the result of ``get_sources``.
    __EX_SOURCES__ = ["ABC News", "Bloomberg", "The Hill", "Fox Sports",
                      "Google News", "Newsweek", "Politico"]

    # Categories currently enabled; the commented-out entries are disabled.
    __CATEGORIES__ = [
        "General",
        # "Business",
        # "Entertainment",
        # "Health",
        # "Science",
        "Technology"
    ]

    def __init__(self):
        """Create the underlying ``NewsApiClient``.

        The key comes from ``os.environ.get("NEWS_API_KEY")``, so it is
        ``None`` when the variable is not set; no validation happens here.
        """
        newsapi_key = os.environ.get("NEWS_API_KEY")
        self.newsapi = NewsApiClient(api_key=newsapi_key)

    def get_sources(self, category=None):
        """Return the set of English-language US source names.

        Args:
            category: Optional category name. It is lower-cased before being
                sent to the API; a falsy value is passed through unchanged.

        Returns:
            A ``set`` of source names, excluding those listed in
            ``__EX_SOURCES__``.
        """
        sources = self.newsapi.get_sources(
            language="en",
            country="us",
            category=category.lower() if category else category,
        )["sources"]
        return {
            source["name"]
            for source in sources
            if source["name"] not in self.__EX_SOURCES__
        }

    def get_top_headlines(self, num_headlines=5, category=None):
        """Return top headlines, each enriched with the article text.

        Args:
            num_headlines: Value passed to the API as ``page_size``.
            category: Optional category used to select the sources.

        Returns:
            The list of article dicts from the API response, post-processed
            by ``_get_articles_from_headlines``.
        """
        # get_sources() already lower-cases the category, so pass it as is.
        sources = self.get_sources(category)
        # NOTE(review): these are source *names*; the News API documents the
        # ``sources`` parameter as a comma-separated list of source ids --
        # confirm that names are accepted.
        headlines = self.newsapi.get_top_headlines(
            sources=", ".join(sources),
            page_size=num_headlines,
            language="en",
        )["articles"]
        return self._get_articles_from_headlines(headlines)

    def get_headlines(self, num_headlines=5, query=None):
        """Search all articles and return them enriched with article text.

        Args:
            num_headlines: Value passed to the API as ``page_size``.
            query: Search phrase passed to the API as ``q``.

        Returns:
            The list of article dicts from the API response, post-processed
            by ``_get_articles_from_headlines``.
        """
        sources = self.get_sources()
        # NOTE(review): source names are sent where the API documents source
        # ids -- same caveat as in get_top_headlines.
        headlines = self.newsapi.get_everything(
            q=query,
            sources=", ".join(sources),
            page_size=num_headlines,
            # Was misspelled ``lanuguage``, which made every call fail with
            # TypeError (unexpected keyword argument).
            language="en",
        )["articles"]
        return self._get_articles_from_headlines(headlines)

    def _get_articles_from_headlines(self, headlines):
        """Normalise article dicts in place and attach the downloaded text.

        For every dict: the ``author`` key is removed, ``source`` is replaced
        by the source's ``name``, and ``content`` is overwritten with the
        text extracted from the page at ``url``.

        Args:
            headlines: Iterable of article dicts as returned by the API.

        Returns:
            The same ``headlines`` object, mutated in place.
        """
        for headline in headlines:
            # pop() instead of del: tolerate entries without an "author" key.
            headline.pop("author", None)
            headline["source"] = headline["source"]["name"]
            article = Article(headline["url"])
            article.download()
            article.parse()
            headline["content"] = article.text
        return headlines