CEC-Learning / News.py
Jeff Myers II
Update space
52321a8
raw
history blame
1.89 kB
import logging
import os

from newsapi import NewsApiClient
from newspaper import Article
# NOTE(review): Python's star-import machinery only consults ``__all__``;
# ``__export__`` has no special meaning to the interpreter. Confirm whether
# ``__all__`` was intended before relying on this to limit the public API.
__export__ = ["News"]
class News:
    """Fetch NewsAPI headlines and fill in article text via ``newspaper``.

    Wraps a ``NewsApiClient`` (stored on ``self.newsapi``) for source and
    headline lookups, and uses ``newspaper.Article`` to download and parse
    the page behind each headline URL.
    """

    # Display names of sources that ``get_sources`` always filters out.
    __EX_SOURCES__ = {"ABC News", "Bloomberg", "The Hill", "Fox Sports", "Google News"}

    # Category names. Not referenced by any method of this class; presumably
    # the values accepted for the ``category`` arguments below -- confirm.
    __CATEGORIES__ = {
        "business",
        "entertainment",
        "general",
        "health",
        "science",
        "sports",
        "technology",
    }

    def __init__(self):
        """Build the NewsAPI client from the ``NEWS_API_KEY`` environment variable."""
        # ``os.environ.get`` yields None when the variable is unset; the client
        # is still constructed in that case, with ``api_key=None``.
        newsapi_key = os.environ.get("NEWS_API_KEY")
        self.newsapi = NewsApiClient(api_key=newsapi_key)

    def get_sources(self, category=None):
        """Return the names of English-language US sources, minus exclusions.

        Args:
            category: Optional category forwarded unchanged to the client's
                ``get_sources`` call.

        Returns:
            A set holding the ``name`` of every source in the response that
            is not listed in ``__EX_SOURCES__``.
        """
        response = self.newsapi.get_sources(language="en", country="us", category=category)
        sources = {
            source["name"]
            for source in response["sources"]
            if source["name"] not in self.__EX_SOURCES__
        }
        # Debug-level logging instead of an unconditional print, so library
        # callers do not get stdout noise on every headline request.
        logging.getLogger(__name__).debug("NewsAPI sources: %s", sorted(sources))
        return sources

    def _joined_sources(self, category=None):
        """Return the filtered source names as one ``", "``-separated string."""
        # Sorted so the request string does not depend on set iteration order.
        # NOTE(review): NewsAPI documents ``sources`` as a list of source
        # identifiers (the ``id`` field), while this sends display names --
        # confirm the API accepts them.
        return ", ".join(sorted(self.get_sources(category=category)))

    def get_top_headlines(self, num_headlines=None, category=None):
        """Return top-headline articles from the filtered sources.

        Args:
            num_headlines: Forwarded to the client as ``page_size``.
            category: Used only to select sources through ``get_sources``;
                it is not passed to the headline request itself.

        Returns:
            The ``"articles"`` value of the client's ``get_top_headlines``
            response.
        """
        response = self.newsapi.get_top_headlines(
            sources=self._joined_sources(category=category),
            page_size=num_headlines,
        )
        return response["articles"]

    def get_headlines(self, num_headlines=None, query=None):
        """Return articles matching ``query`` from the filtered sources.

        Args:
            num_headlines: Forwarded to the client as ``page_size``.
            query: Forwarded to the client as ``q``.

        Returns:
            The ``"articles"`` value of the client's ``get_everything``
            response.
        """
        response = self.newsapi.get_everything(
            q=query,
            sources=self._joined_sources(),
            page_size=num_headlines,
        )
        return response["articles"]

    def get_articles_from_headlines(self, headlines):
        """Download each headline's page and attach its parsed text.

        Each dict is modified in place: ``"content"`` is overwritten with the
        parsed article text, ``"source"`` is collapsed from a dict to its
        ``"name"`` value, and ``"author"`` is removed.

        Args:
            headlines: Iterable of article dicts, each with a ``"url"`` key.

        Returns:
            The same ``headlines`` object that was passed in.

        Raises:
            KeyError: If a headline has no ``"url"`` key. Anything raised by
                ``Article.download`` or ``Article.parse`` also propagates and
                stops the loop.
        """
        for headline in headlines:
            article = Article(headline["url"])
            article.download()
            article.parse()
            headline["content"] = article.text

            # Flatten only while "source" is still the nested dict, so a
            # headline that has already been processed can be passed again.
            source = headline.get("source")
            if isinstance(source, dict):
                headline["source"] = source["name"]

            # pop() rather than del: tolerates dicts without an "author" key.
            headline.pop("author", None)
        return headlines