Spaces:
Running
Running
from newsapi import NewsApiClient | |
from newspaper import Article | |
import os | |
# Public API of this module. Python only consults ``__all__`` when resolving
# ``from module import *``; ``__export__`` has no special meaning to the
# interpreter and is kept purely as an alias for any code that reads it.
__all__ = ["News"]
__export__ = __all__
class News:
    """Small convenience wrapper around ``NewsApiClient`` and ``Article``.

    It lists English-language US sources, fetches headlines restricted to
    those sources, and fills in the full article text for each headline.
    """

    # Source display names that are filtered out of every source listing.
    __EX_SOURCES__ = {"ABC News", "Bloomberg", "The Hill", "Fox Sports", "Google News"}

    # Category names; not referenced by the methods visible in this class.
    __CATEGORIES__ = {
        "business",
        "entertainment",
        "general",
        "health",
        "science",
        "sports",
        "technology",
    }

    def __init__(self):
        """Create the API client using the ``NEWS_API_KEY`` environment variable.

        If the variable is unset, ``None`` is handed to ``NewsApiClient`` as
        the key.
        """
        newsapi_key = os.environ.get("NEWS_API_KEY")
        self.newsapi = NewsApiClient(api_key=newsapi_key)

    def _fetch_sources(self, category=None):
        """Return the source records for *category*, minus excluded sources.

        Each record is the dict delivered by the client's ``get_sources``
        call (queried with ``language="en"`` and ``country="us"``); records
        whose ``"name"`` is in ``__EX_SOURCES__`` are dropped.
        """
        response = self.newsapi.get_sources(
            language="en", country="us", category=category
        )
        return [
            record
            for record in response["sources"]
            if record["name"] not in self.__EX_SOURCES__
        ]

    def _source_ids(self, category=None):
        """Return the allowed source identifiers as one comma-separated string.

        The headline endpoints take source *identifiers* (the ``"id"`` field),
        not display names, separated by bare commas. Identifiers are sorted so
        the resulting query string is deterministic.
        """
        ids = {
            record["id"]
            for record in self._fetch_sources(category)
            if record.get("id")
        }
        return ",".join(sorted(ids))

    def get_sources(self, category=None):
        """Return the set of allowed source display names.

        Args:
            category: Optional category forwarded to the client's
                ``get_sources`` call; ``None`` applies no category filter.

        Returns:
            A ``set`` of source names, excluding those in ``__EX_SOURCES__``.
        """
        return {record["name"] for record in self._fetch_sources(category)}

    def get_top_headlines(self, num_headlines=None, category=None):
        """Return top headlines from the allowed sources.

        Args:
            num_headlines: Forwarded to the client as ``page_size``.
            category: Optional category used to select which sources are
                queried.

        Returns:
            The ``"articles"`` value of the client's response.
        """
        response = self.newsapi.get_top_headlines(
            sources=self._source_ids(category=category),
            page_size=num_headlines,
        )
        return response["articles"]

    def get_headlines(self, num_headlines=None, query=None):
        """Return articles matching *query* from the allowed sources.

        Args:
            num_headlines: Forwarded to the client as ``page_size``.
            query: Forwarded to the client as ``q``.

        Returns:
            The ``"articles"`` value of the client's response.
        """
        response = self.newsapi.get_everything(
            q=query,
            sources=self._source_ids(),
            page_size=num_headlines,
        )
        return response["articles"]

    def get_articles_from_headlines(self, headlines):
        """Download each headline's article and attach its text in place.

        For every headline dict: the page at ``headline["url"]`` is downloaded
        and parsed, its text is stored under ``"content"``, the nested
        ``"source"`` record is replaced by its ``"name"``, and the ``"author"``
        key is removed.

        Any exception raised while downloading or parsing an article
        propagates to the caller.

        Args:
            headlines: Iterable of headline dicts; they are mutated in place.

        Returns:
            The same *headlines* object that was passed in.
        """
        for headline in headlines:
            article = Article(headline["url"])
            article.download()
            article.parse()
            headline["content"] = article.text

            # Only flatten a nested source record; a headline that was already
            # processed holds a plain string here and must be left alone.
            source = headline["source"]
            if isinstance(source, dict):
                headline["source"] = source["name"]

            # Tolerate headlines that do not carry an "author" key.
            headline.pop("author", None)
        return headlines