Spaces:
Running
Running
File size: 1,894 Bytes
52321a8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
import logging
import os

from newsapi import NewsApiClient
from newspaper import Article
# Names this module intends to expose.
# NOTE(review): Python's `from module import *` consults `__all__`, not
# `__export__`, so this list has no effect on imports unless other code reads
# it explicitly -- confirm whether `__all__` was intended.
__export__ = ["News"]
class News:
    """Fetch headlines through NewsAPI and attach full article text.

    The NewsAPI client is created from the ``NEWS_API_KEY`` environment
    variable. Article bodies are downloaded and parsed with
    ``newspaper.Article``.
    """

    # Source names that are dropped from every source listing.
    __EX_SOURCES__ = {
        "ABC News",
        "Bloomberg",
        "The Hill",
        "Fox Sports",
        "Google News",
    }
    # Category names; not referenced by any method of this class.
    # NOTE(review): presumably the categories NewsAPI accepts -- confirm.
    __CATEGORIES__ = {
        "business",
        "entertainment",
        "general",
        "health",
        "science",
        "sports",
        "technology",
    }

    def __init__(self):
        """Create the NewsAPI client from the ``NEWS_API_KEY`` variable."""
        # ``os.environ.get`` yields None when the variable is unset; the value
        # is handed to the client as-is, without validation here.
        newsapi_key = os.environ.get("NEWS_API_KEY")
        self.newsapi = NewsApiClient(api_key=newsapi_key)

    def get_sources(self, category=None):
        """Return the names of English-language US sources, minus exclusions.

        Args:
            category: Optional category forwarded to the NewsAPI client.

        Returns:
            A set of source names that are not listed in ``__EX_SOURCES__``.
        """
        response = self.newsapi.get_sources(
            language="en", country="us", category=category
        )
        names = {
            source["name"]
            for source in response["sources"]
            if source["name"] not in self.__EX_SOURCES__
        }
        # Debug-level log instead of an unconditional print to stdout.
        logging.getLogger(__name__).debug("Selected news sources: %s", names)
        return names

    def get_top_headlines(self, num_headlines=None, category=None):
        """Return top headlines from the non-excluded sources of a category.

        Args:
            num_headlines: Forwarded to the client as ``page_size``.
            category: Optional category used to select the sources.

        Returns:
            The ``"articles"`` entry of the client's response.
        """
        sources = self.get_sources(category=category)
        # NOTE(review): ``get_sources`` yields source *names*, while NewsAPI
        # documents ``sources`` as a comma-separated list of source ids --
        # confirm that names joined with ", " are accepted.
        response = self.newsapi.get_top_headlines(
            sources=", ".join(sources),
            page_size=num_headlines,
        )
        return response["articles"]

    def get_headlines(self, num_headlines=None, query=None):
        """Search all articles from the non-excluded sources.

        Args:
            num_headlines: Forwarded to the client as ``page_size``.
            query: Forwarded to the client as ``q``.

        Returns:
            The ``"articles"`` entry of the client's response.
        """
        sources = self.get_sources()
        # NOTE(review): same names-versus-ids question as in
        # ``get_top_headlines`` -- confirm against the NewsAPI reference.
        response = self.newsapi.get_everything(
            q=query,
            sources=", ".join(sources),
            page_size=num_headlines,
        )
        return response["articles"]

    def get_articles_from_headlines(self, headlines):
        """Download each headline's article and attach its text in place.

        Every headline dict must provide ``"url"`` and ``"source"``. Each dict
        is mutated: ``"content"`` is set to the parsed article text,
        ``"source"`` is reduced to the source name, and ``"author"`` is
        removed when present.

        Errors raised while downloading or parsing an article are not caught
        here and propagate to the caller.

        Args:
            headlines: Iterable of headline dicts.

        Returns:
            The same ``headlines`` object that was passed in.
        """
        for headline in headlines:
            article = Article(headline["url"])
            article.download()
            article.parse()
            self._attach_article_text(headline, article.text)
        return headlines

    @staticmethod
    def _attach_article_text(headline, text):
        """Store ``text`` on ``headline`` and flatten its metadata in place."""
        headline["content"] = text
        source = headline["source"]
        # A headline that was already processed carries the plain source name;
        # leave it untouched instead of indexing into a string.
        if not isinstance(source, str):
            headline["source"] = source["name"]
        # Not every headline has an "author" key, so drop it only if present.
        headline.pop("author", None)
        return headline
|