Spaces:
Running
Running
File size: 2,083 Bytes
52321a8 e280ecd 64fde65 363fea7 30927bb 7ba0657 64fde65 52321a8 64fde65 52321a8 64fde65 52321a8 64fde65 52321a8 64fde65 52321a8 64fde65 52321a8 64fde65 52321a8 64fde65 52321a8 64fde65 52321a8 64fde65 52321a8 64fde65 52321a8 64fde65 52321a8 64fde65 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
from newsapi import NewsApiClient
from newspaper import Article
import os
# NOTE(review): the interpreter consults `__all__` (not `__export__`) when
# resolving `from module import *`, so as written this list is only an ordinary
# module attribute — confirm whether `__all__` was intended.
__export__ = ["News"]
class News:
    """Fetch headlines through a NewsAPI client and attach full article text.

    Each public query method returns the list of article dicts produced by the
    client, post-processed by ``_get_articles_from_headlines``.
    """

    # Source display names that are removed from every source listing.
    __EX_SOURCES__ = ["ABC News", "Bloomberg", "The Hill", "Fox Sports", "Google News", "Newsweek", "Politico"]
    # Category labels; the commented-out entries are currently disabled.
    # Not referenced inside this class — presumably read by callers.
    __CATEGORIES__ = [
        "General",
        # "Business",
        # "Entertainment",
        # "Health",
        # "Science",
        "Technology"
    ]

    def __init__(self):
        """Build the API client from the ``NEWS_API_KEY`` environment variable."""
        # os.environ.get yields None when the variable is unset; the value is
        # handed to the client unchanged.
        newsapi_key = os.environ.get("NEWS_API_KEY")
        self.newsapi = NewsApiClient(api_key=newsapi_key)

    def get_sources(self, category=None):
        """Return the set of usable source names.

        Args:
            category: Optional category label; lower-cased before it is sent
                to the client. Falsy values are passed through untouched.

        Returns:
            A set of source names (English language, US) with every entry of
            ``__EX_SOURCES__`` removed.
        """
        normalized = category.lower() if category else category
        response = self.newsapi.get_sources(language="en", country="us", category=normalized)
        return {
            source["name"]
            for source in response["sources"]
            if source["name"] not in self.__EX_SOURCES__
        }

    def get_top_headlines(self, num_headlines=5, category=None):
        """Return top headlines, optionally restricted to one category.

        Args:
            num_headlines: Page size requested from the client.
            category: Optional category label used to select the sources.

        Returns:
            The list of article dicts, enriched in place with article text.
        """
        # get_sources already lower-cases the category, so it is passed as-is.
        sources = self.get_sources(category)
        # NOTE(review): NewsAPI documents `sources` as a comma-separated list of
        # source identifiers; display names are sent here — confirm.
        headlines = self.newsapi.get_top_headlines(
            # Sorted so the request does not depend on set iteration order.
            sources=", ".join(sorted(sources)),
            page_size=num_headlines,
            language="en",
        )["articles"]
        return self._get_articles_from_headlines(headlines)

    def get_headlines(self, num_headlines=5, query=None):
        """Return articles matching ``query`` from the allowed sources.

        Args:
            num_headlines: Page size requested from the client.
            query: Search expression forwarded as ``q``.

        Returns:
            The list of article dicts, enriched in place with article text.
        """
        sources = self.get_sources()
        headlines = self.newsapi.get_everything(
            q=query,
            sources=", ".join(sorted(sources)),
            page_size=num_headlines,
            # Was misspelled `lanuguage`, which made every call raise TypeError.
            language="en",
        )["articles"]
        return self._get_articles_from_headlines(headlines)

    def _get_articles_from_headlines(self, headlines):
        """Normalize each headline dict in place and attach the article body.

        For every entry: drops ``author``, replaces the ``source`` mapping by
        its ``name``, and overwrites ``content`` with the text extracted from
        the page at ``url``.

        Returns:
            The same list object that was passed in.
        """
        for headline in headlines:
            # Tolerate records that carry no "author" key at all.
            headline.pop("author", None)
            headline["source"] = headline["source"]["name"]
            article = Article(headline["url"])
            article.download()
            article.parse()
            headline["content"] = article.text
        return headlines
|