Spaces:

JeffMII
/

CEC-Learning

Running

File size: 2,063 Bytes

from newsapi import NewsApiClient
from newspaper import Article
import os

# Public API of this module. ``__all__`` is the name Python consults for
# ``from <module> import *``; ``__export__`` has no special meaning to the
# interpreter and is kept only as a backward-compatible alias.
__all__ = ["News"]
__export__ = __all__

class News:
    """Wrapper around a NewsAPI client that also fetches full article text.

    Headlines come from ``NewsApiClient``; each returned article record is
    then enriched with the text extracted by ``newspaper.Article``.
    """

    # Source names removed from every listing returned by ``get_sources``.
    __EX_SOURCES__ = ["ABC News", "Bloomberg", "The Hill", "Fox Sports", "Google News", "Newsweek"]
    # Category labels. Not referenced inside this class; ``get_sources``
    # lower-cases whatever category it is given before calling the API.
    __CATEGORIES__ = [
        "General",
        "Business",
        "Entertainment",
        "Health",
        "Science",
        "Technology"
    ]

    def __init__(self):
        """Build the NewsAPI client from the ``NEWS_API_KEY`` environment variable."""
        newsapi_key = os.environ.get("NEWS_API_KEY")
        self.newsapi = NewsApiClient(api_key=newsapi_key)

    def get_sources(self, category=None):
        """Return the names of English-language US sources, minus exclusions.

        Args:
            category: Optional category name in any casing. It is lower-cased
                before being sent; falsy values are passed through unchanged.

        Returns:
            A set of source-name strings that are not in ``__EX_SOURCES__``.
        """
        response = self.newsapi.get_sources(
            language="en",
            country="us",
            category=category.lower() if category else category,
        )
        return {
            source["name"]
            for source in response["sources"]
            if source["name"] not in self.__EX_SOURCES__
        }

    def get_top_headlines(self, num_headlines=5, category=None):
        """Fetch top headlines and attach the full text of each article.

        Args:
            num_headlines: Page size requested from the API.
            category: Optional category used to pick the sources queried.

        Returns:
            The list of article dicts as reshaped by
            ``_get_articles_from_headlines``.
        """
        # ``get_sources`` already normalises the category's case.
        sources = self.get_sources(category)

        # NOTE(review): NewsAPI documents ``sources`` as a comma-separated
        # list of source identifiers; this sends display names joined with
        # ", " -- confirm the API accepts that form.
        headlines = self.newsapi.get_top_headlines(
            sources=", ".join(sources),
            page_size=num_headlines,
            language="en",
        )["articles"]

        return self._get_articles_from_headlines(headlines)

    def get_headlines(self, num_headlines=5, query=None):
        """Search all articles for ``query`` and attach each article's full text.

        Args:
            num_headlines: Page size requested from the API.
            query: Search expression forwarded as the ``q`` parameter.

        Returns:
            The list of article dicts as reshaped by
            ``_get_articles_from_headlines``.
        """
        sources = self.get_sources()

        # NOTE(review): same caveat as in ``get_top_headlines`` about sending
        # source names rather than identifiers -- confirm.
        headlines = self.newsapi.get_everything(
            q=query,
            sources=", ".join(sources),
            page_size=num_headlines,
            # Was misspelt ``lanuguage``, which made this call raise TypeError.
            language="en",
        )["articles"]

        return self._get_articles_from_headlines(headlines)

    def _get_articles_from_headlines(self, headlines):
        """Reshape API article records in place and add the full article text.

        For every record: drop ``author``, flatten ``source`` to its name, and
        overwrite ``content`` with the text parsed from the article URL.

        Args:
            headlines: List of article dicts as returned by the API.

        Returns:
            The same list object, with each dict mutated.
        """
        for headline in headlines:
            # Tolerate records that carry no ``author`` key at all.
            headline.pop("author", None)

            headline["source"] = headline["source"]["name"]

            # Download and parse the page so ``content`` holds the text
            # extracted by newspaper instead of the API-provided value.
            article = Article(headline["url"])
            article.download()
            article.parse()

            headline["content"] = article.text

        return headlines