Spaces:

JeffMII
/

CEC-Learning

Running

File size: 1,894 Bytes

52321a8

from newsapi import NewsApiClient
from newspaper import Article
import os

# NOTE(review): Python gives no special meaning to `__export__`; the name that
# controls `from <module> import *` is `__all__`. Presumably `__all__` was
# intended -- confirm before renaming, because defining `__all__` would narrow
# what a star-import of this module exposes.
__export__ = ["News"]

class News:
    """Fetch headlines from NewsAPI and the full article text behind them.

    Headlines are retrieved through ``NewsApiClient``; article bodies are
    downloaded and parsed with ``newspaper.Article``. Only English-language
    US sources are queried, minus the outlets named in ``__EX_SOURCES__``.
    """

    # Display names of outlets that are never queried.
    __EX_SOURCES__ = {"ABC News", "Bloomberg", "The Hill", "Fox Sports", "Google News"}
    # Category names. Not referenced by the methods of this class; presumably
    # the values callers may pass as ``category`` -- confirm against callers.
    __CATEGORIES__ = {
        "business",
        "entertainment",
        "general",
        "health",
        "science",
        "sports",
        "technology"
    }

    def __init__(self, api_key=None):
        """Create the underlying NewsAPI client.

        Args:
            api_key: NewsAPI key. When omitted, the value of the
                ``NEWS_API_KEY`` environment variable is used.
        """
        if api_key is None:
            api_key = os.environ.get("NEWS_API_KEY")
        self.newsapi = NewsApiClient(api_key=api_key)

    def _fetch_source_records(self, category=None):
        """Return the English/US source records whose name is not excluded."""
        response = self.newsapi.get_sources(language="en", country="us", category=category)
        return [
            record
            for record in response["sources"]
            if record["name"] not in self.__EX_SOURCES__
        ]

    def _source_ids_param(self, category=None):
        """Return the allowed source IDs as the comma-separated string the API takes.

        The headline endpoints identify sources by their ``id`` (e.g.
        ``"abc-news"``), not by their display name, so IDs are what is sent.
        They are sorted so the request is deterministic.
        """
        records = self._fetch_source_records(category=category)
        return ",".join(sorted(record["id"] for record in records if record.get("id")))

    def get_sources(self, category=None):
        """Return the display names of the sources that may be queried.

        Args:
            category: Optional category forwarded to the sources lookup.

        Returns:
            A set of source names with the entries of ``__EX_SOURCES__``
            removed.
        """
        return {record["name"] for record in self._fetch_source_records(category=category)}

    def get_top_headlines(self, num_headlines=None, category=None):
        """Return top headlines from the allowed sources.

        Args:
            num_headlines: Forwarded as ``page_size``; ``None`` leaves the
                page size to the client.
            category: Optional category used to select the sources.

        Returns:
            The ``"articles"`` list of the API response, or an empty list
            when no source remains after applying the exclusions.
        """
        source_ids = self._source_ids_param(category=category)
        if not source_ids:
            # An empty ``sources`` value is not a meaningful request.
            return []

        response = self.newsapi.get_top_headlines(
            sources=source_ids,
            page_size=num_headlines
        )
        return response["articles"]

    def get_headlines(self, num_headlines=None, query=None):
        """Search all articles of the allowed sources.

        Args:
            num_headlines: Forwarded as ``page_size``; ``None`` leaves the
                page size to the client.
            query: Search phrase forwarded as ``q``.

        Returns:
            The ``"articles"`` list of the API response, or an empty list
            when no source remains after applying the exclusions (searching
            without a source restriction would bypass the exclusion list).
        """
        source_ids = self._source_ids_param()
        if not source_ids:
            return []

        response = self.newsapi.get_everything(
            q=query,
            sources=source_ids,
            page_size=num_headlines
        )
        return response["articles"]

    @staticmethod
    def _merge_article_text(headline, text):
        """Store *text* on *headline* and flatten it; returns the same dict.

        ``content`` is overwritten with *text*, a dict-valued ``source`` is
        replaced by its ``name``, and ``author`` is dropped if present.
        """
        headline["content"] = text
        source = headline.get("source")
        # Only flatten when still a dict, so a headline that was already
        # processed can be passed through again without raising.
        if isinstance(source, dict):
            headline["source"] = source.get("name")
        # ``pop`` with a default: the key may be absent.
        headline.pop("author", None)
        return headline

    def get_articles_from_headlines(self, headlines):
        """Download each headline's article and attach its full text.

        The dicts in *headlines* are modified in place: ``content`` becomes
        the parsed article text, ``source`` is reduced to the source name and
        ``author`` is removed.

        Args:
            headlines: Iterable of headline dicts, each with a ``"url"`` key.

        Returns:
            The same *headlines* object that was passed in.

        Raises:
            Whatever ``Article.download()`` / ``Article.parse()`` raise is
            propagated unchanged.
        """
        for headline in headlines:
            article = Article(headline["url"])
            article.download()
            article.parse()
            self._merge_article_text(headline, article.text)

        return headlines