File size: 2,063 Bytes
52321a8
 
 
 
 
 
 
64fde65
 
 
 
 
 
 
 
 
52321a8
 
 
 
 
 
64fde65
52321a8
 
 
64fde65
 
52321a8
 
 
64fde65
 
52321a8
 
64fde65
 
52321a8
 
64fde65
52321a8
 
 
 
 
64fde65
 
52321a8
 
64fde65
 
52321a8
 
64fde65
52321a8
64fde65
 
 
 
52321a8
 
 
64fde65
52321a8
64fde65
52321a8
64fde65
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from newsapi import NewsApiClient
from newspaper import Article
import os

# Names this module intends to expose publicly.
# NOTE(review): the interpreter only consults `__all__` when resolving
# `from module import *`; `__export__` has no special meaning to Python.
# Presumably `__all__` was intended — confirm before renaming, since doing so
# narrows what a star-import exposes.
__export__ = ["News"]

class News:
    """Fetch headlines through ``NewsApiClient`` and attach full article text.

    Headlines are restricted to English-language US sources as reported by
    the client's ``get_sources`` call, minus the names in ``__EX_SOURCES__``.
    Each returned headline dict has its ``content`` replaced by the text
    extracted from the article URL with ``newspaper.Article``.
    """

    # Source names filtered out of every result of ``get_sources``.
    __EX_SOURCES__ = ["ABC News", "Bloomberg", "The Hill", "Fox Sports", "Google News", "Newsweek"]
    # Category labels; not referenced by the methods of this class. Methods
    # lower-case whatever category they are given before calling the API.
    __CATEGORIES__ = [
        "General",
        "Business",
        "Entertainment",
        "Health",
        "Science",
        "Technology",
    ]

    def __init__(self):
        """Create the API client using the ``NEWS_API_KEY`` environment variable.

        ``os.environ.get`` yields ``None`` when the variable is unset; that
        value is handed to ``NewsApiClient`` unchanged.
        """
        newsapi_key = os.environ.get("NEWS_API_KEY")
        self.newsapi = NewsApiClient(api_key=newsapi_key)

    def get_sources(self, category=None):
        """Return the set of allowed source names.

        Args:
            category: Optional category label. A truthy value is lower-cased
                before being sent to the API; a falsy value is passed as-is.

        Returns:
            A ``set`` of source ``name`` strings for ``language="en"`` and
            ``country="us"``, excluding every name in ``__EX_SOURCES__``.
        """
        api_category = category.lower() if category else category
        response = self.newsapi.get_sources(
            language="en",
            country="us",
            category=api_category,
        )
        return {
            source["name"]
            for source in response["sources"]
            if source["name"] not in self.__EX_SOURCES__
        }

    def _sources_param(self, category=None):
        """Build the ``sources`` request string from the allowed source names."""
        # Sorted so the request is reproducible; a set has no stable order.
        # NOTE(review): this sends source *names*; the News API documentation
        # describes ``sources`` as a comma-separated list of source ids —
        # confirm whether ``source["id"]`` should be used here instead.
        return ", ".join(sorted(self.get_sources(category)))

    def get_top_headlines(self, num_headlines=5, category=None):
        """Return top headlines from the allowed sources, with article text.

        Args:
            num_headlines: Passed to the API as ``page_size``.
            category: Optional category label used to narrow the source list
                (case-insensitive; ``get_sources`` lower-cases it).

        Returns:
            The ``articles`` list from the API response after processing by
            ``_get_articles_from_headlines``.
        """
        response = self.newsapi.get_top_headlines(
            sources=self._sources_param(category),
            page_size=num_headlines,
            language="en",
        )
        return self._get_articles_from_headlines(response["articles"])

    def get_headlines(self, num_headlines=5, query=None):
        """Search all articles from the allowed sources, with article text.

        Args:
            num_headlines: Passed to the API as ``page_size``.
            query: Search string passed to the API as ``q``.

        Returns:
            The ``articles`` list from the API response after processing by
            ``_get_articles_from_headlines``.
        """
        response = self.newsapi.get_everything(
            q=query,
            sources=self._sources_param(),
            page_size=num_headlines,
            # Was misspelled ``lanuguage``, which is rejected as an unexpected
            # keyword argument.
            language="en",
        )
        return self._get_articles_from_headlines(response["articles"])

    def _get_articles_from_headlines(self, headlines):
        """Normalise headline dicts in place and attach the full article text.

        For every headline: the ``author`` key is removed (if present), the
        ``source`` mapping is replaced by its ``name``, and ``content`` is
        overwritten with the text parsed from ``headline["url"]``.

        Args:
            headlines: List of article dicts as found under ``"articles"``
                in the API response.

        Returns:
            The same list object, with each dict mutated as described.
        """
        for headline in headlines:
            # Tolerate entries that carry no author field at all.
            headline.pop("author", None)
            headline["source"] = headline["source"]["name"]

            # NOTE(review): a failed download presumably surfaces as an
            # exception from ``parse()`` and aborts the whole batch — confirm
            # whether a per-article fallback is wanted.
            article = Article(headline["url"])
            article.download()
            article.parse()
            headline["content"] = article.text

        return headlines