import requests
from bs4 import BeautifulSoup
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

app = FastAPI()

# Load the summarization model once at import time so individual requests
# don't pay the model-loading cost. AutoModelWithLMHead is deprecated in
# recent transformers releases; AutoModelForSeq2SeqLM is the equivalent
# class for seq2seq models such as T5.
tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-summarize-news")
model = AutoModelForSeq2SeqLM.from_pretrained("mrm8488/t5-base-finetuned-summarize-news")


def summarize(text, max_length=150):
    """Summarize `text` with the local T5 model and return the best beam."""
    input_ids = tokenizer.encode(text, return_tensors="pt", add_special_tokens=True)
    generated_ids = model.generate(
        input_ids=input_ids,
        num_beams=2,
        max_length=max_length,
        repetition_penalty=2.5,
        length_penalty=1.0,
        early_stopping=True,
    )
    preds = [
        tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=True)
        for g in generated_ids
    ]
    return preds[0]
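
# Illustrative call (the text below is a made-up placeholder, not from any
# real article):
#   summary = summarize("The city council voted on Tuesday to approve ...", max_length=60)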


@app.get("/")
async def root():
    return {"status": "OK"}

class Item(BaseModel):
    url: str
    percentage: int  # accepted in the request payload but not currently used


def extract_article_content(url):
    """Fetch `url` and return the text of its <h1> and <p> tags as one string."""
    try:
        response = requests.get(url)
        soup = BeautifulSoup(response.text, "html.parser")
        results = soup.find_all(["h1", "p"])
        text = [result.text for result in results]
        return " ".join(text)
    except Exception:
        # Treat any network or parsing failure as "no content".
        return ""



@app.post("/summarize-v2")
async def root(item: Item):
    
    try:

        article = extract_article_content(item.url)

        if len(article) == 0:
            return {'summary': ""}
        response = requests.post('https://fumes-api.onrender.com/llama3',
        json={'prompt': "{ 'User': 'Summarize the following news article: '" + article + "}",
        "temperature":0.6,
        "topP":0.9,
        "maxTokens": 200}, stream=True)

        response_content = response.content.decode('utf-8')

        response_content = response_content.replace("Here is a summary of the news article:", "")
        response_content = response_content.replace("YOU CAN BUY ME COFFE! https://buymeacoffee.com/mygx", "")

        #return {clean_response}
        return {
                "summary":response_content}
        
    except requests.RequestException as e:
        return {"error": str(e), "status_code": 500}



    
@app.post("/extract-content")
async def root(item: Item):
    
    try:

        article = extract_article_content(item.url)

        if len(article) == 0:
            return {'ERROR': "AHHHHHHHHH"}
        
        return {
                "content":article}
        
    except requests.RequestException as e:
        return {"error": str(e), "status_code": 500}