File size: 3,287 Bytes
c74b98b
0a59f74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5222849
0a59f74
5222849
 
0a59f74
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import os
import json
import arxiv

# import packages that are used in our tools
import requests
from bs4 import BeautifulSoup
from huggingface_hub import HfApi
from pypdf import PdfReader
from smolagents import CodeAgent, HfApiModel, tool


@tool
def get_hugging_face_top_daily_paper() -> str:
    """
    This is a tool that returns the most upvoted paper on Hugging Face daily papers.
    It returns the title of the paper
    """
    try:
        url = "<https://huggingface.co/papers>"
        response = requests.get(url)
        response.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
        soup = BeautifulSoup(response.content, "html.parser")

        # Extract the title element from the JSON-like data in the "data-props" attribute
        containers = soup.find_all('div', class_='SVELTE_HYDRATER contents')
        top_paper = ""

        for container in containers:
            data_props = container.get('data-props', '')
            if data_props:
                try:
                    # Parse the JSON-like string
                    json_data = json.loads(data_props.replace('&quot;', '"'))
                    if 'dailyPapers' in json_data:
                        top_paper = json_data['dailyPapers'][0]['title']
                except json.JSONDecodeError:
                    continue

        return top_paper
    except requests.exceptions.RequestException as e:
        print(f"Error occurred while fetching the HTML: {e}")
        return ''


@tool
def get_paper_id_by_title(title: str) -> str:
    """
    This is a tool that returns the arxiv paper id by its title.
    It returns the title of the paper

    Args:
        title: The paper title for which to get the id.
    """
    api = HfApi()
    papers = api.list_papers(query=title)
    if papers:
        paper = next(iter(papers))
        return paper.id
    else:
        return ''


@tool
def download_paper_by_id(paper_id: str) -> None:
    """
    This tool gets the id of a paper and downloads it from arxiv. It saves the paper locally 
    in the current directory as "paper.pdf".

    Args:
        paper_id: The id of the paper to download.
    """
    paper = next(arxiv.Client().results(arxiv.Search(id_list=[paper_id])))
    paper.download_pdf(filename="paper.pdf")
    return None


@tool
def read_pdf_file(file_path: str) -> str:
    """
    This function reads the first three pages of a PDF file and returns its content as a string.
    Args:
        file_path: The path to the PDF file.
    Returns:
        A string containing the content of the PDF file.
    """
    content = ""
    reader = PdfReader('paper.pdf')
    print(len(reader.pages))
    pages = reader.pages[:3]
    for page in pages:
        content += page.extract_text()
    return content


model_id = "Qwen/Qwen2.5-Coder-32B-Instruct"
hf_token = os.environ["HF_TOKEN"]


model = HfApiModel(model_id=model_id, token=hf_token)
agent = CodeAgent(tools=[get_hugging_face_top_daily_paper,
                         get_paper_id_by_title,
                         download_paper_by_id,
                         read_pdf_file],
                  model=model,
                  add_base_tools=True)

agent.run(
    "Summarize today's top paper on Hugging Face daily papers by reading it.",
)