Spaces:
Running
Running
File size: 19,464 Bytes
ad66b01 61b2353 9764706 2e329bd 61b2353 9513d18 61b2353 62eaf4f 61b2353 a13fabc 61b2353 2e329bd 61b2353 2e329bd 61b2353 9513d18 ca2c7e8 848b14f 577d055 54261f6 577d055 9513d18 61b2353 9513d18 61b2353 2ca42fd 61b2353 9513d18 577d055 ca2c7e8 61b2353 07e2819 61b2353 07e2819 61b2353 5e9984e 26ddf5d 61b2353 ca2c7e8 88cff0c a5f46a9 61b2353 577d055 848b14f 0bf43b3 577d055 952bac3 52717e9 577d055 a13fabc 26ddf5d 577d055 0bf43b3 577d055 62eaf4f ad66b01 62eaf4f ad66b01 62eaf4f ad66b01 62eaf4f 26ddf5d 62eaf4f 26ddf5d 62eaf4f ad66b01 26ddf5d ad66b01 26ddf5d 62eaf4f 26ddf5d 62eaf4f ad66b01 62eaf4f ad66b01 62eaf4f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 |
from fastapi import FastAPI, Header
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from contextlib import asynccontextmanager
import xml.etree.ElementTree as xmlparser
import requests
from pydantic import BaseModel
import sys
import fitz
import re,os,json
from io import BytesIO
from datetime import datetime
def remove_in_betweens(text):
    """Replace every ``[...]`` and ``(...)`` span in *text* with a single space.

    Square-bracket spans are handled first, then parenthesised spans. Matching
    is non-greedy and ``.`` does not cross newlines, so a pair that is split
    over two lines is left untouched.
    """
    for enclosed_span in (r'\[.*?\]', r'\(.*?\)'):
        text = re.sub(enclosed_span, ' ', text)
    return text
def remove_punctuations(text):
    """Return *text* with the listed punctuation characters deleted.

    Removed: comma, semicolon, colon, question and exclamation marks, straight
    and curly apostrophes, double quote, round/curly/square brackets, forward
    and back slashes, and the asterisk. Everything else (including ``.`` and
    ``-``) is kept.
    """
    punctuation = re.compile(r"[\,\;\:\?\!\'\’\"\(\)\{\}\[\]\/\\\*]")
    return punctuation.sub('', text)
def receive_signal(signalNumber, frame):
    """Signal handler: print the received signal number, then exit.

    ``frame`` is part of the handler signature but is not used.
    """
    print('Received:', signalNumber)
    # Equivalent to sys.exit() with no argument: exit code is None.
    raise SystemExit
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Lifespan hook for the application: register the SIGINT handler on startup.

    Nothing follows the ``yield``, so no work is done on shutdown.
    """
    import signal as signal_module

    signal_module.signal(signal_module.SIGINT, receive_signal)
    yield
# Application object; `lifespan` installs the SIGINT handler at startup.
app = FastAPI(lifespan=lifespan)

# Files under ./static are served at /static.
app.mount("/static", StaticFiles(directory="static"), name="static")

# Origins allowed by the CORS middleware below.
origins = ["*"]

# NOTE(review): wildcard origins together with allow_credentials=True is a very
# permissive CORS policy — confirm this is intended for the deployment.
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.get("/")
async def root():
    """Serve the ``index.html`` file from the ``templates`` directory."""
    index_page = os.path.join("templates", "index.html")
    return FileResponse(index_page)
class Query(BaseModel):
    # Request body of the arXiv search endpoint (POST /search).
    keyword: str  # search terms placed after ``all:`` in the arXiv search_query
    limit: int  # sent to arXiv as ``max_results``
class DocumentID(BaseModel):
    # Request body of POST /extract_pdf/arxiv_id.
    doc_id: str  # appended to http://arxiv.org/pdf/ to build the download URL
class PDF(BaseModel):
    # Request body of POST /extract_pdf/url.
    url: str  # address the PDF is downloaded from
    page_num: int = -1  # number of leading pages to read; -1 means every page
@app.post("/search")
async def get_articles(query: Query):
    """Search arXiv for a keyword and summarise the matching publications.

    Args:
        query: ``keyword`` is sent as an ``all:`` search_query and ``limit``
            as ``max_results``.

    Returns:
        ``{"error": False, "message": {...}}`` where the mapping is keyed by
        arXiv identifier and each value holds ``title``, ``authors`` (names
        joined with " and "), ``date`` (``DD/MM/YYYY``), ``abstract`` and
        ``pdf``; or ``{"error": True, "message": <exception text>}`` when the
        request or the parsing fails.
    """
    XML_NAMESPACE = "{http://www.w3.org/2005/Atom}"
    content = {}
    try:
        # Let requests build the query string so that spaces, '&', '#', ... in
        # the keyword are escaped instead of corrupting or injecting parameters.
        # NOTE(review): verify=False disables certificate checks on any HTTPS
        # redirect — confirm it is really required.
        arxiv_search_result = requests.get(
            "http://export.arxiv.org/api/query",
            params={
                "search_query": f"all:{query.keyword}",
                "max_results": query.limit,
            },
            verify=False,
            timeout=30,
        )
        # Fail early with a clear HTTP error instead of a confusing parse error.
        arxiv_search_result.raise_for_status()
        response = xmlparser.fromstring(arxiv_search_result.text)
        for pub in response.findall(f"{XML_NAMESPACE}entry"):
            id_url = pub.find(f"{XML_NAMESPACE}id").text
            # Keep everything after "/abs/" so old-style identifiers such as
            # "hep-th/9901001v1" retain their archive prefix; fall back to the
            # last path segment when the marker is absent.
            _, marker, tail = id_url.partition("/abs/")
            id_pub = tail if marker else id_url.rsplit("/", 1)[-1]
            title_pub = pub.find(f"{XML_NAMESPACE}title").text
            authors = " and ".join(
                author.find(f"{XML_NAMESPACE}name").text
                for author in pub.findall(f"{XML_NAMESPACE}author")
            )
            published = pub.find(f"{XML_NAMESPACE}published").text
            pub_date = datetime.strptime(published, "%Y-%m-%dT%H:%M:%SZ").strftime("%d/%m/%Y")
            abstract = pub.find(f"{XML_NAMESPACE}summary").text
            content[id_pub] = {
                "title": title_pub,
                "authors": authors,
                "date": pub_date,
                "abstract": abstract,
                "pdf": f"http://arxiv.org/pdf/{id_pub}"
            }
        return {"error": False, "message": content}
    except Exception as e:
        # Endpoint boundary: report any failure in the JSON envelope.
        print(f"Error while downloading data : {str(e)}")
        return {"error": True, "message": str(e)}
@app.post("/extract_pdf/arxiv_id")
async def extract_arxiv_pdf(document: DocumentID):
    """Download an arXiv PDF and return its cleaned text.

    The text of all pages is joined, cut at the first case-insensitive
    occurrence of "references", stripped of bracketed/parenthesised spans and
    punctuation, and whitespace-normalised.

    Args:
        document: ``doc_id`` is appended to ``http://arxiv.org/pdf/``.

    Returns:
        ``{"error": False, "message": {"pub_id": ..., "text": ...}}`` on
        success, otherwise ``{"error": True, "message": <reason>}``.
    """
    pdf_url = f"http://arxiv.org/pdf/{document.doc_id}"
    try:
        # NOTE(review): verify=False disables certificate checks on any HTTPS
        # redirect — confirm it is really required.
        pdf_req = requests.get(pdf_url, verify=False, timeout=60)
    except requests.RequestException as e:
        print("URL: " + pdf_url)
        print(f"Error while downloading PDF: {str(e)}")
        return {"error": True, "message": "Error while downloading PDF: " + str(e)}

    if pdf_req.status_code != 200:
        print("ID: " + document.doc_id)
        print("URL: " + pdf_url)
        print("Status code: " + str(pdf_req.status_code))
        return {"error": True, "message": "Error while downloading PDF: HTTP/" + str(pdf_req.status_code)}

    try:
        pdf_data = BytesIO(pdf_req.content)
        # The context manager closes the document once the text is extracted.
        with fitz.open(stream=pdf_data, filetype="pdf") as doc:
            pdf_text = " ".join([page.get_text("text") for page in doc])
    except Exception as e:
        # Endpoint boundary: a corrupt or non-PDF payload becomes an error reply.
        print(f"Error while reading PDF: {str(e)}")
        return {"error": True, "message": "Error while reading PDF: " + str(e)}

    # NOTE(review): this cuts at the FIRST occurrence of the word anywhere in
    # the text (it also matches inside e.g. "preferences"), which may truncate
    # earlier than the bibliography heading.
    ref_match = re.search(r"REFERENCES", pdf_text, re.IGNORECASE)
    if ref_match:
        pdf_text = pdf_text[:ref_match.start()]

    postprocess_text = remove_in_betweens(pdf_text)
    postprocess_text = remove_punctuations(postprocess_text)
    postprocess_text = re.sub(r"\s+", " ", postprocess_text)
    postprocess_text = postprocess_text.strip()
    return {"error": False, "message": {"pub_id": document.doc_id, "text": postprocess_text}}
@app.post("/extract_pdf/url")
async def extract_pdf(pdf: PDF):
    """Download a PDF from a URL and return its title and cleaned text.

    Args:
        pdf: ``url`` is fetched; ``page_num`` is the number of leading pages to
            read (``-1`` reads every page, values beyond the document length
            are clamped to it).

    Returns:
        ``{"error": False, "message": {"title": ..., "text": ...}}`` on
        success, otherwise ``{"error": True, "message": <reason>}``. ``title``
        is "No title found" when the PDF metadata has no usable title.
    """
    # NOTE(review): the URL comes straight from the request body, so this
    # endpoint fetches any address the server can reach — consider restricting
    # the allowed schemes/hosts.
    try:
        pdf_req = requests.get(pdf.url, timeout=60)
    except requests.RequestException as e:
        print("URL: " + pdf.url)
        print(f"Error while downloading PDF: {str(e)}")
        return {"error": True, "message": "Error while downloading PDF: " + str(e)}

    if pdf_req.status_code != 200:
        print("URL: " + pdf.url)
        print("Status code: " + str(pdf_req.status_code))
        return {"error": True, "message": "Error while downloading PDF: HTTP/" + str(pdf_req.status_code)}

    try:
        pdf_data = BytesIO(pdf_req.content)
        # The context manager closes the document once everything is read.
        with fitz.open(stream=pdf_data, filetype="pdf") as doc:
            if pdf.page_num == -1:
                page_limit = doc.page_count
            else:
                # Clamp so a request for more pages than exist does not raise.
                page_limit = min(pdf.page_num, doc.page_count)
            pdf_text = " ".join([doc[page].get_text("text") for page in range(page_limit)])
            pdf_metadata = doc.metadata or {}
    except Exception as e:
        # Endpoint boundary: a corrupt or non-PDF payload becomes an error reply.
        print(f"Error while reading PDF: {str(e)}")
        return {"error": True, "message": "Error while reading PDF: " + str(e)}

    print(pdf_metadata)
    # The "title" key can be present but empty or None, so test the value
    # rather than relying on the dict.get default.
    title = (pdf_metadata.get("title") or "").strip() or "No title found"

    postprocess_text = remove_in_betweens(pdf_text)
    postprocess_text = remove_punctuations(postprocess_text)
    postprocess_text = re.sub(r"\s+", " ", postprocess_text)
    postprocess_text = postprocess_text.strip()
    return {"error": False, "message": {"title": title, "text": postprocess_text}}
def researcher(model, user, token):
    """Ask the Groq chat-completions API to turn a request into a research plan.

    A fixed system prompt (few-shot examples plus the expected JSON format) is
    sent together with the user's text.

    Args:
        model: Model name forwarded as the ``model`` field of the request.
        user: Text sent as the user message.
        token: Groq API key, sent as a Bearer token.

    Returns:
        The stripped text content of the first choice, or ``None`` when the
        request fails, the status code is not 200, or the response body cannot
        be read as expected.
    """
    url = 'https://api.groq.com/openai/v1/chat/completions'
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {token}',
    }
    system_msg = {
        'role': 'system',
        'content': (
            'You are an experience PhD professor with 20 years experience in research. You help the user build their research plan based on the following examples. build the plan according to the examples without further questions. provide the steps of the plan in a form of research requests to search engines of public document publisher or web searching purposes, nothing else:\n'
            '''<example>
<search-request>
Help me research recent AI-powered marketing campaigns to benchmark for 2025 planning
</search-request>
<search-plan>
Help me research recent AI-powered marketing campaigns to benchmark for 2025 planning by:
(1) Find articles and case studies on AI-powered marketing campaigns in 2024.
(2) Find information on the specific AI technologies used in these campaigns (e.g., generative AI, predictive analytics).
(3) Find data on the results of these campaigns (e.g., ROI, customer engagement).
(4) Find information on the challenges and limitations of using AI in marketing.
(5) Find information on emerging trends in AI-powered marketing for 2025.
(6) Based on the above information, create a report summarizing key takeaways.
(7) Create a SWOT analysis of AI-powered marketing campaigns.
</search-plan>
</example>\n'''
            '''<example>
<search-request>
Research AI models and compare them per use cases for a guide on which model to use for which use case
</search-request>
<search-plan>
Research AI models and compare them per use cases for a guide on which model to use for which use case by:
(1) Find a list of popular AI models and categorize them by type (e.g., image generation, language processing, etc.).
(2) For each AI model, find information on its strengths, weaknesses, and common use cases.
(3) Compare and contrast the AI models within each category based on their performance, ease of use, and cost.
(4) Find real-world examples of how each AI model is being used in different industries and applications.
(5) Create a guide that recommends specific AI models for different use cases, taking into account factors such as accuracy, speed, and cost.
(6) Include a disclaimer in the guide stating that the recommendations are based on current knowledge and may change as AI technology evolves.
</search-plan>
</example>\n'''
            '''<example>
<search-request>
research Open source threat or opportunities to 6G standardization bodies such as 3GPP for thought leadership paper
</search-request>
<search-plan>
Research Open source threat or opportunities to 6G standardization bodies such as 3GPP for thought leadership paper:
(1) Find information on the role of 3GPP in 6G standardization.
(2) Find information on open source initiatives in the 6G space.
(3) Find articles or reports discussing the potential impact of open source on 6G standardization.
(4) Find information on the benefits and challenges of open source for 6G standardization.
(5) Find information on how 3GPP is addressing the challenges of open source.
(6) Find examples of successful open source initiatives in other technology domains.
(7) Based on your research, develop a point of view on the threat or opportunity of open source to 6G standardization bodies like 3GPP.
</search-plan>
</example>\n'''
            '''<example>
<search-request>
research vodafone activities in 6G for a competition analysis include a SWOT analysis
</search-request>
<search-plan>
Research Vodafone's activities in 6G for a competition analysis, including a SWOT analysis, by:
(1) Find Vodafone's public statements and press releases about their 6G research and development efforts.
(2) Find news articles and industry reports about Vodafone's 6G activities.
(3) Find information about Vodafone's partnerships and collaborations in the 6G space.
(4) Find information about Vodafone's investments in 6G infrastructure and technology.
(5) Based on the information gathered, create a SWOT analysis of Vodafone's position in the 6G landscape:
(a) Strengths: Vodafone's existing infrastructure, expertise, and partnerships.
(b) Weaknesses: Vodafone's potential challenges in competing with larger or more established players in the 6G space.
(c) Opportunities: Emerging 6G technologies and market trends that Vodafone could leverage.
(d) Threats: Competition from other companies, regulatory hurdles, and technological uncertainties.
(6) Compare Vodafone's 6G activities to those of its main competitors (e.g., Ericsson, Nokia, Huawei, Samsung) to identify areas of strength and weakness.
(7) Consider any recent developments or announcements in the 6G space that could impact Vodafone's competitive position.
</search-plan>
</example>\n'''
            '''<example>
<search-request>
report on researches on 6G energy efficiency, how to achieve it, assess the reality of these findings or solutions for a technical paper on 6G environmental impact
</search-request>
<search-plan>
Report on researches on 6G energy efficiency, how to achieve it, assess the reality of these findings or solutions for a technical paper on 6G environmental impact by:
(1) Find research papers and articles on 6G energy efficiency.
(2) Summarize the findings of these researches on 6G energy efficiency.
(3) Find proposed solutions to achieve 6G energy efficiency.
(4) Assess the feasibility and potential impact of these solutions.
(5) Find any existing case studies or pilot projects implementing these solutions.
(6) Find information on the potential environmental benefits of 6G energy efficiency.
(7) Find information on the challenges and limitations of achieving 6G energy efficiency.
(8) Find information on the potential economic benefits of 6G energy efficiency.
</search-plan>
</example>\n'''
            '''<example>
<search-request>
research Authentication and Identity Management:
Study lightweight, low-energy authentication methods for IoT and other connected devices.
Explore advancements in identity and access management for 6G networks.
</search-request>
<search-plan>
Research Authentication and Identity Management: Study lightweight, low-energy authentication methods for IoT and other connected devices. Explore advancements in identity and access management for 6G networks by:
(1) Find research papers and articles on lightweight, low-energy authentication methods for IoT and other connected devices.
(2) Find research papers and articles on advancements in identity and access management for 6G networks.
(3) Find information on the current state of authentication and identity management for IoT and other connected devices.
(4) Find information on the challenges and opportunities in authentication and identity management for 6G networks.
(5) Find information on the different types of authentication methods available for IoT and other connected devices.
(6) Find information on the different types of identity and access management systems available for 6G networks.
(7) Compare and contrast the different authentication methods and identity and access management systems.
(8) Identify potential areas for future research in authentication and identity management for IoT and other connected devices, as well as for 6G networks.
</search-plan>
</example>\n'''
            '''<example>
<search-request>
research Global Market Dynamics:
Assess which stakeholders (e.g., operators, tech companies, governments) are likely to drive investment in 6G infrastructure.
Investigate the role of new entrants, such as GAFAM (Google, Amazon, Facebook, Apple, Microsoft), in shaping the 6G ecosystem.
Explore how regions like China, the EU, and the US are positioning themselves for 6G leadership.
</search-request>
<search-plan>
Research Global Market Dynamics: Assess which stakeholders (e.g., operators, tech companies, governments) are likely to drive investment in 6G infrastructure. Investigate the role of new entrants, such as GAFAM (Google, Amazon, Facebook, Apple, Microsoft), in shaping the 6G ecosystem. Explore how regions like China, the EU, and the US are positioning themselves for 6G leadership by:
(1) Find research reports and articles on the 6G market and its potential stakeholders.
(2) Find information on the current investments and initiatives of major telecom operators in 6G. If there are too many, limit to several that are most relevant.
(3) Find information on the R&D efforts of major tech companies, including GAFAM, in 6G technologies.
(4) Find information on government policies and funding initiatives related to 6G in China, the EU, and the US.
(5) Find expert opinions and analysis on the potential drivers of 6G investment and the role of new entrants.
(6) Find information on the potential impact of 6G on various industries and sectors.
(7) Find information on the potential challenges and barriers to 6G adoption and deployment.
(8) Find information on the potential timeline for 6G commercialization and deployment.
</search-plan>
</example>\n'''
            '''<example>
<search-request>
research how new business models beyond ROI could benefit societal impact of 6G
</search-request>
<search-plan>
research how new business models beyond ROI could benefit societal impact of 6G by:
(1) Find articles and research papers discussing the potential societal impact of 6G technology.
(2) Find articles and research papers discussing current business models used in the telecommunications industry and their limitations.
(3) Find articles and research papers discussing alternative business models that could be used to fund and deploy 6G technology.
(4) Find articles and research papers discussing how new business models could be used to maximize the societal benefits of 6G technology.
(5) Find case studies of companies or organizations that have successfully implemented alternative business models to achieve social impact goals.
(6) Find information on potential risks and challenges associated with new business models for 6G technology.
</search-plan>
</example>\n'''
            'Optionally, do not precise the sources, as we search on every websites that we possibly can. Take note that sometimes, the user will send you keywords only, just provide report of them.\n'
            'For the response format, you must send a JSON of this format : [{"step_index": The step number, "step_text": What we have to do, "keywords": The important keywords separated by spaces (no comma) (important: the keywords that we will use for search engines and APIs, so get rid of `research papers`, `articles`, ... keywords), `privilegie les abbreviations`}, ...] Take those for examples :\n'
            '''<example><search-request>Provide a plan for 6G challenges</search-request><search-plan>[
{
"step_index": 1,
"step_text": "Find information on technical challenges in 6G development",
"keywords": "6G technical challenges development hurdles"
},
{
"step_index": 2,
"step_text": " Identify key challenges in 6G standardization",
"keywords": "6G standardization challenges 3GPP"
},
{
"step_index": 3,
"step_text": "Investigate security challenges in 6G networks",
"keywords": "6G security threats vulnerabilities"
},
{
"step_index": 4,
"step_text": "Explore challenges in 6G deployment and implementation",
"keywords": "6G deployment implementation rollout"
},
{
"step_index": 5,
"step_text": "Find information on energy efficiency challenges in 6G",
"keywords": "6G energy efficiency power consumption"
},
{
"step_index": 6,
"step_text": "Investigate challenges in 6G spectrum management",
"keywords": "6G spectrum frequency management"
},
{
"step_index": 7,
"step_text": "Analyze challenges in 6G device and hardware development",
"keywords": "6G devices hardware UE"
},
{
"step_index": 8,
"step_text": "Explore challenges in 6G network architecture and design",
"keywords": "6G network architecture network design RAN"
},
{
"step_index": 9,
"step_text": "Find information on challenges in 6G testing and validation",
"keywords": "6G testing validation trial"
},
{
"step_index": 10,
"step_text": "Investigate challenges in 6G regulation and policy-making",
"keywords": "6G regulation policy governance"
}
]</search-plan></example>'''
        )
    }
    user_msg = {
        'role': 'user',
        'content': user
    }
    data = {
        'model': model,
        'messages': [system_msg, user_msg]
    }
    try:
        # Certificate verification stays enabled: this request carries the API
        # key, so it must not be exposed to a man-in-the-middle. The timeout
        # keeps a stalled connection from hanging the caller forever.
        response = requests.post(url, headers=headers, data=json.dumps(data), timeout=120)
    except requests.RequestException as e:
        print(f"Groq API error on post: {str(e)}")
        return None
    if response.status_code != 200:
        print(f"Groq API error on post: {response.status_code}")
        return None
    try:
        response_data = response.json()
        raw_content = response_data['choices'][0]['message']['content'].strip()
        return raw_content
    except Exception as e:
        # Unexpected payload shape or non-JSON body.
        print(f"Groq API error after post: {str(e)}")
        return None
class GroqRequest(BaseModel):
    # Request body of the research-plan endpoints.
    model: str  # model name passed to researcher() and on to the Groq API
    user: str  # request text sent as the user message
@app.post("/search/plan")
async def get_research_plan(infos: GroqRequest, api_key: str = Header(None, alias="GROQ_TOKEN")):
    """Generate a research plan for the user's request.

    Args:
        infos: Model name and user text forwarded to ``researcher``.
        api_key: Groq API key read from the ``GROQ_TOKEN`` request header.

    Returns:
        ``{"error": False, "message": {"plan": <parsed JSON>}}`` on success,
        otherwise ``{"error": True, "message": <reason>}`` (missing key,
        generation failure, or a reply that is not valid JSON).
    """
    if api_key is None:
        return {"error": True, "message": "Missing API key"}
    plan = researcher(infos.model, infos.user, api_key)
    if plan is None:
        return {"error": True, "message": "Error while generating the research plan"}
    try:
        # Collapsing whitespace runs turns raw newlines/tabs, which strict JSON
        # rejects inside strings, into plain spaces before parsing.
        plan = json.loads(re.sub(r"\s+", " ", plan))
    except json.JSONDecodeError as e:
        # The model may answer with prose or fenced text instead of bare JSON.
        print(f"Invalid research plan JSON: {str(e)}")
        return {"error": True, "message": "Error while generating the research plan"}
    return {"error": False, "message": {"plan": plan}}
@app.post("/search/plan/arxiv")
async def get_arxiv_research_plan(infos: GroqRequest, api_key: str = Header(None, alias="GROQ_TOKEN")):
    """Generate a research plan and attach arXiv article ids to each step.

    Args:
        infos: Model name and user text used to build the plan.
        api_key: Groq API key read from the ``GROQ_TOKEN`` request header.

    Returns:
        ``{"error": False, "message": [...]}`` with one
        ``{"step_id", "request", "articles"}`` entry per plan step whose arXiv
        search succeeded, or the ``{"error": True, ...}`` envelope when the
        plan could not be generated.
    """
    # get_research_plan is a coroutine function: it must be awaited, and the
    # plan lives under "message" in its envelope.
    plan_result = await get_research_plan(infos, api_key)
    if plan_result["error"]:
        return plan_result
    plan = plan_result["message"]["plan"]
    if not plan:
        return {"error": True, "message": "Error while generating the research plan"}

    plan_articles = []
    for step in plan:
        try:
            # Read fields by name rather than relying on key order and count.
            index = step["step_index"]
            inst = step["step_text"]
            kws = step["keywords"]
        except (KeyError, TypeError):
            print(f"Skipping malformed plan step: {step}")
            continue
        data = await get_articles(Query(keyword=kws, limit=5))
        if data["error"]:
            print(data["message"])
            continue
        publications = data["message"]
        plan_articles.append({'step_id': index, 'request': inst, 'articles': list(publications)})
    return {"error": False, "message": plan_articles}