Omar ID EL MOUMEN committed on
Commit
0bf43b3
·
1 Parent(s): d588a4d

Fix extract pdf url

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -130,8 +130,8 @@ async def extract_random_arxiv_pdf(query: Query):
130
  return await extract_arxiv_pdf(random.choice(list(pubs["message"].keys())))
131
 
132
  @app.post("/extract_pdf/url")
133
- async def extract_pdf(url: WebPDF):
134
- pdf_req = requests.get(url)
135
  if pdf_req.status_code == 200:
136
  pdf_data = BytesIO(pdf_req.content)
137
  doc = fitz.open(stream=pdf_data, filetype="pdf")
@@ -139,6 +139,6 @@ async def extract_pdf(url: WebPDF):
139
  pdf_metadata = doc.metadata
140
  return {"error": False, "title": pdf_metadata.get("title", "").strip(), "text": pdf_text}
141
  else:
142
- print("URL: " + url)
143
  print("Status code: " + str(pdf_req.status_code))
144
  return {"error": True, "message": "Error while downloading PDF: HTTP/" + str(pdf_req.status_code)}
 
130
  return await extract_arxiv_pdf(random.choice(list(pubs["message"].keys())))
131
 
132
  @app.post("/extract_pdf/url")
133
+ async def extract_pdf(pdf: WebPDF):
134
+ pdf_req = requests.get(pdf.url)
135
  if pdf_req.status_code == 200:
136
  pdf_data = BytesIO(pdf_req.content)
137
  doc = fitz.open(stream=pdf_data, filetype="pdf")
 
139
  pdf_metadata = doc.metadata
140
  return {"error": False, "title": pdf_metadata.get("title", "").strip(), "text": pdf_text}
141
  else:
142
+ print("URL: " + pdf.url)
143
  print("Status code: " + str(pdf_req.status_code))
144
  return {"error": True, "message": "Error while downloading PDF: HTTP/" + str(pdf_req.status_code)}