Spaces:
Running
Running
Omar ID EL MOUMEN
committed on
Commit
·
0bf43b3
1
Parent(s):
d588a4d
Fix extract pdf url
Browse files
app.py
CHANGED
@@ -130,8 +130,8 @@ async def extract_random_arxiv_pdf(query: Query):
|
|
130 |
return await extract_arxiv_pdf(random.choice(list(pubs["message"].keys())))
|
131 |
|
132 |
@app.post("/extract_pdf/url")
|
133 |
-
async def extract_pdf(url: WebPDF):
|
134 |
-
pdf_req = requests.get(url)
|
135 |
if pdf_req.status_code == 200:
|
136 |
pdf_data = BytesIO(pdf_req.content)
|
137 |
doc = fitz.open(stream=pdf_data, filetype="pdf")
|
@@ -139,6 +139,6 @@ async def extract_pdf(url: WebPDF):
|
|
139 |
pdf_metadata = doc.metadata
|
140 |
return {"error": False, "title": pdf_metadata.get("title", "").strip(), "text": pdf_text}
|
141 |
else:
|
142 |
-
print("URL: " + url)
|
143 |
print("Status code: " + str(pdf_req.status_code))
|
144 |
return {"error": True, "message": "Error while downloading PDF: HTTP/" + str(pdf_req.status_code)}
|
|
|
130 |
return await extract_arxiv_pdf(random.choice(list(pubs["message"].keys())))
|
131 |
|
132 |
@app.post("/extract_pdf/url")
|
133 |
+
async def extract_pdf(pdf: WebPDF):
|
134 |
+
pdf_req = requests.get(pdf.url)
|
135 |
if pdf_req.status_code == 200:
|
136 |
pdf_data = BytesIO(pdf_req.content)
|
137 |
doc = fitz.open(stream=pdf_data, filetype="pdf")
|
|
|
139 |
pdf_metadata = doc.metadata
|
140 |
return {"error": False, "title": pdf_metadata.get("title", "").strip(), "text": pdf_text}
|
141 |
else:
|
142 |
+
print("URL: " + pdf.url)
|
143 |
print("Status code: " + str(pdf_req.status_code))
|
144 |
return {"error": True, "message": "Error while downloading PDF: HTTP/" + str(pdf_req.status_code)}
|