Spaces:
Running
Running
Omar ID EL MOUMEN
committed on
Commit
·
952bac3
1
Parent(s):
54261f6
Attempt at a fix
Browse files
app.py
CHANGED
@@ -118,8 +118,8 @@ async def extract_pdf(pdf: PDF):
|
|
118 |
pdf_req = requests.get(pdf.url)
|
119 |
if pdf_req.status_code == 200:
|
120 |
pdf_data = BytesIO(pdf_req.content)
|
121 |
-
doc = fitz.open(stream=pdf_data, filetype="pdf")
|
122 |
-
pdf_text = " ".join([page.get_text("text") for page in doc])
|
123 |
pdf_metadata = doc.metadata
|
124 |
print(pdf_metadata)
|
125 |
|
|
|
118 |
pdf_req = requests.get(pdf.url)
|
119 |
if pdf_req.status_code == 200:
|
120 |
pdf_data = BytesIO(pdf_req.content)
|
121 |
+
doc = fitz.open(stream=pdf_data, filetype="pdf")
|
122 |
+
pdf_text = " ".join([page.get_text("text") for page in range(len(doc.page_count) if pdf.page_num == -1 else pdf.page_num)])
|
123 |
pdf_metadata = doc.metadata
|
124 |
print(pdf_metadata)
|
125 |
|