Spaces:
Sleeping
Sleeping
Fix & debug
Browse files- app.py +12 -3
- indexed_docs.json +6 -0
app.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
from io import StringIO
|
|
|
2 |
import numpy as np
|
3 |
import pandas as pd
|
4 |
import requests
|
@@ -22,10 +23,12 @@ from fastapi.staticfiles import StaticFiles
|
|
22 |
from pydantic import BaseModel
|
23 |
from typing import Any, Dict, List, Literal, Optional
|
24 |
|
|
|
|
|
25 |
warnings.filterwarnings("ignore")
|
26 |
|
27 |
-
app = FastAPI(title="
|
28 |
-
description="API to find
|
29 |
|
30 |
app.mount("/static", StaticFiles(directory="static"), name="static")
|
31 |
|
@@ -71,7 +74,8 @@ class DocFinder:
|
|
71 |
self.session = requests.Session()
|
72 |
self.indexer_file = "indexed_docs.json"
|
73 |
self.indexer, self.last_indexer_date = self.load_indexer()
|
74 |
-
self.session.post("https://portal.etsi.org/ETSIPages/LoginEOL.ashx", verify=False, headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"}, data=json.dumps({"username": os.environ.get("EOL_USER"), "password": os.environ.get("EOL_PASSWORD")}))
|
|
|
75 |
|
76 |
def load_indexer(self):
|
77 |
if os.path.exists(self.indexer_file):
|
@@ -127,12 +131,15 @@ class DocFinder:
|
|
127 |
urls = []
|
128 |
if main_tsg:
|
129 |
wg_url = self.find_workgroup_url(main_tsg, workgroup)
|
|
|
130 |
if wg_url:
|
131 |
files = self.get_docs_from_url(wg_url)
|
|
|
132 |
for f in files:
|
133 |
if doc in f.lower() or original in f:
|
134 |
print(f)
|
135 |
doc_url = f"{wg_url}/{f}"
|
|
|
136 |
self.indexer[original] = doc_url
|
137 |
self.save_indexer()
|
138 |
urls.append(doc_url)
|
@@ -194,6 +201,7 @@ class SpecFinder:
|
|
194 |
return self.indexer[doc]
|
195 |
|
196 |
url = f"{self.main_url}/{self.get_spec_path(original)}/"
|
|
|
197 |
|
198 |
releases = self.get_docs_from_url(url)
|
199 |
files = self.get_docs_from_url(url + releases[-1])
|
@@ -214,6 +222,7 @@ finder_spec = SpecFinder()
|
|
214 |
def find_document(request: DocRequest):
|
215 |
start_time = time.time()
|
216 |
finder = finder_spec if request.doc_id[0].isnumeric() else finder_doc
|
|
|
217 |
result = finder.search_document(request.doc_id)
|
218 |
|
219 |
if "not found" not in result and "Could not" not in result and "Unable" not in result:
|
|
|
1 |
from io import StringIO
|
2 |
+
from dotenv import load_dotenv
|
3 |
import numpy as np
|
4 |
import pandas as pd
|
5 |
import requests
|
|
|
23 |
from pydantic import BaseModel
|
24 |
from typing import Any, Dict, List, Literal, Optional
|
25 |
|
26 |
+
load_dotenv()
|
27 |
+
|
28 |
warnings.filterwarnings("ignore")
|
29 |
|
30 |
+
app = FastAPI(title="ETSI Document Finder API",
|
31 |
+
description="API to find ETSI documents based on document IDs")
|
32 |
|
33 |
app.mount("/static", StaticFiles(directory="static"), name="static")
|
34 |
|
|
|
74 |
self.session = requests.Session()
|
75 |
self.indexer_file = "indexed_docs.json"
|
76 |
self.indexer, self.last_indexer_date = self.load_indexer()
|
77 |
+
req = self.session.post("https://portal.etsi.org/ETSIPages/LoginEOL.ashx", verify=False, headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"}, data=json.dumps({"username": os.environ.get("EOL_USER"), "password": os.environ.get("EOL_PASSWORD")}))
|
78 |
+
print(req.content, req.status_code)
|
79 |
|
80 |
def load_indexer(self):
|
81 |
if os.path.exists(self.indexer_file):
|
|
|
131 |
urls = []
|
132 |
if main_tsg:
|
133 |
wg_url = self.find_workgroup_url(main_tsg, workgroup)
|
134 |
+
print(wg_url)
|
135 |
if wg_url:
|
136 |
files = self.get_docs_from_url(wg_url)
|
137 |
+
print(files)
|
138 |
for f in files:
|
139 |
if doc in f.lower() or original in f:
|
140 |
print(f)
|
141 |
doc_url = f"{wg_url}/{f}"
|
142 |
+
|
143 |
self.indexer[original] = doc_url
|
144 |
self.save_indexer()
|
145 |
urls.append(doc_url)
|
|
|
201 |
return self.indexer[doc]
|
202 |
|
203 |
url = f"{self.main_url}/{self.get_spec_path(original)}/"
|
204 |
+
print(url)
|
205 |
|
206 |
releases = self.get_docs_from_url(url)
|
207 |
files = self.get_docs_from_url(url + releases[-1])
|
|
|
222 |
def find_document(request: DocRequest):
|
223 |
start_time = time.time()
|
224 |
finder = finder_spec if request.doc_id[0].isnumeric() else finder_doc
|
225 |
+
print(finder)
|
226 |
result = finder.search_document(request.doc_id)
|
227 |
|
228 |
if "not found" not in result and "Could not" not in result and "Unable" not in result:
|
indexed_docs.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"docs": {
|
3 |
+
"SET(25)000002": "https://docbox.etsi.org/SET/SET/05-CONTRIBUTIONS/2025/SET(25)000002_Draft_report_of_SET__116.docx"
|
4 |
+
},
|
5 |
+
"last_indexed_date": "03/06/2025-14:20:45"
|
6 |
+
}
|