om4r932 committed on
Commit
7aa5e73
·
1 Parent(s): 2cdd1ea

Fix & debug

Browse files
Files changed (2) hide show
  1. app.py +12 -3
  2. indexed_docs.json +6 -0
app.py CHANGED
@@ -1,4 +1,5 @@
1
  from io import StringIO
 
2
  import numpy as np
3
  import pandas as pd
4
  import requests
@@ -22,10 +23,12 @@ from fastapi.staticfiles import StaticFiles
22
  from pydantic import BaseModel
23
  from typing import Any, Dict, List, Literal, Optional
24
 
 
 
25
  warnings.filterwarnings("ignore")
26
 
27
- app = FastAPI(title="3GPP Document Finder API",
28
- description="API to find 3GPP documents based on TSG document IDs")
29
 
30
  app.mount("/static", StaticFiles(directory="static"), name="static")
31
 
@@ -71,7 +74,8 @@ class DocFinder:
71
  self.session = requests.Session()
72
  self.indexer_file = "indexed_docs.json"
73
  self.indexer, self.last_indexer_date = self.load_indexer()
74
- self.session.post("https://portal.etsi.org/ETSIPages/LoginEOL.ashx", verify=False, headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"}, data=json.dumps({"username": os.environ.get("EOL_USER"), "password": os.environ.get("EOL_PASSWORD")}))
 
75
 
76
  def load_indexer(self):
77
  if os.path.exists(self.indexer_file):
@@ -127,12 +131,15 @@ class DocFinder:
127
  urls = []
128
  if main_tsg:
129
  wg_url = self.find_workgroup_url(main_tsg, workgroup)
 
130
  if wg_url:
131
  files = self.get_docs_from_url(wg_url)
 
132
  for f in files:
133
  if doc in f.lower() or original in f:
134
  print(f)
135
  doc_url = f"{wg_url}/{f}"
 
136
  self.indexer[original] = doc_url
137
  self.save_indexer()
138
  urls.append(doc_url)
@@ -194,6 +201,7 @@ class SpecFinder:
194
  return self.indexer[doc]
195
 
196
  url = f"{self.main_url}/{self.get_spec_path(original)}/"
 
197
 
198
  releases = self.get_docs_from_url(url)
199
  files = self.get_docs_from_url(url + releases[-1])
@@ -214,6 +222,7 @@ finder_spec = SpecFinder()
214
  def find_document(request: DocRequest):
215
  start_time = time.time()
216
  finder = finder_spec if request.doc_id[0].isnumeric() else finder_doc
 
217
  result = finder.search_document(request.doc_id)
218
 
219
  if "not found" not in result and "Could not" not in result and "Unable" not in result:
 
1
  from io import StringIO
2
+ from dotenv import load_dotenv
3
  import numpy as np
4
  import pandas as pd
5
  import requests
 
23
  from pydantic import BaseModel
24
  from typing import Any, Dict, List, Literal, Optional
25
 
26
+ load_dotenv()
27
+
28
  warnings.filterwarnings("ignore")
29
 
30
+ app = FastAPI(title="ETSI Document Finder API",
31
+ description="API to find ETSI documents based on document IDs")
32
 
33
  app.mount("/static", StaticFiles(directory="static"), name="static")
34
 
 
74
  self.session = requests.Session()
75
  self.indexer_file = "indexed_docs.json"
76
  self.indexer, self.last_indexer_date = self.load_indexer()
77
+ req = self.session.post("https://portal.etsi.org/ETSIPages/LoginEOL.ashx", verify=False, headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"}, data=json.dumps({"username": os.environ.get("EOL_USER"), "password": os.environ.get("EOL_PASSWORD")}))
78
+ print(req.content, req.status_code)
79
 
80
  def load_indexer(self):
81
  if os.path.exists(self.indexer_file):
 
131
  urls = []
132
  if main_tsg:
133
  wg_url = self.find_workgroup_url(main_tsg, workgroup)
134
+ print(wg_url)
135
  if wg_url:
136
  files = self.get_docs_from_url(wg_url)
137
+ print(files)
138
  for f in files:
139
  if doc in f.lower() or original in f:
140
  print(f)
141
  doc_url = f"{wg_url}/{f}"
142
+
143
  self.indexer[original] = doc_url
144
  self.save_indexer()
145
  urls.append(doc_url)
 
201
  return self.indexer[doc]
202
 
203
  url = f"{self.main_url}/{self.get_spec_path(original)}/"
204
+ print(url)
205
 
206
  releases = self.get_docs_from_url(url)
207
  files = self.get_docs_from_url(url + releases[-1])
 
222
  def find_document(request: DocRequest):
223
  start_time = time.time()
224
  finder = finder_spec if request.doc_id[0].isnumeric() else finder_doc
225
+ print(finder)
226
  result = finder.search_document(request.doc_id)
227
 
228
  if "not found" not in result and "Could not" not in result and "Unable" not in result:
indexed_docs.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "docs": {
3
+ "SET(25)000002": "https://docbox.etsi.org/SET/SET/05-CONTRIBUTIONS/2025/SET(25)000002_Draft_report_of_SET__116.docx"
4
+ },
5
+ "last_indexed_date": "03/06/2025-14:20:45"
6
+ }