Spaces:

OrganizedProgrammers
/

ETSIDocFinder

Sleeping

App Files Community

om4r932 committed 25 days ago

Commit

7aa5e73

1 Parent(s): 2cdd1ea

Fix & debug

Browse files

Files changed (2) hide show

app.py +12 -3
indexed_docs.json +6 -0

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from io import StringIO
 import numpy as np
 import pandas as pd
 import requests
@@ -22,10 +23,12 @@ from fastapi.staticfiles import StaticFiles
 from pydantic import BaseModel
 from typing import Any, Dict, List, Literal, Optional
 warnings.filterwarnings("ignore")
-app = FastAPI(title="3GPP Document Finder API",
-              description="API to find 3GPP documents based on TSG document IDs")
 app.mount("/static", StaticFiles(directory="static"), name="static")
@@ -71,7 +74,8 @@ class DocFinder:
         self.session = requests.Session()
         self.indexer_file = "indexed_docs.json"
         self.indexer, self.last_indexer_date = self.load_indexer()
-        self.session.post("https://portal.etsi.org/ETSIPages/LoginEOL.ashx", verify=False, headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"}, data=json.dumps({"username": os.environ.get("EOL_USER"), "password": os.environ.get("EOL_PASSWORD")}))
     def load_indexer(self):
         if os.path.exists(self.indexer_file):
@@ -127,12 +131,15 @@ class DocFinder:
         urls = []
         if main_tsg:
             wg_url = self.find_workgroup_url(main_tsg, workgroup)
             if wg_url:
                 files = self.get_docs_from_url(wg_url)
                 for f in files:
                     if doc in f.lower() or original in f:
                         print(f)
                         doc_url = f"{wg_url}/{f}"
                         self.indexer[original] = doc_url
                         self.save_indexer()
                         urls.append(doc_url)
@@ -194,6 +201,7 @@ class SpecFinder:
                 return self.indexer[doc]
         url = f"{self.main_url}/{self.get_spec_path(original)}/"
         releases = self.get_docs_from_url(url)
         files = self.get_docs_from_url(url + releases[-1])
@@ -214,6 +222,7 @@ finder_spec = SpecFinder()
 def find_document(request: DocRequest):
     start_time = time.time()
     finder = finder_spec if request.doc_id[0].isnumeric() else finder_doc
     result = finder.search_document(request.doc_id)
     if "not found" not in result and "Could not" not in result and "Unable" not in result:

 from io import StringIO
+from dotenv import load_dotenv
 import numpy as np
 import pandas as pd
 import requests
 from pydantic import BaseModel
 from typing import Any, Dict, List, Literal, Optional
+load_dotenv()
 warnings.filterwarnings("ignore")
+app = FastAPI(title="ETSI Document Finder API",
+              description="API to find ETSI documents based on document IDs")
 app.mount("/static", StaticFiles(directory="static"), name="static")
         self.session = requests.Session()
         self.indexer_file = "indexed_docs.json"
         self.indexer, self.last_indexer_date = self.load_indexer()
+        req = self.session.post("https://portal.etsi.org/ETSIPages/LoginEOL.ashx", verify=False, headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"}, data=json.dumps({"username": os.environ.get("EOL_USER"), "password": os.environ.get("EOL_PASSWORD")}))
+        print(req.content, req.status_code)
     def load_indexer(self):
         if os.path.exists(self.indexer_file):
         urls = []
         if main_tsg:
             wg_url = self.find_workgroup_url(main_tsg, workgroup)
+            print(wg_url)
             if wg_url:
                 files = self.get_docs_from_url(wg_url)
+                print(files)
                 for f in files:
                     if doc in f.lower() or original in f:
                         print(f)
                         doc_url = f"{wg_url}/{f}"
                         self.indexer[original] = doc_url
                         self.save_indexer()
                         urls.append(doc_url)
                 return self.indexer[doc]
         url = f"{self.main_url}/{self.get_spec_path(original)}/"
+        print(url)
         releases = self.get_docs_from_url(url)
         files = self.get_docs_from_url(url + releases[-1])
 def find_document(request: DocRequest):
     start_time = time.time()
     finder = finder_spec if request.doc_id[0].isnumeric() else finder_doc
+    print(finder)
     result = finder.search_document(request.doc_id)
     if "not found" not in result and "Could not" not in result and "Unable" not in result:

indexed_docs.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+    "docs": {
+        "SET(25)000002": "https://docbox.etsi.org/SET/SET/05-CONTRIBUTIONS/2025/SET(25)000002_Draft_report_of_SET__116.docx"
+    },
+    "last_indexed_date": "03/06/2025-14:20:45"
+}