File size: 3,840 Bytes
1550711 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
#!/usr/bin/env python3
"""
Download and serve Data Lifeboat from HuggingFace Hub
This script downloads a raw Data Lifeboat from a HuggingFace dataset
repository and serves it using Python's HTTP server.
"""
import os
import sys
import shutil
import http.server
import socketserver
from pathlib import Path
from huggingface_hub import snapshot_download
def _find_lifeboat_dir(download_dir: Path) -> Path:
    """Return the Data Lifeboat directory inside ``download_dir / "data"``.

    Raw datasets have the structure ``data/LIFEBOAT_NAME/``; the first
    sub-directory of ``data/`` (in sorted order) is returned.

    Raises:
        FileNotFoundError: if ``data/`` is missing or contains no directory.
    """
    data_subdir = download_dir / "data"
    if not data_subdir.is_dir():
        raise FileNotFoundError("No data/ directory found in downloaded repository")

    # iterdir() yields entries in arbitrary order; sort so the pick is
    # reproducible should more than one directory ever be present.
    lifeboat_dirs = sorted(d for d in data_subdir.iterdir() if d.is_dir())
    if not lifeboat_dirs:
        raise FileNotFoundError("No Data Lifeboat directory found in data/")
    return lifeboat_dirs[0]


def _serve(directory: Path, port: int) -> None:
    """Serve ``directory`` over HTTP on ``port`` until interrupted with Ctrl-C."""
    # Decide once whether the landing-page redirect has a target; redirecting
    # to a missing README.html would turn the root URL into a 404.
    has_readme = (directory / "README.html").is_file()

    # SimpleHTTPRequestHandler resolves request paths against the current
    # working directory.
    os.chdir(directory)

    class DataLifeboatHandler(http.server.SimpleHTTPRequestHandler):
        """Static file handler that disables caching and lands on README.html."""

        def end_headers(self):
            self.send_header("Cache-Control", "no-cache, no-store, must-revalidate")
            self.send_header("Pragma", "no-cache")
            self.send_header("Expires", "0")
            super().end_headers()

        def do_GET(self):
            # Compare only the path component so "/?query" is redirected too.
            route = self.path.split("?", 1)[0].split("#", 1)[0]
            if has_readme and route in ("/", "/index.html"):
                self.send_response(302)
                self.send_header("Location", "/README.html")
                self.end_headers()
                return
            super().do_GET()

    class _Server(socketserver.ThreadingMixIn, socketserver.TCPServer):
        # Allow rebinding the port right after a restart instead of failing
        # with "Address already in use".
        allow_reuse_address = True
        # One thread per request, so a single idle or slow connection cannot
        # stall every other client; daemon threads do not block shutdown.
        daemon_threads = True

    with _Server(("", port), DataLifeboatHandler) as httpd:
        print(f"✅ Data Lifeboat is now available at http://localhost:{port}")
        print("📚 Serving Data Lifeboat from downloaded repository...")
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\n🛑 Server stopped")


def main() -> None:
    """Download the raw Data Lifeboat named by ``RAW_DATASET_REPO`` and serve it.

    The dataset repository is downloaded to ``/home/user/app/data`` and the
    Data Lifeboat found under its ``data/`` directory is served on port 7860.

    Exits with status 1 when ``RAW_DATASET_REPO`` is unset, when the download
    fails, or when the repository does not contain ``data/<lifeboat>/``.
    """
    port = 7860

    # Get the raw dataset repository from the environment.
    raw_repo = os.environ.get("RAW_DATASET_REPO")
    if not raw_repo:
        print("❌ Error: RAW_DATASET_REPO environment variable not set")
        sys.exit(1)

    print("🚢 Starting Dynamic Data Lifeboat Space")
    print(f"📦 Raw dataset repository: {raw_repo}")

    download_dir = Path("/home/user/app/data")
    print("⬇️ Downloading raw Data Lifeboat from HuggingFace Hub...")
    try:
        # Download the entire repository.
        repo_path = snapshot_download(
            repo_id=raw_repo,
            repo_type="dataset",
            local_dir=str(download_dir),
        )
    except Exception as e:  # top-level boundary: report any download failure
        print(f"❌ Error downloading Data Lifeboat: {e}")
        sys.exit(1)
    print(f"✅ Download completed to: {repo_path}")

    try:
        lifeboat_path = _find_lifeboat_dir(download_dir)
    except OSError as err:
        print(f"❌ Error: {err}")
        sys.exit(1)
    print(f"📁 Found Data Lifeboat at: {lifeboat_path}")

    # Verify the expected structure; an unrecognized layout is served anyway.
    readme_path = lifeboat_path / "README.html"
    viewer_path = lifeboat_path / "viewer"
    if readme_path.exists() and viewer_path.exists():
        print("✅ Data Lifeboat structure verified")
    else:
        print("⚠️ Warning: Data Lifeboat structure not fully recognized")

    print(f"🌐 Starting HTTP server on port {port}...")
    print(f"📂 Serving directory: {lifeboat_path}")
    _serve(lifeboat_path, port)
# Run the server only when executed as a script, not when imported.
if __name__ == "__main__":
    main()