File size: 3,840 Bytes
1550711
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!/usr/bin/env python3
"""
Download and serve Data Lifeboat from HuggingFace Hub

This script downloads a raw Data Lifeboat from a HuggingFace dataset
repository and serves it using Python's HTTP server.
"""

import os
import sys
import shutil
import http.server
import socketserver
from pathlib import Path
from huggingface_hub import snapshot_download

def main() -> None:
    """Download a raw Data Lifeboat dataset and serve it over HTTP on port 7860.

    The dataset repository id is read from the ``RAW_DATASET_REPO``
    environment variable. A snapshot of that repository is downloaded into
    ``/home/user/app/data``; the first sub-directory (in sorted order) of its
    ``data/`` folder is treated as the Data Lifeboat and served with Python's
    built-in HTTP server until interrupted. Requests for ``/`` or
    ``/index.html`` are redirected to ``/README.html``.

    Exits with status 1 when the environment variable is unset, the download
    fails, or no Data Lifeboat directory can be found.
    """
    raw_repo = os.environ.get("RAW_DATASET_REPO")
    if not raw_repo:
        print("❌ Error: RAW_DATASET_REPO environment variable not set")
        sys.exit(1)

    print("🚒 Starting Dynamic Data Lifeboat Space")
    print(f"📦 Raw dataset repository: {raw_repo}")

    download_dir = Path("/home/user/app/data")

    # Only the download itself is guarded by the broad handler, so that later
    # problems are not misreported as download failures.
    print("⬇️ Downloading raw Data Lifeboat from HuggingFace Hub...")
    try:
        repo_path = snapshot_download(
            repo_id=raw_repo,
            repo_type="dataset",
            local_dir=str(download_dir),
        )
    except Exception as e:
        print(f"❌ Error downloading Data Lifeboat: {e}")
        sys.exit(1)
    print(f"✅ Download completed to: {repo_path}")

    # Raw datasets have structure: data/LIFEBOAT_NAME/
    data_subdir = download_dir / "data"
    if not data_subdir.is_dir():
        print("❌ Error: No data/ directory found in downloaded repository")
        sys.exit(1)

    try:
        # Sorted so the choice is deterministic if more than one directory
        # is present (iterdir() yields entries in arbitrary order).
        lifeboat_dirs = sorted(d for d in data_subdir.iterdir() if d.is_dir())
    except OSError as e:
        print(f"❌ Error reading downloaded repository: {e}")
        sys.exit(1)

    if not lifeboat_dirs:
        print("❌ Error: No Data Lifeboat directory found in data/")
        sys.exit(1)

    lifeboat_path = lifeboat_dirs[0]  # Should be the only one
    print(f"📁 Found Data Lifeboat at: {lifeboat_path}")

    # The structure check is advisory: the directory is served either way.
    has_readme = (lifeboat_path / "README.html").exists()
    has_viewer = (lifeboat_path / "viewer").exists()
    if has_readme and has_viewer:
        print("✅ Data Lifeboat structure verified")
    else:
        print("⚠️ Warning: Data Lifeboat structure not fully recognized")
    serve_directory = str(lifeboat_path)

    print("🌐 Starting HTTP server on port 7860...")
    print(f"📂 Serving directory: {serve_directory}")

    # SimpleHTTPRequestHandler serves files relative to the current directory.
    os.chdir(serve_directory)

    class DataLifeboatHandler(http.server.SimpleHTTPRequestHandler):
        """Static file handler that disables caching and uses README.html as index."""

        def end_headers(self):
            # Added to every response, including the redirect below.
            self.send_header('Cache-Control', 'no-cache, no-store, must-revalidate')
            self.send_header('Pragma', 'no-cache')
            self.send_header('Expires', '0')
            super().end_headers()

        def do_GET(self):
            # Compare only the path component so that a root request carrying
            # a query string or fragment (e.g. "/?x=1") is still redirected.
            request_path = self.path.split('?', 1)[0].split('#', 1)[0]
            if request_path in ('/', '/index.html'):
                self.send_response(302)
                self.send_header('Location', '/README.html')
                self.end_headers()
                return
            super().do_GET()

    class ReusableTCPServer(socketserver.TCPServer):
        """TCPServer that can rebind the port right after a restart."""

        allow_reuse_address = True

    with ReusableTCPServer(("", 7860), DataLifeboatHandler) as httpd:
        print("✅ Data Lifeboat is now available at http://localhost:7860")
        print("🔄 Serving Data Lifeboat from downloaded repository...")
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\n🛑 Server stopped")

# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()