File size: 7,045 Bytes
63ed3a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
import argparse
import logging
import sys
import threading
import time

from inference_server.ui import launch_ui


def setup_logging(debug: bool = False):
    """Configure root logging to emit to stdout.

    Args:
        debug: When True, log at DEBUG level; otherwise INFO.
    """
    logging.basicConfig(
        level=logging.DEBUG if debug else logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        handlers=[logging.StreamHandler(sys.stdout)],
    )


def launch_server_only(host: str = "0.0.0.0", port: int = 8001, reload: bool = True):
    """Launch only the AI server.

    Args:
        host: Interface to bind (default: all interfaces).
        port: TCP port for the server (default: 8001).
        reload: Enable uvicorn auto-reload. Note that uvicorn only supports
            reload when the app is given as an import string, not an object.

    Exits the process with status 1 if the server fails to start.
    """
    print(f"πŸš€ Starting Inference Server on {host}:{port}")

    try:
        import uvicorn

        if reload:
            # BUG FIX: uvicorn silently disables --reload when handed an app
            # *object*; it needs the "module:attr" import string to be able to
            # re-import the app in the reloader subprocess.
            uvicorn.run(
                "inference_server.main:app",
                host=host,
                port=port,
                reload=True,
                log_level="info",
            )
        else:
            from inference_server.main import app

            uvicorn.run(app, host=host, port=port, reload=False, log_level="info")
    except KeyboardInterrupt:
        print("\nπŸ›‘ Server stopped by user")
    except Exception as e:
        print(f"❌ Server failed to start: {e}")
        sys.exit(1)


def launch_ui_only(
    host: str = "localhost", port: int = 7860, share: bool = False, debug: bool = False
):
    """Launch only the Gradio UI (expects the AI server to run separately).

    Args:
        host: Host name for the Gradio server.
        port: Port for the Gradio server.
        share: Create a public Gradio share link when True.
        debug: Enable debug-level logging.
    """
    print(f"🎨 Starting Gradio UI on {host}:{port}")
    setup_logging(debug)
    try:
        # Blocks until the UI exits or the user interrupts.
        launch_ui(server_name=host, server_port=port, share=share)
    except KeyboardInterrupt:
        print("\nπŸ›‘ UI stopped by user")


def launch_both(
    server_host: str = "0.0.0.0",
    server_port: int = 8001,
    ui_host: str = "localhost",
    ui_port: int = 7860,
    share: bool = False,
    debug: bool = False,
):
    """Launch both the AI server and Gradio UI.

    The API server runs in a daemon thread; the Gradio UI runs in the
    foreground and blocks until interrupted.

    Args:
        server_host: Interface for the AI server.
        server_port: Port for the AI server.
        ui_host: Host name for the Gradio UI.
        ui_port: Port for the Gradio UI.
        share: Create a public Gradio share link when True.
        debug: Enable debug-level logging.

    Exits the process with status 1 on unexpected errors.
    """
    import socket

    def _port_open(host: str, port: int) -> bool:
        """Return True if a TCP connection to (host, port) succeeds."""
        # A server bound to 0.0.0.0 is reachable via loopback.
        probe_host = "127.0.0.1" if host == "0.0.0.0" else host
        try:
            with socket.create_connection((probe_host, port), timeout=0.5):
                return True
        except OSError:
            return False

    print("πŸš€ Starting Inference Server with Gradio UI")

    setup_logging(debug)

    try:
        print(f"πŸ“‘ Starting AI Server on {server_host}:{server_port}")

        # Start server in a background thread
        def run_server():
            import uvicorn

            from inference_server.main import app

            uvicorn.run(
                app,
                host=server_host,
                port=server_port,
                log_level="warning",  # Reduce verbosity
            )

        server_thread = threading.Thread(target=run_server, daemon=True)
        server_thread.start()

        # BUG FIX: the original slept a fixed 3 seconds and then claimed
        # success unconditionally. Poll the port instead so the status
        # message reflects reality.
        print("⏳ Waiting for server to start...")
        deadline = time.monotonic() + 15.0
        while time.monotonic() < deadline and not _port_open(server_host, server_port):
            time.sleep(0.25)

        if _port_open(server_host, server_port):
            print("βœ… Server started successfully")
        else:
            print("⚠️ Server not reachable yet — launching UI anyway")

        print(f"🎨 Starting Gradio UI on {ui_host}:{ui_port}")

        # Start the UI (this will block)
        launch_ui(server_name=ui_host, server_port=ui_port, share=share)

    except KeyboardInterrupt:
        print("\nπŸ›‘ Stopping services...")
        print("βœ… All services stopped")
    except Exception as e:
        print(f"❌ Error: {e}")
        sys.exit(1)


def main():
    """Main CLI entry point.

    Parses command-line arguments and dispatches to one of the launch
    modes (server-only, UI-only, simple integrated, export, or both).
    """
    parser = argparse.ArgumentParser(
        description="Inference Server CLI",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Launch simple integrated app (recommended)
  python -m inference_server.cli --simple

  # Launch both server and UI (development)
  python -m inference_server.cli

  # Launch only the server
  python -m inference_server.cli --server-only

  # Launch only the UI (server must be running separately)
  python -m inference_server.cli --ui-only

  # Launch with custom ports
  python -m inference_server.cli --server-port 8002 --ui-port 7861

  # Launch with public sharing (Gradio)
  python -m inference_server.cli --share

  # Launch for deployment (recommended)
  python -m inference_server.cli --simple --host 0.0.0.0 --share

  # Export OpenAPI schema
  python -m inference_server.cli --export-openapi

  # Export as YAML
  python -m inference_server.cli --export-openapi --export-format yaml
        """,
    )

    # Mode selection — at most one of these may be given; the default
    # (no mode flag) launches both server and UI.
    mode_group = parser.add_mutually_exclusive_group()
    mode_group.add_argument(
        "--server-only", action="store_true", help="Launch only the AI server"
    )
    mode_group.add_argument(
        "--ui-only", action="store_true", help="Launch only the Gradio UI"
    )
    mode_group.add_argument(
        "--simple",
        action="store_true",
        help="Launch simple integrated app (recommended)",
    )

    # Server configuration
    parser.add_argument(
        "--server-host", default="0.0.0.0", help="AI server host (default: 0.0.0.0)"
    )
    parser.add_argument(
        "--server-port", type=int, default=8001, help="AI server port (default: 8001)"
    )
    parser.add_argument(
        "--no-reload", action="store_true", help="Disable auto-reload for server"
    )

    # UI configuration
    parser.add_argument(
        "--ui-host", default="localhost", help="Gradio UI host (default: localhost)"
    )
    parser.add_argument(
        "--ui-port", type=int, default=7860, help="Gradio UI port (default: 7860)"
    )
    parser.add_argument(
        "--share", action="store_true", help="Create public Gradio link"
    )

    # General options
    parser.add_argument("--debug", action="store_true", help="Enable debug logging")

    # Export options (export is also a mode, hence added to mode_group)
    mode_group.add_argument(
        "--export-openapi", action="store_true", help="Export OpenAPI schema to file"
    )
    parser.add_argument(
        "--export-format",
        choices=["json", "yaml"],
        default="json",
        help="OpenAPI export format (default: json)",
    )
    parser.add_argument(
        "--export-output",
        help="OpenAPI export output file (default: openapi.json or openapi.yaml)",
    )

    args = parser.parse_args()

    # Route to appropriate function
    if args.server_only:
        launch_server_only(
            host=args.server_host, port=args.server_port, reload=not args.no_reload
        )
    elif args.ui_only:
        launch_ui_only(
            host=args.ui_host, port=args.ui_port, share=args.share, debug=args.debug
        )
    elif args.simple:
        # Launch simple integrated app
        # (imported lazily so the other modes don't pay the import cost)
        from inference_server.simple_integrated import (
            launch_simple_integrated_app,
        )

        print("πŸš€ Launching simple integrated Inference Server + UI")
        print("No mounting issues - direct session management!")
        launch_simple_integrated_app(
            host=args.ui_host, port=args.ui_port, share=args.share
        )
    elif args.export_openapi:
        # Export OpenAPI schema
        from inference_server.export_openapi import export_openapi_schema

        output_file = args.export_output
        if output_file is None:
            # Derive the output filename from the chosen format.
            output_file = f"openapi.{args.export_format}"

        print(f"πŸ“„ Exporting OpenAPI schema to {output_file}")
        export_openapi_schema(output_file=output_file, format_type=args.export_format)
    else:
        # Launch both (default)
        launch_both(
            server_host=args.server_host,
            server_port=args.server_port,
            ui_host=args.ui_host,
            ui_port=args.ui_port,
            share=args.share,
            debug=args.debug,
        )


# Script entry point: `python -m inference_server.cli` or direct execution.
if __name__ == "__main__":
    main()