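"""CLI entry point for the Inference Server.

Launches the FastAPI server, the Gradio UI, or both together, and can export
the server's OpenAPI schema. Run with --help for usage examples.
"""
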
import argparse
import logging
import socket
import sys
import threading
import time

from inference_server.ui import launch_ui


def setup_logging(debug: bool = False):
"""Set up logging configuration."""
level = logging.DEBUG if debug else logging.INFO
logging.basicConfig(
level=level,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
handlers=[
logging.StreamHandler(sys.stdout),
],
)

def launch_server_only(host: str = "0.0.0.0", port: int = 8001, reload: bool = True):
    """Launch only the AI server."""
    print(f"πŸš€ Starting Inference Server on {host}:{port}")
    try:
        import uvicorn

        # uvicorn requires the app as an import string (not an app object)
        # for reload=True to take effect
        uvicorn.run(
            "inference_server.main:app",
            host=host,
            port=port,
            reload=reload,
            log_level="info",
        )
    except KeyboardInterrupt:
        print("\nπŸ›‘ Server stopped by user")
    except Exception as e:
        print(f"❌ Server failed to start: {e}")
        sys.exit(1)


def launch_ui_only(
host: str = "localhost", port: int = 7860, share: bool = False, debug: bool = False
):
"""Launch only the Gradio UI."""
print(f"🎨 Starting Gradio UI on {host}:{port}")
setup_logging(debug)
try:
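        # Assumes the AI server is already running; the UI only connects to it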
launch_ui(server_name=host, server_port=port, share=share)
except KeyboardInterrupt:
print("\nπŸ›‘ UI stopped by user")


def launch_both(
server_host: str = "0.0.0.0",
server_port: int = 8001,
ui_host: str = "localhost",
ui_port: int = 7860,
share: bool = False,
debug: bool = False,
):
"""Launch both the AI server and Gradio UI."""
print("πŸš€ Starting Inference Server with Gradio UI")
setup_logging(debug)
try:
print(f"πŸ“‘ Starting AI Server on {server_host}:{server_port}")
# Start server in a background thread
def run_server():
import uvicorn
from inference_server.main import app
uvicorn.run(
app,
host=server_host,
port=server_port,
log_level="warning", # Reduce verbosity
)
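
        # daemon=True ties the server thread to the main process, so the
        # server is torn down when the blocking Gradio UI exits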
        server_thread = threading.Thread(target=run_server, daemon=True)
        server_thread.start()
        # Poll the server port instead of sleeping blindly (up to ~10 s)
        print("⏳ Waiting for server to start...")
        probe_host = "127.0.0.1" if server_host == "0.0.0.0" else server_host
        for _ in range(20):
            try:
                with socket.create_connection((probe_host, server_port), timeout=0.5):
                    break
            except OSError:
                time.sleep(0.5)
        print("βœ… Server started")
print(f"🎨 Starting Gradio UI on {ui_host}:{ui_port}")
# Start the UI (this will block)
launch_ui(server_name=ui_host, server_port=ui_port, share=share)
except KeyboardInterrupt:
print("\nπŸ›‘ Stopping services...")
print("βœ… All services stopped")
except Exception as e:
print(f"❌ Error: {e}")
sys.exit(1)


def main():
"""Main CLI entry point."""
parser = argparse.ArgumentParser(
description="Inference Server CLI",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
# Launch simple integrated app (recommended)
python -m inference_server.cli --simple
# Launch both server and UI (development)
python -m inference_server.cli
# Launch only the server
python -m inference_server.cli --server-only
# Launch only the UI (server must be running separately)
python -m inference_server.cli --ui-only
# Launch with custom ports
python -m inference_server.cli --server-port 8002 --ui-port 7861
# Launch with public sharing (Gradio)
python -m inference_server.cli --share
# Launch for deployment (recommended)
python -m inference_server.cli --simple --ui-host 0.0.0.0 --share
# Export OpenAPI schema
python -m inference_server.cli --export-openapi
# Export as YAML
python -m inference_server.cli --export-openapi --export-format yaml
""",
)
# Mode selection
mode_group = parser.add_mutually_exclusive_group()
mode_group.add_argument(
"--server-only", action="store_true", help="Launch only the AI server"
)
mode_group.add_argument(
"--ui-only", action="store_true", help="Launch only the Gradio UI"
)
mode_group.add_argument(
"--simple",
action="store_true",
help="Launch simple integrated app (recommended)",
)
# Server configuration
parser.add_argument(
"--server-host", default="0.0.0.0", help="AI server host (default: 0.0.0.0)"
)
parser.add_argument(
"--server-port", type=int, default=8001, help="AI server port (default: 8001)"
)
parser.add_argument(
"--no-reload", action="store_true", help="Disable auto-reload for server"
)
# UI configuration
parser.add_argument(
"--ui-host", default="localhost", help="Gradio UI host (default: localhost)"
)
parser.add_argument(
"--ui-port", type=int, default=7860, help="Gradio UI port (default: 7860)"
)
parser.add_argument(
"--share", action="store_true", help="Create public Gradio link"
)
# General options
parser.add_argument("--debug", action="store_true", help="Enable debug logging")
# Export options
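    # --export-openapi is itself a launch mode, so it joins the exclusive group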
mode_group.add_argument(
"--export-openapi", action="store_true", help="Export OpenAPI schema to file"
)
parser.add_argument(
"--export-format",
choices=["json", "yaml"],
default="json",
help="OpenAPI export format (default: json)",
)
parser.add_argument(
"--export-output",
help="OpenAPI export output file (default: openapi.json or openapi.yaml)",
)
args = parser.parse_args()
# Route to appropriate function
if args.server_only:
launch_server_only(
host=args.server_host, port=args.server_port, reload=not args.no_reload
)
elif args.ui_only:
launch_ui_only(
host=args.ui_host, port=args.ui_port, share=args.share, debug=args.debug
)
elif args.simple:
# Launch simple integrated app
from inference_server.simple_integrated import (
launch_simple_integrated_app,
)
print("πŸš€ Launching simple integrated Inference Server + UI")
print("No mounting issues - direct session management!")
launch_simple_integrated_app(
host=args.ui_host, port=args.ui_port, share=args.share
)
elif args.export_openapi:
# Export OpenAPI schema
from inference_server.export_openapi import export_openapi_schema
output_file = args.export_output
if output_file is None:
output_file = f"openapi.{args.export_format}"
print(f"πŸ“„ Exporting OpenAPI schema to {output_file}")
export_openapi_schema(output_file=output_file, format_type=args.export_format)
else:
# Launch both (default)
launch_both(
server_host=args.server_host,
server_port=args.server_port,
ui_host=args.ui_host,
ui_port=args.ui_port,
share=args.share,
debug=args.debug,
)


if __name__ == "__main__":
main()