import argparse
import logging
import sys
import threading
import time

from inference_server.ui import launch_ui


def setup_logging(debug: bool = False):
    """Set up logging configuration."""
    level = logging.DEBUG if debug else logging.INFO
    logging.basicConfig(
        level=level,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        handlers=[
            logging.StreamHandler(sys.stdout),
        ],
    )
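

# setup_logging() configures the root logger only; uvicorn installs handlers of
# its own. A minimal sketch (assuming uvicorn's documented logger names; this
# helper is hypothetical, not part of inference_server) of how to quiet
# per-request access lines without changing application log levels:
def quiet_uvicorn_access_log() -> None:
    """Raise the level of uvicorn's access logger so request lines are hidden."""
    logging.getLogger("uvicorn.access").setLevel(logging.WARNING)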


def launch_server_only(host: str = "0.0.0.0", port: int = 8001, reload: bool = True):
    """Launch only the AI server."""
    print(f"🚀 Starting Inference Server on {host}:{port}")

    try:
        import uvicorn

        # uvicorn needs the app as an import string (not an imported object)
        # for reload to work, so point it at inference_server.main:app.
        uvicorn.run(
            "inference_server.main:app",
            host=host,
            port=port,
            reload=reload,
            log_level="info",
        )
    except KeyboardInterrupt:
        print("\n🛑 Server stopped by user")
    except Exception as e:
        print(f"❌ Server failed to start: {e}")
        sys.exit(1)


def launch_ui_only(
    host: str = "localhost", port: int = 7860, share: bool = False, debug: bool = False
):
    """Launch only the Gradio UI."""
    print(f"🎨 Starting Gradio UI on {host}:{port}")
    setup_logging(debug)

    try:
        launch_ui(server_name=host, server_port=port, share=share)
    except KeyboardInterrupt:
        print("\n🛑 UI stopped by user")


def launch_both(
    server_host: str = "0.0.0.0",
    server_port: int = 8001,
    ui_host: str = "localhost",
    ui_port: int = 7860,
    share: bool = False,
    debug: bool = False,
):
    """Launch both the AI server and Gradio UI."""
    print("🚀 Starting Inference Server with Gradio UI")
    setup_logging(debug)

    try:
        print(f"📡 Starting AI Server on {server_host}:{server_port}")

        # Run the server in a daemon thread so the blocking UI call below can
        # own the main thread; the thread dies with the process on exit.
        def run_server():
            import uvicorn
            from inference_server.main import app

            uvicorn.run(
                app,
                host=server_host,
                port=server_port,
                log_level="warning",  # Reduce verbosity
            )

        server_thread = threading.Thread(target=run_server, daemon=True)
        server_thread.start()

        # Give the server a moment to bind its port before starting the UI
        print("⏳ Waiting for server to start...")
        time.sleep(3)
        print("✅ Server started successfully")

        print(f"🎨 Starting Gradio UI on {ui_host}:{ui_port}")

        # Start the UI (this call blocks until the UI shuts down)
        launch_ui(server_name=ui_host, server_port=ui_port, share=share)
    except KeyboardInterrupt:
        print("\n🛑 Stopping services...")
        print("✅ All services stopped")
    except Exception as e:
        print(f"❌ Error: {e}")
        sys.exit(1)
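

# The fixed time.sleep(3) above is only a guess at readiness: "Server started
# successfully" is printed without checking the port. A minimal readiness-poll
# sketch, assuming plain TCP connectivity is an adequate health signal here
# (wait_for_server is a hypothetical helper, not part of inference_server):
def wait_for_server(host: str, port: int, timeout: float = 15.0) -> bool:
    """Poll until the server accepts TCP connections or the timeout elapses."""
    import socket

    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            with socket.create_connection((host, port), timeout=1.0):
                return True
        except OSError:
            time.sleep(0.25)
    return False


# Example use inside launch_both, replacing the sleep:
#     if not wait_for_server(server_host, server_port):
#         print("❌ Server did not become ready in time")
#         sys.exit(1)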


def main():
    """Main CLI entry point."""
    parser = argparse.ArgumentParser(
        description="Inference Server CLI",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Launch simple integrated app (recommended)
  python -m inference_server.cli --simple

  # Launch both server and UI (development)
  python -m inference_server.cli

  # Launch only the server
  python -m inference_server.cli --server-only

  # Launch only the UI (server must be running separately)
  python -m inference_server.cli --ui-only

  # Launch with custom ports
  python -m inference_server.cli --server-port 8002 --ui-port 7861

  # Launch with public sharing (Gradio)
  python -m inference_server.cli --share

  # Launch for deployment (recommended)
  python -m inference_server.cli --simple --ui-host 0.0.0.0 --share

  # Export OpenAPI schema
  python -m inference_server.cli --export-openapi

  # Export as YAML
  python -m inference_server.cli --export-openapi --export-format yaml
""",
    )

    # Mode selection
    mode_group = parser.add_mutually_exclusive_group()
    mode_group.add_argument(
        "--server-only", action="store_true", help="Launch only the AI server"
    )
    mode_group.add_argument(
        "--ui-only", action="store_true", help="Launch only the Gradio UI"
    )
    mode_group.add_argument(
        "--simple",
        action="store_true",
        help="Launch simple integrated app (recommended)",
    )

    # Server configuration
    parser.add_argument(
        "--server-host", default="0.0.0.0", help="AI server host (default: 0.0.0.0)"
    )
    parser.add_argument(
        "--server-port", type=int, default=8001, help="AI server port (default: 8001)"
    )
    parser.add_argument(
        "--no-reload", action="store_true", help="Disable auto-reload for server"
    )

    # UI configuration
    parser.add_argument(
        "--ui-host", default="localhost", help="Gradio UI host (default: localhost)"
    )
    parser.add_argument(
        "--ui-port", type=int, default=7860, help="Gradio UI port (default: 7860)"
    )
    parser.add_argument(
        "--share", action="store_true", help="Create public Gradio link"
    )

    # General options
    parser.add_argument("--debug", action="store_true", help="Enable debug logging")

    # Export options (--export-openapi is also a mode, so it joins mode_group)
    mode_group.add_argument(
        "--export-openapi", action="store_true", help="Export OpenAPI schema to file"
    )
    parser.add_argument(
        "--export-format",
        choices=["json", "yaml"],
        default="json",
        help="OpenAPI export format (default: json)",
    )
    parser.add_argument(
        "--export-output",
        help="OpenAPI export output file (default: openapi.json or openapi.yaml)",
    )

    args = parser.parse_args()

    # Route to appropriate function
    if args.server_only:
        launch_server_only(
            host=args.server_host, port=args.server_port, reload=not args.no_reload
        )
    elif args.ui_only:
        launch_ui_only(
            host=args.ui_host, port=args.ui_port, share=args.share, debug=args.debug
        )
    elif args.simple:
        # Launch simple integrated app
        from inference_server.simple_integrated import launch_simple_integrated_app

        print("🚀 Launching simple integrated Inference Server + UI")
        print("No mounting issues - direct session management!")
        launch_simple_integrated_app(
            host=args.ui_host, port=args.ui_port, share=args.share
        )
    elif args.export_openapi:
        # Export OpenAPI schema
        from inference_server.export_openapi import export_openapi_schema

        output_file = args.export_output
        if output_file is None:
            output_file = f"openapi.{args.export_format}"

        print(f"📄 Exporting OpenAPI schema to {output_file}")
        export_openapi_schema(output_file=output_file, format_type=args.export_format)
    else:
        # Launch both (default)
        launch_both(
            server_host=args.server_host,
            server_port=args.server_port,
            ui_host=args.ui_host,
            ui_port=args.ui_port,
            share=args.share,
            debug=args.debug,
        )


if __name__ == "__main__":
    main()