diff --git "a/auto_diffusers.log" "b/auto_diffusers.log"
--- "a/auto_diffusers.log"
+++ "b/auto_diffusers.log"
@@ -6149,3 +6149,7388 @@ IMPORTANT GUIDELINES:
 2025-05-30 01:01:20,159 - auto_diffusers - INFO - Sending request to Gemini API
 2025-05-30 01:01:43,263 - auto_diffusers - INFO - Successfully received response from Gemini API (no tools used)
 2025-05-30 01:01:43,263 - auto_diffusers - DEBUG - Response length: 2591 characters
+2025-05-30 09:07:29,183 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 09:07:29,183 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 09:07:29,183 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 09:07:29,183 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 09:07:29,183 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 09:07:29,183 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 09:07:29,183 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 09:07:29,183 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 09:07:29,183 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 09:07:29,183 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 09:07:29,187 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 09:07:29,187 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 09:07:29,635 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 09:07:29,635 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 09:07:29,635 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 09:07:29,635 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 09:07:29,635 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 09:07:29,635 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 09:07:29,635 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 09:07:29,635 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 09:07:29,635 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 09:07:29,635 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 09:07:29,635 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 09:07:29,637 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:07:29,650 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 09:07:29,655 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:07:29,730 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:07:29,765 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 09:07:29,766 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12ec33e10>
+2025-05-30 09:07:29,766 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:07:29,766 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:07:29,766 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:07:29,767 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:07:29,767 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:07:29,767 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:07:29 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 09:07:29,767 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 09:07:29,767 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:07:29,767 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:07:29,767 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:07:29,767 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:07:29,767 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:07:29,767 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:07:29,768 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 09:07:29,768 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12f8fcb10>
+2025-05-30 09:07:29,768 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:07:29,768 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:07:29,768 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 09:07:29,768 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:07:29,768 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:07:29,774 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:07:29 GMT'), (b'server', b'uvicorn'), (b'content-length', b'95123'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 09:07:29,774 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 09:07:29,774 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 09:07:29,774 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:07:29,774 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:07:29,774 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:07:29,774 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:07:29,775 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:07:29,785 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 09:07:29,931 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 09:07:29,998 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12b344910>
+2025-05-30 09:07:29,999 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x12f7cbf50> server_hostname='api.gradio.app' timeout=30
+2025-05-30 09:07:30,001 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12c634b90>
+2025-05-30 09:07:30,001 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x11ac251c0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 09:07:30,282 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11a8efa90>
+2025-05-30 09:07:30,283 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:07:30,283 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:07:30,283 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:07:30,283 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:07:30,284 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:07:30,292 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12d6ffcd0>
+2025-05-30 09:07:30,292 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:07:30,292 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:07:30,292 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:07:30,293 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:07:30,293 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:07:30,425 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:07:30 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 09:07:30,426 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 09:07:30,426 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:07:30,427 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:07:30,427 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:07:30,427 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:07:30,427 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:07:30,427 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:07:30,441 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:07:30 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 09:07:30,442 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 09:07:30,442 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:07:30,442 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:07:30,442 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:07:30,442 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:07:30,443 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:07:30,443 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:07:31,087 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:07:31,309 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 09:07:44,601 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:07:44,601 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:07:44,601 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 09:07:44,601 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:07:44,601 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:07:44,601 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:07:44,601 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:07:44,601 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:07:44,601 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:11:09,156 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 09:11:09,156 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 09:11:09,156 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 09:11:09,156 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 09:11:09,156 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 09:11:09,156 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 09:11:09,156 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 09:11:09,156 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 09:11:09,156 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 09:11:09,156 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 09:11:09,160 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 09:11:09,161 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 09:11:09,617 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 09:11:09,617 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 09:11:09,617 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 09:11:09,617 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 09:11:09,617 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 09:11:09,617 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 09:11:09,617 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 09:11:09,617 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 09:11:09,617 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 09:11:09,617 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 09:11:09,617 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 09:11:09,619 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:11:09,632 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 09:11:09,640 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:11:09,721 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:11:09,757 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 09:11:09,757 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x16b5ce7d0>
+2025-05-30 09:11:09,757 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:11:09,758 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:11:09,758 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:11:09,758 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:11:09,758 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:11:09,758 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:11:09 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 09:11:09,758 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 09:11:09,759 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:11:09,759 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:11:09,759 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:11:09,759 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:11:09,759 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:11:09,759 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:11:09,759 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 09:11:09,760 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1177c6b90>
+2025-05-30 09:11:09,760 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:11:09,760 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:11:09,760 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 09:11:09,760 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:11:09,760 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:11:09,766 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:11:09 GMT'), (b'server', b'uvicorn'), (b'content-length', b'95128'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 09:11:09,766 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 09:11:09,766 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 09:11:09,766 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:11:09,766 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:11:09,766 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:11:09,766 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:11:09,766 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:11:09,778 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 09:11:09,975 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 09:11:09,981 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x168be77d0>
+2025-05-30 09:11:09,981 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x1120251c0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 09:11:09,986 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x168c71b50>
+2025-05-30 09:11:09,986 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x16b5cbe30> server_hostname='api.gradio.app' timeout=30
+2025-05-30 09:11:10,272 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x168ff3ad0>
+2025-05-30 09:11:10,272 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:11:10,273 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:11:10,273 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:11:10,273 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:11:10,273 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:11:10,284 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x168ae8e10>
+2025-05-30 09:11:10,284 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:11:10,284 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:11:10,284 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:11:10,284 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:11:10,284 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:11:10,419 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:11:10 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 09:11:10,419 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 09:11:10,419 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:11:10,419 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:11:10,419 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:11:10,419 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:11:10,419 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:11:10,420 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:11:10,435 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:11:10 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 09:11:10,435 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 09:11:10,435 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:11:10,435 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:11:10,435 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:11:10,435 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:11:10,436 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:11:10,436 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:11:11,033 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:11:11,259 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 09:11:13,467 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:11:13,467 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:11:13,467 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 09:11:13,467 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:11:13,467 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:11:13,467 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:11:13,467 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:11:13,467 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:11:13,467 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:13:36,094 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 09:13:36,094 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 09:13:36,094 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 09:13:36,094 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 09:13:36,094 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 09:13:36,094 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 09:13:36,094 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 09:13:36,094 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 09:13:36,094 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 09:13:36,094 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 09:13:36,098 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 09:13:36,099 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 09:13:36,533 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 09:13:36,533 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 09:13:36,533 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 09:13:36,533 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 09:13:36,533 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 09:13:36,533 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 09:13:36,533 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 09:13:36,533 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 09:13:36,533 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 09:13:36,533 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 09:13:36,533 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 09:13:36,535 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:13:36,548 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 09:13:36,554 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:13:36,628 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:13:36,666 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 09:13:36,667 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11d557f90>
+2025-05-30 09:13:36,667 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:13:36,667 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:13:36,667 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:13:36,667 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:13:36,667 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:13:36,667 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:13:36 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 09:13:36,668 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 09:13:36,668 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:13:36,668 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:13:36,668 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:13:36,668 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:13:36,668 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:13:36,668 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:13:36,668 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 09:13:36,669 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11f60fc90>
+2025-05-30 09:13:36,669 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:13:36,669 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:13:36,669 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 09:13:36,669 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:13:36,669 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:13:36,675 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:13:36 GMT'), (b'server', b'uvicorn'), (b'content-length', b'95123'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 09:13:36,675 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 09:13:36,675 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 09:13:36,675 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:13:36,675 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:13:36,675 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:13:36,675 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:13:36,675 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:13:36,686 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 09:13:36,845 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11d573790>
+2025-05-30 09:13:36,845 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x1078251c0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 09:13:36,848 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x118a62710>
+2025-05-30 09:13:36,848 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x11f4cbf50> server_hostname='api.gradio.app' timeout=30
+2025-05-30 09:13:36,946 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 09:13:37,121 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11d59c090>
+2025-05-30 09:13:37,121 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:13:37,122 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:13:37,122 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:13:37,122 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:13:37,122 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:13:37,133 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11c92bad0>
+2025-05-30 09:13:37,134 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:13:37,134 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:13:37,134 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:13:37,134 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:13:37,134 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:13:37,261 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:13:37 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 09:13:37,262 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 09:13:37,262 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:13:37,262 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:13:37,262 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:13:37,262 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:13:37,263 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:13:37,263 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:13:37,277 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:13:37 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 09:13:37,277 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 09:13:37,278 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:13:37,278 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:13:37,278 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:13:37,278 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:13:37,278 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:13:37,278 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:13:38,071 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:13:38,138 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:13:38,138 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:13:38,138 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 09:13:38,138 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:13:38,138 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:13:38,138 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:13:38,138 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:13:38,138 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:13:38,139 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:13:38,290 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 09:15:53,436 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 09:15:53,436 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 09:15:53,436 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 09:15:53,436 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 09:15:53,436 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 09:15:53,436 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 09:15:53,436 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 09:15:53,436 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 09:15:53,436 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 09:15:53,436 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 09:15:53,439 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 09:15:53,439 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 09:15:53,862 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 09:15:53,862 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 09:15:53,862 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 09:15:53,862 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 09:15:53,862 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 09:15:53,862 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 09:15:53,862 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 09:15:53,862 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 09:15:53,862 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 09:15:53,862 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 09:15:53,862 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 09:15:53,864 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:15:53,878 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 09:15:53,878 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:15:53,959 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:15:53,996 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 09:15:53,996 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x129b28810>
+2025-05-30 09:15:53,997 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:15:53,997 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:15:53,997 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:15:53,997 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:15:53,997 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:15:53,997 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:15:53 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 09:15:53,998 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 09:15:53,998 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:15:53,998 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:15:53,998 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:15:53,998 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:15:53,998 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:15:53,998 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:15:53,998 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 09:15:53,999 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x129beccd0>
+2025-05-30 09:15:53,999 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:15:53,999 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:15:53,999 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 09:15:53,999 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:15:53,999 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:15:54,005 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:15:53 GMT'), (b'server', b'uvicorn'), (b'content-length', b'93371'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 09:15:54,005 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 09:15:54,005 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 09:15:54,005 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:15:54,005 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:15:54,005 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:15:54,005 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:15:54,005 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:15:54,017 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 09:15:54,037 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x128c39950>
+2025-05-30 09:15:54,037 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x12898d490> server_hostname='api.gradio.app' timeout=3
+2025-05-30 09:15:54,165 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 09:15:54,177 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1199ff8d0>
+2025-05-30 09:15:54,177 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x129927da0> server_hostname='api.gradio.app' timeout=30
+2025-05-30 09:15:54,311 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11f787d90>
+2025-05-30 09:15:54,311 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:15:54,311 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:15:54,311 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:15:54,311 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:15:54,311 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:15:54,450 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:15:54 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 09:15:54,451 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 09:15:54,451 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:15:54,451 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:15:54,451 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:15:54,451 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:15:54,451 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:15:54,451 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:15:54,500 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11f131f10>
+2025-05-30 09:15:54,500 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:15:54,500 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:15:54,500 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:15:54,500 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:15:54,500 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:15:54,673 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:15:54 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 09:15:54,674 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 09:15:54,675 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:15:54,676 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:15:54,676 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:15:54,676 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:15:54,677 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:15:54,677 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:15:55,285 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:15:55,504 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 09:16:00,140 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:16:00,140 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:16:00,140 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 09:16:00,140 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:16:00,141 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:16:00,141 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:16:00,141 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:16:00,141 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:16:00,141 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:16:04,018 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:16:04,018 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:16:04,018 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:16:04,018 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:16:04,018 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:16:04,018 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:16:04,018 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:16:04,018 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:17:42,173 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 09:17:42,173 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 09:17:42,173 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 09:17:42,173 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 09:17:42,173 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 09:17:42,173 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 09:17:42,173 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 09:17:42,173 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 09:17:42,173 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 09:17:42,173 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 09:17:42,177 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 09:17:42,177 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 09:17:42,617 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 09:17:42,617 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 09:17:42,617 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 09:17:42,617 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 09:17:42,617 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 09:17:42,617 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 09:17:42,617 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 09:17:42,618 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 09:17:42,618 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 09:17:42,618 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 09:17:42,618 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 09:17:42,620 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:17:42,633 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 09:17:42,639 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:17:42,720 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:17:42,756 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 09:17:42,756 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x124462690>
+2025-05-30 09:17:42,756 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:17:42,757 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:17:42,757 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:17:42,757 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:17:42,757 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:17:42,757 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:17:42 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 09:17:42,757 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 09:17:42,757 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:17:42,757 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:17:42,757 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:17:42,757 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:17:42,758 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:17:42,758 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:17:42,758 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 09:17:42,758 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1244637d0>
+2025-05-30 09:17:42,758 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:17:42,759 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:17:42,759 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 09:17:42,759 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:17:42,759 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:17:42,765 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:17:42 GMT'), (b'server', b'uvicorn'), (b'content-length', b'93382'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 09:17:42,765 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 09:17:42,765 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 09:17:42,765 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:17:42,765 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:17:42,765 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:17:42,765 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:17:42,765 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:17:42,775 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 09:17:42,928 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 09:17:42,973 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x124351350>
+2025-05-30 09:17:42,973 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x124327da0> server_hostname='api.gradio.app' timeout=30
+2025-05-30 09:17:42,974 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x123668290>
+2025-05-30 09:17:42,974 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x12328d490> server_hostname='api.gradio.app' timeout=3
+2025-05-30 09:17:43,245 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1236d6850>
+2025-05-30 09:17:43,245 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:17:43,245 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:17:43,245 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:17:43,245 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:17:43,246 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:17:43,256 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x123652410>
+2025-05-30 09:17:43,256 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:17:43,257 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:17:43,257 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:17:43,257 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:17:43,257 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:17:43,387 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:17:43 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 09:17:43,389 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 09:17:43,389 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:17:43,389 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:17:43,389 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:17:43,389 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:17:43,389 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:17:43,389 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:17:43,402 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:17:43 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 09:17:43,404 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 09:17:43,404 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:17:43,405 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:17:43,405 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:17:43,405 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:17:43,405 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:17:43,405 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:17:44,007 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:17:44,224 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 09:17:47,229 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:17:47,230 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:17:47,230 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 09:17:47,230 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:17:47,230 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:17:47,230 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:17:47,230 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:17:47,230 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:17:47,230 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:18:40,149 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 09:18:40,149 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 09:18:40,149 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 09:18:40,149 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 09:18:40,149 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 09:18:40,149 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 09:18:40,149 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 09:18:40,149 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 09:18:40,149 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 09:18:40,149 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 09:18:40,153 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 09:18:40,153 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 09:18:40,577 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 09:18:40,577 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 09:18:40,577 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 09:18:40,577 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 09:18:40,577 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 09:18:40,577 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 09:18:40,577 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 09:18:40,577 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 09:18:40,577 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 09:18:40,577 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 09:18:40,577 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 09:18:40,580 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:18:40,593 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 09:18:40,601 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:18:40,683 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:18:40,721 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 09:18:40,722 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x123f29950>
+2025-05-30 09:18:40,722 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:18:40,722 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:18:40,722 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:18:40,723 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:18:40,723 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:18:40,723 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:18:40 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 09:18:40,723 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 09:18:40,723 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:18:40,723 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:18:40,723 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:18:40,723 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:18:40,723 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:18:40,723 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:18:40,724 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 09:18:40,724 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x123ff07d0>
+2025-05-30 09:18:40,724 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:18:40,724 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:18:40,724 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 09:18:40,725 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:18:40,725 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:18:40,730 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:18:40 GMT'), (b'server', b'uvicorn'), (b'content-length', b'93376'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 09:18:40,731 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 09:18:40,731 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 09:18:40,731 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:18:40,731 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:18:40,731 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:18:40,731 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:18:40,731 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:18:40,735 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x123a56910>
+2025-05-30 09:18:40,735 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x121d8d490> server_hostname='api.gradio.app' timeout=3
+2025-05-30 09:18:40,741 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 09:18:40,867 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 09:18:40,885 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x115c6e6d0>
+2025-05-30 09:18:40,885 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x123ec7da0> server_hostname='api.gradio.app' timeout=30
+2025-05-30 09:18:41,061 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1218c2590>
+2025-05-30 09:18:41,062 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:18:41,062 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:18:41,063 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:18:41,063 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:18:41,063 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:18:41,177 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x123acd750>
+2025-05-30 09:18:41,178 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:18:41,178 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:18:41,178 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:18:41,178 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:18:41,179 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:18:41,199 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:18:41 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 09:18:41,200 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 09:18:41,200 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:18:41,200 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:18:41,200 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:18:41,200 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:18:41,200 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:18:41,200 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:18:41,324 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:18:41 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 09:18:41,326 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 09:18:41,327 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:18:41,328 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:18:41,328 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:18:41,328 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:18:41,329 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:18:41,329 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:18:41,938 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:18:42,158 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 09:18:43,365 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:18:43,365 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:18:43,365 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 09:18:43,365 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:18:43,365 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:18:43,365 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:18:43,365 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:18:43,365 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:18:43,366 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:18:45,715 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:18:45,715 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:18:45,715 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:18:45,715 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:18:45,716 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:18:45,716 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:18:45,716 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:18:45,716 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:21:02,035 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 09:21:02,035 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 09:21:02,035 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 09:21:02,035 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 09:21:02,035 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 09:21:02,035 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 09:21:02,035 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 09:21:02,035 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 09:21:02,035 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 09:21:02,035 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 09:21:02,038 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 09:21:02,039 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 09:21:02,480 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 09:21:02,480 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 09:21:02,480 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 09:21:02,480 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 09:21:02,480 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 09:21:02,480 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 09:21:02,480 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 09:21:02,480 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 09:21:02,480 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 09:21:02,480 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 09:21:02,480 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 09:21:02,482 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:21:02,496 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 09:21:02,502 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:21:02,579 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:21:02,617 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 09:21:02,618 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x124c2a9d0>
+2025-05-30 09:21:02,618 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:21:02,618 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:21:02,618 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:21:02,619 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:21:02,619 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:21:02,619 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:21:02 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 09:21:02,619 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 09:21:02,619 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:21:02,619 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:21:02,619 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:21:02,619 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:21:02,619 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:21:02,619 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:21:02,620 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 09:21:02,620 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x124c2bb90>
+2025-05-30 09:21:02,620 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:21:02,620 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:21:02,620 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 09:21:02,620 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:21:02,621 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:21:02,627 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:21:02 GMT'), (b'server', b'uvicorn'), (b'content-length', b'92301'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 09:21:02,627 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 09:21:02,627 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 09:21:02,627 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:21:02,627 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:21:02,627 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:21:02,627 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:21:02,627 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:21:02,637 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 09:21:02,678 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x122c96350>
+2025-05-30 09:21:02,678 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x10de251c0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 09:21:02,783 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11758ae50>
+2025-05-30 09:21:02,783 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x124bcbda0> server_hostname='api.gradio.app' timeout=30
+2025-05-30 09:21:02,826 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 09:21:02,998 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x124b3dc10>
+2025-05-30 09:21:02,998 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:21:02,999 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:21:02,999 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:21:02,999 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:21:02,999 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:21:03,077 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x122c6d5d0>
+2025-05-30 09:21:03,078 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:21:03,078 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:21:03,079 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:21:03,079 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:21:03,079 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:21:03,159 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:21:03 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 09:21:03,161 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 09:21:03,161 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:21:03,162 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:21:03,162 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:21:03,162 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:21:03,162 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:21:03,163 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:21:03,227 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:21:03 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 09:21:03,228 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 09:21:03,228 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:21:03,228 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:21:03,228 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:21:03,228 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:21:03,229 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:21:03,229 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:21:03,865 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:21:04,081 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 09:21:05,618 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:21:05,618 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:21:05,618 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 09:21:05,618 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:21:05,619 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:21:05,619 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:21:05,619 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:21:05,619 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:21:05,619 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:21:07,815 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:21:07,816 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:21:07,816 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:21:07,816 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:21:07,816 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:21:07,816 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:21:07,816 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:21:07,816 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:23:08,080 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 09:23:08,080 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 09:23:08,080 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 09:23:08,080 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 09:23:08,080 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 09:23:08,080 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 09:23:08,080 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 09:23:08,080 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 09:23:08,080 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 09:23:08,080 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 09:23:08,084 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 09:23:08,084 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 09:23:08,503 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 09:23:08,503 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 09:23:08,503 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 09:23:08,503 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 09:23:08,503 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 09:23:08,503 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 09:23:08,503 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 09:23:08,503 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 09:23:08,503 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 09:23:08,503 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 09:23:08,503 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 09:23:08,505 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:23:08,518 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 09:23:08,524 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:23:08,598 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:23:08,637 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 09:23:08,638 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x127b040d0>
+2025-05-30 09:23:08,638 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:23:08,639 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:23:08,639 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:23:08,639 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:23:08,639 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:23:08,639 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:23:08 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 09:23:08,639 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 09:23:08,639 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:23:08,639 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:23:08,639 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:23:08,639 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:23:08,640 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:23:08,640 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:23:08,640 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 09:23:08,640 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x127bccb90>
+2025-05-30 09:23:08,640 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:23:08,641 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:23:08,641 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 09:23:08,641 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:23:08,641 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:23:08,647 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:23:08 GMT'), (b'server', b'uvicorn'), (b'content-length', b'91293'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 09:23:08,647 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 09:23:08,647 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 09:23:08,647 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:23:08,647 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:23:08,647 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:23:08,647 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:23:08,647 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:23:08,658 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 09:23:08,687 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x126798cd0>
+2025-05-30 09:23:08,687 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x113d291c0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 09:23:08,793 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1267f3050>
+2025-05-30 09:23:08,793 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x127aa3da0> server_hostname='api.gradio.app' timeout=30
+2025-05-30 09:23:08,797 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 09:23:08,979 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x126487fd0>
+2025-05-30 09:23:08,980 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:23:08,980 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:23:08,980 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:23:08,980 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:23:08,981 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:23:09,066 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1259a9b50>
+2025-05-30 09:23:09,066 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:23:09,067 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:23:09,067 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:23:09,067 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:23:09,067 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:23:09,130 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:23:09 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 09:23:09,131 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 09:23:09,131 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:23:09,132 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:23:09,132 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:23:09,132 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:23:09,132 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:23:09,133 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:23:09,244 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:23:09 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 09:23:09,245 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 09:23:09,245 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:23:09,246 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:23:09,246 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:23:09,246 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:23:09,246 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:23:09,247 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:23:09,841 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:23:10,058 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 09:23:10,267 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:23:10,267 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:23:10,267 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 09:23:10,267 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:23:10,267 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:23:10,267 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:23:10,268 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:23:10,268 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:23:10,268 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:23:12,394 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:23:12,394 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:23:12,394 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:23:12,394 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:23:12,394 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:23:12,394 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:23:12,394 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:23:12,395 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:24:16,687 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:24:16,687 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:24:16,687 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:24:16,687 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:24:16,687 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:24:16,688 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:24:16,688 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:24:16,688 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:26:11,528 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 09:26:11,528 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 09:26:11,528 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 09:26:11,528 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 09:26:11,528 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 09:26:11,528 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 09:26:11,528 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 09:26:11,528 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 09:26:11,528 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 09:26:11,528 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 09:26:11,532 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 09:26:11,532 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 09:26:11,962 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 09:26:11,962 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 09:26:11,962 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 09:26:11,962 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 09:26:11,962 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 09:26:11,962 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 09:26:11,962 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 09:26:11,962 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 09:26:11,962 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 09:26:11,962 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 09:26:11,962 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 09:26:11,964 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:26:11,978 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 09:26:11,978 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:26:12,060 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:26:12,090 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 09:26:12,091 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1393c27d0>
+2025-05-30 09:26:12,091 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:26:12,091 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:26:12,091 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:26:12,091 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:26:12,092 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:26:12,092 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:26:12 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 09:26:12,092 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 09:26:12,092 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:26:12,092 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:26:12,092 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:26:12,092 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:26:12,092 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:26:12,092 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:26:12,093 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 09:26:12,095 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x139503a90>
+2025-05-30 09:26:12,095 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:26:12,095 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:26:12,095 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 09:26:12,095 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:26:12,095 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:26:12,101 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:26:12 GMT'), (b'server', b'uvicorn'), (b'content-length', b'89315'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 09:26:12,101 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 09:26:12,101 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 09:26:12,101 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:26:12,101 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:26:12,101 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:26:12,101 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:26:12,102 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:26:12,112 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 09:26:12,204 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12eb8bf90>
+2025-05-30 09:26:12,204 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x113e251c0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 09:26:12,258 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12e1ee710>
+2025-05-30 09:26:12,258 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x1393bfda0> server_hostname='api.gradio.app' timeout=30
+2025-05-30 09:26:12,263 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 09:26:12,546 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12eb21010>
+2025-05-30 09:26:12,546 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:26:12,546 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:26:12,546 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:26:12,546 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:26:12,546 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:26:12,550 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12eb47e50>
+2025-05-30 09:26:12,550 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:26:12,550 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:26:12,550 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:26:12,551 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:26:12,551 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:26:12,698 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:26:12 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 09:26:12,700 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 09:26:12,700 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:26:12,701 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:26:12,702 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:26:12,703 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:26:12,705 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:26:12,706 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:26:12,724 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:26:12 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 09:26:12,725 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 09:26:12,726 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:26:12,726 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:26:12,726 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:26:12,726 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:26:12,726 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:26:12,726 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:26:13,321 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:26:13,519 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:26:13,519 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:26:13,519 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 09:26:13,520 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:26:13,520 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:26:13,520 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:26:13,520 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:26:13,520 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:26:13,520 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:26:13,550 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 09:29:17,309 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 09:29:17,309 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 09:29:17,309 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 09:29:17,309 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 09:29:17,309 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 09:29:17,309 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 09:29:17,309 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 09:29:17,309 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 09:29:17,309 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 09:29:17,309 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 09:29:17,312 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 09:29:17,312 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 09:29:17,727 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 09:29:17,727 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 09:29:17,727 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 09:29:17,727 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 09:29:17,727 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 09:29:17,727 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 09:29:17,727 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 09:29:17,727 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 09:29:17,727 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 09:29:17,727 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 09:29:17,727 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 09:29:17,729 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:29:17,742 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 09:29:17,747 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:29:17,819 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:29:17,854 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 09:29:17,854 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x122c27f90>
+2025-05-30 09:29:17,854 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:29:17,854 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:29:17,855 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:29:17,855 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:29:17,855 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:29:17,855 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:29:17 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 09:29:17,855 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 09:29:17,855 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:29:17,855 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:29:17,855 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:29:17,855 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:29:17,856 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:29:17,856 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:29:17,856 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 09:29:17,856 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x121c6ef10>
+2025-05-30 09:29:17,856 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:29:17,857 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:29:17,857 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 09:29:17,857 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:29:17,857 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:29:17,862 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:29:17 GMT'), (b'server', b'uvicorn'), (b'content-length', b'89336'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 09:29:17,862 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 09:29:17,862 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 09:29:17,862 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:29:17,863 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:29:17,863 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:29:17,863 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:29:17,863 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:29:17,873 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 09:29:17,952 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x121c527d0>
+2025-05-30 09:29:17,952 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x10e7251c0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 09:29:18,019 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x122d23450>
+2025-05-30 09:29:18,019 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x122cc7da0> server_hostname='api.gradio.app' timeout=30
+2025-05-30 09:29:18,028 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 09:29:18,249 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x122ad40d0>
+2025-05-30 09:29:18,249 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:29:18,249 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:29:18,249 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:29:18,249 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:29:18,249 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:29:18,310 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x121232750>
+2025-05-30 09:29:18,310 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:29:18,310 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:29:18,311 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:29:18,311 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:29:18,311 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:29:18,397 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:29:18 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 09:29:18,398 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 09:29:18,398 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:29:18,398 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:29:18,398 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:29:18,399 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:29:18,399 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:29:18,399 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:29:18,459 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:29:18 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 09:29:18,459 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 09:29:18,459 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:29:18,459 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:29:18,459 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:29:18,459 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:29:18,460 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:29:18,460 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:29:19,063 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:29:19,292 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 09:29:19,583 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:29:19,583 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:29:19,584 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 09:29:19,584 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:29:19,584 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:29:19,584 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:29:19,584 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:29:19,584 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:29:19,584 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:30:37,148 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 09:30:37,148 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 09:30:37,148 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 09:30:37,148 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 09:30:37,148 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 09:30:37,148 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 09:30:37,148 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 09:30:37,148 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 09:30:37,148 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 09:30:37,148 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 09:30:37,152 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 09:30:37,152 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 09:30:37,581 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 09:30:37,581 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 09:30:37,581 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 09:30:37,581 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 09:30:37,581 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 09:30:37,581 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 09:30:37,581 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 09:30:37,581 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 09:30:37,581 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 09:30:37,581 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 09:30:37,581 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 09:30:37,584 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:30:37,597 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 09:30:37,603 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:30:37,676 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:30:37,716 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 09:30:37,716 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x15b776450>
+2025-05-30 09:30:37,716 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:30:37,716 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:30:37,717 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:30:37,717 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:30:37,717 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:30:37,717 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:30:37 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 09:30:37,717 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 09:30:37,717 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:30:37,717 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:30:37,717 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:30:37,717 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:30:37,718 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:30:37,718 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:30:37,718 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 09:30:37,719 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x16a285950>
+2025-05-30 09:30:37,719 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:30:37,719 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:30:37,719 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 09:30:37,719 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:30:37,719 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:30:37,726 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:30:37 GMT'), (b'server', b'uvicorn'), (b'content-length', b'90180'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 09:30:37,726 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 09:30:37,726 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 09:30:37,726 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:30:37,726 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:30:37,726 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:30:37,727 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:30:37,727 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:30:37,737 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 09:30:37,755 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x159b87790>
+2025-05-30 09:30:37,755 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x15a48d490> server_hostname='api.gradio.app' timeout=3
+2025-05-30 09:30:37,878 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 09:30:37,883 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x15a1c3ad0>
+2025-05-30 09:30:37,883 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x16a14bda0> server_hostname='api.gradio.app' timeout=30
+2025-05-30 09:30:38,027 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x123f94c50>
+2025-05-30 09:30:38,027 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:30:38,027 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:30:38,028 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:30:38,028 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:30:38,028 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:30:38,229 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:30:38 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 09:30:38,229 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x15a8b3d50>
+2025-05-30 09:30:38,231 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 09:30:38,232 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:30:38,232 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:30:38,233 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:30:38,233 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:30:38,233 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:30:38,233 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:30:38,233 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:30:38,233 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:30:38,234 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:30:38,234 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:30:38,234 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:30:38,378 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:30:38,378 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:30:38,379 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 09:30:38,379 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:30:38,379 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:30:38,379 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:30:38,379 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:30:38,379 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:30:38,379 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:30:38,380 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:30:38 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 09:30:38,380 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 09:30:38,380 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:30:38,380 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:30:38,380 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:30:38,380 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:30:38,380 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:30:38,381 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:30:38,978 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:30:39,192 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 09:32:39,242 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 09:32:39,242 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 09:32:39,242 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 09:32:39,242 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 09:32:39,242 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 09:32:39,242 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 09:32:39,242 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 09:32:39,242 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 09:32:39,242 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 09:32:39,242 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 09:32:39,245 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 09:32:39,246 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 09:32:39,674 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 09:32:39,674 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 09:32:39,674 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 09:32:39,674 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 09:32:39,674 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 09:32:39,674 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 09:32:39,674 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 09:32:39,674 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 09:32:39,674 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 09:32:39,674 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 09:32:39,674 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 09:32:39,676 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:32:39,690 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 09:32:39,694 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:32:39,767 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:32:39,806 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 09:32:39,806 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12859b050>
+2025-05-30 09:32:39,807 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:32:39,807 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:32:39,807 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:32:39,807 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:32:39,807 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:32:39,807 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:32:39 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 09:32:39,807 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 09:32:39,808 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:32:39,808 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:32:39,808 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:32:39,808 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:32:39,808 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:32:39,808 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:32:39,808 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 09:32:39,809 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12a3ec890>
+2025-05-30 09:32:39,809 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:32:39,809 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:32:39,809 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 09:32:39,809 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:32:39,809 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:32:39,814 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:32:39 GMT'), (b'server', b'uvicorn'), (b'content-length', b'90468'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 09:32:39,815 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 09:32:39,815 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 09:32:39,815 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:32:39,815 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:32:39,815 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:32:39,815 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:32:39,815 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:32:39,825 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 09:32:39,928 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11fb15b90>
+2025-05-30 09:32:39,928 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x12818d370> server_hostname='api.gradio.app' timeout=3
+2025-05-30 09:32:39,968 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11f6a78d0>
+2025-05-30 09:32:39,968 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x1294c7c80> server_hostname='api.gradio.app' timeout=30
+2025-05-30 09:32:39,981 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 09:32:40,204 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1293f4250>
+2025-05-30 09:32:40,204 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:32:40,204 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:32:40,205 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:32:40,205 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:32:40,205 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:32:40,261 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11f05db90>
+2025-05-30 09:32:40,261 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:32:40,261 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:32:40,261 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:32:40,262 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:32:40,262 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:32:40,342 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:32:40 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 09:32:40,343 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 09:32:40,343 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:32:40,343 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:32:40,343 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:32:40,343 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:32:40,343 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:32:40,343 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:32:40,407 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:32:40 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 09:32:40,407 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 09:32:40,408 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:32:40,408 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:32:40,408 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:32:40,409 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:32:40,409 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:32:40,409 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:32:41,061 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:32:41,280 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 09:32:47,262 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:32:47,262 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:32:47,262 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 09:32:47,263 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:32:47,263 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:32:47,263 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:32:47,263 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:32:47,263 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:32:47,263 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:33:51,807 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 09:33:51,807 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 09:33:51,807 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 09:33:51,807 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 09:33:51,807 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 09:33:51,807 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 09:33:51,807 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 09:33:51,807 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 09:33:51,807 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 09:33:51,807 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 09:33:51,811 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 09:33:51,812 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 09:33:52,239 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 09:33:52,239 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 09:33:52,239 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 09:33:52,239 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 09:33:52,239 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 09:33:52,239 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 09:33:52,239 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 09:33:52,239 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 09:33:52,239 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 09:33:52,239 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 09:33:52,239 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 09:33:52,241 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:33:52,254 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 09:33:52,260 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:33:52,334 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:33:52,372 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 09:33:52,372 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x134617750>
+2025-05-30 09:33:52,372 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:33:52,373 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:33:52,373 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:33:52,373 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:33:52,373 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:33:52,373 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:33:52 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 09:33:52,373 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 09:33:52,373 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:33:52,373 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:33:52,374 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:33:52,374 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:33:52,374 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:33:52,374 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:33:52,374 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 09:33:52,374 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x13726f7d0>
+2025-05-30 09:33:52,375 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:33:52,375 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:33:52,375 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 09:33:52,375 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:33:52,375 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:33:52,380 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:33:52 GMT'), (b'server', b'uvicorn'), (b'content-length', b'90684'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 09:33:52,380 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 09:33:52,380 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 09:33:52,381 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:33:52,381 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:33:52,381 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:33:52,381 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:33:52,381 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:33:52,391 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 09:33:52,403 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1309d2e50>
+2025-05-30 09:33:52,403 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x131e8d370> server_hostname='api.gradio.app' timeout=3
+2025-05-30 09:33:52,515 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 09:33:52,532 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1319b51d0>
+2025-05-30 09:33:52,532 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x137133c80> server_hostname='api.gradio.app' timeout=30
+2025-05-30 09:33:52,689 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1332b3290>
+2025-05-30 09:33:52,690 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:33:52,691 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:33:52,691 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:33:52,691 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:33:52,691 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:33:52,816 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x130368050>
+2025-05-30 09:33:52,817 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:33:52,818 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:33:52,818 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:33:52,818 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:33:52,818 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:33:52,835 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:33:52 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 09:33:52,835 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 09:33:52,835 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:33:52,835 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:33:52,835 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:33:52,835 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:33:52,836 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:33:52,836 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:33:52,962 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:33:52 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 09:33:52,963 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 09:33:52,963 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:33:52,963 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:33:52,963 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:33:52,963 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:33:52,963 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:33:52,963 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:33:53,568 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:33:53,692 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:33:53,692 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:33:53,692 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 09:33:53,692 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:33:53,692 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:33:53,693 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:33:53,693 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:33:53,693 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:33:53,693 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:33:53,974 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 09:35:18,862 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 09:35:18,862 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 09:35:18,862 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 09:35:18,862 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 09:35:18,862 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 09:35:18,863 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 09:35:18,863 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 09:35:18,863 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 09:35:18,863 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 09:35:18,863 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 09:35:18,867 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 09:35:18,867 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 09:35:19,363 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 09:35:19,363 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 09:35:19,363 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 09:35:19,363 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 09:35:19,363 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 09:35:19,363 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 09:35:19,363 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 09:35:19,363 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 09:35:19,363 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 09:35:19,363 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 09:35:19,363 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 09:35:19,366 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:35:19,379 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 09:35:19,386 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:35:19,461 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:35:19,494 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 09:35:19,495 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12caf6590>
+2025-05-30 09:35:19,495 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:35:19,495 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:35:19,495 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:35:19,495 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:35:19,495 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:35:19,496 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:35:19 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 09:35:19,496 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 09:35:19,496 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:35:19,496 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:35:19,496 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:35:19,496 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:35:19,496 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:35:19,496 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:35:19,496 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 09:35:19,497 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12f764790>
+2025-05-30 09:35:19,497 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:35:19,497 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:35:19,497 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 09:35:19,497 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:35:19,497 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:35:19,503 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:35:19 GMT'), (b'server', b'uvicorn'), (b'content-length', b'90526'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 09:35:19,503 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 09:35:19,503 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 09:35:19,503 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:35:19,503 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:35:19,503 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:35:19,503 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:35:19,503 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:35:19,514 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 09:35:19,639 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12b8916d0>
+2025-05-30 09:35:19,639 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x12b48d370> server_hostname='api.gradio.app' timeout=3
+2025-05-30 09:35:19,661 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12b8b28d0>
+2025-05-30 09:35:19,661 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x12cbabc80> server_hostname='api.gradio.app' timeout=30
+2025-05-30 09:35:19,694 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 09:35:19,915 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12b81f210>
+2025-05-30 09:35:19,915 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:35:19,915 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:35:19,915 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:35:19,915 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:35:19,915 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:35:19,956 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12a3610d0>
+2025-05-30 09:35:19,957 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:35:19,957 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:35:19,958 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:35:19,958 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:35:19,958 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:35:20,053 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:35:20 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 09:35:20,054 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 09:35:20,054 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:35:20,054 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:35:20,054 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:35:20,054 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:35:20,055 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:35:20,055 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:35:20,105 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:35:20 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 09:35:20,106 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 09:35:20,106 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:35:20,106 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:35:20,106 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:35:20,107 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:35:20,107 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:35:20,107 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:35:20,659 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:35:20,660 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:35:20,660 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 09:35:20,660 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:35:20,660 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:35:20,660 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:35:20,660 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:35:20,660 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:35:20,660 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:35:20,743 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:35:20,960 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 09:36:13,741 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 09:36:13,741 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 09:36:13,741 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 09:36:13,741 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 09:36:13,741 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 09:36:13,741 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 09:36:13,741 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 09:36:13,741 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 09:36:13,741 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 09:36:13,741 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 09:36:13,745 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 09:36:13,745 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 09:36:14,231 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 09:36:14,231 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 09:36:14,231 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 09:36:14,231 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 09:36:14,231 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 09:36:14,231 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 09:36:14,231 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 09:36:14,231 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 09:36:14,231 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 09:36:14,231 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 09:36:14,231 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 09:36:14,234 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:36:14,245 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 09:36:14,253 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:36:14,334 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:36:14,367 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 09:36:14,368 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x127e84b90>
+2025-05-30 09:36:14,368 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:36:14,368 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:36:14,368 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:36:14,369 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:36:14,369 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:36:14,369 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:36:14 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 09:36:14,369 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 09:36:14,369 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:36:14,369 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:36:14,369 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:36:14,369 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:36:14,369 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:36:14,369 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:36:14,370 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 09:36:14,370 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x127f50c50>
+2025-05-30 09:36:14,370 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:36:14,370 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:36:14,370 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 09:36:14,370 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:36:14,370 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:36:14,376 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:36:14 GMT'), (b'server', b'uvicorn'), (b'content-length', b'91032'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 09:36:14,376 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 09:36:14,376 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 09:36:14,376 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:36:14,376 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:36:14,376 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:36:14,376 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:36:14,376 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:36:14,388 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 09:36:14,391 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x127efced0>
+2025-05-30 09:36:14,391 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x126d8d370> server_hostname='api.gradio.app' timeout=3
+2025-05-30 09:36:14,520 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 09:36:14,532 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x123db00d0>
+2025-05-30 09:36:14,532 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x127e2bc80> server_hostname='api.gradio.app' timeout=30
+2025-05-30 09:36:14,680 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x126aa3c50>
+2025-05-30 09:36:14,680 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:36:14,681 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:36:14,681 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:36:14,681 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:36:14,681 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:36:14,821 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:36:14 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 09:36:14,822 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 09:36:14,822 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:36:14,822 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:36:14,823 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:36:14,823 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:36:14,823 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:36:14,824 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:36:14,826 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x126715190>
+2025-05-30 09:36:14,826 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:36:14,826 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:36:14,826 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:36:14,826 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:36:14,826 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:36:14,975 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:36:14 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 09:36:14,976 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 09:36:14,976 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:36:14,976 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:36:14,976 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:36:14,976 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:36:14,976 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:36:14,976 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:36:15,386 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:36:15,387 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:36:15,387 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 09:36:15,387 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:36:15,387 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:36:15,387 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:36:15,387 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:36:15,387 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:36:15,387 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:36:15,600 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:36:15,821 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 09:38:22,046 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 09:38:22,046 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 09:38:22,046 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 09:38:22,046 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 09:38:22,046 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 09:38:22,046 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 09:38:22,046 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 09:38:22,046 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 09:38:22,046 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 09:38:22,046 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 09:38:22,050 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 09:38:22,050 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 09:38:22,539 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 09:38:22,540 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 09:38:22,540 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 09:38:22,540 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 09:38:22,540 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 09:38:22,540 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 09:38:22,540 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 09:38:22,540 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 09:38:22,540 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 09:38:22,540 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 09:38:22,540 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 09:38:22,542 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:38:22,549 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:38:22,556 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 09:38:22,638 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:38:22,671 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 09:38:22,672 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x129752010>
+2025-05-30 09:38:22,672 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:38:22,672 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:38:22,672 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:38:22,672 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:38:22,673 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:38:22,673 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:38:22 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 09:38:22,673 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 09:38:22,673 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:38:22,673 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:38:22,673 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:38:22,673 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:38:22,673 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:38:22,673 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:38:22,673 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 09:38:22,674 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12baf4690>
+2025-05-30 09:38:22,674 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:38:22,674 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:38:22,674 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 09:38:22,674 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:38:22,674 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:38:22,680 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:38:22 GMT'), (b'server', b'uvicorn'), (b'content-length', b'91836'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 09:38:22,680 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 09:38:22,680 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 09:38:22,680 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:38:22,681 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:38:22,681 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:38:22,681 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:38:22,681 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:38:22,692 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 09:38:22,727 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1297b3190>
+2025-05-30 09:38:22,727 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x10e4251c0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 09:38:22,832 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12ba200d0>
+2025-05-30 09:38:22,832 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x12b6d3c80> server_hostname='api.gradio.app' timeout=30
+2025-05-30 09:38:22,838 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 09:38:23,020 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x128e88110>
+2025-05-30 09:38:23,021 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:38:23,021 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:38:23,021 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:38:23,022 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:38:23,022 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:38:23,120 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x128980c10>
+2025-05-30 09:38:23,120 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:38:23,121 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:38:23,121 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:38:23,121 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:38:23,121 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:38:23,168 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:38:23 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 09:38:23,168 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 09:38:23,168 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:38:23,168 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:38:23,169 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:38:23,169 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:38:23,169 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:38:23,169 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:38:23,263 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:38:23 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 09:38:23,263 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 09:38:23,263 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:38:23,263 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:38:23,263 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:38:23,263 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:38:23,264 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:38:23,264 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:38:23,743 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:38:23,744 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:38:23,744 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 09:38:23,744 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:38:23,744 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:38:23,744 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:38:23,744 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:38:23,744 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:38:23,745 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:38:23,902 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:38:24,120 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 09:40:31,645 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 09:40:31,645 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 09:40:31,645 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 09:40:31,645 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 09:40:31,645 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 09:40:31,645 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 09:40:31,645 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 09:40:31,645 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 09:40:31,645 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 09:40:31,645 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 09:40:31,649 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 09:40:31,649 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 09:40:32,097 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 09:40:32,097 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 09:40:32,097 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 09:40:32,097 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 09:40:32,097 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 09:40:32,097 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 09:40:32,097 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 09:40:32,097 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 09:40:32,097 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 09:40:32,097 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 09:40:32,097 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 09:40:32,099 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:40:32,106 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:40:32,112 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 09:40:32,195 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:40:32,231 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 09:40:32,231 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x13195a450>
+2025-05-30 09:40:32,231 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:40:32,232 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:40:32,232 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:40:32,232 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:40:32,232 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:40:32,232 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:40:32 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 09:40:32,232 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 09:40:32,232 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:40:32,232 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:40:32,232 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:40:32,232 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:40:32,233 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:40:32,233 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:40:32,233 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 09:40:32,233 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x131bd8250>
+2025-05-30 09:40:32,233 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:40:32,233 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:40:32,233 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 09:40:32,234 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:40:32,234 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:40:32,240 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:40:32 GMT'), (b'server', b'uvicorn'), (b'content-length', b'92502'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 09:40:32,240 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 09:40:32,240 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 09:40:32,240 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:40:32,240 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:40:32,240 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:40:32,240 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:40:32,240 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:40:32,251 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 09:40:32,335 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12757d350>
+2025-05-30 09:40:32,335 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x115a251c0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 09:40:32,388 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x127556c90>
+2025-05-30 09:40:32,388 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x131aabc80> server_hostname='api.gradio.app' timeout=30
+2025-05-30 09:40:32,389 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 09:40:32,637 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x126729c90>
+2025-05-30 09:40:32,638 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:40:32,638 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:40:32,638 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:40:32,638 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:40:32,638 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:40:32,664 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x115a52290>
+2025-05-30 09:40:32,664 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:40:32,665 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:40:32,665 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:40:32,665 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:40:32,665 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:40:32,792 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:40:32 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 09:40:32,792 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 09:40:32,793 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:40:32,793 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:40:32,793 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:40:32,793 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:40:32,793 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:40:32,794 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:40:32,804 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:40:32 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 09:40:32,805 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 09:40:32,805 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:40:32,805 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:40:32,805 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:40:32,805 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:40:32,806 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:40:32,806 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:40:32,946 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:40:32,947 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:40:32,947 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 09:40:32,947 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:40:32,947 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:40:32,947 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:40:32,947 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:40:32,947 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:40:32,947 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:40:33,598 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:40:33,881 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 09:41:21,842 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 09:41:21,842 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 09:41:21,842 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 09:41:21,842 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 09:41:21,842 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 09:41:21,842 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 09:41:21,842 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 09:41:21,842 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 09:41:21,842 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 09:41:21,842 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 09:41:21,846 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 09:41:21,846 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 09:41:22,316 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 09:41:22,316 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 09:41:22,316 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 09:41:22,316 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 09:41:22,316 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 09:41:22,316 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 09:41:22,316 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 09:41:22,316 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 09:41:22,316 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 09:41:22,316 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 09:41:22,316 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 09:41:22,318 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:41:22,331 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 09:41:22,338 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:41:22,428 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:41:22,460 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 09:41:22,460 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12cb8cb90>
+2025-05-30 09:41:22,460 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:41:22,461 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:41:22,461 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:41:22,461 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:41:22,461 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:41:22,461 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:41:22 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 09:41:22,461 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 09:41:22,461 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:41:22,461 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:41:22,461 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:41:22,461 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:41:22,461 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:41:22,461 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:41:22,462 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 09:41:22,462 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12cb8e6d0>
+2025-05-30 09:41:22,462 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:41:22,462 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:41:22,462 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 09:41:22,462 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:41:22,462 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:41:22,468 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:41:22 GMT'), (b'server', b'uvicorn'), (b'content-length', b'92492'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 09:41:22,468 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 09:41:22,468 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 09:41:22,468 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:41:22,468 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:41:22,469 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:41:22,469 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:41:22,469 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:41:22,473 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x128d35c50>
+2025-05-30 09:41:22,473 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x1169211c0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 09:41:22,480 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 09:41:22,600 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 09:41:22,615 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x128101350>
+2025-05-30 09:41:22,615 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x12a1cbc80> server_hostname='api.gradio.app' timeout=30
+2025-05-30 09:41:22,746 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1285f7090>
+2025-05-30 09:41:22,747 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:41:22,748 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:41:22,748 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:41:22,748 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:41:22,748 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:41:22,883 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:41:22 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 09:41:22,883 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 09:41:22,884 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:41:22,884 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:41:22,884 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:41:22,884 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:41:22,884 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:41:22,884 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:41:22,895 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11fb1ac90>
+2025-05-30 09:41:22,896 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:41:22,896 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:41:22,896 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:41:22,896 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:41:22,896 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:41:23,034 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:41:23 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 09:41:23,034 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 09:41:23,034 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:41:23,035 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:41:23,035 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:41:23,035 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:41:23,035 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:41:23,036 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:41:23,324 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:41:23,324 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:41:23,324 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 09:41:23,325 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:41:23,325 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:41:23,325 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:41:23,325 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:41:23,325 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:41:23,325 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:41:23,626 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:41:23,847 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 09:44:34,217 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 09:44:34,217 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 09:44:34,217 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 09:44:34,217 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 09:44:34,217 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 09:44:34,217 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 09:44:34,217 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 09:44:34,217 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 09:44:34,217 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 09:44:34,217 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 09:44:34,221 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 09:44:34,221 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 09:44:34,732 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 09:44:34,732 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 09:44:34,732 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 09:44:34,732 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 09:44:34,732 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 09:44:34,732 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 09:44:34,732 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 09:44:34,732 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 09:44:34,732 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 09:44:34,732 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 09:44:34,732 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 09:44:34,735 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:44:34,748 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 09:44:34,755 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:44:34,833 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:44:34,866 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 09:44:34,866 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12eca27d0>
+2025-05-30 09:44:34,866 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:44:34,866 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:44:34,867 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:44:34,867 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:44:34,867 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:44:34,867 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:44:34 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 09:44:34,867 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 09:44:34,867 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:44:34,867 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:44:34,867 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:44:34,867 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:44:34,867 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:44:34,867 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:44:34,868 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 09:44:34,868 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12ede78d0>
+2025-05-30 09:44:34,868 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:44:34,868 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:44:34,868 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 09:44:34,869 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:44:34,869 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:44:34,875 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:44:34 GMT'), (b'server', b'uvicorn'), (b'content-length', b'92835'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 09:44:34,875 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 09:44:34,875 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 09:44:34,875 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:44:34,875 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:44:34,875 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:44:34,875 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:44:34,875 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:44:34,887 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 09:44:34,910 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12d99ab50>
+2025-05-30 09:44:34,910 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x12d48d400> server_hostname='api.gradio.app' timeout=3
+2025-05-30 09:44:35,032 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12a362ad0>
+2025-05-30 09:44:35,032 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x12ecabd10> server_hostname='api.gradio.app' timeout=30
+2025-05-30 09:44:35,170 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 09:44:35,181 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12d0f6b10>
+2025-05-30 09:44:35,181 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:44:35,182 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:44:35,183 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:44:35,183 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:44:35,183 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:44:35,321 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:44:35 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 09:44:35,322 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 09:44:35,322 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:44:35,322 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:44:35,323 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:44:35,323 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12d9062d0>
+2025-05-30 09:44:35,323 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:44:35,323 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:44:35,324 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:44:35,324 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:44:35,324 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:44:35,324 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:44:35,325 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:44:35,325 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:44:35,470 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:44:35 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 09:44:35,470 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 09:44:35,471 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:44:35,471 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:44:35,471 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:44:35,471 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:44:35,472 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:44:35,472 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:44:36,137 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:44:36,192 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:44:36,192 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:44:36,192 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 09:44:36,192 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:44:36,193 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:44:36,193 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:44:36,193 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:44:36,193 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:44:36,193 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:44:36,351 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 09:45:03,951 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 09:45:03,951 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 09:45:03,951 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 09:45:03,951 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 09:45:03,951 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 09:45:03,951 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 09:45:03,951 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 09:45:03,951 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 09:45:03,951 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 09:45:03,951 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 09:45:03,955 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 09:45:03,955 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 09:45:04,444 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 09:45:04,444 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 09:45:04,444 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 09:45:04,444 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 09:45:04,444 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 09:45:04,444 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 09:45:04,444 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 09:45:04,444 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 09:45:04,444 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 09:45:04,444 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 09:45:04,444 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 09:45:04,447 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:45:04,460 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 09:45:04,460 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:45:04,544 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:45:04,577 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 09:45:04,577 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12a9049d0>
+2025-05-30 09:45:04,577 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:45:04,578 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:45:04,578 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:45:04,578 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:45:04,578 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:45:04,578 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:45:04 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 09:45:04,578 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 09:45:04,578 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:45:04,578 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:45:04,579 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:45:04,579 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:45:04,579 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:45:04,579 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:45:04,579 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 09:45:04,579 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12a9d8590>
+2025-05-30 09:45:04,579 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:45:04,580 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:45:04,580 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 09:45:04,580 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:45:04,580 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:45:04,586 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:45:04 GMT'), (b'server', b'uvicorn'), (b'content-length', b'92844'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 09:45:04,586 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 09:45:04,586 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 09:45:04,586 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:45:04,586 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:45:04,586 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:45:04,586 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:45:04,586 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:45:04,597 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 09:45:04,614 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x129593dd0>
+2025-05-30 09:45:04,614 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x12928d400> server_hostname='api.gradio.app' timeout=3
+2025-05-30 09:45:04,723 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 09:45:04,735 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x129029550>
+2025-05-30 09:45:04,735 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x12a8abd10> server_hostname='api.gradio.app' timeout=30
+2025-05-30 09:45:04,906 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12952d790>
+2025-05-30 09:45:04,907 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:45:04,907 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:45:04,907 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:45:04,907 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:45:04,908 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:45:05,016 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11c917f50>
+2025-05-30 09:45:05,017 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:45:05,017 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:45:05,017 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:45:05,017 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:45:05,018 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:45:05,055 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:45:05 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 09:45:05,055 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 09:45:05,055 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:45:05,055 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:45:05,056 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:45:05,056 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:45:05,056 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:45:05,056 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:45:05,161 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:45:05 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 09:45:05,161 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 09:45:05,162 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:45:05,162 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:45:05,162 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:45:05,162 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:45:05,163 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:45:05,163 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:45:05,643 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:45:05,643 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:45:05,643 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 09:45:05,643 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:45:05,643 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:45:05,643 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:45:05,643 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:45:05,643 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:45:05,644 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:45:05,849 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:45:06,066 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 09:45:23,213 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 09:45:23,213 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 09:45:23,213 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 09:45:23,213 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 09:45:23,213 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 09:45:23,213 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 09:45:23,213 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 09:45:23,213 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 09:45:23,213 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 09:45:23,213 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 09:45:23,217 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 09:45:23,217 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 09:45:23,677 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 09:45:23,677 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 09:45:23,677 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 09:45:23,677 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 09:45:23,677 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 09:45:23,677 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 09:45:23,677 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 09:45:23,677 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 09:45:23,677 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 09:45:23,677 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 09:45:23,677 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 09:45:23,679 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:45:23,696 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 09:45:23,700 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:45:23,783 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:45:23,818 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 09:45:23,819 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x16b1b3f90>
+2025-05-30 09:45:23,819 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:45:23,819 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:45:23,820 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:45:23,820 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:45:23,820 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:45:23,820 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:45:23 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 09:45:23,820 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 09:45:23,820 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:45:23,820 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:45:23,820 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:45:23,820 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:45:23,820 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:45:23,820 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:45:23,821 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 09:45:23,821 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x16b3dc6d0>
+2025-05-30 09:45:23,821 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:45:23,822 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:45:23,822 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 09:45:23,822 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:45:23,822 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:45:23,828 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:45:23 GMT'), (b'server', b'uvicorn'), (b'content-length', b'92831'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 09:45:23,828 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 09:45:23,828 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 09:45:23,828 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:45:23,828 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:45:23,828 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:45:23,828 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:45:23,829 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:45:23,838 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x169fc9c50>
+2025-05-30 09:45:23,838 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x169b8d400> server_hostname='api.gradio.app' timeout=3
+2025-05-30 09:45:23,841 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 09:45:24,013 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 09:45:24,013 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x16b37fb10>
+2025-05-30 09:45:24,013 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x16b2abd10> server_hostname='api.gradio.app' timeout=30
+2025-05-30 09:45:24,151 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x169f27690>
+2025-05-30 09:45:24,152 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:45:24,152 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:45:24,152 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:45:24,152 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:45:24,153 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:45:24,290 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:45:24 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 09:45:24,290 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 09:45:24,290 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:45:24,290 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:45:24,290 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:45:24,290 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:45:24,290 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:45:24,291 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:45:24,297 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x124e2ec90>
+2025-05-30 09:45:24,297 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:45:24,298 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:45:24,298 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:45:24,298 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:45:24,298 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:45:24,439 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:45:24 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 09:45:24,439 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 09:45:24,439 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:45:24,440 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:45:24,440 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:45:24,440 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:45:24,440 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:45:24,440 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:45:25,006 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:45:25,228 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 09:45:27,524 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:45:27,524 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:45:27,524 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 09:45:27,524 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:45:27,524 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:45:27,524 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:45:27,524 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:45:27,524 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:45:27,524 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:45:49,950 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 09:45:49,950 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 09:45:49,950 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 09:45:49,950 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 09:45:49,950 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 09:45:49,950 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 09:45:49,950 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 09:45:49,950 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 09:45:49,951 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 09:45:49,951 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 09:45:49,955 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 09:45:49,955 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 09:45:50,413 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 09:45:50,413 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 09:45:50,413 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 09:45:50,413 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 09:45:50,413 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 09:45:50,413 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 09:45:50,413 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 09:45:50,413 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 09:45:50,413 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 09:45:50,413 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 09:45:50,413 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 09:45:50,415 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:45:50,428 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 09:45:50,435 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:45:50,514 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:45:50,548 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 09:45:50,548 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x133c72590>
+2025-05-30 09:45:50,548 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:45:50,548 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:45:50,549 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:45:50,549 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:45:50,549 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:45:50,549 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:45:50 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 09:45:50,549 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 09:45:50,549 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:45:50,549 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:45:50,549 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:45:50,549 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:45:50,549 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:45:50,549 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:45:50,550 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 09:45:50,550 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x133c73a50>
+2025-05-30 09:45:50,550 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:45:50,550 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:45:50,551 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 09:45:50,551 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:45:50,551 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:45:50,557 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:45:50 GMT'), (b'server', b'uvicorn'), (b'content-length', b'92836'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 09:45:50,557 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 09:45:50,557 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 09:45:50,557 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:45:50,557 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:45:50,557 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:45:50,557 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:45:50,557 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:45:50,568 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 09:45:50,595 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x124bdc450>
+2025-05-30 09:45:50,595 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x1173251c0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 09:45:50,711 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1310d1c50>
+2025-05-30 09:45:50,711 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x1311abd10> server_hostname='api.gradio.app' timeout=30
+2025-05-30 09:45:50,906 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x127cf9e10>
+2025-05-30 09:45:50,907 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:45:50,907 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:45:50,907 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:45:50,907 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:45:50,908 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:45:50,997 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1311a2150>
+2025-05-30 09:45:50,997 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:45:50,997 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:45:50,997 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:45:50,997 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:45:50,997 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:45:51,063 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:45:51 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 09:45:51,063 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 09:45:51,063 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:45:51,063 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:45:51,063 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:45:51,063 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:45:51,063 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:45:51,063 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:45:51,065 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 09:45:51,142 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:45:51 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 09:45:51,142 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 09:45:51,143 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:45:51,144 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:45:51,144 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:45:51,144 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:45:51,144 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:45:51,147 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:45:51,613 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:45:51,613 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:45:51,613 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 09:45:51,613 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:45:51,613 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:45:51,613 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:45:51,613 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:45:51,613 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:45:51,613 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:45:51,757 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:45:51,977 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 09:48:41,328 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 09:48:41,328 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 09:48:41,328 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 09:48:41,328 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 09:48:41,328 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 09:48:41,328 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 09:48:41,328 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 09:48:41,328 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 09:48:41,328 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 09:48:41,328 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 09:48:41,332 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 09:48:41,333 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 09:48:41,794 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 09:48:41,794 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 09:48:41,794 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 09:48:41,794 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 09:48:41,794 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 09:48:41,794 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 09:48:41,794 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 09:48:41,794 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 09:48:41,794 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 09:48:41,794 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 09:48:41,794 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 09:48:41,797 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:48:41,810 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 09:48:41,817 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:48:41,897 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:48:41,930 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 09:48:41,931 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x137729cd0>
+2025-05-30 09:48:41,931 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:48:41,931 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:48:41,932 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:48:41,932 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:48:41,932 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:48:41,932 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:48:41 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 09:48:41,932 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 09:48:41,932 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:48:41,932 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:48:41,932 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:48:41,932 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:48:41,932 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:48:41,932 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:48:41,933 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 09:48:41,933 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1377fc7d0>
+2025-05-30 09:48:41,933 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:48:41,933 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:48:41,933 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 09:48:41,933 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:48:41,933 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:48:41,939 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:48:41 GMT'), (b'server', b'uvicorn'), (b'content-length', b'92836'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 09:48:41,939 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 09:48:41,939 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 09:48:41,939 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:48:41,939 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:48:41,939 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:48:41,940 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:48:41,940 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:48:41,951 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 09:48:42,310 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x132a93cd0>
+2025-05-30 09:48:42,311 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x122a251c0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 09:48:42,327 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x13772b510>
+2025-05-30 09:48:42,327 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x1376cbd10> server_hostname='api.gradio.app' timeout=30
+2025-05-30 09:48:42,505 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 09:48:42,589 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x132bf0050>
+2025-05-30 09:48:42,589 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:48:42,590 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:48:42,590 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:48:42,590 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:48:42,590 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:48:42,644 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1377a6550>
+2025-05-30 09:48:42,644 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:48:42,645 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:48:42,645 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:48:42,645 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:48:42,645 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:48:42,727 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:48:42 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 09:48:42,728 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 09:48:42,728 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:48:42,729 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:48:42,729 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:48:42,729 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:48:42,729 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:48:42,730 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:48:42,803 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:48:42 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 09:48:42,803 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 09:48:42,804 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:48:42,804 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:48:42,804 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:48:42,804 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:48:42,804 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:48:42,804 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:48:43,200 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:48:43,201 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:48:43,201 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 09:48:43,201 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:48:43,201 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:48:43,201 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:48:43,201 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:48:43,201 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:48:43,201 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:48:43,715 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:48:43,930 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 09:50:27,837 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:50:27,838 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:50:27,838 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:50:27,838 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:50:27,838 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:50:27,838 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:50:27,838 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:50:27,839 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:52:41,974 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 09:52:41,975 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 09:52:41,975 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 09:52:41,975 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 09:52:41,975 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 09:52:41,975 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 09:52:41,975 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 09:52:41,975 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 09:52:41,975 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 09:52:41,975 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 09:52:41,978 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 09:52:41,978 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 09:52:42,426 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 09:52:42,426 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 09:52:42,426 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 09:52:42,426 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 09:52:42,426 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 09:52:42,426 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 09:52:42,426 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 09:52:42,426 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 09:52:42,426 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 09:52:42,426 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 09:52:42,426 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 09:52:42,428 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:52:42,441 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 09:52:42,448 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:52:42,529 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:52:42,560 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 09:52:42,560 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x136676350>
+2025-05-30 09:52:42,560 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:52:42,560 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:52:42,561 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:52:42,561 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:52:42,561 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:52:42,561 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:52:42 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 09:52:42,561 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 09:52:42,561 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:52:42,561 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:52:42,561 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:52:42,561 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:52:42,561 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:52:42,561 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:52:42,562 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 09:52:42,562 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x136677310>
+2025-05-30 09:52:42,562 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:52:42,562 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:52:42,562 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 09:52:42,562 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:52:42,562 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:52:42,569 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:52:42 GMT'), (b'server', b'uvicorn'), (b'content-length', b'92851'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 09:52:42,569 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 09:52:42,569 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 09:52:42,569 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:52:42,569 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:52:42,569 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:52:42,569 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:52:42,569 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:52:42,580 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 09:52:42,646 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x132897c90>
+2025-05-30 09:52:42,646 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x13248d400> server_hostname='api.gradio.app' timeout=3
+2025-05-30 09:52:42,718 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1313edb10>
+2025-05-30 09:52:42,718 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x133ba7d10> server_hostname='api.gradio.app' timeout=30
+2025-05-30 09:52:42,727 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 09:52:42,928 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x13287c950>
+2025-05-30 09:52:42,929 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:52:42,929 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:52:42,929 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:52:42,929 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:52:42,929 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:52:42,998 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x132875350>
+2025-05-30 09:52:42,998 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:52:42,998 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:52:42,999 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:52:42,999 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:52:42,999 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:52:43,071 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:52:43 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 09:52:43,071 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 09:52:43,072 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:52:43,072 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:52:43,072 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:52:43,072 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:52:43,073 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:52:43,073 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:52:43,141 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:52:43 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 09:52:43,141 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 09:52:43,141 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:52:43,141 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:52:43,142 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:52:43,142 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:52:43,142 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:52:43,142 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:52:43,220 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:52:43,220 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:52:43,220 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 09:52:43,220 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:52:43,220 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:52:43,220 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:52:43,220 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:52:43,220 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:52:43,220 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:52:43,776 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:52:44,018 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 09:53:56,264 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 09:53:56,264 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 09:53:56,264 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 09:53:56,264 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 09:53:56,264 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 09:53:56,264 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 09:53:56,264 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 09:53:56,264 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 09:53:56,264 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 09:53:56,264 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 09:53:56,268 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 09:53:56,268 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 09:53:56,755 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 09:53:56,755 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 09:53:56,755 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 09:53:56,755 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 09:53:56,755 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 09:53:56,755 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 09:53:56,755 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 09:53:56,755 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 09:53:56,755 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 09:53:56,755 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 09:53:56,755 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 09:53:56,757 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:53:56,765 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:53:56,772 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 09:53:56,861 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:53:56,894 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 09:53:56,894 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12d809990>
+2025-05-30 09:53:56,895 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:53:56,895 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:53:56,895 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:53:56,895 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:53:56,895 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:53:56,895 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:53:56 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 09:53:56,896 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 09:53:56,896 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:53:56,896 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:53:56,896 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:53:56,896 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:53:56,896 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:53:56,896 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:53:56,896 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 09:53:56,897 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12d8dc910>
+2025-05-30 09:53:56,897 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:53:56,897 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:53:56,897 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 09:53:56,897 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:53:56,897 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:53:56,903 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:53:56 GMT'), (b'server', b'uvicorn'), (b'content-length', b'92851'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 09:53:56,903 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 09:53:56,903 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 09:53:56,903 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:53:56,903 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:53:56,903 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:53:56,903 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:53:56,903 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:53:56,914 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 09:53:56,926 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12bba8290>
+2025-05-30 09:53:56,926 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x118a251c0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 09:53:57,043 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 09:53:57,054 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12b757a10>
+2025-05-30 09:53:57,054 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x12bfcbd10> server_hostname='api.gradio.app' timeout=30
+2025-05-30 09:53:57,227 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12bbcd5d0>
+2025-05-30 09:53:57,228 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:53:57,228 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:53:57,228 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:53:57,228 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:53:57,228 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:53:57,336 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12bb0dd10>
+2025-05-30 09:53:57,336 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:53:57,336 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:53:57,336 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:53:57,337 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:53:57,337 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:53:57,377 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:53:57 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 09:53:57,378 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 09:53:57,378 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:53:57,378 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:53:57,378 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:53:57,378 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:53:57,378 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:53:57,379 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:53:57,481 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:53:57 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 09:53:57,481 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 09:53:57,481 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:53:57,482 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:53:57,482 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:53:57,482 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:53:57,482 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:53:57,482 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:53:58,100 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:53:58,265 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:53:58,266 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:53:58,266 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 09:53:58,266 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:53:58,266 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:53:58,267 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:53:58,267 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:53:58,267 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:53:58,267 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:53:58,796 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 09:57:16,387 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 09:57:16,387 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 09:57:16,387 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 09:57:16,387 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 09:57:16,387 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 09:57:16,387 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 09:57:16,387 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 09:57:16,387 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 09:57:16,387 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 09:57:16,387 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 09:57:16,391 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 09:57:16,391 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 09:57:16,894 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 09:57:16,894 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 09:57:16,894 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 09:57:16,894 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 09:57:16,894 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 09:57:16,894 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 09:57:16,894 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 09:57:16,894 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 09:57:16,894 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 09:57:16,894 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 09:57:16,894 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 09:57:16,897 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:57:16,911 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 09:57:16,911 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:57:17,001 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 09:57:17,035 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 09:57:17,036 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12aea8510>
+2025-05-30 09:57:17,036 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:57:17,036 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:57:17,036 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:57:17,036 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:57:17,036 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:57:17,037 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:57:17 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 09:57:17,037 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 09:57:17,037 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:57:17,037 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:57:17,037 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:57:17,037 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:57:17,037 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:57:17,037 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:57:17,038 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 09:57:17,038 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12b5f8290>
+2025-05-30 09:57:17,038 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:57:17,038 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:57:17,038 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 09:57:17,039 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:57:17,039 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 09:57:17,045 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 00:57:17 GMT'), (b'server', b'uvicorn'), (b'content-length', b'92854'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 09:57:17,045 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 09:57:17,045 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 09:57:17,045 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:57:17,045 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:57:17,045 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:57:17,045 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:57:17,045 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:57:17,058 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 09:57:17,214 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 09:57:17,321 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12aede990>
+2025-05-30 09:57:17,321 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x12b4cbd10> server_hostname='api.gradio.app' timeout=30
+2025-05-30 09:57:17,323 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12a0fc450>
+2025-05-30 09:57:17,323 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x1184251c0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 09:57:17,612 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12ae025d0>
+2025-05-30 09:57:17,612 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:57:17,612 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:57:17,612 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:57:17,612 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:57:17,612 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:57:17,619 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12ae288d0>
+2025-05-30 09:57:17,619 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 09:57:17,619 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 09:57:17,619 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 09:57:17,619 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 09:57:17,619 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 09:57:17,760 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:57:17 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 09:57:17,760 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 09:57:17,760 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:57:17,760 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:57:17,760 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:57:17,761 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:57:17,761 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:57:17,761 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:57:17,771 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 00:57:17 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 09:57:17,772 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 09:57:17,772 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 09:57:17,772 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 09:57:17,772 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 09:57:17,772 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 09:57:17,772 - httpcore.connection - DEBUG - close.started
+2025-05-30 09:57:17,773 - httpcore.connection - DEBUG - close.complete
+2025-05-30 09:57:17,847 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:57:17,847 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:57:17,847 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 09:57:17,847 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 09:57:17,847 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:57:17,847 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:57:17,847 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 09:57:17,847 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 09:57:17,847 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 09:57:18,344 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 09:57:18,559 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 10:00:19,614 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 10:00:19,614 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 10:00:19,614 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 10:00:19,614 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 10:00:19,614 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 10:00:19,614 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 10:00:19,614 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 10:00:19,614 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 10:00:19,614 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 10:00:19,614 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 10:00:19,617 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 10:00:19,618 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 10:00:20,081 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 10:00:20,081 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 10:00:20,081 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 10:00:20,081 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 10:00:20,081 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 10:00:20,081 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 10:00:20,081 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 10:00:20,081 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 10:00:20,081 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 10:00:20,081 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 10:00:20,081 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 10:00:20,083 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 10:00:20,091 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 10:00:20,097 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 10:00:20,186 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 10:00:20,216 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 10:00:20,216 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x124c3ea10>
+2025-05-30 10:00:20,216 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 10:00:20,216 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:00:20,217 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 10:00:20,217 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:00:20,217 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 10:00:20,217 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 01:00:20 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 10:00:20,217 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 10:00:20,217 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 10:00:20,217 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:00:20,217 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:00:20,217 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:00:20,217 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:00:20,217 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:00:20,218 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 10:00:20,218 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x124c3fb90>
+2025-05-30 10:00:20,218 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 10:00:20,218 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:00:20,218 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 10:00:20,218 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:00:20,218 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 10:00:20,225 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 01:00:20 GMT'), (b'server', b'uvicorn'), (b'content-length', b'98793'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 10:00:20,225 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 10:00:20,225 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 10:00:20,225 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:00:20,225 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:00:20,225 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:00:20,225 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:00:20,225 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:00:20,237 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 10:00:20,268 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x121fde7d0>
+2025-05-30 10:00:20,268 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x1108291c0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 10:00:20,378 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x124d214d0>
+2025-05-30 10:00:20,378 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x124bcbec0> server_hostname='api.gradio.app' timeout=30
+2025-05-30 10:00:20,386 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 10:00:20,552 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x121e13cd0>
+2025-05-30 10:00:20,552 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 10:00:20,552 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:00:20,552 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 10:00:20,552 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:00:20,553 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 10:00:20,660 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1228b6390>
+2025-05-30 10:00:20,660 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 10:00:20,660 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:00:20,660 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 10:00:20,660 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:00:20,660 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 10:00:20,696 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 01:00:20 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 10:00:20,696 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 10:00:20,697 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 10:00:20,697 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:00:20,697 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:00:20,697 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:00:20,697 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:00:20,697 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:00:20,806 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 01:00:20 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 10:00:20,806 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 10:00:20,806 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 10:00:20,806 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:00:20,806 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:00:20,806 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:00:20,807 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:00:20,807 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:00:21,398 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 10:00:21,430 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:00:21,430 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:00:21,430 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 10:00:21,430 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 10:00:21,430 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:00:21,430 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:00:21,430 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 10:00:21,431 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:00:21,431 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:00:21,772 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 10:00:22,804 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:00:22,804 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:00:22,804 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 10:00:22,804 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:00:22,804 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:00:22,804 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 10:00:22,804 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:00:22,804 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:00:22,804 - auto_diffusers - INFO - Starting code generation for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:00:22,804 - auto_diffusers - DEBUG - Parameters: prompt='A cat holding a sign that says hello world...', size=(768, 1360), steps=4
+2025-05-30 10:00:22,804 - auto_diffusers - DEBUG - Manual specs: True, Memory analysis provided: True
+2025-05-30 10:00:22,804 - auto_diffusers - INFO - Using manual hardware specifications
+2025-05-30 10:00:22,804 - auto_diffusers - DEBUG - Manual specs: {'platform': 'Linux', 'architecture': 'manual_input', 'cpu_count': 8, 'python_version': '3.11', 'cuda_available': False, 'mps_available': False, 'torch_version': '2.0+', 'manual_input': True, 'ram_gb': 16, 'user_dtype': None, 'gpu_info': [{'name': 'Custom GPU', 'memory_mb': 8192}]}
+2025-05-30 10:00:22,804 - auto_diffusers - DEBUG - GPU detected with 8.0 GB VRAM
+2025-05-30 10:00:22,804 - auto_diffusers - INFO - Selected optimization profile: balanced
+2025-05-30 10:00:22,805 - auto_diffusers - DEBUG - Creating generation prompt for Gemini API
+2025-05-30 10:00:22,805 - auto_diffusers - DEBUG - Prompt length: 7598 characters
+2025-05-30 10:00:22,805 - auto_diffusers - INFO - ================================================================================
+2025-05-30 10:00:22,805 - auto_diffusers - INFO - PROMPT SENT TO GEMINI API:
+2025-05-30 10:00:22,805 - auto_diffusers - INFO - ================================================================================
+2025-05-30 10:00:22,805 - auto_diffusers - INFO - 
+You are an expert in optimizing diffusers library code for different hardware configurations.
+
+NOTE: This system includes curated optimization knowledge from HuggingFace documentation.
+
+TASK: Generate optimized Python code for running a diffusion model with the following specifications:
+- Model: black-forest-labs/FLUX.1-schnell
+- Prompt: "A cat holding a sign that says hello world"
+- Image size: 768x1360
+- Inference steps: 4
+
+HARDWARE SPECIFICATIONS:
+- Platform: Linux (manual_input)
+- CPU Cores: 8
+- CUDA Available: False
+- MPS Available: False
+- Optimization Profile: balanced
+- GPU: Custom GPU (8.0 GB VRAM)
+
+MEMORY ANALYSIS:
+- Model Memory Requirements: 36.0 GB (FP16 inference)
+- Model Weights Size: 24.0 GB (FP16)
+- Memory Recommendation: 🔄 Requires sequential CPU offloading
+- Recommended Precision: float16
+- Attention Slicing Recommended: True
+- VAE Slicing Recommended: True
+
+OPTIMIZATION KNOWLEDGE BASE:
+
+# DIFFUSERS OPTIMIZATION TECHNIQUES
+
+## Memory Optimization Techniques
+
+### 1. Model CPU Offloading
+Use `enable_model_cpu_offload()` to move models between GPU and CPU automatically:
+```python
+pipe.enable_model_cpu_offload()
+```
+- Saves significant VRAM by keeping only active models on GPU
+- Automatic management, no manual intervention needed
+- Compatible with all pipelines
+
+### 2. Sequential CPU Offloading  
+Use `enable_sequential_cpu_offload()` for more aggressive memory saving:
+```python
+pipe.enable_sequential_cpu_offload()
+```
+- More memory efficient than model offloading
+- Moves models to CPU after each forward pass
+- Best for very limited VRAM scenarios
+
+### 3. Attention Slicing
+Use `enable_attention_slicing()` to reduce memory during attention computation:
+```python
+pipe.enable_attention_slicing()
+# or specify slice size
+pipe.enable_attention_slicing("max")  # maximum slicing
+pipe.enable_attention_slicing(1)      # slice_size = 1
+```
+- Trades compute time for memory
+- Most effective for high-resolution images
+- Can be combined with other techniques
+
+### 4. VAE Slicing
+Use `enable_vae_slicing()` for large batch processing:
+```python
+pipe.enable_vae_slicing()
+```
+- Decodes images one at a time instead of all at once
+- Essential for batch sizes > 4
+- Minimal performance impact on single images
+
+### 5. VAE Tiling
+Use `enable_vae_tiling()` for high-resolution image generation:
+```python
+pipe.enable_vae_tiling()
+```
+- Enables 4K+ image generation on 8GB VRAM
+- Splits images into overlapping tiles
+- Automatically disabled for 512x512 or smaller images
+
+### 6. Memory Efficient Attention (xFormers)
+Use `enable_xformers_memory_efficient_attention()` if xFormers is installed:
+```python
+pipe.enable_xformers_memory_efficient_attention()
+```
+- Significantly reduces memory usage and improves speed
+- Requires xformers library installation
+- Compatible with most models
+
+## Performance Optimization Techniques
+
+### 1. Half Precision (FP16/BF16)
+Use lower precision for better memory and speed:
+```python
+# FP16 (widely supported)
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+
+# BF16 (better numerical stability, newer hardware)
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
+```
+- FP16: Halves memory usage, widely supported
+- BF16: Better numerical stability, requires newer GPUs
+- Essential for most optimization scenarios
+
+### 2. Torch Compile (PyTorch 2.0+)
+Use `torch.compile()` for significant speed improvements:
+```python
+pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+# For some models, compile VAE too:
+pipe.vae.decode = torch.compile(pipe.vae.decode, mode="reduce-overhead", fullgraph=True)
+```
+- 5-50% speed improvement
+- Requires PyTorch 2.0+
+- First run is slower due to compilation
+
+### 3. Fast Schedulers
+Use faster schedulers for fewer steps:
+```python
+from diffusers import LMSDiscreteScheduler, UniPCMultistepScheduler
+
+# LMS Scheduler (good quality, fast)
+pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config)
+
+# UniPC Scheduler (fastest)
+pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
+```
+
+## Hardware-Specific Optimizations
+
+### NVIDIA GPU Optimizations
+```python
+# Enable Tensor Cores
+torch.backends.cudnn.benchmark = True
+
+# Optimal data type for NVIDIA
+torch_dtype = torch.float16  # or torch.bfloat16 for RTX 30/40 series
+```
+
+### Apple Silicon (MPS) Optimizations
+```python
+# Use MPS device
+device = "mps" if torch.backends.mps.is_available() else "cpu"
+pipe = pipe.to(device)
+
+# Recommended dtype for Apple Silicon
+torch_dtype = torch.bfloat16  # Better than float16 on Apple Silicon
+
+# Attention slicing often helps on MPS
+pipe.enable_attention_slicing()
+```
+
+### CPU Optimizations
+```python
+# Use float32 for CPU
+torch_dtype = torch.float32
+
+# Enable optimized attention
+pipe.enable_attention_slicing()
+```
+
+## Model-Specific Guidelines
+
+### FLUX Models
+- Do NOT use guidance_scale parameter (not needed for FLUX)
+- Use 4-8 inference steps maximum
+- BF16 dtype recommended
+- Enable attention slicing for memory optimization
+
+### Stable Diffusion XL
+- Enable attention slicing for high resolutions
+- Use refiner model sparingly to save memory
+- Consider VAE tiling for >1024px images
+
+### Stable Diffusion 1.5/2.1
+- Very memory efficient base models
+- Can often run without optimizations on 8GB+ VRAM
+- Enable VAE slicing for batch processing
+
+## Memory Usage Estimation
+- FLUX.1: ~24GB for full precision, ~12GB for FP16
+- SDXL: ~7GB for FP16, ~14GB for FP32
+- SD 1.5: ~2GB for FP16, ~4GB for FP32
+
+## Optimization Combinations by VRAM
+
+### 24GB+ VRAM (High-end)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
+pipe = pipe.to("cuda")
+pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+```
+
+### 12-24GB VRAM (Mid-range)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+pipe = pipe.to("cuda")
+pipe.enable_model_cpu_offload()
+pipe.enable_xformers_memory_efficient_attention()
+```
+
+### 8-12GB VRAM (Entry-level)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+pipe.enable_sequential_cpu_offload()
+pipe.enable_attention_slicing()
+pipe.enable_vae_slicing()
+pipe.enable_xformers_memory_efficient_attention()
+```
+
+### <8GB VRAM (Low-end)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+pipe.enable_sequential_cpu_offload()
+pipe.enable_attention_slicing("max")
+pipe.enable_vae_slicing()
+pipe.enable_vae_tiling()
+```
+
+
+IMPORTANT: For FLUX.1-schnell models, do NOT include guidance_scale parameter as it's not needed.
+
+Using the OPTIMIZATION KNOWLEDGE BASE above, generate Python code that:
+
+1. **Selects the best optimization techniques** for the specific hardware profile
+2. **Applies appropriate memory optimizations** based on available VRAM
+3. **Uses optimal data types** for the target hardware:
+   - User specified dtype (if provided): Use exactly as specified
+   - Apple Silicon (MPS): prefer torch.bfloat16
+   - NVIDIA GPUs: prefer torch.float16 or torch.bfloat16 
+   - CPU only: use torch.float32
+4. **Implements hardware-specific optimizations** (CUDA, MPS, CPU)
+5. **Follows model-specific guidelines** (e.g., FLUX guidance_scale handling)
+
+IMPORTANT GUIDELINES:
+- Reference the OPTIMIZATION KNOWLEDGE BASE to select appropriate techniques
+- Include all necessary imports
+- Add brief comments explaining optimization choices
+- Generate compact, production-ready code
+- Inline values where possible for concise code
+- Generate ONLY the Python code, no explanations before or after the code block
+
+2025-05-30 10:00:22,805 - auto_diffusers - INFO - ================================================================================
+2025-05-30 10:00:22,805 - auto_diffusers - INFO - Sending request to Gemini API
+2025-05-30 10:00:48,063 - auto_diffusers - INFO - Successfully received response from Gemini API (no tools used)
+2025-05-30 10:00:48,064 - auto_diffusers - DEBUG - Response length: 2595 characters
+2025-05-30 10:03:59,180 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 10:03:59,180 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 10:03:59,180 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 10:03:59,180 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 10:03:59,181 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 10:03:59,181 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 10:03:59,181 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 10:03:59,181 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 10:03:59,181 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 10:03:59,181 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 10:03:59,184 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 10:03:59,185 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 10:03:59,676 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 10:03:59,676 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 10:03:59,676 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 10:03:59,676 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 10:03:59,676 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 10:03:59,676 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 10:03:59,676 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 10:03:59,676 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 10:03:59,676 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 10:03:59,676 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 10:03:59,676 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 10:03:59,678 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 10:03:59,692 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 10:03:59,699 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 10:03:59,780 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 10:03:59,811 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 10:03:59,811 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12930f0d0>
+2025-05-30 10:03:59,812 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 10:03:59,812 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:03:59,812 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 10:03:59,812 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:03:59,812 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 10:03:59,812 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 01:03:59 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 10:03:59,813 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 10:03:59,813 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 10:03:59,813 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:03:59,813 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:03:59,813 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:03:59,813 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:03:59,813 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:03:59,813 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 10:03:59,814 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x129f58750>
+2025-05-30 10:03:59,814 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 10:03:59,814 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:03:59,814 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 10:03:59,814 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:03:59,814 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 10:03:59,820 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 01:03:59 GMT'), (b'server', b'uvicorn'), (b'content-length', b'101856'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 10:03:59,820 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 10:03:59,820 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 10:03:59,820 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:03:59,820 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:03:59,820 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:03:59,820 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:03:59,820 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:03:59,832 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 10:03:59,933 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12808a0d0>
+2025-05-30 10:03:59,933 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x10d42d1c0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 10:03:59,974 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 10:04:00,006 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11e4ad7d0>
+2025-05-30 10:04:00,006 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x1294cfec0> server_hostname='api.gradio.app' timeout=30
+2025-05-30 10:04:00,312 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x128011590>
+2025-05-30 10:04:00,312 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 10:04:00,312 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:04:00,312 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 10:04:00,313 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:04:00,313 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 10:04:00,349 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x128010790>
+2025-05-30 10:04:00,349 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 10:04:00,349 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:04:00,349 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 10:04:00,349 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:04:00,349 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 10:04:00,474 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 01:04:00 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 10:04:00,474 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 10:04:00,475 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 10:04:00,475 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:04:00,475 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:04:00,475 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:04:00,475 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:04:00,476 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:04:00,517 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 01:04:00 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 10:04:00,517 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 10:04:00,518 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 10:04:00,518 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:04:00,519 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:04:00,519 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:04:00,519 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:04:00,519 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:04:00,986 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:04:00,986 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:04:00,986 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 10:04:00,986 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 10:04:00,986 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:04:00,987 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:04:00,987 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 10:04:00,987 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:04:00,987 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:04:01,252 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 10:04:01,463 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 10:07:15,267 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 10:07:15,267 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 10:07:15,267 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 10:07:15,267 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 10:07:15,267 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 10:07:15,267 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 10:07:15,267 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 10:07:15,267 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 10:07:15,267 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 10:07:15,267 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 10:07:15,271 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 10:07:15,271 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 10:07:15,728 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 10:07:15,728 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 10:07:15,728 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 10:07:15,728 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 10:07:15,728 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 10:07:15,728 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 10:07:15,728 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 10:07:15,728 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 10:07:15,728 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 10:07:15,728 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 10:07:15,728 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 10:07:15,731 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 10:07:15,744 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 10:07:15,751 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 10:07:15,830 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 10:07:15,863 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 10:07:15,864 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x124604810>
+2025-05-30 10:07:15,864 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 10:07:15,864 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:07:15,864 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 10:07:15,865 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:07:15,865 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 10:07:15,865 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 01:07:15 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 10:07:15,865 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 10:07:15,865 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 10:07:15,865 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:07:15,865 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:07:15,865 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:07:15,865 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:07:15,866 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:07:15,866 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 10:07:15,866 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1246049d0>
+2025-05-30 10:07:15,866 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 10:07:15,866 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:07:15,866 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 10:07:15,867 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:07:15,867 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 10:07:15,873 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 01:07:15 GMT'), (b'server', b'uvicorn'), (b'content-length', b'101855'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 10:07:15,873 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 10:07:15,873 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 10:07:15,873 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:07:15,873 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:07:15,873 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:07:15,873 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:07:15,873 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:07:15,884 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 10:07:16,009 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x122699fd0>
+2025-05-30 10:07:16,009 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x11192d1c0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 10:07:16,021 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1246bee10>
+2025-05-30 10:07:16,021 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x1245d7ec0> server_hostname='api.gradio.app' timeout=30
+2025-05-30 10:07:16,087 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 10:07:16,283 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1245df290>
+2025-05-30 10:07:16,285 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 10:07:16,285 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:07:16,286 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 10:07:16,286 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:07:16,286 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 10:07:16,296 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x122699d90>
+2025-05-30 10:07:16,296 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 10:07:16,296 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:07:16,296 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 10:07:16,297 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:07:16,297 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 10:07:16,423 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 01:07:16 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 10:07:16,424 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 10:07:16,424 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 10:07:16,424 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:07:16,424 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:07:16,424 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:07:16,424 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:07:16,425 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:07:16,436 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 01:07:16 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 10:07:16,437 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 10:07:16,437 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 10:07:16,437 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:07:16,437 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:07:16,437 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:07:16,437 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:07:16,438 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:07:16,932 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:07:16,932 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:07:16,932 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 10:07:16,932 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 10:07:16,933 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:07:16,933 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:07:16,933 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 10:07:16,933 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:07:16,933 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:07:17,071 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 10:07:17,305 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 10:07:21,656 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:07:21,657 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:07:21,657 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 10:07:21,657 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:07:21,657 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:07:21,657 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 10:07:21,658 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:07:21,658 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:07:23,033 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:07:23,033 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:07:23,033 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 10:07:23,033 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:07:23,033 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:07:23,034 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 10:07:23,034 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:07:23,034 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:08:46,621 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 10:08:46,622 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 10:08:46,622 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 10:08:46,622 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 10:08:46,622 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 10:08:46,622 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 10:08:46,622 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 10:08:46,622 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 10:08:46,622 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 10:08:46,622 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 10:08:46,625 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 10:08:46,625 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 10:08:47,104 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 10:08:47,104 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 10:08:47,105 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 10:08:47,105 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 10:08:47,105 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 10:08:47,105 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 10:08:47,105 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 10:08:47,105 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 10:08:47,105 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 10:08:47,105 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 10:08:47,105 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 10:08:47,107 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 10:08:47,119 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 10:08:47,127 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 10:08:47,208 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 10:08:47,242 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 10:08:47,242 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x13232c350>
+2025-05-30 10:08:47,242 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 10:08:47,243 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:08:47,243 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 10:08:47,243 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:08:47,243 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 10:08:47,243 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 01:08:47 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 10:08:47,243 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 10:08:47,243 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 10:08:47,244 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:08:47,244 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:08:47,244 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:08:47,244 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:08:47,244 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:08:47,244 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 10:08:47,244 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x13232ff90>
+2025-05-30 10:08:47,244 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 10:08:47,245 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:08:47,245 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 10:08:47,245 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:08:47,245 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 10:08:47,251 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 01:08:47 GMT'), (b'server', b'uvicorn'), (b'content-length', b'102088'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 10:08:47,251 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 10:08:47,252 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 10:08:47,252 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:08:47,252 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:08:47,252 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:08:47,252 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:08:47,252 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:08:47,263 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 10:08:47,295 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x127f74d10>
+2025-05-30 10:08:47,295 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x127c955b0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 10:08:47,403 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1222268d0>
+2025-05-30 10:08:47,403 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x131fd7ec0> server_hostname='api.gradio.app' timeout=30
+2025-05-30 10:08:47,419 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 10:08:47,592 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x131f4e010>
+2025-05-30 10:08:47,592 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 10:08:47,593 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:08:47,593 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 10:08:47,593 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:08:47,594 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 10:08:47,686 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x127cb96d0>
+2025-05-30 10:08:47,686 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 10:08:47,687 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:08:47,687 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 10:08:47,687 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:08:47,687 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 10:08:47,743 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 01:08:47 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 10:08:47,743 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 10:08:47,743 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 10:08:47,744 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:08:47,744 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:08:47,744 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:08:47,744 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:08:47,745 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:08:47,830 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 01:08:47 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 10:08:47,830 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 10:08:47,830 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 10:08:47,830 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:08:47,830 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:08:47,830 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:08:47,830 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:08:47,830 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:08:48,416 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 10:08:48,667 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 10:08:49,278 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:08:49,278 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:08:49,278 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 10:08:49,278 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 10:08:49,279 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:08:49,279 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:08:49,279 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 10:08:49,279 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:08:49,280 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:10:21,389 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 10:10:21,389 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 10:10:21,389 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 10:10:21,389 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 10:10:21,389 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 10:10:21,389 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 10:10:21,389 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 10:10:21,389 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 10:10:21,389 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 10:10:21,389 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 10:10:21,394 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 10:10:21,394 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 10:10:21,861 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 10:10:21,861 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 10:10:21,861 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 10:10:21,861 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 10:10:21,861 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 10:10:21,861 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 10:10:21,861 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 10:10:21,861 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 10:10:21,861 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 10:10:21,861 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 10:10:21,861 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 10:10:21,863 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 10:10:21,876 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 10:10:21,883 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 10:10:21,964 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 10:10:21,998 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 10:10:21,999 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11fd3f210>
+2025-05-30 10:10:21,999 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 10:10:21,999 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:10:21,999 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 10:10:22,000 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:10:22,000 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 10:10:22,000 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 01:10:21 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 10:10:22,000 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 10:10:22,000 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 10:10:22,000 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:10:22,000 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:10:22,000 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:10:22,000 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:10:22,000 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:10:22,000 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 10:10:22,001 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11fd5c290>
+2025-05-30 10:10:22,001 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 10:10:22,001 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:10:22,001 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 10:10:22,001 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:10:22,001 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 10:10:22,007 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 01:10:21 GMT'), (b'server', b'uvicorn'), (b'content-length', b'99284'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 10:10:22,007 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 10:10:22,007 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 10:10:22,007 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:10:22,007 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:10:22,007 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:10:22,007 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:10:22,007 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:10:22,019 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 10:10:22,115 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11eaae7d0>
+2025-05-30 10:10:22,115 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x10d32d1c0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 10:10:22,157 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11fc441d0>
+2025-05-30 10:10:22,157 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x11fccfec0> server_hostname='api.gradio.app' timeout=30
+2025-05-30 10:10:22,206 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 10:10:22,389 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11ef56250>
+2025-05-30 10:10:22,389 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 10:10:22,389 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:10:22,389 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 10:10:22,389 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:10:22,390 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 10:10:22,434 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11fba5090>
+2025-05-30 10:10:22,434 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 10:10:22,434 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:10:22,434 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 10:10:22,435 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:10:22,435 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 10:10:22,565 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 01:10:22 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 10:10:22,566 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 10:10:22,566 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 10:10:22,566 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:10:22,567 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:10:22,567 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:10:22,567 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:10:22,567 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:10:22,576 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 01:10:22 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 10:10:22,577 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 10:10:22,577 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 10:10:22,577 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:10:22,577 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:10:22,577 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:10:22,578 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:10:22,578 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:10:23,064 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:10:23,065 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:10:23,065 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 10:10:23,065 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 10:10:23,065 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:10:23,065 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:10:23,065 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 10:10:23,065 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:10:23,065 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:10:23,165 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 10:10:23,430 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 10:10:27,241 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:10:27,241 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:10:27,241 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 10:10:27,241 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:10:27,241 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:10:27,241 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 10:10:27,241 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:10:27,241 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:10:27,241 - auto_diffusers - INFO - Starting code generation for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:10:27,241 - auto_diffusers - DEBUG - Parameters: prompt='A cat holding a sign that says hello world...', size=(768, 1360), steps=4
+2025-05-30 10:10:27,242 - auto_diffusers - DEBUG - Manual specs: True, Memory analysis provided: True
+2025-05-30 10:10:27,242 - auto_diffusers - INFO - Using manual hardware specifications
+2025-05-30 10:10:27,242 - auto_diffusers - DEBUG - Manual specs: {'platform': 'Linux', 'architecture': 'manual_input', 'cpu_count': 8, 'python_version': '3.11', 'cuda_available': False, 'mps_available': False, 'torch_version': '2.0+', 'manual_input': True, 'ram_gb': 16, 'user_dtype': None, 'gpu_info': [{'name': 'Custom GPU', 'memory_mb': 8192}]}
+2025-05-30 10:10:27,242 - auto_diffusers - DEBUG - GPU detected with 8.0 GB VRAM
+2025-05-30 10:10:27,242 - auto_diffusers - INFO - Selected optimization profile: balanced
+2025-05-30 10:10:27,242 - auto_diffusers - DEBUG - Creating generation prompt for Gemini API
+2025-05-30 10:10:27,242 - auto_diffusers - DEBUG - Prompt length: 7598 characters
+2025-05-30 10:10:27,242 - auto_diffusers - INFO - ================================================================================
+2025-05-30 10:10:27,242 - auto_diffusers - INFO - PROMPT SENT TO GEMINI API:
+2025-05-30 10:10:27,242 - auto_diffusers - INFO - ================================================================================
+2025-05-30 10:10:27,242 - auto_diffusers - INFO - 
+You are an expert in optimizing diffusers library code for different hardware configurations.
+
+NOTE: This system includes curated optimization knowledge from HuggingFace documentation.
+
+TASK: Generate optimized Python code for running a diffusion model with the following specifications:
+- Model: black-forest-labs/FLUX.1-schnell
+- Prompt: "A cat holding a sign that says hello world"
+- Image size: 768x1360
+- Inference steps: 4
+
+HARDWARE SPECIFICATIONS:
+- Platform: Linux (manual_input)
+- CPU Cores: 8
+- CUDA Available: False
+- MPS Available: False
+- Optimization Profile: balanced
+- GPU: Custom GPU (8.0 GB VRAM)
+
+MEMORY ANALYSIS:
+- Model Memory Requirements: 36.0 GB (FP16 inference)
+- Model Weights Size: 24.0 GB (FP16)
+- Memory Recommendation: 🔄 Requires sequential CPU offloading
+- Recommended Precision: float16
+- Attention Slicing Recommended: True
+- VAE Slicing Recommended: True
+
+OPTIMIZATION KNOWLEDGE BASE:
+
+# DIFFUSERS OPTIMIZATION TECHNIQUES
+
+## Memory Optimization Techniques
+
+### 1. Model CPU Offloading
+Use `enable_model_cpu_offload()` to move models between GPU and CPU automatically:
+```python
+pipe.enable_model_cpu_offload()
+```
+- Saves significant VRAM by keeping only active models on GPU
+- Automatic management, no manual intervention needed
+- Compatible with all pipelines
+
+### 2. Sequential CPU Offloading  
+Use `enable_sequential_cpu_offload()` for more aggressive memory saving:
+```python
+pipe.enable_sequential_cpu_offload()
+```
+- More memory efficient than model offloading
+- Moves models to CPU after each forward pass
+- Best for very limited VRAM scenarios
+
+### 3. Attention Slicing
+Use `enable_attention_slicing()` to reduce memory during attention computation:
+```python
+pipe.enable_attention_slicing()
+# or specify slice size
+pipe.enable_attention_slicing("max")  # maximum slicing
+pipe.enable_attention_slicing(1)      # slice_size = 1
+```
+- Trades compute time for memory
+- Most effective for high-resolution images
+- Can be combined with other techniques
+
+### 4. VAE Slicing
+Use `enable_vae_slicing()` for large batch processing:
+```python
+pipe.enable_vae_slicing()
+```
+- Decodes images one at a time instead of all at once
+- Essential for batch sizes > 4
+- Minimal performance impact on single images
+
+### 5. VAE Tiling
+Use `enable_vae_tiling()` for high-resolution image generation:
+```python
+pipe.enable_vae_tiling()
+```
+- Enables 4K+ image generation on 8GB VRAM
+- Splits images into overlapping tiles
+- Automatically disabled for 512x512 or smaller images
+
+### 6. Memory Efficient Attention (xFormers)
+Use `enable_xformers_memory_efficient_attention()` if xFormers is installed:
+```python
+pipe.enable_xformers_memory_efficient_attention()
+```
+- Significantly reduces memory usage and improves speed
+- Requires xformers library installation
+- Compatible with most models
+
+## Performance Optimization Techniques
+
+### 1. Half Precision (FP16/BF16)
+Use lower precision for better memory and speed:
+```python
+# FP16 (widely supported)
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+
+# BF16 (better numerical stability, newer hardware)
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
+```
+- FP16: Halves memory usage, widely supported
+- BF16: Better numerical stability, requires newer GPUs
+- Essential for most optimization scenarios
+
+### 2. Torch Compile (PyTorch 2.0+)
+Use `torch.compile()` for significant speed improvements:
+```python
+pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+# For some models, compile VAE too:
+pipe.vae.decode = torch.compile(pipe.vae.decode, mode="reduce-overhead", fullgraph=True)
+```
+- 5-50% speed improvement
+- Requires PyTorch 2.0+
+- First run is slower due to compilation
+
+### 3. Fast Schedulers
+Use faster schedulers for fewer steps:
+```python
+from diffusers import LMSDiscreteScheduler, UniPCMultistepScheduler
+
+# LMS Scheduler (good quality, fast)
+pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config)
+
+# UniPC Scheduler (fastest)
+pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
+```
+
+## Hardware-Specific Optimizations
+
+### NVIDIA GPU Optimizations
+```python
+# Enable Tensor Cores
+torch.backends.cudnn.benchmark = True
+
+# Optimal data type for NVIDIA
+torch_dtype = torch.float16  # or torch.bfloat16 for RTX 30/40 series
+```
+
+### Apple Silicon (MPS) Optimizations
+```python
+# Use MPS device
+device = "mps" if torch.backends.mps.is_available() else "cpu"
+pipe = pipe.to(device)
+
+# Recommended dtype for Apple Silicon
+torch_dtype = torch.bfloat16  # Better than float16 on Apple Silicon
+
+# Attention slicing often helps on MPS
+pipe.enable_attention_slicing()
+```
+
+### CPU Optimizations
+```python
+# Use float32 for CPU
+torch_dtype = torch.float32
+
+# Enable optimized attention
+pipe.enable_attention_slicing()
+```
+
+## Model-Specific Guidelines
+
+### FLUX Models
+- Do NOT use guidance_scale parameter (not needed for FLUX)
+- Use 4-8 inference steps maximum
+- BF16 dtype recommended
+- Enable attention slicing for memory optimization
+
+### Stable Diffusion XL
+- Enable attention slicing for high resolutions
+- Use refiner model sparingly to save memory
+- Consider VAE tiling for >1024px images
+
+### Stable Diffusion 1.5/2.1
+- Very memory efficient base models
+- Can often run without optimizations on 8GB+ VRAM
+- Enable VAE slicing for batch processing
+
+## Memory Usage Estimation
+- FLUX.1: ~24GB for full precision, ~12GB for FP16
+- SDXL: ~7GB for FP16, ~14GB for FP32
+- SD 1.5: ~2GB for FP16, ~4GB for FP32
+
+## Optimization Combinations by VRAM
+
+### 24GB+ VRAM (High-end)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
+pipe = pipe.to("cuda")
+pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+```
+
+### 12-24GB VRAM (Mid-range)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+pipe = pipe.to("cuda")
+pipe.enable_model_cpu_offload()
+pipe.enable_xformers_memory_efficient_attention()
+```
+
+### 8-12GB VRAM (Entry-level)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+pipe.enable_sequential_cpu_offload()
+pipe.enable_attention_slicing()
+pipe.enable_vae_slicing()
+pipe.enable_xformers_memory_efficient_attention()
+```
+
+### <8GB VRAM (Low-end)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+pipe.enable_sequential_cpu_offload()
+pipe.enable_attention_slicing("max")
+pipe.enable_vae_slicing()
+pipe.enable_vae_tiling()
+```
+
+
+IMPORTANT: For FLUX.1-schnell models, do NOT include guidance_scale parameter as it's not needed.
+
+Using the OPTIMIZATION KNOWLEDGE BASE above, generate Python code that:
+
+1. **Selects the best optimization techniques** for the specific hardware profile
+2. **Applies appropriate memory optimizations** based on available VRAM
+3. **Uses optimal data types** for the target hardware:
+   - User specified dtype (if provided): Use exactly as specified
+   - Apple Silicon (MPS): prefer torch.bfloat16
+   - NVIDIA GPUs: prefer torch.float16 or torch.bfloat16 
+   - CPU only: use torch.float32
+4. **Implements hardware-specific optimizations** (CUDA, MPS, CPU)
+5. **Follows model-specific guidelines** (e.g., FLUX guidance_scale handling)
+
+IMPORTANT GUIDELINES:
+- Reference the OPTIMIZATION KNOWLEDGE BASE to select appropriate techniques
+- Include all necessary imports
+- Add brief comments explaining optimization choices
+- Generate compact, production-ready code
+- Inline values where possible for concise code
+- Generate ONLY the Python code, no explanations before or after the code block
+
+2025-05-30 10:10:27,242 - auto_diffusers - INFO - ================================================================================
+2025-05-30 10:10:27,243 - auto_diffusers - INFO - Sending request to Gemini API
+2025-05-30 10:10:38,525 - auto_diffusers - INFO - Successfully received response from Gemini API (no tools used)
+2025-05-30 10:10:38,525 - auto_diffusers - DEBUG - Response length: 1928 characters
+2025-05-30 10:14:26,020 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 10:14:26,020 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 10:14:26,020 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 10:14:26,020 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 10:14:26,020 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 10:14:26,020 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 10:14:26,020 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 10:14:26,020 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 10:14:26,020 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 10:14:26,020 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 10:14:26,024 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 10:14:26,024 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 10:14:26,483 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 10:14:26,483 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 10:14:26,483 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 10:14:26,483 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 10:14:26,483 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 10:14:26,483 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 10:14:26,483 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 10:14:26,483 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 10:14:26,483 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 10:14:26,483 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 10:14:26,483 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 10:14:26,485 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 10:14:26,492 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 10:14:26,498 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 10:14:26,669 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11d79ff90>
+2025-05-30 10:14:26,669 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x10c8291c0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 10:14:26,802 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 10:14:26,966 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11d7a5750>
+2025-05-30 10:14:26,966 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 10:14:26,967 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:14:26,967 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 10:14:26,967 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:14:26,967 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 10:14:27,117 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 01:14:27 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 10:14:27,118 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 10:14:27,118 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 10:14:27,119 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:14:27,119 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:14:27,119 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:14:27,120 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:14:27,120 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:21:46,310 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 10:21:46,310 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 10:21:46,310 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 10:21:46,310 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 10:21:46,310 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 10:21:46,310 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 10:21:46,310 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 10:21:46,310 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 10:21:46,310 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 10:21:46,310 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 10:21:46,313 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 10:21:46,313 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 10:21:46,758 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 10:21:46,758 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 10:21:46,758 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 10:21:46,758 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 10:21:46,758 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 10:21:46,758 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 10:21:46,758 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 10:21:46,758 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 10:21:46,758 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 10:21:46,758 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 10:21:46,758 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 10:21:46,760 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 10:21:46,773 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 10:21:46,781 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 10:21:46,861 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 10:21:46,889 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 10:21:46,890 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11c005d90>
+2025-05-30 10:21:46,890 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 10:21:46,890 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:21:46,890 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 10:21:46,890 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:21:46,891 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 10:21:46,891 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 01:21:46 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 10:21:46,891 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 10:21:46,891 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 10:21:46,891 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:21:46,891 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:21:46,891 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:21:46,891 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:21:46,891 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:21:46,891 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 10:21:46,892 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11c23f650>
+2025-05-30 10:21:46,892 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 10:21:46,892 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:21:46,892 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 10:21:46,892 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:21:46,892 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 10:21:46,898 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 01:21:46 GMT'), (b'server', b'uvicorn'), (b'content-length', b'101391'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 10:21:46,898 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 10:21:46,898 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 10:21:46,898 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:21:46,898 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:21:46,898 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:21:46,898 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:21:46,898 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:21:46,910 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 10:21:46,988 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11ac9a6d0>
+2025-05-30 10:21:46,988 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x11a9957f0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 10:21:47,057 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11c152a10>
+2025-05-30 10:21:47,057 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x11c1cfbf0> server_hostname='api.gradio.app' timeout=30
+2025-05-30 10:21:47,063 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 10:21:47,322 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11ac25c90>
+2025-05-30 10:21:47,322 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 10:21:47,323 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:21:47,323 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 10:21:47,323 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:21:47,323 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 10:21:47,349 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11a0c3290>
+2025-05-30 10:21:47,349 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 10:21:47,349 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:21:47,349 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 10:21:47,349 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:21:47,349 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 10:21:47,466 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 01:21:47 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 10:21:47,466 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 10:21:47,466 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 10:21:47,467 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:21:47,467 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:21:47,467 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:21:47,467 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:21:47,468 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:21:47,495 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 01:21:47 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 10:21:47,496 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 10:21:47,496 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 10:21:47,496 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:21:47,497 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:21:47,497 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:21:47,497 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:21:47,497 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:21:48,266 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 10:21:48,489 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 10:21:57,114 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:21:57,114 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:21:57,114 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 10:21:57,114 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 10:21:57,114 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:21:57,114 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:21:57,114 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 10:21:57,114 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:21:57,114 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:21:59,091 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:21:59,091 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:21:59,091 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 10:21:59,091 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:21:59,091 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:21:59,091 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 10:21:59,091 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:21:59,091 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:21:59,091 - auto_diffusers - INFO - Starting code generation for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:21:59,091 - auto_diffusers - DEBUG - Parameters: prompt='A cat holding a sign that says hello world...', size=(768, 1360), steps=4
+2025-05-30 10:21:59,092 - auto_diffusers - DEBUG - Manual specs: True, Memory analysis provided: True
+2025-05-30 10:21:59,092 - auto_diffusers - INFO - Using manual hardware specifications
+2025-05-30 10:21:59,092 - auto_diffusers - DEBUG - Manual specs: {'platform': 'Linux', 'architecture': 'manual_input', 'cpu_count': 8, 'python_version': '3.11', 'cuda_available': False, 'mps_available': False, 'torch_version': '2.0+', 'manual_input': True, 'ram_gb': 16, 'user_dtype': None, 'gpu_info': [{'name': 'Custom GPU', 'memory_mb': 8192}]}
+2025-05-30 10:21:59,092 - auto_diffusers - DEBUG - GPU detected with 8.0 GB VRAM
+2025-05-30 10:21:59,092 - auto_diffusers - INFO - Selected optimization profile: balanced
+2025-05-30 10:21:59,092 - auto_diffusers - DEBUG - Creating generation prompt for Gemini API
+2025-05-30 10:21:59,092 - auto_diffusers - DEBUG - Prompt length: 7598 characters
+2025-05-30 10:21:59,092 - auto_diffusers - INFO - ================================================================================
+2025-05-30 10:21:59,092 - auto_diffusers - INFO - PROMPT SENT TO GEMINI API:
+2025-05-30 10:21:59,092 - auto_diffusers - INFO - ================================================================================
+2025-05-30 10:21:59,092 - auto_diffusers - INFO - 
+You are an expert in optimizing diffusers library code for different hardware configurations.
+
+NOTE: This system includes curated optimization knowledge from HuggingFace documentation.
+
+TASK: Generate optimized Python code for running a diffusion model with the following specifications:
+- Model: black-forest-labs/FLUX.1-schnell
+- Prompt: "A cat holding a sign that says hello world"
+- Image size: 768x1360
+- Inference steps: 4
+
+HARDWARE SPECIFICATIONS:
+- Platform: Linux (manual_input)
+- CPU Cores: 8
+- CUDA Available: False
+- MPS Available: False
+- Optimization Profile: balanced
+- GPU: Custom GPU (8.0 GB VRAM)
+
+MEMORY ANALYSIS:
+- Model Memory Requirements: 36.0 GB (FP16 inference)
+- Model Weights Size: 24.0 GB (FP16)
+- Memory Recommendation: 🔄 Requires sequential CPU offloading
+- Recommended Precision: float16
+- Attention Slicing Recommended: True
+- VAE Slicing Recommended: True
+
+OPTIMIZATION KNOWLEDGE BASE:
+
+# DIFFUSERS OPTIMIZATION TECHNIQUES
+
+## Memory Optimization Techniques
+
+### 1. Model CPU Offloading
+Use `enable_model_cpu_offload()` to move models between GPU and CPU automatically:
+```python
+pipe.enable_model_cpu_offload()
+```
+- Saves significant VRAM by keeping only active models on GPU
+- Automatic management, no manual intervention needed
+- Compatible with all pipelines
+
+### 2. Sequential CPU Offloading  
+Use `enable_sequential_cpu_offload()` for more aggressive memory saving:
+```python
+pipe.enable_sequential_cpu_offload()
+```
+- More memory efficient than model offloading
+- Moves models to CPU after each forward pass
+- Best for very limited VRAM scenarios
+
+### 3. Attention Slicing
+Use `enable_attention_slicing()` to reduce memory during attention computation:
+```python
+pipe.enable_attention_slicing()
+# or specify slice size
+pipe.enable_attention_slicing("max")  # maximum slicing
+pipe.enable_attention_slicing(1)      # slice_size = 1
+```
+- Trades compute time for memory
+- Most effective for high-resolution images
+- Can be combined with other techniques
+
+### 4. VAE Slicing
+Use `enable_vae_slicing()` for large batch processing:
+```python
+pipe.enable_vae_slicing()
+```
+- Decodes images one at a time instead of all at once
+- Essential for batch sizes > 4
+- Minimal performance impact on single images
+
+### 5. VAE Tiling
+Use `enable_vae_tiling()` for high-resolution image generation:
+```python
+pipe.enable_vae_tiling()
+```
+- Enables 4K+ image generation on 8GB VRAM
+- Splits images into overlapping tiles
+- Automatically disabled for 512x512 or smaller images
+
+### 6. Memory Efficient Attention (xFormers)
+Use `enable_xformers_memory_efficient_attention()` if xFormers is installed:
+```python
+pipe.enable_xformers_memory_efficient_attention()
+```
+- Significantly reduces memory usage and improves speed
+- Requires xformers library installation
+- Compatible with most models
+
+## Performance Optimization Techniques
+
+### 1. Half Precision (FP16/BF16)
+Use lower precision for better memory and speed:
+```python
+# FP16 (widely supported)
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+
+# BF16 (better numerical stability, newer hardware)
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
+```
+- FP16: Halves memory usage, widely supported
+- BF16: Better numerical stability, requires newer GPUs
+- Essential for most optimization scenarios
+
+### 2. Torch Compile (PyTorch 2.0+)
+Use `torch.compile()` for significant speed improvements:
+```python
+pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+# For some models, compile VAE too:
+pipe.vae.decode = torch.compile(pipe.vae.decode, mode="reduce-overhead", fullgraph=True)
+```
+- 5-50% speed improvement
+- Requires PyTorch 2.0+
+- First run is slower due to compilation
+
+### 3. Fast Schedulers
+Use faster schedulers for fewer steps:
+```python
+from diffusers import LMSDiscreteScheduler, UniPCMultistepScheduler
+
+# LMS Scheduler (good quality, fast)
+pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config)
+
+# UniPC Scheduler (fastest)
+pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
+```
+
+## Hardware-Specific Optimizations
+
+### NVIDIA GPU Optimizations
+```python
+# Enable Tensor Cores
+torch.backends.cudnn.benchmark = True
+
+# Optimal data type for NVIDIA
+torch_dtype = torch.float16  # or torch.bfloat16 for RTX 30/40 series
+```
+
+### Apple Silicon (MPS) Optimizations
+```python
+# Use MPS device
+device = "mps" if torch.backends.mps.is_available() else "cpu"
+pipe = pipe.to(device)
+
+# Recommended dtype for Apple Silicon
+torch_dtype = torch.bfloat16  # Better than float16 on Apple Silicon
+
+# Attention slicing often helps on MPS
+pipe.enable_attention_slicing()
+```
+
+### CPU Optimizations
+```python
+# Use float32 for CPU
+torch_dtype = torch.float32
+
+# Enable optimized attention
+pipe.enable_attention_slicing()
+```
+
+## Model-Specific Guidelines
+
+### FLUX Models
+- Do NOT use guidance_scale parameter (not needed for FLUX)
+- Use 4-8 inference steps maximum
+- BF16 dtype recommended
+- Enable attention slicing for memory optimization
+
+### Stable Diffusion XL
+- Enable attention slicing for high resolutions
+- Use refiner model sparingly to save memory
+- Consider VAE tiling for >1024px images
+
+### Stable Diffusion 1.5/2.1
+- Very memory efficient base models
+- Can often run without optimizations on 8GB+ VRAM
+- Enable VAE slicing for batch processing
+
+## Memory Usage Estimation
+- FLUX.1: ~24GB for full precision, ~12GB for FP16
+- SDXL: ~7GB for FP16, ~14GB for FP32
+- SD 1.5: ~2GB for FP16, ~4GB for FP32
+
+## Optimization Combinations by VRAM
+
+### 24GB+ VRAM (High-end)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
+pipe = pipe.to("cuda")
+pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+```
+
+### 12-24GB VRAM (Mid-range)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+pipe = pipe.to("cuda")
+pipe.enable_model_cpu_offload()
+pipe.enable_xformers_memory_efficient_attention()
+```
+
+### 8-12GB VRAM (Entry-level)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+pipe.enable_sequential_cpu_offload()
+pipe.enable_attention_slicing()
+pipe.enable_vae_slicing()
+pipe.enable_xformers_memory_efficient_attention()
+```
+
+### <8GB VRAM (Low-end)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+pipe.enable_sequential_cpu_offload()
+pipe.enable_attention_slicing("max")
+pipe.enable_vae_slicing()
+pipe.enable_vae_tiling()
+```
+
+
+IMPORTANT: For FLUX.1-schnell models, do NOT include guidance_scale parameter as it's not needed.
+
+Using the OPTIMIZATION KNOWLEDGE BASE above, generate Python code that:
+
+1. **Selects the best optimization techniques** for the specific hardware profile
+2. **Applies appropriate memory optimizations** based on available VRAM
+3. **Uses optimal data types** for the target hardware:
+   - User specified dtype (if provided): Use exactly as specified
+   - Apple Silicon (MPS): prefer torch.bfloat16
+   - NVIDIA GPUs: prefer torch.float16 or torch.bfloat16 
+   - CPU only: use torch.float32
+4. **Implements hardware-specific optimizations** (CUDA, MPS, CPU)
+5. **Follows model-specific guidelines** (e.g., FLUX guidance_scale handling)
+
+IMPORTANT GUIDELINES:
+- Reference the OPTIMIZATION KNOWLEDGE BASE to select appropriate techniques
+- Include all necessary imports
+- Add brief comments explaining optimization choices
+- Generate compact, production-ready code
+- Inline values where possible for concise code
+- Generate ONLY the Python code, no explanations before or after the code block
+
+2025-05-30 10:21:59,092 - auto_diffusers - INFO - ================================================================================
+2025-05-30 10:21:59,092 - auto_diffusers - INFO - Sending request to Gemini API
+2025-05-30 10:22:16,528 - auto_diffusers - INFO - Successfully received response from Gemini API (no tools used)
+2025-05-30 10:22:16,528 - auto_diffusers - DEBUG - Response length: 1776 characters
+2025-05-30 10:29:16,567 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 10:29:16,568 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 10:29:16,568 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 10:29:16,568 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 10:29:16,568 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 10:29:16,568 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 10:29:16,568 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 10:29:16,568 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 10:29:16,568 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 10:29:16,568 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 10:29:16,572 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 10:29:16,572 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 10:29:17,088 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 10:29:17,088 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 10:29:17,088 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 10:29:17,088 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 10:29:17,088 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 10:29:17,088 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 10:29:17,088 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 10:29:17,088 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 10:29:17,088 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 10:29:17,088 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 10:29:17,088 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 10:29:17,091 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 10:29:17,104 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 10:29:17,112 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 10:29:17,193 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 10:29:17,225 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 10:29:17,225 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11dcffb50>
+2025-05-30 10:29:17,225 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 10:29:17,225 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:29:17,225 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 10:29:17,226 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:29:17,226 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 10:29:17,226 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 01:29:17 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 10:29:17,226 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 10:29:17,226 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 10:29:17,226 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:29:17,226 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:29:17,226 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:29:17,226 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:29:17,226 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:29:17,227 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 10:29:17,227 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11f81c0d0>
+2025-05-30 10:29:17,227 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 10:29:17,227 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:29:17,227 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 10:29:17,228 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:29:17,228 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 10:29:17,233 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 01:29:17 GMT'), (b'server', b'uvicorn'), (b'content-length', b'100912'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 10:29:17,233 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 10:29:17,233 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 10:29:17,233 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:29:17,233 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:29:17,234 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:29:17,234 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:29:17,234 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:29:17,245 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 10:29:17,304 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11d21a7d0>
+2025-05-30 10:29:17,304 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x1143251c0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 10:29:17,388 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11dc16590>
+2025-05-30 10:29:17,388 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x11dfcbec0> server_hostname='api.gradio.app' timeout=30
+2025-05-30 10:29:17,629 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 10:29:17,629 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11d5285d0>
+2025-05-30 10:29:17,630 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 10:29:17,630 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:29:17,630 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 10:29:17,630 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:29:17,630 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 10:29:17,675 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11ab83910>
+2025-05-30 10:29:17,675 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 10:29:17,676 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:29:17,676 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 10:29:17,676 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:29:17,676 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 10:29:17,814 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 01:29:17 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 10:29:17,815 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 10:29:17,815 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 10:29:17,815 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:29:17,815 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:29:17,815 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:29:17,816 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:29:17,816 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:29:17,821 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 01:29:17 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 10:29:17,822 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 10:29:17,822 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 10:29:17,823 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:29:17,823 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:29:17,823 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:29:17,823 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:29:17,823 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:29:18,430 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 10:29:18,542 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:29:18,542 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:29:18,542 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 10:29:18,542 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 10:29:18,542 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:29:18,542 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:29:18,542 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 10:29:18,542 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:29:18,542 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:29:18,652 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 10:29:19,602 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:29:19,602 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:29:19,602 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 10:29:19,602 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:29:19,602 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:29:19,602 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 10:29:19,603 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:29:19,603 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:29:19,603 - auto_diffusers - INFO - Starting code generation for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:29:19,603 - auto_diffusers - DEBUG - Parameters: prompt='A cat holding a sign that says hello world...', size=(768, 1360), steps=4
+2025-05-30 10:29:19,603 - auto_diffusers - DEBUG - Manual specs: True, Memory analysis provided: True
+2025-05-30 10:29:19,603 - auto_diffusers - INFO - Using manual hardware specifications
+2025-05-30 10:29:19,603 - auto_diffusers - DEBUG - Manual specs: {'platform': 'Linux', 'architecture': 'manual_input', 'cpu_count': 8, 'python_version': '3.11', 'cuda_available': False, 'mps_available': False, 'torch_version': '2.0+', 'manual_input': True, 'ram_gb': 16, 'user_dtype': None, 'gpu_info': [{'name': 'Custom GPU', 'memory_mb': 8192}]}
+2025-05-30 10:29:19,603 - auto_diffusers - DEBUG - GPU detected with 8.0 GB VRAM
+2025-05-30 10:29:19,603 - auto_diffusers - INFO - Selected optimization profile: balanced
+2025-05-30 10:29:19,603 - auto_diffusers - DEBUG - Creating generation prompt for Gemini API
+2025-05-30 10:29:19,603 - auto_diffusers - DEBUG - Prompt length: 7598 characters
+2025-05-30 10:29:19,603 - auto_diffusers - INFO - ================================================================================
+2025-05-30 10:29:19,603 - auto_diffusers - INFO - PROMPT SENT TO GEMINI API:
+2025-05-30 10:29:19,603 - auto_diffusers - INFO - ================================================================================
+2025-05-30 10:29:19,603 - auto_diffusers - INFO - 
+You are an expert in optimizing diffusers library code for different hardware configurations.
+
+NOTE: This system includes curated optimization knowledge from HuggingFace documentation.
+
+TASK: Generate optimized Python code for running a diffusion model with the following specifications:
+- Model: black-forest-labs/FLUX.1-schnell
+- Prompt: "A cat holding a sign that says hello world"
+- Image size: 768x1360
+- Inference steps: 4
+
+HARDWARE SPECIFICATIONS:
+- Platform: Linux (manual_input)
+- CPU Cores: 8
+- CUDA Available: False
+- MPS Available: False
+- Optimization Profile: balanced
+- GPU: Custom GPU (8.0 GB VRAM)
+
+MEMORY ANALYSIS:
+- Model Memory Requirements: 36.0 GB (FP16 inference)
+- Model Weights Size: 24.0 GB (FP16)
+- Memory Recommendation: 🔄 Requires sequential CPU offloading
+- Recommended Precision: float16
+- Attention Slicing Recommended: True
+- VAE Slicing Recommended: True
+
+OPTIMIZATION KNOWLEDGE BASE:
+
+# DIFFUSERS OPTIMIZATION TECHNIQUES
+
+## Memory Optimization Techniques
+
+### 1. Model CPU Offloading
+Use `enable_model_cpu_offload()` to move models between GPU and CPU automatically:
+```python
+pipe.enable_model_cpu_offload()
+```
+- Saves significant VRAM by keeping only active models on GPU
+- Automatic management, no manual intervention needed
+- Compatible with all pipelines
+
+### 2. Sequential CPU Offloading  
+Use `enable_sequential_cpu_offload()` for more aggressive memory saving:
+```python
+pipe.enable_sequential_cpu_offload()
+```
+- More memory efficient than model offloading
+- Moves models to CPU after each forward pass
+- Best for very limited VRAM scenarios
+
+### 3. Attention Slicing
+Use `enable_attention_slicing()` to reduce memory during attention computation:
+```python
+pipe.enable_attention_slicing()
+# or specify slice size
+pipe.enable_attention_slicing("max")  # maximum slicing
+pipe.enable_attention_slicing(1)      # slice_size = 1
+```
+- Trades compute time for memory
+- Most effective for high-resolution images
+- Can be combined with other techniques
+
+### 4. VAE Slicing
+Use `enable_vae_slicing()` for large batch processing:
+```python
+pipe.enable_vae_slicing()
+```
+- Decodes images one at a time instead of all at once
+- Essential for batch sizes > 4
+- Minimal performance impact on single images
+
+### 5. VAE Tiling
+Use `enable_vae_tiling()` for high-resolution image generation:
+```python
+pipe.enable_vae_tiling()
+```
+- Enables 4K+ image generation on 8GB VRAM
+- Splits images into overlapping tiles
+- Automatically disabled for 512x512 or smaller images
+
+### 6. Memory Efficient Attention (xFormers)
+Use `enable_xformers_memory_efficient_attention()` if xFormers is installed:
+```python
+pipe.enable_xformers_memory_efficient_attention()
+```
+- Significantly reduces memory usage and improves speed
+- Requires xformers library installation
+- Compatible with most models
+
+## Performance Optimization Techniques
+
+### 1. Half Precision (FP16/BF16)
+Use lower precision for better memory and speed:
+```python
+# FP16 (widely supported)
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+
+# BF16 (better numerical stability, newer hardware)
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
+```
+- FP16: Halves memory usage, widely supported
+- BF16: Better numerical stability, requires newer GPUs
+- Essential for most optimization scenarios
+
+### 2. Torch Compile (PyTorch 2.0+)
+Use `torch.compile()` for significant speed improvements:
+```python
+pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+# For some models, compile VAE too:
+pipe.vae.decode = torch.compile(pipe.vae.decode, mode="reduce-overhead", fullgraph=True)
+```
+- 5-50% speed improvement
+- Requires PyTorch 2.0+
+- First run is slower due to compilation
+
+### 3. Fast Schedulers
+Use faster schedulers for fewer steps:
+```python
+from diffusers import LMSDiscreteScheduler, UniPCMultistepScheduler
+
+# LMS Scheduler (good quality, fast)
+pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config)
+
+# UniPC Scheduler (fastest)
+pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
+```
+
+## Hardware-Specific Optimizations
+
+### NVIDIA GPU Optimizations
+```python
+# Enable Tensor Cores
+torch.backends.cudnn.benchmark = True
+
+# Optimal data type for NVIDIA
+torch_dtype = torch.float16  # or torch.bfloat16 for RTX 30/40 series
+```
+
+### Apple Silicon (MPS) Optimizations
+```python
+# Use MPS device
+device = "mps" if torch.backends.mps.is_available() else "cpu"
+pipe = pipe.to(device)
+
+# Recommended dtype for Apple Silicon
+torch_dtype = torch.bfloat16  # Better than float16 on Apple Silicon
+
+# Attention slicing often helps on MPS
+pipe.enable_attention_slicing()
+```
+
+### CPU Optimizations
+```python
+# Use float32 for CPU
+torch_dtype = torch.float32
+
+# Enable optimized attention
+pipe.enable_attention_slicing()
+```
+
+## Model-Specific Guidelines
+
+### FLUX Models
+- Do NOT use guidance_scale parameter (not needed for FLUX)
+- Use 4-8 inference steps maximum
+- BF16 dtype recommended
+- Enable attention slicing for memory optimization
+
+### Stable Diffusion XL
+- Enable attention slicing for high resolutions
+- Use refiner model sparingly to save memory
+- Consider VAE tiling for >1024px images
+
+### Stable Diffusion 1.5/2.1
+- Very memory efficient base models
+- Can often run without optimizations on 8GB+ VRAM
+- Enable VAE slicing for batch processing
+
+## Memory Usage Estimation
+- FLUX.1: ~24GB for full precision, ~12GB for FP16
+- SDXL: ~7GB for FP16, ~14GB for FP32
+- SD 1.5: ~2GB for FP16, ~4GB for FP32
+
+## Optimization Combinations by VRAM
+
+### 24GB+ VRAM (High-end)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
+pipe = pipe.to("cuda")
+pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+```
+
+### 12-24GB VRAM (Mid-range)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+pipe = pipe.to("cuda")
+pipe.enable_model_cpu_offload()
+pipe.enable_xformers_memory_efficient_attention()
+```
+
+### 8-12GB VRAM (Entry-level)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+pipe.enable_sequential_cpu_offload()
+pipe.enable_attention_slicing()
+pipe.enable_vae_slicing()
+pipe.enable_xformers_memory_efficient_attention()
+```
+
+### <8GB VRAM (Low-end)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+pipe.enable_sequential_cpu_offload()
+pipe.enable_attention_slicing("max")
+pipe.enable_vae_slicing()
+pipe.enable_vae_tiling()
+```
+
+
+IMPORTANT: For FLUX.1-schnell models, do NOT include guidance_scale parameter as it's not needed.
+
+Using the OPTIMIZATION KNOWLEDGE BASE above, generate Python code that:
+
+1. **Selects the best optimization techniques** for the specific hardware profile
+2. **Applies appropriate memory optimizations** based on available VRAM
+3. **Uses optimal data types** for the target hardware:
+   - User specified dtype (if provided): Use exactly as specified
+   - Apple Silicon (MPS): prefer torch.bfloat16
+   - NVIDIA GPUs: prefer torch.float16 or torch.bfloat16 
+   - CPU only: use torch.float32
+4. **Implements hardware-specific optimizations** (CUDA, MPS, CPU)
+5. **Follows model-specific guidelines** (e.g., FLUX guidance_scale handling)
+
+IMPORTANT GUIDELINES:
+- Reference the OPTIMIZATION KNOWLEDGE BASE to select appropriate techniques
+- Include all necessary imports
+- Add brief comments explaining optimization choices
+- Generate compact, production-ready code
+- Inline values where possible for concise code
+- Generate ONLY the Python code, no explanations before or after the code block
+
+2025-05-30 10:29:19,603 - auto_diffusers - INFO - ================================================================================
+2025-05-30 10:29:19,603 - auto_diffusers - INFO - Sending request to Gemini API
+2025-05-30 10:29:31,763 - auto_diffusers - INFO - Successfully received response from Gemini API (no tools used)
+2025-05-30 10:29:31,764 - auto_diffusers - DEBUG - Response length: 1665 characters
+2025-05-30 10:32:32,108 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 10:32:32,108 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 10:32:32,108 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 10:32:32,108 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 10:32:32,108 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 10:32:32,108 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 10:32:32,108 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 10:32:32,108 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 10:32:32,108 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 10:32:32,109 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 10:32:32,112 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 10:32:32,112 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 10:32:32,574 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 10:32:32,575 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 10:32:32,575 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 10:32:32,575 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 10:32:32,575 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 10:32:32,575 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 10:32:32,575 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 10:32:32,575 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 10:32:32,575 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 10:32:32,575 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 10:32:32,575 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 10:32:32,577 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 10:32:32,591 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 10:32:32,599 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 10:32:32,683 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 10:32:32,714 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 10:32:32,715 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x121415610>
+2025-05-30 10:32:32,715 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 10:32:32,715 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:32:32,715 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 10:32:32,715 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:32:32,716 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 10:32:32,716 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 01:32:32 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 10:32:32,716 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 10:32:32,716 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 10:32:32,716 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:32:32,716 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:32:32,716 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:32:32,716 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:32:32,716 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:32:32,717 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 10:32:32,717 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x123d34c10>
+2025-05-30 10:32:32,717 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 10:32:32,717 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:32:32,717 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 10:32:32,718 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:32:32,718 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 10:32:32,723 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 01:32:32 GMT'), (b'server', b'uvicorn'), (b'content-length', b'99751'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 10:32:32,724 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 10:32:32,724 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 10:32:32,724 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:32:32,724 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:32:32,724 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:32:32,724 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:32:32,724 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:32:32,736 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 10:32:32,802 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1214967d0>
+2025-05-30 10:32:32,802 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x1210955b0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 10:32:32,885 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1209c25d0>
+2025-05-30 10:32:32,885 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x1232cfec0> server_hostname='api.gradio.app' timeout=30
+2025-05-30 10:32:32,931 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 10:32:33,098 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1168dc5d0>
+2025-05-30 10:32:33,098 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 10:32:33,098 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:32:33,098 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 10:32:33,098 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:32:33,099 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 10:32:33,178 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1206a6b10>
+2025-05-30 10:32:33,178 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 10:32:33,178 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 10:32:33,179 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 10:32:33,179 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 10:32:33,179 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 10:32:33,248 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 01:32:33 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 10:32:33,248 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 10:32:33,248 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 10:32:33,248 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:32:33,249 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:32:33,249 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:32:33,249 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:32:33,249 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:32:33,327 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 01:32:33 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 10:32:33,327 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 10:32:33,328 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 10:32:33,329 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 10:32:33,329 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 10:32:33,329 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 10:32:33,330 - httpcore.connection - DEBUG - close.started
+2025-05-30 10:32:33,330 - httpcore.connection - DEBUG - close.complete
+2025-05-30 10:32:33,629 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:32:33,630 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:32:33,630 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 10:32:33,630 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 10:32:33,630 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:32:33,631 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:32:33,631 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 10:32:33,631 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:32:33,631 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:32:33,912 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 10:32:34,135 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 10:32:34,638 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:32:34,638 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:32:34,638 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 10:32:34,638 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:32:34,638 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:32:34,638 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 10:32:34,638 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:32:34,638 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 10:32:34,638 - auto_diffusers - INFO - Starting code generation for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 10:32:34,638 - auto_diffusers - DEBUG - Parameters: prompt='A cat holding a sign that says hello world...', size=(768, 1360), steps=4
+2025-05-30 10:32:34,639 - auto_diffusers - DEBUG - Manual specs: True, Memory analysis provided: True
+2025-05-30 10:32:34,639 - auto_diffusers - INFO - Using manual hardware specifications
+2025-05-30 10:32:34,639 - auto_diffusers - DEBUG - Manual specs: {'platform': 'Linux', 'architecture': 'manual_input', 'cpu_count': 8, 'python_version': '3.11', 'cuda_available': False, 'mps_available': False, 'torch_version': '2.0+', 'manual_input': True, 'ram_gb': 16, 'user_dtype': None, 'gpu_info': [{'name': 'Custom GPU', 'memory_mb': 8192}]}
+2025-05-30 10:32:34,639 - auto_diffusers - DEBUG - GPU detected with 8.0 GB VRAM
+2025-05-30 10:32:34,639 - auto_diffusers - INFO - Selected optimization profile: balanced
+2025-05-30 10:32:34,639 - auto_diffusers - DEBUG - Creating generation prompt for Gemini API
+2025-05-30 10:32:34,639 - auto_diffusers - DEBUG - Prompt length: 7598 characters
+2025-05-30 10:32:34,640 - auto_diffusers - INFO - ================================================================================
+2025-05-30 10:32:34,640 - auto_diffusers - INFO - PROMPT SENT TO GEMINI API:
+2025-05-30 10:32:34,640 - auto_diffusers - INFO - ================================================================================
+2025-05-30 10:32:34,640 - auto_diffusers - INFO - 
+You are an expert in optimizing diffusers library code for different hardware configurations.
+
+NOTE: This system includes curated optimization knowledge from HuggingFace documentation.
+
+TASK: Generate optimized Python code for running a diffusion model with the following specifications:
+- Model: black-forest-labs/FLUX.1-schnell
+- Prompt: "A cat holding a sign that says hello world"
+- Image size: 768x1360
+- Inference steps: 4
+
+HARDWARE SPECIFICATIONS:
+- Platform: Linux (manual_input)
+- CPU Cores: 8
+- CUDA Available: False
+- MPS Available: False
+- Optimization Profile: balanced
+- GPU: Custom GPU (8.0 GB VRAM)
+
+MEMORY ANALYSIS:
+- Model Memory Requirements: 36.0 GB (FP16 inference)
+- Model Weights Size: 24.0 GB (FP16)
+- Memory Recommendation: 🔄 Requires sequential CPU offloading
+- Recommended Precision: float16
+- Attention Slicing Recommended: True
+- VAE Slicing Recommended: True
+
+OPTIMIZATION KNOWLEDGE BASE:
+
+# DIFFUSERS OPTIMIZATION TECHNIQUES
+
+## Memory Optimization Techniques
+
+### 1. Model CPU Offloading
+Use `enable_model_cpu_offload()` to move models between GPU and CPU automatically:
+```python
+pipe.enable_model_cpu_offload()
+```
+- Saves significant VRAM by keeping only active models on GPU
+- Automatic management, no manual intervention needed
+- Compatible with all pipelines
+
+### 2. Sequential CPU Offloading  
+Use `enable_sequential_cpu_offload()` for more aggressive memory saving:
+```python
+pipe.enable_sequential_cpu_offload()
+```
+- More memory efficient than model offloading
+- Moves models to CPU after each forward pass
+- Best for very limited VRAM scenarios
+
+### 3. Attention Slicing
+Use `enable_attention_slicing()` to reduce memory during attention computation:
+```python
+pipe.enable_attention_slicing()
+# or specify slice size
+pipe.enable_attention_slicing("max")  # maximum slicing
+pipe.enable_attention_slicing(1)      # slice_size = 1
+```
+- Trades compute time for memory
+- Most effective for high-resolution images
+- Can be combined with other techniques
+
+### 4. VAE Slicing
+Use `enable_vae_slicing()` for large batch processing:
+```python
+pipe.enable_vae_slicing()
+```
+- Decodes images one at a time instead of all at once
+- Essential for batch sizes > 4
+- Minimal performance impact on single images
+
+### 5. VAE Tiling
+Use `enable_vae_tiling()` for high-resolution image generation:
+```python
+pipe.enable_vae_tiling()
+```
+- Enables 4K+ image generation on 8GB VRAM
+- Splits images into overlapping tiles
+- Automatically disabled for 512x512 or smaller images
+
+### 6. Memory Efficient Attention (xFormers)
+Use `enable_xformers_memory_efficient_attention()` if xFormers is installed:
+```python
+pipe.enable_xformers_memory_efficient_attention()
+```
+- Significantly reduces memory usage and improves speed
+- Requires xformers library installation
+- Compatible with most models
+
+## Performance Optimization Techniques
+
+### 1. Half Precision (FP16/BF16)
+Use lower precision for better memory and speed:
+```python
+# FP16 (widely supported)
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+
+# BF16 (better numerical stability, newer hardware)
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
+```
+- FP16: Halves memory usage, widely supported
+- BF16: Better numerical stability, requires newer GPUs
+- Essential for most optimization scenarios
+
+### 2. Torch Compile (PyTorch 2.0+)
+Use `torch.compile()` for significant speed improvements:
+```python
+pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+# For some models, compile VAE too:
+pipe.vae.decode = torch.compile(pipe.vae.decode, mode="reduce-overhead", fullgraph=True)
+```
+- 5-50% speed improvement
+- Requires PyTorch 2.0+
+- First run is slower due to compilation
+
+### 3. Fast Schedulers
+Use faster schedulers for fewer steps:
+```python
+from diffusers import LMSDiscreteScheduler, UniPCMultistepScheduler
+
+# LMS Scheduler (good quality, fast)
+pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config)
+
+# UniPC Scheduler (fastest)
+pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
+```
+
+## Hardware-Specific Optimizations
+
+### NVIDIA GPU Optimizations
+```python
+# Enable Tensor Cores
+torch.backends.cudnn.benchmark = True
+
+# Optimal data type for NVIDIA
+torch_dtype = torch.float16  # or torch.bfloat16 for RTX 30/40 series
+```
+
+### Apple Silicon (MPS) Optimizations
+```python
+# Use MPS device
+device = "mps" if torch.backends.mps.is_available() else "cpu"
+pipe = pipe.to(device)
+
+# Recommended dtype for Apple Silicon
+torch_dtype = torch.bfloat16  # Better than float16 on Apple Silicon
+
+# Attention slicing often helps on MPS
+pipe.enable_attention_slicing()
+```
+
+### CPU Optimizations
+```python
+# Use float32 for CPU
+torch_dtype = torch.float32
+
+# Enable optimized attention
+pipe.enable_attention_slicing()
+```
+
+## Model-Specific Guidelines
+
+### FLUX Models
+- Do NOT use guidance_scale parameter (not needed for FLUX)
+- Use 4-8 inference steps maximum
+- BF16 dtype recommended
+- Enable attention slicing for memory optimization
+
+### Stable Diffusion XL
+- Enable attention slicing for high resolutions
+- Use refiner model sparingly to save memory
+- Consider VAE tiling for >1024px images
+
+### Stable Diffusion 1.5/2.1
+- Very memory efficient base models
+- Can often run without optimizations on 8GB+ VRAM
+- Enable VAE slicing for batch processing
+
+## Memory Usage Estimation
+- FLUX.1: ~24GB for full precision, ~12GB for FP16
+- SDXL: ~7GB for FP16, ~14GB for FP32
+- SD 1.5: ~2GB for FP16, ~4GB for FP32
+
+## Optimization Combinations by VRAM
+
+### 24GB+ VRAM (High-end)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
+pipe = pipe.to("cuda")
+pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+```
+
+### 12-24GB VRAM (Mid-range)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+pipe = pipe.to("cuda")
+pipe.enable_model_cpu_offload()
+pipe.enable_xformers_memory_efficient_attention()
+```
+
+### 8-12GB VRAM (Entry-level)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+pipe.enable_sequential_cpu_offload()
+pipe.enable_attention_slicing()
+pipe.enable_vae_slicing()
+pipe.enable_xformers_memory_efficient_attention()
+```
+
+### <8GB VRAM (Low-end)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+pipe.enable_sequential_cpu_offload()
+pipe.enable_attention_slicing("max")
+pipe.enable_vae_slicing()
+pipe.enable_vae_tiling()
+```
+
+
+IMPORTANT: For FLUX.1-schnell models, do NOT include guidance_scale parameter as it's not needed.
+
+Using the OPTIMIZATION KNOWLEDGE BASE above, generate Python code that:
+
+1. **Selects the best optimization techniques** for the specific hardware profile
+2. **Applies appropriate memory optimizations** based on available VRAM
+3. **Uses optimal data types** for the target hardware:
+   - User specified dtype (if provided): Use exactly as specified
+   - Apple Silicon (MPS): prefer torch.bfloat16
+   - NVIDIA GPUs: prefer torch.float16 or torch.bfloat16 
+   - CPU only: use torch.float32
+4. **Implements hardware-specific optimizations** (CUDA, MPS, CPU)
+5. **Follows model-specific guidelines** (e.g., FLUX guidance_scale handling)
+
+IMPORTANT GUIDELINES:
+- Reference the OPTIMIZATION KNOWLEDGE BASE to select appropriate techniques
+- Include all necessary imports
+- Add brief comments explaining optimization choices
+- Generate compact, production-ready code
+- Inline values where possible for concise code
+- Generate ONLY the Python code, no explanations before or after the code block
+
+2025-05-30 10:32:34,640 - auto_diffusers - INFO - ================================================================================
+2025-05-30 10:32:34,641 - auto_diffusers - INFO - Sending request to Gemini API
+2025-05-30 10:33:08,633 - auto_diffusers - INFO - Successfully received response from Gemini API (no tools used)
+2025-05-30 10:33:08,634 - auto_diffusers - DEBUG - Response length: 2670 characters
+2025-05-30 11:43:56,204 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 11:43:56,204 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 11:43:56,204 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 11:43:56,204 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 11:43:56,204 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 11:43:56,204 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 11:43:56,204 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 11:43:56,204 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 11:43:56,204 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 11:43:56,204 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 11:43:56,208 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 11:43:56,208 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 11:43:56,690 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 11:43:56,690 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 11:43:56,690 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 11:43:56,690 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 11:43:56,690 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 11:43:56,690 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 11:43:56,690 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 11:43:56,690 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 11:43:56,690 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 11:43:56,690 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 11:43:56,690 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 11:43:56,692 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 11:43:56,704 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 11:43:56,712 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 11:43:56,814 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 11:43:56,848 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 11:43:56,849 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x16fd3fe50>
+2025-05-30 11:43:56,849 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 11:43:56,849 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 11:43:56,849 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 11:43:56,849 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 11:43:56,849 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 11:43:56,850 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 02:43:56 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 11:43:56,850 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 11:43:56,850 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 11:43:56,850 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 11:43:56,850 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 11:43:56,850 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 11:43:56,850 - httpcore.connection - DEBUG - close.started
+2025-05-30 11:43:56,850 - httpcore.connection - DEBUG - close.complete
+2025-05-30 11:43:56,850 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 11:43:56,851 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x16fc68e50>
+2025-05-30 11:43:56,851 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 11:43:56,851 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 11:43:56,851 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 11:43:56,851 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 11:43:56,851 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 11:43:56,857 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 02:43:56 GMT'), (b'server', b'uvicorn'), (b'content-length', b'101572'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 11:43:56,857 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 11:43:56,858 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 11:43:56,858 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 11:43:56,858 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 11:43:56,858 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 11:43:56,858 - httpcore.connection - DEBUG - close.started
+2025-05-30 11:43:56,858 - httpcore.connection - DEBUG - close.complete
+2025-05-30 11:43:56,869 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 11:43:56,886 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x16cc981d0>
+2025-05-30 11:43:56,886 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x115f2d1c0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 11:43:56,996 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 11:43:57,019 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x16c70ee90>
+2025-05-30 11:43:57,019 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x16fbe3ec0> server_hostname='api.gradio.app' timeout=30
+2025-05-30 11:43:57,182 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x16cc27710>
+2025-05-30 11:43:57,183 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 11:43:57,183 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 11:43:57,183 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 11:43:57,184 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 11:43:57,184 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 11:43:57,319 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x115d59490>
+2025-05-30 11:43:57,320 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 11:43:57,320 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 11:43:57,320 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 11:43:57,320 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 11:43:57,320 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 11:43:57,334 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 02:43:57 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 11:43:57,334 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 11:43:57,334 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 11:43:57,334 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 11:43:57,335 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 11:43:57,335 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 11:43:57,335 - httpcore.connection - DEBUG - close.started
+2025-05-30 11:43:57,335 - httpcore.connection - DEBUG - close.complete
+2025-05-30 11:43:57,473 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 02:43:57 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 11:43:57,474 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 11:43:57,474 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 11:43:57,474 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 11:43:57,474 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 11:43:57,474 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 11:43:57,475 - httpcore.connection - DEBUG - close.started
+2025-05-30 11:43:57,475 - httpcore.connection - DEBUG - close.complete
+2025-05-30 11:43:58,049 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 11:43:58,271 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 11:43:58,333 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 11:43:58,334 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 11:43:58,334 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 11:43:58,334 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 11:43:58,334 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 11:43:58,334 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 11:43:58,334 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 11:43:58,335 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 11:43:58,335 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 11:56:55,415 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 11:56:55,415 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 11:56:55,416 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 11:56:55,416 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 11:56:55,416 - auto_diffusers - DEBUG - Creating tools for Gemini
+2025-05-30 11:56:55,416 - auto_diffusers - INFO - Created 3 tools for Gemini
+2025-05-30 11:56:55,416 - auto_diffusers - INFO - Successfully configured Gemini AI model with tools
+2025-05-30 11:56:55,416 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 11:56:55,416 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 11:56:55,416 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 11:56:55,416 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.12.9
+2025-05-30 11:56:55,416 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 11:56:55,419 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 11:56:55,419 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 11:56:55,900 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 11:56:55,900 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 11:56:55,900 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 11:56:55,900 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.12.9', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 11:56:55,900 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 11:56:55,900 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 11:56:55,900 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 11:56:55,900 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 11:56:55,900 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 11:56:55,900 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 11:56:55,900 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 11:56:55,902 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 11:56:55,909 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 11:56:55,916 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 11:56:55,979 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 11:56:56,083 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12ea98740>
+2025-05-30 11:56:56,083 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x12e8fca50> server_hostname='api.gradio.app' timeout=3
+2025-05-30 11:56:56,166 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 11:56:56,418 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12ffbb0b0>
+2025-05-30 11:56:56,419 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 11:56:56,419 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 11:56:56,420 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 11:56:56,420 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 11:56:56,420 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 11:56:56,560 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 02:56:56 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 11:56:56,561 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 11:56:56,561 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 11:56:56,561 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 11:56:56,561 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 11:56:56,561 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 11:56:56,561 - httpcore.connection - DEBUG - close.started
+2025-05-30 11:56:56,562 - httpcore.connection - DEBUG - close.complete
+2025-05-30 11:57:37,480 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 11:57:37,480 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 11:57:37,480 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 11:57:37,480 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 11:57:37,480 - auto_diffusers - DEBUG - Creating tools for Gemini
+2025-05-30 11:57:37,480 - auto_diffusers - INFO - Created 3 tools for Gemini
+2025-05-30 11:57:37,481 - auto_diffusers - INFO - Successfully configured Gemini AI model with tools
+2025-05-30 11:57:37,481 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 11:57:37,481 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 11:57:37,481 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 11:57:37,481 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.12.9
+2025-05-30 11:57:37,481 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 11:57:37,485 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 11:57:37,485 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 11:57:37,976 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 11:57:37,976 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 11:57:37,976 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 11:57:37,976 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.12.9', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 11:57:37,976 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 11:57:37,976 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 11:57:37,976 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 11:57:37,976 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 11:57:37,976 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 11:57:37,976 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 11:57:37,976 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 11:57:37,979 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 11:57:37,980 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 11:57:37,992 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 11:57:38,053 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 11:57:38,141 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x16c5707d0>
+2025-05-30 11:57:38,141 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x16c2f8a50> server_hostname='api.gradio.app' timeout=3
+2025-05-30 11:57:38,242 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 11:57:38,420 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x16c570920>
+2025-05-30 11:57:38,420 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 11:57:38,421 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 11:57:38,422 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 11:57:38,422 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 11:57:38,422 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 11:57:38,563 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 02:57:38 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 11:57:38,563 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 11:57:38,564 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 11:57:38,564 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 11:57:38,564 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 11:57:38,564 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 11:57:38,564 - httpcore.connection - DEBUG - close.started
+2025-05-30 11:57:38,565 - httpcore.connection - DEBUG - close.complete
+2025-05-30 11:58:21,477 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 11:58:21,477 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 11:58:21,477 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 11:58:21,478 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 11:58:21,478 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 11:58:21,478 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 11:58:21,478 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 11:58:21,478 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 11:58:21,478 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 11:58:21,478 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 11:58:21,482 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 11:58:21,482 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 11:58:22,009 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 11:58:22,009 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 11:58:22,009 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 11:58:22,009 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 11:58:22,009 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 11:58:22,009 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 11:58:22,009 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 11:58:22,009 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 11:58:22,009 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 11:58:22,009 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 11:58:22,009 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 11:58:22,011 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 11:58:22,025 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 11:58:22,031 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 11:58:22,110 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 11:58:22,141 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 11:58:22,141 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x125e43310>
+2025-05-30 11:58:22,141 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 11:58:22,141 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 11:58:22,142 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 11:58:22,142 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 11:58:22,142 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 11:58:22,142 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 02:58:22 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 11:58:22,142 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 11:58:22,142 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 11:58:22,142 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 11:58:22,142 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 11:58:22,142 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 11:58:22,143 - httpcore.connection - DEBUG - close.started
+2025-05-30 11:58:22,143 - httpcore.connection - DEBUG - close.complete
+2025-05-30 11:58:22,143 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 11:58:22,143 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x125e4c050>
+2025-05-30 11:58:22,143 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 11:58:22,143 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 11:58:22,143 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 11:58:22,143 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 11:58:22,143 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 11:58:22,150 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 02:58:22 GMT'), (b'server', b'uvicorn'), (b'content-length', b'101579'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 11:58:22,150 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 11:58:22,150 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 11:58:22,150 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 11:58:22,150 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 11:58:22,150 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 11:58:22,150 - httpcore.connection - DEBUG - close.started
+2025-05-30 11:58:22,150 - httpcore.connection - DEBUG - close.complete
+2025-05-30 11:58:22,162 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 11:58:22,262 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x124809c50>
+2025-05-30 11:58:22,262 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x11192d1c0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 11:58:22,311 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x125bcb050>
+2025-05-30 11:58:22,311 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x125ccfec0> server_hostname='api.gradio.app' timeout=30
+2025-05-30 11:58:22,416 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 11:58:22,535 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x125f23090>
+2025-05-30 11:58:22,535 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 11:58:22,536 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 11:58:22,536 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 11:58:22,536 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 11:58:22,536 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 11:58:22,611 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x123e83ed0>
+2025-05-30 11:58:22,611 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 11:58:22,612 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 11:58:22,612 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 11:58:22,612 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 11:58:22,612 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 11:58:22,674 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 02:58:22 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 11:58:22,674 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 11:58:22,674 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 11:58:22,674 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 11:58:22,674 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 11:58:22,675 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 11:58:22,675 - httpcore.connection - DEBUG - close.started
+2025-05-30 11:58:22,675 - httpcore.connection - DEBUG - close.complete
+2025-05-30 11:58:22,762 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 02:58:22 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 11:58:22,762 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 11:58:22,762 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 11:58:22,762 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 11:58:22,763 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 11:58:22,763 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 11:58:22,763 - httpcore.connection - DEBUG - close.started
+2025-05-30 11:58:22,763 - httpcore.connection - DEBUG - close.complete
+2025-05-30 11:58:23,210 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 11:58:23,210 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 11:58:23,210 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 11:58:23,210 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 11:58:23,210 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 11:58:23,210 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 11:58:23,210 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 11:58:23,210 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 11:58:23,210 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 11:58:23,418 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 11:58:23,633 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 12:00:05,794 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 12:00:05,794 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 12:00:05,794 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 12:00:05,794 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 12:00:05,795 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 12:00:05,795 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 12:00:05,795 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 12:00:05,795 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 12:00:05,795 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 12:00:05,795 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 12:00:05,798 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 12:00:05,798 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 12:00:06,286 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 12:00:06,286 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 12:00:06,286 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 12:00:06,286 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 12:00:06,287 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 12:00:06,287 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 12:00:06,287 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 12:00:06,287 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 12:00:06,287 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 12:00:06,287 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 12:00:06,287 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 12:00:06,289 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 12:00:06,308 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 12:00:06,309 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 12:00:06,392 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 12:00:06,432 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 12:00:06,432 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x125c2bf90>
+2025-05-30 12:00:06,432 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:00:06,432 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:00:06,432 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:00:06,432 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:00:06,432 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:00:06,433 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 03:00:06 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 12:00:06,434 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 12:00:06,434 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:00:06,434 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:00:06,434 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:00:06,434 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:00:06,434 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:00:06,434 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:00:06,435 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 12:00:06,435 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x127f4d450>
+2025-05-30 12:00:06,436 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 12:00:06,436 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:00:06,436 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 12:00:06,436 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:00:06,436 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 12:00:06,446 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 03:00:06 GMT'), (b'server', b'uvicorn'), (b'content-length', b'101732'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 12:00:06,447 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 12:00:06,447 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 12:00:06,447 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:00:06,447 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:00:06,447 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:00:06,447 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:00:06,447 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:00:06,459 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 12:00:06,471 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x125ccb150>
+2025-05-30 12:00:06,472 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x125995910> server_hostname='api.gradio.app' timeout=3
+2025-05-30 12:00:06,619 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x127f08cd0>
+2025-05-30 12:00:06,619 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x127bd7ec0> server_hostname='api.gradio.app' timeout=30
+2025-05-30 12:00:06,647 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 12:00:06,750 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x125c460d0>
+2025-05-30 12:00:06,750 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:00:06,750 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:00:06,751 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:00:06,751 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:00:06,751 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:00:06,890 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 03:00:06 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 12:00:06,891 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 12:00:06,891 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:00:06,891 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:00:06,892 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:00:06,892 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:00:06,892 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:00:06,893 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:00:06,948 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x124935510>
+2025-05-30 12:00:06,948 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:00:06,948 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:00:06,948 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:00:06,949 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:00:06,949 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:00:07,111 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 03:00:07 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 12:00:07,111 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 12:00:07,111 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:00:07,112 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:00:07,112 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:00:07,112 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:00:07,112 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:00:07,113 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:00:07,799 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 12:00:07,951 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:00:07,951 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:00:07,952 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 12:00:07,952 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 12:00:07,952 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:00:07,952 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:00:07,952 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 12:00:07,952 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:00:07,952 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:00:08,014 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 12:00:57,110 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 12:00:57,110 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 12:00:57,110 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 12:00:57,110 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 12:00:57,110 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 12:00:57,110 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 12:00:57,110 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 12:00:57,110 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 12:00:57,110 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 12:00:57,110 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 12:00:57,113 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 12:00:57,113 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 12:00:57,568 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 12:00:57,568 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 12:00:57,568 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 12:00:57,568 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 12:00:57,568 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 12:00:57,568 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 12:00:57,568 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 12:00:57,568 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 12:00:57,568 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 12:00:57,568 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 12:00:57,568 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 12:00:57,570 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 12:00:57,584 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 12:00:57,591 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 12:00:57,670 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 12:00:57,703 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 12:00:57,704 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12de37950>
+2025-05-30 12:00:57,704 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:00:57,704 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:00:57,704 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:00:57,704 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:00:57,704 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:00:57,705 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 03:00:57 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 12:00:57,705 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 12:00:57,705 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:00:57,705 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:00:57,705 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:00:57,705 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:00:57,705 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:00:57,705 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:00:57,706 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 12:00:57,706 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12dd58410>
+2025-05-30 12:00:57,706 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 12:00:57,706 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:00:57,706 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 12:00:57,706 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:00:57,706 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 12:00:57,712 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 03:00:57 GMT'), (b'server', b'uvicorn'), (b'content-length', b'101701'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 12:00:57,712 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 12:00:57,713 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 12:00:57,713 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:00:57,713 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:00:57,713 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:00:57,713 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:00:57,713 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:00:57,724 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 12:00:57,728 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12bc9a510>
+2025-05-30 12:00:57,728 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x12b9955b0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 12:00:57,863 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12dd117d0>
+2025-05-30 12:00:57,863 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x12dcd7ec0> server_hostname='api.gradio.app' timeout=30
+2025-05-30 12:00:57,896 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 12:00:58,002 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12bcd0d10>
+2025-05-30 12:00:58,003 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:00:58,003 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:00:58,004 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:00:58,004 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:00:58,004 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:00:58,141 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 03:00:58 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 12:00:58,141 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 12:00:58,141 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:00:58,142 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:00:58,142 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:00:58,142 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:00:58,142 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:00:58,142 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:00:58,144 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12bcc6910>
+2025-05-30 12:00:58,144 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:00:58,144 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:00:58,144 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:00:58,144 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:00:58,144 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:00:58,285 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 03:00:58 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 12:00:58,285 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 12:00:58,286 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:00:58,286 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:00:58,286 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:00:58,287 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:00:58,287 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:00:58,287 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:00:58,874 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 12:00:59,128 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 12:00:59,836 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:00:59,837 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:00:59,837 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 12:00:59,837 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 12:00:59,837 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:00:59,837 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:00:59,837 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 12:00:59,837 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:00:59,837 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:01:50,907 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 12:01:50,907 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 12:01:50,907 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 12:01:50,907 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 12:01:50,907 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 12:01:50,907 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 12:01:50,907 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 12:01:50,907 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 12:01:50,907 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 12:01:50,907 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 12:01:50,911 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 12:01:50,912 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 12:01:51,381 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 12:01:51,381 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 12:01:51,381 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 12:01:51,381 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 12:01:51,381 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 12:01:51,381 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 12:01:51,381 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 12:01:51,381 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 12:01:51,381 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 12:01:51,381 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 12:01:51,381 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 12:01:51,384 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 12:01:51,397 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 12:01:51,397 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 12:01:51,493 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 12:01:51,535 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 12:01:51,536 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11fe3f650>
+2025-05-30 12:01:51,536 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:01:51,536 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:01:51,536 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:01:51,537 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:01:51,537 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:01:51,537 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 03:01:51 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 12:01:51,537 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 12:01:51,537 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:01:51,537 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:01:51,537 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:01:51,537 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:01:51,537 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:01:51,537 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:01:51,538 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 12:01:51,538 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11fe2c110>
+2025-05-30 12:01:51,538 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 12:01:51,538 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:01:51,539 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 12:01:51,539 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:01:51,539 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 12:01:51,545 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 03:01:51 GMT'), (b'server', b'uvicorn'), (b'content-length', b'98531'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 12:01:51,545 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 12:01:51,545 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 12:01:51,545 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:01:51,545 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:01:51,545 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:01:51,545 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:01:51,546 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:01:51,557 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 12:01:51,604 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11d208fd0>
+2025-05-30 12:01:51,605 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x10c92d1c0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 12:01:51,694 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 12:01:51,714 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11853a750>
+2025-05-30 12:01:51,714 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x11fccfe30> server_hostname='api.gradio.app' timeout=30
+2025-05-30 12:01:51,894 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11fc4d710>
+2025-05-30 12:01:51,895 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:01:51,895 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:01:51,895 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:01:51,896 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:01:51,896 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:01:52,036 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11e711690>
+2025-05-30 12:01:52,036 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:01:52,036 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:01:52,036 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:01:52,036 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:01:52,037 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:01:52,045 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 03:01:52 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 12:01:52,045 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 12:01:52,045 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:01:52,045 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:01:52,045 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:01:52,045 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:01:52,046 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:01:52,046 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:01:52,195 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 03:01:52 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 12:01:52,195 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 12:01:52,195 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:01:52,195 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:01:52,195 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:01:52,195 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:01:52,195 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:01:52,196 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:01:52,768 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 12:01:52,998 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 12:01:53,107 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:01:53,107 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:01:53,107 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 12:01:53,107 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 12:01:53,107 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:01:53,107 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:01:53,107 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 12:01:53,107 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:01:53,108 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:01:54,428 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:01:54,428 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:01:54,428 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 12:01:54,428 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:01:54,428 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:01:54,429 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 12:01:54,429 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:01:54,429 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:01:54,429 - auto_diffusers - INFO - Starting code generation for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:01:54,429 - auto_diffusers - DEBUG - Parameters: prompt='A cat holding a sign that says hello world...', size=(768, 1360), steps=4
+2025-05-30 12:01:54,430 - auto_diffusers - DEBUG - Manual specs: True, Memory analysis provided: True
+2025-05-30 12:01:54,430 - auto_diffusers - INFO - Using manual hardware specifications
+2025-05-30 12:01:54,430 - auto_diffusers - DEBUG - Manual specs: {'platform': 'Linux', 'architecture': 'manual_input', 'cpu_count': 8, 'python_version': '3.11', 'cuda_available': False, 'mps_available': False, 'torch_version': '2.0+', 'manual_input': True, 'ram_gb': 16, 'user_dtype': None, 'gpu_info': [{'name': 'Custom GPU', 'memory_mb': 8192}]}
+2025-05-30 12:01:54,430 - auto_diffusers - DEBUG - GPU detected with 8.0 GB VRAM
+2025-05-30 12:01:54,430 - auto_diffusers - INFO - Selected optimization profile: balanced
+2025-05-30 12:01:54,430 - auto_diffusers - DEBUG - Creating generation prompt for Gemini API
+2025-05-30 12:01:54,430 - auto_diffusers - DEBUG - Prompt length: 7598 characters
+2025-05-30 12:01:54,430 - auto_diffusers - INFO - ================================================================================
+2025-05-30 12:01:54,430 - auto_diffusers - INFO - PROMPT SENT TO GEMINI API:
+2025-05-30 12:01:54,430 - auto_diffusers - INFO - ================================================================================
+2025-05-30 12:01:54,430 - auto_diffusers - INFO - 
+You are an expert in optimizing diffusers library code for different hardware configurations.
+
+NOTE: This system includes curated optimization knowledge from HuggingFace documentation.
+
+TASK: Generate optimized Python code for running a diffusion model with the following specifications:
+- Model: black-forest-labs/FLUX.1-schnell
+- Prompt: "A cat holding a sign that says hello world"
+- Image size: 768x1360
+- Inference steps: 4
+
+HARDWARE SPECIFICATIONS:
+- Platform: Linux (manual_input)
+- CPU Cores: 8
+- CUDA Available: False
+- MPS Available: False
+- Optimization Profile: balanced
+- GPU: Custom GPU (8.0 GB VRAM)
+
+MEMORY ANALYSIS:
+- Model Memory Requirements: 36.0 GB (FP16 inference)
+- Model Weights Size: 24.0 GB (FP16)
+- Memory Recommendation: 🔄 Requires sequential CPU offloading
+- Recommended Precision: float16
+- Attention Slicing Recommended: True
+- VAE Slicing Recommended: True
+
+OPTIMIZATION KNOWLEDGE BASE:
+
+# DIFFUSERS OPTIMIZATION TECHNIQUES
+
+## Memory Optimization Techniques
+
+### 1. Model CPU Offloading
+Use `enable_model_cpu_offload()` to move models between GPU and CPU automatically:
+```python
+pipe.enable_model_cpu_offload()
+```
+- Saves significant VRAM by keeping only active models on GPU
+- Automatic management, no manual intervention needed
+- Compatible with all pipelines
+
+### 2. Sequential CPU Offloading  
+Use `enable_sequential_cpu_offload()` for more aggressive memory saving:
+```python
+pipe.enable_sequential_cpu_offload()
+```
+- More memory efficient than model offloading
+- Moves models to CPU after each forward pass
+- Best for very limited VRAM scenarios
+
+### 3. Attention Slicing
+Use `enable_attention_slicing()` to reduce memory during attention computation:
+```python
+pipe.enable_attention_slicing()
+# or specify slice size
+pipe.enable_attention_slicing("max")  # maximum slicing
+pipe.enable_attention_slicing(1)      # slice_size = 1
+```
+- Trades compute time for memory
+- Most effective for high-resolution images
+- Can be combined with other techniques
+
+### 4. VAE Slicing
+Use `enable_vae_slicing()` for large batch processing:
+```python
+pipe.enable_vae_slicing()
+```
+- Decodes images one at a time instead of all at once
+- Essential for batch sizes > 4
+- Minimal performance impact on single images
+
+### 5. VAE Tiling
+Use `enable_vae_tiling()` for high-resolution image generation:
+```python
+pipe.enable_vae_tiling()
+```
+- Enables 4K+ image generation on 8GB VRAM
+- Splits images into overlapping tiles
+- Automatically disabled for 512x512 or smaller images
+
+### 6. Memory Efficient Attention (xFormers)
+Use `enable_xformers_memory_efficient_attention()` if xFormers is installed:
+```python
+pipe.enable_xformers_memory_efficient_attention()
+```
+- Significantly reduces memory usage and improves speed
+- Requires xformers library installation
+- Compatible with most models
+
+## Performance Optimization Techniques
+
+### 1. Half Precision (FP16/BF16)
+Use lower precision for better memory and speed:
+```python
+# FP16 (widely supported)
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+
+# BF16 (better numerical stability, newer hardware)
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
+```
+- FP16: Halves memory usage, widely supported
+- BF16: Better numerical stability, requires newer GPUs
+- Essential for most optimization scenarios
+
+### 2. Torch Compile (PyTorch 2.0+)
+Use `torch.compile()` for significant speed improvements:
+```python
+pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+# For some models, compile VAE too:
+pipe.vae.decode = torch.compile(pipe.vae.decode, mode="reduce-overhead", fullgraph=True)
+```
+- 5-50% speed improvement
+- Requires PyTorch 2.0+
+- First run is slower due to compilation
+
+### 3. Fast Schedulers
+Use faster schedulers for fewer steps:
+```python
+from diffusers import LMSDiscreteScheduler, UniPCMultistepScheduler
+
+# LMS Scheduler (good quality, fast)
+pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config)
+
+# UniPC Scheduler (fastest)
+pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
+```
+
+## Hardware-Specific Optimizations
+
+### NVIDIA GPU Optimizations
+```python
+# Enable Tensor Cores
+torch.backends.cudnn.benchmark = True
+
+# Optimal data type for NVIDIA
+torch_dtype = torch.float16  # or torch.bfloat16 for RTX 30/40 series
+```
+
+### Apple Silicon (MPS) Optimizations
+```python
+# Use MPS device
+device = "mps" if torch.backends.mps.is_available() else "cpu"
+pipe = pipe.to(device)
+
+# Recommended dtype for Apple Silicon
+torch_dtype = torch.bfloat16  # Better than float16 on Apple Silicon
+
+# Attention slicing often helps on MPS
+pipe.enable_attention_slicing()
+```
+
+### CPU Optimizations
+```python
+# Use float32 for CPU
+torch_dtype = torch.float32
+
+# Enable optimized attention
+pipe.enable_attention_slicing()
+```
+
+## Model-Specific Guidelines
+
+### FLUX Models
+- Do NOT use guidance_scale parameter (not needed for FLUX)
+- Use 4-8 inference steps maximum
+- BF16 dtype recommended
+- Enable attention slicing for memory optimization
+
+### Stable Diffusion XL
+- Enable attention slicing for high resolutions
+- Use refiner model sparingly to save memory
+- Consider VAE tiling for >1024px images
+
+### Stable Diffusion 1.5/2.1
+- Very memory efficient base models
+- Can often run without optimizations on 8GB+ VRAM
+- Enable VAE slicing for batch processing
+
+## Memory Usage Estimation
+- FLUX.1: ~24GB for full precision, ~12GB for FP16
+- SDXL: ~7GB for FP16, ~14GB for FP32
+- SD 1.5: ~2GB for FP16, ~4GB for FP32
+
+## Optimization Combinations by VRAM
+
+### 24GB+ VRAM (High-end)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
+pipe = pipe.to("cuda")
+pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+```
+
+### 12-24GB VRAM (Mid-range)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+pipe = pipe.to("cuda")
+pipe.enable_model_cpu_offload()
+pipe.enable_xformers_memory_efficient_attention()
+```
+
+### 8-12GB VRAM (Entry-level)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+pipe.enable_sequential_cpu_offload()
+pipe.enable_attention_slicing()
+pipe.enable_vae_slicing()
+pipe.enable_xformers_memory_efficient_attention()
+```
+
+### <8GB VRAM (Low-end)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+pipe.enable_sequential_cpu_offload()
+pipe.enable_attention_slicing("max")
+pipe.enable_vae_slicing()
+pipe.enable_vae_tiling()
+```
+
+
+IMPORTANT: For FLUX.1-schnell models, do NOT include guidance_scale parameter as it's not needed.
+
+Using the OPTIMIZATION KNOWLEDGE BASE above, generate Python code that:
+
+1. **Selects the best optimization techniques** for the specific hardware profile
+2. **Applies appropriate memory optimizations** based on available VRAM
+3. **Uses optimal data types** for the target hardware:
+   - User specified dtype (if provided): Use exactly as specified
+   - Apple Silicon (MPS): prefer torch.bfloat16
+   - NVIDIA GPUs: prefer torch.float16 or torch.bfloat16 
+   - CPU only: use torch.float32
+4. **Implements hardware-specific optimizations** (CUDA, MPS, CPU)
+5. **Follows model-specific guidelines** (e.g., FLUX guidance_scale handling)
+
+IMPORTANT GUIDELINES:
+- Reference the OPTIMIZATION KNOWLEDGE BASE to select appropriate techniques
+- Include all necessary imports
+- Add brief comments explaining optimization choices
+- Generate compact, production-ready code
+- Inline values where possible for concise code
+- Generate ONLY the Python code, no explanations before or after the code block
+
+2025-05-30 12:01:54,430 - auto_diffusers - INFO - ================================================================================
+2025-05-30 12:01:54,431 - auto_diffusers - INFO - Sending request to Gemini API
+2025-05-30 12:02:14,766 - auto_diffusers - INFO - Successfully received response from Gemini API (no tools used)
+2025-05-30 12:02:14,766 - auto_diffusers - DEBUG - Response length: 2109 characters
+2025-05-30 12:04:37,376 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 12:04:37,376 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 12:04:37,376 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 12:04:37,376 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 12:04:37,376 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 12:04:37,376 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 12:04:37,376 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 12:04:37,376 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 12:04:37,376 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 12:04:37,376 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 12:04:37,380 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 12:04:37,380 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 12:04:37,850 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 12:04:37,850 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 12:04:37,850 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 12:04:37,850 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 12:04:37,850 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 12:04:37,850 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 12:04:37,850 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 12:04:37,850 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 12:04:37,850 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 12:04:37,850 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 12:04:37,850 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 12:04:37,852 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 12:04:37,866 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 12:04:37,866 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 12:04:37,956 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 12:04:37,989 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 12:04:37,990 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x130438b50>
+2025-05-30 12:04:37,990 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:04:37,990 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:04:37,990 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:04:37,990 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:04:37,991 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:04:37,991 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 03:04:37 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 12:04:37,991 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 12:04:37,991 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:04:37,991 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:04:37,991 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:04:37,991 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:04:37,991 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:04:37,991 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:04:37,991 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 12:04:37,992 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x13242ff90>
+2025-05-30 12:04:37,992 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 12:04:37,992 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:04:37,992 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 12:04:37,992 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:04:37,992 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 12:04:37,998 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 03:04:37 GMT'), (b'server', b'uvicorn'), (b'content-length', b'101436'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 12:04:37,998 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 12:04:37,998 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 12:04:37,998 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:04:37,999 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:04:37,999 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:04:37,999 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:04:37,999 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:04:38,010 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 12:04:38,116 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x130d3e910>
+2025-05-30 12:04:38,116 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x1164291c0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 12:04:38,147 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1304a1a90>
+2025-05-30 12:04:38,147 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x1321cfec0> server_hostname='api.gradio.app' timeout=30
+2025-05-30 12:04:38,161 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 12:04:38,406 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x130dcb410>
+2025-05-30 12:04:38,407 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:04:38,407 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:04:38,407 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:04:38,407 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:04:38,407 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:04:38,424 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x13201c390>
+2025-05-30 12:04:38,424 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:04:38,424 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:04:38,424 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:04:38,424 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:04:38,424 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:04:38,553 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 03:04:38 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 12:04:38,553 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 12:04:38,553 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:04:38,554 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:04:38,554 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:04:38,554 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:04:38,555 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:04:38,555 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:04:38,565 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 03:04:38 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 12:04:38,565 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 12:04:38,565 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:04:38,565 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:04:38,565 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:04:38,566 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:04:38,566 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:04:38,566 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:04:39,438 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 12:04:39,659 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 12:04:39,711 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:04:39,711 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:04:39,711 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 12:04:39,711 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 12:04:39,711 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:04:39,711 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:04:39,711 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 12:04:39,711 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:04:39,712 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:04:40,570 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:04:40,570 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:04:40,571 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 12:04:40,571 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:04:40,571 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:04:40,571 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 12:04:40,571 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:04:40,571 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:04:40,571 - auto_diffusers - INFO - Starting code generation for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:04:40,571 - auto_diffusers - DEBUG - Parameters: prompt='A cat holding a sign that says hello world...', size=(768, 1360), steps=4
+2025-05-30 12:04:40,571 - auto_diffusers - DEBUG - Manual specs: True, Memory analysis provided: True
+2025-05-30 12:04:40,571 - auto_diffusers - INFO - Using manual hardware specifications
+2025-05-30 12:04:40,571 - auto_diffusers - DEBUG - Manual specs: {'platform': 'Linux', 'architecture': 'manual_input', 'cpu_count': 8, 'python_version': '3.11', 'cuda_available': False, 'mps_available': False, 'torch_version': '2.0+', 'manual_input': True, 'ram_gb': 16, 'user_dtype': None, 'gpu_info': [{'name': 'Custom GPU', 'memory_mb': 8192}]}
+2025-05-30 12:04:40,571 - auto_diffusers - DEBUG - GPU detected with 8.0 GB VRAM
+2025-05-30 12:04:40,572 - auto_diffusers - INFO - Selected optimization profile: balanced
+2025-05-30 12:04:40,572 - auto_diffusers - DEBUG - Creating generation prompt for Gemini API
+2025-05-30 12:04:40,572 - auto_diffusers - DEBUG - Prompt length: 7598 characters
+2025-05-30 12:04:40,572 - auto_diffusers - INFO - ================================================================================
+2025-05-30 12:04:40,572 - auto_diffusers - INFO - PROMPT SENT TO GEMINI API:
+2025-05-30 12:04:40,572 - auto_diffusers - INFO - ================================================================================
+2025-05-30 12:04:40,572 - auto_diffusers - INFO - 
+You are an expert in optimizing diffusers library code for different hardware configurations.
+
+NOTE: This system includes curated optimization knowledge from HuggingFace documentation.
+
+TASK: Generate optimized Python code for running a diffusion model with the following specifications:
+- Model: black-forest-labs/FLUX.1-schnell
+- Prompt: "A cat holding a sign that says hello world"
+- Image size: 768x1360
+- Inference steps: 4
+
+HARDWARE SPECIFICATIONS:
+- Platform: Linux (manual_input)
+- CPU Cores: 8
+- CUDA Available: False
+- MPS Available: False
+- Optimization Profile: balanced
+- GPU: Custom GPU (8.0 GB VRAM)
+
+MEMORY ANALYSIS:
+- Model Memory Requirements: 36.0 GB (FP16 inference)
+- Model Weights Size: 24.0 GB (FP16)
+- Memory Recommendation: 🔄 Requires sequential CPU offloading
+- Recommended Precision: float16
+- Attention Slicing Recommended: True
+- VAE Slicing Recommended: True
+
+OPTIMIZATION KNOWLEDGE BASE:
+
+# DIFFUSERS OPTIMIZATION TECHNIQUES
+
+## Memory Optimization Techniques
+
+### 1. Model CPU Offloading
+Use `enable_model_cpu_offload()` to move models between GPU and CPU automatically:
+```python
+pipe.enable_model_cpu_offload()
+```
+- Saves significant VRAM by keeping only active models on GPU
+- Automatic management, no manual intervention needed
+- Compatible with all pipelines
+
+### 2. Sequential CPU Offloading  
+Use `enable_sequential_cpu_offload()` for more aggressive memory saving:
+```python
+pipe.enable_sequential_cpu_offload()
+```
+- More memory efficient than model offloading
+- Moves models to CPU after each forward pass
+- Best for very limited VRAM scenarios
+
+### 3. Attention Slicing
+Use `enable_attention_slicing()` to reduce memory during attention computation:
+```python
+pipe.enable_attention_slicing()
+# or specify slice size
+pipe.enable_attention_slicing("max")  # maximum slicing
+pipe.enable_attention_slicing(1)      # slice_size = 1
+```
+- Trades compute time for memory
+- Most effective for high-resolution images
+- Can be combined with other techniques
+
+### 4. VAE Slicing
+Use `enable_vae_slicing()` for large batch processing:
+```python
+pipe.enable_vae_slicing()
+```
+- Decodes images one at a time instead of all at once
+- Essential for batch sizes > 4
+- Minimal performance impact on single images
+
+### 5. VAE Tiling
+Use `enable_vae_tiling()` for high-resolution image generation:
+```python
+pipe.enable_vae_tiling()
+```
+- Enables 4K+ image generation on 8GB VRAM
+- Splits images into overlapping tiles
+- Automatically disabled for 512x512 or smaller images
+
+### 6. Memory Efficient Attention (xFormers)
+Use `enable_xformers_memory_efficient_attention()` if xFormers is installed:
+```python
+pipe.enable_xformers_memory_efficient_attention()
+```
+- Significantly reduces memory usage and improves speed
+- Requires xformers library installation
+- Compatible with most models
+
+## Performance Optimization Techniques
+
+### 1. Half Precision (FP16/BF16)
+Use lower precision for better memory and speed:
+```python
+# FP16 (widely supported)
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+
+# BF16 (better numerical stability, newer hardware)
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
+```
+- FP16: Halves memory usage, widely supported
+- BF16: Better numerical stability, requires newer GPUs
+- Essential for most optimization scenarios
+
+### 2. Torch Compile (PyTorch 2.0+)
+Use `torch.compile()` for significant speed improvements:
+```python
+pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+# For some models, compile VAE too:
+pipe.vae.decode = torch.compile(pipe.vae.decode, mode="reduce-overhead", fullgraph=True)
+```
+- 5-50% speed improvement
+- Requires PyTorch 2.0+
+- First run is slower due to compilation
+
+### 3. Fast Schedulers
+Use faster schedulers for fewer steps:
+```python
+from diffusers import LMSDiscreteScheduler, UniPCMultistepScheduler
+
+# LMS Scheduler (good quality, fast)
+pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config)
+
+# UniPC Scheduler (fastest)
+pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
+```
+
+## Hardware-Specific Optimizations
+
+### NVIDIA GPU Optimizations
+```python
+# Enable Tensor Cores
+torch.backends.cudnn.benchmark = True
+
+# Optimal data type for NVIDIA
+torch_dtype = torch.float16  # or torch.bfloat16 for RTX 30/40 series
+```
+
+### Apple Silicon (MPS) Optimizations
+```python
+# Use MPS device
+device = "mps" if torch.backends.mps.is_available() else "cpu"
+pipe = pipe.to(device)
+
+# Recommended dtype for Apple Silicon
+torch_dtype = torch.bfloat16  # Better than float16 on Apple Silicon
+
+# Attention slicing often helps on MPS
+pipe.enable_attention_slicing()
+```
+
+### CPU Optimizations
+```python
+# Use float32 for CPU
+torch_dtype = torch.float32
+
+# Enable optimized attention
+pipe.enable_attention_slicing()
+```
+
+## Model-Specific Guidelines
+
+### FLUX Models
+- Do NOT use guidance_scale parameter (not needed for FLUX)
+- Use 4-8 inference steps maximum
+- BF16 dtype recommended
+- Enable attention slicing for memory optimization
+
+### Stable Diffusion XL
+- Enable attention slicing for high resolutions
+- Use refiner model sparingly to save memory
+- Consider VAE tiling for >1024px images
+
+### Stable Diffusion 1.5/2.1
+- Very memory efficient base models
+- Can often run without optimizations on 8GB+ VRAM
+- Enable VAE slicing for batch processing
+
+## Memory Usage Estimation
+- FLUX.1: ~24GB for full precision, ~12GB for FP16
+- SDXL: ~7GB for FP16, ~14GB for FP32
+- SD 1.5: ~2GB for FP16, ~4GB for FP32
+
+## Optimization Combinations by VRAM
+
+### 24GB+ VRAM (High-end)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
+pipe = pipe.to("cuda")
+pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+```
+
+### 12-24GB VRAM (Mid-range)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+pipe = pipe.to("cuda")
+pipe.enable_model_cpu_offload()
+pipe.enable_xformers_memory_efficient_attention()
+```
+
+### 8-12GB VRAM (Entry-level)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+pipe.enable_sequential_cpu_offload()
+pipe.enable_attention_slicing()
+pipe.enable_vae_slicing()
+pipe.enable_xformers_memory_efficient_attention()
+```
+
+### <8GB VRAM (Low-end)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+pipe.enable_sequential_cpu_offload()
+pipe.enable_attention_slicing("max")
+pipe.enable_vae_slicing()
+pipe.enable_vae_tiling()
+```
+
+
+IMPORTANT: For FLUX.1-schnell models, do NOT include guidance_scale parameter as it's not needed.
+
+Using the OPTIMIZATION KNOWLEDGE BASE above, generate Python code that:
+
+1. **Selects the best optimization techniques** for the specific hardware profile
+2. **Applies appropriate memory optimizations** based on available VRAM
+3. **Uses optimal data types** for the target hardware:
+   - User specified dtype (if provided): Use exactly as specified
+   - Apple Silicon (MPS): prefer torch.bfloat16
+   - NVIDIA GPUs: prefer torch.float16 or torch.bfloat16 
+   - CPU only: use torch.float32
+4. **Implements hardware-specific optimizations** (CUDA, MPS, CPU)
+5. **Follows model-specific guidelines** (e.g., FLUX guidance_scale handling)
+
+IMPORTANT GUIDELINES:
+- Reference the OPTIMIZATION KNOWLEDGE BASE to select appropriate techniques
+- Include all necessary imports
+- Add brief comments explaining optimization choices
+- Generate compact, production-ready code
+- Inline values where possible for concise code
+- Generate ONLY the Python code, no explanations before or after the code block
+
+2025-05-30 12:04:40,573 - auto_diffusers - INFO - ================================================================================
+2025-05-30 12:04:40,573 - auto_diffusers - INFO - Sending request to Gemini API
+2025-05-30 12:04:58,859 - auto_diffusers - INFO - Successfully received response from Gemini API (no tools used)
+2025-05-30 12:04:58,859 - auto_diffusers - DEBUG - Response length: 2133 characters
+2025-05-30 12:06:14,204 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 12:06:14,204 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 12:06:14,204 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 12:06:14,204 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 12:06:14,204 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 12:06:14,204 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 12:06:14,204 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 12:06:14,204 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 12:06:14,204 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 12:06:14,204 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 12:06:14,207 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 12:06:14,207 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 12:06:14,678 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 12:06:14,678 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 12:06:14,678 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 12:06:14,678 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 12:06:14,678 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 12:06:14,678 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 12:06:14,678 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 12:06:14,679 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 12:06:14,679 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 12:06:14,679 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 12:06:14,679 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 12:06:14,681 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 12:06:14,694 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 12:06:14,701 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 12:06:14,783 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 12:06:14,816 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 12:06:14,817 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11c43d190>
+2025-05-30 12:06:14,817 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:06:14,817 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:06:14,817 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:06:14,817 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:06:14,817 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:06:14,818 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 03:06:14 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 12:06:14,818 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 12:06:14,818 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:06:14,818 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:06:14,818 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:06:14,818 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:06:14,818 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:06:14,818 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:06:14,818 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 12:06:14,819 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11e64c5d0>
+2025-05-30 12:06:14,819 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 12:06:14,819 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:06:14,819 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 12:06:14,819 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:06:14,819 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 12:06:14,825 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 03:06:14 GMT'), (b'server', b'uvicorn'), (b'content-length', b'101440'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 12:06:14,826 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 12:06:14,826 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 12:06:14,826 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:06:14,826 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:06:14,826 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:06:14,826 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:06:14,826 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:06:14,837 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 12:06:14,859 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11e3519d0>
+2025-05-30 12:06:14,859 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x11c1955b0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 12:06:14,975 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11e69a2d0>
+2025-05-30 12:06:14,975 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x11e3d7ec0> server_hostname='api.gradio.app' timeout=30
+2025-05-30 12:06:14,999 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 12:06:15,180 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11d8f1f90>
+2025-05-30 12:06:15,180 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:06:15,180 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:06:15,180 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:06:15,181 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:06:15,181 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:06:15,255 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11c47bdd0>
+2025-05-30 12:06:15,256 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:06:15,256 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:06:15,256 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:06:15,256 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:06:15,256 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:06:15,318 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 03:06:15 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 12:06:15,319 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 12:06:15,319 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:06:15,319 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:06:15,319 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:06:15,319 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:06:15,320 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:06:15,320 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:06:15,394 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 03:06:15 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 12:06:15,394 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 12:06:15,395 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:06:15,395 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:06:15,395 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:06:15,395 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:06:15,395 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:06:15,395 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:06:15,482 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:06:15,482 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:06:15,483 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 12:06:15,483 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 12:06:15,483 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:06:15,483 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:06:15,483 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 12:06:15,483 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:06:15,483 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:06:16,066 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 12:06:16,145 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:06:16,145 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:06:16,146 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 12:06:16,146 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:06:16,146 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:06:16,146 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 12:06:16,146 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:06:16,146 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:06:16,146 - auto_diffusers - INFO - Starting code generation for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:06:16,146 - auto_diffusers - DEBUG - Parameters: prompt='A cat holding a sign that says hello world...', size=(768, 1360), steps=4
+2025-05-30 12:06:16,146 - auto_diffusers - DEBUG - Manual specs: True, Memory analysis provided: True
+2025-05-30 12:06:16,146 - auto_diffusers - INFO - Using manual hardware specifications
+2025-05-30 12:06:16,146 - auto_diffusers - DEBUG - Manual specs: {'platform': 'Linux', 'architecture': 'manual_input', 'cpu_count': 8, 'python_version': '3.11', 'cuda_available': False, 'mps_available': False, 'torch_version': '2.0+', 'manual_input': True, 'ram_gb': 16, 'user_dtype': None, 'gpu_info': [{'name': 'Custom GPU', 'memory_mb': 8192}]}
+2025-05-30 12:06:16,146 - auto_diffusers - DEBUG - GPU detected with 8.0 GB VRAM
+2025-05-30 12:06:16,146 - auto_diffusers - INFO - Selected optimization profile: balanced
+2025-05-30 12:06:16,146 - auto_diffusers - DEBUG - Creating generation prompt for Gemini API
+2025-05-30 12:06:16,146 - auto_diffusers - DEBUG - Prompt length: 7598 characters
+2025-05-30 12:06:16,146 - auto_diffusers - INFO - ================================================================================
+2025-05-30 12:06:16,146 - auto_diffusers - INFO - PROMPT SENT TO GEMINI API:
+2025-05-30 12:06:16,147 - auto_diffusers - INFO - ================================================================================
+2025-05-30 12:06:16,147 - auto_diffusers - INFO - 
+You are an expert in optimizing diffusers library code for different hardware configurations.
+
+NOTE: This system includes curated optimization knowledge from HuggingFace documentation.
+
+TASK: Generate optimized Python code for running a diffusion model with the following specifications:
+- Model: black-forest-labs/FLUX.1-schnell
+- Prompt: "A cat holding a sign that says hello world"
+- Image size: 768x1360
+- Inference steps: 4
+
+HARDWARE SPECIFICATIONS:
+- Platform: Linux (manual_input)
+- CPU Cores: 8
+- CUDA Available: False
+- MPS Available: False
+- Optimization Profile: balanced
+- GPU: Custom GPU (8.0 GB VRAM)
+
+MEMORY ANALYSIS:
+- Model Memory Requirements: 36.0 GB (FP16 inference)
+- Model Weights Size: 24.0 GB (FP16)
+- Memory Recommendation: 🔄 Requires sequential CPU offloading
+- Recommended Precision: float16
+- Attention Slicing Recommended: True
+- VAE Slicing Recommended: True
+
+OPTIMIZATION KNOWLEDGE BASE:
+
+# DIFFUSERS OPTIMIZATION TECHNIQUES
+
+## Memory Optimization Techniques
+
+### 1. Model CPU Offloading
+Use `enable_model_cpu_offload()` to move models between GPU and CPU automatically:
+```python
+pipe.enable_model_cpu_offload()
+```
+- Saves significant VRAM by keeping only active models on GPU
+- Automatic management, no manual intervention needed
+- Compatible with all pipelines
+
+### 2. Sequential CPU Offloading  
+Use `enable_sequential_cpu_offload()` for more aggressive memory saving:
+```python
+pipe.enable_sequential_cpu_offload()
+```
+- More memory efficient than model offloading
+- Moves models to CPU after each forward pass
+- Best for very limited VRAM scenarios
+
+### 3. Attention Slicing
+Use `enable_attention_slicing()` to reduce memory during attention computation:
+```python
+pipe.enable_attention_slicing()
+# or specify slice size
+pipe.enable_attention_slicing("max")  # maximum slicing
+pipe.enable_attention_slicing(1)      # slice_size = 1
+```
+- Trades compute time for memory
+- Most effective for high-resolution images
+- Can be combined with other techniques
+
+### 4. VAE Slicing
+Use `enable_vae_slicing()` for large batch processing:
+```python
+pipe.enable_vae_slicing()
+```
+- Decodes images one at a time instead of all at once
+- Essential for batch sizes > 4
+- Minimal performance impact on single images
+
+### 5. VAE Tiling
+Use `enable_vae_tiling()` for high-resolution image generation:
+```python
+pipe.enable_vae_tiling()
+```
+- Enables 4K+ image generation on 8GB VRAM
+- Splits images into overlapping tiles
+- Automatically disabled for 512x512 or smaller images
+
+### 6. Memory Efficient Attention (xFormers)
+Use `enable_xformers_memory_efficient_attention()` if xFormers is installed:
+```python
+pipe.enable_xformers_memory_efficient_attention()
+```
+- Significantly reduces memory usage and improves speed
+- Requires xformers library installation
+- Compatible with most models
+
+## Performance Optimization Techniques
+
+### 1. Half Precision (FP16/BF16)
+Use lower precision for better memory and speed:
+```python
+# FP16 (widely supported)
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+
+# BF16 (better numerical stability, newer hardware)
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
+```
+- FP16: Halves memory usage, widely supported
+- BF16: Better numerical stability, requires newer GPUs
+- Essential for most optimization scenarios
+
+### 2. Torch Compile (PyTorch 2.0+)
+Use `torch.compile()` for significant speed improvements:
+```python
+pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+# For some models, compile VAE too:
+pipe.vae.decode = torch.compile(pipe.vae.decode, mode="reduce-overhead", fullgraph=True)
+```
+- 5-50% speed improvement
+- Requires PyTorch 2.0+
+- First run is slower due to compilation
+
+### 3. Fast Schedulers
+Use faster schedulers for fewer steps:
+```python
+from diffusers import LMSDiscreteScheduler, UniPCMultistepScheduler
+
+# LMS Scheduler (good quality, fast)
+pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config)
+
+# UniPC Scheduler (fastest)
+pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
+```
+
+## Hardware-Specific Optimizations
+
+### NVIDIA GPU Optimizations
+```python
+# Enable Tensor Cores
+torch.backends.cudnn.benchmark = True
+
+# Optimal data type for NVIDIA
+torch_dtype = torch.float16  # or torch.bfloat16 for RTX 30/40 series
+```
+
+### Apple Silicon (MPS) Optimizations
+```python
+# Use MPS device
+device = "mps" if torch.backends.mps.is_available() else "cpu"
+pipe = pipe.to(device)
+
+# Recommended dtype for Apple Silicon
+torch_dtype = torch.bfloat16  # Better than float16 on Apple Silicon
+
+# Attention slicing often helps on MPS
+pipe.enable_attention_slicing()
+```
+
+### CPU Optimizations
+```python
+# Use float32 for CPU
+torch_dtype = torch.float32
+
+# Enable optimized attention
+pipe.enable_attention_slicing()
+```
+
+## Model-Specific Guidelines
+
+### FLUX Models
+- Do NOT use guidance_scale parameter (not needed for FLUX)
+- Use 4-8 inference steps maximum
+- BF16 dtype recommended
+- Enable attention slicing for memory optimization
+
+### Stable Diffusion XL
+- Enable attention slicing for high resolutions
+- Use refiner model sparingly to save memory
+- Consider VAE tiling for >1024px images
+
+### Stable Diffusion 1.5/2.1
+- Very memory efficient base models
+- Can often run without optimizations on 8GB+ VRAM
+- Enable VAE slicing for batch processing
+
+## Memory Usage Estimation
+- FLUX.1: ~24GB for full precision, ~12GB for FP16
+- SDXL: ~7GB for FP16, ~14GB for FP32
+- SD 1.5: ~2GB for FP16, ~4GB for FP32
+
+## Optimization Combinations by VRAM
+
+### 24GB+ VRAM (High-end)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
+pipe = pipe.to("cuda")
+pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+```
+
+### 12-24GB VRAM (Mid-range)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+pipe = pipe.to("cuda")
+pipe.enable_model_cpu_offload()
+pipe.enable_xformers_memory_efficient_attention()
+```
+
+### 8-12GB VRAM (Entry-level)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+pipe.enable_sequential_cpu_offload()
+pipe.enable_attention_slicing()
+pipe.enable_vae_slicing()
+pipe.enable_xformers_memory_efficient_attention()
+```
+
+### <8GB VRAM (Low-end)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+pipe.enable_sequential_cpu_offload()
+pipe.enable_attention_slicing("max")
+pipe.enable_vae_slicing()
+pipe.enable_vae_tiling()
+```
+
+
+IMPORTANT: For FLUX.1-schnell models, do NOT include guidance_scale parameter as it's not needed.
+
+Using the OPTIMIZATION KNOWLEDGE BASE above, generate Python code that:
+
+1. **Selects the best optimization techniques** for the specific hardware profile
+2. **Applies appropriate memory optimizations** based on available VRAM
+3. **Uses optimal data types** for the target hardware:
+   - User specified dtype (if provided): Use exactly as specified
+   - Apple Silicon (MPS): prefer torch.bfloat16
+   - NVIDIA GPUs: prefer torch.float16 or torch.bfloat16 
+   - CPU only: use torch.float32
+4. **Implements hardware-specific optimizations** (CUDA, MPS, CPU)
+5. **Follows model-specific guidelines** (e.g., FLUX guidance_scale handling)
+
+IMPORTANT GUIDELINES:
+- Reference the OPTIMIZATION KNOWLEDGE BASE to select appropriate techniques
+- Include all necessary imports
+- Add brief comments explaining optimization choices
+- Generate compact, production-ready code
+- Inline values where possible for concise code
+- Generate ONLY the Python code, no explanations before or after the code block
+
+2025-05-30 12:06:16,147 - auto_diffusers - INFO - ================================================================================
+2025-05-30 12:06:16,147 - auto_diffusers - INFO - Sending request to Gemini API
+2025-05-30 12:06:16,279 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 12:06:50,443 - auto_diffusers - INFO - Successfully received response from Gemini API (no tools used)
+2025-05-30 12:06:50,444 - auto_diffusers - DEBUG - Response length: 2440 characters
+2025-05-30 12:07:59,163 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 12:07:59,163 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 12:07:59,163 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 12:07:59,163 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 12:07:59,163 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 12:07:59,163 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 12:07:59,163 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 12:07:59,163 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 12:07:59,163 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 12:07:59,163 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 12:07:59,167 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 12:07:59,167 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 12:07:59,633 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 12:07:59,634 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 12:07:59,634 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 12:07:59,634 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 12:07:59,634 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 12:07:59,634 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 12:07:59,634 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 12:07:59,634 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 12:07:59,634 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 12:07:59,634 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 12:07:59,634 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 12:07:59,636 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 12:07:59,648 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 12:07:59,656 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 12:07:59,741 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 12:07:59,788 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 12:07:59,788 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x119c2fc90>
+2025-05-30 12:07:59,788 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:07:59,789 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:07:59,789 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:07:59,789 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:07:59,789 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:07:59,789 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 03:07:59 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 12:07:59,790 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 12:07:59,790 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:07:59,790 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:07:59,790 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:07:59,790 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:07:59,790 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:07:59,790 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:07:59,790 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 12:07:59,791 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x119b4d3d0>
+2025-05-30 12:07:59,791 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 12:07:59,791 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:07:59,791 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 12:07:59,791 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:07:59,791 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 12:07:59,798 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 03:07:59 GMT'), (b'server', b'uvicorn'), (b'content-length', b'101454'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 12:07:59,798 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 12:07:59,798 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 12:07:59,798 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:07:59,798 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:07:59,799 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:07:59,799 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:07:59,799 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:07:59,810 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 12:07:59,813 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11899a850>
+2025-05-30 12:07:59,814 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x1073291c0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 12:07:59,949 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x10fb07250>
+2025-05-30 12:07:59,949 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x119acfa40> server_hostname='api.gradio.app' timeout=30
+2025-05-30 12:07:59,967 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 12:08:00,140 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x119a49e90>
+2025-05-30 12:08:00,141 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:08:00,141 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:08:00,141 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:08:00,141 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:08:00,141 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:08:00,258 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x118b0b690>
+2025-05-30 12:08:00,258 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:08:00,258 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:08:00,258 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:08:00,258 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:08:00,258 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:08:00,279 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 03:08:00 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 12:08:00,279 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 12:08:00,280 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:08:00,280 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:08:00,280 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:08:00,280 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:08:00,280 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:08:00,280 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:08:00,400 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 03:08:00 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 12:08:00,401 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 12:08:00,401 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:08:00,401 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:08:00,401 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:08:00,401 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:08:00,401 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:08:00,401 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:08:00,986 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 12:08:01,202 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 12:08:08,120 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:08:08,121 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:08:08,121 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 12:08:08,121 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 12:08:08,121 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:08:08,121 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:08:08,122 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 12:08:08,122 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:08:08,122 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:10:20,211 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 12:10:20,211 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 12:10:20,211 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 12:10:20,211 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 12:10:20,211 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 12:10:20,211 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 12:10:20,211 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 12:10:20,211 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 12:10:20,211 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 12:10:20,211 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 12:10:20,216 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 12:10:20,216 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 12:10:20,680 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 12:10:20,680 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 12:10:20,680 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 12:10:20,680 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 12:10:20,680 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 12:10:20,680 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 12:10:20,680 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 12:10:20,680 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 12:10:20,680 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 12:10:20,680 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 12:10:20,680 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 12:10:20,682 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 12:10:20,695 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 12:10:20,703 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 12:10:20,786 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 12:10:20,820 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 12:10:20,820 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x135143f90>
+2025-05-30 12:10:20,820 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:10:20,821 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:10:20,821 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:10:20,821 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:10:20,821 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:10:20,821 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 03:10:20 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 12:10:20,821 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 12:10:20,821 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:10:20,821 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:10:20,821 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:10:20,821 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:10:20,821 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:10:20,822 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:10:20,822 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 12:10:20,822 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x137458bd0>
+2025-05-30 12:10:20,822 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 12:10:20,822 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:10:20,822 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 12:10:20,822 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:10:20,822 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 12:10:20,829 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 03:10:20 GMT'), (b'server', b'uvicorn'), (b'content-length', b'101445'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 12:10:20,829 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 12:10:20,829 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 12:10:20,829 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:10:20,829 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:10:20,829 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:10:20,829 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:10:20,829 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:10:20,841 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 12:10:20,960 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x133ccf790>
+2025-05-30 12:10:20,960 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x134e955b0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 12:10:20,982 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 12:10:20,985 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1349f5d50>
+2025-05-30 12:10:20,985 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x1371d7ec0> server_hostname='api.gradio.app' timeout=30
+2025-05-30 12:10:21,238 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1351920d0>
+2025-05-30 12:10:21,239 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:10:21,239 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:10:21,239 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:10:21,239 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:10:21,239 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:10:21,273 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x135147350>
+2025-05-30 12:10:21,273 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:10:21,273 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:10:21,273 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:10:21,273 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:10:21,273 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:10:21,379 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 03:10:21 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 12:10:21,379 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 12:10:21,379 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:10:21,379 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:10:21,379 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:10:21,379 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:10:21,380 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:10:21,380 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:10:21,419 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 03:10:21 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 12:10:21,419 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 12:10:21,420 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:10:21,420 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:10:21,420 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:10:21,420 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:10:21,420 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:10:21,421 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:10:22,043 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 12:10:22,264 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 12:10:22,493 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:10:22,493 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:10:22,493 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 12:10:22,493 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 12:10:22,493 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:10:22,493 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:10:22,493 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 12:10:22,494 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:10:22,494 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:11:53,624 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 12:11:53,624 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 12:11:53,624 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 12:11:53,624 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 12:11:53,624 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 12:11:53,624 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 12:11:53,624 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 12:11:53,624 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 12:11:53,624 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 12:11:53,624 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 12:11:53,628 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 12:11:53,628 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 12:11:54,097 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 12:11:54,097 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 12:11:54,097 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 12:11:54,097 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 12:11:54,097 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 12:11:54,097 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 12:11:54,097 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 12:11:54,097 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 12:11:54,097 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 12:11:54,097 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 12:11:54,097 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 12:11:54,099 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 12:11:54,118 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 12:11:54,119 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 12:11:54,199 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 12:11:54,235 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 12:11:54,235 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x124092a50>
+2025-05-30 12:11:54,236 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:11:54,236 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:11:54,236 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:11:54,236 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:11:54,236 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:11:54,236 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 03:11:54 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 12:11:54,237 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 12:11:54,237 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:11:54,237 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:11:54,237 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:11:54,237 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:11:54,237 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:11:54,237 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:11:54,237 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 12:11:54,238 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x130058c50>
+2025-05-30 12:11:54,238 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 12:11:54,238 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:11:54,238 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 12:11:54,238 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:11:54,238 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 12:11:54,245 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 03:11:54 GMT'), (b'server', b'uvicorn'), (b'content-length', b'101552'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 12:11:54,245 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 12:11:54,245 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 12:11:54,245 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:11:54,245 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:11:54,245 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:11:54,245 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:11:54,245 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:11:54,258 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 12:11:54,283 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12299e010>
+2025-05-30 12:11:54,286 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x1234955b0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 12:11:54,397 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12065f210>
+2025-05-30 12:11:54,397 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x127f3bec0> server_hostname='api.gradio.app' timeout=30
+2025-05-30 12:11:54,403 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 12:11:54,569 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x122c3f790>
+2025-05-30 12:11:54,570 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:11:54,570 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:11:54,570 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:11:54,570 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:11:54,570 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:11:54,676 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12349b650>
+2025-05-30 12:11:54,676 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:11:54,676 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:11:54,676 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:11:54,676 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:11:54,676 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:11:54,714 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 03:11:54 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 12:11:54,714 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 12:11:54,714 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:11:54,714 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:11:54,714 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:11:54,714 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:11:54,714 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:11:54,715 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:11:54,816 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 03:11:54 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 12:11:54,816 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 12:11:54,816 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:11:54,817 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:11:54,817 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:11:54,817 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:11:54,817 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:11:54,818 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:11:55,439 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 12:11:55,660 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 12:11:57,078 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:11:57,078 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:11:57,079 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 12:11:57,079 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 12:11:57,079 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:11:57,079 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:11:57,079 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 12:11:57,079 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:11:57,079 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:13:22,848 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 12:13:22,848 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 12:13:22,848 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 12:13:22,848 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 12:13:22,848 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 12:13:22,848 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 12:13:22,848 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 12:13:22,848 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 12:13:22,848 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 12:13:22,848 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 12:13:22,852 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 12:13:22,852 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 12:13:23,326 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 12:13:23,326 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 12:13:23,326 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 12:13:23,326 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 12:13:23,326 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 12:13:23,326 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 12:13:23,326 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 12:13:23,326 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 12:13:23,326 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 12:13:23,326 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 12:13:23,326 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 12:13:23,328 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 12:13:23,342 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 12:13:23,349 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 12:13:23,434 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 12:13:23,467 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 12:13:23,468 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1375e60d0>
+2025-05-30 12:13:23,468 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:13:23,468 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:13:23,468 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:13:23,468 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:13:23,468 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:13:23,468 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 03:13:23 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 12:13:23,469 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 12:13:23,469 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:13:23,469 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:13:23,469 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:13:23,469 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:13:23,469 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:13:23,469 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:13:23,469 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 12:13:23,470 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x13784d390>
+2025-05-30 12:13:23,470 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 12:13:23,470 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:13:23,470 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 12:13:23,470 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:13:23,470 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 12:13:23,477 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 03:13:23 GMT'), (b'server', b'uvicorn'), (b'content-length', b'101618'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 12:13:23,477 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 12:13:23,477 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 12:13:23,477 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:13:23,477 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:13:23,477 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:13:23,477 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:13:23,477 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:13:23,489 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 12:13:23,523 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x136297d10>
+2025-05-30 12:13:23,523 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x135f955b0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 12:13:23,621 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 12:13:23,638 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x116e0bf50>
+2025-05-30 12:13:23,638 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x1376cfec0> server_hostname='api.gradio.app' timeout=30
+2025-05-30 12:13:23,834 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x136213a90>
+2025-05-30 12:13:23,835 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:13:23,835 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:13:23,835 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:13:23,835 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:13:23,835 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:13:23,940 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x137534190>
+2025-05-30 12:13:23,941 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:13:23,941 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:13:23,941 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:13:23,941 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:13:23,941 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:13:23,992 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 03:13:23 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 12:13:23,992 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 12:13:23,992 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:13:23,992 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:13:23,992 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:13:23,992 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:13:23,993 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:13:23,993 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:13:24,092 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 03:13:24 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 12:13:24,093 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 12:13:24,093 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:13:24,093 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:13:24,093 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:13:24,093 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:13:24,093 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:13:24,093 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:13:24,177 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:13:24,177 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:13:24,178 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 12:13:24,178 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 12:13:24,178 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:13:24,178 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:13:24,178 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 12:13:24,178 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:13:24,178 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:13:24,707 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 12:13:24,928 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 12:14:05,021 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:14:05,021 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:14:05,021 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 12:14:05,022 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:14:05,022 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:14:05,022 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 12:14:05,022 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:14:05,022 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:14:05,022 - auto_diffusers - INFO - Starting code generation for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:14:05,022 - auto_diffusers - DEBUG - Parameters: prompt='A cat holding a sign that says hello world...', size=(768, 1360), steps=4
+2025-05-30 12:14:05,022 - auto_diffusers - DEBUG - Manual specs: True, Memory analysis provided: True
+2025-05-30 12:14:05,022 - auto_diffusers - INFO - Using manual hardware specifications
+2025-05-30 12:14:05,022 - auto_diffusers - DEBUG - Manual specs: {'platform': 'Linux', 'architecture': 'manual_input', 'cpu_count': 8, 'python_version': '3.11', 'cuda_available': False, 'mps_available': False, 'torch_version': '2.0+', 'manual_input': True, 'ram_gb': 16, 'user_dtype': None, 'gpu_info': [{'name': 'Custom GPU', 'memory_mb': 8192}]}
+2025-05-30 12:14:05,022 - auto_diffusers - DEBUG - GPU detected with 8.0 GB VRAM
+2025-05-30 12:14:05,022 - auto_diffusers - INFO - Selected optimization profile: balanced
+2025-05-30 12:14:05,022 - auto_diffusers - DEBUG - Creating generation prompt for Gemini API
+2025-05-30 12:14:05,022 - auto_diffusers - DEBUG - Prompt length: 7598 characters
+2025-05-30 12:14:05,022 - auto_diffusers - INFO - ================================================================================
+2025-05-30 12:14:05,022 - auto_diffusers - INFO - PROMPT SENT TO GEMINI API:
+2025-05-30 12:14:05,022 - auto_diffusers - INFO - ================================================================================
+2025-05-30 12:14:05,022 - auto_diffusers - INFO - 
+You are an expert in optimizing diffusers library code for different hardware configurations.
+
+NOTE: This system includes curated optimization knowledge from HuggingFace documentation.
+
+TASK: Generate optimized Python code for running a diffusion model with the following specifications:
+- Model: black-forest-labs/FLUX.1-schnell
+- Prompt: "A cat holding a sign that says hello world"
+- Image size: 768x1360
+- Inference steps: 4
+
+HARDWARE SPECIFICATIONS:
+- Platform: Linux (manual_input)
+- CPU Cores: 8
+- CUDA Available: False
+- MPS Available: False
+- Optimization Profile: balanced
+- GPU: Custom GPU (8.0 GB VRAM)
+
+MEMORY ANALYSIS:
+- Model Memory Requirements: 36.0 GB (FP16 inference)
+- Model Weights Size: 24.0 GB (FP16)
+- Memory Recommendation: 🔄 Requires sequential CPU offloading
+- Recommended Precision: float16
+- Attention Slicing Recommended: True
+- VAE Slicing Recommended: True
+
+OPTIMIZATION KNOWLEDGE BASE:
+
+# DIFFUSERS OPTIMIZATION TECHNIQUES
+
+## Memory Optimization Techniques
+
+### 1. Model CPU Offloading
+Use `enable_model_cpu_offload()` to move models between GPU and CPU automatically:
+```python
+pipe.enable_model_cpu_offload()
+```
+- Saves significant VRAM by keeping only active models on GPU
+- Automatic management, no manual intervention needed
+- Compatible with all pipelines
+
+### 2. Sequential CPU Offloading  
+Use `enable_sequential_cpu_offload()` for more aggressive memory saving:
+```python
+pipe.enable_sequential_cpu_offload()
+```
+- More memory efficient than model offloading
+- Moves models to CPU after each forward pass
+- Best for very limited VRAM scenarios
+
+### 3. Attention Slicing
+Use `enable_attention_slicing()` to reduce memory during attention computation:
+```python
+pipe.enable_attention_slicing()
+# or specify slice size
+pipe.enable_attention_slicing("max")  # maximum slicing
+pipe.enable_attention_slicing(1)      # slice_size = 1
+```
+- Trades compute time for memory
+- Most effective for high-resolution images
+- Can be combined with other techniques
+
+### 4. VAE Slicing
+Use `enable_vae_slicing()` for large batch processing:
+```python
+pipe.enable_vae_slicing()
+```
+- Decodes images one at a time instead of all at once
+- Essential for batch sizes > 4
+- Minimal performance impact on single images
+
+### 5. VAE Tiling
+Use `enable_vae_tiling()` for high-resolution image generation:
+```python
+pipe.enable_vae_tiling()
+```
+- Enables 4K+ image generation on 8GB VRAM
+- Splits images into overlapping tiles
+- Automatically disabled for 512x512 or smaller images
+
+### 6. Memory Efficient Attention (xFormers)
+Use `enable_xformers_memory_efficient_attention()` if xFormers is installed:
+```python
+pipe.enable_xformers_memory_efficient_attention()
+```
+- Significantly reduces memory usage and improves speed
+- Requires xformers library installation
+- Compatible with most models
+
+## Performance Optimization Techniques
+
+### 1. Half Precision (FP16/BF16)
+Use lower precision for better memory and speed:
+```python
+# FP16 (widely supported)
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+
+# BF16 (better numerical stability, newer hardware)
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
+```
+- FP16: Halves memory usage, widely supported
+- BF16: Better numerical stability, requires newer GPUs
+- Essential for most optimization scenarios
+
+### 2. Torch Compile (PyTorch 2.0+)
+Use `torch.compile()` for significant speed improvements:
+```python
+pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+# For some models, compile VAE too:
+pipe.vae.decode = torch.compile(pipe.vae.decode, mode="reduce-overhead", fullgraph=True)
+```
+- 5-50% speed improvement
+- Requires PyTorch 2.0+
+- First run is slower due to compilation
+
+### 3. Fast Schedulers
+Use faster schedulers for fewer steps:
+```python
+from diffusers import LMSDiscreteScheduler, UniPCMultistepScheduler
+
+# LMS Scheduler (good quality, fast)
+pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config)
+
+# UniPC Scheduler (fastest)
+pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
+```
+
+## Hardware-Specific Optimizations
+
+### NVIDIA GPU Optimizations
+```python
+# Enable Tensor Cores
+torch.backends.cudnn.benchmark = True
+
+# Optimal data type for NVIDIA
+torch_dtype = torch.float16  # or torch.bfloat16 for RTX 30/40 series
+```
+
+### Apple Silicon (MPS) Optimizations
+```python
+# Use MPS device
+device = "mps" if torch.backends.mps.is_available() else "cpu"
+pipe = pipe.to(device)
+
+# Recommended dtype for Apple Silicon
+torch_dtype = torch.bfloat16  # Better than float16 on Apple Silicon
+
+# Attention slicing often helps on MPS
+pipe.enable_attention_slicing()
+```
+
+### CPU Optimizations
+```python
+# Use float32 for CPU
+torch_dtype = torch.float32
+
+# Enable optimized attention
+pipe.enable_attention_slicing()
+```
+
+## Model-Specific Guidelines
+
+### FLUX Models
+- Do NOT use guidance_scale parameter (not needed for FLUX)
+- Use 4-8 inference steps maximum
+- BF16 dtype recommended
+- Enable attention slicing for memory optimization
+
+### Stable Diffusion XL
+- Enable attention slicing for high resolutions
+- Use refiner model sparingly to save memory
+- Consider VAE tiling for >1024px images
+
+### Stable Diffusion 1.5/2.1
+- Very memory efficient base models
+- Can often run without optimizations on 8GB+ VRAM
+- Enable VAE slicing for batch processing
+
+## Memory Usage Estimation
+- FLUX.1: ~24GB for full precision, ~12GB for FP16
+- SDXL: ~7GB for FP16, ~14GB for FP32
+- SD 1.5: ~2GB for FP16, ~4GB for FP32
+
+## Optimization Combinations by VRAM
+
+### 24GB+ VRAM (High-end)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16)
+pipe = pipe.to("cuda")
+pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+```
+
+### 12-24GB VRAM (Mid-range)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+pipe = pipe.to("cuda")
+pipe.enable_model_cpu_offload()
+pipe.enable_xformers_memory_efficient_attention()
+```
+
+### 8-12GB VRAM (Entry-level)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+pipe.enable_sequential_cpu_offload()
+pipe.enable_attention_slicing()
+pipe.enable_vae_slicing()
+pipe.enable_xformers_memory_efficient_attention()
+```
+
+### <8GB VRAM (Low-end)
+```python
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+pipe.enable_sequential_cpu_offload()
+pipe.enable_attention_slicing("max")
+pipe.enable_vae_slicing()
+pipe.enable_vae_tiling()
+```
+
+
+IMPORTANT: For FLUX.1-schnell models, do NOT include guidance_scale parameter as it's not needed.
+
+Using the OPTIMIZATION KNOWLEDGE BASE above, generate Python code that:
+
+1. **Selects the best optimization techniques** for the specific hardware profile
+2. **Applies appropriate memory optimizations** based on available VRAM
+3. **Uses optimal data types** for the target hardware:
+   - User specified dtype (if provided): Use exactly as specified
+   - Apple Silicon (MPS): prefer torch.bfloat16
+   - NVIDIA GPUs: prefer torch.float16 or torch.bfloat16 
+   - CPU only: use torch.float32
+4. **Implements hardware-specific optimizations** (CUDA, MPS, CPU)
+5. **Follows model-specific guidelines** (e.g., FLUX guidance_scale handling)
+
+IMPORTANT GUIDELINES:
+- Reference the OPTIMIZATION KNOWLEDGE BASE to select appropriate techniques
+- Include all necessary imports
+- Add brief comments explaining optimization choices
+- Generate compact, production-ready code
+- Inline values where possible for concise code
+- Generate ONLY the Python code, no explanations before or after the code block
+
+2025-05-30 12:14:05,023 - auto_diffusers - INFO - ================================================================================
+2025-05-30 12:14:05,023 - auto_diffusers - INFO - Sending request to Gemini API
+2025-05-30 12:14:10,979 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 12:14:10,979 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 12:14:10,979 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 12:14:10,979 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 12:14:10,979 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 12:14:10,979 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 12:14:10,979 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 12:14:10,979 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 12:14:10,979 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 12:14:10,979 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 12:14:10,983 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 12:14:10,983 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 12:14:11,437 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 12:14:11,437 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 12:14:11,437 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 12:14:11,437 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 12:14:11,437 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 12:14:11,437 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 12:14:11,437 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 12:14:11,437 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 12:14:11,437 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 12:14:11,437 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 12:14:11,437 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 12:14:11,439 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 12:14:11,452 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 12:14:11,459 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 12:14:11,537 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 12:14:11,571 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 12:14:11,571 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x13e431f10>
+2025-05-30 12:14:11,571 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:14:11,572 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:14:11,572 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:14:11,572 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:14:11,572 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:14:11,572 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 03:14:11 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 12:14:11,572 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 12:14:11,572 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:14:11,572 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:14:11,572 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:14:11,572 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:14:11,572 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:14:11,572 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:14:11,573 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 12:14:11,573 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x13f958150>
+2025-05-30 12:14:11,573 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 12:14:11,574 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:14:11,574 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 12:14:11,574 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:14:11,574 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 12:14:11,580 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 03:14:11 GMT'), (b'server', b'uvicorn'), (b'content-length', b'101701'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 12:14:11,580 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 12:14:11,580 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 12:14:11,580 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:14:11,580 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:14:11,580 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:14:11,580 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:14:11,580 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:14:11,592 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 12:14:11,602 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x13e191350>
+2025-05-30 12:14:11,602 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x11cbc11c0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 12:14:11,721 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 12:14:11,734 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x13d2a7d10>
+2025-05-30 12:14:11,734 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x13e4cfec0> server_hostname='api.gradio.app' timeout=30
+2025-05-30 12:14:11,887 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x11cc81c90>
+2025-05-30 12:14:11,887 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:14:11,887 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:14:11,887 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:14:11,888 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:14:11,888 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:14:12,022 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x13d6dc090>
+2025-05-30 12:14:12,022 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:14:12,022 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:14:12,023 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:14:12,023 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:14:12,023 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:14:12,047 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 03:14:11 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 12:14:12,048 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 12:14:12,048 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:14:12,048 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:14:12,048 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:14:12,048 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:14:12,048 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:14:12,049 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:14:12,168 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 03:14:12 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 12:14:12,168 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 12:14:12,168 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:14:12,168 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:14:12,168 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:14:12,168 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:14:12,168 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:14:12,169 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:14:12,334 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:14:12,334 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:14:12,335 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 12:14:12,335 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 12:14:12,335 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:14:12,335 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:14:12,335 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 12:14:12,335 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:14:12,335 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:14:12,733 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 12:14:12,951 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0
+2025-05-30 12:15:12,436 - __main__ - INFO - Initializing GradioAutodiffusers
+2025-05-30 12:15:12,436 - __main__ - DEBUG - API key found, length: 39
+2025-05-30 12:15:12,436 - auto_diffusers - INFO - Initializing AutoDiffusersGenerator
+2025-05-30 12:15:12,436 - auto_diffusers - DEBUG - API key length: 39
+2025-05-30 12:15:12,437 - auto_diffusers - WARNING - Tool calling dependencies not available, running without tools
+2025-05-30 12:15:12,437 - hardware_detector - INFO - Initializing HardwareDetector
+2025-05-30 12:15:12,437 - hardware_detector - DEBUG - Starting system hardware detection
+2025-05-30 12:15:12,437 - hardware_detector - DEBUG - Platform: Darwin, Architecture: arm64
+2025-05-30 12:15:12,437 - hardware_detector - DEBUG - CPU cores: 16, Python: 3.11.11
+2025-05-30 12:15:12,437 - hardware_detector - DEBUG - Attempting GPU detection via nvidia-smi
+2025-05-30 12:15:12,440 - hardware_detector - DEBUG - nvidia-smi not found, no NVIDIA GPU detected
+2025-05-30 12:15:12,441 - hardware_detector - DEBUG - Checking PyTorch availability
+2025-05-30 12:15:12,913 - hardware_detector - INFO - PyTorch 2.7.0 detected
+2025-05-30 12:15:12,913 - hardware_detector - DEBUG - CUDA available: False, MPS available: True
+2025-05-30 12:15:12,913 - hardware_detector - INFO - Hardware detection completed successfully
+2025-05-30 12:15:12,913 - hardware_detector - DEBUG - Detected specs: {'platform': 'Darwin', 'architecture': 'arm64', 'cpu_count': 16, 'python_version': '3.11.11', 'gpu_info': None, 'cuda_available': False, 'mps_available': True, 'torch_version': '2.7.0'}
+2025-05-30 12:15:12,913 - auto_diffusers - INFO - Hardware detector initialized successfully
+2025-05-30 12:15:12,913 - __main__ - INFO - AutoDiffusersGenerator initialized successfully
+2025-05-30 12:15:12,913 - simple_memory_calculator - INFO - Initializing SimpleMemoryCalculator
+2025-05-30 12:15:12,913 - simple_memory_calculator - DEBUG - HuggingFace API initialized
+2025-05-30 12:15:12,913 - simple_memory_calculator - DEBUG - Known models in database: 4
+2025-05-30 12:15:12,913 - __main__ - INFO - SimpleMemoryCalculator initialized successfully
+2025-05-30 12:15:12,913 - __main__ - DEBUG - Default model settings: gemini-2.5-flash-preview-05-20, temp=0.7
+2025-05-30 12:15:12,915 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 12:15:12,929 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=3 socket_options=None
+2025-05-30 12:15:12,935 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 12:15:13,014 - asyncio - DEBUG - Using selector: KqueueSelector
+2025-05-30 12:15:13,048 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=None socket_options=None
+2025-05-30 12:15:13,048 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12744b350>
+2025-05-30 12:15:13,048 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:15:13,049 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:15:13,049 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:15:13,049 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:15:13,049 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:15:13,049 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 03:15:13 GMT'), (b'server', b'uvicorn'), (b'content-length', b'4'), (b'content-type', b'application/json')])
+2025-05-30 12:15:13,049 - httpx - INFO - HTTP Request: GET http://localhost:7860/gradio_api/startup-events "HTTP/1.1 200 OK"
+2025-05-30 12:15:13,049 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:15:13,049 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:15:13,050 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:15:13,050 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:15:13,050 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:15:13,050 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:15:13,050 - httpcore.connection - DEBUG - connect_tcp.started host='localhost' port=7860 local_address=None timeout=3 socket_options=None
+2025-05-30 12:15:13,050 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x127458490>
+2025-05-30 12:15:13,051 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'HEAD']>
+2025-05-30 12:15:13,051 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:15:13,051 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'HEAD']>
+2025-05-30 12:15:13,051 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:15:13,051 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'HEAD']>
+2025-05-30 12:15:13,057 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'date', b'Fri, 30 May 2025 03:15:13 GMT'), (b'server', b'uvicorn'), (b'content-length', b'101553'), (b'content-type', b'text/html; charset=utf-8')])
+2025-05-30 12:15:13,057 - httpx - INFO - HTTP Request: HEAD http://localhost:7860/ "HTTP/1.1 200 OK"
+2025-05-30 12:15:13,057 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'HEAD']>
+2025-05-30 12:15:13,057 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:15:13,057 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:15:13,057 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:15:13,057 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:15:13,057 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:15:13,069 - httpcore.connection - DEBUG - connect_tcp.started host='api.gradio.app' port=443 local_address=None timeout=30 socket_options=None
+2025-05-30 12:15:13,100 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x12559ad90>
+2025-05-30 12:15:13,100 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x1251955b0> server_hostname='api.gradio.app' timeout=3
+2025-05-30 12:15:13,206 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x127440f90>
+2025-05-30 12:15:13,206 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x1268afec0> server_hostname='api.gradio.app' timeout=30
+2025-05-30 12:15:13,294 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/initiated HTTP/1.1" 200 0
+2025-05-30 12:15:13,391 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1255a8590>
+2025-05-30 12:15:13,392 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:15:13,392 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:15:13,392 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:15:13,392 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:15:13,392 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:15:13,518 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x1274a2c10>
+2025-05-30 12:15:13,518 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'GET']>
+2025-05-30 12:15:13,519 - httpcore.http11 - DEBUG - send_request_headers.complete
+2025-05-30 12:15:13,519 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'GET']>
+2025-05-30 12:15:13,519 - httpcore.http11 - DEBUG - send_request_body.complete
+2025-05-30 12:15:13,519 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'GET']>
+2025-05-30 12:15:13,537 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 03:15:13 GMT'), (b'Content-Type', b'application/json'), (b'Content-Length', b'21'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'Access-Control-Allow-Origin', b'*')])
+2025-05-30 12:15:13,538 - httpx - INFO - HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
+2025-05-30 12:15:13,538 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:15:13,538 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:15:13,538 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:15:13,538 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:15:13,538 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:15:13,539 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:15:13,657 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Date', b'Fri, 30 May 2025 03:15:13 GMT'), (b'Content-Type', b'text/html; charset=utf-8'), (b'Transfer-Encoding', b'chunked'), (b'Connection', b'keep-alive'), (b'Server', b'nginx/1.18.0'), (b'ContentType', b'application/json'), (b'Access-Control-Allow-Origin', b'*'), (b'Content-Encoding', b'gzip')])
+2025-05-30 12:15:13,657 - httpx - INFO - HTTP Request: GET https://api.gradio.app/v3/tunnel-request "HTTP/1.1 200 OK"
+2025-05-30 12:15:13,657 - httpcore.http11 - DEBUG - receive_response_body.started request=<Request [b'GET']>
+2025-05-30 12:15:13,658 - httpcore.http11 - DEBUG - receive_response_body.complete
+2025-05-30 12:15:13,658 - httpcore.http11 - DEBUG - response_closed.started
+2025-05-30 12:15:13,658 - httpcore.http11 - DEBUG - response_closed.complete
+2025-05-30 12:15:13,658 - httpcore.connection - DEBUG - close.started
+2025-05-30 12:15:13,659 - httpcore.connection - DEBUG - close.complete
+2025-05-30 12:15:13,910 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:15:13,910 - simple_memory_calculator - INFO - Using known memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:15:13,910 - simple_memory_calculator - DEBUG - Known data: {'params_billions': 12.0, 'fp16_gb': 24.0, 'inference_fp16_gb': 36.0}
+2025-05-30 12:15:13,910 - simple_memory_calculator - INFO - Generating memory recommendations for black-forest-labs/FLUX.1-schnell with 8.0GB VRAM
+2025-05-30 12:15:13,910 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:15:13,910 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:15:13,911 - simple_memory_calculator - DEBUG - Model memory: 24.0GB, Inference memory: 36.0GB
+2025-05-30 12:15:13,911 - simple_memory_calculator - INFO - Getting memory requirements for model: black-forest-labs/FLUX.1-schnell
+2025-05-30 12:15:13,911 - simple_memory_calculator - DEBUG - Using cached memory data for black-forest-labs/FLUX.1-schnell
+2025-05-30 12:15:14,276 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): huggingface.co:443
+2025-05-30 12:15:14,494 - urllib3.connectionpool - DEBUG - https://huggingface.co:443 "HEAD /api/telemetry/gradio/launched HTTP/1.1" 200 0