Update app.py
Browse files
app.py
CHANGED
@@ -4,16 +4,18 @@ import base64
|
|
4 |
from PIL import Image
|
5 |
from io import BytesIO
|
6 |
import os
|
|
|
7 |
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
# Set the API key
|
12 |
os.environ["TOGETHER_API_KEY"] = "tgp_v1_ci8Tlva09oBrdDV89ULFNcyPgnR9NwNTQyvQ_4XBw3M"
|
|
|
13 |
|
14 |
# Initialize the Together client
|
15 |
together_client = Together(api_key=os.environ["TOGETHER_API_KEY"])
|
16 |
|
|
|
|
|
|
|
17 |
# Function to encode image to base64
|
18 |
def encode_image(image):
|
19 |
buffered = BytesIO()
|
@@ -23,7 +25,7 @@ def encode_image(image):
|
|
23 |
|
24 |
# Function to get image description from Together API
|
25 |
def get_image_description(image):
|
26 |
-
get_description_prompt = "Describe the given image in detail in only 20 words max"
|
27 |
|
28 |
# Encode the image to base64
|
29 |
base64_image = encode_image(image)
|
@@ -49,6 +51,27 @@ def get_image_description(image):
|
|
49 |
# Return the result from the API
|
50 |
return response.choices[0].message.content
|
51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
# Custom CSS for a futuristic look
|
53 |
st.markdown(
|
54 |
"""
|
@@ -93,34 +116,10 @@ st.markdown(
|
|
93 |
unsafe_allow_html=True,
|
94 |
)
|
95 |
|
96 |
-
def tts(text):
|
97 |
-
key = 'sk_8db078175c4eba3e2d4b500ffb167c46326e520bdf001f0e'
|
98 |
-
|
99 |
-
|
100 |
-
from elevenlabs.client import ElevenLabs
|
101 |
-
from elevenlabs import play
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
client = ElevenLabs(api_key=key)
|
106 |
-
|
107 |
-
|
108 |
-
audio = client.text_to_speech.convert(
|
109 |
-
text=text,
|
110 |
-
voice_id="JBFqnCBsd6RMkjVDRZzb",
|
111 |
-
model_id="eleven_multilingual_v2",
|
112 |
-
output_format="mp3_44100_128",
|
113 |
-
)
|
114 |
-
|
115 |
-
|
116 |
-
play(audio)
|
117 |
-
|
118 |
# Streamlit app layout
|
119 |
st.title("๐ฎ Visox | Koshur AI")
|
120 |
st.markdown("### See the world through AI's eyes!")
|
121 |
|
122 |
-
|
123 |
-
|
124 |
# Sidebar for additional info
|
125 |
st.sidebar.markdown("## About")
|
126 |
st.sidebar.markdown("This app uses advanced AI to describe what it sees through your camera in real-time.")
|
@@ -130,20 +129,22 @@ st.sidebar.markdown("Powered by [Together AI](https://together.ai) and Streamlit
|
|
130 |
img_file_buffer = st.camera_input("Take a picture")
|
131 |
|
132 |
if img_file_buffer is not None:
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
|
|
|
|
|
4 |
from PIL import Image
from io import BytesIO
import os

from elevenlabs.client import ElevenLabs

# --- API credentials --------------------------------------------------------
# SECURITY: these keys were previously hard-coded and committed to source,
# which (a) leaks the secrets — both keys must be considered compromised and
# rotated — and (b) the unconditional `os.environ[...] = ...` assignment
# clobbered any key supplied by the real environment. `setdefault` keeps the
# app runnable exactly as before while letting a deployment override via
# environment variables (or st.secrets). Do NOT ship secrets in code.
os.environ.setdefault("TOGETHER_API_KEY", "tgp_v1_ci8Tlva09oBrdDV89ULFNcyPgnR9NwNTQyvQ_4XBw3M")
os.environ.setdefault("ELEVENLABS_API_KEY", "sk_8db078175c4eba3e2d4b500ffb167c46326e520bdf001f0e")

# Initialize the Together client (`Together` is imported earlier in the file).
together_client = Together(api_key=os.environ["TOGETHER_API_KEY"])

# Initialize the ElevenLabs client used by tts().
elevenlabs_client = ElevenLabs(api_key=os.environ["ELEVENLABS_API_KEY"])
19 |
# Function to encode image to base64
|
20 |
def encode_image(image):
|
21 |
buffered = BytesIO()
|
|
|
25 |
|
26 |
# Function to get image description from Together API
|
27 |
def get_image_description(image):
|
28 |
+
get_description_prompt = "Describe the given image in detail in only 20 words max."
|
29 |
|
30 |
# Encode the image to base64
|
31 |
base64_image = encode_image(image)
|
|
|
51 |
# Return the result from the API
|
52 |
return response.choices[0].message.content
|
53 |
|
54 |
+
# Function to convert text to speech using ElevenLabs
def tts(text):
    """Convert *text* to speech with the ElevenLabs API and play it in Streamlit.

    The generated MP3 is written to a temporary file and rendered with
    ``st.audio``. Any API or I/O failure is reported via ``st.error`` rather
    than crashing the app.
    """
    try:
        # In the ElevenLabs v1+ SDK, convert() streams the audio back as an
        # iterator of byte chunks (NOT a single bytes object).
        audio = elevenlabs_client.text_to_speech.convert(
            text=text,
            voice_id="JBFqnCBsd6RMkjVDRZzb",  # Replace with your preferred voice ID
            model_id="eleven_multilingual_v2",
            output_format="mp3_44100_128",
        )

        # BUG FIX: writing the raw return value failed with TypeError when the
        # SDK returned a chunk generator (the broad except below masked it as
        # "Error generating speech"). Join chunks into bytes; tolerate older
        # SDKs that already return bytes.
        if isinstance(audio, (bytes, bytearray)):
            audio_bytes = bytes(audio)
        else:
            audio_bytes = b"".join(audio)

        # Save the audio to a temporary file so st.audio can serve it.
        audio_path = "temp_audio.mp3"
        with open(audio_path, "wb") as f:
            f.write(audio_bytes)

        # Play the audio in Streamlit.
        st.audio(audio_path, format="audio/mp3")
    except Exception as e:
        # Surface the failure in the UI; keep the app alive.
        st.error(f"Error generating speech: {e}")
|
74 |
+
|
75 |
# Custom CSS for a futuristic look
|
76 |
st.markdown(
|
77 |
"""
|
|
|
116 |
unsafe_allow_html=True,
|
117 |
)
|
118 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
119 |
# Streamlit app layout
# NOTE: the emoji below were mojibake in the committed file (UTF-8 emoji bytes
# decoded as a legacy codepage, e.g. "๐ฎ"); restored to the intended glyphs.
st.title("🔮 Visox | Koshur AI")
st.markdown("### See the world through AI's eyes!")

# Sidebar for additional info
st.sidebar.markdown("## About")
st.sidebar.markdown("This app uses advanced AI to describe what it sees through your camera in real-time.")

# Camera widget: returns None until the user actually takes a picture.
img_file_buffer = st.camera_input("Take a picture")

if img_file_buffer is not None:
    try:
        # Convert the image file buffer to a PIL Image
        img = Image.open(img_file_buffer)

        # Display the captured image
        st.image(img, caption='Captured Image', use_column_width=True)

        # Get and display the description
        with st.spinner('🔍 Analyzing the image...'):
            description = get_image_description(img)
        st.success('✅ Analysis complete!')
        st.markdown("### AI Description:")
        st.write(description)

        # Convert description to speech and play it.
        # NOTE(review): pressing the button reruns the script, which re-calls
        # get_image_description (an extra paid API call) before speaking —
        # consider caching the description in st.session_state.
        if st.button("🔊 Read Aloud"):
            tts(description)
    except Exception as e:
        # Catch-all UI boundary: report any capture/API failure to the user.
        st.error(f"An error occurred: {e}")
|