adil9858 committed on
Commit
5a2da34
·
verified ·
1 Parent(s): 9808be5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +139 -0
app.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import base64
import io
import os
from datetime import datetime

import gradio as gr
from openai import OpenAI
from PIL import Image
7
+
8
# OpenAI-compatible client pointed at the OpenRouter API.
#
# The API key is a secret, so it is read from the OPENROUTER_API_KEY
# environment variable (e.g. a Hugging Face Space secret) instead of being
# hard-coded in the source file.
# NOTE(review): the key that was previously committed in this file is exposed
# in the repository history and should be revoked.
_api_key = os.environ.get("OPENROUTER_API_KEY")
if not _api_key:
    # Fail fast with an actionable message rather than a late, opaque
    # authentication error on the first request.
    raise RuntimeError(
        "OPENROUTER_API_KEY is not set; export it (or add it as a Space "
        "secret) before starting the app."
    )

client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=_api_key,
)
13
+
14
# System message sent with every request; defines the assistant persona.
_SYSTEM_PROMPT = """You are Dalton, an expert AI assistant specialized in image understanding.
Your tasks include:
- Extracting and structuring text from images
- Answering questions about image content
- Providing detailed descriptions
- Analyzing receipts, documents, and other visual content
Be thorough, accurate, and helpful in your responses."""

# Image modes handed to the JPEG encoder unchanged; anything else (RGBA, LA,
# P, ...) is converted to RGB first because JPEG has no alpha/palette support.
_JPEG_SAFE_MODES = ("RGB", "L")


def _encode_image_as_jpeg_base64(image):
    """Return *image* serialized as JPEG and encoded as a base64 string."""
    if image.mode not in _JPEG_SAFE_MODES:
        image = image.convert("RGB")
    buffer = io.BytesIO()
    image.save(buffer, format="JPEG")
    return base64.b64encode(buffer.getvalue()).decode("utf-8")


def analyze_image(image, prompt):
    """Send an image and a text prompt to the vision model and return its reply.

    Args:
        image: A PIL-style image object (must expose ``mode``, ``convert`` and
            ``save``), or ``None`` when no image was supplied.
        prompt: The user's question or instruction about the image.

    Returns:
        The model's text answer, or a human-readable message when no image was
        supplied, the model returned nothing, or any step failed. The function
        never raises, so the UI always has a string to display.
    """
    if image is None:
        return "Please upload or capture an image first."

    # This is the UI boundary: every failure (image encoding, network, API)
    # is reported to the user as text instead of crashing the handler.
    try:
        img_str = _encode_image_as_jpeg_base64(image)

        response = client.chat.completions.create(
            model="opengvlab/internvl3-14b:free",
            messages=[
                {"role": "system", "content": _SYSTEM_PROMPT},
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{img_str}"
                            },
                        },
                    ],
                },
            ],
            max_tokens=2048,
        )

        # Guard against a response without choices or with empty content so
        # the user gets an explicit message instead of a blank result.
        choices = response.choices
        if not choices or not choices[0].message.content:
            return "An error occurred: the model returned an empty response."
        return choices[0].message.content

    except Exception as e:
        return f"An error occurred: {e}"
58
+
59
# Custom CSS for a better mobile experience. The selectors match the
# elem_id / elem_classes values assigned to the components below and the
# "footer" class used in the raw HTML footer.
css = """
#mobile-camera { width: 100% !important; }
#prompt-textbox { min-height: 100px !important; }
.result-box {
    max-height: 500px;
    overflow-y: auto;
    padding: 15px;
    border: 1px solid #e0e0e0;
    border-radius: 8px;
}
.footer {
    margin-top: 20px;
    font-size: 12px;
    color: #666;
    text-align: center;
}
"""

# Two-column layout: inputs (image, prompt, button, examples) on the left,
# the model's Markdown-rendered answer on the right.
with gr.Blocks(css=css, title="DaltonVision - Koshur AI") as demo:
    gr.Markdown("""
    # 🧾 DaltonVision - InternVL3-14B
    ### Advanced Image Understanding • Powered by OpenRouter • Developed by [Koshur AI](https://koshurai.com)
    """)

    with gr.Row():
        with gr.Column():
            # Image input: file upload or webcam capture, delivered to the
            # handler as a PIL image.
            image_input = gr.Image(
                sources=["upload", "webcam"],
                type="pil",
                label="Upload or Capture Image",
                elem_id="mobile-camera",
            )

            # Prompt input, pre-filled with a default instruction.
            prompt_input = gr.Textbox(
                label="📝 Enter your question or instruction",
                value="Extract all content structurally",
                lines=3,
                elem_id="prompt-textbox",
            )

            submit_btn = gr.Button("🔍 Analyze Image", variant="primary")

            # Clickable example prompts that populate the prompt textbox.
            gr.Examples(
                examples=[
                    ["What is the total amount on this receipt?"],
                    ["List all items and their prices"],
                    ["Who is the vendor and what is the date?"],
                    ["Describe this image in detail"],
                ],
                inputs=[prompt_input],
                label="💡 Try these example prompts:",
            )

        with gr.Column():
            # Result output; styled by the .result-box rule above.
            result_output = gr.Markdown(
                label="✅ Analysis Result",
                elem_classes="result-box",
            )

    # Footer
    gr.Markdown("""
    <div class="footer">
    © 2025 Koshur AI. All rights reserved.<br>
    Note: Images are processed in real-time and not stored.
    </div>
    """)

    # Button action: run the analysis and render the reply in the result box.
    submit_btn.click(
        fn=analyze_image,
        inputs=[image_input, prompt_input],
        outputs=result_output,
    )

# Launch the app only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()