WillHeld committed
Commit f66cdbc · verified · 1 Parent(s): 51b5709

Update app.py

Files changed (1)
  1. app.py +84 -12
app.py CHANGED
@@ -5,7 +5,7 @@ from threading import Thread
 import os
 import json
 import uuid
-from datasets import Dataset
+from datasets import Dataset, load_dataset
 from huggingface_hub import HfApi, login
 import time
 
@@ -28,6 +28,63 @@ DATASET_FILENAME = "feedback.jsonl" # Filename for feedback data
 # Ensure feedback directory exists
 os.makedirs(DATASET_PATH, exist_ok=True)
 
+# Sync existing dataset from Hub if available
+def sync_dataset_from_hub():
+    """Download existing dataset from Hub and merge with local data"""
+    try:
+        # Try to get token from environment variable
+        hf_token = os.environ.get("HF_TOKEN")
+        if hf_token:
+            login(token=hf_token)
+
+        # Check if the dataset exists on Hub
+        api = HfApi()
+        try:
+            dataset_info = api.dataset_info(DATASET_REPO)
+            # Dataset exists, download it
+            print(f"Syncing existing dataset from {DATASET_REPO}")
+            remote_dataset = load_dataset(DATASET_REPO)
+
+            # Convert to list of dictionaries
+            remote_data = [item for item in remote_dataset['train']]
+
+            # Check if local file exists
+            local_file = os.path.join(DATASET_PATH, DATASET_FILENAME)
+            local_data = []
+
+            if os.path.exists(local_file):
+                # Read local data
+                with open(local_file, 'r') as f:
+                    for line in f:
+                        try:
+                            local_data.append(json.loads(line))
+                        except json.JSONDecodeError:
+                            continue
+
+            # Merge data (using IDs to avoid duplicates)
+            all_items = {}
+            for item in remote_data + local_data:
+                all_items[item['id']] = item
+
+            # Write back merged data
+            with open(local_file, 'w') as f:
+                for item in all_items.values():
+                    f.write(json.dumps(item) + '\n')
+
+            print(f"Synced {len(all_items)} feedback items")
+            return True
+
+        except Exception as e:
+            print(f"Dataset {DATASET_REPO} does not exist yet or could not be accessed: {e}")
+            return False
+
+    except Exception as e:
+        print(f"Error syncing dataset: {e}")
+        return False
+
+# Call sync on startup
+sync_dataset_from_hub()
+
 # Feedback storage functions
 def save_feedback_locally(conversation, satisfaction, feedback_text):
     """Save feedback to a local JSONL file"""
@@ -96,13 +153,18 @@ def push_feedback_to_hub(hf_token=None):
 # Modified predict function to update conversation state
 @spaces.GPU(duration=120)
 def predict(message, history, state, temperature, top_p):
+    # Create a deep copy of history to ensure we don't modify the original
+    current_history = history.copy()
+
     # Update history with user message
-    history.append({"role": "user", "content": message})
+    current_history.append({"role": "user", "content": message})
 
-    # Update the conversation state
-    state = history.copy()
+    # Update the conversation state with user message
+    if not state:
+        state = []
+    state = current_history.copy()
 
-    input_text = tokenizer.apply_chat_template(history, tokenize=False, add_generation_prompt=True)
+    input_text = tokenizer.apply_chat_template(current_history, tokenize=False, add_generation_prompt=True)
     inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
 
     # Create a streamer
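One caveat on the new "deep copy" comment: list.copy() is a shallow copy, so history and current_history still share the same message dicts; only appends are isolated. If real isolation mattered, copy.deepcopy would be needed, as this small sketch shows:

```python
import copy

history = [{"role": "user", "content": "hi"}]

shallow = history.copy()
shallow[0]["content"] = "edited"   # mutates the dict shared with history
assert history[0]["content"] == "edited"

deep = copy.deepcopy(history)
deep[0]["content"] = "isolated"    # the original is untouched this time
assert history[0]["content"] == "edited"
```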
@@ -127,11 +189,22 @@ def predict(message, history, state, temperature, top_p):
     partial_text = ""
     for new_text in streamer:
         partial_text += new_text
-        yield partial_text, state
+
+        # Create a temporary state with partial response
+        temp_history = current_history.copy()
+        temp_history.append({"role": "assistant", "content": partial_text})
+        temp_state = temp_history.copy()
+
+        yield partial_text, temp_state
 
-    # After full generation, update state with assistant's response
-    history.append({"role": "assistant", "content": partial_text})
-    state = history.copy()
+    # After full generation, update state with assistant's final response
+    current_history.append({"role": "assistant", "content": partial_text})
+    state = current_history.copy()
+
+    # Print debug info
+    print(f"Updated state with {len(state)} messages")
+    print(f"Last message: {state[-1]['role']}: {state[-1]['content'][:30]}...")
 
     return partial_text, state
 
 # Function to handle the research feedback submission
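For context on the loop above: the streamer is presumably transformers' TextIteratorStreamer fed by model.generate running on a background thread (app.py already imports Thread). A sketch of that standard pattern, assuming model, tokenizer, and the encoded inputs are defined as in predict():

```python
from threading import Thread
from transformers import TextIteratorStreamer

# Assumes `model`, `tokenizer`, and the encoded `inputs` tensor exist,
# as they do inside predict() in app.py.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
thread = Thread(target=model.generate,
                kwargs={"inputs": inputs, "streamer": streamer, "max_new_tokens": 512})
thread.start()

partial_text = ""
for new_text in streamer:      # yields decoded text chunks as tokens arrive
    partial_text += new_text
thread.join()
```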
@@ -164,8 +237,8 @@ with gr.Blocks() as demo:
     def chat_with_state(message, history, state, temperature, top_p):
         for partial_response, updated_state in predict(message, history, state, temperature, top_p):
             # Update our state with each yield
-            state = updated_state
-            yield partial_response, state
+            conversation_state.value = updated_state
+            yield partial_response
 
     # Create ChatInterface
     chatbot = gr.ChatInterface(
@@ -175,7 +248,6 @@ with gr.Blocks() as demo:
             gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
             gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-P")
         ],
-        additional_outputs=[conversation_state],
         type="messages"
     )
 
 
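A note on the chat_with_state change: in Gradio, assigning to conversation_state.value from inside an event handler mutates the component's default value, which is shared process-wide, rather than the per-session state. The documented per-session pattern is to pass gr.State in as an input and return the updated value as an output. A minimal sketch of that wiring, independent of this app:

```python
import gradio as gr

with gr.Blocks() as demo:
    state = gr.State([])          # per-session conversation state
    msg = gr.Textbox(label="Message")
    out = gr.Textbox(label="Reply")

    def respond(message, history):
        history = history + [{"role": "user", "content": message}]
        reply = "echo: " + message          # placeholder for model output
        history = history + [{"role": "assistant", "content": reply}]
        return reply, history               # returned state is stored per session

    msg.submit(respond, inputs=[msg, state], outputs=[out, state])
```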