Spaces: Sleeping
Commit · 259448b
1 Parent(s): b0280c9
Add system
Browse files:
- .DS_Store +0 -0
- app.py +8 -6
- system/pledge_tracking.py +8 -8
- system/process_time.py +3 -3
- test.html +3 -1

.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
app.py CHANGED
@@ -71,7 +71,12 @@ def similar_pledges():
 
 def calculate_time_range(option: str, pledge_date: str = None):
     today = datetime.today()
-    pledge_date = datetime.strptime(pledge_date, "%Y-%m-%d")
+    # pledge_date = datetime.strptime(pledge_date, "%Y-%m-%d")
+
+    if isinstance(pledge_date, str):
+        pledge_date = datetime.strptime(pledge_date, "%Y-%m-%d")
+    elif not isinstance(pledge_date, datetime):
+        raise ValueError("pledge_date must be a str or datetime")
 
     if option == "week":
         one_week_ago = today - timedelta(days=7)
@@ -79,16 +84,13 @@ def calculate_time_range(option: str, pledge_date: str = None):
     elif option == "month":
         one_month_ago = today - timedelta(days=30)
         start = max(one_month_ago, pledge_date)
-    elif option == "year":
-        one_year_ago = today - timedelta(days=365)
-        start = max(one_year_ago, pledge_date)
     elif option == "since_pledge_date":
         if not pledge_date:
             raise ValueError("Pledge date is required for 'since_pledge_date' option")
         start = datetime.strptime(pledge_date, "%Y-%m-%d")
     else:
         raise ValueError("Invalid time range option")
-    print(start)
+    print(start, one_week_ago, one_month_ago, pledge_date)
     return start.strftime("%Y%m%d"), today.strftime("%Y%m%d")
 
 @app.route("/")
@@ -146,7 +148,7 @@ def run_model():
     time_start, time_end = calculate_time_range(time_range_option, pledge_date=pledge_date)
     print(f"[DEMO] Received claim: {claim}")
     print(f"[DEMO] Time range: {time_start} ~ {time_end}")
-    print(f"[DEMO]
+    print(f"[DEMO] Pledge date range: {pledge_date}")
 
     # user_id = str(uuid.uuid4())[:8]
     # outputs = run_pipeline(claim, pledge_date, pledge_author, time_start, timestamp, user_id)
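For context, a minimal runnable sketch of the patched helper follows. It is not the committed app.py: imports are added, the week branch's start line (elided between the hunks above) is assumed symmetric with the month branch, and the new debug print is dropped because it references one_week_ago/one_month_ago in branches where they would be unbound.

# Sketch only, assuming the elided week-branch line mirrors the month branch.
# The commit's since_pledge_date branch still calls strptime on pledge_date,
# which presumes a str; here the already-parsed datetime is reused instead.
from datetime import datetime, timedelta

def calculate_time_range(option: str, pledge_date=None):
    today = datetime.today()
    if isinstance(pledge_date, str):
        pledge_date = datetime.strptime(pledge_date, "%Y-%m-%d")
    elif not isinstance(pledge_date, datetime):
        raise ValueError("pledge_date must be a str or datetime")

    if option == "week":
        start = max(today - timedelta(days=7), pledge_date)
    elif option == "month":
        start = max(today - timedelta(days=30), pledge_date)
    elif option == "since_pledge_date":
        start = pledge_date
    else:
        raise ValueError("Invalid time range option")
    return start.strftime("%Y%m%d"), today.strftime("%Y%m%d")

# Example: a pledge made on 2024-01-15, queried over the last month.
print(calculate_time_range("month", "2024-01-15"))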
system/pledge_tracking.py CHANGED
@@ -57,7 +57,7 @@ def run_pipeline(claim, pledge_date, pledge_author, start_date, timestamp, user_
     with open(initial_tsv_file, "r", encoding="utf-8") as f:
         line_count = sum(1 for line in f)
     if update_fn:
-        update_fn(step_id, f"
+        update_fn(step_id, f"{line_count} URLs are retrieved")
     step_id+=1
 
 
@@ -70,7 +70,7 @@ def run_pipeline(claim, pledge_date, pledge_author, start_date, timestamp, user_
     with open(initial_scraped_output_path, "r", encoding="utf-8") as f:
         line_count = sum(1 for line in f if json.loads(line)["url2text"] != [])
     if update_fn:
-        update_fn(step_id, f"
+        update_fn(step_id, f"{line_count} URL pages have been sucessefully scraped")
     step_id+=1
 
 
@@ -85,7 +85,7 @@ def run_pipeline(claim, pledge_date, pledge_author, start_date, timestamp, user_
     questions = {line["question"] for line in json.load(f)["evidence"]}
     line_count = len(questions)
     if update_fn:
-        update_fn(step_id, f"
+        update_fn(step_id, f"{line_count} relevant queries are generated")
     step_id+=1
 
 else:
@@ -115,7 +115,7 @@ def run_pipeline(claim, pledge_date, pledge_author, start_date, timestamp, user_
     with open(augmented_tsv_file, "r", encoding="utf-8") as f:
         line_count = sum(1 for line in f)
     if update_fn:
-        update_fn(step_id, f"
+        update_fn(step_id, f"{line_count} URLs are retrieved")
     step_id+=1
 
     augmented_data_store_dir = os.path.join(pipeline_base_dir, "augmented_data_store")
@@ -126,7 +126,7 @@ def run_pipeline(claim, pledge_date, pledge_author, start_date, timestamp, user_
     with open(augmented_scraped_output_path, "r", encoding="utf-8") as f:
         line_count = sum(1 for line in f if json.loads(line)["url2text"] != [])
     if update_fn:
-        update_fn(step_id, f"
+        update_fn(step_id, f"{line_count} URL pages have been sucessefully scraped")
     step_id+=1
 
 
@@ -150,7 +150,7 @@ def run_pipeline(claim, pledge_date, pledge_author, start_date, timestamp, user_
     if "url" in doc:
         unique_urls.add(doc["url"])
     if update_fn:
-        update_fn(step_id, f"
+        update_fn(step_id, f"{len(unique_urls)} documents are selected")
     step_id+=1
 
     extracted_event_path = run_gpt4_event_extraction(data_dir=pipeline_base_dir, max_tokens=100000)
@@ -158,7 +158,7 @@ def run_pipeline(claim, pledge_date, pledge_author, start_date, timestamp, user_
     events_num = count_total_events(extracted_event_path)
 
     if update_fn:
-        update_fn(step_id, f"
+        update_fn(step_id, f"{events_num} events are extracted from those documents.")
     step_id+=1
 
 
@@ -173,7 +173,7 @@ def run_pipeline(claim, pledge_date, pledge_author, start_date, timestamp, user_
         claim=claim,
         suggestion_meta=suggestion_meta
     )
-    print(sorted_events)
+    # print(sorted_events)
     df = pd.DataFrame(sorted_events)
     sorted_event_path = f"{pipeline_base_dir}/sorted_events.xlsx"
    df.to_excel(sorted_event_path, index=False)
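The recurring pattern in this file is a progress callback: each pipeline stage counts its outputs and, when the caller supplied update_fn, reports a one-line status before bumping step_id. A minimal sketch of that contract; the console updater below is illustrative, not part of the repo:

# Hypothetical updater satisfying the update_fn(step_id, message) contract.
def console_update_fn(step_id: int, message: str) -> None:
    print(f"[step {step_id}] {message}")

update_fn = console_update_fn
step_id = 0
line_count = 42  # e.g. URLs counted from the retrieval TSV
if update_fn:
    update_fn(step_id, f"{line_count} URLs are retrieved")
step_id += 1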
system/process_time.py CHANGED
@@ -169,7 +169,7 @@ def extract_and_sort_events(data_dir, pledge_date, pledge_author, claim, suggest
     file_path = os.path.join(data_dir, "gpt4_event_extraction", "gpt4o_results_0_claim.json")
     gpt4_results_json = load_json(file_path)
 
-    print(gpt4_results_json)
+    # print(gpt4_results_json)
     train_file_path = hf_hub_download(
         repo_id="PledgeTracker/demo_feedback",
         filename="train_useful.json",
@@ -179,7 +179,7 @@ def extract_and_sort_events(data_dir, pledge_date, pledge_author, claim, suggest
 
     with open(train_file_path, "r", encoding="utf-8") as f:
         train_data = json.load(f)
-    print(train_data[0])
+    # print(train_data[0])
 
 
 
@@ -225,7 +225,7 @@ def extract_and_sort_events(data_dir, pledge_date, pledge_author, claim, suggest
 
     test_instance = f"Pledge: {pledge} (Speaker: {pledge_author}; Pledge Date: {pledge_date})\nEvent Summary: {event['event']} (Event Date: {original_date})\nIs this event summary useful?"
 
-    print(test_instance)
+    # print(test_instance)
 
     label, score = gpt_eval(test_instance, train_data, instruction, suggestion_meta, ICL_id=ICL_id)
 
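For reference, the prompt string that gpt_eval classifies is assembled as in the sketch below; every field value here is invented for illustration, only the template comes from the diff:

# Illustrative values only; the real ones come from the pipeline outputs.
pledge = "We will build 40 new hospitals"
pledge_author = "Example Speaker"
pledge_date = "2019-12-12"
event = {"event": "Funding approved for six hospital upgrades"}
original_date = "2021-05-03"

test_instance = (
    f"Pledge: {pledge} (Speaker: {pledge_author}; Pledge Date: {pledge_date})\n"
    f"Event Summary: {event['event']} (Event Date: {original_date})\n"
    "Is this event summary useful?"
)
print(test_instance)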
test.html CHANGED
@@ -317,7 +317,9 @@
     const data = await eventsRes.json();
     if (!Array.isArray(data)) throw new Error("Unexpected data format");
 
-    p.innerHTML = `<strong>We have found ${data.length} events for this pledge.</strong><br><br>` +
+    // p.innerHTML = `<strong>We have found ${data.length} events for this pledge.</strong><br><br>` +
+    // data.map((e, index) => `
+    p.innerHTML =
     data.map((e, index) => `
       <div class="mb-6 border-b pb-4">
         🗓️ <b>${e.date}</b>: ${e.event}<br>
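The front end assumes each element of the events array carries date and event fields; those two names come from the template above, while the sample payload below is invented:

# Shape of the events response consumed by test.html (illustrative values).
import json

events = [
    {"date": "2021-05-03", "event": "Funding approved for six hospital upgrades"},
    {"date": "2022-11-19", "event": "Construction began on two sites"},
]
print(json.dumps(events, indent=2))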