yulongchen committed on
Commit
4a83892
·
1 Parent(s): 765435c

Add system

Browse files
Files changed (1) hide show
  1. system/process_time.py +62 -44
system/process_time.py CHANGED
@@ -114,50 +114,69 @@ def extract_columns_to_dict(file_path, delimiter='\t'):
114
  return data_dict
115
 
116
 
 
 
 
117
  def parse_date(date_str):
118
  if not date_str:
119
  return None, date_str
 
 
 
120
  try:
121
  return datetime.datetime.strptime(date_str, "%Y-%m-%d"), date_str
122
  except ValueError:
123
- match = re.search(r'(.*) \(relative to (\d{4}-\d{2}-\d{2})\)', date_str)
124
- if match:
125
- reference = datetime.datetime.strptime(match.group(2), "%Y-%m-%d")
126
- if "Last month" in match.group(1):
127
- return reference - datetime.timedelta(days=30), date_str
128
- elif "Yesterday" in match.group(1):
129
- return reference - datetime.timedelta(days=1), date_str
130
- elif "Last week" in match.group(1):
131
- return reference - datetime.timedelta(days=7), date_str
132
- elif "This week" in match.group(1):
133
- return reference, date_str
134
-
135
- # 处理不同格式的日期
136
- match = re.fullmatch(r'\d{4}', date_str) # 处理年份格式: '2014'
137
- if match:
138
- return datetime.datetime(int(date_str), 1, 1), date_str
139
-
140
- match = re.fullmatch(r'(\w+) (\d{4})', date_str) # 处理月份+年份格式: 'November 2023'
141
- if match:
142
- try:
143
- return datetime.datetime.strptime(date_str, "%B %Y"), date_str
144
- except ValueError:
145
- return None, date_str
146
-
147
- match = re.fullmatch(r'(\d{4})-Q(\d)', date_str) # 处理季度格式: '2024-Q1'
148
- if match:
149
- year, quarter = int(match.group(1)), int(match.group(2))
150
- month = (quarter - 1) * 3 + 1
151
- return datetime.datetime(year, month, 1), date_str
152
-
153
- match = re.fullmatch(r'(\d{4}) (Spring|Summer|Autumn|Fall|Winter)', date_str, re.IGNORECASE) # 处理季度名称格式: '2023 Autumn' 或 '2023 Fall'
154
- if match:
155
- year = int(match.group(1))
156
- season_map = {"Spring": 3, "Summer": 6, "Autumn": 9, "Fall": 9, "Winter": 12}
157
- month = season_map[match.group(2).capitalize()]
158
- return datetime.datetime(year, month, 1), date_str
159
-
160
- return None, date_str
 
 
 
 
 
 
 
 
 
 
 
 
 
161
 
162
  def extract_and_sort_events(data_dir, pledge_date, pledge_author, claim, suggestion_meta):
163
 
@@ -220,10 +239,10 @@ def extract_and_sort_events(data_dir, pledge_date, pledge_author, claim, suggest
220
  for event in doc.get("output", {}).get("events", []):
221
  parsed_date, original_date = parse_date(event["date"])
222
  if parsed_date:
223
- if mete_date!= parsed_date:
224
- event_date_and_pub_date = original_date+f" ({mete_date})"
225
- else:
226
- event_date_and_pub_date = original_date
227
 
228
  test_instance = f"Pledge: {pledge} (Speaker: {pledge_author}; Pledge Date: {pledge_date})\nEvent Summary: {event['event']} (Event Date: {original_date})\nIs this event summary useful to track the fulfilment of this pledge"
229
 
@@ -238,10 +257,9 @@ def extract_and_sort_events(data_dir, pledge_date, pledge_author, claim, suggest
238
  "event": event["event"],
239
  "url": URL,
240
  "label": label,
241
- "confident": score
242
  })
243
 
244
- # 按时间排序
245
  events.sort(key=lambda x: parse_date(x["date"])[0], reverse=True)
246
  return events
247
 
 
114
  return data_dict
115
 
116
 
117
import datetime
import re

# Offset subtracted from the reference date for each supported relative term.
# "last month" is an approximation: a fixed 30 days, not a calendar month.
_RELATIVE_OFFSETS = {
    "last month": datetime.timedelta(days=30),
    "yesterday": datetime.timedelta(days=1),
    "last week": datetime.timedelta(days=7),
    "this week": datetime.timedelta(0),
}

# Month used to represent each season when only a season name is given.
_SEASON_START_MONTH = {"spring": 3, "summer": 6, "autumn": 9, "fall": 9, "winter": 12}


def _first_of_month(year, month):
    """Return ``datetime(year, month, 1)``, or None when the values are out of range."""
    try:
        return datetime.datetime(year, month, 1)
    except ValueError:
        # e.g. year 0, which matches r'\d{4}' but is not a valid datetime year.
        return None


def parse_date(date_str):
    """Parse a loosely formatted date string into a ``datetime``.

    Supported formats (surrounding whitespace is ignored):
      1. ``YYYY-MM-DD``
      2. ``<term> (relative to YYYY-MM-DD)`` where ``<term>`` is one of
         "last month", "yesterday", "last week", "this week" (case-insensitive)
      3. ``YYYY``                      -> January 1st of that year
      4. ``<Month name> YYYY``         -> first day of that month
      5. ``YYYY-Q1`` .. ``YYYY-Q4``    -> first day of the quarter
      6. ``YYYY <Season>``             -> first day of the season's start month

    Args:
        date_str: The raw date text. Falsy values and non-string values are
            treated as unparseable.

    Returns:
        A ``(parsed, text)`` tuple. ``parsed`` is a ``datetime.datetime`` or
        ``None`` when the text cannot be interpreted; ``text`` is the stripped
        input string (or the input unchanged when it is falsy or not a string).
        The function never raises for malformed date text.
    """
    if not date_str:
        return None, date_str
    if not isinstance(date_str, str):
        # e.g. a bare integer year; callers treat a None date as "skip".
        return None, date_str
    date_str = date_str.strip()

    # Case 1: YYYY-MM-DD
    try:
        return datetime.datetime.strptime(date_str, "%Y-%m-%d"), date_str
    except ValueError:
        pass

    # Case 2: relative date, e.g. "Last week (relative to 2024-03-08)"
    match = re.search(r'(.*) \(relative to (\d{4}-\d{2}-\d{2})\)', date_str)
    if match:
        try:
            reference = datetime.datetime.strptime(match.group(2), "%Y-%m-%d")
        except ValueError:
            # The regex only checks the digit layout; "2024-13-45" gets here.
            return None, date_str
        # Compare against None explicitly: timedelta(0) ("this week") is falsy.
        offset = _RELATIVE_OFFSETS.get(match.group(1).strip().lower())
        if offset is None:
            return None, date_str
        try:
            # Return a datetime object, not a string.
            return reference - offset, date_str
        except OverflowError:
            # Reference too close to datetime.min to subtract the offset.
            return None, date_str

    # Case 3: YYYY
    match = re.fullmatch(r'(\d{4})', date_str)
    if match:
        return _first_of_month(int(match.group(1)), 1), date_str

    # Case 4: Month YYYY, e.g. "November 2023"
    match = re.fullmatch(r'(\w+) (\d{4})', date_str)
    if match:
        try:
            return datetime.datetime.strptime(date_str, "%B %Y"), date_str
        except ValueError:
            return None, date_str

    # Case 5: YYYY-QX. Only quarters 1-4 are accepted; any other digit would
    # otherwise map to an invalid month and make datetime() raise.
    match = re.fullmatch(r'(\d{4})-Q([1-4])', date_str)
    if match:
        year, quarter = int(match.group(1)), int(match.group(2))
        return _first_of_month(year, (quarter - 1) * 3 + 1), date_str

    # Case 6: YYYY Season, e.g. "2023 Autumn" or "2023 fall"
    match = re.fullmatch(r'(\d{4}) (Spring|Summer|Autumn|Fall|Winter)', date_str, re.IGNORECASE)
    if match:
        month = _SEASON_START_MONTH[match.group(2).lower()]
        return _first_of_month(int(match.group(1)), month), date_str

    return None, date_str
179
+
180
 
181
  def extract_and_sort_events(data_dir, pledge_date, pledge_author, claim, suggestion_meta):
182
 
 
239
  for event in doc.get("output", {}).get("events", []):
240
  parsed_date, original_date = parse_date(event["date"])
241
  if parsed_date:
242
+ # if mete_date!= parsed_date:
243
+ event_date_and_pub_date = parsed_date+f" ({mete_date})"
244
+ # else:
245
+ # event_date_and_pub_date = original_date
246
 
247
  test_instance = f"Pledge: {pledge} (Speaker: {pledge_author}; Pledge Date: {pledge_date})\nEvent Summary: {event['event']} (Event Date: {original_date})\nIs this event summary useful to track the fulfilment of this pledge"
248
 
 
257
  "event": event["event"],
258
  "url": URL,
259
  "label": label,
260
+ "confident": score,
261
  })
262
 
 
263
  events.sort(key=lambda x: parse_date(x["date"])[0], reverse=True)
264
  return events
265