tangxuemei committed on
Commit
d0b57e5
·
verified ·
1 Parent(s): 738b510
src/backend/__pycache__/evaluate_model.cpython-310.pyc CHANGED
Binary files a/src/backend/__pycache__/evaluate_model.cpython-310.pyc and b/src/backend/__pycache__/evaluate_model.cpython-310.pyc differ
 
src/backend/__pycache__/model_operations.cpython-310.pyc CHANGED
Binary files a/src/backend/__pycache__/model_operations.cpython-310.pyc and b/src/backend/__pycache__/model_operations.cpython-310.pyc differ
 
src/backend/evaluate_model.py CHANGED
@@ -86,7 +86,7 @@ class Evaluator:
86
  # avg_summary_len = self.summary_generator.avg_length
87
  # answer_rate = self.summary_generator.answer_rate
88
  '''开始评估模型的结果'''
89
- self.humanlike = self.eval_model.evaluate_humanlike(self.generated_summaries_df, envs.HUMAN_DATA)
90
  '''原始指标'''
91
  # self.hallucination_scores, self.eval_results = self.eval_model.evaluate_hallucination(
92
  # self.generated_summaries_df)
 
86
  # avg_summary_len = self.summary_generator.avg_length
87
  # answer_rate = self.summary_generator.answer_rate
88
  '''开始评估模型的结果'''
89
+ self.humanlike = self.eval_model.evaluate_humanlike(self.generated_summaries_df, envs.HUMAN_DATA, f"generation_results/{self.model}.csv")
90
  '''原始指标'''
91
  # self.hallucination_scores, self.eval_results = self.eval_model.evaluate_hallucination(
92
  # self.generated_summaries_df)
src/backend/model_operations.py CHANGED
@@ -33,7 +33,7 @@ logging.basicConfig(level=logging.INFO,
33
 
34
  # Load spacy model for word tokenization
35
  nlp = spacy.load("en_core_web_sm")
36
-
37
  os.environ["HUGGINGFACE_API_KEY"] = envs.TOKEN
38
  os.environ["OPENAI_API_KEY"] = "sk-None-tanhMyavhUtpX2G1kmPuT3BlbkFJGEhM5jmyGyhrTd3LdHDI"
39
 
@@ -46,7 +46,8 @@ def load_evaluation_model(model_path):
46
  Returns:
47
  CrossEncoder: The evaluation model
48
  """
49
- model = CrossEncoder(model_path)
 
50
  return model
51
 
52
 
@@ -121,10 +122,13 @@ class SummaryGenerator:
121
  print(f"Total: {len(sheet_names)}")
122
  print(sheet_names)
123
 
124
- item_ID, questions_ID, user_prompt, response = [], [], [], []
125
 
126
- for i, sheet_name in enumerate(sheet_names[0:1], start=1):
127
  # 读取每个工作表
 
 
 
128
  df_sheet = pd.read_excel(xls, sheet_name=sheet_name)
129
 
130
  # 假设第一列是'Prompt0',但这里我们使用列名来避免硬编码
@@ -132,18 +136,37 @@ class SummaryGenerator:
132
  prompt_column = df_sheet['Prompt0']
133
  else:
134
  # 如果'Prompt0'列不存在,则跳过该工作表或进行其他处理
135
- continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
 
137
  # 遍历Prompt0列的值
138
- for j, prompt_value in enumerate(tqdm(prompt_column, desc=f"Processing {sheet_name}"), start=1):
139
  ID = 'E' + str(i)
140
- q_ID = ID + '_' + str(j)
141
 
142
- # print(ID, q_ID, prompt_value)
143
- for i in range(2):
144
- system_prompt = envs.SYSTEM_PROMPT
 
145
  # user_prompt = f"{envs.USER_PROMPT}\nPassage:\n{_source}"
146
- _user_prompt = prompt_value
147
  while True:
148
  try:
149
  '''调用'''
@@ -171,19 +194,58 @@ class SummaryGenerator:
171
  _response = ""
172
  exceptions.append(i)
173
  break
174
-
175
- item_ID.append(ID)
176
- questions_ID.append(q_ID)
177
- user_prompt.append(_user_prompt)
178
- response.append(_response)
179
- print(_response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  # exit()
181
 
182
  # Sleep to prevent hitting rate limits too frequently
183
  time.sleep(1)
184
 
185
- self.summaries_df = pd.DataFrame(list(zip(item_ID, questions_ID, user_prompt, response)),
186
- columns=["Experiment", "Question_ID", "User_prompt", "Response"])
187
 
188
  if save_path is not None:
189
  print(f'Save summaries to {save_path}')
@@ -419,85 +481,486 @@ class EvaluationModel:
419
  def code_results(self, summaries_df):
420
  '''code results from LLM's response'''
421
  output = []
422
- '''item1'''
423
- # print(len(summaries_df['Experiment']),len(summaries_df['Response']))
424
- # exit()
425
- '''人类数据需要处理Item3'''
426
- item3 = pd.read_csv('/Users/tangtang/Desktop/leaderboard/src/datasets/Experiment_3_Items.csv')
427
- item2word = {}
428
- for j in range(len(item3['Item'])):
429
- item2word[item3['Item'][j]] = [item3['Field 2'][j], item3['Field 3'][j]]
 
 
 
 
 
 
 
 
 
 
430
 
431
  male_keyword = ["he", "his", "himself"]
432
  female_keyword = ["she", "her", "herself"]
433
- for i in range(len(summaries_df['Experiment'])):
 
434
  # vote_1_1, vote_1_2, vote_1_3 = 0, 0, 0
 
 
 
 
 
 
435
  if summaries_df["Experiment"][i] == "E1":
436
- if summaries_df["Response"][i].strip() == "Round":
 
 
437
  # vote_1_1 += 1
438
  output.append("Round")
439
- elif summaries_df["Response"][i].strip() == "Spiky":
440
- output.append("Round")
441
  else:
442
- output.append("NA")
443
- # print()
444
 
445
- '''item2'''
446
- # vote_2_1, vote_2_2, vote_2_3 = 0, 0, 0
447
 
448
- if summaries_df["Experiment"][i] == "E2":
449
- rs = summaries_df["Response"][i].strip()
450
  rs = rs.split(' ')
 
451
  male, female = 0, 0
452
  for word in rs:
453
- if word in female_keyword and male != 1:
454
  female = 1
455
  output.append("Female")
456
  break
457
- if word in male_keyword and female != 1:
458
  male = 1
459
  output.append("Male")
460
  break
461
  if male == 0 and female == 0 :
462
- output.append("NA")
463
- '''item3'''
464
-
465
- if summaries_df["Experiment"][i] == "E3":
466
- rs = summaries_df["Response"][i].strip()
467
- id = summaries_df["Item"][i].strip()
468
- if '2' in rs:
469
- item2word[id][0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
470
 
471
 
472
- '''item4'''
473
- '''item5'''
474
- '''item6'''
475
-
476
- '''item7'''
477
- if summaries_df["Experiment"][i] == "E7":
478
- rs = summaries_df["Response"][i].strip()
479
- if rs == "No":
480
  output.append("0")
481
- elif rs == "Yes":
482
  output.append("1")
483
  else:
484
- output.append("NA")
485
- '''item8'''
486
- if summaries_df["Experiment"][i] == "E8":
487
- rs = summaries_df["Response"][i].strip()
488
- if rs == "Something is wrong with the question":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
489
  output.append("1")
490
  else:
491
  output.append("0")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
492
 
493
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
494
 
495
- '''item9'''
496
- if summaries_df["Experiment"][i] == "E9":
 
 
 
 
497
  male, female = 0, 0
498
- rs = summaries_df["Response"][i].strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
499
  if "because" in rs:
500
- rs = rs.split("because")[1]
501
  else:
502
  rs = rs
503
  condition = summaries_df["Factor 2"][i].strip()
@@ -507,9 +970,11 @@ class EvaluationModel:
507
  male = 1
508
  break
509
  if w in female_keyword and male != 1:
 
510
  break
 
511
  if male == 0 and female == 0:
512
- output.append('NA')
513
  else:
514
  if male == 1 and female==0:
515
  if condition == "MF":
@@ -517,36 +982,70 @@ class EvaluationModel:
517
  elif condition == "FM":
518
  output.append("Object")
519
  else:
520
- output.append("NA")
521
  elif female == 1 and male ==0:
522
  if condition == "MF":
523
  output.append("Object")
524
  elif condition == "FM":
525
  output.append("Subject")
526
  else:
527
- output.append("NA")
528
 
529
- '''item10'''
530
- if summaries_df["Experiment"][i] == "E10":
531
- rs = summaries_df["Response"][i].strip()
532
- if rs == "Yes":
 
533
  output.append("1")
534
  else:
535
- output.append("0")
536
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
537
 
538
 
539
 
540
 
541
  '''是不是有不同的问题,如何计算'''
542
- def evaluate_humanlike(self, summaries_df, human_data_path):
543
  '''
544
  evaluate humanlike score
545
  1. code the result
546
  2. comput the similaritirs between human and model
547
  process model responses'''
548
- huamn_df = pd.read_csv(human_data_path)
549
- self.code_results(summaries_df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
550
  return 9.00
551
 
552
 
 
33
 
34
  # Load spacy model for word tokenization
35
  nlp = spacy.load("en_core_web_sm")
36
+ nlp1 = spacy.load("en_core_web_trf")
37
  os.environ["HUGGINGFACE_API_KEY"] = envs.TOKEN
38
  os.environ["OPENAI_API_KEY"] = "sk-None-tanhMyavhUtpX2G1kmPuT3BlbkFJGEhM5jmyGyhrTd3LdHDI"
39
 
 
46
  Returns:
47
  CrossEncoder: The evaluation model
48
  """
49
+ # model = CrossEncoder(model_path)
50
+ model = ""
51
  return model
52
 
53
 
 
122
  print(f"Total: {len(sheet_names)}")
123
  print(sheet_names)
124
 
125
+ Experiment_ID, Questions_ID, Item_ID, Condition, User_prompt, Response, Factor_2, Stimuli_1 = [], [], [], [], [] ,[], [], []
126
 
127
+ for i, sheet_name in enumerate(sheet_names, start=1):
128
  # 读取每个工作表
129
+ # if i > 2 and i ==1:
130
+ # continue
131
+ print(i, sheet_name)
132
  df_sheet = pd.read_excel(xls, sheet_name=sheet_name)
133
 
134
  # 假设第一列是'Prompt0',但这里我们使用列名来避免硬编码
 
136
  prompt_column = df_sheet['Prompt0']
137
  else:
138
  # 如果'Prompt0'列不存在,则跳过该工作表或进行其他处理
139
+ continue
140
+ if i == 3 :
141
+ word1_list = df_sheet['Stimuli-2']
142
+ word2_list = df_sheet['Stimuli-3']
143
+ V2_column = []
144
+ for jj in range(len(word1_list)):
145
+ V2_column.append(word1_list[jj] + '_' + word2_list[jj])
146
+ # print(V2_column)
147
+ elif i == 9:
148
+ V2_column = df_sheet['V2'] #SL, LS
149
+ elif i == 4 or i == 6 :
150
+ V2_column = df_sheet['Stimuli-2'] #Stimuli-2
151
+ else:
152
+ V2_column = [""] * len(prompt_column)
153
+ q_column = df_sheet["ID"]
154
+ Item_column = df_sheet["Item"]
155
+ Condition_column = df_sheet["Condition"]
156
+ Stimuli_1_column = df_sheet["Stimuli-1"]
157
+ if 'Stimuli-2' in df_sheet.columns:
158
+ Stimuli_2_column = df_sheet["Stimuli-2"]
159
 
160
  # 遍历Prompt0列的值
161
+ for j, prompt_value in enumerate(tqdm(prompt_column[0:2], desc=f"Processing {sheet_name}"), start=0):
162
  ID = 'E' + str(i)
163
+ # q_ID = ID + '_' + str(j)
164
 
165
+ # print(ID, q_ID, prompt_value)
166
+ system_prompt = envs.SYSTEM_PROMPT
167
+ _user_prompt = prompt_value
168
+ for ii in range(2):
169
  # user_prompt = f"{envs.USER_PROMPT}\nPassage:\n{_source}"
 
170
  while True:
171
  try:
172
  '''调用'''
 
194
  _response = ""
195
  exceptions.append(i)
196
  break
197
+ if i == 5:
198
+ print(_response)
199
+
200
+ _response1, _response2 = _response.split('\n\n')
201
+ Experiment_ID.append(ID)
202
+ Questions_ID.append(q_column[j])
203
+ User_prompt.append(_user_prompt)
204
+
205
+ Response.append(_response2)
206
+
207
+ Factor_2.append(V2_column[j])
208
+ Stimuli_1.append(Stimuli_2_column[j])
209
+ Item_ID.append(Item_column[j])
210
+ Condition.append(Condition_column[j])
211
+
212
+ # the first sentence in the response is saved as E51
213
+ Experiment_ID.append(ID + '1')
214
+ Questions_ID.append(str(q_column[j]) + '1')
215
+ User_prompt.append(_user_prompt)
216
+ Response.append(_response1)
217
+
218
+
219
+
220
+ Factor_2.append(V2_column[j])
221
+ Stimuli_1.append(Stimuli_1_column[j])
222
+ Item_ID.append(Item_column[j])
223
+ Condition.append(Condition_column[j])
224
+
225
+ else:
226
+ Experiment_ID.append(ID)
227
+ Questions_ID.append(q_column[j])
228
+ User_prompt.append(_user_prompt)
229
+
230
+ Response.append(_response)
231
+ if i == 6:
232
+ Factor_2.append(Condition_column[j])
233
+ Stimuli_1.append(V2_column[j])
234
+ else:
235
+ Factor_2.append(V2_column[j])
236
+ Stimuli_1.append(Stimuli_1_column[j])
237
+ Item_ID.append(Item_column[j])
238
+ Condition.append(Condition_column[j])
239
+ print(_response)
240
+
241
+
242
  # exit()
243
 
244
  # Sleep to prevent hitting rate limits too frequently
245
  time.sleep(1)
246
 
247
+ self.summaries_df = pd.DataFrame(list(zip(Experiment_ID, Questions_ID, Item_ID, Condition, User_prompt, Response, Factor_2, Stimuli_1)),
248
+ columns=["Experiment", "Question_ID", "Item", "Condition", "User_prompt", "Response","Factor 2","Stimuli 1"])
249
 
250
  if save_path is not None:
251
  print(f'Save summaries to {save_path}')
 
481
  def code_results(self, summaries_df):
482
  '''code results from LLM's response'''
483
  output = []
484
+ '''database for Exp4'''
485
+ item4 = pd.read_csv(envs.ITEM_4_DATA)
486
+ wordpair2code = {}
487
+ for j in range(len(item4['Coding'])):
488
+ wordpair2code[item4['Pair'][j]] = item4['Coding'][j]
489
+ '''verb for Exp5'''
490
+ item5 = pd.read_csv(envs.ITEM_5_DATA)
491
+ # item corresponding to verb, same item id corresponding to verb pair
492
+ item2verb2 = {}
493
+ item2verb1 = {}
494
+
495
+ Stimuli1, Stimuli2 = {}, {}
496
+ for j in range(len(item5['Item'])):
497
+ item2verb1[item5['Item'][j]] = item5['Verb1'][j]
498
+ item2verb2[item5['Item'][j]] = item5['Verb2'][j]
499
+ Stimuli1[item5['ID'][j]] = item5['Stimuli-1'][j]
500
+ Stimuli2[item5['ID'][j]] = item5['Stimuli-2'][j]
501
+
502
 
503
  male_keyword = ["he", "his", "himself"]
504
  female_keyword = ["she", "her", "herself"]
505
+ print(len(summaries_df["Experiment"]))
506
+ for i in range(len(summaries_df["Experiment"])):
507
  # vote_1_1, vote_1_2, vote_1_3 = 0, 0, 0
508
+ # print()
509
+ if pd.isna(summaries_df["Response"][i]):
510
+ output.append("Other")
511
+ continue
512
+ rs = summaries_df["Response"][i].strip().lower()
513
+ '''Exp1'''
514
  if summaries_df["Experiment"][i] == "E1":
515
+ print("E1", rs)
516
+ rs = rs.replace('"','')
517
+ if rs == "round":
518
  # vote_1_1 += 1
519
  output.append("Round")
520
+ elif rs == "spiky":
521
+ output.append("Spiky")
522
  else:
523
+ output.append("Other")
524
+
525
 
526
+ '''Exp2'''
 
527
 
528
+ elif summaries_df["Experiment"][i] == "E2":
529
+ # rs = summaries_df["Response"][i].strip()
530
  rs = rs.split(' ')
531
+ print("E2", rs)
532
  male, female = 0, 0
533
  for word in rs:
534
+ if word in female_keyword and male == 0:
535
  female = 1
536
  output.append("Female")
537
  break
538
+ if word in male_keyword and female == 0:
539
  male = 1
540
  output.append("Male")
541
  break
542
  if male == 0 and female == 0 :
543
+ output.append("Other")
544
+
545
+ '''Exp3'''
546
+ elif summaries_df["Experiment"][i] == "E3":
547
+ # rs = summaries_df["Response"][i].strip()
548
+ print("E3", rs)
549
+ if pd.isna(summaries_df["Factor 2"][i]):
550
+ output.append("Other")
551
+ else:
552
+ if summaries_df["Factor 2"][i].strip() == "LS":
553
+ if "2" in rs:
554
+ output.append("Long")
555
+ elif "3" in rs:
556
+ output.append("Short")
557
+ else:
558
+ output.append("Other")
559
+ if summaries_df["Factor 2"][i].strip() == "SL":
560
+ if "2" in rs:
561
+ output.append("Short")
562
+ elif "3" in rs:
563
+ output.append("Long")
564
+ else:
565
+ output.append("Other")
566
+ '''Exp4'''
567
+
568
+ elif summaries_df["Experiment"][i] == "E4":
569
+ # rs = summaries_df["Response"][i].strip()
570
+ target = summaries_df["Factor 2"][i].strip().lower()
571
+ pair = target + "_" + rs
572
+ print("E4:", pair)
573
+ if pair in wordpair2code.keys():
574
+ output.append(wordpair2code[pair])
575
+ else:
576
+ output.append("Other")
577
+
578
+ '''Exp5'''
579
+ elif summaries_df["Experiment"][i] == "E5" or summaries_df["Experiment"][i] == "E51":
580
+ # sentence = summaries_df["Response"][i].strip()
581
+ item_id = summaries_df["Item"][i]
582
+ question_id = summaries_df["Question_ID"][i]
583
+
584
+ sti1, sti2 = "", ""
585
+
586
+ if summaries_df["Experiment"][i] == "E51":
587
+ sti1 = Stimuli1[question_id[0:-1]].lower().replace("...", "")
588
+ sti2 = Stimuli2[question_id[0:-1]].lower().replace("...", "")
589
+ verb = item2verb1[item_id].lower()
590
+
591
+ sentence = sti1 + " " + rs.replace(sti1, "")
592
+ print("E5", verb, sentence)
593
+ if summaries_df["Experiment"][i] == "E5":
594
+ sti1 = Stimuli1[question_id].lower().replace("...", "")
595
+ # print(sti1)
596
+ sti2 = Stimuli2[question_id].lower().replace("...", "")
597
+
598
+ verb = item2verb2[item_id].lower()
599
+ sentence = sti2.replace("...","") + " " + rs.replace(sti2, "")
600
+ print("E5", verb, sentence)
601
+
602
+
603
+ doc = nlp1(sentence.replace(" "," "))
604
+ # print(doc)
605
+ # print()
606
+ verb_token = None
607
+ for token in doc:
608
+ # print(token.lemma_)
609
+ if token.lemma_ == verb:
610
+ verb_token = token
611
+ break
612
+ # exit()
613
+ if verb_token is None:
614
+ output.append("Other")
615
+ print("E5 The target verb is missing from the sentence.")
616
+ else:
617
+ pobj, dative = None, None
618
+ # print(verb_token.children)
619
+ # exit()
620
+ for child in verb_token.children:
621
+ print(child)
622
+ if (child.dep_ == 'dative' and child.pos_ == "ADP") or (child.text == "to" and child.dep_ == 'prep' and child.pos_ == "ADP"):
623
+ pobj = child.text
624
+ if child.dep_ == 'dative':
625
+ dative = child.text
626
+ print("E5", pobj, dative)
627
+ # exit()
628
+
629
+ if pobj:
630
+ output.append("PO")
631
+ elif dative:
632
+ output.append("DO")
633
+ else:
634
+ print("Other", sentence, pobj, dative)
635
+ # exit()
636
+ output.append("Other")
637
+
638
+ '''Exp6'''
639
+
640
+ elif summaries_df["Experiment"][i] == "E6":
641
+ sentence = summaries_df["Stimuli 1"][i].strip().lower()
642
+ print("E6", sentence)
643
+ doc = nlp1(sentence)
644
+ subject = "None"
645
+ obj = "None"
646
+ # 遍历依存关系,寻找主语和宾语
647
+ for token in doc:
648
+ if token.dep_ == "nsubj":
649
+ subject = token.text
650
+ elif token.dep_ == "dobj":
651
+ obj = token.text
652
+ print("E6", subject, obj)
653
+ if subject in rs and obj in rs:
654
+ print(rs, subject, obj, "Other")
655
+ output.append("Other")
656
+ elif subject in rs:
657
+ print(rs, subject, obj, "VP")
658
+ output.append("VP")
659
+ elif obj in rs:
660
+ print(rs, subject, obj, "NP")
661
+ output.append("NP")
662
+ else:
663
+ print(rs, subject, obj, "Other")
664
+ output.append("Other")
665
 
666
 
667
+
668
+
669
+ '''Exp7'''
670
+ elif summaries_df["Experiment"][i] == "E7":
671
+ # rs = summaries_df["Response"][i].strip().lower()
672
+ print("E7",rs)
673
+ if rs == "no":
 
674
  output.append("0")
675
+ elif rs == "yes":
676
  output.append("1")
677
  else:
678
+ output.append("Other")
679
+
680
+ '''Exp8'''
681
+ elif summaries_df["Experiment"][i] == "E8":
682
+ # rs = summaries_df["Response"][i].strip()
683
+
684
+ if "something is wrong with the question" in rs:
685
+ output.append("1")
686
+ else:
687
+ output.append("0")
688
+
689
+ '''Exp9'''
690
+ elif summaries_df["Experiment"][i] == "E9":
691
+ male, female = 0, 0
692
+
693
+ # rs = summaries_df["Response"][i].strip()
694
+ if "because" in rs:
695
+ rs = rs.replace("because because","because").split("because")[1]
696
+ else:
697
+ rs = rs
698
+ condition = summaries_df["Factor 2"][i].strip()
699
+ rs = rs.split(" ")
700
+ for w in rs:
701
+ if w in male_keyword and female != 1:
702
+ male = 1
703
+ break
704
+ if w in female_keyword and male != 1:
705
+ female = 1
706
+ break
707
+ print("E9", "condition", condition, "male", male, "female", female)
708
+ if male == 0 and female == 0:
709
+ output.append('Other')
710
+ else:
711
+ if male == 1 and female==0:
712
+ if condition == "MF":
713
+ output.append("Subject")
714
+ elif condition == "FM":
715
+ output.append("Object")
716
+ else:
717
+ output.append("Other")
718
+ elif female == 1 and male ==0:
719
+ if condition == "MF":
720
+ output.append("Object")
721
+ elif condition == "FM":
722
+ output.append("Subject")
723
+ else:
724
+ output.append("Other")
725
+
726
+ '''Exp10'''
727
+ elif summaries_df["Experiment"][i] == "E10":
728
+ # rs = summaries_df["Response"][i].strip()
729
+ if rs == "yes":
730
  output.append("1")
731
  else:
732
  output.append("0")
733
+ else:
734
+ print("can;t find the Exp:", summaries_df["Experiment"][i])
735
+ output.append("NA")
736
+ # print(output)
737
+ # exit()
738
+ '''human'''
739
+ self.data = pd.DataFrame(list(zip(summaries_df["Experiment"], summaries_df["Question_ID"], summaries_df["Item"], summaries_df["Response"], summaries_df["Factor 2"], summaries_df["Stimuli 1"], summaries_df["Coding"], output)),
740
+ columns=["Experiment", "Question_ID", "Item", "Response", "Factor 2", "Simulate 1","Original_Coding","Coding"])
741
+ # '''LLM'''
742
+ # self.data = pd.DataFrame(list(zip(summaries_df["Experiment"], summaries_df["Question_ID"], summaries_df["Item"], summaries_df["Response"], summaries_df["Factor 2"], summaries_df["Stimuli 1"], output)),
743
+ # columns=["Experiment", "Question_ID", "Item", "Response", "Factor 2", "Simulate 1","Coding"])
744
+ print(self.data.head())
745
+
746
+ return self.data
747
+ def code_results_llm(self, summaries_df):
748
+ '''code results from LLM's response'''
749
+ output = []
750
+ '''database for Exp4'''
751
+ item4 = pd.read_csv(envs.ITEM_4_DATA)
752
+ wordpair2code = {}
753
+ for j in range(len(item4['Coding'])):
754
+ wordpair2code[item4['Pair'][j]] = item4['Coding'][j]
755
+ '''verb for Exp5'''
756
+ item5 = pd.read_csv(envs.ITEM_5_DATA)
757
+ # item corresponding to verb, same item id corresponding to verb pair
758
+ item2verb2 = {}
759
+ item2verb1 = {}
760
+
761
+ Stimuli1, Stimuli2 = {}, {}
762
+ for j in range(len(item5['Item'])):
763
+ item2verb1[item5['Item'][j]] = item5['Verb1'][j]
764
+ item2verb2[item5['Item'][j]] = item5['Verb2'][j]
765
+ Stimuli1[item5['ID'][j]] = item5['Stimuli-1'][j]
766
+ Stimuli2[item5['ID'][j]] = item5['Stimuli-2'][j]
767
 
768
 
769
+ male_keyword = ["he", "his", "himself"]
770
+ female_keyword = ["she", "her", "herself"]
771
+ print(len(summaries_df["Experiment"]))
772
+ for i in range(len(summaries_df["Experiment"])):
773
+ # vote_1_1, vote_1_2, vote_1_3 = 0, 0, 0
774
+ # print()
775
+ if pd.isna(summaries_df["Response"][i]):
776
+ output.append("Other")
777
+ continue
778
+ rs = summaries_df["Response"][i].strip().lower()
779
+ '''Exp1'''
780
+ if summaries_df["Experiment"][i] == "E1":
781
+ print("E1", rs)
782
+ rs = rs.replace('"','')
783
+ if rs == "round":
784
+ # vote_1_1 += 1
785
+ output.append("Round")
786
+ elif rs == "spiky":
787
+ output.append("Spiky")
788
+ else:
789
+ output.append("Other")
790
+
791
 
792
+ '''Exp2'''
793
+
794
+ elif summaries_df["Experiment"][i] == "E2":
795
+ # rs = summaries_df["Response"][i].strip()
796
+ rs = rs.split(' ')
797
+ print("E2", rs)
798
  male, female = 0, 0
799
+ for word in rs:
800
+ if word in female_keyword and male == 0:
801
+ female = 1
802
+ output.append("Female")
803
+ break
804
+ if word in male_keyword and female == 0:
805
+ male = 1
806
+ output.append("Male")
807
+ break
808
+ if male == 0 and female == 0 :
809
+ output.append("Other")
810
+
811
+ '''Exp3'''
812
+ elif summaries_df["Experiment"][i] == "E3":
813
+ # rs = summaries_df["Response"][i].strip()
814
+ print("E3", rs)
815
+ rs = rs.replace('"', '')
816
+ pair = summaries_df["Factor 2"][i]
817
+ word1, word2 = pair.split('_')
818
+
819
+ if rs == word1:
820
+ if len(word1) > len(word2):
821
+ output.append("Long")
822
+ else:
823
+ output.append("Short")
824
+ elif rs == word2:
825
+ if len(word1) > len(word2):
826
+ output.append("Short")
827
+ else:
828
+ output.append("Long")
829
+ else:
830
+ output.append("Other")
831
+
832
+ '''Exp4'''
833
+
834
+ elif summaries_df["Experiment"][i] == "E4":
835
+ # rs = summaries_df["Response"][i].strip()
836
+ meaning_word = rs.split(";")[4].replace(" ",'')
837
+ target = summaries_df["Factor 2"][i].strip().lower()
838
+ pair = target + "_" + meaning_word
839
+ print("E4:", pair)
840
+ if pair in wordpair2code.keys():
841
+ output.append(wordpair2code[pair])
842
+ else:
843
+ output.append("Other")
844
+
845
+ '''Exp5'''
846
+ elif summaries_df["Experiment"][i] == "E5" or summaries_df["Experiment"][i] == "E51":
847
+ # sentence = summaries_df["Response"][i].strip()
848
+ item_id = summaries_df["Item"][i]
849
+ question_id = summaries_df["Question_ID"][i]
850
+
851
+ sti1, sti2 = "", ""
852
+
853
+ if summaries_df["Experiment"][i] == "E51":
854
+ sti1 = Stimuli1[question_id[0:-1]].lower().replace("...", "")
855
+ sti2 = Stimuli2[question_id[0:-1]].lower().replace("...", "")
856
+ verb = item2verb1[item_id].lower()
857
+
858
+ sentence = sti1 + " " + rs.replace(sti1, "")
859
+ print("E5", verb, sentence)
860
+ if summaries_df["Experiment"][i] == "E5":
861
+ sti1 = Stimuli1[question_id].lower().replace("...", "")
862
+ # print(sti1)
863
+ sti2 = Stimuli2[question_id].lower().replace("...", "")
864
+
865
+ verb = item2verb2[item_id].lower()
866
+ sentence = sti2.replace("...","") + " " + rs.replace(sti2, "")
867
+ print("E5", verb, sentence)
868
+
869
+
870
+ doc = nlp1(sentence.replace(" "," "))
871
+ # print(doc)
872
+ # print()
873
+ verb_token = None
874
+ for token in doc:
875
+ # print(token.lemma_)
876
+ if token.lemma_ == verb:
877
+ verb_token = token
878
+ break
879
+ # exit()
880
+ if verb_token is None:
881
+ output.append("Other")
882
+ print("E5 The target verb is missing from the sentence.")
883
+ else:
884
+ pobj, dative = None, None
885
+ # print(verb_token.children)
886
+ # exit()
887
+ for child in verb_token.children:
888
+ print(child)
889
+ if (child.dep_ == 'dative' and child.pos_ == "ADP") or (child.text == "to" and child.dep_ == 'prep' and child.pos_ == "ADP"):
890
+ pobj = child.text
891
+ if child.dep_ == 'dative':
892
+ dative = child.text
893
+ print("E5", pobj, dative)
894
+ # exit()
895
+
896
+ if pobj:
897
+ output.append("PO")
898
+ elif dative:
899
+ output.append("DO")
900
+ else:
901
+ print("Other", sentence, pobj, dative)
902
+ # exit()
903
+ output.append("Other")
904
+
905
+ '''Exp6'''
906
+
907
+ elif summaries_df["Experiment"][i] == "E6":
908
+ sentence = summaries_df["Stimuli 1"][i].strip().lower()
909
+ print("E6", sentence)
910
+ doc = nlp1(sentence)
911
+ subject = "None"
912
+ obj = "None"
913
+ # 遍历依存关系,寻找主语和宾语
914
+ for token in doc:
915
+ if token.dep_ == "nsubj":
916
+ subject = token.text
917
+ elif token.dep_ == "dobj":
918
+ obj = token.text
919
+ print("E6", subject, obj)
920
+ if subject in rs and obj in rs:
921
+ print(rs, subject, obj, "Other")
922
+ output.append("Other")
923
+ elif subject in rs:
924
+ print(rs, subject, obj, "VP")
925
+ output.append("VP")
926
+ elif obj in rs:
927
+ print(rs, subject, obj, "NP")
928
+ output.append("NP")
929
+ else:
930
+ print(rs, subject, obj, "Other")
931
+ output.append("Other")
932
+
933
+
934
+
935
+
936
+ '''Exp7'''
937
+ elif summaries_df["Experiment"][i] == "E7":
938
+ # rs = summaries_df["Response"][i].strip().lower()
939
+ rs = rs.replace(".", "").replace(",", "")
940
+ print("E7",rs)
941
+ if rs == "no":
942
+ output.append("0")
943
+ elif rs == "yes":
944
+ output.append("1")
945
+ else:
946
+ output.append("Other")
947
+
948
+ '''Exp8'''
949
+ elif summaries_df["Experiment"][i] == "E8":
950
+ # rs = summaries_df["Response"][i].strip()
951
+ print("E8",rs)
952
+ if "something is wrong with the question" in rs:
953
+ output.append("1")
954
+ else:
955
+ output.append("0")
956
+
957
+ '''Exp9'''
958
+ elif summaries_df["Experiment"][i] == "E9":
959
+ male, female = 0, 0
960
+
961
+ # rs = summaries_df["Response"][i].strip()
962
  if "because" in rs:
963
+ rs = rs.replace("because because","because").split("because")[1]
964
  else:
965
  rs = rs
966
  condition = summaries_df["Factor 2"][i].strip()
 
970
  male = 1
971
  break
972
  if w in female_keyword and male != 1:
973
+ female = 1
974
  break
975
+ print("E9", "condition", condition, "male", male, "female", female)
976
  if male == 0 and female == 0:
977
+ output.append('Other')
978
  else:
979
  if male == 1 and female==0:
980
  if condition == "MF":
 
982
  elif condition == "FM":
983
  output.append("Object")
984
  else:
985
+ output.append("Other")
986
  elif female == 1 and male ==0:
987
  if condition == "MF":
988
  output.append("Object")
989
  elif condition == "FM":
990
  output.append("Subject")
991
  else:
992
+ output.append("Other")
993
 
994
+ '''Exp10'''
995
+ elif summaries_df["Experiment"][i] == "E10":
996
+ # rs = summaries_df["Response"][i].strip()
997
+ rs = rs.replace(".", "")
998
+ if rs == "yes":
999
  output.append("1")
1000
  else:
1001
+ output.append("0")
1002
+ else:
1003
+ print("can;t find the Exp:", summaries_df["Experiment"][i])
1004
+ output.append("NA")
1005
+ # print(output)
1006
+ # exit()
1007
+ '''human'''
1008
+ # self.data = pd.DataFrame(list(zip(summaries_df["Experiment"], summaries_df["Question_ID"], summaries_df["Item"], summaries_df["Response"], summaries_df["Factor 2"], summaries_df["Stimuli 1"], summaries_df["Coding"], output)),
1009
+ # columns=["Experiment", "Question_ID", "Item", "Response", "Factor 2", "Simulate 1","Original_Coding","Coding"])
1010
+ '''LLM'''
1011
+ self.data = pd.DataFrame(list(zip(summaries_df["Experiment"], summaries_df["Question_ID"], summaries_df["Item"], summaries_df["Response"], summaries_df["Factor 2"], summaries_df["Stimuli 1"], output)),
1012
+ columns=["Experiment", "Question_ID", "Item", "Response", "Factor 2", "Simulate 1","Coding"])
1013
+ print(self.data.head())
1014
+
1015
+ return self.data
1016
+
1017
 
1018
 
1019
 
1020
 
1021
  '''是不是有不同的问题,如何计算'''
1022
+ def evaluate_humanlike(self, summaries_df, human_data_path, result_save_path):
1023
  '''
1024
  evaluate humanlike score
1025
  1. code the result
1026
  2. comput the similaritirs between human and model
1027
  process model responses'''
1028
+
1029
+ '''coding human data'''
1030
+ # self.huamn_df = pd.read_csv(human_data_path)
1031
+ # self.data = self.code_results(self.huamn_df)
1032
+ # save_path = human_data_path.replace('.csv','_coding.csv')
1033
+ # if save_path is not None:
1034
+ # print(f'Save human coding results to {save_path}')
1035
+ # fpath = Path(save_path)
1036
+ # fpath.parent.mkdir(parents=True, exist_ok=True)
1037
+ # self.data.to_csv(fpath)
1038
+
1039
+ '''coding llm data'''
1040
+ save_path = result_save_path.replace('.csv','_coding.csv')
1041
+ self.llm_df = self.code_results_llm(summaries_df)
1042
+ if save_path is not None:
1043
+ print(f'Save LLM coding results to {save_path}')
1044
+ fpath = Path(save_path)
1045
+ fpath.parent.mkdir(parents=True, exist_ok=True)
1046
+ self.llm_df.to_csv(fpath)
1047
+ # exit()
1048
+
1049
  return 9.00
1050
 
1051