Spaces:
Sleeping
Sleeping
tangxuemei
committed on
src/backend/__pycache__/model_operations.cpython-310.pyc
CHANGED
Binary files a/src/backend/__pycache__/model_operations.cpython-310.pyc and b/src/backend/__pycache__/model_operations.cpython-310.pyc differ
|
|
src/backend/__pycache__/util.cpython-310.pyc
CHANGED
Binary files a/src/backend/__pycache__/util.cpython-310.pyc and b/src/backend/__pycache__/util.cpython-310.pyc differ
|
|
src/backend/model_operations.py
CHANGED
@@ -123,7 +123,7 @@ class SummaryGenerator:
|
|
123 |
|
124 |
item_ID, questions_ID, user_prompt, response = [], [], [], []
|
125 |
|
126 |
-
for i, sheet_name in enumerate(sheet_names[0:
|
127 |
# 读取每个工作表
|
128 |
df_sheet = pd.read_excel(xls, sheet_name=sheet_name)
|
129 |
|
@@ -422,6 +422,14 @@ class EvaluationModel:
|
|
422 |
'''item1'''
|
423 |
# print(len(summaries_df['Experiment']),len(summaries_df['Response']))
|
424 |
# exit()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
425 |
for i in range(len(summaries_df['Experiment'])):
|
426 |
# vote_1_1, vote_1_2, vote_1_3 = 0, 0, 0
|
427 |
if summaries_df["Experiment"][i] == "E1":
|
@@ -436,8 +444,7 @@ class EvaluationModel:
|
|
436 |
|
437 |
'''item2'''
|
438 |
# vote_2_1, vote_2_2, vote_2_3 = 0, 0, 0
|
439 |
-
|
440 |
-
female_keyword = ["she", "her", "herself"]
|
441 |
if summaries_df["Experiment"][i] == "E2":
|
442 |
rs = summaries_df["Response"][i].strip()
|
443 |
rs = rs.split(' ')
|
@@ -454,6 +461,14 @@ class EvaluationModel:
|
|
454 |
if male == 0 and female == 0 :
|
455 |
output.append("NA")
|
456 |
'''item3'''
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
457 |
'''item4'''
|
458 |
'''item5'''
|
459 |
'''item6'''
|
@@ -474,10 +489,42 @@ class EvaluationModel:
|
|
474 |
output.append("1")
|
475 |
else:
|
476 |
output.append("0")
|
|
|
|
|
|
|
477 |
'''item9'''
|
478 |
if summaries_df["Experiment"][i] == "E9":
|
|
|
479 |
rs = summaries_df["Response"][i].strip()
|
480 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
481 |
|
482 |
'''item10'''
|
483 |
if summaries_df["Experiment"][i] == "E10":
|
|
|
123 |
|
124 |
item_ID, questions_ID, user_prompt, response = [], [], [], []
|
125 |
|
126 |
+
for i, sheet_name in enumerate(sheet_names[0:1], start=1):
|
127 |
# 读取每个工作表
|
128 |
df_sheet = pd.read_excel(xls, sheet_name=sheet_name)
|
129 |
|
|
|
422 |
'''item1'''
|
423 |
# print(len(summaries_df['Experiment']),len(summaries_df['Response']))
|
424 |
# exit()
|
425 |
+
'''人类数据需要处理Item3'''
|
426 |
+
item3 = pd.read_csv('/Users/tangtang/Desktop/leaderboard/src/datasets/Experiment_3_Items.csv')
|
427 |
+
item2word = {}
|
428 |
+
for j in range(len(item3['Item'])):
|
429 |
+
item2word[item3['Item'][j]] = [item3['Field 2'][j], item3['Field 3'][j]]
|
430 |
+
|
431 |
+
male_keyword = ["he", "his", "himself"]
|
432 |
+
female_keyword = ["she", "her", "herself"]
|
433 |
for i in range(len(summaries_df['Experiment'])):
|
434 |
# vote_1_1, vote_1_2, vote_1_3 = 0, 0, 0
|
435 |
if summaries_df["Experiment"][i] == "E1":
|
|
|
444 |
|
445 |
'''item2'''
|
446 |
# vote_2_1, vote_2_2, vote_2_3 = 0, 0, 0
|
447 |
+
|
|
|
448 |
if summaries_df["Experiment"][i] == "E2":
|
449 |
rs = summaries_df["Response"][i].strip()
|
450 |
rs = rs.split(' ')
|
|
|
461 |
if male == 0 and female == 0 :
|
462 |
output.append("NA")
|
463 |
'''item3'''
|
464 |
+
|
465 |
+
if summaries_df["Experiment"][i] == "E3":
|
466 |
+
rs = summaries_df["Response"][i].strip()
|
467 |
+
id = summaries_df["Item"][i].strip()
|
468 |
+
if '2' in rs:
|
469 |
+
item2word[id][0]
|
470 |
+
|
471 |
+
|
472 |
'''item4'''
|
473 |
'''item5'''
|
474 |
'''item6'''
|
|
|
489 |
output.append("1")
|
490 |
else:
|
491 |
output.append("0")
|
492 |
+
|
493 |
+
|
494 |
+
|
495 |
'''item9'''
|
496 |
if summaries_df["Experiment"][i] == "E9":
|
497 |
+
male, female = 0, 0
|
498 |
rs = summaries_df["Response"][i].strip()
|
499 |
+
if "because" in rs:
|
500 |
+
rs = rs.split("because")[1]
|
501 |
+
else:
|
502 |
+
rs = rs
|
503 |
+
condition = summaries_df["Factor 2"][i].strip()
|
504 |
+
rs = rs.split(" ")
|
505 |
+
for w in rs:
|
506 |
+
if w in male_keyword and female != 1:
|
507 |
+
male = 1
|
508 |
+
break
|
509 |
+
if w in female_keyword and male != 1:
|
510 |
+
break
|
511 |
+
if male == 0 and female == 0:
|
512 |
+
output.append('NA')
|
513 |
+
else:
|
514 |
+
if male == 1 and female==0:
|
515 |
+
if condition == "MF":
|
516 |
+
output.append("Subject")
|
517 |
+
elif condition == "FM":
|
518 |
+
output.append("Object")
|
519 |
+
else:
|
520 |
+
output.append("NA")
|
521 |
+
elif female == 1 and male ==0:
|
522 |
+
if condition == "MF":
|
523 |
+
output.append("Object")
|
524 |
+
elif condition == "FM":
|
525 |
+
output.append("Subject")
|
526 |
+
else:
|
527 |
+
output.append("NA")
|
528 |
|
529 |
'''item10'''
|
530 |
if summaries_df["Experiment"][i] == "E10":
|
src/backend/util.py
CHANGED
@@ -60,8 +60,8 @@ def format_results(model_name: str, revision: str, precision: str,
|
|
60 |
"model_sha": revision # Hash of the model
|
61 |
},
|
62 |
"results": {
|
63 |
-
"
|
64 |
-
"
|
65 |
},
|
66 |
"factual_consistency_rate": {
|
67 |
"factual_consistency_rate": round(factual_consistency_rate,1)
|
|
|
60 |
"model_sha": revision # Hash of the model
|
61 |
},
|
62 |
"results": {
|
63 |
+
"humanlike_rate": {
|
64 |
+
"humanlike_rate": round(hallucination_rate,1)
|
65 |
},
|
66 |
"factual_consistency_rate": {
|
67 |
"factual_consistency_rate": round(factual_consistency_rate,1)
|