Spaces:

XuemeiTang
/

Humanlike_Evaluation

Sleeping

App Files Community

tangxuemei committed on Jul 23, 2024

Commit

834a207

verified ·

1 Parent(s): 5c562d6

2

Browse files

Files changed (4) hide show

src/backend/__pycache__/model_operations.cpython-310.pyc +0 -0
src/backend/__pycache__/util.cpython-310.pyc +0 -0
src/backend/model_operations.py +51 -4
src/backend/util.py +2 -2

src/backend/__pycache__/model_operations.cpython-310.pyc CHANGED Viewed

Binary files a/src/backend/__pycache__/model_operations.cpython-310.pyc and b/src/backend/__pycache__/model_operations.cpython-310.pyc differ

src/backend/__pycache__/util.cpython-310.pyc CHANGED Viewed

Binary files a/src/backend/__pycache__/util.cpython-310.pyc and b/src/backend/__pycache__/util.cpython-310.pyc differ

src/backend/model_operations.py CHANGED Viewed

@@ -123,7 +123,7 @@ class SummaryGenerator:
             item_ID, questions_ID, user_prompt, response = [], [], [], []
-            for i, sheet_name in enumerate(sheet_names[0:2], start=1):
                 # 读取每个工作表
                 df_sheet = pd.read_excel(xls, sheet_name=sheet_name)
@@ -422,6 +422,14 @@ class EvaluationModel:
         '''item1'''
         # print(len(summaries_df['Experiment']),len(summaries_df['Response']))
         # exit()
         for i in range(len(summaries_df['Experiment'])):
             # vote_1_1, vote_1_2, vote_1_3 = 0, 0, 0
             if summaries_df["Experiment"][i] == "E1":
@@ -436,8 +444,7 @@ class EvaluationModel:
             '''item2'''
             # vote_2_1, vote_2_2, vote_2_3 = 0, 0, 0
-            male_keyword = ["he", "his", "himself"]
-            female_keyword = ["she", "her", "herself"]
             if summaries_df["Experiment"][i] == "E2":
                 rs = summaries_df["Response"][i].strip()
                 rs = rs.split(' ')
@@ -454,6 +461,14 @@ class EvaluationModel:
                 if male == 0 and female == 0 :
                     output.append("NA")
             '''item3'''
             '''item4'''
             '''item5'''
             '''item6'''
@@ -474,10 +489,42 @@ class EvaluationModel:
                     output.append("1")
                 else:
                     output.append("0")
             '''item9'''
             if summaries_df["Experiment"][i] == "E9":
                 rs = summaries_df["Response"][i].strip()
             '''item10'''
             if summaries_df["Experiment"][i] == "E10":

             item_ID, questions_ID, user_prompt, response = [], [], [], []
+            for i, sheet_name in enumerate(sheet_names[0:1], start=1):
                 # 读取每个工作表
                 df_sheet = pd.read_excel(xls, sheet_name=sheet_name)
         '''item1'''
         # print(len(summaries_df['Experiment']),len(summaries_df['Response']))
         # exit()
+        '''人类数据需要处理Item3'''
+        item3 = pd.read_csv('/Users/tangtang/Desktop/leaderboard/src/datasets/Experiment_3_Items.csv')
+        item2word = {}
+        for j in range(len(item3['Item'])):
+            item2word[item3['Item'][j]] = [item3['Field 2'][j], item3['Field 3'][j]]
+        male_keyword = ["he", "his", "himself"]
+        female_keyword = ["she", "her", "herself"]
         for i in range(len(summaries_df['Experiment'])):
             # vote_1_1, vote_1_2, vote_1_3 = 0, 0, 0
             if summaries_df["Experiment"][i] == "E1":
             '''item2'''
             # vote_2_1, vote_2_2, vote_2_3 = 0, 0, 0
             if summaries_df["Experiment"][i] == "E2":
                 rs = summaries_df["Response"][i].strip()
                 rs = rs.split(' ')
                 if male == 0 and female == 0 :
                     output.append("NA")
             '''item3'''
+            if summaries_df["Experiment"][i] == "E3":
+                rs = summaries_df["Response"][i].strip()
+                id = summaries_df["Item"][i].strip()
+                if '2' in rs:
+                    item2word[id][0]
             '''item4'''
             '''item5'''
             '''item6'''
                     output.append("1")
                 else:
                     output.append("0")
             '''item9'''
             if summaries_df["Experiment"][i] == "E9":
+                male, female = 0, 0
                 rs = summaries_df["Response"][i].strip()
+                if "because" in rs:
+                    rs = rs.split("because")[1]
+                else:
+                    rs = rs
+                condition = summaries_df["Factor 2"][i].strip()
+                rs = rs.split(" ")
+                for w in rs:
+                    if w in male_keyword and female != 1:
+                        male = 1
+                        break
+                    if w in female_keyword and male != 1:
+                        break
+                if  male == 0 and female == 0:
+                    output.append('NA')
+                else:
+                    if male == 1 and female==0:
+                        if condition == "MF":
+                            output.append("Subject")
+                        elif condition == "FM":
+                            output.append("Object")
+                        else:
+                            output.append("NA")
+                    elif female == 1 and male ==0:
+                        if condition == "MF":
+                            output.append("Object")
+                        elif condition == "FM":
+                            output.append("Subject")
+                        else:
+                            output.append("NA")
             '''item10'''
             if summaries_df["Experiment"][i] == "E10":

src/backend/util.py CHANGED Viewed

@@ -60,8 +60,8 @@ def format_results(model_name: str, revision: str, precision: str,
             "model_sha": revision # Hash of the model
         },
         "results": {
-            "hallucination_rate": {
-                "hallucination_rate": round(hallucination_rate,1)
             },
             "factual_consistency_rate": {
                 "factual_consistency_rate": round(factual_consistency_rate,1)

             "model_sha": revision # Hash of the model
         },
         "results": {
+            "humanlike_rate": {
+                "humanlike_rate": round(hallucination_rate,1)
             },
             "factual_consistency_rate": {
                 "factual_consistency_rate": round(factual_consistency_rate,1)