Commit 3165936 · Parent: 482874e · committed by yangzhitao

refactor: format codes with ruff
app.py CHANGED
@@ -1,7 +1,7 @@
 import gradio as gr
-from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
 import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
+from gradio_leaderboard import ColumnFilter, Leaderboard, SelectColumns
 from huggingface_hub import snapshot_download
 
 from src.about import (
@@ -20,9 +20,9 @@ from src.display.utils import (
     EVAL_TYPES,
     AutoEvalColumn,
     ModelType,
-    fields,
+    Precision,
     WeightType,
-    Precision
+    fields,
 )
 from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
@@ -32,18 +32,29 @@ from src.submission.submit import add_new_eval
 def restart_space():
     API.restart_space(repo_id=REPO_ID)
 
-### Space initialisation
+
+# Space initialisation
 try:
     print(EVAL_REQUESTS_PATH)
     snapshot_download(
-        repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
+        repo_id=QUEUE_REPO,
+        local_dir=EVAL_REQUESTS_PATH,
+        repo_type="dataset",
+        tqdm_class=None,
+        etag_timeout=30,
+        token=TOKEN,
     )
 except Exception:
     restart_space()
 try:
     print(EVAL_RESULTS_PATH)
     snapshot_download(
-        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
+        repo_id=RESULTS_REPO,
+        local_dir=EVAL_RESULTS_PATH,
+        repo_type="dataset",
+        tqdm_class=None,
+        etag_timeout=30,
+        token=TOKEN,
     )
 except Exception:
     restart_space()
@@ -57,6 +68,7 @@ LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS,
     pending_eval_queue_df,
 ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
 
+
 def init_leaderboard(dataframe):
     if dataframe is None or dataframe.empty:
         raise ValueError("Leaderboard DataFrame is empty or None.")
@@ -80,9 +92,7 @@ def init_leaderboard(dataframe):
                 max=150,
                 label="Select the number of parameters (B)",
             ),
-            ColumnFilter(
-                AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
-            ),
+            ColumnFilter(AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True),
         ],
         bool_checkboxgroup_label="Hide models",
         interactive=False,
@@ -201,4 +211,4 @@ with demo:
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=1800)
 scheduler.start()
-demo.queue(default_concurrency_limit=40).launch()
+demo.queue(default_concurrency_limit=40).launch()
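
For context on the block that ruff expands above: the Space pulls the requests and results datasets at startup and restarts itself when a download fails, plus on a 30-minute schedule. A minimal standalone sketch of that pattern (the repo id and local directory below are placeholders, and the real app calls API.restart_space instead of printing):

# Minimal sketch of the snapshot_download + restart fallback reformatted above.
# QUEUE_REPO and EVAL_REQUESTS_PATH are placeholder values, not the real env config.
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import snapshot_download

QUEUE_REPO = "demo-org/requests"  # placeholder dataset repo
EVAL_REQUESTS_PATH = "./eval-queue"  # placeholder local cache directory


def restart_space():
    # The real app calls API.restart_space(repo_id=REPO_ID); printing stands in here.
    print("would restart the Space now")


try:
    snapshot_download(
        repo_id=QUEUE_REPO,
        local_dir=EVAL_REQUESTS_PATH,
        repo_type="dataset",
        tqdm_class=None,
        etag_timeout=30,
    )
except Exception:
    restart_space()

# The Space also schedules a full restart every 30 minutes as a self-healing measure.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=1800)
scheduler.start()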
ruff.toml CHANGED
@@ -16,7 +16,7 @@ select = [
     "B",  # flake8-bugbear
     "TC",  # flake8-type-checking
     "I",  # isort
-    "T20",  # flake8-print
+    # "T20",  # flake8-print
     "C4",  # flake8-comprehensions
     "UP",  # pyupgrade
     "ARG001",  # unused arguments in functions
src/about.py CHANGED
@@ -1,6 +1,7 @@
 from dataclasses import dataclass
 from enum import Enum
 
+
 @dataclass
 class Task:
     benchmark: str
@@ -11,13 +12,13 @@ class Task:
 # Select your tasks here
 # ---------------------------------------------------
 class Tasks(Enum):
-    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
+    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
     task0 = Task("anli_r1", "acc", "ANLI")
     task1 = Task("logiqa", "acc_norm", "LogiQA")
 
-NUM_FEWSHOT = 0 # Change with your few shot
-# ---------------------------------------------------
 
+NUM_FEWSHOT = 0  # Change with your few shot
+# ---------------------------------------------------
 
 
 # Your leaderboard name
@@ -29,7 +30,7 @@ Intro text
 """
 
 # Which evaluations are you running? how can people reproduce what you have?
-LLM_BENCHMARKS_TEXT = f"""
+LLM_BENCHMARKS_TEXT = """
 ## How it works
 
 ## Reproducibility
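
The Task/Tasks pair above drives the rest of the leaderboard: each enum member wraps a benchmark key, a metric key, and a display column name, and other modules iterate over Tasks to build columns. A self-contained sketch using the two values from the diff (the field names follow how the template uses them elsewhere in this commit):

# Stand-alone illustration of the Task/Tasks pattern from src/about.py and how
# src/display/utils.py consumes it; values mirror the ones in the diff.
from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    benchmark: str  # task_key in the results json
    metric: str  # metric_key in the results json
    col_name: str  # name displayed in the leaderboard


class Tasks(Enum):
    task0 = Task("anli_r1", "acc", "ANLI")
    task1 = Task("logiqa", "acc_norm", "LogiQA")


BENCHMARK_COLS = [t.value.col_name for t in Tasks]
print(BENCHMARK_COLS)  # ['ANLI', 'LogiQA']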
src/display/utils.py CHANGED
@@ -5,6 +5,7 @@ import pandas as pd
 
 from src.about import Tasks
 
+
 def fields(raw_class):
     return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
 
@@ -20,12 +21,13 @@ class ColumnContent:
     hidden: bool = False
     never_hidden: bool = False
 
-## Leaderboard columns
+
+# Leaderboard columns
 auto_eval_column_dict = []
 # Init
 auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
 auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
-#Scores
+# Scores
 auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
 for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
@@ -43,7 +45,8 @@ auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sh
 # We use make dataclass to dynamically fill the scores from Tasks
 AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
 
-## For the queue columns in the submission tab
+
+# For the queue columns in the submission tab
 @dataclass(frozen=True)
 class EvalQueueColumn:  # Queue column
     model = ColumnContent("model", "markdown", True)
@@ -53,12 +56,13 @@ class EvalQueueColumn:  # Queue column
     weight_type = ColumnContent("weight_type", "str", "Original")
     status = ColumnContent("status", "str", True)
 
-## All the model information that we might need
+
+# All the model information that we might need
 @dataclass
 class ModelDetails:
     name: str
     display_name: str = ""
-    symbol: str = "" # emoji
+    symbol: str = ""  # emoji
 
 
 class ModelType(Enum):
@@ -83,11 +87,13 @@ class ModelType(Enum):
             return ModelType.IFT
         return ModelType.Unknown
 
+
 class WeightType(Enum):
     Adapter = ModelDetails("Adapter")
     Original = ModelDetails("Original")
     Delta = ModelDetails("Delta")
 
+
 class Precision(Enum):
     float16 = ModelDetails("float16")
     bfloat16 = ModelDetails("bfloat16")
@@ -100,6 +106,7 @@ class Precision(Enum):
             return Precision.bfloat16
         return Precision.Unknown
 
+
 # Column selection
 COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
 
@@ -107,4 +114,3 @@ EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
 EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
 
 BENCHMARK_COLS = [t.value.col_name for t in Tasks]
-
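
Most of this file builds AutoEvalColumn dynamically: ColumnContent instances are collected into auto_eval_column_dict, handed to make_dataclass, and the fields() helper then reads the defaults back for COLS and friends. A reduced, runnable sketch of that mechanism (ColumnContent is frozen here so its instances are accepted as dataclass defaults on recent Python versions; the real module leaves it unfrozen):

# Reduced sketch of the make_dataclass + fields() pattern used by src/display/utils.py.
from dataclasses import dataclass, make_dataclass


@dataclass(frozen=True)  # frozen so instances stay hashable and are valid defaults
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False


def fields(raw_class):
    # Same helper as in the diff: collect the class-level defaults, skipping dunders.
    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]


auto_eval_column_dict = [
    ["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)],
    ["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)],
    ["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)],
]
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)

COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
print(COLS)  # ['T', 'Model', 'Average ⬆️']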
src/envs.py CHANGED
@@ -4,9 +4,9 @@ from huggingface_hub import HfApi
 
 # Info to change for your repository
 # ----------------------------------
-TOKEN = os.environ.get("HF_TOKEN") # A read/write token for your org
+TOKEN = os.environ.get("HF_TOKEN")  # A read/write token for your org
 
-OWNER = "demo-leaderboard-backend" # Change to your org - don't forget to create a results and request dataset, with the correct format!
+OWNER = "demo-leaderboard-backend"  # Change to your org - don't forget to create a results and request dataset, with the correct format!
 # ----------------------------------
 
 REPO_ID = f"{OWNER}/leaderboard"
@@ -14,7 +14,7 @@ QUEUE_REPO = f"{OWNER}/requests"
 RESULTS_REPO = f"{OWNER}/results"
 
 # If you setup a cache later, just change HF_HOME
-CACHE_PATH=os.getenv("HF_HOME", ".")
+CACHE_PATH = os.getenv("HF_HOME", ".")
 
 # Local caches
 EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
src/leaderboard/read_evals.py CHANGED
@@ -8,28 +8,28 @@ import dateutil
 import numpy as np
 
 from src.display.formatting import make_clickable_model
-from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
+from src.display.utils import AutoEvalColumn, ModelType, Precision, Tasks, WeightType
 from src.submission.check_validity import is_model_on_hub
 
 
 @dataclass
 class EvalResult:
-    """Represents one full evaluation. Built from a combination of the result and request file for a given run.
-    """
-    eval_name: str # org_model_precision (uid)
-    full_model: str # org/model (path on hub)
-    org: str
+    """Represents one full evaluation. Built from a combination of the result and request file for a given run."""
+
+    eval_name: str  # org_model_precision (uid)
+    full_model: str  # org/model (path on hub)
+    org: str
     model: str
-    revision: str # commit hash, "" if main
+    revision: str  # commit hash, "" if main
     results: dict
     precision: Precision = Precision.Unknown
-    model_type: ModelType = ModelType.Unknown # Pretrained, fine tuned, ...
-    weight_type: WeightType = WeightType.Original # Original or Adapter
-    architecture: str = "Unknown"
+    model_type: ModelType = ModelType.Unknown  # Pretrained, fine tuned, ...
+    weight_type: WeightType = WeightType.Original  # Original or Adapter
+    architecture: str = "Unknown"
     license: str = "?"
     likes: int = 0
    num_params: int = 0
-    date: str = "" # submission date of request file
+    date: str = ""  # submission date of request file
     still_on_hub: bool = False
 
     @classmethod
@@ -85,10 +85,10 @@ class EvalResult:
             org=org,
             model=model,
             results=results,
-            precision=precision,
-            revision= config.get("model_sha", ""),
+            precision=precision,
+            revision=config.get("model_sha", ""),
             still_on_hub=still_on_hub,
-            architecture=architecture
+            architecture=architecture,
         )
 
     def update_with_request_file(self, requests_path):
@@ -96,7 +96,7 @@ class EvalResult:
         request_file = get_request_file_for_model(requests_path, self.full_model, self.precision.value.name)
 
        try:
-            with open(request_file, "r") as f:
+            with open(request_file) as f:
                 request = json.load(f)
             self.model_type = ModelType.from_str(request.get("model_type", ""))
             self.weight_type = WeightType[request.get("weight_type", "Original")]
@@ -105,7 +105,9 @@ class EvalResult:
             self.num_params = request.get("params", 0)
             self.date = request.get("submitted_time", "")
         except Exception:
-            print(f"Could not find request file for {self.org}/{self.model} with precision {self.precision.value.name}")
+            print(
+                f"Could not find request file for {self.org}/{self.model} with precision {self.precision.value.name}"
+            )
 
     def to_dict(self):
         """Converts the Eval Result to a dict compatible with our dataframe display"""
@@ -144,12 +146,9 @@ def get_request_file_for_model(requests_path, model_name, precision):
     request_file = ""
     request_files = sorted(request_files, reverse=True)
     for tmp_request_file in request_files:
-        with open(tmp_request_file, "r") as f:
+        with open(tmp_request_file) as f:
             req_content = json.load(f)
-            if (
-                req_content["status"] in ["FINISHED"]
-                and req_content["precision"] == precision.split(".")[-1]
-            ):
+            if req_content["status"] in ["FINISHED"] and req_content["precision"] == precision.split(".")[-1]:
                 request_file = tmp_request_file
     return request_file
 
@@ -188,7 +187,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
     results = []
     for v in eval_results.values():
         try:
-            v.to_dict() # we test if the dict version is complete
+            v.to_dict()  # we test if the dict version is complete
             results.append(v)
         except KeyError:  # not all eval values present
             continue
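
The condition that ruff collapses onto one line in get_request_file_for_model keeps a request tagged FINISHED whose precision matches the evaluated run. A self-contained sketch of that selection logic (the glob pattern and directory layout are assumptions for illustration; only the loop body comes from the diff):

# Sketch of the request-file selection logic reformatted above. The file naming
# scheme ("<model>_eval_request_*.json") is assumed for illustration.
import glob
import json
import os


def get_request_file_for_model(requests_path: str, model_name: str, precision: str) -> str:
    """Return a request file for model_name whose status is FINISHED at the given precision."""
    pattern = os.path.join(requests_path, f"{model_name}_eval_request_*.json")
    request_files = sorted(glob.glob(pattern), reverse=True)

    request_file = ""
    for tmp_request_file in request_files:
        with open(tmp_request_file) as f:
            req_content = json.load(f)
            if req_content["status"] in ["FINISHED"] and req_content["precision"] == precision.split(".")[-1]:
                request_file = tmp_request_file
    return request_file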
src/populate.py CHANGED
@@ -39,7 +39,9 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
             all_evals.append(data)
         elif ".md" not in entry:
             # this is a folder
-            sub_entries = [e for e in os.listdir(f"{save_path}/{entry}") if os.path.isfile(e) and not e.startswith(".")]
+            sub_entries = [
+                e for e in os.listdir(f"{save_path}/{entry}") if os.path.isfile(e) and not e.startswith(".")
+            ]
             for sub_entry in sub_entries:
                 file_path = os.path.join(save_path, entry, sub_entry)
                 with open(file_path) as fp:
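
The wrapped comprehension above lists candidate request files inside a per-organisation folder. A standalone sketch with placeholder paths (note that, as written, os.path.isfile(e) tests the bare entry name against the current working directory rather than against the sub-folder; this formatting-only commit preserves that behaviour verbatim):

# Stand-alone sketch of the folder scan wrapped above; paths are placeholders.
import os

save_path = "./eval-queue"  # placeholder local queue directory
entry = "some-org"  # placeholder organisation sub-folder
os.makedirs(f"{save_path}/{entry}", exist_ok=True)  # ensure the placeholder path exists

sub_entries = [
    e for e in os.listdir(f"{save_path}/{entry}") if os.path.isfile(e) and not e.startswith(".")
]
for sub_entry in sub_entries:
    file_path = os.path.join(save_path, entry, sub_entry)
    print(file_path)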
src/submission/check_validity.py CHANGED
@@ -10,6 +10,7 @@ from huggingface_hub.hf_api import ModelInfo
 from transformers import AutoConfig
 from transformers.models.auto.tokenization_auto import AutoTokenizer
 
+
 def check_model_card(repo_id: str) -> tuple[bool, str]:
     """Checks if the model card and license exist and have been filled"""
     try:
@@ -31,28 +32,35 @@ def check_model_card(repo_id: str) -> tuple[bool, str]:
 
     return True, ""
 
-def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
+
+def is_model_on_hub(
+    model_name: str, revision: str, token: str | None = None, trust_remote_code=False, test_tokenizer=False
+) -> tuple[bool, str]:
     """Checks if the model model_name is on the hub, and whether it (and its tokenizer) can be loaded with AutoClasses."""
     try:
-        config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
+        config = AutoConfig.from_pretrained(
+            model_name, revision=revision, trust_remote_code=trust_remote_code, token=token
+        )
         if test_tokenizer:
             try:
-                tk = AutoTokenizer.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
+                tk = AutoTokenizer.from_pretrained(
+                    model_name, revision=revision, trust_remote_code=trust_remote_code, token=token
+                )
             except ValueError as e:
+                return (False, f"uses a tokenizer which is not in a transformers release: {e}", None)
+            except Exception as e:
                 return (
                     False,
-                    f"uses a tokenizer which is not in a transformers release: {e}",
-                    None
+                    "'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?",
+                    None,
                 )
-            except Exception as e:
-                return (False, "'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?", None)
         return True, None, config
 
     except ValueError:
         return (
             False,
             "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow these models to be automatically submitted to the leaderboard.",
-            None
+            None,
         )
 
     except Exception as e:
@@ -70,10 +78,12 @@ def get_model_size(model_info: ModelInfo, precision: str):
     model_size = size_factor * model_size
     return model_size
 
+
 def get_model_arch(model_info: ModelInfo):
     """Gets the model architecture from the configuration"""
     return model_info.config.get("architectures", "Unknown")
 
+
 def already_submitted_models(requested_models_dir: str) -> set[str]:
     """Gather a list of already submitted models to avoid duplicates"""
     depth = 1
@@ -86,7 +96,7 @@ def already_submitted_models(requested_models_dir: str) -> set[str]:
         for file in files:
            if not file.endswith(".json"):
                 continue
-            with open(os.path.join(root, file), "r") as f:
+            with open(os.path.join(root, file)) as f:
                 info = json.load(f)
                 file_names.append(f"{info['model']}_{info['revision']}_{info['precision']}")
 
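
Besides the pure reformatting, the new is_model_on_hub signature spells the optional token as str | None, the PEP 604 union syntax that needs Python 3.10 or newer in evaluated annotations. The function keeps its three-value return convention of (ok, message, config), which submit.py unpacks in the next file; a hedged usage sketch (the model id and token are placeholders, and the call needs network access plus transformers installed):

# Usage sketch of is_model_on_hub's (ok, error_message, config) return convention.
# "org/some-model" is a placeholder repo id; pass a real read token if the repo is gated.
from src.submission.check_validity import is_model_on_hub

ok, error, config = is_model_on_hub(
    model_name="org/some-model",  # placeholder
    revision="main",
    token=None,
    trust_remote_code=False,
    test_tokenizer=True,
)
if not ok:
    print(f'Model "org/some-model" {error}')
else:
    print(type(config).__name__)  # the loaded config class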
src/submission/submit.py CHANGED
@@ -1,9 +1,9 @@
 import json
 import os
-from datetime import datetime, timezone
+from datetime import UTC, datetime, timezone
 
 from src.display.formatting import styled_error, styled_message, styled_warning
-from src.envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO
+from src.envs import API, EVAL_REQUESTS_PATH, QUEUE_REPO, TOKEN
 from src.submission.check_validity import (
     already_submitted_models,
     check_model_card,
@@ -14,6 +14,7 @@ from src.submission.check_validity import (
 REQUESTED_MODELS = None
 USERS_TO_SUBMISSION_DATES = None
 
+
 def add_new_eval(
     model: str,
     base_model: str,
@@ -34,7 +35,7 @@ def add_new_eval(
         model_path = model.split("/")[1]
 
     precision = precision.split(" ")[0]
-    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+    current_time = datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%SZ")
 
     if model_type is None or model_type == "":
         return styled_error("Please select a model type.")
@@ -45,7 +46,9 @@ def add_new_eval(
 
     # Is the model on the hub?
     if weight_type in ["Delta", "Adapter"]:
-        base_model_on_hub, error, _ = is_model_on_hub(model_name=base_model, revision=revision, token=TOKEN, test_tokenizer=True)
+        base_model_on_hub, error, _ = is_model_on_hub(
+            model_name=base_model, revision=revision, token=TOKEN, test_tokenizer=True
+        )
         if not base_model_on_hub:
             return styled_error(f'Base model "{base_model}" {error}')
 
uv.lock ADDED
The diff for this file is too large to render.