apsys committed
Commit 7ec1b66
1 Parent(s): 156dae6

Add an Arena Hard answer-file upload to the submission flow (new gr.File input passed through add_new_eval and pushed to the eval queue repo); comment out the Hub model checks, the empty-leaderboard check, and the scheduled space restart; point task0 at arenahard/score.

Files changed (5)
  1. app.py +5 -3
  2. src/about.py +2 -2
  3. src/display/utils.py +2 -2
  4. src/populate.py +7 -4
  5. src/submission/submit.py +38 -25
app.py CHANGED
@@ -58,8 +58,8 @@ LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS,
 ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
 
 def init_leaderboard(dataframe):
-    if dataframe is None or dataframe.empty:
-        raise ValueError("Leaderboard DataFrame is empty or None.")
+    # if dataframe is None or dataframe.empty:
+    #     raise ValueError("Leaderboard DataFrame is empty or None.")
     return Leaderboard(
         value=dataframe,
         datatype=[c.type for c in fields(AutoEvalColumn)],
@@ -172,6 +172,7 @@ with demo:
                         interactive=True,
                     )
                     base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
+                    ans_file = gr.File(label="Arena Hard Answer File", file_types=[".json"])
 
             submit_button = gr.Button("Submit Eval")
             submission_result = gr.Markdown()
@@ -184,6 +185,7 @@ with demo:
                     precision,
                     weight_type,
                     model_type,
+                    ans_file
                 ],
                 submission_result,
             )
@@ -199,6 +201,6 @@ with demo:
             )
 
 scheduler = BackgroundScheduler()
-scheduler.add_job(restart_space, "interval", seconds=1800)
+# scheduler.add_job(restart_space, "interval", seconds=1800)
 scheduler.start()
 demo.queue(default_concurrency_limit=40).launch()
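Note on the new ans_file input (a minimal standalone sketch, not part of the commit): with Gradio 4's default type="filepath" for gr.File, the click handler receives the upload as a path string, which is what submit.py later opens and reads. The handle_submit handler below is hypothetical.

import gradio as gr

def handle_submit(ans_file):
    # With the gr.File default type="filepath", ans_file is a str path to a
    # temporary copy of the uploaded JSON (or None if nothing was uploaded).
    if ans_file is None:
        return "No answer file uploaded."
    with open(ans_file) as f:
        data = f.read()
    return f"Received {len(data)} bytes of Arena Hard answers."

with gr.Blocks() as demo:
    ans_file = gr.File(label="Arena Hard Answer File", file_types=[".json"])
    submit_button = gr.Button("Submit Eval")
    submission_result = gr.Markdown()
    submit_button.click(handle_submit, [ans_file], submission_result)

demo.launch()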
src/about.py CHANGED
@@ -11,8 +11,8 @@ class Task:
 # Select your tasks here
 # ---------------------------------------------------
 class Tasks(Enum):
-    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
-    task0 = Task("anli_r1", "acc", "ANLI")
+    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
+    task0 = Task("arenahard", "score", "score")
     task1 = Task("logiqa", "acc_norm", "LogiQA")
 
 NUM_FEWSHOT = 0 # Change with your few shot
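For reference, the Task tuple is (task key in the results JSON, metric key, display name), so the new entry reads results["arenahard"]["score"]. A sketch of the results layout this assumes (illustrative values and keys, following the template's usual results-file shape):

# Hypothetical fragment of a results file that the new Tasks entry would consume.
example_result = {
    "config": {"model_name": "org/model", "model_dtype": "torch.float16"},
    "results": {
        "arenahard": {"score": 0.42},   # Tasks.task0 -> leaderboard column "score"
        "logiqa": {"acc_norm": 0.31},   # Tasks.task1 -> leaderboard column "LogiQA"
    },
}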
src/display/utils.py CHANGED
@@ -12,7 +12,7 @@ def fields(raw_class):
 # These classes are for user facing column names,
 # to avoid having to change them all around the code
 # when a modif is needed
-@dataclass
+@dataclass(frozen=True)
 class ColumnContent:
     name: str
     type: str
@@ -23,7 +23,7 @@ class ColumnContent:
 ## Leaderboard columns
 auto_eval_column_dict = []
 # Init
-auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
+# auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
 auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
 #Scores
 auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
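On @dataclass(frozen=True): frozen dataclass instances are immutable and hashable, which is likely what the change is for, since Python 3.11+ rejects unhashable (non-frozen) dataclass instances used as field defaults and the template builds AutoEvalColumn with these ColumnContent instances as defaults. A minimal sketch of the behavioural difference (field list follows the template):

from dataclasses import dataclass, FrozenInstanceError

@dataclass(frozen=True)
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False

col = ColumnContent("Model", "markdown", True, never_hidden=True)
hash(col)            # works: frozen=True together with eq=True generates __hash__
try:
    col.name = "X"   # attribute assignment is blocked on frozen instances
except FrozenInstanceError:
    pass
# Without frozen=True, eq=True sets __hash__ = None, so instances are unhashable.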
src/populate.py CHANGED
@@ -13,12 +13,15 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     raw_data = get_raw_eval_results(results_path, requests_path)
     all_data_json = [v.to_dict() for v in raw_data]
 
-    df = pd.DataFrame.from_records(all_data_json)
-    df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
-    df = df[cols].round(decimals=2)
+    df = pd.DataFrame.from_records(all_data_json,columns=cols)
+    df['model']="nothing"
+    # df.columns = cols
+    # df.iloc[0]= create dummy
+    # df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
+    # df = df[cols].round(decimals=2)
 
     # filter out if any of the benchmarks have not been produced
-    df = df[has_no_nan_values(df, benchmark_cols)]
+    # df = df[has_no_nan_values(df, benchmark_cols)]
     return df
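The rewritten get_leaderboard_df builds the frame directly from the records with an explicit column list and fills a placeholder model column, so an empty results set yields an empty but well-formed DataFrame rather than an error, which pairs with the empty-check being commented out in app.py. A small pandas sketch with illustrative column names standing in for COLS:

import pandas as pd

cols = ["T", "Model", "Average ⬆️", "score"]   # illustrative, stands in for COLS
all_data_json = []                             # no eval results processed yet

# With columns= given, from_records returns an empty frame with the right schema
# instead of a zero-column frame.
df = pd.DataFrame.from_records(all_data_json, columns=cols)
df["model"] = "nothing"                        # placeholder column, as in the commit

print(list(df.columns))   # ['T', 'Model', 'Average ⬆️', 'score', 'model']
print(len(df))            # 0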
src/submission/submit.py CHANGED
@@ -6,9 +6,9 @@ from src.display.formatting import styled_error, styled_message, styled_warning
 from src.envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO
 from src.submission.check_validity import (
     already_submitted_models,
-    check_model_card,
-    get_model_size,
-    is_model_on_hub,
+    # check_model_card,
+    # get_model_size,
+    # is_model_on_hub,
 )
 
 REQUESTED_MODELS = None
@@ -21,6 +21,7 @@ def add_new_eval(
     precision: str,
     weight_type: str,
     model_type: str,
+    ans_file: str,
 ):
     global REQUESTED_MODELS
     global USERS_TO_SUBMISSION_DATES
@@ -44,33 +45,33 @@ def add_new_eval(
         revision = "main"
 
     # Is the model on the hub?
-    if weight_type in ["Delta", "Adapter"]:
-        base_model_on_hub, error, _ = is_model_on_hub(model_name=base_model, revision=revision, token=TOKEN, test_tokenizer=True)
-        if not base_model_on_hub:
-            return styled_error(f'Base model "{base_model}" {error}')
+    # if weight_type in ["Delta", "Adapter"]:
+    #     base_model_on_hub, error, _ = is_model_on_hub(model_name=base_model, revision=revision, token=TOKEN, test_tokenizer=True)
+    #     if not base_model_on_hub:
+    #         return styled_error(f'Base model "{base_model}" {error}')
 
-    if not weight_type == "Adapter":
-        model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, token=TOKEN, test_tokenizer=True)
-        if not model_on_hub:
-            return styled_error(f'Model "{model}" {error}')
+    # if not weight_type == "Adapter":
+    #     model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, token=TOKEN, test_tokenizer=True)
+    #     if not model_on_hub:
+    #         return styled_error(f'Model "{model}" {error}')
 
     # Is the model info correctly filled?
-    try:
-        model_info = API.model_info(repo_id=model, revision=revision)
-    except Exception:
-        return styled_error("Could not get your model information. Please fill it up properly.")
+    # try:
+    #     model_info = API.model_info(repo_id=model, revision=revision)
+    # except Exception:
+    #     return styled_error("Could not get your model information. Please fill it up properly.")
 
-    model_size = get_model_size(model_info=model_info, precision=precision)
+    # model_size = get_model_size(model_info=model_info, precision=precision)
 
     # Were the model card and license filled?
-    try:
-        license = model_info.cardData["license"]
-    except Exception:
-        return styled_error("Please select a license for your model")
+    # try:
+    #     license = model_info.cardData["license"]
+    # except Exception:
+    #     return styled_error("Please select a license for your model")
 
-    modelcard_OK, error_msg = check_model_card(model)
-    if not modelcard_OK:
-        return styled_error(error_msg)
+    # modelcard_OK, error_msg = check_model_card(model)
+    # if not modelcard_OK:
+    #     return styled_error(error_msg)
 
     # Seems good, creating the eval
     print("Adding new eval")
@@ -84,8 +85,8 @@ def add_new_eval(
         "status": "PENDING",
         "submitted_time": current_time,
         "model_type": model_type,
-        "likes": model_info.likes,
-        "params": model_size,
+        "likes": "",
+        "params": "",
         "license": license,
         "private": False,
     }
@@ -98,10 +99,14 @@ def add_new_eval(
     OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
    os.makedirs(OUT_DIR, exist_ok=True)
     out_path = f"{OUT_DIR}/{model_path}_eval_request_False_{precision}_{weight_type}.json"
+    out_path_upload = f"{OUT_DIR}/{model_path}_eval_request_False_{precision}_{weight_type}_toeval.json"
 
     with open(out_path, "w") as f:
         f.write(json.dumps(eval_entry))
 
+    with open(out_path_upload, "w") as f:
+        f.write(open(ans_file).read())
+
     print("Uploading eval file")
     API.upload_file(
         path_or_fileobj=out_path,
@@ -110,9 +115,17 @@ def add_new_eval(
         repo_type="dataset",
         commit_message=f"Add {model} to eval queue",
     )
+    API.upload_file(
+        path_or_fileobj=out_path_upload,
+        path_in_repo=out_path_upload.split("eval-queue/")[1],
+        repo_id=QUEUE_REPO,
+        repo_type="dataset",
+        commit_message=f"Add {model} to eval queue",
+    )
 
     # Remove the local file
     os.remove(out_path)
+    os.remove(out_path_upload)
 
     return styled_message(
         "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."