clefourrier and alozowski committed
Commit a5d34d3
1 Parent(s): b7d036c

performance-improvement (#705)


- read_evals initial change (705a80cbc41d99ade1f153597b6a9615e9e49a6e)
- improved logging (dadbd309a2806d85f67d888071f2f462a8631573)
- wip improvement (79b2cd565d40f76388770b0703b07431d41efe2a)
- more read_evals.py improvement (9b133aab61075d213546baa519cd392206ea5d05)
- Updated app.py download_dataset function (87e47c26a99aa08208c7aca46842ef9a3f2b078d)
- Fixing WIP (f86eaae89ef990a5d0066fb92946b8d8648adfa4)
- Changes as per comments (c74b7d7ce23fd9f7df60deddf8789e51288d1821)


Co-authored-by: Alina Lozovskaya <alozowski@users.noreply.huggingface.co>

app.py CHANGED
@@ -1,4 +1,5 @@
 import os
+import time
 import logging
 import gradio as gr
 import pandas as pd
@@ -49,6 +50,9 @@ from src.tools.collections import update_collections
 from src.tools.plots import create_metric_plot_obj, create_plot_df, create_scores_df
 
 
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
 # Start ephemeral Spaces on PRs (see config in README.md)
 enable_space_ci()
 
@@ -57,12 +61,24 @@ def restart_space():
     API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
 
 
-def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3):
-    """Attempt to download dataset with retries."""
+def time_diff_wrapper(func):
+    def wrapper(*args, **kwargs):
+        start_time = time.time()
+        result = func(*args, **kwargs)
+        end_time = time.time()
+        diff = end_time - start_time
+        logging.info(f"Time taken for {func.__name__}: {diff} seconds")
+        return result
+    return wrapper
+
+
+@time_diff_wrapper
+def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3, backoff_factor=1.5):
+    """Download dataset with exponential backoff retries."""
     attempt = 0
     while attempt < max_attempts:
         try:
-            print(f"Downloading {repo_id} to {local_dir}")
+            logging.info(f"Downloading {repo_id} to {local_dir}")
             snapshot_download(
                 repo_id=repo_id,
                 local_dir=local_dir,
@@ -71,21 +87,25 @@ def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3):
                 etag_timeout=30,
                 max_workers=8,
             )
+            logging.info("Download successful")
             return
         except Exception as e:
-            logging.error(f"Error downloading {repo_id}: {e}")
+            wait_time = backoff_factor ** attempt
+            logging.error(f"Error downloading {repo_id}: {e}, retrying in {wait_time}s")
+            time.sleep(wait_time)
             attempt += 1
-        if attempt == max_attempts:
-            restart_space()
-
+    raise Exception(f"Failed to download {repo_id} after {max_attempts} attempts")
 
 def init_space(full_init: bool = True):
     """Initializes the application space, loading only necessary data."""
     if full_init:
         # These downloads only occur on full initialization
-        download_dataset(QUEUE_REPO, EVAL_REQUESTS_PATH)
-        download_dataset(DYNAMIC_INFO_REPO, DYNAMIC_INFO_PATH)
-        download_dataset(RESULTS_REPO, EVAL_RESULTS_PATH)
+        try:
+            download_dataset(QUEUE_REPO, EVAL_REQUESTS_PATH)
+            download_dataset(DYNAMIC_INFO_REPO, DYNAMIC_INFO_PATH)
+            download_dataset(RESULTS_REPO, EVAL_RESULTS_PATH)
+        except Exception:
+            restart_space()
 
         # Always retrieve the leaderboard DataFrame
         raw_data, original_df = get_leaderboard_df(
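For reference, a minimal self-contained sketch of the retry-with-backoff pattern the reworked download_dataset uses. The download_with_backoff helper and its simulated error below are hypothetical stand-ins for the snapshot_download call; only the decorator mirrors the time_diff_wrapper added above. With backoff_factor=1.5 the waits grow as 1.0 s, 1.5 s, 2.25 s before the final exception:

    import logging
    import time

    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

    def time_diff_wrapper(func):
        # Same idea as the decorator added in app.py: log the wall-clock time of each call
        def wrapper(*args, **kwargs):
            start_time = time.time()
            result = func(*args, **kwargs)
            logging.info("Time taken for %s: %s seconds", func.__name__, time.time() - start_time)
            return result
        return wrapper

    @time_diff_wrapper
    def download_with_backoff(max_attempts=3, backoff_factor=1.5):
        """Hypothetical stand-in for download_dataset: retry with exponential backoff, then raise."""
        for attempt in range(max_attempts):
            try:
                raise OSError("simulated network error")  # stands in for a failing snapshot_download
            except OSError as e:
                wait_time = backoff_factor ** attempt  # 1.0, 1.5, 2.25 seconds
                logging.error("Attempt %d failed (%s), retrying in %.2fs", attempt + 1, e, wait_time)
                time.sleep(wait_time)
        raise Exception(f"Failed after {max_attempts} attempts")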
pyproject.toml CHANGED
@@ -1,9 +1,15 @@
 [tool.ruff]
-# Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
-lint.select = ["E", "F"]
-lint.ignore = ["E501"] # line too long (black is taking care of this)
-line-length = 119
-lint.fixable = ["A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT"]
+line-length = 120
+target-version = "py312"
+include = ["*.py", "*.pyi", "**/pyproject.toml", "*.ipynb"]
+ignore=["I","EM","FBT","TRY003","S101","D101","D102","D103","D104","D105","G004","D107","FA102"]
+fixable=["ALL"]
+select=["ALL"]
+
+[tool.ruff.lint]
+select = ["E", "F"]
+fixable = ["ALL"]
+ignore = ["E501"] # line too long (black is taking care of this)
 
 [tool.isort]
 profile = "black"
src/display/utils.py CHANGED
@@ -1,9 +1,30 @@
 from dataclasses import dataclass, make_dataclass
 from enum import Enum
 import json
+import logging
+from datetime import datetime
 import pandas as pd
 
 
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+def parse_datetime(datetime_str):
+    formats = [
+        "%Y-%m-%dT%H-%M-%S.%f",  # Format with dashes
+        "%Y-%m-%dT%H:%M:%S.%f",  # Standard format with colons
+        "%Y-%m-%dT%H %M %S.%f",  # Spaces as separator
+    ]
+
+    for fmt in formats:
+        try:
+            return datetime.strptime(datetime_str, fmt)
+        except ValueError:
+            continue
+    # in rare cases set unix start time for files with incorrect time (legacy files)
+    logging.error(f"No valid date format found for: {datetime_str}")
+    return datetime(1970, 1, 1)
+
 def load_json_data(file_path):
     """Safely load JSON data from a file."""
     try:
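A quick usage sketch of the new parse_datetime helper, assuming it is imported from src.display.utils (the timestamps below are made up; the last call shows the legacy-file fallback):

    from src.display.utils import parse_datetime

    print(parse_datetime("2024-03-09T14-30-05.123456"))  # filename-style variant with dashes
    print(parse_datetime("2024-03-09T14:30:05.123456"))  # standard variant with colons
    print(parse_datetime("not-a-timestamp"))             # logs an error, returns datetime(1970, 1, 1)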
src/envs.py CHANGED
@@ -26,7 +26,7 @@ if not os.access(HF_HOME, os.W_OK):
     HF_HOME = "."
     os.environ["HF_HOME"] = HF_HOME
 else:
-    print(f"Write access confirmed for HF_HOME")
+    print("Write access confirmed for HF_HOME")
 
 EVAL_REQUESTS_PATH = os.path.join(HF_HOME, "eval-queue")
 EVAL_RESULTS_PATH = os.path.join(HF_HOME, "eval-results")
src/leaderboard/filter_models.py CHANGED
@@ -1,6 +1,7 @@
 from src.display.formatting import model_hyperlink
 from src.display.utils import AutoEvalColumn
 
+
 # Models which have been flagged by users as being problematic for a reason or another
 # (Model name to forum discussion link)
 FLAGGED_MODELS = {
@@ -137,10 +138,7 @@ def flag_models(leaderboard_data: list[dict]):
             flag_key = "merged"
         else:
             flag_key = model_data[AutoEvalColumn.fullname.name]
-
-        print(f"model check: {flag_key}")
         if flag_key in FLAGGED_MODELS:
-            print(f"Flagged model: {flag_key}")
             issue_num = FLAGGED_MODELS[flag_key].split("/")[-1]
             issue_link = model_hyperlink(
                 FLAGGED_MODELS[flag_key],
src/leaderboard/read_evals.py CHANGED
@@ -1,55 +1,58 @@
-import glob
 import json
+from pathlib import Path
+from json import JSONDecodeError
+import logging
 import math
-import os
-from dataclasses import dataclass
 
-import dateutil
+from dataclasses import dataclass, field
+from typing import Optional, Dict, List
+
+from tqdm import tqdm
+from tqdm.contrib.logging import logging_redirect_tqdm
+
 import numpy as np
 
 from src.display.formatting import make_clickable_model
-from src.display.utils import AutoEvalColumn, ModelType, Precision, Tasks, WeightType
+from src.display.utils import AutoEvalColumn, ModelType, Precision, Tasks, WeightType, parse_datetime
 
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
 @dataclass
 class EvalResult:
     # Also see src.display.utils.AutoEvalColumn for what will be displayed.
-    eval_name: str  # org_model_precision (uid)
-    full_model: str  # org/model (path on hub)
-    org: str
+    eval_name: str  # org_model_precision (uid)
+    full_model: str  # org/model (path on hub)
+    org: Optional[str]
     model: str
-    revision: str  # commit hash, "" if main
-    results: dict
+    revision: str  # commit hash, "" if main
+    results: Dict[str, float]
    precision: Precision = Precision.Unknown
-    model_type: ModelType = ModelType.Unknown  # Pretrained, fine tuned, ...
-    weight_type: WeightType = WeightType.Original  # Original or Adapter
-    architecture: str = "Unknown"  # From config file
+    model_type: ModelType = ModelType.Unknown  # Pretrained, fine tuned, ...
+    weight_type: WeightType = WeightType.Original
+    architecture: str = "Unknown"  # From config file
     license: str = "?"
     likes: int = 0
     num_params: int = 0
-    date: str = ""  # submission date of request file
+    date: str = ""  # submission date of request file
     still_on_hub: bool = True
     is_merge: bool = False
     flagged: bool = False
     status: str = "FINISHED"
-    tags: list = None
-
+    # List of tags, initialized to a new empty list for each instance to avoid the pitfalls of mutable default arguments.
+    tags: List[str] = field(default_factory=list)
+
+
     @classmethod
-    def init_from_json_file(self, json_filepath):
-        """Inits the result from the specific model result file"""
-        with open(json_filepath) as fp:
+    def init_from_json_file(cls, json_filepath: str) -> 'EvalResult':
+        with open(json_filepath, 'r') as fp:
             data = json.load(fp)
 
-        # We manage the legacy config format
-        config = data.get("config_general")
-
-        # Precision
-        precision = Precision.from_str(config.get("model_dtype"))
-
-        # Get model and org
-        org_and_model = config.get("model_name")
-        org_and_model = org_and_model.split("/", 1)
-
+        config = data.get("config_general", {})
+        precision = Precision.from_str(config.get("model_dtype", "unknown"))
+        org_and_model = config.get("model_name", "").split("/", 1)
+        org = org_and_model[0] if len(org_and_model) > 1 else None
+        model = org_and_model[-1]
         if len(org_and_model) == 1:
             org = None
             model = org_and_model[0]
@@ -60,25 +63,53 @@ class EvalResult:
         result_key = f"{org}_{model}_{precision.value.name}"
         full_model = "/".join(org_and_model)
 
-        # Extract results available in this file (some results are split in several files)
+        results = cls.extract_results(data)  # Properly call the method to extract results
+
+        return cls(
+            eval_name=result_key,
+            full_model=full_model,
+            org=org,
+            model=model,
+            results=results,
+            precision=precision,
+            revision=config.get("model_sha", "")
+        )
+
+    @staticmethod
+    def extract_results(data: Dict) -> Dict[str, float]:
+        """
+        Extract and process benchmark results from a given dict.
+
+        Parameters:
+        - data (Dict): A dictionary containing benchmark data. This dictionary must
+          include 'versions' and 'results' keys with respective sub-data.
+
+        Returns:
+        - Dict[str, float]: A dictionary where keys are benchmark names and values
+          are the processed average scores as percentages.
+
+        Notes:
+        - The method specifically checks for certain benchmark names to skip outdated entries.
+        - Handles NaN values by setting the corresponding benchmark result to 0.0.
+        - Averages scores across metrics for benchmarks found in the data, in a percentage format.
+        """
         results = {}
         for task in Tasks:
             task = task.value
             # We skip old mmlu entries
-            wrong_mmlu_version = False
             if task.benchmark == "hendrycksTest":
                 for mmlu_k in ["harness|hendrycksTest-abstract_algebra|5", "hendrycksTest-abstract_algebra"]:
                     if mmlu_k in data["versions"] and data["versions"][mmlu_k] == 0:
-                        wrong_mmlu_version = True
+                        continue
 
-            if wrong_mmlu_version:
-                continue
-
-            # Some truthfulQA values are NaNs
-            if task.benchmark == "truthfulqa:mc" and "harness|truthfulqa:mc|0" in data["results"]:
-                if math.isnan(float(data["results"]["harness|truthfulqa:mc|0"][task.metric])):
-                    results[task.benchmark] = 0.0
-                    continue
+            # Some benchmark values are NaNs, mostly truthfulQA
+            # Would be more optimal (without the whole dict iteration) if benchmark name was same as key in results
+            # e.g. not harness|truthfulqa:mc|0 but truthfulqa:mc
+            for k, v in data["results"].items():
+                if task.benchmark in k:
+                    if math.isnan(float(v[task.metric])):
+                        results[task.benchmark] = 0.0
+                        continue
 
             # We average all scores of a given metric (mostly for mmlu)
             accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark in k])
@@ -87,40 +118,54 @@ class EvalResult:
 
             mean_acc = np.mean(accs) * 100.0
             results[task.benchmark] = mean_acc
+
+        return results
 
-        return self(
-            eval_name=result_key,
-            full_model=full_model,
-            org=org,
-            model=model,
-            results=results,
-            precision=precision,
-            revision=config.get("model_sha", ""),
-        )
 
     def update_with_request_file(self, requests_path):
-        """Finds the relevant request file for the current model and updates info with it"""
-        request_file = get_request_file_for_model(requests_path, self.full_model, self.precision.value.name)
-
+        """Finds the relevant request file for the current model and updates info with it."""
         try:
+            request_file = get_request_file_for_model(requests_path, self.full_model, self.precision.value.name)
+            if request_file is None:
+                logging.warning(f"No request file for {self.org}/{self.model}")
+                self.status = "FAILED"
+                return
+
             with open(request_file, "r") as f:
                 request = json.load(f)
+
             self.model_type = ModelType.from_str(request.get("model_type", "Unknown"))
             self.weight_type = WeightType[request.get("weight_type", "Original")]
-            self.num_params = request.get("params", 0)
+            self.num_params = int(request.get("params", 0))  # Ensuring type safety
             self.date = request.get("submitted_time", "")
             self.architecture = request.get("architectures", "Unknown")
             self.status = request.get("status", "FAILED")
-        except Exception:
+
+        except FileNotFoundError:
+            self.status = "FAILED"
+            logging.error(f"Request file: {request_file} not found for {self.org}/{self.model}")
+        except JSONDecodeError:
+            self.status = "FAILED"
+            logging.error(f"Error decoding JSON from the request file for {self.org}/{self.model}")
+        except KeyError as e:
             self.status = "FAILED"
-            print(f"Could not find request file for {self.org}/{self.model}")
+            logging.error(f"Key error {e} in processing request file for {self.org}/{self.model}")
+        except Exception as e:  # Catch-all for any other unexpected exceptions
+            self.status = "FAILED"
+            logging.error(f"Unexpected error {e} for {self.org}/{self.model}")
+
 
     def update_with_dynamic_file_dict(self, file_dict):
+        """Update object attributes based on the provided dictionary, with error handling for missing keys and type validation."""
+        # Default values set for optional or potentially missing keys.
         self.license = file_dict.get("license", "?")
-        self.likes = file_dict.get("likes", 0)
-        self.still_on_hub = file_dict["still_on_hub"]
+        self.likes = int(file_dict.get("likes", 0))  # Ensure likes is treated as an integer
+        self.still_on_hub = file_dict.get("still_on_hub", False)  # Default to False if key is missing
         self.tags = file_dict.get("tags", [])
-        self.flagged = any("flagged" in tag for tag in self.tags)
+
+        # Calculate `flagged` only if 'tags' is not empty and avoid calculating each time
+        self.flagged = "flagged" in self.tags
+
 
     def to_dict(self):
         """Converts the Eval Result to a dict compatible with our dataframe display"""
@@ -149,55 +194,48 @@ class EvalResult:
         data_dict[task.value.col_name] = self.results[task.value.benchmark]
 
         return data_dict
-
+
 
 def get_request_file_for_model(requests_path, model_name, precision):
     """Selects the correct request file for a given model. Only keeps runs tagged as FINISHED"""
-    request_files = os.path.join(
-        requests_path,
-        f"{model_name}_eval_request_*.json",
-    )
-    request_files = glob.glob(request_files)
-
-    # Select correct request file (precision)
-    request_file = ""
-    request_files = sorted(request_files, reverse=True)
-    for tmp_request_file in request_files:
-        with open(tmp_request_file, "r") as f:
+    requests_path = Path(requests_path)
+    pattern = f"{model_name}_eval_request_*.json"
+
+    # Using pathlib to find files matching the pattern
+    request_files = list(requests_path.glob(pattern))
+
+    # Sort the files by name in descending order to mimic 'reverse=True'
+    request_files.sort(reverse=True)
+
+    # Select the correct request file based on 'status' and 'precision'
+    request_file = None
+    for request_file in request_files:
+        with request_file.open("r") as f:
             req_content = json.load(f)
-            if req_content["status"] in ["FINISHED"] and req_content["precision"] == precision.split(".")[-1]:
-                request_file = tmp_request_file
+            if req_content["status"] == "FINISHED" and req_content["precision"] == precision.split(".")[-1]:
+                request_file = str(request_file)
+
+    # Return empty string if no file found that matches criteria
     return request_file
 
 
 def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: str) -> list[EvalResult]:
     """From the path of the results folder root, extract all needed info for results"""
-    model_result_filepaths = []
-
-    for root, _, files in os.walk(results_path):
-        # We should only have json files in model results
-        if len(files) == 0 or any([not f.endswith(".json") for f in files]):
-            continue
-
-        # Sort the files by date
-        try:
-            files.sort(key=lambda x: x.removesuffix(".json").removeprefix("results_")[:-7])
-        except dateutil.parser._parser.ParserError:
-            files = [files[-1]]
-
-        for file in files:
-            model_result_filepaths.append(os.path.join(root, file))
-
     with open(dynamic_path) as f:
         dynamic_data = json.load(f)
+
+    results_path = Path(results_path)
+    model_files = list(results_path.rglob('results_*.json'))
+    model_files.sort(key=lambda file: parse_datetime(file.stem.removeprefix("results_")))
 
     eval_results = {}
-    for model_result_filepath in model_result_filepaths:
+    # Wrap model_files iteration with tqdm for progress display
+    for model_result_filepath in tqdm(model_files, desc="Processing model files"):
         # Creation of result
         eval_result = EvalResult.init_from_json_file(model_result_filepath)
-        eval_result.update_with_request_file(requests_path)
-        if eval_result.full_model == "databricks/dbrx-base":
-            print("WE HERE")
+        with logging_redirect_tqdm():
+            eval_result.update_with_request_file(requests_path)
+
         if eval_result.full_model in dynamic_data:
            eval_result.update_with_dynamic_file_dict(dynamic_data[eval_result.full_model])
         # Hardcoding because of gating problem
@@ -212,12 +250,14 @@ def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: st
         eval_results[eval_name] = eval_result
 
     results = []
-    for v in eval_results.values():
+    for k, v in eval_results.items():
         try:
             if v.status == "FINISHED":
                 v.to_dict()  # we test if the dict version is complete
                 results.append(v)
-        except KeyError:  # not all eval values present
+        except KeyError as e:
+            logging.error(f"Error while checking model {k} {v.date} json, no key: {e}")  # not all eval values present
            continue
 
     return results
+
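The main read_evals.py speed-up replaces the per-directory os.walk with a single recursive glob over the results tree, sorted with parse_datetime so the latest result file for a model is processed last. A minimal sketch of that pattern, assuming a local ./eval-results directory (the path and sample filename are illustrative):

    from pathlib import Path

    from src.display.utils import parse_datetime

    results_path = Path("./eval-results")  # illustrative location of downloaded result files
    model_files = list(results_path.rglob("results_*.json"))
    # Sort by the timestamp embedded in the filename, e.g. results_2024-03-09T14-30-05.123456.json
    model_files.sort(key=lambda file: parse_datetime(file.stem.removeprefix("results_")))
    for model_result_filepath in model_files:
        print(model_result_filepath)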
src/populate.py CHANGED
@@ -52,4 +52,3 @@ def get_leaderboard_df(results_path, requests_path, dynamic_path, cols, benchmar
     df = df[cols].round(decimals=2)
     df = df[has_no_nan_values(df, benchmark_cols)]
     return raw_data, df
-
src/tools/collections.py CHANGED
@@ -73,4 +73,4 @@ def update_collections(df: DataFrame):
         try:
             delete_collection_item(collection_slug=PATH_TO_COLLECTION, item_object_id=item_id, token=H4_TOKEN)
         except HfHubHTTPError:
-            continue
+            continue