import json import logging import os import argilla as rg from huggingface_hub import HfApi logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) if __name__ == "__main__": logger.info("*** Initializing Argilla session ***") rg.init( api_url=os.getenv("ARGILLA_API_URL"), api_key=os.getenv("ARGILLA_API_KEY"), extra_headers={"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"}, ) logger.info("*** Fetching dataset from Argilla ***") dataset = rg.FeedbackDataset.from_argilla( os.getenv("SOURCE_DATASET"), workspace=os.getenv("SOURCE_WORKSPACE"), ) logger.info("*** Filtering records by `response_status` ***") dataset = dataset.filter_by(response_status=["submitted"]) # type: ignore logger.info("*** Calculating users and annotation count ***") output = {} for record in dataset.records: for response in record.responses: if response.user_id not in output: output[response.user_id] = 0 output[response.user_id] += 1 for key in list(output.keys()): output[rg.User.from_id(key).username] = output.pop(key) logger.info("*** Users and annotation count successfully calculated! ***") logger.info("*** Dumping Python dict into `stats.json` ***") with open("stats.json", "w") as file: json.dump(output, file, indent=4) logger.info("*** Uploading `stats.json` to Hugging Face Hub ***") api = HfApi(token=os.getenv("HF_TOKEN")) api.upload_file( path_or_fileobj="stats.json", path_in_repo="stats.json", repo_id="DIBT/prompt-collective-dashboard", repo_type="space", ) logger.info("*** `stats.json` successfully uploaded to Hugging Face Hub! ***")