Wauplin HF staff committed on
Commit
5ae4839
1 Parent(s): 72ac185

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +40 -33
README.md CHANGED
@@ -28,16 +28,9 @@ model-index:
28
  args:
29
  num_few_shot: 25
30
  metrics:
31
- - type: acc
32
- name: accuracy
33
- value: 0.590443686006826
34
- - type: acc_stderr
35
- value: 0.014370358632472437
36
  - type: acc_norm
37
  name: normalized accuracy
38
  value: 0.6203071672354948
39
- - type: acc_norm_stderr
40
- value: 0.01418211986697487
41
  source:
42
  name: Open LLM Leaderboard
43
  url: https://huggingface.co/datasets/open-llm-leaderboard/details_HuggingFaceH4__zephyr-7b-beta_public
@@ -48,21 +41,14 @@ model-index:
48
  name: Text Generation
49
  dataset:
50
  name: HellaSwag (10-Shot)
51
- type: Rowan/hellaswag
52
- split: test # or validation?
53
  args:
54
  num_few_shot: 10
55
  metrics:
56
- - type: acc
57
- name: accuracy
58
- value: 0.6491734714200359
59
- - type: acc_stderr
60
- value: 0.004762534245488399
61
  - type: acc_norm
62
  name: normalized accuracy
63
  value: 0.8435570603465445
64
- - type: acc_norm_stderr
65
- value: 0.003625323221166244
66
  source:
67
  name: Open LLM Leaderboard
68
  url: https://huggingface.co/datasets/open-llm-leaderboard/details_HuggingFaceH4__zephyr-7b-beta_public
@@ -74,20 +60,13 @@ model-index:
74
  dataset:
75
  name: Drop (3-Shot)
76
  type: drop
77
- split: test
78
  args:
79
  num_few_shot: 3
80
  metrics:
81
- - type: em
82
- name: exact match
83
- value: 0.004928691275167785
84
- - type: em_stderr
85
- value: 0.0007171872517059793
86
  - type: f1
87
  name: f1 score
88
  value: 0.09662437080536909
89
- - type: f1_stderr
90
- value: 0.0018807376338089597
91
  source:
92
  name: Open LLM Leaderboard
93
  url: https://huggingface.co/datasets/open-llm-leaderboard/details_HuggingFaceH4__zephyr-7b-beta_public
@@ -104,14 +83,8 @@ model-index:
104
  args:
105
  num_few_shot: 0
106
  metrics:
107
- - type: mc1
108
- value: 0.40636474908200737
109
- - type: mc1_stderr
110
- value: 0.017193835812093893
111
  - type: mc2
112
  value: 0.5744916942762855
113
- - type: mc2_stderr
114
- value: 0.015742095840959796
115
  source:
116
  name: Open LLM Leaderboard
117
  url: https://huggingface.co/datasets/open-llm-leaderboard/details_HuggingFaceH4__zephyr-7b-beta_public
@@ -123,6 +96,7 @@ model-index:
123
  dataset:
124
  name: GSM8k (5-shot)
125
  type: gsm8k
 
126
  split: test
127
  args:
128
  num_few_shot: 5
@@ -130,14 +104,47 @@ model-index:
130
  - type: acc
131
  name: accuracy
132
  value: 0.12736921910538287
133
- - type: acc_stderr
134
- value: 0.009183110326737829
135
  source:
136
  name: Open LLM Leaderboard
137
  url: https://huggingface.co/datasets/open-llm-leaderboard/details_HuggingFaceH4__zephyr-7b-beta_public
138
 
139
  # MMLU (5-Shot)
140
- # ???
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
  # AlpacaEval (taken from model card)
143
  - task:
 
28
  args:
29
  num_few_shot: 25
30
  metrics:
 
 
 
 
 
31
  - type: acc_norm
32
  name: normalized accuracy
33
  value: 0.6203071672354948
 
 
34
  source:
35
  name: Open LLM Leaderboard
36
  url: https://huggingface.co/datasets/open-llm-leaderboard/details_HuggingFaceH4__zephyr-7b-beta_public
 
41
  name: Text Generation
42
  dataset:
43
  name: HellaSwag (10-Shot)
44
+ type: hellaswag
45
+ split: validation
46
  args:
47
  num_few_shot: 10
48
  metrics:
 
 
 
 
 
49
  - type: acc_norm
50
  name: normalized accuracy
51
  value: 0.8435570603465445
 
 
52
  source:
53
  name: Open LLM Leaderboard
54
  url: https://huggingface.co/datasets/open-llm-leaderboard/details_HuggingFaceH4__zephyr-7b-beta_public
 
60
  dataset:
61
  name: Drop (3-Shot)
62
  type: drop
63
+ split: validation
64
  args:
65
  num_few_shot: 3
66
  metrics:
 
 
 
 
 
67
  - type: f1
68
  name: f1 score
69
  value: 0.09662437080536909
 
 
70
  source:
71
  name: Open LLM Leaderboard
72
  url: https://huggingface.co/datasets/open-llm-leaderboard/details_HuggingFaceH4__zephyr-7b-beta_public
 
83
  args:
84
  num_few_shot: 0
85
  metrics:
 
 
 
 
86
  - type: mc2
87
  value: 0.5744916942762855
 
 
88
  source:
89
  name: Open LLM Leaderboard
90
  url: https://huggingface.co/datasets/open-llm-leaderboard/details_HuggingFaceH4__zephyr-7b-beta_public
 
96
  dataset:
97
  name: GSM8k (5-shot)
98
  type: gsm8k
99
+ config: main
100
  split: test
101
  args:
102
  num_few_shot: 5
 
104
  - type: acc
105
  name: accuracy
106
  value: 0.12736921910538287
 
 
107
  source:
108
  name: Open LLM Leaderboard
109
  url: https://huggingface.co/datasets/open-llm-leaderboard/details_HuggingFaceH4__zephyr-7b-beta_public
110
 
111
  # MMLU (5-Shot)
112
+ - task:
113
+ type: text-generation
114
+ name: Text Generation
115
+ dataset:
116
+ name: MMLU (5-Shot)
117
+ type: cais/mmlu
118
+ config: all
119
+ split: test
120
+ args:
121
+ num_few_shot: 5
122
+ metrics:
123
+ - type: acc
124
+ name: accuracy
125
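+ value: 0.12736921910538287  # NOTE(review): identical to the GSM8k accuracy above — likely copy-pasted; confirm the actual MMLU (5-Shot) score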
126
+ source:
127
+ name: Open LLM Leaderboard
128
+ url: https://huggingface.co/datasets/open-llm-leaderboard/details_HuggingFaceH4__zephyr-7b-beta_public
129
+
130
+ # Winogrande (5-shot)
131
+ - task:
132
+ type: text-generation
133
+ name: Text Generation
134
+ dataset:
135
+ name: Winogrande (5-shot)
136
+ type: winogrande
137
+ config: winogrande_xl
138
+ split: validation
139
+ args:
140
+ num_few_shot: 5
141
+ metrics:
142
+ - type: acc
143
+ name: accuracy
144
+ value: 0.7774269928966061
145
+ source:
146
+ name: Open LLM Leaderboard
147
+ url: https://huggingface.co/datasets/open-llm-leaderboard/details_HuggingFaceH4__zephyr-7b-beta_public
148
 
149
  # AlpacaEval (taken from model card)
150
  - task: