Carsten committed on
Commit
c028b5a
1 Parent(s): 7001010
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .venv
app.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+
4
# Lookup tables driving the calculator, loaded once at import time.
#
# quants: quant name -> bits per weight, e.g. {"Q4_K_S": 4.58}
# models: model name -> parameter count in billions, e.g. {"Mistral 7B": 7.0}
#
# The previous implementation used DataFrame.applymap (deprecated since
# pandas 2.1; requirements pin pandas 2.2.0) and groupby(...).apply(float),
# which depends on the deprecated float(single-element Series) coercion.
# set_index + astype + to_dict expresses the same key -> value mapping
# directly and without deprecation warnings.
quants = (
    pd.read_csv("quants.csv")
    .set_index("quant")["bpw"]
    .astype(float)
    .to_dict()
)
models = (
    pd.read_csv("models.csv")
    .set_index("model")["params"]
    .astype(float)
    .to_dict()
)
18
+
19
+
20
def context_sizes(model):
    """Load the measured context-memory table for *model*.

    Reads ``context_sizes/<model>.csv`` (any "/" in the model name is
    mapped to "_" to form a valid filename). The file is headerless;
    the two columns are labelled ``context`` (tokens) and ``size``
    (presumably MiB of VRAM — confirm against the data files).
    """
    filename = model.replace("/", "_") + ".csv"
    return pd.read_csv(
        f"context_sizes/{filename}",
        header=None,
        names=["context", "size"],
    )
26
+
27
+
28
def linear_regression(xs, ys) -> tuple[float, float]:
    """Ordinary least-squares fit of ``y = a + b * x``.

    Returns ``(a, b)`` — intercept first, slope second.

    Accepts any pair of equal-length numeric iterables. (The previous
    version called ``ys.sum()``, which only works on pandas/numpy
    objects; plain lists would raise AttributeError.)
    """
    xs = list(xs)
    ys = list(ys)
    n = len(xs)

    sum_x = sum(xs)
    sum_y = sum(ys)
    sum_xy = sum(x * y for x, y in zip(xs, ys))
    sum_x2 = sum(x * x for x in xs)

    # Shared denominator; zero only when all xs are identical.
    denom = n * sum_x2 - sum_x**2
    a = (sum_y * sum_x2 - sum_x * sum_xy) / denom
    b = (n * sum_xy - sum_x * sum_y) / denom
    return a, b
38
+
39
+
40
def calc_model_size(parameters: float, quant: float) -> float:
    """Model weight size in GB.

    *parameters* is the parameter count in billions and *quant* the
    bits per weight; dividing the bit total by 8 yields (giga)bytes.
    Rounded to two decimals for display.
    """
    gigabytes = (parameters * quant) / 8
    return round(gigabytes, 2)
42
+
43
+
44
def calc_context_size(context, model) -> float:
    """Estimate context VRAM in GB for *context* tokens of *model*.

    Fits a line through the measured (context, size) samples for the
    model and evaluates it at the requested context length; dividing
    by 1024 converts the tabulated unit to GB. Rounded to two decimals.
    """
    measurements = context_sizes(model)
    intercept, slope = linear_regression(
        measurements["context"], measurements["size"]
    )
    estimated = intercept + slope * context
    return round(estimated / 1024, 2)
48
+
49
+
50
def calc(model_base, context, quant_size):
    """Gradio submit callback.

    Looks up the model's parameter count and the quant's bits-per-weight
    in the module-level tables, then returns the three displayed numbers:
    (model size GB, context size GB, total GB).
    """
    params_billion = models[model_base]
    bits_per_weight = quants[quant_size]

    model_gb = calc_model_size(params_billion, bits_per_weight)
    context_gb = calc_context_size(context, model_base)

    return model_gb, context_gb, model_gb + context_gb
58
+
59
+
60
title = "GGUF VRAM Calculator"

# Top-level UI wiring: builds the widgets, binds the Submit button to
# calc(), and starts the Gradio server. Runs on import — this module is
# the app entry point.
with gr.Blocks(title=title, theme=gr.themes.Monochrome()) as app:
    # Defaults shown before the first Submit; the output Numbers below
    # are pre-populated from these so the page is meaningful on load.
    default_model = "Mistral 7B"
    default_quant = "Q4_K_S"
    default_context = 8192
    default_model_size = calc_model_size(models[default_model], quants[default_quant])
    default_context_size = calc_context_size(default_context, default_model)

    # Page heading reuses the Blocks title.
    gr.Markdown(f"# {app.title}")
    model = gr.Dropdown(
        list(models.keys()), value=default_model, label="Select Model Base"
    )
    context = gr.Number(minimum=1, value=default_context, label="Context Size (Tokens)")
    quant = gr.Dropdown(
        list(quants.keys()), value=default_quant, label="Select Quant Size"
    )
    btn = gr.Button(value="Submit", variant="primary")
    # calc returns (model GB, context GB, total GB) — order must match
    # the outputs list below.
    btn.click(
        calc,
        inputs=[
            model,
            context,
            quant,
        ],
        outputs=[
            gr.Number(
                label="Model Size (GB)",
                value=default_model_size,
            ),
            gr.Number(
                label="Context Size (GB)",
                value=default_context_size,
            ),
            gr.Number(
                label="Total Size (GB)",
                value=default_model_size + default_context_size,
            ),
        ],
    )

# Blocking call: serves the app until interrupted.
app.launch()
context_sizes/Llama2 13B.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 512,475
2
+ 1024,912
3
+ 2048,1794
4
+ 3072,2676
5
+ 4096,3558
6
+ 6144,5322
7
+ 8192,7086
8
+ 12288,10614
9
+ 16384,14142
10
+ 24576,21198
11
+ 32768,28254
12
+ 65536,56508
context_sizes/Llama2 20B.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 512,695
2
+ 1024,1352
3
+ 2048,2674
4
+ 3072,3996
5
+ 4096,5318
6
+ 6144,7962
7
+ 8192,10606
8
+ 12288,15894
9
+ 16384,21182
10
+ 24576,31782.52
11
+ 32768,42335.26
12
+ 65536,84670.52
context_sizes/Llama2 70B.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 512,305
2
+ 1024,498
3
+ 2048,948
4
+ 3072,1398
5
+ 4096,1848
6
+ 6144,2748
7
+ 8192,3648
8
+ 12288,5448
9
+ 16384,7248
10
+ 24576,10848
11
+ 32768,14448
12
+ 65536,28896
context_sizes/Llama2 7B.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 512,326.5
2
+ 1024,602
3
+ 2048,1180
4
+ 3072,1758
5
+ 4096,2336
6
+ 6144,3492
7
+ 8192,4648
8
+ 12288,6960
9
+ 16384,9272
10
+ 24576,13896
11
+ 32768,18520
12
+ 65536,37016
context_sizes/Mistral 7B.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 512,137
2
+ 1024,218
3
+ 2048,412
4
+ 3072,606
5
+ 4096,800
6
+ 6144,1188
7
+ 8192,1576
8
+ 12288,2352
9
+ 16384,3128
10
+ 24576,4680
11
+ 32768,6232
12
+ 65536,12440
context_sizes/Mixtral 8x7B.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 512,181.72
2
+ 1024,249.22
3
+ 2048,443.22
4
+ 3072,637.22
5
+ 4096,831.22
6
+ 6144,1219.22
7
+ 8192,1607.22
8
+ 12288,2383.22
9
+ 16384,3159.22
10
+ 24576,4711.22
11
+ 32768,6263.22
12
+ 65536,12471.22
context_sizes/Solar 10.7B_11B.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 512,172.19
2
+ 1024,285.19
3
+ 2048,543.19
4
+ 3072,801.19
5
+ 4096,1059.19
6
+ 6144,1575.19
7
+ 8192,2091.19
8
+ 12288,3123.19
9
+ 16384,4155.19
10
+ 24576,6219.19
11
+ 32768,8283.19
12
+ 65536,16539.19
context_sizes/Yi 34B.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 512,262.19
2
+ 1024,399.19
3
+ 2048,753.19
4
+ 3072,1107.19
5
+ 4096,1461.19
6
+ 6144,2169.19
7
+ 8192,2877.19
8
+ 12288,4293.19
9
+ 16384,5709.19
10
+ 24576,8541.19
11
+ 32768,11373.19
12
+ 65536,22701.19
models.csv ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ model,params
2
+ Llama2 7B,7
3
+ Llama2 13B,13
4
+ Llama2 70B,70
5
+ Mistral 7B,7
6
+ Llama2 20B,20
7
+ Mixtral 8x7B,46.7
8
+ Yi 34B,34
9
+ Solar 10.7B/11B,10.7
quants.csv ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ quant,bpw
2
+ Q2_K,3.35
3
+ Q3_K_S,3.5
4
+ Q3_K_M,3.91
5
+ Q3_K_L,4.27
6
+ Q4_0,4.55
7
+ Q4_K_S,4.58
8
+ Q4_K_M,4.85
9
+ Q5_0,5.54
10
+ Q5_K_S,5.54
11
+ Q5_K_M,5.69
12
+ Q6_K,6.59
13
+ Q8_0,8.5
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ gradio==4.15.0
2
+ pandas==2.2.0