Upload folder using huggingface_hub
Browse files- best-model.pt +3 -0
- dev.tsv +0 -0
- loss.tsv +11 -0
- runs/events.out.tfevents.1697653857.46dc0c540dd0.2878.18 +3 -0
- test.tsv +0 -0
- training.log +246 -0
best-model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6f06eb9cd34db8a029a78fae56aa998b2d21adccad41551ee78b8a06e7821cc6
|
3 |
+
size 19050210
|
dev.tsv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
loss.tsv
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
EPOCH TIMESTAMP LEARNING_RATE TRAIN_LOSS DEV_LOSS DEV_PRECISION DEV_RECALL DEV_F1 DEV_ACCURACY
|
2 |
+
1 18:31:09 0.0000 1.7910 0.4837 0.0000 0.0000 0.0000 0.0000
|
3 |
+
2 18:31:25 0.0000 0.5383 0.3945 0.0000 0.0000 0.0000 0.0000
|
4 |
+
3 18:31:40 0.0000 0.4484 0.3370 0.3082 0.0672 0.1104 0.0591
|
5 |
+
4 18:31:55 0.0000 0.4120 0.3235 0.3681 0.1462 0.2093 0.1202
|
6 |
+
5 18:32:11 0.0000 0.3803 0.3204 0.4184 0.2205 0.2888 0.1744
|
7 |
+
6 18:32:26 0.0000 0.3695 0.3224 0.4064 0.2478 0.3079 0.1890
|
8 |
+
7 18:32:42 0.0000 0.3576 0.3122 0.4199 0.2643 0.3244 0.2012
|
9 |
+
8 18:32:58 0.0000 0.3514 0.3059 0.4129 0.2909 0.3413 0.2140
|
10 |
+
9 18:33:13 0.0000 0.3429 0.3110 0.4219 0.2807 0.3371 0.2103
|
11 |
+
10 18:33:29 0.0000 0.3392 0.3086 0.4123 0.2924 0.3422 0.2149
|
runs/events.out.tfevents.1697653857.46dc0c540dd0.2878.18
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:91a7c61e48f32acd4971eee0c3937f0c13b7e4c5699d85f31529dde02e58ea28
|
3 |
+
size 253592
|
test.tsv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
training.log
ADDED
@@ -0,0 +1,246 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2023-10-18 18:30:57,326 ----------------------------------------------------------------------------------------------------
|
2 |
+
2023-10-18 18:30:57,326 Model: "SequenceTagger(
|
3 |
+
(embeddings): TransformerWordEmbeddings(
|
4 |
+
(model): BertModel(
|
5 |
+
(embeddings): BertEmbeddings(
|
6 |
+
(word_embeddings): Embedding(32001, 128)
|
7 |
+
(position_embeddings): Embedding(512, 128)
|
8 |
+
(token_type_embeddings): Embedding(2, 128)
|
9 |
+
(LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
|
10 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
11 |
+
)
|
12 |
+
(encoder): BertEncoder(
|
13 |
+
(layer): ModuleList(
|
14 |
+
(0-1): 2 x BertLayer(
|
15 |
+
(attention): BertAttention(
|
16 |
+
(self): BertSelfAttention(
|
17 |
+
(query): Linear(in_features=128, out_features=128, bias=True)
|
18 |
+
(key): Linear(in_features=128, out_features=128, bias=True)
|
19 |
+
(value): Linear(in_features=128, out_features=128, bias=True)
|
20 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
21 |
+
)
|
22 |
+
(output): BertSelfOutput(
|
23 |
+
(dense): Linear(in_features=128, out_features=128, bias=True)
|
24 |
+
(LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
|
25 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
26 |
+
)
|
27 |
+
)
|
28 |
+
(intermediate): BertIntermediate(
|
29 |
+
(dense): Linear(in_features=128, out_features=512, bias=True)
|
30 |
+
(intermediate_act_fn): GELUActivation()
|
31 |
+
)
|
32 |
+
(output): BertOutput(
|
33 |
+
(dense): Linear(in_features=512, out_features=128, bias=True)
|
34 |
+
(LayerNorm): LayerNorm((128,), eps=1e-12, elementwise_affine=True)
|
35 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
36 |
+
)
|
37 |
+
)
|
38 |
+
)
|
39 |
+
)
|
40 |
+
(pooler): BertPooler(
|
41 |
+
(dense): Linear(in_features=128, out_features=128, bias=True)
|
42 |
+
(activation): Tanh()
|
43 |
+
)
|
44 |
+
)
|
45 |
+
)
|
46 |
+
(locked_dropout): LockedDropout(p=0.5)
|
47 |
+
(linear): Linear(in_features=128, out_features=21, bias=True)
|
48 |
+
(loss_function): CrossEntropyLoss()
|
49 |
+
)"
|
50 |
+
2023-10-18 18:30:57,327 ----------------------------------------------------------------------------------------------------
|
51 |
+
2023-10-18 18:30:57,327 MultiCorpus: 3575 train + 1235 dev + 1266 test sentences
|
52 |
+
- NER_HIPE_2022 Corpus: 3575 train + 1235 dev + 1266 test sentences - /root/.flair/datasets/ner_hipe_2022/v2.1/hipe2020/de/with_doc_seperator
|
53 |
+
2023-10-18 18:30:57,327 ----------------------------------------------------------------------------------------------------
|
54 |
+
2023-10-18 18:30:57,327 Train: 3575 sentences
|
55 |
+
2023-10-18 18:30:57,327 (train_with_dev=False, train_with_test=False)
|
56 |
+
2023-10-18 18:30:57,327 ----------------------------------------------------------------------------------------------------
|
57 |
+
2023-10-18 18:30:57,327 Training Params:
|
58 |
+
2023-10-18 18:30:57,327 - learning_rate: "3e-05"
|
59 |
+
2023-10-18 18:30:57,327 - mini_batch_size: "8"
|
60 |
+
2023-10-18 18:30:57,327 - max_epochs: "10"
|
61 |
+
2023-10-18 18:30:57,327 - shuffle: "True"
|
62 |
+
2023-10-18 18:30:57,327 ----------------------------------------------------------------------------------------------------
|
63 |
+
2023-10-18 18:30:57,327 Plugins:
|
64 |
+
2023-10-18 18:30:57,327 - TensorboardLogger
|
65 |
+
2023-10-18 18:30:57,327 - LinearScheduler | warmup_fraction: '0.1'
|
66 |
+
2023-10-18 18:30:57,327 ----------------------------------------------------------------------------------------------------
|
67 |
+
2023-10-18 18:30:57,327 Final evaluation on model from best epoch (best-model.pt)
|
68 |
+
2023-10-18 18:30:57,327 - metric: "('micro avg', 'f1-score')"
|
69 |
+
2023-10-18 18:30:57,327 ----------------------------------------------------------------------------------------------------
|
70 |
+
2023-10-18 18:30:57,327 Computation:
|
71 |
+
2023-10-18 18:30:57,327 - compute on device: cuda:0
|
72 |
+
2023-10-18 18:30:57,327 - embedding storage: none
|
73 |
+
2023-10-18 18:30:57,327 ----------------------------------------------------------------------------------------------------
|
74 |
+
2023-10-18 18:30:57,327 Model training base path: "hmbench-hipe2020/de-dbmdz/bert-tiny-historic-multilingual-cased-bs8-wsFalse-e10-lr3e-05-poolingfirst-layers-1-crfFalse-5"
|
75 |
+
2023-10-18 18:30:57,327 ----------------------------------------------------------------------------------------------------
|
76 |
+
2023-10-18 18:30:57,327 ----------------------------------------------------------------------------------------------------
|
77 |
+
2023-10-18 18:30:57,327 Logging anything other than scalars to TensorBoard is currently not supported.
|
78 |
+
2023-10-18 18:30:58,389 epoch 1 - iter 44/447 - loss 3.35537987 - time (sec): 1.06 - samples/sec: 8886.60 - lr: 0.000003 - momentum: 0.000000
|
79 |
+
2023-10-18 18:30:59,392 epoch 1 - iter 88/447 - loss 3.30743351 - time (sec): 2.06 - samples/sec: 8809.53 - lr: 0.000006 - momentum: 0.000000
|
80 |
+
2023-10-18 18:31:00,382 epoch 1 - iter 132/447 - loss 3.17927816 - time (sec): 3.05 - samples/sec: 8871.01 - lr: 0.000009 - momentum: 0.000000
|
81 |
+
2023-10-18 18:31:01,357 epoch 1 - iter 176/447 - loss 2.99703148 - time (sec): 4.03 - samples/sec: 8719.11 - lr: 0.000012 - momentum: 0.000000
|
82 |
+
2023-10-18 18:31:02,365 epoch 1 - iter 220/447 - loss 2.74692973 - time (sec): 5.04 - samples/sec: 8721.79 - lr: 0.000015 - momentum: 0.000000
|
83 |
+
2023-10-18 18:31:03,341 epoch 1 - iter 264/447 - loss 2.49918578 - time (sec): 6.01 - samples/sec: 8633.99 - lr: 0.000018 - momentum: 0.000000
|
84 |
+
2023-10-18 18:31:04,342 epoch 1 - iter 308/447 - loss 2.25178508 - time (sec): 7.01 - samples/sec: 8657.38 - lr: 0.000021 - momentum: 0.000000
|
85 |
+
2023-10-18 18:31:05,334 epoch 1 - iter 352/447 - loss 2.07493418 - time (sec): 8.01 - samples/sec: 8609.55 - lr: 0.000024 - momentum: 0.000000
|
86 |
+
2023-10-18 18:31:06,341 epoch 1 - iter 396/447 - loss 1.92933120 - time (sec): 9.01 - samples/sec: 8566.47 - lr: 0.000027 - momentum: 0.000000
|
87 |
+
2023-10-18 18:31:07,338 epoch 1 - iter 440/447 - loss 1.80685455 - time (sec): 10.01 - samples/sec: 8529.85 - lr: 0.000029 - momentum: 0.000000
|
88 |
+
2023-10-18 18:31:07,493 ----------------------------------------------------------------------------------------------------
|
89 |
+
2023-10-18 18:31:07,493 EPOCH 1 done: loss 1.7910 - lr: 0.000029
|
90 |
+
2023-10-18 18:31:09,707 DEV : loss 0.4836609661579132 - f1-score (micro avg) 0.0
|
91 |
+
2023-10-18 18:31:09,732 ----------------------------------------------------------------------------------------------------
|
92 |
+
2023-10-18 18:31:10,762 epoch 2 - iter 44/447 - loss 0.60346430 - time (sec): 1.03 - samples/sec: 9314.76 - lr: 0.000030 - momentum: 0.000000
|
93 |
+
2023-10-18 18:31:11,762 epoch 2 - iter 88/447 - loss 0.61427853 - time (sec): 2.03 - samples/sec: 9124.62 - lr: 0.000029 - momentum: 0.000000
|
94 |
+
2023-10-18 18:31:12,772 epoch 2 - iter 132/447 - loss 0.58695454 - time (sec): 3.04 - samples/sec: 8816.43 - lr: 0.000029 - momentum: 0.000000
|
95 |
+
2023-10-18 18:31:13,783 epoch 2 - iter 176/447 - loss 0.55566412 - time (sec): 4.05 - samples/sec: 8706.98 - lr: 0.000029 - momentum: 0.000000
|
96 |
+
2023-10-18 18:31:14,753 epoch 2 - iter 220/447 - loss 0.55262370 - time (sec): 5.02 - samples/sec: 8579.61 - lr: 0.000028 - momentum: 0.000000
|
97 |
+
2023-10-18 18:31:15,753 epoch 2 - iter 264/447 - loss 0.54627616 - time (sec): 6.02 - samples/sec: 8510.33 - lr: 0.000028 - momentum: 0.000000
|
98 |
+
2023-10-18 18:31:16,783 epoch 2 - iter 308/447 - loss 0.54522378 - time (sec): 7.05 - samples/sec: 8520.23 - lr: 0.000028 - momentum: 0.000000
|
99 |
+
2023-10-18 18:31:17,795 epoch 2 - iter 352/447 - loss 0.54685730 - time (sec): 8.06 - samples/sec: 8537.76 - lr: 0.000027 - momentum: 0.000000
|
100 |
+
2023-10-18 18:31:18,764 epoch 2 - iter 396/447 - loss 0.54330644 - time (sec): 9.03 - samples/sec: 8524.04 - lr: 0.000027 - momentum: 0.000000
|
101 |
+
2023-10-18 18:31:19,772 epoch 2 - iter 440/447 - loss 0.53883687 - time (sec): 10.04 - samples/sec: 8512.29 - lr: 0.000027 - momentum: 0.000000
|
102 |
+
2023-10-18 18:31:19,927 ----------------------------------------------------------------------------------------------------
|
103 |
+
2023-10-18 18:31:19,927 EPOCH 2 done: loss 0.5383 - lr: 0.000027
|
104 |
+
2023-10-18 18:31:25,102 DEV : loss 0.3944970965385437 - f1-score (micro avg) 0.0
|
105 |
+
2023-10-18 18:31:25,127 ----------------------------------------------------------------------------------------------------
|
106 |
+
2023-10-18 18:31:26,144 epoch 3 - iter 44/447 - loss 0.47928256 - time (sec): 1.02 - samples/sec: 8573.52 - lr: 0.000026 - momentum: 0.000000
|
107 |
+
2023-10-18 18:31:27,140 epoch 3 - iter 88/447 - loss 0.48481466 - time (sec): 2.01 - samples/sec: 8455.08 - lr: 0.000026 - momentum: 0.000000
|
108 |
+
2023-10-18 18:31:28,157 epoch 3 - iter 132/447 - loss 0.47210746 - time (sec): 3.03 - samples/sec: 8451.26 - lr: 0.000026 - momentum: 0.000000
|
109 |
+
2023-10-18 18:31:29,130 epoch 3 - iter 176/447 - loss 0.45204171 - time (sec): 4.00 - samples/sec: 8404.07 - lr: 0.000025 - momentum: 0.000000
|
110 |
+
2023-10-18 18:31:30,125 epoch 3 - iter 220/447 - loss 0.45951440 - time (sec): 5.00 - samples/sec: 8504.07 - lr: 0.000025 - momentum: 0.000000
|
111 |
+
2023-10-18 18:31:31,104 epoch 3 - iter 264/447 - loss 0.45609998 - time (sec): 5.98 - samples/sec: 8498.35 - lr: 0.000025 - momentum: 0.000000
|
112 |
+
2023-10-18 18:31:32,130 epoch 3 - iter 308/447 - loss 0.45039667 - time (sec): 7.00 - samples/sec: 8587.72 - lr: 0.000024 - momentum: 0.000000
|
113 |
+
2023-10-18 18:31:33,139 epoch 3 - iter 352/447 - loss 0.44769705 - time (sec): 8.01 - samples/sec: 8519.35 - lr: 0.000024 - momentum: 0.000000
|
114 |
+
2023-10-18 18:31:34,149 epoch 3 - iter 396/447 - loss 0.45480444 - time (sec): 9.02 - samples/sec: 8549.84 - lr: 0.000024 - momentum: 0.000000
|
115 |
+
2023-10-18 18:31:35,141 epoch 3 - iter 440/447 - loss 0.44997278 - time (sec): 10.01 - samples/sec: 8531.73 - lr: 0.000023 - momentum: 0.000000
|
116 |
+
2023-10-18 18:31:35,299 ----------------------------------------------------------------------------------------------------
|
117 |
+
2023-10-18 18:31:35,299 EPOCH 3 done: loss 0.4484 - lr: 0.000023
|
118 |
+
2023-10-18 18:31:40,464 DEV : loss 0.3369644582271576 - f1-score (micro avg) 0.1104
|
119 |
+
2023-10-18 18:31:40,487 saving best model
|
120 |
+
2023-10-18 18:31:40,521 ----------------------------------------------------------------------------------------------------
|
121 |
+
2023-10-18 18:31:41,556 epoch 4 - iter 44/447 - loss 0.39542898 - time (sec): 1.03 - samples/sec: 8636.65 - lr: 0.000023 - momentum: 0.000000
|
122 |
+
2023-10-18 18:31:42,561 epoch 4 - iter 88/447 - loss 0.38736877 - time (sec): 2.04 - samples/sec: 8628.03 - lr: 0.000023 - momentum: 0.000000
|
123 |
+
2023-10-18 18:31:43,608 epoch 4 - iter 132/447 - loss 0.38899063 - time (sec): 3.09 - samples/sec: 8799.06 - lr: 0.000022 - momentum: 0.000000
|
124 |
+
2023-10-18 18:31:44,585 epoch 4 - iter 176/447 - loss 0.40082774 - time (sec): 4.06 - samples/sec: 8811.42 - lr: 0.000022 - momentum: 0.000000
|
125 |
+
2023-10-18 18:31:45,571 epoch 4 - iter 220/447 - loss 0.40145089 - time (sec): 5.05 - samples/sec: 8656.73 - lr: 0.000022 - momentum: 0.000000
|
126 |
+
2023-10-18 18:31:46,581 epoch 4 - iter 264/447 - loss 0.40563358 - time (sec): 6.06 - samples/sec: 8696.86 - lr: 0.000021 - momentum: 0.000000
|
127 |
+
2023-10-18 18:31:47,552 epoch 4 - iter 308/447 - loss 0.40938382 - time (sec): 7.03 - samples/sec: 8654.08 - lr: 0.000021 - momentum: 0.000000
|
128 |
+
2023-10-18 18:31:48,541 epoch 4 - iter 352/447 - loss 0.41291327 - time (sec): 8.02 - samples/sec: 8564.35 - lr: 0.000021 - momentum: 0.000000
|
129 |
+
2023-10-18 18:31:49,574 epoch 4 - iter 396/447 - loss 0.41249266 - time (sec): 9.05 - samples/sec: 8505.34 - lr: 0.000020 - momentum: 0.000000
|
130 |
+
2023-10-18 18:31:50,537 epoch 4 - iter 440/447 - loss 0.41073576 - time (sec): 10.02 - samples/sec: 8526.40 - lr: 0.000020 - momentum: 0.000000
|
131 |
+
2023-10-18 18:31:50,686 ----------------------------------------------------------------------------------------------------
|
132 |
+
2023-10-18 18:31:50,686 EPOCH 4 done: loss 0.4120 - lr: 0.000020
|
133 |
+
2023-10-18 18:31:55,601 DEV : loss 0.32354578375816345 - f1-score (micro avg) 0.2093
|
134 |
+
2023-10-18 18:31:55,626 saving best model
|
135 |
+
2023-10-18 18:31:55,665 ----------------------------------------------------------------------------------------------------
|
136 |
+
2023-10-18 18:31:56,674 epoch 5 - iter 44/447 - loss 0.43829726 - time (sec): 1.01 - samples/sec: 7584.40 - lr: 0.000020 - momentum: 0.000000
|
137 |
+
2023-10-18 18:31:57,661 epoch 5 - iter 88/447 - loss 0.40228181 - time (sec): 2.00 - samples/sec: 7864.32 - lr: 0.000019 - momentum: 0.000000
|
138 |
+
2023-10-18 18:31:58,638 epoch 5 - iter 132/447 - loss 0.40280517 - time (sec): 2.97 - samples/sec: 7990.30 - lr: 0.000019 - momentum: 0.000000
|
139 |
+
2023-10-18 18:31:59,710 epoch 5 - iter 176/447 - loss 0.37902870 - time (sec): 4.04 - samples/sec: 8287.50 - lr: 0.000019 - momentum: 0.000000
|
140 |
+
2023-10-18 18:32:00,769 epoch 5 - iter 220/447 - loss 0.37122151 - time (sec): 5.10 - samples/sec: 8416.45 - lr: 0.000018 - momentum: 0.000000
|
141 |
+
2023-10-18 18:32:01,778 epoch 5 - iter 264/447 - loss 0.37513444 - time (sec): 6.11 - samples/sec: 8523.10 - lr: 0.000018 - momentum: 0.000000
|
142 |
+
2023-10-18 18:32:02,807 epoch 5 - iter 308/447 - loss 0.37468618 - time (sec): 7.14 - samples/sec: 8462.69 - lr: 0.000018 - momentum: 0.000000
|
143 |
+
2023-10-18 18:32:03,762 epoch 5 - iter 352/447 - loss 0.37705908 - time (sec): 8.10 - samples/sec: 8454.46 - lr: 0.000017 - momentum: 0.000000
|
144 |
+
2023-10-18 18:32:04,801 epoch 5 - iter 396/447 - loss 0.37945783 - time (sec): 9.14 - samples/sec: 8410.18 - lr: 0.000017 - momentum: 0.000000
|
145 |
+
2023-10-18 18:32:05,811 epoch 5 - iter 440/447 - loss 0.38037714 - time (sec): 10.15 - samples/sec: 8405.36 - lr: 0.000017 - momentum: 0.000000
|
146 |
+
2023-10-18 18:32:05,962 ----------------------------------------------------------------------------------------------------
|
147 |
+
2023-10-18 18:32:05,962 EPOCH 5 done: loss 0.3803 - lr: 0.000017
|
148 |
+
2023-10-18 18:32:11,182 DEV : loss 0.32035842537879944 - f1-score (micro avg) 0.2888
|
149 |
+
2023-10-18 18:32:11,207 saving best model
|
150 |
+
2023-10-18 18:32:11,240 ----------------------------------------------------------------------------------------------------
|
151 |
+
2023-10-18 18:32:12,172 epoch 6 - iter 44/447 - loss 0.33483908 - time (sec): 0.93 - samples/sec: 9115.05 - lr: 0.000016 - momentum: 0.000000
|
152 |
+
2023-10-18 18:32:13,137 epoch 6 - iter 88/447 - loss 0.35899358 - time (sec): 1.90 - samples/sec: 9080.63 - lr: 0.000016 - momentum: 0.000000
|
153 |
+
2023-10-18 18:32:14,206 epoch 6 - iter 132/447 - loss 0.35690744 - time (sec): 2.96 - samples/sec: 9076.59 - lr: 0.000016 - momentum: 0.000000
|
154 |
+
2023-10-18 18:32:15,164 epoch 6 - iter 176/447 - loss 0.36576389 - time (sec): 3.92 - samples/sec: 8994.93 - lr: 0.000015 - momentum: 0.000000
|
155 |
+
2023-10-18 18:32:16,184 epoch 6 - iter 220/447 - loss 0.37498136 - time (sec): 4.94 - samples/sec: 8676.02 - lr: 0.000015 - momentum: 0.000000
|
156 |
+
2023-10-18 18:32:17,234 epoch 6 - iter 264/447 - loss 0.37144901 - time (sec): 5.99 - samples/sec: 8495.57 - lr: 0.000015 - momentum: 0.000000
|
157 |
+
2023-10-18 18:32:18,297 epoch 6 - iter 308/447 - loss 0.37135490 - time (sec): 7.06 - samples/sec: 8449.37 - lr: 0.000014 - momentum: 0.000000
|
158 |
+
2023-10-18 18:32:19,337 epoch 6 - iter 352/447 - loss 0.37580628 - time (sec): 8.10 - samples/sec: 8398.88 - lr: 0.000014 - momentum: 0.000000
|
159 |
+
2023-10-18 18:32:20,381 epoch 6 - iter 396/447 - loss 0.37311173 - time (sec): 9.14 - samples/sec: 8374.81 - lr: 0.000014 - momentum: 0.000000
|
160 |
+
2023-10-18 18:32:21,351 epoch 6 - iter 440/447 - loss 0.37059963 - time (sec): 10.11 - samples/sec: 8425.98 - lr: 0.000013 - momentum: 0.000000
|
161 |
+
2023-10-18 18:32:21,511 ----------------------------------------------------------------------------------------------------
|
162 |
+
2023-10-18 18:32:21,511 EPOCH 6 done: loss 0.3695 - lr: 0.000013
|
163 |
+
2023-10-18 18:32:26,767 DEV : loss 0.3224472403526306 - f1-score (micro avg) 0.3079
|
164 |
+
2023-10-18 18:32:26,792 saving best model
|
165 |
+
2023-10-18 18:32:26,834 ----------------------------------------------------------------------------------------------------
|
166 |
+
2023-10-18 18:32:27,937 epoch 7 - iter 44/447 - loss 0.38634909 - time (sec): 1.10 - samples/sec: 7574.69 - lr: 0.000013 - momentum: 0.000000
|
167 |
+
2023-10-18 18:32:28,992 epoch 7 - iter 88/447 - loss 0.35989766 - time (sec): 2.16 - samples/sec: 7720.36 - lr: 0.000013 - momentum: 0.000000
|
168 |
+
2023-10-18 18:32:30,010 epoch 7 - iter 132/447 - loss 0.35635785 - time (sec): 3.18 - samples/sec: 7724.99 - lr: 0.000012 - momentum: 0.000000
|
169 |
+
2023-10-18 18:32:31,063 epoch 7 - iter 176/447 - loss 0.34873541 - time (sec): 4.23 - samples/sec: 7992.13 - lr: 0.000012 - momentum: 0.000000
|
170 |
+
2023-10-18 18:32:32,072 epoch 7 - iter 220/447 - loss 0.35566055 - time (sec): 5.24 - samples/sec: 8143.25 - lr: 0.000012 - momentum: 0.000000
|
171 |
+
2023-10-18 18:32:33,110 epoch 7 - iter 264/447 - loss 0.34763579 - time (sec): 6.28 - samples/sec: 8178.77 - lr: 0.000011 - momentum: 0.000000
|
172 |
+
2023-10-18 18:32:34,137 epoch 7 - iter 308/447 - loss 0.35746887 - time (sec): 7.30 - samples/sec: 8187.89 - lr: 0.000011 - momentum: 0.000000
|
173 |
+
2023-10-18 18:32:35,174 epoch 7 - iter 352/447 - loss 0.35715054 - time (sec): 8.34 - samples/sec: 8266.95 - lr: 0.000011 - momentum: 0.000000
|
174 |
+
2023-10-18 18:32:36,181 epoch 7 - iter 396/447 - loss 0.35950966 - time (sec): 9.35 - samples/sec: 8263.77 - lr: 0.000010 - momentum: 0.000000
|
175 |
+
2023-10-18 18:32:37,181 epoch 7 - iter 440/447 - loss 0.35766237 - time (sec): 10.35 - samples/sec: 8235.62 - lr: 0.000010 - momentum: 0.000000
|
176 |
+
2023-10-18 18:32:37,340 ----------------------------------------------------------------------------------------------------
|
177 |
+
2023-10-18 18:32:37,341 EPOCH 7 done: loss 0.3576 - lr: 0.000010
|
178 |
+
2023-10-18 18:32:42,619 DEV : loss 0.3121800422668457 - f1-score (micro avg) 0.3244
|
179 |
+
2023-10-18 18:32:42,644 saving best model
|
180 |
+
2023-10-18 18:32:42,678 ----------------------------------------------------------------------------------------------------
|
181 |
+
2023-10-18 18:32:43,671 epoch 8 - iter 44/447 - loss 0.37422031 - time (sec): 0.99 - samples/sec: 8531.72 - lr: 0.000010 - momentum: 0.000000
|
182 |
+
2023-10-18 18:32:44,640 epoch 8 - iter 88/447 - loss 0.36592520 - time (sec): 1.96 - samples/sec: 8542.01 - lr: 0.000009 - momentum: 0.000000
|
183 |
+
2023-10-18 18:32:45,668 epoch 8 - iter 132/447 - loss 0.36290806 - time (sec): 2.99 - samples/sec: 8534.38 - lr: 0.000009 - momentum: 0.000000
|
184 |
+
2023-10-18 18:32:46,673 epoch 8 - iter 176/447 - loss 0.36281915 - time (sec): 3.99 - samples/sec: 8615.59 - lr: 0.000009 - momentum: 0.000000
|
185 |
+
2023-10-18 18:32:47,726 epoch 8 - iter 220/447 - loss 0.35917912 - time (sec): 5.05 - samples/sec: 8471.13 - lr: 0.000008 - momentum: 0.000000
|
186 |
+
2023-10-18 18:32:48,759 epoch 8 - iter 264/447 - loss 0.35714516 - time (sec): 6.08 - samples/sec: 8611.74 - lr: 0.000008 - momentum: 0.000000
|
187 |
+
2023-10-18 18:32:49,745 epoch 8 - iter 308/447 - loss 0.35463981 - time (sec): 7.07 - samples/sec: 8541.08 - lr: 0.000008 - momentum: 0.000000
|
188 |
+
2023-10-18 18:32:50,767 epoch 8 - iter 352/447 - loss 0.35443539 - time (sec): 8.09 - samples/sec: 8533.45 - lr: 0.000007 - momentum: 0.000000
|
189 |
+
2023-10-18 18:32:51,802 epoch 8 - iter 396/447 - loss 0.34812574 - time (sec): 9.12 - samples/sec: 8528.33 - lr: 0.000007 - momentum: 0.000000
|
190 |
+
2023-10-18 18:32:52,836 epoch 8 - iter 440/447 - loss 0.35044269 - time (sec): 10.16 - samples/sec: 8418.84 - lr: 0.000007 - momentum: 0.000000
|
191 |
+
2023-10-18 18:32:52,990 ----------------------------------------------------------------------------------------------------
|
192 |
+
2023-10-18 18:32:52,990 EPOCH 8 done: loss 0.3514 - lr: 0.000007
|
193 |
+
2023-10-18 18:32:58,218 DEV : loss 0.3059460520744324 - f1-score (micro avg) 0.3413
|
194 |
+
2023-10-18 18:32:58,244 saving best model
|
195 |
+
2023-10-18 18:32:58,278 ----------------------------------------------------------------------------------------------------
|
196 |
+
2023-10-18 18:32:59,317 epoch 9 - iter 44/447 - loss 0.33594296 - time (sec): 1.04 - samples/sec: 7558.99 - lr: 0.000006 - momentum: 0.000000
|
197 |
+
2023-10-18 18:33:00,364 epoch 9 - iter 88/447 - loss 0.35405140 - time (sec): 2.09 - samples/sec: 8481.46 - lr: 0.000006 - momentum: 0.000000
|
198 |
+
2023-10-18 18:33:01,374 epoch 9 - iter 132/447 - loss 0.37457981 - time (sec): 3.10 - samples/sec: 8446.61 - lr: 0.000006 - momentum: 0.000000
|
199 |
+
2023-10-18 18:33:02,440 epoch 9 - iter 176/447 - loss 0.36377188 - time (sec): 4.16 - samples/sec: 8311.21 - lr: 0.000005 - momentum: 0.000000
|
200 |
+
2023-10-18 18:33:03,383 epoch 9 - iter 220/447 - loss 0.35955115 - time (sec): 5.10 - samples/sec: 8335.73 - lr: 0.000005 - momentum: 0.000000
|
201 |
+
2023-10-18 18:33:04,496 epoch 9 - iter 264/447 - loss 0.35376935 - time (sec): 6.22 - samples/sec: 8381.32 - lr: 0.000005 - momentum: 0.000000
|
202 |
+
2023-10-18 18:33:05,535 epoch 9 - iter 308/447 - loss 0.34765466 - time (sec): 7.26 - samples/sec: 8417.96 - lr: 0.000004 - momentum: 0.000000
|
203 |
+
2023-10-18 18:33:06,610 epoch 9 - iter 352/447 - loss 0.34682011 - time (sec): 8.33 - samples/sec: 8303.00 - lr: 0.000004 - momentum: 0.000000
|
204 |
+
2023-10-18 18:33:07,629 epoch 9 - iter 396/447 - loss 0.34678332 - time (sec): 9.35 - samples/sec: 8308.58 - lr: 0.000004 - momentum: 0.000000
|
205 |
+
2023-10-18 18:33:08,589 epoch 9 - iter 440/447 - loss 0.34367673 - time (sec): 10.31 - samples/sec: 8285.48 - lr: 0.000003 - momentum: 0.000000
|
206 |
+
2023-10-18 18:33:08,741 ----------------------------------------------------------------------------------------------------
|
207 |
+
2023-10-18 18:33:08,741 EPOCH 9 done: loss 0.3429 - lr: 0.000003
|
208 |
+
2023-10-18 18:33:13,698 DEV : loss 0.3109874725341797 - f1-score (micro avg) 0.3371
|
209 |
+
2023-10-18 18:33:13,723 ----------------------------------------------------------------------------------------------------
|
210 |
+
2023-10-18 18:33:14,605 epoch 10 - iter 44/447 - loss 0.28949017 - time (sec): 0.88 - samples/sec: 10077.39 - lr: 0.000003 - momentum: 0.000000
|
211 |
+
2023-10-18 18:33:15,622 epoch 10 - iter 88/447 - loss 0.30867757 - time (sec): 1.90 - samples/sec: 9115.42 - lr: 0.000003 - momentum: 0.000000
|
212 |
+
2023-10-18 18:33:16,601 epoch 10 - iter 132/447 - loss 0.30207633 - time (sec): 2.88 - samples/sec: 8598.52 - lr: 0.000002 - momentum: 0.000000
|
213 |
+
2023-10-18 18:33:17,584 epoch 10 - iter 176/447 - loss 0.31228723 - time (sec): 3.86 - samples/sec: 8535.06 - lr: 0.000002 - momentum: 0.000000
|
214 |
+
2023-10-18 18:33:18,593 epoch 10 - iter 220/447 - loss 0.32028080 - time (sec): 4.87 - samples/sec: 8454.58 - lr: 0.000002 - momentum: 0.000000
|
215 |
+
2023-10-18 18:33:19,587 epoch 10 - iter 264/447 - loss 0.32693675 - time (sec): 5.86 - samples/sec: 8394.92 - lr: 0.000001 - momentum: 0.000000
|
216 |
+
2023-10-18 18:33:20,570 epoch 10 - iter 308/447 - loss 0.33101357 - time (sec): 6.85 - samples/sec: 8374.72 - lr: 0.000001 - momentum: 0.000000
|
217 |
+
2023-10-18 18:33:21,645 epoch 10 - iter 352/447 - loss 0.33462225 - time (sec): 7.92 - samples/sec: 8439.68 - lr: 0.000001 - momentum: 0.000000
|
218 |
+
2023-10-18 18:33:22,709 epoch 10 - iter 396/447 - loss 0.32984143 - time (sec): 8.99 - samples/sec: 8551.25 - lr: 0.000000 - momentum: 0.000000
|
219 |
+
2023-10-18 18:33:23,715 epoch 10 - iter 440/447 - loss 0.33800628 - time (sec): 9.99 - samples/sec: 8523.11 - lr: 0.000000 - momentum: 0.000000
|
220 |
+
2023-10-18 18:33:23,878 ----------------------------------------------------------------------------------------------------
|
221 |
+
2023-10-18 18:33:23,878 EPOCH 10 done: loss 0.3392 - lr: 0.000000
|
222 |
+
2023-10-18 18:33:29,185 DEV : loss 0.3085727393627167 - f1-score (micro avg) 0.3422
|
223 |
+
2023-10-18 18:33:29,211 saving best model
|
224 |
+
2023-10-18 18:33:29,272 ----------------------------------------------------------------------------------------------------
|
225 |
+
2023-10-18 18:33:29,273 Loading model from best epoch ...
|
226 |
+
2023-10-18 18:33:29,350 SequenceTagger predicts: Dictionary with 21 tags: O, S-loc, B-loc, E-loc, I-loc, S-pers, B-pers, E-pers, I-pers, S-org, B-org, E-org, I-org, S-prod, B-prod, E-prod, I-prod, S-time, B-time, E-time, I-time
|
227 |
+
2023-10-18 18:33:31,641
|
228 |
+
Results:
|
229 |
+
- F-score (micro) 0.3266
|
230 |
+
- F-score (macro) 0.1301
|
231 |
+
- Accuracy 0.2057
|
232 |
+
|
233 |
+
By class:
|
234 |
+
precision recall f1-score support
|
235 |
+
|
236 |
+
loc 0.4855 0.4765 0.4809 596
|
237 |
+
pers 0.1746 0.1652 0.1698 333
|
238 |
+
org 0.0000 0.0000 0.0000 132
|
239 |
+
prod 0.0000 0.0000 0.0000 66
|
240 |
+
time 0.0000 0.0000 0.0000 49
|
241 |
+
|
242 |
+
micro avg 0.3767 0.2883 0.3266 1176
|
243 |
+
macro avg 0.1320 0.1283 0.1301 1176
|
244 |
+
weighted avg 0.2955 0.2883 0.2918 1176
|
245 |
+
|
246 |
+
2023-10-18 18:33:31,641 ----------------------------------------------------------------------------------------------------
|