|
2023-10-23 20:15:29,815 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:15:29,816 Model: "SequenceTagger( |
|
(embeddings): TransformerWordEmbeddings( |
|
(model): BertModel( |
|
(embeddings): BertEmbeddings( |
|
(word_embeddings): Embedding(64001, 768) |
|
(position_embeddings): Embedding(512, 768) |
|
(token_type_embeddings): Embedding(2, 768) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(encoder): BertEncoder( |
|
(layer): ModuleList( |
|
(0): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(1): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(2): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(3): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(4): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(5): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(6): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(7): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(8): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(9): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(10): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(11): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
) |
|
) |
|
(pooler): BertPooler( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(activation): Tanh() |
|
) |
|
) |
|
) |
|
(locked_dropout): LockedDropout(p=0.5) |
|
(linear): Linear(in_features=768, out_features=21, bias=True) |
|
(loss_function): CrossEntropyLoss() |
|
)" |
|
2023-10-23 20:15:29,816 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:15:29,816 MultiCorpus: 3575 train + 1235 dev + 1266 test sentences |
|
- NER_HIPE_2022 Corpus: 3575 train + 1235 dev + 1266 test sentences - /home/ubuntu/.flair/datasets/ner_hipe_2022/v2.1/hipe2020/de/with_doc_seperator |
|
2023-10-23 20:15:29,816 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:15:29,816 Train: 3575 sentences |
|
2023-10-23 20:15:29,816 (train_with_dev=False, train_with_test=False) |
|
2023-10-23 20:15:29,816 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:15:29,816 Training Params: |
|
2023-10-23 20:15:29,816 - learning_rate: "5e-05" |
|
2023-10-23 20:15:29,816 - mini_batch_size: "4" |
|
2023-10-23 20:15:29,816 - max_epochs: "10" |
|
2023-10-23 20:15:29,816 - shuffle: "True" |
|
2023-10-23 20:15:29,816 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:15:29,816 Plugins: |
|
2023-10-23 20:15:29,816 - TensorboardLogger |
|
2023-10-23 20:15:29,816 - LinearScheduler | warmup_fraction: '0.1' |
|
2023-10-23 20:15:29,816 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:15:29,816 Final evaluation on model from best epoch (best-model.pt) |
|
2023-10-23 20:15:29,816 - metric: "('micro avg', 'f1-score')" |
|
2023-10-23 20:15:29,816 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:15:29,816 Computation: |
|
2023-10-23 20:15:29,816 - compute on device: cuda:0 |
|
2023-10-23 20:15:29,816 - embedding storage: none |
|
2023-10-23 20:15:29,816 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:15:29,816 Model training base path: "hmbench-hipe2020/de-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs4-wsFalse-e10-lr5e-05-poolingfirst-layers-1-crfFalse-1" |
|
2023-10-23 20:15:29,816 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:15:29,817 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:15:29,817 Logging anything other than scalars to TensorBoard is currently not supported. |
|
2023-10-23 20:15:35,386 epoch 1 - iter 89/894 - loss 2.59714919 - time (sec): 5.57 - samples/sec: 1547.95 - lr: 0.000005 - momentum: 0.000000 |
|
2023-10-23 20:15:40,852 epoch 1 - iter 178/894 - loss 1.60634117 - time (sec): 11.03 - samples/sec: 1490.99 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-23 20:15:46,460 epoch 1 - iter 267/894 - loss 1.17695277 - time (sec): 16.64 - samples/sec: 1520.00 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-23 20:15:52,105 epoch 1 - iter 356/894 - loss 0.96077996 - time (sec): 22.29 - samples/sec: 1501.10 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-23 20:15:57,665 epoch 1 - iter 445/894 - loss 0.81879608 - time (sec): 27.85 - samples/sec: 1518.05 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-23 20:16:03,183 epoch 1 - iter 534/894 - loss 0.71961776 - time (sec): 33.37 - samples/sec: 1507.97 - lr: 0.000030 - momentum: 0.000000 |
|
2023-10-23 20:16:08,866 epoch 1 - iter 623/894 - loss 0.64745412 - time (sec): 39.05 - samples/sec: 1511.76 - lr: 0.000035 - momentum: 0.000000 |
|
2023-10-23 20:16:14,814 epoch 1 - iter 712/894 - loss 0.58589812 - time (sec): 45.00 - samples/sec: 1526.68 - lr: 0.000040 - momentum: 0.000000 |
|
2023-10-23 20:16:20,477 epoch 1 - iter 801/894 - loss 0.54172043 - time (sec): 50.66 - samples/sec: 1532.32 - lr: 0.000045 - momentum: 0.000000 |
|
2023-10-23 20:16:26,150 epoch 1 - iter 890/894 - loss 0.51165477 - time (sec): 56.33 - samples/sec: 1530.32 - lr: 0.000050 - momentum: 0.000000 |
|
2023-10-23 20:16:26,393 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:16:26,393 EPOCH 1 done: loss 0.5100 - lr: 0.000050 |
|
2023-10-23 20:16:31,218 DEV : loss 0.20967631042003632 - f1-score (micro avg) 0.6202 |
|
2023-10-23 20:16:31,237 saving best model |
|
2023-10-23 20:16:31,793 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:16:37,755 epoch 2 - iter 89/894 - loss 0.18366032 - time (sec): 5.96 - samples/sec: 1615.72 - lr: 0.000049 - momentum: 0.000000 |
|
2023-10-23 20:16:43,374 epoch 2 - iter 178/894 - loss 0.16667220 - time (sec): 11.58 - samples/sec: 1564.41 - lr: 0.000049 - momentum: 0.000000 |
|
2023-10-23 20:16:49,071 epoch 2 - iter 267/894 - loss 0.15915199 - time (sec): 17.28 - samples/sec: 1526.16 - lr: 0.000048 - momentum: 0.000000 |
|
2023-10-23 20:16:54,613 epoch 2 - iter 356/894 - loss 0.15848074 - time (sec): 22.82 - samples/sec: 1523.92 - lr: 0.000048 - momentum: 0.000000 |
|
2023-10-23 20:17:00,211 epoch 2 - iter 445/894 - loss 0.15309957 - time (sec): 28.42 - samples/sec: 1513.47 - lr: 0.000047 - momentum: 0.000000 |
|
2023-10-23 20:17:05,880 epoch 2 - iter 534/894 - loss 0.15013878 - time (sec): 34.09 - samples/sec: 1522.31 - lr: 0.000047 - momentum: 0.000000 |
|
2023-10-23 20:17:11,569 epoch 2 - iter 623/894 - loss 0.14743636 - time (sec): 39.77 - samples/sec: 1525.40 - lr: 0.000046 - momentum: 0.000000 |
|
2023-10-23 20:17:17,366 epoch 2 - iter 712/894 - loss 0.14852606 - time (sec): 45.57 - samples/sec: 1527.38 - lr: 0.000046 - momentum: 0.000000 |
|
2023-10-23 20:17:22,933 epoch 2 - iter 801/894 - loss 0.14586868 - time (sec): 51.14 - samples/sec: 1518.64 - lr: 0.000045 - momentum: 0.000000 |
|
2023-10-23 20:17:28,523 epoch 2 - iter 890/894 - loss 0.14382193 - time (sec): 56.73 - samples/sec: 1521.15 - lr: 0.000044 - momentum: 0.000000 |
|
2023-10-23 20:17:28,753 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:17:28,753 EPOCH 2 done: loss 0.1435 - lr: 0.000044 |
|
2023-10-23 20:17:35,228 DEV : loss 0.1754632592201233 - f1-score (micro avg) 0.6982 |
|
2023-10-23 20:17:35,247 saving best model |
|
2023-10-23 20:17:35,954 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:17:41,518 epoch 3 - iter 89/894 - loss 0.08477308 - time (sec): 5.56 - samples/sec: 1454.04 - lr: 0.000044 - momentum: 0.000000 |
|
2023-10-23 20:17:47,277 epoch 3 - iter 178/894 - loss 0.08190529 - time (sec): 11.32 - samples/sec: 1524.85 - lr: 0.000043 - momentum: 0.000000 |
|
2023-10-23 20:17:52,956 epoch 3 - iter 267/894 - loss 0.09010457 - time (sec): 17.00 - samples/sec: 1507.35 - lr: 0.000043 - momentum: 0.000000 |
|
2023-10-23 20:17:58,504 epoch 3 - iter 356/894 - loss 0.09565392 - time (sec): 22.55 - samples/sec: 1503.70 - lr: 0.000042 - momentum: 0.000000 |
|
2023-10-23 20:18:04,015 epoch 3 - iter 445/894 - loss 0.09548354 - time (sec): 28.06 - samples/sec: 1489.93 - lr: 0.000042 - momentum: 0.000000 |
|
2023-10-23 20:18:09,634 epoch 3 - iter 534/894 - loss 0.09453357 - time (sec): 33.68 - samples/sec: 1498.65 - lr: 0.000041 - momentum: 0.000000 |
|
2023-10-23 20:18:15,326 epoch 3 - iter 623/894 - loss 0.09389976 - time (sec): 39.37 - samples/sec: 1506.16 - lr: 0.000041 - momentum: 0.000000 |
|
2023-10-23 20:18:20,834 epoch 3 - iter 712/894 - loss 0.09274930 - time (sec): 44.88 - samples/sec: 1500.50 - lr: 0.000040 - momentum: 0.000000 |
|
2023-10-23 20:18:26,727 epoch 3 - iter 801/894 - loss 0.09367745 - time (sec): 50.77 - samples/sec: 1500.66 - lr: 0.000039 - momentum: 0.000000 |
|
2023-10-23 20:18:32,324 epoch 3 - iter 890/894 - loss 0.09229067 - time (sec): 56.37 - samples/sec: 1511.20 - lr: 0.000039 - momentum: 0.000000 |
|
2023-10-23 20:18:32,860 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:18:32,860 EPOCH 3 done: loss 0.0928 - lr: 0.000039 |
|
2023-10-23 20:18:39,341 DEV : loss 0.22932375967502594 - f1-score (micro avg) 0.7193 |
|
2023-10-23 20:18:39,360 saving best model |
|
2023-10-23 20:18:40,075 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:18:45,881 epoch 4 - iter 89/894 - loss 0.08520341 - time (sec): 5.80 - samples/sec: 1562.49 - lr: 0.000038 - momentum: 0.000000 |
|
2023-10-23 20:18:51,471 epoch 4 - iter 178/894 - loss 0.06747261 - time (sec): 11.39 - samples/sec: 1540.12 - lr: 0.000038 - momentum: 0.000000 |
|
2023-10-23 20:18:57,021 epoch 4 - iter 267/894 - loss 0.06484200 - time (sec): 16.94 - samples/sec: 1520.82 - lr: 0.000037 - momentum: 0.000000 |
|
2023-10-23 20:19:02,538 epoch 4 - iter 356/894 - loss 0.06396801 - time (sec): 22.46 - samples/sec: 1512.00 - lr: 0.000037 - momentum: 0.000000 |
|
2023-10-23 20:19:08,385 epoch 4 - iter 445/894 - loss 0.06386746 - time (sec): 28.31 - samples/sec: 1519.40 - lr: 0.000036 - momentum: 0.000000 |
|
2023-10-23 20:19:13,919 epoch 4 - iter 534/894 - loss 0.06819252 - time (sec): 33.84 - samples/sec: 1502.97 - lr: 0.000036 - momentum: 0.000000 |
|
2023-10-23 20:19:19,502 epoch 4 - iter 623/894 - loss 0.06552692 - time (sec): 39.43 - samples/sec: 1499.90 - lr: 0.000035 - momentum: 0.000000 |
|
2023-10-23 20:19:25,090 epoch 4 - iter 712/894 - loss 0.06382362 - time (sec): 45.01 - samples/sec: 1502.62 - lr: 0.000034 - momentum: 0.000000 |
|
2023-10-23 20:19:31,108 epoch 4 - iter 801/894 - loss 0.06400891 - time (sec): 51.03 - samples/sec: 1514.84 - lr: 0.000034 - momentum: 0.000000 |
|
2023-10-23 20:19:36,794 epoch 4 - iter 890/894 - loss 0.06319753 - time (sec): 56.72 - samples/sec: 1520.07 - lr: 0.000033 - momentum: 0.000000 |
|
2023-10-23 20:19:37,032 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:19:37,032 EPOCH 4 done: loss 0.0633 - lr: 0.000033 |
|
2023-10-23 20:19:43,543 DEV : loss 0.2182805985212326 - f1-score (micro avg) 0.7228 |
|
2023-10-23 20:19:43,563 saving best model |
|
2023-10-23 20:19:44,283 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:19:49,735 epoch 5 - iter 89/894 - loss 0.05109681 - time (sec): 5.45 - samples/sec: 1414.44 - lr: 0.000033 - momentum: 0.000000 |
|
2023-10-23 20:19:55,308 epoch 5 - iter 178/894 - loss 0.04970923 - time (sec): 11.02 - samples/sec: 1448.83 - lr: 0.000032 - momentum: 0.000000 |
|
2023-10-23 20:20:01,154 epoch 5 - iter 267/894 - loss 0.04812104 - time (sec): 16.87 - samples/sec: 1485.74 - lr: 0.000032 - momentum: 0.000000 |
|
2023-10-23 20:20:06,745 epoch 5 - iter 356/894 - loss 0.04536285 - time (sec): 22.46 - samples/sec: 1489.13 - lr: 0.000031 - momentum: 0.000000 |
|
2023-10-23 20:20:12,265 epoch 5 - iter 445/894 - loss 0.04362274 - time (sec): 27.98 - samples/sec: 1492.64 - lr: 0.000031 - momentum: 0.000000 |
|
2023-10-23 20:20:18,202 epoch 5 - iter 534/894 - loss 0.04019401 - time (sec): 33.92 - samples/sec: 1516.77 - lr: 0.000030 - momentum: 0.000000 |
|
2023-10-23 20:20:23,667 epoch 5 - iter 623/894 - loss 0.04294962 - time (sec): 39.38 - samples/sec: 1511.11 - lr: 0.000029 - momentum: 0.000000 |
|
2023-10-23 20:20:29,448 epoch 5 - iter 712/894 - loss 0.04335889 - time (sec): 45.16 - samples/sec: 1521.84 - lr: 0.000029 - momentum: 0.000000 |
|
2023-10-23 20:20:34,931 epoch 5 - iter 801/894 - loss 0.04222422 - time (sec): 50.65 - samples/sec: 1512.92 - lr: 0.000028 - momentum: 0.000000 |
|
2023-10-23 20:20:40,953 epoch 5 - iter 890/894 - loss 0.04335523 - time (sec): 56.67 - samples/sec: 1520.97 - lr: 0.000028 - momentum: 0.000000 |
|
2023-10-23 20:20:41,195 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:20:41,195 EPOCH 5 done: loss 0.0434 - lr: 0.000028 |
|
2023-10-23 20:20:47,680 DEV : loss 0.24800466001033783 - f1-score (micro avg) 0.7504 |
|
2023-10-23 20:20:47,700 saving best model |
|
2023-10-23 20:20:48,415 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:20:53,961 epoch 6 - iter 89/894 - loss 0.04036583 - time (sec): 5.54 - samples/sec: 1519.94 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-23 20:20:59,480 epoch 6 - iter 178/894 - loss 0.04066711 - time (sec): 11.06 - samples/sec: 1518.47 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-23 20:21:05,525 epoch 6 - iter 267/894 - loss 0.03625227 - time (sec): 17.11 - samples/sec: 1560.56 - lr: 0.000026 - momentum: 0.000000 |
|
2023-10-23 20:21:11,360 epoch 6 - iter 356/894 - loss 0.03589946 - time (sec): 22.94 - samples/sec: 1556.70 - lr: 0.000026 - momentum: 0.000000 |
|
2023-10-23 20:21:17,117 epoch 6 - iter 445/894 - loss 0.03248591 - time (sec): 28.70 - samples/sec: 1553.29 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-23 20:21:22,850 epoch 6 - iter 534/894 - loss 0.03006008 - time (sec): 34.43 - samples/sec: 1547.46 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-23 20:21:28,268 epoch 6 - iter 623/894 - loss 0.03099498 - time (sec): 39.85 - samples/sec: 1522.81 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-23 20:21:33,853 epoch 6 - iter 712/894 - loss 0.03031837 - time (sec): 45.44 - samples/sec: 1525.00 - lr: 0.000023 - momentum: 0.000000 |
|
2023-10-23 20:21:39,378 epoch 6 - iter 801/894 - loss 0.02979243 - time (sec): 50.96 - samples/sec: 1521.35 - lr: 0.000023 - momentum: 0.000000 |
|
2023-10-23 20:21:45,014 epoch 6 - iter 890/894 - loss 0.02953982 - time (sec): 56.60 - samples/sec: 1525.06 - lr: 0.000022 - momentum: 0.000000 |
|
2023-10-23 20:21:45,249 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:21:45,249 EPOCH 6 done: loss 0.0295 - lr: 0.000022 |
|
2023-10-23 20:21:51,707 DEV : loss 0.25876209139823914 - f1-score (micro avg) 0.7494 |
|
2023-10-23 20:21:51,727 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:21:57,599 epoch 7 - iter 89/894 - loss 0.02261720 - time (sec): 5.87 - samples/sec: 1617.41 - lr: 0.000022 - momentum: 0.000000 |
|
2023-10-23 20:22:03,344 epoch 7 - iter 178/894 - loss 0.01960647 - time (sec): 11.62 - samples/sec: 1553.38 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-23 20:22:09,118 epoch 7 - iter 267/894 - loss 0.01834675 - time (sec): 17.39 - samples/sec: 1571.69 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-23 20:22:14,677 epoch 7 - iter 356/894 - loss 0.01830131 - time (sec): 22.95 - samples/sec: 1546.10 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-23 20:22:20,171 epoch 7 - iter 445/894 - loss 0.01780005 - time (sec): 28.44 - samples/sec: 1525.74 - lr: 0.000019 - momentum: 0.000000 |
|
2023-10-23 20:22:25,714 epoch 7 - iter 534/894 - loss 0.01660419 - time (sec): 33.99 - samples/sec: 1520.88 - lr: 0.000019 - momentum: 0.000000 |
|
2023-10-23 20:22:31,588 epoch 7 - iter 623/894 - loss 0.01763287 - time (sec): 39.86 - samples/sec: 1530.70 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-23 20:22:37,130 epoch 7 - iter 712/894 - loss 0.01728831 - time (sec): 45.40 - samples/sec: 1534.34 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-23 20:22:42,777 epoch 7 - iter 801/894 - loss 0.01731147 - time (sec): 51.05 - samples/sec: 1526.20 - lr: 0.000017 - momentum: 0.000000 |
|
2023-10-23 20:22:48,332 epoch 7 - iter 890/894 - loss 0.01662326 - time (sec): 56.60 - samples/sec: 1525.26 - lr: 0.000017 - momentum: 0.000000 |
|
2023-10-23 20:22:48,557 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:22:48,558 EPOCH 7 done: loss 0.0166 - lr: 0.000017 |
|
2023-10-23 20:22:55,037 DEV : loss 0.2759707570075989 - f1-score (micro avg) 0.7606 |
|
2023-10-23 20:22:55,057 saving best model |
|
2023-10-23 20:22:55,730 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:23:01,335 epoch 8 - iter 89/894 - loss 0.01201542 - time (sec): 5.60 - samples/sec: 1500.20 - lr: 0.000016 - momentum: 0.000000 |
|
2023-10-23 20:23:06,914 epoch 8 - iter 178/894 - loss 0.01428946 - time (sec): 11.18 - samples/sec: 1520.35 - lr: 0.000016 - momentum: 0.000000 |
|
2023-10-23 20:23:12,951 epoch 8 - iter 267/894 - loss 0.01387472 - time (sec): 17.22 - samples/sec: 1533.94 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-23 20:23:18,478 epoch 8 - iter 356/894 - loss 0.01525323 - time (sec): 22.75 - samples/sec: 1525.25 - lr: 0.000014 - momentum: 0.000000 |
|
2023-10-23 20:23:24,218 epoch 8 - iter 445/894 - loss 0.01448934 - time (sec): 28.49 - samples/sec: 1526.82 - lr: 0.000014 - momentum: 0.000000 |
|
2023-10-23 20:23:29,661 epoch 8 - iter 534/894 - loss 0.01370769 - time (sec): 33.93 - samples/sec: 1510.28 - lr: 0.000013 - momentum: 0.000000 |
|
2023-10-23 20:23:35,326 epoch 8 - iter 623/894 - loss 0.01268812 - time (sec): 39.60 - samples/sec: 1507.85 - lr: 0.000013 - momentum: 0.000000 |
|
2023-10-23 20:23:40,954 epoch 8 - iter 712/894 - loss 0.01219014 - time (sec): 45.22 - samples/sec: 1511.45 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-23 20:23:46,585 epoch 8 - iter 801/894 - loss 0.01209750 - time (sec): 50.85 - samples/sec: 1511.71 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-23 20:23:52,414 epoch 8 - iter 890/894 - loss 0.01204504 - time (sec): 56.68 - samples/sec: 1518.52 - lr: 0.000011 - momentum: 0.000000 |
|
2023-10-23 20:23:52,702 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:23:52,702 EPOCH 8 done: loss 0.0120 - lr: 0.000011 |
|
2023-10-23 20:23:59,204 DEV : loss 0.28997981548309326 - f1-score (micro avg) 0.7647 |
|
2023-10-23 20:23:59,224 saving best model |
|
2023-10-23 20:23:59,946 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:24:05,755 epoch 9 - iter 89/894 - loss 0.01213321 - time (sec): 5.81 - samples/sec: 1565.14 - lr: 0.000011 - momentum: 0.000000 |
|
2023-10-23 20:24:11,568 epoch 9 - iter 178/894 - loss 0.00698303 - time (sec): 11.62 - samples/sec: 1569.46 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-23 20:24:17,293 epoch 9 - iter 267/894 - loss 0.01040212 - time (sec): 17.35 - samples/sec: 1531.57 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-23 20:24:22,767 epoch 9 - iter 356/894 - loss 0.00823786 - time (sec): 22.82 - samples/sec: 1501.57 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-23 20:24:28,305 epoch 9 - iter 445/894 - loss 0.00862818 - time (sec): 28.36 - samples/sec: 1492.36 - lr: 0.000008 - momentum: 0.000000 |
|
2023-10-23 20:24:33,898 epoch 9 - iter 534/894 - loss 0.00924446 - time (sec): 33.95 - samples/sec: 1496.96 - lr: 0.000008 - momentum: 0.000000 |
|
2023-10-23 20:24:39,422 epoch 9 - iter 623/894 - loss 0.00818246 - time (sec): 39.47 - samples/sec: 1501.56 - lr: 0.000007 - momentum: 0.000000 |
|
2023-10-23 20:24:45,403 epoch 9 - iter 712/894 - loss 0.00806625 - time (sec): 45.46 - samples/sec: 1536.93 - lr: 0.000007 - momentum: 0.000000 |
|
2023-10-23 20:24:50,942 epoch 9 - iter 801/894 - loss 0.00827461 - time (sec): 51.00 - samples/sec: 1534.62 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-23 20:24:56,461 epoch 9 - iter 890/894 - loss 0.00783743 - time (sec): 56.51 - samples/sec: 1526.78 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-23 20:24:56,697 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:24:56,698 EPOCH 9 done: loss 0.0078 - lr: 0.000006 |
|
2023-10-23 20:25:03,190 DEV : loss 0.3125050365924835 - f1-score (micro avg) 0.7634 |
|
2023-10-23 20:25:03,210 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:25:08,957 epoch 10 - iter 89/894 - loss 0.00288764 - time (sec): 5.75 - samples/sec: 1526.40 - lr: 0.000005 - momentum: 0.000000 |
|
2023-10-23 20:25:14,516 epoch 10 - iter 178/894 - loss 0.00185958 - time (sec): 11.31 - samples/sec: 1529.00 - lr: 0.000004 - momentum: 0.000000 |
|
2023-10-23 20:25:20,062 epoch 10 - iter 267/894 - loss 0.00144000 - time (sec): 16.85 - samples/sec: 1507.69 - lr: 0.000004 - momentum: 0.000000 |
|
2023-10-23 20:25:25,639 epoch 10 - iter 356/894 - loss 0.00308797 - time (sec): 22.43 - samples/sec: 1498.26 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-23 20:25:31,188 epoch 10 - iter 445/894 - loss 0.00408854 - time (sec): 27.98 - samples/sec: 1494.66 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-23 20:25:36,826 epoch 10 - iter 534/894 - loss 0.00462897 - time (sec): 33.62 - samples/sec: 1495.18 - lr: 0.000002 - momentum: 0.000000 |
|
2023-10-23 20:25:42,386 epoch 10 - iter 623/894 - loss 0.00476347 - time (sec): 39.18 - samples/sec: 1490.78 - lr: 0.000002 - momentum: 0.000000 |
|
2023-10-23 20:25:47,899 epoch 10 - iter 712/894 - loss 0.00468077 - time (sec): 44.69 - samples/sec: 1492.54 - lr: 0.000001 - momentum: 0.000000 |
|
2023-10-23 20:25:54,051 epoch 10 - iter 801/894 - loss 0.00502664 - time (sec): 50.84 - samples/sec: 1522.93 - lr: 0.000001 - momentum: 0.000000 |
|
2023-10-23 20:25:59,655 epoch 10 - iter 890/894 - loss 0.00535314 - time (sec): 56.44 - samples/sec: 1517.84 - lr: 0.000000 - momentum: 0.000000 |
|
2023-10-23 20:26:00,072 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:26:00,072 EPOCH 10 done: loss 0.0053 - lr: 0.000000 |
|
2023-10-23 20:26:06,280 DEV : loss 0.28623971343040466 - f1-score (micro avg) 0.7714 |
|
2023-10-23 20:26:06,300 saving best model |
|
2023-10-23 20:26:07,617 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:26:07,618 Loading model from best epoch ... |
|
2023-10-23 20:26:09,700 SequenceTagger predicts: Dictionary with 21 tags: O, S-loc, B-loc, E-loc, I-loc, S-pers, B-pers, E-pers, I-pers, S-org, B-org, E-org, I-org, S-prod, B-prod, E-prod, I-prod, S-time, B-time, E-time, I-time |
|
2023-10-23 20:26:14,246 |
|
Results: |
|
- F-score (micro) 0.7448 |
|
- F-score (macro) 0.6746 |
|
- Accuracy 0.6092 |
|
|
|
By class: |
|
precision recall f1-score support |
|
|
|
loc 0.7994 0.8624 0.8297 596 |
|
pers 0.6845 0.7297 0.7064 333 |
|
org 0.5351 0.4621 0.4959 132 |
|
prod 0.6667 0.5152 0.5812 66 |
|
time 0.7451 0.7755 0.7600 49 |
|
|
|
micro avg 0.7331 0.7568 0.7448 1176 |
|
macro avg 0.6861 0.6690 0.6746 1176 |
|
weighted avg 0.7275 0.7568 0.7405 1176 |
|
|
|
2023-10-23 20:26:14,247 ---------------------------------------------------------------------------------------------------- |
|
|