|
2023-10-23 20:42:58,126 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:42:58,127 Model: "SequenceTagger( |
|
(embeddings): TransformerWordEmbeddings( |
|
(model): BertModel( |
|
(embeddings): BertEmbeddings( |
|
(word_embeddings): Embedding(64001, 768) |
|
(position_embeddings): Embedding(512, 768) |
|
(token_type_embeddings): Embedding(2, 768) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(encoder): BertEncoder( |
|
(layer): ModuleList( |
|
(0): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(1): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(2): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(3): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(4): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(5): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(6): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(7): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(8): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(9): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(10): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(11): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
) |
|
) |
|
(pooler): BertPooler( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(activation): Tanh() |
|
) |
|
) |
|
) |
|
(locked_dropout): LockedDropout(p=0.5) |
|
(linear): Linear(in_features=768, out_features=21, bias=True) |
|
(loss_function): CrossEntropyLoss() |
|
)" |
|
2023-10-23 20:42:58,127 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:42:58,128 MultiCorpus: 3575 train + 1235 dev + 1266 test sentences |
|
- NER_HIPE_2022 Corpus: 3575 train + 1235 dev + 1266 test sentences - /home/ubuntu/.flair/datasets/ner_hipe_2022/v2.1/hipe2020/de/with_doc_seperator |
|
2023-10-23 20:42:58,128 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:42:58,128 Train: 3575 sentences |
|
2023-10-23 20:42:58,128 (train_with_dev=False, train_with_test=False) |
|
2023-10-23 20:42:58,128 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:42:58,128 Training Params: |
|
2023-10-23 20:42:58,128 - learning_rate: "3e-05" |
|
2023-10-23 20:42:58,128 - mini_batch_size: "4" |
|
2023-10-23 20:42:58,128 - max_epochs: "10" |
|
2023-10-23 20:42:58,128 - shuffle: "True" |
|
2023-10-23 20:42:58,128 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:42:58,128 Plugins: |
|
2023-10-23 20:42:58,128 - TensorboardLogger |
|
2023-10-23 20:42:58,128 - LinearScheduler | warmup_fraction: '0.1' |
|
2023-10-23 20:42:58,128 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:42:58,128 Final evaluation on model from best epoch (best-model.pt) |
|
2023-10-23 20:42:58,128 - metric: "('micro avg', 'f1-score')" |
|
2023-10-23 20:42:58,128 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:42:58,128 Computation: |
|
2023-10-23 20:42:58,129 - compute on device: cuda:0 |
|
2023-10-23 20:42:58,129 - embedding storage: none |
|
2023-10-23 20:42:58,129 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:42:58,129 Model training base path: "hmbench-hipe2020/de-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs4-wsFalse-e10-lr3e-05-poolingfirst-layers-1-crfFalse-2" |
|
2023-10-23 20:42:58,129 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:42:58,129 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:42:58,129 Logging anything other than scalars to TensorBoard is currently not supported. |
|
2023-10-23 20:43:03,640 epoch 1 - iter 89/894 - loss 2.48080723 - time (sec): 5.51 - samples/sec: 1445.17 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-23 20:43:09,345 epoch 1 - iter 178/894 - loss 1.45885269 - time (sec): 11.22 - samples/sec: 1485.05 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-23 20:43:15,029 epoch 1 - iter 267/894 - loss 1.08472181 - time (sec): 16.90 - samples/sec: 1488.89 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-23 20:43:20,705 epoch 1 - iter 356/894 - loss 0.90566478 - time (sec): 22.58 - samples/sec: 1493.43 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-23 20:43:26,321 epoch 1 - iter 445/894 - loss 0.78795357 - time (sec): 28.19 - samples/sec: 1501.14 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-23 20:43:31,849 epoch 1 - iter 534/894 - loss 0.70900774 - time (sec): 33.72 - samples/sec: 1494.09 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-23 20:43:37,466 epoch 1 - iter 623/894 - loss 0.63979138 - time (sec): 39.34 - samples/sec: 1498.39 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-23 20:43:43,114 epoch 1 - iter 712/894 - loss 0.58612567 - time (sec): 44.98 - samples/sec: 1502.92 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-23 20:43:49,081 epoch 1 - iter 801/894 - loss 0.54155740 - time (sec): 50.95 - samples/sec: 1515.59 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-23 20:43:54,698 epoch 1 - iter 890/894 - loss 0.50622355 - time (sec): 56.57 - samples/sec: 1524.76 - lr: 0.000030 - momentum: 0.000000 |
|
2023-10-23 20:43:54,933 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:43:54,933 EPOCH 1 done: loss 0.5053 - lr: 0.000030 |
|
2023-10-23 20:43:59,782 DEV : loss 0.1566276252269745 - f1-score (micro avg) 0.6176 |
|
2023-10-23 20:43:59,803 saving best model |
|
2023-10-23 20:44:00,374 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:44:05,850 epoch 2 - iter 89/894 - loss 0.16450987 - time (sec): 5.47 - samples/sec: 1511.62 - lr: 0.000030 - momentum: 0.000000 |
|
2023-10-23 20:44:11,563 epoch 2 - iter 178/894 - loss 0.15748841 - time (sec): 11.19 - samples/sec: 1519.78 - lr: 0.000029 - momentum: 0.000000 |
|
2023-10-23 20:44:17,274 epoch 2 - iter 267/894 - loss 0.15283243 - time (sec): 16.90 - samples/sec: 1533.58 - lr: 0.000029 - momentum: 0.000000 |
|
2023-10-23 20:44:23,049 epoch 2 - iter 356/894 - loss 0.15237744 - time (sec): 22.67 - samples/sec: 1528.63 - lr: 0.000029 - momentum: 0.000000 |
|
2023-10-23 20:44:28,594 epoch 2 - iter 445/894 - loss 0.14585054 - time (sec): 28.22 - samples/sec: 1505.44 - lr: 0.000028 - momentum: 0.000000 |
|
2023-10-23 20:44:34,379 epoch 2 - iter 534/894 - loss 0.15087654 - time (sec): 34.00 - samples/sec: 1513.75 - lr: 0.000028 - momentum: 0.000000 |
|
2023-10-23 20:44:40,096 epoch 2 - iter 623/894 - loss 0.14971394 - time (sec): 39.72 - samples/sec: 1522.56 - lr: 0.000028 - momentum: 0.000000 |
|
2023-10-23 20:44:45,566 epoch 2 - iter 712/894 - loss 0.15039161 - time (sec): 45.19 - samples/sec: 1511.99 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-23 20:44:51,516 epoch 2 - iter 801/894 - loss 0.14951375 - time (sec): 51.14 - samples/sec: 1523.17 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-23 20:44:57,059 epoch 2 - iter 890/894 - loss 0.14581873 - time (sec): 56.68 - samples/sec: 1518.95 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-23 20:44:57,322 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:44:57,322 EPOCH 2 done: loss 0.1455 - lr: 0.000027 |
|
2023-10-23 20:45:03,843 DEV : loss 0.16595827043056488 - f1-score (micro avg) 0.717 |
|
2023-10-23 20:45:03,863 saving best model |
|
2023-10-23 20:45:04,578 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:45:10,343 epoch 3 - iter 89/894 - loss 0.09957272 - time (sec): 5.76 - samples/sec: 1554.16 - lr: 0.000026 - momentum: 0.000000 |
|
2023-10-23 20:45:16,075 epoch 3 - iter 178/894 - loss 0.09503941 - time (sec): 11.50 - samples/sec: 1536.73 - lr: 0.000026 - momentum: 0.000000 |
|
2023-10-23 20:45:21,827 epoch 3 - iter 267/894 - loss 0.08946847 - time (sec): 17.25 - samples/sec: 1556.44 - lr: 0.000026 - momentum: 0.000000 |
|
2023-10-23 20:45:27,441 epoch 3 - iter 356/894 - loss 0.08424858 - time (sec): 22.86 - samples/sec: 1525.11 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-23 20:45:33,050 epoch 3 - iter 445/894 - loss 0.08506845 - time (sec): 28.47 - samples/sec: 1528.17 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-23 20:45:38,640 epoch 3 - iter 534/894 - loss 0.08475705 - time (sec): 34.06 - samples/sec: 1522.51 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-23 20:45:44,182 epoch 3 - iter 623/894 - loss 0.08369846 - time (sec): 39.60 - samples/sec: 1513.84 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-23 20:45:50,076 epoch 3 - iter 712/894 - loss 0.08163382 - time (sec): 45.50 - samples/sec: 1521.35 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-23 20:45:55,687 epoch 3 - iter 801/894 - loss 0.08201642 - time (sec): 51.11 - samples/sec: 1515.67 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-23 20:46:01,448 epoch 3 - iter 890/894 - loss 0.08247904 - time (sec): 56.87 - samples/sec: 1516.36 - lr: 0.000023 - momentum: 0.000000 |
|
2023-10-23 20:46:01,681 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:46:01,681 EPOCH 3 done: loss 0.0827 - lr: 0.000023 |
|
2023-10-23 20:46:08,204 DEV : loss 0.17428651452064514 - f1-score (micro avg) 0.7264 |
|
2023-10-23 20:46:08,224 saving best model |
|
2023-10-23 20:46:08,976 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:46:14,510 epoch 4 - iter 89/894 - loss 0.05262825 - time (sec): 5.53 - samples/sec: 1466.68 - lr: 0.000023 - momentum: 0.000000 |
|
2023-10-23 20:46:20,231 epoch 4 - iter 178/894 - loss 0.04468521 - time (sec): 11.25 - samples/sec: 1489.29 - lr: 0.000023 - momentum: 0.000000 |
|
2023-10-23 20:46:25,885 epoch 4 - iter 267/894 - loss 0.04312714 - time (sec): 16.91 - samples/sec: 1506.15 - lr: 0.000022 - momentum: 0.000000 |
|
2023-10-23 20:46:31,747 epoch 4 - iter 356/894 - loss 0.04469493 - time (sec): 22.77 - samples/sec: 1515.98 - lr: 0.000022 - momentum: 0.000000 |
|
2023-10-23 20:46:37,451 epoch 4 - iter 445/894 - loss 0.04758485 - time (sec): 28.47 - samples/sec: 1510.93 - lr: 0.000022 - momentum: 0.000000 |
|
2023-10-23 20:46:43,175 epoch 4 - iter 534/894 - loss 0.04927085 - time (sec): 34.20 - samples/sec: 1513.77 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-23 20:46:48,977 epoch 4 - iter 623/894 - loss 0.05000260 - time (sec): 40.00 - samples/sec: 1524.08 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-23 20:46:54,699 epoch 4 - iter 712/894 - loss 0.05005085 - time (sec): 45.72 - samples/sec: 1525.25 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-23 20:47:00,250 epoch 4 - iter 801/894 - loss 0.04974125 - time (sec): 51.27 - samples/sec: 1517.36 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-23 20:47:05,779 epoch 4 - iter 890/894 - loss 0.05150310 - time (sec): 56.80 - samples/sec: 1517.54 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-23 20:47:06,031 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:47:06,032 EPOCH 4 done: loss 0.0525 - lr: 0.000020 |
|
2023-10-23 20:47:12,556 DEV : loss 0.20298461616039276 - f1-score (micro avg) 0.7392 |
|
2023-10-23 20:47:12,576 saving best model |
|
2023-10-23 20:47:13,313 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:47:18,999 epoch 5 - iter 89/894 - loss 0.03232253 - time (sec): 5.68 - samples/sec: 1546.64 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-23 20:47:24,694 epoch 5 - iter 178/894 - loss 0.03349854 - time (sec): 11.38 - samples/sec: 1498.69 - lr: 0.000019 - momentum: 0.000000 |
|
2023-10-23 20:47:30,205 epoch 5 - iter 267/894 - loss 0.03340786 - time (sec): 16.89 - samples/sec: 1484.26 - lr: 0.000019 - momentum: 0.000000 |
|
2023-10-23 20:47:36,073 epoch 5 - iter 356/894 - loss 0.03460467 - time (sec): 22.76 - samples/sec: 1518.03 - lr: 0.000019 - momentum: 0.000000 |
|
2023-10-23 20:47:41,657 epoch 5 - iter 445/894 - loss 0.03216847 - time (sec): 28.34 - samples/sec: 1504.36 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-23 20:47:47,217 epoch 5 - iter 534/894 - loss 0.03332744 - time (sec): 33.90 - samples/sec: 1500.36 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-23 20:47:53,166 epoch 5 - iter 623/894 - loss 0.03407386 - time (sec): 39.85 - samples/sec: 1514.43 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-23 20:47:58,849 epoch 5 - iter 712/894 - loss 0.03376706 - time (sec): 45.53 - samples/sec: 1516.40 - lr: 0.000017 - momentum: 0.000000 |
|
2023-10-23 20:48:04,460 epoch 5 - iter 801/894 - loss 0.03450533 - time (sec): 51.15 - samples/sec: 1523.39 - lr: 0.000017 - momentum: 0.000000 |
|
2023-10-23 20:48:10,007 epoch 5 - iter 890/894 - loss 0.03436077 - time (sec): 56.69 - samples/sec: 1519.59 - lr: 0.000017 - momentum: 0.000000 |
|
2023-10-23 20:48:10,262 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:48:10,262 EPOCH 5 done: loss 0.0344 - lr: 0.000017 |
|
2023-10-23 20:48:16,766 DEV : loss 0.24649250507354736 - f1-score (micro avg) 0.7686 |
|
2023-10-23 20:48:16,786 saving best model |
|
2023-10-23 20:48:17,554 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:48:23,117 epoch 6 - iter 89/894 - loss 0.02315835 - time (sec): 5.56 - samples/sec: 1440.11 - lr: 0.000016 - momentum: 0.000000 |
|
2023-10-23 20:48:28,761 epoch 6 - iter 178/894 - loss 0.01840372 - time (sec): 11.21 - samples/sec: 1438.66 - lr: 0.000016 - momentum: 0.000000 |
|
2023-10-23 20:48:34,527 epoch 6 - iter 267/894 - loss 0.01933114 - time (sec): 16.97 - samples/sec: 1477.92 - lr: 0.000016 - momentum: 0.000000 |
|
2023-10-23 20:48:40,231 epoch 6 - iter 356/894 - loss 0.01924240 - time (sec): 22.68 - samples/sec: 1518.61 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-23 20:48:45,870 epoch 6 - iter 445/894 - loss 0.01840968 - time (sec): 28.31 - samples/sec: 1521.80 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-23 20:48:51,569 epoch 6 - iter 534/894 - loss 0.01732048 - time (sec): 34.01 - samples/sec: 1511.68 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-23 20:48:57,092 epoch 6 - iter 623/894 - loss 0.01729568 - time (sec): 39.54 - samples/sec: 1511.72 - lr: 0.000014 - momentum: 0.000000 |
|
2023-10-23 20:49:02,783 epoch 6 - iter 712/894 - loss 0.02022187 - time (sec): 45.23 - samples/sec: 1519.40 - lr: 0.000014 - momentum: 0.000000 |
|
2023-10-23 20:49:08,622 epoch 6 - iter 801/894 - loss 0.02054773 - time (sec): 51.07 - samples/sec: 1513.92 - lr: 0.000014 - momentum: 0.000000 |
|
2023-10-23 20:49:14,229 epoch 6 - iter 890/894 - loss 0.02089905 - time (sec): 56.67 - samples/sec: 1521.70 - lr: 0.000013 - momentum: 0.000000 |
|
2023-10-23 20:49:14,474 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:49:14,474 EPOCH 6 done: loss 0.0210 - lr: 0.000013 |
|
2023-10-23 20:49:21,009 DEV : loss 0.22474254667758942 - f1-score (micro avg) 0.7671 |
|
2023-10-23 20:49:21,030 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:49:26,585 epoch 7 - iter 89/894 - loss 0.00932732 - time (sec): 5.55 - samples/sec: 1525.93 - lr: 0.000013 - momentum: 0.000000 |
|
2023-10-23 20:49:32,387 epoch 7 - iter 178/894 - loss 0.01432918 - time (sec): 11.36 - samples/sec: 1517.03 - lr: 0.000013 - momentum: 0.000000 |
|
2023-10-23 20:49:38,406 epoch 7 - iter 267/894 - loss 0.01700073 - time (sec): 17.38 - samples/sec: 1533.79 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-23 20:49:44,028 epoch 7 - iter 356/894 - loss 0.01537951 - time (sec): 23.00 - samples/sec: 1525.27 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-23 20:49:49,679 epoch 7 - iter 445/894 - loss 0.01506681 - time (sec): 28.65 - samples/sec: 1519.99 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-23 20:49:55,356 epoch 7 - iter 534/894 - loss 0.01557746 - time (sec): 34.33 - samples/sec: 1523.54 - lr: 0.000011 - momentum: 0.000000 |
|
2023-10-23 20:50:01,041 epoch 7 - iter 623/894 - loss 0.01611775 - time (sec): 40.01 - samples/sec: 1520.46 - lr: 0.000011 - momentum: 0.000000 |
|
2023-10-23 20:50:06,612 epoch 7 - iter 712/894 - loss 0.01564183 - time (sec): 45.58 - samples/sec: 1517.64 - lr: 0.000011 - momentum: 0.000000 |
|
2023-10-23 20:50:12,188 epoch 7 - iter 801/894 - loss 0.01618850 - time (sec): 51.16 - samples/sec: 1520.34 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-23 20:50:17,797 epoch 7 - iter 890/894 - loss 0.01525803 - time (sec): 56.77 - samples/sec: 1519.19 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-23 20:50:18,038 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:50:18,038 EPOCH 7 done: loss 0.0152 - lr: 0.000010 |
|
2023-10-23 20:50:24,588 DEV : loss 0.25017356872558594 - f1-score (micro avg) 0.7642 |
|
2023-10-23 20:50:24,609 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:50:30,199 epoch 8 - iter 89/894 - loss 0.01612093 - time (sec): 5.59 - samples/sec: 1518.14 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-23 20:50:35,787 epoch 8 - iter 178/894 - loss 0.01639135 - time (sec): 11.18 - samples/sec: 1523.56 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-23 20:50:41,367 epoch 8 - iter 267/894 - loss 0.01252323 - time (sec): 16.76 - samples/sec: 1488.68 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-23 20:50:47,473 epoch 8 - iter 356/894 - loss 0.01262541 - time (sec): 22.86 - samples/sec: 1534.17 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-23 20:50:53,120 epoch 8 - iter 445/894 - loss 0.01149455 - time (sec): 28.51 - samples/sec: 1537.53 - lr: 0.000008 - momentum: 0.000000 |
|
2023-10-23 20:50:58,782 epoch 8 - iter 534/894 - loss 0.01152579 - time (sec): 34.17 - samples/sec: 1517.00 - lr: 0.000008 - momentum: 0.000000 |
|
2023-10-23 20:51:04,387 epoch 8 - iter 623/894 - loss 0.01078718 - time (sec): 39.78 - samples/sec: 1515.93 - lr: 0.000008 - momentum: 0.000000 |
|
2023-10-23 20:51:10,043 epoch 8 - iter 712/894 - loss 0.01121208 - time (sec): 45.43 - samples/sec: 1514.39 - lr: 0.000007 - momentum: 0.000000 |
|
2023-10-23 20:51:15,934 epoch 8 - iter 801/894 - loss 0.01081237 - time (sec): 51.32 - samples/sec: 1519.75 - lr: 0.000007 - momentum: 0.000000 |
|
2023-10-23 20:51:21,479 epoch 8 - iter 890/894 - loss 0.01053265 - time (sec): 56.87 - samples/sec: 1516.06 - lr: 0.000007 - momentum: 0.000000 |
|
2023-10-23 20:51:21,720 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:51:21,720 EPOCH 8 done: loss 0.0106 - lr: 0.000007 |
|
2023-10-23 20:51:27,969 DEV : loss 0.29330340027809143 - f1-score (micro avg) 0.7596 |
|
2023-10-23 20:51:27,990 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:51:33,729 epoch 9 - iter 89/894 - loss 0.00425727 - time (sec): 5.74 - samples/sec: 1410.23 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-23 20:51:39,392 epoch 9 - iter 178/894 - loss 0.00701630 - time (sec): 11.40 - samples/sec: 1444.35 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-23 20:51:45,246 epoch 9 - iter 267/894 - loss 0.00717471 - time (sec): 17.26 - samples/sec: 1473.49 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-23 20:51:50,879 epoch 9 - iter 356/894 - loss 0.00646948 - time (sec): 22.89 - samples/sec: 1489.65 - lr: 0.000005 - momentum: 0.000000 |
|
2023-10-23 20:51:56,536 epoch 9 - iter 445/894 - loss 0.00613541 - time (sec): 28.54 - samples/sec: 1503.56 - lr: 0.000005 - momentum: 0.000000 |
|
2023-10-23 20:52:02,312 epoch 9 - iter 534/894 - loss 0.00574876 - time (sec): 34.32 - samples/sec: 1509.25 - lr: 0.000005 - momentum: 0.000000 |
|
2023-10-23 20:52:08,201 epoch 9 - iter 623/894 - loss 0.00558351 - time (sec): 40.21 - samples/sec: 1520.38 - lr: 0.000004 - momentum: 0.000000 |
|
2023-10-23 20:52:13,789 epoch 9 - iter 712/894 - loss 0.00596474 - time (sec): 45.80 - samples/sec: 1514.27 - lr: 0.000004 - momentum: 0.000000 |
|
2023-10-23 20:52:19,295 epoch 9 - iter 801/894 - loss 0.00595036 - time (sec): 51.30 - samples/sec: 1512.79 - lr: 0.000004 - momentum: 0.000000 |
|
2023-10-23 20:52:25,015 epoch 9 - iter 890/894 - loss 0.00559881 - time (sec): 57.02 - samples/sec: 1513.03 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-23 20:52:25,245 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:52:25,245 EPOCH 9 done: loss 0.0056 - lr: 0.000003 |
|
2023-10-23 20:52:31,486 DEV : loss 0.27675920724868774 - f1-score (micro avg) 0.7733 |
|
2023-10-23 20:52:31,507 saving best model |
|
2023-10-23 20:52:32,180 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:52:37,758 epoch 10 - iter 89/894 - loss 0.00098253 - time (sec): 5.58 - samples/sec: 1545.49 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-23 20:52:43,517 epoch 10 - iter 178/894 - loss 0.00095969 - time (sec): 11.34 - samples/sec: 1560.78 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-23 20:52:49,428 epoch 10 - iter 267/894 - loss 0.00128812 - time (sec): 17.25 - samples/sec: 1508.63 - lr: 0.000002 - momentum: 0.000000 |
|
2023-10-23 20:52:54,932 epoch 10 - iter 356/894 - loss 0.00278304 - time (sec): 22.75 - samples/sec: 1509.09 - lr: 0.000002 - momentum: 0.000000 |
|
2023-10-23 20:53:00,826 epoch 10 - iter 445/894 - loss 0.00345471 - time (sec): 28.64 - samples/sec: 1527.64 - lr: 0.000002 - momentum: 0.000000 |
|
2023-10-23 20:53:06,392 epoch 10 - iter 534/894 - loss 0.00389630 - time (sec): 34.21 - samples/sec: 1511.09 - lr: 0.000001 - momentum: 0.000000 |
|
2023-10-23 20:53:11,957 epoch 10 - iter 623/894 - loss 0.00368630 - time (sec): 39.78 - samples/sec: 1515.44 - lr: 0.000001 - momentum: 0.000000 |
|
2023-10-23 20:53:17,683 epoch 10 - iter 712/894 - loss 0.00344338 - time (sec): 45.50 - samples/sec: 1513.16 - lr: 0.000001 - momentum: 0.000000 |
|
2023-10-23 20:53:23,567 epoch 10 - iter 801/894 - loss 0.00365553 - time (sec): 51.39 - samples/sec: 1510.67 - lr: 0.000000 - momentum: 0.000000 |
|
2023-10-23 20:53:29,280 epoch 10 - iter 890/894 - loss 0.00368854 - time (sec): 57.10 - samples/sec: 1509.37 - lr: 0.000000 - momentum: 0.000000 |
|
2023-10-23 20:53:29,517 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:53:29,517 EPOCH 10 done: loss 0.0039 - lr: 0.000000 |
|
2023-10-23 20:53:35,745 DEV : loss 0.27931466698646545 - f1-score (micro avg) 0.7767 |
|
2023-10-23 20:53:35,765 saving best model |
|
2023-10-23 20:53:37,037 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:53:37,038 Loading model from best epoch ... |
|
2023-10-23 20:53:38,841 SequenceTagger predicts: Dictionary with 21 tags: O, S-loc, B-loc, E-loc, I-loc, S-pers, B-pers, E-pers, I-pers, S-org, B-org, E-org, I-org, S-prod, B-prod, E-prod, I-prod, S-time, B-time, E-time, I-time |
|
2023-10-23 20:53:43,691 |
|
Results: |
|
- F-score (micro) 0.7553 |
|
- F-score (macro) 0.6752 |
|
- Accuracy 0.6225 |
|
|
|
By class: |
|
precision recall f1-score support |
|
|
|
loc 0.8232 0.8674 0.8448 596 |
|
pers 0.6800 0.7658 0.7203 333 |
|
org 0.5379 0.5379 0.5379 132 |
|
prod 0.6316 0.5455 0.5854 66 |
|
time 0.7021 0.6735 0.6875 49 |
|
|
|
micro avg 0.7361 0.7755 0.7553 1176 |
|
macro avg 0.6750 0.6780 0.6752 1176 |
|
weighted avg 0.7349 0.7755 0.7540 1176 |
|
|
|
2023-10-23 20:53:43,691 ---------------------------------------------------------------------------------------------------- |
|
|