|
2023-10-23 21:21:30,456 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:21:30,457 Model: "SequenceTagger( |
|
(embeddings): TransformerWordEmbeddings( |
|
(model): BertModel( |
|
(embeddings): BertEmbeddings( |
|
(word_embeddings): Embedding(64001, 768) |
|
(position_embeddings): Embedding(512, 768) |
|
(token_type_embeddings): Embedding(2, 768) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(encoder): BertEncoder( |
|
(layer): ModuleList( |
|
(0): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(1): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(2): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(3): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(4): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(5): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(6): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(7): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(8): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(9): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(10): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(11): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
) |
|
) |
|
(pooler): BertPooler( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(activation): Tanh() |
|
) |
|
) |
|
) |
|
(locked_dropout): LockedDropout(p=0.5) |
|
(linear): Linear(in_features=768, out_features=21, bias=True) |
|
(loss_function): CrossEntropyLoss() |
|
)" |
|
2023-10-23 21:21:30,457 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:21:30,457 MultiCorpus: 3575 train + 1235 dev + 1266 test sentences |
|
- NER_HIPE_2022 Corpus: 3575 train + 1235 dev + 1266 test sentences - /home/ubuntu/.flair/datasets/ner_hipe_2022/v2.1/hipe2020/de/with_doc_seperator |
|
2023-10-23 21:21:30,457 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:21:30,457 Train: 3575 sentences |
|
2023-10-23 21:21:30,457 (train_with_dev=False, train_with_test=False) |
|
2023-10-23 21:21:30,457 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:21:30,457 Training Params: |
|
2023-10-23 21:21:30,457 - learning_rate: "3e-05" |
|
2023-10-23 21:21:30,457 - mini_batch_size: "4" |
|
2023-10-23 21:21:30,457 - max_epochs: "10" |
|
2023-10-23 21:21:30,457 - shuffle: "True" |
|
2023-10-23 21:21:30,458 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:21:30,458 Plugins: |
|
2023-10-23 21:21:30,458 - TensorboardLogger |
|
2023-10-23 21:21:30,458 - LinearScheduler | warmup_fraction: '0.1' |
|
2023-10-23 21:21:30,458 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:21:30,458 Final evaluation on model from best epoch (best-model.pt) |
|
2023-10-23 21:21:30,458 - metric: "('micro avg', 'f1-score')" |
|
2023-10-23 21:21:30,458 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:21:30,458 Computation: |
|
2023-10-23 21:21:30,458 - compute on device: cuda:0 |
|
2023-10-23 21:21:30,458 - embedding storage: none |
|
2023-10-23 21:21:30,458 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:21:30,458 Model training base path: "hmbench-hipe2020/de-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs4-wsFalse-e10-lr3e-05-poolingfirst-layers-1-crfFalse-3" |
|
2023-10-23 21:21:30,458 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:21:30,458 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:21:30,458 Logging anything other than scalars to TensorBoard is currently not supported. |
|
2023-10-23 21:21:35,957 epoch 1 - iter 89/894 - loss 2.21664487 - time (sec): 5.50 - samples/sec: 1523.65 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-23 21:21:41,733 epoch 1 - iter 178/894 - loss 1.33031967 - time (sec): 11.27 - samples/sec: 1548.64 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-23 21:21:47,322 epoch 1 - iter 267/894 - loss 1.02761727 - time (sec): 16.86 - samples/sec: 1558.12 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-23 21:21:52,888 epoch 1 - iter 356/894 - loss 0.85080042 - time (sec): 22.43 - samples/sec: 1560.61 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-23 21:21:58,685 epoch 1 - iter 445/894 - loss 0.74598055 - time (sec): 28.23 - samples/sec: 1565.20 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-23 21:22:04,296 epoch 1 - iter 534/894 - loss 0.67330125 - time (sec): 33.84 - samples/sec: 1557.07 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-23 21:22:09,966 epoch 1 - iter 623/894 - loss 0.61339427 - time (sec): 39.51 - samples/sec: 1548.21 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-23 21:22:15,476 epoch 1 - iter 712/894 - loss 0.56491664 - time (sec): 45.02 - samples/sec: 1547.50 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-23 21:22:21,077 epoch 1 - iter 801/894 - loss 0.52761211 - time (sec): 50.62 - samples/sec: 1541.15 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-23 21:22:26,797 epoch 1 - iter 890/894 - loss 0.49680906 - time (sec): 56.34 - samples/sec: 1528.78 - lr: 0.000030 - momentum: 0.000000 |
|
2023-10-23 21:22:27,043 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:22:27,043 EPOCH 1 done: loss 0.4947 - lr: 0.000030 |
|
2023-10-23 21:22:31,863 DEV : loss 0.1481543481349945 - f1-score (micro avg) 0.6598 |
|
2023-10-23 21:22:31,883 saving best model |
|
2023-10-23 21:22:32,350 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:22:38,121 epoch 2 - iter 89/894 - loss 0.17708347 - time (sec): 5.77 - samples/sec: 1644.56 - lr: 0.000030 - momentum: 0.000000 |
|
2023-10-23 21:22:43,647 epoch 2 - iter 178/894 - loss 0.17307608 - time (sec): 11.30 - samples/sec: 1556.96 - lr: 0.000029 - momentum: 0.000000 |
|
2023-10-23 21:22:49,452 epoch 2 - iter 267/894 - loss 0.15463895 - time (sec): 17.10 - samples/sec: 1572.23 - lr: 0.000029 - momentum: 0.000000 |
|
2023-10-23 21:22:55,050 epoch 2 - iter 356/894 - loss 0.14758882 - time (sec): 22.70 - samples/sec: 1557.85 - lr: 0.000029 - momentum: 0.000000 |
|
2023-10-23 21:23:00,620 epoch 2 - iter 445/894 - loss 0.14968602 - time (sec): 28.27 - samples/sec: 1554.22 - lr: 0.000028 - momentum: 0.000000 |
|
2023-10-23 21:23:06,260 epoch 2 - iter 534/894 - loss 0.14924781 - time (sec): 33.91 - samples/sec: 1543.18 - lr: 0.000028 - momentum: 0.000000 |
|
2023-10-23 21:23:11,750 epoch 2 - iter 623/894 - loss 0.14556656 - time (sec): 39.40 - samples/sec: 1540.00 - lr: 0.000028 - momentum: 0.000000 |
|
2023-10-23 21:23:17,236 epoch 2 - iter 712/894 - loss 0.14492055 - time (sec): 44.88 - samples/sec: 1531.25 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-23 21:23:23,139 epoch 2 - iter 801/894 - loss 0.14248077 - time (sec): 50.79 - samples/sec: 1540.25 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-23 21:23:28,702 epoch 2 - iter 890/894 - loss 0.14341120 - time (sec): 56.35 - samples/sec: 1528.57 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-23 21:23:28,955 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:23:28,955 EPOCH 2 done: loss 0.1432 - lr: 0.000027 |
|
2023-10-23 21:23:35,410 DEV : loss 0.16756634414196014 - f1-score (micro avg) 0.686 |
|
2023-10-23 21:23:35,430 saving best model |
|
2023-10-23 21:23:36,027 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:23:42,076 epoch 3 - iter 89/894 - loss 0.07202634 - time (sec): 6.05 - samples/sec: 1740.28 - lr: 0.000026 - momentum: 0.000000 |
|
2023-10-23 21:23:47,728 epoch 3 - iter 178/894 - loss 0.08210135 - time (sec): 11.70 - samples/sec: 1636.27 - lr: 0.000026 - momentum: 0.000000 |
|
2023-10-23 21:23:53,320 epoch 3 - iter 267/894 - loss 0.08942123 - time (sec): 17.29 - samples/sec: 1592.06 - lr: 0.000026 - momentum: 0.000000 |
|
2023-10-23 21:23:58,906 epoch 3 - iter 356/894 - loss 0.08248351 - time (sec): 22.88 - samples/sec: 1560.39 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-23 21:24:04,612 epoch 3 - iter 445/894 - loss 0.08206964 - time (sec): 28.58 - samples/sec: 1547.29 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-23 21:24:10,207 epoch 3 - iter 534/894 - loss 0.08479942 - time (sec): 34.18 - samples/sec: 1547.26 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-23 21:24:15,857 epoch 3 - iter 623/894 - loss 0.08185387 - time (sec): 39.83 - samples/sec: 1557.23 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-23 21:24:21,255 epoch 3 - iter 712/894 - loss 0.08050106 - time (sec): 45.23 - samples/sec: 1531.85 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-23 21:24:26,934 epoch 3 - iter 801/894 - loss 0.08377902 - time (sec): 50.91 - samples/sec: 1526.36 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-23 21:24:32,544 epoch 3 - iter 890/894 - loss 0.08294752 - time (sec): 56.52 - samples/sec: 1527.26 - lr: 0.000023 - momentum: 0.000000 |
|
2023-10-23 21:24:32,776 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:24:32,776 EPOCH 3 done: loss 0.0827 - lr: 0.000023 |
|
2023-10-23 21:24:39,253 DEV : loss 0.18027736246585846 - f1-score (micro avg) 0.7222 |
|
2023-10-23 21:24:39,273 saving best model |
|
2023-10-23 21:24:39,864 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:24:45,407 epoch 4 - iter 89/894 - loss 0.04271264 - time (sec): 5.54 - samples/sec: 1527.69 - lr: 0.000023 - momentum: 0.000000 |
|
2023-10-23 21:24:50,971 epoch 4 - iter 178/894 - loss 0.05003284 - time (sec): 11.11 - samples/sec: 1532.92 - lr: 0.000023 - momentum: 0.000000 |
|
2023-10-23 21:24:56,717 epoch 4 - iter 267/894 - loss 0.04645080 - time (sec): 16.85 - samples/sec: 1556.89 - lr: 0.000022 - momentum: 0.000000 |
|
2023-10-23 21:25:02,380 epoch 4 - iter 356/894 - loss 0.04895070 - time (sec): 22.52 - samples/sec: 1537.54 - lr: 0.000022 - momentum: 0.000000 |
|
2023-10-23 21:25:08,261 epoch 4 - iter 445/894 - loss 0.04925683 - time (sec): 28.40 - samples/sec: 1558.22 - lr: 0.000022 - momentum: 0.000000 |
|
2023-10-23 21:25:13,823 epoch 4 - iter 534/894 - loss 0.05485294 - time (sec): 33.96 - samples/sec: 1542.54 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-23 21:25:19,366 epoch 4 - iter 623/894 - loss 0.05445647 - time (sec): 39.50 - samples/sec: 1531.79 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-23 21:25:24,826 epoch 4 - iter 712/894 - loss 0.05558929 - time (sec): 44.96 - samples/sec: 1516.15 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-23 21:25:30,589 epoch 4 - iter 801/894 - loss 0.05569812 - time (sec): 50.72 - samples/sec: 1516.91 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-23 21:25:36,393 epoch 4 - iter 890/894 - loss 0.05395194 - time (sec): 56.53 - samples/sec: 1526.24 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-23 21:25:36,624 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:25:36,625 EPOCH 4 done: loss 0.0543 - lr: 0.000020 |
|
2023-10-23 21:25:43,133 DEV : loss 0.19269603490829468 - f1-score (micro avg) 0.7635 |
|
2023-10-23 21:25:43,153 saving best model |
|
2023-10-23 21:25:43,753 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:25:49,397 epoch 5 - iter 89/894 - loss 0.04613190 - time (sec): 5.64 - samples/sec: 1539.22 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-23 21:25:55,377 epoch 5 - iter 178/894 - loss 0.04206252 - time (sec): 11.62 - samples/sec: 1624.43 - lr: 0.000019 - momentum: 0.000000 |
|
2023-10-23 21:26:00,952 epoch 5 - iter 267/894 - loss 0.03516016 - time (sec): 17.20 - samples/sec: 1576.75 - lr: 0.000019 - momentum: 0.000000 |
|
2023-10-23 21:26:06,711 epoch 5 - iter 356/894 - loss 0.03286165 - time (sec): 22.96 - samples/sec: 1573.64 - lr: 0.000019 - momentum: 0.000000 |
|
2023-10-23 21:26:12,286 epoch 5 - iter 445/894 - loss 0.03370259 - time (sec): 28.53 - samples/sec: 1561.28 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-23 21:26:17,963 epoch 5 - iter 534/894 - loss 0.03464729 - time (sec): 34.21 - samples/sec: 1551.36 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-23 21:26:23,726 epoch 5 - iter 623/894 - loss 0.03428769 - time (sec): 39.97 - samples/sec: 1545.26 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-23 21:26:29,269 epoch 5 - iter 712/894 - loss 0.03634000 - time (sec): 45.52 - samples/sec: 1528.85 - lr: 0.000017 - momentum: 0.000000 |
|
2023-10-23 21:26:35,025 epoch 5 - iter 801/894 - loss 0.03738652 - time (sec): 51.27 - samples/sec: 1528.26 - lr: 0.000017 - momentum: 0.000000 |
|
2023-10-23 21:26:40,464 epoch 5 - iter 890/894 - loss 0.03658564 - time (sec): 56.71 - samples/sec: 1519.86 - lr: 0.000017 - momentum: 0.000000 |
|
2023-10-23 21:26:40,710 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:26:40,710 EPOCH 5 done: loss 0.0367 - lr: 0.000017 |
|
2023-10-23 21:26:47,225 DEV : loss 0.19243519008159637 - f1-score (micro avg) 0.7571 |
|
2023-10-23 21:26:47,245 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:26:53,171 epoch 6 - iter 89/894 - loss 0.01488108 - time (sec): 5.93 - samples/sec: 1589.90 - lr: 0.000016 - momentum: 0.000000 |
|
2023-10-23 21:26:58,559 epoch 6 - iter 178/894 - loss 0.02507305 - time (sec): 11.31 - samples/sec: 1492.30 - lr: 0.000016 - momentum: 0.000000 |
|
2023-10-23 21:27:04,219 epoch 6 - iter 267/894 - loss 0.02660050 - time (sec): 16.97 - samples/sec: 1514.85 - lr: 0.000016 - momentum: 0.000000 |
|
2023-10-23 21:27:10,264 epoch 6 - iter 356/894 - loss 0.02304923 - time (sec): 23.02 - samples/sec: 1526.25 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-23 21:27:15,928 epoch 6 - iter 445/894 - loss 0.02395097 - time (sec): 28.68 - samples/sec: 1518.59 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-23 21:27:21,500 epoch 6 - iter 534/894 - loss 0.02312750 - time (sec): 34.25 - samples/sec: 1514.25 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-23 21:27:26,995 epoch 6 - iter 623/894 - loss 0.02395946 - time (sec): 39.75 - samples/sec: 1504.16 - lr: 0.000014 - momentum: 0.000000 |
|
2023-10-23 21:27:32,503 epoch 6 - iter 712/894 - loss 0.02320612 - time (sec): 45.26 - samples/sec: 1509.94 - lr: 0.000014 - momentum: 0.000000 |
|
2023-10-23 21:27:38,101 epoch 6 - iter 801/894 - loss 0.02250887 - time (sec): 50.86 - samples/sec: 1520.80 - lr: 0.000014 - momentum: 0.000000 |
|
2023-10-23 21:27:43,863 epoch 6 - iter 890/894 - loss 0.02277949 - time (sec): 56.62 - samples/sec: 1522.19 - lr: 0.000013 - momentum: 0.000000 |
|
2023-10-23 21:27:44,111 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:27:44,111 EPOCH 6 done: loss 0.0227 - lr: 0.000013 |
|
2023-10-23 21:27:50,618 DEV : loss 0.2726885974407196 - f1-score (micro avg) 0.7458 |
|
2023-10-23 21:27:50,639 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:27:56,471 epoch 7 - iter 89/894 - loss 0.01202364 - time (sec): 5.83 - samples/sec: 1583.99 - lr: 0.000013 - momentum: 0.000000 |
|
2023-10-23 21:28:02,569 epoch 7 - iter 178/894 - loss 0.01669844 - time (sec): 11.93 - samples/sec: 1590.98 - lr: 0.000013 - momentum: 0.000000 |
|
2023-10-23 21:28:08,156 epoch 7 - iter 267/894 - loss 0.01475230 - time (sec): 17.52 - samples/sec: 1574.38 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-23 21:28:13,665 epoch 7 - iter 356/894 - loss 0.01339422 - time (sec): 23.02 - samples/sec: 1538.11 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-23 21:28:19,314 epoch 7 - iter 445/894 - loss 0.01478419 - time (sec): 28.67 - samples/sec: 1518.67 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-23 21:28:24,952 epoch 7 - iter 534/894 - loss 0.01540978 - time (sec): 34.31 - samples/sec: 1519.79 - lr: 0.000011 - momentum: 0.000000 |
|
2023-10-23 21:28:30,561 epoch 7 - iter 623/894 - loss 0.01527742 - time (sec): 39.92 - samples/sec: 1526.87 - lr: 0.000011 - momentum: 0.000000 |
|
2023-10-23 21:28:36,196 epoch 7 - iter 712/894 - loss 0.01519423 - time (sec): 45.56 - samples/sec: 1521.04 - lr: 0.000011 - momentum: 0.000000 |
|
2023-10-23 21:28:41,775 epoch 7 - iter 801/894 - loss 0.01588042 - time (sec): 51.14 - samples/sec: 1520.45 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-23 21:28:47,386 epoch 7 - iter 890/894 - loss 0.01506212 - time (sec): 56.75 - samples/sec: 1518.15 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-23 21:28:47,656 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:28:47,657 EPOCH 7 done: loss 0.0153 - lr: 0.000010 |
|
2023-10-23 21:28:54,151 DEV : loss 0.2593619227409363 - f1-score (micro avg) 0.7681 |
|
2023-10-23 21:28:54,171 saving best model |
|
2023-10-23 21:28:54,771 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:29:00,547 epoch 8 - iter 89/894 - loss 0.01205340 - time (sec): 5.77 - samples/sec: 1505.27 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-23 21:29:06,066 epoch 8 - iter 178/894 - loss 0.01236707 - time (sec): 11.29 - samples/sec: 1492.63 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-23 21:29:11,754 epoch 8 - iter 267/894 - loss 0.00992080 - time (sec): 16.98 - samples/sec: 1498.04 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-23 21:29:17,249 epoch 8 - iter 356/894 - loss 0.00995604 - time (sec): 22.48 - samples/sec: 1484.55 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-23 21:29:22,953 epoch 8 - iter 445/894 - loss 0.01027724 - time (sec): 28.18 - samples/sec: 1484.35 - lr: 0.000008 - momentum: 0.000000 |
|
2023-10-23 21:29:28,531 epoch 8 - iter 534/894 - loss 0.00935046 - time (sec): 33.76 - samples/sec: 1491.26 - lr: 0.000008 - momentum: 0.000000 |
|
2023-10-23 21:29:34,441 epoch 8 - iter 623/894 - loss 0.01009927 - time (sec): 39.67 - samples/sec: 1508.32 - lr: 0.000008 - momentum: 0.000000 |
|
2023-10-23 21:29:40,012 epoch 8 - iter 712/894 - loss 0.01012031 - time (sec): 45.24 - samples/sec: 1499.94 - lr: 0.000007 - momentum: 0.000000 |
|
2023-10-23 21:29:45,715 epoch 8 - iter 801/894 - loss 0.01002887 - time (sec): 50.94 - samples/sec: 1513.77 - lr: 0.000007 - momentum: 0.000000 |
|
2023-10-23 21:29:51,485 epoch 8 - iter 890/894 - loss 0.00972871 - time (sec): 56.71 - samples/sec: 1519.80 - lr: 0.000007 - momentum: 0.000000 |
|
2023-10-23 21:29:51,727 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:29:51,728 EPOCH 8 done: loss 0.0097 - lr: 0.000007 |
|
2023-10-23 21:29:58,241 DEV : loss 0.27069804072380066 - f1-score (micro avg) 0.7757 |
|
2023-10-23 21:29:58,261 saving best model |
|
2023-10-23 21:29:58,854 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:30:04,344 epoch 9 - iter 89/894 - loss 0.00394006 - time (sec): 5.49 - samples/sec: 1434.06 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-23 21:30:10,325 epoch 9 - iter 178/894 - loss 0.00797501 - time (sec): 11.47 - samples/sec: 1558.94 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-23 21:30:16,052 epoch 9 - iter 267/894 - loss 0.00619020 - time (sec): 17.20 - samples/sec: 1553.30 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-23 21:30:21,716 epoch 9 - iter 356/894 - loss 0.00643612 - time (sec): 22.86 - samples/sec: 1549.09 - lr: 0.000005 - momentum: 0.000000 |
|
2023-10-23 21:30:27,475 epoch 9 - iter 445/894 - loss 0.00778558 - time (sec): 28.62 - samples/sec: 1544.90 - lr: 0.000005 - momentum: 0.000000 |
|
2023-10-23 21:30:33,413 epoch 9 - iter 534/894 - loss 0.00814245 - time (sec): 34.56 - samples/sec: 1544.30 - lr: 0.000005 - momentum: 0.000000 |
|
2023-10-23 21:30:38,995 epoch 9 - iter 623/894 - loss 0.00794589 - time (sec): 40.14 - samples/sec: 1538.36 - lr: 0.000004 - momentum: 0.000000 |
|
2023-10-23 21:30:44,474 epoch 9 - iter 712/894 - loss 0.00766539 - time (sec): 45.62 - samples/sec: 1523.76 - lr: 0.000004 - momentum: 0.000000 |
|
2023-10-23 21:30:49,952 epoch 9 - iter 801/894 - loss 0.00726590 - time (sec): 51.10 - samples/sec: 1511.71 - lr: 0.000004 - momentum: 0.000000 |
|
2023-10-23 21:30:55,592 epoch 9 - iter 890/894 - loss 0.00734059 - time (sec): 56.74 - samples/sec: 1517.12 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-23 21:30:55,841 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:30:55,841 EPOCH 9 done: loss 0.0073 - lr: 0.000003 |
|
2023-10-23 21:31:02,068 DEV : loss 0.2792131006717682 - f1-score (micro avg) 0.7783 |
|
2023-10-23 21:31:02,089 saving best model |
|
2023-10-23 21:31:02,684 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:31:08,637 epoch 10 - iter 89/894 - loss 0.00337450 - time (sec): 5.95 - samples/sec: 1563.46 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-23 21:31:14,533 epoch 10 - iter 178/894 - loss 0.00493983 - time (sec): 11.85 - samples/sec: 1498.08 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-23 21:31:20,499 epoch 10 - iter 267/894 - loss 0.00387315 - time (sec): 17.81 - samples/sec: 1549.82 - lr: 0.000002 - momentum: 0.000000 |
|
2023-10-23 21:31:26,020 epoch 10 - iter 356/894 - loss 0.00316311 - time (sec): 23.33 - samples/sec: 1531.19 - lr: 0.000002 - momentum: 0.000000 |
|
2023-10-23 21:31:31,606 epoch 10 - iter 445/894 - loss 0.00345817 - time (sec): 28.92 - samples/sec: 1520.25 - lr: 0.000002 - momentum: 0.000000 |
|
2023-10-23 21:31:37,261 epoch 10 - iter 534/894 - loss 0.00498121 - time (sec): 34.58 - samples/sec: 1528.01 - lr: 0.000001 - momentum: 0.000000 |
|
2023-10-23 21:31:42,794 epoch 10 - iter 623/894 - loss 0.00480233 - time (sec): 40.11 - samples/sec: 1520.50 - lr: 0.000001 - momentum: 0.000000 |
|
2023-10-23 21:31:48,587 epoch 10 - iter 712/894 - loss 0.00498084 - time (sec): 45.90 - samples/sec: 1530.48 - lr: 0.000001 - momentum: 0.000000 |
|
2023-10-23 21:31:54,056 epoch 10 - iter 801/894 - loss 0.00461055 - time (sec): 51.37 - samples/sec: 1513.87 - lr: 0.000000 - momentum: 0.000000 |
|
2023-10-23 21:31:59,716 epoch 10 - iter 890/894 - loss 0.00466250 - time (sec): 57.03 - samples/sec: 1512.46 - lr: 0.000000 - momentum: 0.000000 |
|
2023-10-23 21:31:59,949 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:31:59,949 EPOCH 10 done: loss 0.0047 - lr: 0.000000 |
|
2023-10-23 21:32:06,205 DEV : loss 0.2752907872200012 - f1-score (micro avg) 0.7757 |
|
2023-10-23 21:32:06,712 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 21:32:06,713 Loading model from best epoch ... |
|
2023-10-23 21:32:08,410 SequenceTagger predicts: Dictionary with 21 tags: O, S-loc, B-loc, E-loc, I-loc, S-pers, B-pers, E-pers, I-pers, S-org, B-org, E-org, I-org, S-prod, B-prod, E-prod, I-prod, S-time, B-time, E-time, I-time |
|
2023-10-23 21:32:13,254 |
|
Results: |
|
- F-score (micro) 0.7561 |
|
- F-score (macro) 0.6654 |
|
- Accuracy 0.6236 |
|
|
|
By class: |
|
precision recall f1-score support |
|
|
|
loc 0.8444 0.8557 0.8500 596 |
|
pers 0.6882 0.7688 0.7262 333 |
|
org 0.5437 0.4242 0.4766 132 |
|
prod 0.6800 0.5152 0.5862 66 |
|
time 0.7273 0.6531 0.6882 49 |
|
|
|
micro avg 0.7570 0.7551 0.7561 1176 |
|
macro avg 0.6967 0.6434 0.6654 1176 |
|
weighted avg 0.7523 0.7551 0.7515 1176 |
|
|
|
2023-10-23 21:32:13,255 ---------------------------------------------------------------------------------------------------- |
|
|