2023-10-25 02:28:12,200 ----------------------------------------------------------------------------------------------------
2023-10-25 02:28:12,201 Model: "SequenceTagger(
  (embeddings): TransformerWordEmbeddings(
    (model): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(64001, 768)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0-11): 12 x BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): BertIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
              (intermediate_act_fn): GELUActivation()
            )
            (output): BertOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
        )
      )
      (pooler): BertPooler(
        (dense): Linear(in_features=768, out_features=768, bias=True)
        (activation): Tanh()
      )
    )
  )
  (locked_dropout): LockedDropout(p=0.5)
  (linear): Linear(in_features=768, out_features=13, bias=True)
  (loss_function): CrossEntropyLoss()
)"
2023-10-25 02:28:12,201 ----------------------------------------------------------------------------------------------------
2023-10-25 02:28:12,201 MultiCorpus: 5777 train + 722 dev + 723 test sentences
 - NER_ICDAR_EUROPEANA Corpus: 5777 train + 722 dev + 723 test sentences - /home/ubuntu/.flair/datasets/ner_icdar_europeana/nl
2023-10-25 02:28:12,201 ----------------------------------------------------------------------------------------------------
2023-10-25 02:28:12,201 Train: 5777 sentences
2023-10-25 02:28:12,201 (train_with_dev=False, train_with_test=False)
2023-10-25 02:28:12,201 ----------------------------------------------------------------------------------------------------
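The training script itself is not part of this log, but a setup matching the corpus line above and the model summary (last transformer layer only, first-subtoken pooling, no RNN, no CRF; model name inferred from the base path logged further down) would look roughly like this in Flair. Treat it as an illustrative sketch, not the exact code that produced this run:

from flair.datasets import NER_ICDAR_EUROPEANA
from flair.embeddings import TransformerWordEmbeddings
from flair.models import SequenceTagger

# Dutch ICDAR-Europeana NER corpus: 5777 train / 722 dev / 723 test sentences.
corpus = NER_ICDAR_EUROPEANA(language="nl")
label_dict = corpus.make_label_dictionary(label_type="ner")

# Embeddings matching the summary above: last layer ("-1"),
# first-subtoken pooling, fine-tuned end to end.
embeddings = TransformerWordEmbeddings(
    model="dbmdz/bert-base-historic-multilingual-64k-td-cased",
    layers="-1",
    subtoken_pooling="first",
    fine_tune=True,
)

# Plain linear head (768 -> 13 tags), no RNN, no CRF, as in the printed model.
tagger = SequenceTagger(
    embeddings=embeddings,
    tag_dictionary=label_dict,
    tag_type="ner",
    use_rnn=False,
    use_crf=False,
    reproject_embeddings=False,
)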
2023-10-25 02:28:12,201 Training Params:
2023-10-25 02:28:12,201 - learning_rate: "3e-05"
2023-10-25 02:28:12,201 - mini_batch_size: "4"
2023-10-25 02:28:12,201 - max_epochs: "10"
2023-10-25 02:28:12,201 - shuffle: "True"
2023-10-25 02:28:12,201 ----------------------------------------------------------------------------------------------------
2023-10-25 02:28:12,201 Plugins:
2023-10-25 02:28:12,201 - TensorboardLogger
2023-10-25 02:28:12,201 - LinearScheduler | warmup_fraction: '0.1'
2023-10-25 02:28:12,201 ----------------------------------------------------------------------------------------------------
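Assuming a recent Flair version, these parameters and plugins map onto ModelTrainer.fine_tune() roughly as follows; this is a sketch, not the recorded invocation (fine_tune() attaches the LinearScheduler itself, and the plugin import path can differ between Flair releases):

from flair.trainers import ModelTrainer
from flair.trainers.plugins import TensorboardLogger  # plugin named in this log

trainer = ModelTrainer(tagger, corpus)
trainer.fine_tune(
    "hmbench-icdar/nl-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs4-wsFalse-e10-lr3e-05-poolingfirst-layers-1-crfFalse-5",
    learning_rate=3e-05,
    mini_batch_size=4,
    max_epochs=10,
    shuffle=True,
    warmup_fraction=0.1,  # linear warmup over the first 10% of steps
    plugins=[TensorboardLogger()],
)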
2023-10-25 02:28:12,201 Final evaluation on model from best epoch (best-model.pt)
2023-10-25 02:28:12,201 - metric: "('micro avg', 'f1-score')"
2023-10-25 02:28:12,201 ----------------------------------------------------------------------------------------------------
2023-10-25 02:28:12,201 Computation:
2023-10-25 02:28:12,202 - compute on device: cuda:0
2023-10-25 02:28:12,202 - embedding storage: none
2023-10-25 02:28:12,202 ----------------------------------------------------------------------------------------------------
2023-10-25 02:28:12,202 Model training base path: "hmbench-icdar/nl-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs4-wsFalse-e10-lr3e-05-poolingfirst-layers-1-crfFalse-5"
2023-10-25 02:28:12,202 ----------------------------------------------------------------------------------------------------
2023-10-25 02:28:12,202 ----------------------------------------------------------------------------------------------------
2023-10-25 02:28:12,202 Logging anything other than scalars to TensorBoard is currently not supported.
2023-10-25 02:28:22,627 epoch 1 - iter 144/1445 - loss 1.69623735 - time (sec): 10.42 - samples/sec: 1607.43 - lr: 0.000003 - momentum: 0.000000
2023-10-25 02:28:32,738 epoch 1 - iter 288/1445 - loss 0.99201733 - time (sec): 20.54 - samples/sec: 1604.83 - lr: 0.000006 - momentum: 0.000000
2023-10-25 02:28:43,079 epoch 1 - iter 432/1445 - loss 0.72269377 - time (sec): 30.88 - samples/sec: 1625.94 - lr: 0.000009 - momentum: 0.000000
2023-10-25 02:28:53,831 epoch 1 - iter 576/1445 - loss 0.57817544 - time (sec): 41.63 - samples/sec: 1644.86 - lr: 0.000012 - momentum: 0.000000
2023-10-25 02:29:04,052 epoch 1 - iter 720/1445 - loss 0.49608950 - time (sec): 51.85 - samples/sec: 1642.11 - lr: 0.000015 - momentum: 0.000000
2023-10-25 02:29:14,679 epoch 1 - iter 864/1445 - loss 0.43533373 - time (sec): 62.48 - samples/sec: 1660.76 - lr: 0.000018 - momentum: 0.000000
2023-10-25 02:29:25,385 epoch 1 - iter 1008/1445 - loss 0.39381830 - time (sec): 73.18 - samples/sec: 1662.51 - lr: 0.000021 - momentum: 0.000000
2023-10-25 02:29:35,741 epoch 1 - iter 1152/1445 - loss 0.36268391 - time (sec): 83.54 - samples/sec: 1662.37 - lr: 0.000024 - momentum: 0.000000
2023-10-25 02:29:46,580 epoch 1 - iter 1296/1445 - loss 0.33535990 - time (sec): 94.38 - samples/sec: 1668.25 - lr: 0.000027 - momentum: 0.000000
2023-10-25 02:29:57,358 epoch 1 - iter 1440/1445 - loss 0.31447640 - time (sec): 105.16 - samples/sec: 1669.65 - lr: 0.000030 - momentum: 0.000000
2023-10-25 02:29:57,731 ----------------------------------------------------------------------------------------------------
2023-10-25 02:29:57,732 EPOCH 1 done: loss 0.3138 - lr: 0.000030
2023-10-25 02:30:00,756 DEV : loss 0.12302611023187637 - f1-score (micro avg) 0.7008
2023-10-25 02:30:00,768 saving best model
2023-10-25 02:30:01,240 ----------------------------------------------------------------------------------------------------
2023-10-25 02:30:11,696 epoch 2 - iter 144/1445 - loss 0.12057633 - time (sec): 10.46 - samples/sec: 1636.43 - lr: 0.000030 - momentum: 0.000000
2023-10-25 02:30:22,267 epoch 2 - iter 288/1445 - loss 0.11145481 - time (sec): 21.03 - samples/sec: 1632.65 - lr: 0.000029 - momentum: 0.000000
2023-10-25 02:30:32,618 epoch 2 - iter 432/1445 - loss 0.10749836 - time (sec): 31.38 - samples/sec: 1647.82 - lr: 0.000029 - momentum: 0.000000
2023-10-25 02:30:43,133 epoch 2 - iter 576/1445 - loss 0.10585594 - time (sec): 41.89 - samples/sec: 1652.25 - lr: 0.000029 - momentum: 0.000000
2023-10-25 02:30:53,723 epoch 2 - iter 720/1445 - loss 0.10446472 - time (sec): 52.48 - samples/sec: 1648.50 - lr: 0.000028 - momentum: 0.000000
2023-10-25 02:31:04,130 epoch 2 - iter 864/1445 - loss 0.10078974 - time (sec): 62.89 - samples/sec: 1650.74 - lr: 0.000028 - momentum: 0.000000
2023-10-25 02:31:14,622 epoch 2 - iter 1008/1445 - loss 0.10225189 - time (sec): 73.38 - samples/sec: 1649.81 - lr: 0.000028 - momentum: 0.000000
2023-10-25 02:31:25,109 epoch 2 - iter 1152/1445 - loss 0.10050073 - time (sec): 83.87 - samples/sec: 1651.08 - lr: 0.000027 - momentum: 0.000000
2023-10-25 02:31:35,906 epoch 2 - iter 1296/1445 - loss 0.10166136 - time (sec): 94.66 - samples/sec: 1657.81 - lr: 0.000027 - momentum: 0.000000
2023-10-25 02:31:46,840 epoch 2 - iter 1440/1445 - loss 0.09962111 - time (sec): 105.60 - samples/sec: 1663.26 - lr: 0.000027 - momentum: 0.000000
2023-10-25 02:31:47,214 ----------------------------------------------------------------------------------------------------
2023-10-25 02:31:47,214 EPOCH 2 done: loss 0.0998 - lr: 0.000027
2023-10-25 02:31:50,640 DEV : loss 0.10188853740692139 - f1-score (micro avg) 0.7879
2023-10-25 02:31:50,651 saving best model
2023-10-25 02:31:51,242 ----------------------------------------------------------------------------------------------------
2023-10-25 02:32:01,694 epoch 3 - iter 144/1445 - loss 0.06891371 - time (sec): 10.45 - samples/sec: 1638.65 - lr: 0.000026 - momentum: 0.000000
2023-10-25 02:32:12,153 epoch 3 - iter 288/1445 - loss 0.06760573 - time (sec): 20.91 - samples/sec: 1653.85 - lr: 0.000026 - momentum: 0.000000
2023-10-25 02:32:23,164 epoch 3 - iter 432/1445 - loss 0.06567901 - time (sec): 31.92 - samples/sec: 1678.89 - lr: 0.000026 - momentum: 0.000000
2023-10-25 02:32:33,778 epoch 3 - iter 576/1445 - loss 0.06561010 - time (sec): 42.53 - samples/sec: 1683.01 - lr: 0.000025 - momentum: 0.000000
2023-10-25 02:32:44,345 epoch 3 - iter 720/1445 - loss 0.06427575 - time (sec): 53.10 - samples/sec: 1688.76 - lr: 0.000025 - momentum: 0.000000
2023-10-25 02:32:54,681 epoch 3 - iter 864/1445 - loss 0.06647145 - time (sec): 63.44 - samples/sec: 1679.21 - lr: 0.000025 - momentum: 0.000000
2023-10-25 02:33:04,940 epoch 3 - iter 1008/1445 - loss 0.06569838 - time (sec): 73.70 - samples/sec: 1673.05 - lr: 0.000024 - momentum: 0.000000
2023-10-25 02:33:15,335 epoch 3 - iter 1152/1445 - loss 0.06646772 - time (sec): 84.09 - samples/sec: 1672.24 - lr: 0.000024 - momentum: 0.000000
2023-10-25 02:33:25,729 epoch 3 - iter 1296/1445 - loss 0.06733816 - time (sec): 94.49 - samples/sec: 1670.32 - lr: 0.000024 - momentum: 0.000000
2023-10-25 02:33:36,452 epoch 3 - iter 1440/1445 - loss 0.06745264 - time (sec): 105.21 - samples/sec: 1667.87 - lr: 0.000023 - momentum: 0.000000
2023-10-25 02:33:36,864 ----------------------------------------------------------------------------------------------------
2023-10-25 02:33:36,864 EPOCH 3 done: loss 0.0672 - lr: 0.000023
2023-10-25 02:33:40,583 DEV : loss 0.11046253144741058 - f1-score (micro avg) 0.8152
2023-10-25 02:33:40,595 saving best model
2023-10-25 02:33:41,189 ----------------------------------------------------------------------------------------------------
2023-10-25 02:33:51,542 epoch 4 - iter 144/1445 - loss 0.04149391 - time (sec): 10.35 - samples/sec: 1624.66 - lr: 0.000023 - momentum: 0.000000
2023-10-25 02:34:02,084 epoch 4 - iter 288/1445 - loss 0.04446770 - time (sec): 20.89 - samples/sec: 1665.66 - lr: 0.000023 - momentum: 0.000000
2023-10-25 02:34:13,003 epoch 4 - iter 432/1445 - loss 0.05068145 - time (sec): 31.81 - samples/sec: 1676.21 - lr: 0.000022 - momentum: 0.000000
2023-10-25 02:34:23,904 epoch 4 - iter 576/1445 - loss 0.05239489 - time (sec): 42.71 - samples/sec: 1663.31 - lr: 0.000022 - momentum: 0.000000
2023-10-25 02:34:34,505 epoch 4 - iter 720/1445 - loss 0.05557338 - time (sec): 53.31 - samples/sec: 1664.78 - lr: 0.000022 - momentum: 0.000000
2023-10-25 02:34:44,749 epoch 4 - iter 864/1445 - loss 0.05372699 - time (sec): 63.56 - samples/sec: 1658.37 - lr: 0.000021 - momentum: 0.000000
2023-10-25 02:34:55,598 epoch 4 - iter 1008/1445 - loss 0.05084612 - time (sec): 74.41 - samples/sec: 1669.08 - lr: 0.000021 - momentum: 0.000000
2023-10-25 02:35:06,129 epoch 4 - iter 1152/1445 - loss 0.04931234 - time (sec): 84.94 - samples/sec: 1665.16 - lr: 0.000021 - momentum: 0.000000
2023-10-25 02:35:16,302 epoch 4 - iter 1296/1445 - loss 0.05056334 - time (sec): 95.11 - samples/sec: 1659.23 - lr: 0.000020 - momentum: 0.000000
2023-10-25 02:35:26,738 epoch 4 - iter 1440/1445 - loss 0.05038621 - time (sec): 105.55 - samples/sec: 1666.33 - lr: 0.000020 - momentum: 0.000000
2023-10-25 02:35:27,054 ----------------------------------------------------------------------------------------------------
2023-10-25 02:35:27,055 EPOCH 4 done: loss 0.0503 - lr: 0.000020
2023-10-25 02:35:30,488 DEV : loss 0.10373895615339279 - f1-score (micro avg) 0.8185
2023-10-25 02:35:30,500 saving best model
2023-10-25 02:35:31,088 ----------------------------------------------------------------------------------------------------
2023-10-25 02:35:41,464 epoch 5 - iter 144/1445 - loss 0.03554515 - time (sec): 10.37 - samples/sec: 1706.87 - lr: 0.000020 - momentum: 0.000000
2023-10-25 02:35:52,121 epoch 5 - iter 288/1445 - loss 0.04085931 - time (sec): 21.03 - samples/sec: 1684.50 - lr: 0.000019 - momentum: 0.000000
2023-10-25 02:36:02,826 epoch 5 - iter 432/1445 - loss 0.03930504 - time (sec): 31.74 - samples/sec: 1664.63 - lr: 0.000019 - momentum: 0.000000
2023-10-25 02:36:13,033 epoch 5 - iter 576/1445 - loss 0.03674934 - time (sec): 41.94 - samples/sec: 1649.33 - lr: 0.000019 - momentum: 0.000000
2023-10-25 02:36:23,408 epoch 5 - iter 720/1445 - loss 0.03552619 - time (sec): 52.32 - samples/sec: 1648.03 - lr: 0.000018 - momentum: 0.000000
2023-10-25 02:36:34,416 epoch 5 - iter 864/1445 - loss 0.03484342 - time (sec): 63.33 - samples/sec: 1652.24 - lr: 0.000018 - momentum: 0.000000
2023-10-25 02:36:44,748 epoch 5 - iter 1008/1445 - loss 0.03536942 - time (sec): 73.66 - samples/sec: 1649.16 - lr: 0.000018 - momentum: 0.000000
2023-10-25 02:36:55,468 epoch 5 - iter 1152/1445 - loss 0.03839931 - time (sec): 84.38 - samples/sec: 1654.47 - lr: 0.000017 - momentum: 0.000000
2023-10-25 02:37:06,377 epoch 5 - iter 1296/1445 - loss 0.03661502 - time (sec): 95.29 - samples/sec: 1661.17 - lr: 0.000017 - momentum: 0.000000
2023-10-25 02:37:16,902 epoch 5 - iter 1440/1445 - loss 0.03750327 - time (sec): 105.81 - samples/sec: 1661.42 - lr: 0.000017 - momentum: 0.000000
2023-10-25 02:37:17,208 ----------------------------------------------------------------------------------------------------
2023-10-25 02:37:17,209 EPOCH 5 done: loss 0.0374 - lr: 0.000017
2023-10-25 02:37:20,640 DEV : loss 0.14838159084320068 - f1-score (micro avg) 0.8061
2023-10-25 02:37:20,652 ----------------------------------------------------------------------------------------------------
2023-10-25 02:37:31,289 epoch 6 - iter 144/1445 - loss 0.01890144 - time (sec): 10.64 - samples/sec: 1694.17 - lr: 0.000016 - momentum: 0.000000
2023-10-25 02:37:41,340 epoch 6 - iter 288/1445 - loss 0.02387650 - time (sec): 20.69 - samples/sec: 1647.82 - lr: 0.000016 - momentum: 0.000000
2023-10-25 02:37:51,893 epoch 6 - iter 432/1445 - loss 0.02538740 - time (sec): 31.24 - samples/sec: 1666.20 - lr: 0.000016 - momentum: 0.000000
2023-10-25 02:38:02,979 epoch 6 - iter 576/1445 - loss 0.02588859 - time (sec): 42.33 - samples/sec: 1682.42 - lr: 0.000015 - momentum: 0.000000
2023-10-25 02:38:13,646 epoch 6 - iter 720/1445 - loss 0.02605717 - time (sec): 52.99 - samples/sec: 1679.91 - lr: 0.000015 - momentum: 0.000000
2023-10-25 02:38:24,416 epoch 6 - iter 864/1445 - loss 0.02720202 - time (sec): 63.76 - samples/sec: 1676.45 - lr: 0.000015 - momentum: 0.000000
2023-10-25 02:38:35,122 epoch 6 - iter 1008/1445 - loss 0.02683121 - time (sec): 74.47 - samples/sec: 1673.04 - lr: 0.000014 - momentum: 0.000000
2023-10-25 02:38:45,492 epoch 6 - iter 1152/1445 - loss 0.02733399 - time (sec): 84.84 - samples/sec: 1671.83 - lr: 0.000014 - momentum: 0.000000
2023-10-25 02:38:55,591 epoch 6 - iter 1296/1445 - loss 0.02715393 - time (sec): 94.94 - samples/sec: 1668.05 - lr: 0.000014 - momentum: 0.000000
2023-10-25 02:39:06,195 epoch 6 - iter 1440/1445 - loss 0.02637222 - time (sec): 105.54 - samples/sec: 1665.23 - lr: 0.000013 - momentum: 0.000000
2023-10-25 02:39:06,542 ----------------------------------------------------------------------------------------------------
2023-10-25 02:39:06,542 EPOCH 6 done: loss 0.0265 - lr: 0.000013
2023-10-25 02:39:10,260 DEV : loss 0.17533475160598755 - f1-score (micro avg) 0.8212
2023-10-25 02:39:10,272 saving best model
2023-10-25 02:39:10,864 ----------------------------------------------------------------------------------------------------
2023-10-25 02:39:21,556 epoch 7 - iter 144/1445 - loss 0.01856109 - time (sec): 10.69 - samples/sec: 1705.02 - lr: 0.000013 - momentum: 0.000000
2023-10-25 02:39:32,439 epoch 7 - iter 288/1445 - loss 0.01973145 - time (sec): 21.57 - samples/sec: 1694.19 - lr: 0.000013 - momentum: 0.000000
2023-10-25 02:39:42,798 epoch 7 - iter 432/1445 - loss 0.01739414 - time (sec): 31.93 - samples/sec: 1672.05 - lr: 0.000012 - momentum: 0.000000
2023-10-25 02:39:53,362 epoch 7 - iter 576/1445 - loss 0.01910287 - time (sec): 42.50 - samples/sec: 1672.64 - lr: 0.000012 - momentum: 0.000000
2023-10-25 02:40:03,958 epoch 7 - iter 720/1445 - loss 0.01991727 - time (sec): 53.09 - samples/sec: 1667.60 - lr: 0.000012 - momentum: 0.000000
2023-10-25 02:40:14,416 epoch 7 - iter 864/1445 - loss 0.01964088 - time (sec): 63.55 - samples/sec: 1667.71 - lr: 0.000011 - momentum: 0.000000
2023-10-25 02:40:25,070 epoch 7 - iter 1008/1445 - loss 0.01990403 - time (sec): 74.20 - samples/sec: 1659.03 - lr: 0.000011 - momentum: 0.000000
2023-10-25 02:40:35,855 epoch 7 - iter 1152/1445 - loss 0.02038628 - time (sec): 84.99 - samples/sec: 1665.86 - lr: 0.000011 - momentum: 0.000000
2023-10-25 02:40:46,303 epoch 7 - iter 1296/1445 - loss 0.02016692 - time (sec): 95.44 - samples/sec: 1664.70 - lr: 0.000010 - momentum: 0.000000
2023-10-25 02:40:56,521 epoch 7 - iter 1440/1445 - loss 0.02025843 - time (sec): 105.66 - samples/sec: 1661.05 - lr: 0.000010 - momentum: 0.000000
2023-10-25 02:40:56,962 ----------------------------------------------------------------------------------------------------
2023-10-25 02:40:56,962 EPOCH 7 done: loss 0.0202 - lr: 0.000010
2023-10-25 02:41:00,395 DEV : loss 0.18694642186164856 - f1-score (micro avg) 0.802
2023-10-25 02:41:00,407 ----------------------------------------------------------------------------------------------------
2023-10-25 02:41:10,706 epoch 8 - iter 144/1445 - loss 0.02198001 - time (sec): 10.30 - samples/sec: 1643.22 - lr: 0.000010 - momentum: 0.000000
2023-10-25 02:41:20,950 epoch 8 - iter 288/1445 - loss 0.01680433 - time (sec): 20.54 - samples/sec: 1652.00 - lr: 0.000009 - momentum: 0.000000
2023-10-25 02:41:31,250 epoch 8 - iter 432/1445 - loss 0.01573486 - time (sec): 30.84 - samples/sec: 1636.48 - lr: 0.000009 - momentum: 0.000000
2023-10-25 02:41:42,127 epoch 8 - iter 576/1445 - loss 0.01430969 - time (sec): 41.72 - samples/sec: 1617.81 - lr: 0.000009 - momentum: 0.000000
2023-10-25 02:41:52,698 epoch 8 - iter 720/1445 - loss 0.01460439 - time (sec): 52.29 - samples/sec: 1623.66 - lr: 0.000008 - momentum: 0.000000
2023-10-25 02:42:03,282 epoch 8 - iter 864/1445 - loss 0.01438062 - time (sec): 62.87 - samples/sec: 1631.98 - lr: 0.000008 - momentum: 0.000000
2023-10-25 02:42:13,987 epoch 8 - iter 1008/1445 - loss 0.01351538 - time (sec): 73.58 - samples/sec: 1630.78 - lr: 0.000008 - momentum: 0.000000
2023-10-25 02:42:24,433 epoch 8 - iter 1152/1445 - loss 0.01387078 - time (sec): 84.02 - samples/sec: 1637.08 - lr: 0.000007 - momentum: 0.000000
2023-10-25 02:42:35,072 epoch 8 - iter 1296/1445 - loss 0.01423724 - time (sec): 94.66 - samples/sec: 1650.65 - lr: 0.000007 - momentum: 0.000000
2023-10-25 02:42:46,259 epoch 8 - iter 1440/1445 - loss 0.01339430 - time (sec): 105.85 - samples/sec: 1659.27 - lr: 0.000007 - momentum: 0.000000
2023-10-25 02:42:46,581 ----------------------------------------------------------------------------------------------------
2023-10-25 02:42:46,582 EPOCH 8 done: loss 0.0134 - lr: 0.000007
2023-10-25 02:42:50,010 DEV : loss 0.1790740042924881 - f1-score (micro avg) 0.8274
2023-10-25 02:42:50,022 saving best model
2023-10-25 02:42:50,591 ----------------------------------------------------------------------------------------------------
2023-10-25 02:43:01,609 epoch 9 - iter 144/1445 - loss 0.00937956 - time (sec): 11.02 - samples/sec: 1647.52 - lr: 0.000006 - momentum: 0.000000
2023-10-25 02:43:12,221 epoch 9 - iter 288/1445 - loss 0.00904487 - time (sec): 21.63 - samples/sec: 1667.90 - lr: 0.000006 - momentum: 0.000000
2023-10-25 02:43:22,893 epoch 9 - iter 432/1445 - loss 0.00801036 - time (sec): 32.30 - samples/sec: 1691.47 - lr: 0.000006 - momentum: 0.000000
2023-10-25 02:43:33,390 epoch 9 - iter 576/1445 - loss 0.00824995 - time (sec): 42.80 - samples/sec: 1692.09 - lr: 0.000005 - momentum: 0.000000
2023-10-25 02:43:43,523 epoch 9 - iter 720/1445 - loss 0.00914915 - time (sec): 52.93 - samples/sec: 1678.62 - lr: 0.000005 - momentum: 0.000000
2023-10-25 02:43:53,812 epoch 9 - iter 864/1445 - loss 0.00891299 - time (sec): 63.22 - samples/sec: 1666.62 - lr: 0.000005 - momentum: 0.000000
2023-10-25 02:44:04,387 epoch 9 - iter 1008/1445 - loss 0.00823849 - time (sec): 73.80 - samples/sec: 1674.31 - lr: 0.000004 - momentum: 0.000000
2023-10-25 02:44:14,905 epoch 9 - iter 1152/1445 - loss 0.00800139 - time (sec): 84.31 - samples/sec: 1673.85 - lr: 0.000004 - momentum: 0.000000
2023-10-25 02:44:25,628 epoch 9 - iter 1296/1445 - loss 0.00888461 - time (sec): 95.04 - samples/sec: 1671.26 - lr: 0.000004 - momentum: 0.000000
2023-10-25 02:44:36,004 epoch 9 - iter 1440/1445 - loss 0.00918427 - time (sec): 105.41 - samples/sec: 1667.47 - lr: 0.000003 - momentum: 0.000000
2023-10-25 02:44:36,339 ----------------------------------------------------------------------------------------------------
2023-10-25 02:44:36,340 EPOCH 9 done: loss 0.0092 - lr: 0.000003
2023-10-25 02:44:40,061 DEV : loss 0.1937129944562912 - f1-score (micro avg) 0.812
2023-10-25 02:44:40,073 ----------------------------------------------------------------------------------------------------
2023-10-25 02:44:50,767 epoch 10 - iter 144/1445 - loss 0.00477797 - time (sec): 10.69 - samples/sec: 1664.16 - lr: 0.000003 - momentum: 0.000000
2023-10-25 02:45:01,498 epoch 10 - iter 288/1445 - loss 0.00496410 - time (sec): 21.42 - samples/sec: 1671.48 - lr: 0.000003 - momentum: 0.000000
2023-10-25 02:45:11,877 epoch 10 - iter 432/1445 - loss 0.00657026 - time (sec): 31.80 - samples/sec: 1651.31 - lr: 0.000002 - momentum: 0.000000
2023-10-25 02:45:22,278 epoch 10 - iter 576/1445 - loss 0.00657324 - time (sec): 42.20 - samples/sec: 1646.03 - lr: 0.000002 - momentum: 0.000000
2023-10-25 02:45:32,701 epoch 10 - iter 720/1445 - loss 0.00665808 - time (sec): 52.63 - samples/sec: 1645.24 - lr: 0.000002 - momentum: 0.000000
2023-10-25 02:45:43,094 epoch 10 - iter 864/1445 - loss 0.00612829 - time (sec): 63.02 - samples/sec: 1654.31 - lr: 0.000001 - momentum: 0.000000
2023-10-25 02:45:53,683 epoch 10 - iter 1008/1445 - loss 0.00578960 - time (sec): 73.61 - samples/sec: 1649.52 - lr: 0.000001 - momentum: 0.000000
2023-10-25 02:46:04,171 epoch 10 - iter 1152/1445 - loss 0.00614870 - time (sec): 84.10 - samples/sec: 1643.68 - lr: 0.000001 - momentum: 0.000000
2023-10-25 02:46:14,989 epoch 10 - iter 1296/1445 - loss 0.00615414 - time (sec): 94.92 - samples/sec: 1649.61 - lr: 0.000000 - momentum: 0.000000
2023-10-25 02:46:25,822 epoch 10 - iter 1440/1445 - loss 0.00620043 - time (sec): 105.75 - samples/sec: 1659.93 - lr: 0.000000 - momentum: 0.000000
2023-10-25 02:46:26,172 ----------------------------------------------------------------------------------------------------
2023-10-25 02:46:26,172 EPOCH 10 done: loss 0.0062 - lr: 0.000000
2023-10-25 02:46:29,604 DEV : loss 0.1986187845468521 - f1-score (micro avg) 0.8114
2023-10-25 02:46:30,087 ----------------------------------------------------------------------------------------------------
2023-10-25 02:46:30,088 Loading model from best epoch ...
2023-10-25 02:46:31,822 SequenceTagger predicts: Dictionary with 13 tags: O, S-LOC, B-LOC, E-LOC, I-LOC, S-PER, B-PER, E-PER, I-PER, S-ORG, B-ORG, E-ORG, I-ORG
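The 13 tags form a BIOES scheme over the three entity types (S-/B-/E-/I- plus O). The best-model.pt saved under the base path above can be reloaded for tagging; a minimal usage sketch (the Dutch example sentence is invented):

from flair.data import Sentence
from flair.models import SequenceTagger

tagger = SequenceTagger.load(
    "hmbench-icdar/nl-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs4-wsFalse-e10-lr3e-05-poolingfirst-layers-1-crfFalse-5/best-model.pt"
)
sentence = Sentence("Willem van Oranje werd geboren in Dillenburg .")
tagger.predict(sentence)
for span in sentence.get_spans("ner"):  # decoded PER/LOC/ORG spans
    print(span)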
2023-10-25 02:46:35,350
Results:
- F-score (micro) 0.7927
- F-score (macro) 0.6951
- Accuracy 0.6711

By class:
              precision    recall  f1-score   support

         PER     0.8371    0.7780    0.8065       482
         LOC     0.8957    0.7686    0.8273       458
         ORG     0.5091    0.4058    0.4516        69

   micro avg     0.8426    0.7483    0.7927      1009
   macro avg     0.7473    0.6508    0.6951      1009
weighted avg     0.8412    0.7483    0.7916      1009
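Consistency check: the micro-average row pools all 1009 gold entities, so F1 = 2 * 0.8426 * 0.7483 / (0.8426 + 0.7483) ≈ 0.7927, matching the F-score (micro) above. The macro average is the unweighted mean of the per-class F1 scores, (0.8065 + 0.8273 + 0.4516) / 3 ≈ 0.6951, which is why the weak ORG class (support 69) pulls it well below the micro score.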
2023-10-25 02:46:35,350 ----------------------------------------------------------------------------------------------------