|
2023-10-25 01:04:07,095 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 01:04:07,096 Model: "SequenceTagger( |
|
(embeddings): TransformerWordEmbeddings( |
|
(model): BertModel( |
|
(embeddings): BertEmbeddings( |
|
(word_embeddings): Embedding(64001, 768) |
|
(position_embeddings): Embedding(512, 768) |
|
(token_type_embeddings): Embedding(2, 768) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(encoder): BertEncoder( |
|
(layer): ModuleList( |
|
(0): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(1): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(2): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(3): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(4): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(5): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(6): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(7): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(8): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(9): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(10): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(11): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
) |
|
) |
|
(pooler): BertPooler( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(activation): Tanh() |
|
) |
|
) |
|
) |
|
(locked_dropout): LockedDropout(p=0.5) |
|
(linear): Linear(in_features=768, out_features=13, bias=True) |
|
(loss_function): CrossEntropyLoss() |
|
)" |
|
2023-10-25 01:04:07,096 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 01:04:07,096 MultiCorpus: 5777 train + 722 dev + 723 test sentences |
|
- NER_ICDAR_EUROPEANA Corpus: 5777 train + 722 dev + 723 test sentences - /home/ubuntu/.flair/datasets/ner_icdar_europeana/nl |
|
2023-10-25 01:04:07,096 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 01:04:07,096 Train: 5777 sentences |
|
2023-10-25 01:04:07,096 (train_with_dev=False, train_with_test=False) |
|
2023-10-25 01:04:07,096 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 01:04:07,096 Training Params: |
|
2023-10-25 01:04:07,096 - learning_rate: "5e-05" |
|
2023-10-25 01:04:07,096 - mini_batch_size: "8" |
|
2023-10-25 01:04:07,096 - max_epochs: "10" |
|
2023-10-25 01:04:07,096 - shuffle: "True" |
|
2023-10-25 01:04:07,096 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 01:04:07,096 Plugins: |
|
2023-10-25 01:04:07,096 - TensorboardLogger |
|
2023-10-25 01:04:07,096 - LinearScheduler | warmup_fraction: '0.1' |
|
2023-10-25 01:04:07,096 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 01:04:07,096 Final evaluation on model from best epoch (best-model.pt) |
|
2023-10-25 01:04:07,096 - metric: "('micro avg', 'f1-score')" |
|
2023-10-25 01:04:07,096 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 01:04:07,096 Computation: |
|
2023-10-25 01:04:07,096 - compute on device: cuda:0 |
|
2023-10-25 01:04:07,096 - embedding storage: none |
|
2023-10-25 01:04:07,096 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 01:04:07,096 Model training base path: "hmbench-icdar/nl-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs8-wsFalse-e10-lr5e-05-poolingfirst-layers-1-crfFalse-3" |
|
2023-10-25 01:04:07,096 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 01:04:07,096 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 01:04:07,097 Logging anything other than scalars to TensorBoard is currently not supported. |
|
2023-10-25 01:04:16,146 epoch 1 - iter 72/723 - loss 1.45572002 - time (sec): 9.05 - samples/sec: 2035.83 - lr: 0.000005 - momentum: 0.000000 |
|
2023-10-25 01:04:24,086 epoch 1 - iter 144/723 - loss 0.91676845 - time (sec): 16.99 - samples/sec: 2036.20 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-25 01:04:32,562 epoch 1 - iter 216/723 - loss 0.68403494 - time (sec): 25.46 - samples/sec: 2036.16 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-25 01:04:40,618 epoch 1 - iter 288/723 - loss 0.55697347 - time (sec): 33.52 - samples/sec: 2055.30 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-25 01:04:49,973 epoch 1 - iter 360/723 - loss 0.46763627 - time (sec): 42.88 - samples/sec: 2046.16 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-25 01:04:58,195 epoch 1 - iter 432/723 - loss 0.41649453 - time (sec): 51.10 - samples/sec: 2044.75 - lr: 0.000030 - momentum: 0.000000 |
|
2023-10-25 01:05:06,938 epoch 1 - iter 504/723 - loss 0.37262470 - time (sec): 59.84 - samples/sec: 2046.12 - lr: 0.000035 - momentum: 0.000000 |
|
2023-10-25 01:05:15,395 epoch 1 - iter 576/723 - loss 0.34280046 - time (sec): 68.30 - samples/sec: 2052.05 - lr: 0.000040 - momentum: 0.000000 |
|
2023-10-25 01:05:24,097 epoch 1 - iter 648/723 - loss 0.31876788 - time (sec): 77.00 - samples/sec: 2051.20 - lr: 0.000045 - momentum: 0.000000 |
|
2023-10-25 01:05:32,568 epoch 1 - iter 720/723 - loss 0.30091972 - time (sec): 85.47 - samples/sec: 2053.94 - lr: 0.000050 - momentum: 0.000000 |
|
2023-10-25 01:05:32,962 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 01:05:32,962 EPOCH 1 done: loss 0.3002 - lr: 0.000050 |
|
2023-10-25 01:05:36,274 DEV : loss 0.10925032198429108 - f1-score (micro avg) 0.6533 |
|
2023-10-25 01:05:36,286 saving best model |
|
2023-10-25 01:05:36,754 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 01:05:45,327 epoch 2 - iter 72/723 - loss 0.10117721 - time (sec): 8.57 - samples/sec: 2019.64 - lr: 0.000049 - momentum: 0.000000 |
|
2023-10-25 01:05:54,253 epoch 2 - iter 144/723 - loss 0.11289550 - time (sec): 17.50 - samples/sec: 2043.53 - lr: 0.000049 - momentum: 0.000000 |
|
2023-10-25 01:06:03,191 epoch 2 - iter 216/723 - loss 0.10874155 - time (sec): 26.44 - samples/sec: 2043.06 - lr: 0.000048 - momentum: 0.000000 |
|
2023-10-25 01:06:12,383 epoch 2 - iter 288/723 - loss 0.10220424 - time (sec): 35.63 - samples/sec: 2026.55 - lr: 0.000048 - momentum: 0.000000 |
|
2023-10-25 01:06:21,044 epoch 2 - iter 360/723 - loss 0.09955654 - time (sec): 44.29 - samples/sec: 2016.09 - lr: 0.000047 - momentum: 0.000000 |
|
2023-10-25 01:06:29,651 epoch 2 - iter 432/723 - loss 0.09894453 - time (sec): 52.90 - samples/sec: 2031.69 - lr: 0.000047 - momentum: 0.000000 |
|
2023-10-25 01:06:38,089 epoch 2 - iter 504/723 - loss 0.09726539 - time (sec): 61.33 - samples/sec: 2020.69 - lr: 0.000046 - momentum: 0.000000 |
|
2023-10-25 01:06:46,214 epoch 2 - iter 576/723 - loss 0.09549694 - time (sec): 69.46 - samples/sec: 2030.63 - lr: 0.000046 - momentum: 0.000000 |
|
2023-10-25 01:06:54,638 epoch 2 - iter 648/723 - loss 0.09612591 - time (sec): 77.88 - samples/sec: 2030.39 - lr: 0.000045 - momentum: 0.000000 |
|
2023-10-25 01:07:03,175 epoch 2 - iter 720/723 - loss 0.09317351 - time (sec): 86.42 - samples/sec: 2031.72 - lr: 0.000044 - momentum: 0.000000 |
|
2023-10-25 01:07:03,611 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 01:07:03,611 EPOCH 2 done: loss 0.0930 - lr: 0.000044 |
|
2023-10-25 01:07:07,320 DEV : loss 0.08034814149141312 - f1-score (micro avg) 0.8091 |
|
2023-10-25 01:07:07,332 saving best model |
|
2023-10-25 01:07:07,927 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 01:07:16,706 epoch 3 - iter 72/723 - loss 0.06500369 - time (sec): 8.78 - samples/sec: 1959.46 - lr: 0.000044 - momentum: 0.000000 |
|
2023-10-25 01:07:25,278 epoch 3 - iter 144/723 - loss 0.05478318 - time (sec): 17.35 - samples/sec: 2025.43 - lr: 0.000043 - momentum: 0.000000 |
|
2023-10-25 01:07:33,684 epoch 3 - iter 216/723 - loss 0.05889560 - time (sec): 25.76 - samples/sec: 2039.59 - lr: 0.000043 - momentum: 0.000000 |
|
2023-10-25 01:07:42,096 epoch 3 - iter 288/723 - loss 0.05877536 - time (sec): 34.17 - samples/sec: 2046.64 - lr: 0.000042 - momentum: 0.000000 |
|
2023-10-25 01:07:50,833 epoch 3 - iter 360/723 - loss 0.06175769 - time (sec): 42.91 - samples/sec: 2039.39 - lr: 0.000042 - momentum: 0.000000 |
|
2023-10-25 01:07:59,520 epoch 3 - iter 432/723 - loss 0.06122404 - time (sec): 51.59 - samples/sec: 2026.71 - lr: 0.000041 - momentum: 0.000000 |
|
2023-10-25 01:08:07,640 epoch 3 - iter 504/723 - loss 0.06123421 - time (sec): 59.71 - samples/sec: 2039.37 - lr: 0.000041 - momentum: 0.000000 |
|
2023-10-25 01:08:16,944 epoch 3 - iter 576/723 - loss 0.05995821 - time (sec): 69.02 - samples/sec: 2039.11 - lr: 0.000040 - momentum: 0.000000 |
|
2023-10-25 01:08:25,894 epoch 3 - iter 648/723 - loss 0.05960975 - time (sec): 77.97 - samples/sec: 2030.78 - lr: 0.000039 - momentum: 0.000000 |
|
2023-10-25 01:08:34,567 epoch 3 - iter 720/723 - loss 0.05948934 - time (sec): 86.64 - samples/sec: 2028.16 - lr: 0.000039 - momentum: 0.000000 |
|
2023-10-25 01:08:34,873 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 01:08:34,873 EPOCH 3 done: loss 0.0595 - lr: 0.000039 |
|
2023-10-25 01:08:38,299 DEV : loss 0.09194374829530716 - f1-score (micro avg) 0.7961 |
|
2023-10-25 01:08:38,311 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 01:08:47,136 epoch 4 - iter 72/723 - loss 0.05461547 - time (sec): 8.82 - samples/sec: 2018.57 - lr: 0.000038 - momentum: 0.000000 |
|
2023-10-25 01:08:56,029 epoch 4 - iter 144/723 - loss 0.04518897 - time (sec): 17.72 - samples/sec: 1973.08 - lr: 0.000038 - momentum: 0.000000 |
|
2023-10-25 01:09:04,623 epoch 4 - iter 216/723 - loss 0.04012520 - time (sec): 26.31 - samples/sec: 2031.07 - lr: 0.000037 - momentum: 0.000000 |
|
2023-10-25 01:09:13,366 epoch 4 - iter 288/723 - loss 0.04000327 - time (sec): 35.05 - samples/sec: 2045.04 - lr: 0.000037 - momentum: 0.000000 |
|
2023-10-25 01:09:21,202 epoch 4 - iter 360/723 - loss 0.04233806 - time (sec): 42.89 - samples/sec: 2043.88 - lr: 0.000036 - momentum: 0.000000 |
|
2023-10-25 01:09:29,969 epoch 4 - iter 432/723 - loss 0.04049656 - time (sec): 51.66 - samples/sec: 2036.49 - lr: 0.000036 - momentum: 0.000000 |
|
2023-10-25 01:09:38,518 epoch 4 - iter 504/723 - loss 0.03988417 - time (sec): 60.21 - samples/sec: 2036.32 - lr: 0.000035 - momentum: 0.000000 |
|
2023-10-25 01:09:47,402 epoch 4 - iter 576/723 - loss 0.04017909 - time (sec): 69.09 - samples/sec: 2032.75 - lr: 0.000034 - momentum: 0.000000 |
|
2023-10-25 01:09:55,982 epoch 4 - iter 648/723 - loss 0.04015339 - time (sec): 77.67 - samples/sec: 2037.31 - lr: 0.000034 - momentum: 0.000000 |
|
2023-10-25 01:10:04,519 epoch 4 - iter 720/723 - loss 0.03986297 - time (sec): 86.21 - samples/sec: 2039.16 - lr: 0.000033 - momentum: 0.000000 |
|
2023-10-25 01:10:04,803 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 01:10:04,804 EPOCH 4 done: loss 0.0398 - lr: 0.000033 |
|
2023-10-25 01:10:08,229 DEV : loss 0.0897688940167427 - f1-score (micro avg) 0.8166 |
|
2023-10-25 01:10:08,241 saving best model |
|
2023-10-25 01:10:08,828 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 01:10:17,244 epoch 5 - iter 72/723 - loss 0.02608406 - time (sec): 8.41 - samples/sec: 2015.12 - lr: 0.000033 - momentum: 0.000000 |
|
2023-10-25 01:10:25,537 epoch 5 - iter 144/723 - loss 0.02753241 - time (sec): 16.71 - samples/sec: 2036.80 - lr: 0.000032 - momentum: 0.000000 |
|
2023-10-25 01:10:34,490 epoch 5 - iter 216/723 - loss 0.02546037 - time (sec): 25.66 - samples/sec: 2040.51 - lr: 0.000032 - momentum: 0.000000 |
|
2023-10-25 01:10:42,970 epoch 5 - iter 288/723 - loss 0.02608071 - time (sec): 34.14 - samples/sec: 2029.11 - lr: 0.000031 - momentum: 0.000000 |
|
2023-10-25 01:10:51,760 epoch 5 - iter 360/723 - loss 0.02633603 - time (sec): 42.93 - samples/sec: 2025.29 - lr: 0.000031 - momentum: 0.000000 |
|
2023-10-25 01:11:01,014 epoch 5 - iter 432/723 - loss 0.02664793 - time (sec): 52.18 - samples/sec: 2028.08 - lr: 0.000030 - momentum: 0.000000 |
|
2023-10-25 01:11:09,396 epoch 5 - iter 504/723 - loss 0.02811342 - time (sec): 60.57 - samples/sec: 2031.95 - lr: 0.000029 - momentum: 0.000000 |
|
2023-10-25 01:11:17,740 epoch 5 - iter 576/723 - loss 0.02890323 - time (sec): 68.91 - samples/sec: 2034.48 - lr: 0.000029 - momentum: 0.000000 |
|
2023-10-25 01:11:26,353 epoch 5 - iter 648/723 - loss 0.02866005 - time (sec): 77.52 - samples/sec: 2041.65 - lr: 0.000028 - momentum: 0.000000 |
|
2023-10-25 01:11:35,016 epoch 5 - iter 720/723 - loss 0.02960388 - time (sec): 86.19 - samples/sec: 2039.54 - lr: 0.000028 - momentum: 0.000000 |
|
2023-10-25 01:11:35,307 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 01:11:35,308 EPOCH 5 done: loss 0.0295 - lr: 0.000028 |
|
2023-10-25 01:11:39,046 DEV : loss 0.11081855744123459 - f1-score (micro avg) 0.8258 |
|
2023-10-25 01:11:39,058 saving best model |
|
2023-10-25 01:11:39,628 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 01:11:48,542 epoch 6 - iter 72/723 - loss 0.01740019 - time (sec): 8.91 - samples/sec: 2025.45 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-25 01:11:56,774 epoch 6 - iter 144/723 - loss 0.02061731 - time (sec): 17.14 - samples/sec: 2046.74 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-25 01:12:05,125 epoch 6 - iter 216/723 - loss 0.02286059 - time (sec): 25.50 - samples/sec: 2066.00 - lr: 0.000026 - momentum: 0.000000 |
|
2023-10-25 01:12:13,809 epoch 6 - iter 288/723 - loss 0.02094299 - time (sec): 34.18 - samples/sec: 2052.58 - lr: 0.000026 - momentum: 0.000000 |
|
2023-10-25 01:12:22,536 epoch 6 - iter 360/723 - loss 0.02022481 - time (sec): 42.91 - samples/sec: 2053.74 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-25 01:12:31,013 epoch 6 - iter 432/723 - loss 0.02046349 - time (sec): 51.38 - samples/sec: 2050.16 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-25 01:12:39,322 epoch 6 - iter 504/723 - loss 0.02075425 - time (sec): 59.69 - samples/sec: 2050.42 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-25 01:12:47,992 epoch 6 - iter 576/723 - loss 0.02113536 - time (sec): 68.36 - samples/sec: 2049.29 - lr: 0.000023 - momentum: 0.000000 |
|
2023-10-25 01:12:57,106 epoch 6 - iter 648/723 - loss 0.02052168 - time (sec): 77.48 - samples/sec: 2053.36 - lr: 0.000023 - momentum: 0.000000 |
|
2023-10-25 01:13:05,565 epoch 6 - iter 720/723 - loss 0.02112062 - time (sec): 85.94 - samples/sec: 2046.47 - lr: 0.000022 - momentum: 0.000000 |
|
2023-10-25 01:13:05,794 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 01:13:05,794 EPOCH 6 done: loss 0.0212 - lr: 0.000022 |
|
2023-10-25 01:13:09,529 DEV : loss 0.1646719127893448 - f1-score (micro avg) 0.8134 |
|
2023-10-25 01:13:09,541 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 01:13:18,095 epoch 7 - iter 72/723 - loss 0.01040535 - time (sec): 8.55 - samples/sec: 2041.45 - lr: 0.000022 - momentum: 0.000000 |
|
2023-10-25 01:13:26,230 epoch 7 - iter 144/723 - loss 0.01121216 - time (sec): 16.69 - samples/sec: 2033.43 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-25 01:13:35,816 epoch 7 - iter 216/723 - loss 0.01316319 - time (sec): 26.27 - samples/sec: 2049.85 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-25 01:13:44,360 epoch 7 - iter 288/723 - loss 0.01275497 - time (sec): 34.82 - samples/sec: 2045.24 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-25 01:13:53,420 epoch 7 - iter 360/723 - loss 0.01587086 - time (sec): 43.88 - samples/sec: 2036.49 - lr: 0.000019 - momentum: 0.000000 |
|
2023-10-25 01:14:01,473 epoch 7 - iter 432/723 - loss 0.01563474 - time (sec): 51.93 - samples/sec: 2038.39 - lr: 0.000019 - momentum: 0.000000 |
|
2023-10-25 01:14:11,066 epoch 7 - iter 504/723 - loss 0.01717798 - time (sec): 61.52 - samples/sec: 2029.83 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-25 01:14:19,390 epoch 7 - iter 576/723 - loss 0.01737380 - time (sec): 69.85 - samples/sec: 2018.91 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-25 01:14:28,180 epoch 7 - iter 648/723 - loss 0.01659881 - time (sec): 78.64 - samples/sec: 2016.98 - lr: 0.000017 - momentum: 0.000000 |
|
2023-10-25 01:14:36,001 epoch 7 - iter 720/723 - loss 0.01600230 - time (sec): 86.46 - samples/sec: 2032.42 - lr: 0.000017 - momentum: 0.000000 |
|
2023-10-25 01:14:36,248 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 01:14:36,248 EPOCH 7 done: loss 0.0160 - lr: 0.000017 |
|
2023-10-25 01:14:39,688 DEV : loss 0.1934468001127243 - f1-score (micro avg) 0.8154 |
|
2023-10-25 01:14:39,700 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 01:14:48,448 epoch 8 - iter 72/723 - loss 0.00946057 - time (sec): 8.75 - samples/sec: 1925.93 - lr: 0.000016 - momentum: 0.000000 |
|
2023-10-25 01:14:57,423 epoch 8 - iter 144/723 - loss 0.01042056 - time (sec): 17.72 - samples/sec: 1934.31 - lr: 0.000016 - momentum: 0.000000 |
|
2023-10-25 01:15:05,737 epoch 8 - iter 216/723 - loss 0.01240594 - time (sec): 26.04 - samples/sec: 1943.39 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-25 01:15:15,629 epoch 8 - iter 288/723 - loss 0.01202637 - time (sec): 35.93 - samples/sec: 1953.65 - lr: 0.000014 - momentum: 0.000000 |
|
2023-10-25 01:15:24,097 epoch 8 - iter 360/723 - loss 0.01177067 - time (sec): 44.40 - samples/sec: 1974.49 - lr: 0.000014 - momentum: 0.000000 |
|
2023-10-25 01:15:32,663 epoch 8 - iter 432/723 - loss 0.01169836 - time (sec): 52.96 - samples/sec: 1986.72 - lr: 0.000013 - momentum: 0.000000 |
|
2023-10-25 01:15:41,331 epoch 8 - iter 504/723 - loss 0.01101480 - time (sec): 61.63 - samples/sec: 1996.22 - lr: 0.000013 - momentum: 0.000000 |
|
2023-10-25 01:15:49,951 epoch 8 - iter 576/723 - loss 0.01097938 - time (sec): 70.25 - samples/sec: 2000.21 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-25 01:15:58,282 epoch 8 - iter 648/723 - loss 0.01051937 - time (sec): 78.58 - samples/sec: 2007.38 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-25 01:16:06,764 epoch 8 - iter 720/723 - loss 0.01094169 - time (sec): 87.06 - samples/sec: 2018.87 - lr: 0.000011 - momentum: 0.000000 |
|
2023-10-25 01:16:07,022 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 01:16:07,022 EPOCH 8 done: loss 0.0109 - lr: 0.000011 |
|
2023-10-25 01:16:10,453 DEV : loss 0.18262676894664764 - f1-score (micro avg) 0.8151 |
|
2023-10-25 01:16:10,464 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 01:16:19,248 epoch 9 - iter 72/723 - loss 0.00504756 - time (sec): 8.78 - samples/sec: 2043.13 - lr: 0.000011 - momentum: 0.000000 |
|
2023-10-25 01:16:27,794 epoch 9 - iter 144/723 - loss 0.00575751 - time (sec): 17.33 - samples/sec: 2037.49 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-25 01:16:36,589 epoch 9 - iter 216/723 - loss 0.00648151 - time (sec): 26.12 - samples/sec: 2029.63 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-25 01:16:45,733 epoch 9 - iter 288/723 - loss 0.00591136 - time (sec): 35.27 - samples/sec: 2028.64 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-25 01:16:54,198 epoch 9 - iter 360/723 - loss 0.00640861 - time (sec): 43.73 - samples/sec: 2021.41 - lr: 0.000008 - momentum: 0.000000 |
|
2023-10-25 01:17:02,610 epoch 9 - iter 432/723 - loss 0.00757996 - time (sec): 52.14 - samples/sec: 2020.88 - lr: 0.000008 - momentum: 0.000000 |
|
2023-10-25 01:17:11,177 epoch 9 - iter 504/723 - loss 0.00703844 - time (sec): 60.71 - samples/sec: 2027.79 - lr: 0.000007 - momentum: 0.000000 |
|
2023-10-25 01:17:20,097 epoch 9 - iter 576/723 - loss 0.00718620 - time (sec): 69.63 - samples/sec: 2033.48 - lr: 0.000007 - momentum: 0.000000 |
|
2023-10-25 01:17:28,358 epoch 9 - iter 648/723 - loss 0.00727799 - time (sec): 77.89 - samples/sec: 2032.25 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-25 01:17:37,072 epoch 9 - iter 720/723 - loss 0.00689246 - time (sec): 86.61 - samples/sec: 2028.57 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-25 01:17:37,328 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 01:17:37,328 EPOCH 9 done: loss 0.0069 - lr: 0.000006 |
|
2023-10-25 01:17:41,057 DEV : loss 0.18817579746246338 - f1-score (micro avg) 0.831 |
|
2023-10-25 01:17:41,069 saving best model |
|
2023-10-25 01:17:41,641 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 01:17:50,099 epoch 10 - iter 72/723 - loss 0.00275015 - time (sec): 8.46 - samples/sec: 2043.87 - lr: 0.000005 - momentum: 0.000000 |
|
2023-10-25 01:17:59,219 epoch 10 - iter 144/723 - loss 0.00549173 - time (sec): 17.58 - samples/sec: 1986.63 - lr: 0.000004 - momentum: 0.000000 |
|
2023-10-25 01:18:07,521 epoch 10 - iter 216/723 - loss 0.00495516 - time (sec): 25.88 - samples/sec: 2013.02 - lr: 0.000004 - momentum: 0.000000 |
|
2023-10-25 01:18:16,152 epoch 10 - iter 288/723 - loss 0.00443758 - time (sec): 34.51 - samples/sec: 2029.88 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-25 01:18:24,639 epoch 10 - iter 360/723 - loss 0.00390511 - time (sec): 43.00 - samples/sec: 2032.28 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-25 01:18:34,000 epoch 10 - iter 432/723 - loss 0.00448422 - time (sec): 52.36 - samples/sec: 2043.70 - lr: 0.000002 - momentum: 0.000000 |
|
2023-10-25 01:18:42,587 epoch 10 - iter 504/723 - loss 0.00432537 - time (sec): 60.95 - samples/sec: 2044.40 - lr: 0.000002 - momentum: 0.000000 |
|
2023-10-25 01:18:51,329 epoch 10 - iter 576/723 - loss 0.00427777 - time (sec): 69.69 - samples/sec: 2042.84 - lr: 0.000001 - momentum: 0.000000 |
|
2023-10-25 01:18:59,297 epoch 10 - iter 648/723 - loss 0.00428801 - time (sec): 77.66 - samples/sec: 2041.95 - lr: 0.000001 - momentum: 0.000000 |
|
2023-10-25 01:19:07,841 epoch 10 - iter 720/723 - loss 0.00428541 - time (sec): 86.20 - samples/sec: 2039.87 - lr: 0.000000 - momentum: 0.000000 |
|
2023-10-25 01:19:08,075 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 01:19:08,075 EPOCH 10 done: loss 0.0043 - lr: 0.000000 |
|
2023-10-25 01:19:11,506 DEV : loss 0.19799402356147766 - f1-score (micro avg) 0.829 |
|
2023-10-25 01:19:12,288 ---------------------------------------------------------------------------------------------------- |
|
2023-10-25 01:19:12,289 Loading model from best epoch ... |
|
2023-10-25 01:19:14,041 SequenceTagger predicts: Dictionary with 13 tags: O, S-LOC, B-LOC, E-LOC, I-LOC, S-PER, B-PER, E-PER, I-PER, S-ORG, B-ORG, E-ORG, I-ORG |
|
2023-10-25 01:19:17,277 |
|
Results: |
|
- F-score (micro) 0.8004 |
|
- F-score (macro) 0.6924 |
|
- Accuracy 0.6825 |
|
|
|
By class: |
|
precision recall f1-score support |
|
|
|
PER 0.8262 0.7988 0.8122 482 |
|
LOC 0.8798 0.7991 0.8375 458 |
|
ORG 0.5208 0.3623 0.4274 69 |
|
|
|
micro avg 0.8344 0.7691 0.8004 1009 |
|
macro avg 0.7423 0.6534 0.6924 1009 |
|
weighted avg 0.8296 0.7691 0.7974 1009 |
|
|
|
2023-10-25 01:19:17,278 ---------------------------------------------------------------------------------------------------- |
|
|