|
2023-10-23 22:00:02,468 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:00:02,469 Model: "SequenceTagger( |
|
(embeddings): TransformerWordEmbeddings( |
|
(model): BertModel( |
|
(embeddings): BertEmbeddings( |
|
(word_embeddings): Embedding(64001, 768) |
|
(position_embeddings): Embedding(512, 768) |
|
(token_type_embeddings): Embedding(2, 768) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(encoder): BertEncoder( |
|
(layer): ModuleList( |
|
(0): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(1): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(2): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(3): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(4): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(5): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(6): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(7): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(8): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(9): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(10): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(11): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
) |
|
) |
|
(pooler): BertPooler( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(activation): Tanh() |
|
) |
|
) |
|
) |
|
(locked_dropout): LockedDropout(p=0.5) |
|
(linear): Linear(in_features=768, out_features=21, bias=True) |
|
(loss_function): CrossEntropyLoss() |
|
)" |
|
2023-10-23 22:00:02,469 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:00:02,469 MultiCorpus: 3575 train + 1235 dev + 1266 test sentences |
|
- NER_HIPE_2022 Corpus: 3575 train + 1235 dev + 1266 test sentences - /home/ubuntu/.flair/datasets/ner_hipe_2022/v2.1/hipe2020/de/with_doc_seperator |
|
2023-10-23 22:00:02,469 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:00:02,469 Train: 3575 sentences |
|
2023-10-23 22:00:02,469 (train_with_dev=False, train_with_test=False) |
|
2023-10-23 22:00:02,469 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:00:02,469 Training Params: |
|
2023-10-23 22:00:02,469 - learning_rate: "3e-05" |
|
2023-10-23 22:00:02,469 - mini_batch_size: "4" |
|
2023-10-23 22:00:02,469 - max_epochs: "10" |
|
2023-10-23 22:00:02,470 - shuffle: "True" |
|
2023-10-23 22:00:02,470 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:00:02,470 Plugins: |
|
2023-10-23 22:00:02,470 - TensorboardLogger |
|
2023-10-23 22:00:02,470 - LinearScheduler | warmup_fraction: '0.1' |
|
2023-10-23 22:00:02,470 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:00:02,470 Final evaluation on model from best epoch (best-model.pt) |
|
2023-10-23 22:00:02,470 - metric: "('micro avg', 'f1-score')" |
|
2023-10-23 22:00:02,470 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:00:02,470 Computation: |
|
2023-10-23 22:00:02,470 - compute on device: cuda:0 |
|
2023-10-23 22:00:02,470 - embedding storage: none |
|
2023-10-23 22:00:02,470 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:00:02,470 Model training base path: "hmbench-hipe2020/de-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs4-wsFalse-e10-lr3e-05-poolingfirst-layers-1-crfFalse-4" |
|
2023-10-23 22:00:02,470 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:00:02,470 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:00:02,470 Logging anything other than scalars to TensorBoard is currently not supported. |
|
2023-10-23 22:00:08,104 epoch 1 - iter 89/894 - loss 2.62343567 - time (sec): 5.63 - samples/sec: 1469.36 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-23 22:00:13,867 epoch 1 - iter 178/894 - loss 1.57512476 - time (sec): 11.40 - samples/sec: 1513.53 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-23 22:00:19,441 epoch 1 - iter 267/894 - loss 1.20032023 - time (sec): 16.97 - samples/sec: 1504.38 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-23 22:00:24,975 epoch 1 - iter 356/894 - loss 1.00779245 - time (sec): 22.50 - samples/sec: 1503.59 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-23 22:00:30,549 epoch 1 - iter 445/894 - loss 0.85753315 - time (sec): 28.08 - samples/sec: 1512.61 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-23 22:00:36,026 epoch 1 - iter 534/894 - loss 0.75991655 - time (sec): 33.56 - samples/sec: 1510.60 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-23 22:00:41,741 epoch 1 - iter 623/894 - loss 0.68458506 - time (sec): 39.27 - samples/sec: 1518.10 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-23 22:00:47,325 epoch 1 - iter 712/894 - loss 0.62574853 - time (sec): 44.85 - samples/sec: 1520.57 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-23 22:00:52,887 epoch 1 - iter 801/894 - loss 0.58111481 - time (sec): 50.42 - samples/sec: 1517.67 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-23 22:00:58,859 epoch 1 - iter 890/894 - loss 0.53984463 - time (sec): 56.39 - samples/sec: 1524.89 - lr: 0.000030 - momentum: 0.000000 |
|
2023-10-23 22:00:59,171 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:00:59,172 EPOCH 1 done: loss 0.5387 - lr: 0.000030 |
|
2023-10-23 22:01:04,018 DEV : loss 0.14558294415473938 - f1-score (micro avg) 0.6342 |
|
2023-10-23 22:01:04,039 saving best model |
|
2023-10-23 22:01:04,602 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:01:10,103 epoch 2 - iter 89/894 - loss 0.14667895 - time (sec): 5.50 - samples/sec: 1469.57 - lr: 0.000030 - momentum: 0.000000 |
|
2023-10-23 22:01:15,704 epoch 2 - iter 178/894 - loss 0.15235244 - time (sec): 11.10 - samples/sec: 1534.04 - lr: 0.000029 - momentum: 0.000000 |
|
2023-10-23 22:01:21,587 epoch 2 - iter 267/894 - loss 0.15030861 - time (sec): 16.98 - samples/sec: 1547.95 - lr: 0.000029 - momentum: 0.000000 |
|
2023-10-23 22:01:27,195 epoch 2 - iter 356/894 - loss 0.15389929 - time (sec): 22.59 - samples/sec: 1533.96 - lr: 0.000029 - momentum: 0.000000 |
|
2023-10-23 22:01:32,890 epoch 2 - iter 445/894 - loss 0.15549779 - time (sec): 28.29 - samples/sec: 1531.29 - lr: 0.000028 - momentum: 0.000000 |
|
2023-10-23 22:01:38,600 epoch 2 - iter 534/894 - loss 0.14895964 - time (sec): 34.00 - samples/sec: 1517.66 - lr: 0.000028 - momentum: 0.000000 |
|
2023-10-23 22:01:44,182 epoch 2 - iter 623/894 - loss 0.15223696 - time (sec): 39.58 - samples/sec: 1512.68 - lr: 0.000028 - momentum: 0.000000 |
|
2023-10-23 22:01:49,668 epoch 2 - iter 712/894 - loss 0.14480688 - time (sec): 45.06 - samples/sec: 1499.66 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-23 22:01:55,550 epoch 2 - iter 801/894 - loss 0.14199992 - time (sec): 50.95 - samples/sec: 1512.58 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-23 22:02:01,279 epoch 2 - iter 890/894 - loss 0.14023230 - time (sec): 56.68 - samples/sec: 1520.24 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-23 22:02:01,528 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:02:01,528 EPOCH 2 done: loss 0.1398 - lr: 0.000027 |
|
2023-10-23 22:02:08,034 DEV : loss 0.15009824931621552 - f1-score (micro avg) 0.7154 |
|
2023-10-23 22:02:08,054 saving best model |
|
2023-10-23 22:02:08,783 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:02:14,310 epoch 3 - iter 89/894 - loss 0.07903434 - time (sec): 5.53 - samples/sec: 1419.56 - lr: 0.000026 - momentum: 0.000000 |
|
2023-10-23 22:02:20,077 epoch 3 - iter 178/894 - loss 0.08627761 - time (sec): 11.29 - samples/sec: 1445.44 - lr: 0.000026 - momentum: 0.000000 |
|
2023-10-23 22:02:25,634 epoch 3 - iter 267/894 - loss 0.08164110 - time (sec): 16.85 - samples/sec: 1471.01 - lr: 0.000026 - momentum: 0.000000 |
|
2023-10-23 22:02:31,429 epoch 3 - iter 356/894 - loss 0.08568194 - time (sec): 22.64 - samples/sec: 1501.09 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-23 22:02:36,950 epoch 3 - iter 445/894 - loss 0.08356653 - time (sec): 28.17 - samples/sec: 1476.31 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-23 22:02:42,852 epoch 3 - iter 534/894 - loss 0.08369738 - time (sec): 34.07 - samples/sec: 1491.63 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-23 22:02:48,743 epoch 3 - iter 623/894 - loss 0.08210453 - time (sec): 39.96 - samples/sec: 1504.93 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-23 22:02:54,331 epoch 3 - iter 712/894 - loss 0.08028534 - time (sec): 45.55 - samples/sec: 1517.67 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-23 22:02:59,876 epoch 3 - iter 801/894 - loss 0.08231477 - time (sec): 51.09 - samples/sec: 1515.30 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-23 22:03:05,538 epoch 3 - iter 890/894 - loss 0.08214108 - time (sec): 56.75 - samples/sec: 1518.46 - lr: 0.000023 - momentum: 0.000000 |
|
2023-10-23 22:03:05,783 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:03:05,783 EPOCH 3 done: loss 0.0828 - lr: 0.000023 |
|
2023-10-23 22:03:12,290 DEV : loss 0.1573496311903 - f1-score (micro avg) 0.7352 |
|
2023-10-23 22:03:12,310 saving best model |
|
2023-10-23 22:03:13,023 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:03:18,641 epoch 4 - iter 89/894 - loss 0.06023096 - time (sec): 5.62 - samples/sec: 1509.49 - lr: 0.000023 - momentum: 0.000000 |
|
2023-10-23 22:03:24,166 epoch 4 - iter 178/894 - loss 0.05491063 - time (sec): 11.14 - samples/sec: 1486.24 - lr: 0.000023 - momentum: 0.000000 |
|
2023-10-23 22:03:29,752 epoch 4 - iter 267/894 - loss 0.04891097 - time (sec): 16.73 - samples/sec: 1506.70 - lr: 0.000022 - momentum: 0.000000 |
|
2023-10-23 22:03:35,652 epoch 4 - iter 356/894 - loss 0.04752950 - time (sec): 22.63 - samples/sec: 1527.14 - lr: 0.000022 - momentum: 0.000000 |
|
2023-10-23 22:03:41,423 epoch 4 - iter 445/894 - loss 0.04766369 - time (sec): 28.40 - samples/sec: 1527.67 - lr: 0.000022 - momentum: 0.000000 |
|
2023-10-23 22:03:47,052 epoch 4 - iter 534/894 - loss 0.05016494 - time (sec): 34.03 - samples/sec: 1521.48 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-23 22:03:52,551 epoch 4 - iter 623/894 - loss 0.04807291 - time (sec): 39.53 - samples/sec: 1522.71 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-23 22:03:58,196 epoch 4 - iter 712/894 - loss 0.04922704 - time (sec): 45.17 - samples/sec: 1521.89 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-23 22:04:03,969 epoch 4 - iter 801/894 - loss 0.05047950 - time (sec): 50.95 - samples/sec: 1520.51 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-23 22:04:09,644 epoch 4 - iter 890/894 - loss 0.05249311 - time (sec): 56.62 - samples/sec: 1522.65 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-23 22:04:09,891 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:04:09,891 EPOCH 4 done: loss 0.0524 - lr: 0.000020 |
|
2023-10-23 22:04:16,372 DEV : loss 0.21319110691547394 - f1-score (micro avg) 0.748 |
|
2023-10-23 22:04:16,393 saving best model |
|
2023-10-23 22:04:17,106 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:04:22,892 epoch 5 - iter 89/894 - loss 0.03139489 - time (sec): 5.79 - samples/sec: 1555.16 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-23 22:04:28,574 epoch 5 - iter 178/894 - loss 0.02921430 - time (sec): 11.47 - samples/sec: 1527.76 - lr: 0.000019 - momentum: 0.000000 |
|
2023-10-23 22:04:34,137 epoch 5 - iter 267/894 - loss 0.03308587 - time (sec): 17.03 - samples/sec: 1517.29 - lr: 0.000019 - momentum: 0.000000 |
|
2023-10-23 22:04:39,873 epoch 5 - iter 356/894 - loss 0.03277134 - time (sec): 22.77 - samples/sec: 1533.13 - lr: 0.000019 - momentum: 0.000000 |
|
2023-10-23 22:04:45,803 epoch 5 - iter 445/894 - loss 0.03290898 - time (sec): 28.70 - samples/sec: 1557.66 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-23 22:04:51,276 epoch 5 - iter 534/894 - loss 0.03073298 - time (sec): 34.17 - samples/sec: 1537.89 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-23 22:04:57,009 epoch 5 - iter 623/894 - loss 0.03132323 - time (sec): 39.90 - samples/sec: 1528.69 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-23 22:05:02,566 epoch 5 - iter 712/894 - loss 0.03142016 - time (sec): 45.46 - samples/sec: 1531.50 - lr: 0.000017 - momentum: 0.000000 |
|
2023-10-23 22:05:08,111 epoch 5 - iter 801/894 - loss 0.03248282 - time (sec): 51.00 - samples/sec: 1519.37 - lr: 0.000017 - momentum: 0.000000 |
|
2023-10-23 22:05:13,728 epoch 5 - iter 890/894 - loss 0.03245031 - time (sec): 56.62 - samples/sec: 1517.85 - lr: 0.000017 - momentum: 0.000000 |
|
2023-10-23 22:05:14,035 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:05:14,036 EPOCH 5 done: loss 0.0323 - lr: 0.000017 |
|
2023-10-23 22:05:20,524 DEV : loss 0.24947355687618256 - f1-score (micro avg) 0.7729 |
|
2023-10-23 22:05:20,545 saving best model |
|
2023-10-23 22:05:21,250 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:05:26,639 epoch 6 - iter 89/894 - loss 0.01792561 - time (sec): 5.39 - samples/sec: 1388.32 - lr: 0.000016 - momentum: 0.000000 |
|
2023-10-23 22:05:32,260 epoch 6 - iter 178/894 - loss 0.01992217 - time (sec): 11.01 - samples/sec: 1458.36 - lr: 0.000016 - momentum: 0.000000 |
|
2023-10-23 22:05:38,009 epoch 6 - iter 267/894 - loss 0.02269894 - time (sec): 16.76 - samples/sec: 1509.74 - lr: 0.000016 - momentum: 0.000000 |
|
2023-10-23 22:05:43,702 epoch 6 - iter 356/894 - loss 0.02500337 - time (sec): 22.45 - samples/sec: 1515.16 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-23 22:05:49,609 epoch 6 - iter 445/894 - loss 0.02486673 - time (sec): 28.36 - samples/sec: 1537.92 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-23 22:05:55,122 epoch 6 - iter 534/894 - loss 0.02638849 - time (sec): 33.87 - samples/sec: 1526.69 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-23 22:06:00,860 epoch 6 - iter 623/894 - loss 0.02529988 - time (sec): 39.61 - samples/sec: 1531.21 - lr: 0.000014 - momentum: 0.000000 |
|
2023-10-23 22:06:06,606 epoch 6 - iter 712/894 - loss 0.02507466 - time (sec): 45.36 - samples/sec: 1522.93 - lr: 0.000014 - momentum: 0.000000 |
|
2023-10-23 22:06:12,290 epoch 6 - iter 801/894 - loss 0.02413790 - time (sec): 51.04 - samples/sec: 1522.67 - lr: 0.000014 - momentum: 0.000000 |
|
2023-10-23 22:06:17,946 epoch 6 - iter 890/894 - loss 0.02403350 - time (sec): 56.70 - samples/sec: 1520.89 - lr: 0.000013 - momentum: 0.000000 |
|
2023-10-23 22:06:18,185 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:06:18,185 EPOCH 6 done: loss 0.0242 - lr: 0.000013 |
|
2023-10-23 22:06:24,684 DEV : loss 0.26065516471862793 - f1-score (micro avg) 0.7557 |
|
2023-10-23 22:06:24,704 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:06:30,636 epoch 7 - iter 89/894 - loss 0.01279211 - time (sec): 5.93 - samples/sec: 1607.14 - lr: 0.000013 - momentum: 0.000000 |
|
2023-10-23 22:06:36,220 epoch 7 - iter 178/894 - loss 0.01603667 - time (sec): 11.52 - samples/sec: 1543.94 - lr: 0.000013 - momentum: 0.000000 |
|
2023-10-23 22:06:41,728 epoch 7 - iter 267/894 - loss 0.01679587 - time (sec): 17.02 - samples/sec: 1505.93 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-23 22:06:47,208 epoch 7 - iter 356/894 - loss 0.02168416 - time (sec): 22.50 - samples/sec: 1483.39 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-23 22:06:52,959 epoch 7 - iter 445/894 - loss 0.02179523 - time (sec): 28.25 - samples/sec: 1488.98 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-23 22:06:58,548 epoch 7 - iter 534/894 - loss 0.02050756 - time (sec): 33.84 - samples/sec: 1492.08 - lr: 0.000011 - momentum: 0.000000 |
|
2023-10-23 22:07:04,349 epoch 7 - iter 623/894 - loss 0.01912097 - time (sec): 39.64 - samples/sec: 1505.91 - lr: 0.000011 - momentum: 0.000000 |
|
2023-10-23 22:07:10,221 epoch 7 - iter 712/894 - loss 0.01820047 - time (sec): 45.52 - samples/sec: 1528.48 - lr: 0.000011 - momentum: 0.000000 |
|
2023-10-23 22:07:15,794 epoch 7 - iter 801/894 - loss 0.01769554 - time (sec): 51.09 - samples/sec: 1523.06 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-23 22:07:21,354 epoch 7 - iter 890/894 - loss 0.01702500 - time (sec): 56.65 - samples/sec: 1521.23 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-23 22:07:21,596 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:07:21,596 EPOCH 7 done: loss 0.0170 - lr: 0.000010 |
|
2023-10-23 22:07:28,083 DEV : loss 0.28167280554771423 - f1-score (micro avg) 0.7629 |
|
2023-10-23 22:07:28,104 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:07:33,751 epoch 8 - iter 89/894 - loss 0.01867921 - time (sec): 5.65 - samples/sec: 1520.34 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-23 22:07:39,538 epoch 8 - iter 178/894 - loss 0.01577457 - time (sec): 11.43 - samples/sec: 1510.75 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-23 22:07:45,307 epoch 8 - iter 267/894 - loss 0.01153896 - time (sec): 17.20 - samples/sec: 1532.20 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-23 22:07:51,236 epoch 8 - iter 356/894 - loss 0.01015795 - time (sec): 23.13 - samples/sec: 1547.33 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-23 22:07:56,657 epoch 8 - iter 445/894 - loss 0.01021030 - time (sec): 28.55 - samples/sec: 1523.00 - lr: 0.000008 - momentum: 0.000000 |
|
2023-10-23 22:08:02,222 epoch 8 - iter 534/894 - loss 0.01037248 - time (sec): 34.12 - samples/sec: 1526.13 - lr: 0.000008 - momentum: 0.000000 |
|
2023-10-23 22:08:07,768 epoch 8 - iter 623/894 - loss 0.01003113 - time (sec): 39.66 - samples/sec: 1524.13 - lr: 0.000008 - momentum: 0.000000 |
|
2023-10-23 22:08:13,205 epoch 8 - iter 712/894 - loss 0.01000431 - time (sec): 45.10 - samples/sec: 1508.44 - lr: 0.000007 - momentum: 0.000000 |
|
2023-10-23 22:08:19,069 epoch 8 - iter 801/894 - loss 0.01026783 - time (sec): 50.96 - samples/sec: 1518.12 - lr: 0.000007 - momentum: 0.000000 |
|
2023-10-23 22:08:24,766 epoch 8 - iter 890/894 - loss 0.01030903 - time (sec): 56.66 - samples/sec: 1522.06 - lr: 0.000007 - momentum: 0.000000 |
|
2023-10-23 22:08:25,014 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:08:25,014 EPOCH 8 done: loss 0.0103 - lr: 0.000007 |
|
2023-10-23 22:08:31,510 DEV : loss 0.281236857175827 - f1-score (micro avg) 0.7692 |
|
2023-10-23 22:08:31,530 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:08:37,018 epoch 9 - iter 89/894 - loss 0.01133282 - time (sec): 5.49 - samples/sec: 1487.95 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-23 22:08:42,585 epoch 9 - iter 178/894 - loss 0.01040047 - time (sec): 11.05 - samples/sec: 1483.85 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-23 22:08:48,138 epoch 9 - iter 267/894 - loss 0.00836655 - time (sec): 16.61 - samples/sec: 1507.47 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-23 22:08:53,608 epoch 9 - iter 356/894 - loss 0.00715496 - time (sec): 22.08 - samples/sec: 1502.77 - lr: 0.000005 - momentum: 0.000000 |
|
2023-10-23 22:08:59,232 epoch 9 - iter 445/894 - loss 0.00763303 - time (sec): 27.70 - samples/sec: 1507.64 - lr: 0.000005 - momentum: 0.000000 |
|
2023-10-23 22:09:05,207 epoch 9 - iter 534/894 - loss 0.00813773 - time (sec): 33.68 - samples/sec: 1536.08 - lr: 0.000005 - momentum: 0.000000 |
|
2023-10-23 22:09:10,929 epoch 9 - iter 623/894 - loss 0.00743043 - time (sec): 39.40 - samples/sec: 1530.64 - lr: 0.000004 - momentum: 0.000000 |
|
2023-10-23 22:09:16,928 epoch 9 - iter 712/894 - loss 0.00654784 - time (sec): 45.40 - samples/sec: 1538.13 - lr: 0.000004 - momentum: 0.000000 |
|
2023-10-23 22:09:22,403 epoch 9 - iter 801/894 - loss 0.00640737 - time (sec): 50.87 - samples/sec: 1525.27 - lr: 0.000004 - momentum: 0.000000 |
|
2023-10-23 22:09:28,095 epoch 9 - iter 890/894 - loss 0.00639578 - time (sec): 56.56 - samples/sec: 1526.16 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-23 22:09:28,330 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:09:28,330 EPOCH 9 done: loss 0.0064 - lr: 0.000003 |
|
2023-10-23 22:09:34,553 DEV : loss 0.27773818373680115 - f1-score (micro avg) 0.7747 |
|
2023-10-23 22:09:34,574 saving best model |
|
2023-10-23 22:09:35,270 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:09:41,400 epoch 10 - iter 89/894 - loss 0.00131393 - time (sec): 6.13 - samples/sec: 1458.45 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-23 22:09:47,083 epoch 10 - iter 178/894 - loss 0.00128416 - time (sec): 11.81 - samples/sec: 1466.61 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-23 22:09:52,565 epoch 10 - iter 267/894 - loss 0.00231588 - time (sec): 17.29 - samples/sec: 1504.94 - lr: 0.000002 - momentum: 0.000000 |
|
2023-10-23 22:09:58,365 epoch 10 - iter 356/894 - loss 0.00225184 - time (sec): 23.09 - samples/sec: 1530.37 - lr: 0.000002 - momentum: 0.000000 |
|
2023-10-23 22:10:03,874 epoch 10 - iter 445/894 - loss 0.00250123 - time (sec): 28.60 - samples/sec: 1508.55 - lr: 0.000002 - momentum: 0.000000 |
|
2023-10-23 22:10:09,384 epoch 10 - iter 534/894 - loss 0.00292605 - time (sec): 34.11 - samples/sec: 1505.48 - lr: 0.000001 - momentum: 0.000000 |
|
2023-10-23 22:10:15,106 epoch 10 - iter 623/894 - loss 0.00273991 - time (sec): 39.84 - samples/sec: 1509.10 - lr: 0.000001 - momentum: 0.000000 |
|
2023-10-23 22:10:20,592 epoch 10 - iter 712/894 - loss 0.00327829 - time (sec): 45.32 - samples/sec: 1503.66 - lr: 0.000001 - momentum: 0.000000 |
|
2023-10-23 22:10:26,277 epoch 10 - iter 801/894 - loss 0.00401238 - time (sec): 51.01 - samples/sec: 1506.60 - lr: 0.000000 - momentum: 0.000000 |
|
2023-10-23 22:10:31,962 epoch 10 - iter 890/894 - loss 0.00370280 - time (sec): 56.69 - samples/sec: 1507.83 - lr: 0.000000 - momentum: 0.000000 |
|
2023-10-23 22:10:32,457 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:10:32,457 EPOCH 10 done: loss 0.0038 - lr: 0.000000 |
|
2023-10-23 22:10:38,700 DEV : loss 0.28241854906082153 - f1-score (micro avg) 0.774 |
|
2023-10-23 22:10:39,272 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:10:39,272 Loading model from best epoch ... |
|
2023-10-23 22:10:41,022 SequenceTagger predicts: Dictionary with 21 tags: O, S-loc, B-loc, E-loc, I-loc, S-pers, B-pers, E-pers, I-pers, S-org, B-org, E-org, I-org, S-prod, B-prod, E-prod, I-prod, S-time, B-time, E-time, I-time |
|
2023-10-23 22:10:45,841 |
|
Results: |
|
- F-score (micro) 0.7534 |
|
- F-score (macro) 0.6778 |
|
- Accuracy 0.6245 |
|
|
|
By class: |
|
precision recall f1-score support |
|
|
|
loc 0.8088 0.8658 0.8363 596 |
|
pers 0.7077 0.7417 0.7243 333 |
|
org 0.5487 0.4697 0.5061 132 |
|
prod 0.6441 0.5758 0.6080 66 |
|
time 0.7143 0.7143 0.7143 49 |
|
|
|
micro avg 0.7434 0.7636 0.7534 1176 |
|
macro avg 0.6847 0.6735 0.6778 1176 |
|
weighted avg 0.7378 0.7636 0.7496 1176 |
|
|
|
2023-10-23 22:10:45,841 ---------------------------------------------------------------------------------------------------- |
|
|