|
2023-10-23 20:04:33,691 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:04:33,692 Model: "SequenceTagger( |
|
(embeddings): TransformerWordEmbeddings( |
|
(model): BertModel( |
|
(embeddings): BertEmbeddings( |
|
(word_embeddings): Embedding(64001, 768) |
|
(position_embeddings): Embedding(512, 768) |
|
(token_type_embeddings): Embedding(2, 768) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(encoder): BertEncoder( |
|
(layer): ModuleList( |
|
(0): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(1): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(2): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(3): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(4): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(5): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(6): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(7): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(8): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(9): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(10): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(11): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
) |
|
) |
|
(pooler): BertPooler( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(activation): Tanh() |
|
) |
|
) |
|
) |
|
(locked_dropout): LockedDropout(p=0.5) |
|
(linear): Linear(in_features=768, out_features=21, bias=True) |
|
(loss_function): CrossEntropyLoss() |
|
)" |
|
2023-10-23 20:04:33,692 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:04:33,692 MultiCorpus: 3575 train + 1235 dev + 1266 test sentences |
|
- NER_HIPE_2022 Corpus: 3575 train + 1235 dev + 1266 test sentences - /home/ubuntu/.flair/datasets/ner_hipe_2022/v2.1/hipe2020/de/with_doc_seperator |
|
2023-10-23 20:04:33,692 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:04:33,692 Train: 3575 sentences |
|
2023-10-23 20:04:33,692 (train_with_dev=False, train_with_test=False) |
|
2023-10-23 20:04:33,692 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:04:33,693 Training Params: |
|
2023-10-23 20:04:33,693 - learning_rate: "3e-05" |
|
2023-10-23 20:04:33,693 - mini_batch_size: "4" |
|
2023-10-23 20:04:33,693 - max_epochs: "10" |
|
2023-10-23 20:04:33,693 - shuffle: "True" |
|
2023-10-23 20:04:33,693 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:04:33,693 Plugins: |
|
2023-10-23 20:04:33,693 - TensorboardLogger |
|
2023-10-23 20:04:33,693 - LinearScheduler | warmup_fraction: '0.1' |
|
2023-10-23 20:04:33,693 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:04:33,693 Final evaluation on model from best epoch (best-model.pt) |
|
2023-10-23 20:04:33,693 - metric: "('micro avg', 'f1-score')" |
|
2023-10-23 20:04:33,693 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:04:33,693 Computation: |
|
2023-10-23 20:04:33,693 - compute on device: cuda:0 |
|
2023-10-23 20:04:33,693 - embedding storage: none |
|
2023-10-23 20:04:33,693 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:04:33,693 Model training base path: "hmbench-hipe2020/de-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs4-wsFalse-e10-lr3e-05-poolingfirst-layers-1-crfFalse-1" |
|
2023-10-23 20:04:33,693 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:04:33,693 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:04:33,693 Logging anything other than scalars to TensorBoard is currently not supported. |
|
2023-10-23 20:04:39,937 epoch 1 - iter 89/894 - loss 3.05316532 - time (sec): 6.24 - samples/sec: 1380.68 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-23 20:04:45,315 epoch 1 - iter 178/894 - loss 1.89496396 - time (sec): 11.62 - samples/sec: 1415.74 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-23 20:04:50,827 epoch 1 - iter 267/894 - loss 1.38213089 - time (sec): 17.13 - samples/sec: 1476.52 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-23 20:04:56,382 epoch 1 - iter 356/894 - loss 1.12023789 - time (sec): 22.69 - samples/sec: 1474.59 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-23 20:05:01,849 epoch 1 - iter 445/894 - loss 0.95174384 - time (sec): 28.15 - samples/sec: 1501.52 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-23 20:05:07,270 epoch 1 - iter 534/894 - loss 0.83523472 - time (sec): 33.58 - samples/sec: 1498.51 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-23 20:05:12,852 epoch 1 - iter 623/894 - loss 0.74784062 - time (sec): 39.16 - samples/sec: 1507.54 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-23 20:05:18,709 epoch 1 - iter 712/894 - loss 0.67591015 - time (sec): 45.02 - samples/sec: 1526.07 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-23 20:05:24,294 epoch 1 - iter 801/894 - loss 0.62253117 - time (sec): 50.60 - samples/sec: 1534.13 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-23 20:05:29,887 epoch 1 - iter 890/894 - loss 0.58384563 - time (sec): 56.19 - samples/sec: 1534.13 - lr: 0.000030 - momentum: 0.000000 |
|
2023-10-23 20:05:30,126 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:05:30,126 EPOCH 1 done: loss 0.5819 - lr: 0.000030 |
|
2023-10-23 20:05:34,598 DEV : loss 0.1846158355474472 - f1-score (micro avg) 0.6478 |
|
2023-10-23 20:05:34,617 saving best model |
|
2023-10-23 20:05:35,169 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:05:41,041 epoch 2 - iter 89/894 - loss 0.17039932 - time (sec): 5.87 - samples/sec: 1640.40 - lr: 0.000030 - momentum: 0.000000 |
|
2023-10-23 20:05:46,577 epoch 2 - iter 178/894 - loss 0.15101709 - time (sec): 11.41 - samples/sec: 1588.09 - lr: 0.000029 - momentum: 0.000000 |
|
2023-10-23 20:05:52,191 epoch 2 - iter 267/894 - loss 0.15228742 - time (sec): 17.02 - samples/sec: 1549.12 - lr: 0.000029 - momentum: 0.000000 |
|
2023-10-23 20:05:57,658 epoch 2 - iter 356/894 - loss 0.14911380 - time (sec): 22.49 - samples/sec: 1546.34 - lr: 0.000029 - momentum: 0.000000 |
|
2023-10-23 20:06:03,176 epoch 2 - iter 445/894 - loss 0.14248969 - time (sec): 28.01 - samples/sec: 1535.66 - lr: 0.000028 - momentum: 0.000000 |
|
2023-10-23 20:06:08,785 epoch 2 - iter 534/894 - loss 0.14210291 - time (sec): 33.61 - samples/sec: 1543.64 - lr: 0.000028 - momentum: 0.000000 |
|
2023-10-23 20:06:14,394 epoch 2 - iter 623/894 - loss 0.13697959 - time (sec): 39.22 - samples/sec: 1546.81 - lr: 0.000028 - momentum: 0.000000 |
|
2023-10-23 20:06:20,110 epoch 2 - iter 712/894 - loss 0.13739609 - time (sec): 44.94 - samples/sec: 1548.84 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-23 20:06:25,618 epoch 2 - iter 801/894 - loss 0.13435609 - time (sec): 50.45 - samples/sec: 1539.46 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-23 20:06:31,150 epoch 2 - iter 890/894 - loss 0.13262331 - time (sec): 55.98 - samples/sec: 1541.50 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-23 20:06:31,379 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:06:31,379 EPOCH 2 done: loss 0.1324 - lr: 0.000027 |
|
2023-10-23 20:06:37,751 DEV : loss 0.15661019086837769 - f1-score (micro avg) 0.7258 |
|
2023-10-23 20:06:37,770 saving best model |
|
2023-10-23 20:06:38,516 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:06:44,028 epoch 3 - iter 89/894 - loss 0.07081377 - time (sec): 5.51 - samples/sec: 1467.63 - lr: 0.000026 - momentum: 0.000000 |
|
2023-10-23 20:06:49,733 epoch 3 - iter 178/894 - loss 0.06445548 - time (sec): 11.22 - samples/sec: 1539.23 - lr: 0.000026 - momentum: 0.000000 |
|
2023-10-23 20:06:55,369 epoch 3 - iter 267/894 - loss 0.07421548 - time (sec): 16.85 - samples/sec: 1520.71 - lr: 0.000026 - momentum: 0.000000 |
|
2023-10-23 20:07:00,871 epoch 3 - iter 356/894 - loss 0.07828797 - time (sec): 22.35 - samples/sec: 1516.78 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-23 20:07:06,328 epoch 3 - iter 445/894 - loss 0.08369600 - time (sec): 27.81 - samples/sec: 1503.21 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-23 20:07:11,900 epoch 3 - iter 534/894 - loss 0.08222038 - time (sec): 33.38 - samples/sec: 1511.91 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-23 20:07:17,546 epoch 3 - iter 623/894 - loss 0.08020079 - time (sec): 39.03 - samples/sec: 1519.33 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-23 20:07:23,017 epoch 3 - iter 712/894 - loss 0.08212380 - time (sec): 44.50 - samples/sec: 1513.26 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-23 20:07:28,867 epoch 3 - iter 801/894 - loss 0.08182659 - time (sec): 50.35 - samples/sec: 1513.21 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-23 20:07:34,426 epoch 3 - iter 890/894 - loss 0.08063802 - time (sec): 55.91 - samples/sec: 1523.62 - lr: 0.000023 - momentum: 0.000000 |
|
2023-10-23 20:07:34,955 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:07:34,956 EPOCH 3 done: loss 0.0810 - lr: 0.000023 |
|
2023-10-23 20:07:41,362 DEV : loss 0.1884111911058426 - f1-score (micro avg) 0.7402 |
|
2023-10-23 20:07:41,381 saving best model |
|
2023-10-23 20:07:42,172 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:07:47,935 epoch 4 - iter 89/894 - loss 0.06146136 - time (sec): 5.76 - samples/sec: 1573.87 - lr: 0.000023 - momentum: 0.000000 |
|
2023-10-23 20:07:53,490 epoch 4 - iter 178/894 - loss 0.05192372 - time (sec): 11.32 - samples/sec: 1550.68 - lr: 0.000023 - momentum: 0.000000 |
|
2023-10-23 20:07:59,004 epoch 4 - iter 267/894 - loss 0.05030131 - time (sec): 16.83 - samples/sec: 1531.00 - lr: 0.000022 - momentum: 0.000000 |
|
2023-10-23 20:08:04,492 epoch 4 - iter 356/894 - loss 0.04602569 - time (sec): 22.32 - samples/sec: 1521.68 - lr: 0.000022 - momentum: 0.000000 |
|
2023-10-23 20:08:10,309 epoch 4 - iter 445/894 - loss 0.04674359 - time (sec): 28.14 - samples/sec: 1528.69 - lr: 0.000022 - momentum: 0.000000 |
|
2023-10-23 20:08:15,817 epoch 4 - iter 534/894 - loss 0.04899786 - time (sec): 33.64 - samples/sec: 1511.87 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-23 20:08:21,368 epoch 4 - iter 623/894 - loss 0.04915412 - time (sec): 39.19 - samples/sec: 1508.74 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-23 20:08:26,930 epoch 4 - iter 712/894 - loss 0.05054854 - time (sec): 44.76 - samples/sec: 1511.23 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-23 20:08:32,927 epoch 4 - iter 801/894 - loss 0.05139540 - time (sec): 50.75 - samples/sec: 1523.11 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-23 20:08:38,584 epoch 4 - iter 890/894 - loss 0.05180747 - time (sec): 56.41 - samples/sec: 1528.34 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-23 20:08:38,820 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:08:38,820 EPOCH 4 done: loss 0.0520 - lr: 0.000020 |
|
2023-10-23 20:08:45,244 DEV : loss 0.2043798565864563 - f1-score (micro avg) 0.7489 |
|
2023-10-23 20:08:45,262 saving best model |
|
2023-10-23 20:08:45,958 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:08:51,390 epoch 5 - iter 89/894 - loss 0.03670468 - time (sec): 5.43 - samples/sec: 1419.82 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-23 20:08:56,933 epoch 5 - iter 178/894 - loss 0.03045295 - time (sec): 10.97 - samples/sec: 1455.41 - lr: 0.000019 - momentum: 0.000000 |
|
2023-10-23 20:09:02,754 epoch 5 - iter 267/894 - loss 0.03134785 - time (sec): 16.80 - samples/sec: 1492.41 - lr: 0.000019 - momentum: 0.000000 |
|
2023-10-23 20:09:08,318 epoch 5 - iter 356/894 - loss 0.03213895 - time (sec): 22.36 - samples/sec: 1495.96 - lr: 0.000019 - momentum: 0.000000 |
|
2023-10-23 20:09:13,823 epoch 5 - iter 445/894 - loss 0.03446495 - time (sec): 27.86 - samples/sec: 1498.94 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-23 20:09:19,744 epoch 5 - iter 534/894 - loss 0.03314168 - time (sec): 33.78 - samples/sec: 1522.79 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-23 20:09:25,190 epoch 5 - iter 623/894 - loss 0.03507241 - time (sec): 39.23 - samples/sec: 1516.98 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-23 20:09:30,956 epoch 5 - iter 712/894 - loss 0.03379707 - time (sec): 45.00 - samples/sec: 1527.51 - lr: 0.000017 - momentum: 0.000000 |
|
2023-10-23 20:09:36,428 epoch 5 - iter 801/894 - loss 0.03498774 - time (sec): 50.47 - samples/sec: 1518.25 - lr: 0.000017 - momentum: 0.000000 |
|
2023-10-23 20:09:42,441 epoch 5 - iter 890/894 - loss 0.03520890 - time (sec): 56.48 - samples/sec: 1526.03 - lr: 0.000017 - momentum: 0.000000 |
|
2023-10-23 20:09:42,682 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:09:42,682 EPOCH 5 done: loss 0.0351 - lr: 0.000017 |
|
2023-10-23 20:09:49,126 DEV : loss 0.21067775785923004 - f1-score (micro avg) 0.7814 |
|
2023-10-23 20:09:49,144 saving best model |
|
2023-10-23 20:09:49,860 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:09:55,405 epoch 6 - iter 89/894 - loss 0.02804411 - time (sec): 5.54 - samples/sec: 1519.87 - lr: 0.000016 - momentum: 0.000000 |
|
2023-10-23 20:10:00,924 epoch 6 - iter 178/894 - loss 0.02290773 - time (sec): 11.06 - samples/sec: 1518.53 - lr: 0.000016 - momentum: 0.000000 |
|
2023-10-23 20:10:06,965 epoch 6 - iter 267/894 - loss 0.02282067 - time (sec): 17.10 - samples/sec: 1561.00 - lr: 0.000016 - momentum: 0.000000 |
|
2023-10-23 20:10:12,796 epoch 6 - iter 356/894 - loss 0.02842765 - time (sec): 22.94 - samples/sec: 1557.26 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-23 20:10:18,543 epoch 6 - iter 445/894 - loss 0.02595776 - time (sec): 28.68 - samples/sec: 1554.29 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-23 20:10:24,274 epoch 6 - iter 534/894 - loss 0.02372176 - time (sec): 34.41 - samples/sec: 1548.41 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-23 20:10:29,688 epoch 6 - iter 623/894 - loss 0.02367998 - time (sec): 39.83 - samples/sec: 1523.75 - lr: 0.000014 - momentum: 0.000000 |
|
2023-10-23 20:10:35,272 epoch 6 - iter 712/894 - loss 0.02452133 - time (sec): 45.41 - samples/sec: 1525.84 - lr: 0.000014 - momentum: 0.000000 |
|
2023-10-23 20:10:40,796 epoch 6 - iter 801/894 - loss 0.02538859 - time (sec): 50.93 - samples/sec: 1522.16 - lr: 0.000014 - momentum: 0.000000 |
|
2023-10-23 20:10:46,436 epoch 6 - iter 890/894 - loss 0.02556957 - time (sec): 56.57 - samples/sec: 1525.68 - lr: 0.000013 - momentum: 0.000000 |
|
2023-10-23 20:10:46,670 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:10:46,670 EPOCH 6 done: loss 0.0255 - lr: 0.000013 |
|
2023-10-23 20:10:53,124 DEV : loss 0.24338190257549286 - f1-score (micro avg) 0.7716 |
|
2023-10-23 20:10:53,143 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:10:59,007 epoch 7 - iter 89/894 - loss 0.01776813 - time (sec): 5.86 - samples/sec: 1619.74 - lr: 0.000013 - momentum: 0.000000 |
|
2023-10-23 20:11:04,738 epoch 7 - iter 178/894 - loss 0.01191667 - time (sec): 11.59 - samples/sec: 1556.36 - lr: 0.000013 - momentum: 0.000000 |
|
2023-10-23 20:11:10,506 epoch 7 - iter 267/894 - loss 0.01290126 - time (sec): 17.36 - samples/sec: 1574.21 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-23 20:11:16,057 epoch 7 - iter 356/894 - loss 0.01279215 - time (sec): 22.91 - samples/sec: 1548.53 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-23 20:11:21,537 epoch 7 - iter 445/894 - loss 0.01358427 - time (sec): 28.39 - samples/sec: 1528.44 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-23 20:11:27,070 epoch 7 - iter 534/894 - loss 0.01257668 - time (sec): 33.93 - samples/sec: 1523.57 - lr: 0.000011 - momentum: 0.000000 |
|
2023-10-23 20:11:32,940 epoch 7 - iter 623/894 - loss 0.01268051 - time (sec): 39.80 - samples/sec: 1533.14 - lr: 0.000011 - momentum: 0.000000 |
|
2023-10-23 20:11:38,478 epoch 7 - iter 712/894 - loss 0.01210361 - time (sec): 45.33 - samples/sec: 1536.64 - lr: 0.000011 - momentum: 0.000000 |
|
2023-10-23 20:11:44,123 epoch 7 - iter 801/894 - loss 0.01229478 - time (sec): 50.98 - samples/sec: 1528.30 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-23 20:11:49,672 epoch 7 - iter 890/894 - loss 0.01180895 - time (sec): 56.53 - samples/sec: 1527.31 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-23 20:11:49,898 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:11:49,898 EPOCH 7 done: loss 0.0119 - lr: 0.000010 |
|
2023-10-23 20:11:56,335 DEV : loss 0.232055202126503 - f1-score (micro avg) 0.7753 |
|
2023-10-23 20:11:56,354 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:12:01,939 epoch 8 - iter 89/894 - loss 0.01217064 - time (sec): 5.58 - samples/sec: 1505.52 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-23 20:12:07,516 epoch 8 - iter 178/894 - loss 0.01152484 - time (sec): 11.16 - samples/sec: 1523.30 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-23 20:12:13,555 epoch 8 - iter 267/894 - loss 0.01348119 - time (sec): 17.20 - samples/sec: 1535.72 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-23 20:12:19,074 epoch 8 - iter 356/894 - loss 0.01256261 - time (sec): 22.72 - samples/sec: 1527.13 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-23 20:12:24,803 epoch 8 - iter 445/894 - loss 0.01250572 - time (sec): 28.45 - samples/sec: 1528.89 - lr: 0.000008 - momentum: 0.000000 |
|
2023-10-23 20:12:30,242 epoch 8 - iter 534/894 - loss 0.01172981 - time (sec): 33.89 - samples/sec: 1512.17 - lr: 0.000008 - momentum: 0.000000 |
|
2023-10-23 20:12:35,910 epoch 8 - iter 623/894 - loss 0.01075653 - time (sec): 39.56 - samples/sec: 1509.37 - lr: 0.000008 - momentum: 0.000000 |
|
2023-10-23 20:12:41,530 epoch 8 - iter 712/894 - loss 0.01092404 - time (sec): 45.18 - samples/sec: 1513.05 - lr: 0.000007 - momentum: 0.000000 |
|
2023-10-23 20:12:47,163 epoch 8 - iter 801/894 - loss 0.01050008 - time (sec): 50.81 - samples/sec: 1513.09 - lr: 0.000007 - momentum: 0.000000 |
|
2023-10-23 20:12:52,984 epoch 8 - iter 890/894 - loss 0.00971863 - time (sec): 56.63 - samples/sec: 1519.97 - lr: 0.000007 - momentum: 0.000000 |
|
2023-10-23 20:12:53,272 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:12:53,272 EPOCH 8 done: loss 0.0097 - lr: 0.000007 |
|
2023-10-23 20:12:59,742 DEV : loss 0.2622121274471283 - f1-score (micro avg) 0.7694 |
|
2023-10-23 20:12:59,761 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:13:05,573 epoch 9 - iter 89/894 - loss 0.00784636 - time (sec): 5.81 - samples/sec: 1564.08 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-23 20:13:11,386 epoch 9 - iter 178/894 - loss 0.00575954 - time (sec): 11.62 - samples/sec: 1569.07 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-23 20:13:17,114 epoch 9 - iter 267/894 - loss 0.00627537 - time (sec): 17.35 - samples/sec: 1531.04 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-23 20:13:22,587 epoch 9 - iter 356/894 - loss 0.00648185 - time (sec): 22.83 - samples/sec: 1501.26 - lr: 0.000005 - momentum: 0.000000 |
|
2023-10-23 20:13:28,127 epoch 9 - iter 445/894 - loss 0.00697479 - time (sec): 28.36 - samples/sec: 1492.02 - lr: 0.000005 - momentum: 0.000000 |
|
2023-10-23 20:13:33,726 epoch 9 - iter 534/894 - loss 0.00667577 - time (sec): 33.96 - samples/sec: 1496.38 - lr: 0.000005 - momentum: 0.000000 |
|
2023-10-23 20:13:39,256 epoch 9 - iter 623/894 - loss 0.00602441 - time (sec): 39.49 - samples/sec: 1500.84 - lr: 0.000004 - momentum: 0.000000 |
|
2023-10-23 20:13:45,253 epoch 9 - iter 712/894 - loss 0.00609223 - time (sec): 45.49 - samples/sec: 1535.76 - lr: 0.000004 - momentum: 0.000000 |
|
2023-10-23 20:13:50,815 epoch 9 - iter 801/894 - loss 0.00575853 - time (sec): 51.05 - samples/sec: 1532.88 - lr: 0.000004 - momentum: 0.000000 |
|
2023-10-23 20:13:56,349 epoch 9 - iter 890/894 - loss 0.00559841 - time (sec): 56.59 - samples/sec: 1524.82 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-23 20:13:56,586 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:13:56,586 EPOCH 9 done: loss 0.0056 - lr: 0.000003 |
|
2023-10-23 20:14:03,087 DEV : loss 0.2699427008628845 - f1-score (micro avg) 0.7751 |
|
2023-10-23 20:14:03,106 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:14:08,868 epoch 10 - iter 89/894 - loss 0.00285995 - time (sec): 5.76 - samples/sec: 1522.41 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-23 20:14:14,429 epoch 10 - iter 178/894 - loss 0.00175943 - time (sec): 11.32 - samples/sec: 1526.78 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-23 20:14:19,974 epoch 10 - iter 267/894 - loss 0.00142868 - time (sec): 16.87 - samples/sec: 1506.26 - lr: 0.000002 - momentum: 0.000000 |
|
2023-10-23 20:14:25,556 epoch 10 - iter 356/894 - loss 0.00227438 - time (sec): 22.45 - samples/sec: 1496.90 - lr: 0.000002 - momentum: 0.000000 |
|
2023-10-23 20:14:31,119 epoch 10 - iter 445/894 - loss 0.00225331 - time (sec): 28.01 - samples/sec: 1492.79 - lr: 0.000002 - momentum: 0.000000 |
|
2023-10-23 20:14:36,772 epoch 10 - iter 534/894 - loss 0.00274936 - time (sec): 33.67 - samples/sec: 1492.95 - lr: 0.000001 - momentum: 0.000000 |
|
2023-10-23 20:14:42,348 epoch 10 - iter 623/894 - loss 0.00267072 - time (sec): 39.24 - samples/sec: 1488.28 - lr: 0.000001 - momentum: 0.000000 |
|
2023-10-23 20:14:47,855 epoch 10 - iter 712/894 - loss 0.00255903 - time (sec): 44.75 - samples/sec: 1490.55 - lr: 0.000001 - momentum: 0.000000 |
|
2023-10-23 20:14:54,025 epoch 10 - iter 801/894 - loss 0.00256285 - time (sec): 50.92 - samples/sec: 1520.63 - lr: 0.000000 - momentum: 0.000000 |
|
2023-10-23 20:14:59,611 epoch 10 - iter 890/894 - loss 0.00299517 - time (sec): 56.50 - samples/sec: 1516.23 - lr: 0.000000 - momentum: 0.000000 |
|
2023-10-23 20:15:00,029 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:15:00,029 EPOCH 10 done: loss 0.0030 - lr: 0.000000 |
|
2023-10-23 20:15:06,498 DEV : loss 0.2668047845363617 - f1-score (micro avg) 0.7798 |
|
2023-10-23 20:15:07,075 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 20:15:07,076 Loading model from best epoch ... |
|
2023-10-23 20:15:09,025 SequenceTagger predicts: Dictionary with 21 tags: O, S-loc, B-loc, E-loc, I-loc, S-pers, B-pers, E-pers, I-pers, S-org, B-org, E-org, I-org, S-prod, B-prod, E-prod, I-prod, S-time, B-time, E-time, I-time |
|
2023-10-23 20:15:13,561 |
|
Results: |
|
- F-score (micro) 0.7372 |
|
- F-score (macro) 0.6555 |
|
- Accuracy 0.6024 |
|
|
|
By class: |
|
precision recall f1-score support |
|
|
|
loc 0.8000 0.8389 0.8190 596 |
|
pers 0.6692 0.7898 0.7245 333 |
|
org 0.5000 0.4167 0.4545 132 |
|
prod 0.6731 0.5303 0.5932 66 |
|
time 0.6604 0.7143 0.6863 49 |
|
|
|
micro avg 0.7202 0.7551 0.7372 1176 |
|
macro avg 0.6605 0.6580 0.6555 1176 |
|
weighted avg 0.7164 0.7551 0.7331 1176 |
|
|
|
2023-10-23 20:15:13,561 ---------------------------------------------------------------------------------------------------- |
|
|