|
2023-10-23 22:30:17,229 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:30:17,230 Model: "SequenceTagger( |
|
(embeddings): TransformerWordEmbeddings( |
|
(model): BertModel( |
|
(embeddings): BertEmbeddings( |
|
(word_embeddings): Embedding(64001, 768) |
|
(position_embeddings): Embedding(512, 768) |
|
(token_type_embeddings): Embedding(2, 768) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(encoder): BertEncoder( |
|
(layer): ModuleList( |
|
(0): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(1): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(2): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(3): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(4): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(5): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(6): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(7): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(8): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(9): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(10): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(11): BertLayer( |
|
(attention): BertAttention( |
|
(self): BertSelfAttention( |
|
(query): Linear(in_features=768, out_features=768, bias=True) |
|
(key): Linear(in_features=768, out_features=768, bias=True) |
|
(value): Linear(in_features=768, out_features=768, bias=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
(output): BertSelfOutput( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
(intermediate): BertIntermediate( |
|
(dense): Linear(in_features=768, out_features=3072, bias=True) |
|
(intermediate_act_fn): GELUActivation() |
|
) |
|
(output): BertOutput( |
|
(dense): Linear(in_features=3072, out_features=768, bias=True) |
|
(LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True) |
|
(dropout): Dropout(p=0.1, inplace=False) |
|
) |
|
) |
|
) |
|
) |
|
(pooler): BertPooler( |
|
(dense): Linear(in_features=768, out_features=768, bias=True) |
|
(activation): Tanh() |
|
) |
|
) |
|
) |
|
(locked_dropout): LockedDropout(p=0.5) |
|
(linear): Linear(in_features=768, out_features=21, bias=True) |
|
(loss_function): CrossEntropyLoss() |
|
)" |
|
2023-10-23 22:30:17,230 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:30:17,230 MultiCorpus: 3575 train + 1235 dev + 1266 test sentences |
|
- NER_HIPE_2022 Corpus: 3575 train + 1235 dev + 1266 test sentences - /home/ubuntu/.flair/datasets/ner_hipe_2022/v2.1/hipe2020/de/with_doc_seperator |
|
2023-10-23 22:30:17,230 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:30:17,230 Train: 3575 sentences |
|
2023-10-23 22:30:17,230 (train_with_dev=False, train_with_test=False) |
|
2023-10-23 22:30:17,230 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:30:17,230 Training Params: |
|
2023-10-23 22:30:17,230 - learning_rate: "5e-05" |
|
2023-10-23 22:30:17,230 - mini_batch_size: "8" |
|
2023-10-23 22:30:17,230 - max_epochs: "10" |
|
2023-10-23 22:30:17,230 - shuffle: "True" |
|
2023-10-23 22:30:17,230 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:30:17,230 Plugins: |
|
2023-10-23 22:30:17,230 - TensorboardLogger |
|
2023-10-23 22:30:17,230 - LinearScheduler | warmup_fraction: '0.1' |
|
2023-10-23 22:30:17,230 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:30:17,230 Final evaluation on model from best epoch (best-model.pt) |
|
2023-10-23 22:30:17,230 - metric: "('micro avg', 'f1-score')" |
|
2023-10-23 22:30:17,230 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:30:17,230 Computation: |
|
2023-10-23 22:30:17,230 - compute on device: cuda:0 |
|
2023-10-23 22:30:17,230 - embedding storage: none |
|
2023-10-23 22:30:17,231 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:30:17,231 Model training base path: "hmbench-hipe2020/de-dbmdz/bert-base-historic-multilingual-64k-td-cased-bs8-wsFalse-e10-lr5e-05-poolingfirst-layers-1-crfFalse-4" |
|
2023-10-23 22:30:17,231 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:30:17,231 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:30:17,231 Logging anything other than scalars to TensorBoard is currently not supported. |
|
2023-10-23 22:30:21,267 epoch 1 - iter 44/447 - loss 2.61747162 - time (sec): 4.04 - samples/sec: 2035.61 - lr: 0.000005 - momentum: 0.000000 |
|
2023-10-23 22:30:25,388 epoch 1 - iter 88/447 - loss 1.60840445 - time (sec): 8.16 - samples/sec: 2091.49 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-23 22:30:29,288 epoch 1 - iter 132/447 - loss 1.22488104 - time (sec): 12.06 - samples/sec: 2090.31 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-23 22:30:33,068 epoch 1 - iter 176/447 - loss 1.02876906 - time (sec): 15.84 - samples/sec: 2108.24 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-23 22:30:36,953 epoch 1 - iter 220/447 - loss 0.87744594 - time (sec): 19.72 - samples/sec: 2129.38 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-23 22:30:40,666 epoch 1 - iter 264/447 - loss 0.77450690 - time (sec): 23.43 - samples/sec: 2142.27 - lr: 0.000029 - momentum: 0.000000 |
|
2023-10-23 22:30:44,600 epoch 1 - iter 308/447 - loss 0.69356769 - time (sec): 27.37 - samples/sec: 2149.59 - lr: 0.000034 - momentum: 0.000000 |
|
2023-10-23 22:30:48,495 epoch 1 - iter 352/447 - loss 0.62969553 - time (sec): 31.26 - samples/sec: 2153.68 - lr: 0.000039 - momentum: 0.000000 |
|
2023-10-23 22:30:52,331 epoch 1 - iter 396/447 - loss 0.58382135 - time (sec): 35.10 - samples/sec: 2154.72 - lr: 0.000044 - momentum: 0.000000 |
|
2023-10-23 22:30:56,593 epoch 1 - iter 440/447 - loss 0.53971211 - time (sec): 39.36 - samples/sec: 2158.94 - lr: 0.000049 - momentum: 0.000000 |
|
2023-10-23 22:30:57,344 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:30:57,344 EPOCH 1 done: loss 0.5354 - lr: 0.000049 |
|
2023-10-23 22:31:02,145 DEV : loss 0.14619310200214386 - f1-score (micro avg) 0.6262 |
|
2023-10-23 22:31:02,165 saving best model |
|
2023-10-23 22:31:02,636 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:31:06,348 epoch 2 - iter 44/447 - loss 0.13529306 - time (sec): 3.71 - samples/sec: 2152.18 - lr: 0.000049 - momentum: 0.000000 |
|
2023-10-23 22:31:10,178 epoch 2 - iter 88/447 - loss 0.14040657 - time (sec): 7.54 - samples/sec: 2231.49 - lr: 0.000049 - momentum: 0.000000 |
|
2023-10-23 22:31:14,578 epoch 2 - iter 132/447 - loss 0.14036495 - time (sec): 11.94 - samples/sec: 2176.35 - lr: 0.000048 - momentum: 0.000000 |
|
2023-10-23 22:31:18,453 epoch 2 - iter 176/447 - loss 0.14258655 - time (sec): 15.82 - samples/sec: 2166.64 - lr: 0.000048 - momentum: 0.000000 |
|
2023-10-23 22:31:22,524 epoch 2 - iter 220/447 - loss 0.14221669 - time (sec): 19.89 - samples/sec: 2156.71 - lr: 0.000047 - momentum: 0.000000 |
|
2023-10-23 22:31:26,576 epoch 2 - iter 264/447 - loss 0.13370512 - time (sec): 23.94 - samples/sec: 2131.60 - lr: 0.000047 - momentum: 0.000000 |
|
2023-10-23 22:31:30,355 epoch 2 - iter 308/447 - loss 0.13477548 - time (sec): 27.72 - samples/sec: 2134.27 - lr: 0.000046 - momentum: 0.000000 |
|
2023-10-23 22:31:34,090 epoch 2 - iter 352/447 - loss 0.13109257 - time (sec): 31.45 - samples/sec: 2127.08 - lr: 0.000046 - momentum: 0.000000 |
|
2023-10-23 22:31:38,417 epoch 2 - iter 396/447 - loss 0.13099059 - time (sec): 35.78 - samples/sec: 2127.77 - lr: 0.000045 - momentum: 0.000000 |
|
2023-10-23 22:31:42,421 epoch 2 - iter 440/447 - loss 0.12705414 - time (sec): 39.78 - samples/sec: 2138.94 - lr: 0.000045 - momentum: 0.000000 |
|
2023-10-23 22:31:43,024 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:31:43,024 EPOCH 2 done: loss 0.1267 - lr: 0.000045 |
|
2023-10-23 22:31:49,495 DEV : loss 0.13422338664531708 - f1-score (micro avg) 0.6981 |
|
2023-10-23 22:31:49,516 saving best model |
|
2023-10-23 22:31:50,207 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:31:54,081 epoch 3 - iter 44/447 - loss 0.06517716 - time (sec): 3.87 - samples/sec: 2019.03 - lr: 0.000044 - momentum: 0.000000 |
|
2023-10-23 22:31:58,238 epoch 3 - iter 88/447 - loss 0.06909628 - time (sec): 8.03 - samples/sec: 2017.65 - lr: 0.000043 - momentum: 0.000000 |
|
2023-10-23 22:32:01,998 epoch 3 - iter 132/447 - loss 0.06970190 - time (sec): 11.79 - samples/sec: 2077.66 - lr: 0.000043 - momentum: 0.000000 |
|
2023-10-23 22:32:06,132 epoch 3 - iter 176/447 - loss 0.07724232 - time (sec): 15.92 - samples/sec: 2113.01 - lr: 0.000042 - momentum: 0.000000 |
|
2023-10-23 22:32:09,840 epoch 3 - iter 220/447 - loss 0.07409603 - time (sec): 19.63 - samples/sec: 2092.92 - lr: 0.000042 - momentum: 0.000000 |
|
2023-10-23 22:32:14,305 epoch 3 - iter 264/447 - loss 0.07522591 - time (sec): 24.10 - samples/sec: 2087.35 - lr: 0.000041 - momentum: 0.000000 |
|
2023-10-23 22:32:18,612 epoch 3 - iter 308/447 - loss 0.07450279 - time (sec): 28.40 - samples/sec: 2095.97 - lr: 0.000041 - momentum: 0.000000 |
|
2023-10-23 22:32:22,409 epoch 3 - iter 352/447 - loss 0.07276381 - time (sec): 32.20 - samples/sec: 2114.52 - lr: 0.000040 - momentum: 0.000000 |
|
2023-10-23 22:32:26,239 epoch 3 - iter 396/447 - loss 0.07476661 - time (sec): 36.03 - samples/sec: 2124.32 - lr: 0.000040 - momentum: 0.000000 |
|
2023-10-23 22:32:30,277 epoch 3 - iter 440/447 - loss 0.07597918 - time (sec): 40.07 - samples/sec: 2127.19 - lr: 0.000039 - momentum: 0.000000 |
|
2023-10-23 22:32:30,858 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:32:30,859 EPOCH 3 done: loss 0.0758 - lr: 0.000039 |
|
2023-10-23 22:32:37,348 DEV : loss 0.13163481652736664 - f1-score (micro avg) 0.7203 |
|
2023-10-23 22:32:37,368 saving best model |
|
2023-10-23 22:32:37,995 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:32:41,892 epoch 4 - iter 44/447 - loss 0.04397609 - time (sec): 3.90 - samples/sec: 2153.22 - lr: 0.000038 - momentum: 0.000000 |
|
2023-10-23 22:32:45,651 epoch 4 - iter 88/447 - loss 0.04709654 - time (sec): 7.66 - samples/sec: 2137.42 - lr: 0.000038 - momentum: 0.000000 |
|
2023-10-23 22:32:49,533 epoch 4 - iter 132/447 - loss 0.04593196 - time (sec): 11.54 - samples/sec: 2166.47 - lr: 0.000037 - momentum: 0.000000 |
|
2023-10-23 22:32:53,857 epoch 4 - iter 176/447 - loss 0.04818047 - time (sec): 15.86 - samples/sec: 2160.62 - lr: 0.000037 - momentum: 0.000000 |
|
2023-10-23 22:32:58,108 epoch 4 - iter 220/447 - loss 0.04713671 - time (sec): 20.11 - samples/sec: 2134.79 - lr: 0.000036 - momentum: 0.000000 |
|
2023-10-23 22:33:01,959 epoch 4 - iter 264/447 - loss 0.04951053 - time (sec): 23.96 - samples/sec: 2136.25 - lr: 0.000036 - momentum: 0.000000 |
|
2023-10-23 22:33:05,688 epoch 4 - iter 308/447 - loss 0.04805972 - time (sec): 27.69 - samples/sec: 2144.03 - lr: 0.000035 - momentum: 0.000000 |
|
2023-10-23 22:33:09,695 epoch 4 - iter 352/447 - loss 0.04776840 - time (sec): 31.70 - samples/sec: 2140.57 - lr: 0.000035 - momentum: 0.000000 |
|
2023-10-23 22:33:13,577 epoch 4 - iter 396/447 - loss 0.04732331 - time (sec): 35.58 - samples/sec: 2137.75 - lr: 0.000034 - momentum: 0.000000 |
|
2023-10-23 22:33:17,808 epoch 4 - iter 440/447 - loss 0.04667565 - time (sec): 39.81 - samples/sec: 2138.30 - lr: 0.000033 - momentum: 0.000000 |
|
2023-10-23 22:33:18,478 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:33:18,479 EPOCH 4 done: loss 0.0468 - lr: 0.000033 |
|
2023-10-23 22:33:24,957 DEV : loss 0.15146000683307648 - f1-score (micro avg) 0.739 |
|
2023-10-23 22:33:24,977 saving best model |
|
2023-10-23 22:33:25,573 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:33:29,808 epoch 5 - iter 44/447 - loss 0.01668860 - time (sec): 4.23 - samples/sec: 2114.97 - lr: 0.000033 - momentum: 0.000000 |
|
2023-10-23 22:33:33,878 epoch 5 - iter 88/447 - loss 0.02476922 - time (sec): 8.30 - samples/sec: 2093.66 - lr: 0.000032 - momentum: 0.000000 |
|
2023-10-23 22:33:37,628 epoch 5 - iter 132/447 - loss 0.02649246 - time (sec): 12.05 - samples/sec: 2115.90 - lr: 0.000032 - momentum: 0.000000 |
|
2023-10-23 22:33:41,750 epoch 5 - iter 176/447 - loss 0.03062251 - time (sec): 16.18 - samples/sec: 2133.69 - lr: 0.000031 - momentum: 0.000000 |
|
2023-10-23 22:33:46,044 epoch 5 - iter 220/447 - loss 0.02841129 - time (sec): 20.47 - samples/sec: 2159.21 - lr: 0.000031 - momentum: 0.000000 |
|
2023-10-23 22:33:49,757 epoch 5 - iter 264/447 - loss 0.02981830 - time (sec): 24.18 - samples/sec: 2148.21 - lr: 0.000030 - momentum: 0.000000 |
|
2023-10-23 22:33:53,930 epoch 5 - iter 308/447 - loss 0.03059182 - time (sec): 28.36 - samples/sec: 2136.13 - lr: 0.000030 - momentum: 0.000000 |
|
2023-10-23 22:33:57,668 epoch 5 - iter 352/447 - loss 0.03094377 - time (sec): 32.09 - samples/sec: 2139.50 - lr: 0.000029 - momentum: 0.000000 |
|
2023-10-23 22:34:01,555 epoch 5 - iter 396/447 - loss 0.02981108 - time (sec): 35.98 - samples/sec: 2131.11 - lr: 0.000028 - momentum: 0.000000 |
|
2023-10-23 22:34:05,425 epoch 5 - iter 440/447 - loss 0.02945931 - time (sec): 39.85 - samples/sec: 2135.15 - lr: 0.000028 - momentum: 0.000000 |
|
2023-10-23 22:34:06,116 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:34:06,117 EPOCH 5 done: loss 0.0293 - lr: 0.000028 |
|
2023-10-23 22:34:12,590 DEV : loss 0.22155629098415375 - f1-score (micro avg) 0.7493 |
|
2023-10-23 22:34:12,611 saving best model |
|
2023-10-23 22:34:13,209 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:34:16,768 epoch 6 - iter 44/447 - loss 0.01815000 - time (sec): 3.56 - samples/sec: 2090.34 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-23 22:34:20,690 epoch 6 - iter 88/447 - loss 0.01807258 - time (sec): 7.48 - samples/sec: 2124.94 - lr: 0.000027 - momentum: 0.000000 |
|
2023-10-23 22:34:24,827 epoch 6 - iter 132/447 - loss 0.02179131 - time (sec): 11.62 - samples/sec: 2158.82 - lr: 0.000026 - momentum: 0.000000 |
|
2023-10-23 22:34:28,893 epoch 6 - iter 176/447 - loss 0.02160888 - time (sec): 15.68 - samples/sec: 2146.17 - lr: 0.000026 - momentum: 0.000000 |
|
2023-10-23 22:34:33,305 epoch 6 - iter 220/447 - loss 0.02172418 - time (sec): 20.09 - samples/sec: 2147.11 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-23 22:34:37,007 epoch 6 - iter 264/447 - loss 0.02170470 - time (sec): 23.80 - samples/sec: 2151.46 - lr: 0.000025 - momentum: 0.000000 |
|
2023-10-23 22:34:41,178 epoch 6 - iter 308/447 - loss 0.02234828 - time (sec): 27.97 - samples/sec: 2144.67 - lr: 0.000024 - momentum: 0.000000 |
|
2023-10-23 22:34:45,340 epoch 6 - iter 352/447 - loss 0.02114853 - time (sec): 32.13 - samples/sec: 2131.89 - lr: 0.000023 - momentum: 0.000000 |
|
2023-10-23 22:34:49,332 epoch 6 - iter 396/447 - loss 0.02105417 - time (sec): 36.12 - samples/sec: 2125.31 - lr: 0.000023 - momentum: 0.000000 |
|
2023-10-23 22:34:53,202 epoch 6 - iter 440/447 - loss 0.01999840 - time (sec): 39.99 - samples/sec: 2126.53 - lr: 0.000022 - momentum: 0.000000 |
|
2023-10-23 22:34:53,908 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:34:53,908 EPOCH 6 done: loss 0.0199 - lr: 0.000022 |
|
2023-10-23 22:35:00,402 DEV : loss 0.2293727993965149 - f1-score (micro avg) 0.7686 |
|
2023-10-23 22:35:00,423 saving best model |
|
2023-10-23 22:35:01,014 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:35:05,341 epoch 7 - iter 44/447 - loss 0.00980116 - time (sec): 4.33 - samples/sec: 2168.37 - lr: 0.000022 - momentum: 0.000000 |
|
2023-10-23 22:35:09,245 epoch 7 - iter 88/447 - loss 0.01146217 - time (sec): 8.23 - samples/sec: 2133.32 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-23 22:35:13,013 epoch 7 - iter 132/447 - loss 0.01089331 - time (sec): 12.00 - samples/sec: 2113.50 - lr: 0.000021 - momentum: 0.000000 |
|
2023-10-23 22:35:16,734 epoch 7 - iter 176/447 - loss 0.01100412 - time (sec): 15.72 - samples/sec: 2103.62 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-23 22:35:20,770 epoch 7 - iter 220/447 - loss 0.01260891 - time (sec): 19.76 - samples/sec: 2113.68 - lr: 0.000020 - momentum: 0.000000 |
|
2023-10-23 22:35:24,654 epoch 7 - iter 264/447 - loss 0.01499648 - time (sec): 23.64 - samples/sec: 2103.53 - lr: 0.000019 - momentum: 0.000000 |
|
2023-10-23 22:35:28,932 epoch 7 - iter 308/447 - loss 0.01520584 - time (sec): 27.92 - samples/sec: 2115.87 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-23 22:35:33,256 epoch 7 - iter 352/447 - loss 0.01474127 - time (sec): 32.24 - samples/sec: 2134.52 - lr: 0.000018 - momentum: 0.000000 |
|
2023-10-23 22:35:37,126 epoch 7 - iter 396/447 - loss 0.01433985 - time (sec): 36.11 - samples/sec: 2131.72 - lr: 0.000017 - momentum: 0.000000 |
|
2023-10-23 22:35:40,902 epoch 7 - iter 440/447 - loss 0.01344578 - time (sec): 39.89 - samples/sec: 2131.24 - lr: 0.000017 - momentum: 0.000000 |
|
2023-10-23 22:35:41,529 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:35:41,529 EPOCH 7 done: loss 0.0133 - lr: 0.000017 |
|
2023-10-23 22:35:48,021 DEV : loss 0.2617715001106262 - f1-score (micro avg) 0.7712 |
|
2023-10-23 22:35:48,042 saving best model |
|
2023-10-23 22:35:48,634 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:35:52,644 epoch 8 - iter 44/447 - loss 0.00739272 - time (sec): 4.01 - samples/sec: 2124.55 - lr: 0.000016 - momentum: 0.000000 |
|
2023-10-23 22:35:56,893 epoch 8 - iter 88/447 - loss 0.00595935 - time (sec): 8.26 - samples/sec: 2069.00 - lr: 0.000016 - momentum: 0.000000 |
|
2023-10-23 22:36:00,775 epoch 8 - iter 132/447 - loss 0.00894853 - time (sec): 12.14 - samples/sec: 2110.05 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-23 22:36:05,398 epoch 8 - iter 176/447 - loss 0.00734796 - time (sec): 16.76 - samples/sec: 2104.25 - lr: 0.000015 - momentum: 0.000000 |
|
2023-10-23 22:36:09,056 epoch 8 - iter 220/447 - loss 0.00672069 - time (sec): 20.42 - samples/sec: 2108.20 - lr: 0.000014 - momentum: 0.000000 |
|
2023-10-23 22:36:12,942 epoch 8 - iter 264/447 - loss 0.00679148 - time (sec): 24.31 - samples/sec: 2124.47 - lr: 0.000013 - momentum: 0.000000 |
|
2023-10-23 22:36:16,667 epoch 8 - iter 308/447 - loss 0.00768702 - time (sec): 28.03 - samples/sec: 2133.16 - lr: 0.000013 - momentum: 0.000000 |
|
2023-10-23 22:36:20,306 epoch 8 - iter 352/447 - loss 0.00760444 - time (sec): 31.67 - samples/sec: 2125.31 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-23 22:36:24,266 epoch 8 - iter 396/447 - loss 0.00734192 - time (sec): 35.63 - samples/sec: 2129.40 - lr: 0.000012 - momentum: 0.000000 |
|
2023-10-23 22:36:28,646 epoch 8 - iter 440/447 - loss 0.00806862 - time (sec): 40.01 - samples/sec: 2128.80 - lr: 0.000011 - momentum: 0.000000 |
|
2023-10-23 22:36:29,337 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:36:29,337 EPOCH 8 done: loss 0.0080 - lr: 0.000011 |
|
2023-10-23 22:36:35,571 DEV : loss 0.2736358642578125 - f1-score (micro avg) 0.7733 |
|
2023-10-23 22:36:35,592 saving best model |
|
2023-10-23 22:36:36,185 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:36:39,878 epoch 9 - iter 44/447 - loss 0.00454046 - time (sec): 3.69 - samples/sec: 2163.58 - lr: 0.000011 - momentum: 0.000000 |
|
2023-10-23 22:36:44,012 epoch 9 - iter 88/447 - loss 0.00391630 - time (sec): 7.83 - samples/sec: 2060.42 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-23 22:36:47,842 epoch 9 - iter 132/447 - loss 0.00352831 - time (sec): 11.66 - samples/sec: 2119.86 - lr: 0.000010 - momentum: 0.000000 |
|
2023-10-23 22:36:51,495 epoch 9 - iter 176/447 - loss 0.00324014 - time (sec): 15.31 - samples/sec: 2143.80 - lr: 0.000009 - momentum: 0.000000 |
|
2023-10-23 22:36:55,486 epoch 9 - iter 220/447 - loss 0.00376207 - time (sec): 19.30 - samples/sec: 2145.27 - lr: 0.000008 - momentum: 0.000000 |
|
2023-10-23 22:36:59,856 epoch 9 - iter 264/447 - loss 0.00521423 - time (sec): 23.67 - samples/sec: 2153.42 - lr: 0.000008 - momentum: 0.000000 |
|
2023-10-23 22:37:03,961 epoch 9 - iter 308/447 - loss 0.00537886 - time (sec): 27.77 - samples/sec: 2146.14 - lr: 0.000007 - momentum: 0.000000 |
|
2023-10-23 22:37:08,356 epoch 9 - iter 352/447 - loss 0.00484718 - time (sec): 32.17 - samples/sec: 2142.65 - lr: 0.000007 - momentum: 0.000000 |
|
2023-10-23 22:37:12,275 epoch 9 - iter 396/447 - loss 0.00487190 - time (sec): 36.09 - samples/sec: 2132.68 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-23 22:37:16,222 epoch 9 - iter 440/447 - loss 0.00525891 - time (sec): 40.04 - samples/sec: 2131.23 - lr: 0.000006 - momentum: 0.000000 |
|
2023-10-23 22:37:16,845 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:37:16,846 EPOCH 9 done: loss 0.0052 - lr: 0.000006 |
|
2023-10-23 22:37:23,060 DEV : loss 0.2881031036376953 - f1-score (micro avg) 0.7758 |
|
2023-10-23 22:37:23,081 saving best model |
|
2023-10-23 22:37:23,683 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:37:27,975 epoch 10 - iter 44/447 - loss 0.00315695 - time (sec): 4.29 - samples/sec: 2066.95 - lr: 0.000005 - momentum: 0.000000 |
|
2023-10-23 22:37:32,248 epoch 10 - iter 88/447 - loss 0.00274169 - time (sec): 8.56 - samples/sec: 2001.47 - lr: 0.000005 - momentum: 0.000000 |
|
2023-10-23 22:37:35,940 epoch 10 - iter 132/447 - loss 0.00183972 - time (sec): 12.26 - samples/sec: 2099.04 - lr: 0.000004 - momentum: 0.000000 |
|
2023-10-23 22:37:40,088 epoch 10 - iter 176/447 - loss 0.00256280 - time (sec): 16.40 - samples/sec: 2122.84 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-23 22:37:43,873 epoch 10 - iter 220/447 - loss 0.00354318 - time (sec): 20.19 - samples/sec: 2121.91 - lr: 0.000003 - momentum: 0.000000 |
|
2023-10-23 22:37:47,546 epoch 10 - iter 264/447 - loss 0.00354710 - time (sec): 23.86 - samples/sec: 2127.10 - lr: 0.000002 - momentum: 0.000000 |
|
2023-10-23 22:37:51,706 epoch 10 - iter 308/447 - loss 0.00327233 - time (sec): 28.02 - samples/sec: 2126.72 - lr: 0.000002 - momentum: 0.000000 |
|
2023-10-23 22:37:55,430 epoch 10 - iter 352/447 - loss 0.00332028 - time (sec): 31.75 - samples/sec: 2116.46 - lr: 0.000001 - momentum: 0.000000 |
|
2023-10-23 22:37:59,496 epoch 10 - iter 396/447 - loss 0.00374741 - time (sec): 35.81 - samples/sec: 2124.40 - lr: 0.000001 - momentum: 0.000000 |
|
2023-10-23 22:38:03,509 epoch 10 - iter 440/447 - loss 0.00354295 - time (sec): 39.82 - samples/sec: 2115.19 - lr: 0.000000 - momentum: 0.000000 |
|
2023-10-23 22:38:04,545 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:38:04,546 EPOCH 10 done: loss 0.0035 - lr: 0.000000 |
|
2023-10-23 22:38:10,759 DEV : loss 0.2901349365711212 - f1-score (micro avg) 0.7753 |
|
2023-10-23 22:38:11,256 ---------------------------------------------------------------------------------------------------- |
|
2023-10-23 22:38:11,257 Loading model from best epoch ... |
|
2023-10-23 22:38:13,012 SequenceTagger predicts: Dictionary with 21 tags: O, S-loc, B-loc, E-loc, I-loc, S-pers, B-pers, E-pers, I-pers, S-org, B-org, E-org, I-org, S-prod, B-prod, E-prod, I-prod, S-time, B-time, E-time, I-time |
|
2023-10-23 22:38:17,859 |
|
Results: |
|
- F-score (micro) 0.751 |
|
- F-score (macro) 0.6724 |
|
- Accuracy 0.6218 |
|
|
|
By class: |
|
precision recall f1-score support |
|
|
|
loc 0.8395 0.8423 0.8409 596 |
|
pers 0.6658 0.7778 0.7175 333 |
|
org 0.5588 0.4318 0.4872 132 |
|
prod 0.6531 0.4848 0.5565 66 |
|
time 0.7451 0.7755 0.7600 49 |
|
|
|
micro avg 0.7468 0.7551 0.7510 1176 |
|
macro avg 0.6925 0.6624 0.6724 1176 |
|
weighted avg 0.7444 0.7551 0.7469 1176 |
|
|
|
2023-10-23 22:38:17,859 ---------------------------------------------------------------------------------------------------- |
|
|