{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "global_step": 953, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 3.496503496503497e-06, "loss": 5.6988, "step": 10 }, { "epoch": 0.04, "learning_rate": 6.993006993006994e-06, "loss": 5.6631, "step": 20 }, { "epoch": 0.06, "learning_rate": 1.048951048951049e-05, "loss": 5.7268, "step": 30 }, { "epoch": 0.08, "learning_rate": 1.3986013986013988e-05, "loss": 5.2938, "step": 40 }, { "epoch": 0.1, "learning_rate": 1.7482517482517483e-05, "loss": 5.0975, "step": 50 }, { "epoch": 0.13, "learning_rate": 2.097902097902098e-05, "loss": 4.8578, "step": 60 }, { "epoch": 0.15, "learning_rate": 2.4475524475524478e-05, "loss": 4.3282, "step": 70 }, { "epoch": 0.17, "learning_rate": 2.7972027972027976e-05, "loss": 4.0068, "step": 80 }, { "epoch": 0.19, "learning_rate": 3.146853146853147e-05, "loss": 3.7421, "step": 90 }, { "epoch": 0.21, "learning_rate": 3.4965034965034965e-05, "loss": 3.2654, "step": 100 }, { "epoch": 0.23, "learning_rate": 3.846153846153846e-05, "loss": 3.0331, "step": 110 }, { "epoch": 0.25, "learning_rate": 4.195804195804196e-05, "loss": 2.8466, "step": 120 }, { "epoch": 0.27, "learning_rate": 4.545454545454546e-05, "loss": 2.6892, "step": 130 }, { "epoch": 0.29, "learning_rate": 4.8951048951048956e-05, "loss": 2.7062, "step": 140 }, { "epoch": 0.31, "learning_rate": 4.972762645914397e-05, "loss": 2.6551, "step": 150 }, { "epoch": 0.34, "learning_rate": 4.933852140077821e-05, "loss": 2.6632, "step": 160 }, { "epoch": 0.36, "learning_rate": 4.894941634241245e-05, "loss": 2.7111, "step": 170 }, { "epoch": 0.38, "learning_rate": 4.856031128404669e-05, "loss": 2.6711, "step": 180 }, { "epoch": 0.4, "learning_rate": 4.817120622568094e-05, "loss": 2.562, "step": 190 }, { "epoch": 0.42, "learning_rate": 4.778210116731518e-05, "loss": 2.543, "step": 200 }, { "epoch": 0.44, "learning_rate": 4.739299610894942e-05, "loss": 2.5899, "step": 210 }, { "epoch": 0.46, "learning_rate": 4.700389105058366e-05, "loss": 2.5576, "step": 220 }, { "epoch": 0.48, "learning_rate": 4.66147859922179e-05, "loss": 2.5863, "step": 230 }, { "epoch": 0.5, "learning_rate": 4.622568093385214e-05, "loss": 2.4993, "step": 240 }, { "epoch": 0.52, "learning_rate": 4.583657587548638e-05, "loss": 2.5329, "step": 250 }, { "epoch": 0.55, "learning_rate": 4.544747081712062e-05, "loss": 2.5328, "step": 260 }, { "epoch": 0.57, "learning_rate": 4.505836575875487e-05, "loss": 2.5523, "step": 270 }, { "epoch": 0.59, "learning_rate": 4.466926070038911e-05, "loss": 2.6414, "step": 280 }, { "epoch": 0.61, "learning_rate": 4.428015564202335e-05, "loss": 2.5356, "step": 290 }, { "epoch": 0.63, "learning_rate": 4.389105058365759e-05, "loss": 2.5604, "step": 300 }, { "epoch": 0.65, "learning_rate": 4.3501945525291833e-05, "loss": 2.5026, "step": 310 }, { "epoch": 0.67, "learning_rate": 4.311284046692607e-05, "loss": 2.5861, "step": 320 }, { "epoch": 0.69, "learning_rate": 4.272373540856031e-05, "loss": 2.6518, "step": 330 }, { "epoch": 0.71, "learning_rate": 4.233463035019455e-05, "loss": 2.4669, "step": 340 }, { "epoch": 0.73, "learning_rate": 4.19455252918288e-05, "loss": 2.4614, "step": 350 }, { "epoch": 0.76, "learning_rate": 4.155642023346304e-05, "loss": 2.5185, "step": 360 }, { "epoch": 0.78, "learning_rate": 4.116731517509728e-05, "loss": 2.5792, "step": 370 }, { "epoch": 0.8, "learning_rate": 4.077821011673152e-05, "loss": 2.4151, "step": 380 }, { "epoch": 0.82, "learning_rate": 4.0389105058365764e-05, "loss": 2.5522, "step": 390 }, { "epoch": 0.84, "learning_rate": 4e-05, "loss": 2.5522, "step": 400 }, { "epoch": 0.86, "learning_rate": 3.961089494163424e-05, "loss": 2.4389, "step": 410 }, { "epoch": 0.88, "learning_rate": 3.922178988326848e-05, "loss": 2.3915, "step": 420 }, { "epoch": 0.9, "learning_rate": 3.883268482490273e-05, "loss": 2.4208, "step": 430 }, { "epoch": 0.92, "learning_rate": 3.844357976653697e-05, "loss": 2.4861, "step": 440 }, { "epoch": 0.94, "learning_rate": 3.805447470817121e-05, "loss": 2.4987, "step": 450 }, { "epoch": 0.97, "learning_rate": 3.766536964980545e-05, "loss": 2.5472, "step": 460 }, { "epoch": 0.99, "learning_rate": 3.7276264591439694e-05, "loss": 2.5182, "step": 470 }, { "epoch": 1.0, "eval_accuracy": 0.5792096979068868, "eval_loss": 2.443359375, "eval_runtime": 25.0491, "eval_samples_per_second": 76.09, "eval_steps_per_second": 9.541, "step": 476 }, { "epoch": 1.01, "learning_rate": 3.6887159533073934e-05, "loss": 2.3115, "step": 480 }, { "epoch": 1.03, "learning_rate": 3.649805447470817e-05, "loss": 2.3429, "step": 490 }, { "epoch": 1.05, "learning_rate": 3.610894941634241e-05, "loss": 2.4361, "step": 500 }, { "epoch": 1.07, "learning_rate": 3.571984435797666e-05, "loss": 2.4135, "step": 510 }, { "epoch": 1.09, "learning_rate": 3.53307392996109e-05, "loss": 2.3602, "step": 520 }, { "epoch": 1.11, "learning_rate": 3.494163424124514e-05, "loss": 2.4515, "step": 530 }, { "epoch": 1.13, "learning_rate": 3.455252918287938e-05, "loss": 2.4394, "step": 540 }, { "epoch": 1.15, "learning_rate": 3.4163424124513624e-05, "loss": 2.4532, "step": 550 }, { "epoch": 1.18, "learning_rate": 3.3774319066147864e-05, "loss": 2.4665, "step": 560 }, { "epoch": 1.2, "learning_rate": 3.3385214007782103e-05, "loss": 2.3228, "step": 570 }, { "epoch": 1.22, "learning_rate": 3.299610894941634e-05, "loss": 2.3944, "step": 580 }, { "epoch": 1.24, "learning_rate": 3.260700389105058e-05, "loss": 2.4102, "step": 590 }, { "epoch": 1.26, "learning_rate": 3.221789883268483e-05, "loss": 2.4339, "step": 600 }, { "epoch": 1.28, "learning_rate": 3.182879377431907e-05, "loss": 2.3893, "step": 610 }, { "epoch": 1.3, "learning_rate": 3.143968871595331e-05, "loss": 2.3751, "step": 620 }, { "epoch": 1.32, "learning_rate": 3.105058365758755e-05, "loss": 2.3861, "step": 630 }, { "epoch": 1.34, "learning_rate": 3.0661478599221794e-05, "loss": 2.4282, "step": 640 }, { "epoch": 1.36, "learning_rate": 3.027237354085603e-05, "loss": 2.3462, "step": 650 }, { "epoch": 1.39, "learning_rate": 2.9883268482490273e-05, "loss": 2.3599, "step": 660 }, { "epoch": 1.41, "learning_rate": 2.9494163424124516e-05, "loss": 2.4088, "step": 670 }, { "epoch": 1.43, "learning_rate": 2.910505836575876e-05, "loss": 2.3874, "step": 680 }, { "epoch": 1.45, "learning_rate": 2.8715953307392995e-05, "loss": 2.484, "step": 690 }, { "epoch": 1.47, "learning_rate": 2.832684824902724e-05, "loss": 2.3859, "step": 700 }, { "epoch": 1.49, "learning_rate": 2.793774319066148e-05, "loss": 2.3496, "step": 710 }, { "epoch": 1.51, "learning_rate": 2.7548638132295724e-05, "loss": 2.5695, "step": 720 }, { "epoch": 1.53, "learning_rate": 2.715953307392996e-05, "loss": 2.3911, "step": 730 }, { "epoch": 1.55, "learning_rate": 2.6770428015564204e-05, "loss": 2.4357, "step": 740 }, { "epoch": 1.57, "learning_rate": 2.6381322957198447e-05, "loss": 2.4545, "step": 750 }, { "epoch": 1.59, "learning_rate": 2.599221789883269e-05, "loss": 2.3346, "step": 760 }, { "epoch": 1.62, "learning_rate": 2.5603112840466926e-05, "loss": 2.3698, "step": 770 }, { "epoch": 1.64, "learning_rate": 2.521400778210117e-05, "loss": 2.4074, "step": 780 }, { "epoch": 1.66, "learning_rate": 2.4824902723735412e-05, "loss": 2.5796, "step": 790 }, { "epoch": 1.68, "learning_rate": 2.443579766536965e-05, "loss": 2.4123, "step": 800 }, { "epoch": 1.7, "learning_rate": 2.4046692607003894e-05, "loss": 2.3395, "step": 810 }, { "epoch": 1.72, "learning_rate": 2.3657587548638134e-05, "loss": 2.3278, "step": 820 }, { "epoch": 1.74, "learning_rate": 2.3268482490272377e-05, "loss": 2.4002, "step": 830 }, { "epoch": 1.76, "learning_rate": 2.2879377431906616e-05, "loss": 2.3189, "step": 840 }, { "epoch": 1.78, "learning_rate": 2.2490272373540856e-05, "loss": 2.4051, "step": 850 }, { "epoch": 1.8, "learning_rate": 2.21011673151751e-05, "loss": 2.3595, "step": 860 }, { "epoch": 1.83, "learning_rate": 2.171206225680934e-05, "loss": 2.3412, "step": 870 }, { "epoch": 1.85, "learning_rate": 2.132295719844358e-05, "loss": 2.3462, "step": 880 }, { "epoch": 1.87, "learning_rate": 2.093385214007782e-05, "loss": 2.3828, "step": 890 }, { "epoch": 1.89, "learning_rate": 2.054474708171206e-05, "loss": 2.3796, "step": 900 }, { "epoch": 1.91, "learning_rate": 2.0155642023346304e-05, "loss": 2.2987, "step": 910 }, { "epoch": 1.93, "learning_rate": 1.9766536964980543e-05, "loss": 2.3002, "step": 920 }, { "epoch": 1.95, "learning_rate": 1.9377431906614786e-05, "loss": 2.4586, "step": 930 }, { "epoch": 1.97, "learning_rate": 1.8988326848249026e-05, "loss": 2.4058, "step": 940 }, { "epoch": 1.99, "learning_rate": 1.859922178988327e-05, "loss": 2.3934, "step": 950 }, { "epoch": 2.0, "eval_accuracy": 0.5807643452808307, "eval_loss": 2.384765625, "eval_runtime": 25.0512, "eval_samples_per_second": 76.084, "eval_steps_per_second": 9.54, "step": 953 } ], "max_steps": 1428, "num_train_epochs": 3, "total_flos": 5.11391603907625e+16, "trial_name": null, "trial_params": null }