SYNTH-GPT2-MYOPIC-COS_conv-10_shift-1_p-1_lr-0.001_n_embd-256_n_head-4_n_layer-4_activation_function-relu_global_step=58593.0_train_loss=1.24.ckpt