SYNTH-GPT2-COS_conv-10_shift-1_p-0.3_attn_pdrop-0.1_embd_pdrop-0.1_resid_pdrop-0.1_lr-0.001_n_embd-128_n_head-1_n_layer-2_activation_function-relu_global_step=58593.0_train_loss=0.09.ckpt