SYNTH-GPT2-COS_conv-10_shift-1_p-1.0_lr-0.0001_n_embd-256_n_head-4_n_layer-2_activation_function-relu_decay-0.1_global_step=195312.0_train_loss=0.00.ckpt