SYNTH-GPT2_conv-10_shift-1_p-0.3_lr-0.001_n_embd-128_n_head-2_n_layer-2_activation_function-relu_global_step=7815.0_val_loss=0.02.ckpt