#!/usr/bin/env bash
#
# Launches a local 8-process fine-tuning run of dist_clm_train.py, one process
# per rank, with rank N pinned to --cuda-id N. All ranks rendezvous over the
# loopback interface at tcp://127.0.0.1:7033.
#
# Usage: bash <this script>
#   Intended to be executed, not sourced: it finishes with `exit`.
#
# Paths (base model, datasets, checkpoints) are resolved relative to the
# directory that contains this script.
#
# Exit status: 0 if every rank exits 0, otherwise the status of the last
# failing rank.

# Absolute directory of this script; abort rather than continue with an empty
# DIR, which would silently turn every path below into "/../...".
DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) || {
  printf 'error: cannot resolve the script directory\n' >&2
  exit 1
}

# Network interface handed to Gloo/NCCL and to the trainer (--net-interface).
netif=lo
export GLOO_SOCKET_IFNAME=${netif}
export NCCL_SOCKET_IFNAME=${netif}
export MODEL_NAME=Pythia-Chat-Base-7B

export SHOW_DATA=0

# NOTE(review): this path sits under "GPT-NeoX-20B" although the checkpoint
# directory is named pythia-6.9b-deduped -- confirm it matches where the
# pretrained weights are actually stored.
BASE_MODEL="${DIR}/../pretrained/GPT-NeoX-20B/EleutherAI_pythia-6.9b-deduped/"

CHECKPOINT_STEPS=100

# Number of processes to launch. Also passed as --world-size, and must equal
# pipeline-group-size (4) * data-group-size (2) given below.
WORLD_SIZE=8

# Dataset files under data/OIG/files, each written as "<file>:<number>".
# The number is presumably a sampling weight -- verify against the trainer.
dataset_entries=(
  "unified_ni.jsonl:0.2"
  "unified_p3.jsonl:0.5"
  "unified_flan.jsonl:0.2"
  "unified_chip2.jsonl:0.01"
  "unified_rallio_safety_and_prosocial.jsonl:0.1"
  "unified_soda_dialog.jsonl:0.1"
  "unified_unifiedskg_instructions.jsonl:0.1"
  "unified_merged_code_xp3.jsonl:0.1"
  "unified_oscar_en_sample_dialog.jsonl:0.1"
  "unified_ul2_plus_oscar_en_sample_dialog.jsonl:0.1"
  "unified_multi_news.jsonl:0.05"
  "unified_openai_summarize_tldr.jsonl:0.05"
  "unified_squad_v2.jsonl:0.01"
  "unified_nq.jsonl:0.01"
  "unified_poetry_instructions.jsonl:0.01"
  "unified_sqlv2.jsonl:0.01"
  "unified_unnatural_instructions.jsonl:0.01"
  "unified_conv_finqa.jsonl:0.01"
  "unified_essays.jsonl:0.01"
  "unified_plot_screenplay_books_dialog.jsonl:0.01"
  "unified_grade_school_math_instructions.jsonl:0.01"
  "unified_mathqa_flanv2_kojma_cot.jsonl:0.01"
  "unified_joke_explanations.jsonl:0.01"
  "unified_cuad.jsonl:0.01"
  "unified_abstract_infill.jsonl:0.1"
  "unified_image_prompts_instructions.jsonl:0.01"
)

# Join the entries into the single comma-separated value given to --task-name
# (no leading/trailing separator or whitespace).
DATASETS=""
for entry in "${dataset_entries[@]}"; do
  DATASETS+="${DATASETS:+,}${DIR}/../data/OIG/files/${entry}"
done

# Arguments shared by every rank. Kept as an array so that each value stays a
# single word even when the checkout path contains spaces or glob characters.
ARGS=(
  --model-name "${BASE_MODEL}"
  --tokenizer-name "${BASE_MODEL}"
  --project-name together
  --model-type gptneox
  --optimizer adam
  --seed 42
  --load-pretrained-model true
  --task-name "${DATASETS}"
  --checkpoint-path "${DIR}/../model_ckpts/${MODEL_NAME}"
  --total-steps 20000 --warmup-steps 10 --train-warmup-steps 0
  --checkpoint-steps "${CHECKPOINT_STEPS}"
  --lr 1e-5 --seq-length 2048 --batch-size 32 --micro-batch-size 1 --gradient-accumulate-step 1
  --dist-url tcp://127.0.0.1:7033
  --num-layers 8 --embedding-dim 4096
  --world-size "${WORLD_SIZE}" --pipeline-group-size 4 --data-group-size 2
  --job-id 0 --net-interface "${netif}"
  --fp16
  --dp-backend nccl
  --dp-mode allreduce
  --pp-mode gpipe --profiling no-profiling
)

(
  # Ctrl-C: signal the whole process group so no background rank is left
  # running after the launcher goes away.
  trap 'kill 0' SIGINT

  pids=()
  for ((rank = 0; rank < WORLD_SIZE; rank++)); do
    python "${DIR}/dist_clm_train.py" "${ARGS[@]}" \
      --cuda-id "${rank}" --rank "${rank}" &
    pids+=("$!")
  done

  # A bare `wait` always reports success; wait on each rank individually so a
  # crashed rank is reflected in the exit status.
  status=0
  for pid in "${pids[@]}"; do
    wait "${pid}" || status=$?
  done
  exit "${status}"
)
exit "$?"
0 commit comments