Commit ffeca03

pretrained: support pythia-6.9b-deduped base model

1 parent: a71963d
3 files changed: 58 additions & 1 deletion

.gitignore (1 addition & 0 deletions)

@@ -133,6 +133,7 @@ dmypy.json
 /data/OIG/files/
 /data/wikipedia-3sentence-level-retrieval-index/files/
 /pretrained/GPT-NeoX-20B/EleutherAI_gpt-neox-20b/
+/pretrained/Pythia-6.9B-deduped/EleutherAI_pythia-6.9b-deduped/
 
 # ignore training output
 /model_ckpts/
New file: 56 additions & 0 deletions

@@ -0,0 +1,56 @@
+import os
+import argparse
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
+
+DIR = os.path.dirname(os.path.abspath(__file__))
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Convert HF checkpoints')
+    parser.add_argument('--model-name', type=str, default='EleutherAI/pythia-6.9b-deduped',
+                        help='model-name')
+    parser.add_argument('--save-dir', type=str, default=DIR,
+                        help='directory to save the converted checkpoint')
+    parser.add_argument('--offload-dir', type=str, default=None,
+                        help='directory to offload from memory')
+    args = parser.parse_args()
+
+    if not os.path.exists(args.save_dir):
+        os.mkdir(args.save_dir)
+    save_path = os.path.join(args.save_dir, args.model_name.replace('/', '_'))
+    if not os.path.exists(save_path):
+        os.mkdir(save_path)
+
+    print('loading model from HF...')
+    config = AutoConfig.from_pretrained(args.model_name)
+    config.save_pretrained(save_path)
+    tokenizer = AutoTokenizer.from_pretrained(args.model_name)
+    tokenizer.save_pretrained(save_path)
+    # offload model from memory to disk if offload-dir is specified
+    if args.offload_dir is not None:
+        if not os.path.exists(args.offload_dir):
+            os.mkdir(args.offload_dir)
+        model = AutoModelForCausalLM.from_pretrained(args.model_name, torch_dtype=torch.float16, device_map="auto", offload_folder=args.offload_dir)
+    else:
+        model = AutoModelForCausalLM.from_pretrained(args.model_name, torch_dtype=torch.float16)
+    print('loaded model from HF...')
+
+    print('converting the embedding layer...')
+    item = {}
+    item['embed_in.weight'] = model.gpt_neox.embed_in.weight
+    torch.save(item, os.path.join(save_path, 'pytorch_embs.pt'))
+    print('converted the embedding layer.')
+
+    for i in range(len(model.gpt_neox.layers)):
+        print(f'converting the {i}-th transformer layer...')
+        torch.save(model.gpt_neox.layers[i].state_dict(), os.path.join(save_path, f'pytorch_{i}.pt'))
+        print(f'converted the {i}-th transformer layer.')
+
+    print('converting the lm_head layer...')
+    item = {}
+    item['embed_out.weight'] = model.embed_out.weight
+    item['final_layer_norm.weight'] = model.gpt_neox.final_layer_norm.weight
+    item['final_layer_norm.bias'] = model.gpt_neox.final_layer_norm.bias
+    torch.save(item, os.path.join(save_path, 'pytorch_lm_head.pt'))
+    print('converted the lm_head layer.')
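
The script shards the Hugging Face checkpoint into per-layer files. A minimal usage sketch follows; the new file's path is not shown in this diff, so the script name below is only a placeholder, and the output layout is read off the save calls above.

# Hypothetical invocation -- "convert_hf_checkpoint.py" is a placeholder name.
python convert_hf_checkpoint.py \
    --model-name EleutherAI/pythia-6.9b-deduped \
    --save-dir pretrained/Pythia-6.9B-deduped \
    --offload-dir /tmp/pythia-offload    # optional: offload weights to disk while loading

# Expected output under pretrained/Pythia-6.9B-deduped/EleutherAI_pythia-6.9b-deduped/:
#   config.json and tokenizer files  (via save_pretrained)
#   pytorch_embs.pt                  (embed_in.weight)
#   pytorch_<i>.pt                   (one state_dict per transformer layer)
#   pytorch_lm_head.pt               (embed_out.weight, final_layer_norm.weight/bias)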

training/finetune_Pythia-Chat-Base-7B.sh (1 addition & 1 deletion)

@@ -7,7 +7,7 @@ export MODEL_NAME=Pythia-Chat-Base-7B
 export SHOW_DATA=0
 
-BASE_MODEL="${DIR}/../pretrained/GPT-NeoX-20B/EleutherAI_pythia-6.9b-deduped/"
+BASE_MODEL="${DIR}/../pretrained/Pythia-6.9B-deduped/EleutherAI_pythia-6.9b-deduped/"
 
 CHECKPOINT_STEPS=100
 