|
| 1 | +import os |
| 2 | +import argparse |
| 3 | +import torch |
| 4 | +from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig |
| 5 | + |
| 6 | +DIR = os.path.dirname(os.path.abspath(__file__)) |
| 7 | + |
| 8 | + |
if __name__ == '__main__':
    # Convert a Hugging Face GPT-NeoX-style checkpoint (e.g. Pythia) into
    # per-layer shard files: pytorch_embs.pt, pytorch_<i>.pt for each
    # transformer layer, and pytorch_lm_head.pt, plus the config/tokenizer.
    parser = argparse.ArgumentParser(description='Convert HF checkpoints')
    parser.add_argument('--model-name', type=str, default='EleutherAI/pythia-6.9b-deduped',
                        help='model-name')
    parser.add_argument('--save-dir', type=str, default=DIR,
                        help='directory to save the converted checkpoint shards')
    parser.add_argument('--offload-dir', type=str, default=None,
                        help='directory to offload from memory')
    args = parser.parse_args()

    # makedirs(exist_ok=True) creates missing parents and avoids the
    # check-then-create race of the exists()/mkdir pair.
    os.makedirs(args.save_dir, exist_ok=True)
    save_path = os.path.join(args.save_dir, args.model_name.replace('/', '_'))
    os.makedirs(save_path, exist_ok=True)

    print('loading model from HF...')
    config = AutoConfig.from_pretrained(args.model_name)
    config.save_pretrained(save_path)
    tokenizer = AutoTokenizer.from_pretrained(args.model_name)
    tokenizer.save_pretrained(save_path)
    # offload model from memory to disk if offload-dir is specified
    if args.offload_dir is not None:
        os.makedirs(args.offload_dir, exist_ok=True)
        model = AutoModelForCausalLM.from_pretrained(
            args.model_name, torch_dtype=torch.float16,
            device_map="auto", offload_folder=args.offload_dir)
    else:
        model = AutoModelForCausalLM.from_pretrained(
            args.model_name, torch_dtype=torch.float16)
    print('loaded model from HF...')

    # Input embedding table, saved as its own shard.
    print('converting the embedding layer...')
    item = {'embed_in.weight': model.gpt_neox.embed_in.weight}
    torch.save(item, os.path.join(save_path, 'pytorch_embs.pt'))
    print('converted the embedding layer.')

    # One shard per transformer layer so each can be loaded independently.
    for i in range(len(model.gpt_neox.layers)):
        print(f'converting the {i}-th transformer layer...')
        torch.save(model.gpt_neox.layers[i].state_dict(),
                   os.path.join(save_path, f'pytorch_{i}.pt'))
        print(f'converted the {i}-th transformer layer.')

    # Final layer norm travels with the output projection in the lm_head shard.
    print('converting the lm_head layer...')
    item = {
        'embed_out.weight': model.embed_out.weight,
        'final_layer_norm.weight': model.gpt_neox.final_layer_norm.weight,
        'final_layer_norm.bias': model.gpt_neox.final_layer_norm.bias,
    }
    torch.save(item, os.path.join(save_path, 'pytorch_lm_head.pt'))
    print('converted the lm_head layer.')
0 commit comments