diff --git a/README.md b/README.md
index 48448c56..afc242b2 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@
 - Build and install `InfiniLM`
 
 ```bash
-xmake && xmake install
+
 ```
 
 - Run the model inference test
diff --git a/jiuge.sh b/jiuge.sh
new file mode 100644
index 00000000..e7ddb2dd
--- /dev/null
+++ b/jiuge.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+# Jiuge model launch script
+# Runs the 9G4B model on an NVIDIA GPU
+
+set -e  # exit immediately on any error
+
+echo "=========================================="
+echo "🚀 Launching Jiuge model (9G4B) - NVIDIA version"
+echo "=========================================="
+export INFINI_ROOT=/home/featurize/.infini
+export LD_LIBRARY_PATH=$INFINI_ROOT/lib:$LD_LIBRARY_PATH
+# Set parameters
+MODEL_DIR="/home/featurize/work/InfiniFamily/9G4B"
+DEVICE="--nvidia"
+N_DEVICE=1
+SCRIPT_PATH="python scripts/jiuge.py"
+
+# Check that the model directory exists
+if [ ! -d "$MODEL_DIR" ]; then
+    echo "❌ Error: model directory not found: $MODEL_DIR"
+    echo "Please check that the path is correct"
+    exit 1
+fi
+
+# Check that the Python script exists
+if [ ! -f "scripts/jiuge.py" ]; then
+    echo "❌ Error: jiuge.py script not found: scripts/jiuge.py"
+    echo "Please run this script from the repository root"
+    exit 1
+fi
+
+echo "📁 Model path: $MODEL_DIR"
+echo "🎯 Device type: NVIDIA GPU"
+echo "💻 Device count: $N_DEVICE"
+echo ""
+
+# Run the model
+echo "🔄 Starting the model..."
+$SCRIPT_PATH $DEVICE $MODEL_DIR $N_DEVICE
+
+echo ""
+echo "=========================================="
+echo "✅ Model run finished"
+echo "=========================================="
\ No newline at end of file
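For reference, a minimal Python equivalent of the command and environment the script assembles. This is a sketch only: it assumes the same model path and `INFINI_ROOT` as the script above, and that it is run from the repository root.

```python
import os
import subprocess

# Reproduce the environment jiuge.sh exports before launching.
env = os.environ.copy()
env["INFINI_ROOT"] = "/home/featurize/.infini"
env["LD_LIBRARY_PATH"] = env["INFINI_ROOT"] + "/lib:" + env.get("LD_LIBRARY_PATH", "")

# Device flag, model directory, and device count, passed exactly as the script does.
subprocess.run(
    ["python", "scripts/jiuge.py", "--nvidia",
     "/home/featurize/work/InfiniFamily/9G4B", "1"],
    env=env,
    check=True,  # fail fast on a non-zero exit, mirroring `set -e`
)
```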
diff --git a/python/infinilm/__init__.py b/python/infinilm/__init__.py
index e34514a7..f552a2cc 100644
--- a/python/infinilm/__init__.py
+++ b/python/infinilm/__init__.py
@@ -2,6 +2,7 @@
 from . import distributed
 from . import cache
 from . import llm
+from . import base_config
 
 from .llm import (
     LLM,
@@ -16,6 +17,7 @@
     "distributed",
     "cache",
     "llm",
+    "base_config",
     # LLM classes
     "LLM",
     "AsyncLLMEngine",
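With `base_config` re-exported at the package root, both import styles below resolve to the same module (a quick check, assuming `infinilm` is importable):

```python
from infinilm import base_config
from infinilm.base_config import BaseConfig

assert BaseConfig is base_config.BaseConfig
```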
diff --git a/python/infinilm/base_config.py b/python/infinilm/base_config.py
new file mode 100644
index 00000000..5e1a8bd2
--- /dev/null
+++ b/python/infinilm/base_config.py
@@ -0,0 +1,123 @@
+import argparse
+import sys
+import os
+
+# Make the repository's scripts/ directory importable so DeviceType can be reused.
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../scripts"))
+from libinfinicore_infer import DeviceType
+
+
+class BaseConfig:
+    """InfiniLM unified config - command-line argument parser"""
+
+    def __init__(self):
+        self.parser = argparse.ArgumentParser(description="InfiniLM Unified Config")
+        self._add_common_args()
+        self.args, self.extra = self.parser.parse_known_args()
+
+        # Base configuration
+        self.model = self.args.model
+        self.device_name = self.args.device
+        self.device_type = self._get_device_type(self.args.device)
+        self.tp = self.args.tp
+
+        # Inference backend optimization
+        self.attn = self.args.attn
+        self.enable_graph = self.args.enable_graph
+        self.cache_type = self.args.cache_type
+        self.enable_paged_attn = self.args.enable_paged_attn
+        self.paged_kv_block_size = self.args.paged_kv_block_size
+        self.kv_cache_dtype = self.args.kv_cache_dtype
+        self.skip_load = self.args.skip_load
+
+        # Length and sampling parameters
+        self.batch_size = self.args.batch_size
+        self.input_len = self.args.input_len
+        self.output_len = self.args.output_len
+        self.max_new_tokens = self.args.max_new_tokens
+        self.top_k = self.args.top_k
+        self.top_p = self.args.top_p
+        self.temperature = self.args.temperature
+
+        # Debug
+        self.warm_up = self.args.warmup
+        self.verbose = self.args.verbose
+        self.log_level = self.args.log_level
+
+        # Evaluation parameters
+        self.bench = self.args.bench
+        self.backend = self.args.backend
+        self.ndev = self.args.ndev
+        self.subject = self.args.subject
+        self.split = self.args.split
+        self.num_samples = self.args.num_samples
+        self.output_csv = self.args.output_csv
+        self.cache_dir = self.args.cache_dir
+
+        # Quantization parameters
+        self.awq = self.args.awq
+        self.gptq = self.args.gptq
+
+    def _add_common_args(self):
+        # --- Base configuration ---
+        self.parser.add_argument("--model", type=str, required=True)
+        self.parser.add_argument("--device", type=str, default="cpu")
+        self.parser.add_argument("--tp", "--tensor-parallel-size", type=int, default=1)
+
+        # --- Inference backend optimization ---
+        self.parser.add_argument("--attn", type=str, default="default", choices=["default", "paged-attn", "flash-attn"])
+        self.parser.add_argument("--enable-graph", action="store_true")
+        self.parser.add_argument("--cache-type", type=str, default="paged", choices=["paged", "static"])
+        self.parser.add_argument("--enable-paged-attn", action="store_true", help="use paged cache")
+        self.parser.add_argument("--paged-kv-block-size", type=int, default=256)
+        self.parser.add_argument("--kv-cache-dtype", type=str, default=None, choices=["int8"], help="KV cache data type")
+        self.parser.add_argument("--skip-load", action="store_true", help="skip loading model weights")
+
+        # --- Length and sampling parameters ---
+        self.parser.add_argument("--batch-size", type=int, default=1)
+        self.parser.add_argument("--input-len", type=int, default=10, help="input sequence length")
+        self.parser.add_argument("--output-len", type=int, default=20, help="output sequence length")
+        self.parser.add_argument("--max-new-tokens", type=int, default=500, help="maximum number of new tokens to generate")
+        self.parser.add_argument("--top-k", type=int, default=1)
+        self.parser.add_argument("--top-p", type=float, default=1.0)
+        self.parser.add_argument("--temperature", type=float, default=1.0)
+
+        # --- Debug ---
+        self.parser.add_argument("--warmup", action="store_true")
+        self.parser.add_argument("--verbose", action="store_true")
+        self.parser.add_argument("--log-level", type=str, default="INFO")
+
+        # --- Evaluation parameters ---
+        self.parser.add_argument("--bench", type=str, default=None, choices=["ceval", "mmlu"], help="benchmark to evaluate")
+        self.parser.add_argument("--backend", type=str, default="cpp", choices=["python", "cpp", "torch", "vllm"], help="backend type")
+        self.parser.add_argument("--ndev", type=int, default=1, help="number of devices for tensor parallelism")
+        self.parser.add_argument("--subject", type=str, default="all", help="subject(s) to evaluate, comma-separated or 'all'")
+        self.parser.add_argument("--split", type=str, default="test", choices=["test", "val", "all"], help="dataset split to use")
+        self.parser.add_argument("--num-samples", type=int, default=None, help="number of samples to evaluate per subject")
+        self.parser.add_argument("--output-csv", type=str, default=None, help="path to output CSV file for results")
+        self.parser.add_argument("--cache-dir", type=str, default=None, help="directory for dataset cache")
+
+        # --- Quantization parameters ---
+        self.parser.add_argument("--awq", action="store_true", help="use AWQ quantization")
+        self.parser.add_argument("--gptq", action="store_true", help="use GPTQ quantization")
+
+    def _get_device_type(self, dev_str):
+        """Convert a device string to the DeviceType enum, defaulting to CPU."""
+        DEVICE_TYPE_MAP = {
+            "cpu": DeviceType.DEVICE_TYPE_CPU,
+            "nvidia": DeviceType.DEVICE_TYPE_NVIDIA,
+            "qy": DeviceType.DEVICE_TYPE_QY,
+            "cambricon": DeviceType.DEVICE_TYPE_CAMBRICON,
+            "ascend": DeviceType.DEVICE_TYPE_ASCEND,
+            "metax": DeviceType.DEVICE_TYPE_METAX,
+            "moore": DeviceType.DEVICE_TYPE_MOORE,
+            "iluvatar": DeviceType.DEVICE_TYPE_ILUVATAR,
+            "kunlun": DeviceType.DEVICE_TYPE_KUNLUN,
+            "hygon": DeviceType.DEVICE_TYPE_HYGON,
+            "ali": DeviceType.DEVICE_TYPE_ALI,
+        }
+        return DEVICE_TYPE_MAP.get(dev_str.lower(), DeviceType.DEVICE_TYPE_CPU)
+
+    def __repr__(self):
+        """String representation of the configuration"""
+        return f"BaseConfig(model='{self.model}', device='{self.device_name}', tp={self.tp})"
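A sketch of how `BaseConfig` might be used from a driver script. The file name `run.py` is hypothetical; the flags are the ones defined above, and unknown flags are kept in `config.extra` because the class uses `parse_known_args`:

```python
# run.py -- hypothetical driver using BaseConfig
from infinilm.base_config import BaseConfig

config = BaseConfig()  # parses sys.argv; --model is required

print(config)              # BaseConfig(model='...', device='...', tp=...)
print(config.device_type)  # e.g. DeviceType.DEVICE_TYPE_NVIDIA for --device nvidia
print(config.extra)        # unrecognized flags, left for downstream parsers
```

Example invocation: `python run.py --model /path/to/9G4B --device nvidia --tp 2 --max-new-tokens 256`. Note that an unrecognized device string silently falls back to `DeviceType.DEVICE_TYPE_CPU`.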