Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
- 编译并安装 `InfiniLM`

```bash
xmake && xmake install
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这个为什么要删?


```

- 运行模型推理测试
Expand Down
46 changes: 46 additions & 0 deletions jiuge.sh
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

应该不需要加这个文件,不然不是又多一个入口

Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/bin/bash
# Run script for the Jiuge model (9G4B) on an NVIDIA GPU.
# FIX: the committed file began with a stray terminal prompt line
# ("featurize@...$ cat jiuge.sh") pasted in by accident, which meant the
# shebang was not the first line; it has been removed.  A stray trailing
# "ß" character after the last echo has also been removed.

set -e  # exit immediately on any error

echo "=========================================="
echo "🚀 启动 Jiuge 模型 (9G4B) - NVIDIA版本"
echo "=========================================="
export INFINI_ROOT=/home/featurize/.infini
export LD_LIBRARY_PATH=$INFINI_ROOT/lib:$LD_LIBRARY_PATH

# Parameters
MODEL_DIR="/home/featurize/work/InfiniFamily/9G4B"
DEVICE="--nvidia"
N_DEVICE=1

# Check that the model directory exists
if [ ! -d "$MODEL_DIR" ]; then
    echo "❌ 错误: 模型目录不存在: $MODEL_DIR"
    echo "请检查路径是否正确"
    exit 1
fi

# Check that the Python entry script exists (script must be run from the repo root)
if [ ! -f "scripts/jiuge.py" ]; then
    echo "❌ 错误: 未找到jiuge.py脚本: scripts/jiuge.py"
    echo "请确保在当前目录下运行此脚本"
    exit 1
fi

echo "📁 模型路径: $MODEL_DIR"
echo "🎯 设备类型: NVIDIA GPU"
echo "💻 设备数量: $N_DEVICE"
echo ""

# Run the model.  Invoke python directly with quoted arguments instead of
# relying on unquoted word splitting of a $SCRIPT_PATH variable, so paths
# containing spaces cannot break the command line.
echo "🔄 启动模型..."
python scripts/jiuge.py "$DEVICE" "$MODEL_DIR" "$N_DEVICE"

echo ""
echo "=========================================="
echo "✅ 模型运行完成"
echo "=========================================="
2 changes: 2 additions & 0 deletions python/infinilm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from . import distributed
from . import cache
from . import llm
from . import base_config

from .llm import (
LLM,
Expand All @@ -16,6 +17,7 @@
"distributed",
"cache",
"llm",
"base_config",
# LLM classes
"LLM",
"AsyncLLMEngine",
Expand Down
128 changes: 128 additions & 0 deletions python/infinilm/base_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import argparse
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../scripts"))
from libinfinicore_infer import DeviceType
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

尽量不要引用这个,infinicore infer是旧版用的,新的尽量不要依赖。



class BaseConfig:
    """InfiniLM unified configuration parsed from the command line.

    Collects model/device selection, inference-backend tuning knobs,
    sampling parameters, benchmark/evaluation options and quantization
    flags into plain attributes so downstream code does not need to touch
    argparse directly.  Unrecognized arguments are preserved in
    ``self.extra`` so callers can layer their own parsers on top.

    NOTE(review): ``DeviceType`` comes from the legacy
    ``libinfinicore_infer`` module via a ``sys.path`` hack at module
    import time — reviewer feedback says new code should avoid this
    dependency; to be migrated.
    """

    def __init__(self):
        # Parse the process command line once, up front.
        self.parser = argparse.ArgumentParser(description="InfiniLM Unified Config")
        self._add_common_args()
        # parse_known_args: unknown flags land in self.extra instead of erroring.
        self.args, self.extra = self.parser.parse_known_args()

        # --- base configuration ---
        self.model = self.args.model
        self.device_name = self.args.device
        self.device_type = self._get_device_type(self.args.device)
        self.tp = self.args.tp

        # --- inference backend options ---
        self.attn = self.args.attn
        self.enable_graph = self.args.enable_graph
        self.cache_type = self.args.cache_type
        self.enable_paged_attn = self.args.enable_paged_attn
        self.paged_kv_block_size = self.args.paged_kv_block_size
        self.kv_cache_dtype = self.args.kv_cache_dtype
        self.skip_load = self.args.skip_load

        # --- length and sampling parameters ---
        self.batch_size = self.args.batch_size
        self.input_len = self.args.input_len
        self.output_len = self.args.output_len
        self.max_new_tokens = self.args.max_new_tokens
        self.top_k = self.args.top_k
        self.top_p = self.args.top_p
        self.temperature = self.args.temperature

        # --- debug ---
        # BUG FIX: the flag is "--warmup", so argparse stores it under
        # ``args.warmup``.  The previous code read the non-existent
        # ``args.warm_up`` and raised AttributeError on every construction.
        self.warm_up = self.args.warmup
        self.verbose = self.args.verbose
        # "--log-level" is the corrected spelling of the old "--log-evel"
        # typo; both flags are accepted, and ``log_evel`` is kept as an
        # attribute alias for backward compatibility.
        self.log_level = self.args.log_level
        self.log_evel = self.log_level

        # --- evaluation parameters ---
        self.bench = self.args.bench
        self.backend = self.args.backend
        self.ndev = self.args.ndev
        self.subject = self.args.subject
        self.split = self.args.split
        self.num_samples = self.args.num_samples
        self.output_csv = self.args.output_csv
        self.cache_dir = self.args.cache_dir

        # --- quantization parameters ---
        self.awq = self.args.awq
        self.gptq = self.args.gptq

    def _add_common_args(self):
        """Register all common InfiniLM command-line options on ``self.parser``."""
        # --- base configuration ---
        self.parser.add_argument("--model", type=str, required=True)
        self.parser.add_argument("--device", type=str, default="cpu")
        self.parser.add_argument("--tp", "--tensor-parallel-size", type=int, default=1)

        # --- inference backend optimization ---
        self.parser.add_argument("--attn", type=str, default="default", choices=["default", "paged-attn", "flash-attn"])
        self.parser.add_argument("--enable-graph", action="store_true")
        self.parser.add_argument("--cache-type", type=str, default="paged", choices=["paged", "static"])
        self.parser.add_argument("--enable-paged-attn", action="store_true", help="use paged cache")
        self.parser.add_argument("--paged-kv-block-size", type=int, default=256)
        self.parser.add_argument("--kv-cache-dtype", type=str, default=None, choices=["int8"], help="KV cache data type")
        self.parser.add_argument("--skip-load", action="store_true", help="skip loading model weights")

        # --- length and inference parameters ---
        self.parser.add_argument("--batch-size", type=int, default=1)
        self.parser.add_argument("--input-len", type=int, default=10, help="input sequence length")
        self.parser.add_argument("--output-len", type=int, default=20, help="output sequence length")
        self.parser.add_argument("--max-new-tokens", type=int, default=500, help="maximum number of new tokens to generate")
        self.parser.add_argument("--top-k", type=int, default=1)
        self.parser.add_argument("--top-p", type=float, default=1.0)
        self.parser.add_argument("--temperature", type=float, default=1.0)

        # --- debug ---
        self.parser.add_argument("--warmup", action="store_true")
        self.parser.add_argument("--verbose", action="store_true")
        # "--log-evel" kept as a hidden alias of the corrected "--log-level"
        # so existing invocations keep working.
        self.parser.add_argument("--log-level", "--log-evel", dest="log_level", type=str, default="INFO")

        # --- evaluation parameters ---
        self.parser.add_argument("--bench", type=str, default=None, choices=["ceval", "mmlu"], help="benchmark to evaluate")
        self.parser.add_argument("--backend", type=str, default="cpp", choices=["python", "cpp", "torch", "vllm"], help="backend type")
        self.parser.add_argument("--ndev", type=int, default=1, help="number of devices for tensor parallelism")
        self.parser.add_argument("--subject", type=str, default="all", help="subject(s) to evaluate, comma-separated or 'all'")
        self.parser.add_argument("--split", type=str, default="test", choices=["test", "val", "all"], help="dataset split to use")
        self.parser.add_argument("--num-samples", type=int, default=None, help="number of samples to evaluate per subject")
        self.parser.add_argument("--output-csv", type=str, default=None, help="path to output CSV file for results")
        self.parser.add_argument("--cache-dir", type=str, default=None, help="directory for dataset cache")

        # --- quantization parameters ---
        self.parser.add_argument("--awq", action="store_true", help="use AWQ quantization")
        self.parser.add_argument("--gptq", action="store_true", help="use GPTQ quantization")

    def _get_device_type(self, dev_str):
        """Convert a device name string to the ``DeviceType`` enum.

        Lookup is case-insensitive; unknown names fall back to CPU.
        """
        DEVICE_TYPE_MAP = {
            "cpu": DeviceType.DEVICE_TYPE_CPU,
            "nvidia": DeviceType.DEVICE_TYPE_NVIDIA,
            "qy": DeviceType.DEVICE_TYPE_QY,
            "cambricon": DeviceType.DEVICE_TYPE_CAMBRICON,
            "ascend": DeviceType.DEVICE_TYPE_ASCEND,
            "metax": DeviceType.DEVICE_TYPE_METAX,
            "moore": DeviceType.DEVICE_TYPE_MOORE,
            "iluvatar": DeviceType.DEVICE_TYPE_ILUVATAR,
            "kunlun": DeviceType.DEVICE_TYPE_KUNLUN,
            "hygon": DeviceType.DEVICE_TYPE_HYGON,
            "ali": DeviceType.DEVICE_TYPE_ALI,
        }
        return DEVICE_TYPE_MAP.get(dev_str.lower(), DeviceType.DEVICE_TYPE_CPU)

    def __repr__(self):
        """Short string representation of the configuration."""
        return f"BaseConfig(model='{self.model}', device='{self.device_name}', tp={self.tp})"