Conversation
| // - HyPE (RoPE on linear layers; NoPE on sparse layers) | ||
| class MiniCPMSALAForCausalLM : public InfinilmModel { | ||
| public: | ||
| MiniCPMSALAForCausalLM(std::shared_ptr<infinilm::config::ModelConfig> model_config, |
There was a problem hiding this comment.
https://github.com/pengcheng888/InfiniLM/blob/main/csrc/models/minicpm_sala/minicpm_sala_for_causal_lm.hpp 请参考接口。移除rank_info和 attention_backend 参数。
| private: | ||
| INFINICORE_NN_MODULE(MiniCPMSALAModel, model); | ||
| INFINICORE_NN_MODULE(infinilm::layers::linear::ReplicatedLinear, lm_head); | ||
| INFINICORE_NN_MODULE(infinicore::nn::Linear, lm_head); |
There was a problem hiding this comment.
使用infinilm::layers::linear::ReplicatedLinear, infinicore::nn::Linear不再使用
| std::unique_ptr<cache::CacheConfig> cache_config_; | ||
| }; | ||
|
|
||
| std::shared_ptr<infinilm::config::ModelConfig> create_minicpm_sala_model_config(std::shared_ptr<infinilm::config::ModelConfig> model_config); |
There was a problem hiding this comment.
实现这个create_minicpm_sala_model_config函数。
| const cache::CacheConfig *MiniCPMSALAForCausalLM::get_cache_config() const { | ||
| return cache_config_.get(); | ||
| } | ||
|
|
There was a problem hiding this comment.
kvcache创建在 minicpm_sala_allocate_kv_cache_tensors.cpp文件中
|
|
||
| } // namespace infinilm::models::minicpm_sala | ||
|
|
||
| namespace { |
|
|
||
| class MiniCPMSALADecoderLayer : public infinicore::nn::Module { | ||
| public: | ||
| MiniCPMSALADecoderLayer(std::shared_ptr<infinilm::config::ModelConfig> model_config, |
There was a problem hiding this comment.
There was a problem hiding this comment.
移除MiniCPMSALADecoderLayer的rank_info和attention_backend参数
| std::optional<infinicore::Tensor> cu_seqlens, | ||
| std::optional<infinicore::Tensor> block_tables, | ||
| std::optional<infinicore::Tensor> slot_mapping) const; | ||
|
|
There was a problem hiding this comment.
移除多余的参数,forward只需要(const infinicore::Tensor &positions,
infinicore::Tensor &hidden_states,
infinicore::Tensor &residual);
| std::optional<infinicore::Tensor> block_tables, | ||
| std::optional<infinicore::Tensor> slot_mapping) const; | ||
|
|
||
| void set_rotary_emb(const std::shared_ptr<infinicore::nn::RoPE> &rotary_emb); |
There was a problem hiding this comment.
移除set_rotary_emb和reset_cache函数
| #include "../../backends/attention_backends.hpp" | ||
| #include "../../cache/kv_cache.hpp" | ||
| #include "../../config/model_config.hpp" | ||
| #include "../../engine/distributed/distributed.hpp" |
| #include "models_registry.hpp" | ||
| #include "llama/llama.hpp" | ||
| #include "minicpm_sala/minicpm_sala_for_causal_lm.hpp" | ||
|
|
|
|
||
| #include "../global_state/global_state.hpp" | ||
| #include "../models/model_factory.hpp" | ||
| #include "../models/models_registry.hpp" |
There was a problem hiding this comment.
新增模型,不要修改框架层面上的代码。不能修改该文件
| const std::string model_type = model_config->get<std::string>("model_type"); | ||
| const auto &config_map = models::get_model_config_map(); | ||
| auto it = config_map.find(model_type); | ||
| if (it != config_map.end()) { |
There was a problem hiding this comment.
新增模型,不要修改框架层面上的代码。不能修改该文件
|
|
||
| #include <algorithm> | ||
| #include <limits> | ||
| #include <memory> |
|
|
||
| void MiniCPMSALAModel::reset_cache(const cache::CacheConfig *cache_config) { | ||
| if (cache_config == nullptr) { | ||
| kv_cache_minicpm4_ = nullptr; |
There was a problem hiding this comment.
kvcache创建的代码在csrc/models/minicpm_sala/minicpm_sala_allocate_kv_cache_tensors.cpp中
| if (auto static_cfg = dynamic_cast<const cache::StaticKVCacheConfig *>(cache_config)) { | ||
| // Allocate separate caches by KV shape to avoid per-layer padding copies. |
| INFINICORE_NN_MODULE_INIT(o_gate, hidden_size_, num_attention_heads * head_dim_, | ||
| model_config->get_quantization_method(), use_bias_, dtype, device); | ||
| } | ||
| void MiniCPMSALAAttention::set_rotary_emb(const std::shared_ptr<infinicore::nn::RoPE> &rotary_emb) { |
| std::optional<infinicore::Tensor> cu_seqlens, | ||
| std::optional<infinicore::Tensor> block_tables, | ||
| std::optional<infinicore::Tensor> slot_mapping) const; | ||
|
|
| INFINICORE_NN_MODULE_INIT(mlp, model_config, device); | ||
| } | ||
|
|
||
| void MiniCPMSALADecoderLayer::set_rotary_emb(const std::shared_ptr<infinicore::nn::RoPE> &rotary_emb) { |
| void MiniCPMSALADecoderLayer::reset_cache() { | ||
| self_attn_->reset_cache(); |
|
|
||
| auto to_device = [&](const std::optional<infinicore::Tensor> &t) | ||
| -> std::optional<infinicore::Tensor> { | ||
| return t.has_value() ? t.value()->to(device) : t; |
Signed-off-by: Ceng23333 <441651826@qq.com>
Signed-off-by: Ceng23333 <441651826@qq.com>
Signed-off-by: Ceng23333 <441651826@qq.com>
Signed-off-by: Ceng23333 <441651826@qq.com>
Signed-off-by: Ceng23333 <441651826@qq.com>
| void reset_cache(const cache::CacheConfig *cache_config) override; | ||
|
|
||
| protected: | ||
| const cache::CacheConfig *get_cache_config() const override; |
There was a problem hiding this comment.
get_cache_config()属于 infinimodel的抽象类了,移除具体模型中的get_cache_config函数
| INFINICORE_NN_MODULE(MiniCPMSALAModel, model); | ||
| INFINICORE_NN_MODULE(infinilm::layers::linear::ReplicatedLinear, lm_head); | ||
| INFINICORE_NN_MODULE(infinicore::nn::Linear, lm_head); | ||
| std::unique_ptr<cache::CacheConfig> cache_config_; |
| MiniCPMSALAModel(std::shared_ptr<infinilm::config::ModelConfig> model_config, | ||
| const infinicore::Device &device, | ||
| engine::distributed::RankInfo rank_info = engine::distributed::RankInfo(), | ||
| backends::AttentionBackend attention_backend = backends::AttentionBackend::Default); |
There was a problem hiding this comment.
移除MiniCPMSALAModel的rank_info和attention_backend参数
| engine::distributed::RankInfo rank_info = engine::distributed::RankInfo(), | ||
| backends::AttentionBackend attention_backend = backends::AttentionBackend::Default); | ||
|
|
||
| infinicore::Tensor forward(const infinicore::Tensor &input_ids, |
There was a problem hiding this comment.
移除past_sequence_lengths total_sequence_lengths input_offsets cu_seqlens block_tables slot_mapping 这些参数。上面是attn_metadata的数据,只在attn计算时用到,不再一层一层地传递。
| std::optional<infinicore::Tensor> block_tables, | ||
| std::optional<infinicore::Tensor> slot_mapping) const; | ||
|
|
||
| void reset_cache(const cache::CacheConfig *cache_config); |
There was a problem hiding this comment.
reset_cache 属于 CausalLM类,移除。
| INFINICORE_NN_MODULE(infinicore::nn::Embedding, embed_tokens); | ||
| INFINICORE_NN_MODULE_VEC(MiniCPMSALADecoderLayer, layers); | ||
| INFINICORE_NN_MODULE(infinicore::nn::RMSNorm, norm); | ||
| INFINICORE_NN_MODULE(infinicore::nn::RoPE, rotary_emb); |
There was a problem hiding this comment.
移除rotary_emb。 infinicore::nn::RoPE的对象在 minicpm_sala_attention类中,通过get_rope创建
| infinicore::Tensor forward(const infinicore::Tensor &hidden_states, | ||
| const infinicore::Tensor &position_ids, | ||
| std::shared_ptr<infinilm::cache::Cache> kv_cache, | ||
| std::optional<infinicore::Tensor> past_sequence_lengths, |
There was a problem hiding this comment.
移除forward的这些 attn_metadata参数
| std::optional<infinicore::Tensor> slot_mapping) const; | ||
|
|
||
| void set_rotary_emb(const std::shared_ptr<infinicore::nn::RoPE> &rotary_emb); | ||
| void reset_cache(); |
|
|
||
| kv_cache_minicpm4_ = (minicpm4_layer_count > 0) |
There was a problem hiding this comment.
根据minicpm_sala_allocate_kv_cache_tensors.cpp文件创建kvcache。 kv_cache_minicpm4_和kv_cache_lightning_两个变量可以合并成一个
#294