From 2129b787287d0d783829119d04810eabde4e1b97 Mon Sep 17 00:00:00 2001
From: yirongjie
Date: Fri, 18 Oct 2024 10:04:38 +0000
Subject: [PATCH 1/2] refactor: add TransformerConfig

---
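Notes: this patch gives every per-model config a common TransformerConfig
base class and adds a virtual Module::clear_kvcache(), so driver code can
reset the KV caches through a plain Module pointer instead of casting to
the concrete model type (before this change each model declared its own
non-virtual clear_kvcache()). A minimal usage sketch, illustrative only:
the weight-file name is invented, LLaMAConfig is assumed to keep the
(token_limit, ...) constructor of the sibling configs touched here, and
Module::load() is used as in the existing demos.

    #include "models/llama/configuration_llama.hpp"
    #include "models/llama/modeling_llama.hpp"

    int main() {
        LLaMAConfig config(/*token_limit=*/1024);  // KV-cache budget in tokens
        LLaMAModel model(config);
        model.load("llama-7b-q4.mllm");            // hypothetical weight file
        // ... run one dialogue turn, e.g. auto logits = model({input_ids})[0]; ...
        Module *base = &model;                     // generic driver sees only Module
        base->clear_kvcache();                     // dispatches to LLaMAModel's override
        return 0;
    }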
 .gitignore                                    |  3 +-
 src/Module.hpp                                |  3 ++
 src/models/dclm/configuration_dclm.hpp        |  5 +-
 src/models/gemma/configuration_gemma.hpp      |  4 +-
 .../imagebind/configuration_imagebind.hpp     |  8 +--
 src/models/llama/configuration_llama.hpp      |  8 +--
 src/models/llama/modeling_elastic_llama.hpp   | 52 +++++++++----------
 src/models/llama/modeling_llama.hpp           | 18 +++----
 src/models/minicpm/configuration_minicpm.hpp  |  5 +-
 src/models/minicpm/modeling_minicpm.hpp       | 21 ++++----
 src/models/mistral/configuration_mistral.hpp  |  2 +-
 src/models/openelm/configuration_openelm.hpp  |  2 +-
 src/models/opt/configuration_opt.hpp          |  2 +-
 src/models/phi3/configuration_phi3.hpp        |  2 +-
 src/models/phi3/modeling_phi3.hpp             |  2 +-
 src/models/qwen/configuration_qwen.hpp        |  2 +-
 src/models/qwen/modeling_qwen.hpp             |  4 +-
 src/models/smollm/configuration_smollm.hpp    |  4 +-
 src/models/smollm/modeling_smollm.hpp         |  2 +-
 .../stablelm/configuration_stablelm.hpp       |  2 +-
 .../tinyllama/configuration_tinyllama.hpp     |  2 +-
 .../transformer/configuration_transformer.hpp |  9 +++-
 src/models/vit/configuration_vit.hpp          |  4 +-
 23 files changed, 89 insertions(+), 77 deletions(-)

diff --git a/.gitignore b/.gitignore
index 1e6941d4..54abbe2e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -37,4 +37,5 @@ src/models/deepseek/*
 examples/demo_phonellm.cpp
 src/models/phonellm/*
 examples/demo_minicpm3.cpp
-src/models/minicpm3/*
\ No newline at end of file
+src/models/minicpm3/*
+examples/demo.cpp
diff --git a/src/Module.hpp b/src/Module.hpp
index bfaf86a8..d77b34f5 100644
--- a/src/Module.hpp
+++ b/src/Module.hpp
@@ -241,6 +241,9 @@ class Module {
     void setNoLoadWeightsDtype(DataType dtype) {
         Op::noLoadWeightsDtype() = dtype;
     }
+    virtual void clear_kvcache() {
+        ;
+    }
 
     vector<double> profiling(string name = "") {
         vector<double> output;
diff --git a/src/models/dclm/configuration_dclm.hpp b/src/models/dclm/configuration_dclm.hpp
index 8839d46f..dec3a785 100644
--- a/src/models/dclm/configuration_dclm.hpp
+++ b/src/models/dclm/configuration_dclm.hpp
@@ -27,14 +27,15 @@ class DCLMNameConfig : public TransformerNameConfig {
     }
 };
 
-struct DCLMConfig {
-    explicit DCLMConfig(int token_limit, const string billions = "1B", RoPEType type = RoPEType::HFHUBROPE) :
+struct DCLMConfig : public TransformerConfig {
+    explicit DCLMConfig(int token_limit, const string billions = "1B", RoPEType type = RoPEType::HFHUBROPE, int vocab = 50432) :
         cache_limit(token_limit) {
         names_config.init(type);
         if (!(billions == "1B" || billions == "1b")) {
             throw std::runtime_error("Unsupported model size");
         }
         RoPE_type = type;
+        vocab_size = vocab;
     };
 
     int dim = 2048;
diff --git a/src/models/gemma/configuration_gemma.hpp b/src/models/gemma/configuration_gemma.hpp
index 34d058ea..a5efa38b 100644
--- a/src/models/gemma/configuration_gemma.hpp
+++ b/src/models/gemma/configuration_gemma.hpp
@@ -74,7 +74,7 @@ class GemmaNameConfig : public TransformerNameConfig {
     std::string _gate_proj_name;
 };
 
-struct GemmaConfig {
+struct GemmaConfig : public TransformerConfig {
     explicit GemmaConfig(int token_limit, const string billions = "2B", RoPEType type = RoPEType::HFHUBROPE) :
         cache_limit(token_limit) {
         names_config.init(type);
@@ -93,7 +93,7 @@
     int intermediate_size = 16384;
     int head_dim = 256;
     float rms_norm_eps = 1e-6;
-    float rope_theta= 10000;
+    float rope_theta = 10000;
 
     int cache_limit;
     RoPEType RoPE_type = RoPEType::HFHUBROPE;
diff --git a/src/models/imagebind/configuration_imagebind.hpp b/src/models/imagebind/configuration_imagebind.hpp
index 995a151b..c13e4f88 100644
--- a/src/models/imagebind/configuration_imagebind.hpp
+++ b/src/models/imagebind/configuration_imagebind.hpp
@@ -73,7 +73,7 @@ class ImagebindNameConfig : public TransformerNameConfig {
         _audio_blocks_name = "modality_trunks.audio.blocks.";
     }
 };
 
-class ImagebindConfig {
+class ImagebindConfig : public TransformerConfig {
 public:
     ImagebindNameConfig names_config;
@@ -99,13 +99,13 @@
     int audio_stride = 10;
     int audio_h = 128;
     int audio_w = 204;
-    int audio_block_num =12;
+    int audio_block_num = 12;
 
     int head_hidden_dim = 1024;
 
-    explicit ImagebindConfig(const string& model_type = "huge") {
+    explicit ImagebindConfig(const string &model_type = "huge") {
         if (model_type != "huge") {
-            std::cerr<<"model type not supported"<<std::endl;
+            std::cerr << "model type not supported" << std::endl;
diff --git a/src/models/llama/modeling_elastic_llama.hpp b/src/models/llama/modeling_elastic_llama.hpp
         if (RoPE_type > 0) {
             q_rope = RoPE(RoPE_type, base_name + "q_rope");
             k_rope = RoPE(RoPE_type, base_name + "k_rope");
         }
         if (cache_limit > 0) {
-            k_cache = KVCache(head_size/kv_head_size, cache_limit, base_name + "k_cache");
-            v_cache = KVCache(head_size/kv_head_size, cache_limit, base_name + "v_cache");
+            k_cache = KVCache(head_size / kv_head_size, cache_limit, base_name + "k_cache");
+            v_cache = KVCache(head_size / kv_head_size, cache_limit, base_name + "v_cache");
         }
         softmax = Softmax(DIMENSION, do_mask, base_name + "softmax");
         o_proj = ElasticLinear(head_size * attn_hidden_dim, hidden_dim, bias, base_name + names._o_proj_name);
     }
-    vector<Tensor> Forward(vector<Tensor> inputs, vector<std::any> args) override {
+    vector<Tensor> Forward(vector<Tensor> inputs, vector<std::any> args) override {
         vector<int> activate_head_dims = std::any_cast<vector<int>>(args[0]);
         int activate_head_dim = activate_head_dims[0];
-        activate_head_dim = (activate_head_dim==-1)? kv_head_size_: (activate_head_dim);
+        activate_head_dim = (activate_head_dim == -1) ? kv_head_size_ : (activate_head_dim);
         Tensor q, k, v;
-        q = q_proj(inputs[0], -1, activate_head_dim*attn_hidden_dim_);
-        k = k_proj(inputs[1], -1, activate_head_dim*attn_hidden_dim_);
-        v = v_proj(inputs[2], -1, activate_head_dim*attn_hidden_dim_);
+        q = q_proj(inputs[0], -1, activate_head_dim * attn_hidden_dim_);
+        k = k_proj(inputs[1], -1, activate_head_dim * attn_hidden_dim_);
+        v = v_proj(inputs[2], -1, activate_head_dim * attn_hidden_dim_);
         q = q.view(-1, activate_head_dim, -1, attn_hidden_dim_);
         k = k.view(-1, activate_head_dim, -1, attn_hidden_dim_);
         v = v.view(-1, activate_head_dim, -1, attn_hidden_dim_);
@@ -72,19 +72,19 @@
         }
         k = k.transpose(SEQUENCE, DIMENSION);
         auto qk = Tensor::mm(q, k);
-        qk = qk / std::sqrt(attn_hidden_dim_);//attn_hidden_dim_
+        qk = qk / std::sqrt(attn_hidden_dim_); // attn_hidden_dim_
         if (k_cache.ready() && v_cache.ready()) {
             qk = softmax(qk, k_cache.getCacheSeqLen());
-        }else{
+        } else {
             qk = softmax(qk);
         }
         auto o = Tensor::mm(qk, v);
         o = o.view(-1, 1, -1, attn_hidden_dim_ * activate_head_dim);
-        o = o_proj(o, activate_head_dim*attn_hidden_dim_, -1);
+        o = o_proj(o, activate_head_dim * attn_hidden_dim_, -1);
         return {o};
     }
-    vector<KVCache *> get_cache() {
-        return {&k_cache,&v_cache};
+    vector<KVCache *> get_cache() {
+        return {&k_cache, &v_cache};
     }
 };
@@ -102,7 +102,7 @@ class ElasticLLaMAMLP final : public Module {
         up_proj = ElasticLinear(hidden_dim, ffn_hidden, false, base_name + names._up_proj_name);
         down_proj = ElasticLinear(ffn_hidden, hidden_dim, false, base_name + names._down_proj_name);
     }
-    vector<Tensor> Forward(vector<Tensor> inputs, vector<std::any> args) override {
+    vector<Tensor> Forward(vector<Tensor> inputs, vector<std::any> args) override {
         vector<int> activate_dims = std::any_cast<vector<int>>(args[0]);
         int activate_dim = activate_dims[0];
         auto x = gate_proj(inputs[0], -1, activate_dim);
@@ -124,12 +124,12 @@ class ElasticLLaMABlock final : public Module {
     ElasticLLaMABlock() = default;
     ElasticLLaMABlock(int hidden_dim, int head_size, int ffn_hidden, RoPEType RoPE_type, int cache_limit, const LLaMANameConfig &names, const string &base_name) {
         attention = ElasticMultiHeadAttention(hidden_dim, head_size, head_size, hidden_dim / head_size,
-                                              RoPE_type, cache_limit, true, false, names, base_name + names._attn_base_name);
+                                              RoPE_type, cache_limit, true, false, names, base_name + names._attn_base_name);
         mlp = ElasticLLaMAMLP(hidden_dim, ffn_hidden, names, base_name + names._ffn_base_name);
         norm1 = RMSNorm(hidden_dim, 1e-6, base_name + names._attn_norm_name);
         norm2 = RMSNorm(hidden_dim, 1e-6, base_name + names._ffn_norm_name);
     }
-    vector<Tensor> Forward(vector<Tensor> inputs, vector<std::any> args) override {
+    vector<Tensor> Forward(vector<Tensor> inputs, vector<std::any> args) override {
         vector<int> activate_dims = std::any_cast<vector<int>>(args[0]);
         vector<int> dim_attns = {activate_dims[0]};
         vector<int> dim_mlps = {activate_dims[1]};
@@ -141,7 +141,7 @@
         x = x + tmp;
         return {x};
     }
-    ElasticMultiHeadAttention& get_attention() {
+    ElasticMultiHeadAttention &get_attention() {
         return attention;
     }
 };
@@ -156,21 +156,21 @@
 public:
     explicit ElasticLLaMAModel(const LLaMAConfig &config) :
         ElasticLLaMAModel(config.vocab_size, config.hidden_dim, config.head_size, config.ffn_hidden, config.block_num, config.RoPE_type, config.cache_limit,
-                          config.names_config, config.names_config.blk_name) {
+                          config.names_config, config.names_config.blk_name) {
     }
     ElasticLLaMAModel(int vocab_size, int hidden_dim, int head_size, int ffn_hidden, int block_num, RoPEType RoPE_type, int cache_limit,
-                      const LLaMANameConfig &names, const string &base_name) {
+                      const LLaMANameConfig &names, const string &base_name) {
         embedding = Embedding(vocab_size, hidden_dim, names.token_embd_name);
         blocks = List<ElasticLLaMABlock>(block_num, hidden_dim, head_size, ffn_hidden, RoPE_type, cache_limit, names, base_name);
         norm = RMSNorm(hidden_dim, 1e-6, names.post_norm_name);
         lm_head = Linear(hidden_dim, vocab_size, false, names.lm_head_name);
         num_layer_size = block_num;
     }
-    vector<Tensor> Forward(vector<Tensor> inputs, vector<std::any> args) override {
+    vector<Tensor> Forward(vector<Tensor> inputs, vector<std::any> args) override {
         vector<vector<int>> activate_dims = std::any_cast<vector<vector<int>>>(args[0]);
         assert(activate_dims.size() == num_layer_size);
         auto x = embedding(inputs[0]);
-        for (int id = 0; id<num_layer_size; id++) {
+        for (int id = 0; id < num_layer_size; id++) {
                 cache->clearCache();
             }
diff --git a/src/models/llama/modeling_llama.hpp b/src/models/llama/modeling_llama.hpp
index 2bc0cb3b..31d917e0 100644
--- a/src/models/llama/modeling_llama.hpp
+++ b/src/models/llama/modeling_llama.hpp
@@ -26,7 +26,7 @@ class LLaMAMLP final : public Module {
         up_proj = Linear(hidden_dim, ffn_hidden, false, base_name + names._up_proj_name);
         down_proj = Linear(ffn_hidden, hidden_dim, false, base_name + names._down_proj_name);
     }
-    vector<Tensor> Forward(vector<Tensor> inputs, vector<std::any> args) override {
+    vector<Tensor> Forward(vector<Tensor> inputs, vector<std::any> args) override {
         auto x = gate_proj(inputs[0]);
         x = silu(x);
         auto y = up_proj(inputs[0]);
@@ -51,7 +51,7 @@ class LLaMABlock final : public Module {
         norm1 = RMSNorm(hidden_dim, 1e-6, base_name + names._attn_norm_name);
         norm2 = RMSNorm(hidden_dim, 1e-6, base_name + names._ffn_norm_name);
     }
-    vector<Tensor> Forward(vector<Tensor> inputs, vector<std::any> args) override {
+    vector<Tensor> Forward(vector<Tensor> inputs, vector<std::any> args) override {
         auto x = norm1(inputs[0]);
         x = attention({x, x, x})[0];
         auto tmp = x + inputs[0];
@@ -60,8 +60,8 @@ class LLaMABlock final : public Module {
         x = x + tmp;
         return {x};
     }
-    
-    MultiHeadAttention& get_attention() {
+
+    MultiHeadAttention &get_attention() {
         return attention;
     }
 };
@@ -74,8 +74,8 @@ class LLaMAModel final : public Module {
 
 public:
     explicit LLaMAModel(const LLaMAConfig &config) :
-        LLaMAModel(config.vocab_size, config.hidden_dim, config.head_size, config.num_key_value_heads, config.ffn_hidden, config.block_num,
-                   config.RoPE_type, config.rope_theta, config.max_position_embeddings, config.cache_limit,
+        LLaMAModel(config.vocab_size, config.hidden_dim, config.head_size, config.num_key_value_heads, config.ffn_hidden, config.block_num,
+                   config.RoPE_type, config.rope_theta, config.max_position_embeddings, config.cache_limit,
                    config.names_config, config.names_config.blk_name) {
     }
     LLaMAModel(int vocab_size, int hidden_dim, int head_size, int kv_head_size, int ffn_hidden, int block_num, RoPEType RoPE_type, float rope_theta, int max_position_embeddings, int cache_limit,
@@ -85,7 +85,7 @@
         norm = RMSNorm(hidden_dim, 1e-6, names.post_norm_name);
         lm_head = Linear(hidden_dim, vocab_size, false, names.lm_head_name);
     }
-    vector<Tensor> Forward(vector<Tensor> inputs, vector<std::any> args) override {
+    vector<Tensor> Forward(vector<Tensor> inputs, vector<std::any> args) override {
         auto x = embedding(inputs[0]);
         for (auto &block : blocks) {
             x = block({x})[0];
@@ -95,9 +95,9 @@
         return {x};
     }
 
-    void clear_kvcache() {
+    void clear_kvcache() override {
         for (auto &block : blocks) {
-            auto kvcahce =block.get_attention().get_cache();
+            auto kvcahce = block.get_attention().get_cache();
             for (auto &cache : kvcahce) {
                 cache->clearCache();
             }
diff --git a/src/models/minicpm/configuration_minicpm.hpp b/src/models/minicpm/configuration_minicpm.hpp
index 60661fbd..8b6b1fcd 100644
--- a/src/models/minicpm/configuration_minicpm.hpp
+++ b/src/models/minicpm/configuration_minicpm.hpp
@@ -28,11 +28,10 @@ class MiniCPMNameConfig : public TransformerNameConfig {
         token_embd_name = "model.embed_tokens";
         post_norm_name = "model.norm";
         lm_head_name = "lm_head";
-
     }
 };
 
-struct MiniCPMConfig {
+struct MiniCPMConfig : public TransformerConfig {
     explicit MiniCPMConfig(int token_limit, string billions = "2B") :
         cache_limit(token_limit) {
         names_config.init();
@@ -79,7 +78,7 @@ struct MiniCPMConfig {
     double rms_norm_eps = 1e-05;
     float rope_theta = 10000.0;
     int vocab_size = 122753;
-    int head_dim = 64; //self.hidden_size // self.num_heads
+    int head_dim = 64; // self.hidden_size // self.num_heads
     float scale_depth = 1.4;
     float scale_emb = 12;
     float dim_model_base = 256;
diff --git a/src/models/minicpm/modeling_minicpm.hpp b/src/models/minicpm/modeling_minicpm.hpp
index bdd3955b..42145856 100644
--- a/src/models/minicpm/modeling_minicpm.hpp
+++ b/src/models/minicpm/modeling_minicpm.hpp
@@ -23,7 +23,7 @@ class MiniCPMMLP final : public Module {
     std::vector<Tensor> Forward(std::vector<Tensor> inputs, std::vector<std::any> args) override {
         auto x = gate_proj(inputs[0]);
         x = silu(x);
-        auto y = up_proj(inputs[0]); //ERROR
+        auto y = up_proj(inputs[0]); // ERROR
         x = x * y;
         x = down_proj(x);
         return {x};
@@ -41,10 +41,10 @@ class MiniCPMDecoder final : public Module {
 public:
     MiniCPMDecoder() = default;
     MiniCPMDecoder(const MiniCPMConfig &config, const MiniCPMNameConfig &names, const string &base_name) {
-        self_atten = MultiHeadAttention(config.hidden_size, config.num_attention_heads, config.num_key_value_heads,
+        self_atten = MultiHeadAttention(config.hidden_size, config.num_attention_heads, config.num_key_value_heads,
                                         config.hidden_size / config.num_attention_heads, SPLIT_NONE, false, false,
-                                        config.RoPE_type, config.rope_theta, config.max_position_embeddings, config.cache_limit,
-                                        true, false, names, base_name + names._attn_base_name);
+                                        config.RoPE_type, config.rope_theta, config.max_position_embeddings, config.cache_limit,
+                                        true, false, names, base_name + names._attn_base_name);
         mlp = MiniCPMMLP(config.hidden_size, config.intermediate_size, names, base_name + names._ffn_base_name);
         input_layernorm = RMSNorm(config.hidden_size, config.rms_norm_eps, base_name + names._attn_norm_name);
         post_attention_layernorm = RMSNorm(config.hidden_size, config.rms_norm_eps, base_name + names._ffn_norm_name);
@@ -61,7 +61,7 @@ class MiniCPMDecoder final : public Module {
         hidden_states = hidden_states * (scale_depth / std::sqrt(num_hidden_layers)) + tmp;
         return {hidden_states};
     }
-    
+
     MultiHeadAttention &get_attention() {
         return self_atten;
     }
@@ -82,7 +82,7 @@ class MiniCPMModel final : public Module {
         blocks = List<MiniCPMDecoder>(config.num_hidden_layers, config, names, base_name);
         norm = RMSNorm(config.hidden_size, config.rms_norm_eps, names.post_norm_name);
     }
-    //receive embeds
+    // receive embeds
     std::vector<Tensor> Forward(std::vector<Tensor> inputs, std::vector<std::any> args) override {
         auto hidden_states = inputs[0];
         for (auto &block : blocks) {
@@ -92,7 +92,7 @@ class MiniCPMModel final : public Module {
         return {hidden_states};
     }
 
-    void clear_kvcache() {
+    void clear_kvcache() override {
         for (auto &block : blocks) {
             auto kvcahce = block.get_attention().get_cache();
             for (auto &cache : kvcahce) {
@@ -100,6 +100,7 @@ class MiniCPMModel final : public Module {
             }
         }
     }
+
 private:
     std::vector<MiniCPMDecoder> blocks;
     Layer norm;
@@ -118,13 +119,13 @@ class MiniCPMForCausalLM final : public Module {
     }
 
     std::vector<Tensor> Forward(std::vector<Tensor> inputs, std::vector<std::any> args) override {
-        auto x = embedding(inputs[0])*scale_emb;
+        auto x = embedding(inputs[0]) * scale_emb;
         auto outputs = model({x})[0];
-        outputs = outputs/(hidden_size / dim_model_base);
+        outputs = outputs / (hidden_size / dim_model_base);
         outputs = Tensor::mm(outputs, lm_head().transpose(Chl::SEQUENCE, Chl::DIMENSION));
         return {outputs};
     }
-    void clear_kvcache() {
+    void clear_kvcache() override {
         model.clear_kvcache();
     }
diff --git a/src/models/mistral/configuration_mistral.hpp b/src/models/mistral/configuration_mistral.hpp
index 61c0bc2c..ebdb2cf9 100644
--- a/src/models/mistral/configuration_mistral.hpp
+++ b/src/models/mistral/configuration_mistral.hpp
@@ -71,7 +71,7 @@ class MistralNameConfig : public TransformerNameConfig {
     std::string _gate_proj_name;
 };
 
-struct MistralConfig {
+struct MistralConfig : public TransformerConfig {
     explicit MistralConfig(int token_limit, string billions = "7B", RoPEType type = RoPEType::HFHUBROPE) :
         cache_limit(token_limit) {
         names_config.init(type);
diff --git a/src/models/openelm/configuration_openelm.hpp b/src/models/openelm/configuration_openelm.hpp
index d1926b4e..880a6194 100644
--- a/src/models/openelm/configuration_openelm.hpp
+++ b/src/models/openelm/configuration_openelm.hpp
@@ -58,7 +58,7 @@ class OpenELMNameConfig : public TransformerNameConfig {
     std::string _gate_proj_name;
 };
 
-struct OpenELMConfig {
+struct OpenELMConfig : public TransformerConfig {
     explicit OpenELMConfig(int token_limit, string billions = "1.1B", RoPEType type = RoPEType::HFHUBROPE) :
         cache_limit(token_limit) {
         names_config.init(type);
diff --git a/src/models/opt/configuration_opt.hpp b/src/models/opt/configuration_opt.hpp
index 1d792498..ee742c49 100644
--- a/src/models/opt/configuration_opt.hpp
+++ b/src/models/opt/configuration_opt.hpp
@@ -31,7 +31,7 @@ class optNameConfig : public TransformerNameConfig {
     }
 };
 
-class OPTConfig {
+class OPTConfig : public TransformerConfig {
 public:
     optNameConfig names_config;
     int vocab_size{};
diff --git a/src/models/phi3/configuration_phi3.hpp b/src/models/phi3/configuration_phi3.hpp
index 4dc2c23e..77a058f1 100644
--- a/src/models/phi3/configuration_phi3.hpp
+++ b/src/models/phi3/configuration_phi3.hpp
@@ -39,7 +39,7 @@ class Phi3NameConfig : public TransformerNameConfig {
     }
 };
 
-class Phi3Config {
+class Phi3Config : public TransformerConfig {
 public:
     int vocab_size{};
     int hidden_dim{};
diff --git a/src/models/phi3/modeling_phi3.hpp b/src/models/phi3/modeling_phi3.hpp
index f014106f..3016c084 100644
--- a/src/models/phi3/modeling_phi3.hpp
+++ b/src/models/phi3/modeling_phi3.hpp
@@ -95,7 +95,7 @@ class Phi3Model final : public Module {
         return {x};
     }
 
-    void clear_kvcache() {
+    void clear_kvcache() override {
         for (auto &block : blocks) {
             auto kvcahce = block.get_attention().get_cache();
             for (auto &cache : kvcahce) {
diff --git a/src/models/qwen/configuration_qwen.hpp b/src/models/qwen/configuration_qwen.hpp
index 95108bd0..7490fd5c 100644
--- a/src/models/qwen/configuration_qwen.hpp
+++ b/src/models/qwen/configuration_qwen.hpp
@@ -76,7 +76,7 @@ class QWenNameConfig : public TransformerNameConfig {
     std::string _gate_proj_name;
 };
 
-struct QWenConfig {
+struct QWenConfig : public TransformerConfig {
     explicit QWenConfig(int token_limit, string billions = "0.5B", RoPEType type = RoPEType::HFHUBROPE) :
         cache_limit(token_limit) {
         names_config.init(type);
diff --git a/src/models/qwen/modeling_qwen.hpp b/src/models/qwen/modeling_qwen.hpp
index 6e8e9a49..f2b61aaa 100644
--- a/src/models/qwen/modeling_qwen.hpp
+++ b/src/models/qwen/modeling_qwen.hpp
@@ -185,7 +185,7 @@ class QWenModel final : public Module {
         return {x};
     }
-    void clear_kvcache() {
+    void clear_kvcache() override {
         for (auto &block : blocks) {
             auto kvcahce = block.get_attention().get_cache();
             for (auto &cache : kvcahce) { cache->clearCache(); }
         }
@@ -229,7 +229,7 @@ class QWenForCausalLM final : public Module {
         }
         return {outputs};
     }
-    void clear_kvcache() {
+    void clear_kvcache() override {
         model.clear_kvcache();
     }
 
diff --git a/src/models/smollm/configuration_smollm.hpp b/src/models/smollm/configuration_smollm.hpp
index d9e2b490..f77999c1 100644
--- a/src/models/smollm/configuration_smollm.hpp
+++ b/src/models/smollm/configuration_smollm.hpp
@@ -68,7 +68,7 @@ class SmolLMNameConfig : public TransformerNameConfig {
     }
 };
 
-class SmolLMConfig {
+class SmolLMConfig : public TransformerConfig {
 public:
     int vocab_size{};
     int hidden_dim{};
@@ -82,7 +82,7 @@ class SmolLMConfig {
     float rope_theta;
     int max_position_embeddings;
 
-    explicit SmolLMConfig(int token_limit, string billions = "1.7B", RoPEType type = HFHUBROPE, int vocab = 32000) {
+    explicit SmolLMConfig(int token_limit, string billions = "1.7B", RoPEType type = HFHUBROPE, int vocab = 49152) {
         names_config.init(type);
         vocab_size = vocab;
         if (billions == "1.7B" || billions == "1.7b") {
diff --git a/src/models/smollm/modeling_smollm.hpp b/src/models/smollm/modeling_smollm.hpp
index 2dd94d72..99febfe5 100644
--- a/src/models/smollm/modeling_smollm.hpp
+++ b/src/models/smollm/modeling_smollm.hpp
@@ -102,7 +102,7 @@ class SmolLMModel final : public Module {
         return {x};
     }
 
-    void clear_kvcache() {
+    void clear_kvcache() override {
         for (auto &block : blocks) {
             auto kvcahce = block.get_attention().get_cache();
             for (auto &cache : kvcahce) {
diff --git a/src/models/stablelm/configuration_stablelm.hpp b/src/models/stablelm/configuration_stablelm.hpp
index edbdf1f7..2e1eac61 100644
--- a/src/models/stablelm/configuration_stablelm.hpp
+++ b/src/models/stablelm/configuration_stablelm.hpp
@@ -39,7 +39,7 @@ class stablelmNameConfig : public TransformerNameConfig {
     }
 };
 
-class StableLMConfig {
+class StableLMConfig : public TransformerConfig {
 public:
     int vocab_size{};
     int hidden_dim{};
diff --git a/src/models/tinyllama/configuration_tinyllama.hpp b/src/models/tinyllama/configuration_tinyllama.hpp
index 0f1551b3..d95e6274 100644
--- a/src/models/tinyllama/configuration_tinyllama.hpp
+++ b/src/models/tinyllama/configuration_tinyllama.hpp
@@ -8,7 +8,7 @@
 
 using namespace mllm;
 
-class TinyLLaMAConfig {
+class TinyLLaMAConfig : public TransformerConfig {
 public:
     int vocab_size{};
     int hidden_dim{};
diff --git a/src/models/transformer/configuration_transformer.hpp b/src/models/transformer/configuration_transformer.hpp
index f38b4aff..042e0f97 100644
--- a/src/models/transformer/configuration_transformer.hpp
+++ b/src/models/transformer/configuration_transformer.hpp
@@ -6,6 +6,7 @@
 #define CONFIGURATION_TRANSFORMER_HPP
 
 #include "Layer.hpp"
+#include "Types.hpp"
 
 using namespace mllm;
 using namespace std;
@@ -31,4 +32,10 @@ class TransformerNameConfig {
     string _bias_k_name = "bias_k";
     string _bias_v_name = "bias_v";
 };
-#endif //CONFIGURATION_TRANSFORMER_HPP
+
+class TransformerConfig {
+public:
+    TransformerConfig() {
+    }
+};
+#endif // CONFIGURATION_TRANSFORMER_HPP
diff --git a/src/models/vit/configuration_vit.hpp b/src/models/vit/configuration_vit.hpp
index b38050d9..26d05a73 100644
--- a/src/models/vit/configuration_vit.hpp
+++ b/src/models/vit/configuration_vit.hpp
@@ -10,7 +10,7 @@
 
 using namespace mllm;
 
-class ViTNameConfig: public TransformerNameConfig{
+class ViTNameConfig : public TransformerNameConfig {
 public:
     string vison_model_name;
     string _layer_name;
@@ -67,7 +67,7 @@ class ViTNameConfig: public TransformerNameConfig{
     }
 };
 
-class ViTConfig {
+class ViTConfig : public TransformerConfig {
 public:
     ViTNameConfig names_config;
     int class_size;

From f1f931f7c6d25e739e2adeb609e7b0c4c94035fb Mon Sep 17 00:00:00 2001
From: yirongjie
Date: Mon, 21 Oct 2024 01:42:05 +0000
Subject: [PATCH 2/2] fix: example cmakelist.txt

---
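Note: the func_vlm_add_executable() macro gains the two Unicode sources,
presumably because the BPE tokenizer (Bpe.cpp) now depends on the Unicode
helper tables; without them the VLM demo targets would fail to link. The
commented-out demo target simply moves from the LLM demo list to the VLM
one, matching the examples/demo.cpp entry added to .gitignore in the first
patch.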
 examples/CMakeLists.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index e3c82f79..f7fc5033 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -40,6 +40,8 @@ macro(func_vlm_add_executable target)
         ${DIR_SRC}
         ${PROJECT_SOURCE_DIR}/src/tokenizers/Tokenizer.cpp
         ${PROJECT_SOURCE_DIR}/src/tokenizers/Unigram/Unigram.cpp
+        ${PROJECT_SOURCE_DIR}/src/tokenizers/Unicode.cpp
+        ${PROJECT_SOURCE_DIR}/src/tokenizers/UnicodeData.cpp
         ${PROJECT_SOURCE_DIR}/src/tokenizers/BPE/Bpe.cpp
         ${PROJECT_SOURCE_DIR}/src/processor/PreProcess.cpp
         ${DIR_SRC_PROCESSOE}
@@ -53,7 +55,6 @@ endmacro()
 
 ## new demos
 func_llm_add_executable(benchmark)
-# func_llm_add_executable(demo)
 func_llm_add_executable(demo_llama)
 func_llm_add_executable(demo_tinyllama)
 func_llm_add_executable(demo_stablelm)
@@ -77,6 +78,7 @@ func_vlm_add_executable(demo_vit)
 func_vlm_add_executable(demo_clip)
 func_vlm_add_executable(demo_imagebind)
 func_vlm_add_executable(demo_imagebind_1mod)
+# func_vlm_add_executable(demo)
 
 # QNN demo
 if(QNN)