Skip to content

Commit

Permalink
fix: Typo
Browse files: browse the repository at this point in the history
  • Loading branch information
yirongjie committed Jul 17, 2024
1 parent 5cab783 commit 6a9410f
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 104 deletions.
23 changes: 10 additions & 13 deletions examples/demo_stablelm.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
// ./demo_stablelm -m ../vocab/stablelm_merges.txt -v ../vocab/stablelm_vocab.mllm ../models/stablelm-2-1.6b.mllm

// ./demo_stablelm -m ../vocab/stablelm_merges.txt -v ../vocab/stablelm_vocab.mllm ../models/stablelm-2-1.6b-q4_k.mllm

#include <iostream>
#include "cmdline.h"
#include "models/stablelm/modeling_stablelm.hpp"
Expand All @@ -14,7 +10,7 @@ int main(int argc, char **argv) {
cmdline::parser cmdParser;
cmdParser.add<string>("vocab", 'v', "specify mllm tokenizer model path", false, "../vocab/stablelm_vocab.mllm");
cmdParser.add<string>("merge", 'm', "specify mllm merge path", false, "../vocab/stablelm_merges.txt");
cmdParser.add<string>("model", 'o', "specify mllm model path", false, "../models/stablelm-2-1.6b.mllm");
cmdParser.add<string>("model", 'o', "specify mllm model path", false, "../models/stablelm-2-1.6b-chat-q4_k.mllm");
cmdParser.add<int>("limits", 'l', "max KV cache size", false, 400);
cmdParser.add<int>("thread", 't', "num of threads", false, 4);
cmdParser.parse_check(argc, argv);
Expand All @@ -25,10 +21,13 @@ int main(int argc, char **argv) {
int tokens_limit = cmdParser.get<int>("limits");
CPUBackend::cpu_threads = cmdParser.get<int>("thread");

auto tokenizer = stablelmTokenizer(vocab_path, merge_path);
auto tokenizer = StableLMTokenizer(vocab_path, merge_path);

string system_prompt_start = "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n";
string system_prompt_end = "<|im_end|>\n<|im_start|>assistant\n";

stablelmConfig config(tokens_limit, "1.6B", HFHUBROPE);
auto model = stablelmModel(config);
StableLMConfig config(tokens_limit, "1.6B", HFHUBROPE);
auto model = StableLMModel(config);
model.load(model_path);

vector<string> in_strs = {
Expand All @@ -37,8 +36,9 @@ int main(int argc, char **argv) {
"Please introduce Beijing University of Posts and Telecommunications."};

for (int i = 0; i < in_strs.size(); ++i) {
auto in_str = in_strs[i];
std::cout << "[Q] " << in_str << std::endl;
const auto& in_str_origin = in_strs[i];
auto in_str = system_prompt_start + in_str_origin + system_prompt_end;
std::cout << "[Q] " << in_str_origin << std::endl;
auto input_tensor = tokenizer.tokenize(in_str, i);
std::cout << "[A] " << std::flush;
for (int step = 0; step < 100; step++) {
Expand All @@ -49,14 +49,11 @@ int main(int argc, char **argv) {
if (out_token == 100278) {
break;
}

size_t pos = 0;
while ((pos = out_string.find("Ċ", pos)) != std::string::npos) {
out_string.replace(pos, 2, " ");
}

pos = 0;

while ((pos = out_string.find("Ġ", pos)) != std::string::npos) {
out_string.replace(pos, 2, " ");
}
Expand Down
6 changes: 3 additions & 3 deletions src/models/stablelm/configuration_stablelm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class stablelmNameConfig : public TransformerNameConfig {
}
};

class stablelmConfig {
class StableLMConfig {
public:
int vocab_size{};
int hidden_dim{};
Expand All @@ -50,7 +50,7 @@ class stablelmConfig {
int cache_limit{};
stablelmNameConfig names_config;

explicit stablelmConfig(int token_limit, string billions = "1.6B", RoPEType type = HFHUBROPE, int vocab = 100352) {
explicit StableLMConfig(int token_limit, string billions = "1.6B", RoPEType type = HFHUBROPE, int vocab = 100352) {
names_config.init(type);
vocab_size = vocab;
if (billions == "1.6B" || billions == "1.6b") {
Expand All @@ -66,4 +66,4 @@ class stablelmConfig {
}
};

#endif // CONFIG_LLAMA_HPP
#endif //
106 changes: 20 additions & 86 deletions src/models/stablelm/modeling_stablelm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

using namespace mllm;

class MultiHeadAttentionForstablelm final : public Module {
class StableLMMultiHeadAttention final : public Module {
Layer qkv_proj;
Split qkv_split;
Layer q_proj;
Expand All @@ -31,8 +31,8 @@ class MultiHeadAttentionForstablelm final : public Module {
int attn_hidden_dim_{};

public:
MultiHeadAttentionForstablelm() = default;
MultiHeadAttentionForstablelm(int hidden_dim, int head_size, int kv_head_size, int attn_hidden_dim,
StableLMMultiHeadAttention() = default;
StableLMMultiHeadAttention(int hidden_dim, int head_size, int kv_head_size, int attn_hidden_dim,
AttnQKVSplitType do_qkv_proj, bool post_qkv_norm, bool bias_kv_cat,
RoPEType RoPE_type, int cache_limit, bool do_mask, bool bias,
const TransformerNameConfig &names, const string &base_name) {
Expand Down Expand Up @@ -96,19 +96,6 @@ class MultiHeadAttentionForstablelm final : public Module {
if (q_rope.ready() && k_rope.ready()) {
q = q_rope(q);
k = k_rope(k);

/*
auto now = std::chrono::high_resolution_clock::now();
auto microseconds = std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count();
std::time_t current_time = std::time(nullptr);
std::tm *local_time = std::localtime(&current_time);
std::stringstream ss;
ss << std::put_time(local_time, "%Y%m%d_%H%M%S");
ss << std::setfill('0') << std::setw(6) << microseconds;
std::string timestamp = ss.str();
std::string filename = "tmp_" + timestamp + ".log";
q.saveNData<float>("qpos", "_" + timestamp);
*/
}
if (k_cache.ready() && v_cache.ready()) {
k = k_cache(k);
Expand All @@ -128,15 +115,15 @@ class MultiHeadAttentionForstablelm final : public Module {
}
};

class stablelmMLP final : public Module {
class StableLMMLP final : public Module {
Layer gate_proj;
Layer silu;
Layer up_proj;
Layer down_proj;

public:
stablelmMLP() = default;
stablelmMLP(int hidden_dim, int ffn_hidden, const stablelmNameConfig &names, const string &base_name) {
StableLMMLP() = default;
StableLMMLP(int hidden_dim, int ffn_hidden, const stablelmNameConfig &names, const string &base_name) {
gate_proj = Linear(hidden_dim, ffn_hidden, false, base_name + names._gate_proj_name);
silu = SiLU(base_name + "act");
up_proj = Linear(hidden_dim, ffn_hidden, false, base_name + names._up_proj_name);
Expand All @@ -152,38 +139,24 @@ class stablelmMLP final : public Module {
}
};

class stablelmBlock final : public Module {
MultiHeadAttentionForstablelm attention;
stablelmMLP mlp;
class StableLMBlock final : public Module {
StableLMMultiHeadAttention attention;
StableLMMLP mlp;
Layer norm1;
Layer norm2;

public:
stablelmBlock() = default;
stablelmBlock(int hidden_dim, int head_size, int ffn_hidden, RoPEType RoPE_type, int cache_limit, const stablelmNameConfig &names, const string &base_name) {
attention = MultiHeadAttentionForstablelm(hidden_dim, head_size, head_size, hidden_dim / head_size, SPLIT_NONE, false, false,
StableLMBlock() = default;
StableLMBlock(int hidden_dim, int head_size, int ffn_hidden, RoPEType RoPE_type, int cache_limit, const stablelmNameConfig &names, const string &base_name) {
attention = StableLMMultiHeadAttention(hidden_dim, head_size, head_size, hidden_dim / head_size, SPLIT_NONE, false, false,
RoPE_type, cache_limit, true, true, names, base_name + names._attn_base_name);
mlp = stablelmMLP(hidden_dim, ffn_hidden, names, base_name + names._ffn_base_name);
mlp = StableLMMLP(hidden_dim, ffn_hidden, names, base_name + names._ffn_base_name);
norm1 = LayerNorm(hidden_dim, true, 1e-5, base_name + names._attn_norm_name);
norm2 = LayerNorm(hidden_dim, true, 1e-5, base_name + names._ffn_norm_name);
}
vector<Tensor> Forward(vector<Tensor> inputs, vector<std::any> args) override {
auto x = norm1(inputs[0]);
x = attention({x, x, x})[0];

/*
auto now = std::chrono::high_resolution_clock::now();
auto microseconds = std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count();
std::time_t current_time = std::time(nullptr);
std::tm *local_time = std::localtime(&current_time);
std::stringstream ss;
ss << std::put_time(local_time, "%Y%m%d_%H%M%S");
ss << std::setfill('0') << std::setw(6) << microseconds;
std::string timestamp = ss.str();
std::string filename = "tmp_" + timestamp + ".log";
x.saveNData<float>("x", "_" + timestamp);
*/

auto tmp = x + inputs[0];
x = norm2(tmp);
x = mlp({x})[0];
Expand All @@ -192,74 +165,35 @@ class stablelmBlock final : public Module {
}
};

class stablelmModel final : public Module {
class StableLMModel final : public Module {
Layer embedding;
vector<stablelmBlock> blocks;
vector<StableLMBlock> blocks;
Layer norm;
Layer lm_head;

public:
explicit stablelmModel(const stablelmConfig &config) :
stablelmModel(config.vocab_size, config.hidden_dim, config.head_size, config.ffn_hidden, config.block_num, config.RoPE_type, config.cache_limit,
explicit StableLMModel(const StableLMConfig &config) :
StableLMModel(config.vocab_size, config.hidden_dim, config.head_size, config.ffn_hidden, config.block_num, config.RoPE_type, config.cache_limit,
config.names_config, config.names_config.blk_name) {
}
stablelmModel(int vocab_size, int hidden_dim, int head_size, int ffn_hidden, int block_num, RoPEType RoPE_type, int cache_limit,
StableLMModel(int vocab_size, int hidden_dim, int head_size, int ffn_hidden, int block_num, RoPEType RoPE_type, int cache_limit,
const stablelmNameConfig &names, const string &base_name) {
embedding = Embedding(vocab_size, hidden_dim, names.token_embd_name);
blocks = List<stablelmBlock>(block_num, hidden_dim, head_size, ffn_hidden, RoPE_type, cache_limit, names, base_name);
blocks = List<StableLMBlock>(block_num, hidden_dim, head_size, ffn_hidden, RoPE_type, cache_limit, names, base_name);
norm = LayerNorm(hidden_dim, true, 1e-5, names.post_norm_name);
lm_head = Linear(hidden_dim, vocab_size, false, names.lm_head_name);
}
vector<Tensor> Forward(vector<Tensor> inputs, vector<std::any> args) override {
auto x = embedding(inputs[0]);

/*
auto now = std::chrono::high_resolution_clock::now();
auto microseconds = std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count();
std::time_t current_time = std::time(nullptr);
std::tm *local_time = std::localtime(&current_time);
std::stringstream ss;
ss << std::put_time(local_time, "%Y%m%d_%H%M%S");
ss << std::setfill('0') << std::setw(6) << microseconds;
std::string timestamp = ss.str();
std::string filename = "tmp_" + timestamp + ".log";
inputs[0].saveNData<float>("inputs", "_" + timestamp);
*/

/*
auto now = std::chrono::high_resolution_clock::now();
auto microseconds = std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count();
std::time_t current_time = std::time(nullptr);
std::tm *local_time = std::localtime(&current_time);
std::stringstream ss;
ss << std::put_time(local_time, "%Y%m%d_%H%M%S");
ss << std::setfill('0') << std::setw(6) << microseconds;
std::string timestamp = ss.str();
std::string filename = "tmp_" + timestamp + ".log";
x.saveNData<float>("embedding", "_" + timestamp);
*/

for (auto &block : blocks) {
x = block({x})[0];
}

/*
auto now = std::chrono::high_resolution_clock::now();
auto microseconds = std::chrono::duration_cast<std::chrono::microseconds>(now.time_since_epoch()).count();
std::time_t current_time = std::time(nullptr);
std::tm *local_time = std::localtime(&current_time);
std::stringstream ss;
ss << std::put_time(local_time, "%Y%m%d_%H%M%S");
ss << std::setfill('0') << std::setw(6) << microseconds;
std::string timestamp = ss.str();
std::string filename = "tmp_" + timestamp + ".log";
x.saveNData<float>("attenation", "_" + timestamp);
*/

x = norm(x);
x = lm_head(x);
return {x};
}
};

#endif // MODELING_LLAMA_HPP
#endif // MODELING_STABLELM_HPP
4 changes: 2 additions & 2 deletions src/models/stablelm/tokenization_stablelm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

using namespace mllm;

class stablelmTokenizer final {
class StableLMTokenizer final {
BPETokenizer *tokenizer;
std::unordered_map<std::string, unsigned> merge_rank;

Expand All @@ -25,7 +25,7 @@ class stablelmTokenizer final {
}

public:
explicit stablelmTokenizer(const std::string &vocab_file, const std::string &merge_file) {
explicit StableLMTokenizer(const std::string &vocab_file, const std::string &merge_file) {
Module::initBackend(MLLM_CPU);
tokenizer = new BPETokenizer(vocab_file);
std::ifstream merge(merge_file);
Expand Down

0 comments on commit 6a9410f

Please sign in to comment.