fix quantizer for non-Q_K data types
yirongjie committed Jul 30, 2024
1 parent 855e425 commit 04f01b7
Showing 3 changed files with 4 additions and 3 deletions.
4 changes: 2 additions & 2 deletions examples/demo_qwen.cpp
@@ -19,7 +19,7 @@ int main(int argc, char **argv) {
     cmdline::parser cmdParser;
     cmdParser.add<string>("vocab", 'v', "specify mllm tokenizer model path", false, "../vocab/qwen_vocab.mllm");
     cmdParser.add<string>("merge", 'e', "specify mllm merge file path", false, "../vocab/qwen_merges.txt");
-    cmdParser.add<string>("model", 'm', "specify mllm model path", false, "../models/qwen-1.5-0.5b-q4_k.mllm");
+    cmdParser.add<string>("model", 'm', "specify mllm model path", false, "../models/qwen-1.5-1.8b-q8_0.mllm");
     cmdParser.add<int>("limits", 'l', "max KV cache size", false, 400);
     cmdParser.add<int>("thread", 't', "num of threads", false, 4);
     cmdParser.parse_check(argc, argv);
@@ -31,7 +31,7 @@ int main(int argc, char **argv) {
     CPUBackend::cpu_threads = cmdParser.get<int>("thread");

     auto tokenizer = QWenTokenizer(vocab_path, merge_path);
-    QWenConfig config(tokens_limit, "0.5B", RoPEType::HFHUBROPE);
+    QWenConfig config(tokens_limit, "1.8B", RoPEType::HFHUBROPE);
     auto model = QWenForCausalLM(config);
     model.load(model_path);
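Note that the default model path and the QWenConfig size string change together here ("0.5B"/q4_k to "1.8B"/q8_0). As a design note only, one hypothetical way to keep the two defaults from drifting apart is to derive the size string from the model filename; a minimal sketch follows (the helper name and fallback value are assumptions, not part of this commit):

    #include <string>

    // Hypothetical helper (not in the commit): pick the QWenConfig size string
    // from the model filename so the "-m" default and the config stay in sync.
    std::string qwen_size_from_path(const std::string &model_path) {
        if (model_path.find("0.5b") != std::string::npos) return "0.5B";
        if (model_path.find("1.8b") != std::string::npos) return "1.8B";
        return "1.8B"; // assumed fallback matching the new default
    }

    // Usage sketch with the demo's existing variables:
    //   QWenConfig config(tokens_limit, qwen_size_from_path(model_path), RoPEType::HFHUBROPE);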
1 change: 1 addition & 0 deletions src/backends/cpu/CPULinear.cpp
@@ -1,5 +1,6 @@

#include "CPULinear.hpp"
#include <iostream>

namespace mllm {

2 changes: 1 addition & 1 deletion src/quantizer/QuantWriter.cpp
@@ -69,7 +69,7 @@ void QuantWriter::quantParams(DataType dataType) {
     }
     void *quant_ptr = nullptr;
     std::pair<void *, uint64_t> block_t;
-    if (find_names(name, q6_layers)) {
+    if (find_names(name, q6_layers) && (dataType== MLLM_TYPE_Q6_K ||dataType == MLLM_TYPE_Q4_K)) {
         if(tmp_hidden_dim>0 && (size/tmp_hidden_dim)%256!=0){
             std::cout << "Quantize param " << name << " to " << DataTypeName(MLLM_TYPE_F32) << "\t";
             const auto s = param_loader_->offsets_[name].second / sizeof(float);
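For context: Q4_K/Q6_K pack weights in 256-element super-blocks, so a tensor row must be a multiple of 256 to use them, and a non-K target type (such as the q8_0 model the demo now defaults to) should skip this q6_layers branch entirely. Below is a minimal, self-contained sketch combining the dataType check added in this commit with the pre-existing 256-divisibility check; the enum and helper name are simplified stand-ins, not the actual mllm definitions.

    #include <cstdint>
    #include <iostream>

    // Simplified stand-ins for the mllm data types referenced in QuantWriter.cpp
    // (assumption: these mirror, but are not, the real definitions).
    enum DataType { MLLM_TYPE_F32, MLLM_TYPE_Q4_K, MLLM_TYPE_Q6_K, MLLM_TYPE_Q8_0 };

    // K-quants pack weights in super-blocks of 256 values.
    constexpr uint64_t QK_K = 256;

    // Returns true only when the target type is a K-quant AND the per-row element
    // count divides evenly into 256-element super-blocks; otherwise the writer
    // falls back (e.g. keeps the tensor in F32), mirroring the checks in this hunk.
    bool use_k_quant_path(uint64_t size, uint64_t hidden_dim, DataType dataType) {
        const bool is_k_quant =
            dataType == MLLM_TYPE_Q6_K || dataType == MLLM_TYPE_Q4_K;
        if (!is_k_quant) return false;                        // e.g. a q8_0 target skips this path
        if (hidden_dim > 0 && (size / hidden_dim) % QK_K != 0)
            return false;                                     // rows are not a multiple of 256
        return true;
    }

    int main() {
        std::cout << use_k_quant_path(1024 * 256, 1024, MLLM_TYPE_Q4_K) << "\n"; // 1
        std::cout << use_k_quant_path(1024 * 300, 1024, MLLM_TYPE_Q4_K) << "\n"; // 0
        std::cout << use_k_quant_path(1024 * 256, 1024, MLLM_TYPE_Q8_0) << "\n"; // 0
    }

Before this fix, any parameter listed in q6_layers entered the K-quant branch regardless of the requested data type; the added condition lets non-K-quant targets take the regular quantization path instead.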
