From 886eba7b06188ebf01d551e3f41aef87dfcdbcbd Mon Sep 17 00:00:00 2001
From: chenghuaWang <2923277184@qq.com>
Date: Wed, 16 Oct 2024 08:24:12 +0000
Subject: [PATCH] feat: rms norm, transpose

---
 src/backends/xnnpack/CMakeLists.txt           |   1 +
 .../xnnpack/Functions/XpTransposeFunc.cpp     |  76 +++++++++++++
 .../xnnpack/Functions/XpTransposeFunc.hpp     |  23 ++++
 src/backends/xnnpack/Ops/XpRMSNorm.cpp        | 100 ++++++++++++++++--
 src/backends/xnnpack/Ops/XpRMSNorm.hpp        |   6 +-
 src/backends/xnnpack/Ops/XpTranspose.cpp      |  84 +++++++++++++++
 src/backends/xnnpack/Ops/XpTranspose.hpp      |  44 ++++++++
 src/backends/xnnpack/XnnpackBackend.cpp       |   9 ++
 test/xnnpack/CMakeLists.txt                   |  16 +++
 test/xnnpack/XpTransposeTest.cpp              |  46 ++++++++
 10 files changed, 392 insertions(+), 13 deletions(-)
 create mode 100644 src/backends/xnnpack/Functions/XpTransposeFunc.cpp
 create mode 100644 src/backends/xnnpack/Functions/XpTransposeFunc.hpp
 create mode 100644 test/xnnpack/XpTransposeTest.cpp

diff --git a/src/backends/xnnpack/CMakeLists.txt b/src/backends/xnnpack/CMakeLists.txt
index d6ed88e9..ceead467 100644
--- a/src/backends/xnnpack/CMakeLists.txt
+++ b/src/backends/xnnpack/CMakeLists.txt
@@ -29,6 +29,7 @@ add_library(MllmXnnpackBackend
     Ops/XpD2H.cpp
 
     Functions/XpBinaryFunc.cpp
+    Functions/XpTransposeFunc.cpp
 )
 target_include_directories(MllmXnnpackBackend PUBLIC third_party/XNNPACK/src/)
 target_include_directories(MllmXnnpackBackend PUBLIC ${CMAKE_CURRENT_LIST_DIR}/../)
diff --git a/src/backends/xnnpack/Functions/XpTransposeFunc.cpp b/src/backends/xnnpack/Functions/XpTransposeFunc.cpp
new file mode 100644
index 00000000..e669c7c9
--- /dev/null
+++ b/src/backends/xnnpack/Functions/XpTransposeFunc.cpp
@@ -0,0 +1,76 @@
#include <array>
#include <utility>
#include "backends/xnnpack/Functions/XpTransposeFunc.hpp"

namespace mllm::xnnpack {

void XpTransposeFunction::setup(vector<Tensor *> outputs, vector<Tensor *> inputs, vector<float> args) {
    Chl axis0_ = (Chl)args[0];
    Chl axis1_ = (Chl)args[1];

    // inputs[0]->transShape(SEQUENCE, DIMENSION);
    if (axis0_ == SEQUENCE && axis1_ == DIMENSION) {
        if (inputs[0]->ctype() == BSHD) {
            outputs[0]->reshape(inputs[0]->batch(), inputs[0]->head(), inputs[0]->dimension(), inputs[0]->sequence());
        }
    } else if (axis0_ == THW && axis1_ == CHANNLE) {
        if (inputs[0]->ctype() == BCTHW) {
            outputs[0]->reshape(inputs[0]->batch(), inputs[0]->time(), inputs[0]->height(), inputs[0]->width(), inputs[0]->channel());
        }
    } else if (axis0_ == BATCH && axis1_ == SEQUENCE) {
        if (inputs[0]->ctype() == BSHD) {
            outputs[0]->reshape(inputs[0]->sequence(), inputs[0]->head(), inputs[0]->batch(), inputs[0]->dimension());
        }
    }
}

void XpTransposeFunction::execute(vector<Tensor *> outputs, vector<Tensor *> inputs, vector<float> args) {
    auto xpb = (XnnpackBackend *)inputs[0]->backend();
    tryDefineAllXpTensors(xpb, inputs);
    tryDefineAllXpTensors(xpb, outputs);

    std::array<size_t, 4> perm{3, 2, 1, 0};

    Chl axis0_ = (Chl)args[0];
    Chl axis1_ = (Chl)args[1];

    // inputs[0]->transShape(SEQUENCE, DIMENSION);
    if (axis0_ == SEQUENCE && axis1_ == DIMENSION) {
        if (inputs[0]->ctype() == BSHD) {
            std::swap(perm[2], perm[3]);
        } else {
            Log::error("XpTransposeFunction NYI");
            exit(-1);
        }
    } else if (axis0_ == THW && axis1_ == CHANNLE) {
        // 5-D BCTHW transpose is not wired up yet.
        Log::error("XpTransposeFunction NYI");
        exit(-1);
    } else if (axis0_ == BATCH && axis1_ == SEQUENCE) {
        // BATCH <-> SEQUENCE transpose is not wired up yet.
        Log::error("XpTransposeFunction NYI");
        exit(-1);
    } else {
        Log::error("XpTransposeFunction NYI");
        exit(-1);
    }

    auto status = xnn_define_static_transpose(xpb->getXnnSubgraph(), 4, perm.data(), inputs[0]->uuid(), outputs[0]->uuid(), 0);

    if (status != xnn_status_success) {
        Log::error("XpTransposeFunction::execute Error");
        exit(-1);
    }
}

} // namespace mllm::xnnpack
\ No newline at end of file
diff --git a/src/backends/xnnpack/Functions/XpTransposeFunc.hpp b/src/backends/xnnpack/Functions/XpTransposeFunc.hpp
new file mode 100644
index 00000000..d007fd46
--- /dev/null
+++ b/src/backends/xnnpack/Functions/XpTransposeFunc.hpp
@@ -0,0 +1,23 @@
/**
 * @file XpTransposeFunc.hpp
 * @author Chenghua Wang (chenghua.wang.edu@gmail.com)
 * @version 0.1
 * @date 2024-10-16
 *
 * @copyright Copyright (c) 2024
 *
 */
#pragma once

#include "Backend.hpp"
#include "xnnpack/XpInterface.hpp"

namespace mllm::xnnpack {

class XpTransposeFunction : public TensorFunction, public XpTensorDefineInterface<XpTransposeFunction> {
public:
    void setup(vector<Tensor *> outputs, vector<Tensor *> inputs, vector<float> args) override;

    void execute(vector<Tensor *> outputs, vector<Tensor *> inputs, vector<float> args) override;
};

} // namespace mllm::xnnpack
\ No newline at end of file
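A note for reviewers on the permutation convention used above: with `xnn_define_static_transpose`, output dimension `k` takes its extent (and its index) from input dimension `perm[k]`, to the best of our reading of the XNNPACK subgraph API. The self-contained sketch below shows the index arithmetic such a node performs; `transpose4d` is a hypothetical helper written for this patch description, not mllm or XNNPACK code.

```cpp
#include <array>
#include <cstdio>
#include <vector>

// Hypothetical reference helper (not mllm/XNNPACK API): apply a 4-D
// permutation where output axis k maps to input axis perm[k].
static std::vector<float> transpose4d(const std::vector<float> &in,
                                      const std::array<size_t, 4> &dims,
                                      const std::array<size_t, 4> &perm) {
    std::array<size_t, 4> out_dims{};
    for (size_t k = 0; k < 4; ++k) out_dims[k] = dims[perm[k]];

    std::vector<float> out(in.size());
    std::array<size_t, 4> o{}; // output index
    for (o[0] = 0; o[0] < out_dims[0]; ++o[0])
        for (o[1] = 0; o[1] < out_dims[1]; ++o[1])
            for (o[2] = 0; o[2] < out_dims[2]; ++o[2])
                for (o[3] = 0; o[3] < out_dims[3]; ++o[3]) {
                    std::array<size_t, 4> i{};
                    // map the output index back to the input index
                    for (size_t k = 0; k < 4; ++k) i[perm[k]] = o[k];
                    size_t src = ((i[0] * dims[1] + i[1]) * dims[2] + i[2]) * dims[3] + i[3];
                    size_t dst = ((o[0] * out_dims[1] + o[1]) * out_dims[2] + o[2]) * out_dims[3] + o[3];
                    out[dst] = in[src];
                }
    return out;
}

int main() {
    // A {1, 1, 2, 3} tensor; perm {0, 1, 3, 2} swaps the two innermost axes.
    std::vector<float> x{0, 1, 2, 3, 4, 5};
    auto y = transpose4d(x, {1, 1, 2, 3}, {0, 1, 3, 2});
    for (float v : y) std::printf("%g ", v); // prints: 0 3 1 4 2 5
    std::printf("\n");
    return 0;
}
```

Under this convention, an identity permutation with its last two entries swapped (`{0, 1, 3, 2}`) exchanges the two innermost axes.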
diff --git a/src/backends/xnnpack/Ops/XpRMSNorm.cpp b/src/backends/xnnpack/Ops/XpRMSNorm.cpp
index f7065ce9..49bc32c9 100644
--- a/src/backends/xnnpack/Ops/XpRMSNorm.cpp
+++ b/src/backends/xnnpack/Ops/XpRMSNorm.cpp
@@ -1,6 +1,7 @@
 #include "backends/xnnpack/Ops/XpRMSNorm.hpp"
 #include "backends/xnnpack/XnnpackBackend.hpp"
 #include "Types.hpp"
+#include "xnnpack.h"
 
 namespace mllm::xnnpack {
 
@@ -12,7 +13,6 @@ ErrorCode XpRMSNorm::setUp(vector<shared_ptr<Tensor>> inputs, vector<shared_ptr
 ErrorCode XpRMSNorm::reshape(vector<shared_ptr<Tensor>> inputs, vector<shared_ptr<Tensor>> outputs) {
     auto xpb = (XnnpackBackend *)backend();
     outputs[0]->reshape(inputs[0]->batch(), inputs[0]->head(), inputs[0]->sequence(), inputs[0]->dimension());
-    defineWeightTensor(xpb, &weight_);
     return Op::reshape(inputs, outputs);
 }
 
@@ -20,8 +20,65 @@ ErrorCode XpRMSNorm::execute(vector<shared_ptr<Tensor>> inputs, vector<shared_p
 ErrorCode XpRMSNorm::execute(vector<shared_ptr<Tensor>> inputs, vector<shared_ptr<Tensor>> outputs) {
     auto xpb = (XnnpackBackend *)backend();
+    tryDefineAllXpTensors(xpb, inputs);
+    tryDefineAllXpTensors(xpb, outputs);
+
+    auto dtype = inputs[0]->dtype();
+    size_t b = inputs[0]->shape()[0];
+    size_t s = inputs[0]->shape()[1];
+    size_t h = inputs[0]->shape()[2];
+    size_t d = inputs[0]->shape()[3];
+
+    // x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps)
+    auto x_powed = defineTemporaryTensor(xpb, {b, s, h, d}, dtype);
+    {
+        auto status = xnn_define_square(xpb->getXnnSubgraph(), inputs[0]->uuid(), x_powed, 0);
+        if (status != xnn_status_success) {
+            Log::error("XpRMSNorm: xnn_define_square failed");
+            exit(-1);
+        }
+    }
+    auto x_powed_mean = defineTemporaryTensor(xpb, {b, s, h, d}, dtype);
+    {
+        std::array<size_t, 1> along_axes{3};
+        auto status = xnn_define_static_mean(xpb->getXnnSubgraph(), 1, along_axes.data(), x_powed, x_powed_mean, XNN_FLAG_KEEP_DIMS);
+        if (status != xnn_status_success) {
+            Log::error("XpRMSNorm: xnn_define_static_mean failed");
+            exit(-1);
+        }
+    }
+    auto x_pow_mean_eps = defineTemporaryTensor(xpb, {b, s, h, d}, dtype);
+    {
+        auto status = xnn_define_binary(xpb->getXnnSubgraph(), xnn_binary_add, nullptr, x_powed_mean, epsilon_param_.uuid(), x_pow_mean_eps, 0);
+        if (status != xnn_status_success) {
+            Log::error("XpRMSNorm: xnn_define_binary xnn_binary_add failed");
+            exit(-1);
+        }
+    }
+    auto x_pme_rsqrt = defineTemporaryTensor(xpb, {b, s, h, d}, dtype);
+    {
+        auto status = xnn_define_reciprocal_square_root(xpb->getXnnSubgraph(), x_pow_mean_eps, x_pme_rsqrt, 0);
+        if (status != xnn_status_success) {
+            Log::error("XpRMSNorm: xnn_define_reciprocal_square_root failed");
+            exit(-1);
+        }
+    }
+    auto x_1 = defineTemporaryTensor(xpb, {b, s, h, d}, dtype);
+    {
+        auto status = xnn_define_binary(xpb->getXnnSubgraph(), xnn_binary_multiply, nullptr, inputs[0]->uuid(), x_pme_rsqrt, x_1, 0);
+        if (status != xnn_status_success) {
+            Log::error("XpRMSNorm: xnn_define_binary xnn_binary_multiply x * epsed failed");
+            exit(-1);
+        }
+    }
+
+    // scale by the learned weight
+    {
+        auto status = xnn_define_binary(xpb->getXnnSubgraph(), xnn_binary_multiply, nullptr, x_1, weight_params_.uuid(), outputs[0]->uuid(), 0);
+        if (status != xnn_status_success) {
+            Log::error("XpRMSNorm: xnn_define_binary xnn_binary_multiply x * weight failed");
+            exit(-1);
+        }
+    }
 
     return MLLM_NO_ERROR;
 }
@@ -29,21 +86,42 @@ ErrorCode XpRMSNorm::execute(vector<shared_ptr<Tensor>> inputs, vector<shared_p
 ErrorCode XpRMSNorm::load(AbstructLoader &loader) {
+    auto xpb = (XnnpackBackend *)backend();
+
+    weight_params_.setName(name() + ".weight");
+    weight_params_.reshape(1, 1, 1, norm_size_);
+    if (loader.getDataType(weight_params_.name()) != MLLM_TYPE_COUNT) {
+        weight_params_.setDtype(loader.getDataType(weight_params_.name()));
+        weight_params_.alloc();
+        loader.load(&weight_params_);
+    } else {
+        weight_params_.setDtype(MLLM_TYPE_F32);
+        weight_params_.alloc();
+    }
+    defineWeightTensor(xpb, &weight_params_);
+
+    // epsilon enters the subgraph as a one-element weight tensor
+    epsilon_param_.setName(name() + ".epsilon");
+    epsilon_param_.reshape(1, 1, 1, 1);
+    epsilon_param_.setDtype(MLLM_TYPE_F32);
+    epsilon_param_.alloc();
+    epsilon_param_.setDataAt<float>(0, 0, 0, 0, epsilon_);
+    defineWeightTensor(xpb, &epsilon_param_);
+
+    if (add_unit_offset_) {
+        for (int i = 0; i < norm_size_; ++i) {
+            *(weight_params_.hostPtr<float>() + i) = *(weight_params_.hostPtr<float>() + i) + 1;
+        }
+    }
+    return Op::load(loader);
 }
 
 ErrorCode XpRMSNorm::free(vector<shared_ptr<Tensor>> inputs, vector<shared_ptr<Tensor>> outputs) {
-    weight_.free();
+    weight_params_.free();
+    epsilon_param_.free();
     return Op::free(inputs, outputs);
 }
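The execute graph above mirrors the PyTorch expression quoted in its comment: square, mean over the last axis with kept dims, add epsilon, reciprocal square root, then two element-wise multiplies (by the input and by the weight). As a numeric reference for testing, here is a minimal scalar implementation of the same formula; it is a sketch written for this description (plain C++, no mllm types, `rms_norm` is our name), with the unit-offset variant matching the `+ 1` weight adjustment in `XpRMSNorm::load`.

```cpp
// Reference RMSNorm over the last axis: y = x / sqrt(mean(x^2) + eps) * w.
// With add_unit_offset, the effective scale becomes (1 + w).
#include <cmath>
#include <cstdio>
#include <vector>

static void rms_norm(const float *x, const float *w, float *y,
                     size_t rows, size_t dim, float eps, bool add_unit_offset) {
    for (size_t r = 0; r < rows; ++r) {
        const float *xi = x + r * dim;
        // mean of squares along the last axis
        float mean_sq = 0.f;
        for (size_t i = 0; i < dim; ++i) mean_sq += xi[i] * xi[i];
        mean_sq /= (float)dim;
        const float rsqrt = 1.f / std::sqrt(mean_sq + eps);
        for (size_t i = 0; i < dim; ++i) {
            const float scale = add_unit_offset ? (1.f + w[i]) : w[i];
            y[r * dim + i] = xi[i] * rsqrt * scale;
        }
    }
}

int main() {
    std::vector<float> x{1.f, 2.f, 3.f, 4.f};
    std::vector<float> w{1.f, 1.f, 1.f, 1.f};
    std::vector<float> y(4);
    rms_norm(x.data(), w.data(), y.data(), 1, 4, 1e-6f, false);
    for (float v : y) std::printf("%.4f ", v); // ~0.3651 0.7303 1.0954 1.4606
    std::printf("\n");
    return 0;
}
```

Running the XNNPACK subgraph and this reference on the same buffer should agree to within float tolerance, which makes it a convenient oracle for a unit test.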
diff --git a/src/backends/xnnpack/Ops/XpRMSNorm.hpp b/src/backends/xnnpack/Ops/XpRMSNorm.hpp
index 9821ad37..e239a441 100644
--- a/src/backends/xnnpack/Ops/XpRMSNorm.hpp
+++ b/src/backends/xnnpack/Ops/XpRMSNorm.hpp
@@ -20,7 +20,8 @@ class XpRMSNorm final : public Op, public XpTensorDefineInterface<XpRMSNorm> {
 public:
     XpRMSNorm(Backend *bk, const std::string &op_name, int norm_size, float epsilon = 1e-6, bool add_unit_offset = false, int thread_count = 4) :
         Op(bk, op_name), norm_size_(norm_size), epsilon_(epsilon), add_unit_offset_(add_unit_offset), thread_count_(thread_count) {
-        weight_.setBackend(bk);
+        weight_params_.setBackend(bk);
+        epsilon_param_.setBackend(bk);
     }
 
     ~XpRMSNorm() override = default;
@@ -36,7 +37,8 @@ class XpRMSNorm final : public Op, public XpTensorDefineInterface<XpRMSNorm> {
     ErrorCode free(vector<shared_ptr<Tensor>> inputs, vector<shared_ptr<Tensor>> outputs) override;
 
 private:
-    Tensor weight_;
+    Tensor weight_params_;
+    Tensor epsilon_param_;
     float epsilon_;
     int norm_size_;
     bool add_unit_offset_;
diff --git a/src/backends/xnnpack/Ops/XpTranspose.cpp b/src/backends/xnnpack/Ops/XpTranspose.cpp
index e69de29b..dd17df39 100644
--- a/src/backends/xnnpack/Ops/XpTranspose.cpp
+++ b/src/backends/xnnpack/Ops/XpTranspose.cpp
@@ -0,0 +1,84 @@
#include "backends/xnnpack/Ops/XpTranspose.hpp"
#include "Types.hpp"
#include "xnnpack.h"
#include <array>
#include <utility>

namespace mllm::xnnpack {

ErrorCode XpTranspose::setUp(vector<shared_ptr<Tensor>> inputs, vector<shared_ptr<Tensor>> outputs) {
    return MLLM_NO_ERROR;
}

ErrorCode XpTranspose::reshape(vector<shared_ptr<Tensor>> inputs, vector<shared_ptr<Tensor>> outputs) {
    // inputs[0]->transShape(SEQUENCE, DIMENSION);
    if (axis0_ == SEQUENCE && axis1_ == DIMENSION) {
        if (inputs[0]->ctype() == BSHD) {
            outputs[0]->reshape(inputs[0]->batch(), inputs[0]->head(), inputs[0]->dimension(), inputs[0]->sequence());
        }
    } else if (axis0_ == THW && axis1_ == CHANNLE) {
        if (inputs[0]->ctype() == BCTHW) {
            outputs[0]->reshape(inputs[0]->batch(), inputs[0]->time(), inputs[0]->height(), inputs[0]->width(), inputs[0]->channel());
        }
    } else if (axis0_ == BATCH && axis1_ == SEQUENCE) {
        if (inputs[0]->ctype() == BSHD) {
            outputs[0]->reshape(inputs[0]->sequence(), inputs[0]->head(), inputs[0]->batch(), inputs[0]->dimension());
        }
    }
    return MLLM_NO_ERROR;
}

ErrorCode XpTranspose::execute(vector<shared_ptr<Tensor>> inputs, vector<shared_ptr<Tensor>> outputs) {
    auto xpb = (XnnpackBackend *)inputs[0]->backend();
    tryDefineAllXpTensors(xpb, inputs);
    tryDefineAllXpTensors(xpb, outputs);

    std::array<size_t, 4> perm{3, 2, 1, 0};

    // inputs[0]->transShape(SEQUENCE, DIMENSION);
    if (axis0_ == SEQUENCE && axis1_ == DIMENSION) {
        if (inputs[0]->ctype() == BSHD) {
            std::swap(perm[2], perm[3]);
        } else {
            Log::error("XpTranspose NYI");
            exit(-1);
        }
    } else if (axis0_ == THW && axis1_ == CHANNLE) {
        // 5-D BCTHW transpose is not wired up yet.
        Log::error("XpTranspose NYI");
        exit(-1);
    } else if (axis0_ == BATCH && axis1_ == SEQUENCE) {
        // BATCH <-> SEQUENCE transpose is not wired up yet.
        Log::error("XpTranspose NYI");
        exit(-1);
    } else {
        Log::error("XpTranspose NYI");
        exit(-1);
    }

    auto status = xnn_define_static_transpose(xpb->getXnnSubgraph(), 4, perm.data(), inputs[0]->uuid(), outputs[0]->uuid(), 0);

    if (status != xnn_status_success) {
        Log::error("XpTranspose::execute Error");
        exit(-1);
    }

    return MLLM_NO_ERROR;
}

Op *XpTransposeCreator::create(OpParam op_param, Backend *bk, const string &name, int thread_count) const {
    int axis0 = (int)op_param["axis0"];
    int axis1 = (int)op_param["axis1"];
    return new XpTranspose(bk, axis0, axis1, name, thread_count);
}
} // namespace mllm::xnnpack
diff --git a/src/backends/xnnpack/Ops/XpTranspose.hpp b/src/backends/xnnpack/Ops/XpTranspose.hpp
index e69de29b..4bf072bd 100644
--- a/src/backends/xnnpack/Ops/XpTranspose.hpp
+++ b/src/backends/xnnpack/Ops/XpTranspose.hpp
@@ -0,0 +1,44 @@
/**
 * @file XpTranspose.hpp
 * @author Chenghua Wang (chenghua.wang.edu@gmail.com)
 * @brief Transpose op for the XNNPACK backend.
 * @version 0.1
 * @date 2024-10-16
 *
 * @copyright Copyright (c) 2024
 *
 */
#pragma once

#include "Backend.hpp"
#include "Op.hpp"
#include "backends/xnnpack/XnnpackBackend.hpp"
#include "backends/xnnpack/XpInterface.hpp"

namespace mllm::xnnpack {

class XpTranspose final : public Op, public XpTensorDefineInterface<XpTranspose> {
public:
    XpTranspose(Backend *bk, int axis0, int axis1, const std::string &op_name, int thread_count) :
        Op(bk, op_name), axis0_(axis0), axis1_(axis1), thread_count_(thread_count) {
    }

    ~XpTranspose() override = default;

    ErrorCode setUp(vector<shared_ptr<Tensor>> inputs, vector<shared_ptr<Tensor>> outputs) override;

    ErrorCode reshape(vector<shared_ptr<Tensor>> inputs, vector<shared_ptr<Tensor>> outputs) override;

    ErrorCode execute(vector<shared_ptr<Tensor>> inputs, vector<shared_ptr<Tensor>> outputs) override;

private:
    int axis0_;
    int axis1_;
    int thread_count_ = 4;
};

struct XpTransposeCreator : public XnnpackBackend::Creator {
    Op *create(OpParam op_param, Backend *bk, const string &name, int thread_count) const override;
};

} // namespace mllm::xnnpack
\ No newline at end of file
diff --git a/src/backends/xnnpack/XnnpackBackend.cpp b/src/backends/xnnpack/XnnpackBackend.cpp
index 6a46cd73..b384e652 100644
--- a/src/backends/xnnpack/XnnpackBackend.cpp
+++ b/src/backends/xnnpack/XnnpackBackend.cpp
@@ -18,6 +18,9 @@
 #include "backends/xnnpack/Ops/XpSoftmax.hpp"
 #include "backends/xnnpack/Ops/XpGeLU.hpp"
 #include "backends/xnnpack/Ops/XpSiLU.hpp"
+#include "backends/xnnpack/Ops/XpTranspose.hpp"
+#include "backends/xnnpack/Functions/XpTransposeFunc.hpp"
+#include "backends/xnnpack/Ops/XpRMSNorm.hpp"
 #include "xnnpack/allocator.h"
 #include "xnnpack/subgraph.h"
 
@@ -59,6 +62,7 @@ XnnpackModelRuntime::~XnnpackModelRuntime() {
     }
 
     // not release all
+    // FIXME: explicit memory leak.
     // NOTE: explicit memory leak.
     // NOTE: explicit memory leak.
     // NOTE: explicit memory leak.
@@ -198,6 +202,8 @@ void XnnpackBackend::registerOps() {
     addCreator(SOFTMAX, new XpSoftmaxCreator());
     addCreator(OP_GELU, new XpGeLUCreator());
     addCreator(SILU, new XpSiLUCreator());
+    addCreator(TRANSPOSE, new XpTransposeCreator());
+    addCreator(RMSNORM, new XpRMSNormCreator());
 }
 
 void XnnpackBackend::registerFuncs() {
@@ -212,6 +218,9 @@ void XnnpackBackend::registerFuncs() {
     map_tensor_function_[TensorFuncType::FUNC_TTSUB] = new XpTTSubFunction();
     map_tensor_function_[TensorFuncType::FUNC_TTMUL] = new XpTTMulFunction();
     map_tensor_function_[TensorFuncType::FUNC_TTDIV] = new XpTTDivFunction();
+
+    // others
+    map_tensor_function_[TensorFuncType::FUNC_TRANPOSE] = new XpTransposeFunction();
 }
 
 std::shared_ptr<XnnpackModelRuntime> XnnpackBackend::getModelRuntime() {
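For readers new to the backend plumbing being extended here: `addCreator` keys a factory object by op type, and `map_tensor_function_` keys stateless tensor functions by `TensorFuncType`. The sketch below illustrates the creator-registry pattern in isolation; the types are simplified stand-ins invented for this description, not mllm's actual `Backend`/`Creator` declarations.

```cpp
// Simplified creator registry (illustrative only; mllm's real interfaces differ).
#include <map>
#include <memory>
#include <string>
#include <utility>

enum class OpType { Transpose, RMSNorm };
using OpParam = std::map<std::string, float>; // assumption: op params travel as floats

struct Op {
    virtual ~Op() = default;
};

struct Creator {
    virtual ~Creator() = default;
    virtual Op *create(OpParam param, const std::string &name) const = 0;
};

class Registry {
public:
    // Takes ownership of the creator, mirroring `addCreator(TRANSPOSE, new ...)`.
    void addCreator(OpType type, const Creator *creator) {
        creators_[type].reset(creator);
    }

    Op *create(OpType type, OpParam param, const std::string &name) const {
        auto it = creators_.find(type);
        return it == creators_.end() ? nullptr : it->second->create(std::move(param), name);
    }

private:
    std::map<OpType, std::unique_ptr<const Creator>> creators_;
};

struct TransposeOp final : Op {
    int axis0, axis1;
    TransposeOp(int a0, int a1) : axis0(a0), axis1(a1) {}
};

struct TransposeCreator final : Creator {
    Op *create(OpParam param, const std::string & /*name*/) const override {
        // Parameters are floats, hence the casts (mirrors XpTransposeCreator::create).
        return new TransposeOp((int)param["axis0"], (int)param["axis1"]);
    }
};

int main() {
    Registry registry;
    registry.addCreator(OpType::Transpose, new TransposeCreator());
    OpParam param{{"axis0", 1.f}, {"axis1", 3.f}};
    std::unique_ptr<Op> op(registry.create(OpType::Transpose, param, "t0"));
    return op ? 0 : 1;
}
```

Keeping one creator per op makes op construction declarative: graph-building code only needs an op type and an `OpParam` map, never a concrete constructor signature.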
diff --git a/test/xnnpack/CMakeLists.txt b/test/xnnpack/CMakeLists.txt
index 2eb84ceb..9f99efc5 100644
--- a/test/xnnpack/CMakeLists.txt
+++ b/test/xnnpack/CMakeLists.txt
@@ -95,3 +95,19 @@ target_link_libraries(
     MLLM_CPU
     MllmXnnpackBackend
 )
+
+add_executable(
+    XpTransposeTest
+    XpTransposeTest.cpp
+    ${DIR_SRC_CPU}
+    ${DIR_SRC_MEM_MANAGER}
+    ${DIR_SRC_EXP}
+    ${DIR_SRC}
+)
+target_link_libraries(
+    XpTransposeTest
+    PUBLIC
+    GTest::gtest_main
+    MLLM_CPU
+    MllmXnnpackBackend
+)
diff --git a/test/xnnpack/XpTransposeTest.cpp b/test/xnnpack/XpTransposeTest.cpp
new file mode 100644
index 00000000..edfbe688
--- /dev/null
+++ b/test/xnnpack/XpTransposeTest.cpp
@@ -0,0 +1,46 @@
#include "Layer.hpp"
#include "Module.hpp"
#include "Types.hpp"
#include "backends/xnnpack/XpWrapper.hpp"
#include "backends/xnnpack/Utils/Logger.hpp"
#include <gtest/gtest.h>
#include <chrono>

using namespace mllm;

class TransposeModule : public Module {
    Layer linear_;

public:
    TransposeModule() {
        linear_ = Linear(2048, 4096, true, "linear");
    }

    vector<Tensor> Forward(vector<Tensor> inputs, vector<std::any> args) override {
        auto x = inputs[0];

        // B, S, H, D -> B, S, D, H
        auto out = x.transpose(SEQUENCE, DIMENSION);
        return {linear_(out)};
    }
};

TEST(XpTransposeTest, TransposeModule) {
    mllm::xnnpack::Log::log_level = mllm::xnnpack::Log::WARN;

    auto model = ::mllm::xnnpack::wrap2xnn<TransposeModule>(1, 1);
    model.setNoLoadWeightsDtype(DataType::MLLM_TYPE_F32);

    EXPECT_EQ(Backend::global_backends[MLLM_XNNPACK] != nullptr, true);

    // B, S, H, D
    Tensor x(1, 1, 2048, 1024, Backend::global_backends[MLLM_XNNPACK], true);
    x.setTtype(TensorType::INPUT_TENSOR);

    auto start = std::chrono::high_resolution_clock::now();
    auto out = model({x})[0];
    auto end = std::chrono::high_resolution_clock::now();
    auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
    mllm::xnnpack::Log::warn("Transpose + Linear 1, time={} microseconds", duration.count());

    out.printShape();
}
\ No newline at end of file