From 1312d190d04b931e35c25da5a2d5495f03ae4803 Mon Sep 17 00:00:00 2001 From: trueeyu Date: Wed, 31 May 2023 21:55:00 +0800 Subject: [PATCH] [BugFix] Fix the mem_leak of regex_replace (#24475) Fixes #issue ``` mysql> SELECT regexp_replace('a b c', " ", "-"); +-----------------------------------+ | regexp_replace('a b c', ' ', '-') | +-----------------------------------+ | a-b-c | +-----------------------------------+ 1 row in set (0.01 sec) ``` ``` Direct leak of 2319 byte(s) in 1 object(s) allocated from: #0 0x974c2af in __interceptor_malloc ../../.././libsanitizer/asan/asan_malloc_linux.cpp:145 #1 0x14bdb529 in alloc_scratch /root/starrocks/thirdparty/src/hyperscan-5.4.0/src/scratch.c:121 #2 0x14bdbb3f in hs_clone_scratch /root/starrocks/thirdparty/src/hyperscan-5.4.0/src/scratch.c:399 #3 0x12d4f791 in regexp_replace_use_hyperscan /root/starrocks/be/src/exprs/string_functions.cpp:2846 #4 0x12d5044a in starrocks::StringFunctions::regexp_replace(starrocks::FunctionContext*, std::vector, std::allocator > > const&) /root/starrocks/be/src/exprs/string_functions.cpp:2913 #5 0xec8af37 in starrocks::VectorizedFunctionCallExpr::evaluate_checked(starrocks::ExprContext*, starrocks::Chunk*) /root/starrocks/be/src/exprs/function_call_expr.cpp:159 #6 0xd924e9b in starrocks::ExprContext::evaluate(starrocks::Expr*, starrocks::Chunk*, unsigned char*) /root/starrocks/be/src/exprs/expr_context.cpp:176 #7 0xd9246dc in starrocks::ExprContext::evaluate(starrocks::Chunk*, unsigned char*) /root/starrocks/be/src/exprs/expr_context.cpp:160 #8 0xab4f521 in starrocks::pipeline::ProjectOperator::push_chunk(starrocks::RuntimeState*, std::shared_ptr const&) /root/starrocks/be/src/exec/pipeline/project_operator.cpp:55 #9 0x9c9ec7c in starrocks::pipeline::PipelineDriver::process(starrocks::RuntimeState*, int) /root/starrocks/be/src/exec/pipeline/pipeline_driver.cpp:320 #10 0x12a59131 in starrocks::pipeline::GlobalDriverExecutor::_worker_thread() /root/starrocks/be/src/exec/pipeline/pipeline_driver_executor.cpp:154 #11 0x12a57871 in operator() /root/starrocks/be/src/exec/pipeline/pipeline_driver_executor.cpp:69 #12 0x12a61555 in __invoke_impl&> /opt/gcc/usr/include/c++/10.3.0/bits/invoke.h:60 #13 0x12a60a5a in __invoke_r&> /opt/gcc/usr/include/c++/10.3.0/bits/invoke.h:110 #14 0x12a5fd64 in _M_invoke /opt/gcc/usr/include/c++/10.3.0/bits/std_function.h:291 #15 0x98e2d87 in std::function::operator()() const /opt/gcc/usr/include/c++/10.3.0/bits/std_function.h:622 #16 0x1172d057 in starrocks::FunctionRunnable::run() /root/starrocks/be/src/util/threadpool.cpp:58 #17 0x11729e0c in starrocks::ThreadPool::dispatch_thread() /root/starrocks/be/src/util/threadpool.cpp:553 #18 0x11745a65 in void std::__invoke_impl(std::__invoke_memfun_deref, void (starrocks::ThreadPool::*&)(), starrocks::ThreadPool*&) /opt/gcc/usr/include/c++/10.3.0/bits/invoke.h:73 #19 0x117453be in std::__invoke_result::type std::__invoke(void (starrocks::ThreadPool::*&)(), starrocks::ThreadPool*&) /opt/gcc/usr/include/c++/10. 3.0/bits/invoke.h:95 #20 0x117447b5 in void std::_Bind::__call(std::tuple<>&&, std::_Index_tuple<0ul>) /opt/gcc/usr/include/c++/10.3.0/functional:416 #21 0x11743117 in void std::_Bind::operator()<, void>() /opt/gcc/usr/include/c++/10.3.0/functional:499 #22 0x1174017b in void std::__invoke_impl&>(std::__invoke_other, std::_Bind&) /opt/gcc/usr/include/c++/10.3.0/bits/invoke.h:60 #23 0x1173dadf in std::enable_if&>, void>::type std::__invoke_r&>(std::_Bind&) /opt/gcc/usr/include/c++/10.3.0/bits/invoke.h:110 #24 0x11739b48 in std::_Function_handler >::_M_invoke(std::_Any_data const&) /opt/gcc/usr/include/c++/10.3.0/bits/std_function.h:291 #25 0x98e2d87 in std::function::operator()() const /opt/gcc/usr/include/c++/10.3.0/bits/std_function.h:622 #26 0x11711bc8 in starrocks::Thread::supervise_thread(void*) /root/starrocks/be/src/util/thread.cpp:364 #27 0x7fc585af0ea4 in start_thread (/lib64/libpthread.so.0+0x7ea4) ``` Signed-off-by: trueeyu (cherry picked from commit ee8c12fb381ba6ef0bdd9fdfd1adcf3e49b74cad) --- be/src/exprs/string_functions.cpp | 17 ++++++++++++----- test/sql/test_function/R/test_regex | 6 +++++- test/sql/test_function/T/test_regex | 4 ++-- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/be/src/exprs/string_functions.cpp b/be/src/exprs/string_functions.cpp index 2d1699444666c..f2c51ad52939d 100644 --- a/be/src/exprs/string_functions.cpp +++ b/be/src/exprs/string_functions.cpp @@ -2837,16 +2837,23 @@ static ColumnPtr regexp_replace_const(re2::RE2* const_re, const Columns& columns return result.build(ColumnHelper::is_all_const(columns)); } -static ColumnPtr regexp_replace_use_hyperscan(StringFunctionsState* state, const Columns& columns) { +static StatusOr regexp_replace_use_hyperscan(StringFunctionsState* state, const Columns& columns) { auto str_viewer = ColumnViewer(columns[0]); auto rpl_viewer = ColumnViewer(columns[2]); hs_scratch_t* scratch = nullptr; hs_error_t status; if ((status = hs_clone_scratch(state->scratch, &scratch)) != HS_SUCCESS) { - CHECK(false) << "ERROR: Unable to clone scratch space." - << " status: " << status; + return Status::InternalError(strings::Substitute("Unable to clone scratch space. status: $0", status)); } + DeferOp op([&] { + if (scratch != nullptr) { + hs_error_t st; + if ((st = hs_free_scratch(scratch)) != HS_SUCCESS) { + LOG(ERROR) << "free scratch space failure. status: " << st; + } + } + }); auto size = columns[0]->size(); ColumnBuilder result(size); @@ -2868,7 +2875,7 @@ static ColumnPtr regexp_replace_use_hyperscan(StringFunctionsState* state, const const char* data = (value_size) ? str_viewer.value(row).data : &StringFunctions::_DUMMY_STRING_FOR_EMPTY_PATTERN; - auto status = hs_scan( + auto st = hs_scan( // Use &_DUMMY_STRING_FOR_EMPTY_PATTERN instead of nullptr to avoid crash. state->database, data, value_size, 0, scratch, [](unsigned int id, unsigned long long from, unsigned long long to, unsigned int flags, @@ -2885,7 +2892,7 @@ static ColumnPtr regexp_replace_use_hyperscan(StringFunctionsState* state, const return 0; }, &match_info_chain); - DCHECK(status == HS_SUCCESS || status == HS_SCAN_TERMINATED) << " status: " << status; + DCHECK(st == HS_SUCCESS || st == HS_SCAN_TERMINATED) << " status: " << st; std::string result_str; result_str.reserve(value_size); diff --git a/test/sql/test_function/R/test_regex b/test/sql/test_function/R/test_regex index 8b9ee54c4ff2c..94bf6c8aaf0ec 100644 --- a/test/sql/test_function/R/test_regex +++ b/test/sql/test_function/R/test_regex @@ -1,4 +1,4 @@ --- name: test regex +-- name: test_regex CREATE TABLE `ts` ( `str` varchar(65533) NULL COMMENT "", `regex` varchar(65533) NULL COMMENT "", @@ -47,6 +47,10 @@ select regexp_replace('abc中文def', '[\\p{Han}]+', 'xx'); -- result: abcxxdef -- !result +SELECT regexp_replace('a b c', " ", "-"); +-- result: +a-b-c +-- !result select str, regex, replaced, regexp_replace(str, regex, replaced) from ts order by str, regex, replaced; -- result: None xx None diff --git a/test/sql/test_function/T/test_regex b/test/sql/test_function/T/test_regex index b06991a26f24b..53b25a897fd9b 100644 --- a/test/sql/test_function/T/test_regex +++ b/test/sql/test_function/T/test_regex @@ -1,4 +1,4 @@ --- name: test regex +-- name: test_regex CREATE TABLE `ts` ( `str` varchar(65533) NULL COMMENT "", @@ -20,6 +20,6 @@ select regexp_replace('', '', 'xx'); select regexp_replace(NULL, '', 'xx'); select regexp_replace('abc中文def', '中文', 'xx'); select regexp_replace('abc中文def', '[\\p{Han}]+', 'xx'); - +select regexp_replace('a b c', " ", "-"); select str, regex, replaced, regexp_replace(str, regex, replaced) from ts order by str, regex, replaced;