Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature] Support DataSketches, including Quantile Sketches, Theta Sketch, Frequency… #52207

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
661 changes: 661 additions & 0 deletions be/src/exprs/agg/ds_agg.h

Large diffs are not rendered by default.

240 changes: 0 additions & 240 deletions be/src/exprs/agg/ds_hll_count_distinct.h

This file was deleted.

18 changes: 9 additions & 9 deletions be/src/exprs/agg/factory/aggregate_factory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
#include "exprs/agg/count.h"
#include "exprs/agg/covariance.h"
#include "exprs/agg/distinct.h"
#include "exprs/agg/ds_hll_count_distinct.h"
#include "exprs/agg/ds_agg.h"
#include "exprs/agg/exchange_perf.h"
#include "exprs/agg/group_concat.h"
#include "exprs/agg/histogram.h"
Expand Down Expand Up @@ -188,9 +188,6 @@ class AggregateFactory {
template <LogicalType T>
static AggregateFunctionPtr MakeHllNdvAggregateFunction();

template <LogicalType T>
static AggregateFunctionPtr MakeHllSketchAggregateFunction();

template <LogicalType T>
static AggregateFunctionPtr MakeHllRawAggregateFunction();

Expand Down Expand Up @@ -259,6 +256,9 @@ class AggregateFactory {

template <LogicalType LT>
static auto MakeRetractMaxAggregateFunction();

// Factory for the DataSketches-backed aggregate functions, selected at compile time by the
// input logical type LT and the sketch flavour ST. The out-of-class definition in this file
// returns a std::make_shared<DataSketchesAggregateFunction<LT, ST>>() instance.
template <LogicalType LT, SketchType ST>
static AggregateFunctionPtr MakeDataSketchesAggregateFunction();
};

// The function should be placed by alphabetical order
Expand Down Expand Up @@ -394,11 +394,6 @@ AggregateFunctionPtr AggregateFactory::MakeHllNdvAggregateFunction() {
return std::make_shared<HllNdvAggregateFunction<LT, false>>();
}

template <LogicalType LT>
AggregateFunctionPtr AggregateFactory::MakeHllSketchAggregateFunction() {
return std::make_shared<HllSketchAggregateFunction<LT>>();
}

template <LogicalType LT>
AggregateFunctionPtr AggregateFactory::MakeHllRawAggregateFunction() {
return std::make_shared<HllNdvAggregateFunction<LT, true>>();
Expand Down Expand Up @@ -442,4 +437,9 @@ auto AggregateFactory::MakeRetractMaxAggregateFunction() {
MaxElement<LT, MaxAggregateDataRetractable<LT>>>>();
}

// Builds a shared DataSketchesAggregateFunction specialised for logical type LT and sketch
// kind ST, and hands it back through the generic AggregateFunctionPtr handle.
template <LogicalType LT, SketchType ST>
AggregateFunctionPtr AggregateFactory::MakeDataSketchesAggregateFunction() {
    using SketchAggFunc = DataSketchesAggregateFunction<LT, ST>;
    return std::make_shared<SketchAggFunc>();
}

} // namespace starrocks
27 changes: 24 additions & 3 deletions be/src/exprs/agg/factory/aggregate_resolver_approx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,6 @@ struct HLLUnionBuilder {

resolver->add_aggregate_mapping<lt, TYPE_BIGINT, HyperLogLog>(
"approx_count_distinct", false, AggregateFactory::MakeHllNdvAggregateFunction<lt>());

resolver->add_aggregate_mapping_variadic<lt, TYPE_BIGINT, HLLSketchState>(
"ds_hll_count_distinct", false, AggregateFactory::MakeHllSketchAggregateFunction<lt>());
}
}
};
Expand All @@ -57,10 +54,34 @@ struct ApproxTopKBuilder {
}
};

struct DataSketchesBuilder {
template <LogicalType lt>
void operator()(AggregateFuncResolver* resolver) {
if constexpr (lt_is_fixedlength<lt> || lt_is_string<lt>) {
resolver->add_aggregate_mapping<lt, TYPE_BIGINT, DSSketchState<lt, SketchType::HLL>>(
"ds_hll_count_distinct", false,
AggregateFactory::MakeDataSketchesAggregateFunction<lt, SketchType::HLL>());
resolver->add_aggregate_mapping<lt, TYPE_BIGINT, DSSketchState<lt, SketchType::THETA>>(
"ds_theta", false, AggregateFactory::MakeDataSketchesAggregateFunction<lt, SketchType::THETA>());
}
if constexpr (lt_is_integer<lt> || lt_is_float<lt>) {
resolver->add_aggregate_mapping<lt, TYPE_ARRAY, DSSketchState<lt, SketchType::QUANTILE>>(
"ds_quantile", false,
AggregateFactory::MakeDataSketchesAggregateFunction<lt, SketchType::QUANTILE>());
}
if constexpr (lt_is_integer<lt> || lt_is_float<lt> || lt_is_string<lt>) {
resolver->add_aggregate_mapping<lt, TYPE_ARRAY, DSSketchState<lt, SketchType::FREQUENT>>(
"ds_frequent", false,
AggregateFactory::MakeDataSketchesAggregateFunction<lt, SketchType::FREQUENT>());
}
}
};

void AggregateFuncResolver::register_approx() {
for (auto type : aggregate_types()) {
type_dispatch_all(type, HLLUnionBuilder(), this);
type_dispatch_all(type, ApproxTopKBuilder(), this);
type_dispatch_all(type, DataSketchesBuilder(), this);
}
add_aggregate_mapping<TYPE_HLL, TYPE_HLL, HyperLogLog>("hll_union", false,
AggregateFactory::MakeHllUnionAggregateFunction());
Expand Down
2 changes: 1 addition & 1 deletion be/src/types/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ add_library(Types STATIC
array_type_info.cpp
bitmap_value.cpp
date_value.cpp
ds_sketch.cpp
hll.cpp
hll_sketch.cpp
logical_type.cpp
map_type_info.cpp
struct_type_info.cpp
Expand Down
6 changes: 6 additions & 0 deletions be/src/types/constexpr.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ constexpr int HLL_EMPTY_SIZE = 1;
const static int MAX_HLL_LOG_K = 20;
const static uint8_t DEFAULT_HLL_LOG_K = 17;

// Defaults for the DataSketches-based aggregates.
// NOTE(review): the meanings below are inferred from the constant names only — confirm
// against the sketch wrappers that consume them.

// Presumably the default accuracy parameter `k` of the quantile sketch — TODO confirm.
const static uint16_t DEFAULT_QUANTILE_K = 128;
// Presumably the default number of counters/items for the frequent-items sketch — TODO confirm.
const static uint64_t DEFAULT_COUNTER_NUM = 10;

// Presumably the log2 lower/upper bounds of the frequent-items sketch map size — TODO confirm.
const static uint8_t DEFAULT_FREQUENT_LG_MIN_SIZE = 3;
const static uint8_t DEFAULT_FREQUENT_LG_MAX_SIZE = 21;
// Misspelled legacy name (lower-case 'n'), kept as an alias so existing references keep
// compiling. New code should use DEFAULT_FREQUENT_LG_MIN_SIZE.
const static uint8_t DEFAULT_FREQUENT_LG_MIn_SIZE = DEFAULT_FREQUENT_LG_MIN_SIZE;

// For JSON type
constexpr int kJsonDefaultSize = 128;
constexpr int kJsonMetaDefaultFormatVersion = 1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

#include "types/hll_sketch.h"
#include "types/ds_sketch.h"

#include "common/logging.h"
#include "runtime/mem_pool.h"
Expand Down Expand Up @@ -54,9 +54,6 @@ void DataSketchesHll::merge(const DataSketchesHll& other) {
_sketch_union = std::make_unique<hll_union_type>(other.get_lg_config_k(), alloc_type(_memory_usage));
}
auto o_sketch = other.get_hll_sketch();
if (o_sketch == nullptr) {
return;
}
_sketch_union->update(*o_sketch);
this->mark_changed();
}
Expand Down
Loading
Loading