Skip to content

Commit

Permalink
data: support ClickHouse back (#874)
Browse files Browse the repository at this point in the history
  • Loading branch information
zhenghaoz authored Oct 25, 2024
1 parent 6afddad commit 76a3ca5
Show file tree
Hide file tree
Showing 21 changed files with 470 additions and 100 deletions.
4 changes: 2 additions & 2 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ jobs:
- run:
name: Run tests
no_output_timeout: 20m
command: go test -timeout 20m -v ./... -skip "TestPostgres|TestMySQL|TestMongo|TestRedis"
command: go test -timeout 20m -v ./... -skip "TestPostgres|TestMySQL|TestMongo|TestRedis|TestClickHouse"

unit-test-windows:
executor: win/server-2022
Expand Down Expand Up @@ -139,7 +139,7 @@ jobs:
- run:
name: Run tests
no_output_timeout: 20m
command: go test -timeout 20m -v ./... -skip "TestPostgres|TestMySQL|TestMongo|TestRedis"
command: go test -timeout 20m -v ./... -skip "TestPostgres|TestMySQL|TestMongo|TestRedis|TestClickHouse"

workflows:
unit-test:
Expand Down
14 changes: 13 additions & 1 deletion .github/workflows/build_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,16 @@ jobs:
--health-timeout 5s
--health-retries 5
clickhouse:
image: clickhouse/clickhouse-server:21.10
ports:
- 8123
options: >-
--health-cmd="clickhouse-client --query 'SELECT 1'"
--health-interval=10s
--health-timeout=5s
--health-retries=5
redis:
image: redis/redis-stack:6.2.6-v9
ports:
Expand Down Expand Up @@ -102,6 +112,8 @@ jobs:
POSTGRES_URI: postgres://gorse:gorse_pass@localhost:${{ job.services.postgres.ports[5432] }}/
# MongoDB
MONGO_URI: mongodb://root:password@localhost:${{ job.services.mongo.ports[27017] }}/
# ClickHouse
CLICKHOUSE_URI: clickhouse://localhost:${{ job.services.clickhouse.ports[8123] }}/
# Redis
REDIS_URI: redis://localhost:${{ job.services.redis.ports[6379] }}/

Expand All @@ -113,7 +125,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
database: [mysql, postgres, mongo]
database: [mysql, postgres, mongo, clickhouse]
steps:
- uses: actions/checkout@v1

Expand Down
1 change: 1 addition & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ The default database URLs are directed to these databases in `storage/docker-com
| `MYSQL_URI` | `mysql://root:password@tcp(127.0.0.1:3306)/` |
| `POSTGRES_URI` | `postgres://gorse:gorse_pass@127.0.0.1/` |
| `MONGO_URI` | `mongodb://root:password@127.0.0.1:27017/` |
| `CLICKHOUSE_URI` | `clickhouse://127.0.0.1:8123/` |
| `REDIS_URI` | `redis://127.0.0.1:6379/` |

For example, use TiDB as a test database by:
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ For more information:

## Architecture

Gorse is a single-node training and distributed prediction recommender system. Gorse stores data in MySQL, MongoDB, or Postgres, with intermediate results cached in Redis, MySQL, MongoDB and Postgres.
Gorse is a single-node training and distributed prediction recommender system. Gorse stores data in MySQL, MongoDB, Postgres, or ClickHouse, with intermediate results cached in Redis, MySQL, MongoDB and Postgres.

1. The cluster consists of a master node, multiple worker nodes, and server nodes.
1. The master node is responsible for model training, non-personalized item recommendation, configuration management, and membership management.
Expand Down
21 changes: 21 additions & 0 deletions client/docker-compose.yml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,22 @@ services:
timeout: 5s
retries: 5

{% elif database == 'clickhouse' %}

clickhouse:
image: clickhouse/clickhouse-server:21.10
ports:
- 8123:8123
environment:
CLICKHOUSE_DB: gorse
CLICKHOUSE_USER: gorse
CLICKHOUSE_PASSWORD: gorse_pass
healthcheck:
test: clickhouse-client --user $$CLICKHOUSE_USER --password $$CLICKHOUSE_PASSWORD --query "SELECT 1"
interval: 10s
timeout: 5s
retries: 5

{% endif %}

worker:
Expand Down Expand Up @@ -117,6 +133,8 @@ services:
GORSE_DATA_STORE: postgres://gorse:gorse_pass@postgres/gorse?sslmode=disable
{% elif database == 'mongo' %}
GORSE_DATA_STORE: mongodb://root:password@mongo:27017/gorse?authSource=admin&connect=direct
{% elif database == 'clickhouse' %}
GORSE_DATA_STORE: clickhouse://gorse:gorse_pass@clickhouse:8123/gorse?mutations_sync=2
{% endif %}
command: >
-c /etc/gorse/config.toml
Expand All @@ -138,6 +156,9 @@ services:
{% elif database == 'mongo' %}
mongo:
condition: service_healthy
{% elif database == 'clickhouse' %}
clickhouse:
condition: service_healthy
{% endif %}

volumes:
Expand Down
3 changes: 3 additions & 0 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -607,6 +607,9 @@ func (config *Config) Validate(oneModel bool) error {
storage.MySQLPrefix,
storage.PostgresPrefix,
storage.PostgreSQLPrefix,
storage.ClickhousePrefix,
storage.CHHTTPPrefix,
storage.CHHTTPSPrefix,
}
if oneModel {
prefixes = append(prefixes, storage.SQLitePrefix)
Expand Down
5 changes: 4 additions & 1 deletion config/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,13 @@
# mongodb+srv://[username:password@]host1[:port1][,...hostN[:portN]][/[defaultauthdb][?options]]
cache_store = "redis://localhost:6379/0"

# The database for persist data, support MySQL, Postgres and MongoDB:
# The database for persistent data. Supported: MySQL, Postgres, ClickHouse and MongoDB:
# mysql://[username[:password]@][protocol[(address)]]/dbname[?param1=value1&...&paramN=valueN]
# postgres://bob:secret@1.2.3.4:5432/mydb?sslmode=verify-full
# postgresql://bob:secret@1.2.3.4:5432/mydb?sslmode=verify-full
# clickhouse://user:password@host[:port]/database?param1=value1&...&paramN=valueN
# chhttp://user:password@host[:port]/database?param1=value1&...&paramN=valueN
# chhttps://user:password@host[:port]/database?param1=value1&...&paramN=valueN
# mongodb://[username:password@]host1[:port1][,...hostN[:portN]][/[defaultauthdb][?options]]
# mongodb+srv://[username:password@]host1[:port1][,...hostN[:portN]][/[defaultauthdb][?options]]
data_store = "mysql://gorse:gorse_pass@tcp(localhost:3306)/gorse"
Expand Down
14 changes: 14 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,17 @@ services:
# volumes:
# - mongo_data:/data/db

# clickhouse:
# image: clickhouse/clickhouse-server:21.10
# ports:
# - 8123:8123
# environment:
# CLICKHOUSE_DB: gorse
# CLICKHOUSE_USER: gorse
# CLICKHOUSE_PASSWORD: gorse_pass
# volumes:
# - clickhouse_data:/var/lib/clickhouse

worker:
image: zhenghaoz/gorse-worker
restart: unless-stopped
Expand Down Expand Up @@ -84,6 +95,7 @@ services:
GORSE_DATA_STORE: mysql://gorse:gorse_pass@tcp(mysql:3306)/gorse
# GORSE_DATA_STORE: postgres://gorse:gorse_pass@postgres/gorse?sslmode=disable
# GORSE_DATA_STORE: mongodb://root:password@mongo:27017/gorse?authSource=admin&connect=direct
# GORSE_DATA_STORE: clickhouse://gorse:gorse_pass@clickhouse:8123/gorse
command: >
-c /etc/gorse/config.toml
--log-path /var/log/gorse/master.log
Expand All @@ -97,6 +109,7 @@ services:
- mysql
# - postgres
# - mongo
# - clickhouse

volumes:
worker_data:
Expand All @@ -106,3 +119,4 @@ volumes:
mysql_data:
# postgres_data:
# mongo_data:
# clickhouse_data:
14 changes: 9 additions & 5 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ require (
github.com/klauspost/cpuid/v2 v2.2.3
github.com/lafikl/consistent v0.0.0-20220512074542-bdd3606bfc3e
github.com/lib/pq v1.10.6
github.com/mailru/go-clickhouse/v2 v2.0.1-0.20221121001540-b259988ad8e5
github.com/mitchellh/mapstructure v1.5.0
github.com/orcaman/concurrent-map v1.0.0
github.com/prometheus/client_golang v1.13.0
Expand Down Expand Up @@ -68,6 +69,7 @@ require (
gopkg.in/yaml.v2 v2.4.0
gorgonia.org/gorgonia v0.9.18-0.20230327110624-d1c17944ed22
gorgonia.org/tensor v0.9.23
gorm.io/driver/clickhouse v0.4.2
gorm.io/driver/mysql v1.3.4
gorm.io/driver/postgres v1.3.5
gorm.io/driver/sqlite v1.3.4
Expand Down Expand Up @@ -101,6 +103,7 @@ require (
github.com/golang/snappy v0.0.4 // indirect
github.com/google/flatbuffers v2.0.6+incompatible // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 // indirect
github.com/hashicorp/go-version v1.6.0 // indirect
github.com/hashicorp/hcl v1.0.0 // indirect
github.com/inconshreveable/mousetrap v1.0.0 // indirect
github.com/jackc/chunkreader/v2 v2.0.1 // indirect
Expand Down Expand Up @@ -179,8 +182,9 @@ require (
modernc.org/token v1.0.1 // indirect
)

replace gorm.io/driver/sqlite v1.3.4 => github.com/gorse-io/sqlite v1.3.3-0.20220713123255-c322aec4e59e

replace gorgonia.org/tensor v0.9.23 => github.com/gorse-io/tensor v0.0.0-20230617102451-4c006ddc5162

replace gorgonia.org/gorgonia v0.9.18-0.20230327110624-d1c17944ed22 => github.com/gorse-io/gorgonia v0.0.0-20230817132253-6dd1dbf95849
replace (
gorgonia.org/gorgonia v0.9.18-0.20230327110624-d1c17944ed22 => github.com/gorse-io/gorgonia v0.0.0-20230817132253-6dd1dbf95849
gorgonia.org/tensor v0.9.23 => github.com/gorse-io/tensor v0.0.0-20230617102451-4c006ddc5162
gorm.io/driver/clickhouse v0.4.2 => github.com/gorse-io/clickhouse v0.3.3-0.20220715124633-688011a495bb
gorm.io/driver/sqlite v1.3.4 => github.com/gorse-io/sqlite v1.3.3-0.20220713123255-c322aec4e59e
)
8 changes: 8 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26/go.mod h1:dDKJzRmX4S3
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.2.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
Expand All @@ -298,6 +299,8 @@ github.com/gorgonia/bindgen v0.0.0-20180812032444-09626750019e/go.mod h1:YzKk63P
github.com/gorgonia/bindgen v0.0.0-20210223094355-432cd89e7765/go.mod h1:BLHSe436vhQKRfm6wxJgebeK4fDY+ER/8jV3vVH9yYU=
github.com/gorilla/securecookie v1.1.1 h1:miw7JPhV+b/lAHSXz4qd/nN9jRiAFV5FwjeKyCS8BvQ=
github.com/gorilla/securecookie v1.1.1/go.mod h1:ra0sb63/xPlUeL+yeDciTfxMRAA+MP+HVt/4epWDjd4=
github.com/gorse-io/clickhouse v0.3.3-0.20220715124633-688011a495bb h1:z/oOWE+Vy0PLcwIulZmIug4FtmvE3dJ1YOGprLeHwwY=
github.com/gorse-io/clickhouse v0.3.3-0.20220715124633-688011a495bb/go.mod h1:iILWzbul8U+gsf4kqbheF2QzBmdvVp63mloGGK8emDI=
github.com/gorse-io/dashboard v0.0.0-20230729051855-6c53a42d2bd4 h1:x0bLXsLkjEZdztd0Tw+Hx38vIjzabyj2Fk0EDitKcLk=
github.com/gorse-io/dashboard v0.0.0-20230729051855-6c53a42d2bd4/go.mod h1:bv2Yg9Pn4Dca4xPJbvibpF6LH6BjoxcjsEdIuojNano=
github.com/gorse-io/gorgonia v0.0.0-20230817132253-6dd1dbf95849 h1:Hwywr6NxzYeZYn35KwOsw7j8ZiMT60TBzpbn1MbEido=
Expand All @@ -309,6 +312,9 @@ github.com/gorse-io/tensor v0.0.0-20230617102451-4c006ddc5162/go.mod h1:1dsOegMm
github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 h1:asbCHRVmodnJTuQ3qamDwqVOIjwqUPTYmYuemVOx+Ys=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0/go.mod h1:ggCgvZ2r7uOoQjOyu2Y1NhHmEPPzzuhWgcza5M1Ji1I=
github.com/hashicorp/go-version v1.5.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA=
github.com/hashicorp/go-version v1.6.0 h1:feTTfFNnjP967rlCxM/I9g701jU+RN74YKx2mOkIeek=
github.com/hashicorp/go-version v1.6.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA=
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4=
Expand Down Expand Up @@ -442,6 +448,8 @@ github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN
github.com/mailru/easyjson v0.7.6/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/mailru/go-clickhouse/v2 v2.0.1-0.20221121001540-b259988ad8e5 h1:JgQ+kJg8uKs6JjnDxnMgkKT4PPH36uU6chpYw2PQc9Q=
github.com/mailru/go-clickhouse/v2 v2.0.1-0.20221121001540-b259988ad8e5/go.mod h1:TwxN829KnFZ7jAka9l9EoCV+U0CBFq83SFev4oLbnNU=
github.com/mattn/go-colorable v0.1.1/go.mod h1:FuOcm+DKB9mbwrcAfNl7/TZVBZ6rcnceauSikq3lYCQ=
github.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
Expand Down
32 changes: 21 additions & 11 deletions server/bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,6 @@ import (
"database/sql"
"encoding/json"
"fmt"
"math/rand"
"net"
"net/http"
"os"
"runtime"
"strconv"
"strings"
"testing"
"time"

"github.com/emicklei/go-restful/v3"
"github.com/go-resty/resty/v2"
"github.com/redis/go-redis/v9"
Expand All @@ -41,6 +31,15 @@ import (
"go.mongodb.org/mongo-driver/mongo"
"go.mongodb.org/mongo-driver/mongo/options"
"google.golang.org/protobuf/proto"
"math/rand"
"net"
"net/http"
"os"
"runtime"
"strconv"
"strings"
"testing"
"time"
)

const (
Expand All @@ -61,7 +60,7 @@ func init() {
}
return defaultValue
}
benchDataStore = env("BENCH_DATA_STORE", "mysql://root:password@tcp(127.0.0.1:3306)/")
benchDataStore = env("BENCH_DATA_STORE", "clickhouse://127.0.0.1:8123/")
benchCacheStore = env("BENCH_CACHE_STORE", "redis://127.0.0.1:6379/")
}

Expand Down Expand Up @@ -192,6 +191,17 @@ func (s *benchServer) prepareData(b *testing.B, url, benchName string) string {
err = db.Close()
require.NoError(b, err)
return url + strings.ToLower(dbName) + "?sslmode=disable&TimeZone=UTC"
} else if strings.HasPrefix(url, "clickhouse://") {
uri := "http://" + url[len("clickhouse://"):]
db, err := sql.Open("clickhouse", uri)
require.NoError(b, err)
_, err = db.Exec("DROP DATABASE IF EXISTS " + dbName)
require.NoError(b, err)
_, err = db.Exec("CREATE DATABASE " + dbName)
require.NoError(b, err)
err = db.Close()
require.NoError(b, err)
return url + dbName + "?mutations_sync=2"
} else if strings.HasPrefix(url, "mongodb://") {
ctx := context.Background()
cli, err := mongo.Connect(ctx, options.Client().ApplyURI(url+"?authSource=admin&connect=direct"))
Expand Down
3 changes: 3 additions & 0 deletions server/bench_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ case $CACHE_ARG in
esac

case $DATA_ARG in
clickhouse)
export BENCH_DATA_STORE='clickhouse://127.0.0.1:8123/'
;;
mysql)
export BENCH_DATA_STORE='mysql://root:password@tcp(127.0.0.1:3306)/'
;;
Expand Down
29 changes: 29 additions & 0 deletions storage/data/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"context"
"encoding/json"
"reflect"
"net/url"
"sort"
"strings"
"time"
Expand All @@ -33,6 +34,7 @@ import (
"go.mongodb.org/mongo-driver/x/mongo/driver/connstring"
"go.opentelemetry.io/contrib/instrumentation/go.mongodb.org/mongo-driver/mongo/otelmongo"
semconv "go.opentelemetry.io/otel/semconv/v1.12.0"
"gorm.io/driver/clickhouse"
"gorm.io/driver/mysql"
"gorm.io/driver/postgres"
"gorm.io/driver/sqlite"
Expand Down Expand Up @@ -223,6 +225,7 @@ type Database interface {
Init() error
Ping() error
Close() error
Optimize() error
Purge() error
BatchInsertItems(ctx context.Context, items []Item) error
BatchGetItems(ctx context.Context, itemIds []string) ([]Item, error)
Expand Down Expand Up @@ -297,6 +300,32 @@ func Open(path, tablePrefix string) (Database, error) {
return nil, errors.Trace(err)
}
return database, nil
} else if strings.HasPrefix(path, storage.ClickhousePrefix) || strings.HasPrefix(path, storage.CHHTTPPrefix) || strings.HasPrefix(path, storage.CHHTTPSPrefix) {
// replace the URL scheme with http/https
parsed, err := url.Parse(path)
if err != nil {
return nil, errors.Trace(err)
}
if strings.HasPrefix(path, storage.CHHTTPSPrefix) {
parsed.Scheme = "https"
} else {
parsed.Scheme = "http"
}
uri := parsed.String()
database := new(SQLDatabase)
database.driver = ClickHouse
database.TablePrefix = storage.TablePrefix(tablePrefix)
if database.client, err = otelsql.Open("chhttp", uri,
otelsql.WithAttributes(semconv.DBSystemKey.String("clickhouse")),
otelsql.WithSpanOptions(otelsql.SpanOptions{DisableErrSkip: true}),
); err != nil {
return nil, errors.Trace(err)
}
database.gormDB, err = gorm.Open(clickhouse.New(clickhouse.Config{Conn: database.client}), storage.NewGORMConfig(tablePrefix))
if err != nil {
return nil, errors.Trace(err)
}
return database, nil
} else if strings.HasPrefix(path, storage.MongoPrefix) || strings.HasPrefix(path, storage.MongoSrvPrefix) {
// connect to database
database := new(MongoDB)
Expand Down
Loading

0 comments on commit 76a3ca5

Please sign in to comment.