From 97f9642df61006a538d5419a5a255352be8e77fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Leszczy=C5=84ski?= <2000michal@wp.pl>
Date: Wed, 2 Oct 2024 20:45:53 +0200
Subject: [PATCH] s: add workload indexing - add logging

---
 pkg/service/restore/index.go | 50 +++++++++++++++++++++++++++++++++++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/pkg/service/restore/index.go b/pkg/service/restore/index.go
index eb238b1df..ab6a2383f 100644
--- a/pkg/service/restore/index.go
+++ b/pkg/service/restore/index.go
@@ -79,7 +79,9 @@ func (w *tablesWorker) indexLocationWorkload(ctx context.Context, location Locat
 			return LocationWorkload{}, errors.Wrap(err, "filter already restored sstables")
 		}
 	}
-	return aggregateLocationWorkload(rawWorkload), nil
+	workload := aggregateLocationWorkload(rawWorkload)
+	w.logWorkloadInfo(ctx, workload)
+	return workload, nil
 }
 
 func (w *tablesWorker) createRemoteDirWorkloads(ctx context.Context, location Location) ([]RemoteDirWorkload, error) {
@@ -212,6 +214,52 @@ func (w *tablesWorker) initMetrics(workload []LocationWorkload) {
 	}, float64(totalSize-workloadSize)/float64(totalSize)*100)
 }
 
+// logWorkloadInfo logs the biggest sstable size, the average sstable size and
+// the sstable count for every remote sstable dir, every table and the whole
+// location of the given workload.
+func (w *tablesWorker) logWorkloadInfo(ctx context.Context, workload LocationWorkload) {
+	// avg returns 0 for an empty dir, table or location instead of panicking
+	// on integer division by zero.
+	avg := func(size, cnt int64) int64 {
+		if cnt == 0 {
+			return 0
+		}
+		return size / cnt
+	}
+	var locMax, locCnt int64
+	for _, twl := range workload.Tables {
+		var tabMax, tabCnt int64
+		for _, rdwl := range twl.RemoteDirs {
+			var dirMax int64
+			for _, sst := range rdwl.SSTables {
+				dirMax = max(dirMax, sst.Size)
+			}
+			dirCnt := int64(len(rdwl.SSTables))
+			w.logger.Info(ctx, "Remote sstable dir workload info",
+				"path", rdwl.RemoteSSTableDir,
+				"max size", dirMax,
+				"average size", avg(rdwl.Size, dirCnt),
+				"count", dirCnt)
+			tabCnt += dirCnt
+			tabMax = max(tabMax, dirMax)
+		}
+		w.logger.Info(ctx, "Table workload info",
+			"keyspace", twl.Keyspace,
+			"table", twl.Table,
+			"max size", tabMax,
+			"average size", avg(twl.Size, tabCnt),
+			"count", tabCnt)
+		locCnt += tabCnt
+		// Track the maximum across all tables, not just the last one.
+		locMax = max(locMax, tabMax)
+	}
+	w.logger.Info(ctx, "Location workload info",
+		"location", workload.Location.String(),
+		"max size", locMax,
+		"average size", avg(workload.Size, locCnt),
+		"count", locCnt)
+}
+
 func aggregateLocationWorkload(rawWorkload []RemoteDirWorkload) LocationWorkload {
 	remoteDirWorkloads := make(map[TableName][]RemoteDirWorkload)
 	for _, rw := range rawWorkload {