Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hashing Logic #556

Open
wants to merge 9 commits into
base: denopink/feat/rewrite
Choose a base branch
from
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,10 @@ _testmain.go
.vscode/*
.dev/*

caduceus
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wes had me move main.go from cmd/main.go to cmd/caduceus/main.go so goschtalt functions correctly. This .gitignore line caused that main.go to be ignored, so I added the .yaml extension so that only the caduceus.yaml file is ignored.

caduceus.yaml
.ignore


!deploy/helm/caduceus
deploy/helm/caduceus/rendered.*

File renamed without changes.
77 changes: 77 additions & 0 deletions internal/sink/hasher.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// SPDX-FileCopyrightText: 2024 Comcast Cable Communications Management, LLC
// SPDX-License-Identifier: Apache-2.0
package sink

import (
"fmt"
"hash/crc32"
"reflect"
"sort"

"github.com/xmidt-org/wrp-go/v3"
)

// Node pairs a sink identifier with the CRC-32 hash of that identifier,
// fixing the sink's position on the consistent-hash ring.
type Node struct {
	hash int
	sink string
}

// HashRing is a slice of Nodes kept sorted by hash, forming a consistent-hash
// ring: a key maps to the first node whose hash is >= the key's hash, wrapping
// around to the first node. It implements sort.Interface so the ring can be
// kept ordered.
type HashRing []Node

// Len reports the number of nodes on the ring (sort.Interface).
func (h HashRing) Len() int {
	return len(h)
}

// Less orders nodes by ascending hash (sort.Interface).
func (h HashRing) Less(i, j int) bool {
	return h[i].hash < h[j].hash
}

// Swap exchanges two nodes (sort.Interface).
func (h HashRing) Swap(i, j int) {
	h[i], h[j] = h[j], h[i]
}

// Get returns the sink responsible for key, or "" when the ring is empty.
// The responsible sink is the first node clockwise from the key's hash
// position, found by binary search and wrapping to index 0 past the end.
func (h HashRing) Get(key string) string {
	if len(h) == 0 {
		return ""
	}
	hash := int(crc32.ChecksumIEEE([]byte(key)))
	idx := sort.Search(len(h), func(i int) bool {
		return h[i].hash >= hash
	})
	if idx == len(h) {
		idx = 0 // wrap around the ring
	}
	return h[idx].sink
}

// Add places server on the ring at the position determined by the CRC-32
// hash of its name, keeping the ring sorted by hash.
func (h *HashRing) Add(server string) {
	hash := int(crc32.ChecksumIEEE([]byte(server)))
	// Binary-search the insertion point and shift, instead of appending and
	// re-sorting the whole ring: O(n) insert rather than O(n log n) sort.
	idx := sort.Search(len(*h), func(i int) bool {
		return (*h)[i].hash >= hash
	})
	*h = append(*h, Node{})
	copy((*h)[idx+1:], (*h)[idx:])
	(*h)[idx] = Node{hash: hash, sink: server}
}

// Remove deletes the first node whose hash matches the CRC-32 hash of server,
// if any. Deleting from a sorted slice preserves order, so no re-sort is
// needed afterwards.
func (h *HashRing) Remove(server string) {
	hash := int(crc32.ChecksumIEEE([]byte(server)))
	for i, node := range *h {
		if node.hash == hash {
			*h = append((*h)[:i], (*h)[i+1:]...)
			break
		}
	}
}

// GetKey returns the string form of the named exported field of msg, for use
// as a consistent-hash key. It returns "" when msg is nil, when field is not
// a field of wrp.Message, or when the field cannot be read.
func GetKey(field string, msg *wrp.Message) string {
	v := reflect.ValueOf(msg)
	if v.Kind() == reflect.Ptr {
		// Guard against a nil message: Elem() of a nil pointer yields an
		// invalid Value, and FieldByName on it would panic.
		if v.IsNil() {
			return ""
		}
		v = v.Elem()
	}
	if v.Kind() != reflect.Struct {
		return ""
	}

	value := v.FieldByName(field)
	// CanInterface guards against unexported fields, where Interface()
	// would panic.
	if value.IsValid() && value.CanInterface() {
		return fmt.Sprintf("%v", value.Interface())
	}

	return ""
}
50 changes: 37 additions & 13 deletions internal/sink/sink.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,11 @@ type CommonWebhook struct {
mutex sync.RWMutex
logger *zap.Logger
}
type Kafkas []*Kafka
type KafkaSink struct {
Kafkas map[string]*Kafka
Hash *HashRing
HashField string
}
type Kafka struct {
brokerAddr []string
topic string
Expand All @@ -76,12 +80,19 @@ func NewSink(c Config, logger *zap.Logger, listener ancla.Register) Sink {
return whs
}
if len(l.Registration.Kafkas) > 0 {
var sink Kafkas
for _, k := range l.Registration.Kafkas {
var sink KafkaSink
r := &HashRing{}
sink.HashField = l.Registration.Hash.Field
for i, k := range l.Registration.Kafkas {
kafka := &Kafka{}
kafka.Update(l.GetId(), "quickstart-events", k.RetryHint.MaxRetry, k.BootstrapServers, logger)
sink = append(sink, kafka)
sink.Kafkas[strconv.Itoa(i)] = kafka
if l.Registration.Hash.Field != "" {
r.Add(strconv.Itoa(i))

}
}
sink.Hash = r
maurafortino marked this conversation as resolved.
Show resolved Hide resolved
return sink
}
default:
Expand All @@ -91,8 +102,7 @@ func NewSink(c Config, logger *zap.Logger, listener ancla.Register) Sink {
}

func (v1 *WebhookV1) Update(c Config, l *zap.Logger, altUrls []string, id, failureUrl, receiverUrl string) (err error) {
//TODO: is there anything else that needs to be done for this?
//do we need to return an error
//TODO: do we need to return an error if not - we should get rid of the error return
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if we don't need it, then we need to see whether the error fulfills any interfaces that we depend on

v1.id = id
v1.failureUrl = failureUrl
v1.deliveryInterval = c.DeliveryInterval
Expand Down Expand Up @@ -338,14 +348,28 @@ func (k *Kafka) Update(id, topic string, retries int, servers []string, logger *
return nil
}

func (k Kafkas) Send(secret string, acceptType string, msg *wrp.Message) error {
//TODO: discuss with wes and john the default hashing logic
//for now: when no hash is given we will just loop through all the kafkas
func (k KafkaSink) Send(secret string, acceptType string, msg *wrp.Message) error {
var errs error
for _, kafka := range k {
err := kafka.send(secret, acceptType, msg)
if err != nil {
errs = errors.Join(errs, err)
if k.HashField != "" {

if kafka, ok := k.Kafkas[k.Hash.Get(GetKey(k.HashField, msg))]; ok {
err := kafka.send(secret, acceptType, msg)
if err != nil {
errs = errors.Join(errs, err)
}

} else {
errs = fmt.Errorf("could not find kakfa for the related hash %v", k.HashField)
Copy link
Contributor Author

@maurafortino maurafortino Oct 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we want to return an error here? Or, instead of the k.HashField check, should we fall through to the loop over all the kafkas whenever the hash lookup fails?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's leave this, but we need to think about how to best describe possible error cases like this, i.e. define errors, new metrics, logging (I think this section is good — just add a TODO saying we need to flesh out the error handling for kafka)

Copy link
Contributor

@denopink denopink Oct 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

technically hashing can't fail as long as there is 1 item so k.Kafkas[k.Hash.Get(GetKey(k.HashField, msg))] should always be good

but to make sure of this, we can add a check len(k.Kafkas) == len(k.Hash) during the Hash and kafka setup

}

} else {
//TODO: discuss with wes and john the default hashing logic
//for now: when no hash is given we will just loop through all the kafkas
for _, kafka := range k.Kafkas {
err := kafka.send(secret, acceptType, msg)
if err != nil {
errs = errors.Join(errs, err)
}
}
}
return errs
Expand Down
Loading