From 569c989b6f4e83e31e026de98c2bb0b3a3335394 Mon Sep 17 00:00:00 2001
From: BingqingXue1 <154310738+BingqingXue1@users.noreply.github.com>
Date: Tue, 9 Apr 2024 19:00:05 +0800
Subject: [PATCH] add the hasCommonSubstring and add a test case (#34)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: “BingqingXue1”
---
 score.go      | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 score_test.go | 10 ++++++++++
 ssdeep.go     |  9 +++++++++
 3 files changed, 68 insertions(+)

diff --git a/score.go b/score.go
index 5906a3a..5aa5c7f 100644
--- a/score.go
+++ b/score.go
@@ -71,6 +71,9 @@ func splitSsdeep(hash string) (int, string, string, error) {
 }
 
 func scoreDistance(h1, h2 string, _ int) int {
+	if !hasCommonSubstring(h1, h2) {
+		return 0
+	}
 	d := distance(h1, h2)
 	d = (d * spamSumLength) / (len(h1) + len(h2))
 	d = (100 * d) / spamSumLength
@@ -83,3 +86,49 @@ func scoreDistance(h1, h2 string, _ int) int {
 	*/
 	return d
 }
+
+// hasCommonSubstring reports whether s1 and s2 share at least one identical
+// run of rollingWindow consecutive bytes. It returns false when either string
+// is shorter than rollingWindow.
+//
+// A rolling hash is recorded for every window position of s1 and then used
+// as a cheap pre-filter while sliding over s2; a hash hit is always confirmed
+// with a direct substring comparison, so hash collisions cannot produce a
+// false positive.
+func hasCommonSubstring(s1, s2 string) bool {
+	if len(s1) < rollingWindow || len(s2) < rollingWindow {
+		return false
+	}
+
+	// One slot per window position in s1. The table is sized from the input
+	// rather than from spamSumLength, so a hash string longer than
+	// spamSumLength can no longer index past the end of the table and panic.
+	hashes := make([]uint32, len(s1)-(rollingWindow-1))
+
+	state := &rollingState{}
+	// Feed the first rollingWindow-1 bytes up front so that every byte fed
+	// afterwards completes one full rollingWindow-byte span.
+	for i := 0; i < rollingWindow-1; i++ {
+		state.rollHash(s1[i])
+	}
+	for i := range hashes {
+		state.rollHash(s1[i+rollingWindow-1])
+		hashes[i] = state.rollSum()
+	}
+
+	// Reset the state and feed s2 the same way.
+	state.rollReset()
+	for j := 0; j < rollingWindow-1; j++ {
+		state.rollHash(s2[j])
+	}
+	for j := 0; j+rollingWindow <= len(s2); j++ {
+		state.rollHash(s2[j+rollingWindow-1])
+		h := state.rollSum()
+		for i, candidate := range hashes {
+			if candidate == h && s1[i:i+rollingWindow] == s2[j:j+rollingWindow] {
+				return true
+			}
+		}
+	}
+	return false
+}
diff --git a/score_test.go b/score_test.go
index 2be284a..934e279 100644
--- a/score_test.go
+++ b/score_test.go
@@ -14,6 +14,10 @@ var h3 = 
"196608:pDSC8olnoL1v/uawvbQD7XlZUFYzYyMb615NktYHF7dREN/JNnQrmhnUPI+/n2Y
 
 var h4 = "196608:7DSC8olnoL1v/uawvbQD7XlZUFYzYyMb615NktYHF7dREN/JNnQrmhnUPI+/n2Y7:3DHoJXv7XOq7Mb2TwYHXREN/3QrmktPt"
 
+var h5 = "24:YDVLfsT1ds/1H9Wpgq7n4XMijV6h4Z3QCw4qat:YD51H9CiMuV6uACwVat"
+
+var h6 = "24:YDVLfyvDj+C+opg8DV0Mdle6hPZ3QCw4qat:YDMvDj+C+kBOM+6HACwVat"
+
 func assertDistanceEqual(t *testing.T, expected, actual int) {
 	if expected != actual {
 		t.Fatalf("Distance mismatch: %d (expected)\n"+
@@ -39,6 +43,12 @@ func TestHashDistance2(t *testing.T) {
 	assertDistanceEqual(t, 97, d)
 }
 
+func TestHashDistance3(t *testing.T) {
+	d, err := Distance(h5, h6)
+	require.NoError(t, err)
+	assertDistanceEqual(t, 54, d)
+}
+
 func TestEmptyHash1(t *testing.T) {
 	d, err := Distance("", h2)
 	require.Error(t, err)
diff --git a/ssdeep.go b/ssdeep.go
index 80aed48..1821cc3 100644
--- a/ssdeep.go
+++ b/ssdeep.go
@@ -51,6 +51,15 @@ type rollingState struct {
 func (rs *rollingState) rollSum() uint32 {
 	return rs.h1 + rs.h2 + rs.h3
 }
+
+// rollReset zeroes h1, h2, h3 and n and clears every element of window,
+// so the same rollingState value can be fed a fresh input.
+func (rs *rollingState) rollReset() {
+	rs.h1, rs.h2, rs.h3, rs.n = 0, 0, 0, 0
+	for i := range rs.window {
+		rs.window[i] = 0
+	}
+}
 
 type ssdeepState struct {
 	rollingState rollingState