Skip to content

Commit

Permalink
add the hasCommonSubstring and add a test case (#34)
Browse files Browse the repository at this point in the history
Co-authored-by: “BingqingXue1” <xuebingqing@xuebingqingdeMacBook-Pro-2.local>
  • Loading branch information
BingqingXue1 and “BingqingXue1” authored Apr 9, 2024
1 parent 46759bc commit 569c989
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 0 deletions.
36 changes: 36 additions & 0 deletions score.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ func splitSsdeep(hash string) (int, string, string, error) {
}

func scoreDistance(h1, h2 string, _ int) int {
if !hasCommonSubstring(h1, h2) {
return 0
}
d := distance(h1, h2)
d = (d * spamSumLength) / (len(h1) + len(h2))
d = (100 * d) / spamSumLength
Expand All @@ -83,3 +86,36 @@ func scoreDistance(h1, h2 string, _ int) int {
*/
return d
}
func hasCommonSubstring(s1, s2 string) bool {
i := 0
j := 0
s1Len := len(s1)
s2Len := len(s2)
hashes := make([]uint32, (spamSumLength - (rollingWindow - 1)))
if s1Len < rollingWindow || s2Len < rollingWindow {
return false
}
state := &rollingState{}
for i = 0; i < rollingWindow-1; i++ {
state.rollHash(s1[i])
}
for i = rollingWindow - 1; i < s1Len; i++ {
state.rollHash(s1[i])
hashes[i-(rollingWindow-1)] = state.rollSum()
}
s1Len -= (rollingWindow - 1)
state.rollReset()
for j = 0; j < rollingWindow-1; j++ {
state.rollHash(s2[j])
}
for j = 0; j < s2Len-(rollingWindow-1); j++ {
state.rollHash(s2[j+(rollingWindow-1)])
var h = state.rollSum()
for i = 0; i < s1Len; i++ {
if hashes[i] == h && s1[i:i+rollingWindow] == s2[j:j+rollingWindow] {
return true
}
}
}
return false
}
10 changes: 10 additions & 0 deletions score_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ var h3 = "196608:pDSC8olnoL1v/uawvbQD7XlZUFYzYyMb615NktYHF7dREN/JNnQrmhnUPI+/n2Y

var h4 = "196608:7DSC8olnoL1v/uawvbQD7XlZUFYzYyMb615NktYHF7dREN/JNnQrmhnUPI+/n2Y7:3DHoJXv7XOq7Mb2TwYHXREN/3QrmktPt"

var h5 = "24:YDVLfsT1ds/1H9Wpgq7n4XMijV6h4Z3QCw4qat:YD51H9CiMuV6uACwVat"

var h6 = "24:YDVLfyvDj+C+opg8DV0Mdle6hPZ3QCw4qat:YDMvDj+C+kBOM+6HACwVat"

func assertDistanceEqual(t *testing.T, expected, actual int) {
if expected != actual {
t.Fatalf("Distance mismatch: %d (expected)\n"+
Expand All @@ -39,6 +43,12 @@ func TestHashDistance2(t *testing.T) {
assertDistanceEqual(t, 97, d)
}

// TestHashDistance3 checks that Distance returns no error for the h5/h6 hash
// pair and scores it at 54.
func TestHashDistance3(t *testing.T) {
	score, err := Distance(h5, h6)
	require.NoError(t, err)
	assertDistanceEqual(t, 54, score)
}

func TestEmptyHash1(t *testing.T) {
d, err := Distance("", h2)
require.Error(t, err)
Expand Down
9 changes: 9 additions & 0 deletions ssdeep.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,15 @@ type rollingState struct {
// rollSum returns the sum of the state's h1, h2 and h3 fields as a uint32.
func (rs *rollingState) rollSum() uint32 {
return rs.h1 + rs.h2 + rs.h3
}
// rollReset zeroes h1, h2, h3, n and every element of window so the same
// rollingState value can be reused for a new input.
func (rs *rollingState) rollReset() {
	rs.h1, rs.h2, rs.h3 = 0, 0, 0
	rs.n = 0
	for idx := range rs.window {
		rs.window[idx] = 0
	}
}

type ssdeepState struct {
rollingState rollingState
Expand Down

0 comments on commit 569c989

Please sign in to comment.