-
Notifications
You must be signed in to change notification settings - Fork 0
/
idibloom.rb
191 lines (157 loc) · 3.91 KB
/
idibloom.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
module Idibloom
class Counter
def initialize(args={})
@size = args[:size] || (1<<20)
@counter = args[:counter] || default_counter
@expected = args[:expected]
@counter_size = counter_size
@filter = "\x00" * (@size * @counter_size)
set_hash_count args[:hashes]
end
def increment(key, step=1)
hashes(key).each do |hash|
@filter[byte_range(hash)] = [incr(hash, step)].pack(@counter)
end
end
def decrement(key, step=1)
hashes(key).each do |hash|
@filter[byte_range(hash)] = [decr(hash, step)].pack(@counter)
end
end
def [](key)
hashes(key).map {|h| count(h)}.min
end
def save(f)
f = File.open(f, "wb") if f.is_a? String
f.write(@filter)
end
def p_collision
return nil if not @expected
(1.0 - Math.exp(-@hash_count * @expected / @size.to_f)) ** @hash_count
end
def to_a
@filter.unpack(@counter + "*")
end
def self.load(f, args={})
obj = self.new args
begin
f = File.open(f) if f.is_a? String
obj.send(:set_filter, f.read(), args[:hashes])
rescue Errno::ENOENT
raise unless args[:create]
end
obj
end
protected
def default_counter
"N" # 32 bit unsigned big-endian int
end
def counter_size
[0].pack(@counter).length
end
def set_hash_count(hash_count)
if @expected and not hash_count
@hash_count = (@size * Math.log(2) / @expected.to_f).ceil
else
@hash_count = hash_count || 4
end
end
def set_filter (bytes, hash_count=nil)
@filter = bytes
@size = bytes.length / counter_size
set_hash_count(hash_count)
end
def byte_range(hash)
(hash * @counter_size) ... ((hash + 1) * @counter_size)
end
def count(hash)
@filter[byte_range(hash)].unpack(@counter)[0]
end
def hashes(key)
(0...@hash_count).map {|n| (key + n.to_s).hash % @size}
end
def max_count
1 << (@counter_size * 8)
end
def incr(hash, step)
n = count(hash) + step.abs
n > max_count ? max_count : n
end
def decr(hash, step)
n = count(hash) - step.abs
n = n < 0 ? 0 : n
end
end
class ApproximateCounter < Counter
def [](key)
m = hashes(key).map {|h| count(h)}.min
(1 << m) >> 1
end
protected
def default_counter
"C"
end
def incr(hash, step)
n = count(hash)
n += 1 while rand(1 << n) < step.abs
n
end
def decr(hash, step)
n = count(hash)
n -= 1 while n > 0 and rand(1 << n) < step.abs
n
end
end
class ExactCounter < Counter
def initialize(*args)
super *args
@keys = {}
@filter = nil
end
def increment(key, step=1)
@keys[key] ||= 0
@keys[key] += 1
end
def decrement(key, step=1)
@keys[key] ||= 0
@keys[key] -= 1
@keys[key] = 0 if @keys[key] < 0
end
def [](key)
@keys[key]
end
def save(f)
f = File.open(f, "wb") if f.is_a? String
f.write(JSON.generate @keys)
end
def to_a
@keys.values
end
protected
def set_filter (bytes, hash_count=nil)
@keys = JSON.parse bytes
end
end
class Weights < Counter
def default_counter
"g" # single-precision network-endian float
end
def []= (key, val)
hashes(key).each do |hash|
@filter[byte_range(hash)] = [val.to_f].pack(@counter)
end
end
def [](key)
weights = {}
# what is the most frequent value for this key?
# count how often each value occurs.
hashes(key).each do |hash|
value = @filter[byte_range(hash)].unpack(@counter)
weights[value] = (weights[value] || 0) + 1
end
# sort on the frequencies and return the value that
# was found most often in the table.
weights.map{|k,v| [v,k]}.sort[-1][1]
end
end
end