NexusForce 1.0.0
A Modern C++ Library with extended functionality, web components, and utility libraries
载入中...
搜索中...
未找到
bloom_filter.hpp
浏览该文件的文档.
1#ifndef NEFORCE_CORE_CONTAINER_BLOOM_FILTER_HPP__
2#define NEFORCE_CORE_CONTAINER_BLOOM_FILTER_HPP__
3
12
17NEFORCE_BEGIN_NAMESPACE__
18
24
39template <typename T, typename Hash = hash<T>>
40class bloom_filter {
41private:
42 size_t m_;
43 size_t k_;
44 bitmap bits_;
45 Hash hasher_;
46
55 static size_t compute_m(const size_t n, const double p) noexcept {
56 const double ln2 = logarithm_e(2.);
57 const double m = -static_cast<double>(n) * logarithm_e(p) / (ln2 * ln2);
58 return static_cast<size_t>(ceil(m));
59 }
60
69 static size_t compute_k(const size_t n, const size_t m) noexcept {
70 const double k = (static_cast<double>(m) / n) * logarithm_e(2.);
71 return static_cast<size_t>(max(static_cast<decimal_t>(1), round(k)));
72 }
73
81 pair<size_t, size_t> hash_values(const T& key) const noexcept {
82 size_t h1 = hasher_(key);
83 size_t h2 = rotate_l(h1, 17);
84 if (h2 == 0) {
85 h2 = 1;
86 }
87 return {h1, h2};
88 }
89
99 size_t nth_hash(const size_t i, const size_t h1, const size_t h2) const noexcept { return (h1 + i * h2) % m_; }
100
101public:
102 bloom_filter(const bloom_filter&) noexcept = default;
103 bloom_filter(bloom_filter&&) noexcept = default;
104 bloom_filter& operator=(const bloom_filter&) = default;
105 bloom_filter& operator=(bloom_filter&&) = default;
106
110 ~bloom_filter() = default;
111
120 bloom_filter(const size_t expected_insertions, const double false_positive_prob) :
121 m_(compute_m(expected_insertions, false_positive_prob)),
122 k_(compute_k(expected_insertions, m_)),
123 bits_(m_, false),
124 hasher_(Hash()) {
125 if (expected_insertions == 0 || false_positive_prob <= 0.0 || false_positive_prob >= 1.0) {
126 NEFORCE_THROW_EXCEPTION(
127 value_exception("expected_insertions must be positive and false_positive_prob in (0,1)"));
128 }
129 }
130
139 bloom_filter(const size_t m, const size_t k) :
140 m_(m),
141 k_(k),
142 bits_(m_, false),
143 hasher_(Hash()) {
144 if (m == 0 || k == 0) {
145 NEFORCE_THROW_EXCEPTION(value_exception("m and k must be positive"));
146 }
147 }
148
155 NEFORCE_NODISCARD size_t capacity() const noexcept {
156 return static_cast<size_t>(static_cast<double>(m_) * logarithm_e(2.) / k_);
157 }
158
163 NEFORCE_NODISCARD bool empty() const noexcept {
164 for (size_t i = 0; i < m_; ++i) {
165 if (bits_[i]) {
166 return false;
167 }
168 }
169 return true;
170 }
171
176 NEFORCE_NODISCARD size_t bit_size() const noexcept { return m_; }
177
182 NEFORCE_NODISCARD size_t hash_count() const noexcept { return k_; }
183
190 NEFORCE_NODISCARD size_t approximate_count() const noexcept {
191 size_t bits_set = 0;
192 for (size_t i = 0; i < m_; ++i) {
193 if (bits_[i]) {
194 ++bits_set;
195 }
196 }
197 const double x = static_cast<double>(bits_set) / m_;
198 return static_cast<size_t>(-(static_cast<double>(m_) / k_) * logarithm_e(1 - x));
199 }
200
207 NEFORCE_NODISCARD double false_positive_rate() const noexcept {
208 size_t bits_set = 0;
209 for (size_t i = 0; i < m_; ++i) {
210 if (bits_[i]) {
211 ++bits_set;
212 }
213 }
214 const double x = static_cast<double>(bits_set) / m_;
215 return power(x, k_);
216 }
217
226 NEFORCE_NODISCARD bool contains(const T& key) const noexcept {
227 auto h = this->hash_values(key);
228 for (size_t i = 0; i < k_; ++i) {
229 const size_t index = this->nth_hash(i, h.first, h.second);
230 if (!bits_[index]) {
231 return false;
232 }
233 }
234 return true;
235 }
236
243 void insert(const T& key) noexcept {
244 auto h = this->hash_values(key);
245 for (size_t i = 0; i < k_; ++i) {
246 const size_t index = this->nth_hash(i, h.first, h.second);
247 bits_[index] = true;
248 }
249 }
250
256 void clear() noexcept { fill(bits_.begin(), bits_.end(), false); }
257
267 bloom_filter& merge(const bloom_filter& other) {
268 if (m_ != other.m_ || k_ != other.k_) {
269 NEFORCE_THROW_EXCEPTION(value_exception("Filters must have same parameters to merge"));
270 }
271
272 for (size_t i = 0; i < m_; ++i) {
273 if (other.bits_[i]) {
274 bits_[i] = true;
275 }
276 }
277 return *this;
278 }
279
289 bloom_filter intersect(const bloom_filter& other) const {
290 if (m_ != other.m_ || k_ != other.k_) {
291 NEFORCE_THROW_EXCEPTION(value_exception("Filters must have same parameters for intersection"));
292 }
293
294 bloom_filter result(m_, k_);
295 for (size_t i = 0; i < m_; ++i) {
296 result.bits_[i] = bits_[i] && other.bits_[i];
297 }
298 return result;
299 }
300
309 bloom_filter unite(const bloom_filter& other) const {
310 bloom_filter result(*this);
311 return result.merge(other);
312 }
313
321 byte_vector bytes((m_ + 7) / 8);
322 for (size_t i = 0; i < m_; ++i) {
323 if (bits_[i]) {
324 bytes[i / 8] |= (1 << (i % 8));
325 }
326 }
327 return bytes;
328 }
329
337 void from_bytes(const byte_vector& bytes) {
338 if (bytes.size() * 8 < m_) {
339 NEFORCE_THROW_EXCEPTION(value_exception("Insufficient byte data"));
340 }
341 for (size_t i = 0; i < m_; ++i) {
342 bits_[i] = (bytes[i / 8] >> (i % 8)) & 1;
343 }
344 }
345};
346 // BloomFilter
348
349NEFORCE_END_NAMESPACE__
350#endif // NEFORCE_CORE_CONTAINER_BLOOM_FILTER_HPP__
位操作函数
位图容器
位图容器
byte_vector to_bytes() const
转换为字节数组
~bloom_filter()=default
析构函数
NEFORCE_NODISCARD size_t hash_count() const noexcept
获取哈希函数数量
NEFORCE_NODISCARD size_t approximate_count() const noexcept
估计当前元素数量
bloom_filter intersect(const bloom_filter &other) const
交集操作
bloom_filter(const size_t expected_insertions, const double false_positive_prob)
基于预期插入数和误报率构造
bloom_filter & merge(const bloom_filter &other)
合并另一个过滤器
NEFORCE_NODISCARD size_t capacity() const noexcept
获取理论容量
NEFORCE_NODISCARD double false_positive_rate() const noexcept
估计当前误报率
NEFORCE_NODISCARD bool contains(const T &key) const noexcept
检查元素是否可能存在
bloom_filter(const size_t m, const size_t k)
基于直接参数构造
void clear() noexcept
清空过滤器
void from_bytes(const byte_vector &bytes)
从字节数组恢复
bloom_filter unite(const bloom_filter &other) const
并集操作
NEFORCE_NODISCARD size_t bit_size() const noexcept
获取位数组大小
NEFORCE_NODISCARD bool empty() const noexcept
检查过滤器是否为空
void insert(const T &key) noexcept
插入元素
NEFORCE_NODISCARD NEFORCE_CONSTEXPR20 size_type size() const noexcept
获取当前元素数量
NEFORCE_CONSTEXPR14 int rotate_l(const uintptr_t x, const int s) noexcept
整数循环左移
constexpr const T & max(const T &a, const T &b, Compare comp) noexcept(noexcept(comp(a, b)))
返回两个值中的较大者
vector< byte_t > byte_vector
字节向量类型别名
long double decimal_t
扩展精度浮点数类型
NEFORCE_CONST_FUNCTION NEFORCE_CONSTEXPR14 decimal_t logarithm_e(const decimal_t x) noexcept
计算自然对数
NEFORCE_CONST_FUNCTION NEFORCE_CONSTEXPR14 T power(const T &x, uint32_t n) noexcept
幂运算
NEFORCE_CONST_FUNCTION NEFORCE_CONSTEXPR14 decimal_t ceil(const decimal_t x) noexcept
向上取整
NEFORCE_CONST_FUNCTION NEFORCE_CONSTEXPR14 decimal_t round(const decimal_t x) noexcept
四舍五入
constexpr void fill(Iterator first, Iterator last, const T &value) noexcept(is_nothrow_assignable_v< iter_value_t< Iterator >, T >)
填充范围元素
数学函数库
存储两个值的元组对
变量处理异常
动态大小数组容器