NexusForce 1.0.0
A Modern C++ Library with extended functionality, web components, and utility libraries
载入中...
搜索中...
未找到
bloom_filter.hpp
浏览该文件的文档.
1#ifndef NEFORCE_CORE_CONTAINER_BLOOM_FILTER_HPP__
2#define NEFORCE_CORE_CONTAINER_BLOOM_FILTER_HPP__
3
12
17NEFORCE_BEGIN_NAMESPACE__
18
148
163template <typename T, typename Hash = hash<T>>
164class bloom_filter {
165private:
166 size_t m_;
167 size_t k_;
168 bitmap bits_;
169 Hash hasher_;
170
179 static size_t compute_m(const size_t n, const double p) noexcept {
180 if (p <= 0.0 || p >= 1.0) {
181 return 0;
182 }
183 constexpr decimal_t ln2 = logarithm_e(2.);
184 const decimal_t m = -static_cast<decimal_t>(n) * logarithm_e(p) / (ln2 * ln2);
185 return static_cast<size_t>(ceil(m));
186 }
187
196 static size_t compute_k(const size_t n, const size_t m) noexcept {
197 if (n == 0 || m == 0) {
198 return 0;
199 }
200 constexpr decimal_t ln2 = logarithm_e(2.);
201 const decimal_t k = (static_cast<decimal_t>(m) / static_cast<decimal_t>(n)) * ln2;
202 return static_cast<size_t>(max(static_cast<decimal_t>(1), round(k)));
203 }
204
212 pair<size_t, size_t> hash_values(const T& key) const noexcept {
213 size_t h1 = hasher_(key);
214 size_t h2 = rotate_l(h1, 17);
215 if (h2 == 0) {
216 h2 = 1;
217 }
218 return {h1, h2};
219 }
220
230 NEFORCE_NODISCARD size_t nth_hash(const size_t i, const size_t h1, const size_t h2) const noexcept {
231 return (h1 + i * h2) % m_;
232 }
233
234public:
235 bloom_filter(const bloom_filter&) noexcept = default;
236 bloom_filter(bloom_filter&&) noexcept = default;
237 bloom_filter& operator=(const bloom_filter&) = default;
238 bloom_filter& operator=(bloom_filter&&) = default;
239
243 ~bloom_filter() = default;
244
253 bloom_filter(const size_t expected_insertions, const double false_positive_prob) :
254 m_(compute_m(expected_insertions, false_positive_prob)),
255 k_(compute_k(expected_insertions, m_)),
256 bits_(m_, false),
257 hasher_(Hash()) {
258 if (expected_insertions == 0 || false_positive_prob <= 0.0 || false_positive_prob >= 1.0) {
259 NEFORCE_THROW_EXCEPTION(
260 value_exception("expected_insertions must be positive and false_positive_prob in (0,1)"));
261 }
262 }
263
272 bloom_filter(const size_t m, const size_t k) :
273 m_(m),
274 k_(k),
275 bits_(m_, false),
276 hasher_(Hash()) {
277 if (m == 0 || k == 0) {
278 NEFORCE_THROW_EXCEPTION(value_exception("m and k must be positive"));
279 }
280 }
281
288 NEFORCE_NODISCARD size_t capacity() const noexcept {
289 return static_cast<size_t>(static_cast<double>(m_) * logarithm_e(2.) / k_);
290 }
291
296 NEFORCE_NODISCARD bool empty() const noexcept {
297 for (size_t i = 0; i < m_; ++i) {
298 if (bits_[i]) {
299 return false;
300 }
301 }
302 return true;
303 }
304
309 NEFORCE_NODISCARD size_t bit_size() const noexcept { return m_; }
310
315 NEFORCE_NODISCARD size_t hash_count() const noexcept { return k_; }
316
323 NEFORCE_NODISCARD size_t approximate_count() const noexcept {
324 size_t bits_set = 0;
325 for (size_t i = 0; i < m_; ++i) {
326 if (bits_[i]) {
327 ++bits_set;
328 }
329 }
330 const double x = static_cast<double>(bits_set) / m_;
331 return static_cast<size_t>(-(static_cast<double>(m_) / k_) * logarithm_e(1 - x));
332 }
333
340 NEFORCE_NODISCARD double false_positive_rate() const noexcept {
341 size_t bits_set = 0;
342 for (size_t i = 0; i < m_; ++i) {
343 if (bits_[i]) {
344 ++bits_set;
345 }
346 }
347 const double x = static_cast<double>(bits_set) / m_;
348 return power(x, k_);
349 }
350
359 NEFORCE_NODISCARD bool contains(const T& key) const noexcept {
360 auto h = this->hash_values(key);
361 for (size_t i = 0; i < k_; ++i) {
362 const size_t index = this->nth_hash(i, h.first, h.second);
363 if (!bits_[index]) {
364 return false;
365 }
366 }
367 return true;
368 }
369
376 void insert(const T& key) noexcept {
377 auto h = this->hash_values(key);
378 for (size_t i = 0; i < k_; ++i) {
379 const size_t index = this->nth_hash(i, h.first, h.second);
380 bits_[index] = true;
381 }
382 }
383
389 void clear() noexcept { fill(bits_.begin(), bits_.end(), false); }
390
400 bloom_filter& merge(const bloom_filter& other) {
401 if (m_ != other.m_ || k_ != other.k_) {
402 NEFORCE_THROW_EXCEPTION(value_exception("Filters must have same parameters to merge"));
403 }
404
405 for (size_t i = 0; i < m_; ++i) {
406 if (other.bits_[i]) {
407 bits_[i] = true;
408 }
409 }
410 return *this;
411 }
412
422 bloom_filter intersect(const bloom_filter& other) const {
423 if (m_ != other.m_ || k_ != other.k_) {
424 NEFORCE_THROW_EXCEPTION(value_exception("Filters must have same parameters for intersection"));
425 }
426
427 bloom_filter result(m_, k_);
428 for (size_t i = 0; i < m_; ++i) {
429 result.bits_[i] = bits_[i] && other.bits_[i];
430 }
431 return result;
432 }
433
442 bloom_filter unite(const bloom_filter& other) const {
443 bloom_filter result(*this);
444 return result.merge(other);
445 }
446
453 NEFORCE_NODISCARD byte_vector to_bytes() const {
454 byte_vector bytes((m_ + 7) / 8);
455 for (size_t i = 0; i < m_; ++i) {
456 if (bits_[i]) {
457 bytes[i / 8] |= (1 << (i % 8));
458 }
459 }
460 return bytes;
461 }
462
470 void from_bytes(const byte_vector& bytes) {
471 if (bytes.size() * 8 < m_) {
472 NEFORCE_THROW_EXCEPTION(value_exception("Insufficient byte data"));
473 }
474 for (size_t i = 0; i < m_; ++i) {
475 bits_[i] = ((bytes[i / 8] >> (i % 8)) & 1) != 0;
476 }
477 }
478};
479 // BloomFilter
481
482NEFORCE_END_NAMESPACE__
483#endif // NEFORCE_CORE_CONTAINER_BLOOM_FILTER_HPP__
位操作函数
位图容器
位图容器
byte_vector to_bytes() const
转换为字节数组
~bloom_filter()=default
析构函数
bloom_filter intersect(const bloom_filter &other) const
交集操作
size_t bit_size() const noexcept
获取位数组大小
bloom_filter(const size_t expected_insertions, const double false_positive_prob)
基于预期插入数和误报率构造
size_t approximate_count() const noexcept
估计当前元素数量
size_t capacity() const noexcept
获取理论容量
bloom_filter & merge(const bloom_filter &other)
合并另一个过滤器
bool contains(const T &key) const noexcept
检查元素是否可能存在
size_t hash_count() const noexcept
获取哈希函数数量
bloom_filter(const size_t m, const size_t k)
基于直接参数构造
void clear() noexcept
清空过滤器
void from_bytes(const byte_vector &bytes)
从字节数组恢复
bool empty() const noexcept
检查过滤器是否为空
bloom_filter unite(const bloom_filter &other) const
并集操作
double false_positive_rate() const noexcept
估计当前误报率
void insert(const T &key) noexcept
插入元素
constexpr size_type size() const noexcept
获取当前元素数量
constexpr uintptr_t rotate_l(const uintptr_t x, const int s) noexcept
整数循环左移
constexpr const T & max(const T &a, const T &b, Compare comp) noexcept(noexcept(comp(a, b)))
返回两个值中的较大者
vector< byte_t > byte_vector
字节向量类型别名
long double decimal_t
扩展精度浮点数类型
constexpr decimal_t ceil(const decimal_t x) noexcept
向上取整
constexpr decimal_t logarithm_e(const decimal_t x) noexcept
计算自然对数
constexpr T power(const T &x, uint32_t n) noexcept
幂运算
constexpr decimal_t round(const decimal_t x) noexcept
四舍五入
constexpr void fill(Iterator first, Iterator last, const T &value) noexcept(is_nothrow_assignable_v< iter_value_t< Iterator >, T >)
填充范围元素
数学函数库
存储两个值的元组对
变量处理异常
动态大小数组容器